14 changes: 7 additions & 7 deletions colossalai/engine/_base_engine.py
@@ -1,16 +1,16 @@
 #!/usr/bin/env python
 # -*- encoding: utf-8 -*-
 # this code is inspired by the DeepSpeed library and implemented with our own design from scratch
 
-from typing import List, Iterable
+from typing import Iterable, List, Optional, Type
 
+from torch import Tensor
 from torch.nn import Module
 from torch.nn.modules.loss import _Loss
 
-from colossalai.logging import get_dist_logger
-from torch import Tensor
-from colossalai.gemini.ophooks import register_ophooks_recursively, BaseOpHook
-from colossalai.engine.schedule import BaseSchedule, NonPipelineSchedule, PipelineSchedule, InterleavedPipelineSchedule
-from typing import Optional, Type
 from colossalai.engine.gradient_handler import BaseGradientHandler
+from colossalai.engine.schedule import BaseSchedule, InterleavedPipelineSchedule, NonPipelineSchedule, PipelineSchedule
+from colossalai.gemini.ophooks import BaseOpHook, register_ophooks_recursively
+from colossalai.logging import get_dist_logger
@@ -93,7 +93,7 @@ def __init__(self,
         if self.uses_pipeline:
             self._schedule.pre_processing(self)
 
-        #register hook if any
+        # register hook if any
         if len(self._ophook_list) > 0:
             register_ophooks_recursively(self._model, self._ophook_list)
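For context, `register_ophooks_recursively` walks the model tree and attaches each op hook to every submodule. A minimal sketch of that pattern in plain PyTorch (illustrative only; the real implementation in `colossalai.gemini.ophooks` differs in detail and wires up pre/post hooks for both forward and backward):

import torch.nn as nn

def register_hooks_recursively(module: nn.Module, hooks: list) -> None:
    """Attach every hook to this module, then recurse into its children."""
    for hook in hooks:
        # register_forward_pre_hook expects a callable (module, input)
        module.register_forward_pre_hook(hook)
    for child in module.children():
        register_hooks_recursively(child, hooks)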
@@ -1,7 +1,12 @@
 # Copyright (c) Microsoft Corporation.
+
 # Licensed under the MIT License.
 import operator
 from functools import reduce
 from typing import Any, Optional, Tuple, Union
+
 import torch
+
 from ..registry import meta_profiler_function
+
+
@@ -1,8 +1,13 @@
 # Copyright (c) Microsoft Corporation.
+
 # Licensed under the MIT License.
+import math
 import operator
 from functools import reduce
-import math
 from typing import Tuple
+
 import torch
+
 from ..registry import meta_profiler_module
+
+
@@ -1,5 +1,10 @@
 # Copyright (c) Microsoft Corporation.
+
 # Licensed under the MIT License.
 from typing import Tuple, Union
+
 import torch
+
 from ..registry import meta_profiler_module
+
+
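The three hunks above are the same mechanical cleanup: isort's default grouping puts standard-library imports first, third-party packages second, and first-party/local modules last, with a blank line between groups. A minimal sketch of the resulting layout (module names taken from this diff; the grouping comments are added here for exposition):

# group 1: standard library
import operator
from functools import reduce
from typing import Tuple

# group 2: third-party packages
import torch

# group 3: first-party / local modules
from colossalai.logging import get_dist_logger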
6 changes: 3 additions & 3 deletions colossalai/gemini/ophooks/utils.py
@@ -1,7 +1,7 @@
-import torch
-from typing import List, Callable, Optional
-
 # this code is inspired by the DeepSpeed library and implemented with our own design from scratch
 from abc import ABC, abstractmethod
+from typing import Callable, List, Optional
+
+import torch
 
 
@@ -1,6 +1,7 @@
 /* Copyright 2021 The LightSeq Team
 Copyright Microsoft DeepSpeed
 This file is adapted from Microsoft DeepSpeed
+Licensed under the MIT License.
 */
 #include "cublas_wrappers.h"
 
@@ -1,6 +1,7 @@
 /* Copyright 2021 The LightSeq Team
 Copyright Microsoft DeepSpeed
 This file is adapted from Microsoft DeepSpeed
+Licensed under the MIT License.
 */
 #pragma once
 
137 changes: 69 additions & 68 deletions colossalai/kernel/cuda_native/csrc/kernels/include/feed_forward.h
(GitHub recorded this file as a full rewrite; the only content change is the added license line, shown once below.)
@@ -1,68 +1,69 @@
 #pragma once
 
 /* Copyright 2021 The LightSeq Team
 Copyright Microsoft DeepSpeed
 This file is adapted from Microsoft DeepSpeed
+Licensed under the MIT License.
 */
 #include <cuda.h>
 #include <cuda_fp16.h>
 #include <stdio.h>
 
 #include <array>
 
 #include "cublas_wrappers.h"
 #include "kernels.h"
 
 template <typename T>
 class FeedForward {
  public:
   struct Config {
     int outputSize;
     int inputSize;
     std::array<int, 3> gemm_algos;
     Config(int outputs, int inputs)
         : outputSize(outputs),
           inputSize(inputs),
           gemm_algos(std::array<int, 3>({99, 99, 99})) {}
   };
 
   FeedForward(Config config) : config_(config) {}
 
   ~FeedForward() {}
 
   void Forward(int bsz, const T *input_ptr, const T *weights, T *out,
                cublasHandle_t &_cublasHandle) {
     float alpha = T(1.);
     float beta = T(0.);
 
     cublas_gemm_ex(_cublasHandle, CUBLAS_OP_T, CUBLAS_OP_N, config_.outputSize,
                    bsz, config_.inputSize, &alpha, &beta, weights, input_ptr,
                    out, cublasGemmAlgo_t(config_.gemm_algos[0]));
   }
   void Backward(int bsz, const T *out_grad, const T *input_ptr,
                 const T *weights, T *weights_grad, T *bias_grad,
                 cublasHandle_t &_cublasHandle, cudaStream_t &stream,
                 T *inp_grad_out = nullptr, T *out_grad_trans_out = nullptr,
                 bool compute_bias = true) {
     float alpha = (T)1.0, beta = (T)0.0;
     cublas_gemm_ex(_cublasHandle, CUBLAS_OP_N, CUBLAS_OP_T, config_.inputSize,
                    config_.outputSize, bsz, &alpha, &beta, input_ptr, out_grad,
                    weights_grad, cublasGemmAlgo_t(config_.gemm_algos[1]));
 
     cublas_gemm_ex(_cublasHandle, CUBLAS_OP_N, CUBLAS_OP_N, config_.inputSize,
                    bsz, config_.outputSize, &alpha, &beta, weights, out_grad,
                    inp_grad_out, cublasGemmAlgo_t(config_.gemm_algos[2]));
     if (compute_bias) {
       launch_fuse_transpose_bias_kernel<T>(out_grad, bias_grad, bsz,
                                            config_.outputSize, stream);
     }
   }
 
   void reset_size(int outputSize, int inputSize) {
     config_.outputSize = outputSize;
     config_.inputSize = inputSize;
   }
 
  private:
   Config config_;
 };
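cuBLAS assumes column-major storage, so the argument order of these cublas_gemm_ex calls takes some decoding. Reading the dimensions off the three calls, Forward computes a bias-free linear layer and Backward produces its standard gradients; a PyTorch sketch of that interpretation (the tensor names and shapes are my reading of the call sites, not part of the PR):

import torch

def feed_forward_reference(inp: torch.Tensor, weight: torch.Tensor,
                           out_grad: torch.Tensor):
    # Shapes: inp [bsz, inputSize], weight [outputSize, inputSize],
    # out_grad [bsz, outputSize].
    out = inp @ weight.t()            # Forward: GEMM with CUBLAS_OP_T on weights
    weight_grad = out_grad.t() @ inp  # Backward: first GEMM
    inp_grad = out_grad @ weight      # Backward: second GEMM
    bias_grad = out_grad.sum(dim=0)   # fused transpose+bias kernel: reduce over bsz
    return out, weight_grad, inp_grad, bias_grad

Note that out_grad_trans_out is accepted but never used in the visible code path, and compute_bias gates the fused bias-gradient kernel.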