Merged
42 commits
ee9baed  [feat] add zerobubble pp (just a frame now); add POC test for dx_dw; … (duanjunwen, Aug 22, 2024)
c18ef06  [feat] add dw test; (duanjunwen, Aug 23, 2024)
203033e  [fix] fix weight not close; (duanjunwen, Aug 23, 2024)
107230d  [update] update text; (duanjunwen, Aug 26, 2024)
fd5526b  Merge branch 'main' into dev/zero_bubble (duanjunwen, Aug 26, 2024)
1d75045  [feat] add test run_fwd_bwd automatic scheduling; (duanjunwen, Aug 26, 2024)
5e09c8b  [feat] split communication and calculation; fix pop empty send_bwd_bu… (duanjunwen, Aug 27, 2024)
f1c1a87  [feat] add test for p & p grad; (duanjunwen, Aug 27, 2024)
1b4bb2b  [feat] add comments for ZBV func; (duanjunwen, Aug 27, 2024)
283c9ff  [fix] rm useless assign and comments; (duanjunwen, Aug 27, 2024)
9e0bd1a  [fix] fix ci test; add pytest; (duanjunwen, Aug 27, 2024)
8b37323  [feat] add run_fwd_bwd_with_microbatch (replace input) & test; add p… (duanjunwen, Aug 27, 2024)
fe20916  [feat] add apply v_schedule graph; p & p.grad assert err exist; (duanjunwen, Aug 27, 2024)
29383b2  [fix] update (duanjunwen, Aug 28, 2024)
d6e3d7d  [feat] fix ci; add assert; (duanjunwen, Aug 28, 2024)
b5f7b4d  [feat] fix poc format (duanjunwen, Aug 28, 2024)
582ba0d  [feat] fix func name & ci; add comments; (duanjunwen, Aug 28, 2024)
b1419ef  [fix] fix poc test; add comments in poc; (duanjunwen, Aug 28, 2024)
4c4b01b  [feat] add optim backward_b_by_grad (duanjunwen, Aug 29, 2024)
48ba22d  [feat] fix optimizer bwd b & w; support return accum loss & output (duanjunwen, Aug 29, 2024)
6af81d8  [feat] add fwd_bwd_step, run_fwd_only; (duanjunwen, Aug 30, 2024)
8eb6eac  [fix] fix optim bwd; add license for v_schedule; remove redundant att… (duanjunwen, Aug 30, 2024)
a7b767b  [fix] fix communication_map; (duanjunwen, Aug 30, 2024)
6d18d38  [feat] update test; rm comments; (duanjunwen, Sep 2, 2024)
77fe442  [fix] rm zbv in hybridplugin (duanjunwen, Sep 2, 2024)
591a13b  [fix] fix optim bwd; (duanjunwen, Sep 2, 2024)
a48afc4  [fix] fix optim bwd; (duanjunwen, Sep 3, 2024)
ab643c9  [fix] rm output.data after send fwd; (duanjunwen, Sep 3, 2024)
4c1f81c  [fix] fix bwd step if condition; remove useless comments and format i… (duanjunwen, Sep 3, 2024)
b4103f1  [fix] fix detach output & release output; (duanjunwen, Sep 3, 2024)
20503cd  [fix] rm requir_grad for output; (duanjunwen, Sep 3, 2024)
e6e1a97  [fix] fix requir grad position and detach position and input&output l… (duanjunwen, Sep 4, 2024)
2f09c37  [feat] add memory assertation; (duanjunwen, Sep 4, 2024)
4a35834  [fix] fix mem check; (duanjunwen, Sep 4, 2024)
400e5e5  [fix] mem assertation' (duanjunwen, Sep 9, 2024)
35a7b63  [fix] fix mem assertation (duanjunwen, Sep 9, 2024)
a5ec3d4  [fix] fix mem; use a new model shape; only assert mem less and equal … (duanjunwen, Sep 9, 2024)
fed8b15  [fix] fix model zoo import; (duanjunwen, Sep 9, 2024)
7568b34  [fix] fix redundant detach & clone; add buffer assertation in the end; (duanjunwen, Sep 9, 2024)
ce58d8e  [fix] add output_obj_grad assert None at bwd b step; replace input_ob… (duanjunwen, Sep 9, 2024)
8366a78  [fix] update optim state dict assert (include param group & state); f… (duanjunwen, Sep 9, 2024)
6c2a120  [fix] add testcase with microbatch 4; (duanjunwen, Sep 9, 2024)
2 changes: 1 addition & 1 deletion colossalai/booster/plugin/hybrid_parallel_plugin.py
@@ -1103,7 +1103,7 @@ def __init__(
         self.stage_manager = PipelineStageManager(
             self.pg_mesh,
             pipeline_axis=self.pp_axis,
-            enable_interleave=pp_style == "interleaved",
+            enable_interleave=(pp_style == "interleaved"),
             num_model_chunks=num_model_chunks,
             num_layers_per_stage=num_layers_per_stage,
         )
21 changes: 19 additions & 2 deletions colossalai/interface/optimizer.py
@@ -55,8 +55,25 @@ def backward(self, loss: Tensor, *args, **kwargs):
         """
         loss.backward(*args, **kwargs)
 
-    def backward_by_grad(self, tensor: Tensor, grad: Tensor):
-        torch.autograd.backward(tensor, grad)
+    def backward_by_grad(self, tensor: Tensor, grad: Tensor, inputs: Tensor = None, retain_graph: bool = False):
+        """
+        Performs a backward pass for dx or dw:
+        for dx, only dx = w * dy is computed here;
+        for dw, only dw = x * dy is computed here.
+
+        Args:
+            tensor (Tensor): y or loss of the current chunk;
+            grad (Tensor): dy of the current chunk;
+            inputs (Tensor): for dx, x of the current chunk;
+                for dw, w of the current chunk;
+            retain_graph (bool): defaults to False; backward_b passes True
+                so the graph is retained for the later dw pass.
+        """
+        torch.autograd.backward(
+            tensors=tensor,
+            grad_tensors=grad,
+            inputs=inputs,
+            retain_graph=retain_graph,
+        )
 
     def state_dict(self):
         """
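This split is the heart of the zero-bubble schedule: the activation-gradient step (backward_b, producing dx) and the weight-gradient step (backward_w, producing dw) become independently schedulable units. Below is a minimal self-contained sketch of the same split using plain torch.autograd.backward; the layer, tensor names, and shapes are illustrative assumptions, not code from this PR.

import torch
import torch.nn as nn

# Illustrative module and shapes; only the torch.autograd.backward calls
# mirror what backward_by_grad does.
layer = nn.Linear(4, 4)
x = torch.randn(2, 4, requires_grad=True)
y = layer(x)
dy = torch.randn_like(y)  # gradient arriving from the next pipeline stage

# B step: accumulate gradients into the input only; retain the graph so
# the weight gradients can still be computed afterwards.
torch.autograd.backward(tensors=y, grad_tensors=dy, inputs=x, retain_graph=True)
dx = x.grad  # ready to send to the previous stage immediately

# W step: accumulate gradients into the parameters only; the schedule can
# defer this work to fill what would otherwise be a pipeline bubble.
torch.autograd.backward(tensors=y, grad_tensors=dy, inputs=list(layer.parameters()))
assert all(p.grad is not None for p in layer.parameters())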
3 changes: 2 additions & 1 deletion colossalai/pipeline/__init__.py
@@ -1,11 +1,12 @@
 from .p2p import PipelineP2PCommunication
-from .schedule import InterleavedSchedule, OneForwardOneBackwardSchedule, PipelineSchedule
+from .schedule import InterleavedSchedule, OneForwardOneBackwardSchedule, PipelineSchedule, ZeroBubbleVPipeScheduler
 from .stage_manager import PipelineStageManager
 
 __all__ = [
     "PipelineSchedule",
     "OneForwardOneBackwardSchedule",
     "InterleavedSchedule",
+    "ZeroBubbleVPipeScheduler",
     "PipelineP2PCommunication",
     "PipelineStageManager",
 ]
2 changes: 2 additions & 0 deletions colossalai/pipeline/schedule/__init__.py
@@ -1,9 +1,11 @@
 from .base import PipelineSchedule
 from .interleaved_pp import InterleavedSchedule
 from .one_f_one_b import OneForwardOneBackwardSchedule
+from .zero_bubble_pp import ZeroBubbleVPipeScheduler
 
 __all__ = [
     "PipelineSchedule",
     "OneForwardOneBackwardSchedule",
     "InterleavedSchedule",
+    "ZeroBubbleVPipeScheduler",
 ]
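With both __init__.py changes applied, the new scheduler is importable from either level of the package. A quick sanity check (assuming a ColossalAI checkout that includes this PR):

# Both import paths below are established by the two diffs above.
from colossalai.pipeline import ZeroBubbleVPipeScheduler
from colossalai.pipeline.schedule import ZeroBubbleVPipeScheduler as ZBV

assert ZeroBubbleVPipeScheduler is ZBV  # same class, re-exported one level up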