Merged
Changes from all commits
63 commits
04e5272 Merge pull request #1 from hpcaitech/main (Cypher30, Jul 14, 2022)
75618b3 Merge pull request #2 from hpcaitech/main (Cypher30, Jul 15, 2022)
3e4620c Merge pull request #3 from hpcaitech/main (Cypher30, Jul 20, 2022)
cf24049 Merge remote-tracking branch 'upstream/main' into main (Jul 20, 2022)
3d223b6 Merge remote-tracking branch 'upstream/main' into main (Jul 21, 2022)
644115c Merge branch 'hpcaitech:main' into main (Cypher30, Jul 22, 2022)
d995ade Merge branch 'hpcaitech:main' into main (Cypher30, Jul 25, 2022)
bba2dbe Merge branch 'hpcaitech:main' into main (Cypher30, Jul 26, 2022)
05ca628 Merge branch 'hpcaitech:main' into main (Cypher30, Jul 26, 2022)
0a967da Merge branch 'hpcaitech:main' into main (Cypher30, Aug 6, 2022)
0637c0d Merge branch 'hpcaitech:main' into main (Cypher30, Aug 8, 2022)
74a6227 Merge branch 'hpcaitech:main' into main (Cypher30, Aug 10, 2022)
e550490 Merge branch 'hpcaitech:main' into main (Cypher30, Aug 10, 2022)
2d7f5d9 Merge branch 'hpcaitech:main' into main (Cypher30, Aug 11, 2022)
b62e870 Merge branch 'hpcaitech:main' into main (Cypher30, Aug 12, 2022)
b4b0974 Merge branch 'hpcaitech:main' into main (Cypher30, Aug 15, 2022)
65c20de Merge branch 'hpcaitech:main' into main (Cypher30, Aug 16, 2022)
1660bfc Merge branch 'hpcaitech:main' into main (Cypher30, Aug 17, 2022)
6eb0ad0 Merge branch 'hpcaitech:main' into main (Cypher30, Aug 20, 2022)
56df059 Merge branch 'hpcaitech:main' into main (Cypher30, Aug 26, 2022)
480e932 Merge branch 'hpcaitech:main' into main (Cypher30, Aug 30, 2022)
0fa66ee Merge branch 'hpcaitech:main' into main (Cypher30, Aug 30, 2022)
1d013b0 Merge branch 'hpcaitech:main' into main (Cypher30, Aug 31, 2022)
5774db2 Merge branch 'hpcaitech:main' into main (Cypher30, Sep 5, 2022)
e8ff699 Merge branch 'hpcaitech:main' into main (Cypher30, Sep 6, 2022)
855c728 Merge branch 'hpcaitech:main' into main (Cypher30, Sep 7, 2022)
2c113ea Merge branch 'main' of github.com:Cypher30/ColossalAI into main (Sep 8, 2022)
838ba70 Merge branch 'hpcaitech:main' into main (Cypher30, Sep 13, 2022)
cacec2b Merge branch 'main' of github.com:Cypher30/ColossalAI into main (Sep 13, 2022)
5ed6ef0 Merge branch 'hpcaitech:main' into main (Cypher30, Sep 14, 2022)
668af30 Merge branch 'hpcaitech:main' into main (Cypher30, Sep 14, 2022)
df79772 Merge branch 'hpcaitech:main' into main (Cypher30, Sep 15, 2022)
7b6a0fc Merge branch 'hpcaitech:main' into main (Cypher30, Sep 20, 2022)
c30022e Merge branch 'hpcaitech:main' into main (Cypher30, Sep 23, 2022)
df20f4d Merge branch 'hpcaitech:main' into main (Cypher30, Sep 26, 2022)
2d5a6a0 Merge branch 'hpcaitech:main' into main (Cypher30, Sep 29, 2022)
07d27a6 Merge branch 'hpcaitech:main' into main (Cypher30, Oct 3, 2022)
dc68ba9 Merge branch 'hpcaitech:main' into main (Cypher30, Oct 5, 2022)
929e7d3 Merge branch 'hpcaitech:main' into main (Cypher30, Oct 6, 2022)
90aa46a Merge branch 'hpcaitech:main' into main (Cypher30, Oct 11, 2022)
40363da Merge branch 'hpcaitech:main' into main (Cypher30, Oct 13, 2022)
fe3fca5 Merge branch 'hpcaitech:main' into main (Cypher30, Oct 25, 2022)
956156e Merge branch 'hpcaitech:main' into main (Cypher30, Oct 26, 2022)
cb20212 Merge branch 'hpcaitech:main' into main (Cypher30, Oct 26, 2022)
744a775 Merge branch 'hpcaitech:main' into main (Cypher30, Nov 10, 2022)
1629a90 Merge branch 'hpcaitech:main' into main (Cypher30, Nov 10, 2022)
f0558e3 Merge branch 'hpcaitech:main' into main (Cypher30, Dec 1, 2022)
bb7bd4a Merge branch 'hpcaitech:main' into main (Cypher30, Dec 21, 2022)
26de8e5 Merge branch 'hpcaitech:main' into main (Cypher30, Dec 23, 2022)
83a1418 Merge branch 'hpcaitech:main' into main (Cypher30, Dec 30, 2022)
0802b94 Merge branch 'hpcaitech:main' into main (Cypher30, Dec 30, 2022)
16ea2c7 Merge branch 'hpcaitech:main' into main (Cypher30, Dec 31, 2022)
44df1f9 Merge branch 'hpcaitech:main' into main (Cypher30, Jan 2, 2023)
fcd117c Merge branch 'hpcaitech:main' into main (Cypher30, Jan 9, 2023)
f20106e Merge branch 'hpcaitech:main' into main (Cypher30, Feb 2, 2023)
e7cf700 Merge branch 'hpcaitech:main' into main (Cypher30, Feb 8, 2023)
4f6bd1c Merge branch 'hpcaitech:main' into main (Cypher30, Feb 15, 2023)
afe135d Merge branch 'hpcaitech:main' into main (Cypher30, Feb 16, 2023)
4ac66bd Merge branch 'hpcaitech:main' into main (Cypher30, Feb 17, 2023)
956dbb4 Merge branch 'hpcaitech:main' into main (Cypher30, Feb 18, 2023)
8f1b9dc [autoparallel] patch meta information of torch.where (Feb 19, 2023)
73fd657 Merge branch 'main' into feature/where_meta_info (Cypher30, Feb 20, 2023)
834625a [autoparallel] pre-commit modified (Feb 20, 2023)
colossalai/auto_parallel/meta_profiler/meta_registry/__init__.py
@@ -6,3 +6,4 @@
from .norm import *
from .pooling import *
from .tensor import *
from .where import *
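This one-line `__init__.py` change is what activates the new meta function: registration happens at import time, because `where.py` (next file) decorates `where_meta_info` with `@meta_register.register(torch.where)`, so the module must be imported for `torch.where` to be resolvable in the registry. The sketch below shows how such a decorator-based registry typically works; it is illustrative only and not part of this diff (the real `meta_register` comes from `..registry`, i.e. `colossalai/auto_parallel/meta_profiler/registry.py`, and may differ in detail):

class Registry:
    """Illustrative decorator-based registry; the real meta_register may differ."""

    def __init__(self):
        self._store = {}

    def register(self, source):
        # used as @meta_register.register(torch.where): stores func under the op
        def wrapper(func):
            self._store[source] = func
            return func

        return wrapper

    def get(self, source):
        return self._store[source]

meta_register = Registry()    # module-level singleton, populated as modules are imported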
60 changes: 60 additions & 0 deletions colossalai/auto_parallel/meta_profiler/meta_registry/where.py
@@ -0,0 +1,60 @@
from typing import List, Tuple

import torch

from colossalai.auto_parallel.tensor_shard.sharding_strategy import MemoryCost, OperationDataType, TrainCycleItem
from colossalai.fx.profiler.memory_utils import activation_size
from colossalai.fx.profiler.opcount import flop_mapping

from ..registry import meta_register

__all__ = ["where_meta_info"]


@meta_register.register(torch.where)
def where_meta_info(*args, **kwargs) -> Tuple[TrainCycleItem, TrainCycleItem, List[torch.Tensor], List[torch.Tensor], List[torch.Tensor]]:
    """torch.where meta information generator

    Returns:
        Tuple[TrainCycleItem, TrainCycleItem, List[torch.Tensor], List[torch.Tensor], List[torch.Tensor]]:
            compute cost, memory cost, forward inputs, forward buffers and forward outputs
    """

    condition_tensor, x_tensor, y_tensor, output_tensor = [arg.data for arg in args]

    # compute cost
    # the forward pass is a pure element-wise select, so its FLOP count is treated as zero
    fwd_compute_cost = 0

    # if x or y was broadcast to the output shape in the forward pass, the backward pass
    # needs a reduce_sum to shrink the gradient from the output shape back to the input shape
    bwd_compute_cost = 0
    if x_tensor.shape != output_tensor.shape:
        bwd_compute_cost += flop_mapping[torch.ops.aten.sum.dim_IntList]([output_tensor], [x_tensor])
    if y_tensor.shape != output_tensor.shape:
        bwd_compute_cost += flop_mapping[torch.ops.aten.sum.dim_IntList]([output_tensor], [y_tensor])

    compute_cost = TrainCycleItem(fwd=fwd_compute_cost, bwd=bwd_compute_cost, total=fwd_compute_cost + bwd_compute_cost)

    # memory cost
    # during the forward phase, torch.where allocates memory for the output tensor and the condition tensor
    # during the backward phase, torch.where allocates temp memory 3 times the size of the output tensor,
    # generates the gradient matrices for input x and input y, then frees the temp memory and the condition
    # tensor saved in the forward phase
    # NOTE: currently the SPMD solver always assumes that a new input tensor is created in the forward phase
    fwd_mem_cost = MemoryCost(activation=activation_size([condition_tensor, x_tensor, y_tensor, output_tensor]))
    bwd_mem_cost = MemoryCost(activation=activation_size([x_tensor, y_tensor]) - activation_size([condition_tensor]),
                              parameter=0,
                              temp=activation_size([output_tensor]) * 3 + activation_size([condition_tensor]) -
                              activation_size([x_tensor, y_tensor]),
                              buffer=0)

    total_mem_cost = MemoryCost(activation=fwd_mem_cost.activation + bwd_mem_cost.activation,
                                parameter=fwd_mem_cost.parameter + bwd_mem_cost.parameter,
                                temp=fwd_mem_cost.temp + bwd_mem_cost.temp,
                                buffer=fwd_mem_cost.buffer + bwd_mem_cost.buffer)

    memory_cost = TrainCycleItem(fwd=fwd_mem_cost, bwd=bwd_mem_cost, total=total_mem_cost)

    # store fwd_in, fwd_buffer, fwd_out
    fwd_in = [condition_tensor]
    fwd_buffer = []
    fwd_out = [output_tensor]

    return compute_cost, memory_cost, fwd_in, fwd_buffer, fwd_out
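To make the memory accounting above concrete, here is a back-of-the-envelope check using the same shapes as the test below. It assumes `activation_size` returns the total byte count of its tensors (numel times element size); `_bytes` is a hypothetical stand-in for it, so the numbers are illustrative:

import torch

def _bytes(*tensors):
    # hypothetical stand-in for activation_size: total bytes of the given tensors
    return sum(t.numel() * t.element_size() for t in tensors)

condition = torch.empty(1, 1, 1024, 1024, dtype=torch.bool, device="meta")    # 1 MiB
x = torch.empty(8, 16, 1024, 1024, device="meta")                             # 512 MiB (fp32)
y = torch.empty((), device="meta")                                            # 4 B scalar
output = torch.empty(8, 16, 1024, 1024, device="meta")                        # 512 MiB (fp32)

fwd_activation = _bytes(condition, x, y, output)                  # ~1025 MiB saved in forward
bwd_activation = _bytes(x, y) - _bytes(condition)                 # ~511 MiB: grads of x and y, condition freed
bwd_temp = 3 * _bytes(output) + _bytes(condition) - _bytes(x, y)  # ~1025 MiB transient in backward

print(fwd_activation / 2**20, bwd_activation / 2**20, bwd_temp / 2**20)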
@@ -0,0 +1,104 @@
import pytest
import torch
import torch.multiprocessing as mp
import torch.nn as nn

from colossalai.auto_parallel.tensor_shard.node_handler import LinearModuleHandler
from colossalai.auto_parallel.tensor_shard.sharding_strategy import (
MemoryCost,
OperationData,
OperationDataType,
ShardingStrategy,
StrategiesVector,
TrainCycleItem,
)
from colossalai.device.device_mesh import DeviceMesh
from colossalai.fx import ColoGraphModule, ColoTracer
from colossalai.initialize import launch
from colossalai.logging import disable_existing_loggers
from colossalai.testing.pytest_wrapper import run_on_environment_flag
from colossalai.testing.utils import parameterize, rerun_if_address_is_in_use
from colossalai.utils import free_port
from tests.test_auto_parallel.test_tensor_shard.test_metainfo.utils import print_results

if torch.__version__ >= '1.12.0':
    from colossalai.auto_parallel.meta_profiler import MetaInfo, meta_register


@pytest.mark.skipif(torch.__version__ < '1.12.0', reason="need pytorch 1.12.0 or higher for aten level operations")
def test_where_meta_info():
    meta_func = meta_register.get(torch.where)

    # construct meta tensors
    condition_tensor = torch.rand(1, 1, 1024, 1024) > 0.5
    condition_tensor = condition_tensor.to(device="meta")
    x_tensor = torch.rand(8, 16, 1024, 1024, device="meta")
    y_tensor = torch.tensor(0, device="meta")
    # only the shape/dtype of the output matter for the estimate, so a meta tensor suffices
    output_tensor = torch.rand(8, 16, 1024, 1024, device="meta")

    # construct operation data
    condition_data = OperationData(
        name="condition",
        data=condition_tensor,
        type=OperationDataType.ARG,
        logical_shape=condition_tensor.shape,
    )
    x_data = OperationData(
        name="x",
        data=x_tensor,
        type=OperationDataType.ARG,
        logical_shape=x_tensor.shape,
    )
    y_data = OperationData(
        name="y",
        data=y_tensor,
        type=OperationDataType.ARG,
        logical_shape=y_tensor.shape,
    )
    output_data = OperationData(
        name="output",
        data=output_tensor,
        type=OperationDataType.OUTPUT,
        logical_shape=output_tensor.shape,
    )

    # construct args and kwargs
    args = [condition_data, x_data, y_data, output_data]
    kwargs = {'inplace': False}

    # estimated results
    compute_cost, memory_cost, fwd_in, fwd_buffer, fwd_out = meta_func(*args, **kwargs)

    # actual results
    condition_real_tensor = torch.rand(1, 1, 1024, 1024) > 0.5
    condition_real_tensor = condition_real_tensor.to(device="cuda")
    x_real_tensor = torch.rand(8, 16, 1024, 1024, device="cuda")
    y_real_tensor = torch.tensor(0.0, device="cuda")

    x_real_tensor.requires_grad = True
    y_real_tensor.requires_grad = True

    # fwd
    torch.cuda.reset_peak_memory_stats()
    mem_stamp0 = torch.cuda.memory_allocated()
    output_real_tensor = torch.where(condition_real_tensor, x_real_tensor, y_real_tensor)
    fwd_allocated = torch.cuda.memory_allocated() - mem_stamp0
    fwd_peak = torch.cuda.max_memory_allocated() - mem_stamp0

    # bwd
    upstream_grad = torch.rand_like(output_real_tensor)
    torch.cuda.reset_peak_memory_stats()
    mem_stamp0 = torch.cuda.memory_allocated()
    torch.autograd.backward(output_real_tensor, upstream_grad)
    bwd_allocated = torch.cuda.memory_allocated() - mem_stamp0
    bwd_peak = torch.cuda.max_memory_allocated() - mem_stamp0

    compute_cost: TrainCycleItem
    memory_cost: TrainCycleItem

    print_results([condition_real_tensor, x_real_tensor, y_real_tensor], [output_real_tensor], compute_cost,
                  memory_cost, fwd_allocated, fwd_peak, bwd_allocated, bwd_peak)


if __name__ == '__main__':
    test_where_meta_info()
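Note that `print_results` from the shared test utilities only prints the estimated and measured numbers side by side; nothing is asserted. A stricter variant of the test could compare the two directly; the helper below is hypothetical and the tolerance is chosen arbitrarily:

def assert_close_enough(estimated: int, measured: int, rel_tol: float = 0.1):
    # hypothetical check: fail if the meta estimate deviates from the
    # measurement by more than rel_tol (10% here, chosen arbitrarily)
    assert abs(estimated - measured) <= rel_tol * max(measured, 1), \
        f"estimate {estimated} vs measured {measured}"

# e.g. after the backward pass above:
# assert_close_enough(memory_cost.bwd.activation, bwd_allocated)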