-
Notifications
You must be signed in to change notification settings - Fork 79
Closed
Description
The following fails in `TransposeScheduler::canScheduleRuntime` when using the #397 branch:
import torch
from nvfuser import FusionDefinition, DataType
def nvfuser_fusion_id0(fd: FusionDefinition) -> None:
    """Define the fusion that reproduces the scheduler failure.

    Both inputs are sliced on their innermost extent, reshaped/squeezed
    into matching [8, 1024, 32, 64] shapes, multiplied pairwise, and the
    sub/add results registered as the two fusion outputs.  The exact op
    sequence is load-bearing for the repro -- do not simplify it.
    """
    # Symbolic-size, fully contiguous float inputs (3-D and 4-D).
    T0 = fd.define_tensor(symbolic_sizes=[-1, -1, -1], contiguity=[True, True, True], dtype=DataType.Float, is_cpu=False)
    T1 = fd.define_tensor(symbolic_sizes=[-1, -1, -1, -1], contiguity=[True, True, True, True], dtype=DataType.Float, is_cpu=False)
    # Permute T1 then split its last dim (128 -> 64 x 2) via reshape.
    T2 = fd.ops.permute(T1, dims=[0, 2, 1, 3])
    T3 = fd.ops.slice(T0, start_indices=[0, 0, 0], end_indices=[1024, 64, 2], strides=[1, 1, 1])
    T4 = fd.ops.reshape(T3, original_shape=[1024, 64, 2], new_shape=[1, 1024, 1, 64, 2])
    T5 = fd.ops.slice(T4, start_indices=[0, 0, 0, 0, 0], end_indices=[1, 1024, 1, 64, 1], strides=[1, 1, 1, 1, 1])
    T6 = fd.ops.reshape(T2, original_shape=[8, 1024, 32, 128], new_shape=[8, 1024, 32, 64, 2])
    T7 = fd.ops.slice(T6, start_indices=[0, 0, 0, 0, 0], end_indices=[8, 1024, 32, 64, 1], strides=[1, 1, 1, 1, 1])
    # Drop the size-1 trailing dim, broadcast T5's slice up to T7's shape.
    T8 = fd.ops.squeeze(T7, original_shape=[8, 1024, 32, 64, 1], dims=[4])
    T9 = fd.ops.squeeze(T5, original_shape=[1, 1024, 1, 64, 1], dims=[4])
    T10 = fd.ops.broadcast_in_dim(T9, output_shape=[8, 1024, 32, 64], broadcast_dims=[0, 1, 2, 3])
    T11 = fd.ops.mul(T8, T10)
    # Second halves of the split pairs (offset 1 along the last dim).
    T12 = fd.ops.slice(T6, start_indices=[0, 0, 0, 0, 1], end_indices=[8, 1024, 32, 64, 2], strides=[1, 1, 1, 1, 1])
    T13 = fd.ops.squeeze(T12, original_shape=[8, 1024, 32, 64, 1], dims=[4])
    T14 = fd.ops.slice(T4, start_indices=[0, 0, 0, 0, 1], end_indices=[1, 1024, 1, 64, 2], strides=[1, 1, 1, 1, 1])
    T15 = fd.ops.squeeze(T14, original_shape=[1, 1024, 1, 64, 1], dims=[4])
    T16 = fd.ops.broadcast_in_dim(T15, output_shape=[8, 1024, 32, 64], broadcast_dims=[0, 1, 2, 3])
    T17 = fd.ops.mul(T13, T16)
    T18 = fd.ops.sub(T11, T17)
    # Cross terms: (second half of T6) * (first half of T4) and vice versa.
    T19 = fd.ops.slice(T6, start_indices=[0, 0, 0, 0, 1], end_indices=[8, 1024, 32, 64, 2], strides=[1, 1, 1, 1, 1])
    T20 = fd.ops.squeeze(T19, original_shape=[8, 1024, 32, 64, 1], dims=[4])
    T21 = fd.ops.slice(T4, start_indices=[0, 0, 0, 0, 0], end_indices=[1, 1024, 1, 64, 1], strides=[1, 1, 1, 1, 1])
    T22 = fd.ops.squeeze(T21, original_shape=[1, 1024, 1, 64, 1], dims=[4])
    T23 = fd.ops.broadcast_in_dim(T22, output_shape=[8, 1024, 32, 64], broadcast_dims=[0, 1, 2, 3])
    T24 = fd.ops.mul(T20, T23)
    T25 = fd.ops.slice(T6, start_indices=[0, 0, 0, 0, 0], end_indices=[8, 1024, 32, 64, 1], strides=[1, 1, 1, 1, 1])
    T26 = fd.ops.squeeze(T25, original_shape=[8, 1024, 32, 64, 1], dims=[4])
    T27 = fd.ops.slice(T4, start_indices=[0, 0, 0, 0, 1], end_indices=[1, 1024, 1, 64, 2], strides=[1, 1, 1, 1, 1])
    T28 = fd.ops.squeeze(T27, original_shape=[1, 1024, 1, 64, 1], dims=[4])
    T29 = fd.ops.broadcast_in_dim(T28, output_shape=[8, 1024, 32, 64], broadcast_dims=[0, 1, 2, 3])
    T30 = fd.ops.mul(T26, T29)
    T31 = fd.ops.add(T24, T30)
    fd.add_output(T18)
    fd.add_output(T31)
# Build the fusion, then execute it on strided CUDA inputs; the
# assertion fires during runtime scheduling, not during definition.
with FusionDefinition() as fd:
    nvfuser_fusion_id0(fd)

# Strides are explicit via as_strided: input 0 has row stride 128
# (> 64*2 = 128 contiguous elements would imply -- here it matches a
# padded layout), input 1 is a plain contiguous [8, 32, 1024, 128].
inputs = [
    torch.randn((4096, 64, 2), dtype=torch.float32, device='cuda:0').as_strided((4096, 64, 2), (128, 2, 1)),
    torch.randn((8, 32, 1024, 128), dtype=torch.float32, device='cuda:0').as_strided((8, 32, 1024, 128), (4194304, 131072, 128, 1)),
]
fd.execute(inputs)

The error we encounter is:
RuntimeError: downcast_ptr != nullptr INTERNAL ASSERT FAILED at "/opt/pytorch/nvfuser/csrc/utils.h":196, please report a bug to PyTorch.
This occurs in `DomainMap::getInnerLeafDim`, which assumes that any transforms between root and leaf domains are either `Split` or `Merge`. We should handle the `Resize` operation here, and we should place a better error message here for unhandled expressions, since we may introduce more in the future.
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels