Skip to content

Transpose scheduler fails with Resize ops #398

@jacobhinkle

Description

@jacobhinkle

The following fails in TransposeScheduler::canScheduleRuntime using the #397 branch:

import torch
from nvfuser import FusionDefinition, DataType

def nvfuser_fusion_id0(fd : FusionDefinition) -> None :
    """Build the fusion that reproduces the TransposeScheduler crash.

    The slice ops below introduce Resize transforms between root and leaf
    domains, which DomainMap::getInnerLeafDim does not handle (it assumes
    only Split/Merge), triggering the downcast assertion at utils.h:196.
    """
    # Two symbolic-size, fully contiguous float inputs: T0 is rank-3, T1 is rank-4.
    T0 = fd.define_tensor(symbolic_sizes=[-1, -1, -1], contiguity=[True, True, True], dtype=DataType.Float, is_cpu=False)
    T1 = fd.define_tensor(symbolic_sizes=[-1, -1, -1, -1], contiguity=[True, True, True, True], dtype=DataType.Float, is_cpu=False)
    # Transpose dims 1 and 2 of T1 — this is what routes the fusion to the
    # transpose scheduler in the first place.
    T2 = fd.ops.permute(T1, dims=[0, 2, 1, 3])
    # Slice (a Resize op) T0, then reshape to rank-5 so the trailing dim of
    # size 2 can be split into "even"/"odd" halves via further slices.
    T3 = fd.ops.slice(T0, start_indices=[0, 0, 0], end_indices=[1024, 64, 2], strides=[1, 1, 1])
    T4 = fd.ops.reshape(T3, original_shape=[1024, 64, 2], new_shape=[1, 1024, 1, 64, 2])
    T5 = fd.ops.slice(T4, start_indices=[0, 0, 0, 0, 0], end_indices=[1, 1024, 1, 64, 1], strides=[1, 1, 1, 1, 1])
    # Same split-last-dim-of-2 treatment for the permuted T1.
    T6 = fd.ops.reshape(T2, original_shape=[8, 1024, 32, 128], new_shape=[8, 1024, 32, 64, 2])
    T7 = fd.ops.slice(T6, start_indices=[0, 0, 0, 0, 0], end_indices=[8, 1024, 32, 64, 1], strides=[1, 1, 1, 1, 1])
    # Drop the trailing size-1 dim from each sliced half and broadcast the
    # rank-reduced T0-derived half up to T1's shape.
    T8 = fd.ops.squeeze(T7, original_shape=[8, 1024, 32, 64, 1], dims=[4])
    T9 = fd.ops.squeeze(T5, original_shape=[1, 1024, 1, 64, 1], dims=[4])
    T10 = fd.ops.broadcast_in_dim(T9, output_shape=[8, 1024, 32, 64], broadcast_dims=[0, 1, 2, 3])
    T11 = fd.ops.mul(T8, T10)
    T12 = fd.ops.slice(T6, start_indices=[0, 0, 0, 0, 1], end_indices=[8, 1024, 32, 64, 2], strides=[1, 1, 1, 1, 1])
    T13 = fd.ops.squeeze(T12, original_shape=[8, 1024, 32, 64, 1], dims=[4])
    T14 = fd.ops.slice(T4, start_indices=[0, 0, 0, 0, 1], end_indices=[1, 1024, 1, 64, 2], strides=[1, 1, 1, 1, 1])
    T15 = fd.ops.squeeze(T14, original_shape=[1, 1024, 1, 64, 1], dims=[4])
    T16 = fd.ops.broadcast_in_dim(T15, output_shape=[8, 1024, 32, 64], broadcast_dims=[0, 1, 2, 3])
    T17 = fd.ops.mul(T13, T16)
    # T18 = a*c - b*d: real-part of an elementwise complex product over the
    # even/odd halves (pattern resembles rotary position embedding — unconfirmed).
    T18 = fd.ops.sub(T11, T17)
    # The remaining ops recompute the same slices (T19==T12, T21==T5 etc.) to
    # form the matching a*d + b*c imaginary-part combination.
    T19 = fd.ops.slice(T6, start_indices=[0, 0, 0, 0, 1], end_indices=[8, 1024, 32, 64, 2], strides=[1, 1, 1, 1, 1])
    T20 = fd.ops.squeeze(T19, original_shape=[8, 1024, 32, 64, 1], dims=[4])
    T21 = fd.ops.slice(T4, start_indices=[0, 0, 0, 0, 0], end_indices=[1, 1024, 1, 64, 1], strides=[1, 1, 1, 1, 1])
    T22 = fd.ops.squeeze(T21, original_shape=[1, 1024, 1, 64, 1], dims=[4])
    T23 = fd.ops.broadcast_in_dim(T22, output_shape=[8, 1024, 32, 64], broadcast_dims=[0, 1, 2, 3])
    T24 = fd.ops.mul(T20, T23)
    T25 = fd.ops.slice(T6, start_indices=[0, 0, 0, 0, 0], end_indices=[8, 1024, 32, 64, 1], strides=[1, 1, 1, 1, 1])
    T26 = fd.ops.squeeze(T25, original_shape=[8, 1024, 32, 64, 1], dims=[4])
    T27 = fd.ops.slice(T4, start_indices=[0, 0, 0, 0, 1], end_indices=[1, 1024, 1, 64, 2], strides=[1, 1, 1, 1, 1])
    T28 = fd.ops.squeeze(T27, original_shape=[1, 1024, 1, 64, 1], dims=[4])
    T29 = fd.ops.broadcast_in_dim(T28, output_shape=[8, 1024, 32, 64], broadcast_dims=[0, 1, 2, 3])
    T30 = fd.ops.mul(T26, T29)
    T31 = fd.ops.add(T24, T30)
    fd.add_output(T18)
    fd.add_output(T31)

# Record the fusion definition, then execute it; scheduling during execute()
# is where the assertion fires.
with FusionDefinition() as fd:
    nvfuser_fusion_id0(fd)

# CUDA inputs matching the two define_tensor calls. Both as_strided views use
# the natural row-major strides for their shapes (e.g. (128, 2, 1) for
# (4096, 64, 2)), so they are still contiguous, consistent with
# contiguity=[True, ...] above. Note T0's declared slice only reads the first
# 1024 rows of the (4096, 64, 2) input.
inputs = [
    torch.randn((4096, 64, 2), dtype=torch.float32, device='cuda:0').as_strided((4096, 64, 2), (128, 2, 1)),
    torch.randn((8, 32, 1024, 128), dtype=torch.float32, device='cuda:0').as_strided((8, 32, 1024, 128), (4194304, 131072, 128, 1)),
]
fd.execute(inputs)  # raises the INTERNAL ASSERT from DomainMap::getInnerLeafDim

The error we encounter is

RuntimeError: downcast_ptr != nullptr INTERNAL ASSERT FAILED at "/opt/pytorch/nvfuser/csrc/utils.h":196, please report a bug to PyTorch.
This occurs in DomainMap::getInnerLeafDim, which assumes that any transforms between root and leaf domains are either Split or Merge. We should handle the Resize operation there, and we should also add a clearer error message for unhandled expression types, since more of them may be introduced in the future.

Metadata

Metadata

Assignees

Labels

No labels
No labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions