Skip to content

Extent root scalars can get lost during segmentation #629

@jacobhinkle

Description

@jacobhinkle

While fixing #418 I encountered this segmentation error. First, the offending test:

// Reproducer: reshape a 4D symbolic tensor so that axis 1 is split by a scalar
// fusion input, then take variance_mean over axes {2, 3, 4} of the reshaped
// tensor and run the fusion through FusionExecutorCache. The block comments in
// the body record the concretized fusion and the error observed for this test.
TEST_F(NVFuserTest, DynamicTransformIssue418_CUDA) {
  auto fusion = std::make_unique<Fusion>();
  FusionGuard fg(fusion.get());

  // Fusion inputs: a 4D symbolic tensor and an Int scalar.
  auto tv0 = makeSymbolicTensor(4);
  fusion->addInput(tv0);
  auto s0 = IrBuilder::create<Val>(DataType::Int);
  fusion->addInput(s0);

  // Reshape [sh0, sh1, sh2, sh3] -> [sh0, sh1 / s0, s0, sh2, sh3], i.e. axis 1
  // of tv0 becomes two axes with s0 as the inner extent.
  auto sh = tensor_sizes(tv0);
  auto tv1 = reshape(tv0, {sh[0], div(sh[1], s0), s0, sh[2], sh[3]});
  // Reducing along axis 2 in tv1 is equivalent to a partial reduction across
  // axis 1 of tv0.
  // NOTE(review): the trailing arguments (0, true) are presumably the
  // correction and keepdim flags -- confirm against variance_mean's signature.
  auto vm = variance_mean(tv1, {2, 3, 4}, 0, true);
  fusion->addOutput(vm.mean);
  fusion->addOutput(vm.var);

  FusionExecutorCache fusion_executor_cache(std::move(fusion));

  // Concrete inputs: a {256, 128, 28, 28} float CUDA tensor and s0 = 32, so
  // the reshaped axis 1 has extent 128 / 32 = 4.
  auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);
  at::Tensor at0 = at::randn({256, 128, 28, 28}, options);
  std::vector<c10::IValue> aten_inputs = {at0, 32};
  auto outputs = fusion_executor_cache.runFusionWithInputs(aten_inputs);
  // Dump of the concretized fusion for the inputs above. Note that the output
  // extents are expressed as ( i2 / i5 ).
  /*
Concretized Fusion:
Inputs:
  T0_g[ iS0{i0}, iS1{i2}, iS2{i3}, iS3{i4} ], float
  i5, int64_t
Outputs:
  T7_g[ iS49{i0}, iS50{( i2 / i5 )}, bS37{1}, bS38{1}, bS39{1} ], float
  T6_g[ iS55{i0}, iS56{( i2 / i5 )}, bS32{1}, bS33{1}, bS34{1} ], float
%kernel_math {
T8_l[ iS40{i0}, iS45{4}rf, iS46{( ceilDiv(i2, 4) )}rf, iS42{i3}, iS43{i4} ]
  = view( T0_g[ iS0{i0}, iS1{i2}, iS2{i3}, iS3{i4} ] )
T2_l[ iS47{i0}, iS48{( i2 / i5 )}, rS15{i5}, rS16{i3}, rS17{i4} ](Avg),
T3_l[ iS51{i0}, iS52{( i2 / i5 )}, rS20{i5}, rS21{i3}, rS22{i4} ](Var),
T4_l[ iS57{i0}, iS58{( i2 / i5 )}, rS25{i5}, rS26{i3}, rS27{i4} ](Count)
 = Welford ( T8_l[ iS40{i0}, iS45{4}rf, iS46{( ceilDiv(i2, 4) )}rf, iS42{i3}, iS43{i4} ](Avg),
  allreduce = false )
T7_g[ iS49{i0}, iS50{( i2 / i5 )}, bS37{1}, bS38{1}, bS39{1} ]
  = broadcast( T2_l[ iS47{i0}, iS48{( i2 / i5 )}, rS15{i5}, rS16{i3}, rS17{i4} ] )
d17 = (double)(i5);
d19 = double(1) * d17;
d21 = (double)(i3);
d23 = d19 * d21;
d25 = (double)(i4);
d27 = d23 * d25;
d33 = reciprocal(d27);
T5_l[ iS53{i0}, iS54{( i2 / i5 )} ]
  = T3_l[ iS51{i0}, iS52{( i2 / i5 )}, rS20{i5}, rS21{i3}, rS22{i4} ]
  * d33;
T6_g[ iS55{i0}, iS56{( i2 / i5 )}, bS32{1}, bS33{1}, bS34{1} ]
  = broadcast( T5_l[ iS53{i0}, iS54{( i2 / i5 )} ] )
}
  */
  // Error observed when running this test (presumably thrown by the
  // runFusionWithInputs call above): the extent ( i2 / i5 ) of output T7 could
  // not be inferred.
  /*
  what():  inferred_val.hasValue() INTERNAL ASSERT FAILED at "/opt/pytorch/nvfuser/csrc/executor.cpp":477,
 please report a bug to PyTorch. Could not launch kernel as program could not infer
 ( i2 / i5 )(i6) for the buffer T7_g[ iS49{i0}, iS50{( i2 / i5 )}, bS37{1}, bS38{1}, bS39{1} ]
Exception raised from inferShape at /opt/pytorch/nvfuser/csrc/executor.cpp:477 (most recent call first):
  */
  // ATen reference: split axis 1 (128) into 4 x 32 and reduce over axes
  // {2, 3, 4}.
  // NOTE(review): the fusion outputs in the dump above keep size-1 broadcast
  // axes and variance_mean was given (0, true), whereas mean/var here receive
  // only the reduction dims -- confirm the reference uses the same correction
  // and keepdim settings as the fusion.
  auto at1 = at0.reshape({256, 4, 32, 28, 28});
  auto atmean = at1.mean({2, 3, 4});
  auto atvar = at1.var({2, 3, 4});
  testValidate(
      fusion_executor_cache.fusion(),
      outputs,
      aten_inputs,
      {atmean, atvar},
      __LINE__,
      __FILE__);
}

This error occurs because the Fusion is segmented at tv1 (printed as T8_l[ iS40{i0}, iS45{4}rf, iS46{( ceilDiv(i2, 4) )}rf, iS42{i3}, iS43{i4} ]), since there is a split axis with a reduction along only one branch of the split. In the second segment, that tensor is an input, along with the scalars used to compute T5: i3, i4, i5. However, i2 is not an input to that segment, so the extent ( i2 / i5 ) cannot be evaluated there, even though i2 is available in the complete fusion as an extent of the input T0.

We could either attempt to add missing input scalars to fusion segments so that each segment will have all inputs necessary to compute any extent expressions contained in that segment, or we could modify bindInputs to be able to bind values to derived extents like i2 / i5, so that that extent would be available directly in the ExpressionEvaluator.

Metadata

Metadata

Assignees

Labels

No labels
No labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions