-
Notifications
You must be signed in to change notification settings - Fork 79
Description
While fixing #418 I encountered the following error, which is caused by fusion segmentation (it is not a segfault). First, the offending test:
// Repro for the failure found while fixing #418: a dynamic reshape whose
// split factor is a scalar fusion input (s0), followed by a variance/mean
// reduction over the inner split axis and the two trailing axes.
//
// The reference values are computed with ATen on the equivalently reshaped
// input. They keep the reduced dimensions and use the population variance so
// that they match the fusion outputs shown in the dump below (outputs carry
// broadcast axes of extent 1, and the variance is scaled by
// 1 / (i5 * i3 * i4) with no "- 1" correction).
TEST_F(NVFuserTest, DynamicTransformIssue418_CUDA) {
  auto fusion = std::make_unique<Fusion>();
  FusionGuard fg(fusion.get());

  // 4-D symbolic input plus an integer scalar input used as the split factor.
  auto tv0 = makeSymbolicTensor(4);
  fusion->addInput(tv0);
  auto s0 = IrBuilder::create<Val>(DataType::Int);
  fusion->addInput(s0);

  // Split axis 1 of tv0 into (sh[1] / s0, s0); all other axes are unchanged.
  auto sh = tensor_sizes(tv0);
  auto tv1 = reshape(tv0, {sh[0], div(sh[1], s0), s0, sh[2], sh[3]});
  // Reducing along axis 2 in tv1 is equivalent to a partial reduction across
  // axis 1 of tv0.
  // NOTE(review): the trailing arguments are presumably (correction = 0,
  // keepdim = true); this matches the concretized fusion printed below, but
  // confirm against the variance_mean declaration.
  auto vm = variance_mean(tv1, {2, 3, 4}, 0, true);
  fusion->addOutput(vm.mean);
  fusion->addOutput(vm.var);

  FusionExecutorCache fusion_executor_cache(std::move(fusion));

  auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);
  at::Tensor at0 = at::randn({256, 128, 28, 28}, options);
  // The second input binds s0 = 32, so axis 1 (128) is split into 4 x 32.
  std::vector<c10::IValue> aten_inputs = {at0, 32};
  auto outputs = fusion_executor_cache.runFusionWithInputs(aten_inputs);

  /*
  Concretized Fusion:
  Inputs:
  T0_g[ iS0{i0}, iS1{i2}, iS2{i3}, iS3{i4} ], float
  i5, int64_t
  Outputs:
  T7_g[ iS49{i0}, iS50{( i2 / i5 )}, bS37{1}, bS38{1}, bS39{1} ], float
  T6_g[ iS55{i0}, iS56{( i2 / i5 )}, bS32{1}, bS33{1}, bS34{1} ], float
  %kernel_math {
  T8_l[ iS40{i0}, iS45{4}rf, iS46{( ceilDiv(i2, 4) )}rf, iS42{i3}, iS43{i4} ]
  = view( T0_g[ iS0{i0}, iS1{i2}, iS2{i3}, iS3{i4} ] )
  T2_l[ iS47{i0}, iS48{( i2 / i5 )}, rS15{i5}, rS16{i3}, rS17{i4} ](Avg),
  T3_l[ iS51{i0}, iS52{( i2 / i5 )}, rS20{i5}, rS21{i3}, rS22{i4} ](Var),
  T4_l[ iS57{i0}, iS58{( i2 / i5 )}, rS25{i5}, rS26{i3}, rS27{i4} ](Count)
  = Welford ( T8_l[ iS40{i0}, iS45{4}rf, iS46{( ceilDiv(i2, 4) )}rf, iS42{i3}, iS43{i4} ](Avg),
  allreduce = false )
  T7_g[ iS49{i0}, iS50{( i2 / i5 )}, bS37{1}, bS38{1}, bS39{1} ]
  = broadcast( T2_l[ iS47{i0}, iS48{( i2 / i5 )}, rS15{i5}, rS16{i3}, rS17{i4} ] )
  d17 = (double)(i5);
  d19 = double(1) * d17;
  d21 = (double)(i3);
  d23 = d19 * d21;
  d25 = (double)(i4);
  d27 = d23 * d25;
  d33 = reciprocal(d27);
  T5_l[ iS53{i0}, iS54{( i2 / i5 )} ]
  = T3_l[ iS51{i0}, iS52{( i2 / i5 )}, rS20{i5}, rS21{i3}, rS22{i4} ]
  * d33;
  T6_g[ iS55{i0}, iS56{( i2 / i5 )}, bS32{1}, bS33{1}, bS34{1} ]
  = broadcast( T5_l[ iS53{i0}, iS54{( i2 / i5 )} ] )
  }
  */
  /*
  what(): inferred_val.hasValue() INTERNAL ASSERT FAILED at "/opt/pytorch/nvfuser/csrc/executor.cpp":477,
  please report a bug to PyTorch. Could not launch kernel as program could not infer
  ( i2 / i5 )(i6) for the buffer T7_g[ iS49{i0}, iS50{( i2 / i5 )}, bS37{1}, bS38{1}, bS39{1} ]
  Exception raised from inferShape at /opt/pytorch/nvfuser/csrc/executor.cpp:477 (most recent call first):
  */

  // ATen reference on the same 256 x (4 x 32) x 28 x 28 view of the input.
  // keepdim = true keeps the reduced axes as size-1 dimensions, matching the
  // broadcast axes of T7/T6; unbiased = false selects the population variance
  // (divide by N rather than N - 1), matching the d33 scaling above.
  auto at1 = at0.reshape({256, 4, 32, 28, 28});
  auto atmean = at1.mean({2, 3, 4}, /*keepdim=*/true);
  auto atvar = at1.var({2, 3, 4}, /*unbiased=*/false, /*keepdim=*/true);

  testValidate(
      fusion_executor_cache.fusion(),
      outputs,
      aten_inputs,
      {atmean, atvar},
      __LINE__,
      __FILE__);
}
This error occurs because the Fusion is segmented at tv1 (printed as T8_l[ iS40{i0}, iS45{4}rf, iS46{( ceilDiv(i2, 4) )}rf, iS42{i3}, iS43{i4} ]), since there is a split axis with a reduction along only one branch of the split. In the second segment, that tensor is an input, along with the scalars used to compute T5: i3, i4, and i5. However, i2 is not an input to that segment, so the executor cannot infer the extent ( i2 / i5 ) there, even though the value of i2 is determined by the inputs of the complete fusion (it is the extent of axis 1 of T0).
We could either attempt to add missing input scalars to fusion segments so that each segment will have all inputs necessary to compute any extent expressions contained in that segment, or we could modify bindInputs to be able to bind values to derived extents like i2 / i5, so that that extent would be available directly in the ExpressionEvaluator.