Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
ea50620
Add test
jacobhinkle Sep 13, 2023
ed8f5b1
Use expanded extent and mutated out_id
jacobhinkle Sep 13, 2023
2ae5bf9
Bind tensorviews in PrecomputedValues::bindInputs
jacobhinkle Sep 13, 2023
d615688
Merge branch 'main' into fix_870
jacobhinkle Sep 15, 2023
9f379eb
Merge branch 'main' into fix_870
zasdfgbnm Sep 18, 2023
26a1f44
Bind extents and TensorMetaData instead of tensor itself
jacobhinkle Sep 19, 2023
a21ae8c
Revert "Bind extents and TensorMetaData instead of tensor itself"
jacobhinkle Sep 19, 2023
ef85ea1
Add comment about binding tensors to ExpressionEvaluator
jacobhinkle Sep 19, 2023
3f7e19f
Only bind tv metadata in PrecomputedValues
jacobhinkle Sep 21, 2023
41d6b2d
Add C++ test
jacobhinkle Sep 21, 2023
84ed3b4
Add StructHandle::operator==
jacobhinkle Sep 21, 2023
887e093
Merge remote-tracking branch 'origin/main' into fix_870
jacobhinkle Sep 21, 2023
c9f174a
Silence lint error about TORCH_CUDA_CU_API
jacobhinkle Sep 21, 2023
70c06ba
Use ExpressionEvaluator to build metadata for PV
jacobhinkle Sep 21, 2023
9624ca3
Merge branch 'main' into fix_870
jacobhinkle Sep 21, 2023
b66d638
Merge remote-tracking branch 'origin/main' into fix_870
jacobhinkle Sep 22, 2023
139580f
Bind this and add note about dangers
jacobhinkle Sep 22, 2023
12feadd
Merge remote-tracking branch 'origin/main' into fix_870
jacobhinkle Sep 26, 2023
205c8b2
Merge remote-tracking branch 'origin/main' into fix_870
jacobhinkle Sep 27, 2023
30807f2
Merge branch 'main' into fix_870
jacobhinkle Oct 4, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion csrc/dynamic_transform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ void DynamicTransformConcretizationInfo::analyzeResizes(
"Found non-dynamic Resize in initial concretization info: ",
op->toString());

auto extent_val = expr_eval->evaluate(out_id->extent());
auto extent_val = expr_eval->evaluate(out_id->getMaybeExpandedExtent());
NVF_ERROR(
extent_val.hasValue(),
"Cannot evaluate the extent of a resized domain: ",
Expand Down
27 changes: 27 additions & 0 deletions csrc/evaluator_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <expr_evaluator.h>
#include <instrumentation.h>
#include <ir/utils.h>
#include <tensor_metadata.h>

#include <optional>

Expand Down Expand Up @@ -342,6 +343,32 @@ void PrecomputedValues::bindTensorMetaData(
bindValue(extent->evaluatorIndex(), value);
}
}

// Here we bind TensorMetaData so that GetMetaData expressions can be
// evaluated. Note that we do not bind the at::Tensor itself here since that
// would mean PrecomputedValues will own the tensor. Unlike
// ExpressionEvaluator, PrecomputedValues objects are typically long-lived, so
// we do not want them to own large objects.
// To do this we create a temporary ExpressionEvaluator so that we can compute
// the metadata once, then save it
ExpressionEvaluator ee;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are some sharp edges here. For example, if between the rfactor domain and the root domain there's a split whose factor is a fusion input, then this will fail, because we do not have it bound. But for now, I think it's fine because we do not have such a case. Let's leave this as is. In the future we might need to refactor if this becomes an issue.

Copy link
Collaborator Author

@jacobhinkle jacobhinkle Sep 22, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great point. We should, at a minimum, bind `this` here so that any precomputed values that are already bound will be available to `ee`. I'll also add a comment about this pitfall.

ee.bindPrecomputedValues(this);
ee.bind(tv, tensor);
auto metadata_val = IrBuilder::metadataExpr(tv);
auto metadata = ee.evaluate(metadata_val);
// NOTE: In some cases we may not be able to evaluate metadata. For example,
// if there exists a split expression between the root and rfactor domains
// of tv whose split factor is not able to be evaluated. For that reason,
// calling code should ensure that all inputs required to propagate strides
// from root to allocation domains are already bound to "this" before binding
// a TensorView's metadata.
NVF_ERROR(
metadata.hasValue(),
"Could not evaluate metadata expression for ",
tv->toString(),
" with input tensor ",
tensor);
bindValue(metadata_val->evaluatorIndex(), metadata);
}

NaiveValueMachine::NaiveValueMachine(PrecomputedValues& precomputed_values)
Expand Down
3 changes: 3 additions & 0 deletions csrc/executor_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -722,6 +722,9 @@ ExpressionEvaluator bindInputs(
ExpressionEvaluator expr_eval;
const auto& inputs = kernel->inputs();
for (const auto i : c10::irange(inputs.size())) {
// NOTE: we bind all inputs here, including at::Tensors. This means that
// expr_eval will create a PolymorphicValue containing *args[i], which means
// that at::Tensor's lifetime will be at least as long as that of expr_eval.
expr_eval.bind(inputs[i], *args[i], true);
}

Expand Down
6 changes: 6 additions & 0 deletions csrc/polymorphic_value.h
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,12 @@ class StructHandle {
StructHandle& operator=(const StructHandle& other) = default;
StructHandle& operator=(StructHandle&& other) = default;

//! Shallow equality: two StructHandles compare equal only when they refer
//! to the exact same underlying Struct object (pointer identity — this is
//! not a deep, member-wise comparison).
bool operator==(const StructHandle& other) const {
  return other.struct_ptr_ == struct_ptr_;
}

Comment on lines +184 to +189
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After binding a TensorMetaData handle in PolymorphicValues, without this I was hitting the following error:

what(): ret.has_value() INTERNAL ASSERT FAILED at "/opt/pytorch/nvfuser/lib/dynamic_type/src/dynamic_type/dynamic_type.h":741, please report a bug with repro script to NVFuser at https://github.com/NVIDIA/Fuser/issues. Cannot compute N7nvfuser12StructHandleE == N7nvfuser12StructHandleE : incompatible type
Exception raised from operator== at /opt/pytorch/nvfuser/lib/dynamic_type/src/dynamic_type/dynamic_type.h:741 (most recent call first):

Adding this shallow pointer comparison fixes it, but @zasdfgbnm please let me know if this is safe to add.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alternatively, I could add another special case in PolymorphicValue_functions::isSame?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this makes sense

template <typename T>
bool is() const {
return std::dynamic_pointer_cast<T>(struct_ptr_) != nullptr;
Expand Down
21 changes: 21 additions & 0 deletions python_tests/test_python_frontend.py
Original file line number Diff line number Diff line change
Expand Up @@ -2481,6 +2481,27 @@ def fusion_func(fd: FusionDefinition) -> None:
self.assertEqual(nvf_out[0].shape, (0, 0))
self.assertEqual(nvf_out[1].shape, (0, 0))

# Test that a pad of an expanded empty tensor works properly
# See https://github.com/NVIDIA/Fuser/issues/596#issuecomment-1714465618
def test_pad_expanded_empty(self):
    """Pad an empty tensor that is expanded (stride-0) in some non-zero dims.

    Regression test for
    https://github.com/NVIDIA/Fuser/issues/596#issuecomment-1714465618
    """
    pad_widths = [0, 0, 1, 1, 1, 0]
    fill_value = -3.70753

    # Zero-element tensor viewed with stride 0 as shape (2, 0, 3): dims 0
    # and 2 are expanded, dim 1 is genuinely empty.
    base = torch.randn((0,), dtype=torch.float64, device="cuda:0")
    inputs = [base.as_strided((2, 0, 3), (0, 0, 0))]

    def fusion_func(fd: FusionDefinition) -> None:
        T0 = fd.from_pytorch(inputs[0])
        S1 = fd.define_scalar(fill_value, dtype=DataType.Double)
        T2 = fd.ops.pad(T0, pad_widths, S1)
        fd.add_output(T2)

    nvf_out, _ = self.exec_nvfuser(fusion_func, inputs)

    # Compare against eager-mode constant padding with the same widths.
    torch_ref = F.pad(inputs[0], pad_widths, "constant", fill_value)

    self.assertEqual(nvf_out[0], torch_ref)


if __name__ == "__main__":
run_tests()
47 changes: 47 additions & 0 deletions test/test_resize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3107,4 +3107,51 @@ TEST_F(ResizeTest, CatOfExpandedBroadcast) {
NVF_CHECK(ref.equal(cg_outputs[0]));
}

// Test that an empty input which is expanded in some non-zero directions can be
// padded in the empty dim as well as the expanded dims.
// This should match test_python_frontend.py::test_pad_expanded_empty
// See https://github.com/NVIDIA/Fuser/issues/870
TEST_F(ResizeTest, PadExpandedEmpty) {
  auto fusion_ptr = std::make_unique<Fusion>();
  auto& fusion = *fusion_ptr;
  FusionGuard fg(&fusion);

  // Symbolic extents so the concrete input shape is not baked into the Fusion
  auto i0 = IrBuilder::create<Val>(DataType::Index);
  auto i1 = IrBuilder::create<Val>(DataType::Index);
  auto i2 = IrBuilder::create<Val>(DataType::Index);

  // Dims 0 and 2 are expanded broadcasts; dim 1 is a real (empty) dimension
  auto tv0 = TensorViewBuilder()
                 .shape({i0, i1, i2})
                 .expanded({true, false, true})
                 .dtype(DataType::Double)
                 .build();
  fusion.addInput(tv0);

  // Constant fill value for the pad (matches the Python test)
  auto s0 = IrBuilder::create<Val>(-3.70753);

  // Pad widths come in per-dimension pairs; this mirrors the Python test's
  // [0, 0, 1, 1, 1, 0] — presumably innermost-dimension-first as in F.pad
  // (TODO confirm against pad()'s documented convention). The empty dim and
  // one expanded dim are padded; the innermost dim is left unpadded.
  std::vector<Val*> pad_widths(
      {fusion.zeroVal(DataType::Index),
       fusion.zeroVal(DataType::Index),
       fusion.oneVal(DataType::Index),
       fusion.oneVal(DataType::Index),
       fusion.oneVal(DataType::Index),
       fusion.zeroVal(DataType::Index)});
  auto tv1 = pad(tv0, pad_widths, s0);
  fusion.addOutput(tv1);

  auto options = at::TensorOptions().dtype(at::kDouble).device(at::kCUDA, 0);

  // Zero-element tensor viewed with stride 0 as shape {2, 0, 3}
  auto t0 = at::randn({0}, options).as_strided({2, 0, 3}, {0, 0, 0});
  std::vector<c10::IValue> aten_inputs({t0});

  FusionExecutorCache executor_cache(std::move(fusion_ptr));
  auto cg_outputs = executor_cache.runFusionWithInputs(aten_inputs);

  // NOTE: removed leftover debug std::cout prints of t0 and its strides;
  // validation below is the actual check.
  testValidate(
      executor_cache.fusion(), cg_outputs, aten_inputs, __LINE__, __FILE__);
}

} // namespace nvfuser