From e10716b05e14ce14862c5a97f5b034400dfa1600 Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Tue, 7 May 2024 22:23:06 +0000 Subject: [PATCH 01/30] linear op ir node --- csrc/ir/internal_nodes.h | 43 +++++++++++++++++++++++ csrc/ir/nodes.cpp | 44 +++++++++++++++++++++++ csrc/ops/composite.cpp | 75 +++++++++++++++++++++++----------------- 3 files changed, 130 insertions(+), 32 deletions(-) diff --git a/csrc/ir/internal_nodes.h b/csrc/ir/internal_nodes.h index ba9608909db..5125fdc8f94 100644 --- a/csrc/ir/internal_nodes.h +++ b/csrc/ir/internal_nodes.h @@ -2288,4 +2288,47 @@ class MatmulOp : public Expr { const std::vector& inputs) const override; }; +//! Linear Operator to be expression evaluated without decomposition. +class LinearOp : public Expr { + public: + using Expr::Expr; + + LinearOp(IrBuilderPasskey, Val* out, Val* in_a, Val* in_b); + + NVFUSER_DECLARE_CLONE_AND_CREATE + + const char* getOpString() const override { + return "LinearOp"; + } + + std::string toString(int indent_size = 0) const override; + std::string toInlineString(int indent_size = 0) const override; + + Val* out() const { + return output(0); + } + + Val* inA() const { + return input(0); + } + + Val* inB() const { + return input(1); + } + + Val* bias() const { + if (has_bias_) { + return input(2); + } else { + return nullptr; + } + } + + std::vector evaluate( + const ExpressionEvaluator& ee, + const std::vector& inputs) const override; +}; +private: + bool has_bias_ = false; + } // namespace nvfuser diff --git a/csrc/ir/nodes.cpp b/csrc/ir/nodes.cpp index 879504c07e2..db7d0c207b9 100644 --- a/csrc/ir/nodes.cpp +++ b/csrc/ir/nodes.cpp @@ -4501,4 +4501,48 @@ std::vector MatmulOp::evaluate( return {at::matmul(a, b)}; } +LinearOp::LinearOp(IrBuilderPasskey passkey, Val* out, Val* in_a, Val* in_b, Val* bias) + : Expr(passkey) { + addOutput(out); + addInput(in_a); + addInput(in_b); + + if (bias != nullptr){ + this->has_bias_ = true; + addInput(bias); + } +} + +NVFUSER_DEFINE_CLONE_AND_CREATE(LinearOp) + +std::string LinearOp::toString(int indent_size) const { + std::stringstream ss; + indent(ss, indent_size) << out()->toString() << "\n"; + indent(ss, indent_size + 1) << " = linear(" << inA()->toString() << ",\n"; + indent(ss, indent_size + 1) << " " << inB()->toString(); + if (this->has_bias_){ + indent(ss, indent_size + 1) << ",\n " << bias()->toString(); + } + indent(ss, indent_size + 1) << ")\n"; + return ss.str(); +} + +std::string LinearOp::toInlineString(int indent_size) const { + NVF_CHECK(false, "Tensor op can not be printed inline"); +} + +std::vector LinearOp::evaluate( + const ExpressionEvaluator& ee, + const std::vector& inputs) const { + const auto a = inputs.at(0).as(); + const auto b = inputs.at(1).as(); + + if (this->has_bias_) { + const auto bias = inputs.at(2).as(); + return {at::linear(a, b, bias)}; + } + return {at::linear(a, b)}; +} + + } // namespace nvfuser diff --git a/csrc/ops/composite.cpp b/csrc/ops/composite.cpp index cdeeaecb624..20a7a126bf1 100644 --- a/csrc/ops/composite.cpp +++ b/csrc/ops/composite.cpp @@ -54,42 +54,53 @@ TensorView* dropout_backward(TensorView* dy, TensorView* mask, Val* scale) { return dx; } -TensorView* linear(TensorView* a, TensorView* b, TensorView* bias) { - // TODO: Support 1+ dimensional A. - NVF_CHECK( - (a->nDims() == 2 && b->nDims() == 2), - "Only 2-D Inputs and Weights are currently supported in Linear!"); - - std::vector bcast_dims(a->nDims() + 1, false); - // A: [M, Bcast, K] - // B: [Bcast, N, K] - bcast_dims.at(bcast_dims.size() - 2) = true; - auto* tv0b = broadcast(a, bcast_dims); - bcast_dims.at(bcast_dims.size() - 2) = false; - bcast_dims.at(bcast_dims.size() - 3) = true; - auto* tv1b = broadcast(b, bcast_dims); +namespace { - NVF_CHECK( - a->getDataType().value() == b->getDataType().value(), - "data types of inputs to matmul don't match"); - - auto* output = fusedMultiplySum(tv0b, tv1b, {-1}); - if (bias) { - NVF_CHECK( - (bias->nDims() <= a->nDims()), "bias should be broadcastable to A"); - NVF_CHECK( - a->getDataType().value() == bias->getDataType().value(), - "bias doesn't match input/weight dtype"); - auto* bias_with_cast = maybeCastOp(output->getDataType().value(), bias); - auto* bcast_bias = ops::maybeBroadcast({output, bias_with_cast})[1]; - auto* bias_output = add(output, bcast_bias); - return maybeCastOp(a->getDataType().value(), bias_output); +static TensorView* newForLinear(TensorView* input, TensorView* weight, TensorView* bias) { + auto input_domain = + TensorDomain::noReductions(input->getMaybeRFactorDomain()); + + // Linear: inputs = {*, in_features}, weight = {out_features, in_features} / {in_features} + // For the linear output, all but the last dimension are the same shape as the input. + // The last dimension is out_features. + + auto ndims_out = (input.size() - 1)+ (weight.size() - 1); + std::vector out_domain(ndims_out, nullptr); + + for (auto idx : c10::irange(ndims_out - 1)) { + out_domain[idx] = ops::newOutputIterDomain({input_domain.at(idx)}); + } + if (weight.size() == 2){ + // Add out_features to output domain. + auto weight_domain = TensorDomain::noReductions(weight->getMaybeRFactorDomain()); + if (bias != nullptr) { + auto bias_domain = TensorDomain::noReductions(bias->getMaybeRFactorDomain()); + out_domain[ndims_out - 1] = ops::newOutputIterDomain({weight_domain.at(0), bias_domain.at(0)}); + } else { + out_domain[ndims_out - 1] = ops::newOutputIterDomain({weight_domain.at(0)}); + } } - return maybeCastOp(a->getDataType().value(), output); + + TensorDomain* td = IrBuilder::create( + out_domain, TensorDomain::getContiguityFilledWith(out_domain, true)); + + return IrBuilder::create(td, input->dtype()); +} + +} // namespace + +TensorView* linear(TensorView* tv_a, TensorView* tv_b, TensorView* bias) { + NVF_CHECK(tv_a->nDims() >= 1); + NVF_CHECK(tv_b->nDims() == 1 || tv_b->nDims() == 2); + + // For all other cases, create a new LinearOp + TensorView* out = newForLinear(tv_a, tv_b, bias); + IrBuilder::create(out, tv_a, tv_b, bias); + return out; } -TensorView* linear(TensorView* a, TensorView* b) { - return linear(a, b, nullptr /*bias*/); +TensorView* linear(TensorView* tv_a, TensorView* tv_b) { + return linear(tv_a, tv_b, /*bias=*/nullptr); } LstmResult lstm( From f5fa9558e2271cf13de8713d8159e9f7b05ce982 Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Tue, 14 May 2024 00:17:51 +0000 Subject: [PATCH 02/30] add linear op to dispatch --- csrc/dispatch.h | 1 + 1 file changed, 1 insertion(+) diff --git a/csrc/dispatch.h b/csrc/dispatch.h index c2150dda35e..714b37c3e15 100644 --- a/csrc/dispatch.h +++ b/csrc/dispatch.h @@ -108,6 +108,7 @@ class Val; f(Swizzle2D); \ f(Resize); \ f(MatmulOp); \ + f(LinearOp); \ f(Communication); #define DISPATCH_FOR_ALL_KIR_EXPRS(f) \ f(Allocate); \ From 1bd774ff946a56f4591a984e685390e60b4a2146 Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Tue, 14 May 2024 02:29:31 +0000 Subject: [PATCH 03/30] add bias, test --- csrc/ir/internal_nodes.h | 13 ++- csrc/ir/nodes.cpp | 9 +- csrc/ops/composite.cpp | 18 ++-- tests/cpp/test_matmul_aten_evaluation.cpp | 126 ++++++++++++++++++++++ 4 files changed, 148 insertions(+), 18 deletions(-) diff --git a/csrc/ir/internal_nodes.h b/csrc/ir/internal_nodes.h index 5125fdc8f94..9dbf80dbe5c 100644 --- a/csrc/ir/internal_nodes.h +++ b/csrc/ir/internal_nodes.h @@ -2293,7 +2293,7 @@ class LinearOp : public Expr { public: using Expr::Expr; - LinearOp(IrBuilderPasskey, Val* out, Val* in_a, Val* in_b); + LinearOp(IrBuilderPasskey, Val* out, Val* in_a, Val* in_b, Val* bias); NVFUSER_DECLARE_CLONE_AND_CREATE @@ -2317,7 +2317,7 @@ class LinearOp : public Expr { } Val* bias() const { - if (has_bias_) { + if (has_bias()) { return input(2); } else { return nullptr; @@ -2327,8 +2327,13 @@ class LinearOp : public Expr { std::vector evaluate( const ExpressionEvaluator& ee, const std::vector& inputs) const override; -}; + private: - bool has_bias_ = false; + bool has_bias() const { + return inputs().size() == 3; + } + +}; + } // namespace nvfuser diff --git a/csrc/ir/nodes.cpp b/csrc/ir/nodes.cpp index db7d0c207b9..a8062afad8e 100644 --- a/csrc/ir/nodes.cpp +++ b/csrc/ir/nodes.cpp @@ -4508,7 +4508,6 @@ LinearOp::LinearOp(IrBuilderPasskey passkey, Val* out, Val* in_a, Val* in_b, Val addInput(in_b); if (bias != nullptr){ - this->has_bias_ = true; addInput(bias); } } @@ -4519,9 +4518,9 @@ std::string LinearOp::toString(int indent_size) const { std::stringstream ss; indent(ss, indent_size) << out()->toString() << "\n"; indent(ss, indent_size + 1) << " = linear(" << inA()->toString() << ",\n"; - indent(ss, indent_size + 1) << " " << inB()->toString(); - if (this->has_bias_){ - indent(ss, indent_size + 1) << ",\n " << bias()->toString(); + indent(ss, indent_size + 1) << " " << inB()->toString(); + if (has_bias()){ + indent(ss, indent_size + 1) << ",\n " << bias()->toString(); } indent(ss, indent_size + 1) << ")\n"; return ss.str(); @@ -4537,7 +4536,7 @@ std::vector LinearOp::evaluate( const auto a = inputs.at(0).as(); const auto b = inputs.at(1).as(); - if (this->has_bias_) { + if (has_bias()) { const auto bias = inputs.at(2).as(); return {at::linear(a, b, bias)}; } diff --git a/csrc/ops/composite.cpp b/csrc/ops/composite.cpp index 20a7a126bf1..db5afa70a86 100644 --- a/csrc/ops/composite.cpp +++ b/csrc/ops/composite.cpp @@ -57,22 +57,22 @@ TensorView* dropout_backward(TensorView* dy, TensorView* mask, Val* scale) { namespace { static TensorView* newForLinear(TensorView* input, TensorView* weight, TensorView* bias) { - auto input_domain = - TensorDomain::noReductions(input->getMaybeRFactorDomain()); - + auto input_domain = TensorDomain::noReductions(input->getMaybeRFactorDomain()); + auto weight_domain = TensorDomain::noReductions(weight->getMaybeRFactorDomain()); + // Linear: inputs = {*, in_features}, weight = {out_features, in_features} / {in_features} // For the linear output, all but the last dimension are the same shape as the input. - // The last dimension is out_features. - - auto ndims_out = (input.size() - 1)+ (weight.size() - 1); + // The last dimension is out_features (if present). + auto ndims_out = (input_domain.size() - 1)+ (weight_domain.size() - 1); + std::vector out_domain(ndims_out, nullptr); - for (auto idx : c10::irange(ndims_out - 1)) { + for (auto idx : c10::irange(input_domain.size() - 1)) { out_domain[idx] = ops::newOutputIterDomain({input_domain.at(idx)}); } - if (weight.size() == 2){ + + if (weight_domain.size() == 2){ // Add out_features to output domain. - auto weight_domain = TensorDomain::noReductions(weight->getMaybeRFactorDomain()); if (bias != nullptr) { auto bias_domain = TensorDomain::noReductions(bias->getMaybeRFactorDomain()); out_domain[ndims_out - 1] = ops::newOutputIterDomain({weight_domain.at(0), bias_domain.at(0)}); diff --git a/tests/cpp/test_matmul_aten_evaluation.cpp b/tests/cpp/test_matmul_aten_evaluation.cpp index 94da409b504..9063d2904ec 100644 --- a/tests/cpp/test_matmul_aten_evaluation.cpp +++ b/tests/cpp/test_matmul_aten_evaluation.cpp @@ -47,6 +47,19 @@ class ATenNodesParametrizedTest optimization_guard_; }; +using LinearNodeParamType = std::tuple>; +class LinearNodeParametrizedTest + : public NVFuserFixtureParamTest { + protected: + // Allocation order set by the pass breaks matmul tests + // see issue https://github.com/NVIDIA/Fuser/issues/1810 + LinearNodeParametrizedTest() : optimization_guard_(false) {} + + private: + preseg_passes::OptimizationPassGuard + optimization_guard_; +}; + // fd.ops.matmul (a, b) where a = [M,K], b = [K,N] TEST_F(MatmulATenEvaluationTest, MmaOpAndCast) { auto fusion = std::make_unique(); @@ -552,6 +565,101 @@ TEST_P(ATenNodesParametrizedTest, MatmulNodeSymbolic) { EXPECT_TRUE(at::allclose(out[0], out_ref)); } +TEST_P(LinearNodeParametrizedTest, LinearNodeConcrete) { + auto fusion = std::make_unique(); + FusionGuard fg(fusion.get()); + + const auto& [a_shape, b_shape, bias_shape] = GetParam(); + + auto tv0 = makeConcreteTensor(a_shape, DataType::Half); + auto tv1 = makeConcreteTensor(b_shape, DataType::Half); + TensorView* bias = nullptr; + if (bias_shape.has_value()){ + bias = makeConcreteTensor(*bias_shape, DataType::Half); + } + auto tv2 = linear(tv0, tv1, bias); + + fusion->addInput(tv0); + fusion->addInput(tv1); + if (bias_shape.has_value()){ + fusion->addInput(bias); + } + fusion->addOutput(tv2); + + at::Tensor t0 = at::randn(a_shape, at::kHalf).cuda(); + at::Tensor t1 = at::randn(b_shape, at::kHalf).cuda(); + std::optional bias_opt = std::nullopt; + if (bias_shape.has_value()) { + bias_opt = at::randn(*bias_shape, at::kHalf).cuda(); + } + at::Tensor out_ref = at::linear(t0, t1, bias_opt); + + FusionExecutor fe; + fusion->aliasOutputToInput( + fusion->outputs()[0], /*input=*/nullptr, AllocationType::Evaluate); + + std::vector out = {}; + if (bias_shape.has_value()){ + fe.compileFusion(fusion.get(), {t0, t1, bias_opt}); + out = fe.runFusion({t0, t1, bias_opt}); + } else { + fe.compileFusion(fusion.get(), {t0, t1}); + out = fe.runFusion({t0, t1}); + } + + // Verify that fusion compilation was skipped. + EXPECT_FALSE(fe.hasCompiledKernel()); + EXPECT_TRUE(at::allclose(out[0], out_ref)); +} +TEST_P(LinearNodeParametrizedTest, LinearNodeSymbolic) { + auto fusion = std::make_unique(); + FusionGuard fg(fusion.get()); + + const auto& [a_shape, b_shape, bias_shape] = GetParam(); + + auto tv0 = makeSymbolicTensor(a_shape.size(), DataType::Half); + auto tv1 = makeSymbolicTensor(b_shape.size(), DataType::Half); + + TensorView* bias = nullptr; + if (bias_shape.has_value()){ + bias = makeSymbolicTensor(*bias_shape, DataType::Half); + } + + auto tv2 = linear(tv0, tv1, bias); + + fusion->addInput(tv0); + fusion->addInput(tv1); + if (bias_shape.has_value()){ + fusion->addInput(bias); + } + fusion->addOutput(tv2); + + at::Tensor t0 = at::randn(a_shape, at::kHalf).cuda(); + at::Tensor t1 = at::randn(b_shape, at::kHalf).cuda(); + std::optional bias_opt = std::nullopt; + if (bias_shape.has_value()) { + bias_opt = at::randn(*bias_shape, at::kHalf).cuda(); + } + at::Tensor out_ref = at::linear(t0, t1, bias_opt); + + FusionExecutor fe; + fusion->aliasOutputToInput( + fusion->outputs()[0], /*input=*/nullptr, AllocationType::Evaluate); + + std::vector out = {}; + if (bias_shape.has_value()){ + fe.compileFusion(fusion.get(), {t0, t1, bias_opt}); + out = fe.runFusion({t0, t1, bias_opt}); + } else { + fe.compileFusion(fusion.get(), {t0, t1}); + out = fe.runFusion({t0, t1}); + } + + // Verify that fusion compilation was skipped. + EXPECT_FALSE(fe.hasCompiledKernel()); + EXPECT_TRUE(at::allclose(out[0], out_ref)); +} + constexpr int64_t b = 128, m = 64, k = 32, n = 16; // Parametrize a_shape and b_shape @@ -588,4 +696,22 @@ INSTANTIATE_TEST_SUITE_P( Sizes({1, 1}), Sizes({b, 1, n})))); +INSTANTIATE_TEST_SUITE_P( + LinearWithoutBias, + LinearNodeParametrizedTest, + testing::Combine( + testing::Values(Sizes({k}), Sizes({m, k}), Sizes({b, m, k})), + testing::Values(Sizes({k}), Sizes({n, k})), + testing::Values(std::nullopt) + )); + +INSTANTIATE_TEST_SUITE_P( + LinearWithBias, + LinearNodeParametrizedTest, + testing::Combine( + testing::Values(Sizes({k}), Sizes({m, k}), Sizes({b, m, k})), + testing::Values(Sizes({n, k})), + testing::Values(Sizes({n})) + )); + } // namespace nvfuser From f189d6b2a7eff03fa747eef39f718e49a7f867aa Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Tue, 14 May 2024 04:27:43 +0000 Subject: [PATCH 04/30] add linear op to scheduler --- csrc/root_domain_map.cpp | 42 +++++++++++++++++++++++ csrc/scheduler/expr_eval_sched.cpp | 6 ++-- tests/cpp/test_matmul_aten_evaluation.cpp | 36 +++++++++---------- tests/python/pytest_input_generators.py | 29 ++++++++-------- 4 files changed, 78 insertions(+), 35 deletions(-) diff --git a/csrc/root_domain_map.cpp b/csrc/root_domain_map.cpp index 0f6e01ded31..d4e8edc2572 100644 --- a/csrc/root_domain_map.cpp +++ b/csrc/root_domain_map.cpp @@ -198,6 +198,48 @@ std::unordered_map PairwiseRootDomainMap::map( return dom_map; } + // For LinearOp, all but the last dimension are the same shape as the input. + // The last dimension is out_features (if present). + if (LinearOp* op = dynamic_cast(consumer_tv_->definition())) { + auto out_size = consumer_root.size(); + + // Check if the producer is A, B or bias. + MatmulRole input_role; + if (producer->sameAs(op->inA()->as()->domain())) { + input_role = MatmulRole::INPUT_A; + } else if (producer->sameAs(op->inB()->as()->domain())) { + input_role = MatmulRole::INPUT_B; + } else { + input_role = MatmulRole::INPUT_C; + } + + switch (input_role) { + case MatmulRole::INPUT_A: { + for (auto inx : c10::irange(producer_root.size() - 1)) { + updatePairwiseRootDomainMap( + producer_root.at(inx), + consumer_root.at(inx)); + } + break; + } + case MatmulRole::INPUT_B: { + if (producer_root.size() == 1) { + // out_features is not present, no mapping required. + break; + } + } + case MatmulRole::INPUT_C: { + updatePairwiseRootDomainMap( + producer_root.at(0), + consumer_root.at(out_size - 1)); + break; + } + default: + NVF_ERROR("Unexpected input type."); + } + return dom_map; + } + size_t itc = 0, itp = 0; while (itc < consumer_root.size() && itp < producer_root.size()) { IterDomain* producer_id = producer_root.at(itp); diff --git a/csrc/scheduler/expr_eval_sched.cpp b/csrc/scheduler/expr_eval_sched.cpp index c600b2f0bea..a16acbf8f43 100644 --- a/csrc/scheduler/expr_eval_sched.cpp +++ b/csrc/scheduler/expr_eval_sched.cpp @@ -13,15 +13,15 @@ namespace nvfuser { -// Check if the fusion has a single MatmulOp node +// Check if the fusion has a single MatmulOp/LinearOp node bool ExprEvalScheduler::canScheduleCompileTime(Fusion* fusion) { auto exprs = fusion->exprs(); - if (exprs.size() == 1 && exprs.front()->isA()) { + if (exprs.size() == 1 && (exprs.front()->isA() || exprs.front()->isA())) { return true; } scheduler_debug_utils::canScheduleRejectReason( heuristicType(), - "Fusion must contain a single expression of type MatmulOp"); + "Fusion must contain a single expression of type MatmulOp or LinearOp"); return false; } diff --git a/tests/cpp/test_matmul_aten_evaluation.cpp b/tests/cpp/test_matmul_aten_evaluation.cpp index 9063d2904ec..09a3b48f1ac 100644 --- a/tests/cpp/test_matmul_aten_evaluation.cpp +++ b/tests/cpp/test_matmul_aten_evaluation.cpp @@ -594,21 +594,21 @@ TEST_P(LinearNodeParametrizedTest, LinearNodeConcrete) { } at::Tensor out_ref = at::linear(t0, t1, bias_opt); - FusionExecutor fe; - fusion->aliasOutputToInput( - fusion->outputs()[0], /*input=*/nullptr, AllocationType::Evaluate); - + FusionExecutorCache fec(std::move(fusion)); + std::vector out = {}; if (bias_shape.has_value()){ - fe.compileFusion(fusion.get(), {t0, t1, bias_opt}); - out = fe.runFusion({t0, t1, bias_opt}); + out = fec.runFusionWithInputs({t0, t1, bias_opt}); } else { - fe.compileFusion(fusion.get(), {t0, t1}); - out = fe.runFusion({t0, t1}); + out = fec.runFusionWithInputs({t0, t1}); } + const std::vector& executors = + fec.getMostRecentKernelRuntime()->executors(); + EXPECT_EQ(executors.size(), 1); // Verify that fusion compilation was skipped. - EXPECT_FALSE(fe.hasCompiledKernel()); + EXPECT_FALSE(executors.front().hasCompiledKernel()); + EXPECT_TRUE(at::allclose(out[0], out_ref)); } TEST_P(LinearNodeParametrizedTest, LinearNodeSymbolic) { @@ -642,21 +642,21 @@ TEST_P(LinearNodeParametrizedTest, LinearNodeSymbolic) { } at::Tensor out_ref = at::linear(t0, t1, bias_opt); - FusionExecutor fe; - fusion->aliasOutputToInput( - fusion->outputs()[0], /*input=*/nullptr, AllocationType::Evaluate); - + FusionExecutorCache fec(std::move(fusion)); + std::vector out = {}; if (bias_shape.has_value()){ - fe.compileFusion(fusion.get(), {t0, t1, bias_opt}); - out = fe.runFusion({t0, t1, bias_opt}); + out = fec.runFusionWithInputs({t0, t1, bias_opt}); } else { - fe.compileFusion(fusion.get(), {t0, t1}); - out = fe.runFusion({t0, t1}); + out = fec.runFusionWithInputs({t0, t1}); } + const std::vector& executors = + fec.getMostRecentKernelRuntime()->executors(); + EXPECT_EQ(executors.size(), 1); // Verify that fusion compilation was skipped. - EXPECT_FALSE(fe.hasCompiledKernel()); + EXPECT_FALSE(executors.front().hasCompiledKernel()); + EXPECT_TRUE(at::allclose(out[0], out_ref)); } diff --git a/tests/python/pytest_input_generators.py b/tests/python/pytest_input_generators.py index 8237381d6ed..c776afbba87 100644 --- a/tests/python/pytest_input_generators.py +++ b/tests/python/pytest_input_generators.py @@ -1524,18 +1524,19 @@ def linear_input_generator( requires_grad=requires_grad, ) - def multiply_range(maximum, step): - assert maximum % step == 0 - num_steps = int(math.log(maximum, step)) - return tuple( - map(pow, itertools.repeat(step, num_steps), range(1, num_steps + 1)) - ) + B = 64 + M = 512 + N = 256 + K = 32 - # Ranges of tensor sizes: 8, 64, 512, 4096, 32768, ... - # Use a Cartesian product to create a wide range of matrix shapes - # I'll stop at 512 as possible numerical difference may show up. - M, N, K = itertools.repeat(multiply_range(512, 8), 3) - for M, N, K in itertools.product(M, N, K): - lhs_shape = (M, K) - rhs_shape = (N, K) - yield (SampleInput(make_arg(lhs_shape), make_arg(rhs_shape), make_arg((N,)))) + # Cases without bias + shapes_input = ((K), (M, K), (B, M, K)) + # shapes_weight = ((K), (N, K)) + # for shape_input, shape_weight in itertools.product(shapes_input, shapes_weight): + # yield SampleInput(make_arg(shape_input), make_arg(shape_weight)) + + # Cases with bias + shape_weight = (N, K) + shape_bias = (N,) + for shape_input in shapes_input: + yield SampleInput(make_arg(shape_input), make_arg(shape_weight), make_arg(shape_bias)) From d60023a2b9ccd42e90067d470ffa572cfc320194 Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Tue, 14 May 2024 04:57:28 +0000 Subject: [PATCH 05/30] allow variable number of inputs in input generators --- tests/python/pytest_fusion_definitions.py | 12 ++++++++---- tests/python/pytest_input_generators.py | 10 ++++++---- tests/python/pytest_ops.py | 12 ++++++++---- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/tests/python/pytest_fusion_definitions.py b/tests/python/pytest_fusion_definitions.py index d25448510c9..5e4d164584c 100644 --- a/tests/python/pytest_fusion_definitions.py +++ b/tests/python/pytest_fusion_definitions.py @@ -21,14 +21,18 @@ def parse_inputs_fusion_definition(fd: FusionDefinition, opinfo: OpInfo, *args): nvf_args = [] - if opinfo.symbolic_parameter_list is None: - opinfo.symbolic_parameter_list = [ArgumentType.Symbolic] * len(args) - num_symbolic_parameters = len(opinfo.symbolic_parameter_list) + symbolic_parameter_list = ( + opinfo.symbolic_parameter_list + if opinfo.symbolic_parameter_list is not None + else [ArgumentType.Symbolic] * len(args) + ) + + num_symbolic_parameters = len(symbolic_parameter_list) assert num_symbolic_parameters == len( args ), f"{num_symbolic_parameters} vs {len(args)}" - for arg_type, a in zip(opinfo.symbolic_parameter_list, args): + for arg_type, a in zip(symbolic_parameter_list, args): if arg_type == ArgumentType.Symbolic: if isinstance(a, torch.Tensor): nvf_args.append(fd.from_pytorch(a)) diff --git a/tests/python/pytest_input_generators.py b/tests/python/pytest_input_generators.py index c776afbba87..aa29430c751 100644 --- a/tests/python/pytest_input_generators.py +++ b/tests/python/pytest_input_generators.py @@ -1531,12 +1531,14 @@ def linear_input_generator( # Cases without bias shapes_input = ((K), (M, K), (B, M, K)) - # shapes_weight = ((K), (N, K)) - # for shape_input, shape_weight in itertools.product(shapes_input, shapes_weight): - # yield SampleInput(make_arg(shape_input), make_arg(shape_weight)) + shapes_weight = ((K), (N, K)) + for shape_input, shape_weight in itertools.product(shapes_input, shapes_weight): + yield SampleInput(make_arg(shape_input), make_arg(shape_weight)) # Cases with bias shape_weight = (N, K) shape_bias = (N,) for shape_input in shapes_input: - yield SampleInput(make_arg(shape_input), make_arg(shape_weight), make_arg(shape_bias)) + yield SampleInput( + make_arg(shape_input), make_arg(shape_weight), make_arg(shape_bias) + ) diff --git a/tests/python/pytest_ops.py b/tests/python/pytest_ops.py index 713f7dc35ef..ffcb8e7d535 100644 --- a/tests/python/pytest_ops.py +++ b/tests/python/pytest_ops.py @@ -26,12 +26,16 @@ def parse_args_fusion_execution(opinfo: OpInfo, *args): if len(args) == 0: return [] - if opinfo.symbolic_parameter_list is None: - opinfo.symbolic_parameter_list = [ArgumentType.Symbolic] * len(args) - assert len(opinfo.symbolic_parameter_list) == len(args) + symbolic_parameter_list = ( + opinfo.symbolic_parameter_list + if opinfo.symbolic_parameter_list is not None + else [ArgumentType.Symbolic] * len(args) + ) + + assert len(symbolic_parameter_list) == len(args) result = [] - for arg_type, a in zip(opinfo.symbolic_parameter_list, args): + for arg_type, a in zip(symbolic_parameter_list, args): if arg_type == ArgumentType.Symbolic: if isinstance(a, list) and all(map(is_tensor, a)): result.extend(a) From 69c1eda6426a686a78c228eac9d3e94eadb0d268 Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Tue, 14 May 2024 05:01:25 +0000 Subject: [PATCH 06/30] format --- csrc/ir/internal_nodes.h | 4 +- csrc/ir/nodes.cpp | 14 ++++-- csrc/ops/composite.cpp | 36 +++++++------ csrc/root_domain_map.cpp | 6 +-- csrc/scheduler/expr_eval_sched.cpp | 3 +- tests/cpp/test_matmul_aten_evaluation.cpp | 61 ++++++++++++++--------- 6 files changed, 74 insertions(+), 50 deletions(-) diff --git a/csrc/ir/internal_nodes.h b/csrc/ir/internal_nodes.h index 9dbf80dbe5c..38892ef581b 100644 --- a/csrc/ir/internal_nodes.h +++ b/csrc/ir/internal_nodes.h @@ -2328,12 +2328,10 @@ class LinearOp : public Expr { const ExpressionEvaluator& ee, const std::vector& inputs) const override; -private: + private: bool has_bias() const { return inputs().size() == 3; } - }; - } // namespace nvfuser diff --git a/csrc/ir/nodes.cpp b/csrc/ir/nodes.cpp index a8062afad8e..e3369dad1b7 100644 --- a/csrc/ir/nodes.cpp +++ b/csrc/ir/nodes.cpp @@ -4501,13 +4501,18 @@ std::vector MatmulOp::evaluate( return {at::matmul(a, b)}; } -LinearOp::LinearOp(IrBuilderPasskey passkey, Val* out, Val* in_a, Val* in_b, Val* bias) +LinearOp::LinearOp( + IrBuilderPasskey passkey, + Val* out, + Val* in_a, + Val* in_b, + Val* bias) : Expr(passkey) { addOutput(out); addInput(in_a); addInput(in_b); - if (bias != nullptr){ + if (bias != nullptr) { addInput(bias); } } @@ -4519,8 +4524,8 @@ std::string LinearOp::toString(int indent_size) const { indent(ss, indent_size) << out()->toString() << "\n"; indent(ss, indent_size + 1) << " = linear(" << inA()->toString() << ",\n"; indent(ss, indent_size + 1) << " " << inB()->toString(); - if (has_bias()){ - indent(ss, indent_size + 1) << ",\n " << bias()->toString(); + if (has_bias()) { + indent(ss, indent_size + 1) << ",\n " << bias()->toString(); } indent(ss, indent_size + 1) << ")\n"; return ss.str(); @@ -4543,5 +4548,4 @@ std::vector LinearOp::evaluate( return {at::linear(a, b)}; } - } // namespace nvfuser diff --git a/csrc/ops/composite.cpp b/csrc/ops/composite.cpp index db5afa70a86..f36d57ba3c1 100644 --- a/csrc/ops/composite.cpp +++ b/csrc/ops/composite.cpp @@ -56,28 +56,36 @@ TensorView* dropout_backward(TensorView* dy, TensorView* mask, Val* scale) { namespace { -static TensorView* newForLinear(TensorView* input, TensorView* weight, TensorView* bias) { - auto input_domain = TensorDomain::noReductions(input->getMaybeRFactorDomain()); - auto weight_domain = TensorDomain::noReductions(weight->getMaybeRFactorDomain()); - - // Linear: inputs = {*, in_features}, weight = {out_features, in_features} / {in_features} - // For the linear output, all but the last dimension are the same shape as the input. - // The last dimension is out_features (if present). - auto ndims_out = (input_domain.size() - 1)+ (weight_domain.size() - 1); - - std::vector out_domain(ndims_out, nullptr); +static TensorView* newForLinear( + TensorView* input, + TensorView* weight, + TensorView* bias) { + auto input_domain = + TensorDomain::noReductions(input->getMaybeRFactorDomain()); + auto weight_domain = + TensorDomain::noReductions(weight->getMaybeRFactorDomain()); + + // Linear: inputs = {*, in_features}, weight = {out_features, in_features} / + // {in_features} For the linear output, all but the last dimension are the + // same shape as the input. The last dimension is out_features (if present). + auto ndims_out = (input_domain.size() - 1) + (weight_domain.size() - 1); + + std::vector out_domain(ndims_out, nullptr); for (auto idx : c10::irange(input_domain.size() - 1)) { out_domain[idx] = ops::newOutputIterDomain({input_domain.at(idx)}); } - if (weight_domain.size() == 2){ + if (weight_domain.size() == 2) { // Add out_features to output domain. if (bias != nullptr) { - auto bias_domain = TensorDomain::noReductions(bias->getMaybeRFactorDomain()); - out_domain[ndims_out - 1] = ops::newOutputIterDomain({weight_domain.at(0), bias_domain.at(0)}); + auto bias_domain = + TensorDomain::noReductions(bias->getMaybeRFactorDomain()); + out_domain[ndims_out - 1] = + ops::newOutputIterDomain({weight_domain.at(0), bias_domain.at(0)}); } else { - out_domain[ndims_out - 1] = ops::newOutputIterDomain({weight_domain.at(0)}); + out_domain[ndims_out - 1] = + ops::newOutputIterDomain({weight_domain.at(0)}); } } diff --git a/csrc/root_domain_map.cpp b/csrc/root_domain_map.cpp index d4e8edc2572..b6ca1730f74 100644 --- a/csrc/root_domain_map.cpp +++ b/csrc/root_domain_map.cpp @@ -217,8 +217,7 @@ std::unordered_map PairwiseRootDomainMap::map( case MatmulRole::INPUT_A: { for (auto inx : c10::irange(producer_root.size() - 1)) { updatePairwiseRootDomainMap( - producer_root.at(inx), - consumer_root.at(inx)); + producer_root.at(inx), consumer_root.at(inx)); } break; } @@ -230,8 +229,7 @@ std::unordered_map PairwiseRootDomainMap::map( } case MatmulRole::INPUT_C: { updatePairwiseRootDomainMap( - producer_root.at(0), - consumer_root.at(out_size - 1)); + producer_root.at(0), consumer_root.at(out_size - 1)); break; } default: diff --git a/csrc/scheduler/expr_eval_sched.cpp b/csrc/scheduler/expr_eval_sched.cpp index a16acbf8f43..b25a290ce99 100644 --- a/csrc/scheduler/expr_eval_sched.cpp +++ b/csrc/scheduler/expr_eval_sched.cpp @@ -16,7 +16,8 @@ namespace nvfuser { // Check if the fusion has a single MatmulOp/LinearOp node bool ExprEvalScheduler::canScheduleCompileTime(Fusion* fusion) { auto exprs = fusion->exprs(); - if (exprs.size() == 1 && (exprs.front()->isA() || exprs.front()->isA())) { + if (exprs.size() == 1 && + (exprs.front()->isA() || exprs.front()->isA())) { return true; } scheduler_debug_utils::canScheduleRejectReason( diff --git a/tests/cpp/test_matmul_aten_evaluation.cpp b/tests/cpp/test_matmul_aten_evaluation.cpp index 09a3b48f1ac..f95e9fc4dbd 100644 --- a/tests/cpp/test_matmul_aten_evaluation.cpp +++ b/tests/cpp/test_matmul_aten_evaluation.cpp @@ -574,14 +574,14 @@ TEST_P(LinearNodeParametrizedTest, LinearNodeConcrete) { auto tv0 = makeConcreteTensor(a_shape, DataType::Half); auto tv1 = makeConcreteTensor(b_shape, DataType::Half); TensorView* bias = nullptr; - if (bias_shape.has_value()){ + if (bias_shape.has_value()) { bias = makeConcreteTensor(*bias_shape, DataType::Half); } auto tv2 = linear(tv0, tv1, bias); fusion->addInput(tv0); fusion->addInput(tv1); - if (bias_shape.has_value()){ + if (bias_shape.has_value()) { fusion->addInput(bias); } fusion->addOutput(tv2); @@ -595,10 +595,10 @@ TEST_P(LinearNodeParametrizedTest, LinearNodeConcrete) { at::Tensor out_ref = at::linear(t0, t1, bias_opt); FusionExecutorCache fec(std::move(fusion)); - - std::vector out = {}; + + d::vector out = {}; if (bias_shape.has_value()){ - out = fec.runFusionWithInputs({t0, t1, bias_opt}); + out = fec.runFusionWithInputs({t0, t1, bias_opt}); } else { out = fec.runFusionWithInputs({t0, t1}); } @@ -608,21 +608,25 @@ TEST_P(LinearNodeParametrizedTest, LinearNodeConcrete) { EXPECT_EQ(executors.size(), 1); // Verify that fusion compilation was skipped. EXPECT_FALSE(executors.front().hasCompiledKernel()); - - EXPECT_TRUE(at::allclose(out[0], out_ref)); + + + + CT_TRUE(at::allclose(out[0], out_ref)); } TEST_P(LinearNodeParametrizedTest, LinearNodeSymbolic) { auto fusion = std::make_unique(); FusionGuard fg(fusion.get()); const auto& [a_shape, b_shape, bias_shape] = GetParam(); + - auto tv0 = makeSymbolicTensor(a_shape.size(), DataType::Half); + + v0 = makeSymbolicTensor(a_shape.size(), DataType::Half); auto tv1 = makeSymbolicTensor(b_shape.size(), DataType::Half); TensorView* bias = nullptr; if (bias_shape.has_value()){ - bias = makeSymbolicTensor(*bias_shape, DataType::Half); + bias = makeSymbolicTensor(*bias_shape, DataType::Half); } auto tv2 = linear(tv0, tv1, bias); @@ -630,7 +634,7 @@ TEST_P(LinearNodeParametrizedTest, LinearNodeSymbolic) { fusion->addInput(tv0); fusion->addInput(tv1); if (bias_shape.has_value()){ - fusion->addInput(bias); + fusion->addInput(bias); } fusion->addOutput(tv2); @@ -643,10 +647,12 @@ TEST_P(LinearNodeParametrizedTest, LinearNodeSymbolic) { at::Tensor out_ref = at::linear(t0, t1, bias_opt); FusionExecutorCache fec(std::move(fusion)); - - std::vector out = {}; + + st + + tor out = {}; if (bias_shape.has_value()){ - out = fec.runFusionWithInputs({t0, t1, bias_opt}); + ou t = fec.runFusionWithInputs({t0, t1, bias_opt}); } else { out = fec.runFusionWithInputs({t0, t1}); } @@ -656,8 +662,10 @@ TEST_P(LinearNodeParametrizedTest, LinearNodeSymbolic) { EXPECT_EQ(executors.size(), 1); // Verify that fusion compilation was skipped. EXPECT_FALSE(executors.front().hasCompiledKernel()); - - EXPECT_TRUE(at::allclose(out[0], out_ref)); + + EXPE + + E(at::allclose(out[0], out_ref)); } constexpr int64_t b = 128, m = 64, k = 32, n = 16; @@ -700,18 +708,25 @@ INSTANTIATE_TEST_SUITE_P( LinearWithoutBias, LinearNodeParametrizedTest, testing::Combine( - testing::Values(Sizes({k}), Sizes({m, k}), Sizes({b, m, k})), - testing::Values(Sizes({k}), Sizes({n, k})), - testing::Values(std::nullopt) + tes + alues(Sizes({k}), Sizes({m, k}), Sizes({b, m, k})), + tes + alues(Sizes({k}), Sizes({n, k})), + tes + alues(std::nullopt) )); -INSTANTIATE_TEST_SUITE_P( +NTIATE_TEST_SUITE_P( LinearWithBias, LinearNodeParametrizedTest, testing::Combine( - testing::Values(Sizes({k}), Sizes({m, k}), Sizes({b, m, k})), - testing::Values(Sizes({n, k})), - testing::Values(Sizes({n})) + tes + alues(Sizes({k}), Sizes({m, k}), Sizes({b, m, k})), + tes + lues(Sizes({n, k})), + test + ues(Sizes({n})) )); -} // namespace nvfuser +} mespace nvfuser + \ No newline at end of file From 234f0debba7faf17cf98a7a70f757b3646dc1027 Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Tue, 14 May 2024 05:06:06 +0000 Subject: [PATCH 07/30] fix formatting --- tests/cpp/test_matmul_aten_evaluation.cpp | 55 ++++++++--------------- 1 file changed, 19 insertions(+), 36 deletions(-) diff --git a/tests/cpp/test_matmul_aten_evaluation.cpp b/tests/cpp/test_matmul_aten_evaluation.cpp index f95e9fc4dbd..c7254461d8d 100644 --- a/tests/cpp/test_matmul_aten_evaluation.cpp +++ b/tests/cpp/test_matmul_aten_evaluation.cpp @@ -597,8 +597,8 @@ TEST_P(LinearNodeParametrizedTest, LinearNodeConcrete) { FusionExecutorCache fec(std::move(fusion)); d::vector out = {}; - if (bias_shape.has_value()){ - out = fec.runFusionWithInputs({t0, t1, bias_opt}); + if (bias_shape.has_value()) { + out = fec.runFusionWithInputs({t0, t1, bias_opt}); } else { out = fec.runFusionWithInputs({t0, t1}); } @@ -609,9 +609,7 @@ TEST_P(LinearNodeParametrizedTest, LinearNodeConcrete) { // Verify that fusion compilation was skipped. EXPECT_FALSE(executors.front().hasCompiledKernel()); - - - CT_TRUE(at::allclose(out[0], out_ref)); + EXPECT_TRUE(at::allclose(out[0], out_ref)); } TEST_P(LinearNodeParametrizedTest, LinearNodeSymbolic) { auto fusion = std::make_unique(); @@ -619,22 +617,20 @@ TEST_P(LinearNodeParametrizedTest, LinearNodeSymbolic) { const auto& [a_shape, b_shape, bias_shape] = GetParam(); - - - v0 = makeSymbolicTensor(a_shape.size(), DataType::Half); + auto tv0 = makeSymbolicTensor(a_shape.size(), DataType::Half); auto tv1 = makeSymbolicTensor(b_shape.size(), DataType::Half); TensorView* bias = nullptr; - if (bias_shape.has_value()){ - bias = makeSymbolicTensor(*bias_shape, DataType::Half); + if (bias_shape.has_value()) { + bias = makeSymbolicTensor(*bias_shape, DataType::Half); } auto tv2 = linear(tv0, tv1, bias); fusion->addInput(tv0); fusion->addInput(tv1); - if (bias_shape.has_value()){ - fusion->addInput(bias); + if (bias_shape.has_value()) { + fusion->addInput(bias); } fusion->addOutput(tv2); @@ -648,10 +644,8 @@ TEST_P(LinearNodeParametrizedTest, LinearNodeSymbolic) { FusionExecutorCache fec(std::move(fusion)); - st - - tor out = {}; - if (bias_shape.has_value()){ + std::vector out = {}; + if (bias_shape.has_value()) { ou t = fec.runFusionWithInputs({t0, t1, bias_opt}); } else { out = fec.runFusionWithInputs({t0, t1}); @@ -663,9 +657,7 @@ TEST_P(LinearNodeParametrizedTest, LinearNodeSymbolic) { // Verify that fusion compilation was skipped. EXPECT_FALSE(executors.front().hasCompiledKernel()); - EXPE - - E(at::allclose(out[0], out_ref)); + EXPECT(at::allclose(out[0], out_ref)); } constexpr int64_t b = 128, m = 64, k = 32, n = 16; @@ -708,25 +700,16 @@ INSTANTIATE_TEST_SUITE_P( LinearWithoutBias, LinearNodeParametrizedTest, testing::Combine( - tes - alues(Sizes({k}), Sizes({m, k}), Sizes({b, m, k})), - tes - alues(Sizes({k}), Sizes({n, k})), - tes - alues(std::nullopt) - )); + testing::Values(Sizes({k}), Sizes({m, k}), Sizes({b, m, k})), + testing::Values(Sizes({k}), Sizes({n, k})), + testing::Values(std::nullopt))); NTIATE_TEST_SUITE_P( LinearWithBias, LinearNodeParametrizedTest, testing::Combine( - tes - alues(Sizes({k}), Sizes({m, k}), Sizes({b, m, k})), - tes - lues(Sizes({n, k})), - test - ues(Sizes({n})) - )); - -} mespace nvfuser - \ No newline at end of file + testing::Values(Sizes({k}), Sizes({m, k}), Sizes({b, m, k})), + testing::Values(Sizes({n, k})), + testing::Values(Sizes({n})))); + +} // namespace nvfuser From 855091c0d76e6cea732285d7f073b08eec7199e5 Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Tue, 14 May 2024 05:21:40 +0000 Subject: [PATCH 08/30] comments --- csrc/ops/composite.cpp | 25 +++++++++++++++---------- csrc/root_domain_map.cpp | 11 +++++++++-- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/csrc/ops/composite.cpp b/csrc/ops/composite.cpp index f36d57ba3c1..03b08a493c9 100644 --- a/csrc/ops/composite.cpp +++ b/csrc/ops/composite.cpp @@ -78,28 +78,33 @@ static TensorView* newForLinear( if (weight_domain.size() == 2) { // Add out_features to output domain. + std::vector out_features_ids = {weight_domain.at(0)}; if (bias != nullptr) { auto bias_domain = TensorDomain::noReductions(bias->getMaybeRFactorDomain()); - out_domain[ndims_out - 1] = - ops::newOutputIterDomain({weight_domain.at(0), bias_domain.at(0)}); - } else { - out_domain[ndims_out - 1] = - ops::newOutputIterDomain({weight_domain.at(0)}); + out_features_ids.emplace_back(bias_domain.at(0)); } + out_domain[ndims_out - 1] = ops::newOutputIterDomain(out_features_ids); } +} - TensorDomain* td = IrBuilder::create( - out_domain, TensorDomain::getContiguityFilledWith(out_domain, true)); +TensorDomain* td = IrBuilder::create( + out_domain, + TensorDomain::getContiguityFilledWith(out_domain, true)); - return IrBuilder::create(td, input->dtype()); +return IrBuilder::create(td, input->dtype()); } } // namespace TensorView* linear(TensorView* tv_a, TensorView* tv_b, TensorView* bias) { - NVF_CHECK(tv_a->nDims() >= 1); - NVF_CHECK(tv_b->nDims() == 1 || tv_b->nDims() == 2); + NVF_CHECK(tv_a->nDims() >= 1, "Input A must be atleast 1D."); + NVF_CHECK( + tv_b->nDims() == 1 || tv_b->nDims() == 2, + "Input B must be a 1D / 2D tensor."); + NVF_CHECK( + tv_b->nDims() == 1 && bias != nullptr, + "Input B must be a 2D tensor if bias is present, got 1D.") // For all other cases, create a new LinearOp TensorView* out = newForLinear(tv_a, tv_b, bias); diff --git a/csrc/root_domain_map.cpp b/csrc/root_domain_map.cpp index b6ca1730f74..964580efb17 100644 --- a/csrc/root_domain_map.cpp +++ b/csrc/root_domain_map.cpp @@ -198,8 +198,6 @@ std::unordered_map PairwiseRootDomainMap::map( return dom_map; } - // For LinearOp, all but the last dimension are the same shape as the input. - // The last dimension is out_features (if present). if (LinearOp* op = dynamic_cast(consumer_tv_->definition())) { auto out_size = consumer_root.size(); @@ -213,8 +211,15 @@ std::unordered_map PairwiseRootDomainMap::map( input_role = MatmulRole::INPUT_C; } + // LinearOp: + // inputs (INPUT_A) = {*, in_features} + // weight (INPUT_B) = {out_features, in_features} / {in_features} + // bias (INPUT_C) = {out_features} / {} + // output = {*, out_features} / {*} + switch (input_role) { case MatmulRole::INPUT_A: { + // Linear output is same as input for all but the last dimension for (auto inx : c10::irange(producer_root.size() - 1)) { updatePairwiseRootDomainMap( producer_root.at(inx), consumer_root.at(inx)); @@ -228,6 +233,7 @@ std::unordered_map PairwiseRootDomainMap::map( } } case MatmulRole::INPUT_C: { + // The last dimension of LinearOp is out_features. updatePairwiseRootDomainMap( producer_root.at(0), consumer_root.at(out_size - 1)); break; @@ -1441,3 +1447,4 @@ const DisjointSets& ExactRootDomainMap::getMappedSets() } } // namespace nvfuser + From e4049356977b53aa77715323ed0dea6dd173c1b3 Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Tue, 14 May 2024 05:35:07 +0000 Subject: [PATCH 09/30] remove null chars --- csrc/root_domain_map.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/csrc/root_domain_map.cpp b/csrc/root_domain_map.cpp index 964580efb17..0c9ff785109 100644 --- a/csrc/root_domain_map.cpp +++ b/csrc/root_domain_map.cpp @@ -1447,4 +1447,3 @@ const DisjointSets& ExactRootDomainMap::getMappedSets() } } // namespace nvfuser - From 669a3f006d48cce878512e9495610ddeae86253a Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Tue, 14 May 2024 05:37:59 +0000 Subject: [PATCH 10/30] bump version --- version.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.txt b/version.txt index abd410582de..3a4036fb450 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -0.2.4 +0.2.5 From 3278f3b677920911b730b02a59e4d81d17370582 Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Tue, 14 May 2024 21:15:50 +0000 Subject: [PATCH 11/30] fix lintrunner formatting, refactor linear op mapping, add bcast dims to test --- csrc/ops/composite.cpp | 50 +++++++++----------- csrc/ops/utils.cpp | 56 +++++++++++++++++++++++ csrc/ops/utils.h | 8 ++++ csrc/root_domain_map.cpp | 32 ++++--------- tests/cpp/test_matmul_aten_evaluation.cpp | 22 ++++----- 5 files changed, 106 insertions(+), 62 deletions(-) diff --git a/csrc/ops/composite.cpp b/csrc/ops/composite.cpp index 03b08a493c9..b92e6c25f9b 100644 --- a/csrc/ops/composite.cpp +++ b/csrc/ops/composite.cpp @@ -57,42 +57,37 @@ TensorView* dropout_backward(TensorView* dy, TensorView* mask, Val* scale) { namespace { static TensorView* newForLinear( - TensorView* input, - TensorView* weight, + TensorView* tv_a, + TensorView* tv_b, TensorView* bias) { - auto input_domain = - TensorDomain::noReductions(input->getMaybeRFactorDomain()); - auto weight_domain = - TensorDomain::noReductions(weight->getMaybeRFactorDomain()); + auto orig_domain_a = + TensorDomain::noReductions(tv_a->getMaybeRFactorDomain()); + auto orig_domain_b = + TensorDomain::noReductions(tv_b->getMaybeRFactorDomain()); - // Linear: inputs = {*, in_features}, weight = {out_features, in_features} / + // Linear: a = {*, in_features}, b = {out_features, in_features} / // {in_features} For the linear output, all but the last dimension are the - // same shape as the input. The last dimension is out_features (if present). - auto ndims_out = (input_domain.size() - 1) + (weight_domain.size() - 1); + // same shape as the first input. The last dimension is out_features (if present). + auto ndims_out = (orig_domain_a.size() - 1) + (orig_domain_b.size() - 1); std::vector out_domain(ndims_out, nullptr); - for (auto idx : c10::irange(input_domain.size() - 1)) { - out_domain[idx] = ops::newOutputIterDomain({input_domain.at(idx)}); + const std::vector& mapping_a = ops::mapLinearOpIterDomains( + orig_domain_a, MatmulRole::INPUT_A, ndims_out); + const std::vector& mapping_b = ops::mapLinearOpIterDomains( + orig_domain_b, MatmulRole::INPUT_B, ndims_out); + std::vector mapping_bias (ndims_out, nullptr); + if (bias != nullptr){ + auto bias_domain = TensorDomain::noReductions(bias->getMaybeRFactorDomain()); + mapping_bias = ops::mapLinearOpIterDomains(bias_domain, MatmulRole::INPUT_C, ndims_out); } - if (weight_domain.size() == 2) { - // Add out_features to output domain. - std::vector out_features_ids = {weight_domain.at(0)}; - if (bias != nullptr) { - auto bias_domain = - TensorDomain::noReductions(bias->getMaybeRFactorDomain()); - out_features_ids.emplace_back(bias_domain.at(0)); - } - out_domain[ndims_out - 1] = ops::newOutputIterDomain(out_features_ids); - } -} + out_domain = ops::newOutputDomain({mapping_a, mapping_b, mapping_bias}); -TensorDomain* td = IrBuilder::create( - out_domain, - TensorDomain::getContiguityFilledWith(out_domain, true)); + TensorDomain* td = IrBuilder::create( + out_domain, TensorDomain::getContiguityFilledWith(out_domain, true)); -return IrBuilder::create(td, input->dtype()); + return IrBuilder::create(td, tv_a->dtype()); } } // namespace @@ -102,9 +97,6 @@ TensorView* linear(TensorView* tv_a, TensorView* tv_b, TensorView* bias) { NVF_CHECK( tv_b->nDims() == 1 || tv_b->nDims() == 2, "Input B must be a 1D / 2D tensor."); - NVF_CHECK( - tv_b->nDims() == 1 && bias != nullptr, - "Input B must be a 2D tensor if bias is present, got 1D.") // For all other cases, create a new LinearOp TensorView* out = newForLinear(tv_a, tv_b, bias); diff --git a/csrc/ops/utils.cpp b/csrc/ops/utils.cpp index 16928f25002..b49d6f788cb 100644 --- a/csrc/ops/utils.cpp +++ b/csrc/ops/utils.cpp @@ -221,6 +221,43 @@ std::vector mapMatmulOpIterDomains( return mapping; } +std::vector mapLinearOpIterDomains( + const std::vector& input_domain, + MatmulRole input_role, + size_t out_size) { + + std::vector mapping(out_size, nullptr); + auto inp_size = input_domain.size(); + + // Input A: {*, M, K} + // Input B: {*, N, K} / {K} + // Bias: {N} / {} + + switch (input_role) { + case MatmulRole::INPUT_A: { + // Linear output is same as input for all but the last dimension + for (auto inx : c10::irange(inp_size - 1)) { + mapping[inx] = input_domain[inx]; + } + break; + } + case MatmulRole::INPUT_B: { + if (inp_size == 1) { + // out_features is not present, no mapping required. + break; + } + } + case MatmulRole::INPUT_C: { + // The last dimension of LinearOp is out_features. + mapping[out_size - 1] = input_domain[0]; + break; + } + default: + NVF_ERROR("Unexpected input type."); + } + return mapping; +} + // Adding these pragmas since gcc-12.2.1 // incorrectly reports a warning with the use of evaluate #if defined(__GNUC__) && !defined(__clang__) @@ -311,6 +348,25 @@ IterDomain* newOutputIterDomain( #pragma GCC diagnostic pop #endif +std::vector newOutputDomain(const std::vector>& input_ids) { + NVF_CHECK( + !input_ids.empty(), + "Tried to create new output Tensorview but received empty list."); + + std::vector out_domain(input_ids.front().size(), nullptr); + + for (const auto dim_i : c10::irange(out_domain.size())) { + std::vector ids_i; + ids_i.reserve(input_ids.size()); + for (auto ids : input_ids) { + if (ids[dim_i] != nullptr) + ids_i.emplace_back(ids[dim_i]); + } + out_domain[dim_i] = newOutputIterDomain(ids_i); + } + return out_domain; +} + std::vector newOutputDomain(const std::vector& vals) { std::vector tvs; for (auto val : vals) { diff --git a/csrc/ops/utils.h b/csrc/ops/utils.h index 47ac94b7d04..6e4340b2b54 100644 --- a/csrc/ops/utils.h +++ b/csrc/ops/utils.h @@ -51,6 +51,11 @@ std::vector mapMatmulOpIterDomains( MatmulRole input_role, size_t out_size); +std::vector mapLinearOpIterDomains( + const std::vector& input_domain, + MatmulRole input_role, + size_t out_size); + // Takes a vector of aligned input iterdomains to create the output iterdomain. // This is used if the input iterdomains are not trivially mapped to the output // iterdomains. For eg: MatmulOp. If given, the forced_iter_type argument will @@ -60,6 +65,9 @@ IterDomain* newOutputIterDomain( const std::vector& ids, const std::optional force_iter_type = std::nullopt); +// Takes multiple vectors of input iterdomains and assumes they are aligned to create the output tensorview. +std::vector newOutputDomain(const std::vector>& input_ids); + // Takes a vector of tensorviews and assumes they are all aligned to create the // output tensorview. For eg: BinaryOp. std::vector newOutputDomain(const std::vector& vals); diff --git a/csrc/root_domain_map.cpp b/csrc/root_domain_map.cpp index 0c9ff785109..980811ff05f 100644 --- a/csrc/root_domain_map.cpp +++ b/csrc/root_domain_map.cpp @@ -217,30 +217,18 @@ std::unordered_map PairwiseRootDomainMap::map( // bias (INPUT_C) = {out_features} / {} // output = {*, out_features} / {*} - switch (input_role) { - case MatmulRole::INPUT_A: { - // Linear output is same as input for all but the last dimension - for (auto inx : c10::irange(producer_root.size() - 1)) { - updatePairwiseRootDomainMap( - producer_root.at(inx), consumer_root.at(inx)); - } - break; - } - case MatmulRole::INPUT_B: { - if (producer_root.size() == 1) { - // out_features is not present, no mapping required. - break; - } - } - case MatmulRole::INPUT_C: { - // The last dimension of LinearOp is out_features. - updatePairwiseRootDomainMap( - producer_root.at(0), consumer_root.at(out_size - 1)); - break; + const std::vector& aligned_producer_ids = + ops::mapLinearOpIterDomains(producer_root, input_role, out_size); + + for (auto inx : c10::irange(out_size)) { + IterDomain* producer_id = aligned_producer_ids.at(inx); + IterDomain* consumer_id = consumer_root.at(inx); + if (producer_id == nullptr) { + continue; } - default: - NVF_ERROR("Unexpected input type."); + updatePairwiseRootDomainMap(producer_id, consumer_id); } + return dom_map; } diff --git a/tests/cpp/test_matmul_aten_evaluation.cpp b/tests/cpp/test_matmul_aten_evaluation.cpp index c7254461d8d..e453f0f2767 100644 --- a/tests/cpp/test_matmul_aten_evaluation.cpp +++ b/tests/cpp/test_matmul_aten_evaluation.cpp @@ -35,12 +35,12 @@ class MatmulATenEvaluationTest : public NVFuserTest { using Sizes = std::vector; using MatmulNodeParamType = std::tuple; -class ATenNodesParametrizedTest +class MatmulNodeParametrizedTest : public NVFuserFixtureParamTest { protected: // Allocation order set by the pass breaks matmul tests // see issue https://github.com/NVIDIA/Fuser/issues/1810 - ATenNodesParametrizedTest() : optimization_guard_(false) {} + MatmulNodeParametrizedTest() : optimization_guard_(false) {} private: preseg_passes::OptimizationPassGuard @@ -539,7 +539,7 @@ TEST_P(ATenNodesParametrizedTest, MatmulNodeConcrete) { EXPECT_TRUE(at::allclose(out[0], out_ref)); } -TEST_P(ATenNodesParametrizedTest, MatmulNodeSymbolic) { +TEST_P(MatmulNodeParametrizedTest, MatmulNodeSymbolic) { auto fusion = std::make_unique(); FusionGuard fg(fusion.get()); @@ -596,7 +596,7 @@ TEST_P(LinearNodeParametrizedTest, LinearNodeConcrete) { FusionExecutorCache fec(std::move(fusion)); - d::vector out = {}; + std::vector out = {}; if (bias_shape.has_value()) { out = fec.runFusionWithInputs({t0, t1, bias_opt}); } else { @@ -646,7 +646,7 @@ TEST_P(LinearNodeParametrizedTest, LinearNodeSymbolic) { std::vector out = {}; if (bias_shape.has_value()) { - ou t = fec.runFusionWithInputs({t0, t1, bias_opt}); + out = fec.runFusionWithInputs({t0, t1, bias_opt}); } else { out = fec.runFusionWithInputs({t0, t1}); } @@ -657,7 +657,7 @@ TEST_P(LinearNodeParametrizedTest, LinearNodeSymbolic) { // Verify that fusion compilation was skipped. EXPECT_FALSE(executors.front().hasCompiledKernel()); - EXPECT(at::allclose(out[0], out_ref)); + EXPECT_TRUE(at::allclose(out[0], out_ref)); } constexpr int64_t b = 128, m = 64, k = 32, n = 16; @@ -665,7 +665,7 @@ constexpr int64_t b = 128, m = 64, k = 32, n = 16; // Parametrize a_shape and b_shape INSTANTIATE_TEST_SUITE_P( , - ATenNodesParametrizedTest, + MatmulNodeParametrizedTest, testing::Combine( testing::Values( Sizes({k}), @@ -700,15 +700,15 @@ INSTANTIATE_TEST_SUITE_P( LinearWithoutBias, LinearNodeParametrizedTest, testing::Combine( - testing::Values(Sizes({k}), Sizes({m, k}), Sizes({b, m, k})), - testing::Values(Sizes({k}), Sizes({n, k})), + testing::Values(Sizes({k}), Sizes({m, k}), Sizes({b, m, k}), Sizes({1, k}), Sizes({b, 1, k})), + testing::Values(Sizes({k}), Sizes({n, k}), Sizes({1, k})), testing::Values(std::nullopt))); -NTIATE_TEST_SUITE_P( +INSTANTIATE_TEST_SUITE_P( LinearWithBias, LinearNodeParametrizedTest, testing::Combine( - testing::Values(Sizes({k}), Sizes({m, k}), Sizes({b, m, k})), + testing::Values(Sizes({k}), Sizes({m, k}), Sizes({b, m, k}), Sizes({1, k}), Sizes({b, 1, k})), testing::Values(Sizes({n, k})), testing::Values(Sizes({n})))); From c1b7a924a96e4c6adb6cefd69785e808b82bd356 Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Tue, 14 May 2024 21:20:21 +0000 Subject: [PATCH 12/30] util function --- csrc/ops/composite.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/csrc/ops/composite.cpp b/csrc/ops/composite.cpp index b92e6c25f9b..9ee77fe3f3b 100644 --- a/csrc/ops/composite.cpp +++ b/csrc/ops/composite.cpp @@ -70,8 +70,6 @@ static TensorView* newForLinear( // same shape as the first input. The last dimension is out_features (if present). auto ndims_out = (orig_domain_a.size() - 1) + (orig_domain_b.size() - 1); - std::vector out_domain(ndims_out, nullptr); - const std::vector& mapping_a = ops::mapLinearOpIterDomains( orig_domain_a, MatmulRole::INPUT_A, ndims_out); const std::vector& mapping_b = ops::mapLinearOpIterDomains( @@ -82,7 +80,7 @@ static TensorView* newForLinear( mapping_bias = ops::mapLinearOpIterDomains(bias_domain, MatmulRole::INPUT_C, ndims_out); } - out_domain = ops::newOutputDomain({mapping_a, mapping_b, mapping_bias}); + std::vector out_domain = ops::newOutputDomain({mapping_a, mapping_b, mapping_bias}); TensorDomain* td = IrBuilder::create( out_domain, TensorDomain::getContiguityFilledWith(out_domain, true)); @@ -301,8 +299,6 @@ static TensorView* newForMatmul(TensorView* tv_a, TensorView* tv_b) { ndims_out = std::max(ndims_a, ndims_b); } - std::vector out_domain(ndims_out, nullptr); - const std::vector& mapping_a = ops::mapMatmulOpIterDomains( orig_domain_a, MatmulRole::INPUT_A, ndims_out); const std::vector& mapping_b = ops::mapMatmulOpIterDomains( From c1111482ef427a1171e474eb8fdb74349c7b0646 Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Tue, 14 May 2024 21:45:02 +0000 Subject: [PATCH 13/30] bcast test, bias checks --- csrc/ops/composite.cpp | 22 ++++++++++++++++++---- csrc/ops/utils.cpp | 3 ++- tests/python/pytest_input_generators.py | 2 +- 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/csrc/ops/composite.cpp b/csrc/ops/composite.cpp index 9ee77fe3f3b..0f7021e645c 100644 --- a/csrc/ops/composite.cpp +++ b/csrc/ops/composite.cpp @@ -91,11 +91,25 @@ static TensorView* newForLinear( } // namespace TensorView* linear(TensorView* tv_a, TensorView* tv_b, TensorView* bias) { - NVF_CHECK(tv_a->nDims() >= 1, "Input A must be atleast 1D."); - NVF_CHECK( - tv_b->nDims() == 1 || tv_b->nDims() == 2, - "Input B must be a 1D / 2D tensor."); + auto ndims_a = TensorDomain::noReductions(tv_a->getMaybeRFactorDomain()).size(); + NVF_CHECK(ndims_a > 0, "Input A must be atleast 1D."); + + auto ndims_b = TensorDomain::noReductions(tv_b->getMaybeRFactorDomain()).size(); + NVF_CHECK(ndims_b == 1 || ndims_b == 2, "Input B must be a 1D / 2D tensor."); + + NVF_CHECK(ndims_b == 2 || bias == nullptr, "Expected B to be a 2D matrix if bias is given, got 1D.") + NVF_CHECK( + tv_a->dtype() == tv_b->dtype(), + "Expected A and B dtypes to have the same dtype, got: ", + tv_a->dtype(), + " and ", + tv_b->dtype()); + + NVF_CHECK( + bias == nullptr || bias->dtype() == tv_a->dtype(), + "Expected bias to have the same dtype as A and B, got: ", bias->dtype(), " and ", tv_b->dtype() + ); // For all other cases, create a new LinearOp TensorView* out = newForLinear(tv_a, tv_b, bias); IrBuilder::create(out, tv_a, tv_b, bias); diff --git a/csrc/ops/utils.cpp b/csrc/ops/utils.cpp index b49d6f788cb..a88ffb130b3 100644 --- a/csrc/ops/utils.cpp +++ b/csrc/ops/utils.cpp @@ -359,8 +359,9 @@ std::vector newOutputDomain(const std::vector ids_i; ids_i.reserve(input_ids.size()); for (auto ids : input_ids) { - if (ids[dim_i] != nullptr) + if (ids[dim_i] != nullptr){ ids_i.emplace_back(ids[dim_i]); + } } out_domain[dim_i] = newOutputIterDomain(ids_i); } diff --git a/tests/python/pytest_input_generators.py b/tests/python/pytest_input_generators.py index aa29430c751..24a036ea768 100644 --- a/tests/python/pytest_input_generators.py +++ b/tests/python/pytest_input_generators.py @@ -1530,7 +1530,7 @@ def linear_input_generator( K = 32 # Cases without bias - shapes_input = ((K), (M, K), (B, M, K)) + shapes_input = ((K), (M, K), (B, M, K), (B, 1, M, K)) shapes_weight = ((K), (N, K)) for shape_input, shape_weight in itertools.product(shapes_input, shapes_weight): yield SampleInput(make_arg(shape_input), make_arg(shape_weight)) From 229fbf6627250e860637fc71515f2ca59a950ce9 Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Tue, 14 May 2024 23:13:49 +0000 Subject: [PATCH 14/30] 0-D bias, comments --- csrc/ir/internal_nodes.h | 3 ++- csrc/ops/composite.cpp | 1 + csrc/ops/utils.cpp | 14 ++++++++------ tests/cpp/test_matmul_aten_evaluation.cpp | 6 +++--- tests/python/test_python_frontend.py | 3 +-- 5 files changed, 15 insertions(+), 12 deletions(-) diff --git a/csrc/ir/internal_nodes.h b/csrc/ir/internal_nodes.h index 38892ef581b..3dc644e10af 100644 --- a/csrc/ir/internal_nodes.h +++ b/csrc/ir/internal_nodes.h @@ -2288,7 +2288,8 @@ class MatmulOp : public Expr { const std::vector& inputs) const override; }; -//! Linear Operator to be expression evaluated without decomposition. +// Linear Operator to be expression evaluated without decomposition. +// This node has the same functionality as F.linear (https://pytorch.org/docs/stable/generated/torch.nn.functional.linear.html#torch.nn.functional.linear) class LinearOp : public Expr { public: using Expr::Expr; diff --git a/csrc/ops/composite.cpp b/csrc/ops/composite.cpp index 0f7021e645c..81ff3073d77 100644 --- a/csrc/ops/composite.cpp +++ b/csrc/ops/composite.cpp @@ -97,6 +97,7 @@ TensorView* linear(TensorView* tv_a, TensorView* tv_b, TensorView* bias) { auto ndims_b = TensorDomain::noReductions(tv_b->getMaybeRFactorDomain()).size(); NVF_CHECK(ndims_b == 1 || ndims_b == 2, "Input B must be a 1D / 2D tensor."); + // Note: This constraint is not documented but F.linear errors out if bias is given with 1D weights. NVF_CHECK(ndims_b == 2 || bias == nullptr, "Expected B to be a 2D matrix if bias is given, got 1D.") NVF_CHECK( diff --git a/csrc/ops/utils.cpp b/csrc/ops/utils.cpp index a88ffb130b3..2aa51a28c9d 100644 --- a/csrc/ops/utils.cpp +++ b/csrc/ops/utils.cpp @@ -232,7 +232,6 @@ std::vector mapLinearOpIterDomains( // Input A: {*, M, K} // Input B: {*, N, K} / {K} // Bias: {N} / {} - switch (input_role) { case MatmulRole::INPUT_A: { // Linear output is same as input for all but the last dimension @@ -242,14 +241,17 @@ std::vector mapLinearOpIterDomains( break; } case MatmulRole::INPUT_B: { - if (inp_size == 1) { - // out_features is not present, no mapping required. - break; + if (inp_size > 1) { + // Weight is of shape {out_features, in_features} + mapping[out_size - 1] = input_domain[0]; } + break; } case MatmulRole::INPUT_C: { - // The last dimension of LinearOp is out_features. - mapping[out_size - 1] = input_domain[0]; + if (inp_size > 0){ + // Bias is 1D tensor of shape {out_features} + mapping[out_size - 1] = input_domain[0]; + } break; } default: diff --git a/tests/cpp/test_matmul_aten_evaluation.cpp b/tests/cpp/test_matmul_aten_evaluation.cpp index e453f0f2767..7ef14e4ffbf 100644 --- a/tests/cpp/test_matmul_aten_evaluation.cpp +++ b/tests/cpp/test_matmul_aten_evaluation.cpp @@ -590,7 +590,7 @@ TEST_P(LinearNodeParametrizedTest, LinearNodeConcrete) { at::Tensor t1 = at::randn(b_shape, at::kHalf).cuda(); std::optional bias_opt = std::nullopt; if (bias_shape.has_value()) { - bias_opt = at::randn(*bias_shape, at::kHalf).cuda(); + bias_opt = bias_shape.value().empty() ? at::scalar_tensor(3.14).to(at::kHalf).cuda(): at::randn(*bias_shape, at::kHalf).cuda(); } at::Tensor out_ref = at::linear(t0, t1, bias_opt); @@ -638,7 +638,7 @@ TEST_P(LinearNodeParametrizedTest, LinearNodeSymbolic) { at::Tensor t1 = at::randn(b_shape, at::kHalf).cuda(); std::optional bias_opt = std::nullopt; if (bias_shape.has_value()) { - bias_opt = at::randn(*bias_shape, at::kHalf).cuda(); + bias_opt = bias_shape.value().empty() ? at::scalar_tensor(3.14).to(at::kHalf).cuda() : at::randn(*bias_shape, at::kHalf).cuda(); } at::Tensor out_ref = at::linear(t0, t1, bias_opt); @@ -710,6 +710,6 @@ INSTANTIATE_TEST_SUITE_P( testing::Combine( testing::Values(Sizes({k}), Sizes({m, k}), Sizes({b, m, k}), Sizes({1, k}), Sizes({b, 1, k})), testing::Values(Sizes({n, k})), - testing::Values(Sizes({n})))); + testing::Values(Sizes({}), Sizes({n})))); } // namespace nvfuser diff --git a/tests/python/test_python_frontend.py b/tests/python/test_python_frontend.py index 41664ccb0c1..00b78c89757 100644 --- a/tests/python/test_python_frontend.py +++ b/tests/python/test_python_frontend.py @@ -2408,7 +2408,6 @@ def test_linear(self): k = 8 bias0d = torch.tensor(3.14, device="cuda", dtype=torch.float16) bias1d = torch.randn(n, device="cuda", dtype=torch.float16) - bias2d = torch.rand(m, n, device="cuda", dtype=torch.float16) inputs_mk_nk = [ torch.randn(m, k, device="cuda", dtype=torch.float16), @@ -2446,7 +2445,7 @@ def fusion_func( fd.add_output(t_out) in_tensors = [inputs_mk_nk, inputs_mk_kn, inputs_km_nk, inputs_km_kn] - use_bias = [None, bias0d, bias1d, bias2d] + use_bias = [None, bias0d, bias1d] for [inp, wt], use_bias in list(itertools.product(in_tensors, use_bias)): with self.subTest(inp=inp, wt=wt, use_bias=use_bias): input_tensors = ( From 66ee73375c06126e93c4309edd4fcabd176dc578 Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Tue, 14 May 2024 23:24:10 +0000 Subject: [PATCH 15/30] comments --- csrc/ops/composite.h | 6 ++---- csrc/ops/utils.h | 5 ++++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/csrc/ops/composite.h b/csrc/ops/composite.h index fa617e75154..13ffe8f8884 100644 --- a/csrc/ops/composite.h +++ b/csrc/ops/composite.h @@ -47,10 +47,8 @@ NVF_API LstmResult lstm( TensorView* cell_x, TensorView* out_x); -// Linear functions which takes in two tensors of shapes A[M,K] and -// B[N,K]. Takes in a options bias of shape [N] and performs -// out = A * B_Transpose + bias. The output dtype matches the dtype -// ofthe inputs which should match. +// Linear functions which takes in two tensors of shapes A[* , in_features], B[out_features, in_features] / [in_features] and an optional bias of shape [out_features] or 0D scalar. +// Bias can only be given if B is a 2-D tensor. TensorView* linear(TensorView* a, TensorView* b, TensorView* bias); // This is an implementation detail to reflect when linear is called // without a bias. This calls the above function. We use this function diff --git a/csrc/ops/utils.h b/csrc/ops/utils.h index 6e4340b2b54..f6750ef9f5e 100644 --- a/csrc/ops/utils.h +++ b/csrc/ops/utils.h @@ -51,11 +51,14 @@ std::vector mapMatmulOpIterDomains( MatmulRole input_role, size_t out_size); +// For LinearOp, the output is the same as the first input (A[*, in_features])for all but the last dimension. +// If the second input is 2D (B[out_features, in_features]), the last dimension of output is out_features. +// If bias is 1D (bias[out_features]) it maps to the last dimension of the output. std::vector mapLinearOpIterDomains( const std::vector& input_domain, MatmulRole input_role, size_t out_size); - + // Takes a vector of aligned input iterdomains to create the output iterdomain. // This is used if the input iterdomains are not trivially mapped to the output // iterdomains. For eg: MatmulOp. If given, the forced_iter_type argument will From a70e0a08742651ee84384a15fdb25c78ae100e72 Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Tue, 14 May 2024 23:37:22 +0000 Subject: [PATCH 16/30] update tests --- tests/cpp/test_matmul_aten_evaluation.cpp | 8 ++++++++ tests/python/pytest_input_generators.py | 8 ++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/tests/cpp/test_matmul_aten_evaluation.cpp b/tests/cpp/test_matmul_aten_evaluation.cpp index 7ef14e4ffbf..29c8fd32d7e 100644 --- a/tests/cpp/test_matmul_aten_evaluation.cpp +++ b/tests/cpp/test_matmul_aten_evaluation.cpp @@ -712,4 +712,12 @@ INSTANTIATE_TEST_SUITE_P( testing::Values(Sizes({n, k})), testing::Values(Sizes({}), Sizes({n})))); +INSTANTIATE_TEST_SUITE_P( + LinearReductionAxisIsOne, + LinearNodeParametrizedTest, + testing::Combine( + testing::Values(Sizes({m, 1}), Sizes({b, m, 1}))), + testing::Values(Sizes({n, 1})), + testing::Values(Sizes({}), Sizes({n}))); + } // namespace nvfuser diff --git a/tests/python/pytest_input_generators.py b/tests/python/pytest_input_generators.py index 24a036ea768..83f2cb35efe 100644 --- a/tests/python/pytest_input_generators.py +++ b/tests/python/pytest_input_generators.py @@ -1530,15 +1530,15 @@ def linear_input_generator( K = 32 # Cases without bias - shapes_input = ((K), (M, K), (B, M, K), (B, 1, M, K)) - shapes_weight = ((K), (N, K)) + shapes_input = ((K), (M, K)) + shapes_weight = ((K), (N, K), (1, K)) for shape_input, shape_weight in itertools.product(shapes_input, shapes_weight): yield SampleInput(make_arg(shape_input), make_arg(shape_weight)) # Cases with bias shape_weight = (N, K) - shape_bias = (N,) - for shape_input in shapes_input: + shapes_bias = (()) + for shape_input, shape_bias in itertools.product(shapes_input, shapes_bias): yield SampleInput( make_arg(shape_input), make_arg(shape_weight), make_arg(shape_bias) ) From dc41b593ccc6723cb4cb09b1f72988a1d5578297 Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Tue, 14 May 2024 23:40:54 +0000 Subject: [PATCH 17/30] update tests --- tests/python/pytest_input_generators.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/python/pytest_input_generators.py b/tests/python/pytest_input_generators.py index 83f2cb35efe..cf76e12dfa1 100644 --- a/tests/python/pytest_input_generators.py +++ b/tests/python/pytest_input_generators.py @@ -1530,14 +1530,14 @@ def linear_input_generator( K = 32 # Cases without bias - shapes_input = ((K), (M, K)) + shapes_input = ((K), (M, K), (B, M, K), (B, 1, M, K)) shapes_weight = ((K), (N, K), (1, K)) for shape_input, shape_weight in itertools.product(shapes_input, shapes_weight): yield SampleInput(make_arg(shape_input), make_arg(shape_weight)) # Cases with bias shape_weight = (N, K) - shapes_bias = (()) + shapes_bias = ((), (N,)) for shape_input, shape_bias in itertools.product(shapes_input, shapes_bias): yield SampleInput( make_arg(shape_input), make_arg(shape_weight), make_arg(shape_bias) From eeb1a9d72554eed00df2dacd5f5182e5ad462cc7 Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Wed, 15 May 2024 00:42:27 +0000 Subject: [PATCH 18/30] clangtidy --- csrc/root_domain_map.cpp | 8 +++++--- tests/cpp/test_matmul_aten_evaluation.cpp | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/csrc/root_domain_map.cpp b/csrc/root_domain_map.cpp index 980811ff05f..0f68bf88270 100644 --- a/csrc/root_domain_map.cpp +++ b/csrc/root_domain_map.cpp @@ -202,13 +202,15 @@ std::unordered_map PairwiseRootDomainMap::map( auto out_size = consumer_root.size(); // Check if the producer is A, B or bias. - MatmulRole input_role; + std::optional input_role = std::nullopt; if (producer->sameAs(op->inA()->as()->domain())) { input_role = MatmulRole::INPUT_A; } else if (producer->sameAs(op->inB()->as()->domain())) { input_role = MatmulRole::INPUT_B; - } else { + } else if (producer->sameAs(op->bias()->as()->domain())){ input_role = MatmulRole::INPUT_C; + } else { + NVF_ERROR(false, "Producer did not match any LinearOp input.") } // LinearOp: @@ -218,7 +220,7 @@ std::unordered_map PairwiseRootDomainMap::map( // output = {*, out_features} / {*} const std::vector& aligned_producer_ids = - ops::mapLinearOpIterDomains(producer_root, input_role, out_size); + ops::mapLinearOpIterDomains(producer_root, input_role.value(), out_size); for (auto inx : c10::irange(out_size)) { IterDomain* producer_id = aligned_producer_ids.at(inx); diff --git a/tests/cpp/test_matmul_aten_evaluation.cpp b/tests/cpp/test_matmul_aten_evaluation.cpp index 29c8fd32d7e..2105a50dd74 100644 --- a/tests/cpp/test_matmul_aten_evaluation.cpp +++ b/tests/cpp/test_matmul_aten_evaluation.cpp @@ -716,8 +716,8 @@ INSTANTIATE_TEST_SUITE_P( LinearReductionAxisIsOne, LinearNodeParametrizedTest, testing::Combine( - testing::Values(Sizes({m, 1}), Sizes({b, m, 1}))), + testing::Values(Sizes({m, 1}), Sizes({b, m, 1})), testing::Values(Sizes({n, 1})), - testing::Values(Sizes({}), Sizes({n}))); + testing::Values(Sizes({}), Sizes({n})))); } // namespace nvfuser From f338a4b036f4549e77e292bf0b6b6059be062741 Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Thu, 16 May 2024 04:18:36 +0000 Subject: [PATCH 19/30] update mapping to include K --- csrc/device_lower/utils.cpp | 1 + csrc/ops/composite.cpp | 6 +++--- csrc/ops/utils.cpp | 9 +++++---- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/csrc/device_lower/utils.cpp b/csrc/device_lower/utils.cpp index 99b22087589..f9f9e848fff 100644 --- a/csrc/device_lower/utils.cpp +++ b/csrc/device_lower/utils.cpp @@ -151,6 +151,7 @@ bool isTvOp(const Expr* expr) { LoadStoreOp, MatmulOp, MmaOp, + LinearOp, BroadcastOp, SqueezeOp, ExpandOp, diff --git a/csrc/ops/composite.cpp b/csrc/ops/composite.cpp index 81ff3073d77..8d2a7e84d5f 100644 --- a/csrc/ops/composite.cpp +++ b/csrc/ops/composite.cpp @@ -66,9 +66,9 @@ static TensorView* newForLinear( TensorDomain::noReductions(tv_b->getMaybeRFactorDomain()); // Linear: a = {*, in_features}, b = {out_features, in_features} / - // {in_features} For the linear output, all but the last dimension are the - // same shape as the first input. The last dimension is out_features (if present). - auto ndims_out = (orig_domain_a.size() - 1) + (orig_domain_b.size() - 1); + // {in_features}.The linear output is {*, (out_features), rK}. + // The first out_size -2 dimensions are as the first input, followed by out_features (if present) and an additional reduction axis K. + auto ndims_out = (orig_domain_a.size() - 1) + orig_domain_b.size(); const std::vector& mapping_a = ops::mapLinearOpIterDomains( orig_domain_a, MatmulRole::INPUT_A, ndims_out); diff --git a/csrc/ops/utils.cpp b/csrc/ops/utils.cpp index 2aa51a28c9d..32d47f7c60f 100644 --- a/csrc/ops/utils.cpp +++ b/csrc/ops/utils.cpp @@ -238,19 +238,20 @@ std::vector mapLinearOpIterDomains( for (auto inx : c10::irange(inp_size - 1)) { mapping[inx] = input_domain[inx]; } + mapping[out_size - 1] = input_domain.back(); break; } case MatmulRole::INPUT_B: { - if (inp_size > 1) { - // Weight is of shape {out_features, in_features} - mapping[out_size - 1] = input_domain[0]; + for (auto inx: c10::irange(inp_size - 1)) { + // Map N, K to the last two positions of the output. + mapping[out_size - 1 - inx] = input_domain[inp_size - 1 - inx]; } break; } case MatmulRole::INPUT_C: { if (inp_size > 0){ // Bias is 1D tensor of shape {out_features} - mapping[out_size - 1] = input_domain[0]; + mapping[out_size - 2] = input_domain[0]; } break; } From cb6ebffa93704e0303c7d14d26d3d381854ca454 Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Thu, 16 May 2024 04:59:37 +0000 Subject: [PATCH 20/30] add reduction dim --- csrc/ops/composite.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/csrc/ops/composite.cpp b/csrc/ops/composite.cpp index 8d2a7e84d5f..405b18b9af8 100644 --- a/csrc/ops/composite.cpp +++ b/csrc/ops/composite.cpp @@ -81,6 +81,8 @@ static TensorView* newForLinear( } std::vector out_domain = ops::newOutputDomain({mapping_a, mapping_b, mapping_bias}); + // Specify the iterdomain for K as reduction + out_domain[ndims_out - 1] = IterDomainBuilder(out_domain.back()).iter_type(IterType::Reduction).build(); TensorDomain* td = IrBuilder::create( out_domain, TensorDomain::getContiguityFilledWith(out_domain, true)); @@ -314,6 +316,8 @@ static TensorView* newForMatmul(TensorView* tv_a, TensorView* tv_b) { ndims_out = std::max(ndims_a, ndims_b); } + std::vector out_domain(ndims_out, nullptr); + const std::vector& mapping_a = ops::mapMatmulOpIterDomains( orig_domain_a, MatmulRole::INPUT_A, ndims_out); const std::vector& mapping_b = ops::mapMatmulOpIterDomains( From 08b52d01709bb2610380780c2cc3beb64b7d2ce8 Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Thu, 16 May 2024 05:48:25 +0000 Subject: [PATCH 21/30] error generator --- tests/python/pytest_input_generators.py | 28 +++++++++++++++++++++++++ tests/python/pytest_opinfos.py | 2 ++ 2 files changed, 30 insertions(+) diff --git a/tests/python/pytest_input_generators.py b/tests/python/pytest_input_generators.py index cf76e12dfa1..d7cfb152f37 100644 --- a/tests/python/pytest_input_generators.py +++ b/tests/python/pytest_input_generators.py @@ -1542,3 +1542,31 @@ def linear_input_generator( yield SampleInput( make_arg(shape_input), make_arg(shape_weight), make_arg(shape_bias) ) + +def linear_error_generator(op, dtype=torch.float32, requires_grad: bool = False, **kwargs): + make_arg = partial( + make_tensor, device="cuda", dtype=dtype, requires_grad=requires_grad + ) + # shapes, dim, exception type, exception string + M = 512 + N = 256 + K = 32 + + bias_with_1dweight = ( + ((M, K), (K), (N)), + RuntimeError, + "Expected B to be a 2D matrix if bias is given, got 1D.", + ) + + # mismatched_bias_extent = ( + # ((M, K), (1, K), (N)), + # RuntimeError, + # f"The expanded size of the tensor (1) must match the existing size ({N}) at non-singleton dimension 1. Target sizes: [{M}, 1]. Tensor sizes: [{N}]", + # ) + + for input_shapes, ex_type, ex_str in [bias_with_1dweight]: + shape_input, shape_weight, shape_bias = input_shapes + print (input_shapes) + yield SampleInput( + make_arg(shape_input), make_arg(shape_weight), make_arg(shape_bias) + ), ex_type, ex_str \ No newline at end of file diff --git a/tests/python/pytest_opinfos.py b/tests/python/pytest_opinfos.py index 5d69f57891a..480810d6927 100644 --- a/tests/python/pytest_opinfos.py +++ b/tests/python/pytest_opinfos.py @@ -50,6 +50,7 @@ where_error_generator, matmul_input_generator, linear_input_generator, + linear_error_generator, ) from pytest_utils import ( bool_int_dtypes, @@ -1133,6 +1134,7 @@ def torch_reshape_sym_fn(input_tensor, output_shaped_tensor): else (torch.float16,) ), sample_input_generator=linear_input_generator, + error_input_generator=linear_error_generator, reference=torch.nn.functional.linear, ) linear_ops.append(linear_opinfo) From 4e6ceaba51b5cf2a4a2676913af3bcf81330b3ca Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Thu, 16 May 2024 19:28:30 +0000 Subject: [PATCH 22/30] rename --- csrc/ops/composite.cpp | 28 +++++++++++------------ csrc/ops/composite.h | 8 +++---- csrc/ops/utils.cpp | 12 +++++++++- tests/cpp/test_matmul_aten_evaluation.cpp | 4 ++-- 4 files changed, 31 insertions(+), 21 deletions(-) diff --git a/csrc/ops/composite.cpp b/csrc/ops/composite.cpp index 405b18b9af8..b5ddd5c0ce3 100644 --- a/csrc/ops/composite.cpp +++ b/csrc/ops/composite.cpp @@ -92,30 +92,30 @@ static TensorView* newForLinear( } // namespace -TensorView* linear(TensorView* tv_a, TensorView* tv_b, TensorView* bias) { - auto ndims_a = TensorDomain::noReductions(tv_a->getMaybeRFactorDomain()).size(); - NVF_CHECK(ndims_a > 0, "Input A must be atleast 1D."); +TensorView* linear(TensorView* input, TensorView* weight, TensorView* bias) { + auto input_ndims = TensorDomain::noReductions(input->getMaybeRFactorDomain()).size(); + NVF_CHECK(input_ndims > 0, "Input A must be atleast 1D."); - auto ndims_b = TensorDomain::noReductions(tv_b->getMaybeRFactorDomain()).size(); - NVF_CHECK(ndims_b == 1 || ndims_b == 2, "Input B must be a 1D / 2D tensor."); + auto weight_ndims = TensorDomain::noReductions(weight->getMaybeRFactorDomain()).size(); + NVF_CHECK(weight_ndims == 1 || weight_ndims == 2, "Input B must be a 1D / 2D tensor."); // Note: This constraint is not documented but F.linear errors out if bias is given with 1D weights. - NVF_CHECK(ndims_b == 2 || bias == nullptr, "Expected B to be a 2D matrix if bias is given, got 1D.") + NVF_CHECK(weight_ndims == 2 || bias == nullptr, "Expected B to be a 2D matrix if bias is given, got 1D.") NVF_CHECK( - tv_a->dtype() == tv_b->dtype(), - "Expected A and B dtypes to have the same dtype, got: ", - tv_a->dtype(), + input->dtype() == weight->dtype(), + "Expected input and weight dtypes to have the same dtype, got: ", + input->dtype(), " and ", - tv_b->dtype()); + weight->dtype()); NVF_CHECK( - bias == nullptr || bias->dtype() == tv_a->dtype(), - "Expected bias to have the same dtype as A and B, got: ", bias->dtype(), " and ", tv_b->dtype() + bias == nullptr || bias->dtype() == input->dtype(), + "Expected bias to have the same dtype as A and B, got: ", bias->dtype(), " and ", input->dtype() ); // For all other cases, create a new LinearOp - TensorView* out = newForLinear(tv_a, tv_b, bias); - IrBuilder::create(out, tv_a, tv_b, bias); + TensorView* out = newForLinear(input, weight, bias); + IrBuilder::create(out, input, weight, bias); return out; } diff --git a/csrc/ops/composite.h b/csrc/ops/composite.h index 13ffe8f8884..3cd38a5d5da 100644 --- a/csrc/ops/composite.h +++ b/csrc/ops/composite.h @@ -47,15 +47,15 @@ NVF_API LstmResult lstm( TensorView* cell_x, TensorView* out_x); -// Linear functions which takes in two tensors of shapes A[* , in_features], B[out_features, in_features] / [in_features] and an optional bias of shape [out_features] or 0D scalar. -// Bias can only be given if B is a 2-D tensor. -TensorView* linear(TensorView* a, TensorView* b, TensorView* bias); +// Linear functions which takes in two tensors of shapes input[* , in_features], weight[out_features, in_features] / [in_features] and an optional bias of shape [out_features] or 0D scalar. +// Bias can only be given if weight is a 2-D tensor. +TensorView* linear(TensorView* input, TensorView* weight, TensorView* bias); // This is an implementation detail to reflect when linear is called // without a bias. This calls the above function. We use this function // since it simplifies creating a Python API which takes optional arguments. // Other options include using lambdas or creating a new RecordFunctor for // Linear. -TensorView* linear(TensorView* a, TensorView* b); +TensorView* linear(TensorView* input, TensorView* weight); NVF_API TensorView* sign(TensorView* x); NVF_API Val* sign(Val* x); diff --git a/csrc/ops/utils.cpp b/csrc/ops/utils.cpp index 32d47f7c60f..532a122e0ff 100644 --- a/csrc/ops/utils.cpp +++ b/csrc/ops/utils.cpp @@ -268,7 +268,7 @@ std::vector mapLinearOpIterDomains( #pragma GCC diagnostic ignored "-Wfree-nonheap-object" #endif IterDomain* newOutputIterDomain( - const std::vector& ids, + const std::vector& input_ids, const std::optional force_iter_type) { // For the start and stop offsets, take the maximum of input axes. // For now, the offsets of both start and stop are always integer @@ -282,6 +282,16 @@ IterDomain* newOutputIterDomain( Val* expanded_extent_val = nullptr; std::optional iter_type = std::nullopt; + std::vector ids; + ids.reserve(input_ids.size()); + + // Filter out any nullptrs + std::copy_if( + input_ids.begin(), + input_ids.end(), + std::back_inserter(ids), + [](IterDomain* id) { return id!=nullptr;}); + for (auto id : ids) { if (id->isBroadcast()) { if (id->hasExpandedExtent()) { diff --git a/tests/cpp/test_matmul_aten_evaluation.cpp b/tests/cpp/test_matmul_aten_evaluation.cpp index 2105a50dd74..21da8f9efa7 100644 --- a/tests/cpp/test_matmul_aten_evaluation.cpp +++ b/tests/cpp/test_matmul_aten_evaluation.cpp @@ -513,7 +513,7 @@ void checkMatmulOpIdMapping( } } -TEST_P(ATenNodesParametrizedTest, MatmulNodeConcrete) { +TEST_P(MatmulNodeParametrizedTest, MatmulNodeConcrete) { auto fusion = std::make_unique(); FusionGuard fg(fusion.get()); @@ -682,7 +682,7 @@ INSTANTIATE_TEST_SUITE_P( // Test case where K=1 INSTANTIATE_TEST_SUITE_P( ReductionAxisIsOne, - ATenNodesParametrizedTest, + MatmulNodeParametrizedTest, testing::Combine( testing::Values( Sizes({1}), From 9e9060b71e3afa14edc4a7842628cc2272cb9bcb Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Thu, 16 May 2024 20:42:59 +0000 Subject: [PATCH 23/30] filter nullptr --- csrc/ops/composite.cpp | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/csrc/ops/composite.cpp b/csrc/ops/composite.cpp index b5ddd5c0ce3..16dec3df970 100644 --- a/csrc/ops/composite.cpp +++ b/csrc/ops/composite.cpp @@ -81,8 +81,14 @@ static TensorView* newForLinear( } std::vector out_domain = ops::newOutputDomain({mapping_a, mapping_b, mapping_bias}); + + for (auto idx : c10::irange(ndims_out - 1)){ + out_domain[idx] = ops::newOutputIterDomain({mapping_a.at(idx), mapping_b.at(idx), mapping_bias.at(idx)}); + } // Specify the iterdomain for K as reduction - out_domain[ndims_out - 1] = IterDomainBuilder(out_domain.back()).iter_type(IterType::Reduction).build(); + out_domain[ndims_out - 1] = ops::newOutputIterDomain( + {mapping_a.back(), mapping_b.back()}, + /*force_iter_type=*/IterType::Reduction); TensorDomain* td = IrBuilder::create( out_domain, TensorDomain::getContiguityFilledWith(out_domain, true)); @@ -323,16 +329,8 @@ static TensorView* newForMatmul(TensorView* tv_a, TensorView* tv_b) { const std::vector& mapping_b = ops::mapMatmulOpIterDomains( orig_domain_b, MatmulRole::INPUT_B, ndims_out); - for (auto idx : c10::irange(ndims_out - 1)) { - std::vector input_ids; - input_ids.reserve(2); - if (mapping_a[idx] != nullptr) { - input_ids.emplace_back(mapping_a[idx]); - } - if (mapping_b[idx] != nullptr) { - input_ids.emplace_back(mapping_b[idx]); - } - out_domain[idx] = ops::newOutputIterDomain(input_ids); + for (auto idx : c10::irange(ndims_out - 1)){ + out_domain[idx] = ops::newOutputIterDomain({mapping_a.at(idx), mapping_b.at(idx)}); } out_domain[ndims_out - 1] = ops::newOutputIterDomain( From 89e258f5478550ccbc9261e201f9222e492e18c7 Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Thu, 16 May 2024 22:25:03 +0000 Subject: [PATCH 24/30] check id mapping --- csrc/ops/composite.cpp | 20 ++++---- csrc/ops/utils.cpp | 2 +- tests/cpp/test_matmul_aten_evaluation.cpp | 61 +++++++++++++++++++++-- 3 files changed, 68 insertions(+), 15 deletions(-) diff --git a/csrc/ops/composite.cpp b/csrc/ops/composite.cpp index 16dec3df970..b7194c3bc10 100644 --- a/csrc/ops/composite.cpp +++ b/csrc/ops/composite.cpp @@ -57,23 +57,23 @@ TensorView* dropout_backward(TensorView* dy, TensorView* mask, Val* scale) { namespace { static TensorView* newForLinear( - TensorView* tv_a, - TensorView* tv_b, + TensorView* input, + TensorView* weight, TensorView* bias) { - auto orig_domain_a = - TensorDomain::noReductions(tv_a->getMaybeRFactorDomain()); - auto orig_domain_b = - TensorDomain::noReductions(tv_b->getMaybeRFactorDomain()); + auto input_domain = + TensorDomain::noReductions(input->getMaybeRFactorDomain()); + auto weight_domain = + TensorDomain::noReductions(weight->getMaybeRFactorDomain()); // Linear: a = {*, in_features}, b = {out_features, in_features} / // {in_features}.The linear output is {*, (out_features), rK}. // The first out_size -2 dimensions are as the first input, followed by out_features (if present) and an additional reduction axis K. - auto ndims_out = (orig_domain_a.size() - 1) + orig_domain_b.size(); + auto ndims_out = input_domain.size() + weight_domain.size() - 1; const std::vector& mapping_a = ops::mapLinearOpIterDomains( - orig_domain_a, MatmulRole::INPUT_A, ndims_out); + input_domain, MatmulRole::INPUT_A, ndims_out); const std::vector& mapping_b = ops::mapLinearOpIterDomains( - orig_domain_b, MatmulRole::INPUT_B, ndims_out); + weight_domain, MatmulRole::INPUT_B, ndims_out); std::vector mapping_bias (ndims_out, nullptr); if (bias != nullptr){ auto bias_domain = TensorDomain::noReductions(bias->getMaybeRFactorDomain()); @@ -93,7 +93,7 @@ static TensorView* newForLinear( TensorDomain* td = IrBuilder::create( out_domain, TensorDomain::getContiguityFilledWith(out_domain, true)); - return IrBuilder::create(td, tv_a->dtype()); + return IrBuilder::create(td, input->dtype()); } } // namespace diff --git a/csrc/ops/utils.cpp b/csrc/ops/utils.cpp index 532a122e0ff..fd13e245421 100644 --- a/csrc/ops/utils.cpp +++ b/csrc/ops/utils.cpp @@ -242,7 +242,7 @@ std::vector mapLinearOpIterDomains( break; } case MatmulRole::INPUT_B: { - for (auto inx: c10::irange(inp_size - 1)) { + for (auto inx: c10::irange(inp_size)) { // Map N, K to the last two positions of the output. mapping[out_size - 1 - inx] = input_domain[inp_size - 1 - inx]; } diff --git a/tests/cpp/test_matmul_aten_evaluation.cpp b/tests/cpp/test_matmul_aten_evaluation.cpp index 21da8f9efa7..943bbd76daf 100644 --- a/tests/cpp/test_matmul_aten_evaluation.cpp +++ b/tests/cpp/test_matmul_aten_evaluation.cpp @@ -513,6 +513,55 @@ void checkMatmulOpIdMapping( } } +// Check that ID exact mapping works as expected +void checkLinearOpIdMapping( + Fusion* fusion, + TensorView* input, + TensorView* weight, + TensorView* bias, + TensorView* output) { + IdModel id_model(fusion); + const ValGraph& vg = id_model.idGraph(IdMappingMode::EXACT); + vg.validateConsistency(); + + const auto checkMapped = [&vg](IterDomain* x, IterDomain* y) -> bool { + if (!vg.hasGroup(x) || !vg.hasGroup(y)) { + return false; + } + const ValGroup& gx = vg.toGroup(x); + const ValGroup& gy = vg.toGroup(y); + return gx.get() == gy.get(); + }; + + // input: [* , in_features] + // weight: [out_features, in_features] / [out_features] + // bias (optional): [out_features]/[] + // output = [*, (out_features), rK] + + ASSERT_EQ(output->nDims(), input->nDims() + weight->nDims() - 1); + + // Check that the first input_size - 1 dims are mapped for input + for (auto i: c10::irange(input->nDims() - 1)){ + if (!input->axis(i)->isBroadcast()){ + EXPECT_TRUE(checkMapped(input->axis(i), output->axis(i))); + } + } + // Check out_features dim is mapped in weight & bias if present. + if (weight->nDims() > 1){ + if (!weight->axis(0)->isBroadcast()){ + EXPECT_TRUE(checkMapped(weight->axis(0), output->axis(-2))); + } + if (bias != nullptr && bias->nDims() > 0 && !bias->axis(0)->isBroadcast()) { + EXPECT_TRUE(checkMapped(bias->axis(0), output->axis(-2))); + } + } + // Check mapping for reduction axis in input and weight + if (!input->axis(-1)->isBroadcast()){ + EXPECT_TRUE(checkMapped(input->axis(-1), weight->axis(-1))); + EXPECT_TRUE(checkMapped(input->axis(-1), output->axis(-1))); + } +} + TEST_P(MatmulNodeParametrizedTest, MatmulNodeConcrete) { auto fusion = std::make_unique(); FusionGuard fg(fusion.get()); @@ -586,11 +635,13 @@ TEST_P(LinearNodeParametrizedTest, LinearNodeConcrete) { } fusion->addOutput(tv2); + checkLinearOpIdMapping(fusion.get(), tv0, tv1, bias, tv2); + at::Tensor t0 = at::randn(a_shape, at::kHalf).cuda(); at::Tensor t1 = at::randn(b_shape, at::kHalf).cuda(); std::optional bias_opt = std::nullopt; if (bias_shape.has_value()) { - bias_opt = bias_shape.value().empty() ? at::scalar_tensor(3.14).to(at::kHalf).cuda(): at::randn(*bias_shape, at::kHalf).cuda(); + bias_opt = at::randn(*bias_shape, at::kHalf).cuda(); } at::Tensor out_ref = at::linear(t0, t1, bias_opt); @@ -617,8 +668,8 @@ TEST_P(LinearNodeParametrizedTest, LinearNodeSymbolic) { const auto& [a_shape, b_shape, bias_shape] = GetParam(); - auto tv0 = makeSymbolicTensor(a_shape.size(), DataType::Half); - auto tv1 = makeSymbolicTensor(b_shape.size(), DataType::Half); + auto tv0 = makeSymbolicTensor(a_shape, DataType::Half); + auto tv1 = makeSymbolicTensor(b_shape, DataType::Half); TensorView* bias = nullptr; if (bias_shape.has_value()) { @@ -634,11 +685,13 @@ TEST_P(LinearNodeParametrizedTest, LinearNodeSymbolic) { } fusion->addOutput(tv2); + checkLinearOpIdMapping(fusion.get(), tv0, tv1, bias, tv2); + at::Tensor t0 = at::randn(a_shape, at::kHalf).cuda(); at::Tensor t1 = at::randn(b_shape, at::kHalf).cuda(); std::optional bias_opt = std::nullopt; if (bias_shape.has_value()) { - bias_opt = bias_shape.value().empty() ? at::scalar_tensor(3.14).to(at::kHalf).cuda() : at::randn(*bias_shape, at::kHalf).cuda(); + bias_opt = at::randn(*bias_shape, at::kHalf).cuda(); } at::Tensor out_ref = at::linear(t0, t1, bias_opt); From 701715340327699a2e0de98c52bf54e59b5bbf97 Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Thu, 16 May 2024 23:10:09 +0000 Subject: [PATCH 25/30] comments, error cases, K=1 cases --- csrc/ir/internal_nodes.h | 3 +-- csrc/ops/utils.h | 8 ++++++++ tests/cpp/test_matmul_aten_evaluation.cpp | 2 +- tests/python/pytest_input_generators.py | 14 ++++++++------ tests/python/pytest_ops.py | 4 +++- 5 files changed, 21 insertions(+), 10 deletions(-) diff --git a/csrc/ir/internal_nodes.h b/csrc/ir/internal_nodes.h index 3dc644e10af..9bf493c5f7e 100644 --- a/csrc/ir/internal_nodes.h +++ b/csrc/ir/internal_nodes.h @@ -2288,8 +2288,7 @@ class MatmulOp : public Expr { const std::vector& inputs) const override; }; -// Linear Operator to be expression evaluated without decomposition. -// This node has the same functionality as F.linear (https://pytorch.org/docs/stable/generated/torch.nn.functional.linear.html#torch.nn.functional.linear) +// Linear node with same functionality as F.linear (https://pytorch.org/docs/stable/generated/torch.nn.functional.linear.html#torch.nn.functional.linear) class LinearOp : public Expr { public: using Expr::Expr; diff --git a/csrc/ops/utils.h b/csrc/ops/utils.h index f6750ef9f5e..75d7f413193 100644 --- a/csrc/ops/utils.h +++ b/csrc/ops/utils.h @@ -46,6 +46,10 @@ IterType promoteIterType(IterType type1, IterType type2); // Mapping B: {nullptr, id_N}) // 3. A/B are atleast 1D and one of them is > 2D: [B, M, K] x [K, N] -> [B, M, // N] (Mapping A: {id_B, id_M, nullptr}, Mapping B: {nullptr, nullptr, id_N}) +// Args: +// 1. input_domain: root/rfactor domain without reductions for any input to MatmulOp +// 2. input_role: Specifies if the input is A / B (MatmulRole::Input_A/Input_B) +// 3: out_size: MatmulOp output dimension (input and output may not be the same size). std::vector mapMatmulOpIterDomains( const std::vector& input_domain, MatmulRole input_role, @@ -54,6 +58,10 @@ std::vector mapMatmulOpIterDomains( // For LinearOp, the output is the same as the first input (A[*, in_features])for all but the last dimension. // If the second input is 2D (B[out_features, in_features]), the last dimension of output is out_features. // If bias is 1D (bias[out_features]) it maps to the last dimension of the output. +// Args: +// 1. input_domain: root/rfactor domain without reductions for any input to LinearOp +// 2. input_role: Specifies if the input is A / B / Bias (MatmulRole::Input_A/Input_B/Input_C) +// 3: out_size: LinearOp output dimension (input and output may not be the same size). std::vector mapLinearOpIterDomains( const std::vector& input_domain, MatmulRole input_role, diff --git a/tests/cpp/test_matmul_aten_evaluation.cpp b/tests/cpp/test_matmul_aten_evaluation.cpp index 943bbd76daf..b1c9a39c499 100644 --- a/tests/cpp/test_matmul_aten_evaluation.cpp +++ b/tests/cpp/test_matmul_aten_evaluation.cpp @@ -769,7 +769,7 @@ INSTANTIATE_TEST_SUITE_P( LinearReductionAxisIsOne, LinearNodeParametrizedTest, testing::Combine( - testing::Values(Sizes({m, 1}), Sizes({b, m, 1})), + testing::Values(Sizes({1}), Sizes({m, 1}), Sizes({b, m, 1}), Sizes({1, 1}), Sizes({b, 1, 1})), testing::Values(Sizes({n, 1})), testing::Values(Sizes({}), Sizes({n})))); diff --git a/tests/python/pytest_input_generators.py b/tests/python/pytest_input_generators.py index d7cfb152f37..c87537dd0ed 100644 --- a/tests/python/pytest_input_generators.py +++ b/tests/python/pytest_input_generators.py @@ -1558,13 +1558,15 @@ def linear_error_generator(op, dtype=torch.float32, requires_grad: bool = False, "Expected B to be a 2D matrix if bias is given, got 1D.", ) - # mismatched_bias_extent = ( - # ((M, K), (1, K), (N)), - # RuntimeError, - # f"The expanded size of the tensor (1) must match the existing size ({N}) at non-singleton dimension 1. Target sizes: [{M}, 1]. Tensor sizes: [{N}]", - # ) + mismatched_bias_extent = ( + ((M, K), (1, K), (N)), + RuntimeError, + f"The expanded size of the tensor (1) must match the existing size ({N}) at non-singleton dimension 1. Target sizes: [{M}, 1]. Tensor sizes: [{N}]", + ) + + error_cases = [bias_with_1dweight, mismatched_bias_extent] - for input_shapes, ex_type, ex_str in [bias_with_1dweight]: + for input_shapes, ex_type, ex_str in error_cases: shape_input, shape_weight, shape_bias = input_shapes print (input_shapes) yield SampleInput( diff --git a/tests/python/pytest_ops.py b/tests/python/pytest_ops.py index ffcb8e7d535..216c687a418 100644 --- a/tests/python/pytest_ops.py +++ b/tests/python/pytest_ops.py @@ -209,10 +209,12 @@ def errors_test_fn( fd.execute(parse_args_fusion_execution(nvf_op, *sample.args)) -# A pair of parentheses () represents a capture group in regex. +# A pair of parentheses ()/[] represents a capture group in regex. # Escape parenthesis in regex string to match raw characters. def _regex_escape_parenthesis(a: str) -> str: b = a.replace(r"(", r"\(") + b = b.replace(r"[", r"\[") + b = b.replace(r"]", r"\]") return b.replace(r")", r"\)") From 1d501acd7f358024282d02b28db185be8b67b0cc Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Thu, 16 May 2024 23:15:10 +0000 Subject: [PATCH 26/30] remove unused fn --- csrc/ops/utils.cpp | 20 -------------------- csrc/ops/utils.h | 3 --- 2 files changed, 23 deletions(-) diff --git a/csrc/ops/utils.cpp b/csrc/ops/utils.cpp index fd13e245421..493b8679468 100644 --- a/csrc/ops/utils.cpp +++ b/csrc/ops/utils.cpp @@ -361,26 +361,6 @@ IterDomain* newOutputIterDomain( #pragma GCC diagnostic pop #endif -std::vector newOutputDomain(const std::vector>& input_ids) { - NVF_CHECK( - !input_ids.empty(), - "Tried to create new output Tensorview but received empty list."); - - std::vector out_domain(input_ids.front().size(), nullptr); - - for (const auto dim_i : c10::irange(out_domain.size())) { - std::vector ids_i; - ids_i.reserve(input_ids.size()); - for (auto ids : input_ids) { - if (ids[dim_i] != nullptr){ - ids_i.emplace_back(ids[dim_i]); - } - } - out_domain[dim_i] = newOutputIterDomain(ids_i); - } - return out_domain; -} - std::vector newOutputDomain(const std::vector& vals) { std::vector tvs; for (auto val : vals) { diff --git a/csrc/ops/utils.h b/csrc/ops/utils.h index 75d7f413193..42e887c7d31 100644 --- a/csrc/ops/utils.h +++ b/csrc/ops/utils.h @@ -76,9 +76,6 @@ IterDomain* newOutputIterDomain( const std::vector& ids, const std::optional force_iter_type = std::nullopt); -// Takes multiple vectors of input iterdomains and assumes they are aligned to create the output tensorview. -std::vector newOutputDomain(const std::vector>& input_ids); - // Takes a vector of tensorviews and assumes they are all aligned to create the // output tensorview. For eg: BinaryOp. std::vector newOutputDomain(const std::vector& vals); From 4e5a11e392d997e62ee8fca376cd97b2ad0503f0 Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Fri, 17 May 2024 02:59:25 +0000 Subject: [PATCH 27/30] reuse code --- csrc/ops/composite.cpp | 2 +- csrc/root_domain_map.cpp | 36 +++++------- tests/cpp/test_matmul_aten_evaluation.cpp | 68 ++++++++++------------- 3 files changed, 45 insertions(+), 61 deletions(-) diff --git a/csrc/ops/composite.cpp b/csrc/ops/composite.cpp index b7194c3bc10..26670166c32 100644 --- a/csrc/ops/composite.cpp +++ b/csrc/ops/composite.cpp @@ -80,7 +80,7 @@ static TensorView* newForLinear( mapping_bias = ops::mapLinearOpIterDomains(bias_domain, MatmulRole::INPUT_C, ndims_out); } - std::vector out_domain = ops::newOutputDomain({mapping_a, mapping_b, mapping_bias}); + std::vector out_domain(ndims_out, nullptr); for (auto idx : c10::irange(ndims_out - 1)){ out_domain[idx] = ops::newOutputIterDomain({mapping_a.at(idx), mapping_b.at(idx), mapping_bias.at(idx)}); diff --git a/csrc/root_domain_map.cpp b/csrc/root_domain_map.cpp index 0f68bf88270..050f2208058 100644 --- a/csrc/root_domain_map.cpp +++ b/csrc/root_domain_map.cpp @@ -165,6 +165,18 @@ std::unordered_map PairwiseRootDomainMap::map( } }; + // Assumes producer and consumer IDs to be trivially aligned and adds them to domain map. + auto pairwiseMapAllIds = [&](std::vector producer_ids, std::vector consumer_ids){ + for (auto idx : c10::irange(consumer_ids.size())) { + IterDomain* producer_id = producer_ids.at(idx); + IterDomain* consumer_id = consumer_ids.at(idx); + if (producer_id == nullptr) { + continue; + } + updatePairwiseRootDomainMap(producer_id, consumer_id); + } + }; + // For MatmulOp, use the corresponding mapped input iterdomains. if (MatmulOp* op = dynamic_cast(consumer_tv_->definition())) { // Check if the producer is lhs/rhs input @@ -183,18 +195,7 @@ std::unordered_map PairwiseRootDomainMap::map( // maps to the third output iterdomain. const std::vector& aligned_producer_ids = ops::mapMatmulOpIterDomains(producer_root, input_role, out_size); - - NVF_ERROR(aligned_producer_ids.size() == consumer_root.size()); - - for (auto inx : c10::irange(out_size)) { - IterDomain* producer_id = aligned_producer_ids.at(inx); - IterDomain* consumer_id = consumer_root.at(inx); - if (producer_id == nullptr) { - continue; - } - updatePairwiseRootDomainMap(producer_id, consumer_id); - } - + pairwiseMapAllIds(aligned_producer_ids, consumer_root); return dom_map; } @@ -221,16 +222,7 @@ std::unordered_map PairwiseRootDomainMap::map( const std::vector& aligned_producer_ids = ops::mapLinearOpIterDomains(producer_root, input_role.value(), out_size); - - for (auto inx : c10::irange(out_size)) { - IterDomain* producer_id = aligned_producer_ids.at(inx); - IterDomain* consumer_id = consumer_root.at(inx); - if (producer_id == nullptr) { - continue; - } - updatePairwiseRootDomainMap(producer_id, consumer_id); - } - + pairwiseMapAllIds(aligned_producer_ids, consumer_root); return dom_map; } diff --git a/tests/cpp/test_matmul_aten_evaluation.cpp b/tests/cpp/test_matmul_aten_evaluation.cpp index b1c9a39c499..bb73b4e9c4b 100644 --- a/tests/cpp/test_matmul_aten_evaluation.cpp +++ b/tests/cpp/test_matmul_aten_evaluation.cpp @@ -423,6 +423,16 @@ TEST_F(MatmulATenEvaluationTest, LinearWithBias) { EXPECT_TRUE(at::allclose(out[0], out_ref)); } + +const bool checkMapped (const ValGraph& vg, IterDomain* x, IterDomain* y){ + if (!vg.hasGroup(x) || !vg.hasGroup(y)) { + return false; + } + const ValGroup& gx = vg.toGroup(x); + const ValGroup& gy = vg.toGroup(y); + return gx.get() == gy.get(); +}; + // Check that ID exact mapping works as expected void checkMatmulOpIdMapping( Fusion* fusion, @@ -433,15 +443,6 @@ void checkMatmulOpIdMapping( const ValGraph& vg = id_model.idGraph(IdMappingMode::EXACT); vg.validateConsistency(); - const auto checkMapped = [&vg](IterDomain* x, IterDomain* y) -> bool { - if (!vg.hasGroup(x) || !vg.hasGroup(y)) { - return false; - } - const ValGroup& gx = vg.toGroup(x); - const ValGroup& gy = vg.toGroup(y); - return gx.get() == gy.get(); - }; - // If K is Broadcast then we will not have a reduction dim bool k_bcast = A->axis(-1)->isBroadcast(); int64_t red_dims = k_bcast ? 0 : 1; @@ -453,44 +454,44 @@ void checkMatmulOpIdMapping( EXPECT_EQ(output->nDims(), 0); // When K is Broadcast, we squeeze then multiply then cast instead if (!k_bcast) { - EXPECT_TRUE(checkMapped(A->axis(0), B->axis(0))); // K + EXPECT_TRUE(checkMapped(vg, A->axis(0), B->axis(0))); // K } } else if (A->nDims() > 1 && B->nDims() == 1) { // [..., iM, iK] @ [iK] = [..., iM, rK] ASSERT_EQ(output->nDims(), A->nDims() + red_dims - 1); - EXPECT_TRUE(checkMapped(A->axis(-2), output->axis(-1 - red_dims))); // M + EXPECT_TRUE(checkMapped(vg, A->axis(-2), output->axis(-1 - red_dims))); // M if (!k_bcast) { - EXPECT_TRUE(checkMapped(A->axis(-1), B->axis(0))); // K - EXPECT_TRUE(checkMapped(A->axis(-1), output->axis(-1))); // K + EXPECT_TRUE(checkMapped(vg, A->axis(-1), B->axis(0))); // K + EXPECT_TRUE(checkMapped(vg, A->axis(-1), output->axis(-1))); // K } // Check that batch dims are mapped for (int64_t i : c10::irange(output->nDims() - red_dims - 1)) { if (!A->axis(i)->isBroadcast()) { - EXPECT_TRUE(checkMapped(A->axis(i), output->axis(i))); + EXPECT_TRUE(checkMapped(vg, A->axis(i), output->axis(i))); } } } else if (A->nDims() == 1 && B->nDims() > 1) { // [iK] @ [..., iK, iN] = [..., iN, rK] ASSERT_EQ(output->nDims(), B->nDims() + red_dims - 1); - EXPECT_TRUE(checkMapped(B->axis(-1), output->axis(-1 - red_dims))); // N + EXPECT_TRUE(checkMapped(vg, B->axis(-1), output->axis(-1 - red_dims))); // N if (!k_bcast) { - EXPECT_TRUE(checkMapped(A->axis(0), B->axis(-2))); // K - EXPECT_TRUE(checkMapped(A->axis(0), output->axis(-1))); // K + EXPECT_TRUE(checkMapped(vg, A->axis(0), B->axis(-2))); // K + EXPECT_TRUE(checkMapped(vg, A->axis(0), output->axis(-1))); // K } // Check that batch dims are mapped for (int64_t i : c10::irange(output->nDims() - red_dims - 1)) { if (!B->axis(i)->isBroadcast()) { - EXPECT_TRUE(checkMapped(B->axis(i), output->axis(i))); + EXPECT_TRUE(checkMapped(vg, B->axis(i), output->axis(i))); } } } else if (A->nDims() > 1 && B->nDims() > 1) { // [..., iM, iK] @ [..., iK, iN] = [..., iM, iN, rK] ASSERT_EQ(output->nDims(), std::max(A->nDims(), B->nDims()) + red_dims); - EXPECT_TRUE(checkMapped(A->axis(-2), output->axis(-2 - red_dims))); // M - EXPECT_TRUE(checkMapped(B->axis(-1), output->axis(-1 - red_dims))); // N + EXPECT_TRUE(checkMapped(vg, A->axis(-2), output->axis(-2 - red_dims))); // M + EXPECT_TRUE(checkMapped(vg, B->axis(-1), output->axis(-1 - red_dims))); // N if (!k_bcast) { - EXPECT_TRUE(checkMapped(A->axis(-1), B->axis(-2))); // K - EXPECT_TRUE(checkMapped(A->axis(-1), output->axis(-1))); // K + EXPECT_TRUE(checkMapped(vg, A->axis(-1), B->axis(-2))); // K + EXPECT_TRUE(checkMapped(vg, A->axis(-1), output->axis(-1))); // K } // Check that batch dims are mapped // Note that A and B can have different dimensions, so here we count @@ -501,10 +502,10 @@ void checkMatmulOpIdMapping( int64_t i_b = B->nDims() - 3 - i; int64_t i_out = output->nDims() - red_dims - 3 - i; if (i_a >= 0 && !A->axis(i_a)->isBroadcast()) { - EXPECT_TRUE(checkMapped(A->axis(i_a), output->axis(i_out))); + EXPECT_TRUE(checkMapped(vg, A->axis(i_a), output->axis(i_out))); } if (i_b >= 0 && !B->axis(i_b)->isBroadcast()) { - EXPECT_TRUE(checkMapped(B->axis(i_b), output->axis(i_out))); + EXPECT_TRUE(checkMapped(vg, B->axis(i_b), output->axis(i_out))); } } } else { @@ -524,15 +525,6 @@ void checkLinearOpIdMapping( const ValGraph& vg = id_model.idGraph(IdMappingMode::EXACT); vg.validateConsistency(); - const auto checkMapped = [&vg](IterDomain* x, IterDomain* y) -> bool { - if (!vg.hasGroup(x) || !vg.hasGroup(y)) { - return false; - } - const ValGroup& gx = vg.toGroup(x); - const ValGroup& gy = vg.toGroup(y); - return gx.get() == gy.get(); - }; - // input: [* , in_features] // weight: [out_features, in_features] / [out_features] // bias (optional): [out_features]/[] @@ -543,22 +535,22 @@ void checkLinearOpIdMapping( // Check that the first input_size - 1 dims are mapped for input for (auto i: c10::irange(input->nDims() - 1)){ if (!input->axis(i)->isBroadcast()){ - EXPECT_TRUE(checkMapped(input->axis(i), output->axis(i))); + EXPECT_TRUE(checkMapped(vg, input->axis(i), output->axis(i))); } } // Check out_features dim is mapped in weight & bias if present. if (weight->nDims() > 1){ if (!weight->axis(0)->isBroadcast()){ - EXPECT_TRUE(checkMapped(weight->axis(0), output->axis(-2))); + EXPECT_TRUE(checkMapped(vg, weight->axis(0), output->axis(-2))); } if (bias != nullptr && bias->nDims() > 0 && !bias->axis(0)->isBroadcast()) { - EXPECT_TRUE(checkMapped(bias->axis(0), output->axis(-2))); + EXPECT_TRUE(checkMapped(vg, bias->axis(0), output->axis(-2))); } } // Check mapping for reduction axis in input and weight if (!input->axis(-1)->isBroadcast()){ - EXPECT_TRUE(checkMapped(input->axis(-1), weight->axis(-1))); - EXPECT_TRUE(checkMapped(input->axis(-1), output->axis(-1))); + EXPECT_TRUE(checkMapped(vg, input->axis(-1), weight->axis(-1))); + EXPECT_TRUE(checkMapped(vg, input->axis(-1), output->axis(-1))); } } From c41dd513d029a99c67f98a10acd0289973890d8f Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Fri, 17 May 2024 03:23:31 +0000 Subject: [PATCH 28/30] lint --- csrc/ir/internal_nodes.h | 3 +- csrc/ops/composite.cpp | 58 ++++++++++++++--------- csrc/ops/composite.h | 6 ++- csrc/ops/utils.cpp | 51 ++++++++++---------- csrc/ops/utils.h | 25 ++++++---- csrc/root_domain_map.cpp | 13 +++-- tests/cpp/test_matmul_aten_evaluation.cpp | 44 +++++++++++------ tests/python/pytest_input_generators.py | 8 ++-- 8 files changed, 124 insertions(+), 84 deletions(-) diff --git a/csrc/ir/internal_nodes.h b/csrc/ir/internal_nodes.h index 9bf493c5f7e..0ddd0a704a1 100644 --- a/csrc/ir/internal_nodes.h +++ b/csrc/ir/internal_nodes.h @@ -2288,7 +2288,8 @@ class MatmulOp : public Expr { const std::vector& inputs) const override; }; -// Linear node with same functionality as F.linear (https://pytorch.org/docs/stable/generated/torch.nn.functional.linear.html#torch.nn.functional.linear) +// Linear node with same functionality as F.linear +// (https://pytorch.org/docs/stable/generated/torch.nn.functional.linear.html#torch.nn.functional.linear) class LinearOp : public Expr { public: using Expr::Expr; diff --git a/csrc/ops/composite.cpp b/csrc/ops/composite.cpp index 26670166c32..4685e0cf5ab 100644 --- a/csrc/ops/composite.cpp +++ b/csrc/ops/composite.cpp @@ -67,23 +67,27 @@ static TensorView* newForLinear( // Linear: a = {*, in_features}, b = {out_features, in_features} / // {in_features}.The linear output is {*, (out_features), rK}. - // The first out_size -2 dimensions are as the first input, followed by out_features (if present) and an additional reduction axis K. + // The first out_size -2 dimensions are as the first input, followed by + // out_features (if present) and an additional reduction axis K. auto ndims_out = input_domain.size() + weight_domain.size() - 1; - const std::vector& mapping_a = ops::mapLinearOpIterDomains( - input_domain, MatmulRole::INPUT_A, ndims_out); + const std::vector& mapping_a = + ops::mapLinearOpIterDomains(input_domain, MatmulRole::INPUT_A, ndims_out); const std::vector& mapping_b = ops::mapLinearOpIterDomains( weight_domain, MatmulRole::INPUT_B, ndims_out); - std::vector mapping_bias (ndims_out, nullptr); - if (bias != nullptr){ - auto bias_domain = TensorDomain::noReductions(bias->getMaybeRFactorDomain()); - mapping_bias = ops::mapLinearOpIterDomains(bias_domain, MatmulRole::INPUT_C, ndims_out); + std::vector mapping_bias(ndims_out, nullptr); + if (bias != nullptr) { + auto bias_domain = + TensorDomain::noReductions(bias->getMaybeRFactorDomain()); + mapping_bias = ops::mapLinearOpIterDomains( + bias_domain, MatmulRole::INPUT_C, ndims_out); } std::vector out_domain(ndims_out, nullptr); - for (auto idx : c10::irange(ndims_out - 1)){ - out_domain[idx] = ops::newOutputIterDomain({mapping_a.at(idx), mapping_b.at(idx), mapping_bias.at(idx)}); + for (auto idx : c10::irange(ndims_out - 1)) { + out_domain[idx] = ops::newOutputIterDomain( + {mapping_a.at(idx), mapping_b.at(idx), mapping_bias.at(idx)}); } // Specify the iterdomain for K as reduction out_domain[ndims_out - 1] = ops::newOutputIterDomain( @@ -99,14 +103,21 @@ static TensorView* newForLinear( } // namespace TensorView* linear(TensorView* input, TensorView* weight, TensorView* bias) { - auto input_ndims = TensorDomain::noReductions(input->getMaybeRFactorDomain()).size(); + auto input_ndims = + TensorDomain::noReductions(input->getMaybeRFactorDomain()).size(); NVF_CHECK(input_ndims > 0, "Input A must be atleast 1D."); - - auto weight_ndims = TensorDomain::noReductions(weight->getMaybeRFactorDomain()).size(); - NVF_CHECK(weight_ndims == 1 || weight_ndims == 2, "Input B must be a 1D / 2D tensor."); - // Note: This constraint is not documented but F.linear errors out if bias is given with 1D weights. - NVF_CHECK(weight_ndims == 2 || bias == nullptr, "Expected B to be a 2D matrix if bias is given, got 1D.") + auto weight_ndims = + TensorDomain::noReductions(weight->getMaybeRFactorDomain()).size(); + NVF_CHECK( + weight_ndims == 1 || weight_ndims == 2, + "Input B must be a 1D / 2D tensor."); + + // Note: This constraint is not documented but F.linear errors out if bias is + // given with 1D weights. + NVF_CHECK( + weight_ndims == 2 || bias == nullptr, + "Expected B to be a 2D matrix if bias is given, got 1D.") NVF_CHECK( input->dtype() == weight->dtype(), @@ -114,11 +125,13 @@ TensorView* linear(TensorView* input, TensorView* weight, TensorView* bias) { input->dtype(), " and ", weight->dtype()); - + NVF_CHECK( - bias == nullptr || bias->dtype() == input->dtype(), - "Expected bias to have the same dtype as A and B, got: ", bias->dtype(), " and ", input->dtype() - ); + bias == nullptr || bias->dtype() == input->dtype(), + "Expected bias to have the same dtype as A and B, got: ", + bias->dtype(), + " and ", + input->dtype()); // For all other cases, create a new LinearOp TensorView* out = newForLinear(input, weight, bias); IrBuilder::create(out, input, weight, bias); @@ -323,14 +336,15 @@ static TensorView* newForMatmul(TensorView* tv_a, TensorView* tv_b) { } std::vector out_domain(ndims_out, nullptr); - + const std::vector& mapping_a = ops::mapMatmulOpIterDomains( orig_domain_a, MatmulRole::INPUT_A, ndims_out); const std::vector& mapping_b = ops::mapMatmulOpIterDomains( orig_domain_b, MatmulRole::INPUT_B, ndims_out); - for (auto idx : c10::irange(ndims_out - 1)){ - out_domain[idx] = ops::newOutputIterDomain({mapping_a.at(idx), mapping_b.at(idx)}); + for (auto idx : c10::irange(ndims_out - 1)) { + out_domain[idx] = + ops::newOutputIterDomain({mapping_a.at(idx), mapping_b.at(idx)}); } out_domain[ndims_out - 1] = ops::newOutputIterDomain( diff --git a/csrc/ops/composite.h b/csrc/ops/composite.h index 3cd38a5d5da..0ef555ebc59 100644 --- a/csrc/ops/composite.h +++ b/csrc/ops/composite.h @@ -47,8 +47,10 @@ NVF_API LstmResult lstm( TensorView* cell_x, TensorView* out_x); -// Linear functions which takes in two tensors of shapes input[* , in_features], weight[out_features, in_features] / [in_features] and an optional bias of shape [out_features] or 0D scalar. -// Bias can only be given if weight is a 2-D tensor. +// Linear functions which takes in two tensors of shapes input[* , in_features], +// weight[out_features, in_features] / [in_features] and an optional bias of +// shape [out_features] or 0D scalar. Bias can only be given if weight is a 2-D +// tensor. TensorView* linear(TensorView* input, TensorView* weight, TensorView* bias); // This is an implementation detail to reflect when linear is called // without a bias. This calls the above function. We use this function diff --git a/csrc/ops/utils.cpp b/csrc/ops/utils.cpp index 493b8679468..43570a50ede 100644 --- a/csrc/ops/utils.cpp +++ b/csrc/ops/utils.cpp @@ -225,7 +225,6 @@ std::vector mapLinearOpIterDomains( const std::vector& input_domain, MatmulRole input_role, size_t out_size) { - std::vector mapping(out_size, nullptr); auto inp_size = input_domain.size(); @@ -233,30 +232,30 @@ std::vector mapLinearOpIterDomains( // Input B: {*, N, K} / {K} // Bias: {N} / {} switch (input_role) { - case MatmulRole::INPUT_A: { - // Linear output is same as input for all but the last dimension - for (auto inx : c10::irange(inp_size - 1)) { - mapping[inx] = input_domain[inx]; - } - mapping[out_size - 1] = input_domain.back(); - break; + case MatmulRole::INPUT_A: { + // Linear output is same as input for all but the last dimension + for (auto inx : c10::irange(inp_size - 1)) { + mapping[inx] = input_domain[inx]; } - case MatmulRole::INPUT_B: { - for (auto inx: c10::irange(inp_size)) { - // Map N, K to the last two positions of the output. - mapping[out_size - 1 - inx] = input_domain[inp_size - 1 - inx]; - } - break; + mapping[out_size - 1] = input_domain.back(); + break; + } + case MatmulRole::INPUT_B: { + for (auto inx : c10::irange(inp_size)) { + // Map N, K to the last two positions of the output. + mapping[out_size - 1 - inx] = input_domain[inp_size - 1 - inx]; } - case MatmulRole::INPUT_C: { - if (inp_size > 0){ - // Bias is 1D tensor of shape {out_features} - mapping[out_size - 2] = input_domain[0]; - } - break; + break; + } + case MatmulRole::INPUT_C: { + if (inp_size > 0) { + // Bias is 1D tensor of shape {out_features} + mapping[out_size - 2] = input_domain[0]; } - default: - NVF_ERROR("Unexpected input type."); + break; + } + default: + NVF_ERROR("Unexpected input type."); } return mapping; } @@ -287,10 +286,10 @@ IterDomain* newOutputIterDomain( // Filter out any nullptrs std::copy_if( - input_ids.begin(), - input_ids.end(), - std::back_inserter(ids), - [](IterDomain* id) { return id!=nullptr;}); + input_ids.begin(), + input_ids.end(), + std::back_inserter(ids), + [](IterDomain* id) { return id != nullptr; }); for (auto id : ids) { if (id->isBroadcast()) { diff --git a/csrc/ops/utils.h b/csrc/ops/utils.h index 42e887c7d31..5c0982bb39e 100644 --- a/csrc/ops/utils.h +++ b/csrc/ops/utils.h @@ -46,22 +46,27 @@ IterType promoteIterType(IterType type1, IterType type2); // Mapping B: {nullptr, id_N}) // 3. A/B are atleast 1D and one of them is > 2D: [B, M, K] x [K, N] -> [B, M, // N] (Mapping A: {id_B, id_M, nullptr}, Mapping B: {nullptr, nullptr, id_N}) -// Args: -// 1. input_domain: root/rfactor domain without reductions for any input to MatmulOp +// Args: +// 1. input_domain: root/rfactor domain without reductions for any input to +// MatmulOp // 2. input_role: Specifies if the input is A / B (MatmulRole::Input_A/Input_B) -// 3: out_size: MatmulOp output dimension (input and output may not be the same size). +// 3: out_size: MatmulOp output dimension (input and output may not be the same +// size). std::vector mapMatmulOpIterDomains( const std::vector& input_domain, MatmulRole input_role, size_t out_size); -// For LinearOp, the output is the same as the first input (A[*, in_features])for all but the last dimension. -// If the second input is 2D (B[out_features, in_features]), the last dimension of output is out_features. -// If bias is 1D (bias[out_features]) it maps to the last dimension of the output. -// Args: -// 1. input_domain: root/rfactor domain without reductions for any input to LinearOp -// 2. input_role: Specifies if the input is A / B / Bias (MatmulRole::Input_A/Input_B/Input_C) -// 3: out_size: LinearOp output dimension (input and output may not be the same size). +// For LinearOp, the output is the same as the first input (A[*, +// in_features])for all but the last dimension. If the second input is 2D +// (B[out_features, in_features]), the last dimension of output is out_features. +// If bias is 1D (bias[out_features]) it maps to the last dimension of the +// output. Args: +// 1. input_domain: root/rfactor domain without reductions for any input to +// LinearOp +// 2. input_role: Specifies if the input is A / B / Bias +// (MatmulRole::Input_A/Input_B/Input_C) 3: out_size: LinearOp output dimension +// (input and output may not be the same size). std::vector mapLinearOpIterDomains( const std::vector& input_domain, MatmulRole input_role, diff --git a/csrc/root_domain_map.cpp b/csrc/root_domain_map.cpp index 050f2208058..d8dcea09ae8 100644 --- a/csrc/root_domain_map.cpp +++ b/csrc/root_domain_map.cpp @@ -165,8 +165,10 @@ std::unordered_map PairwiseRootDomainMap::map( } }; - // Assumes producer and consumer IDs to be trivially aligned and adds them to domain map. - auto pairwiseMapAllIds = [&](std::vector producer_ids, std::vector consumer_ids){ + // Assumes producer and consumer IDs to be trivially aligned and adds them to + // domain map. + auto pairwiseMapAllIds = [&](std::vector producer_ids, + std::vector consumer_ids) { for (auto idx : c10::irange(consumer_ids.size())) { IterDomain* producer_id = producer_ids.at(idx); IterDomain* consumer_id = consumer_ids.at(idx); @@ -176,7 +178,7 @@ std::unordered_map PairwiseRootDomainMap::map( updatePairwiseRootDomainMap(producer_id, consumer_id); } }; - + // For MatmulOp, use the corresponding mapped input iterdomains. if (MatmulOp* op = dynamic_cast(consumer_tv_->definition())) { // Check if the producer is lhs/rhs input @@ -208,7 +210,7 @@ std::unordered_map PairwiseRootDomainMap::map( input_role = MatmulRole::INPUT_A; } else if (producer->sameAs(op->inB()->as()->domain())) { input_role = MatmulRole::INPUT_B; - } else if (producer->sameAs(op->bias()->as()->domain())){ + } else if (producer->sameAs(op->bias()->as()->domain())) { input_role = MatmulRole::INPUT_C; } else { NVF_ERROR(false, "Producer did not match any LinearOp input.") @@ -221,7 +223,8 @@ std::unordered_map PairwiseRootDomainMap::map( // output = {*, out_features} / {*} const std::vector& aligned_producer_ids = - ops::mapLinearOpIterDomains(producer_root, input_role.value(), out_size); + ops::mapLinearOpIterDomains( + producer_root, input_role.value(), out_size); pairwiseMapAllIds(aligned_producer_ids, consumer_root); return dom_map; } diff --git a/tests/cpp/test_matmul_aten_evaluation.cpp b/tests/cpp/test_matmul_aten_evaluation.cpp index bb73b4e9c4b..80287793480 100644 --- a/tests/cpp/test_matmul_aten_evaluation.cpp +++ b/tests/cpp/test_matmul_aten_evaluation.cpp @@ -423,8 +423,7 @@ TEST_F(MatmulATenEvaluationTest, LinearWithBias) { EXPECT_TRUE(at::allclose(out[0], out_ref)); } - -const bool checkMapped (const ValGraph& vg, IterDomain* x, IterDomain* y){ +const bool checkMapped(const ValGraph& vg, IterDomain* x, IterDomain* y) { if (!vg.hasGroup(x) || !vg.hasGroup(y)) { return false; } @@ -525,22 +524,22 @@ void checkLinearOpIdMapping( const ValGraph& vg = id_model.idGraph(IdMappingMode::EXACT); vg.validateConsistency(); - // input: [* , in_features] - // weight: [out_features, in_features] / [out_features] - // bias (optional): [out_features]/[] - // output = [*, (out_features), rK] + // input: [* , in_features] + // weight: [out_features, in_features] / [out_features] + // bias (optional): [out_features]/[] + // output = [*, (out_features), rK] ASSERT_EQ(output->nDims(), input->nDims() + weight->nDims() - 1); - + // Check that the first input_size - 1 dims are mapped for input - for (auto i: c10::irange(input->nDims() - 1)){ - if (!input->axis(i)->isBroadcast()){ + for (auto i : c10::irange(input->nDims() - 1)) { + if (!input->axis(i)->isBroadcast()) { EXPECT_TRUE(checkMapped(vg, input->axis(i), output->axis(i))); } } // Check out_features dim is mapped in weight & bias if present. - if (weight->nDims() > 1){ - if (!weight->axis(0)->isBroadcast()){ + if (weight->nDims() > 1) { + if (!weight->axis(0)->isBroadcast()) { EXPECT_TRUE(checkMapped(vg, weight->axis(0), output->axis(-2))); } if (bias != nullptr && bias->nDims() > 0 && !bias->axis(0)->isBroadcast()) { @@ -548,7 +547,7 @@ void checkLinearOpIdMapping( } } // Check mapping for reduction axis in input and weight - if (!input->axis(-1)->isBroadcast()){ + if (!input->axis(-1)->isBroadcast()) { EXPECT_TRUE(checkMapped(vg, input->axis(-1), weight->axis(-1))); EXPECT_TRUE(checkMapped(vg, input->axis(-1), output->axis(-1))); } @@ -745,7 +744,12 @@ INSTANTIATE_TEST_SUITE_P( LinearWithoutBias, LinearNodeParametrizedTest, testing::Combine( - testing::Values(Sizes({k}), Sizes({m, k}), Sizes({b, m, k}), Sizes({1, k}), Sizes({b, 1, k})), + testing::Values( + Sizes({k}), + Sizes({m, k}), + Sizes({b, m, k}), + Sizes({1, k}), + Sizes({b, 1, k})), testing::Values(Sizes({k}), Sizes({n, k}), Sizes({1, k})), testing::Values(std::nullopt))); @@ -753,7 +757,12 @@ INSTANTIATE_TEST_SUITE_P( LinearWithBias, LinearNodeParametrizedTest, testing::Combine( - testing::Values(Sizes({k}), Sizes({m, k}), Sizes({b, m, k}), Sizes({1, k}), Sizes({b, 1, k})), + testing::Values( + Sizes({k}), + Sizes({m, k}), + Sizes({b, m, k}), + Sizes({1, k}), + Sizes({b, 1, k})), testing::Values(Sizes({n, k})), testing::Values(Sizes({}), Sizes({n})))); @@ -761,7 +770,12 @@ INSTANTIATE_TEST_SUITE_P( LinearReductionAxisIsOne, LinearNodeParametrizedTest, testing::Combine( - testing::Values(Sizes({1}), Sizes({m, 1}), Sizes({b, m, 1}), Sizes({1, 1}), Sizes({b, 1, 1})), + testing::Values( + Sizes({1}), + Sizes({m, 1}), + Sizes({b, m, 1}), + Sizes({1, 1}), + Sizes({b, 1, 1})), testing::Values(Sizes({n, 1})), testing::Values(Sizes({}), Sizes({n})))); diff --git a/tests/python/pytest_input_generators.py b/tests/python/pytest_input_generators.py index c87537dd0ed..137dab7c229 100644 --- a/tests/python/pytest_input_generators.py +++ b/tests/python/pytest_input_generators.py @@ -1543,7 +1543,10 @@ def linear_input_generator( make_arg(shape_input), make_arg(shape_weight), make_arg(shape_bias) ) -def linear_error_generator(op, dtype=torch.float32, requires_grad: bool = False, **kwargs): + +def linear_error_generator( + op, dtype=torch.float32, requires_grad: bool = False, **kwargs +): make_arg = partial( make_tensor, device="cuda", dtype=dtype, requires_grad=requires_grad ) @@ -1568,7 +1571,6 @@ def linear_error_generator(op, dtype=torch.float32, requires_grad: bool = False, for input_shapes, ex_type, ex_str in error_cases: shape_input, shape_weight, shape_bias = input_shapes - print (input_shapes) yield SampleInput( make_arg(shape_input), make_arg(shape_weight), make_arg(shape_bias) - ), ex_type, ex_str \ No newline at end of file + ), ex_type, ex_str From 0acc863ade12f4cf3c5d07cbc4b6ec210e0257a6 Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Fri, 17 May 2024 03:29:02 +0000 Subject: [PATCH 29/30] add check --- csrc/root_domain_map.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/csrc/root_domain_map.cpp b/csrc/root_domain_map.cpp index d8dcea09ae8..01b2adbd4c6 100644 --- a/csrc/root_domain_map.cpp +++ b/csrc/root_domain_map.cpp @@ -169,6 +169,7 @@ std::unordered_map PairwiseRootDomainMap::map( // domain map. auto pairwiseMapAllIds = [&](std::vector producer_ids, std::vector consumer_ids) { + NVF_ERROR(producer_ids.size() == consumer_ids.size()); for (auto idx : c10::irange(consumer_ids.size())) { IterDomain* producer_id = producer_ids.at(idx); IterDomain* consumer_id = consumer_ids.at(idx); From 8c2afd158615f6396511ab7964e5393c107f12ec Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Fri, 17 May 2024 03:34:42 +0000 Subject: [PATCH 30/30] chain replace --- tests/python/pytest_ops.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/python/pytest_ops.py b/tests/python/pytest_ops.py index 216c687a418..690e0294eca 100644 --- a/tests/python/pytest_ops.py +++ b/tests/python/pytest_ops.py @@ -212,10 +212,8 @@ def errors_test_fn( # A pair of parentheses ()/[] represents a capture group in regex. # Escape parenthesis in regex string to match raw characters. def _regex_escape_parenthesis(a: str) -> str: - b = a.replace(r"(", r"\(") - b = b.replace(r"[", r"\[") - b = b.replace(r"]", r"\]") - return b.replace(r")", r"\)") + b = a.replace(r"[", r"\[").replace(r"]", r"\]") + return b.replace(r"(", r"\(").replace(r")", r"\)") @create_op_test(tuple(op for op in opinfos if op.error_input_generator is not None))