diff --git a/csrc/executor_utils.cpp b/csrc/executor_utils.cpp index 4fc62a59b96..096300b4d38 100644 --- a/csrc/executor_utils.cpp +++ b/csrc/executor_utils.cpp @@ -1378,10 +1378,6 @@ std::tuple> getCompiledKernel( const int max_register_heuristic, bool return_compiled_binary) { FUSER_PERF_SCOPE("executor_utils::NVRTC"); - if (isOptionDisabled(DisableOption::ArchCheck)) { - TORCH_WARN( - "NVFuser Compile: arch check disabled, should not compile any kernel"); - } at::cuda::jit::initializeCudaContext(); @@ -1477,10 +1473,6 @@ std::tuple> getCompiledKernel( object_code.clear(); } - TORCH_CHECK( - !isOptionDisabled(DisableOption::ArchCheck), - "NVFuser Compile: arch check disabled, should not return any compiled kernel"); - return {compiled_kernel, log.str(), object_code}; } diff --git a/csrc/lower_validation.cpp b/csrc/lower_validation.cpp index b11923f5e89..f18b3bb61bf 100644 --- a/csrc/lower_validation.cpp +++ b/csrc/lower_validation.cpp @@ -844,21 +844,6 @@ void validatePartialSplit(Fusion* fusion) { namespace { -//! Utility to make sure targeted gpu capability is -//! higher than provided major.minor. -void validateMinimumArch(int major, int minor) { - // Skip checking arch if disabled. - if (isOptionDisabled(DisableOption::ArchCheck)) { - return; - } - - auto prop = at::cuda::getCurrentDeviceProperties(); - TORCH_INTERNAL_ASSERT(prop->major >= major); - if (prop->major == major) { - TORCH_INTERNAL_ASSERT(prop->minor >= minor); - } -} - //! Validates that the operand and result tensors //! of mma ops are swizzled and also validates //! specialization of tidx as lane id. @@ -1024,12 +1009,10 @@ void validateArchMemoryOp(LoadStoreOp* ldst) { switch (ldst->opType()) { case LoadStoreOpType::LdMatrix: case LoadStoreOpType::LdMatrixTranspose: - validateMinimumArch(7, 5); validateLdMatrixOutput(ldst->out()->as()); return; case LoadStoreOpType::CpAsyncCg: case LoadStoreOpType::CpAsyncCa: - validateMinimumArch(8, 0); return; default: return; @@ -1049,12 +1032,9 @@ void validateMma(Fusion* fusion) { switch (mma->options().macro) { case MmaOptions::MacroType::Volta_16_16_4: - validateMinimumArch(7, 0); break; case MmaOptions::MacroType::Turing_16_8_16: case MmaOptions::MacroType::Turing_16_16_16: - validateMinimumArch(7, 5); - // Check that operands come from ldmatrix, can be // relaxed once swizzles can be labeled on iterdomains. validateTuringMmaInput(mma->inA()->as()); @@ -1062,8 +1042,6 @@ void validateMma(Fusion* fusion) { break; case MmaOptions::MacroType::Ampere_16_8_16: case MmaOptions::MacroType::Ampere_16_16_16: - validateMinimumArch(8, 0); - // Check that operands come from ldmatrix, can be // relaxed once swizzles can be labeled on iterdomains. validateTuringMmaInput(mma->inA()->as()); diff --git a/csrc/utils.cpp b/csrc/utils.cpp index 72ffa6e168d..f7535477369 100644 --- a/csrc/utils.cpp +++ b/csrc/utils.cpp @@ -150,7 +150,6 @@ const auto& getDebugDumpOptions() { auto parseDisableOptions() { const std::unordered_map available_options = { - {"arch_check", DisableOption::ArchCheck}, {"compile_to_sass", DisableOption::CompileToSass}, {"fallback", DisableOption::Fallback}, {"fma", DisableOption::Fma}, diff --git a/csrc/utils.h b/csrc/utils.h index 9099488eda7..0c9eaa59031 100644 --- a/csrc/utils.h +++ b/csrc/utils.h @@ -94,7 +94,6 @@ TORCH_CUDA_CU_API const std::vector& getDebugDumpArguments( //! These can be set through the `PYTORCH_NVFUSER_DISABLE` environment variable //! enum class DisableOption { - ArchCheck, //! Disable hardware-specific checks to enable cross arch debug CompileToSass, //! Disable direct compilation to sass so the ptx can be //! examined Fallback, //! Disable fallback