NVIDIA · zasdfgbnm · Apr 17, 2023 · Apr 17, 2023 · Apr 17, 2023 · Apr 17, 2023
diff --git a/csrc/executor_utils.cpp b/csrc/executor_utils.cpp
@@ -1378,10 +1378,6 @@ std::tuple<NvrtcFunction, std::string, std::vector<char>> getCompiledKernel(
     const int max_register_heuristic,
     bool return_compiled_binary) {
   FUSER_PERF_SCOPE("executor_utils::NVRTC");
-  if (isOptionDisabled(DisableOption::ArchCheck)) {
-    TORCH_WARN(
-        "NVFuser Compile: arch check disabled, should not compile any kernel");
-  }
 
   at::cuda::jit::initializeCudaContext();
 
@@ -1477,10 +1473,6 @@ std::tuple<NvrtcFunction, std::string, std::vector<char>> getCompiledKernel(
     object_code.clear();
   }
 
-  TORCH_CHECK(
-      !isOptionDisabled(DisableOption::ArchCheck),
-      "NVFuser Compile: arch check disabled, should not return any compiled kernel");
-
   return {compiled_kernel, log.str(), object_code};
 }
 

diff --git a/csrc/lower_validation.cpp b/csrc/lower_validation.cpp
@@ -844,21 +844,6 @@ void validatePartialSplit(Fusion* fusion) {
 
 namespace {
 
-//! Utility to make sure targeted gpu capability is
-//!  higher than provided major.minor.
-void validateMinimumArch(int major, int minor) {
-  // Skip checking arch if disabled.
-  if (isOptionDisabled(DisableOption::ArchCheck)) {
-    return;
-  }
-
-  auto prop = at::cuda::getCurrentDeviceProperties();
-  TORCH_INTERNAL_ASSERT(prop->major >= major);
-  if (prop->major == major) {
-    TORCH_INTERNAL_ASSERT(prop->minor >= minor);
-  }
-}
-
 //! Validates that the operand and result tensors
 //!  of mma ops are swizzled and also validates
 //!  specialization of tidx as lane id.
@@ -1024,12 +1009,10 @@ void validateArchMemoryOp(LoadStoreOp* ldst) {
   switch (ldst->opType()) {
     case LoadStoreOpType::LdMatrix:
     case LoadStoreOpType::LdMatrixTranspose:
-      validateMinimumArch(7, 5);
       validateLdMatrixOutput(ldst->out()->as<TensorView>());
       return;
     case LoadStoreOpType::CpAsyncCg:
     case LoadStoreOpType::CpAsyncCa:
-      validateMinimumArch(8, 0);
       return;
     default:
       return;
@@ -1049,21 +1032,16 @@ void validateMma(Fusion* fusion) {
 
       switch (mma->options().macro) {
         case MmaOptions::MacroType::Volta_16_16_4:
-          validateMinimumArch(7, 0);
           break;
         case MmaOptions::MacroType::Turing_16_8_16:
         case MmaOptions::MacroType::Turing_16_16_16:
-          validateMinimumArch(7, 5);
-
           // Check that operands come from ldmatrix, can be
           //  relaxed once swizzles can be labeled on iterdomains.
           validateTuringMmaInput(mma->inA()->as<TensorView>());
           validateTuringMmaInput(mma->inB()->as<TensorView>());
           break;
         case MmaOptions::MacroType::Ampere_16_8_16:
         case MmaOptions::MacroType::Ampere_16_16_16:
-          validateMinimumArch(8, 0);
-
           // Check that operands come from ldmatrix, can be
           //  relaxed once swizzles can be labeled on iterdomains.
           validateTuringMmaInput(mma->inA()->as<TensorView>());

diff --git a/csrc/utils.cpp b/csrc/utils.cpp
@@ -150,7 +150,6 @@ const auto& getDebugDumpOptions() {
 
 auto parseDisableOptions() {
   const std::unordered_map<std::string, DisableOption> available_options = {
-      {"arch_check", DisableOption::ArchCheck},
       {"compile_to_sass", DisableOption::CompileToSass},
       {"fallback", DisableOption::Fallback},
       {"fma", DisableOption::Fma},

diff --git a/csrc/utils.h b/csrc/utils.h
@@ -94,7 +94,6 @@ TORCH_CUDA_CU_API const std::vector<std::string>& getDebugDumpArguments(
 //! These can be set through the `PYTORCH_NVFUSER_DISABLE` environment variable
 //!
 enum class DisableOption {
-  ArchCheck, //! Disable hardware-specific checks to enable cross arch debug
   CompileToSass, //! Disable direct compilation to sass so the ptx can be
                  //! examined
   Fallback, //! Disable fallback