From 3d88e4afd90fa18ec5b0c851ee1a67d81d23c32c Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Mon, 8 May 2023 05:09:11 +0900 Subject: [PATCH] add NDEBUG option to CUTLASS compile to speed up attention kernel --- python/tvm/contrib/cutlass/build.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/tvm/contrib/cutlass/build.py b/python/tvm/contrib/cutlass/build.py index 363548fb2ba0..3e5cda53d95a 100644 --- a/python/tvm/contrib/cutlass/build.py +++ b/python/tvm/contrib/cutlass/build.py @@ -59,6 +59,7 @@ def _get_cutlass_compile_options(sm, threads, use_fast_math=False): "-c", "-DCUTLASS_ENABLE_TENSOR_CORE_MMA=1", "-gencode=arch=compute_%d,code=[sm_%d,compute_%d]" % (sm, sm, sm), + "-DNDEBUG", "-Xcompiler=-fPIC", "-Xcompiler=-Wconversion", "-Xcompiler=-fno-strict-aliasing",