7 changes: 7 additions & 0 deletions colossalai/kernel/__init__.py
@@ -1,7 +1,14 @@
 from .cuda_native import FusedScaleMaskSoftmax, LayerNorm, MultiHeadAttention
+from .triton import llama_context_attn_fwd, bloom_context_attn_fwd
+from .triton import softmax
+from .triton import copy_kv_cache_to_dest
 
 __all__ = [
     "LayerNorm",
     "FusedScaleMaskSoftmax",
     "MultiHeadAttention",
+    "llama_context_attn_fwd",
+    "bloom_context_attn_fwd",
+    "softmax",
+    "copy_kv_cache_to_dest",
 ]
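With these re-exports, callers can pull the Triton kernels straight from colossalai.kernel instead of reaching into the submodule. A minimal import sketch, showing only the paths enabled by this change (call signatures are not part of this diff, so none are shown):

# Package-level imports now exposed by colossalai/kernel/__init__.py
from colossalai.kernel import llama_context_attn_fwd, bloom_context_attn_fwd
from colossalai.kernel import softmax, copy_kv_cache_to_dest

# The deeper submodule path continues to work as well
from colossalai.kernel.triton import bloom_context_attn_fwd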
3 changes: 3 additions & 0 deletions colossalai/kernel/triton/__init__.py
@@ -0,0 +1,3 @@
+from .context_attention import llama_context_attn_fwd, bloom_context_attn_fwd
+from .softmax import softmax
+from .copy_kv_cache_dest import copy_kv_cache_to_dest
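Because colossalai/kernel/__init__.py now imports these names unconditionally, importing the package in an environment without triton raises ImportError; the updated tests below handle this with a guarded import. A minimal sketch of the same guard for user code (the fallback flag mirrors the tests; any non-Triton fallback path is an assumption, not something this PR provides):

try:
    # Requires a working triton installation
    from colossalai.kernel.triton import llama_context_attn_fwd, copy_kv_cache_to_dest
    HAS_TRITON = True
except ImportError:
    # Assumed fallback: dispatch to non-Triton implementations when triton is absent
    HAS_TRITON = False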
4 changes: 2 additions & 2 deletions tests/test_infer_ops/triton/test_bloom_context_attention.py
@@ -9,8 +9,8 @@
 try:
     import triton
     import triton.language as tl
-    from tests.test_kernels.triton.utils import benchmark, torch_context_attention
-    from colossalai.kernel.triton.context_attention import bloom_context_attn_fwd
+    from tests.test_infer_ops.triton.utils import benchmark, torch_context_attention
+    from colossalai.kernel.triton import bloom_context_attn_fwd
     HAS_TRITON = True
 except ImportError:
     HAS_TRITON = False
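The hunk above only covers the import block, so how HAS_TRITON is consumed is not visible here. A common pytest pattern for such a flag (assumed for illustration, not taken from this diff):

import pytest

@pytest.mark.skipif(not HAS_TRITON, reason="triton is not available")
def test_bloom_context_attention():
    # benchmark the Triton kernel against the torch_context_attention reference
    ...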
4 changes: 2 additions & 2 deletions tests/test_infer_ops/triton/test_llama_context_attention.py
@@ -9,8 +9,8 @@
 try:
     import triton
     import triton.language as tl
-    from tests.test_kernels.triton.utils import benchmark, torch_context_attention
-    from colossalai.kernel.triton.context_attention import llama_context_attn_fwd
+    from tests.test_infer_ops.triton.utils import benchmark, torch_context_attention
+    from colossalai.kernel.triton import llama_context_attn_fwd
     HAS_TRITON = True
 except ImportError:
     HAS_TRITON = False