diff --git a/colossalai/kernel/__init__.py b/colossalai/kernel/__init__.py
index a99cb497c3e7..8933fc0a3c2f 100644
--- a/colossalai/kernel/__init__.py
+++ b/colossalai/kernel/__init__.py
@@ -1,14 +1,7 @@
 from .cuda_native import FusedScaleMaskSoftmax, LayerNorm, MultiHeadAttention
-from .triton import llama_context_attn_fwd, bloom_context_attn_fwd
-from .triton import softmax
-from .triton import copy_kv_cache_to_dest
 
 __all__ = [
     "LayerNorm",
     "FusedScaleMaskSoftmax",
     "MultiHeadAttention",
-    "llama_context_attn_fwd",
-    "bloom_context_attn_fwd",
-    "softmax",
-    "copy_kv_cache_to_dest",
 ]
diff --git a/colossalai/kernel/triton/__init__.py b/colossalai/kernel/triton/__init__.py
index eb0335c01ce2..e69de29bb2d1 100644
--- a/colossalai/kernel/triton/__init__.py
+++ b/colossalai/kernel/triton/__init__.py
@@ -1,5 +0,0 @@
-from .context_attention import bloom_context_attn_fwd, llama_context_attn_fwd
-from .copy_kv_cache_dest import copy_kv_cache_to_dest
-from .fused_layernorm import layer_norm
-from .rms_norm import rmsnorm_forward
-from .softmax import softmax
diff --git a/tests/test_infer_ops/triton/test_llama_context_attention.py b/tests/test_infer_ops/triton/test_llama_context_attention.py
index 4ea6095d4109..0ff06b5007e4 100644
--- a/tests/test_infer_ops/triton/test_llama_context_attention.py
+++ b/tests/test_infer_ops/triton/test_llama_context_attention.py
@@ -10,7 +10,7 @@
     import triton
     import triton.language as tl
 
-    from colossalai.kernel.triton import llama_context_attn_fwd
+    from colossalai.kernel.triton.context_attention import llama_context_attn_fwd
     from tests.test_infer_ops.triton.kernel_utils import torch_context_attention
     HAS_TRITON = True
 except ImportError:
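
Note on usage: with the package-level re-exports removed, downstream code imports each Triton kernel from its defining module rather than from the now-empty colossalai.kernel.triton package root, guarded for an optional Triton install as the updated test does. A minimal sketch of that pattern (the print message is illustrative, not taken from this diff):

    try:
        import triton  # Triton is an optional dependency

        # Import directly from the defining module, not the package root.
        from colossalai.kernel.triton.context_attention import llama_context_attn_fwd

        HAS_TRITON = True
    except ImportError:
        HAS_TRITON = False
        print("Triton is not installed; Triton-backed kernels are unavailable.")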