diff --git a/colossalai/shardformer/modeling/t5.py b/colossalai/shardformer/modeling/t5.py
index 81fb6d28ad05..a23b24e6f571 100644
--- a/colossalai/shardformer/modeling/t5.py
+++ b/colossalai/shardformer/modeling/t5.py
@@ -30,9 +30,6 @@ def t5_flash_attention_forward(
     # Input is (batch_size, seq_length, dim)
     # Mask is (batch_size, key_length) (non-causal) or (batch_size, key_length, key_length)
     # past_key_value[0] is (batch_size, n_heads, q_len - 1, dim_per_head)
-    import pathlib
-    pathlib.Path("/home/lcjmy/code/personal/ColossalAI/colossalai/shardformer/modeling/mask.txt").write_text(
-        str(mask) + str(mask.shape))
     batch_size, seq_length = hidden_states.shape[:2]

     real_seq_length = seq_length