From 9d8021494fc546339c2dd57e0b1cd59c6db22cef Mon Sep 17 00:00:00 2001
From: yuanheng-zhao <jonathan.zhaoyh@gmail.com>
Date: Thu, 31 Aug 2023 09:41:20 +0800
Subject: [PATCH] (nfc) fix docstrings, drop unused imports and inline license text

---
 .../tensor_parallel/kvcache_manager.py        | 14 ----------
 .../tensor_parallel/modeling/bloom.py         | 27 +++++--------------
 2 files changed, 6 insertions(+), 35 deletions(-)

diff --git a/colossalai/inference/tensor_parallel/kvcache_manager.py b/colossalai/inference/tensor_parallel/kvcache_manager.py
index 8f8c40a20890..2ddb6c5cdb35 100644
--- a/colossalai/inference/tensor_parallel/kvcache_manager.py
+++ b/colossalai/inference/tensor_parallel/kvcache_manager.py
@@ -1,20 +1,6 @@
 # Adapted from lightllm/common/mem_manager.py
 # of the ModelTC/lightllm GitHub repository
 # https://github.com/ModelTC/lightllm/blob/050af3ce65edca617e2f30ec2479397d5bb248c9/lightllm/common/mem_manager.py
-#
-# Copyright 2023 ModelTC Team
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
 
 import torch
 
diff --git a/colossalai/inference/tensor_parallel/modeling/bloom.py b/colossalai/inference/tensor_parallel/modeling/bloom.py
index 1a5dbf4b5a1b..0fd08d3721e6 100644
--- a/colossalai/inference/tensor_parallel/modeling/bloom.py
+++ b/colossalai/inference/tensor_parallel/modeling/bloom.py
@@ -4,7 +4,7 @@
 
 import torch
 import torch.distributed as dist
-from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
+from torch.nn import CrossEntropyLoss
 from torch.nn import functional as F
 from transformers.models.bloom.modeling_bloom import (
     BaseModelOutputWithPastAndCrossAttentions,
@@ -30,21 +30,6 @@ def generate_alibi(n_head, dtype=torch.float16):
     This method is originally the `build_alibi_tensor` function
     in `transformers/models/bloom/modeling_bloom.py`
     of the huggingface/transformers GitHub repository.
-
-    Copyright 2023 ModelTC Team
-    Copyright 2022 HuggingFace Inc. team and BigScience workshop
-
-    Licensed under the Apache License, Version 2.0 (the "License");
-    you may not use this file except in compliance with the License.
-    You may obtain a copy of the License at
-
-        http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
     """
 
     def get_slopes_power_of_2(n):
@@ -67,7 +52,11 @@ def get_slopes(n):
 
 class BloomInferenceForwards:
     """
-    This class serves a micro library for bloom inference forwards
+    This class serves as a micro library for bloom inference forwards.
+    We intend to replace the forward methods for BloomForCausalLM, BloomModel, BloomBlock, and BloomAttention,
+    as well as the prepare_inputs_for_generation method for BloomForCausalLM.
+    For future improvement, we might want to skip replacing methods for BloomForCausalLM,
+    and call BloomModel.forward iteratively in TpInferEngine.
     """
 
     @staticmethod
@@ -372,8 +361,6 @@ def bloom_for_causal_lm_prepare_inputs_for_generation(
         })
         return model_inputs
 
-    # replace decoder layer forward:
-    #   used to replace BloomBlock.forward
     @staticmethod
     def bloom_block_forward(
         self: BloomBlock,
@@ -432,8 +419,6 @@ def bloom_block_forward(
 
         return outputs    # hidden_states, present, attentions
 
-    # replace attention forward:
-    #   used to replace BloomAttention.forward
     @staticmethod
     def bloom_attention_forward(
         self: BloomAttention,