Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,34 +1,34 @@
repos:

- repo: https://github.com/PyCQA/autoflake
rev: v2.2.1
rev: v2.3.1
hooks:
- id: autoflake
name: autoflake (python)
args: ['--in-place', '--remove-unused-variables', '--remove-all-unused-imports', '--ignore-init-module-imports']

- repo: https://github.com/pycqa/isort
rev: 5.12.0
rev: 5.13.2
hooks:
- id: isort
name: sort all imports (python)

- repo: https://github.com/psf/black-pre-commit-mirror
rev: 23.9.1
rev: 24.4.2
hooks:
- id: black
name: black formatter
args: ['--line-length=120', '--target-version=py37', '--target-version=py38', '--target-version=py39','--target-version=py310']

- repo: https://github.com/pre-commit/mirrors-clang-format
rev: v13.0.1
rev: v18.1.7
hooks:
- id: clang-format
name: clang formatter
types_or: [c++, c]

- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.3.0
rev: v4.6.0
hooks:
- id: check-yaml
- id: check-merge-conflict
Expand Down
16 changes: 10 additions & 6 deletions applications/ColossalChat/coati/dataset/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,15 +83,19 @@ def __call__(self, instances: Sequence[Dict[str, List[int]]]) -> Dict[str, torch

# `List[torch.Tensor]`
batch_input_ids = [
torch.LongTensor(instance["input_ids"][: self.max_length])
if len(instance["input_ids"]) > self.max_length
else torch.LongTensor(instance["input_ids"])
(
torch.LongTensor(instance["input_ids"][: self.max_length])
if len(instance["input_ids"]) > self.max_length
else torch.LongTensor(instance["input_ids"])
)
for instance in instances
]
batch_labels = [
torch.LongTensor(instance["labels"][: self.max_length])
if len(instance["labels"]) > self.max_length
else torch.LongTensor(instance["labels"])
(
torch.LongTensor(instance["labels"][: self.max_length])
if len(instance["labels"]) > self.max_length
else torch.LongTensor(instance["labels"])
)
for instance in instances
]
if self.tokenizer.padding_side == "right":
Expand Down
1 change: 1 addition & 0 deletions applications/ColossalChat/coati/models/loss.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
loss functions
"""

from typing import Optional, Tuple

import torch
Expand Down
1 change: 1 addition & 0 deletions applications/ColossalChat/coati/models/reward_model.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
reward model
"""

from typing import Optional

import torch
Expand Down
1 change: 1 addition & 0 deletions applications/ColossalChat/coati/trainer/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Training utilities for Coati.
"""

from typing import Any

import torch
Expand Down
14 changes: 12 additions & 2 deletions applications/ColossalEval/colossal_eval/dataset/agieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,9 @@ def get_prompt(line: Dict, dataset_name: str, logger: DistributedLogger) -> Dict
option_string = "ABCDEFG"
count = len(line["options"])

input = "问题:" + line["question"] + " " + "从以下选项中选择:" + " ".join(line["options"]) + "\n" + "答案:"
input = (
"问题:" + line["question"] + " " + "从以下选项中选择:" + " ".join(line["options"]) + "\n" + "答案:"
)

all_classes = list(option_string[0:count])

Expand Down Expand Up @@ -150,7 +152,15 @@ def combine_prompt(prompt_path, dataset_name, load_explanation=True, chat_mode=F
)
elif dataset_name in chinese_qa_datasets:
question_input = (
"问题:" + passage + " " + question + "\n" + "从以下选项中选择:" + " ".join(options) + "\n" + "答案:{}".format(label)
"问题:"
+ passage
+ " "
+ question
+ "\n"
+ "从以下选项中选择:"
+ " ".join(options)
+ "\n"
+ "答案:{}".format(label)
)
elif dataset_name in english_cloze_datasets:
question_input = "Question: ".format(idx + 1) + question + "\n" + "Answer: {}".format(answer)
Expand Down
6 changes: 5 additions & 1 deletion applications/ColossalEval/colossal_eval/dataset/ceval.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,11 @@
"urban_and_rural_planner": ["Urban and Rural Planner", "注册城乡规划师", "Other"],
"accountant": ["Accountant", "注册会计师", "Other"],
"fire_engineer": ["Fire Engineer", "注册消防工程师", "Other"],
"environmental_impact_assessment_engineer": ["Environmental Impact Assessment Engineer", "环境影响评价工程师", "Other"],
"environmental_impact_assessment_engineer": [
"Environmental Impact Assessment Engineer",
"环境影响评价工程师",
"Other",
],
"tax_accountant": ["Tax Accountant", "税务师", "Other"],
"physician": ["Physician", "医师资格", "Other"],
}
Expand Down
8 changes: 5 additions & 3 deletions applications/ColossalEval/colossal_eval/dataset/mtbench.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,11 @@ def load(path: str, logger: DistributedLogger, few_shot: bool) -> List[Dict]:
"instruction": question["turns"],
"input": "",
"output": [],
"target": [""] * turn_number
if question["question_id"] not in reference
else reference[question["question_id"]],
"target": (
[""] * turn_number
if question["question_id"] not in reference
else reference[question["question_id"]]
),
}

if category in dataset["test"]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,9 @@ def _get_choices_indices(self, language: str):
self.indices_for_choices[0].append(
self.tokenizer(f"Answer: {choice}", add_special_tokens=False).input_ids[-1]
)
self.indices_for_choices[1].append(self.tokenizer(f"答案:{choice}", add_special_tokens=False).input_ids[-1])
self.indices_for_choices[1].append(
self.tokenizer(f"答案:{choice}", add_special_tokens=False).input_ids[-1]
)

def _load_tokenizer(self, path: str, tokenizer_path: Optional[str], tokenizer_kwargs: dict):
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
https://github.com/langchain-ai/langchain
The original code is licensed under the MIT license.
"""

from __future__ import annotations

import copy
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
https://github.com/langchain-ai/langchain
The original code is licensed under the MIT license.
"""

import copy
from typing import Any, Mapping, Optional, Protocol

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
https://github.com/langchain-ai/langchain
The original code is licensed under the MIT license.
"""

import copy
from typing import Any, List

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
Class for loading table type data. Please refer to Pandas-Input/Output for file format details.
"""


import glob
import os

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
print(resp) # super-heavyweight awesome-natured yawning Australian creature!

"""

import json
from typing import Any, Mapping

Expand Down
1 change: 1 addition & 0 deletions applications/ColossalQA/colossalqa/local/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
logger.info(llm(TEST_PROMPT_CHATGLM, max_new_tokens=100), verbose=True)

"""

from typing import Any, List, Mapping, Optional

import torch
Expand Down
1 change: 1 addition & 0 deletions applications/ColossalQA/colossalqa/local/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Generation utilities
"""

import json
from typing import List

Expand Down
1 change: 1 addition & 0 deletions applications/ColossalQA/colossalqa/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Implement a memory class for storing conversation history
Support long term and short term memory
"""

from typing import Any, Dict, List

from colossalqa.chain.memory.summary import ConversationSummaryMemory
Expand Down
1 change: 1 addition & 0 deletions applications/ColossalQA/colossalqa/mylogging.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Class for logging with extra control for debugging
"""

import logging


Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Script for Chinese retrieval based conversation system backed by ChatGLM
"""

from typing import Tuple

from colossalqa.chain.retrieval_qa.base import RetrievalQA
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Multilingual retrieval based conversation system
"""

from typing import List

from colossalqa.data_loader.document_loader import DocumentLoader
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Script for Chinese retrieval based conversation system backed by ChatGLM
"""

from typing import Tuple

from colossalqa.chain.retrieval_qa.base import RetrievalQA
Expand Down
1 change: 1 addition & 0 deletions applications/ColossalQA/colossalqa/retriever.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Code for custom retriever with incremental update
"""

import copy
import hashlib
import os
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Code for Chinese text splitter
"""

from typing import Any, List, Optional

from colossalqa.text_splitter.utils import get_cleaned_paragraph
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Script for English retrieval based conversation system backed by LLaMa2
"""

import argparse
import os

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Script for English retrieval based conversation system backed by LLaMa2
"""

import argparse
import json
import os
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Script for Chinese retrieval based conversation system backed by ChatGLM
"""

import argparse
import os

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Script for English retrieval based conversation system backed by LLaMa2
"""

import argparse
import os

Expand Down
20 changes: 11 additions & 9 deletions colossalai/auto_parallel/meta_profiler/meta_registry/conv.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,20 +107,22 @@ def convnd_meta_info(*args, **kwargs) -> Tuple[TrainCycleItem, TrainCycleItem, L
# NOTE: currently in SPMD solver we always believe that there will be a new tensor created in forward
fwd_memory_cost = MemoryCost(
activation=compute_size_in_bytes([input_tensor, output_tensor]),
parameter=compute_size_in_bytes([weight_tensor, bias_tensor])
if has_bias
else compute_size_in_bytes(weight_tensor),
parameter=(
compute_size_in_bytes([weight_tensor, bias_tensor]) if has_bias else compute_size_in_bytes(weight_tensor)
),
temp=0,
buffer=0,
)

bwd_memory_cost = MemoryCost(
activation=compute_size_in_bytes([input_tensor, weight_tensor, bias_tensor])
if has_bias
else compute_size_in_bytes([input_tensor, weight_tensor]),
parameter=compute_size_in_bytes([weight_tensor, bias_tensor])
if has_bias
else compute_size_in_bytes(weight_tensor),
activation=(
compute_size_in_bytes([input_tensor, weight_tensor, bias_tensor])
if has_bias
else compute_size_in_bytes([input_tensor, weight_tensor])
),
parameter=(
compute_size_in_bytes([weight_tensor, bias_tensor]) if has_bias else compute_size_in_bytes(weight_tensor)
),
temp=0,
buffer=0,
)
Expand Down
12 changes: 6 additions & 6 deletions colossalai/inference/batch_bucket.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,16 +247,16 @@ def add_seqs(
self._sequences_dict[seq.request_id] = seq
self._sequences_indexes[seq.request_id] = self._current_batch_size + i
# TODO external (rename): modify Sequence.sentence_len to seq_len
self._sequence_lengths[
self._current_batch_size : self._current_batch_size + num_seqs_to_add
] = torch.tensor([seq.sentence_len for seq in seqs[:num_seqs_to_add]], dtype=torch.int32)
self._sequence_lengths[self._current_batch_size : self._current_batch_size + num_seqs_to_add] = (
torch.tensor([seq.sentence_len for seq in seqs[:num_seqs_to_add]], dtype=torch.int32)
)
# NOTE block tables to be updated by kvcache manager
block_tables = self._block_tables[self._current_batch_size : self._current_batch_size + num_seqs_to_add]
if alloc_block_tables is not None:
# copy block ids from provided block tables
self._block_tables[
self._current_batch_size : self._current_batch_size + num_seqs_to_add
] = alloc_block_tables
self._block_tables[self._current_batch_size : self._current_batch_size + num_seqs_to_add] = (
alloc_block_tables
)
elif alloc_block_tables_fn:
alloc_block_tables_fn(
block_tables,
Expand Down
Loading