From cbdf0f1939a95f1da6cd25c4ac8cc0ef76c8ad89 Mon Sep 17 00:00:00 2001 From: Yuanheng Date: Thu, 28 Mar 2024 15:30:27 +0800 Subject: [PATCH 1/2] [fix] use tokenizer from the same pretrained path --- examples/language/grok-1/inference.py | 9 ++++----- examples/language/grok-1/inference_tp.py | 9 ++++----- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/examples/language/grok-1/inference.py b/examples/language/grok-1/inference.py index a73820af90bb..2073b813f417 100644 --- a/examples/language/grok-1/inference.py +++ b/examples/language/grok-1/inference.py @@ -1,7 +1,7 @@ import time import torch -from transformers import AutoModelForCausalLM, LlamaTokenizerFast +from transformers import AutoModelForCausalLM, AutoTokenizer from utils import get_defualt_parser, inference, print_output if __name__ == "__main__": @@ -9,6 +9,9 @@ args = parser.parse_args() start = time.time() torch.set_default_dtype(torch.bfloat16) + + tokenizer = AutoTokenizer.from_pretrained(args.pretrained) + model = AutoModelForCausalLM.from_pretrained( args.pretrained, trust_remote_code=True, @@ -18,10 +21,6 @@ model.eval() init_time = time.time() - start - # A transformers-compatible version of the grok-1 tokenizer by Xenova - # https://huggingface.co/Xenova/grok-1-tokenizer - tokenizer = LlamaTokenizerFast.from_pretrained("Xenova/grok-1-tokenizer") - for text in args.text: output = inference( model, diff --git a/examples/language/grok-1/inference_tp.py b/examples/language/grok-1/inference_tp.py index 604de14877f5..13423b3f68db 100644 --- a/examples/language/grok-1/inference_tp.py +++ b/examples/language/grok-1/inference_tp.py @@ -2,7 +2,7 @@ import torch from grok1_policy import Grok1ForCausalLMPolicy -from transformers import AutoModelForCausalLM, LlamaTokenizerFast +from transformers import AutoModelForCausalLM, AutoTokenizer from utils import get_defualt_parser, inference, print_output import colossalai @@ -27,6 +27,9 @@ ) booster = Booster(plugin=plugin) torch.set_default_dtype(torch.bfloat16) + + tokenizer = AutoTokenizer.from_pretrained(args.pretrained) + with LazyInitContext(default_device=get_current_device()): model = AutoModelForCausalLM.from_pretrained( args.pretrained, trust_remote_code=True, torch_dtype=torch.bfloat16 @@ -35,10 +38,6 @@ model.eval() init_time = time.time() - start - # A transformers-compatible version of the grok-1 tokenizer by Xenova - # https://huggingface.co/Xenova/grok-1-tokenizer - tokenizer = LlamaTokenizerFast.from_pretrained("Xenova/grok-1-tokenizer") - for text in args.text: output = inference( model.unwrap(), From 61922c2c6d6d0188e1b1296cf0d454bc7e6ad424 Mon Sep 17 00:00:00 2001 From: Yuanheng Date: Thu, 28 Mar 2024 16:12:29 +0800 Subject: [PATCH 2/2] trust remote code --- examples/language/grok-1/inference.py | 2 +- examples/language/grok-1/inference_tp.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/language/grok-1/inference.py b/examples/language/grok-1/inference.py index 2073b813f417..faef7ae9d7ca 100644 --- a/examples/language/grok-1/inference.py +++ b/examples/language/grok-1/inference.py @@ -10,7 +10,7 @@ start = time.time() torch.set_default_dtype(torch.bfloat16) - tokenizer = AutoTokenizer.from_pretrained(args.pretrained) + tokenizer = AutoTokenizer.from_pretrained(args.pretrained, trust_remote_code=True) model = AutoModelForCausalLM.from_pretrained( args.pretrained, diff --git a/examples/language/grok-1/inference_tp.py b/examples/language/grok-1/inference_tp.py index 13423b3f68db..cf05880dc21d 100644 --- a/examples/language/grok-1/inference_tp.py +++ b/examples/language/grok-1/inference_tp.py @@ -28,7 +28,7 @@ booster = Booster(plugin=plugin) torch.set_default_dtype(torch.bfloat16) - tokenizer = AutoTokenizer.from_pretrained(args.pretrained) + tokenizer = AutoTokenizer.from_pretrained(args.pretrained, trust_remote_code=True) with LazyInitContext(default_device=get_current_device()): model = AutoModelForCausalLM.from_pretrained(