From 62438270a4528ae450b932994712a31248e83295 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 24 Aug 2023 08:14:49 +0000 Subject: [PATCH 1/2] add correct installation of GPTQ library --- docker/transformers-all-latest-gpu/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/transformers-all-latest-gpu/Dockerfile b/docker/transformers-all-latest-gpu/Dockerfile index c96b4cc79b3b..a6c672e1a9df 100644 --- a/docker/transformers-all-latest-gpu/Dockerfile +++ b/docker/transformers-all-latest-gpu/Dockerfile @@ -50,7 +50,7 @@ RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/pef RUN python3 -m pip install --no-cache-dir bitsandbytes # Add auto-gptq for gtpq quantization testing -RUN python3 -m pip install --no-cache-dir auto-gptq +RUN python3 -m pip install --no-cache-dir auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/ # Add einops for additional model testing RUN python3 -m pip install --no-cache-dir einops From e88e9876ba83dbcc2f1c67e43de993016190618a Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 24 Aug 2023 08:27:48 +0000 Subject: [PATCH 2/2] update tests values --- tests/quantization/gptq/test_gptq.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/quantization/gptq/test_gptq.py b/tests/quantization/gptq/test_gptq.py index 257c6f020dd3..c7530471fa27 100644 --- a/tests/quantization/gptq/test_gptq.py +++ b/tests/quantization/gptq/test_gptq.py @@ -87,7 +87,8 @@ class GPTQTest(unittest.TestCase): EXPECTED_OUTPUTS = set() EXPECTED_OUTPUTS.add("Hello my name is John and I am a professional photographer. I") EXPECTED_OUTPUTS.add("Hello my name is John and I am a very good looking man.") - EXPECTED_OUTPUTS.add("Hello my name is Alyson and I am a professional photographer") + EXPECTED_OUTPUTS.add("Hello my name is Alyson, I am a student in the") + EXPECTED_OUTPUTS.add("Hello my name is Alyson and I am a very sweet,") # this seems a little small considering that we are doing 4bit quant but we have a small model and ww don't quantize the embeddings EXPECTED_RELATIVE_DIFFERENCE = 1.664253062 @@ -215,7 +216,7 @@ def test_change_loading_attributes(self): self.assertEqual(self.quantized_model.config.quantization_config.disable_exllama, True) # we need to put it directly to the gpu. Otherwise, we won't be able to initialize the exllama kernel quantized_model_from_saved = AutoModelForCausalLM.from_pretrained( - tmpdirname, quantization_config=GPTQConfig(disable_exllama=False, bits=6), device_map={"": 0} + tmpdirname, quantization_config=GPTQConfig(disable_exllama=False, bits=4), device_map={"": 0} ) self.assertEqual(quantized_model_from_saved.config.quantization_config.disable_exllama, False) self.assertEqual(quantized_model_from_saved.config.quantization_config.bits, self.bits)