2 changes: 2 additions & 0 deletions auto_round/export/export_to_autogptq/export.py

@@ -179,6 +179,8 @@ def pack_layer(name, model, backend, device=None):
         zero = int(zero.flatten()[0])
     else:
         layer, scale, zero = layer.to("cpu"), scale.to("cpu"), zero
+        if isinstance(zero, torch.Tensor) and zero.dtype == torch.bfloat16:
+            zero = zero.float()
     sig = inspect.signature(qlayer.pack)
     param_count = len(sig.parameters)
     if param_count == 2:
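Note on the cast above: zero-points produced while quantizing a bfloat16 model reach qlayer.pack() as bf16 tensors, and GPTQ-style pack routines commonly round-trip tensors through numpy, which has no bfloat16 dtype. A minimal sketch of the failure mode and the guard (standard torch/numpy behavior; that pack goes through numpy is an assumption about the kernel internals):

import torch

# A bf16 zero-point, as produced when quantizing a bfloat16 model.
zero = torch.tensor([8.0], dtype=torch.bfloat16)

try:
    zero.numpy()  # numpy has no bfloat16 dtype
except TypeError as e:
    print(e)  # "Got unsupported ScalarType BFloat16"

# The guard added in this PR: upcast to float32 first, after which
# numpy conversion and integer rounding behave as usual.
if isinstance(zero, torch.Tensor) and zero.dtype == torch.bfloat16:
    zero = zero.float()
print(zero.numpy())  # array([8.], dtype=float32)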
3 changes: 1 addition & 2 deletions auto_round/utils/missing_tensors.py

@@ -235,8 +235,7 @@ def _is_truly_missing(name: str) -> bool:
     parent_summary = compress_layer_names(list({name.rsplit(".", 1)[0] for name in missing_tensor_names}))
     logger.info(
         f"Found {len(missing_tensor_names)} tensor(s) in the source checkpoint that are "
-        f"absent from the saved output (e.g., MTP parameters). Copying them now...\n"
-        f" Layers: {parent_summary}"
+        f"absent from the saved output (e.g., MTP parameters): {parent_summary}. Copying them now...\n"
     )

     # ------------------------------------------------------------------ #
2 changes: 1 addition & 1 deletion test/conftest.py

@@ -18,7 +18,7 @@
     try:
         Version(_gguf_mod.__version__)
     except Exception:
-        _gguf_mod.__version__ = "0.0.0"
+        _gguf_mod.__version__ = "0.10.0"
 except ImportError:
     pass
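Note on the new fallback: when gguf ships without a parsable __version__, the shim above substitutes one so Version() parsing does not crash. Bumping the substitute from "0.0.0" to "0.10.0" matters because minimum-version gates compare against it; a "0.0.0" stub fails every gguf >= x.y check and silently skips those tests. A hypothetical gate illustrating the effect (the 0.10.0 threshold here is an assumption for illustration):

from packaging.version import Version
import gguf  # assumed installed, with the conftest shim applied

MIN_GGUF = Version("0.10.0")
if Version(gguf.__version__) < MIN_GGUF:
    print("skipping gguf tests")  # a "0.0.0" fallback always lands here
else:
    print("running gguf tests")   # the "0.10.0" fallback reaches here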
6 changes: 4 additions & 2 deletions test/test_cpu/integrations/test_llmc_integration.py

@@ -54,6 +54,7 @@
 w8a8_dynamic_recipe_modifier = AutoRoundModifier(
     ignore=["lm_head"],
     iters=0,
+    enable_torch_compile=False,
     config_groups={
         "group_0": QuantizationScheme(
             targets=["Linear"],
@@ -66,6 +67,7 @@
 w8a8_static_recipe_modifier = AutoRoundModifier(
     ignore=["lm_head"],
     iters=0,
+    enable_torch_compile=False,
     config_groups={
         "group_0": QuantizationScheme(
             targets=["Linear"],
@@ -192,9 +194,9 @@ def test_oneshot_with_device_ids(tiny_tiny_llama_model_path, tmp_path):
     "recipe",
     [w8a8_dynamic_recipe_modifier, w8a8_static_recipe_modifier],
 )
-def test_rtn_oneshot(recipe, tmp_path):
+def test_rtn_oneshot(recipe, tmp_path, tiny_tiny_llama_model_path):
     output = tmp_path / "oneshot_output"
-    model = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+    model = tiny_tiny_llama_model_path
     tokenizer = AutoTokenizer.from_pretrained(model)
     dataset = get_dataset(
         tokenizer=tokenizer,
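Note on enable_torch_compile=False in the two recipes: disabling compilation in these CPU integration tests presumably avoids torch.compile's per-call warm-up cost, which dominates runtime on a tiny model and varies across torch versions. A self-contained illustration of that warm-up overhead (generic torch.compile usage, not AutoRound internals):

import time
import torch

def f(x):
    return torch.nn.functional.gelu(x) * 2

compiled = torch.compile(f)
x = torch.randn(4, 8)

t0 = time.perf_counter()
compiled(x)  # first call pays the compilation latency
t1 = time.perf_counter()
compiled(x)  # subsequent calls reuse the compiled kernel
t2 = time.perf_counter()
print(f"first call {t1 - t0:.3f}s, second call {t2 - t1:.6f}s")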
4 changes: 2 additions & 2 deletions test/test_cuda/algorithms/test_auto_scheme.py

@@ -263,13 +263,13 @@ def test_auto_scheme_export(self):
         model_name = get_model_path("facebook/opt-125m")
         scheme = AutoScheme(avg_bits=3, options=("W2A16", "W4A16", "W8A16", "BF16"))
         ar = AutoRound(model=model_name, scheme=scheme)
-        ar.quantize()
+        ar.quantize_and_save(output_dir=self.save_dir)
         evaluate_accuracy(self.save_dir, threshold=0.25)

     @pytest.mark.skip_ci(reason="The evaluation is time-consuming")
     def test_enable_torch_compile(self):
         model_name = get_model_path("facebook/opt-125m")
         scheme = AutoScheme(avg_bits=2, options=("W2A16"), ignore_scale_zp_bits=True)
         ar = AutoRound(model=model_name, scheme=scheme, enable_torch_compile=True)
-        ar.quantize()
+        ar.quantize_and_save(output_dir=self.save_dir)
         evaluate_accuracy(self.save_dir, threshold=0.10)
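Note on the quantize() -> quantize_and_save() switch: evaluate_accuracy(self.save_dir, ...) loads an exported checkpoint from disk, but quantize() only transforms the model in memory, so the old tests evaluated a missing or stale directory. A condensed sketch of the two call patterns (arguments mirror the diff; constructor defaults are assumed):

from auto_round import AutoRound

ar = AutoRound(model="facebook/opt-125m", scheme="W4A16")

# In-memory only: nothing is written, so a later
# evaluate_accuracy(save_dir) has no checkpoint to load.
# ar.quantize()

# Quantize and export in one step; save_dir now holds a loadable model.
ar.quantize_and_save(output_dir="./tmp_autoround")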
5 changes: 3 additions & 2 deletions test/test_cuda/export/test_auto_gptq_format.py

@@ -9,7 +9,7 @@

 from auto_round import AutoRound

-from ...envs import require_optimum
+from ...envs import require_gptqmodel
 from ...helpers import eval_generated_prompt, get_model_path, get_tiny_model, transformers_version


@@ -33,7 +33,7 @@ def _save_dir(self, tmp_path):
         yield
         shutil.rmtree(self.save_dir, ignore_errors=True)

-    @require_optimum
+    @require_gptqmodel
     def test_autogptq_format(self, tiny_opt_model_path):
         bits, group_size, sym = 4, 128, False
         autoround = AutoRound(
@@ -52,6 +52,7 @@ def test_autogptq_format(self, tiny_opt_model_path):
         assert model is not None, "Loaded model should not be None."

     @pytest.mark.skip_ci(reason="Only tiny model is suggested")  # skip this test in CI
+    @require_gptqmodel
     def test_autogptq_format_qsave_ignore_layers(self):
         model = AutoModelForCausalLM.from_pretrained(get_model_path("facebook/opt-125m"))
3 changes: 3 additions & 0 deletions test/test_cuda/models/test_support_vlms.py

@@ -84,6 +84,7 @@ def test_qwen2(self):
         shutil.rmtree(quantized_model_path, ignore_errors=True)

     @require_vlm_env
     # This model is not updated for new transformers. https://huggingface.co/microsoft/Phi-3.5-vision-instruct/discussions/39
+    @require_package_version_ut("transformers", "<4.54.0")
     def test_phi3(self):
         model_path = get_model_path("microsoft/Phi-3.5-vision-instruct")
@@ -135,6 +136,8 @@ def test_phi3(self):
         shutil.rmtree(quantized_model_path, ignore_errors=True)

     @require_vlm_env
+    # This model is not updated for new transformers. https://huggingface.co/microsoft/Phi-3.5-vision-instruct/discussions/39
+    @require_package_version_ut("transformers", "<4.54.0")
     def test_phi3_vision_awq(self):
         model_path = get_model_path("microsoft/Phi-3.5-vision-instruct")
         ## test tune
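Note on the version pin: Phi-3.5-vision's remote code breaks on transformers >= 4.54.0 (see the linked discussion), so both tests are now gated. require_package_version_ut is the repo's own test helper; a sketch of how such a gate is typically implemented (this implementation is an assumption, not the repo's code):

import pytest
from importlib.metadata import PackageNotFoundError, version
from packaging.specifiers import SpecifierSet

def require_package_version_ut(package: str, spec: str):
    """Skip the test unless `package` is installed and satisfies `spec`."""
    try:
        ok = version(package) in SpecifierSet(spec)
    except PackageNotFoundError:
        ok = False
    return pytest.mark.skipif(not ok, reason=f"requires {package}{spec}")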