Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions QEfficient/transformers/models/gemma3/configs/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# -----------------------------------------------------------------------------
#
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
# SPDX-License-Identifier: BSD-3-Clause
#
# -----------------------------------------------------------------------------

Large diffs are not rendered by default.

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions QEfficient/transformers/models/gemma3/modeling_gemma3.py
Original file line number Diff line number Diff line change
Expand Up @@ -677,6 +677,14 @@ def forward(
logits = logits.float()
return logits, pixel_values, image_idx, outputs.past_key_values

def get_npi_file(self, model_name: str) -> str:
    """Return the path to the default node-precision-info (NPI) YAML file for *model_name*.

    Args:
        model_name: Hugging Face model identifier (e.g. ``google/gemma-3-4b-it``).

    Returns:
        Filesystem path to the bundled fp32-nodes YAML config for this model.

    Raises:
        ValueError: If no default NPI file is registered for *model_name*.
    """
    # Use .get() so an unregistered model raises the intended ValueError below
    # instead of an opaque KeyError from the direct mapping subscript.
    npi_file = constants.NPI_MAPPING.get(model_name)
    if npi_file is not None:
        return npi_file
    raise ValueError(
        f"For Model {self.pretrained_model_name_or_path} default NPI file is not supported/added for this particular model. Please use one of the following: google/gemma-3-4b-it, google/gemma-3-27b-it"
    )

def get_specializations(
self,
batch_size: int,
Expand Down
10 changes: 7 additions & 3 deletions QEfficient/transformers/models/modeling_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -1191,7 +1191,6 @@ def compile(
compiler_options.pop("continuous_batching", None)
compiler_options.pop("kv_cache_batch_size", None)
compiler_options.pop("full_batch_size", None)

if not skip_vision:
self.vision_model._compile(
compile_dir=compile_dir,
Expand All @@ -1207,6 +1206,10 @@ def compile(
**compiler_options,
)

# Custom NPI file options
if hasattr(self.model, "get_npi_file") and "node_precision_info" not in compiler_options:
compiler_options["node_precision_info"] = self.model.get_npi_file(self.model.name_or_path)

if not skip_lang:
custom_io_lang = {}
# Inputs
Expand All @@ -1220,7 +1223,6 @@ def compile(
for output_name in output_names["lang"]:
if output_name.endswith("_RetainedState"):
custom_io_lang[output_name] = "float16" if "vision_embeds" in output_name else kv_cache_dtype

self.lang_model._compile(
compile_dir=compile_dir,
compile_only=True,
Expand Down Expand Up @@ -1817,6 +1819,9 @@ def compile(
**compiler_options,
)

if hasattr(self.model, "get_npi_file") and "node_precision_info" not in compiler_options:
compiler_options["node_precision_info"] = self.model.get_npi_file(self.model.name_or_path)

custom_io = {}
kv_cache_dtype = "mxint8" if mxint8_kv_cache else "float16"
# inputs
Expand All @@ -1835,7 +1840,6 @@ def compile(
compiler_options.pop("continuous_batching", None)
compiler_options.pop("kv_cache_batch_size", None)
compiler_options.pop("full_batch_size", None)

self._compile(
onnx_path=onnx_path,
compile_dir=compile_dir,
Expand Down
9 changes: 9 additions & 0 deletions QEfficient/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,15 @@
ONNX_EXPORT_IMAGE_DEPTH = 3
ONNX_EXPORT_CTX_LEN = 1024

# Default node-precision-info (NPI) YAML files bundled with the package.
# Maps a Hugging Face model id to the fp32-nodes config consumed at compile
# time when the caller does not supply an explicit ``node_precision_info``.
# NOTE(review): only the two gemma-3 variants below currently have defaults.
NPI_MAPPING = {
    "google/gemma-3-4b-it": os.path.join(
        QEFF_DIR, "transformers", "models", "gemma3", "configs", "fp32_nodes_gemma3_4b.yaml"
    ),
    "google/gemma-3-27b-it": os.path.join(
        QEFF_DIR, "transformers", "models", "gemma3", "configs", "fp32_nodes_gemma3_27b.yaml"
    ),
}

# Compiler defaults
DEFAULT_AIC_NUM_CORES = 16
DEFAULT_AIC_MXPF6_MATMUL = False
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
config = AutoConfig.from_pretrained(model_id)

# For Testing Purpose Only
config.text_config.num_hidden_layers = 1
config.vision_config.num_hidden_layers = 2
# config.text_config.num_hidden_layers = 1
# config.vision_config.num_hidden_layers = 2

tokenizer = transformers.AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
processor = AutoProcessor.from_pretrained(model_id)
Expand All @@ -44,7 +44,6 @@
aic_enable_depth_first=True,
skip_vision=True,
mos=1,
node_precision_info="examples/gemma3_example/fp32_nodes_gemma3_4b.yaml", # Change to fp32_nodes_gemma3_27b.yaml for 27B model
)

messages = [
Expand Down Expand Up @@ -80,7 +79,6 @@
mxint8_kv_cache=False,
aic_enable_depth_first=True,
mos=1,
node_precision_info="examples/gemma3_example/fp32_nodes_gemma3_4b.yaml", # Change to fp32_nodes_gemma3_27b.yaml for 27B model
)

### IMAGE + TEXT ###
Expand Down
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ dependencies = [
test = ["pytest","pytest-mock"]
docs = ["Sphinx==7.1.2","sphinx-rtd-theme==2.0.0","myst-parser==3.0.1","sphinx-multiversion"]
quality = ["black", "ruff", "hf_doc_builder@git+https://github.com/huggingface/doc-builder.git"]

# Ship the bundled gemma3 NPI (node-precision-info) YAML configs inside the
# wheel so NPI_MAPPING paths resolve after installation, not just in a checkout.
[tool.setuptools.package-data]
"QEfficient.transformers.models.gemma3.configs" = ["*.yaml"]

[build-system]
requires = ["setuptools>=62.0.0"]
build-backend = "setuptools.build_meta"
Expand Down
Loading