diff --git a/QEfficient/base/modeling_qeff.py b/QEfficient/base/modeling_qeff.py index 88c2c155b..edbf8b365 100644 --- a/QEfficient/base/modeling_qeff.py +++ b/QEfficient/base/modeling_qeff.py @@ -5,7 +5,6 @@ # # ---------------------------------------------------------------------------- -import hashlib import inspect import json import logging @@ -23,7 +22,7 @@ from QEfficient.base.pytorch_transforms import PytorchTransform from QEfficient.generation.cloud_infer import QAICInferenceSession from QEfficient.utils import constants -from QEfficient.utils.cache import QEFF_HOME, to_hashable +from QEfficient.utils.cache import QEFF_HOME logger = logging.getLogger(__name__) @@ -67,9 +66,13 @@ def __init__(self, model: torch.nn.Module) -> None: @abstractmethod def model_name(self) -> str: ... - @property + @classmethod + @abstractmethod + def model_hash(cls) -> str: ... + + @classmethod @abstractmethod - def model_hash(self) -> str: ... + def compile_hash(cls) -> str: ... @abstractmethod def export(self, export_dir: Optional[str] = None) -> Path: @@ -115,6 +118,7 @@ def _export( example_inputs: Dict[str, torch.Tensor], output_names: List[str], dynamic_axes: Dict[str, Dict[int, str]], + model_hash: str, export_kwargs: Optional[Dict[str, any]] = None, onnx_transform_kwargs: Optional[Dict[str, any]] = None, export_dir: Optional[str] = None, @@ -130,9 +134,9 @@ def _export( :onnx_transform_kwargs (dict): Additional arguments to be passed to `Transform.apply` for this class. :export_dir (str): Specify the export directory. The export_dir will be suffixed with a hash corresponding to current model. 
""" + onnx_path = self._get_onnx_path(model_hash, export_dir) export_dir = Path(export_dir or (QEFF_HOME / self.model_name)) - export_dir = export_dir.with_name(export_dir.name + "-" + self.model_hash) - onnx_path = export_dir / f"{self.model_name}.onnx" + export_dir = export_dir.with_name(export_dir.name + "-" + model_hash) if onnx_path.is_file(): self.onnx_path = onnx_path return onnx_path @@ -193,8 +197,22 @@ def _export( self.onnx_path = onnx_path return onnx_path + def _get_onnx_path(self, model_hash: str, export_dir: Optional[str] = None): + export_dir = Path(export_dir or (QEFF_HOME / self.model_name)) + export_dir = export_dir.with_name(export_dir.name + "-" + model_hash) + onnx_path = export_dir / f"{self.model_name}.onnx" + return onnx_path + + def _get_qpc_path(self, compile_hash: str, onnx_path: Optional[str] = None, compile_dir: Optional[str] = None): + onnx_path = Path(onnx_path or onnx_path) + compile_dir = Path(compile_dir or onnx_path.parent) + qpc_path = compile_dir / "qpc" + qpc_path = qpc_path.with_name(qpc_path.name + "-" + compile_hash) + return qpc_path + def _compile( self, + compile_hash: str, onnx_path: Optional[str] = None, compile_dir: Optional[str] = None, *, @@ -225,6 +243,14 @@ def _compile( if not onnx_path.is_file(): raise FileNotFoundError(f"ONNX file not found at: {onnx_path}") + qpc_path = qpc_path.with_name(qpc_path.name + "-" + compile_hash) + if qpc_path.is_dir(): + if (qpc_path / "programqpc.bin").is_file(): + self.qpc_path = qpc_path + return qpc_path + # Probably compilation failure last time, delete directory to start over + shutil.rmtree(qpc_path) + command = constants.COMPILER + [f"-m={onnx_path}"] for key, value in compiler_options.items(): option = "-" + key.replace("_", "-") @@ -233,26 +259,6 @@ def _compile( command.append(option) continue command.append(f"{option}={value}") - compile_hash = hashlib.sha256(to_hashable(command)) - - if specializations is not None: - compile_hash.update(to_hashable(specializations)) - 
- if custom_io is not None: - compile_hash.update(to_hashable(custom_io)) - - if mdp_ts_num_devices > 1: - compile_hash.update(to_hashable({"mdp_ts_num_devices": mdp_ts_num_devices})) - - # Check if already compiled - compile_hash = compile_hash.hexdigest()[:16] - qpc_path = qpc_path.with_name(qpc_path.name + "-" + compile_hash) - if qpc_path.is_dir(): - if (qpc_path / "programqpc.bin").is_file(): - self.qpc_path = qpc_path - return qpc_path - # Probably compilation failure last time, delete directory to start over - shutil.rmtree(qpc_path) # Write specializations.json file if specializations is not None: diff --git a/QEfficient/transformers/models/modeling_auto.py b/QEfficient/transformers/models/modeling_auto.py index dcf68aa00..f592f814c 100644 --- a/QEfficient/transformers/models/modeling_auto.py +++ b/QEfficient/transformers/models/modeling_auto.py @@ -7,6 +7,7 @@ import hashlib import logging +import os import warnings from pathlib import Path from typing import Any, List, Optional, Union @@ -70,8 +71,8 @@ def model_name(self) -> str: mname = mname[4:] return mname - @property - def model_hash(self) -> str: + @classmethod + def model_hash(self, model_config) -> str: # NOTE: model_config.to_diff_dict() has "_name_or_path" attribute which is the model card name or path. # Using same card name will result in same hash. But, using a relative path for one run and # absolute path for another run will result in different hash. 
@@ -80,7 +81,7 @@ def model_hash(self) -> str: # Compute the hash with: model_config, transforms mhash = hashlib.sha256() - mhash.update(to_hashable(self.model.config.to_diff_dict())) + mhash.update(to_hashable(model_config.to_diff_dict())) mhash.update(to_hashable(self._transform_names())) mhash = mhash.hexdigest()[:16] return mhash @@ -159,16 +160,123 @@ def from_pretrained(cls, pretrained_model_name_or_path, continuous_batching: boo self.continuous_batching = continuous_batching return self - @property - def model_hash(self) -> str: + @classmethod + def model_hash(cls, model_config, continuous_batching: bool) -> str: # Compute the hash with: model_config, continuous_batching, transforms mhash = hashlib.sha256() - mhash.update(to_hashable(self.model.config.to_diff_dict())) - mhash.update(to_hashable({"continuous_batching": self.continuous_batching})) - mhash.update(to_hashable(self._transform_names())) + mhash.update(to_hashable(model_config.to_diff_dict())) + mhash.update(to_hashable({"continuous_batching": continuous_batching})) + mhash.update(to_hashable(cls._transform_names())) mhash = mhash.hexdigest()[:16] return mhash + @classmethod + def get_onnx_path(cls, model_config, continuous_batching: bool = False, export_dir: Optional[str] = None) -> str: + mhash = cls.model_hash(model_config, continuous_batching=continuous_batching) + return cls._get_onnx_path(model_hash=mhash, export_dir=export_dir) + + @classmethod + def compile_hash( + cls, + model_config, + num_cores: int, + continuous_batching: bool = False, + export_dir: Optional[str] = None, + prefill_seq_len: int = 32, + ctx_len: int = 128, + batch_size: int = 1, + full_batch_size: Optional[int] = None, + num_devices: int = 1, + mxfp6_matmul: bool = False, + mxint8_kv_cache: bool = False, + **compiler_options, + ): + onnx_path = cls.get_onnx_path(model_config, continuous_batching, export_dir=export_dir) + # Specializations + if continuous_batching: + if full_batch_size is None: + raise 
TypeError("missing required argument: 'full_batch_size'") + + specializations = [ + {"full_batch_size": full_batch_size, "batch_size": 1, "seq_len": prefill_seq_len, "ctx_len": ctx_len}, + {"full_batch_size": full_batch_size, "batch_size": full_batch_size, "seq_len": 1, "ctx_len": ctx_len}, + ] + else: + specializations = [ + {"batch_size": batch_size, "seq_len": prefill_seq_len, "ctx_len": ctx_len}, + {"batch_size": batch_size, "seq_len": 1, "ctx_len": ctx_len}, + ] + + # Custom IO + custom_io = {} + kv_cache_dtype = "mxint8" if mxint8_kv_cache else "float16" + for suffix in ["", "_RetainedState"]: + for i in range(model_config.num_hidden_layers): + for kv in ["key", "value"]: + custom_io[f"past_{kv}.{i}{suffix}"] = kv_cache_dtype + + compile_hash = hashlib.sha256( + to_hashable( + { + "onnx_path": onnx_path, + "num_cores": num_cores, + "prefill_seq_len": prefill_seq_len, + "ctx_len": ctx_len, + "batch_size": batch_size, + "full_batch_size": full_batch_size, + "mxfp6_matmul": mxfp6_matmul, + "mxint8_kv_cache": mxint8_kv_cache, + **compiler_options, + } + ) + ) + + if specializations is not None: + compile_hash.update(to_hashable(specializations)) + + if custom_io is not None: + compile_hash.update(to_hashable(custom_io)) + + if num_devices > 1: + compile_hash.update(to_hashable({"mdp_ts_num_devices": num_devices})) + + # Check if already compiled + compile_hash = compile_hash.hexdigest()[:16] + return compile_hash + + @classmethod + def get_qpc_path( + cls, + model_config, + num_cores, + continuous_batching: bool = False, + prefill_seq_len: int = 32, + ctx_len: int = 128, + batch_size: Optional[int] = 1, + full_batch_size: Optional[int] = None, + num_devices: int = 1, + mxfp6_matmul: bool = False, + mxint8_kv_cache: bool = False, + onnx_path: Optional[str] = None, + compile_dir: Optional[str] = None, + **compiler_options, + ): + compile_hash = cls.compile_hash( + model_config, + continuous_batching, + num_cores=num_cores, + export_dir=os.path.dirname(onnx_path) if 
 onnx_path else None, + prefill_seq_len=prefill_seq_len, + ctx_len=ctx_len, + batch_size=batch_size, + full_batch_size=full_batch_size, + num_devices=num_devices, + mxfp6_matmul=mxfp6_matmul, + mxint8_kv_cache=mxint8_kv_cache, + **compiler_options, + ) + return cls._get_qpc_path(compile_hash, onnx_path, compile_dir) + def export(self, export_dir: Optional[str] = None) -> str: """ Exports the model to ``ONNX`` format using ``torch.onnx.export``. @@ -220,11 +328,13 @@ def export(self, export_dir: Optional[str] = None) -> str: example_inputs, output_names, dynamic_axes, + model_hash=self.model_hash(model_config=self.model.config, continuous_batching=self.continuous_batching), export_dir=export_dir, ) def compile( self, + num_cores: int, onnx_path: Optional[str] = None, compile_dir: Optional[str] = None, *, @@ -233,7 +343,6 @@ def compile( batch_size: int = 1, full_batch_size: Optional[int] = None, num_devices: int = 1, - num_cores: int = 16, # FIXME: Make this mandatory arg mxfp6_matmul: bool = False, mxint8_kv_cache: bool = False, **compiler_options, @@ -283,7 +392,23 @@ def compile( for kv in ["key", "value"]: custom_io[f"past_{kv}.{i}{suffix}"] = kv_cache_dtype + compile_hash = self.compile_hash( + self.model.config, + continuous_batching=self.continuous_batching, + num_cores=num_cores, + export_dir=os.path.dirname(onnx_path) if onnx_path else None, + prefill_seq_len=prefill_seq_len, + ctx_len=ctx_len, + batch_size=batch_size, + full_batch_size=full_batch_size, + num_devices=num_devices, + mxfp6_matmul=mxfp6_matmul, + mxint8_kv_cache=mxint8_kv_cache, + **compiler_options, + ) + return self._compile( + compile_hash, onnx_path, compile_dir, compile_only=True,