From 2b8c3ff9093d38015013eb1513022eae924bc21a Mon Sep 17 00:00:00 2001 From: Arthur Cheng Date: Wed, 1 Apr 2026 12:11:44 -0700 Subject: [PATCH 1/3] move quant parsing to base model config --- pkg/hfutil/modelconfig/interface.go | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/pkg/hfutil/modelconfig/interface.go b/pkg/hfutil/modelconfig/interface.go index 162aed37..cbee9c94 100644 --- a/pkg/hfutil/modelconfig/interface.go +++ b/pkg/hfutil/modelconfig/interface.go @@ -67,6 +67,9 @@ type BaseModelConfig struct { TorchDtype string `json:"torch_dtype"` TransformerVersion string `json:"transformers_version"` + // Quantization config (optional, shared across all model types) + QuantizationConfig *QuantizationConfig `json:"quantization_config,omitempty"` + // Internal fields (not in JSON) ConfigPath string `json:"-"` } @@ -91,6 +94,14 @@ func (c *BaseModelConfig) GetTorchDtype() string { return c.TorchDtype } +// GetQuantizationType returns the quantization method used (if any) +func (c *BaseModelConfig) GetQuantizationType() string { + if c.QuantizationConfig != nil && c.QuantizationConfig.QuantMethod != "" { + return c.QuantizationConfig.QuantMethod + } + return "" +} + // Default implementation for HasVision - most models don't have vision capabilities func (c *BaseModelConfig) HasVision() bool { return false @@ -238,9 +249,6 @@ type GenericModelConfig struct { IntermediateSize int `json:"intermediate_size"` MaxPositionEmbeddings int `json:"max_position_embeddings"` VocabSize int `json:"vocab_size"` - - // Quantization config (optional) - QuantizationConfig *QuantizationConfig `json:"quantization_config,omitempty"` } // GetParameterCount attempts to get parameter count from safetensors, falls back to estimation @@ -278,13 +286,6 @@ func estimateGenericParams(hiddenSize, numLayers, intermediateSize, vocabSize in return embeddingParams + totalLayerParams } -func (c *GenericModelConfig) GetQuantizationType() string { - if c.QuantizationConfig != nil && c.QuantizationConfig.QuantMethod != "" { - return c.QuantizationConfig.QuantMethod - } - return "" -} - func (c *GenericModelConfig) GetContextLength() int { return c.MaxPositionEmbeddings } @@ -380,11 +381,6 @@ func (c *GenericDiffusionModelConfig) GetParameterCount() int64 { return total } -func (c *GenericDiffusionModelConfig) GetQuantizationType() string { - // Not supported. Doesn't seem to be standardized in HF. - return "" -} - func (c *GenericDiffusionModelConfig) GetContextLength() int { if c.ConfigPath == "" { return 0 From 524cd8ccf5ed12faf2d79808540fa6dda1981c2a Mon Sep 17 00:00:00 2001 From: Arthur Cheng Date: Wed, 1 Apr 2026 12:12:26 -0700 Subject: [PATCH 2/3] remove dup quant parsing from individual model class --- pkg/hfutil/modelconfig/baichuan.go | 5 ----- pkg/hfutil/modelconfig/bert.go | 5 ----- pkg/hfutil/modelconfig/chatglm.go | 5 ----- pkg/hfutil/modelconfig/command_r.go | 5 ----- pkg/hfutil/modelconfig/dbrx.go | 5 ----- pkg/hfutil/modelconfig/deepseek_v3.go | 11 ----------- pkg/hfutil/modelconfig/deepseek_vl.go | 5 ----- pkg/hfutil/modelconfig/exaone.go | 5 ----- pkg/hfutil/modelconfig/gemma.go | 5 ----- pkg/hfutil/modelconfig/gemma3.go | 5 ----- pkg/hfutil/modelconfig/internlm.go | 5 ----- pkg/hfutil/modelconfig/kimi_k2.go | 11 ----------- pkg/hfutil/modelconfig/llava.go | 5 ----- pkg/hfutil/modelconfig/minicpm.go | 5 ----- pkg/hfutil/modelconfig/mistral.go | 5 ----- pkg/hfutil/modelconfig/mixtral.go | 5 ----- pkg/hfutil/modelconfig/mllama.go | 5 ----- pkg/hfutil/modelconfig/phi3.go | 5 ----- pkg/hfutil/modelconfig/phi3_v.go | 5 ----- pkg/hfutil/modelconfig/phi3small.go | 5 ----- pkg/hfutil/modelconfig/phimoe.go | 5 ----- pkg/hfutil/modelconfig/qwen.go | 11 ----------- pkg/hfutil/modelconfig/qwen2.go | 11 ----------- pkg/hfutil/modelconfig/qwen2_vl.go | 11 ----------- pkg/hfutil/modelconfig/qwen3.go | 11 ----------- pkg/hfutil/modelconfig/qwen3_moe.go | 11 ----------- pkg/hfutil/modelconfig/qwen3_vl.go | 11 ----------- pkg/hfutil/modelconfig/stablelm.go | 5 ----- pkg/hfutil/modelconfig/xverse.go | 5 ----- 29 files changed, 193 deletions(-) diff --git a/pkg/hfutil/modelconfig/baichuan.go b/pkg/hfutil/modelconfig/baichuan.go index 83d5d936..314e6061 100644 --- a/pkg/hfutil/modelconfig/baichuan.go +++ b/pkg/hfutil/modelconfig/baichuan.go @@ -88,11 +88,6 @@ func (c *BaichuanConfig) GetModelSizeBytes() int64 { return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype) } -// GetQuantizationType returns the quantization method used (if any) -func (c *BaichuanConfig) GetQuantizationType() string { - return "" // No quantization by default -} - // HasVision returns false for Baichuan base models func (c *BaichuanConfig) HasVision() bool { return false diff --git a/pkg/hfutil/modelconfig/bert.go b/pkg/hfutil/modelconfig/bert.go index 0607583e..2e22a9df 100644 --- a/pkg/hfutil/modelconfig/bert.go +++ b/pkg/hfutil/modelconfig/bert.go @@ -109,11 +109,6 @@ func (c *BertConfig) GetModelSizeBytes() int64 { return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype) } -// GetQuantizationType returns the quantization method used (if any) -func (c *BertConfig) GetQuantizationType() string { - return "" // No quantization by default -} - // HasVision returns false for BERT models func (c *BertConfig) HasVision() bool { return false diff --git a/pkg/hfutil/modelconfig/chatglm.go b/pkg/hfutil/modelconfig/chatglm.go index 6a5ca74b..5bd5a15c 100644 --- a/pkg/hfutil/modelconfig/chatglm.go +++ b/pkg/hfutil/modelconfig/chatglm.go @@ -111,11 +111,6 @@ func (c *ChatGLMConfig) GetModelSizeBytes() int64 { return EstimateModelSizeBytes(c.GetParameterCount(), dtype) } -// GetQuantizationType returns the quantization method used (if any) -func (c *ChatGLMConfig) GetQuantizationType() string { - return "" // No quantization by default -} - // HasVision returns false for ChatGLM base models func (c *ChatGLMConfig) HasVision() bool { return false diff --git a/pkg/hfutil/modelconfig/command_r.go b/pkg/hfutil/modelconfig/command_r.go index dddb3d9e..57067885 100644 --- a/pkg/hfutil/modelconfig/command_r.go +++ b/pkg/hfutil/modelconfig/command_r.go @@ -100,11 +100,6 @@ func (c *CommandRConfig) GetModelSizeBytes() int64 { return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype) } -// GetQuantizationType returns the quantization method used (if any) -func (c *CommandRConfig) GetQuantizationType() string { - return "" // No quantization by default -} - // HasVision returns false for Command-R base models func (c *CommandRConfig) HasVision() bool { return false diff --git a/pkg/hfutil/modelconfig/dbrx.go b/pkg/hfutil/modelconfig/dbrx.go index c33da8cc..d2830590 100644 --- a/pkg/hfutil/modelconfig/dbrx.go +++ b/pkg/hfutil/modelconfig/dbrx.go @@ -130,11 +130,6 @@ func (c *DBRXConfig) GetModelSizeBytes() int64 { return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype) } -// GetQuantizationType returns the quantization method used (if any) -func (c *DBRXConfig) GetQuantizationType() string { - return "" // No quantization by default -} - // HasVision returns false for DBRX base models func (c *DBRXConfig) HasVision() bool { return false diff --git a/pkg/hfutil/modelconfig/deepseek_v3.go b/pkg/hfutil/modelconfig/deepseek_v3.go index b95d9436..88064ca6 100644 --- a/pkg/hfutil/modelconfig/deepseek_v3.go +++ b/pkg/hfutil/modelconfig/deepseek_v3.go @@ -64,9 +64,6 @@ type DeepseekV3Config struct { // RoPE scaling RopeScaling RopeScalingConfig `json:"rope_scaling"` - // Quantization settings - QuantizationConfig *QuantizationConfig `json:"quantization_config,omitempty"` - // Misc options TieWordEmbeddings bool `json:"tie_word_embeddings"` UseCache bool `json:"use_cache"` @@ -146,14 +143,6 @@ func (c *DeepseekV3Config) GetTransformerVersion() string { return c.BaseModelConfig.TransformerVersion } -// GetQuantizationType returns the quantization method used (if any) -func (c *DeepseekV3Config) GetQuantizationType() string { - if c.QuantizationConfig != nil && c.QuantizationConfig.QuantMethod != "" { - return c.QuantizationConfig.QuantMethod - } - return "" -} - // GetArchitecture returns the model architecture func (c *DeepseekV3Config) GetArchitecture() string { if len(c.Architectures) > 0 { diff --git a/pkg/hfutil/modelconfig/deepseek_vl.go b/pkg/hfutil/modelconfig/deepseek_vl.go index c1178711..586b627a 100644 --- a/pkg/hfutil/modelconfig/deepseek_vl.go +++ b/pkg/hfutil/modelconfig/deepseek_vl.go @@ -171,11 +171,6 @@ func (c *DeepSeekVLConfig) GetModelSizeBytes() int64 { return EstimateModelSizeBytes(c.GetParameterCount(), dtype) } -// GetQuantizationType returns the quantization method used (if any) -func (c *DeepSeekVLConfig) GetQuantizationType() string { - return "" // No quantization by default -} - // HasVision returns true for DeepSeek VL models func (c *DeepSeekVLConfig) HasVision() bool { return true diff --git a/pkg/hfutil/modelconfig/exaone.go b/pkg/hfutil/modelconfig/exaone.go index 1f829234..e699583f 100644 --- a/pkg/hfutil/modelconfig/exaone.go +++ b/pkg/hfutil/modelconfig/exaone.go @@ -102,11 +102,6 @@ func (c *ExaoneConfig) GetModelSizeBytes() int64 { return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype) } -// GetQuantizationType returns the quantization method used (if any) -func (c *ExaoneConfig) GetQuantizationType() string { - return "" // No quantization by default -} - // HasVision returns false for ExaONE base models func (c *ExaoneConfig) HasVision() bool { return false diff --git a/pkg/hfutil/modelconfig/gemma.go b/pkg/hfutil/modelconfig/gemma.go index 0df0f037..a66974de 100644 --- a/pkg/hfutil/modelconfig/gemma.go +++ b/pkg/hfutil/modelconfig/gemma.go @@ -127,11 +127,6 @@ func (c *GemmaConfig) GetModelSizeBytes() int64 { return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype) } -// GetQuantizationType returns the quantization method used (if any) -func (c *GemmaConfig) GetQuantizationType() string { - return "" // No quantization by default -} - // HasVision returns false for Gemma base models func (c *GemmaConfig) HasVision() bool { return false diff --git a/pkg/hfutil/modelconfig/gemma3.go b/pkg/hfutil/modelconfig/gemma3.go index 6a7ac8eb..3ee6102d 100644 --- a/pkg/hfutil/modelconfig/gemma3.go +++ b/pkg/hfutil/modelconfig/gemma3.go @@ -133,11 +133,6 @@ func (c *Gemma3Config) GetModelSizeBytes() int64 { return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype) } -// GetQuantizationType returns the quantization method used (if any) -func (c *Gemma3Config) GetQuantizationType() string { - return "" // No quantization by default -} - // HasVision returns true since Gemma3 is a multimodal vision model func (c *Gemma3Config) HasVision() bool { return true diff --git a/pkg/hfutil/modelconfig/internlm.go b/pkg/hfutil/modelconfig/internlm.go index 94abd62e..da7000d2 100644 --- a/pkg/hfutil/modelconfig/internlm.go +++ b/pkg/hfutil/modelconfig/internlm.go @@ -89,11 +89,6 @@ func (c *InternLMConfig) GetModelSizeBytes() int64 { return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype) } -// GetQuantizationType returns the quantization method used (if any) -func (c *InternLMConfig) GetQuantizationType() string { - return "" // No quantization by default -} - // HasVision returns false for InternLM base models func (c *InternLMConfig) HasVision() bool { return false diff --git a/pkg/hfutil/modelconfig/kimi_k2.go b/pkg/hfutil/modelconfig/kimi_k2.go index a4d7014f..6b20313b 100644 --- a/pkg/hfutil/modelconfig/kimi_k2.go +++ b/pkg/hfutil/modelconfig/kimi_k2.go @@ -60,9 +60,6 @@ type KimiK2Config struct { // RoPE scaling (YARN type for Kimi-K2) RopeScaling RopeScalingConfig `json:"rope_scaling"` - // Quantization settings - QuantizationConfig *QuantizationConfig `json:"quantization_config,omitempty"` - // Misc options TieWordEmbeddings bool `json:"tie_word_embeddings"` UseCache bool `json:"use_cache"` @@ -141,14 +138,6 @@ func (c *KimiK2Config) GetTransformerVersion() string { return c.BaseModelConfig.TransformerVersion } -// GetQuantizationType returns the quantization method used (if any) -func (c *KimiK2Config) GetQuantizationType() string { - if c.QuantizationConfig != nil && c.QuantizationConfig.QuantMethod != "" { - return c.QuantizationConfig.QuantMethod - } - return "" -} - // GetArchitecture returns the model architecture func (c *KimiK2Config) GetArchitecture() string { if len(c.Architectures) > 0 { diff --git a/pkg/hfutil/modelconfig/llava.go b/pkg/hfutil/modelconfig/llava.go index d4460e27..6ec98c3d 100644 --- a/pkg/hfutil/modelconfig/llava.go +++ b/pkg/hfutil/modelconfig/llava.go @@ -157,11 +157,6 @@ func (c *LLaVAConfig) GetModelSizeBytes() int64 { return EstimateModelSizeBytes(c.GetParameterCount(), dtype) } -// GetQuantizationType returns the quantization method used (if any) -func (c *LLaVAConfig) GetQuantizationType() string { - return "" // No quantization by default -} - // HasVision returns true for LLaVA models func (c *LLaVAConfig) HasVision() bool { return true diff --git a/pkg/hfutil/modelconfig/minicpm.go b/pkg/hfutil/modelconfig/minicpm.go index ca0b3b0a..177eea6c 100644 --- a/pkg/hfutil/modelconfig/minicpm.go +++ b/pkg/hfutil/modelconfig/minicpm.go @@ -95,11 +95,6 @@ func (c *MiniCPMConfig) GetModelSizeBytes() int64 { return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype) } -// GetQuantizationType returns the quantization method used (if any) -func (c *MiniCPMConfig) GetQuantizationType() string { - return "" // No quantization by default -} - // HasVision returns false for base MiniCPM models func (c *MiniCPMConfig) HasVision() bool { return false diff --git a/pkg/hfutil/modelconfig/mistral.go b/pkg/hfutil/modelconfig/mistral.go index c05abc85..8a8f02c5 100644 --- a/pkg/hfutil/modelconfig/mistral.go +++ b/pkg/hfutil/modelconfig/mistral.go @@ -108,11 +108,6 @@ func (c *MistralConfig) GetTransformerVersion() string { return c.TransformerVersion } -func (c *MistralConfig) GetQuantizationType() string { - // Mistral doesn't have quantization in this config - return "" -} - func (c *MistralConfig) GetArchitecture() string { if len(c.Architectures) > 0 { return c.Architectures[0] diff --git a/pkg/hfutil/modelconfig/mixtral.go b/pkg/hfutil/modelconfig/mixtral.go index 5c838061..8a50f52d 100644 --- a/pkg/hfutil/modelconfig/mixtral.go +++ b/pkg/hfutil/modelconfig/mixtral.go @@ -117,11 +117,6 @@ func (c *MixtralConfig) GetTransformerVersion() string { return c.TransformerVersion } -// GetQuantizationType returns the quantization method used (if any) -func (c *MixtralConfig) GetQuantizationType() string { - return "" // No quantization config for Mixtral by default -} - // GetArchitecture returns the model architecture func (c *MixtralConfig) GetArchitecture() string { if len(c.Architectures) > 0 { diff --git a/pkg/hfutil/modelconfig/mllama.go b/pkg/hfutil/modelconfig/mllama.go index d543cbd7..e4d2eca9 100644 --- a/pkg/hfutil/modelconfig/mllama.go +++ b/pkg/hfutil/modelconfig/mllama.go @@ -145,11 +145,6 @@ func (c *MLlamaConfig) GetTransformerVersion() string { return c.TransformerVersion } -// GetQuantizationType returns the quantization method used (if any) -func (c *MLlamaConfig) GetQuantizationType() string { - return "" // MLlama models don't have quantization by default -} - // GetArchitecture returns the model architecture func (c *MLlamaConfig) GetArchitecture() string { if len(c.Architectures) > 0 { diff --git a/pkg/hfutil/modelconfig/phi3.go b/pkg/hfutil/modelconfig/phi3.go index 55d24de5..8229f344 100644 --- a/pkg/hfutil/modelconfig/phi3.go +++ b/pkg/hfutil/modelconfig/phi3.go @@ -103,11 +103,6 @@ func (c *Phi3Config) GetModelSizeBytes() int64 { return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype) } -// GetQuantizationType returns the quantization method used (if any) -func (c *Phi3Config) GetQuantizationType() string { - return "" // No quantization by default -} - // HasVision returns false since this is not a multimodal vision model func (c *Phi3Config) HasVision() bool { return false diff --git a/pkg/hfutil/modelconfig/phi3_v.go b/pkg/hfutil/modelconfig/phi3_v.go index 138cd356..a6b86383 100644 --- a/pkg/hfutil/modelconfig/phi3_v.go +++ b/pkg/hfutil/modelconfig/phi3_v.go @@ -92,11 +92,6 @@ func (c *Phi3VConfig) GetModelSizeBytes() int64 { return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype) } -// GetQuantizationType returns the quantization method used (if any) -func (c *Phi3VConfig) GetQuantizationType() string { - return "" // No quantization by default -} - // HasVision returns true since this is a multimodal vision model func (c *Phi3VConfig) HasVision() bool { return c.ImgProcessor != nil diff --git a/pkg/hfutil/modelconfig/phi3small.go b/pkg/hfutil/modelconfig/phi3small.go index 842418a6..61fd58cc 100644 --- a/pkg/hfutil/modelconfig/phi3small.go +++ b/pkg/hfutil/modelconfig/phi3small.go @@ -114,11 +114,6 @@ func (c *Phi3SmallConfig) GetModelSizeBytes() int64 { return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype) } -// GetQuantizationType returns the quantization method used (if any) -func (c *Phi3SmallConfig) GetQuantizationType() string { - return "" // No quantization by default -} - // HasVision returns false since this is not a multimodal vision model func (c *Phi3SmallConfig) HasVision() bool { return false diff --git a/pkg/hfutil/modelconfig/phimoe.go b/pkg/hfutil/modelconfig/phimoe.go index def9dea3..888c95d3 100644 --- a/pkg/hfutil/modelconfig/phimoe.go +++ b/pkg/hfutil/modelconfig/phimoe.go @@ -116,11 +116,6 @@ func (c *PhiMoEConfig) GetModelSizeBytes() int64 { return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype) } -// GetQuantizationType returns the quantization method used (if any) -func (c *PhiMoEConfig) GetQuantizationType() string { - return "" // No quantization by default -} - // HasVision returns false since this is not a multimodal vision model func (c *PhiMoEConfig) HasVision() bool { return false diff --git a/pkg/hfutil/modelconfig/qwen.go b/pkg/hfutil/modelconfig/qwen.go index b14fe06c..fd554c1c 100644 --- a/pkg/hfutil/modelconfig/qwen.go +++ b/pkg/hfutil/modelconfig/qwen.go @@ -48,9 +48,6 @@ type QwenConfig struct { FP32 bool `json:"fp32"` OnnxSafe *bool `json:"onnx_safe"` TokenizerClass string `json:"tokenizer_class"` - - // Quantization - QuantizationConfig *QuantizationConfig `json:"quantization_config,omitempty"` } // LoadQwenConfig loads a Qwen v1 model configuration from a JSON file @@ -110,14 +107,6 @@ func (c *QwenConfig) GetModelSizeBytes() int64 { return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype) } -// GetQuantizationType returns the quantization method used (if any) -func (c *QwenConfig) GetQuantizationType() string { - if c.QuantizationConfig != nil && c.QuantizationConfig.QuantMethod != "" { - return c.QuantizationConfig.QuantMethod - } - return "" -} - // HasVision returns false for Qwen v1 base models func (c *QwenConfig) HasVision() bool { return false // Base Qwen v1 models don't have vision capabilities diff --git a/pkg/hfutil/modelconfig/qwen2.go b/pkg/hfutil/modelconfig/qwen2.go index 4ddbc09d..3a909c94 100644 --- a/pkg/hfutil/modelconfig/qwen2.go +++ b/pkg/hfutil/modelconfig/qwen2.go @@ -39,9 +39,6 @@ type Qwen2Config struct { TieWordEmbeddings bool `json:"tie_word_embeddings"` UseCache bool `json:"use_cache"` UseSlidingWindow bool `json:"use_sliding_window"` - - // Quantization - QuantizationConfig *QuantizationConfig `json:"quantization_config,omitempty"` } // LoadQwen2Config loads a Qwen2 model configuration from a JSON file @@ -100,14 +97,6 @@ func (c *Qwen2Config) GetModelSizeBytes() int64 { return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype) } -// GetQuantizationType returns the quantization method used (if any) -func (c *Qwen2Config) GetQuantizationType() string { - if c.QuantizationConfig != nil && c.QuantizationConfig.QuantMethod != "" { - return c.QuantizationConfig.QuantMethod - } - return "" -} - // HasVision returns false for Qwen2 base models func (c *Qwen2Config) HasVision() bool { return false // Base Qwen2 models don't have vision capabilities diff --git a/pkg/hfutil/modelconfig/qwen2_vl.go b/pkg/hfutil/modelconfig/qwen2_vl.go index 4ed7f7c0..1a55e61d 100644 --- a/pkg/hfutil/modelconfig/qwen2_vl.go +++ b/pkg/hfutil/modelconfig/qwen2_vl.go @@ -75,9 +75,6 @@ type Qwen2VLConfig struct { TieWordEmbeddings bool `json:"tie_word_embeddings"` UseCache bool `json:"use_cache"` UseSlidingWindow bool `json:"use_sliding_window"` - - // Quantization - QuantizationConfig *QuantizationConfig `json:"quantization_config,omitempty"` } // LoadQwen2VLConfig loads a Qwen2-VL model configuration from a JSON file @@ -141,14 +138,6 @@ func (c *Qwen2VLConfig) GetModelSizeBytes() int64 { return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype) } -// GetQuantizationType returns the quantization method used (if any) -func (c *Qwen2VLConfig) GetQuantizationType() string { - if c.QuantizationConfig != nil && c.QuantizationConfig.QuantMethod != "" { - return c.QuantizationConfig.QuantMethod - } - return "" -} - // HasVision returns true for Qwen2-VL models func (c *Qwen2VLConfig) HasVision() bool { return true // Qwen2-VL models have vision capabilities diff --git a/pkg/hfutil/modelconfig/qwen3.go b/pkg/hfutil/modelconfig/qwen3.go index b49905d2..1cd0038c 100644 --- a/pkg/hfutil/modelconfig/qwen3.go +++ b/pkg/hfutil/modelconfig/qwen3.go @@ -45,9 +45,6 @@ type Qwen3Config struct { // Embedding config SimilarityFnName string `json:"similarity_fn_name"` - - // Quantization - QuantizationConfig *QuantizationConfig `json:"quantization_config,omitempty"` } // LoadQwen3Config loads a Qwen3 model configuration from a JSON file @@ -104,14 +101,6 @@ func (c *Qwen3Config) GetModelSizeBytes() int64 { return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype) } -// GetQuantizationType returns the quantization method used (if any) -func (c *Qwen3Config) GetQuantizationType() string { - if c.QuantizationConfig != nil && c.QuantizationConfig.QuantMethod != "" { - return c.QuantizationConfig.QuantMethod - } - return "" -} - // HasVision returns false for Qwen3 base models func (c *Qwen3Config) HasVision() bool { return false // Base Qwen3 models don't have vision capabilities diff --git a/pkg/hfutil/modelconfig/qwen3_moe.go b/pkg/hfutil/modelconfig/qwen3_moe.go index 5d30fb77..53834d11 100644 --- a/pkg/hfutil/modelconfig/qwen3_moe.go +++ b/pkg/hfutil/modelconfig/qwen3_moe.go @@ -59,9 +59,6 @@ type Qwen3MoeConfig struct { // Misc options TieWordEmbeddings bool `json:"tie_word_embeddings"` UseCache bool `json:"use_cache"` - - // Quantization - QuantizationConfig *QuantizationConfig `json:"quantization_config,omitempty"` } // LoadQwen3MoeConfig loads a Qwen3Moe model configuration from a JSON file @@ -106,14 +103,6 @@ func (c *Qwen3MoeConfig) GetModelSizeBytes() int64 { return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype) } -// GetQuantizationType returns the quantization method used (if any) -func (c *Qwen3MoeConfig) GetQuantizationType() string { - if c.QuantizationConfig != nil && c.QuantizationConfig.QuantMethod != "" { - return c.QuantizationConfig.QuantMethod - } - return "" -} - // HasVision returns false for Qwen3Moe base models func (c *Qwen3MoeConfig) HasVision() bool { return false diff --git a/pkg/hfutil/modelconfig/qwen3_vl.go b/pkg/hfutil/modelconfig/qwen3_vl.go index 95e8a9c5..d890ae03 100644 --- a/pkg/hfutil/modelconfig/qwen3_vl.go +++ b/pkg/hfutil/modelconfig/qwen3_vl.go @@ -16,9 +16,6 @@ type Qwen3VLConfig struct { VisionConfig Qwen3VLVisionConfig `json:"vision_config"` VisionStartTokenId int `json:"vision_start_token_id"` VisionEndTokenId int `json:"vision_end_token_id"` - - // Quantization - QuantizationConfig *QuantizationConfig `json:"quantization_config,omitempty"` } // Qwen3VLTextConfig represents the text transformer configuration. @@ -147,14 +144,6 @@ func (c *Qwen3VLConfig) GetModelSizeBytes() int64 { return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype) } -// GetQuantizationType returns the quantization method used (if any). -func (c *Qwen3VLConfig) GetQuantizationType() string { - if c.QuantizationConfig != nil && c.QuantizationConfig.QuantMethod != "" { - return c.QuantizationConfig.QuantMethod - } - return "" -} - // HasVision returns true for Qwen3-VL models. func (c *Qwen3VLConfig) HasVision() bool { return true diff --git a/pkg/hfutil/modelconfig/stablelm.go b/pkg/hfutil/modelconfig/stablelm.go index 5b366996..56637b3f 100644 --- a/pkg/hfutil/modelconfig/stablelm.go +++ b/pkg/hfutil/modelconfig/stablelm.go @@ -86,11 +86,6 @@ func (c *StableLMConfig) GetModelSizeBytes() int64 { return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype) } -// GetQuantizationType returns the quantization method used (if any) -func (c *StableLMConfig) GetQuantizationType() string { - return "" // No quantization by default -} - // HasVision returns false for StableLM base models func (c *StableLMConfig) HasVision() bool { return false diff --git a/pkg/hfutil/modelconfig/xverse.go b/pkg/hfutil/modelconfig/xverse.go index ff46d3e4..40d96bcc 100644 --- a/pkg/hfutil/modelconfig/xverse.go +++ b/pkg/hfutil/modelconfig/xverse.go @@ -86,11 +86,6 @@ func (c *XverseConfig) GetModelSizeBytes() int64 { return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype) } -// GetQuantizationType returns the quantization method used (if any) -func (c *XverseConfig) GetQuantizationType() string { - return "" // No quantization by default -} - // HasVision returns false for XVERSE base models func (c *XverseConfig) HasVision() bool { return false From 43848f51eccbd33bb9cf67dbbd3bc7d0985a3205 Mon Sep 17 00:00:00 2001 From: Arthur Cheng Date: Wed, 1 Apr 2026 12:12:41 -0700 Subject: [PATCH 3/3] refactor phi to base on base model config --- pkg/hfutil/modelconfig/phi.go | 36 ++--------------------------------- 1 file changed, 2 insertions(+), 34 deletions(-) diff --git a/pkg/hfutil/modelconfig/phi.go b/pkg/hfutil/modelconfig/phi.go index 14f58722..fedc8920 100644 --- a/pkg/hfutil/modelconfig/phi.go +++ b/pkg/hfutil/modelconfig/phi.go @@ -8,9 +8,8 @@ import ( // PhiModelConfig represents the configuration for a Phi model type PhiModelConfig struct { - ConfigPath string `json:"-"` - Architectures []string `json:"architectures"` - ModelType string `json:"model_type"` + BaseModelConfig + AttentionDropout float64 `json:"attention_dropout"` AttentionProbsDropoutProb float64 `json:"attention_probs_dropout_prob"` BosTokenId int `json:"bos_token_id"` @@ -33,8 +32,6 @@ type PhiModelConfig struct { RopeScaling *struct{} `json:"rope_scaling"` RopeTheta float64 `json:"rope_theta"` TieWordEmbeddings bool `json:"tie_word_embeddings"` - TorchDtype string `json:"torch_dtype"` - TransformersVersion string `json:"transformers_version"` TypeVocabSize int `json:"type_vocab_size"` UseCache bool `json:"use_cache"` VocabSize int `json:"vocab_size"` @@ -72,30 +69,6 @@ func (c *PhiModelConfig) GetParameterCount() int64 { return 0 } -// GetTransformerVersion returns the transformers library version -func (c *PhiModelConfig) GetTransformerVersion() string { - return c.TransformersVersion -} - -// GetQuantizationType returns the quantization method used (if any) -// Phi models typically don't have quantization config directly in the config file -func (c *PhiModelConfig) GetQuantizationType() string { - return "" -} - -// GetArchitecture returns the model architecture -func (c *PhiModelConfig) GetArchitecture() string { - if len(c.Architectures) > 0 { - return c.Architectures[0] - } - return "" -} - -// GetModelType returns the model type -func (c *PhiModelConfig) GetModelType() string { - return c.ModelType -} - // GetContextLength returns the maximum context length func (c *PhiModelConfig) GetContextLength() int { return c.MaxPositionEmbeddings @@ -106,11 +79,6 @@ func (c *PhiModelConfig) GetModelSizeBytes() int64 { return EstimateModelSizeBytes(c.GetParameterCount(), c.GetTorchDtype()) } -// GetTorchDtype returns the torch data type used by the model -func (c *PhiModelConfig) GetTorchDtype() string { - return c.TorchDtype -} - // HasVision returns false since this is not a multimodal vision model func (c *PhiModelConfig) HasVision() bool { return false