From 2b8c3ff9093d38015013eb1513022eae924bc21a Mon Sep 17 00:00:00 2001
From: Arthur Cheng <arthur.cheng@oracle.com>
Date: Wed, 1 Apr 2026 12:11:44 -0700
Subject: [PATCH 1/3] move quant parsing to base model config

---
 pkg/hfutil/modelconfig/interface.go | 26 +++++++++++---------------
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/pkg/hfutil/modelconfig/interface.go b/pkg/hfutil/modelconfig/interface.go
index 162aed37..cbee9c94 100644
--- a/pkg/hfutil/modelconfig/interface.go
+++ b/pkg/hfutil/modelconfig/interface.go
@@ -67,6 +67,9 @@ type BaseModelConfig struct {
 	TorchDtype         string   `json:"torch_dtype"`
 	TransformerVersion string   `json:"transformers_version"`
 
+	// Quantization config (optional, shared across all model types)
+	QuantizationConfig *QuantizationConfig `json:"quantization_config,omitempty"`
+
 	// Internal fields (not in JSON)
 	ConfigPath string `json:"-"`
 }
@@ -91,6 +94,14 @@ func (c *BaseModelConfig) GetTorchDtype() string {
 	return c.TorchDtype
 }
 
+// GetQuantizationType reports QuantizationConfig.QuantMethod, or "" when no quantization config is set.
+func (c *BaseModelConfig) GetQuantizationType() string {
+	if c.QuantizationConfig == nil {
+		return ""
+	}
+	return c.QuantizationConfig.QuantMethod
+}
+
 // Default implementation for HasVision - most models don't have vision capabilities
 func (c *BaseModelConfig) HasVision() bool {
 	return false
@@ -238,9 +249,6 @@ type GenericModelConfig struct {
 	IntermediateSize      int `json:"intermediate_size"`
 	MaxPositionEmbeddings int `json:"max_position_embeddings"`
 	VocabSize             int `json:"vocab_size"`
-
-	// Quantization config (optional)
-	QuantizationConfig *QuantizationConfig `json:"quantization_config,omitempty"`
 }
 
 // GetParameterCount attempts to get parameter count from safetensors, falls back to estimation
@@ -278,13 +286,6 @@ func estimateGenericParams(hiddenSize, numLayers, intermediateSize, vocabSize in
 	return embeddingParams + totalLayerParams
 }
 
-func (c *GenericModelConfig) GetQuantizationType() string {
-	if c.QuantizationConfig != nil && c.QuantizationConfig.QuantMethod != "" {
-		return c.QuantizationConfig.QuantMethod
-	}
-	return ""
-}
-
 func (c *GenericModelConfig) GetContextLength() int {
 	return c.MaxPositionEmbeddings
 }
@@ -380,11 +381,6 @@ func (c *GenericDiffusionModelConfig) GetParameterCount() int64 {
 	return total
 }
 
-func (c *GenericDiffusionModelConfig) GetQuantizationType() string {
-	// Not supported. Doesn't seem to be standardized in HF.
-	return ""
-}
-
 func (c *GenericDiffusionModelConfig) GetContextLength() int {
 	if c.ConfigPath == "" {
 		return 0

From 524cd8ccf5ed12faf2d79808540fa6dda1981c2a Mon Sep 17 00:00:00 2001
From: Arthur Cheng <arthur.cheng@oracle.com>
Date: Wed, 1 Apr 2026 12:12:26 -0700
Subject: [PATCH 2/3] remove duplicate quant parsing from individual model classes

---
 pkg/hfutil/modelconfig/baichuan.go    |  5 -----
 pkg/hfutil/modelconfig/bert.go        |  5 -----
 pkg/hfutil/modelconfig/chatglm.go     |  5 -----
 pkg/hfutil/modelconfig/command_r.go   |  5 -----
 pkg/hfutil/modelconfig/dbrx.go        |  5 -----
 pkg/hfutil/modelconfig/deepseek_v3.go | 11 -----------
 pkg/hfutil/modelconfig/deepseek_vl.go |  5 -----
 pkg/hfutil/modelconfig/exaone.go      |  5 -----
 pkg/hfutil/modelconfig/gemma.go       |  5 -----
 pkg/hfutil/modelconfig/gemma3.go      |  5 -----
 pkg/hfutil/modelconfig/internlm.go    |  5 -----
 pkg/hfutil/modelconfig/kimi_k2.go     | 11 -----------
 pkg/hfutil/modelconfig/llava.go       |  5 -----
 pkg/hfutil/modelconfig/minicpm.go     |  5 -----
 pkg/hfutil/modelconfig/mistral.go     |  5 -----
 pkg/hfutil/modelconfig/mixtral.go     |  5 -----
 pkg/hfutil/modelconfig/mllama.go      |  5 -----
 pkg/hfutil/modelconfig/phi3.go        |  5 -----
 pkg/hfutil/modelconfig/phi3_v.go      |  5 -----
 pkg/hfutil/modelconfig/phi3small.go   |  5 -----
 pkg/hfutil/modelconfig/phimoe.go      |  5 -----
 pkg/hfutil/modelconfig/qwen.go        | 11 -----------
 pkg/hfutil/modelconfig/qwen2.go       | 11 -----------
 pkg/hfutil/modelconfig/qwen2_vl.go    | 11 -----------
 pkg/hfutil/modelconfig/qwen3.go       | 11 -----------
 pkg/hfutil/modelconfig/qwen3_moe.go   | 11 -----------
 pkg/hfutil/modelconfig/qwen3_vl.go    | 11 -----------
 pkg/hfutil/modelconfig/stablelm.go    |  5 -----
 pkg/hfutil/modelconfig/xverse.go      |  5 -----
 29 files changed, 193 deletions(-)

diff --git a/pkg/hfutil/modelconfig/baichuan.go b/pkg/hfutil/modelconfig/baichuan.go
index 83d5d936..314e6061 100644
--- a/pkg/hfutil/modelconfig/baichuan.go
+++ b/pkg/hfutil/modelconfig/baichuan.go
@@ -88,11 +88,6 @@ func (c *BaichuanConfig) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *BaichuanConfig) GetQuantizationType() string {
-	return "" // No quantization by default
-}
-
 // HasVision returns false for Baichuan base models
 func (c *BaichuanConfig) HasVision() bool {
 	return false
diff --git a/pkg/hfutil/modelconfig/bert.go b/pkg/hfutil/modelconfig/bert.go
index 0607583e..2e22a9df 100644
--- a/pkg/hfutil/modelconfig/bert.go
+++ b/pkg/hfutil/modelconfig/bert.go
@@ -109,11 +109,6 @@ func (c *BertConfig) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *BertConfig) GetQuantizationType() string {
-	return "" // No quantization by default
-}
-
 // HasVision returns false for BERT models
 func (c *BertConfig) HasVision() bool {
 	return false
diff --git a/pkg/hfutil/modelconfig/chatglm.go b/pkg/hfutil/modelconfig/chatglm.go
index 6a5ca74b..5bd5a15c 100644
--- a/pkg/hfutil/modelconfig/chatglm.go
+++ b/pkg/hfutil/modelconfig/chatglm.go
@@ -111,11 +111,6 @@ func (c *ChatGLMConfig) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), dtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *ChatGLMConfig) GetQuantizationType() string {
-	return "" // No quantization by default
-}
-
 // HasVision returns false for ChatGLM base models
 func (c *ChatGLMConfig) HasVision() bool {
 	return false
diff --git a/pkg/hfutil/modelconfig/command_r.go b/pkg/hfutil/modelconfig/command_r.go
index dddb3d9e..57067885 100644
--- a/pkg/hfutil/modelconfig/command_r.go
+++ b/pkg/hfutil/modelconfig/command_r.go
@@ -100,11 +100,6 @@ func (c *CommandRConfig) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *CommandRConfig) GetQuantizationType() string {
-	return "" // No quantization by default
-}
-
 // HasVision returns false for Command-R base models
 func (c *CommandRConfig) HasVision() bool {
 	return false
diff --git a/pkg/hfutil/modelconfig/dbrx.go b/pkg/hfutil/modelconfig/dbrx.go
index c33da8cc..d2830590 100644
--- a/pkg/hfutil/modelconfig/dbrx.go
+++ b/pkg/hfutil/modelconfig/dbrx.go
@@ -130,11 +130,6 @@ func (c *DBRXConfig) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *DBRXConfig) GetQuantizationType() string {
-	return "" // No quantization by default
-}
-
 // HasVision returns false for DBRX base models
 func (c *DBRXConfig) HasVision() bool {
 	return false
diff --git a/pkg/hfutil/modelconfig/deepseek_v3.go b/pkg/hfutil/modelconfig/deepseek_v3.go
index b95d9436..88064ca6 100644
--- a/pkg/hfutil/modelconfig/deepseek_v3.go
+++ b/pkg/hfutil/modelconfig/deepseek_v3.go
@@ -64,9 +64,6 @@ type DeepseekV3Config struct {
 	// RoPE scaling
 	RopeScaling RopeScalingConfig `json:"rope_scaling"`
 
-	// Quantization settings
-	QuantizationConfig *QuantizationConfig `json:"quantization_config,omitempty"`
-
 	// Misc options
 	TieWordEmbeddings bool    `json:"tie_word_embeddings"`
 	UseCache          bool    `json:"use_cache"`
@@ -146,14 +143,6 @@ func (c *DeepseekV3Config) GetTransformerVersion() string {
 	return c.BaseModelConfig.TransformerVersion
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *DeepseekV3Config) GetQuantizationType() string {
-	if c.QuantizationConfig != nil && c.QuantizationConfig.QuantMethod != "" {
-		return c.QuantizationConfig.QuantMethod
-	}
-	return ""
-}
-
 // GetArchitecture returns the model architecture
 func (c *DeepseekV3Config) GetArchitecture() string {
 	if len(c.Architectures) > 0 {
diff --git a/pkg/hfutil/modelconfig/deepseek_vl.go b/pkg/hfutil/modelconfig/deepseek_vl.go
index c1178711..586b627a 100644
--- a/pkg/hfutil/modelconfig/deepseek_vl.go
+++ b/pkg/hfutil/modelconfig/deepseek_vl.go
@@ -171,11 +171,6 @@ func (c *DeepSeekVLConfig) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), dtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *DeepSeekVLConfig) GetQuantizationType() string {
-	return "" // No quantization by default
-}
-
 // HasVision returns true for DeepSeek VL models
 func (c *DeepSeekVLConfig) HasVision() bool {
 	return true
diff --git a/pkg/hfutil/modelconfig/exaone.go b/pkg/hfutil/modelconfig/exaone.go
index 1f829234..e699583f 100644
--- a/pkg/hfutil/modelconfig/exaone.go
+++ b/pkg/hfutil/modelconfig/exaone.go
@@ -102,11 +102,6 @@ func (c *ExaoneConfig) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *ExaoneConfig) GetQuantizationType() string {
-	return "" // No quantization by default
-}
-
 // HasVision returns false for ExaONE base models
 func (c *ExaoneConfig) HasVision() bool {
 	return false
diff --git a/pkg/hfutil/modelconfig/gemma.go b/pkg/hfutil/modelconfig/gemma.go
index 0df0f037..a66974de 100644
--- a/pkg/hfutil/modelconfig/gemma.go
+++ b/pkg/hfutil/modelconfig/gemma.go
@@ -127,11 +127,6 @@ func (c *GemmaConfig) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *GemmaConfig) GetQuantizationType() string {
-	return "" // No quantization by default
-}
-
 // HasVision returns false for Gemma base models
 func (c *GemmaConfig) HasVision() bool {
 	return false
diff --git a/pkg/hfutil/modelconfig/gemma3.go b/pkg/hfutil/modelconfig/gemma3.go
index 6a7ac8eb..3ee6102d 100644
--- a/pkg/hfutil/modelconfig/gemma3.go
+++ b/pkg/hfutil/modelconfig/gemma3.go
@@ -133,11 +133,6 @@ func (c *Gemma3Config) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *Gemma3Config) GetQuantizationType() string {
-	return "" // No quantization by default
-}
-
 // HasVision returns true since Gemma3 is a multimodal vision model
 func (c *Gemma3Config) HasVision() bool {
 	return true
diff --git a/pkg/hfutil/modelconfig/internlm.go b/pkg/hfutil/modelconfig/internlm.go
index 94abd62e..da7000d2 100644
--- a/pkg/hfutil/modelconfig/internlm.go
+++ b/pkg/hfutil/modelconfig/internlm.go
@@ -89,11 +89,6 @@ func (c *InternLMConfig) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *InternLMConfig) GetQuantizationType() string {
-	return "" // No quantization by default
-}
-
 // HasVision returns false for InternLM base models
 func (c *InternLMConfig) HasVision() bool {
 	return false
diff --git a/pkg/hfutil/modelconfig/kimi_k2.go b/pkg/hfutil/modelconfig/kimi_k2.go
index a4d7014f..6b20313b 100644
--- a/pkg/hfutil/modelconfig/kimi_k2.go
+++ b/pkg/hfutil/modelconfig/kimi_k2.go
@@ -60,9 +60,6 @@ type KimiK2Config struct {
 	// RoPE scaling (YARN type for Kimi-K2)
 	RopeScaling RopeScalingConfig `json:"rope_scaling"`
 
-	// Quantization settings
-	QuantizationConfig *QuantizationConfig `json:"quantization_config,omitempty"`
-
 	// Misc options
 	TieWordEmbeddings bool    `json:"tie_word_embeddings"`
 	UseCache          bool    `json:"use_cache"`
@@ -141,14 +138,6 @@ func (c *KimiK2Config) GetTransformerVersion() string {
 	return c.BaseModelConfig.TransformerVersion
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *KimiK2Config) GetQuantizationType() string {
-	if c.QuantizationConfig != nil && c.QuantizationConfig.QuantMethod != "" {
-		return c.QuantizationConfig.QuantMethod
-	}
-	return ""
-}
-
 // GetArchitecture returns the model architecture
 func (c *KimiK2Config) GetArchitecture() string {
 	if len(c.Architectures) > 0 {
diff --git a/pkg/hfutil/modelconfig/llava.go b/pkg/hfutil/modelconfig/llava.go
index d4460e27..6ec98c3d 100644
--- a/pkg/hfutil/modelconfig/llava.go
+++ b/pkg/hfutil/modelconfig/llava.go
@@ -157,11 +157,6 @@ func (c *LLaVAConfig) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), dtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *LLaVAConfig) GetQuantizationType() string {
-	return "" // No quantization by default
-}
-
 // HasVision returns true for LLaVA models
 func (c *LLaVAConfig) HasVision() bool {
 	return true
diff --git a/pkg/hfutil/modelconfig/minicpm.go b/pkg/hfutil/modelconfig/minicpm.go
index ca0b3b0a..177eea6c 100644
--- a/pkg/hfutil/modelconfig/minicpm.go
+++ b/pkg/hfutil/modelconfig/minicpm.go
@@ -95,11 +95,6 @@ func (c *MiniCPMConfig) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *MiniCPMConfig) GetQuantizationType() string {
-	return "" // No quantization by default
-}
-
 // HasVision returns false for base MiniCPM models
 func (c *MiniCPMConfig) HasVision() bool {
 	return false
diff --git a/pkg/hfutil/modelconfig/mistral.go b/pkg/hfutil/modelconfig/mistral.go
index c05abc85..8a8f02c5 100644
--- a/pkg/hfutil/modelconfig/mistral.go
+++ b/pkg/hfutil/modelconfig/mistral.go
@@ -108,11 +108,6 @@ func (c *MistralConfig) GetTransformerVersion() string {
 	return c.TransformerVersion
 }
 
-func (c *MistralConfig) GetQuantizationType() string {
-	// Mistral doesn't have quantization in this config
-	return ""
-}
-
 func (c *MistralConfig) GetArchitecture() string {
 	if len(c.Architectures) > 0 {
 		return c.Architectures[0]
diff --git a/pkg/hfutil/modelconfig/mixtral.go b/pkg/hfutil/modelconfig/mixtral.go
index 5c838061..8a50f52d 100644
--- a/pkg/hfutil/modelconfig/mixtral.go
+++ b/pkg/hfutil/modelconfig/mixtral.go
@@ -117,11 +117,6 @@ func (c *MixtralConfig) GetTransformerVersion() string {
 	return c.TransformerVersion
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *MixtralConfig) GetQuantizationType() string {
-	return "" // No quantization config for Mixtral by default
-}
-
 // GetArchitecture returns the model architecture
 func (c *MixtralConfig) GetArchitecture() string {
 	if len(c.Architectures) > 0 {
diff --git a/pkg/hfutil/modelconfig/mllama.go b/pkg/hfutil/modelconfig/mllama.go
index d543cbd7..e4d2eca9 100644
--- a/pkg/hfutil/modelconfig/mllama.go
+++ b/pkg/hfutil/modelconfig/mllama.go
@@ -145,11 +145,6 @@ func (c *MLlamaConfig) GetTransformerVersion() string {
 	return c.TransformerVersion
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *MLlamaConfig) GetQuantizationType() string {
-	return "" // MLlama models don't have quantization by default
-}
-
 // GetArchitecture returns the model architecture
 func (c *MLlamaConfig) GetArchitecture() string {
 	if len(c.Architectures) > 0 {
diff --git a/pkg/hfutil/modelconfig/phi3.go b/pkg/hfutil/modelconfig/phi3.go
index 55d24de5..8229f344 100644
--- a/pkg/hfutil/modelconfig/phi3.go
+++ b/pkg/hfutil/modelconfig/phi3.go
@@ -103,11 +103,6 @@ func (c *Phi3Config) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *Phi3Config) GetQuantizationType() string {
-	return "" // No quantization by default
-}
-
 // HasVision returns false since this is not a multimodal vision model
 func (c *Phi3Config) HasVision() bool {
 	return false
diff --git a/pkg/hfutil/modelconfig/phi3_v.go b/pkg/hfutil/modelconfig/phi3_v.go
index 138cd356..a6b86383 100644
--- a/pkg/hfutil/modelconfig/phi3_v.go
+++ b/pkg/hfutil/modelconfig/phi3_v.go
@@ -92,11 +92,6 @@ func (c *Phi3VConfig) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *Phi3VConfig) GetQuantizationType() string {
-	return "" // No quantization by default
-}
-
 // HasVision returns true since this is a multimodal vision model
 func (c *Phi3VConfig) HasVision() bool {
 	return c.ImgProcessor != nil
diff --git a/pkg/hfutil/modelconfig/phi3small.go b/pkg/hfutil/modelconfig/phi3small.go
index 842418a6..61fd58cc 100644
--- a/pkg/hfutil/modelconfig/phi3small.go
+++ b/pkg/hfutil/modelconfig/phi3small.go
@@ -114,11 +114,6 @@ func (c *Phi3SmallConfig) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *Phi3SmallConfig) GetQuantizationType() string {
-	return "" // No quantization by default
-}
-
 // HasVision returns false since this is not a multimodal vision model
 func (c *Phi3SmallConfig) HasVision() bool {
 	return false
diff --git a/pkg/hfutil/modelconfig/phimoe.go b/pkg/hfutil/modelconfig/phimoe.go
index def9dea3..888c95d3 100644
--- a/pkg/hfutil/modelconfig/phimoe.go
+++ b/pkg/hfutil/modelconfig/phimoe.go
@@ -116,11 +116,6 @@ func (c *PhiMoEConfig) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *PhiMoEConfig) GetQuantizationType() string {
-	return "" // No quantization by default
-}
-
 // HasVision returns false since this is not a multimodal vision model
 func (c *PhiMoEConfig) HasVision() bool {
 	return false
diff --git a/pkg/hfutil/modelconfig/qwen.go b/pkg/hfutil/modelconfig/qwen.go
index b14fe06c..fd554c1c 100644
--- a/pkg/hfutil/modelconfig/qwen.go
+++ b/pkg/hfutil/modelconfig/qwen.go
@@ -48,9 +48,6 @@ type QwenConfig struct {
 	FP32           bool   `json:"fp32"`
 	OnnxSafe       *bool  `json:"onnx_safe"`
 	TokenizerClass string `json:"tokenizer_class"`
-
-	// Quantization
-	QuantizationConfig *QuantizationConfig `json:"quantization_config,omitempty"`
 }
 
 // LoadQwenConfig loads a Qwen v1 model configuration from a JSON file
@@ -110,14 +107,6 @@ func (c *QwenConfig) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *QwenConfig) GetQuantizationType() string {
-	if c.QuantizationConfig != nil && c.QuantizationConfig.QuantMethod != "" {
-		return c.QuantizationConfig.QuantMethod
-	}
-	return ""
-}
-
 // HasVision returns false for Qwen v1 base models
 func (c *QwenConfig) HasVision() bool {
 	return false // Base Qwen v1 models don't have vision capabilities
diff --git a/pkg/hfutil/modelconfig/qwen2.go b/pkg/hfutil/modelconfig/qwen2.go
index 4ddbc09d..3a909c94 100644
--- a/pkg/hfutil/modelconfig/qwen2.go
+++ b/pkg/hfutil/modelconfig/qwen2.go
@@ -39,9 +39,6 @@ type Qwen2Config struct {
 	TieWordEmbeddings bool `json:"tie_word_embeddings"`
 	UseCache          bool `json:"use_cache"`
 	UseSlidingWindow  bool `json:"use_sliding_window"`
-
-	// Quantization
-	QuantizationConfig *QuantizationConfig `json:"quantization_config,omitempty"`
 }
 
 // LoadQwen2Config loads a Qwen2 model configuration from a JSON file
@@ -100,14 +97,6 @@ func (c *Qwen2Config) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *Qwen2Config) GetQuantizationType() string {
-	if c.QuantizationConfig != nil && c.QuantizationConfig.QuantMethod != "" {
-		return c.QuantizationConfig.QuantMethod
-	}
-	return ""
-}
-
 // HasVision returns false for Qwen2 base models
 func (c *Qwen2Config) HasVision() bool {
 	return false // Base Qwen2 models don't have vision capabilities
diff --git a/pkg/hfutil/modelconfig/qwen2_vl.go b/pkg/hfutil/modelconfig/qwen2_vl.go
index 4ed7f7c0..1a55e61d 100644
--- a/pkg/hfutil/modelconfig/qwen2_vl.go
+++ b/pkg/hfutil/modelconfig/qwen2_vl.go
@@ -75,9 +75,6 @@ type Qwen2VLConfig struct {
 	TieWordEmbeddings bool `json:"tie_word_embeddings"`
 	UseCache          bool `json:"use_cache"`
 	UseSlidingWindow  bool `json:"use_sliding_window"`
-
-	// Quantization
-	QuantizationConfig *QuantizationConfig `json:"quantization_config,omitempty"`
 }
 
 // LoadQwen2VLConfig loads a Qwen2-VL model configuration from a JSON file
@@ -141,14 +138,6 @@ func (c *Qwen2VLConfig) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *Qwen2VLConfig) GetQuantizationType() string {
-	if c.QuantizationConfig != nil && c.QuantizationConfig.QuantMethod != "" {
-		return c.QuantizationConfig.QuantMethod
-	}
-	return ""
-}
-
 // HasVision returns true for Qwen2-VL models
 func (c *Qwen2VLConfig) HasVision() bool {
 	return true // Qwen2-VL models have vision capabilities
diff --git a/pkg/hfutil/modelconfig/qwen3.go b/pkg/hfutil/modelconfig/qwen3.go
index b49905d2..1cd0038c 100644
--- a/pkg/hfutil/modelconfig/qwen3.go
+++ b/pkg/hfutil/modelconfig/qwen3.go
@@ -45,9 +45,6 @@ type Qwen3Config struct {
 
 	// Embedding config
 	SimilarityFnName string `json:"similarity_fn_name"`
-
-	// Quantization
-	QuantizationConfig *QuantizationConfig `json:"quantization_config,omitempty"`
 }
 
 // LoadQwen3Config loads a Qwen3 model configuration from a JSON file
@@ -104,14 +101,6 @@ func (c *Qwen3Config) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *Qwen3Config) GetQuantizationType() string {
-	if c.QuantizationConfig != nil && c.QuantizationConfig.QuantMethod != "" {
-		return c.QuantizationConfig.QuantMethod
-	}
-	return ""
-}
-
 // HasVision returns false for Qwen3 base models
 func (c *Qwen3Config) HasVision() bool {
 	return false // Base Qwen3 models don't have vision capabilities
diff --git a/pkg/hfutil/modelconfig/qwen3_moe.go b/pkg/hfutil/modelconfig/qwen3_moe.go
index 5d30fb77..53834d11 100644
--- a/pkg/hfutil/modelconfig/qwen3_moe.go
+++ b/pkg/hfutil/modelconfig/qwen3_moe.go
@@ -59,9 +59,6 @@ type Qwen3MoeConfig struct {
 	// Misc options
 	TieWordEmbeddings bool `json:"tie_word_embeddings"`
 	UseCache          bool `json:"use_cache"`
-
-	// Quantization
-	QuantizationConfig *QuantizationConfig `json:"quantization_config,omitempty"`
 }
 
 // LoadQwen3MoeConfig loads a Qwen3Moe model configuration from a JSON file
@@ -106,14 +103,6 @@ func (c *Qwen3MoeConfig) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *Qwen3MoeConfig) GetQuantizationType() string {
-	if c.QuantizationConfig != nil && c.QuantizationConfig.QuantMethod != "" {
-		return c.QuantizationConfig.QuantMethod
-	}
-	return ""
-}
-
 // HasVision returns false for Qwen3Moe base models
 func (c *Qwen3MoeConfig) HasVision() bool {
 	return false
diff --git a/pkg/hfutil/modelconfig/qwen3_vl.go b/pkg/hfutil/modelconfig/qwen3_vl.go
index 95e8a9c5..d890ae03 100644
--- a/pkg/hfutil/modelconfig/qwen3_vl.go
+++ b/pkg/hfutil/modelconfig/qwen3_vl.go
@@ -16,9 +16,6 @@ type Qwen3VLConfig struct {
 	VisionConfig       Qwen3VLVisionConfig `json:"vision_config"`
 	VisionStartTokenId int                 `json:"vision_start_token_id"`
 	VisionEndTokenId   int                 `json:"vision_end_token_id"`
-
-	// Quantization
-	QuantizationConfig *QuantizationConfig `json:"quantization_config,omitempty"`
 }
 
 // Qwen3VLTextConfig represents the text transformer configuration.
@@ -147,14 +144,6 @@ func (c *Qwen3VLConfig) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any).
-func (c *Qwen3VLConfig) GetQuantizationType() string {
-	if c.QuantizationConfig != nil && c.QuantizationConfig.QuantMethod != "" {
-		return c.QuantizationConfig.QuantMethod
-	}
-	return ""
-}
-
 // HasVision returns true for Qwen3-VL models.
 func (c *Qwen3VLConfig) HasVision() bool {
 	return true
diff --git a/pkg/hfutil/modelconfig/stablelm.go b/pkg/hfutil/modelconfig/stablelm.go
index 5b366996..56637b3f 100644
--- a/pkg/hfutil/modelconfig/stablelm.go
+++ b/pkg/hfutil/modelconfig/stablelm.go
@@ -86,11 +86,6 @@ func (c *StableLMConfig) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *StableLMConfig) GetQuantizationType() string {
-	return "" // No quantization by default
-}
-
 // HasVision returns false for StableLM base models
 func (c *StableLMConfig) HasVision() bool {
 	return false
diff --git a/pkg/hfutil/modelconfig/xverse.go b/pkg/hfutil/modelconfig/xverse.go
index ff46d3e4..40d96bcc 100644
--- a/pkg/hfutil/modelconfig/xverse.go
+++ b/pkg/hfutil/modelconfig/xverse.go
@@ -86,11 +86,6 @@ func (c *XverseConfig) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *XverseConfig) GetQuantizationType() string {
-	return "" // No quantization by default
-}
-
 // HasVision returns false for XVERSE base models
 func (c *XverseConfig) HasVision() bool {
 	return false

From 43848f51eccbd33bb9cf67dbbd3bc7d0985a3205 Mon Sep 17 00:00:00 2001
From: Arthur Cheng <arthur.cheng@oracle.com>
Date: Wed, 1 Apr 2026 12:12:41 -0700
Subject: [PATCH 3/3] refactor phi config to embed base model config

---
 pkg/hfutil/modelconfig/phi.go | 36 ++---------------------------------
 1 file changed, 2 insertions(+), 34 deletions(-)

diff --git a/pkg/hfutil/modelconfig/phi.go b/pkg/hfutil/modelconfig/phi.go
index 14f58722..fedc8920 100644
--- a/pkg/hfutil/modelconfig/phi.go
+++ b/pkg/hfutil/modelconfig/phi.go
@@ -8,9 +8,8 @@ import (
 
 // PhiModelConfig represents the configuration for a Phi model
 type PhiModelConfig struct {
-	ConfigPath                string    `json:"-"`
-	Architectures             []string  `json:"architectures"`
-	ModelType                 string    `json:"model_type"`
+	BaseModelConfig
+
 	AttentionDropout          float64   `json:"attention_dropout"`
 	AttentionProbsDropoutProb float64   `json:"attention_probs_dropout_prob"`
 	BosTokenId                int       `json:"bos_token_id"`
@@ -33,8 +32,6 @@ type PhiModelConfig struct {
 	RopeScaling               *struct{} `json:"rope_scaling"`
 	RopeTheta                 float64   `json:"rope_theta"`
 	TieWordEmbeddings         bool      `json:"tie_word_embeddings"`
-	TorchDtype                string    `json:"torch_dtype"`
-	TransformersVersion       string    `json:"transformers_version"`
 	TypeVocabSize             int       `json:"type_vocab_size"`
 	UseCache                  bool      `json:"use_cache"`
 	VocabSize                 int       `json:"vocab_size"`
@@ -72,30 +69,6 @@ func (c *PhiModelConfig) GetParameterCount() int64 {
 	return 0
 }
 
-// GetTransformerVersion returns the transformers library version
-func (c *PhiModelConfig) GetTransformerVersion() string {
-	return c.TransformersVersion
-}
-
-// GetQuantizationType returns the quantization method used (if any)
-// Phi models typically don't have quantization config directly in the config file
-func (c *PhiModelConfig) GetQuantizationType() string {
-	return ""
-}
-
-// GetArchitecture returns the model architecture
-func (c *PhiModelConfig) GetArchitecture() string {
-	if len(c.Architectures) > 0 {
-		return c.Architectures[0]
-	}
-	return ""
-}
-
-// GetModelType returns the model type
-func (c *PhiModelConfig) GetModelType() string {
-	return c.ModelType
-}
-
 // GetContextLength returns the maximum context length
 func (c *PhiModelConfig) GetContextLength() int {
 	return c.MaxPositionEmbeddings
@@ -106,11 +79,6 @@ func (c *PhiModelConfig) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), c.GetTorchDtype())
 }
 
-// GetTorchDtype returns the torch data type used by the model
-func (c *PhiModelConfig) GetTorchDtype() string {
-	return c.TorchDtype
-}
-
 // HasVision returns false since this is not a multimodal vision model
 func (c *PhiModelConfig) HasVision() bool {
 	return false