ome-projects · YouNeedCryDear · Apr 1, 2026 · Apr 1, 2026 · Apr 1, 2026
@@ -88,11 +88,6 @@ func (c *BaichuanConfig) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *BaichuanConfig) GetQuantizationType() string {
-	return "" // No quantization by default
-}
-
 // HasVision returns false for Baichuan base models
 func (c *BaichuanConfig) HasVision() bool {
 	return false

@@ -109,11 +109,6 @@ func (c *BertConfig) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *BertConfig) GetQuantizationType() string {
-	return "" // No quantization by default
-}
-
 // HasVision returns false for BERT models
 func (c *BertConfig) HasVision() bool {
 	return false

@@ -111,11 +111,6 @@ func (c *ChatGLMConfig) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), dtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *ChatGLMConfig) GetQuantizationType() string {
-	return "" // No quantization by default
-}
-
 // HasVision returns false for ChatGLM base models
 func (c *ChatGLMConfig) HasVision() bool {
 	return false

@@ -100,11 +100,6 @@ func (c *CommandRConfig) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *CommandRConfig) GetQuantizationType() string {
-	return "" // No quantization by default
-}
-
 // HasVision returns false for Command-R base models
 func (c *CommandRConfig) HasVision() bool {
 	return false

@@ -130,11 +130,6 @@ func (c *DBRXConfig) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *DBRXConfig) GetQuantizationType() string {
-	return "" // No quantization by default
-}
-
 // HasVision returns false for DBRX base models
 func (c *DBRXConfig) HasVision() bool {
 	return false

@@ -64,9 +64,6 @@ type DeepseekV3Config struct {
 	// RoPE scaling
 	RopeScaling RopeScalingConfig `json:"rope_scaling"`
 
-	// Quantization settings
-	QuantizationConfig *QuantizationConfig `json:"quantization_config,omitempty"`
-
 	// Misc options
 	TieWordEmbeddings bool    `json:"tie_word_embeddings"`
 	UseCache          bool    `json:"use_cache"`
@@ -146,14 +143,6 @@ func (c *DeepseekV3Config) GetTransformerVersion() string {
 	return c.BaseModelConfig.TransformerVersion
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *DeepseekV3Config) GetQuantizationType() string {
-	if c.QuantizationConfig != nil && c.QuantizationConfig.QuantMethod != "" {
-		return c.QuantizationConfig.QuantMethod
-	}
-	return ""
-}
-
 // GetArchitecture returns the model architecture
 func (c *DeepseekV3Config) GetArchitecture() string {
 	if len(c.Architectures) > 0 {

@@ -171,11 +171,6 @@ func (c *DeepSeekVLConfig) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), dtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *DeepSeekVLConfig) GetQuantizationType() string {
-	return "" // No quantization by default
-}
-
 // HasVision returns true for DeepSeek VL models
 func (c *DeepSeekVLConfig) HasVision() bool {
 	return true

@@ -102,11 +102,6 @@ func (c *ExaoneConfig) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *ExaoneConfig) GetQuantizationType() string {
-	return "" // No quantization by default
-}
-
 // HasVision returns false for ExaONE base models
 func (c *ExaoneConfig) HasVision() bool {
 	return false

@@ -127,11 +127,6 @@ func (c *GemmaConfig) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *GemmaConfig) GetQuantizationType() string {
-	return "" // No quantization by default
-}
-
 // HasVision returns false for Gemma base models
 func (c *GemmaConfig) HasVision() bool {
 	return false

@@ -133,11 +133,6 @@ func (c *Gemma3Config) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *Gemma3Config) GetQuantizationType() string {
-	return "" // No quantization by default
-}
-
 // HasVision returns true since Gemma3 is a multimodal vision model
 func (c *Gemma3Config) HasVision() bool {
 	return true

@@ -67,6 +67,9 @@ type BaseModelConfig struct {
 	TorchDtype         string   `json:"torch_dtype"`
 	TransformerVersion string   `json:"transformers_version"`
 
+	// Quantization config (optional, shared across all model types)
+	QuantizationConfig *QuantizationConfig `json:"quantization_config,omitempty"`
+
 	// Internal fields (not in JSON)
 	ConfigPath string `json:"-"`
 }
@@ -91,6 +94,14 @@ func (c *BaseModelConfig) GetTorchDtype() string {
 	return c.TorchDtype
 }
 
+// GetQuantizationType reports the configured quantization method, or "" when none is set.
+func (c *BaseModelConfig) GetQuantizationType() string {
+	if c.QuantizationConfig == nil {
+		return ""
+	}
+	return c.QuantizationConfig.QuantMethod
+}
+
 // Default implementation for HasVision - most models don't have vision capabilities
 func (c *BaseModelConfig) HasVision() bool {
 	return false
@@ -238,9 +249,6 @@ type GenericModelConfig struct {
 	IntermediateSize      int `json:"intermediate_size"`
 	MaxPositionEmbeddings int `json:"max_position_embeddings"`
 	VocabSize             int `json:"vocab_size"`
-
-	// Quantization config (optional)
-	QuantizationConfig *QuantizationConfig `json:"quantization_config,omitempty"`
 }
 
 // GetParameterCount attempts to get parameter count from safetensors, falls back to estimation
@@ -278,13 +286,6 @@ func estimateGenericParams(hiddenSize, numLayers, intermediateSize, vocabSize in
 	return embeddingParams + totalLayerParams
 }
 
-func (c *GenericModelConfig) GetQuantizationType() string {
-	if c.QuantizationConfig != nil && c.QuantizationConfig.QuantMethod != "" {
-		return c.QuantizationConfig.QuantMethod
-	}
-	return ""
-}
-
 func (c *GenericModelConfig) GetContextLength() int {
 	return c.MaxPositionEmbeddings
 }
@@ -380,11 +381,6 @@ func (c *GenericDiffusionModelConfig) GetParameterCount() int64 {
 	return total
 }
 
-func (c *GenericDiffusionModelConfig) GetQuantizationType() string {
-	// Not supported. Doesn't seem to be standardized in HF.
-	return ""
-}
-
 func (c *GenericDiffusionModelConfig) GetContextLength() int {
 	if c.ConfigPath == "" {
 		return 0

@@ -89,11 +89,6 @@ func (c *InternLMConfig) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *InternLMConfig) GetQuantizationType() string {
-	return "" // No quantization by default
-}
-
 // HasVision returns false for InternLM base models
 func (c *InternLMConfig) HasVision() bool {
 	return false

@@ -60,9 +60,6 @@ type KimiK2Config struct {
 	// RoPE scaling (YARN type for Kimi-K2)
 	RopeScaling RopeScalingConfig `json:"rope_scaling"`
 
-	// Quantization settings
-	QuantizationConfig *QuantizationConfig `json:"quantization_config,omitempty"`
-
 	// Misc options
 	TieWordEmbeddings bool    `json:"tie_word_embeddings"`
 	UseCache          bool    `json:"use_cache"`
@@ -141,14 +138,6 @@ func (c *KimiK2Config) GetTransformerVersion() string {
 	return c.BaseModelConfig.TransformerVersion
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *KimiK2Config) GetQuantizationType() string {
-	if c.QuantizationConfig != nil && c.QuantizationConfig.QuantMethod != "" {
-		return c.QuantizationConfig.QuantMethod
-	}
-	return ""
-}
-
 // GetArchitecture returns the model architecture
 func (c *KimiK2Config) GetArchitecture() string {
 	if len(c.Architectures) > 0 {

@@ -157,11 +157,6 @@ func (c *LLaVAConfig) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), dtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *LLaVAConfig) GetQuantizationType() string {
-	return "" // No quantization by default
-}
-
 // HasVision returns true for LLaVA models
 func (c *LLaVAConfig) HasVision() bool {
 	return true

@@ -95,11 +95,6 @@ func (c *MiniCPMConfig) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *MiniCPMConfig) GetQuantizationType() string {
-	return "" // No quantization by default
-}
-
 // HasVision returns false for base MiniCPM models
 func (c *MiniCPMConfig) HasVision() bool {
 	return false

@@ -108,11 +108,6 @@ func (c *MistralConfig) GetTransformerVersion() string {
 	return c.TransformerVersion
 }
 
-func (c *MistralConfig) GetQuantizationType() string {
-	// Mistral doesn't have quantization in this config
-	return ""
-}
-
 func (c *MistralConfig) GetArchitecture() string {
 	if len(c.Architectures) > 0 {
 		return c.Architectures[0]

@@ -117,11 +117,6 @@ func (c *MixtralConfig) GetTransformerVersion() string {
 	return c.TransformerVersion
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *MixtralConfig) GetQuantizationType() string {
-	return "" // No quantization config for Mixtral by default
-}
-
 // GetArchitecture returns the model architecture
 func (c *MixtralConfig) GetArchitecture() string {
 	if len(c.Architectures) > 0 {

@@ -145,11 +145,6 @@ func (c *MLlamaConfig) GetTransformerVersion() string {
 	return c.TransformerVersion
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *MLlamaConfig) GetQuantizationType() string {
-	return "" // MLlama models don't have quantization by default
-}
-
 // GetArchitecture returns the model architecture
 func (c *MLlamaConfig) GetArchitecture() string {
 	if len(c.Architectures) > 0 {

@@ -8,9 +8,8 @@ import (
 
 // PhiModelConfig represents the configuration for a Phi model
 type PhiModelConfig struct {
-	ConfigPath                string    `json:"-"`
-	Architectures             []string  `json:"architectures"`
-	ModelType                 string    `json:"model_type"`
+	BaseModelConfig
+
 	AttentionDropout          float64   `json:"attention_dropout"`
 	AttentionProbsDropoutProb float64   `json:"attention_probs_dropout_prob"`
 	BosTokenId                int       `json:"bos_token_id"`
@@ -33,8 +32,6 @@ type PhiModelConfig struct {
 	RopeScaling               *struct{} `json:"rope_scaling"`
 	RopeTheta                 float64   `json:"rope_theta"`
 	TieWordEmbeddings         bool      `json:"tie_word_embeddings"`
-	TorchDtype                string    `json:"torch_dtype"`
-	TransformersVersion       string    `json:"transformers_version"`
 	TypeVocabSize             int       `json:"type_vocab_size"`
 	UseCache                  bool      `json:"use_cache"`
 	VocabSize                 int       `json:"vocab_size"`
@@ -72,30 +69,6 @@ func (c *PhiModelConfig) GetParameterCount() int64 {
 	return 0
 }
 
-// GetTransformerVersion returns the transformers library version
-func (c *PhiModelConfig) GetTransformerVersion() string {
-	return c.TransformersVersion
-}
-
-// GetQuantizationType returns the quantization method used (if any)
-// Phi models typically don't have quantization config directly in the config file
-func (c *PhiModelConfig) GetQuantizationType() string {
-	return ""
-}
-
-// GetArchitecture returns the model architecture
-func (c *PhiModelConfig) GetArchitecture() string {
-	if len(c.Architectures) > 0 {
-		return c.Architectures[0]
-	}
-	return ""
-}
-
-// GetModelType returns the model type
-func (c *PhiModelConfig) GetModelType() string {
-	return c.ModelType
-}
-
 // GetContextLength returns the maximum context length
 func (c *PhiModelConfig) GetContextLength() int {
 	return c.MaxPositionEmbeddings
@@ -106,11 +79,6 @@ func (c *PhiModelConfig) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), c.GetTorchDtype())
 }
 
-// GetTorchDtype returns the torch data type used by the model
-func (c *PhiModelConfig) GetTorchDtype() string {
-	return c.TorchDtype
-}
-
 // HasVision returns false since this is not a multimodal vision model
 func (c *PhiModelConfig) HasVision() bool {
 	return false

@@ -103,11 +103,6 @@ func (c *Phi3Config) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *Phi3Config) GetQuantizationType() string {
-	return "" // No quantization by default
-}
-
 // HasVision returns false since this is not a multimodal vision model
 func (c *Phi3Config) HasVision() bool {
 	return false

@@ -92,11 +92,6 @@ func (c *Phi3VConfig) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *Phi3VConfig) GetQuantizationType() string {
-	return "" // No quantization by default
-}
-
 // HasVision returns true since this is a multimodal vision model
 func (c *Phi3VConfig) HasVision() bool {
 	return c.ImgProcessor != nil

@@ -114,11 +114,6 @@ func (c *Phi3SmallConfig) GetModelSizeBytes() int64 {
 	return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
 }
 
-// GetQuantizationType returns the quantization method used (if any)
-func (c *Phi3SmallConfig) GetQuantizationType() string {
-	return "" // No quantization by default
-}
-
 // HasVision returns false since this is not a multimodal vision model
 func (c *Phi3SmallConfig) HasVision() bool {
 	return false