Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions pkg/hfutil/modelconfig/baichuan.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,6 @@ func (c *BaichuanConfig) GetModelSizeBytes() int64 {
return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
}

// GetQuantizationType reports the quantization method for a Baichuan config.
// This implementation always yields the empty string (no method reported).
func (c *BaichuanConfig) GetQuantizationType() string {
	const noQuantization = "" // empty string: no quantization method reported
	return noQuantization
}

// HasVision returns false for Baichuan base models
func (c *BaichuanConfig) HasVision() bool {
return false
Expand Down
5 changes: 0 additions & 5 deletions pkg/hfutil/modelconfig/bert.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,11 +109,6 @@ func (c *BertConfig) GetModelSizeBytes() int64 {
return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
}

// GetQuantizationType reports the quantization method for a BERT config.
// This implementation always yields the empty string (no method reported).
func (c *BertConfig) GetQuantizationType() string {
	const noQuantization = "" // empty string: no quantization method reported
	return noQuantization
}

// HasVision returns false for BERT models
func (c *BertConfig) HasVision() bool {
return false
Expand Down
5 changes: 0 additions & 5 deletions pkg/hfutil/modelconfig/chatglm.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,11 +111,6 @@ func (c *ChatGLMConfig) GetModelSizeBytes() int64 {
return EstimateModelSizeBytes(c.GetParameterCount(), dtype)
}

// GetQuantizationType reports the quantization method for a ChatGLM config.
// This implementation always yields the empty string (no method reported).
func (c *ChatGLMConfig) GetQuantizationType() string {
	const noQuantization = "" // empty string: no quantization method reported
	return noQuantization
}

// HasVision returns false for ChatGLM base models
func (c *ChatGLMConfig) HasVision() bool {
return false
Expand Down
5 changes: 0 additions & 5 deletions pkg/hfutil/modelconfig/command_r.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,11 +100,6 @@ func (c *CommandRConfig) GetModelSizeBytes() int64 {
return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
}

// GetQuantizationType reports the quantization method for a Command-R config.
// This implementation always yields the empty string (no method reported).
func (c *CommandRConfig) GetQuantizationType() string {
	const noQuantization = "" // empty string: no quantization method reported
	return noQuantization
}

// HasVision returns false for Command-R base models
func (c *CommandRConfig) HasVision() bool {
return false
Expand Down
5 changes: 0 additions & 5 deletions pkg/hfutil/modelconfig/dbrx.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,11 +130,6 @@ func (c *DBRXConfig) GetModelSizeBytes() int64 {
return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
}

// GetQuantizationType reports the quantization method for a DBRX config.
// This implementation always yields the empty string (no method reported).
func (c *DBRXConfig) GetQuantizationType() string {
	const noQuantization = "" // empty string: no quantization method reported
	return noQuantization
}

// HasVision returns false for DBRX base models
func (c *DBRXConfig) HasVision() bool {
return false
Expand Down
11 changes: 0 additions & 11 deletions pkg/hfutil/modelconfig/deepseek_v3.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,6 @@ type DeepseekV3Config struct {
// RoPE scaling
RopeScaling RopeScalingConfig `json:"rope_scaling"`

// Quantization settings
QuantizationConfig *QuantizationConfig `json:"quantization_config,omitempty"`

// Misc options
TieWordEmbeddings bool `json:"tie_word_embeddings"`
UseCache bool `json:"use_cache"`
Expand Down Expand Up @@ -146,14 +143,6 @@ func (c *DeepseekV3Config) GetTransformerVersion() string {
return c.BaseModelConfig.TransformerVersion
}

// GetQuantizationType reports the quantization method recorded in the
// config's QuantizationConfig. It yields the empty string when no
// QuantizationConfig is present or when its QuantMethod is empty.
func (c *DeepseekV3Config) GetQuantizationType() string {
	quant := c.QuantizationConfig
	if quant == nil {
		return ""
	}
	// An empty QuantMethod is returned unchanged, which is still "".
	return quant.QuantMethod
}

// GetArchitecture returns the model architecture
func (c *DeepseekV3Config) GetArchitecture() string {
if len(c.Architectures) > 0 {
Expand Down
5 changes: 0 additions & 5 deletions pkg/hfutil/modelconfig/deepseek_vl.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,11 +171,6 @@ func (c *DeepSeekVLConfig) GetModelSizeBytes() int64 {
return EstimateModelSizeBytes(c.GetParameterCount(), dtype)
}

// GetQuantizationType reports the quantization method for a DeepSeek VL config.
// This implementation always yields the empty string (no method reported).
func (c *DeepSeekVLConfig) GetQuantizationType() string {
	const noQuantization = "" // empty string: no quantization method reported
	return noQuantization
}

// HasVision returns true for DeepSeek VL models
func (c *DeepSeekVLConfig) HasVision() bool {
return true
Expand Down
5 changes: 0 additions & 5 deletions pkg/hfutil/modelconfig/exaone.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,11 +102,6 @@ func (c *ExaoneConfig) GetModelSizeBytes() int64 {
return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
}

// GetQuantizationType reports the quantization method for an ExaONE config.
// This implementation always yields the empty string (no method reported).
func (c *ExaoneConfig) GetQuantizationType() string {
	const noQuantization = "" // empty string: no quantization method reported
	return noQuantization
}

// HasVision returns false for ExaONE base models
func (c *ExaoneConfig) HasVision() bool {
return false
Expand Down
5 changes: 0 additions & 5 deletions pkg/hfutil/modelconfig/gemma.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,11 +127,6 @@ func (c *GemmaConfig) GetModelSizeBytes() int64 {
return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
}

// GetQuantizationType reports the quantization method for a Gemma config.
// This implementation always yields the empty string (no method reported).
func (c *GemmaConfig) GetQuantizationType() string {
	const noQuantization = "" // empty string: no quantization method reported
	return noQuantization
}

// HasVision returns false for Gemma base models
func (c *GemmaConfig) HasVision() bool {
return false
Expand Down
5 changes: 0 additions & 5 deletions pkg/hfutil/modelconfig/gemma3.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,11 +133,6 @@ func (c *Gemma3Config) GetModelSizeBytes() int64 {
return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
}

// GetQuantizationType reports the quantization method for a Gemma3 config.
// This implementation always yields the empty string (no method reported).
func (c *Gemma3Config) GetQuantizationType() string {
	const noQuantization = "" // empty string: no quantization method reported
	return noQuantization
}

// HasVision returns true since Gemma3 is a multimodal vision model
func (c *Gemma3Config) HasVision() bool {
return true
Expand Down
26 changes: 11 additions & 15 deletions pkg/hfutil/modelconfig/interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ type BaseModelConfig struct {
TorchDtype string `json:"torch_dtype"`
TransformerVersion string `json:"transformers_version"`

// Quantization config (optional, shared across all model types)
QuantizationConfig *QuantizationConfig `json:"quantization_config,omitempty"`

// Internal fields (not in JSON)
ConfigPath string `json:"-"`
}
Expand All @@ -91,6 +94,14 @@ func (c *BaseModelConfig) GetTorchDtype() string {
return c.TorchDtype
}

// GetQuantizationType reports the quantization method recorded in the
// config's QuantizationConfig. It yields the empty string when no
// QuantizationConfig is present or when its QuantMethod is empty.
func (c *BaseModelConfig) GetQuantizationType() string {
	quant := c.QuantizationConfig
	if quant == nil {
		return ""
	}
	// An empty QuantMethod is returned unchanged, which is still "".
	return quant.QuantMethod
}

// HasVision is the default implementation: most models don't have vision capabilities, so it returns false
func (c *BaseModelConfig) HasVision() bool {
return false
Expand Down Expand Up @@ -238,9 +249,6 @@ type GenericModelConfig struct {
IntermediateSize int `json:"intermediate_size"`
MaxPositionEmbeddings int `json:"max_position_embeddings"`
VocabSize int `json:"vocab_size"`

// Quantization config (optional)
QuantizationConfig *QuantizationConfig `json:"quantization_config,omitempty"`
}

// GetParameterCount attempts to get parameter count from safetensors, falls back to estimation
Expand Down Expand Up @@ -278,13 +286,6 @@ func estimateGenericParams(hiddenSize, numLayers, intermediateSize, vocabSize in
return embeddingParams + totalLayerParams
}

// GetQuantizationType reports the quantization method recorded in the
// config's QuantizationConfig. It yields the empty string when no
// QuantizationConfig is present or when its QuantMethod is empty.
func (c *GenericModelConfig) GetQuantizationType() string {
	quant := c.QuantizationConfig
	if quant == nil {
		return ""
	}
	// An empty QuantMethod is returned unchanged, which is still "".
	return quant.QuantMethod
}

// GetContextLength returns the config's MaxPositionEmbeddings value.
func (c *GenericModelConfig) GetContextLength() int {
	contextLen := c.MaxPositionEmbeddings
	return contextLen
}
Expand Down Expand Up @@ -380,11 +381,6 @@ func (c *GenericDiffusionModelConfig) GetParameterCount() int64 {
return total
}

// GetQuantizationType always yields the empty string for generic diffusion
// configs: quantization is not supported here, as it does not appear to be
// standardized in HF for these models.
func (c *GenericDiffusionModelConfig) GetQuantizationType() string {
	const noQuantization = "" // empty string: no quantization method reported
	return noQuantization
}

func (c *GenericDiffusionModelConfig) GetContextLength() int {
if c.ConfigPath == "" {
return 0
Expand Down
5 changes: 0 additions & 5 deletions pkg/hfutil/modelconfig/internlm.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,6 @@ func (c *InternLMConfig) GetModelSizeBytes() int64 {
return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
}

// GetQuantizationType reports the quantization method for an InternLM config.
// This implementation always yields the empty string (no method reported).
func (c *InternLMConfig) GetQuantizationType() string {
	const noQuantization = "" // empty string: no quantization method reported
	return noQuantization
}

// HasVision returns false for InternLM base models
func (c *InternLMConfig) HasVision() bool {
return false
Expand Down
11 changes: 0 additions & 11 deletions pkg/hfutil/modelconfig/kimi_k2.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,6 @@ type KimiK2Config struct {
// RoPE scaling (YARN type for Kimi-K2)
RopeScaling RopeScalingConfig `json:"rope_scaling"`

// Quantization settings
QuantizationConfig *QuantizationConfig `json:"quantization_config,omitempty"`

// Misc options
TieWordEmbeddings bool `json:"tie_word_embeddings"`
UseCache bool `json:"use_cache"`
Expand Down Expand Up @@ -141,14 +138,6 @@ func (c *KimiK2Config) GetTransformerVersion() string {
return c.BaseModelConfig.TransformerVersion
}

// GetQuantizationType reports the quantization method recorded in the
// config's QuantizationConfig. It yields the empty string when no
// QuantizationConfig is present or when its QuantMethod is empty.
func (c *KimiK2Config) GetQuantizationType() string {
	quant := c.QuantizationConfig
	if quant == nil {
		return ""
	}
	// An empty QuantMethod is returned unchanged, which is still "".
	return quant.QuantMethod
}

// GetArchitecture returns the model architecture
func (c *KimiK2Config) GetArchitecture() string {
if len(c.Architectures) > 0 {
Expand Down
5 changes: 0 additions & 5 deletions pkg/hfutil/modelconfig/llava.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,11 +157,6 @@ func (c *LLaVAConfig) GetModelSizeBytes() int64 {
return EstimateModelSizeBytes(c.GetParameterCount(), dtype)
}

// GetQuantizationType reports the quantization method for a LLaVA config.
// This implementation always yields the empty string (no method reported).
func (c *LLaVAConfig) GetQuantizationType() string {
	const noQuantization = "" // empty string: no quantization method reported
	return noQuantization
}

// HasVision returns true for LLaVA models
func (c *LLaVAConfig) HasVision() bool {
return true
Expand Down
5 changes: 0 additions & 5 deletions pkg/hfutil/modelconfig/minicpm.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,11 +95,6 @@ func (c *MiniCPMConfig) GetModelSizeBytes() int64 {
return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
}

// GetQuantizationType reports the quantization method for a MiniCPM config.
// This implementation always yields the empty string (no method reported).
func (c *MiniCPMConfig) GetQuantizationType() string {
	const noQuantization = "" // empty string: no quantization method reported
	return noQuantization
}

// HasVision returns false for base MiniCPM models
func (c *MiniCPMConfig) HasVision() bool {
return false
Expand Down
5 changes: 0 additions & 5 deletions pkg/hfutil/modelconfig/mistral.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,6 @@ func (c *MistralConfig) GetTransformerVersion() string {
return c.TransformerVersion
}

// GetQuantizationType always yields the empty string: the Mistral config
// does not carry quantization information.
func (c *MistralConfig) GetQuantizationType() string {
	const noQuantization = "" // empty string: no quantization method reported
	return noQuantization
}

func (c *MistralConfig) GetArchitecture() string {
if len(c.Architectures) > 0 {
return c.Architectures[0]
Expand Down
5 changes: 0 additions & 5 deletions pkg/hfutil/modelconfig/mixtral.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,11 +117,6 @@ func (c *MixtralConfig) GetTransformerVersion() string {
return c.TransformerVersion
}

// GetQuantizationType reports the quantization method for a Mixtral config.
// This implementation always yields the empty string (no method reported).
func (c *MixtralConfig) GetQuantizationType() string {
	const noQuantization = "" // empty string: no quantization method reported
	return noQuantization
}

// GetArchitecture returns the model architecture
func (c *MixtralConfig) GetArchitecture() string {
if len(c.Architectures) > 0 {
Expand Down
5 changes: 0 additions & 5 deletions pkg/hfutil/modelconfig/mllama.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,11 +145,6 @@ func (c *MLlamaConfig) GetTransformerVersion() string {
return c.TransformerVersion
}

// GetQuantizationType reports the quantization method for an MLlama config.
// This implementation always yields the empty string (no method reported).
func (c *MLlamaConfig) GetQuantizationType() string {
	const noQuantization = "" // empty string: no quantization method reported
	return noQuantization
}

// GetArchitecture returns the model architecture
func (c *MLlamaConfig) GetArchitecture() string {
if len(c.Architectures) > 0 {
Expand Down
36 changes: 2 additions & 34 deletions pkg/hfutil/modelconfig/phi.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,8 @@ import (

// PhiModelConfig represents the configuration for a Phi model
type PhiModelConfig struct {
ConfigPath string `json:"-"`
Architectures []string `json:"architectures"`
ModelType string `json:"model_type"`
BaseModelConfig

AttentionDropout float64 `json:"attention_dropout"`
AttentionProbsDropoutProb float64 `json:"attention_probs_dropout_prob"`
BosTokenId int `json:"bos_token_id"`
Expand All @@ -33,8 +32,6 @@ type PhiModelConfig struct {
RopeScaling *struct{} `json:"rope_scaling"`
RopeTheta float64 `json:"rope_theta"`
TieWordEmbeddings bool `json:"tie_word_embeddings"`
TorchDtype string `json:"torch_dtype"`
TransformersVersion string `json:"transformers_version"`
TypeVocabSize int `json:"type_vocab_size"`
UseCache bool `json:"use_cache"`
VocabSize int `json:"vocab_size"`
Expand Down Expand Up @@ -72,30 +69,6 @@ func (c *PhiModelConfig) GetParameterCount() int64 {
return 0
}

// GetTransformerVersion returns the value of the config's
// TransformersVersion field.
func (c *PhiModelConfig) GetTransformerVersion() string {
	version := c.TransformersVersion
	return version
}

// GetQuantizationType reports the quantization method for a Phi config.
// This implementation always yields the empty string; Phi models typically
// don't have a quantization config directly in the config file.
func (c *PhiModelConfig) GetQuantizationType() string {
	const noQuantization = "" // empty string: no quantization method reported
	return noQuantization
}

// GetArchitecture returns the first entry of the config's Architectures
// list, or the empty string when the list has no entries.
func (c *PhiModelConfig) GetArchitecture() string {
	if len(c.Architectures) == 0 {
		return ""
	}
	return c.Architectures[0]
}

// GetModelType returns the value of the config's ModelType field.
func (c *PhiModelConfig) GetModelType() string {
	modelType := c.ModelType
	return modelType
}

// GetContextLength returns the maximum context length
func (c *PhiModelConfig) GetContextLength() int {
return c.MaxPositionEmbeddings
Expand All @@ -106,11 +79,6 @@ func (c *PhiModelConfig) GetModelSizeBytes() int64 {
return EstimateModelSizeBytes(c.GetParameterCount(), c.GetTorchDtype())
}

// GetTorchDtype returns the value of the config's TorchDtype field.
func (c *PhiModelConfig) GetTorchDtype() string {
	dtype := c.TorchDtype
	return dtype
}

// HasVision returns false since this is not a multimodal vision model
func (c *PhiModelConfig) HasVision() bool {
return false
Expand Down
5 changes: 0 additions & 5 deletions pkg/hfutil/modelconfig/phi3.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,11 +103,6 @@ func (c *Phi3Config) GetModelSizeBytes() int64 {
return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
}

// GetQuantizationType reports the quantization method for a Phi3 config.
// This implementation always yields the empty string (no method reported).
func (c *Phi3Config) GetQuantizationType() string {
	const noQuantization = "" // empty string: no quantization method reported
	return noQuantization
}

// HasVision returns false since this is not a multimodal vision model
func (c *Phi3Config) HasVision() bool {
return false
Expand Down
5 changes: 0 additions & 5 deletions pkg/hfutil/modelconfig/phi3_v.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,11 +92,6 @@ func (c *Phi3VConfig) GetModelSizeBytes() int64 {
return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
}

// GetQuantizationType reports the quantization method for a Phi3V config.
// This implementation always yields the empty string (no method reported).
func (c *Phi3VConfig) GetQuantizationType() string {
	const noQuantization = "" // empty string: no quantization method reported
	return noQuantization
}

// HasVision reports whether an image processor is configured (ImgProcessor != nil)
func (c *Phi3VConfig) HasVision() bool {
return c.ImgProcessor != nil
Expand Down
5 changes: 0 additions & 5 deletions pkg/hfutil/modelconfig/phi3small.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,11 +114,6 @@ func (c *Phi3SmallConfig) GetModelSizeBytes() int64 {
return EstimateModelSizeBytes(c.GetParameterCount(), c.TorchDtype)
}

// GetQuantizationType reports the quantization method for a Phi3Small config.
// This implementation always yields the empty string (no method reported).
func (c *Phi3SmallConfig) GetQuantizationType() string {
	const noQuantization = "" // empty string: no quantization method reported
	return noQuantization
}

// HasVision returns false since this is not a multimodal vision model
func (c *Phi3SmallConfig) HasVision() bool {
return false
Expand Down
Loading
Loading