Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 44 additions & 15 deletions pkg/hfutil/modelconfig/qwen3_vl.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,35 +26,54 @@ type Qwen3VLTextConfig struct {
// Attention mechanism
AttentionBias bool `json:"attention_bias"`
AttentionDropout float64 `json:"attention_dropout"`
AttnOutputGate bool `json:"attn_output_gate"`

// Special tokens and embeddings
BosTokenId int `json:"bos_token_id"`
EosTokenId int `json:"eos_token_id"`
VocabSize int `json:"vocab_size"`

// Model architecture
HeadDim int `json:"head_dim"`
HiddenSize int `json:"hidden_size"`
IntermediateSize int `json:"intermediate_size"`
MaxPositionEmbeddings int `json:"max_position_embeddings"`
ModelType string `json:"model_type"`
NumAttentionHeads int `json:"num_attention_heads"`
NumKeyValueHeads int `json:"num_key_value_heads"`
NumHiddenLayers int `json:"num_hidden_layers"`
HeadDim int `json:"head_dim"`
HiddenSize int `json:"hidden_size"`
IntermediateSize int `json:"intermediate_size"`
MaxPositionEmbeddings int `json:"max_position_embeddings"`
ModelType string `json:"model_type"`
NumAttentionHeads int `json:"num_attention_heads"`
NumKeyValueHeads int `json:"num_key_value_heads"`
NumHiddenLayers int `json:"num_hidden_layers"`
RmsNormEps float64 `json:"rms_norm_eps"`

// Hybrid linear/full attention (Qwen3.5)
FullAttentionInterval int `json:"full_attention_interval"`
LayerTypes []string `json:"layer_types,omitempty"`
LinearConvKernelDim int `json:"linear_conv_kernel_dim"`
LinearKeyHeadDim int `json:"linear_key_head_dim"`
LinearNumKeyHeads int `json:"linear_num_key_heads"`
LinearNumValueHeads int `json:"linear_num_value_heads"`
LinearValueHeadDim int `json:"linear_value_head_dim"`

// Multi-token prediction (Qwen3.5)
MtpNumHiddenLayers int `json:"mtp_num_hidden_layers"`
MtpUseDedicatedEmbeddings bool `json:"mtp_use_dedicated_embeddings"`
MambaSsmDtype string `json:"mamba_ssm_dtype"`

// Mixture-of-Experts (MoE)
NumExperts int `json:"num_experts"`
NumExpertsPerTok int `json:"num_experts_per_tok"`
MoeIntermediateSize int `json:"moe_intermediate_size"`
NormTopkProb bool `json:"norm_topk_prob"`
NumExperts int `json:"num_experts"`
NumExpertsPerTok int `json:"num_experts_per_tok"`
MoeIntermediateSize int `json:"moe_intermediate_size"`
NormTopkProb bool `json:"norm_topk_prob"`
SharedExpertIntermediateSize int `json:"shared_expert_intermediate_size"`
RouterAuxLossCoef float64 `json:"router_aux_loss_coef"`

// Activation and initialization
HiddenAct string `json:"hidden_act"`
InitializerRange float64 `json:"initializer_range"`

// Rotary Position Embeddings (RoPE)
RopeScaling Qwen3VLRopeScalingConfig `json:"rope_scaling"`
RopeTheta float64 `json:"rope_theta"`
RopeScaling Qwen3VLRopeScalingConfig `json:"rope_scaling"`
RopeTheta float64 `json:"rope_theta"`
RopeParameters *Qwen3VLRopeParametersConfig `json:"rope_parameters,omitempty"`

// Miscellaneous
DecoderSparseStep int `json:"decoder_sparse_step"`
Expand All @@ -81,13 +100,23 @@ type Qwen3VLVisionConfig struct {
TemporalPatchSize int `json:"temporal_patch_size"`
}

// Qwen3VLRopeScalingConfig represents ROPE scaling configuration for Qwen3-VL
// models. Each field is populated from the JSON key named in its struct tag.
type Qwen3VLRopeScalingConfig struct {
	// MropeInterleaved is read from the "mrope_interleaved" key.
	MropeInterleaved bool `json:"mrope_interleaved"`
	// MropeSection is read from the "mrope_section" key as a list of integers.
	MropeSection []int `json:"mrope_section"`
	// RopeType is read from the "rope_type" key.
	RopeType string `json:"rope_type"`
}

// Qwen3VLRopeParametersConfig represents RoPE parameters for Qwen3.5 models,
// which use rope_parameters instead of rope_scaling. Each field is populated
// from the JSON key named in its struct tag.
type Qwen3VLRopeParametersConfig struct {
	// MropeInterleaved is read from the "mrope_interleaved" key.
	MropeInterleaved bool `json:"mrope_interleaved"`
	// MropeSection is read from the "mrope_section" key as a list of integers.
	MropeSection []int `json:"mrope_section"`
	// RopeType is read from the "rope_type" key.
	RopeType string `json:"rope_type"`
	// RopeTheta is read from the "rope_theta" key.
	RopeTheta float64 `json:"rope_theta"`
	// PartialRotaryFactor is read from the "partial_rotary_factor" key.
	PartialRotaryFactor float64 `json:"partial_rotary_factor"`
}

// LoadQwen3VLConfig loads a Qwen3VL model configuration from a JSON file.
func LoadQwen3VLConfig(configPath string) (*Qwen3VLConfig, error) {
data, err := os.ReadFile(configPath)
Expand Down
8 changes: 4 additions & 4 deletions pkg/hfutil/modelconfig/qwen3_vl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -319,8 +319,8 @@ func TestQwen35ConfigDense(t *testing.T) {
t.Error("Expected HasVision to return true for Qwen3.5 dense model")
}

if qwenConfig.TextConfig.HiddenSize != 3584 {
t.Errorf("Expected hidden size 3584, got %d", qwenConfig.TextConfig.HiddenSize)
if qwenConfig.TextConfig.HiddenSize != 5120 {
t.Errorf("Expected hidden size 5120, got %d", qwenConfig.TextConfig.HiddenSize)
}

if context := config.GetContextLength(); context != 262144 {
Expand Down Expand Up @@ -363,8 +363,8 @@ func TestQwen35ConfigMoE(t *testing.T) {
t.Error("Expected HasVision to return true for Qwen3.5 MoE model")
}

if qwenConfig.TextConfig.NumExperts != 128 {
t.Errorf("Expected num experts 128, got %d", qwenConfig.TextConfig.NumExperts)
if qwenConfig.TextConfig.NumExperts != 256 {
t.Errorf("Expected num experts 256, got %d", qwenConfig.TextConfig.NumExperts)
}

if qwenConfig.TextConfig.MoeIntermediateSize != 512 {
Expand Down
Loading
Loading