-
Notifications
You must be signed in to change notification settings - Fork 83
fix(modelconfig): return empty architecture when Mistral Architectures is missing #602
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -84,6 +84,65 @@ func TestLoadModelWithMistral(t *testing.T) { | |
| t.Logf("Mistral model parameter count via generic loader: %s", FormatParamCount(paramCount)) | ||
| } | ||
|
|
||
| // TestMistralConfigNoArchitectures verifies that when the Architectures field | ||
| // is missing from config.json, GetArchitecture() returns an empty string rather | ||
| // than a misleading fallback like "MistralModel" which would cause the model to | ||
| // be misclassified as an embedding model by the config parser. | ||
| // Regression test for https://github.com/ome-projects/ome/issues/601 | ||
| func TestMistralConfigNoArchitectures(t *testing.T) { | ||
| configPath := filepath.Join("testdata", "mistral_no_architectures.json") | ||
|
|
||
| config, err := LoadModelConfig(configPath) | ||
| if err != nil { | ||
| t.Fatalf("Failed to load Mistral config without architectures: %v", err) | ||
| } | ||
|
|
||
| // GetArchitecture must return empty string when Architectures is absent | ||
| arch := config.GetArchitecture() | ||
| if arch != "" { | ||
| t.Errorf("Expected empty architecture when Architectures field is missing, got '%s'", arch) | ||
| } | ||
|
|
||
| // IsEmbedding must return false for a generic Mistral model | ||
| if config.IsEmbedding() { | ||
| t.Error("Expected IsEmbedding() to return false for a generic Mistral model without explicit embedding architecture") | ||
| } | ||
|
|
||
| // Verify it's still recognized as a Mistral model | ||
| if config.GetModelType() != "mistral" { | ||
| t.Errorf("Expected model type 'mistral', got '%s'", config.GetModelType()) | ||
| } | ||
| } | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This test covers the negative case (no architectures → not embedding) but there is no counterpart for the positive case. Please add a test with a fixture that has
That combination is what
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This test covers the negative case (no architectures → not embedding) but there is no counterpart for the positive case. Please add a test with a fixture that has
That combination is what |
||
|
|
||
| // TestMistralConfigEmbedding verifies that a Mistral model with | ||
| // "architectures": ["MistralModel"] is correctly identified as an embedding | ||
| // model (e.g. intfloat/e5-mistral-7b-instruct). | ||
| // This is the load-bearing positive case for config_parser.go's EMBEDDING classification. | ||
| func TestMistralConfigEmbedding(t *testing.T) { | ||
| configPath := filepath.Join("testdata", "mistral_embedding.json") | ||
|
|
||
| config, err := LoadModelConfig(configPath) | ||
| if err != nil { | ||
| t.Fatalf("Failed to load Mistral embedding config: %v", err) | ||
| } | ||
|
|
||
| // GetArchitecture must return "MistralModel" | ||
| arch := config.GetArchitecture() | ||
| if arch != "MistralModel" { | ||
| t.Errorf("Expected architecture 'MistralModel', got '%s'", arch) | ||
| } | ||
|
|
||
| // GetModelType must return "mistral" | ||
| if config.GetModelType() != "mistral" { | ||
| t.Errorf("Expected model type 'mistral', got '%s'", config.GetModelType()) | ||
| } | ||
|
|
||
| // IsEmbedding must return true for MistralModel architecture | ||
| if !config.IsEmbedding() { | ||
| t.Error("Expected IsEmbedding() to return true for a model with 'MistralModel' architecture") | ||
| } | ||
| } | ||
|
|
||
| func TestMistralInstructConfig(t *testing.T) { | ||
| configPath := filepath.Join("testdata", "mistral_7b_instruct.json") | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,22 @@ | ||
| { | ||
| "architectures": ["MistralModel"], | ||
| "bos_token_id": 1, | ||
| "eos_token_id": 2, | ||
| "hidden_act": "silu", | ||
| "hidden_size": 4096, | ||
| "initializer_range": 0.02, | ||
| "intermediate_size": 14336, | ||
| "max_position_embeddings": 32768, | ||
| "model_type": "mistral", | ||
| "num_attention_heads": 32, | ||
| "num_hidden_layers": 32, | ||
| "num_key_value_heads": 8, | ||
| "rms_norm_eps": 1e-05, | ||
| "rope_theta": 10000.0, | ||
| "sliding_window": 4096, | ||
| "tie_word_embeddings": false, | ||
| "torch_dtype": "float16", | ||
| "transformers_version": "4.36.0", | ||
| "use_cache": true, | ||
| "vocab_size": 32000 | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,21 @@ | ||
| { | ||
| "bos_token_id": 1, | ||
| "eos_token_id": 2, | ||
| "hidden_act": "silu", | ||
| "hidden_size": 4096, | ||
| "initializer_range": 0.02, | ||
| "intermediate_size": 14336, | ||
| "max_position_embeddings": 32768, | ||
| "model_type": "mistral", | ||
| "num_attention_heads": 32, | ||
| "num_hidden_layers": 32, | ||
| "num_key_value_heads": 8, | ||
| "rms_norm_eps": 1e-05, | ||
| "rope_theta": 10000.0, | ||
| "sliding_window": 4096, | ||
| "tie_word_embeddings": false, | ||
| "torch_dtype": "bfloat16", | ||
| "transformers_version": "4.34.0.dev0", | ||
| "use_cache": true, | ||
| "vocab_size": 32000 | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How is the empty string as architecture going to be handled by the downstream matcher? Does the corresponding runtime have explicit "" as architecture?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@YouNeedCryDear Thanks for the review. I verified the downstream handling in pkg/modelagent/config_parser.go — all usages of GetArchitecture() normalize via strings.ToLower() and use strings.Contains() for pattern matching. When architecture is "", no specific patterns match, and determineModelCapabilitiesFromHF() correctly falls through to the default ModelCapabilityTextToText at line 586. The embedding check at line 575 relies on IsEmbedding() (which is now self-contained in MistralConfig) rather than the architecture string directly, so this is safe.
This is also consistent with how BaseModelConfig, PhiConfig, and DeepSeekV3Config handle missing architectures — they all return "".