Skip to content

Commit d4acfd0

Browse files
committed
[None][test] Add accuracy test for Qwen3Next model
- Updated L0_Test.groovy to include auto_trigger for Qwen3Next.
- Added TestQwen3NextThinking class in test_llm_api_pytorch.py for accuracy testing.
- Updated l0_dgx_h100.yml to include a new test case for Qwen3Next under specific conditions.

Signed-off-by: Robin Kobus <[email protected]>
1 parent ba8abea commit d4acfd0

File tree

5 files changed

+53
-0
lines changed

5 files changed

+53
-0
lines changed

jenkins/L0_Test.groovy

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1276,6 +1276,8 @@ def getMakoArgsFromStageName(stageName, parseSysinfo=false) {
12761276
makoArgs += ["auto_trigger=deepseek"]
12771277
} else if (stageName.contains("-GptOss-")) {
12781278
makoArgs += ["auto_trigger=gpt_oss"]
1279+
} else if (stageName.contains("-Qwen3Next-")) {
1280+
makoArgs += ["auto_trigger=qwen3_next"]
12791281
} else {
12801282
makoArgs += ["auto_trigger=others"]
12811283
}
@@ -2048,6 +2050,7 @@ def launchTestJobs(pipeline, testFilter)
20482050
"DGX_H100-4_GPUs-PyTorch-DeepSeek-2": ["dgx-h100-x4", "l0_dgx_h100", 2, 2, 4],
20492051
"DGX_H100-2_GPUs-PyTorch-Others-1": ["dgx-h100-x2", "l0_dgx_h100", 1, 1, 2],
20502052
"DGX_H100-4_GPUs-PyTorch-GptOss-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
2053+
"DGX_H100-4_GPUs-PyTorch-Qwen3Next-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
20512054
"DGX_H100-4_GPUs-PyTorch-Others-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
20522055
"DGX_H100-4_GPUs-CPP-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
20532056
"A10-PyTorch-1": ["a10", "l0_a10", 1, 1],

tests/integration/defs/accuracy/references/gsm8k.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,8 @@ Qwen3/Qwen3-235B-A22B:
123123
quant_algo: NVFP4
124124
kv_cache_quant_algo: FP8
125125
accuracy: 85.78
126+
Qwen3/Qwen3-Next-80B-A3B-Thinking:
127+
- accuracy: 81.577
126128
moonshotai/Kimi-K2-Instruct:
127129
- quant_algo: FP8_BLOCK_SCALES
128130
accuracy: 94.84

tests/integration/defs/accuracy/references/mmlu.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,8 @@ Qwen3/Qwen3-235B-A22B:
229229
quant_algo: NVFP4
230230
kv_cache_quant_algo: FP8
231231
accuracy: 86
232+
Qwen3/Qwen3-Next-80B-A3B-Thinking:
233+
- accuracy: 86
232234
moonshotai/Kimi-K2-Instruct:
233235
- quant_algo: FP8_BLOCK_SCALES
234236
accuracy: 87.65

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3559,6 +3559,37 @@ def test_auto_dtype_tp4(self):
35593559
task.evaluate(llm)
35603560

35613561

3562+
@pytest.mark.skip_less_device_memory(80000)
3563+
class TestQwen3NextThinking(LlmapiAccuracyTestHarness):
3564+
MODEL_NAME = "Qwen3/Qwen3-Next-80B-A3B-Thinking"
3565+
MODEL_PATH = f"{llm_models_root()}/{MODEL_NAME}"
3566+
3567+
@skip_pre_hopper
3568+
@pytest.mark.skip_less_device(4)
3569+
@pytest.mark.parametrize("tp_size,pp_size,ep_size", [(4, 1, 4)],
3570+
ids=["tp4ep4"])
3571+
def test_auto_dtype(self, tp_size, pp_size, ep_size):
3572+
if get_device_count() != tp_size * pp_size:
3573+
pytest.skip("Device count mismatch with world size")
3574+
3575+
kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.6,
3576+
enable_block_reuse=False)
3577+
cuda_graph_config = CudaGraphConfig(enable_padding=True,
3578+
max_batch_size=720)
3579+
3580+
with LLM(self.MODEL_PATH,
3581+
max_num_tokens=4096,
3582+
tensor_parallel_size=tp_size,
3583+
pipeline_parallel_size=pp_size,
3584+
moe_expert_parallel_size=ep_size,
3585+
kv_cache_config=kv_cache_config,
3586+
cuda_graph_config=cuda_graph_config) as llm:
3587+
task = MMLU(self.MODEL_NAME)
3588+
task.evaluate(llm)
3589+
task = GSM8K(self.MODEL_NAME)
3590+
task.evaluate(llm)
3591+
3592+
35623593
class TestNano_V2_VLM(LlmapiAccuracyTestHarness):
35633594
MODEL_NAME = "nvidia/Nano-v2-VLM"
35643595
MODEL_PATH = f"{llm_models_root()}/Nano-v2-VLM"

tests/integration/test_lists/test-db/l0_dgx_h100.yml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,21 @@ l0_dgx_h100:
164164
- accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-cutlass-auto]
165165
- accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-triton-auto]
166166
- accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4a16[dp4-auto]
167+
- condition:
168+
ranges:
169+
system_gpu_count:
170+
gte: 4
171+
lte: 4
172+
wildcards:
173+
gpu:
174+
- '*h100*'
175+
linux_distribution_name: ubuntu*
176+
terms:
177+
stage: pre_merge
178+
backend: pytorch
179+
auto_trigger: qwen3_next
180+
tests:
181+
- accuracy/test_llm_api_pytorch.py::TestQwen3NextThinking::test_auto_dtype[tp4ep4]
167182
- condition:
168183
ranges:
169184
system_gpu_count:

0 commit comments

Comments (0)