From da281a9f4d7e6e6f36e3dd48b3ceb95682fb02f4 Mon Sep 17 00:00:00 2001 From: Kyle Sayers Date: Tue, 14 Oct 2025 17:30:25 -0400 Subject: [PATCH] try Signed-off-by: Kyle Sayers --- src/llmcompressor/observers/mse.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/llmcompressor/observers/mse.py b/src/llmcompressor/observers/mse.py index f21c675ab..84113032e 100644 --- a/src/llmcompressor/observers/mse.py +++ b/src/llmcompressor/observers/mse.py @@ -197,12 +197,22 @@ def _grid_search_mse( if optimize_global_scale: global_scale = generate_gparam(shrinked_min_val, shrinked_max_val) - candidate_scales, candidate_zero_points = calculate_qparams( - min_vals=shrinked_min_val, - max_vals=shrinked_max_val, - quantization_args=args, - global_scale=global_scale, - ) + # qparam minmax is constant + # since at runtime, the true minmax is chosen + candidate_scales, candidate_zero_points = calculate_qparams( + min_vals=min_val, + max_vals=max_val, + quantization_args=args, + global_scale=global_scale, + ) + + else: + candidate_scales, candidate_zero_points = calculate_qparams( + min_vals=shrinked_min_val, + max_vals=shrinked_max_val, + quantization_args=args, + global_scale=global_scale, + ) # Note that observed.shape = (num_observations, *qparams_shape, group_size). # For the purposes of fake quantization, this is equivalent to token quant