From 84e78e7e932e135f758066136d973f6bce2e1c9b Mon Sep 17 00:00:00 2001 From: Begum Cig Date: Tue, 24 Feb 2026 13:03:53 +0000 Subject: [PATCH 1/2] fix: fix awq tracing fx problem --- pyproject.toml | 10 ++++++++-- src/pruna/algorithms/llm_compressor.py | 8 +++++++- tests/algorithms/testers/awq.py | 3 +++ 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c199603c..40e127f4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -135,13 +135,11 @@ dependencies = [ "whisper-s2t==1.3.1", "hqq==0.2.7.post1", "torchao>=0.12.0,<0.16.0", # 0.16.0 breaks diffusers 0.36.0, torch+torch: https://github.com/pytorch/ao/issues/2919#issue-3375688762 - "llmcompressor", "gliner; python_version >= '3.10'", "piq", "opencv-python", "kernels", "aenum", - "vbench-pruna; sys_platform != 'darwin'", "imageio-ffmpeg", "jaxtyping", "peft>=0.18.0", @@ -164,10 +162,18 @@ gptq = [ "gptqmodel==4.0.0.dev0+cu126torch2.7; sys_platform != 'darwin' or platform_machine != 'arm64'", "gptqmodel; sys_platform == 'darwin' and platform_machine == 'arm64'", ] +awq = [ + "compressed-tensors >= 0.13.0", + "llmcompressor>=0.9", + "torch>=2.9.0" +] full = [ "xformers>=0.0.30", "stable-fast-pruna==1.0.8", ] +vbench = [ + "vbench-pruna; sys_platform != 'darwin'", +] dev = [ "wget", "python-dotenv", diff --git a/src/pruna/algorithms/llm_compressor.py b/src/pruna/algorithms/llm_compressor.py index e9fc404b..d053cbbb 100644 --- a/src/pruna/algorithms/llm_compressor.py +++ b/src/pruna/algorithms/llm_compressor.py @@ -70,6 +70,12 @@ def get_hyperparameters(self) -> list: default_value="W4A16", meta=dict(desc="Quantization scheme to use. Use symmetric quantization to avoid decompression issues."), ), + CategoricalHyperparameter( + "calibration_pipeline", + choices=["independent", "basic", "datafree", "sequential", "layer_sequential"], + default_value="independent", + meta=dict(desc="Pipeline to use for calibration.") + ), TargetModules( "target_modules", default_value=None, @@ -173,7 +179,7 @@ def quantize_language_model( targets=["Linear"], ) ] - return imported["oneshot"](model=language_model, recipe=recipe, dataset=dataset, processor=processor) + return imported["oneshot"](model=language_model, recipe=recipe, dataset=dataset, processor=processor, pipeline=smash_config["calibration_pipeline"]) model = map_targeted_nn_roots(quantize_language_model, model, target_modules) return model diff --git a/tests/algorithms/testers/awq.py b/tests/algorithms/testers/awq.py index deb524e4..807e0fd0 100644 --- a/tests/algorithms/testers/awq.py +++ b/tests/algorithms/testers/awq.py @@ -14,3 +14,6 @@ class TestLLMCompressor(AlgorithmTesterBase): allow_pickle_files = False algorithm_class = LLMCompressor metrics = ["perplexity"] + hyperparameters = { + "awq_calibration_pipeline": "basic", + } From 5455492a5309a0118ca6ea36853701c157694259 Mon Sep 17 00:00:00 2001 From: Begum Cig Date: Wed, 25 Feb 2026 12:26:22 +0000 Subject: [PATCH 2/2] fix: add vbench and awq as conflicting dependencies --- pyproject.toml | 7 +++++++ src/pruna/algorithms/llm_compressor.py | 5 +++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 40e127f4..ca2bdf28 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,6 +77,13 @@ url = "https://pytorch-extension.intel.com/release-whl/stable/cpu/cn/" [tool.uv] index-strategy = "unsafe-best-match" +conflicts = [ + [ + { extra = "awq" }, + { extra = "vbench" }, + ] +] + [tool.uv.sources] gptqmodel = [ { index = "pruna_internal", marker = "sys_platform != 'darwin' or platform_machine != 'arm64'"}, diff --git a/src/pruna/algorithms/llm_compressor.py b/src/pruna/algorithms/llm_compressor.py index d053cbbb..621e1574 100644 --- a/src/pruna/algorithms/llm_compressor.py +++ b/src/pruna/algorithms/llm_compressor.py @@ -71,7 +71,7 @@ def get_hyperparameters(self) -> list: meta=dict(desc="Quantization scheme to use. Use symmetric quantization to avoid decompression issues."), ), CategoricalHyperparameter( - "calibration_pipeline", + "calibration_pipeline", choices=["independent", "basic", "datafree", "sequential", "layer_sequential"], default_value="independent", meta=dict(desc="Pipeline to use for calibration.") @@ -179,7 +179,8 @@ def quantize_language_model( targets=["Linear"], ) ] - return imported["oneshot"](model=language_model, recipe=recipe, dataset=dataset, processor=processor, pipeline=smash_config["calibration_pipeline"]) + return imported["oneshot"](model=language_model, recipe=recipe, dataset=dataset, processor=processor, + pipeline=smash_config["calibration_pipeline"]) model = map_targeted_nn_roots(quantize_language_model, model, target_modules) return model