From 84e78e7e932e135f758066136d973f6bce2e1c9b Mon Sep 17 00:00:00 2001
From: Begum Cig <ebc332@nyu.edu>
Date: Tue, 24 Feb 2026 13:03:53 +0000
Subject: [PATCH 1/2] fix: fix awq fx tracing problem

---
 pyproject.toml                         | 10 ++++++++--
 src/pruna/algorithms/llm_compressor.py |  8 +++++++-
 tests/algorithms/testers/awq.py        |  3 +++
 3 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index c199603c..40e127f4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -135,13 +135,11 @@ dependencies = [
     "whisper-s2t==1.3.1",
     "hqq==0.2.7.post1",
     "torchao>=0.12.0,<0.16.0", # 0.16.0 breaks diffusers 0.36.0, torch+torch: https://github.com/pytorch/ao/issues/2919#issue-3375688762
-    "llmcompressor",
     "gliner; python_version >= '3.10'",
     "piq",
     "opencv-python",
     "kernels",
     "aenum",
-    "vbench-pruna; sys_platform != 'darwin'",
     "imageio-ffmpeg",
     "jaxtyping",
     "peft>=0.18.0",
@@ -164,10 +162,18 @@ gptq = [
     "gptqmodel==4.0.0.dev0+cu126torch2.7; sys_platform != 'darwin' or platform_machine != 'arm64'",
     "gptqmodel; sys_platform == 'darwin' and platform_machine == 'arm64'",
 ]
+awq = [
+    "compressed-tensors >= 0.13.0",
+    "llmcompressor>=0.9",
+    "torch>=2.9.0"
+]
 full = [
     "xformers>=0.0.30",
     "stable-fast-pruna==1.0.8",
 ]
+vbench = [
+    "vbench-pruna; sys_platform != 'darwin'",
+]
 dev = [
     "wget",
     "python-dotenv",
diff --git a/src/pruna/algorithms/llm_compressor.py b/src/pruna/algorithms/llm_compressor.py
index e9fc404b..d053cbbb 100644
--- a/src/pruna/algorithms/llm_compressor.py
+++ b/src/pruna/algorithms/llm_compressor.py
@@ -70,6 +70,12 @@ def get_hyperparameters(self) -> list:
                 default_value="W4A16",
                 meta=dict(desc="Quantization scheme to use. Use symmetric quantization to avoid decompression issues."),
             ),
+            CategoricalHyperparameter(
+                "calibration_pipeline", 
+                choices=["independent", "basic", "datafree", "sequential", "layer_sequential"],
+                default_value="independent",
+                meta=dict(desc="Pipeline to use for calibration.")
+            ),
             TargetModules(
                 "target_modules",
                 default_value=None,
@@ -173,7 +179,7 @@ def quantize_language_model(
                     targets=["Linear"],
                 )
             ]
-            return imported["oneshot"](model=language_model, recipe=recipe, dataset=dataset, processor=processor)
+            return imported["oneshot"](model=language_model, recipe=recipe, dataset=dataset, processor=processor, pipeline=smash_config["calibration_pipeline"])
 
         model = map_targeted_nn_roots(quantize_language_model, model, target_modules)
         return model
diff --git a/tests/algorithms/testers/awq.py b/tests/algorithms/testers/awq.py
index deb524e4..807e0fd0 100644
--- a/tests/algorithms/testers/awq.py
+++ b/tests/algorithms/testers/awq.py
@@ -14,3 +14,6 @@ class TestLLMCompressor(AlgorithmTesterBase):
     allow_pickle_files = False
     algorithm_class = LLMCompressor
     metrics = ["perplexity"]
+    hyperparameters = {
+        "awq_calibration_pipeline": "basic",
+    }

From 5455492a5309a0118ca6ea36853701c157694259 Mon Sep 17 00:00:00 2001
From: Begum Cig <ebc332@nyu.edu>
Date: Wed, 25 Feb 2026 12:26:22 +0000
Subject: [PATCH 2/2] fix: add vbench and awq as conflicting dependencies

---
 pyproject.toml                         | 7 +++++++
 src/pruna/algorithms/llm_compressor.py | 5 +++--
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 40e127f4..ca2bdf28 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -77,6 +77,13 @@ url = "https://pytorch-extension.intel.com/release-whl/stable/cpu/cn/"
 [tool.uv]
 index-strategy = "unsafe-best-match"
 
+conflicts = [
+    [
+        { extra = "awq" },
+        { extra = "vbench" },
+    ]
+]
+
 [tool.uv.sources]
 gptqmodel = [
   { index = "pruna_internal", marker = "sys_platform != 'darwin' or platform_machine != 'arm64'"},
diff --git a/src/pruna/algorithms/llm_compressor.py b/src/pruna/algorithms/llm_compressor.py
index d053cbbb..621e1574 100644
--- a/src/pruna/algorithms/llm_compressor.py
+++ b/src/pruna/algorithms/llm_compressor.py
@@ -71,7 +71,7 @@ def get_hyperparameters(self) -> list:
                 meta=dict(desc="Quantization scheme to use. Use symmetric quantization to avoid decompression issues."),
             ),
             CategoricalHyperparameter(
-                "calibration_pipeline", 
+                "calibration_pipeline",
                 choices=["independent", "basic", "datafree", "sequential", "layer_sequential"],
                 default_value="independent",
                 meta=dict(desc="Pipeline to use for calibration.")
@@ -179,7 +179,8 @@ def quantize_language_model(
                     targets=["Linear"],
                 )
             ]
-            return imported["oneshot"](model=language_model, recipe=recipe, dataset=dataset, processor=processor, pipeline=smash_config["calibration_pipeline"])
+            return imported["oneshot"](model=language_model, recipe=recipe, dataset=dataset, processor=processor,
+            pipeline=smash_config["calibration_pipeline"])
 
         model = map_targeted_nn_roots(quantize_language_model, model, target_modules)
         return model