
Commit 83ebf56

Refactor oneshot function parameters to use Optional types and enhance documentation
1 parent 99474d7 commit 83ebf56

File tree: 2 files changed (+30 additions, -19 deletions)

  src/llmcompressor/entrypoints/oneshot.py
  tests/llmcompressor/transformers/oneshot/test_api_inputs.py
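The signature change follows the usual pattern of annotating None-defaulted parameters as Optional and normalizing them inside the function body. Below is a minimal, self-contained sketch of that pattern; the function name is hypothetical and not part of llmcompressor:

from typing import Any, Dict, List, Optional


def configure(
    tracing_ignore: Optional[List[str]] = None,
    raw_kwargs: Optional[Dict[str, Any]] = None,
) -> None:
    # Hypothetical helper: Optional[...] keeps the annotation honest for type
    # checkers, and normalizing None here avoids mutable default arguments.
    if tracing_ignore is None:
        tracing_ignore = []
    if raw_kwargs is None:
        raw_kwargs = {}
    print(tracing_ignore, raw_kwargs)


configure()                             # prints: [] {}
configure(tracing_ignore=["lm_head"])   # prints: ['lm_head'] {}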

src/llmcompressor/entrypoints/oneshot.py

Lines changed: 19 additions & 3 deletions
@@ -243,8 +243,8 @@ def oneshot(
     min_tokens_per_module: Optional[float] = None,
     calibrate_moe_context: bool = False,
     pipeline: str = "independent",
-    tracing_ignore: List[str] = None,
-    raw_kwargs: Dict[str, Any] = None,
+    tracing_ignore: Optional[List[str]] = None,
+    raw_kwargs: Optional[Dict[str, Any]] = None,
     preprocessing_func: Optional[Callable] = None,
     max_train_samples: Optional[int] = None,
     remove_columns: Optional[List[str]] = None,
@@ -320,6 +320,16 @@ def oneshot(
         during forward pass in calibration. When False, quantization is disabled
         during forward pass in calibration. Default is set to True.

+    :param pipeline: The pipeline configuration to use for calibration. Options include
+        'independent', 'sequential', or 'layer_sequential'.
+    :param tracing_ignore: List of module names to ignore during tracing.
+    :param raw_kwargs: Dictionary of raw keyword arguments passed to the function.
+    :param preprocessing_func: Optional callable for preprocessing the dataset.
+    :param max_train_samples: Maximum number of training samples to use.
+    :param remove_columns: List of column names to remove from the dataset.
+    :param dvc_data_repository: Path to the DVC data repository, if applicable.
+    :param sequential_targets: List of sequential targets for calibration.
+
     # Miscellaneous arguments
     :param output_dir: Path to save the output model after calibration.
         Nothing is saved if None.
@@ -333,11 +343,17 @@ def oneshot(
         raise ValueError(
             "Invalid configuration: "
             "sequential_targets' cannot be used with 'independent' pipeline. "
-            "Please use 'sequential' or 'layer_sequential' pipeline when specifying"
+            "Please use 'sequential' or 'layer_sequential' pipeline when specifying "
             "sequential_targets."
         )

     # pass all args directly into Oneshot
+    if raw_kwargs is None:
+        raw_kwargs = {}
+
+    local_args = {
+        k: v for k, v in locals().items() if k not in ("local_args", "kwargs")
+    }
     local_args = {
         k: v for k, v in locals().items() if k not in ("local_args", "kwargs")
     }
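For context, a hedged sketch of how the newly typed and documented keyword arguments might be passed; the model, dataset, recipe, and target values below are placeholders, and the exact set of other required arguments depends on the llmcompressor version in use:

from llmcompressor import oneshot

# Placeholder values throughout; per the validation shown in the diff, a
# 'sequential' or 'layer_sequential' pipeline must be selected whenever
# sequential_targets is provided.
oneshot(
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",   # placeholder model id
    dataset="open_platypus",                      # placeholder dataset name
    recipe="recipe.yaml",                         # placeholder recipe path
    pipeline="sequential",
    sequential_targets=["LlamaDecoderLayer"],     # placeholder target module
    tracing_ignore=["lm_head"],                   # placeholder ignore entry
    raw_kwargs=None,                              # normalized to {} inside oneshot
    output_dir="./oneshot-output",
)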

tests/llmcompressor/transformers/oneshot/test_api_inputs.py

Lines changed: 11 additions & 16 deletions
@@ -1,5 +1,10 @@
 import pytest
 from transformers import AutoModelForCausalLM, AutoTokenizer
+import os
+import logging
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)

 from llmcompressor import oneshot
 from tests.llmcompressor.transformers.oneshot.dataset_processing import get_data_utils
@@ -46,7 +51,7 @@ def wrapped_preprocess_func(sample):
     args["sequential_targets"] = config.get("sequential_targets", None)
     args["tracing_ignore"] = config.get("tracing_ignore", [])
     args["raw_kwargs"] = config.get("raw_kwargs", {})
-    args["preprocessing_func"] = (config.get("preprocessing_func", lambda x: x),)
+    args["preprocessing_func"] = config.get("preprocessing_func", lambda x: x)
     args["max_train_samples"] = config.get("max_train_samples", 50)
     args["remove_columns"] = config.get("remove_columns", None)
     args["dvc_data_repository"] = config.get("dvc_data_repository", None)
@@ -59,10 +64,10 @@ def wrapped_preprocess_func(sample):
 @pytest.mark.smoke
 @pytest.mark.integration
 def test_one_shot_inputs(one_shot_args, tmp_path):
-    print(f"Dataset type: {type(one_shot_args.get('dataset'))}")
+    logger.info(f"Dataset type: {type(one_shot_args.get('dataset'))}")
     if isinstance(one_shot_args.get("dataset"), str):
-        print(f"Dataset name: {one_shot_args.get('dataset')}")
-        print(f"Dataset config: {one_shot_args.get('dataset_config_name')}")
+        logger.info(f"Dataset name: {one_shot_args.get('dataset')}")
+        logger.info(f"Dataset config: {one_shot_args.get('dataset_config_name')}")
     try:
         # Call oneshot with all parameters as flat arguments
         oneshot(
@@ -76,18 +81,8 @@ def test_one_shot_inputs(one_shot_args, tmp_path):
         if "num_samples should be a positive integer value" in str(
             e
         ) or "Dataset is empty. Cannot create a calibration dataloader" in str(e):
-            print(f"Dataset is empty: {one_shot_args.get('dataset')}")
+            logger.warning(f"Dataset is empty: {one_shot_args.get('dataset')}")
             pytest.skip(f"Dataset is empty: {one_shot_args.get('dataset')}")
         else:
             raise  # Re-raise other ValueError exceptions
-    finally:
-        # Clean up temporary files to avoid the "megabytes of temp files" error
-        import os
-
-        # Clean up the output directory
-        if os.path.exists(tmp_path):
-            print(f"Cleaning up temp directory: {tmp_path}")
-            # Remove files but keep the directory structure
-            for root, dirs, files in os.walk(tmp_path):
-                for file in files:
-                    os.remove(os.path.join(root, file))
+
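Two small notes on the test changes. First, the preprocessing_func fix removes an accidental one-element tuple: wrapping an expression in parentheses with a trailing comma builds a tuple, so the old line stored a tuple instead of the callable. A quick illustration of that pitfall:

config = {}

# Old form: parentheses plus trailing comma create a 1-tuple, not a callable.
wrapped = (config.get("preprocessing_func", lambda x: x),)
print(type(wrapped))   # prints: <class 'tuple'>

# New form: the callable (or the identity default) is stored directly.
func = config.get("preprocessing_func", lambda x: x)
print(func("sample"))  # prints: sample

Second, with the test now logging through the logging module instead of print, the messages can be surfaced during a run via pytest's live-logging options (for example, --log-cli-level=INFO).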
