From bfe47dc65b37a0ef8e19eb95fa2c5e03c0550c24 Mon Sep 17 00:00:00 2001
From: Sugat Mahanti
Date: Sat, 25 Oct 2025 08:40:44 -0400
Subject: [PATCH 1/4] Modernize entrypoint module with type hints and use generic types

Part of #1927
---
 src/llmcompressor/entrypoints/oneshot.py | 46 ++++++++++++------------
 src/llmcompressor/entrypoints/utils.py   | 17 +++++----
 2 files changed, 31 insertions(+), 32 deletions(-)

diff --git a/src/llmcompressor/entrypoints/oneshot.py b/src/llmcompressor/entrypoints/oneshot.py
index b6e1e2b63..d06dc109c 100644
--- a/src/llmcompressor/entrypoints/oneshot.py
+++ b/src/llmcompressor/entrypoints/oneshot.py
@@ -10,7 +10,7 @@
 import os
 from datetime import datetime
 from pathlib import Path
-from typing import TYPE_CHECKING, Dict, List, Optional, Union
+from typing import TYPE_CHECKING
 
 from loguru import logger
 from torch.utils.data import DataLoader
@@ -35,7 +35,7 @@ class Oneshot:
     This class handles the entire lifecycle of one-shot calibration, including
     preprocessing (model and tokenizer/processor initialization), model optimization
     (quantization or sparsification), and postprocessing (saving outputs). The
-    intructions for model optimization can be specified by using a recipe.
+    instructions for model optimization can be specified by using a recipe.
 
     - **Input Keyword Arguments:**
       `kwargs` are parsed into:
@@ -98,7 +98,7 @@
 
     def __init__(
         self,
-        log_dir: Optional[str] = None,
+        log_dir: str | None = None,
         **kwargs,
     ):
         """
@@ -178,8 +178,8 @@ def __call__(self):
 
     def apply_recipe_modifiers(
         self,
-        calibration_dataloader: Optional[DataLoader],
-        recipe_stage: Optional[str] = None,
+        calibration_dataloader: DataLoader | None,
+        recipe_stage: str | None = None,
     ):
         """
         Applies recipe modifiers to the model during the lifecycle.
@@ -197,7 +197,7 @@ def apply_recipe_modifiers(
         session = active_session()
         session.reset()
 
-        # (Helen INFERENG-661): validate recipe modifiers before intialization
+        # (Helen INFERENG-661): validate recipe modifiers before initialization
         session.initialize(
             model=self.model,
             start=-1,
@@ -220,12 +220,12 @@
 
 def oneshot(
     # Model arguments
-    model: Union[str, PreTrainedModel],
-    distill_teacher: Optional[str] = None,
-    config_name: Optional[str] = None,
-    tokenizer: Optional[Union[str, PreTrainedTokenizerBase]] = None,
-    processor: Optional[Union[str, ProcessorMixin]] = None,
-    cache_dir: Optional[str] = None,
+    model: str | PreTrainedModel,
+    distill_teacher: str | None = None,
+    config_name: str | None = None,
+    tokenizer: str | PreTrainedTokenizerBase | None = None,
+    processor: str | ProcessorMixin | None = None,
+    cache_dir: str | None = None,
     use_auth_token: bool = False,
     precision: str = "auto",
     tie_word_embeddings: bool = False,
@@ -233,15 +233,15 @@ def oneshot(
     save_compressed: bool = True,
     model_revision: str = "main",
     # Recipe arguments
-    recipe: Optional[Union[str, List[str]]] = None,
-    recipe_args: Optional[List[str]] = None,
+    recipe: str | list[str] | None = None,
+    recipe_args: list[str] | None = None,
     clear_sparse_session: bool = False,
-    stage: Optional[str] = None,
+    stage: str | None = None,
     # Dataset arguments
-    dataset: Optional[Union[str, "Dataset", "DatasetDict"]] = None,
-    dataset_config_name: Optional[str] = None,
-    dataset_path: Optional[str] = None,
-    splits: Optional[Union[str, List, Dict]] = None,
+    dataset: str | "Dataset" | "DatasetDict" | None = None,
+    dataset_config_name: str | None = None,
+    dataset_path: str | None = None,
+    splits: str | list | dict | None = None,
     num_calibration_samples: int = 512,
     shuffle_calibration_samples: bool = True,
     max_seq_length: int = 384,
@@ -250,13 +250,13 @@ def oneshot(
     concatenate_data: bool = False,
     streaming: bool = False,
     overwrite_cache: bool = False,
-    preprocessing_num_workers: Optional[int] = None,
-    min_tokens_per_module: Optional[float] = None,
+    preprocessing_num_workers: int | None = None,
+    min_tokens_per_module: float | None = None,
     calibrate_moe_context: bool = False,
     quantization_aware_calibration: bool = True,
     # Miscellaneous arguments
-    output_dir: Optional[str] = None,
-    log_dir: Optional[str] = None,
+    output_dir: str | None = None,
+    log_dir: str | None = None,
     **kwargs,
 ) -> PreTrainedModel:
     """
diff --git a/src/llmcompressor/entrypoints/utils.py b/src/llmcompressor/entrypoints/utils.py
index 32c02ba15..3958933a8 100644
--- a/src/llmcompressor/entrypoints/utils.py
+++ b/src/llmcompressor/entrypoints/utils.py
@@ -10,7 +10,6 @@
 import inspect
 import os
 from pathlib import PosixPath
-from typing import Optional, Tuple
 
 from compressed_tensors.utils import remove_dispatch
 from loguru import logger
@@ -47,7 +46,7 @@
 def pre_process(
     model_args: ModelArguments,
     dataset_args: DatasetArguments,
-    output_dir: Optional[str],
+    output_dir: str | None,
 ):
     """
     Prepares the model and tokenizer/processor for calibration.
@@ -103,9 +102,9 @@ def pre_process
 
 
 def post_process(
-    model_args: Optional["ModelArguments"] = None,
-    recipe_args: Optional["RecipeArguments"] = None,
-    output_dir: Optional[str] = None,
+    model_args: "ModelArguments" | None = None,
+    recipe_args: "RecipeArguments" | None = None,
+    output_dir: str | None = None,
 ):
     """
     Saves the model and tokenizer/processor to the output directory if model_args,
@@ -167,8 +166,8 @@ def _warn_tied_embeddings(tie_word_embeddings: bool = False):
 
 def initialize_model_from_path(
     model_args: ModelArguments,
-    training_args: Optional[TrainingArguments] = None,
-) -> Tuple[PreTrainedModel, Optional[PreTrainedModel]]:
+    training_args: TrainingArguments | None = None,
+) -> tuple[PreTrainedModel, PreTrainedModel | None]:
     # Load pretrained model
     # The .from_pretrained methods guarantee that only one local process can
     # concurrently download model & vocab.
@@ -256,7 +255,7 @@
 def initialize_processor_from_path(
     model_args: ModelArguments,
     model: PreTrainedModel,
-    teacher: Optional[PreTrainedModel] = None,
+    teacher: PreTrainedModel | None = None,
 ) -> Processor:
     processor_src = model_args.processor or get_processor_name_from_model(
         model, teacher
@@ -295,7 +294,7 @@
     return processor
 
 
-def get_processor_name_from_model(student: Module, teacher: Optional[Module]) -> str:
+def get_processor_name_from_model(student: Module, teacher: Module | None) -> str:
     """
     Get a processor/tokenizer source used for both student and teacher,
     assuming that they could be shared

From ce130cf5f6b1ca2688e3c93c0feb521f9c8fb0b4 Mon Sep 17 00:00:00 2001
From: Sugat Mahanti
Date: Sat, 25 Oct 2025 09:02:25 -0400
Subject: [PATCH 2/4] Fix docstring and type hints for recipe and splits parameters respectively

---
 src/llmcompressor/entrypoints/oneshot.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/llmcompressor/entrypoints/oneshot.py b/src/llmcompressor/entrypoints/oneshot.py
index d06dc109c..d45eb9069 100644
--- a/src/llmcompressor/entrypoints/oneshot.py
+++ b/src/llmcompressor/entrypoints/oneshot.py
@@ -241,7 +241,7 @@ def oneshot(
     dataset: str | "Dataset" | "DatasetDict" | None = None,
     dataset_config_name: str | None = None,
     dataset_path: str | None = None,
-    splits: str | list | dict | None = None,
+    splits: str | list[str] | dict[str, str] | None = None,
     num_calibration_samples: int = 512,
     shuffle_calibration_samples: bool = True,
     max_seq_length: int = 384,
@@ -287,7 +287,8 @@
        tag, or commit id).

    # Recipe arguments
-    :param recipe: Path to a LLM Compressor sparsification recipe.
+    :param recipe: Path to a LLM Compressor sparsification recipe, or a list of paths
+        to multiple LLM Compressor sparsification recipes.
    :param recipe_args: List of recipe arguments to evaluate, in the format
        "key1=value1", "key2=value2".
    :param clear_sparse_session: Whether to clear CompressionSession/

From 1b972c22a4771be92f9426b4cc435f18da9cf6bf Mon Sep 17 00:00:00 2001
From: Sugat Mahanti
Date: Mon, 27 Oct 2025 18:56:16 -0400
Subject: [PATCH 3/4] Addressing comments: Clean up docstring and fix type hints

---
 src/llmcompressor/entrypoints/oneshot.py | 6 +++---
 src/llmcompressor/entrypoints/utils.py   | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/llmcompressor/entrypoints/oneshot.py b/src/llmcompressor/entrypoints/oneshot.py
index d45eb9069..cdbc40e5d 100644
--- a/src/llmcompressor/entrypoints/oneshot.py
+++ b/src/llmcompressor/entrypoints/oneshot.py
@@ -238,7 +238,7 @@ def oneshot(
     clear_sparse_session: bool = False,
     stage: str | None = None,
     # Dataset arguments
-    dataset: str | "Dataset" | "DatasetDict" | None = None,
+    dataset: "str | Dataset | DatasetDict | None" = None,
     dataset_config_name: str | None = None,
     dataset_path: str | None = None,
     splits: str | list[str] | dict[str, str] | None = None,
@@ -287,8 +287,8 @@
        tag, or commit id).

    # Recipe arguments
-    :param recipe: Path to a LLM Compressor sparsification recipe, or a list of paths
-        to multiple LLM Compressor sparsification recipes.
+    :param recipe: Path to a LLM Compressor recipe, or a list of paths
+        to multiple LLM Compressor recipes.
    :param recipe_args: List of recipe arguments to evaluate, in the format
        "key1=value1", "key2=value2".
    :param clear_sparse_session: Whether to clear CompressionSession/
diff --git a/src/llmcompressor/entrypoints/utils.py b/src/llmcompressor/entrypoints/utils.py
index 3958933a8..97c46644f 100644
--- a/src/llmcompressor/entrypoints/utils.py
+++ b/src/llmcompressor/entrypoints/utils.py
@@ -102,8 +102,8 @@
 
 
 def post_process(
-    model_args: "ModelArguments" | None = None,
-    recipe_args: "RecipeArguments" | None = None,
+    model_args: ModelArguments | None = None,
+    recipe_args: RecipeArguments | None = None,
     output_dir: str | None = None,
 ):
     """

From 78224d7dd2424033613ef0ea0937cb351d036f07 Mon Sep 17 00:00:00 2001
From: Sugat Mahanti
Date: Tue, 28 Oct 2025 18:55:28 -0400
Subject: [PATCH 4/4] Defer annotation evaluation via __future__ annotations import

---
 src/llmcompressor/entrypoints/oneshot.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/llmcompressor/entrypoints/oneshot.py b/src/llmcompressor/entrypoints/oneshot.py
index cdbc40e5d..907aa6a9e 100644
--- a/src/llmcompressor/entrypoints/oneshot.py
+++ b/src/llmcompressor/entrypoints/oneshot.py
@@ -7,6 +7,8 @@
 with various pipeline configurations for efficient model optimization.
 """
 
+from __future__ import annotations
+
 import os
 from datetime import datetime
 from pathlib import Path
@@ -238,7 +240,7 @@ def oneshot(
     clear_sparse_session: bool = False,
     stage: str | None = None,
     # Dataset arguments
-    dataset: "str | Dataset | DatasetDict | None" = None,
+    dataset: str | Dataset | DatasetDict | None = None,
     dataset_config_name: str | None = None,
     dataset_path: str | None = None,
     splits: str | list[str] | dict[str, str] | None = None,
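
For context on the style PATCH 1 adopts: PEP 604 unions (`X | None` in place of
`Optional[X]`, `A | B` in place of `Union[A, B]`) and PEP 585 built-in generics
(`list[str]`, `dict[str, str]` in place of `typing.List`, `typing.Dict`). These
spellings are native on Python 3.10+ (built-in generics from 3.9); on older
interpreters they only evaluate safely under the deferred evaluation that
PATCH 4 enables. A minimal before/after sketch, with a hypothetical function
standing in for the signatures above (not llmcompressor code):

    # Illustrative sketch only -- hypothetical function, not llmcompressor's API.

    # Old style: typing-module aliases with verbose nesting.
    from typing import Dict, List, Optional, Union

    def load_old(
        recipe: Optional[Union[str, List[str]]] = None,
        splits: Optional[Union[str, List[str], Dict[str, str]]] = None,
    ) -> Optional[str]:
        ...

    # New style: PEP 604 unions + PEP 585 built-in generics.
    # Runs natively on Python 3.10+; on 3.8/3.9 the unions require
    # `from __future__ import annotations` (see PATCH 4).
    def load_new(
        recipe: str | list[str] | None = None,
        splits: str | list[str] | dict[str, str] | None = None,
    ) -> str | None:
        ...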
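
PATCH 3 and PATCH 4 address a second, separate problem: `Dataset` and
`DatasetDict` are imported only under `typing.TYPE_CHECKING`, so the names do
not exist at runtime. Because annotations are normally evaluated when the `def`
statement executes, the unquoted union raises a NameError, and PATCH 1's mixed
form `str | "Dataset"` raises a TypeError, since a type cannot be `|`-combined
with a string. PATCH 3 works around this by quoting the entire annotation;
PATCH 4 replaces the workaround with `from __future__ import annotations`
(PEP 563), which stores every annotation as a string and makes the clean
unquoted form safe. A minimal sketch, assuming the Hugging Face `datasets`
package supplies the type-checking-only import and using a hypothetical
function in place of oneshot():

    from __future__ import annotations  # PEP 563: annotations stored as strings

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        # Visible to type checkers only; never imported at runtime.
        from datasets import Dataset, DatasetDict

    # Hypothetical stand-in for the oneshot() signature.
    def calibrate(dataset: str | Dataset | DatasetDict | None = None) -> None:
        # Without the __future__ import, this def would raise
        # NameError: name 'Dataset' is not defined, because the
        # annotation would be evaluated right here at definition time.
        ...

    # The annotation survives as a plain string for runtime introspection:
    print(calibrate.__annotations__["dataset"])
    # prints: str | Dataset | DatasetDict | None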