diff --git a/src/llmcompressor/entrypoints/oneshot.py b/src/llmcompressor/entrypoints/oneshot.py
index ba3f0e1bb..e0211f0f1 100644
--- a/src/llmcompressor/entrypoints/oneshot.py
+++ b/src/llmcompressor/entrypoints/oneshot.py
@@ -7,10 +7,12 @@
 with various pipeline configurations for efficient model optimization.
 """
 
+from __future__ import annotations
+
 import os
 from datetime import datetime
 from pathlib import Path
-from typing import TYPE_CHECKING, Dict, List, Optional, Union
+from typing import TYPE_CHECKING
 
 from loguru import logger
 from torch.utils.data import DataLoader
@@ -36,7 +38,7 @@ class Oneshot:
     This class handles the entire lifecycle of one-shot calibration, including
     preprocessing (model and tokenizer/processor initialization), model optimization
     (quantization or sparsification), and postprocessing (saving outputs). The
-    intructions for model optimization can be specified by using a recipe.
+    instructions for model optimization can be specified by using a recipe.
 
     - **Input Keyword Arguments:**
         `kwargs` are parsed into:
@@ -99,7 +101,7 @@ class Oneshot:
 
     def __init__(
         self,
-        log_dir: Optional[str] = None,
+        log_dir: str | None = None,
         **kwargs,
     ):
         """
@@ -179,8 +181,8 @@ def __call__(self):
 
     def apply_recipe_modifiers(
         self,
-        calibration_dataloader: Optional[DataLoader],
-        recipe_stage: Optional[str] = None,
+        calibration_dataloader: DataLoader | None,
+        recipe_stage: str | None = None,
     ):
         """
         Applies recipe modifiers to the model during the lifecycle.
@@ -198,7 +200,7 @@
         session = active_session()
         session.reset()
 
-        # (Helen INFERENG-661): validate recipe modifiers before intialization
+        # (Helen INFERENG-661): validate recipe modifiers before initialization
         session.initialize(
             model=self.model,
             start=-1,
@@ -226,12 +228,12 @@ def apply_recipe_modifiers(
 
 def oneshot(
     # Model arguments
-    model: Union[str, PreTrainedModel],
-    distill_teacher: Optional[str] = None,
-    config_name: Optional[str] = None,
-    tokenizer: Optional[Union[str, PreTrainedTokenizerBase]] = None,
-    processor: Optional[Union[str, ProcessorMixin]] = None,
-    cache_dir: Optional[str] = None,
+    model: str | PreTrainedModel,
+    distill_teacher: str | None = None,
+    config_name: str | None = None,
+    tokenizer: str | PreTrainedTokenizerBase | None = None,
+    processor: str | ProcessorMixin | None = None,
+    cache_dir: str | None = None,
     use_auth_token: bool = False,
     precision: str = "auto",
     tie_word_embeddings: bool = False,
@@ -239,15 +241,15 @@ def oneshot(
     save_compressed: bool = True,
     model_revision: str = "main",
     # Recipe arguments
-    recipe: Optional[Union[str, List[str]]] = None,
-    recipe_args: Optional[List[str]] = None,
+    recipe: str | list[str] | None = None,
+    recipe_args: list[str] | None = None,
     clear_sparse_session: bool = False,
-    stage: Optional[str] = None,
+    stage: str | None = None,
     # Dataset arguments
-    dataset: Optional[Union[str, "Dataset", "DatasetDict"]] = None,
-    dataset_config_name: Optional[str] = None,
-    dataset_path: Optional[str] = None,
-    splits: Optional[Union[str, List, Dict]] = None,
+    dataset: str | Dataset | DatasetDict | None = None,
+    dataset_config_name: str | None = None,
+    dataset_path: str | None = None,
+    splits: str | list[str] | dict[str, str] | None = None,
     num_calibration_samples: int = 512,
     shuffle_calibration_samples: bool = True,
     max_seq_length: int = 384,
@@ -256,13 +258,13 @@ def oneshot(
     concatenate_data: bool = False,
     streaming: bool = False,
     overwrite_cache: bool = False,
-    preprocessing_num_workers: Optional[int] = None,
-    min_tokens_per_module: Optional[float] = None,
+    preprocessing_num_workers: int | None = None,
+    min_tokens_per_module: float | None = None,
    moe_calibrate_all_experts: bool = True,
     quantization_aware_calibration: bool = True,
     # Miscellaneous arguments
-    output_dir: Optional[str] = None,
-    log_dir: Optional[str] = None,
+    output_dir: str | None = None,
+    log_dir: str | None = None,
     **kwargs,
 ) -> PreTrainedModel:
     """
@@ -293,7 +295,8 @@ def oneshot(
         tag, or commit id).
 
     # Recipe arguments
-    :param recipe: Path to a LLM Compressor sparsification recipe.
+    :param recipe: Path to a LLM Compressor recipe, or a list of paths
+        to multiple LLM Compressor recipes.
     :param recipe_args: List of recipe arguments to evaluate, in the format
         "key1=value1", "key2=value2".
     :param clear_sparse_session: Whether to clear CompressionSession/
diff --git a/src/llmcompressor/entrypoints/utils.py b/src/llmcompressor/entrypoints/utils.py
index 32c02ba15..97c46644f 100644
--- a/src/llmcompressor/entrypoints/utils.py
+++ b/src/llmcompressor/entrypoints/utils.py
@@ -10,7 +10,6 @@
 import inspect
 import os
 from pathlib import PosixPath
-from typing import Optional, Tuple
 
 from compressed_tensors.utils import remove_dispatch
 from loguru import logger
@@ -47,7 +46,7 @@
 def pre_process(
     model_args: ModelArguments,
     dataset_args: DatasetArguments,
-    output_dir: Optional[str],
+    output_dir: str | None,
 ):
     """
     Prepares the model and tokenizer/processor for calibration.
@@ -103,9 +102,9 @@ def pre_process(
 
 
 def post_process(
-    model_args: Optional["ModelArguments"] = None,
-    recipe_args: Optional["RecipeArguments"] = None,
-    output_dir: Optional[str] = None,
+    model_args: ModelArguments | None = None,
+    recipe_args: RecipeArguments | None = None,
+    output_dir: str | None = None,
 ):
     """
     Saves the model and tokenizer/processor to the output directory if model_args,
@@ -167,8 +166,8 @@ def _warn_tied_embeddings(tie_word_embeddings: bool = False):
 
 def initialize_model_from_path(
     model_args: ModelArguments,
-    training_args: Optional[TrainingArguments] = None,
-) -> Tuple[PreTrainedModel, Optional[PreTrainedModel]]:
+    training_args: TrainingArguments | None = None,
+) -> tuple[PreTrainedModel, PreTrainedModel | None]:
     # Load pretrained model
     # The .from_pretrained methods guarantee that only one local process can
     # concurrently download model & vocab.
@@ -256,7 +255,7 @@ def initialize_model_from_path(
 def initialize_processor_from_path(
     model_args: ModelArguments,
     model: PreTrainedModel,
-    teacher: Optional[PreTrainedModel] = None,
+    teacher: PreTrainedModel | None = None,
 ) -> Processor:
     processor_src = model_args.processor or get_processor_name_from_model(
         model, teacher
@@ -295,7 +294,7 @@ def initialize_processor_from_path(
     return processor
 
 
-def get_processor_name_from_model(student: Module, teacher: Optional[Module]) -> str:
+def get_processor_name_from_model(student: Module, teacher: Module | None) -> str:
     """
     Get a processor/tokenizer source used for both student and teacher, assuming
     that they could be shared
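Note on the typing change above: the diff moves both files to PEP 604 unions (`str | None`) and PEP 585 built-in generics (`list[str]`, `dict[str, str]`), and adds `from __future__ import annotations` to `oneshot.py` so the new syntax inside annotations is never evaluated at runtime and therefore keeps the module importable on Python versions older than 3.10. A minimal sketch of the pattern follows; the `normalize_recipe` helper is hypothetical and not part of this PR.

```python
# Hypothetical sketch of the annotation style adopted in this diff -- not PR code.
from __future__ import annotations  # PEP 563: annotations are stored as strings


def normalize_recipe(recipe: str | list[str] | None = None) -> list[str]:
    """Normalize a recipe argument into a list of recipe paths."""
    if recipe is None:
        return []
    if isinstance(recipe, str):  # runtime checks still use concrete types
        return [recipe]
    return list(recipe)


print(normalize_recipe("recipe.yaml"))         # ['recipe.yaml']
print(normalize_recipe(["a.yaml", "b.yaml"]))  # ['a.yaml', 'b.yaml']
```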