49 changes: 25 additions & 24 deletions src/llmcompressor/entrypoints/oneshot.py
@@ -10,7 +10,7 @@
import os
from datetime import datetime
from pathlib import Path
-from typing import TYPE_CHECKING, Dict, List, Optional, Union
+from typing import TYPE_CHECKING

from loguru import logger
from torch.utils.data import DataLoader
@@ -35,7 +35,7 @@ class Oneshot:
This class handles the entire lifecycle of one-shot calibration, including
preprocessing (model and tokenizer/processor initialization), model optimization
(quantization or sparsification), and postprocessing (saving outputs). The
-intructions for model optimization can be specified by using a recipe.
+instructions for model optimization can be specified by using a recipe.

- **Input Keyword Arguments:**
`kwargs` are parsed into:
@@ -98,7 +98,7 @@ class Oneshot:

def __init__(
self,
-log_dir: Optional[str] = None,
+log_dir: str | None = None,
**kwargs,
):
"""
@@ -178,8 +178,8 @@ def __call__(self):

def apply_recipe_modifiers(
self,
-calibration_dataloader: Optional[DataLoader],
-recipe_stage: Optional[str] = None,
+calibration_dataloader: DataLoader | None,
+recipe_stage: str | None = None,
):
"""
Applies recipe modifiers to the model during the lifecycle.
@@ -197,7 +197,7 @@
session = active_session()
session.reset()

-# (Helen INFERENG-661): validate recipe modifiers before intialization
+# (Helen INFERENG-661): validate recipe modifiers before initialization
session.initialize(
model=self.model,
start=-1,
@@ -220,28 +220,28 @@

def oneshot(
# Model arguments
-model: Union[str, PreTrainedModel],
-distill_teacher: Optional[str] = None,
-config_name: Optional[str] = None,
-tokenizer: Optional[Union[str, PreTrainedTokenizerBase]] = None,
-processor: Optional[Union[str, ProcessorMixin]] = None,
-cache_dir: Optional[str] = None,
+model: str | PreTrainedModel,
+distill_teacher: str | None = None,
+config_name: str | None = None,
+tokenizer: str | PreTrainedTokenizerBase | None = None,
+processor: str | ProcessorMixin | None = None,
+cache_dir: str | None = None,
use_auth_token: bool = False,
precision: str = "auto",
tie_word_embeddings: bool = False,
trust_remote_code_model: bool = False,
save_compressed: bool = True,
model_revision: str = "main",
# Recipe arguments
-recipe: Optional[Union[str, List[str]]] = None,
-recipe_args: Optional[List[str]] = None,
+recipe: str | list[str] | None = None,
+recipe_args: list[str] | None = None,
clear_sparse_session: bool = False,
-stage: Optional[str] = None,
+stage: str | None = None,
# Dataset arguments
-dataset: Optional[Union[str, "Dataset", "DatasetDict"]] = None,
-dataset_config_name: Optional[str] = None,
-dataset_path: Optional[str] = None,
-splits: Optional[Union[str, List, Dict]] = None,
+dataset: "str | Dataset | DatasetDict | None" = None,
Review comment from the PR author:
We can't use quoted names with the | operator at runtime, so I made the whole annotation a single string; type checkers can still parse and understand it. I considered a few alternatives (see the sketch after this comment):

  • Import Dataset and DatasetDict directly: this adds to the import load.
  • Keep the old Union/Optional style: it still needs to import Union.
  • Add from __future__ import annotations at the top: that defers every annotation in this file to a string.

I want to keep the change localized and consistent with the surrounding code. I am happy to consider any recommendations.
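A minimal sketch of the runtime behavior described above (not part of the diff; assumes Python 3.10+ and the datasets library, and uses a hypothetical function name):

# Minimal sketch, not from the PR: why the whole annotation must be one string.
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from datasets import Dataset, DatasetDict  # resolved by type checkers only

# Mixing a quoted name with `|` fails when the function is defined, because
# the annotation is evaluated eagerly and "Dataset" is just a str instance:
#     def load(dataset: "Dataset" | None = None): ...
#     TypeError: unsupported operand type(s) for |: 'str' and 'NoneType'

# Quoting the entire annotation avoids evaluation: at runtime it stays a
# plain string, while type checkers parse it as a forward reference.
def load(dataset: "str | Dataset | DatasetDict | None" = None):
    return dataset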

+dataset_config_name: str | None = None,
+dataset_path: str | None = None,
+splits: str | list[str] | dict[str, str] | None = None,
num_calibration_samples: int = 512,
shuffle_calibration_samples: bool = True,
max_seq_length: int = 384,
@@ -250,13 +250,13 @@
concatenate_data: bool = False,
streaming: bool = False,
overwrite_cache: bool = False,
-preprocessing_num_workers: Optional[int] = None,
-min_tokens_per_module: Optional[float] = None,
+preprocessing_num_workers: int | None = None,
+min_tokens_per_module: float | None = None,
calibrate_moe_context: bool = False,
quantization_aware_calibration: bool = True,
# Miscellaneous arguments
-output_dir: Optional[str] = None,
-log_dir: Optional[str] = None,
+output_dir: str | None = None,
+log_dir: str | None = None,
**kwargs,
) -> PreTrainedModel:
"""
@@ -287,7 +287,8 @@ def oneshot(
tag, or commit id).

# Recipe arguments
-:param recipe: Path to a LLM Compressor sparsification recipe.
+:param recipe: Path to a LLM Compressor recipe, or a list of paths
+    to multiple LLM Compressor recipes.
:param recipe_args: List of recipe arguments to evaluate, in the
format "key1=value1", "key2=value2".
:param clear_sparse_session: Whether to clear CompressionSession/
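For orientation, a hedged usage sketch of the oneshot signature after this change; the model id, recipe paths, recipe_args values, and split mapping are hypothetical placeholders, not taken from this PR:

# Hypothetical usage of the oneshot entrypoint with the new PEP 604 annotations.
# All concrete values here are illustrative placeholders.
from llmcompressor import oneshot

model = oneshot(
    model="org/example-model",                       # str | PreTrainedModel
    recipe=["sparsify.yaml", "quantize.yaml"],       # str | list[str] | None
    recipe_args=["key1=value1", "key2=value2"],      # "key=value" strings per the docstring
    dataset="open_platypus",                         # "str | Dataset | DatasetDict | None"
    splits={"calibration": "train[:512]"},           # str | list[str] | dict[str, str] | None
    num_calibration_samples=512,
    output_dir="./example-compressed-model",
)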
17 changes: 8 additions & 9 deletions src/llmcompressor/entrypoints/utils.py
@@ -10,7 +10,6 @@
import inspect
import os
from pathlib import PosixPath
-from typing import Optional, Tuple

from compressed_tensors.utils import remove_dispatch
from loguru import logger
@@ -47,7 +46,7 @@
def pre_process(
model_args: ModelArguments,
dataset_args: DatasetArguments,
-output_dir: Optional[str],
+output_dir: str | None,
):
"""
Prepares the model and tokenizer/processor for calibration.
@@ -103,9 +102,9 @@ def pre_process(


def post_process(
-model_args: Optional["ModelArguments"] = None,
-recipe_args: Optional["RecipeArguments"] = None,
-output_dir: Optional[str] = None,
+model_args: ModelArguments | None = None,
Review comment from the PR author:

I made this consistent with the pre_process method above.

+recipe_args: RecipeArguments | None = None,
+output_dir: str | None = None,
):
"""
Saves the model and tokenizer/processor to the output directory if model_args,
@@ -167,8 +166,8 @@ def _warn_tied_embeddings(tie_word_embeddings: bool = False):

def initialize_model_from_path(
model_args: ModelArguments,
-training_args: Optional[TrainingArguments] = None,
-) -> Tuple[PreTrainedModel, Optional[PreTrainedModel]]:
+training_args: TrainingArguments | None = None,
+) -> tuple[PreTrainedModel, PreTrainedModel | None]:
# Load pretrained model
# The .from_pretrained methods guarantee that only one local process can
# concurrently download model & vocab.
@@ -256,7 +255,7 @@ def initialize_model_from_path(
def initialize_processor_from_path(
model_args: ModelArguments,
model: PreTrainedModel,
-teacher: Optional[PreTrainedModel] = None,
+teacher: PreTrainedModel | None = None,
) -> Processor:
processor_src = model_args.processor or get_processor_name_from_model(
model, teacher
@@ -295,7 +294,7 @@ def initialize_processor_from_path(
return processor


-def get_processor_name_from_model(student: Module, teacher: Optional[Module]) -> str:
+def get_processor_name_from_model(student: Module, teacher: Module | None) -> str:
"""
Get a processor/tokenizer source used for both student and teacher, assuming
that they could be shared