
Commit 7eb882a

ralphbean and claude committed
Remove cache_dir parameter in favor of environment variables
Following feedback on PR vllm-project#1902, this removes the cache_dir parameter entirely from ModelArguments, DatasetArguments, and the oneshot() API.

By removing explicit cache_dir parameters and setting all calls to cache_dir=None, the HuggingFace libraries will automatically respect the standard environment variable hierarchy (HF_HOME, HF_HUB_CACHE) for determining cache locations. This approach:

- Simplifies the codebase by removing parameter propagation
- Follows standard HuggingFace patterns
- Prevents cache_dir from being accidentally ignored
- Still fully supports offline mode via environment variables

Breaking change: Users who previously used the cache_dir parameter should now use the HF_HOME or HF_HUB_CACHE environment variables instead.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <[email protected]>
Signed-off-by: Ralph Bean <[email protected]>
1 parent fdd3138 commit 7eb882a
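
The message above replaces an explicit cache_dir argument with the standard Hugging Face environment variables. A minimal sketch of what that looks like for a caller, assuming the package's usual `from llmcompressor import oneshot` import; the cache path, model id, dataset, and recipe below are illustrative placeholders, not values from this commit:

import os

# Point every Hugging Face cache (hub models, datasets) at one directory.
# Set this before importing the HF libraries, which read it at import time.
os.environ["HF_HOME"] = "/mnt/shared/hf-cache"          # illustrative path
# Or override only the hub cache:
# os.environ["HF_HUB_CACHE"] = "/mnt/shared/hf-cache/hub"

# Offline mode: only succeeds if the model and dataset are already cached.
os.environ["HF_HUB_OFFLINE"] = "1"
os.environ["HF_DATASETS_OFFLINE"] = "1"

from llmcompressor import oneshot  # cache_dir is no longer accepted here

oneshot(
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",  # illustrative model id
    dataset="open_platypus",                     # illustrative dataset name
    recipe="recipe.yaml",                        # illustrative recipe path
)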

5 files changed: +6 additions, −18 deletions

src/llmcompressor/args/model_arguments.py

Lines changed: 0 additions & 4 deletions
@@ -50,10 +50,6 @@ class ModelArguments:
             "help": "Pretrained processor name or path if not the same as model_name"
         },
     )
-    cache_dir: str | None = field(
-        default=None,
-        metadata={"help": "Where to store the pretrained data from huggingface.co"},
-    )
 
     use_auth_token: bool = field(
         default=False,

src/llmcompressor/args/utils.py

Lines changed: 0 additions & 5 deletions
@@ -83,9 +83,4 @@ def parse_args(
     # silently assign tokenizer to processor
     resolve_processor_from_model_args(model_args)
 
-    # copy cache_dir from model_args to dataset_args to support offline mode
-    # with a single unified cache directory. This allows both models and datasets
-    # to use the same cache when cache_dir is specified
-    dataset_args.cache_dir = model_args.cache_dir
-
     return model_args, dataset_args, recipe_args, training_args, output_dir

src/llmcompressor/entrypoints/oneshot.py

Lines changed: 0 additions & 3 deletions
@@ -225,7 +225,6 @@ def oneshot(
     config_name: Optional[str] = None,
     tokenizer: Optional[Union[str, PreTrainedTokenizerBase]] = None,
     processor: Optional[Union[str, ProcessorMixin]] = None,
-    cache_dir: Optional[str] = None,
     use_auth_token: bool = False,
     precision: str = "auto",
     tie_word_embeddings: bool = False,
@@ -273,8 +272,6 @@ def oneshot(
         model_name.
     :param processor: Pretrained processor name or path if not the same as
         model_name.
-    :param cache_dir: Where to store the pretrained data from
-        huggingface.co.
     :param use_auth_token: Whether to use Hugging Face auth token for private
         models.
     :param precision: Precision to cast model weights to, default to auto.

src/llmcompressor/entrypoints/utils.py

Lines changed: 5 additions & 5 deletions
@@ -175,7 +175,7 @@ def initialize_model_from_path(
     model_path = model_args.model
     config = AutoConfig.from_pretrained(
         model_args.config_name if model_args.config_name else model_path,
-        cache_dir=model_args.cache_dir,
+        cache_dir=None,
         revision=model_args.model_revision,
         use_auth_token=True if model_args.use_auth_token else None,
         trust_remote_code=model_args.trust_remote_code_model,
@@ -211,7 +211,7 @@ def initialize_model_from_path(
     )
     teacher_kwargs = {
         "config": teacher_config,
-        "cache_dir": model_args.cache_dir,
+        "cache_dir": None,
         "use_auth_token": True if model_args.use_auth_token else None,
         "torch_dtype": parse_dtype(model_args.precision),
         "device_map": teacher_device_map,
@@ -233,7 +233,7 @@ def initialize_model_from_path(
 
     model_kwargs = {
         "config": config,
-        "cache_dir": model_args.cache_dir,
+        "cache_dir": None,
         "revision": model_args.model_revision,
         "use_auth_token": True if model_args.use_auth_token else None,
         "torch_dtype": parse_dtype(model_args.precision),
@@ -266,7 +266,7 @@ def initialize_processor_from_path(
     try:
         processor = AutoProcessor.from_pretrained(
             processor_src,
-            cache_dir=model_args.cache_dir,
+            cache_dir=None,
             use_fast=True,
             revision=model_args.model_revision,
             use_auth_token=True if model_args.use_auth_token else None,
@@ -285,7 +285,7 @@ def initialize_processor_from_path(
         logger.debug("Could not load fast processor, loading slow processor instead")
         processor = AutoProcessor.from_pretrained(
             processor_src,
-            cache_dir=model_args.cache_dir,
+            cache_dir=None,
             use_fast=False,
             revision=model_args.model_revision,
             use_auth_token=True if model_args.use_auth_token else None,
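
With cache_dir=None in these from_pretrained() calls, transformers defers to huggingface_hub's default cache. A quick way to check where that resolves, as a sketch with an illustrative path (constants.HF_HUB_CACHE is the huggingface_hub module constant, derived from the HF_HUB_CACHE or HF_HOME environment variables):

import os

os.environ["HF_HOME"] = "/mnt/shared/hf-cache"  # illustrative; set before import

from huggingface_hub import constants

# With cache_dir=None, from_pretrained() downloads land in this hub cache,
# resolved from HF_HUB_CACHE or, failing that, HF_HOME/hub.
print(constants.HF_HUB_CACHE)  # e.g. /mnt/shared/hf-cache/hub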

src/llmcompressor/transformers/finetune/data/base.py

Lines changed: 1 addition & 1 deletion
@@ -195,7 +195,7 @@ def load_dataset(self):
         logger.debug(f"Loading dataset {self.dataset_args.dataset}")
         return get_raw_dataset(
             self.dataset_args,
-            cache_dir=self.dataset_args.cache_dir,
+            cache_dir=None,
             split=self.split,
             streaming=self.dataset_args.streaming,
             **self.dataset_args.raw_kwargs,
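
On the dataset side, passing cache_dir=None means the datasets library falls back to its own environment-driven default; HF_DATASETS_CACHE (not mentioned in the commit message) is the dataset-specific override. A small check, with the path as an illustrative assumption:

import os

os.environ["HF_HOME"] = "/mnt/shared/hf-cache"  # illustrative; set before import

from datasets import config as datasets_config

# With cache_dir=None, load_dataset() uses this directory, resolved from
# HF_DATASETS_CACHE or, failing that, HF_HOME/datasets.
print(datasets_config.HF_DATASETS_CACHE)  # e.g. /mnt/shared/hf-cache/datasets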
