
Commit 2dea601

ralphbean and claude committed
[Cache] Fix environment variable handling for offline mode
Previously, llm-compressor ignored HF_HUB_CACHE and other environment variables when loading models and datasets, making offline mode difficult to use with unified cache directories. This change:

- Removes the hard-coded TRANSFORMERS_CACHE in model_load/helpers.py to respect the HF_HOME and HF_HUB_CACHE environment variables
- Propagates cache_dir from model_args to dataset_args to enable a unified cache directory for both models and datasets
- Updates dataset loading to use the cache_dir parameter instead of a hardcoded None

Now users can specify the cache_dir parameter or use the HF_HOME/HF_HUB_CACHE environment variables for true offline operation.

Signed-off-by: Ralph Bean <[email protected]>
Co-Authored-By: Claude <[email protected]>
1 parent 33ef5f4 commit 2dea601
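As a usage sketch (not part of this commit), the following shows what offline operation might look like after this change. The oneshot entrypoint, model id, dataset id, and recipe path are illustrative assumptions; HF_HUB_CACHE and HF_HUB_OFFLINE are standard Hugging Face environment variables.

import os

# Standard Hugging Face env vars: point the hub cache at a pre-populated
# directory and forbid network access so everything must resolve locally.
# Set these before importing any Hugging Face libraries.
os.environ["HF_HUB_CACHE"] = "/srv/hf-cache/hub"
os.environ["HF_HUB_OFFLINE"] = "1"

from llmcompressor import oneshot  # assumed entrypoint; adjust to your install

# With this change, the cache_dir model argument (or the env vars above)
# is honored for both model and dataset loading.
oneshot(
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",  # illustrative model id
    dataset="wikitext",                          # illustrative dataset id
    recipe="recipe.yaml",                        # illustrative recipe path
    cache_dir="/srv/hf-cache/hub",
)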

4 files changed: +17 −3 lines changed

src/llmcompressor/args/dataset_arguments.py

Lines changed: 8 additions & 0 deletions
@@ -150,6 +150,14 @@ class DatasetArguments(CustomDatasetArguments):
         default=False,
         metadata={"help": "Overwrite the cached preprocessed datasets or not."},
     )
+    cache_dir: Optional[str] = field(
+        init=False,
+        default=None,
+        metadata={
+            "help": "Where to store the pretrained datasets from huggingface.co. "
+            "This field is set from model_args.cache_dir to enable unified caching."
+        },
+    )
     preprocessing_num_workers: Optional[int] = field(
         default=None,
         metadata={"help": "The number of processes to use for the preprocessing."},

src/llmcompressor/args/utils.py

Lines changed: 5 additions & 0 deletions
@@ -79,4 +79,9 @@ def parse_args(
     # silently assign tokenizer to processor
     resolve_processor_from_model_args(model_args)
 
+    # copy cache_dir from model_args to dataset_args to support offline mode
+    # with a single unified cache directory. This allows both models and datasets
+    # to use the same cache when cache_dir is specified
+    dataset_args.cache_dir = model_args.cache_dir
+
     return model_args, dataset_args, recipe_args, training_args, output_dir
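The two hunks above work as a pair: the init=False field keeps cache_dir out of DatasetArguments.__init__ (users still pass it only on the model side), and parse_args copies it over after parsing. A minimal, self-contained sketch of that pattern, using illustrative class names rather than the real argument classes:

from dataclasses import dataclass, field
from typing import Optional

@dataclass
class SketchModelArguments:            # stand-in for the real ModelArguments
    cache_dir: Optional[str] = None

@dataclass
class SketchDatasetArguments:          # stand-in for the real DatasetArguments
    overwrite_cache: bool = False
    # init=False: not a constructor argument; filled in after parsing
    cache_dir: Optional[str] = field(init=False, default=None)

model_args = SketchModelArguments(cache_dir="/srv/hf-cache/hub")
dataset_args = SketchDatasetArguments()        # cache_dir cannot be passed here

# what parse_args now does: propagate the unified cache directory
dataset_args.cache_dir = model_args.cache_dir
print(dataset_args.cache_dir)                  # /srv/hf-cache/hub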

src/llmcompressor/pytorch/model_load/helpers.py

Lines changed: 3 additions & 2 deletions
@@ -149,16 +149,17 @@ def copy_python_files_from_model_cache(model, save_path: str):
     import shutil
 
     from huggingface_hub import hf_hub_download
-    from transformers import TRANSFORMERS_CACHE
     from transformers.utils import http_user_agent
 
     cache_path = config._name_or_path
     if not os.path.exists(cache_path):
         user_agent = http_user_agent()
+        # Use cache_dir=None to respect HF_HOME, HF_HUB_CACHE, and other
+        # environment variables for cache location
         config_file_path = hf_hub_download(
             repo_id=cache_path,
             filename="config.json",
-            cache_dir=TRANSFORMERS_CACHE,
+            cache_dir=None,
             force_download=False,
             user_agent=user_agent,
         )
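For reference, with cache_dir=None huggingface_hub falls back to its environment-driven cache resolution (HF_HUB_CACHE, or HF_HOME/hub, then the default ~/.cache/huggingface/hub). A small sketch of that behavior, with an illustrative repo id and cache path:

import os

# Must be set before importing huggingface_hub, which reads it at import time.
os.environ["HF_HUB_CACHE"] = "/srv/hf-cache/hub"

from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0",  # illustrative repo id
    filename="config.json",
    cache_dir=None,  # resolve the cache from env vars, not a hard-coded path
)
print(path)  # resolves under /srv/hf-cache/hub/...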

src/llmcompressor/transformers/finetune/data/base.py

Lines changed: 1 addition & 1 deletion
@@ -195,7 +195,7 @@ def load_dataset(self):
         logger.debug(f"Loading dataset {self.dataset_args.dataset}")
         return get_raw_dataset(
             self.dataset_args,
-            None,
+            cache_dir=self.dataset_args.cache_dir,
             split=self.split,
             streaming=self.dataset_args.streaming,
             **self.dataset_args.raw_kwargs,
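get_raw_dataset presumably defers to the datasets library, which honors an explicit cache_dir (and, when none is given, HF_DATASETS_CACHE / HF_HOME). A sketch of that underlying behavior, assuming the datasets library and an illustrative public dataset:

from datasets import load_dataset

# Passing cache_dir stores and reuses the dataset under that directory
# instead of the default ~/.cache/huggingface/datasets.
ds = load_dataset(
    "wikitext",               # illustrative dataset id
    "wikitext-2-raw-v1",
    split="train",
    cache_dir="/srv/hf-cache/datasets",
)
print(len(ds))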
