
Commit c09d912

chore: added CI model layer caching
1 parent: 028dc61

File tree

2 files changed: +46 −9 lines


.github/workflows/cicd.yml

Lines changed: 33 additions & 1 deletion
@@ -110,14 +110,46 @@ jobs:
       include:
         - component: api
           build_args: "--target nilai --platform linux/amd64"
+        - component: vllm
+          model_to_cache: "openai/gpt-oss-20b"
     steps:
       - name: Checkout
         uses: actions/checkout@v2

+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Login to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
       - name: Build ${{ matrix.component }} image
         run: |
           echo "Building ${{ matrix.component }} image..."
-          docker build -t nillion/nilai-${{ matrix.component }}:latest -f docker/${{ matrix.component }}.Dockerfile ${{ matrix.build_args || '' }} .
+
+          # Set cache and build args based on component
+          CACHE_FROM="type=registry,ref=ghcr.io/${{ github.repository }}/nilai-${{ matrix.component }}:buildcache"
+          CACHE_TO="type=registry,ref=ghcr.io/${{ github.repository }}/nilai-${{ matrix.component }}:buildcache,mode=max"
+
+          # Add model caching for vllm component
+          EXTRA_BUILD_ARGS=""
+          if [ "${{ matrix.component }}" = "vllm" ] && [ -n "${{ matrix.model_to_cache || '' }}" ]; then
+            EXTRA_BUILD_ARGS="--build-arg MODEL_TO_CACHE=${{ matrix.model_to_cache }} --build-arg HF_TOKEN=${{ secrets.HF_TOKEN }}"
+          fi
+
+          docker buildx build \
+            -t nillion/nilai-${{ matrix.component }}:latest \
+            -f docker/${{ matrix.component }}.Dockerfile \
+            --cache-from=${CACHE_FROM} \
+            --cache-to=${CACHE_TO} \
+            --load \
+            ${{ matrix.build_args || '' }} \
+            ${EXTRA_BUILD_ARGS} \
+            .
+
           echo "✅ ${{ matrix.component }} build completed successfully"

   e2e-tests:
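The build cache lives in a registry tag (nilai-<component>:buildcache on ghcr.io), so any machine with pull access can reuse the CI layers. Below is a minimal sketch of reproducing the cached vllm build locally; the <org>/<repo> path, a prior `docker login ghcr.io`, and push rights for --cache-to are assumptions for illustration, not part of the commit.

    # Hypothetical local reproduction of the CI build step.
    # REPO stands in for ${{ github.repository }}; replace with the real path.
    REPO="ghcr.io/<org>/<repo>"
    CACHE_REF="${REPO}/nilai-vllm:buildcache"

    docker buildx build \
      -t nillion/nilai-vllm:latest \
      -f docker/vllm.Dockerfile \
      --build-arg MODEL_TO_CACHE=openai/gpt-oss-20b \
      --build-arg HF_TOKEN="$HF_TOKEN" \
      --cache-from=type=registry,ref=${CACHE_REF} \
      --cache-to=type=registry,ref=${CACHE_REF},mode=max \
      --load \
      .

The first run warms the buildcache tag rather than hitting it (mode=max also exports intermediate layers); subsequent runs, locally or in CI, pull the multi-gigabyte model layer from the registry instead of re-downloading it from Hugging Face.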

docker/vllm.Dockerfile

Lines changed: 13 additions & 8 deletions
@@ -1,13 +1,8 @@
 FROM vllm/vllm-openai:v0.10.1

-# # Specify model name and path during build
-# ARG MODEL_NAME=llama_1b_cpu
-# ARG MODEL_PATH=meta-llama/Llama-3.1-8B-Instruct
-
-# # Set environment variables
-# ENV MODEL_NAME=${MODEL_NAME}
-# ENV MODEL_PATH=${MODEL_PATH}
-# ENV EXEC_PATH=nilai_models.models.${MODEL_NAME}:app
+# Specify model to pre-download during build (optional, for caching)
+ARG MODEL_TO_CACHE=""
+ARG HF_TOKEN=""

 COPY --link . /daemon/
 COPY --link vllm_templates /opt/vllm/templates

@@ -22,6 +17,16 @@ RUN apt-get update && \
     apt-get autoremove && \
     rm -rf /var/lib/apt/lists/*

+# Pre-download model if MODEL_TO_CACHE is provided
+# This creates a cached layer with the model to avoid re-downloading in CI
+RUN if [ -n "$MODEL_TO_CACHE" ]; then \
+        echo "Pre-downloading model: $MODEL_TO_CACHE"; \
+        export HF_TOKEN="${HF_TOKEN}"; \
+        python3 -c "from huggingface_hub import snapshot_download; snapshot_download('$MODEL_TO_CACHE', cache_dir='/root/.cache/huggingface')"; \
+    else \
+        echo "No model specified for caching, will download at runtime"; \
+    fi
+
 # Expose port 8000 for incoming requests
 EXPOSE 8000
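To verify that the pre-download layer actually holds the weights, the Hugging Face cache directory in the built image can be listed. One caveat worth double-checking: snapshot_download with an explicit cache_dir writes models--org--name folders directly under that path, while the default hub cache is ~/.cache/huggingface/hub, so the runtime must look in the same location for the cached copy to be found. A quick smoke test, assuming the image tag produced by the workflow above:

    # Hypothetical check; image tag taken from the CI build step.
    docker run --rm --entrypoint ls nillion/nilai-vllm:latest \
      /root/.cache/huggingface
    # Expect a models--openai--gpt-oss-20b directory if the layer was cached.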
