
Commit c09d912

chore: added CI model layer caching
1 parent: 028dc61

File tree

2 files changed: +46 −9 lines


.github/workflows/cicd.yml

Lines changed: 33 additions & 1 deletion
@@ -110,14 +110,46 @@ jobs:
       include:
         - component: api
           build_args: "--target nilai --platform linux/amd64"
+        - component: vllm
+          model_to_cache: "openai/gpt-oss-20b"
     steps:
       - name: Checkout
         uses: actions/checkout@v2

+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Login to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
       - name: Build ${{ matrix.component }} image
         run: |
           echo "Building ${{ matrix.component }} image..."
-          docker build -t nillion/nilai-${{ matrix.component }}:latest -f docker/${{ matrix.component }}.Dockerfile ${{ matrix.build_args || '' }} .
+
+          # Set cache and build args based on component
+          CACHE_FROM="type=registry,ref=ghcr.io/${{ github.repository }}/nilai-${{ matrix.component }}:buildcache"
+          CACHE_TO="type=registry,ref=ghcr.io/${{ github.repository }}/nilai-${{ matrix.component }}:buildcache,mode=max"
+
+          # Add model caching for vllm component
+          EXTRA_BUILD_ARGS=""
+          if [ "${{ matrix.component }}" = "vllm" ] && [ -n "${{ matrix.model_to_cache || '' }}" ]; then
+            EXTRA_BUILD_ARGS="--build-arg MODEL_TO_CACHE=${{ matrix.model_to_cache }} --build-arg HF_TOKEN=${{ secrets.HF_TOKEN }}"
+          fi
+
+          docker buildx build \
+            -t nillion/nilai-${{ matrix.component }}:latest \
+            -f docker/${{ matrix.component }}.Dockerfile \
+            --cache-from=${CACHE_FROM} \
+            --cache-to=${CACHE_TO} \
+            --load \
+            ${{ matrix.build_args || '' }} \
+            ${EXTRA_BUILD_ARGS} \
+            .
+
           echo "✅ ${{ matrix.component }} build completed successfully"

   e2e-tests:
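The build cache lives in a registry tag (nilai-<component>:buildcache on ghcr.io), so any machine with pull access can reuse the CI layers. Below is a minimal sketch of reproducing the cached vllm build locally; the <org>/<repo> path, a prior `docker login ghcr.io`, and push rights for --cache-to are assumptions for illustration, not part of the commit.

    # Hypothetical local reproduction of the CI build step.
    # REPO stands in for ${{ github.repository }}; replace with the real path.
    REPO="ghcr.io/<org>/<repo>"
    CACHE_REF="${REPO}/nilai-vllm:buildcache"

    docker buildx build \
      -t nillion/nilai-vllm:latest \
      -f docker/vllm.Dockerfile \
      --build-arg MODEL_TO_CACHE=openai/gpt-oss-20b \
      --build-arg HF_TOKEN="$HF_TOKEN" \
      --cache-from=type=registry,ref=${CACHE_REF} \
      --cache-to=type=registry,ref=${CACHE_REF},mode=max \
      --load \
      .

The first run warms the buildcache tag rather than hitting it (mode=max also exports intermediate layers); subsequent runs, locally or in CI, pull the multi-gigabyte model layer from the registry instead of re-downloading it from Hugging Face.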

docker/vllm.Dockerfile

Lines changed: 13 additions & 8 deletions
@@ -1,13 +1,8 @@
 FROM vllm/vllm-openai:v0.10.1

-# # Specify model name and path during build
-# ARG MODEL_NAME=llama_1b_cpu
-# ARG MODEL_PATH=meta-llama/Llama-3.1-8B-Instruct
-
-# # Set environment variables
-# ENV MODEL_NAME=${MODEL_NAME}
-# ENV MODEL_PATH=${MODEL_PATH}
-# ENV EXEC_PATH=nilai_models.models.${MODEL_NAME}:app
+# Specify model to pre-download during build (optional, for caching)
+ARG MODEL_TO_CACHE=""
+ARG HF_TOKEN=""

 COPY --link . /daemon/
 COPY --link vllm_templates /opt/vllm/templates

@@ -22,6 +17,16 @@ RUN apt-get update && \
     apt-get autoremove && \
     rm -rf /var/lib/apt/lists/*

+# Pre-download model if MODEL_TO_CACHE is provided
+# This creates a cached layer with the model to avoid re-downloading in CI
+RUN if [ -n "$MODEL_TO_CACHE" ]; then \
+        echo "Pre-downloading model: $MODEL_TO_CACHE"; \
+        export HF_TOKEN="${HF_TOKEN}"; \
+        python3 -c "from huggingface_hub import snapshot_download; snapshot_download('$MODEL_TO_CACHE', cache_dir='/root/.cache/huggingface')"; \
+    else \
+        echo "No model specified for caching, will download at runtime"; \
+    fi
+
 # Expose port 8000 for incoming requests
 EXPOSE 8000
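To verify that the pre-download layer actually holds the weights, the Hugging Face cache directory in the built image can be listed. One caveat worth double-checking: snapshot_download with an explicit cache_dir writes models--org--name folders directly under that path, while the default hub cache is ~/.cache/huggingface/hub, so the runtime must look in the same location for the cached copy to be found. A quick smoke test, assuming the image tag produced by the workflow above:

    # Hypothetical check; image tag taken from the CI build step.
    docker run --rm --entrypoint ls nillion/nilai-vllm:latest \
      /root/.cache/huggingface
    # Expect a models--openai--gpt-oss-20b directory if the layer was cached.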
