Skip to content

Commit 8786c3c

Browse files
authored
Merge branch 'main' into refactor/ingress-config
2 parents fa2c3e5 + 78d2240 commit 8786c3c

File tree

23 files changed

+208
-85
lines changed

23 files changed

+208
-85
lines changed
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
---
name: Publish vLLM CPU images

on:
  # NOTE(sd109): Since this is checking out an external
  # repository, it's probably safer to leave this as
  # workflow dispatch only so that we can manually build
  # images from specific refs rather than automatically
  # pulling in the latest content from the remote repo.
  workflow_dispatch:
    inputs:
      vllm_ref:
        type: string
        description: The vLLM GitHub ref (tag, branch or commit) to build.
        required: true

jobs:
  build_push_x86_image:
    # Distinct display name so the two jobs are distinguishable in the Actions UI
    name: Build and push x86 image
    runs-on: ubuntu-latest
    permissions:
      contents: read
      id-token: write  # needed for signing the images with GitHub OIDC Token
      packages: write  # required for pushing container images
      security-events: write  # required for pushing SARIF files
    steps:
      - name: Check out the vLLM repository
        uses: actions/checkout@v4
        with:
          repository: vllm-project/vllm
          ref: ${{ inputs.vllm_ref }}

      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Build and push image
        run: |
          IMAGE=ghcr.io/stackhpc/vllm-cpu:${{ inputs.vllm_ref }}
          docker build -f Dockerfile.cpu -t $IMAGE --shm-size=4g .
          docker push $IMAGE

  build_push_arm64_image:
    name: Build and push arm64 image
    runs-on: ubuntu-24.04-arm
    permissions:
      contents: read
      id-token: write  # needed for signing the images with GitHub OIDC Token
      packages: write  # required for pushing container images
      security-events: write  # required for pushing SARIF files
    steps:
      - name: Check out the vLLM repository
        uses: actions/checkout@v4
        with:
          repository: vllm-project/vllm
          ref: ${{ inputs.vllm_ref }}

      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Build and push image
        run: |
          # NOTE(review): arm64 tag is suffixed to avoid clobbering the x86 tag;
          # presumably consumers select the suffix explicitly — confirm.
          IMAGE=ghcr.io/stackhpc/vllm-cpu:${{ inputs.vllm_ref }}-arm64
          docker build -f Dockerfile.arm -t $IMAGE --shm-size=4g .
          docker push $IMAGE

.github/workflows/test-pr.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ jobs:
6666
with:
6767
cluster_name: ${{ env.CLUSTER_NAME }}
6868

69-
# NOTE(scott): Since the local Chart.yaml uses "appVersion: latest" and this
69+
# NOTE(scott): Since the local Chart.yaml uses "appVersion: latest" and this
7070
# only gets overwritten to the correct commit SHA during Helm chart build,
7171
# we need to pull these published images and load them into the kind cluster
7272
# with the correct tag.
Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,24 @@
11
---
# CI values for the chat app chart, reconstructed to the post-change state
# of this diff (deleted lines dropped, added lines kept).
azimuth-llm:
  huggingface:
    # Use the smallest LLM we can find
    model: &model HuggingFaceTB/SmolLM2-135M-Instruct
  api:
    # CI Kind cluster doesn't have kube-prometheus-stack
    monitoring:
      enabled: false
    # No GPUs in CI runners
    gpus: 0
  ui:
    service:
      zenith:
        enabled: false
    appSettings:
      model_name: *model
      # Verify that we can set non-standard LLM params
      llm_params:
        max_tokens: 101
        temperature: 0.1
        top_k: 2
        top_p: 0.15
        presence_penalty: 0.9
        frequency_penalty: 1
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
---
# CI values for the image-analysis app chart (new file in this commit).
azimuth-llm:
  huggingface:
    # Use the smallest vision model we can find
    model: &model HuggingFaceTB/SmolVLM-256M-Instruct
  api:
    # CI Kind cluster doesn't have kube-prometheus-stack
    monitoring:
      enabled: false
    # No GPUs in CI runners
    gpus: 0
  ui:
    service:
      zenith:
        enabled: false
    appSettings:
      model_name: *model
      # Verify that we can set non-standard LLM params
      llm_params:
        max_tokens: 10  # Constrain response tokens to speed up CI test
        temperature: 0.1
        top_p: 0.15
        presence_penalty: 0.9
        frequency_penalty: 1

charts/azimuth-image-analysis/ci/ui-only-values.yaml

Lines changed: 0 additions & 15 deletions
This file was deleted.
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
---
# This is intended to test the default chart values
# as close as possible given the constraints of running
# inside a Kind cluster within a CI runner
huggingface:
  # Use the smallest LLM we can find
  model: &model HuggingFaceTB/SmolLM2-135M-Instruct
api:
  # CI Kind cluster doesn't have kube-prometheus-stack
  monitoring:
    enabled: false
  # No GPUs in CI runners
  gpus: 0
ui:
  service:
    zenith:
      enabled: false
  appSettings:
    model_name: *model

charts/azimuth-llm/ci/no-api-values.yaml

Lines changed: 0 additions & 6 deletions
This file was deleted.

charts/azimuth-llm/templates/api/deployment.yml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ spec:
1919
spec:
2020
containers:
2121
- name: {{ .Release.Name }}-api
22-
image: {{ printf "%s:%s" .Values.api.image.repository .Values.api.image.version }}
22+
{{ $imageRepo := .Values.api.image.repository | default (ternary "ghcr.io/stackhpc/vllm-cpu" "vllm-project/vllm" (eq (.Values.api.gpus | int) 0)) -}}
23+
image: {{ printf "%s:%s" $imageRepo .Values.api.image.version }}
2324
ports:
2425
- name: api
2526
containerPort: 8000
@@ -29,7 +30,7 @@ spec:
2930
args:
3031
- --model
3132
- {{ .Values.huggingface.model }}
32-
{{- include "azimuth-llm.chatTemplate" . | nindent 10 }}
33+
{{- include "azimuth-llm.chatTemplate" . | nindent 10 -}}
3334
{{- if .Values.api.modelMaxContextLength -}}
3435
- --max-model-len
3536
- {{ .Values.api.modelMaxContextLength | quote }}
@@ -41,7 +42,7 @@ spec:
4142
{{- if .Values.api.extraArgs -}}
4243
{{- .Values.api.extraArgs | toYaml | nindent 10 }}
4344
{{- end -}}
44-
{{- if .Values.huggingface.secretName }}
45+
{{- if .Values.huggingface.secretName -}}
4546
envFrom:
4647
- secretRef:
4748
name: {{ .Values.huggingface.secretName }}

charts/azimuth-llm/templates/api/service.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
apiVersion: v1
33
kind: Service
44
metadata:
5-
name: {{ .Values.api.service.name }}
5+
name: {{ .Release.Name }}-api
66
labels:
77
{{- include "azimuth-llm.api-selectorLabels" . | nindent 4 }}
88
spec:

charts/azimuth-llm/templates/api/zenith-client.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ metadata:
88
spec:
99
reservationName: {{ .Release.Name }}-api
1010
upstream:
11-
serviceName: {{ .Values.api.service.name }}
11+
serviceName: {{ .Release.Name }}-api
1212
auth:
1313
skip: {{ .Values.api.service.zenith.skipAuth }}
1414
{{- end -}}

0 commit comments

Comments
 (0)