3 files changed: +59 −4 lines changed

1+  name: Publish vLLM XPU images
2+
3+  on:
4+    # NOTE(sd109): Since this is checking out an external
5+    # repository, it's probably safer to leave this as
6+    # workflow_dispatch only so that we can manually build
7+    # images from specific refs rather than automatically
8+    # pulling in the latest content from the remote repo.
9+    workflow_dispatch:
10+     inputs:
11+       vllm_ref:
12+         type: string
13+         description: The vLLM GitHub ref (tag, branch or commit) to build.
14+         required: true
15+
16+ jobs:
17+   build_push_xpu_image:
18+     name: Build and push image
19+     runs-on: ubuntu-latest
20+     permissions:
21+       contents: read
22+       id-token: write          # needed for signing the images with GitHub OIDC Token
23+       packages: write          # required for pushing container images
24+       security-events: write   # required for pushing SARIF files
25+     steps:
26+       - name: Check out the vLLM repository
27+         uses: actions/checkout@v4
28+         with:
29+           repository: vllm-project/vllm
30+           ref: ${{ inputs.vllm_ref }}
31+
32+       - name: Login to GitHub Container Registry
33+         uses: docker/login-action@v3
34+         with:
35+           registry: ghcr.io
36+           username: ${{ github.actor }}
37+           password: ${{ secrets.GITHUB_TOKEN }}
38+
39+       - name: Build and push image
40+         run: |
41+           IMAGE=ghcr.io/stackhpc/vllm-xpu:${{ inputs.vllm_ref }}
42+           docker build -f docker/Dockerfile.xpu -t $IMAGE --shm-size=4g .
43+           docker push $IMAGE
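Since the workflow is workflow_dispatch-only, builds have to be triggered by hand. A minimal sketch of doing so with the GitHub CLI, assuming the workflow file is saved as .github/workflows/publish-vllm-xpu.yml (the actual filename is not shown in this diff):

    # Trigger an image build for a specific vLLM tag
    gh workflow run publish-vllm-xpu.yml -f vllm_ref=v0.8.5.post1

    # Follow the progress of the run
    gh run watch

The pushed image is tagged with whatever ref was passed in, e.g. ghcr.io/stackhpc/vllm-xpu:v0.8.5.post1, which matches the tag format the Helm template below builds from api.image.version.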
19 19     spec:
20 20       containers:
21 21       - name: {{ .Release.Name }}-api
22-           {{ $imageRepo := .Values.api.image.repository | default (ternary "ghcr.io/stackhpc/vllm-cpu" "vllm/vllm-openai" (eq (.Values.api.gpus | int) 0)) -}}
23-           image: {{ printf "%s:%s" $imageRepo .Values.api.image.version }}
22+           {{- if eq (.Values.api.gpus | int) 0 }}
23+           image: "ghcr.io/stackhpc/vllm-cpu:{{ .Values.api.image.version }}"
24+           {{- else if .Values.api.intelXPUsEnabled }}
25+           image: "ghcr.io/stackhpc/vllm-xpu:{{ .Values.api.image.version }}"
26+           {{- else }}
27+           image: "vllm/vllm-openai:{{ .Values.api.image.version }}"
28+           {{- end }}
24 29       ports:
25 30       - name: api
26 31         containerPort: 8000
61 66         periodSeconds: 10
62 67       resources:
63 68         limits:
69+             {{- if .Values.api.intelXPUsEnabled }}
70+             gpu.intel.com/i915: {{ .Values.api.gpus | int }}
71+             {{- else }}
64 72           nvidia.com/gpu: {{ .Values.api.gpus | int }}
73+             {{- end }}
65 74     volumes:
66 75       - name: data
67 76         {{- .Values.api.cacheVolume | toYaml | nindent 10 }}
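To illustrate the new branching: with api.gpus: 1, api.intelXPUsEnabled: true and api.image.version: v0.8.5.post1 (and a hypothetical release name of vllm), the template above should render roughly:

    spec:
      containers:
      - name: vllm-api
        image: "ghcr.io/stackhpc/vllm-xpu:v0.8.5.post1"
        ...
        resources:
          limits:
            gpu.intel.com/i915: 1

Note that, unlike the removed ternary line, the new branches no longer consult .Values.api.image.repository when choosing the image repository.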
33 33   enabled: true
34 34   # Container image config
35 35   image:
36-       # Defaults to vllm/vllm-openai when api.gpus > 0
37-       # or ghrc.io/stackhpc/vllm-cpu when api.gpus == 0
36+       # Defaults to vllm/vllm-openai when api.gpus > 0,
37+       # ghcr.io/stackhpc/vllm-xpu when api.gpus > 0 and intelXPUsEnabled is true,
38+       # or ghcr.io/stackhpc/vllm-cpu when api.gpus == 0
38 39     repository:
39 40     version: v0.8.5.post1
40 41   monitoring:
80 81   # distributed / multi-GPU support should be available, though it
81 82   # has not been tested against this app.
82 83   gpus: 1
84+     # Whether pods should request Intel GPUs as opposed to the default Nvidia GPUs
85+     intelXPUsEnabled: false
83 86   # The update strategy to use for the deployment
84 87   # See https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#updating-a-deployment
85 88   # NOTE: The following RollingUpdate strategy offers a zero-downtime update but requires additional GPU worker nodes.
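A minimal usage sketch for deploying onto Intel GPU nodes with these values, assuming the chart is installed from a local checkout (the chart path and release name here are placeholders):

    helm upgrade --install vllm ./chart \
      --set api.gpus=1 \
      --set api.intelXPUsEnabled=true \
      --set api.image.version=v0.8.5.post1

With intelXPUsEnabled: true the pod requests the gpu.intel.com/i915 resource, so the target nodes need the Intel GPU device plugin for Kubernetes installed to advertise it.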