|
1 | 1 | name: JAX-vLLM offloading |
2 | 2 |
|
3 | 3 | on: |
| 4 | + schedule: |
| 5 | + - cron: '30 9 * * *' # 09:30 UTC daily = 01:30 AM Pacific Standard Time (02:30 AM during daylight saving) |
| 6 | + |
4 | 7 | workflow_call: |
5 | 8 | inputs: |
6 | 9 | JAX_VLLM_OFFLOADING_IMAGE: |
7 | 10 | type: string |
8 | | - description: MaxText image from ghcr.io/nvidia |
| 11 | + description: JAX-vLLM offloading image from ghcr.io/nvidia |
9 | 12 | default: ghcr.io/nvidia/jax-toolbox-internal:19461214142-jio-amd64 |
10 | 13 | required: false |
| 14 | + PUBLISH: |
| 15 | + type: boolean |
| 16 | + description: Publish dated images and update the 'latest' tag? |
| 17 | + default: false |
| 18 | + required: false |
| 19 | + |
11 | 20 | pull_request: |
12 | 21 | types: |
13 | 22 | - opened |
|
19 | 28 | - '.github/gke-workflow/jax-vllm-offloading/**' |
20 | 29 | - '.github/workflows/jax-vllm-offloading*.yml' |
21 | 30 |
|
| 31 | +concurrency: |
| 32 | + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} |  # keyed by workflow name plus head_ref, or run_id when head_ref is empty
| 33 | + cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} |  # evaluates to false on refs/heads/main, so runs on main are not cancelled
| 34 | + |
| 35 | +permissions: |
| 36 | + contents: read # to fetch code |
| 37 | + actions: write # to cancel previous workflows |
| 38 | + packages: write # to upload containers |
| 39 | + |
22 | 40 | jobs: |
| 41 | + metadata: |
| 42 | + runs-on: ubuntu-22.04 |
| 43 | + if: github.event.pull_request.draft == false || github.event_name != 'pull_request' # skip draft pull requests; every other event always runs |
| 44 | + outputs: |
| 45 | + BUILD_DATE: ${{ steps.date.outputs.BUILD_DATE }} # YYYY-MM-DD, computed in the US/Los_Angeles time zone |
| 46 | + PUBLISH: ${{ steps.if-publish.outputs.PUBLISH }} # always the literal string 'true' or 'false' |
| 47 | + steps: |
| 48 | + - name: Set build date |
| 49 | + id: date |
| 50 | + shell: bash -x -e {0} |
| 51 | + run: | |
| 52 | + BUILD_DATE=$(TZ='US/Los_Angeles' date '+%Y-%m-%d') |
| 53 | + echo "BUILD_DATE=${BUILD_DATE}" >> "$GITHUB_OUTPUT" |
| 54 | +
|
| 55 | + - name: Determine whether results will be 'published' |
| 56 | + id: if-publish |
| 57 | + shell: bash -x -e {0} |
| 58 | + run: | |
| 59 | + echo "PUBLISH=${{ github.event_name == 'schedule' || inputs.PUBLISH == true }}" >> "$GITHUB_OUTPUT" |
| 60 | +
|
| 61 | + build: |  # builds the jio container once per architecture listed in the matrix
| 62 | + needs: metadata |  # BUILD_DATE below is read from the metadata job's outputs
| 63 | + strategy: |
| 64 | + fail-fast: true |
| 65 | + matrix: |
| 66 | + ARCHITECTURE: [amd64, arm64] |
| 67 | + runs-on: [self-hosted, "${{ matrix.ARCHITECTURE }}", "small"] |  # self-hosted runner selected by the architecture and "small" labels
| 68 | + steps: |
| 69 | + - name: Checkout repository |
| 70 | + uses: actions/checkout@v4 |
| 71 | + - name: Build container |
| 72 | + id: build-container |
| 73 | + uses: ./.github/actions/build-container |  # action referenced by a path inside this repository, hence the checkout step above
| 74 | + with: |
| 75 | + ARCHITECTURE: ${{ matrix.ARCHITECTURE }} |
| 76 | + ARTIFACT_NAME: artifact-jio-build |
| 77 | + BADGE_FILENAME: badge-jio-build |
| 78 | + BASE_IMAGE: nvcr.io/nvidia/cuda-dl-base:25.06-cuda12.9-devel-ubuntu24.04 |
| 79 | + BUILD_DATE: ${{ needs.metadata.outputs.BUILD_DATE }} |
| 80 | + CONTAINER_NAME: jio |
| 81 | + DOCKERFILE: jax-inference-offloading/dockerfile/oss.dockerfile |
| 82 | + RUNNER_SIZE: small |
| 83 | + ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }} |
| 84 | + ssh-known-hosts: ${{ vars.SSH_KNOWN_HOSTS }} |
| 85 | + github-token: ${{ secrets.GITHUB_TOKEN }} |
| 86 | + EXTRA_BUILD_ARGS: | |
| 87 | + REF_JIO=${{ github.ref }} |
| 88 | +
|
| 89 | + outputs: |  # NOTE(review): both matrix legs (amd64, arm64) populate these same output names, so a consumer presumably sees only one architecture's tag - confirm which one is intended
| 90 | + DOCKER_TAG_MEALKIT: ${{ steps.build-container.outputs.DOCKER_TAG_MEALKIT }} |
| 91 | + DOCKER_TAG_FINAL: ${{ steps.build-container.outputs.DOCKER_TAG_FINAL }} |
| 92 | + |
23 | 93 | jax-vllm-offloading-transfer-gke-xpk: |
24 | 94 | uses: ./.github/workflows/jax-vllm-offloading-gke-transfer.yml # kept: a job that passes `with:` inputs must name the reusable workflow it calls |
25 | | - # needs: build # .github/workflows/jio.yaml |
| 95 | + needs: build # wait for the build job so its DOCKER_TAG_FINAL output is available |
26 | 96 | with: |
27 | | - # JAX_VLLM_OFFLOADING_IMAGE: ${{ needs.build.outputs.DOCKER_TAG_FINAL }} |
28 | | - JAX_VLLM_OFFLOADING_IMAGE: ghcr.io/nvidia/jax-toolbox-internal:19461214142-jio-amd64 |
| 97 | + JAX_VLLM_OFFLOADING_IMAGE: ${{ needs.build.outputs.DOCKER_TAG_FINAL }} |
0 commit comments