diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml
index 2ddfe8ccb932..1f8c45c770b2 100644
--- a/dlc_developer_config.toml
+++ b/dlc_developer_config.toml
@@ -37,11 +37,11 @@ deep_canary_mode = false
 
 [build]
 # Add in frameworks you would like to build. By default, builds are disabled unless you specify building an image.
 # available frameworks - ["base", "vllm", "sglang", "autogluon", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"]
-build_frameworks = []
+build_frameworks = ["pytorch"]
 
 # By default we build both training and inference containers. Set true/false values to determine which to build.
-build_training = true
+build_training = false
 build_inference = true
 
 # Set do_build to "false" to skip builds and test the latest image built by this PR
@@ -154,7 +154,7 @@ dlc-pr-tensorflow-2-habana-training = ""
 
 ### INFERENCE PR JOBS ###
 # Standard Framework Inference
-dlc-pr-pytorch-inference = ""
+dlc-pr-pytorch-inference = "pytorch/inference/buildspec-2-8-ec2.yml"
 dlc-pr-tensorflow-2-inference = ""
 dlc-pr-autogluon-inference = ""
 
@@ -187,4 +187,4 @@ dlc-pr-tensorflow-2-eia-inference = ""
 dlc-pr-vllm = ""
 
 # sglang
-dlc-pr-sglang = ""
\ No newline at end of file
+dlc-pr-sglang = ""
diff --git a/pytorch/inference/buildspec-2-8-ec2.yml b/pytorch/inference/buildspec-2-8-ec2.yml
new file mode 100644
index 000000000000..cbe87e11c3d4
--- /dev/null
+++ b/pytorch/inference/buildspec-2-8-ec2.yml
@@ -0,0 +1,53 @@
+account_id: &ACCOUNT_ID
+prod_account_id: &PROD_ACCOUNT_ID 763104351884
+region: &REGION
+framework: &FRAMEWORK pytorch
+version: &VERSION 2.8.0
+short_version: &SHORT_VERSION "2.8"
+arch_type: x86
+# autopatch_build: "True"
+
+repository_info:
+  inference_repository: &INFERENCE_REPOSITORY
+    image_type: &INFERENCE_IMAGE_TYPE inference
+    root: !join [ *FRAMEWORK, "/", *INFERENCE_IMAGE_TYPE ]
+    repository_name: &REPOSITORY_NAME !join [ pr, "-", *FRAMEWORK, "-", *INFERENCE_IMAGE_TYPE ]
+    repository: &REPOSITORY !join [ *ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *REPOSITORY_NAME ]
+    release_repository_name: &RELEASE_REPOSITORY_NAME !join [ *FRAMEWORK, "-", *INFERENCE_IMAGE_TYPE ]
+    release_repository: &RELEASE_REPOSITORY !join [ *PROD_ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *RELEASE_REPOSITORY_NAME ]
+
+context:
+  inference_context: &INFERENCE_CONTEXT
+    rayserve-entrypoint:
+      source: docker/build_artifacts/rayserve-entrypoint.py
+      target: rayserve-entrypoint.py
+    bash_telemetry:
+      source: ../../miscellaneous_scripts/bash_telemetry.sh
+      target: bash_telemetry.sh
+    setup_oss_compliance:
+      source: ../../scripts/setup_oss_compliance.sh
+      target: setup_oss_compliance.sh
+    install_python:
+      source: ../../scripts/install_python.sh
+      target: install_python.sh
+    deep_learning_container:
+      source: ../../src/deep_learning_container.py
+      target: deep_learning_container.py
+
+images:
+  BuildEC2GPUPTInferencePy3DockerImage:
+    <<: *INFERENCE_REPOSITORY
+    build: &PYTORCH_GPU_INFERENCE_PY3 true
+    image_size_baseline: 16000
+    device_type: &DEVICE_TYPE gpu
+    python_version: &DOCKER_PYTHON_VERSION py3
+    tag_python_version: &TAG_PYTHON_VERSION py312
+    cuda_version: &CUDA_VERSION cu129
+    os_version: &OS_VERSION ubuntu22.04
+    tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-ec2" ]
+    latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-ec2" ]
+    # skip_build: "False"
+    docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /, *CUDA_VERSION, /Dockerfile., *DEVICE_TYPE ]
+    target: ec2
+    context:
+      <<: *INFERENCE_CONTEXT
diff --git a/pytorch/inference/docker/2.8/py3/cu129/Dockerfile.gpu b/pytorch/inference/docker/2.8/py3/cu129/Dockerfile.gpu
new file mode 100644
index 000000000000..a03e5b2a90c0
--- /dev/null
+++ b/pytorch/inference/docker/2.8/py3/cu129/Dockerfile.gpu
@@ -0,0 +1,158 @@
+ARG PYTHON=python3
+ARG PYTHON_VERSION=3.12.10
+ARG PYTHON_SHORT_VERSION=3.12
+ARG PYTORCH_VERSION=2.8.0
+ARG TORCHAUDIO_VERSION=2.8.0
+ARG TORCHVISION_VERSION=0.23.0
+ARG RAY_VERSION=2.49.0
+
+#################################################################
+#   ____
+#  / ___|  ___  _ __ ___  _ __ ___   ___  _ __
+# | |     / _ \| '_ ` _ \| '_ ` _ \ / _ \| '_ \
+# | |___ | (_) | | | | | | | | | | | (_) | | | |
+#  \____| \___/|_| |_| |_|_| |_| |_|\___/|_| |_|
+#  ___                              ____           _
+# |_ _|_ __ ___   __ _  __ _  ___  |  _ \ ___  ___(_)_ __   ___
+#  | || '_ ` _ \ / _` |/ _` |/ _ \ | |_) / _ \/ __| | '_ \ / _ \
+#  | || | | | | | (_| | (_| |  __/ |  _ <  __/ (__| | |_) |  __/
+# |___|_| |_| |_|\__,_|\__, |\___| |_| \_\___|\___|_| .__/ \___|
+#                      |___/                        |_|
+#################################################################
+
+FROM nvidia/cuda:12.9.1-cudnn-runtime-ubuntu22.04 AS common
+
+LABEL maintainer="Amazon AI"
+LABEL dlc_major_version="1"
+
+ARG PYTHON
+ARG PYTHON_VERSION
+ARG PYTORCH_VERSION
+ARG TORCHAUDIO_VERSION
+ARG TORCHVISION_VERSION
+ARG RAY_VERSION
+
+ENV CUDA_HOME="/usr/local/cuda"
+ENV PATH="${CUDA_HOME}/bin:${PATH}"
+
+# Python won't try to write .pyc or .pyo files on the import of source modules
+# Force stdin, stdout and stderr to be totally unbuffered. Good for logging
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+ENV PYTHONIOENCODING=UTF-8
+ENV LANG=C.UTF-8
+ENV LC_ALL=C.UTF-8
+
+ENV DLC_CONTAINER_TYPE=inference
+WORKDIR /
+
+RUN apt-get update \
+ && apt-get -y upgrade --only-upgrade systemd \
+ && apt-get install -y --allow-change-held-packages --no-install-recommends \
+    libgl1-mesa-glx \
+    build-essential \
+    ca-certificates \
+    zlib1g-dev \
+    openssl \
+    libssl-dev \
+    pkg-config \
+    check \
+    llvm \
+    xz-utils \
+    curl \
+    wget \
+    unzip \
+    libffi-dev \
+    libbz2-dev \
+    liblzma-dev \
+    libsqlite3-dev \
+    libreadline-dev \
+ && rm -rf /var/lib/apt/lists/* \
+ && apt-get clean
+
+COPY install_python.sh install_python.sh
+RUN bash install_python.sh ${PYTHON_VERSION} && rm install_python.sh
+
+# Python Path - nvidia/cuda base already has CUDA paths configured
+ENV PATH="/usr/local/bin:${PATH}"
+
+# Install PyTorch first (needs specific index-url); torch 2.8 pairs with triton 3.4.x
+RUN pip install --no-cache-dir \
+    torch==${PYTORCH_VERSION} \
+    torchvision==${TORCHVISION_VERSION} \
+    torchaudio==${TORCHAUDIO_VERSION} \
+    --index-url https://download.pytorch.org/whl/cu129 \
+ && pip install --no-cache-dir "triton==3.4.0" \
+ && pip uninstall -y dataclasses || true
+
+# Install Ray Serve + all dependencies in one optimized command
+# NOTE: version specifiers containing ">" must be quoted, or the shell treats them as redirections
+RUN pip install --no-cache-dir \
+    ray[serve]==${RAY_VERSION} \
+    cryptography \
+    pyOpenSSL \
+    mkl \
+    charset-normalizer \
+    packaging \
+    PyYAML \
+    numpy \
+    scipy \
+    click \
+    psutil \
+    pillow \
+    h5py \
+    fsspec \
+    "idna>=3.7" \
+    "tqdm>=4.66.3" \
+    "requests>=2.32.0" \
+    "setuptools>=70.0.0" \
+    "urllib3>=2.5.0" \
+    opencv-python==4.11.0.86 \
+    "jinja2>=3.1.6" \
+    "tornado>=6.5.1"
+
+RUN curl -o /license.txt https://aws-dlc-licenses.s3.amazonaws.com/pytorch-2.8/license.txt
+
+# Remove the cache; its removal is required for security verification
+RUN rm -rf /root/.cache || true
+
+########################################################
+#  _____ ____ ____    ___
+# | ____/ ___|___ \  |_ _|_ __ ___   __ _  __ _  ___
+# |  _|| |     __) |  | || '_ ` _ \ / _` |/ _` |/ _ \
+# | |__| |___ / __/   | || | | | | | (_| | (_| |  __/
+# |_____\____|_____| |___|_| |_| |_|\__,_|\__, |\___|
+#                                         |___/
+#  ____           _
+# |  _ \ ___  ___(_)_ __   ___
+# | |_) / _ \/ __| | '_ \ / _ \
+# |  _ <  __/ (__| | |_) |  __/
+# |_| \_\___|\___|_| .__/ \___|
+#                  |_|
+########################################################
+
+FROM common AS ec2
+
+ARG PYTHON
+
+WORKDIR /
+
+COPY setup_oss_compliance.sh setup_oss_compliance.sh
+RUN bash setup_oss_compliance.sh ${PYTHON} && rm setup_oss_compliance.sh
+
+# Prevent the build from blocking on interactive tzdata/region prompts (Ubuntu 22.04 base)
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update \
+ && apt-get upgrade -y \
+ && apt-get autoremove -y \
+ && apt-get clean \
+ && rm -rf /var/lib/apt/lists/*
+
+COPY rayserve-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
+RUN chmod +x /usr/local/bin/dockerd-entrypoint.py
+
+# Port 8000: Ray Serve default serving port, Port 8265: Ray dashboard
+# References : https://docs.ray.io/en/latest/serve/production-guide/config.html
+# https://docs.ray.io/en/latest/ray-observability/getting-started.html
+EXPOSE 8000 8265
+ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"]
diff --git a/pytorch/inference/docker/build_artifacts/rayserve-entrypoint.py b/pytorch/inference/docker/build_artifacts/rayserve-entrypoint.py
new file mode 100644
index 000000000000..7565577055a6
--- /dev/null
+++ b/pytorch/inference/docker/build_artifacts/rayserve-entrypoint.py
@@ -0,0 +1,39 @@
+# Copyright 2019-2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+from __future__ import absolute_import
+
+import shlex
+import subprocess
+import sys
+
+# Auto-start a single-node Ray cluster and Ray Serve (dashboard on 8265, HTTP on 8000)
+subprocess.run(
+    [
+        "ray",
+        "start",
+        "--head",
+        "--disable-usage-stats",
+        "--dashboard-host",
+        "0.0.0.0",
+        "--dashboard-port",
+        "8265",
+    ],
+    check=True,
+)
+
+subprocess.run(["serve", "start", "--http-host", "0.0.0.0", "--http-port", "8000"], check=True)
+
+if len(sys.argv) > 1:
+    subprocess.check_call(shlex.split(" ".join(sys.argv[1:])))
+else:
+    subprocess.call(["tail", "-f", "/dev/null"])