From ffa6b241be1bd9a499335f5ed541951af5db76fd Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Tue, 28 Oct 2025 22:09:59 +0000 Subject: [PATCH] Add timeouts to queries and clean up Dockerfiles Co-authored-by: rkn --- 02_service_hello_world/query.py | 3 ++- 03_deploy_llama_3_8b/Dockerfile | 5 +++-- deploy_llama_3_1_70b/Dockerfile | 5 +++-- serve_tensor_parallel/query.py | 3 ++- skyrl/Dockerfile | 7 ++++--- video_generation_with_fastvideo/serve_fastvideo.py | 14 ++++---------- 6 files changed, 18 insertions(+), 19 deletions(-) diff --git a/02_service_hello_world/query.py b/02_service_hello_world/query.py index 905ea58..185631a 100644 --- a/02_service_hello_world/query.py +++ b/02_service_hello_world/query.py @@ -13,6 +13,7 @@ resp = requests.get( urljoin(base_url, "hello"), params={"name": "Theodore"}, - headers={"Authorization": f"Bearer {token}"}) + headers={"Authorization": f"Bearer {token}"}, + timeout=10) print(resp.text) \ No newline at end of file diff --git a/03_deploy_llama_3_8b/Dockerfile b/03_deploy_llama_3_8b/Dockerfile index 7c4782e..21a72dc 100644 --- a/03_deploy_llama_3_8b/Dockerfile +++ b/03_deploy_llama_3_8b/Dockerfile @@ -2,7 +2,8 @@ FROM anyscale/ray:2.49.0-slim-py312-cu128 # C compiler for Triton’s runtime build step (vLLM V1 engine) # https://github.com/vllm-project/vllm/issues/2997 -RUN sudo apt-get update && \ - sudo apt-get install -y --no-install-recommends build-essential +RUN apt-get update && \ + apt-get install -y --no-install-recommends build-essential && \ + rm -rf /var/lib/apt/lists/* RUN pip install vllm==0.10.0 diff --git a/deploy_llama_3_1_70b/Dockerfile b/deploy_llama_3_1_70b/Dockerfile index ea0023c..ce9f579 100644 --- a/deploy_llama_3_1_70b/Dockerfile +++ b/deploy_llama_3_1_70b/Dockerfile @@ -2,7 +2,8 @@ FROM anyscale/ray:2.50.0-slim-py312-cu128 # C compiler for Triton’s runtime build step (vLLM V1 engine) # https://github.com/vllm-project/vllm/issues/2997 -RUN sudo apt-get update && \ - sudo apt-get install -y --no-install-recommends build-essential +RUN apt-get update && \ + apt-get install -y --no-install-recommends build-essential && \ + rm -rf /var/lib/apt/lists/* RUN pip install vllm==0.11.0 diff --git a/serve_tensor_parallel/query.py b/serve_tensor_parallel/query.py index 36b9a9a..49f671c 100644 --- a/serve_tensor_parallel/query.py +++ b/serve_tensor_parallel/query.py @@ -12,6 +12,7 @@ resp = requests.get( urljoin(base_url, "infer"), params={"text": "What is the future of AI? "}, - headers={"Authorization": f"Bearer {token}"}) + headers={"Authorization": f"Bearer {token}"}, + timeout=15) print(resp.text) diff --git a/skyrl/Dockerfile b/skyrl/Dockerfile index 0cd352e..e556dc8 100644 --- a/skyrl/Dockerfile +++ b/skyrl/Dockerfile @@ -1,8 +1,9 @@ FROM anyscale/ray:2.48.0-slim-py312-cu128 -RUN sudo apt-get update -y \ - && sudo apt-get install --no-install-recommends -y build-essential libnuma-dev \ - && sudo rm -f /etc/apt/sources.list.d/* +RUN apt-get update -y \ + && apt-get install --no-install-recommends -y build-essential libnuma-dev \ + && rm -f /etc/apt/sources.list.d/* \ + && rm -rf /var/lib/apt/lists/* RUN curl -LsSf https://astral.sh/uv/install.sh | sh diff --git a/video_generation_with_fastvideo/serve_fastvideo.py b/video_generation_with_fastvideo/serve_fastvideo.py index c3cd997..ddc742a 100644 --- a/video_generation_with_fastvideo/serve_fastvideo.py +++ b/video_generation_with_fastvideo/serve_fastvideo.py @@ -1,4 +1,3 @@ -import asyncio from starlette.requests import Request from ray import serve from ray.serve._private.http_util import ASGIAppReplicaWrapper @@ -17,13 +16,8 @@ def gradio_builder(generator: serve.handle.DeploymentHandle): - def query_model(prompt, num_inference_steps): - - async def run_query_model(prompt, num_inference_steps): - video_base64 = await generator.generate.remote(prompt, num_inference_steps) - return video_base64 - - video_base64 = asyncio.run(run_query_model(prompt, num_inference_steps)) + async def query_model(prompt, num_inference_steps): + video_base64 = await generator.generate.remote(prompt, num_inference_steps) video_bytes = base64.b64decode(video_base64) video_filename = f"{uuid.uuid4()}.mp4" video_path = os.path.join(output_dir, video_filename) @@ -93,7 +87,7 @@ def __init__(self): num_gpus=1, # Adjust based on your hardware ) - def generate(self, prompt: str, num_inference_steps: int = 3) -> bytes: + def generate(self, prompt: str, num_inference_steps: int = 3) -> str: # Generate the video. video = self.generator.generate_video( prompt, @@ -110,7 +104,7 @@ def generate(self, prompt: str, num_inference_steps: int = 3) -> bytes: return video_base64 - async def __call__(self, http_request: Request) -> bytes: + async def __call__(self, http_request: Request) -> str: data = await http_request.json() prompt = data["prompt"] num_inference_steps = data["num_inference_steps"]