anyscale · robertnishihara · Oct 28, 2025
diff --git a/02_service_hello_world/query.py b/02_service_hello_world/query.py
@@ -13,6 +13,7 @@
 resp = requests.get(
     urljoin(base_url, "hello"),
     params={"name": "Theodore"},
-    headers={"Authorization": f"Bearer {token}"})
+    headers={"Authorization": f"Bearer {token}"},
+    timeout=10)
 
 print(resp.text)
diff --git a/03_deploy_llama_3_8b/Dockerfile b/03_deploy_llama_3_8b/Dockerfile
@@ -2,7 +2,8 @@ FROM anyscale/ray:2.49.0-slim-py312-cu128
 
 # C compiler for Triton’s runtime build step (vLLM V1 engine)
 # https://github.com/vllm-project/vllm/issues/2997
-RUN sudo apt-get update && \
-    sudo apt-get install -y --no-install-recommends build-essential
+RUN sudo apt-get update && \
+    sudo apt-get install -y --no-install-recommends build-essential && \
+    sudo rm -rf /var/lib/apt/lists/*
 
 RUN pip install vllm==0.10.0
diff --git a/deploy_llama_3_1_70b/Dockerfile b/deploy_llama_3_1_70b/Dockerfile
@@ -2,7 +2,8 @@ FROM anyscale/ray:2.50.0-slim-py312-cu128
 
 # C compiler for Triton’s runtime build step (vLLM V1 engine)
 # https://github.com/vllm-project/vllm/issues/2997
-RUN sudo apt-get update && \
-    sudo apt-get install -y --no-install-recommends build-essential
+RUN sudo apt-get update && \
+    sudo apt-get install -y --no-install-recommends build-essential && \
+    sudo rm -rf /var/lib/apt/lists/*
 
 RUN pip install vllm==0.11.0
diff --git a/serve_tensor_parallel/query.py b/serve_tensor_parallel/query.py
@@ -12,6 +12,7 @@
 resp = requests.get(
     urljoin(base_url, "infer"),
     params={"text": "What is the future of AI? "},
-    headers={"Authorization": f"Bearer {token}"})
+    headers={"Authorization": f"Bearer {token}"},
+    timeout=15)
 
 print(resp.text)
diff --git a/skyrl/Dockerfile b/skyrl/Dockerfile
@@ -1,8 +1,9 @@
 FROM anyscale/ray:2.48.0-slim-py312-cu128
 
-RUN sudo apt-get update -y \
-    && sudo apt-get install --no-install-recommends -y build-essential libnuma-dev \
-    && sudo rm -f /etc/apt/sources.list.d/*
+RUN sudo apt-get update -y \
+    && sudo apt-get install --no-install-recommends -y build-essential libnuma-dev \
+    && sudo rm -f /etc/apt/sources.list.d/* \
+    && sudo rm -rf /var/lib/apt/lists/*
 
 RUN curl -LsSf https://astral.sh/uv/install.sh | sh
 

diff --git a/video_generation_with_fastvideo/serve_fastvideo.py b/video_generation_with_fastvideo/serve_fastvideo.py
@@ -1,4 +1,3 @@
-import asyncio
 from starlette.requests import Request
 from ray import serve
 from ray.serve._private.http_util import ASGIAppReplicaWrapper
@@ -17,13 +16,8 @@
 
 
 def gradio_builder(generator: serve.handle.DeploymentHandle):
-    def query_model(prompt, num_inference_steps):
-
-        async def run_query_model(prompt, num_inference_steps):
-            video_base64 = await generator.generate.remote(prompt, num_inference_steps)
-            return video_base64
-
-        video_base64 = asyncio.run(run_query_model(prompt, num_inference_steps))
+    async def query_model(prompt, num_inference_steps):
+        video_base64 = await generator.generate.remote(prompt, num_inference_steps)
         video_bytes = base64.b64decode(video_base64)
         video_filename = f"{uuid.uuid4()}.mp4"
         video_path = os.path.join(output_dir, video_filename)
@@ -93,7 +87,7 @@ def __init__(self):
             num_gpus=1,  # Adjust based on your hardware
         )
 
-    def generate(self, prompt: str, num_inference_steps: int = 3) -> bytes:
+    def generate(self, prompt: str, num_inference_steps: int = 3) -> str:
         # Generate the video.
         video = self.generator.generate_video(
             prompt,
@@ -110,7 +104,7 @@ def generate(self, prompt: str, num_inference_steps: int = 3) -> bytes:
 
         return video_base64
 
-    async def __call__(self, http_request: Request) -> bytes:
+    async def __call__(self, http_request: Request) -> str:
         data = await http_request.json()
         prompt = data["prompt"]
         num_inference_steps = data["num_inference_steps"]