From bd492e504cdb830b8676c9f38288b08290bdc787 Mon Sep 17 00:00:00 2001 From: Paritosh Tripathi Date: Sat, 9 May 2026 22:37:26 +0530 Subject: [PATCH] fix(prod): bump gunicorn timeout to 120s; drop Bing (HTTP 410) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two prod issues from Render logs at 2026-05-09T17:00: 1. Gunicorn killed /answer mid-generation: [CRITICAL] WORKER TIMEOUT (pid:42) Worker (pid:42) was sent SIGKILL! Perhaps out of memory? The default 30s worker timeout is too short for the new chat path: parallel search providers (~3s) → LLM rerank (~1–2s) → main answer on a reasoning-tuned model (gpt-oss-120b 5–15s; qwq-32b 30–60s for chain-of-thought). Bumped to 120s — long-tail still gets a clean 504 instead of a SIGKILL. 2. Bing Search API returns 410 Gone on every call: Bing search error: 410 Client Error: Gone for url: https://api.bing.microsoft.com/v7.0/search?... Microsoft permanently retired the Bing v7 Search API on 2025-08-11. Removed from perform_search's executor pool. Still ~3s of pointless wait per /search before this; with it gone, /search drops noticeably. `search_bing` is left in the file as dead code — same shape if we want to slot a replacement (Tavily, Brave) in later. See PR #1. Co-Authored-By: Claude Opus 4.7 (1M context) --- backend/app/services/search_service.py | 24 +++++++++++------------- backend/gunicorn_config.py | 12 +++++++++++- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/backend/app/services/search_service.py b/backend/app/services/search_service.py index 9ee9b1a..1332058 100644 --- a/backend/app/services/search_service.py +++ b/backend/app/services/search_service.py @@ -302,24 +302,22 @@ def search_youtube(query: str) -> List[SearchResult]: raise YouTubeAPIError(f"YouTube search failed: {str(e)}") def perform_search(query: str) -> List[SearchResult]: - """Perform parallel searches on Google, Bing, and YouTube APIs. 
- - Args: - query: The search query to run - - Returns: - Combined list of unique SearchResult objects + """Perform parallel searches on Google + YouTube and merge results. + + Bing was removed in 2026-05 — Microsoft permanently retired the Bing + Search API on 2025-08-11; every call returned `410 Gone`. Keeping the + future in the pool meant ~3s of pointless wait + log noise on every + /search, contributing to gunicorn worker timeouts on /answer. The + `search_bing` function is retained for now as dead code in case we + swap a different provider in via the same shape — see PR #1 (Tavily). """ try: - with ThreadPoolExecutor(max_workers=3) as executor: - bing_future = executor.submit(search_bing, query) + with ThreadPoolExecutor(max_workers=2) as executor: google_future = executor.submit(search_google, query) youtube_future = executor.submit(search_youtube, query) - + results = [] - - # Gather results, handling potential failures - for future in [bing_future, google_future, youtube_future]: + for future in [google_future, youtube_future]: try: results.extend(future.result()) except (SearchAPIError, YouTubeAPIError) as e: diff --git a/backend/gunicorn_config.py b/backend/gunicorn_config.py index 1a52c2c..918f563 100644 --- a/backend/gunicorn_config.py +++ b/backend/gunicorn_config.py @@ -1,3 +1,13 @@ workers = 4 # Number of worker processes worker_class = "uvicorn.workers.UvicornWorker" -bind = "0.0.0.0:8000" # Bind to the appropriate host and port \ No newline at end of file +bind = "0.0.0.0:8000" # Bind to the appropriate host and port + +# `/answer` regularly does: parallel search providers (~3s) → LLM rerank +# (~1–2s) → main answer generation. With reasoning-tuned models like +# `gpt-oss-120b` or `qwq-32b` the answer step alone can run 15–60s +# (chain-of-thought before the final answer). Gunicorn's default 30s +# timeout was killing requests mid-generation — log evidence: +# "WORKER TIMEOUT (pid:42)" → "Worker was sent SIGKILL" +# Lift to 120s. 
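Long-tail requests still get a clean 504 rather than a SIGKILL at 30s (NOTE(review): gunicorn never returns a 504 itself, so this presumably comes from the upstream proxy; a worker silent past 120s is still killed. Confirm.) +timeout = 120 +graceful_timeout = 30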