Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 11 additions & 13 deletions backend/app/services/search_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,24 +302,22 @@ def search_youtube(query: str) -> List[SearchResult]:
raise YouTubeAPIError(f"YouTube search failed: {str(e)}")

def perform_search(query: str) -> List[SearchResult]:
"""Perform parallel searches on Google, Bing, and YouTube APIs.

Args:
query: The search query to run

Returns:
Combined list of unique SearchResult objects
"""Perform parallel searches on Google + YouTube and merge results.

Bing was removed in 2026-05 — Microsoft permanently retired the Bing
Search API on 2025-08-11; every call returned `410 Gone`. Keeping the
future in the pool meant ~3s of pointless wait + log noise on every
/search, contributing to gunicorn worker timeouts on /answer. The
`search_bing` function is retained for now as dead code in case we
swap a different provider in via the same shape — see PR #1 (Tavily).
"""
try:
with ThreadPoolExecutor(max_workers=3) as executor:
bing_future = executor.submit(search_bing, query)
with ThreadPoolExecutor(max_workers=2) as executor:
google_future = executor.submit(search_google, query)
youtube_future = executor.submit(search_youtube, query)

results = []

# Gather results, handling potential failures
for future in [bing_future, google_future, youtube_future]:
for future in [google_future, youtube_future]:
try:
results.extend(future.result())
except (SearchAPIError, YouTubeAPIError) as e:
Expand Down
12 changes: 11 additions & 1 deletion backend/gunicorn_config.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
workers = 4 # Number of worker processes
worker_class = "uvicorn.workers.UvicornWorker"
bind = "0.0.0.0:8000" # Bind to the appropriate host and port
bind = "0.0.0.0:8000" # Bind to the appropriate host and port

# `/answer` regularly does: parallel search providers (~3s) → LLM rerank
# (~1–2s) → main answer generation. With reasoning-tuned models like
# `gpt-oss-120b` or `qwq-32b` the answer step alone can run 15–60s
# (chain-of-thought before the final answer). Gunicorn's default 30s
# timeout was killing requests mid-generation — log evidence:
# "WORKER TIMEOUT (pid:42)" → "Worker was sent SIGKILL"
# Lift to 120s. Long-tail still gets a clean 504 rather than a 30s SIGKILL.
timeout = 120
graceful_timeout = 30