diff --git a/src/api-utils.test.ts b/src/api-utils.test.ts
new file mode 100644
index 0000000..e735852
--- /dev/null
+++ b/src/api-utils.test.ts
@@ -0,0 +1,186 @@
+import { afterEach, beforeEach, describe, expect, it } from "bun:test";
+import { fetchWithRetry, paginatedFetch } from "./api-utils.ts";
+
+const originalFetch = globalThis.fetch;
+const originalSetTimeout = globalThis.setTimeout;
+
+// Make all delays instant in tests so the suite stays fast
+beforeEach(() => {
+  // biome-ignore lint/suspicious/noExplicitAny: test-only shim
+  globalThis.setTimeout = ((fn: () => void, _delay?: number) => {
+    fn();
+    return 0;
+  }) as any;
+});
+
+afterEach(() => {
+  globalThis.fetch = originalFetch;
+  globalThis.setTimeout = originalSetTimeout;
+});
+
+// ─── fetchWithRetry ───────────────────────────────────────────────────────────
+
+describe("fetchWithRetry", () => {
+  it("returns a 200 response immediately without retrying", async () => {
+    let calls = 0;
+    globalThis.fetch = (async () => {
+      calls++;
+      return new Response("ok", { status: 200 });
+    }) as typeof fetch;
+
+    const res = await fetchWithRetry("https://example.com", {});
+    expect(res.status).toBe(200);
+    expect(calls).toBe(1);
+  });
+
+  it("returns a non-retryable error response (404) immediately without retrying", async () => {
+    let calls = 0;
+    globalThis.fetch = (async () => {
+      calls++;
+      return new Response("not found", { status: 404 });
+    }) as typeof fetch;
+
+    const res = await fetchWithRetry("https://example.com", {}, 3);
+    expect(res.status).toBe(404);
+    expect(calls).toBe(1);
+  });
+
+  it("returns a non-retryable error response (401) immediately without retrying", async () => {
+    let calls = 0;
+    globalThis.fetch = (async () => {
+      calls++;
+      return new Response("unauthorized", { status: 401 });
+    }) as typeof fetch;
+
+    const res = await fetchWithRetry("https://example.com", {}, 3);
+    expect(res.status).toBe(401);
+    expect(calls).toBe(1);
+  });
+
+  it("retries on 429 and succeeds on the second attempt", async () => {
+    let calls = 0;
+    globalThis.fetch = (async () => {
+      calls++;
+      if (calls === 1) return new Response("rate limited", { status: 429 });
+      return new Response("ok", { status: 200 });
+    }) as typeof fetch;
+
+    const res = await fetchWithRetry("https://example.com", {}, 3);
+    expect(res.status).toBe(200);
+    expect(calls).toBe(2);
+  });
+
+  it("retries on 429 and reads the Retry-After header", async () => {
+    let calls = 0;
+    globalThis.fetch = (async () => {
+      calls++;
+      if (calls === 1) {
+        return new Response("rate limited", {
+          status: 429,
+          headers: { "Retry-After": "5" },
+        });
+      }
+      return new Response("ok", { status: 200 });
+    }) as typeof fetch;
+
+    const res = await fetchWithRetry("https://example.com", {}, 3);
+    expect(res.status).toBe(200);
+    expect(calls).toBe(2);
+  });
+
+  it("retries on 503 and succeeds on the third attempt", async () => {
+    let calls = 0;
+    globalThis.fetch = (async () => {
+      calls++;
+      if (calls < 3) return new Response("service unavailable", { status: 503 });
+      return new Response("ok", { status: 200 });
+    }) as typeof fetch;
+
+    const res = await fetchWithRetry("https://example.com", {}, 3);
+    expect(res.status).toBe(200);
+    expect(calls).toBe(3);
+  });
+
+  it("returns the last 429 response after maxRetries exhausted", async () => {
+    let calls = 0;
+    globalThis.fetch = (async () => {
+      calls++;
+      return new Response("still rate limited", { status: 429 });
+    }) as typeof fetch;
+
+    const res = await fetchWithRetry("https://example.com", {}, 2);
+    expect(res.status).toBe(429);
+    // 1 initial + 2 retries = 3 total calls
+    expect(calls).toBe(3);
+  });
+
+  it("passes the request options to fetch", async () => {
+    let capturedOptions: RequestInit | undefined;
+    globalThis.fetch = (async (_url: string | URL | Request, opts?: RequestInit) => {
+      capturedOptions = opts;
+      return new Response("ok", { status: 200 });
+    }) as typeof fetch;
+
+    await fetchWithRetry("https://example.com", {
+      headers: { Authorization: "Bearer token" },
+    });
+    expect((capturedOptions?.headers as Record<string, string>)?.["Authorization"]).toBe(
+      "Bearer token",
+    );
+  });
+});
+
+// ─── paginatedFetch ───────────────────────────────────────────────────────────
+
+describe("paginatedFetch", () => {
+  it("returns empty array when first page is empty", async () => {
+    const result = await paginatedFetch(async () => []);
+    expect(result).toEqual([]);
+  });
+
+  it("returns all items when they fit in a single page", async () => {
+    const result = await paginatedFetch(async () => ["a", "b", "c"], 100);
+    expect(result).toEqual(["a", "b", "c"]);
+  });
+
+  it("fetches multiple pages until the last page has fewer items than pageSize", async () => {
+    const pages = [
+      ["a", "b"], // full page (pageSize = 2)
+      ["c", "d"], // full page
+      ["e"], // last page (< 2 items)
+    ];
+    let callCount = 0;
+    const result = await paginatedFetch(async (page) => pages[page - 1] ?? [], 2);
+    void callCount;
+    expect(result).toEqual(["a", "b", "c", "d", "e"]);
+  });
+
+  it("passes the correct page number to fetchPage", async () => {
+    const capturedPages: number[] = [];
+    await paginatedFetch(async (page) => {
+      capturedPages.push(page);
+      if (page < 3) return Array(2).fill("x");
+      return ["x"]; // last page
+    }, 2);
+    expect(capturedPages).toEqual([1, 2, 3]);
+  });
+
+  it("triggers a second fetch when the page is exactly full, stops when the next page is empty", async () => {
+    const pages: string[][] = [["a", "b"], []]; // full then empty
+    const capturedPages: number[] = [];
+    const result = await paginatedFetch(async (page) => {
+      capturedPages.push(page);
+      return pages[page - 1] ??
+        [];
+    }, 2);
+    expect(result).toEqual(["a", "b"]);
+    expect(capturedPages).toEqual([1, 2]);
+  });
+
+  it("propagates errors thrown by fetchPage", async () => {
+    await expect(
+      paginatedFetch(async () => {
+        throw new Error("API error");
+      }),
+    ).rejects.toThrow("API error");
+  });
+});
diff --git a/src/api-utils.ts b/src/api-utils.ts
new file mode 100644
index 0000000..bebe68b
--- /dev/null
+++ b/src/api-utils.ts
@@ -0,0 +1,76 @@
+// ─── API utilities — pagination and retry helpers ─────────────────────────────
+//
+// Pure-async helpers with no side effects beyond network I/O. These are the
+// only place in the codebase that knows about GitHub rate-limit semantics.
+
+const RETRYABLE_STATUSES = new Set([429, 503]);
+const BASE_RETRY_DELAY_MS = 1_000;
+const MAX_RETRY_DELAY_MS = 60_000;
+
+/**
+ * Performs a `fetch` with automatic retry on 429 (rate-limited) and 503
+ * (server unavailable), using exponential backoff with optional `Retry-After`
+ * header support.
+ *
+ * Non-retryable responses (including successful ones) are returned immediately.
+ * After `maxRetries` exhausted the last response is returned — callers must
+ * still check `res.ok`.
+ */
+export async function fetchWithRetry(
+  url: string,
+  options: RequestInit,
+  maxRetries = 3,
+): Promise<Response> {
+  let attempt = 0;
+  while (true) {
+    const res = await fetch(url, options);
+    if (!RETRYABLE_STATUSES.has(res.status) || attempt >= maxRetries) {
+      return res;
+    }
+    const retryAfterHeader = res.headers.get("Retry-After");
+    let delayMs: number;
+    if (retryAfterHeader !== null) {
+      const seconds = parseInt(retryAfterHeader, 10);
+      delayMs =
+        Number.isFinite(seconds) && seconds > 0
+          ? seconds * 1_000
+          : Math.min(BASE_RETRY_DELAY_MS * 2 ** attempt, MAX_RETRY_DELAY_MS);
+    } else {
+      delayMs = Math.min(BASE_RETRY_DELAY_MS * 2 ** attempt, MAX_RETRY_DELAY_MS);
+    }
+    // Add ±10 % jitter to avoid thundering-herd on concurrent retries
+    delayMs = delayMs * (0.9 + Math.random() * 0.2);
+    await new Promise((r) => setTimeout(r, delayMs));
+    attempt++;
+  }
+}
+
+/**
+ * Fetches all pages from a paginated GitHub API endpoint.
+ *
+ * Calls `fetchPage(pageNumber)` starting at page 1 and stops when the
+ * returned array contains fewer items than `pageSize` (last page signal).
+ *
+ * @param fetchPage Function that fetches a single page and returns its items.
+ *                  Should throw on unrecoverable errors.
+ * @param pageSize Expected maximum items per page (default 100). Used as the
+ *                 stop condition: `items.length < pageSize → last page`.
+ * @param delayMs Optional inter-page delay in milliseconds. Useful to stay
+ *                polite with rate limits on high-volume endpoints.
+ */
+export async function paginatedFetch<T>(
+  fetchPage: (page: number) => Promise<T[]>,
+  pageSize = 100,
+  delayMs = 0,
+): Promise<T[]> {
+  const all: T[] = [];
+  let page = 1;
+  while (true) {
+    const items = await fetchPage(page);
+    all.push(...items);
+    if (items.length < pageSize) break;
+    page++;
+    if (delayMs > 0) await new Promise((r) => setTimeout(r, delayMs));
+  }
+  return all;
+}
diff --git a/src/api.ts b/src/api.ts
index f97a809..4d9bf25 100644
--- a/src/api.ts
+++ b/src/api.ts
@@ -1,5 +1,6 @@
 import pc from "picocolors";
 import type { CodeMatch } from "./types.ts";
+import { fetchWithRetry, paginatedFetch } from "./api-utils.ts";
 
 // ─── Raw GitHub API types (internal) ─────────────────────────────────────────
 
@@ -36,6 +37,20 @@ interface RawRepo {
 
 // ─── API client ───────────────────────────────────────────────────────────────
 
+/**
+ * Build common GitHub API request headers.
+ */
+function githubHeaders(
+  token: string,
+  accept = "application/vnd.github.text-match+json",
+): HeadersInit {
+  return {
+    Authorization: `Bearer ${token}`,
+    Accept: accept,
+    "X-GitHub-Api-Version": "2022-11-28",
+  };
+}
+
 /**
  * Convert a GitHub blob URL to its raw.githubusercontent.com equivalent.
  * e.g. https://github.com/org/repo/blob/SHA/path → https://raw.githubusercontent.com/org/repo/SHA/path
@@ -79,16 +94,14 @@ export async function searchCode(
   page = 1,
 ): Promise<{ items: RawCodeItem[]; total: number }> {
   const params = new URLSearchParams({
+    // @see https://docs.github.com/en/rest/search/search?apiVersion=2022-11-28#constructing-a-search-query
     q: `${q} org:${org}`,
     per_page: "100",
     page: String(page),
   });
-  const res = await fetch(`https://api.github.com/search/code?${params}`, {
-    headers: {
-      Authorization: `Bearer ${token}`,
-      Accept: "application/vnd.github.text-match+json",
-      "X-GitHub-Api-Version": "2022-11-28",
-    },
+  // @see https://docs.github.com/en/rest/search/search?apiVersion=2022-11-28#search-code
+  const res = await fetchWithRetry(`https://api.github.com/search/code?${params}`, {
+    headers: githubHeaders(token),
   });
   if (!res.ok) {
     const body = await res.text();
@@ -137,7 +150,7 @@ export async function fetchAllResults(
   await Promise.all(
     urlsToFetch.map(async (htmlUrl) => {
       try {
-        const res = await fetch(toRawUrl(htmlUrl), {
+        const res = await fetchWithRetry(toRawUrl(htmlUrl), {
           headers: { Authorization: `Bearer ${token}` },
         });
         if (res.ok) fileContentMap.set(htmlUrl, await res.text());
@@ -188,30 +201,22 @@ export async function fetchRepoTeams(
   const lowerPrefixes = prefixes.map((p) => p.toLowerCase());
 
   // ── 1. List all org teams (paginated) ──────────────────────────────────────
-  const matchingTeamSlugs: string[] = [];
-  let page = 1;
-  while (true) {
+  // @see https://docs.github.com/en/rest/teams/teams?apiVersion=2022-11-28#list-teams
+  const allTeams = await paginatedFetch(async (page) => {
     const params = new URLSearchParams({ per_page: "100", page: String(page) });
-    const res = await fetch(`https://api.github.com/orgs/${org}/teams?${params}`, {
-      headers: {
-        Authorization: `Bearer ${token}`,
-        Accept: "application/vnd.github+json",
-        "X-GitHub-Api-Version": "2022-11-28",
-      },
+    const res = await fetchWithRetry(`https://api.github.com/orgs/${org}/teams?${params}`, {
+      headers: githubHeaders(token, "application/vnd.github+json"),
     });
     if (!res.ok) {
       const body = await res.text();
       throw new Error(`GitHub API error ${res.status} (list teams): ${body}`);
     }
-    const teams = (await res.json()) as RawTeam[];
-    for (const t of teams) {
-      if (lowerPrefixes.some((p) => t.slug.toLowerCase().startsWith(p))) {
-        matchingTeamSlugs.push(t.slug);
-      }
-    }
-    if (teams.length < 100) break;
-    page++;
-  }
+    return (await res.json()) as RawTeam[];
+  });
+
+  const matchingTeamSlugs = allTeams
+    .filter((t) => lowerPrefixes.some((p) => t.slug.toLowerCase().startsWith(p)))
+    .map((t) => t.slug);
 
   process.stderr.write(
     pc.dim(
@@ -220,35 +225,33 @@ export async function fetchRepoTeams(
   );
 
   // ── 2. For each matching team fetch its repos (paginated) ──────────────────
+  // @see https://docs.github.com/en/rest/teams/teams?apiVersion=2022-11-28#list-repos-in-a-team
   const repoTeams = new Map<string, string[]>();
   await Promise.all(
     matchingTeamSlugs.map(async (slug) => {
-      let p = 1;
-      while (true) {
-        const params = new URLSearchParams({
-          per_page: "100",
-          page: String(p),
-        });
-        const res = await fetch(
+      const repos = await paginatedFetch(async (p) => {
+        const params = new URLSearchParams({ per_page: "100", page: String(p) });
+        const res = await fetchWithRetry(
           `https://api.github.com/orgs/${org}/teams/${slug}/repos?${params}`,
-          {
-            headers: {
-              Authorization: `Bearer ${token}`,
-              Accept: "application/vnd.github+json",
-              "X-GitHub-Api-Version": "2022-11-28",
-            },
-          },
+          { headers: githubHeaders(token, "application/vnd.github+json") },
         );
-        if (!res.ok) break; // Skip on error (e.g. 404 for nested teams)
-        const repos = (await res.json()) as RawRepo[];
-        for (const r of repos) {
-          const list = repoTeams.get(r.full_name) ?? [];
-          if (!list.includes(slug)) list.push(slug);
-          repoTeams.set(r.full_name, list);
+        if (!res.ok) {
+          // 404 is expected for nested/secret teams — skip silently.
+          // Other errors are unexpected: log a warning and stop pagination.
+          if (res.status !== 404) {
+            process.stderr.write(
+              pc.dim(`Warning: could not fetch repos for team "${slug}" (HTTP ${res.status})\n`),
+            );
+          }
+          return [];
         }
-        if (repos.length < 100) break;
-        p++;
+        return (await res.json()) as RawRepo[];
+      });
+      for (const r of repos) {
+        const list = repoTeams.get(r.full_name) ?? [];
+        if (!list.includes(slug)) list.push(slug);
+        repoTeams.set(r.full_name, list);
       }
     }),
   );