diff --git a/cmd/stdio.go b/cmd/stdio.go index 031ceb3..551840d 100644 --- a/cmd/stdio.go +++ b/cmd/stdio.go @@ -18,6 +18,7 @@ import ( "bufio" "cmp" "context" + "errors" "fmt" "io" "log/slog" @@ -122,8 +123,28 @@ type HealthCheckOutput struct { const defaultFreshnessTTL = 30 * time.Second const defaultReindexTimeout = 15 * time.Second const defaultSearchTimeout = 20 * time.Second + +// defaultEmbedTimeout bounds the per-call embed for a semantic_search query. +// Mirrors defaultSearchTimeout: protects against any embedder slowness +// (cold-start GPU, network drop, server crash mid-request) producing the +// 10-minute HTTP client wait visible as a hang in Claude Code. +const defaultEmbedTimeout = 20 * time.Second + +// defaultStaleEmbedTimeout bounds embedQuery when ensureIndexed has already +// indicated the index is being rebuilt. The embedding backend may be +// saturated by the concurrent indexer's batches; we give it a short window +// to respond (preserves stale results when fast) and fall back to +// warning-only when slow. +const defaultStaleEmbedTimeout = 3 * time.Second + const backgroundReindexMaxDuration = 10 * time.Minute +// staleIndexWarning is returned to the caller whenever ensureIndexed cannot +// produce a fresh index synchronously (background indexer holds the flock, +// in-process goroutine is already running, or reindex timed out). The text +// is identical across all four code paths. +const staleIndexWarning = "Index is being updated in the background. Results may be incomplete or outdated. Use grep/glob/find for code search until indexing finishes (usually a few minutes; longer for large repositories)." + type cacheEntry struct { idx *index.Indexer effectiveRoot string @@ -169,20 +190,22 @@ func (ic *indexerCache) cacheSet(projectPath, model string, entry cacheEntry) { // indexerCache manages one *index.Indexer per project path, creating them // lazily with a shared embedder. type indexerCache struct { - mu sync.RWMutex - cache map[string]cacheEntry - reindexing map[string]bool // projects with an active background reindex goroutine - embedder embedder.Embedder - cfg *config.ConfigService - freshnessTTL time.Duration // override for tests; 0 reads from cfg, then defaultFreshnessTTL - reindexTimeout time.Duration // override for tests; 0 reads from cfg, then defaultReindexTimeout - findDonorFunc func(string, string) string // nil uses config.FindDonorIndex - seedFunc func(string, string) (bool, error) // nil uses index.SeedFromDonor - ensureFreshFunc func(ctx context.Context, idx *index.Indexer, projectDir string, progress index.ProgressFunc) (bool, index.Stats, error) // nil uses idx.EnsureFresh - log *slog.Logger - wg sync.WaitGroup // tracks background reindex goroutines - closeCtx context.Context // cancelled by Close() to signal background goroutines - closeFn context.CancelFunc // cancels closeCtx + mu sync.RWMutex + cache map[string]cacheEntry + reindexing map[string]bool // projects with an active background reindex goroutine + embedder embedder.Embedder + cfg *config.ConfigService + freshnessTTL time.Duration // override for tests; 0 reads from cfg, then defaultFreshnessTTL + reindexTimeout time.Duration // override for tests; 0 reads from cfg, then defaultReindexTimeout + embedTimeout time.Duration // override for tests; 0 means defaultEmbedTimeout + staleEmbedTimeout time.Duration // override for tests; 0 means defaultStaleEmbedTimeout + findDonorFunc func(string, string) string // nil uses config.FindDonorIndex + seedFunc func(string, string) (bool, error) // nil uses index.SeedFromDonor + ensureFreshFunc func(ctx context.Context, idx *index.Indexer, projectDir string, progress index.ProgressFunc) (bool, index.Stats, error) // nil uses idx.EnsureFresh + log *slog.Logger + wg sync.WaitGroup // tracks background reindex goroutines + closeCtx context.Context // cancelled by Close() to signal background goroutines + closeFn context.CancelFunc // cancels closeCtx } // getFreshnessTTL returns the effective freshness TTL, checking the override @@ -213,6 +236,26 @@ func (ic *indexerCache) getReindexTimeout() time.Duration { return defaultReindexTimeout } +// getEmbedTimeout returns the effective embed timeout, checking the override +// field first then falling back to defaultEmbedTimeout. +func (ic *indexerCache) getEmbedTimeout() time.Duration { + if ic.embedTimeout != 0 { + return ic.embedTimeout + } + return defaultEmbedTimeout +} + +// getStaleEmbedTimeout returns the embed timeout used when StaleWarning is +// set — typically much shorter than the normal embed timeout so the call +// returns quickly even when the embedding backend is saturated by the +// concurrent indexer. +func (ic *indexerCache) getStaleEmbedTimeout() time.Duration { + if ic.staleEmbedTimeout != 0 { + return ic.staleEmbedTimeout + } + return defaultStaleEmbedTimeout +} + // logger returns ic.log, falling back to a discarding logger when the field // is nil (e.g. in unit tests that construct indexerCache directly). func (ic *indexerCache) logger() *slog.Logger { @@ -537,22 +580,34 @@ func (ic *indexerCache) handleSemanticSearch(ctx context.Context, req *mcp.CallT } out.SeedWarning = seedWarning - // When the index is being rebuilt by a concurrent indexer, the - // StaleWarning text already instructs the caller to skip semantic_search - // for the next 10 tool calls. Embedding and searching now would (a) waste - // work the caller is told to ignore, and (b) contend with the busy - // indexer for the embedding backend — on a single-instance LM Studio, - // the query embed can queue behind the indexer's batches indefinitely - // and hang the MCP call. Return the warning immediately instead. + // Bound embedQuery so a slow or saturated embedding backend cannot hang + // the MCP call. Two regimes: + // - StaleWarning set: a concurrent indexer is hammering the embedding + // server. Give the embed a short window; if it succeeds, return the + // stale-but-real results along with the warning. If it times out, + // return warning-only — the caller is already told to fall back. + // - StaleWarning empty: defense-in-depth. Bound at defaultEmbedTimeout + // so any embedder pathology (cold-start GPU, network drop, server + // crash mid-request) surfaces as a quick error rather than the + // 10-minute HTTP client timeout. + embedTimeout := ic.getEmbedTimeout() if out.StaleWarning != "" { - text := formatSearchResults(input.Path, out) - return &mcp.CallToolResult{ - Content: []mcp.Content{&mcp.TextContent{Text: text}}, - }, nil, nil + embedTimeout = ic.getStaleEmbedTimeout() } - - queryVec, err := ic.embedQuery(ctx, input.Query) + embedCtx, embedCancel := context.WithTimeout(ctx, embedTimeout) + queryVec, err := ic.embedQuery(embedCtx, input.Query) + embedCancel() if err != nil { + if out.StaleWarning != "" && (errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled)) { + ic.logger().Info("stale embed fast-fail: returning warning-only", + "project", effectiveRoot, + "timeout", embedTimeout, + ) + text := formatSearchResults(input.Path, out) + return &mcp.CallToolResult{ + Content: []mcp.Content{&mcp.TextContent{Text: text}}, + }, nil, nil + } return nil, nil, err } @@ -725,7 +780,7 @@ func (ic *indexerCache) ensureIndexed(idx *index.Indexer, input SemanticSearchIn // worst case is redundant work, not corruption (SQLite WAL mode). if indexlock.IsHeld(indexlock.LockPathForDB(dbPath)) { ic.logger().Info("skipping reindex: background indexer is running", "project", projectDir) - out.StaleWarning = "Index is being updated in the background. Results may be incomplete or outdated. Use standard tools for the next 10 tool calls before trying semantic_search again." + out.StaleWarning = staleIndexWarning return out, nil } @@ -741,7 +796,7 @@ func (ic *indexerCache) ensureIndexed(idx *index.Indexer, input SemanticSearchIn if ic.reindexing != nil && ic.reindexing[reindexKey] { ic.mu.Unlock() ic.logger().Debug("skipping reindex: in-process background goroutine already running", "project", projectDir) - out.StaleWarning = "Index is being updated in the background. Results may be incomplete or outdated. Use standard tools for the next 10 tool calls before trying semantic_search again." + out.StaleWarning = staleIndexWarning return out, nil } if ic.reindexing == nil { @@ -833,7 +888,7 @@ func (ic *indexerCache) ensureIndexed(idx *index.Indexer, input SemanticSearchIn bgCancel() // release context resources early if result.skipped { ic.logger().Info("reindex skipped: lock held by another process", "project", projectDir) - out.StaleWarning = "Index is being updated in the background. Results may be incomplete or outdated. Use standard tools for the next 10 tool calls before trying semantic_search again." + out.StaleWarning = staleIndexWarning return out, nil } if result.err != nil { @@ -874,7 +929,7 @@ func (ic *indexerCache) ensureIndexed(idx *index.Indexer, input SemanticSearchIn "project", projectDir, "timeout", timeout, ) - out.StaleWarning = "Index is being updated in the background. Results may be incomplete or outdated. Use standard tools for the next 10 tool calls before trying semantic_search again." + out.StaleWarning = staleIndexWarning return out, nil } } diff --git a/cmd/stdio_concurrency_test.go b/cmd/stdio_concurrency_test.go index fb34ad3..a771a0e 100644 --- a/cmd/stdio_concurrency_test.go +++ b/cmd/stdio_concurrency_test.go @@ -419,10 +419,11 @@ func Demo() {} cache: map[string]cacheEntry{ projectDir: {idx: idx, effectiveRoot: projectDir, model: blockEmb.ModelName()}, }, - embedder: blockEmb, - cfg: newTestConfigService(t, 512), - log: discardLog, - freshnessTTL: 1 * time.Nanosecond, // force the merkle/flock path; do not trust LastIndexedAt + embedder: blockEmb, + cfg: newTestConfigService(t, 512), + log: discardLog, + freshnessTTL: 1 * time.Nanosecond, // force the merkle/flock path; do not trust LastIndexedAt + staleEmbedTimeout: 200 * time.Millisecond, // tight bound so fast-fail completes quickly in the test } // Step 4: call handleSemanticSearch with a deadline that's much shorter @@ -457,8 +458,12 @@ func Demo() {} if out.err != nil { t.Fatalf("handleSemanticSearch returned error: %v (elapsed %v)", out.err, elapsed) } + // With the fast-fail design we DO attempt the embed but bound it + // tightly (~3s by default; the test overrides to 200ms). On timeout + // the handler returns warning-only — the embed was attempted but + // did not extend the call time meaningfully. if elapsed > 1*time.Second { - t.Fatalf("handleSemanticSearch took %v — expected sub-second short-circuit when StaleWarning is set", elapsed) + t.Fatalf("handleSemanticSearch took %v — expected fast-fail within staleEmbedTimeout", elapsed) } text := mustTextResult(t, out.result) if !strings.Contains(text, "Index is being updated in the background") { @@ -468,12 +473,13 @@ func Demo() {} t.Fatal("handleSemanticSearch did not return within 3s — bug: embedQuery contends with background indexer even when StaleWarning is set") } - // The embedder must NEVER have been called: the short-circuit must - // happen before embedQuery. + // With fast-fail, embed IS called but gets cancelled by the deadline. + // Assert started was signaled to confirm we attempted the embed. select { case <-blockEmb.started: - t.Fatal("embedQuery was called even though StaleWarning was set — handleSemanticSearch must short-circuit before embedding") + // expected — fast-fail attempted the embed before timing out default: + t.Fatal("expected fast-fail design to attempt embed (started signal) before timing out") } } @@ -488,3 +494,172 @@ func writeTestGoFile(t *testing.T, dir, name, content string) { t.Fatal(err) } } + +// TestHandleSemanticSearch_EmbedQueryBounded verifies defense-in-depth: even +// when no StaleWarning is set (no concurrent indexer), a slow embedder must +// not be able to hang the MCP call past defaultEmbedTimeout. The underlying +// http.Client timeout is 10 minutes, which is effectively a hang from +// Claude Code's perspective. +func TestHandleSemanticSearch_EmbedQueryBounded(t *testing.T) { + const dims = 4 + + rawDir := t.TempDir() + projectDir, err := filepath.EvalSymlinks(rawDir) + if err != nil { + t.Fatal(err) + } + writeTestGoFile(t, projectDir, "main.go", `package main + +func Demo() {} +`) + + fastEmb := &stubEmbedder{model: "blocking-stub"} + dbPath := config.DBPathForProject(projectDir, fastEmb.ModelName()) + if err := os.MkdirAll(filepath.Dir(dbPath), 0o755); err != nil { + t.Fatal(err) + } + t.Cleanup(func() { _ = os.RemoveAll(filepath.Dir(dbPath)) }) + + idx, err := index.NewIndexer(dbPath, fastEmb, 512) + if err != nil { + t.Fatal(err) + } + if _, err := idx.Index(context.Background(), projectDir, false, nil); err != nil { + t.Fatal(err) + } + _ = idx.Close() + + // Re-open with a blocking embedder. No flock held — so StaleWarning will + // be empty and the short-circuit / fast-fail path does NOT trigger; this + // exercises the pure defense-in-depth bound. + blockEmb := newBlockingStubEmbedder(dims) + idx, err = index.NewIndexer(dbPath, blockEmb, 512) + if err != nil { + t.Fatal(err) + } + defer func() { + blockEmb.Unblock() + _ = idx.Close() + }() + + ic := &indexerCache{ + cache: map[string]cacheEntry{ + projectDir: {idx: idx, effectiveRoot: projectDir, model: blockEmb.ModelName(), lastCheckedAt: time.Now()}, + }, + embedder: blockEmb, + cfg: newTestConfigService(t, 512), + log: discardLog, + freshnessTTL: 1 * time.Hour, // skip merkle path so StaleWarning stays empty + embedTimeout: 500 * time.Millisecond, // tight bound so the test exercises the limit in <1s + } + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + start := time.Now() + req := &mcp.CallToolRequest{Params: &mcp.CallToolParamsRaw{}} + _, _, callErr := ic.handleSemanticSearch(ctx, req, SemanticSearchInput{ + Cwd: projectDir, + Path: projectDir, + Query: "demo", + Limit: 3, + }) + elapsed := time.Since(start) + + if callErr == nil { + t.Fatalf("expected timeout error, got nil (elapsed %v)", elapsed) + } + if elapsed > 2*time.Second { + t.Fatalf("handleSemanticSearch took %v — expected ~embedTimeout (500ms), not the 10-minute HTTP timeout", elapsed) + } + if !strings.Contains(callErr.Error(), "embed query") { + t.Fatalf("expected 'embed query' in error, got: %v", callErr) + } +} + +// TestHandleSemanticSearch_StaleWarningFastEmbedReturnsStaleResults verifies +// that when StaleWarning is set but the embedder responds within the short +// deadline, we still return the (stale) results from the existing DB along +// with the warning — strictly better UX than warning-only. +func TestHandleSemanticSearch_StaleWarningFastEmbedReturnsStaleResults(t *testing.T) { + rawDir := t.TempDir() + projectDir, err := filepath.EvalSymlinks(rawDir) + if err != nil { + t.Fatal(err) + } + writeTestGoFile(t, projectDir, "main.go", `package main + +// FindMe is the function the test searches for. +func FindMe() {} +`) + + fastEmb := &stubEmbedder{model: "fast-stub"} + dbPath := config.DBPathForProject(projectDir, fastEmb.ModelName()) + if err := os.MkdirAll(filepath.Dir(dbPath), 0o755); err != nil { + t.Fatal(err) + } + t.Cleanup(func() { _ = os.RemoveAll(filepath.Dir(dbPath)) }) + + idx, err := index.NewIndexer(dbPath, fastEmb, 512) + if err != nil { + t.Fatal(err) + } + if _, err := idx.Index(context.Background(), projectDir, false, nil); err != nil { + t.Fatal(err) + } + _ = idx.Close() + + // Hold the flock externally so ensureIndexed sets StaleWarning. + lockPath := indexlock.LockPathForDB(dbPath) + lk, lockErr := indexlock.TryAcquire(lockPath) + if lockErr != nil { + t.Fatal(lockErr) + } + if lk == nil { + t.Fatal("expected to acquire indexlock for test setup") + } + defer lk.Release() + if !indexlock.IsHeld(lockPath) { + t.Skip("flock TryAcquire+IsHeld is reentrant in the same process on this OS") + } + + // Re-open with the SAME fast embedder — embed will succeed within the + // short stale deadline, so handleSemanticSearch should return the + // (stale) results from the existing DB. + idx, err = index.NewIndexer(dbPath, fastEmb, 512) + if err != nil { + t.Fatal(err) + } + defer func() { _ = idx.Close() }() + + ic := &indexerCache{ + cache: map[string]cacheEntry{ + projectDir: {idx: idx, effectiveRoot: projectDir, model: fastEmb.ModelName()}, + }, + embedder: fastEmb, + cfg: newTestConfigService(t, 512), + log: discardLog, + freshnessTTL: 1 * time.Nanosecond, + } + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + req := &mcp.CallToolRequest{Params: &mcp.CallToolParamsRaw{}} + result, _, callErr := ic.handleSemanticSearch(ctx, req, SemanticSearchInput{ + Cwd: projectDir, + Path: projectDir, + Query: "FindMe", + Limit: 3, + }) + if callErr != nil { + t.Fatalf("handleSemanticSearch returned error: %v", callErr) + } + text := mustTextResult(t, result) + if !strings.Contains(text, "Index is being updated in the background") { + t.Fatalf("expected StaleWarning text in result, got:\n%s", text) + } + if !strings.Contains(text, "FindMe") { + t.Fatalf("expected stale-but-real search results to include 'FindMe' from the pre-indexed file, got:\n%s", text) + } +} diff --git a/cmd/stdio_test.go b/cmd/stdio_test.go index 0df23c1..2b313ff 100644 --- a/cmd/stdio_test.go +++ b/cmd/stdio_test.go @@ -2029,3 +2029,20 @@ func TestIndexerCache_CloseCancelsBackgroundGoroutines(t *testing.T) { t.Fatalf("expected context.Canceled in ensureFresh, got: %v", finalErr) } } + +// TestStaleIndexWarning_NoBrittleToolCallCount locks in the wording change +// that drops the brittle "10 tool calls" advice. Initial indexing of large +// repos (e.g. cloud) takes 10+ minutes — far more than 10 tool calls — so +// the old text was misleading. The "Index is being updated in the +// background" prefix is preserved so callers that match on it still work. +func TestStaleIndexWarning_NoBrittleToolCallCount(t *testing.T) { + if strings.Contains(staleIndexWarning, "10 tool calls") { + t.Fatal("staleIndexWarning still references the brittle '10 tool calls' count; large repos exceed that during initial indexing") + } + if !strings.Contains(staleIndexWarning, "Index is being updated in the background") { + t.Fatal("staleIndexWarning must keep the 'Index is being updated in the background' prefix for callers that match on it") + } + if !strings.Contains(staleIndexWarning, "grep") { + t.Fatal("staleIndexWarning should point the LLM at concrete fallback tools (grep/glob/find)") + } +}