From 089ad63779b1e4aa56809d305818f0da4b0f9deb Mon Sep 17 00:00:00 2001 From: Ismael Garrido Date: Sat, 30 May 2026 11:27:02 -0300 Subject: [PATCH 1/2] fix(index): seed worktree index from sibling donor in lumen index MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The lumen index command (the background indexer spawned at SessionStart) created the DB and re-indexed every file from scratch instead of copying an existing sibling-worktree index — only the MCP getOrCreate seeded, and it lost the race to create the DB. runIndexer now seeds from a donor, under the index lock it already holds, before indexing. Co-authored-by: Claude Opus 4.8 --- cmd/index.go | 5 +++ cmd/seed.go | 56 +++++++++++++++++++++++++ cmd/seed_test.go | 106 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 167 insertions(+) create mode 100644 cmd/seed.go create mode 100644 cmd/seed_test.go diff --git a/cmd/index.go b/cmd/index.go index 334b64f..4d6592d 100644 --- a/cmd/index.go +++ b/cmd/index.go @@ -251,6 +251,11 @@ func runIndexer(cmd *cobra.Command, cfg *config.ConfigService, emb *embedder.Fai } defer lock.Release() + // Seed before opening the DB, under the index lock acquired above so only + // one indexer seeds. Doing it here (not just in the MCP getOrCreate path) + // covers the background indexer that lumen index runs at SessionStart. + seedFromDonorIfNew(dbPath, projectPath, emb.ModelName(), logger) + // Cancel context on SIGTERM or SIGINT so the indexer stops cleanly and // the deferred lock.Release() runs before exit. ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGTERM, syscall.SIGINT) diff --git a/cmd/seed.go b/cmd/seed.go new file mode 100644 index 0000000..2c599d2 --- /dev/null +++ b/cmd/seed.go @@ -0,0 +1,56 @@ +// Copyright 2026 Aeneas Rekkas +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cmd + +import ( + "log/slog" + "os" + + "github.com/ory/lumen/internal/config" + "github.com/ory/lumen/internal/index" +) + +// seedFromDonorIfNew copies a sibling git-worktree's index to dbPath when no +// index exists there yet, so indexing a fresh worktree reuses the parent's +// embeddings and only re-indexes changed files instead of re-embedding the +// whole tree from scratch. It is best-effort: any failure is logged and +// indexing proceeds from scratch. +// +// This mirrors the seeding the MCP server performs in +// indexerCache.getOrCreate. The background indexer that lumen index runs at +// SessionStart also creates a fresh DB, and it normally wins the race against +// the MCP path. Without seeding here, that background indexer leaves the +// worktree fully re-indexed from scratch and the getOrCreate seed is then +// skipped because the DB already exists. +func seedFromDonorIfNew(dbPath, projectPath, model string, logger *slog.Logger) { + if _, err := os.Stat(dbPath); !os.IsNotExist(err) { + // The DB already exists (or stat failed unexpectedly) — nothing to seed. + return + } + donorPath := config.FindDonorIndex(projectPath, model) + if donorPath == "" { + return + } + seeded, err := index.SeedFromDonor(donorPath, dbPath) + if err != nil { + logger.Warn("seed from donor worktree failed", + "project", projectPath, "donor_path", donorPath, "error", err) + return + } + if seeded { + logger.Info("seeded index from donor worktree", + "project", projectPath, "donor_path", donorPath) + } +} diff --git a/cmd/seed_test.go b/cmd/seed_test.go new file mode 100644 index 0000000..937ccac --- /dev/null +++ b/cmd/seed_test.go @@ -0,0 +1,106 @@ +// Copyright 2026 Aeneas Rekkas +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cmd + +import ( + "io" + "log/slog" + "os" + "os/exec" + "path/filepath" + "testing" + + "github.com/ory/lumen/internal/config" + "github.com/ory/lumen/internal/store" +) + +// TestSeedFromDonorIfNew_SeedsWorktreeFromSibling reproduces the bug where +// `lumen index` (the background indexer spawned by the SessionStart hook) does +// not copy an existing sibling-worktree index before indexing a fresh +// worktree, causing a full re-index from scratch instead of an incremental +// merkle update. A worktree whose sibling is already indexed must be seeded. +func TestSeedFromDonorIfNew_SeedsWorktreeFromSibling(t *testing.T) { + if _, err := exec.LookPath("git"); err != nil { + t.Skip("git not on PATH") + } + + t.Setenv("XDG_DATA_HOME", t.TempDir()) + const model = "test-model" + + // Main repo with a worktree, mirroring `git worktree add`. + main := t.TempDir() + runGit(t, main, "init") + runGit(t, main, "config", "user.email", "test@test.com") + runGit(t, main, "config", "user.name", "test") + runGit(t, main, "commit", "--allow-empty", "-m", "init") + + wt := filepath.Join(t.TempDir(), "wt") + runGit(t, main, "worktree", "add", wt) + + mainResolved, err := filepath.EvalSymlinks(main) + if err != nil { + t.Fatal(err) + } + wtResolved, err := filepath.EvalSymlinks(wt) + if err != nil { + t.Fatal(err) + } + + // Build a minimal but valid donor index for the main worktree: a real + // SQLite store carrying a non-empty root_hash, which is what SeedFromDonor + // requires to treat the donor as a completed index. + donorDB := config.DBPathForProject(mainResolved, model) + if err := os.MkdirAll(filepath.Dir(donorDB), 0o755); err != nil { + t.Fatal(err) + } + donor, err := store.New(donorDB, 4) + if err != nil { + t.Fatal(err) + } + if err := donor.SetMeta("root_hash", "donor-root-hash"); err != nil { + t.Fatal(err) + } + if err := donor.SetMeta("project_path", mainResolved); err != nil { + t.Fatal(err) + } + if err := donor.Close(); err != nil { + t.Fatal(err) + } + + dstDB := config.DBPathForProject(wtResolved, model) + if _, err := os.Stat(dstDB); !os.IsNotExist(err) { + t.Fatalf("precondition failed: worktree DB should not exist yet (stat err=%v)", err) + } + + logger := slog.New(slog.NewTextHandler(io.Discard, nil)) + seedFromDonorIfNew(dstDB, wtResolved, model, logger) + + if _, err := os.Stat(dstDB); err != nil { + t.Fatalf("worktree index was NOT seeded from sibling donor: %v", err) + } + + seeded, err := store.New(dstDB, 4) + if err != nil { + t.Fatal(err) + } + defer func() { _ = seeded.Close() }() + got, err := seeded.GetMeta("root_hash") + if err != nil { + t.Fatal(err) + } + if got != "donor-root-hash" { + t.Fatalf("expected seeded DB to carry donor root_hash, got %q", got) + } +} From 3e258e638a5a7c381bd0a38585f0d8cb0a884a09 Mon Sep 17 00:00:00 2001 From: Ismael Garrido Date: Sat, 30 May 2026 11:27:02 -0300 Subject: [PATCH 2/2] fix(stdio): serialize worktree seeding with the background indexer lock getOrCreate seeded and created a new DB without the index flock that lumen index holds, so both could run SeedFromDonor against the same fresh worktree DB concurrently and corrupt it. getOrCreate now takes the same lock to seed; when a peer holds it, it waits briefly for the peer to publish the DB instead of creating an empty one that clobbers the seed. Co-authored-by: Claude Opus 4.8 --- cmd/stdio.go | 114 ++++++++++++++++++++++++++++++------- cmd/stdio_seedrace_test.go | 83 +++++++++++++++++++++++++++ 2 files changed, 177 insertions(+), 20 deletions(-) create mode 100644 cmd/stdio_seedrace_test.go diff --git a/cmd/stdio.go b/cmd/stdio.go index 551840d..ae22b39 100644 --- a/cmd/stdio.go +++ b/cmd/stdio.go @@ -139,6 +139,18 @@ const defaultStaleEmbedTimeout = 3 * time.Second const backgroundReindexMaxDuration = 10 * time.Minute +// defaultCreateWaitTimeout bounds how long getOrCreate waits for a peer +// process (the background lumen index spawned at SessionStart) to publish a +// new index DB it is creating + seeding under the exclusive index lock. The +// peer publishes the file early (a donor copy or an empty DB) before the slow +// embedding pass, so this only needs to cover that brief window. On timeout +// getOrCreate falls back to creating the DB itself. +const defaultCreateWaitTimeout = 3 * time.Second + +// createWaitPollInterval is how often getOrCreate re-checks for the peer's DB +// file while waiting up to defaultCreateWaitTimeout. +const createWaitPollInterval = 25 * time.Millisecond + // staleIndexWarning is returned to the caller whenever ensureIndexed cannot // produce a fresh index synchronously (background indexer holds the flock, // in-process goroutine is already running, or reindex timed out). The text @@ -199,6 +211,7 @@ type indexerCache struct { reindexTimeout time.Duration // override for tests; 0 reads from cfg, then defaultReindexTimeout embedTimeout time.Duration // override for tests; 0 means defaultEmbedTimeout staleEmbedTimeout time.Duration // override for tests; 0 means defaultStaleEmbedTimeout + createWaitTimeout time.Duration // override for tests; 0 means defaultCreateWaitTimeout findDonorFunc func(string, string) string // nil uses config.FindDonorIndex seedFunc func(string, string) (bool, error) // nil uses index.SeedFromDonor ensureFreshFunc func(ctx context.Context, idx *index.Indexer, projectDir string, progress index.ProgressFunc) (bool, index.Stats, error) // nil uses idx.EnsureFresh @@ -256,6 +269,15 @@ func (ic *indexerCache) getStaleEmbedTimeout() time.Duration { return defaultStaleEmbedTimeout } +// getCreateWaitTimeout returns how long getOrCreate waits for a peer process +// to publish a new index DB before creating it itself. +func (ic *indexerCache) getCreateWaitTimeout() time.Duration { + if ic.createWaitTimeout != 0 { + return ic.createWaitTimeout + } + return defaultCreateWaitTimeout +} + // logger returns ic.log, falling back to a discarding logger when the field // is nil (e.g. in unit tests that construct indexerCache directly). func (ic *indexerCache) logger() *slog.Logger { @@ -494,27 +516,29 @@ func (ic *indexerCache) getOrCreate(projectPath string, preferredRoot string, mo "model", modelName, "index_version", config.IndexVersion, ) - findDonor := ic.findDonorFunc - if findDonor == nil { - findDonor = config.FindDonorIndex - } - if donorPath := findDonor(effectiveRoot, modelName); donorPath != "" { - ic.logger().Info("seeding index from donor worktree", - "effective_root", effectiveRoot, - "donor_path", donorPath, - ) - seedFn := ic.seedFunc - if seedFn == nil { - seedFn = index.SeedFromDonor - } - if _, seedErr := seedFn(donorPath, dbPath); seedErr != nil { - ic.logger().Warn("seed from donor worktree failed", - "effective_root", effectiveRoot, - "donor_path", donorPath, - "error", seedErr, - ) - seedWarning = fmt.Sprintf("index seeded from scratch (sibling copy failed: %v)", seedErr) + + // Serialize creation + seeding with the background indexer + // (lumen index, spawned by the SessionStart hook) via the same + // exclusive index lock it holds. Without this, both processes can run + // SeedFromDonor against the same dbPath concurrently — corrupting the + // SQLite file — or one can create an empty DB that makes the other skip + // seeding and re-index the worktree from scratch. + lockPath := indexlock.LockPathForDB(dbPath) + lk, lockErr := indexlock.TryAcquire(lockPath) + switch { + case lockErr == nil && lk != nil: + // We own creation. Re-check under the lock — a peer may have created + // the DB between our stat above and acquiring the lock. + if _, st := os.Stat(dbPath); os.IsNotExist(st) { + seedWarning = ic.seedFromDonor(effectiveRoot, modelName, dbPath) } + lk.Release() + default: + // A background indexer holds the lock and is creating + seeding the + // DB. Wait briefly for it to publish the file so NewIndexer opens + // the seeded copy instead of creating an empty DB that would clobber + // the seed. + ic.waitForDB(dbPath) } } @@ -551,6 +575,56 @@ func (ic *indexerCache) getOrCreate(projectPath string, preferredRoot string, mo return idx, effectiveRoot, seedWarning, nil } +// seedFromDonor copies a sibling worktree's index into dbPath, reusing the +// parent's embeddings instead of indexing from scratch. The caller must hold +// the exclusive index lock for dbPath. Returns a non-empty seedWarning when a +// donor was found but the copy failed (indexing will proceed from scratch). +func (ic *indexerCache) seedFromDonor(effectiveRoot, modelName, dbPath string) (seedWarning string) { + findDonor := ic.findDonorFunc + if findDonor == nil { + findDonor = config.FindDonorIndex + } + donorPath := findDonor(effectiveRoot, modelName) + if donorPath == "" { + return "" + } + ic.logger().Info("seeding index from donor worktree", + "effective_root", effectiveRoot, + "donor_path", donorPath, + ) + seedFn := ic.seedFunc + if seedFn == nil { + seedFn = index.SeedFromDonor + } + if _, seedErr := seedFn(donorPath, dbPath); seedErr != nil { + ic.logger().Warn("seed from donor worktree failed", + "effective_root", effectiveRoot, + "donor_path", donorPath, + "error", seedErr, + ) + return fmt.Sprintf("index seeded from scratch (sibling copy failed: %v)", seedErr) + } + return "" +} + +// waitForDB polls for dbPath to appear, up to getCreateWaitTimeout. It is used +// when another process holds the index lock and is creating + seeding the DB: +// waiting for it to publish the file lets the subsequent NewIndexer open the +// seeded copy rather than creating an empty DB that clobbers the seed. +func (ic *indexerCache) waitForDB(dbPath string) { + deadline := time.Now().Add(ic.getCreateWaitTimeout()) + for { + if _, err := os.Stat(dbPath); err == nil { + return + } + if time.Now().After(deadline) { + ic.logger().Debug("timed out waiting for peer indexer to publish DB", "db_path", dbPath) + return + } + time.Sleep(createWaitPollInterval) + } +} + // handleSemanticSearch is the tool handler for the semantic_search tool. // Uses Out=any so the SDK does not set StructuredContent — the LLM sees // only the plaintext in Content. diff --git a/cmd/stdio_seedrace_test.go b/cmd/stdio_seedrace_test.go new file mode 100644 index 0000000..f1f532a --- /dev/null +++ b/cmd/stdio_seedrace_test.go @@ -0,0 +1,83 @@ +// Copyright 2026 Aeneas Rekkas +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cmd + +import ( + "os" + "path/filepath" + "sync/atomic" + "testing" + "time" + + "github.com/ory/lumen/internal/config" + "github.com/ory/lumen/internal/indexlock" +) + +// TestGetOrCreate_SkipsSeedWhenIndexLockHeld reproduces the cross-process race +// between the MCP server (getOrCreate) and the background indexer (`lumen +// index`, spawned by the SessionStart hook). Both target the same fresh +// worktree DB. The background indexer holds the exclusive index flock while it +// creates and seeds the DB; getOrCreate must NOT run SeedFromDonor at the same +// time — two concurrent seeds against the same dbPath corrupt the SQLite file +// (SeedFromDonor copies through a shared temp path and renames over the DB). +// +// getOrCreate must defer to the lock holder instead of racing it. +func TestGetOrCreate_SkipsSeedWhenIndexLockHeld(t *testing.T) { + tmpDir := t.TempDir() + t.Setenv("XDG_DATA_HOME", tmpDir) + + projectDir := filepath.Join(tmpDir, "project") + if err := os.MkdirAll(projectDir, 0o755); err != nil { + t.Fatal(err) + } + + const model = "test-model" + dbPath := config.DBPathForProject(projectDir, model) + if err := os.MkdirAll(filepath.Dir(dbPath), 0o755); err != nil { + t.Fatal(err) + } + + // Simulate the background indexer holding the exclusive index lock while it + // creates + seeds the DB. + lk, err := indexlock.TryAcquire(indexlock.LockPathForDB(dbPath)) + if err != nil || lk == nil { + t.Fatalf("acquire index lock: err=%v lk=%v", err, lk) + } + defer lk.Release() + + var seedCalls int32 + ic := &indexerCache{ + embedder: &stubEmbedder{}, + cfg: newTestConfigService(t, 512), + findDonorFunc: func(_, _ string) string { return "/fake/donor.db" }, + seedFunc: func(_, _ string) (bool, error) { + atomic.AddInt32(&seedCalls, 1) + return true, nil + }, + // Keep the wait-for-peer bounded so the test is fast. + createWaitTimeout: 50 * time.Millisecond, + } + + idx, _, _, err := ic.getOrCreate(projectDir, "", model) + if err != nil { + t.Fatalf("getOrCreate: %v", err) + } + t.Cleanup(func() { _ = idx.Close() }) + + if n := atomic.LoadInt32(&seedCalls); n != 0 { + t.Fatalf("getOrCreate ran SeedFromDonor (%d times) while another process held "+ + "the index lock — this races the background indexer and can corrupt the DB", n) + } +}