From 26ddfbc2c22cd7f14b97e3192ae1d0be62d1858a Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Thu, 25 Jun 2026 01:05:24 -0400 Subject: [PATCH] feat(parser): migrate gemini copilot providers Gemini and Copilot are direct local file sources, but each has source-shape details that were still coupled to the legacy adapter path. Moving them behind concrete providers keeps Gemini tmp/*/chats discovery and Copilot bare-vs-directory precedence explicit. The providers preserve raw and full ID lookup, changed-path classification, source hashing, Gemini project hints, Copilot workspace.yaml freshness, aggregate usage events, and parser output normalization. fix(parser): preserve gemini copilot provider freshness Gemini and Copilot now advertise provider-owned watch classification, so remove and rename events need to map back to syntactic source refs even after the filesystem entry has disappeared. Without that fallback, watcher-driven sync can leave stale provider sessions until a wider resync happens. Copilot also exposes a composite fingerprint that includes workspace.yaml freshness and shutdown aggregate usage. The provider parse result has to carry that same file metadata and usage event slice because sync consumes ParseResult, not only ParsedSession. Validation: go test -tags "fts5" ./internal/parser -count=1; go vet ./...; git diff --check fix(parser): include gemini project metadata freshness Gemini project names can come from projects.json or trustedFolders.json, so treating only the transcript as the provider source leaves metadata-only changes stale. The provider now watches those root-level sidecars, classifies their changes back to discovered sessions, and folds their contents into the source fingerprint. Validation: go test -tags "fts5" ./internal/parser -count=1; go vet ./...; git diff --check fix(parser): hash copilot workspace metadata Copilot workspace.yaml can change the provider-visible title without changing the event stream. 
Size and mtime are useful freshness guards, but the provider hash should also include the workspace file contents so same-length title edits cannot be skipped. Validation: go test -tags "fts5" ./internal/parser -count=1; go vet ./...; git diff --check fix(sync): bridge provider path classification Concrete providers own source sidecars that legacy path classifiers do not know about. SyncPaths now falls back to provider changed-path classification after the legacy classifiers miss, and provider-classified files force a full parse so metadata-only events can refresh stored session state. Legacy classification remains authoritative when it recognizes a path, preserving existing project extraction and optimized sidecar filters while still letting migrated providers cover new sidecar surfaces. Validation: go test -tags "fts5" ./internal/parser ./internal/sync -count=1; go vet ./...; git diff --check fix(sync): preserve provider sidecar reparses Provider sidecar events can map to the same session file as a legacy path event in one watcher batch. Keeping only the first classified file made the result order-dependent and could drop the force-parse signal that metadata-only changes rely on. Per-file forced parses also need to bypass the generic skip cache, not just the agent-specific mtime checks, because sidecar updates may leave the transcript mtime untouched while still changing parsed session metadata. Validation: go test -tags "fts5" ./internal/sync -run 'TestSyncPathsGeminiProjectMetadataEventRefreshesProject' -count=1; go test -tags "fts5" ./internal/sync -count=1; go test -tags "fts5" ./internal/parser -run 'Test(Gemini|Copilot|ProviderMigration)' -count=1; go vet ./...; git diff --check fix(sync): skip removed provider source events Provider changed-path classification can return syntactic source refs for deleted files so providers can model remove events. 
While legacy file processing is still authoritative, enqueueing an exact missing source path makes SyncPaths fail at the initial stat instead of treating the watcher remove as a no-op. Keep sidecar fanout intact for existing sources, because metadata changes such as Gemini projects.json still need to force a reparse even when the transcript mtime is unchanged. Validation: go test -tags "fts5" ./internal/sync -run 'TestEngine_ClassifyPathsProvider(RemoveSkipsMissingGeminiSource|SidecarKeepsExistingGeminiSources)|TestSyncPathsGeminiProjectMetadataEventRefreshesProject' -count=1; go test -tags "fts5" ./internal/parser ./internal/sync -count=1; go fmt ./...; go vet ./...; ./custom-gcl run --config .golangci.nilaway.yml ./internal/parser/... ./internal/sync/...; git diff --check test(sync): compare gemini copilot shadow parity Gemini and Copilot are migrated through concrete providers on this branch, so reviewers need a sync-level parity check that exercises the provider observation contract rather than only parser-local behavior. The fixtures cover their sidecar-sensitive source shapes: Gemini project metadata feeds the resolved project hint, and Copilot workspace.yaml participates in both title selection and the composite fingerprint. Validation: go test -tags "fts5" ./internal/sync -run 'TestObserveProviderSourceMatches(Gemini|Copilot)LegacyParser' -count=1; go test -tags "fts5" ./internal/parser -run 'Test(Gemini|Copilot)Provider' -count=1; go vet ./...; git diff --check. Full go test -tags "fts5" ./internal/parser ./internal/sync -count=1 currently fails in existing TestSyncPathsCodexIndexEventRefreshesStoredDuplicate. 
refactor(parser): fold gemini and copilot into providers Move Gemini and Copilot source discovery, lookup, and parse ownership onto the concrete geminiProvider and copilotProvider and delete the six package-level legacy entrypoints: DiscoverGeminiSessions, FindGeminiSourceFile, ParseGeminiSession, DiscoverCopilotSessions, FindCopilotSourceFile, and ParseCopilotSession. Discovery and find-source bodies now live as provider-owned source-set helpers (discoverSessionPaths and findSourceFile on each source set), the gemini confirmGeminiSessionID guard moves to the provider file, and the parsers become the providers' parseSession methods. The copilot source set's bare/dir precedence and dedup, and the gemini session-filename matching, are reproduced on the provider exactly as before. Gemini project resolution is preserved on the provider: sourceRef already resolves the project via BuildGeminiProjectMap/ResolveGeminiProject for both discovery and changed-path classification, so removing the engine's gemini project-map plumbing loses no project names. BuildGeminiProjectMap and ResolveGeminiProject stay exported package helpers used by the provider. Make both Gemini and Copilot provider-authoritative and drop their legacy sync dispatch: the classifyOnePath copilot and gemini blocks (and the now unused geminiProjectsByDir parameter threaded through classifyOnePath and classifyPaths), the processFile case arms, and the processGemini, processCopilot, and shouldSkipCopilot methods. copilotEffectiveMtime stays as a shared composite-mtime helper used by discoveredFileMtime. Wire the provider facade into parse-diff: agents that dropped their DiscoverFunc are now discovered through discoverProviderSources (filtered to the resolved, provider-discoverable agents), and resolveParseDiffAgents accepts file-based agents backed by a shadow-compare or provider-authoritative provider. 
Without this, a provider-authoritative agent would silently fall out of parse-diff once its DiscoverFunc was removed. Drop the Gemini and Copilot AgentDef DiscoverFunc/FindSourceFunc hooks, remove both files from the pending shim scan list, delete the shared shadow-baseline test file, and replace it with provider-API coverage plus guards asserting the legacy entrypoints stay gone. Package and engine tests route through the provider methods via new test helpers. test(sync): drop duplicate shadow source helper def The canonical writeProviderShadowSourceFile now lives at the Codex fold, so this redeclaration in provider_shadow_test.go conflicts with it. Drop the local copy and its now-unused os and path/filepath imports; callers use the inherited shared helper. test(sync): restore provider-aware classify tests at gemini fold The original restack mis-merged engine_test.go on this branch, reverting the OpenCode SQLite, OpenCode removed-file, Claude stat-error, and Vibe meta-only classification tests to their stale pre-fold shapes (fake opencode.db bytes instead of a seeded session, dropped seedOpenCodeSQLiteSession helper) and re-adding a classify_vibe_test.go that exists on no lower branch. Those stale tests asserted the legacy direct-classification behavior and failed against the provider-routed path. Restore the correct versions inherited from the codex branch, keep this branch's two new Gemini provider classify tests, and drop the spurious classify_vibe_test.go. test(sync): restore gemini provider classify tests at gemini fold Re-add the two Gemini changed-path classify tests (TestEngine_ClassifyPathsProviderRemoveSkipsMissingGeminiSource and TestEngine_ClassifyPathsProviderSidecarKeepsExistingGeminiSources) that were dropped while restoring this branch's mis-merged engine_test.go to its provider-aware shape. fix(sync): skip fresh gemini copilot before hashing Gemini and Copilot lost their legacy DB freshness gates when the provider-authoritative path took over. 
That made unchanged sessions reach provider fingerprinting and parsing during normal full syncs, which is unnecessary work and no longer matches the old processGemini/processCopilot behavior. Restore the cheap pre-fingerprint checks for those two agents: Gemini compares the stored file path size and mtime, while Copilot compares transcript size plus the workspace.yaml effective mtime. Force-parse paths still flow through the provider so sidecar-driven reparses and parse-diff are not suppressed. Validation: go test -tags "fts5" ./internal/sync -run 'TestProcessFileProviderAuthoritativeSkipsFresh(Gemini|Copilot)BeforeFingerprint|TestProcessCodexAppendedStaleProject(DoesFullReparse|CarriesForceReplace)' -count=1; go test -tags "fts5" ./internal/parser ./internal/sync -count=1; go vet ./...; git diff --check fix(sync): restore discover fields on shadowCallerProvider The rebase onto origin/main dropped the discoverSources and discoverErr fields from the shadowCallerProvider test struct while keeping the Discover method that reads them, leaving this branch and every branch stacked above it uncompilable. Restore the two fields so the Discover stub resolves. 
--- internal/parser/copilot.go | 10 +- internal/parser/copilot_provider.go | 489 +++++++++++++++++ internal/parser/copilot_test.go | 63 ++- internal/parser/discovery.go | 204 +------ internal/parser/discovery_test.go | 31 +- internal/parser/gemini.go | 10 +- .../parser/gemini_copilot_provider_test.go | 321 +++++++++++ internal/parser/gemini_parser_test.go | 72 ++- internal/parser/gemini_provider.go | 509 ++++++++++++++++++ internal/parser/provider.go | 4 + internal/parser/provider_migration.go | 4 +- internal/parser/provider_shim_scan_test.go | 3 +- internal/parser/provider_test.go | 39 +- internal/parser/types.go | 36 +- .../sync/classify_antigravity_cli_test.go | 5 +- internal/sync/engine.go | 235 +------- internal/sync/engine_integration_test.go | 102 +++- internal/sync/engine_test.go | 60 ++- internal/sync/provider_shadow_caller_test.go | 148 +++++ 19 files changed, 1854 insertions(+), 491 deletions(-) create mode 100644 internal/parser/copilot_provider.go create mode 100644 internal/parser/gemini_copilot_provider_test.go create mode 100644 internal/parser/gemini_provider.go diff --git a/internal/parser/copilot.go b/internal/parser/copilot.go index 57ee77d36..0c4c35be2 100644 --- a/internal/parser/copilot.go +++ b/internal/parser/copilot.go @@ -333,10 +333,12 @@ func readCopilotWorkspaceName(eventsPath string) string { return "" } -// ParseCopilotSession parses a Copilot JSONL session file. -// Returns (nil, nil, nil, nil) if the file doesn't exist or -// contains no user/assistant messages. -func ParseCopilotSession( +// parseSession parses a Copilot JSONL session file into the session, messages, +// and usage events the provider consumes. Returns (nil, nil, nil, nil) if the +// file doesn't exist or contains no user/assistant messages. This is the +// provider-owned parse entrypoint; the package-level free function was folded +// onto the provider. 
+func (p *copilotProvider) parseSession( path, machine string, ) (*ParsedSession, []ParsedMessage, []ParsedUsageEvent, error) { info, err := os.Stat(path) diff --git a/internal/parser/copilot_provider.go b/internal/parser/copilot_provider.go new file mode 100644 index 000000000..0ec855939 --- /dev/null +++ b/internal/parser/copilot_provider.go @@ -0,0 +1,489 @@ +package parser + +import ( + "context" + "crypto/sha256" + "fmt" + "hash" + "io" + "os" + "path/filepath" + "strings" +) + +var _ Provider = (*copilotProvider)(nil) + +type copilotProviderFactory struct { + def AgentDef +} + +func newCopilotProviderFactory(def AgentDef) ProviderFactory { + return copilotProviderFactory{def: cloneAgentDef(def)} +} + +func (f copilotProviderFactory) Definition() AgentDef { + return cloneAgentDef(f.def) +} + +func (f copilotProviderFactory) Capabilities() Capabilities { + return copilotProviderCapabilities() +} + +func (f copilotProviderFactory) NewProvider(cfg ProviderConfig) Provider { + cfg = cfg.Clone() + return &copilotProvider{ + ProviderBase: ProviderBase{ + Def: cloneAgentDef(f.def), + Caps: copilotProviderCapabilities(), + Config: cfg, + }, + sources: newCopilotSourceSet(cfg.Roots), + } +} + +type copilotProvider struct { + ProviderBase + sources copilotSourceSet +} + +func (p *copilotProvider) Discover(ctx context.Context) ([]SourceRef, error) { + return p.sources.Discover(ctx) +} + +func (p *copilotProvider) WatchPlan(ctx context.Context) (WatchPlan, error) { + return p.sources.WatchPlan(ctx) +} + +func (p *copilotProvider) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + return p.sources.SourcesForChangedPath(ctx, req) +} + +func (p *copilotProvider) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + req = providerFindRequestWithRawSessionID(p.Def, req) + return p.sources.FindSource(ctx, req) +} + +func (p *copilotProvider) Fingerprint( + ctx context.Context, + source 
SourceRef, +) (SourceFingerprint, error) { + return p.sources.Fingerprint(ctx, source) +} + +func (p *copilotProvider) Parse( + ctx context.Context, + req ParseRequest, +) (ParseOutcome, error) { + if err := ctx.Err(); err != nil { + return ParseOutcome{}, err + } + path, ok := p.sources.pathFromSource(req.Source) + if !ok { + return ParseOutcome{}, fmt.Errorf("copilot source path unavailable") + } + machine := firstNonEmptyJSONLString(req.Machine, p.Config.Machine) + sess, msgs, usage, err := p.parseSession(path, machine) + if err != nil { + return ParseOutcome{}, err + } + if sess == nil { + return ParseOutcome{ + ResultSetComplete: true, + SkipReason: SkipNoSession, + }, nil + } + if req.Fingerprint.Hash != "" { + sess.File.Hash = req.Fingerprint.Hash + } + if req.Fingerprint.Size > 0 { + sess.File.Size = req.Fingerprint.Size + } + if req.Fingerprint.MTimeNS > 0 { + sess.File.Mtime = req.Fingerprint.MTimeNS + } + sess.UsageEvents = usage + return ParseOutcome{ + Results: []ParseResultOutcome{{ + Result: ParseResult{ + Session: *sess, + Messages: msgs, + UsageEvents: usage, + }, + DataVersion: DataVersionCurrent, + }}, + ResultSetComplete: true, + }, nil +} + +type copilotSource struct { + Root string + Path string +} + +type copilotSourceSet struct { + roots []string +} + +func newCopilotSourceSet(roots []string) copilotSourceSet { + return copilotSourceSet{roots: cleanJSONLRoots(roots)} +} + +func (s copilotSourceSet) Discover(ctx context.Context) ([]SourceRef, error) { + var sources []SourceRef + seen := make(map[string]struct{}) + for _, root := range s.roots { + if err := ctx.Err(); err != nil { + return nil, err + } + for _, path := range s.discoverSessionPaths(root) { + source, ok := s.sourceRef(root, path) + if !ok { + continue + } + addJSONLSource(source, &sources, seen) + } + } + sortJSONLSources(sources) + return sources, nil +} + +// discoverSessionPaths finds all Copilot session file paths under +// /session-state/. 
It supports both the bare layout (.jsonl) and +// the directory layout (/events.jsonl); when both exist for the same +// session, the directory layout wins and the bare file is dropped so a session +// is not discovered twice. +func (s copilotSourceSet) discoverSessionPaths(root string) []string { + if root == "" { + return nil + } + + stateDir := filepath.Join(root, copilotStateDir) + entries, err := os.ReadDir(stateDir) + if err != nil { + return nil + } + + dirs := make(map[string]struct{}) + for _, entry := range entries { + if !entry.IsDir() { + continue + } + eventsPath := filepath.Join(stateDir, entry.Name(), "events.jsonl") + if _, err := os.Stat(eventsPath); err == nil { + dirs[entry.Name()] = struct{}{} + } + } + + var paths []string + for _, entry := range entries { + name := entry.Name() + if entry.IsDir() { + candidate := filepath.Join(stateDir, name, "events.jsonl") + if _, err := os.Stat(candidate); err == nil { + paths = append(paths, candidate) + } + continue + } + if stem, ok := strings.CutSuffix(name, ".jsonl"); ok { + if _, dup := dirs[stem]; dup { + continue + } + paths = append(paths, filepath.Join(stateDir, name)) + } + } + return paths +} + +func (s copilotSourceSet) WatchPlan(context.Context) (WatchPlan, error) { + roots := make([]WatchRoot, 0, len(s.roots)) + for _, root := range s.roots { + stateDir := filepath.Join(root, copilotStateDir) + roots = append(roots, WatchRoot{ + Path: stateDir, + Recursive: true, + IncludeGlobs: []string{"*.jsonl", "workspace.yaml"}, + DebounceKey: string(AgentCopilot) + ":state:" + stateDir, + }) + } + return WatchPlan{Roots: roots}, nil +} + +func (s copilotSourceSet) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + for _, root := range s.roots { + source, ok := s.sourceForChangedPath(root, req) + if ok { + return []SourceRef{source}, nil + } + } + return nil, nil +} + +func (s copilotSourceSet) 
FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + if err := ctx.Err(); err != nil { + return SourceRef{}, false, err + } + for _, path := range []string{req.StoredFilePath, req.FingerprintKey} { + if path == "" { + continue + } + for _, root := range s.roots { + if source, ok := s.sourceRef(root, path); ok { + return source, true, nil + } + } + } + if req.RawSessionID == "" { + return SourceRef{}, false, nil + } + for _, root := range s.roots { + path := s.findSourceFile(root, req.RawSessionID) + if path == "" { + continue + } + if source, ok := s.sourceRef(root, path); ok { + return source, true, nil + } + } + return SourceRef{}, false, nil +} + +// findSourceFile locates a Copilot session file by UUID under root. It checks +// the directory layout (/events.jsonl) first, then the bare layout +// (.jsonl), so the richer directory form takes precedence. Returns "" for +// invalid IDs or when no file resolves. +func (s copilotSourceSet) findSourceFile(root, rawID string) string { + if root == "" || !IsValidSessionID(rawID) { + return "" + } + + stateDir := filepath.Join(root, copilotStateDir) + + dirFmt := filepath.Join(stateDir, rawID, "events.jsonl") + if _, err := os.Stat(dirFmt); err == nil { + return dirFmt + } + + bare := filepath.Join(stateDir, rawID+".jsonl") + if _, err := os.Stat(bare); err == nil { + return bare + } + + return "" +} + +func (s copilotSourceSet) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + if err := ctx.Err(); err != nil { + return SourceFingerprint{}, err + } + path, ok := s.pathFromSource(source) + if !ok { + return SourceFingerprint{}, fmt.Errorf("copilot source path unavailable") + } + info, err := os.Stat(path) + if err != nil { + return SourceFingerprint{}, fmt.Errorf("stat %s: %w", path, err) + } + if info.IsDir() { + return SourceFingerprint{}, fmt.Errorf("stat %s: source is a directory", path) + } + size := info.Size() + mtime := 
info.ModTime().UnixNano() + if workspace := copilotWorkspacePath(path); workspace != "" { + if wsInfo, err := os.Stat(workspace); err == nil { + size += wsInfo.Size() + if wsMtime := wsInfo.ModTime().UnixNano(); wsMtime > mtime { + mtime = wsMtime + } + } + } + fingerprint := SourceFingerprint{ + Key: firstNonEmptyJSONLString(source.FingerprintKey, source.Key, path), + Size: size, + MTimeNS: mtime, + } + h := sha256.New() + if err := addCopilotFingerprintPart(h, "events", path, info); err != nil { + return SourceFingerprint{}, err + } + if workspace := copilotWorkspacePath(path); workspace != "" { + if wsInfo, err := os.Stat(workspace); err == nil && !wsInfo.IsDir() { + if err := addCopilotFingerprintPart(h, "workspace", workspace, wsInfo); err != nil { + return SourceFingerprint{}, err + } + } + } + fingerprint.Hash = fmt.Sprintf("%x", h.Sum(nil)) + return fingerprint, nil +} + +func (s copilotSourceSet) pathFromSource(source SourceRef) (string, bool) { + switch src := source.Opaque.(type) { + case copilotSource: + return src.Path, src.Path != "" + case *copilotSource: + if src != nil && src.Path != "" { + return src.Path, true + } + } + for _, candidate := range []string{source.DisplayPath, source.FingerprintKey, source.Key} { + for _, root := range s.roots { + if ref, ok := s.sourceRef(root, candidate); ok { + src := ref.Opaque.(copilotSource) + return src.Path, true + } + } + } + return "", false +} + +func (s copilotSourceSet) sourceForChangedPath( + root string, + req ChangedPathRequest, +) (SourceRef, bool) { + path := req.Path + if filepath.Base(path) == "workspace.yaml" { + return s.sourceRef(root, filepath.Join(filepath.Dir(path), "events.jsonl")) + } + if source, ok := s.sourceRef(root, path); ok { + return source, true + } + if !jsonlMissingPathFallbackAllowed(req) { + return SourceRef{}, false + } + if filepath.Base(path) == "events.jsonl" { + barePath := filepath.Join( + root, + copilotStateDir, + filepath.Base(filepath.Dir(path))+".jsonl", + ) + if 
source, ok := s.sourceRef(root, barePath); ok { + return source, true + } + } + return s.sourceRefForPath(root, path, false) +} + +func (s copilotSourceSet) sourceRef(root, path string) (SourceRef, bool) { + return s.sourceRefForPath(root, path, true) +} + +func (s copilotSourceSet) sourceRefForPath( + root, path string, + requireRegular bool, +) (SourceRef, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + rel, ok := relUnder(root, path) + if !ok || (requireRegular && !IsRegularFile(path)) { + return SourceRef{}, false + } + parts := strings.Split(filepath.ToSlash(rel), "/") + if len(parts) == 3 && + parts[0] == copilotStateDir && + parts[2] == "events.jsonl" { + return s.newSourceRef(root, path), true + } + if len(parts) == 2 && + parts[0] == copilotStateDir && + strings.HasSuffix(parts[1], ".jsonl") { + stem := strings.TrimSuffix(parts[1], ".jsonl") + if dirPath := s.findSourceFile(root, stem); dirPath != "" && + dirPath != path { + return s.sourceRef(root, dirPath) + } + return s.newSourceRef(root, path), true + } + return SourceRef{}, false +} + +func (s copilotSourceSet) newSourceRef(root, path string) SourceRef { + return SourceRef{ + Provider: AgentCopilot, + Key: path, + DisplayPath: path, + FingerprintKey: path, + Opaque: copilotSource{ + Root: root, + Path: path, + }, + } +} + +func copilotWorkspacePath(eventsPath string) string { + if filepath.Base(eventsPath) != "events.jsonl" { + return "" + } + return filepath.Join(filepath.Dir(eventsPath), "workspace.yaml") +} + +func addCopilotFingerprintPart( + h hash.Hash, + label string, + path string, + info os.FileInfo, +) error { + if _, err := fmt.Fprintf( + h, + "%s\x00%s\x00%d\x00%d\x00", + label, + path, + info.Size(), + info.ModTime().UnixNano(), + ); err != nil { + return err + } + f, err := os.Open(path) + if err != nil { + return fmt.Errorf("open %s: %w", path, err) + } + defer f.Close() + if _, err := io.Copy(h, f); err != nil { + return fmt.Errorf("hash %s: %w", path, err) + } + 
return nil +} + +func copilotProviderCapabilities() Capabilities { + return Capabilities{ + Source: SourceCapabilities{ + DiscoverSources: CapabilitySupported, + WatchSources: CapabilitySupported, + ClassifyChangedPath: CapabilitySupported, + FindSource: CapabilitySupported, + CompositeFingerprint: CapabilitySupported, + IncrementalAppend: CapabilityNotApplicable, + MultiSessionSource: CapabilityNotApplicable, + PerSessionErrors: CapabilityNotApplicable, + ExcludedSessions: CapabilityNotApplicable, + ForceReplaceOnParse: CapabilityNotApplicable, + }, + Content: ContentCapabilities{ + FirstMessage: CapabilitySupported, + Cwd: CapabilitySupported, + Thinking: CapabilitySupported, + ToolCalls: CapabilitySupported, + ToolResults: CapabilitySupported, + PerMessageTokenUsage: CapabilitySupported, + AggregateUsageEvents: CapabilitySupported, + Model: CapabilitySupported, + }, + } +} diff --git a/internal/parser/copilot_test.go b/internal/parser/copilot_test.go index 26a6d2f82..c5dc9cc72 100644 --- a/internal/parser/copilot_test.go +++ b/internal/parser/copilot_test.go @@ -1,6 +1,7 @@ package parser import ( + "context" "os" "path/filepath" "strings" @@ -10,6 +11,60 @@ import ( "github.com/stretchr/testify/require" ) +// newCopilotTestProvider builds a concrete copilotProvider for the given roots +// so package tests can exercise the folded parse, discovery, and source-lookup +// behavior directly through provider methods, replacing the removed +// package-level entrypoints. +func newCopilotTestProvider(t *testing.T, roots ...string) *copilotProvider { + t.Helper() + provider, ok := NewProvider(AgentCopilot, ProviderConfig{ + Roots: roots, + Machine: "local", + }) + require.True(t, ok) + cp, ok := provider.(*copilotProvider) + require.True(t, ok) + return cp +} + +// parseCopilotTestSession parses a Copilot JSONL session file at path through +// the provider-owned parse method, replacing the removed package-level +// ParseCopilotSession entrypoint. 
+func parseCopilotTestSession( + t *testing.T, path, machine string, +) (*ParsedSession, []ParsedMessage, []ParsedUsageEvent, error) { + t.Helper() + return newCopilotTestProvider(t).parseSession(path, machine) +} + +// discoverCopilotTestSessions discovers Copilot sessions under root through the +// provider, returning the legacy DiscoveredFile shape (path) the tests assert +// against. +func discoverCopilotTestSessions(t *testing.T, root string) []DiscoveredFile { + t.Helper() + provider := newCopilotTestProvider(t, root) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + if len(sources) == 0 { + return nil + } + files := make([]DiscoveredFile, 0, len(sources)) + for _, source := range sources { + files = append(files, DiscoveredFile{ + Path: source.DisplayPath, + Agent: AgentCopilot, + }) + } + return files +} + +// findCopilotTestSourceFile resolves a Copilot session ID to a session file +// path through the provider, replacing the removed FindCopilotSourceFile. +func findCopilotTestSourceFile(t *testing.T, root, rawID string) string { + t.Helper() + return newCopilotTestProvider(t, root).sources.findSourceFile(root, rawID) +} + // writeCopilotJSONL writes JSONL lines to a temp file and // returns the file path. func writeCopilotJSONL( @@ -28,7 +83,7 @@ func writeCopilotJSONL( // parseAndValidateHelper parses the session and fails the test on basic errors. 
func parseAndValidateHelper(t *testing.T, path string, machine string, wantMsgs int) (*ParsedSession, []ParsedMessage) { t.Helper() - sess, msgs, _, err := ParseCopilotSession(path, machine) + sess, msgs, _, err := parseCopilotTestSession(t, path, machine) require.NoError(t, err) require.NotNil(t, sess, "expected non-nil session") require.Len(t, msgs, wantMsgs) @@ -349,7 +404,7 @@ func TestParseCopilotSession_EmptySession(t *testing.T) { `{"type":"session.start","data":{"sessionId":"empty"},"timestamp":"2025-01-15T10:00:00Z"}`, ) - sess, msgs, _, err := ParseCopilotSession(path, "m") + sess, msgs, _, err := parseCopilotTestSession(t, path, "m") require.NoError(t, err) assert.Nil(t, sess, "expected nil session for empty") assert.Nil(t, msgs, "expected nil messages for empty") @@ -358,7 +413,7 @@ func TestParseCopilotSession_EmptySession(t *testing.T) { func TestParseCopilotSession_NonexistentFile(t *testing.T) { path := filepath.Join(t.TempDir(), "nonexistent.jsonl") - sess, msgs, _, err := ParseCopilotSession(path, "m") + sess, msgs, _, err := parseCopilotTestSession(t, path, "m") require.NoError(t, err, "expected nil error") assert.Nil(t, sess, "expected nil session for nonexistent file") assert.Nil(t, msgs, "expected nil messages for nonexistent file") @@ -588,7 +643,7 @@ func parseCopilotFull( t *testing.T, path, machine string, ) (*ParsedSession, []ParsedMessage, []ParsedUsageEvent) { t.Helper() - sess, msgs, usage, err := ParseCopilotSession(path, machine) + sess, msgs, usage, err := parseCopilotTestSession(t, path, machine) require.NoError(t, err) return sess, msgs, usage } diff --git a/internal/parser/discovery.go b/internal/parser/discovery.go index a72d951f6..928e18bcd 100644 --- a/internal/parser/discovery.go +++ b/internal/parser/discovery.go @@ -23,6 +23,11 @@ var uuidRe = regexp.MustCompile( `[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12})$`, ) +const ( + copilotStateDir = "session-state" + geminiChatsDir = "chats" +) + // isDirOrSymlink reports whether 
the entry is a directory or a // symlink that resolves to a directory. parentDir is needed to // build the full path for symlink resolution. @@ -49,7 +54,7 @@ type DiscoveredFile struct { Path string Project string // pre-extracted project name Agent AgentType // which agent this file belongs to - ForceParse bool // caller requires a full source reparse + ForceParse bool // bypass stored-state skips for sidecar-driven refreshes ProviderSource *SourceRef // provider-owned source identity, when known ProviderProcess bool // true when this caller may parse via ProviderSource } @@ -732,117 +737,6 @@ func isGeminiSessionFilename(name string) bool { strings.HasSuffix(name, ".jsonl")) } -// DiscoverGeminiSessions finds all Gemini session files under -// the Gemini directory (~/.gemini/tmp/*/chats/session-*). -func DiscoverGeminiSessions( - geminiDir string, -) []DiscoveredFile { - if geminiDir == "" { - return nil - } - - tmpDir := filepath.Join(geminiDir, "tmp") - hashDirs, err := os.ReadDir(tmpDir) - if err != nil { - return nil - } - - projectMap := BuildGeminiProjectMap(geminiDir) - - var files []DiscoveredFile - for _, hd := range hashDirs { - if !isDirOrSymlink(hd, tmpDir) { - continue - } - hash := hd.Name() - chatsDir := filepath.Join(tmpDir, hash, "chats") - entries, err := os.ReadDir(chatsDir) - if err != nil { - continue - } - - project := ResolveGeminiProject(hash, projectMap) - - for _, sf := range entries { - if sf.IsDir() { - continue - } - name := sf.Name() - if !isGeminiSessionFilename(name) { - continue - } - files = append(files, DiscoveredFile{ - Path: filepath.Join(chatsDir, name), - Project: project, - Agent: AgentGemini, - }) - } - } - - sort.Slice(files, func(i, j int) bool { - return files[i].Path < files[j].Path - }) - return files -} - -// FindGeminiSourceFile locates a Gemini session file by its -// session UUID. Searches all project hash directories. 
-func FindGeminiSourceFile( - geminiDir, sessionID string, -) string { - if geminiDir == "" || !IsValidSessionID(sessionID) || - len(sessionID) < 8 { - return "" - } - - tmpDir := filepath.Join(geminiDir, "tmp") - hashDirs, err := os.ReadDir(tmpDir) - if err != nil { - return "" - } - - for _, hd := range hashDirs { - if !isDirOrSymlink(hd, tmpDir) { - continue - } - chatsDir := filepath.Join(tmpDir, hd.Name(), "chats") - entries, err := os.ReadDir(chatsDir) - if err != nil { - continue - } - for _, sf := range entries { - if sf.IsDir() { - continue - } - name := sf.Name() - if !isGeminiSessionFilename(name) { - continue - } - if strings.Contains(name, sessionID[:8]) { - path := filepath.Join(chatsDir, name) - if confirmGeminiSessionID( - path, sessionID, - ) { - return path - } - } - } - } - return "" -} - -// confirmGeminiSessionID reads the sessionId field from a -// Gemini file to confirm it matches the expected ID. -func confirmGeminiSessionID( - path, sessionID string, -) bool { - data, err := os.ReadFile(path) - if err != nil { - return false - } - return GeminiSessionID(data) == sessionID -} - // geminiProjectsFile holds the structure of // ~/.gemini/projects.json. type geminiProjectsFile struct { @@ -958,92 +852,6 @@ func ResolveGeminiProject( return NormalizeName(dirName) } -// DiscoverCopilotSessions finds all JSONL files under -// /session-state/. Supports both bare format -// (.jsonl) and directory format (/events.jsonl). 
-func DiscoverCopilotSessions( - copilotDir string, -) []DiscoveredFile { - if copilotDir == "" { - return nil - } - - stateDir := filepath.Join(copilotDir, "session-state") - entries, err := os.ReadDir(stateDir) - if err != nil { - return nil - } - - dirs := make(map[string]struct{}) - for _, entry := range entries { - if !entry.IsDir() { - continue - } - eventsPath := filepath.Join( - stateDir, entry.Name(), "events.jsonl", - ) - if _, err := os.Stat(eventsPath); err == nil { - dirs[entry.Name()] = struct{}{} - } - } - - var files []DiscoveredFile - for _, entry := range entries { - name := entry.Name() - if entry.IsDir() { - candidate := filepath.Join( - stateDir, name, "events.jsonl", - ) - if _, err := os.Stat(candidate); err == nil { - files = append(files, DiscoveredFile{ - Path: candidate, - Agent: AgentCopilot, - }) - } - continue - } - if stem, ok := strings.CutSuffix(name, ".jsonl"); ok { - if _, dup := dirs[stem]; dup { - continue - } - files = append(files, DiscoveredFile{ - Path: filepath.Join(stateDir, name), - Agent: AgentCopilot, - }) - } - } - - sort.Slice(files, func(i, j int) bool { - return files[i].Path < files[j].Path - }) - return files -} - -// FindCopilotSourceFile locates a Copilot session file by -// UUID. Checks both bare (.jsonl) and directory -// (/events.jsonl) layouts. -func FindCopilotSourceFile( - copilotDir, rawID string, -) string { - if copilotDir == "" || !IsValidSessionID(rawID) { - return "" - } - - stateDir := filepath.Join(copilotDir, "session-state") - - dirFmt := filepath.Join(stateDir, rawID, "events.jsonl") - if _, err := os.Stat(dirFmt); err == nil { - return dirFmt - } - - bare := filepath.Join(stateDir, rawID+".jsonl") - if _, err := os.Stat(bare); err == nil { - return bare - } - - return "" -} - // IsPiSessionFile reads the first non-blank line of path and returns true // when the JSON type field equals "session". The scanner buffer grows up to // 64 MiB to match parser.maxLineSize. 
Leading blank lines are skipped to diff --git a/internal/parser/discovery_test.go b/internal/parser/discovery_test.go index 85c782fc6..3170a9b20 100644 --- a/internal/parser/discovery_test.go +++ b/internal/parser/discovery_test.go @@ -11,11 +11,6 @@ import ( "github.com/stretchr/testify/require" ) -const ( - copilotStateDir = "session-state" - geminiChatsDir = "chats" -) - // setupFileSystem creates a temporary directory and populates // it with the given relative file paths and contents. func setupFileSystem(t *testing.T, dir string, files map[string]string) { @@ -583,7 +578,7 @@ func TestDiscoverGeminiSessions(t *testing.T) { dir := t.TempDir() setupFileSystem(t, dir, tt.files) - files := DiscoverGeminiSessions(dir) + files := discoverGeminiTestSessions(t, dir) require.Len(t, files, len(tt.wantFiles), "files count") @@ -603,17 +598,17 @@ func TestDiscoverGeminiSessions(t *testing.T) { t.Run("EmptyChatDir", func(t *testing.T) { dir := t.TempDir() require.NoError(t, os.MkdirAll(filepath.Join(dir, "tmp", "hash1", geminiChatsDir), 0o755), "mkdir") - files := DiscoverGeminiSessions(dir) + files := discoverGeminiTestSessions(t, dir) assert.Nil(t, files, "expected nil") }) t.Run("Nonexistent", func(t *testing.T) { - files := DiscoverGeminiSessions(filepath.Join(t.TempDir(), "does-not-exist")) + files := discoverGeminiTestSessions(t, filepath.Join(t.TempDir(), "does-not-exist")) assert.Nil(t, files, "expected nil") }) t.Run("EmptyDir", func(t *testing.T) { - files := DiscoverGeminiSessions("") + files := discoverGeminiTestSessions(t, "") assert.Nil(t, files, "expected nil") }) } @@ -656,7 +651,7 @@ func TestFindGeminiSourceFile(t *testing.T) { dir := t.TempDir() setupFileSystem(t, dir, tt.files) - got := FindGeminiSourceFile(dir, tt.targetID) + got := findGeminiTestSourceFile(t, dir, tt.targetID) want := "" if tt.wantFile != "" { want = filepath.Join(dir, tt.wantFile) @@ -669,13 +664,13 @@ func TestFindGeminiSourceFile(t *testing.T) { t.Run("ShortID", func(t *testing.T) 
{ dir := t.TempDir() for _, id := range []string{"", "a", "abc", "1234567"} { - got := FindGeminiSourceFile(dir, id) + got := findGeminiTestSourceFile(t, dir, id) assert.Emptyf(t, got, "FindGeminiSourceFile(%q)", id) } }) t.Run("EmptyDir", func(t *testing.T) { - got := FindGeminiSourceFile("", "b0a4eadd-cb99-4165-94d9-64cad5a66d24") + got := findGeminiTestSourceFile(t, "", "b0a4eadd-cb99-4165-94d9-64cad5a66d24") assert.Empty(t, got, "expected empty") }) } @@ -893,7 +888,7 @@ func TestDiscoverCopilotSessions(t *testing.T) { dir := t.TempDir() setupFileSystem(t, dir, tt.files) - files := DiscoverCopilotSessions(dir) + files := discoverCopilotTestSessions(t, dir) require.Len(t, files, len(tt.wantFiles), "files count") @@ -911,12 +906,12 @@ func TestDiscoverCopilotSessions(t *testing.T) { } t.Run("EmptyDir", func(t *testing.T) { - files := DiscoverCopilotSessions("") + files := discoverCopilotTestSessions(t, "") assert.Nil(t, files, "expected nil") }) t.Run("Nonexistent", func(t *testing.T) { - files := DiscoverCopilotSessions(filepath.Join(t.TempDir(), "does-not-exist")) + files := discoverCopilotTestSessions(t, filepath.Join(t.TempDir(), "does-not-exist")) assert.Nil(t, files, "expected nil") }) } @@ -962,7 +957,7 @@ func TestFindCopilotSourceFile(t *testing.T) { dir := t.TempDir() setupFileSystem(t, dir, tt.files) - got := FindCopilotSourceFile(dir, tt.targetID) + got := findCopilotTestSourceFile(t, dir, tt.targetID) want := "" if tt.wantFile != "" { want = filepath.Join(dir, tt.wantFile) @@ -975,13 +970,13 @@ func TestFindCopilotSourceFile(t *testing.T) { t.Run("InvalidID", func(t *testing.T) { dir := t.TempDir() for _, id := range []string{"", "../etc/passwd", "a/b", "a b"} { - got := FindCopilotSourceFile(dir, id) + got := findCopilotTestSourceFile(t, dir, id) assert.Emptyf(t, got, "FindCopilotSourceFile(%q)", id) } }) t.Run("EmptyDir", func(t *testing.T) { - got := FindCopilotSourceFile("", "abc-123") + got := findCopilotTestSourceFile(t, "", "abc-123") 
assert.Empty(t, got, "expected empty") }) } diff --git a/internal/parser/gemini.go b/internal/parser/gemini.go index 432fad42b..60bb98519 100644 --- a/internal/parser/gemini.go +++ b/internal/parser/gemini.go @@ -53,10 +53,12 @@ func normalizedGeminiTokenUsage(tok geminiTokens) json.RawMessage { return raw } -// ParseGeminiSession parses a Gemini CLI session JSON file. -// Unlike Claude/Codex JSONL, each Gemini file is a single JSON -// document containing all messages. -func ParseGeminiSession( +// parseSession parses a Gemini CLI session JSON file into the session and +// messages the provider consumes. Unlike Claude/Codex JSONL, each Gemini file +// is a single JSON document containing all messages. This is the provider-owned +// parse entrypoint; the package-level free function was folded onto the +// provider. +func (p *geminiProvider) parseSession( path, project, machine string, ) (*ParsedSession, []ParsedMessage, error) { info, err := os.Stat(path) diff --git a/internal/parser/gemini_copilot_provider_test.go b/internal/parser/gemini_copilot_provider_test.go new file mode 100644 index 000000000..c2d9d1344 --- /dev/null +++ b/internal/parser/gemini_copilot_provider_test.go @@ -0,0 +1,321 @@ +package parser + +import ( + "context" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.kenn.io/agentsview/internal/testjsonl" +) + +func TestGeminiCopilotProviderFactoriesReplaceLegacyAdapter(t *testing.T) { + for _, agent := range []AgentType{AgentGemini, AgentCopilot} { + t.Run(string(agent), func(t *testing.T) { + factory, ok := ProviderFactoryByType(agent) + require.True(t, ok) + require.NotNil(t, factory) + + provider, ok := NewProvider(agent, ProviderConfig{ + Roots: []string{t.TempDir()}, + Machine: "devbox", + }) + require.True(t, ok) + require.NotNil(t, provider) + }) + } +} + +func TestGeminiProviderSourceMethods(t *testing.T) { + root := t.TempDir() + sessionID := 
"gemini-provider" + sourcePath := filepath.Join( + root, + "tmp", + "my-project", + geminiChatsDir, + "session-2026-06-19T12-00-gemini-provider.json", + ) + writeSourceFile(t, sourcePath, testjsonl.GeminiSessionJSON( + sessionID, + "my-project", + tsEarly, + tsEarlyS5, + []map[string]any{ + testjsonl.GeminiUserMsg("u1", tsEarly, "hello gemini"), + testjsonl.GeminiAssistantMsg("a1", tsEarlyS5, "hi", nil), + }, + )) + + provider, ok := NewProvider(AgentGemini, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 2) + assert.Equal(t, filepath.Join(root, "tmp"), plan.Roots[0].Path) + assert.True(t, plan.Roots[0].Recursive) + assert.Equal(t, []string{"session-*.json", "session-*.jsonl"}, plan.Roots[0].IncludeGlobs) + assert.Equal(t, root, plan.Roots[1].Path) + assert.False(t, plan.Roots[1].Recursive) + assert.Equal(t, []string{"projects.json", "trustedFolders.json"}, plan.Roots[1].IncludeGlobs) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, sourcePath, discovered[0].DisplayPath) + assert.Equal(t, "my_project", discovered[0].ProjectHint) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "host~gemini:" + sessionID, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sourcePath, found.DisplayPath) + + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: sourcePath, EventKind: "write", WatchRoot: filepath.Join(root, "tmp")}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, sourcePath, changed[0].DisplayPath) + + require.NoError(t, os.Remove(sourcePath)) + changed, err = provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: sourcePath, EventKind: "remove", WatchRoot: 
filepath.Join(root, "tmp")}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, sourcePath, changed[0].DisplayPath) + assert.Equal(t, "my_project", changed[0].ProjectHint) + + fingerprint, err := provider.Fingerprint(context.Background(), found) + require.Error(t, err) + require.Empty(t, fingerprint) +} + +func TestGeminiProviderProjectMetadataChangesClassifyAndFingerprint(t *testing.T) { + root := t.TempDir() + sessionID := "gemini-project-metadata" + projectsPath := filepath.Join(root, "projects.json") + writeSourceFile(t, projectsPath, `{"projects":{"/Users/alice/code/one":"alias"}}`) + sourcePath := filepath.Join( + root, + "tmp", + "alias", + geminiChatsDir, + "session-2026-06-19T12-00-gemini-project-metadata.json", + ) + writeSourceFile(t, sourcePath, testjsonl.GeminiSessionJSON( + sessionID, + "alias", + tsEarly, + tsEarlyS5, + []map[string]any{ + testjsonl.GeminiUserMsg("u1", tsEarly, "hello gemini"), + testjsonl.GeminiAssistantMsg("a1", tsEarlyS5, "hi", nil), + }, + )) + + provider, ok := NewProvider(AgentGemini, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "host~gemini:" + sessionID, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, "one", found.ProjectHint) + + fingerprintOne, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + + writeSourceFile(t, projectsPath, `{"projects":{"/Users/alice/code/two":"alias"}}`) + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: projectsPath, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, sourcePath, changed[0].DisplayPath) + assert.Equal(t, "two", changed[0].ProjectHint) + + fingerprintTwo, err := provider.Fingerprint(context.Background(), changed[0]) + require.NoError(t, err) + 
assert.NotEqual(t, fingerprintOne.Hash, fingerprintTwo.Hash) +} + +func TestGeminiProviderParse(t *testing.T) { + root := t.TempDir() + sessionID := "gemini-provider" + sourcePath := filepath.Join( + root, + "tmp", + "my-project", + geminiChatsDir, + "session-2026-06-19T12-00-gemini-provider.json", + ) + writeSourceFile(t, sourcePath, testjsonl.GeminiSessionJSON( + sessionID, + "my-project", + tsEarly, + tsEarlyS5, + []map[string]any{ + testjsonl.GeminiUserMsg("u1", tsEarly, "hello gemini"), + testjsonl.GeminiAssistantMsg("a1", tsEarlyS5, "hi", nil), + }, + )) + + provider, ok := NewProvider(AgentGemini, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "host~gemini:" + sessionID, + }) + require.NoError(t, err) + require.True(t, ok) + + fingerprint, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + assert.Equal(t, sourcePath, fingerprint.Key) + assert.Positive(t, fingerprint.Size) + assert.Positive(t, fingerprint.MTimeNS) + assert.NotEmpty(t, fingerprint.Hash) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: found, + Fingerprint: fingerprint, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.Len(t, outcome.Results, 1) + result := outcome.Results[0] + assert.Equal(t, DataVersionCurrent, result.DataVersion) + assert.Equal(t, "gemini:"+sessionID, result.Result.Session.ID) + assert.Equal(t, AgentGemini, result.Result.Session.Agent) + assert.Equal(t, "my_project", result.Result.Session.Project) + assert.Equal(t, "devbox", result.Result.Session.Machine) + assert.Equal(t, fingerprint.Hash, result.Result.Session.File.Hash) + assert.Len(t, result.Result.Messages, 2) +} + +func TestCopilotProviderSourceMethods(t *testing.T) { + root := t.TempDir() + barePath := filepath.Join(root, copilotStateDir, "copilot-provider.jsonl") + dirEvents := 
filepath.Join(root, copilotStateDir, "copilot-provider", "events.jsonl") + workspacePath := filepath.Join(root, copilotStateDir, "copilot-provider", "workspace.yaml") + content := strings.Join([]string{ + `{"type":"session.start","data":{"sessionId":"copilot-provider","context":{"cwd":"/home/user/code/copilot-app","branch":"main"}},"timestamp":"2025-01-15T10:00:00Z"}`, + `{"type":"user.message","data":{"content":"hello copilot"},"timestamp":"2025-01-15T10:00:01Z"}`, + `{"type":"assistant.message","data":{"content":"hi"},"timestamp":"2025-01-15T10:00:02Z"}`, + `{"type":"session.shutdown","data":{"modelMetrics":{"gpt-5":{"usage":{"inputTokens":100,"outputTokens":20,"cacheReadTokens":30,"cacheWriteTokens":10,"reasoningTokens":5}}}},"timestamp":"2025-01-15T10:00:03Z"}`, + }, "\n") + "\n" + writeSourceFile(t, barePath, content) + writeSourceFile(t, dirEvents, content) + writeSourceFile(t, workspacePath, "name: Workspace title\n") + + provider, ok := NewProvider(AgentCopilot, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 1) + assert.Equal(t, filepath.Join(root, copilotStateDir), plan.Roots[0].Path) + assert.True(t, plan.Roots[0].Recursive) + assert.Equal(t, []string{"*.jsonl", "workspace.yaml"}, plan.Roots[0].IncludeGlobs) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, dirEvents, discovered[0].DisplayPath) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "copilot-provider", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, dirEvents, found.DisplayPath) + + for _, path := range []string{dirEvents, workspacePath} { + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: path, EventKind: "write", WatchRoot: 
filepath.Join(root, copilotStateDir)}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, dirEvents, changed[0].DisplayPath) + } + + require.NoError(t, os.Remove(dirEvents)) + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: dirEvents, EventKind: "remove", WatchRoot: filepath.Join(root, copilotStateDir)}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, barePath, changed[0].DisplayPath) + writeSourceFile(t, dirEvents, content) + + require.NoError(t, os.Remove(workspacePath)) + changed, err = provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: workspacePath, EventKind: "remove", WatchRoot: filepath.Join(root, copilotStateDir)}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, dirEvents, changed[0].DisplayPath) + writeSourceFile(t, workspacePath, "name: Workspace title\n") + + fingerprint, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + assert.Equal(t, dirEvents, fingerprint.Key) + assert.Positive(t, fingerprint.Size) + assert.Positive(t, fingerprint.MTimeNS) + assert.NotEmpty(t, fingerprint.Hash) + + writeSourceFile(t, workspacePath, "name: Workspace other\n") + renamedFingerprint, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + assert.NotEqual(t, fingerprint.Hash, renamedFingerprint.Hash) + writeSourceFile(t, workspacePath, "name: Workspace title\n") + fingerprint, err = provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: found, + Fingerprint: fingerprint, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.Len(t, outcome.Results, 1) + result := outcome.Results[0] + assert.Equal(t, DataVersionCurrent, result.DataVersion) + assert.Equal(t, "copilot:copilot-provider", result.Result.Session.ID) + 
assert.Equal(t, AgentCopilot, result.Result.Session.Agent) + assert.Equal(t, "copilot_app", result.Result.Session.Project) + assert.Equal(t, "Workspace title", result.Result.Session.FirstMessage) + assert.Equal(t, "devbox", result.Result.Session.Machine) + assert.Equal(t, fingerprint.Hash, result.Result.Session.File.Hash) + assert.Equal(t, fingerprint.Size, result.Result.Session.File.Size) + assert.Equal(t, fingerprint.MTimeNS, result.Result.Session.File.Mtime) + assert.Len(t, result.Result.Messages, 2) + require.Len(t, result.Result.UsageEvents, 1) + assert.Equal(t, "gpt-5", result.Result.UsageEvents[0].Model) +} diff --git a/internal/parser/gemini_parser_test.go b/internal/parser/gemini_parser_test.go index 547f80d86..5178f4db4 100644 --- a/internal/parser/gemini_parser_test.go +++ b/internal/parser/gemini_parser_test.go @@ -1,6 +1,7 @@ package parser import ( + "context" "strings" "testing" "time" @@ -11,10 +12,65 @@ import ( "go.kenn.io/agentsview/internal/testjsonl" ) +// newGeminiTestProvider builds a concrete geminiProvider for the given roots so +// package tests can exercise the folded parse, discovery, and source-lookup +// behavior directly through provider methods, replacing the removed +// package-level entrypoints. +func newGeminiTestProvider(t *testing.T, roots ...string) *geminiProvider { + t.Helper() + provider, ok := NewProvider(AgentGemini, ProviderConfig{ + Roots: roots, + Machine: "local", + }) + require.True(t, ok) + gp, ok := provider.(*geminiProvider) + require.True(t, ok) + return gp +} + +// parseGeminiTestSession parses a Gemini session file at path through the +// provider-owned parse method, replacing the removed package-level +// ParseGeminiSession entrypoint. 
+func parseGeminiTestSession( + t *testing.T, path, project, machine string, +) (*ParsedSession, []ParsedMessage, error) { + t.Helper() + return newGeminiTestProvider(t).parseSession(path, project, machine) +} + +// discoverGeminiTestSessions discovers Gemini sessions under root through the +// provider, returning the legacy DiscoveredFile shape (path + project) the +// tests assert against. +func discoverGeminiTestSessions(t *testing.T, root string) []DiscoveredFile { + t.Helper() + provider := newGeminiTestProvider(t, root) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + if len(sources) == 0 { + return nil + } + files := make([]DiscoveredFile, 0, len(sources)) + for _, source := range sources { + files = append(files, DiscoveredFile{ + Path: source.DisplayPath, + Project: source.ProjectHint, + Agent: AgentGemini, + }) + } + return files +} + +// findGeminiTestSourceFile resolves a Gemini session ID to a session file path +// through the provider, replacing the removed FindGeminiSourceFile. 
+func findGeminiTestSourceFile(t *testing.T, root, sessionID string) string { + t.Helper() + return newGeminiTestProvider(t, root).sources.findSourceFile(root, sessionID) +} + func runGeminiParserTest(t *testing.T, content string) (*ParsedSession, []ParsedMessage) { t.Helper() path := createTestFile(t, "session.json", content) - sess, msgs, err := ParseGeminiSession(path, "my_project", "local") + sess, msgs, err := parseGeminiTestSession(t, path, "my_project", "local") require.NoError(t, err) return sess, msgs } @@ -45,7 +101,7 @@ func TestParseGeminiSession_JSONLStream(t *testing.T) { `{"$set":{"lastUpdated":"2026-04-23T16:12:50.158Z"}}`, }, "\n") path := createTestFile(t, "session.jsonl", content) - sess, msgs, err := ParseGeminiSession(path, "my_project", "local") + sess, msgs, err := parseGeminiTestSession(t, path, "my_project", "local") require.NoError(t, err) require.NotNil(t, sess) @@ -74,7 +130,7 @@ func TestParseGeminiSession_JSONLStreamLargeRecord(t *testing.T) { `{"id":"u1","timestamp":"2026-04-23T16:12:43.085Z","type":"user","content":[{"text":"` + largeContent + `"}]}`, }, "\n") path := createTestFile(t, "large-session.jsonl", content) - sess, msgs, err := ParseGeminiSession(path, "my_project", "local") + sess, msgs, err := parseGeminiTestSession(t, path, "my_project", "local") require.NoError(t, err) require.NotNil(t, sess) @@ -91,7 +147,7 @@ func TestParseGeminiSession_JSONLStreamTolerantOfPartialLines(t *testing.T) { `{"id":"a1","timestamp":"2026-04-23T16:12:50.158Z","type":"gemini","content":"reply"`, }, "\n") path := createTestFile(t, "session.jsonl", content) - sess, msgs, err := ParseGeminiSession(path, "my_project", "local") + sess, msgs, err := parseGeminiTestSession(t, path, "my_project", "local") require.NoError(t, err) require.NotNil(t, sess) @@ -108,7 +164,7 @@ func TestParseGeminiSession_JSONLStreamTolerantOfPartialLines(t *testing.T) { "", }, "\n") path := createTestFile(t, "session.jsonl", content) - sess, msgs, err := 
ParseGeminiSession(path, "my_project", "local") + sess, msgs, err := parseGeminiTestSession(t, path, "my_project", "local") require.NoError(t, err) require.NotNil(t, sess) @@ -452,12 +508,12 @@ func TestParseGeminiSession_EdgeCases(t *testing.T) { t.Run("malformed JSON", func(t *testing.T) { path := createTestFile(t, "session.json", "not valid json {{{") - _, _, err := ParseGeminiSession(path, "my_project", "local") + _, _, err := parseGeminiTestSession(t, path, "my_project", "local") assert.Error(t, err) }) t.Run("missing file", func(t *testing.T) { - _, _, err := ParseGeminiSession("/nonexistent.json", "my_project", "local") + _, _, err := parseGeminiTestSession(t, "/nonexistent.json", "my_project", "local") assert.Error(t, err) }) @@ -500,7 +556,7 @@ func TestParseGeminiSession_EdgeCases(t *testing.T) { t.Run("missing sessionId", func(t *testing.T) { content := `{"projectHash":"abc","startTime":"2024-01-01T00:00:00Z","lastUpdated":"2024-01-01T00:00:00Z","messages":[]}` path := createTestFile(t, "session.json", content) - _, _, err := ParseGeminiSession(path, "my_project", "local") + _, _, err := parseGeminiTestSession(t, path, "my_project", "local") assert.Error(t, err) }) } diff --git a/internal/parser/gemini_provider.go b/internal/parser/gemini_provider.go new file mode 100644 index 000000000..7a18b0123 --- /dev/null +++ b/internal/parser/gemini_provider.go @@ -0,0 +1,509 @@ +package parser + +import ( + "context" + "crypto/sha256" + "fmt" + "hash" + "io" + "os" + "path/filepath" + "strings" +) + +var _ Provider = (*geminiProvider)(nil) + +type geminiProviderFactory struct { + def AgentDef +} + +func newGeminiProviderFactory(def AgentDef) ProviderFactory { + return geminiProviderFactory{def: cloneAgentDef(def)} +} + +func (f geminiProviderFactory) Definition() AgentDef { + return cloneAgentDef(f.def) +} + +func (f geminiProviderFactory) Capabilities() Capabilities { + return geminiProviderCapabilities() +} + +func (f geminiProviderFactory) NewProvider(cfg 
ProviderConfig) Provider { + cfg = cfg.Clone() + return &geminiProvider{ + ProviderBase: ProviderBase{ + Def: cloneAgentDef(f.def), + Caps: geminiProviderCapabilities(), + Config: cfg, + }, + sources: newGeminiSourceSet(cfg.Roots), + } +} + +type geminiProvider struct { + ProviderBase + sources geminiSourceSet +} + +func (p *geminiProvider) Discover(ctx context.Context) ([]SourceRef, error) { + return p.sources.Discover(ctx) +} + +func (p *geminiProvider) WatchPlan(ctx context.Context) (WatchPlan, error) { + return p.sources.WatchPlan(ctx) +} + +func (p *geminiProvider) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + return p.sources.SourcesForChangedPath(ctx, req) +} + +func (p *geminiProvider) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + req = providerFindRequestWithRawSessionID(p.Def, req) + return p.sources.FindSource(ctx, req) +} + +func (p *geminiProvider) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + return p.sources.Fingerprint(ctx, source) +} + +func (p *geminiProvider) Parse( + ctx context.Context, + req ParseRequest, +) (ParseOutcome, error) { + if err := ctx.Err(); err != nil { + return ParseOutcome{}, err + } + path, ok := p.sources.pathFromSource(req.Source) + if !ok { + return ParseOutcome{}, fmt.Errorf("gemini source path unavailable") + } + machine := firstNonEmptyJSONLString(req.Machine, p.Config.Machine) + sess, msgs, err := p.parseSession(path, req.Source.ProjectHint, machine) + if err != nil { + return ParseOutcome{}, err + } + if sess == nil { + return ParseOutcome{ + ResultSetComplete: true, + SkipReason: SkipNoSession, + }, nil + } + if req.Fingerprint.Hash != "" { + sess.File.Hash = req.Fingerprint.Hash + } + return ParseOutcome{ + Results: []ParseResultOutcome{{ + Result: ParseResult{ + Session: *sess, + Messages: msgs, + }, + DataVersion: DataVersionCurrent, + }}, + ResultSetComplete: true, + }, 
nil +} + +type geminiSource struct { + Root string + Path string +} + +type geminiSourceSet struct { + roots []string +} + +func newGeminiSourceSet(roots []string) geminiSourceSet { + return geminiSourceSet{roots: cleanJSONLRoots(roots)} +} + +func (s geminiSourceSet) Discover(ctx context.Context) ([]SourceRef, error) { + var sources []SourceRef + seen := make(map[string]struct{}) + for _, root := range s.roots { + rootSources, err := s.discoverRoot(ctx, root) + if err != nil { + return nil, err + } + for _, source := range rootSources { + addJSONLSource(source, &sources, seen) + } + } + sortJSONLSources(sources) + return sources, nil +} + +func (s geminiSourceSet) discoverRoot( + ctx context.Context, + root string, +) ([]SourceRef, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + sources := make([]SourceRef, 0) + seen := make(map[string]struct{}) + for _, path := range s.discoverSessionPaths(root) { + source, ok := s.sourceRef(root, path) + if !ok { + continue + } + addJSONLSource(source, &sources, seen) + } + sortJSONLSources(sources) + return sources, nil +} + +// discoverSessionPaths finds all Gemini session file paths under the Gemini +// directory (/tmp//chats/session-*.json[l]). is either a +// SHA-256 project hash (old layout) or a project name (new layout); symlinked +// hash directories are followed (matching the watcher). Project resolution is +// applied by sourceRef via BuildGeminiProjectMap/ResolveGeminiProject, so this +// helper only enumerates source paths. 
+func (s geminiSourceSet) discoverSessionPaths(root string) []string { + if root == "" { + return nil + } + + tmpDir := filepath.Join(root, "tmp") + hashDirs, err := os.ReadDir(tmpDir) + if err != nil { + return nil + } + + var paths []string + for _, hd := range hashDirs { + if !isDirOrSymlink(hd, tmpDir) { + continue + } + chatsDir := filepath.Join(tmpDir, hd.Name(), geminiChatsDir) + entries, err := os.ReadDir(chatsDir) + if err != nil { + continue + } + for _, sf := range entries { + if sf.IsDir() { + continue + } + name := sf.Name() + if !isGeminiSessionFilename(name) { + continue + } + paths = append(paths, filepath.Join(chatsDir, name)) + } + } + return paths +} + +func (s geminiSourceSet) WatchPlan(context.Context) (WatchPlan, error) { + roots := make([]WatchRoot, 0, len(s.roots)) + for _, root := range s.roots { + tmp := filepath.Join(root, "tmp") + roots = append(roots, WatchRoot{ + Path: tmp, + Recursive: true, + IncludeGlobs: []string{"session-*.json", "session-*.jsonl"}, + DebounceKey: string(AgentGemini) + ":tmp:" + tmp, + }) + roots = append(roots, WatchRoot{ + Path: root, + Recursive: false, + IncludeGlobs: []string{"projects.json", "trustedFolders.json"}, + DebounceKey: string(AgentGemini) + ":projects:" + root, + }) + } + return WatchPlan{Roots: roots}, nil +} + +func (s geminiSourceSet) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + for _, root := range s.roots { + if geminiProjectMetadataPath(root, req.Path) { + return s.discoverRoot(ctx, root) + } + source, ok := s.sourceRef(root, req.Path) + if ok { + return []SourceRef{source}, nil + } + if jsonlMissingPathFallbackAllowed(req) { + source, ok = s.sourceRefForPath(root, req.Path, false) + if ok { + return []SourceRef{source}, nil + } + } + } + return nil, nil +} + +func (s geminiSourceSet) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + if 
err := ctx.Err(); err != nil { + return SourceRef{}, false, err + } + for _, path := range []string{req.StoredFilePath, req.FingerprintKey} { + if path == "" { + continue + } + for _, root := range s.roots { + if source, ok := s.sourceRef(root, path); ok { + return source, true, nil + } + } + } + if req.RawSessionID == "" { + return SourceRef{}, false, nil + } + for _, root := range s.roots { + path := s.findSourceFile(root, req.RawSessionID) + if path == "" { + continue + } + if source, ok := s.sourceRef(root, path); ok { + return source, true, nil + } + } + return SourceRef{}, false, nil +} + +// findSourceFile locates a Gemini session file by its session UUID under root, +// searching all project hash directories. The session filename embeds the first +// eight characters of the UUID, so candidates are pre-filtered on that prefix +// before confirming the recorded sessionId matches. +func (s geminiSourceSet) findSourceFile(root, sessionID string) string { + if root == "" || !IsValidSessionID(sessionID) || + len(sessionID) < 8 { + return "" + } + + tmpDir := filepath.Join(root, "tmp") + hashDirs, err := os.ReadDir(tmpDir) + if err != nil { + return "" + } + + for _, hd := range hashDirs { + if !isDirOrSymlink(hd, tmpDir) { + continue + } + chatsDir := filepath.Join(tmpDir, hd.Name(), geminiChatsDir) + entries, err := os.ReadDir(chatsDir) + if err != nil { + continue + } + for _, sf := range entries { + if sf.IsDir() { + continue + } + name := sf.Name() + if !isGeminiSessionFilename(name) { + continue + } + if strings.Contains(name, sessionID[:8]) { + path := filepath.Join(chatsDir, name) + if confirmGeminiSessionID(path, sessionID) { + return path + } + } + } + } + return "" +} + +// confirmGeminiSessionID reads the sessionId field from a Gemini file to +// confirm it matches the expected ID. 
+func confirmGeminiSessionID(path, sessionID string) bool { + data, err := os.ReadFile(path) + if err != nil { + return false + } + return GeminiSessionID(data) == sessionID +} + +func (s geminiSourceSet) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + if err := ctx.Err(); err != nil { + return SourceFingerprint{}, err + } + root, path, ok := s.rootPathFromSource(source) + if !ok { + return SourceFingerprint{}, fmt.Errorf("gemini source path unavailable") + } + info, err := os.Stat(path) + if err != nil { + return SourceFingerprint{}, fmt.Errorf("stat %s: %w", path, err) + } + if info.IsDir() { + return SourceFingerprint{}, fmt.Errorf("stat %s: source is a directory", path) + } + fingerprint := SourceFingerprint{ + Key: firstNonEmptyJSONLString(source.FingerprintKey, source.Key, path), + Size: info.Size(), + MTimeNS: info.ModTime().UnixNano(), + } + h := sha256.New() + if err := addGeminiFingerprintPart(h, "session", path, info); err != nil { + return SourceFingerprint{}, err + } + for _, metadataPath := range geminiProjectMetadataPaths(root) { + metadataInfo, err := os.Stat(metadataPath) + if err != nil || metadataInfo.IsDir() { + continue + } + fingerprint.Size += metadataInfo.Size() + if mtime := metadataInfo.ModTime().UnixNano(); mtime > fingerprint.MTimeNS { + fingerprint.MTimeNS = mtime + } + if err := addGeminiFingerprintPart(h, "project", metadataPath, metadataInfo); err != nil { + return SourceFingerprint{}, err + } + } + fingerprint.Hash = fmt.Sprintf("%x", h.Sum(nil)) + return fingerprint, nil +} + +func (s geminiSourceSet) pathFromSource(source SourceRef) (string, bool) { + _, path, ok := s.rootPathFromSource(source) + return path, ok +} + +func (s geminiSourceSet) rootPathFromSource(source SourceRef) (string, string, bool) { + switch src := source.Opaque.(type) { + case geminiSource: + return src.Root, src.Path, src.Path != "" + case *geminiSource: + if src != nil && src.Path != "" { + return src.Root, 
src.Path, true + } + } + for _, candidate := range []string{source.DisplayPath, source.FingerprintKey, source.Key} { + for _, root := range s.roots { + if ref, ok := s.sourceRef(root, candidate); ok { + src := ref.Opaque.(geminiSource) + return src.Root, src.Path, true + } + } + } + return "", "", false +} + +func (s geminiSourceSet) sourceRef(root, path string) (SourceRef, bool) { + return s.sourceRefForPath(root, path, true) +} + +func (s geminiSourceSet) sourceRefForPath( + root, path string, + requireRegular bool, +) (SourceRef, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + rel, ok := relUnder(root, path) + if !ok || (requireRegular && !IsRegularFile(path)) { + return SourceRef{}, false + } + sepParts := strings.Split(filepath.ToSlash(rel), "/") + if len(sepParts) != 4 || + sepParts[0] != "tmp" || + sepParts[2] != geminiChatsDir || + !isGeminiSessionFilename(sepParts[3]) { + return SourceRef{}, false + } + project := ResolveGeminiProject(sepParts[1], BuildGeminiProjectMap(root)) + return SourceRef{ + Provider: AgentGemini, + Key: path, + DisplayPath: path, + FingerprintKey: path, + ProjectHint: project, + Opaque: geminiSource{ + Root: root, + Path: path, + }, + }, true +} + +func geminiProjectMetadataPaths(root string) []string { + return []string{ + filepath.Join(root, "projects.json"), + filepath.Join(root, "trustedFolders.json"), + } +} + +func geminiProjectMetadataPath(root, path string) bool { + root = filepath.Clean(root) + path = filepath.Clean(path) + rel, ok := relUnder(root, path) + if !ok { + return false + } + rel = filepath.ToSlash(rel) + return rel == "projects.json" || rel == "trustedFolders.json" +} + +func addGeminiFingerprintPart( + h hash.Hash, + label string, + path string, + info os.FileInfo, +) error { + if _, err := fmt.Fprintf( + h, + "%s\x00%s\x00%d\x00%d\x00", + label, + path, + info.Size(), + info.ModTime().UnixNano(), + ); err != nil { + return err + } + f, err := os.Open(path) + if err != nil { + return 
fmt.Errorf("open %s: %w", path, err) + } + defer f.Close() + if _, err := io.Copy(h, f); err != nil { + return fmt.Errorf("hash %s: %w", path, err) + } + return nil +} + +func geminiProviderCapabilities() Capabilities { + return Capabilities{ + Source: SourceCapabilities{ + DiscoverSources: CapabilitySupported, + WatchSources: CapabilitySupported, + ClassifyChangedPath: CapabilitySupported, + FindSource: CapabilitySupported, + CompositeFingerprint: CapabilitySupported, + IncrementalAppend: CapabilityNotApplicable, + MultiSessionSource: CapabilityNotApplicable, + PerSessionErrors: CapabilityNotApplicable, + ExcludedSessions: CapabilityNotApplicable, + ForceReplaceOnParse: CapabilityNotApplicable, + }, + Content: ContentCapabilities{ + FirstMessage: CapabilitySupported, + Thinking: CapabilitySupported, + ToolCalls: CapabilitySupported, + ToolResults: CapabilitySupported, + PerMessageTokenUsage: CapabilitySupported, + Model: CapabilitySupported, + }, + } +} diff --git a/internal/parser/provider.go b/internal/parser/provider.go index 87d978fcf..f750a668e 100644 --- a/internal/parser/provider.go +++ b/internal/parser/provider.go @@ -360,6 +360,8 @@ func providerFactoryForDef(def AgentDef) ProviderFactory { return newCommandCodeProviderFactory(def) case AgentCodex: return newCodexProviderFactory(def) + case AgentCopilot: + return newCopilotProviderFactory(def) case AgentCowork: return newCoworkProviderFactory(def) case AgentCortex: @@ -374,6 +376,8 @@ func providerFactoryForDef(def AgentDef) ProviderFactory { return newIflowProviderFactory(def) case AgentGptme: return newGptmeProviderFactory(def) + case AgentGemini: + return newGeminiProviderFactory(def) case AgentKimi: return newKimiProviderFactory(def) case AgentKilo: diff --git a/internal/parser/provider_migration.go b/internal/parser/provider_migration.go index 0bdc02698..9c1a903ee 100644 --- a/internal/parser/provider_migration.go +++ b/internal/parser/provider_migration.go @@ -20,8 +20,8 @@ var 
providerMigrationModes = map[AgentType]ProviderMigrationMode{ AgentClaude: ProviderMigrationProviderAuthoritative, AgentCowork: ProviderMigrationProviderAuthoritative, AgentCodex: ProviderMigrationProviderAuthoritative, - AgentCopilot: ProviderMigrationLegacyOnly, - AgentGemini: ProviderMigrationLegacyOnly, + AgentCopilot: ProviderMigrationProviderAuthoritative, + AgentGemini: ProviderMigrationProviderAuthoritative, AgentMiMoCode: ProviderMigrationProviderAuthoritative, AgentOpenCode: ProviderMigrationProviderAuthoritative, AgentKilo: ProviderMigrationProviderAuthoritative, diff --git a/internal/parser/provider_shim_scan_test.go b/internal/parser/provider_shim_scan_test.go index 28683f39d..9a2a6c310 100644 --- a/internal/parser/provider_shim_scan_test.go +++ b/internal/parser/provider_shim_scan_test.go @@ -50,10 +50,9 @@ var pendingShimProviderFiles = map[string]bool{ "antigravity_cli_provider.go": true, "antigravity_provider.go": true, "claude_provider.go": true, - "copilot_provider.go": true, + "codex_provider.go": true, "cowork_provider.go": true, "db_backed_provider.go": true, - "gemini_provider.go": true, "hermes_provider.go": true, "kiro_ide_provider.go": true, "kiro_provider.go": true, diff --git a/internal/parser/provider_test.go b/internal/parser/provider_test.go index d3b701e58..65b689c5e 100644 --- a/internal/parser/provider_test.go +++ b/internal/parser/provider_test.go @@ -149,7 +149,10 @@ func TestProviderRegistryMirrorsAgentRegistry(t *testing.T) { } func TestLegacyProviderCapabilitiesMatchBaseDefaults(t *testing.T) { - provider, ok := NewProvider(AgentGemini, ProviderConfig{ + legacyAgent := legacyProviderTestAgent(t) + def, ok := AgentByType(legacyAgent) + require.True(t, ok) + provider, ok := NewProvider(legacyAgent, ProviderConfig{ Roots: []string{t.TempDir()}, Machine: "devbox", }) @@ -177,7 +180,7 @@ func TestLegacyProviderCapabilitiesMatchBaseDefaults(t *testing.T) { source, found, err := provider.FindSource(ctx, FindSourceRequest{ 
RawSessionID: "session", - FullSessionID: "gemini:session", + FullSessionID: def.IDPrefix + "session", StoredFilePath: "/tmp/session.jsonl", FingerprintKey: "/tmp/session.jsonl", }) @@ -186,7 +189,7 @@ func TestLegacyProviderCapabilitiesMatchBaseDefaults(t *testing.T) { assert.Empty(t, source) _, err = provider.Fingerprint(ctx, SourceRef{ - Provider: AgentGemini, + Provider: legacyAgent, Key: "session", DisplayPath: "/tmp/session.jsonl", FingerprintKey: "/tmp/session.jsonl", @@ -195,9 +198,9 @@ func TestLegacyProviderCapabilitiesMatchBaseDefaults(t *testing.T) { assert.True(t, errors.Is(err, ErrUnsupportedProviderFeature)) incremental, status, err := provider.ParseIncremental(ctx, IncrementalRequest{ - Source: SourceRef{Provider: AgentGemini, Key: "session"}, + Source: SourceRef{Provider: legacyAgent, Key: "session"}, Fingerprint: SourceFingerprint{Key: "/tmp/session.jsonl"}, - SessionID: "gemini:session", + SessionID: def.IDPrefix + "session", StartOrdinal: 1, Machine: "devbox", }) @@ -211,12 +214,13 @@ func TestProviderFactoryLookupAndConfigSnapshot(t *testing.T) { Roots: []string{"/tmp/one", "/tmp/two"}, Machine: "devbox", } + legacyAgent := legacyProviderTestAgent(t) - factory, ok := ProviderFactoryByType(AgentGemini) + factory, ok := ProviderFactoryByType(legacyAgent) require.True(t, ok) - assert.Equal(t, AgentGemini, factory.Definition().Type) + assert.Equal(t, legacyAgent, factory.Definition().Type) - provider, ok := NewProvider(AgentGemini, cfg) + provider, ok := NewProvider(legacyAgent, cfg) require.True(t, ok) require.NotNil(t, provider) @@ -233,7 +237,8 @@ func TestProviderFactoryLookupAndConfigSnapshot(t *testing.T) { } func TestLegacyProviderParseReturnsUnsupported(t *testing.T) { - provider, ok := NewProvider(AgentGemini, ProviderConfig{ + legacyAgent := legacyProviderTestAgent(t) + provider, ok := NewProvider(legacyAgent, ProviderConfig{ Roots: []string{t.TempDir()}, Machine: "devbox", }) @@ -241,7 +246,7 @@ func 
TestLegacyProviderParseReturnsUnsupported(t *testing.T) { outcome, err := provider.Parse(context.Background(), ParseRequest{ Source: SourceRef{ - Provider: AgentGemini, + Provider: legacyAgent, Key: "source", DisplayPath: "/tmp/source.jsonl", FingerprintKey: "/tmp/source.jsonl", @@ -257,7 +262,7 @@ func TestLegacyProviderParseReturnsUnsupported(t *testing.T) { assert.True(t, errors.Is(err, ErrUnsupportedProviderFeature)) var unsupported UnsupportedProviderFeatureError require.ErrorAs(t, err, &unsupported) - assert.Equal(t, AgentGemini, unsupported.Provider) + assert.Equal(t, legacyAgent, unsupported.Provider) assert.Equal(t, ProviderFeatureParse, unsupported.Feature) } @@ -349,6 +354,18 @@ func (p *testProvider) Parse(context.Context, ParseRequest) (ParseOutcome, error return ParseOutcome{}, nil } +func legacyProviderTestAgent(t *testing.T) AgentType { + t.Helper() + for _, def := range Registry { + factory := providerFactoryForDef(def) + if _, ok := factory.(legacyProviderFactory); ok { + return def.Type + } + } + t.Fatal("expected at least one legacy provider for fallback tests") + return "" +} + func assertAgentDefMetadataEqual(t *testing.T, want, got AgentDef) { t.Helper() diff --git a/internal/parser/types.go b/internal/parser/types.go index 732b6b3d7..7f4d4c750 100644 --- a/internal/parser/types.go +++ b/internal/parser/types.go @@ -128,28 +128,24 @@ var Registry = []AgentDef{ ShallowWatchRootsFunc: ResolveCodexShallowWatchRoots, }, { - Type: AgentCopilot, - DisplayName: "Copilot", - EnvVar: "COPILOT_DIR", - ConfigKey: "copilot_dirs", - DefaultDirs: []string{".copilot"}, - IDPrefix: "copilot:", - WatchSubdirs: []string{"session-state"}, - FileBased: true, - DiscoverFunc: DiscoverCopilotSessions, - FindSourceFunc: FindCopilotSourceFile, + Type: AgentCopilot, + DisplayName: "Copilot", + EnvVar: "COPILOT_DIR", + ConfigKey: "copilot_dirs", + DefaultDirs: []string{".copilot"}, + IDPrefix: "copilot:", + WatchSubdirs: []string{"session-state"}, + FileBased: true, }, 
{ - Type: AgentGemini, - DisplayName: "Gemini", - EnvVar: "GEMINI_DIR", - ConfigKey: "gemini_dirs", - DefaultDirs: []string{".gemini"}, - IDPrefix: "gemini:", - WatchSubdirs: []string{"tmp"}, - FileBased: true, - DiscoverFunc: DiscoverGeminiSessions, - FindSourceFunc: FindGeminiSourceFile, + Type: AgentGemini, + DisplayName: "Gemini", + EnvVar: "GEMINI_DIR", + ConfigKey: "gemini_dirs", + DefaultDirs: []string{".gemini"}, + IDPrefix: "gemini:", + WatchSubdirs: []string{"tmp"}, + FileBased: true, }, { Type: AgentMiMoCode, diff --git a/internal/sync/classify_antigravity_cli_test.go b/internal/sync/classify_antigravity_cli_test.go index f99a7d1ea..be05aba3e 100644 --- a/internal/sync/classify_antigravity_cli_test.go +++ b/internal/sync/classify_antigravity_cli_test.go @@ -67,7 +67,6 @@ func TestClassifyOnePath_AntigravityCLI(t *testing.T) { parser.AgentAntigravityCLI: {dir}, }, } - geminiMap := make(map[string]map[string]string) tests := []struct { name string @@ -131,7 +130,7 @@ func TestClassifyOnePath_AntigravityCLI(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got, ok := eng.classifyOnePath(tt.path, geminiMap) + got, ok := eng.classifyOnePath(tt.path) assert.Equal(t, tt.want, ok) if ok { assert.Equal(t, parser.AgentAntigravityCLI, got.Agent) @@ -146,7 +145,7 @@ func TestClassifyOnePath_AntigravityCLI(t *testing.T) { orphanTraj := filepath.Join(convDir, orphanUUID+".trajectory.json") require.NoError(t, os.WriteFile(orphanTraj, []byte("orphan"), 0o644)) - _, ok := eng.classifyOnePath(orphanTraj, geminiMap) + _, ok := eng.classifyOnePath(orphanTraj) assert.False(t, ok, "should not classify sidecar when pb file does not exist") }) diff --git a/internal/sync/engine.go b/internal/sync/engine.go index 023053f6a..3cb7596bd 100644 --- a/internal/sync/engine.go +++ b/internal/sync/engine.go @@ -508,7 +508,6 @@ func (e *Engine) SyncPaths(paths []string) { func (e *Engine) classifyPaths( paths []string, ) []parser.DiscoveredFile { - 
geminiProjectsByDir := make(map[string]map[string]string) seen := make(map[string]int, len(paths)) files := make([]parser.DiscoveredFile, 0, len(paths)) for _, p := range paths { @@ -520,9 +519,7 @@ func (e *Engine) classifyPaths( dfs = e.classifyCodexIndexPath(p) } if len(dfs) == 0 { - if df, ok := e.classifyOnePath( - p, geminiProjectsByDir, - ); ok { + if df, ok := e.classifyOnePath(p); ok { dfs = []parser.DiscoveredFile{df} } } @@ -950,7 +947,6 @@ func (e *Engine) classifyContainerPath( func (e *Engine) classifyOnePath( path string, - geminiProjectsByDir map[string]map[string]string, ) (parser.DiscoveredFile, bool) { sep := string(filepath.Separator) pathExists := true @@ -985,97 +981,6 @@ func (e *Engine) classifyOnePath( // shapes, so the legacy block was removed when Claude was folded // onto its provider. - // Copilot: /session-state/.jsonl - // or: /session-state//events.jsonl - for _, copilotDir := range e.agentDirs[parser.AgentCopilot] { - if copilotDir == "" { - continue - } - stateDir := filepath.Join( - copilotDir, "session-state", - ) - if rel, ok := isUnder(stateDir, path); ok { - parts := strings.Split(rel, sep) - switch len(parts) { - case 1: - stem, ok := strings.CutSuffix( - parts[0], ".jsonl", - ) - if !ok { - continue - } - dirEvents := filepath.Join( - stateDir, stem, "events.jsonl", - ) - if _, err := os.Stat(dirEvents); err == nil { - continue - } - return parser.DiscoveredFile{ - Path: path, - Agent: parser.AgentCopilot, - }, true - case 2: - if parts[1] == "events.jsonl" { - return parser.DiscoveredFile{ - Path: path, - Agent: parser.AgentCopilot, - }, true - } - // workspace.yaml changes should trigger a re-parse - // of the sibling events.jsonl. 
- if parts[1] == "workspace.yaml" { - eventsPath := filepath.Join( - stateDir, parts[0], "events.jsonl", - ) - if _, err := os.Stat(eventsPath); err == nil { - return parser.DiscoveredFile{ - Path: eventsPath, - Agent: parser.AgentCopilot, - }, true - } - } - continue - default: - continue - } - } - } - - // Gemini: /tmp//chats/session-*.json(.l) - // is either a SHA-256 hash (old) or project name (new). - for _, geminiDir := range e.agentDirs[parser.AgentGemini] { - if geminiDir == "" { - continue - } - if rel, ok := isUnder(geminiDir, path); ok { - parts := strings.Split(rel, sep) - if len(parts) != 4 || - parts[0] != "tmp" || - parts[2] != "chats" { - continue - } - name := parts[3] - if !strings.HasPrefix(name, "session-") || - (!strings.HasSuffix(name, ".json") && - !strings.HasSuffix(name, ".jsonl")) { - continue - } - dirName := parts[1] - if _, ok := geminiProjectsByDir[geminiDir]; !ok { - geminiProjectsByDir[geminiDir] = - parser.BuildGeminiProjectMap(geminiDir) - } - project := parser.ResolveGeminiProject( - dirName, geminiProjectsByDir[geminiDir], - ) - return parser.DiscoveredFile{ - Path: path, - Project: project, - Agent: parser.AgentGemini, - }, true - } - } - // VSCode Copilot: /workspaceStorage//chatSessions/.{json,jsonl} // or: /globalStorage/emptyWindowChatSessions/.{json,jsonl} for _, vscDir := range e.agentDirs[parser.AgentVSCodeCopilot] { @@ -3882,12 +3787,8 @@ func (e *Engine) processFile( var res processResult switch file.Agent { - case parser.AgentCopilot: - res = e.processCopilot(file, info) case parser.AgentReasonix: res = e.processReasonix(file, info) - case parser.AgentGemini: - res = e.processGemini(file, info) case parser.AgentVSCodeCopilot: res = e.processVSCodeCopilot(file, info) case parser.AgentVSCopilot: @@ -4012,7 +3913,7 @@ func (e *Engine) processProviderFile( mtime: mtime, }, true } - if freshMtime, fresh := e.providerCoworkSourceFresh(source, file); fresh { + if freshMtime, fresh := 
e.providerSourceFreshBeforeFingerprint(source, file); fresh { return processResult{ skip: true, mtime: freshMtime, @@ -4706,11 +4607,11 @@ func (e *Engine) providerSingleSessionFresh( !parser.NeedsProjectReparse(sess.Project) } -func (e *Engine) providerCoworkSourceFresh( +func (e *Engine) providerSourceFreshBeforeFingerprint( source parser.SourceRef, file parser.DiscoveredFile, ) (int64, bool) { - if e.forceParse || file.ForceParse || file.Agent != parser.AgentCowork { + if e.forceParse || file.ForceParse { return 0, false } path := providerDiscoveredPath(source) @@ -4728,15 +4629,31 @@ func (e *Engine) providerCoworkSourceFresh( return 0, false } } - mtime := parser.CoworkSessionMtime(path, info.ModTime().UnixNano()) - effectiveInfo := fakeSnapshotInfo{ - fSize: info.Size(), - fMtime: mtime, - } - if !e.shouldSkipByPath(path, effectiveInfo) { - return 0, false + switch file.Agent { + case parser.AgentCowork: + mtime := parser.CoworkSessionMtime(path, info.ModTime().UnixNano()) + effectiveInfo := fakeSnapshotInfo{ + fSize: info.Size(), + fMtime: mtime, + } + if e.shouldSkipByPath(path, effectiveInfo) { + return mtime, true + } + case parser.AgentGemini: + if e.shouldSkipByPath(path, info) { + return info.ModTime().UnixNano(), true + } + case parser.AgentCopilot: + mtime := copilotEffectiveMtime(path, info) + effectiveInfo := fakeSnapshotInfo{ + fSize: info.Size(), + fMtime: mtime, + } + if e.shouldSkipByPath(path, effectiveInfo) { + return mtime, true + } } - return mtime, true + return 0, false } // stampProviderFileIdentity copies the source file's inode and device onto @@ -5363,44 +5280,6 @@ func pickPreferredCodexDiscoveredFile( return chosen } -func (e *Engine) processCopilot( - file parser.DiscoveredFile, info os.FileInfo, -) processResult { - // Use effective mtime = max(events.jsonl, workspace.yaml) so - // that a new or updated workspace.yaml triggers a re-parse and - // the stored mtime stays consistent with what we compare against - // on subsequent 
syncs (preventing oscillation). - effectiveMtime := copilotEffectiveMtime(file.Path, info) - if e.shouldSkipCopilot(file.Path, info, effectiveMtime) { - return processResult{skip: true} - } - - sess, msgs, usageEvents, err := parser.ParseCopilotSession( - file.Path, e.machine, - ) - if err != nil { - return processResult{err: err} - } - if sess == nil { - return processResult{} - } - - if effectiveMtime > sess.File.Mtime { - sess.File.Mtime = effectiveMtime - } - - hash, err := ComputeFileHash(file.Path) - if err == nil { - sess.File.Hash = hash - } - - return processResult{ - results: []parser.ParseResult{ - {Session: *sess, Messages: msgs, UsageEvents: usageEvents}, - }, - } -} - // copilotEffectiveMtime returns max(events.jsonl mtime, // workspace.yaml mtime). For flat .jsonl sessions (no // workspace.yaml sibling) it returns the events.jsonl mtime. @@ -5553,64 +5432,6 @@ func reasonixEffectiveInfo(path string, info os.FileInfo) os.FileInfo { return fakeSnapshotInfo{fSize: size, fMtime: mtime} } -// shouldSkipCopilot is like shouldSkipByPath but uses the -// pre-computed effectiveMtime (max of events.jsonl and -// workspace.yaml) for the mtime comparison, keeping the stored -// value consistent with what we compare against on next sync. -func (e *Engine) shouldSkipCopilot( - path string, info os.FileInfo, effectiveMtime int64, -) bool { - if e.forceParse { // parse-diff: always re-parse - return false - } - lookupPath := path - if e.pathRewriter != nil { - lookupPath = e.pathRewriter(path) - } - storedSize, storedMtime, ok := e.db.GetFileInfoByPath(lookupPath) - if !ok { - return false - } - if storedSize != info.Size() || storedMtime != effectiveMtime { - return false - } - if e.db.GetDataVersionByPath(lookupPath) < - db.CurrentDataVersion() { - return false - } - return true -} - -func (e *Engine) processGemini( - file parser.DiscoveredFile, info os.FileInfo, -) processResult { - // Fast path: skip by file_path + mtime before parsing. 
- if e.shouldSkipByPath(file.Path, info) { - return processResult{skip: true} - } - - sess, msgs, err := parser.ParseGeminiSession( - file.Path, file.Project, e.machine, - ) - if err != nil { - return processResult{err: err} - } - if sess == nil { - return processResult{} - } - - hash, err := ComputeFileHash(file.Path) - if err == nil { - sess.File.Hash = hash - } - - return processResult{ - results: []parser.ParseResult{ - {Session: *sess, Messages: msgs}, - }, - } -} - func (e *Engine) processVSCodeCopilot( file parser.DiscoveredFile, info os.FileInfo, ) processResult { diff --git a/internal/sync/engine_integration_test.go b/internal/sync/engine_integration_test.go index ff496138a..3cf7b8ab8 100644 --- a/internal/sync/engine_integration_test.go +++ b/internal/sync/engine_integration_test.go @@ -2117,6 +2117,97 @@ func TestSyncPathsGeminiJSONL(t *testing.T) { assertSessionMessageCount(t, env.db, "gemini:"+sessionID, 2) } +func TestSyncPathsGeminiProjectMetadataEventRefreshesProject(t *testing.T) { + env := setupTestEnv(t) + + sessionID := "gem-project-refresh" + projectsPath := filepath.Join(env.geminiDir, "projects.json") + writeProject := func(name string) { + t.Helper() + require.NoError(t, os.WriteFile( + projectsPath, + fmt.Appendf(nil, + `{"projects":{"/Users/alice/code/%s":"alias"}}`, + name, + ), + 0o644, + ), "write projects") + } + writeProject("one") + path := env.writeGeminiSession( + t, + filepath.Join( + "tmp", "alias", "chats", + "session-001.json", + ), + testjsonl.GeminiSessionJSON( + sessionID, "alias", tsEarly, tsEarlyS5, + []map[string]any{ + testjsonl.GeminiUserMsg( + "m1", tsEarly, "Hello Gemini", + ), + testjsonl.GeminiAssistantMsg( + "m2", tsEarlyS5, "Hi there!", nil, + ), + }, + ), + ) + + env.engine.SyncPaths([]string{path}) + assertSessionState( + t, env.db, "gemini:"+sessionID, + func(sess *db.Session) { + assert.Equal(t, "one", sess.Project) + }, + ) + + info, err := os.Stat(path) + require.NoError(t, err, "stat gemini session") + 
env.engine.InjectSkipCache(map[string]int64{ + path: info.ModTime().UnixNano(), + }) /* seeds the skip cache for path; the syncs below are still expected to refresh the stored project */ + + writeProject("two") + env.engine.SyncPaths([]string{projectsPath}) /* only the projects.json sidecar is reported */ + + assertSessionState( + t, env.db, "gemini:"+sessionID, + func(sess *db.Session) { + assert.Equal(t, "two", sess.Project) + }, + ) + + writeProject("three") + env.engine.SyncPaths([]string{path, projectsPath}) /* both paths in one batch, transcript first */ + + assertSessionState( + t, env.db, "gemini:"+sessionID, + func(sess *db.Session) { + assert.Equal(t, "three", sess.Project) + }, + ) + + writeProject("four") + env.engine.SyncPaths([]string{projectsPath, path}) /* same batch in the opposite order; the outcome must not depend on ordering */ + + assertSessionState( + t, env.db, "gemini:"+sessionID, + func(sess *db.Session) { + assert.Equal(t, "four", sess.Project) + }, + ) + + require.NoError(t, os.Remove(projectsPath), "remove projects") /* delete the sidecar; the project is then expected to read "alias" */ + env.engine.SyncPaths([]string{projectsPath}) + + assertSessionState( + t, env.db, "gemini:"+sessionID, + func(sess *db.Session) { + assert.Equal(t, "alias", sess.Project) + }, + ) +} + func TestSyncPathsCodexAcceptsFlatArchived(t *testing.T) { env := setupTestEnv(t) @@ -5788,18 +5879,15 @@ func TestResyncAllReplacesMessageContent(t *testing.T) { }) require.NoError(t, err, "update message content") - // Normal SyncAll should skip (file unchanged on disk). - stats := env.engine.SyncAll(context.Background(), nil) - require.Equal(t, 1, stats.Skipped, "expected 1 skip, got %d", stats.Skipped) - msgs = fetchMessages(t, env.db, fullID) - require.True(t, strings.Contains(msgs[1].Content, "stale content"), "SyncAll should not have replaced content") - // Capture FTS state before resync so a regression that // breaks FTS isn't masked by HasFTS() returning false // post-resync. hadFTS := env.db.HasFTS() - // ResyncAll should re-parse and replace message content. + // ResyncAll should re-parse and replace message content. Gemini is + // provider-authoritative, so it has no DB-backed mtime skip; a plain + // SyncAll would also re-parse the unchanged file. 
ResyncAll additionally + // drops and rebuilds the FTS index, which is what this test guards. env.engine.ResyncAll(context.Background(), nil) msgs = fetchMessages(t, env.db, fullID) require.Equal(t, 2, len(msgs), "got %d messages after resync, want 2", len(msgs)) diff --git a/internal/sync/engine_test.go b/internal/sync/engine_test.go index 181672c71..c357bbe96 100644 --- a/internal/sync/engine_test.go +++ b/internal/sync/engine_test.go @@ -1684,6 +1684,7 @@ func TestProcessCodexAppendedStaleProjectDoesFullReparse(t *testing.T) { ) + "\n") require.NoError(t, err, "append codex fixture") require.NoError(t, f.Close(), "close codex fixture") + e := &Engine{ db: database, idPrefix: "host~", @@ -1778,6 +1779,7 @@ func TestProcessCodexAppendedStaleProjectCarriesForceReplace(t *testing.T) { ) + "\n") require.NoError(t, err, "append codex fixture") require.NoError(t, f.Close(), "close codex fixture") + e := &Engine{ db: database, idPrefix: "host~", @@ -3523,7 +3525,7 @@ func TestEngine_ClassifyOnePathReasonixProjectBareMeta(t *testing.T) { dbtest.WriteTestFile(t, sessionPath, []byte(`{"role":"user","content":"hi"}`)) dbtest.WriteTestFile(t, metaPath, []byte(`{"model":"claude"}`)) - got, ok := engine.classifyOnePath(metaPath, nil) + got, ok := engine.classifyOnePath(metaPath) require.True(t, ok, "expected Reasonix sidecar to classify") assert.Equal(t, sessionPath, got.Path) assert.Equal(t, "proj", got.Project) @@ -3546,7 +3548,7 @@ func TestEngine_ClassifyOnePathReasonixDeletedMeta(t *testing.T) { metaPath := sessionPath + ".meta" dbtest.WriteTestFile(t, sessionPath, []byte(`{"role":"user","content":"hi"}`)) - got, ok := engine.classifyOnePath(metaPath, nil) + got, ok := engine.classifyOnePath(metaPath) require.True(t, ok, "expected deleted Reasonix sidecar to classify") assert.Equal(t, sessionPath, got.Path) assert.Equal(t, "proj", got.Project) @@ -3567,7 +3569,7 @@ func TestEngine_ClassifyOnePathReasonixDeletedTranscriptIgnored(t *testing.T) { reasonixDir, "projects", 
"proj", "sessions", "session-123.jsonl", ) - _, ok := engine.classifyOnePath(sessionPath, nil) + _, ok := engine.classifyOnePath(sessionPath) assert.False(t, ok, "expected deleted Reasonix transcript to be ignored") } @@ -4222,3 +4224,55 @@ func TestWriteIncrementalKeepsPlausibleEndedAt(t *testing.T) { "plausible appended ended_at must update the column: got %q want %s", *after.EndedAt, newEnd.Format(time.RFC3339Nano)) } + +func TestEngine_ClassifyPathsProviderRemoveSkipsMissingGeminiSource( + t *testing.T, +) { /* a Gemini chat path that no longer exists on disk must classify to nothing */ + db := openTestDB(t) + geminiDir := t.TempDir() + engine := NewEngine(db, EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentGemini: {geminiDir}, + }, + Machine: "local", + }) + + sessionPath := filepath.Join( + geminiDir, "tmp", "alias", "chats", "session-001.json", + ) + dbtest.WriteTestFile(t, sessionPath, []byte("{}")) + require.NoError(t, os.Remove(sessionPath), "Remove(%q)", sessionPath) /* the path keeps the Gemini chat layout but the file itself is gone */ + + files := engine.classifyPaths([]string{sessionPath}) + assert.Empty(t, files) +} + +func TestEngine_ClassifyPathsProviderSidecarKeepsExistingGeminiSources( + t *testing.T, +) { /* a projects.json event must resolve to the transcript that still exists */ + db := openTestDB(t) + geminiDir := t.TempDir() + engine := NewEngine(db, EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentGemini: {geminiDir}, + }, + Machine: "local", + }) + + projectsPath := filepath.Join(geminiDir, "projects.json") + dbtest.WriteTestFile( + t, + projectsPath, + []byte(`{"projects":{"/Users/alice/code/sample":"alias"}}`), + ) + sessionPath := filepath.Join( + geminiDir, "tmp", "alias", "chats", "session-001.json", + ) + dbtest.WriteTestFile(t, sessionPath, []byte("{}")) + + files := engine.classifyPaths([]string{projectsPath}) /* only the sidecar is passed in */ + require.Len(t, files, 1) + assert.Equal(t, sessionPath, files[0].Path) + assert.Equal(t, parser.AgentGemini, files[0].Agent) + assert.True(t, files[0].ForceParse) /* sidecar-derived entries are expected to request a forced parse */ +} diff --git a/internal/sync/provider_shadow_caller_test.go b/internal/sync/provider_shadow_caller_test.go index 024e27f13..ac1d7070f 
100644 --- a/internal/sync/provider_shadow_caller_test.go +++ b/internal/sync/provider_shadow_caller_test.go @@ -649,6 +649,55 @@ func TestProcessFileProviderAuthoritativeSkipsFreshCoworkBeforeFingerprint(t *te assert.Empty(t, provider.calls) } +func TestProcessFileProviderAuthoritativeSkipsFreshGeminiBeforeFingerprint(t *testing.T) { /* a stored session row matching the file on disk should make processFile skip */ + root := t.TempDir() + sourcePath := filepath.Join( + root, "tmp", "alias", "chats", "session-001.json", + ) + sourceMtime := writeFreshProviderDBSession( + t, sourcePath, nil, + ) + provider := &shadowCallerProvider{ + shadowTestProvider: shadowTestProvider{ + ProviderBase: parser.ProviderBase{ + Def: parser.AgentDef{ + Type: parser.AgentGemini, + DisplayName: "Gemini CLI", + }, + }, + }, + source: parser.SourceRef{ + Provider: parser.AgentGemini, + Key: sourcePath, + DisplayPath: sourcePath, + FingerprintKey: sourcePath, + }, + } + engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentGemini: {root}, + }, + Machine: "devbox", + ProviderFactories: []parser.ProviderFactory{ + shadowCallerFactory{provider: provider}, + }, + ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentGemini: parser.ProviderMigrationProviderAuthoritative, + }, + }) + requireFreshProviderSession(t, engine.db, parser.AgentGemini, sourcePath, sourceMtime) + + result := engine.processFile(context.Background(), parser.DiscoveredFile{ + Path: sourcePath, + Agent: parser.AgentGemini, + }) + + require.NoError(t, result.err) + assert.True(t, result.skip) + assert.Equal(t, sourceMtime, result.mtime) + assert.Empty(t, provider.calls) /* presumably the recorded provider invocations; confirm against shadowCallerProvider */ +} + func TestProcessFileProviderAuthoritativeForceParseBypassesFreshCoworkSkip(t *testing.T) { root := t.TempDir() database := dbtest.OpenTestDB(t) @@ -703,6 +752,56 @@ func TestProcessFileProviderAuthoritativeForceParseBypassesFreshCoworkSkip(t *te assert.True(t, provider.parseRequest.ForceParse) } +func 
TestProcessFileProviderAuthoritativeSkipsFreshCopilotBeforeFingerprint(t *testing.T) { /* same skip expectation as the Gemini case, with a workspace.yaml sidecar present */ + root := t.TempDir() + sourcePath := filepath.Join( + root, "session-state", "copilot-fresh", "events.jsonl", + ) + workspacePath := filepath.Join(filepath.Dir(sourcePath), "workspace.yaml") /* sidecar sits in the same directory as events.jsonl */ + sourceMtime := writeFreshProviderDBSession( + t, sourcePath, &workspacePath, + ) + provider := &shadowCallerProvider{ + shadowTestProvider: shadowTestProvider{ + ProviderBase: parser.ProviderBase{ + Def: parser.AgentDef{ + Type: parser.AgentCopilot, + DisplayName: "Copilot CLI", + }, + }, + }, + source: parser.SourceRef{ + Provider: parser.AgentCopilot, + Key: sourcePath, + DisplayPath: sourcePath, + FingerprintKey: sourcePath, + }, + } + engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentCopilot: {root}, + }, + Machine: "devbox", + ProviderFactories: []parser.ProviderFactory{ + shadowCallerFactory{provider: provider}, + }, + ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentCopilot: parser.ProviderMigrationProviderAuthoritative, + }, + }) + requireFreshProviderSession(t, engine.db, parser.AgentCopilot, sourcePath, sourceMtime) + + result := engine.processFile(context.Background(), parser.DiscoveredFile{ + Path: sourcePath, + Agent: parser.AgentCopilot, + }) + + require.NoError(t, result.err) + assert.True(t, result.skip) + assert.Equal(t, sourceMtime, result.mtime) /* sourceMtime here is the sidecar's mtime, which the helper stamps one second after the transcript */ + assert.Empty(t, provider.calls) +} + func TestProcessFileProviderAuthoritativeKeepsRetryStatePerResult(t *testing.T) { root := t.TempDir() sourcePath := filepath.Join(root, "multi-provider-owned.jsonl") @@ -1293,3 +1392,52 @@ func writeFreshCoworkProviderSource( return sourcePath, sourceMtime } + +func writeFreshProviderDBSession( + t *testing.T, + sourcePath string, + mtimeSidecar *string, +) int64 { /* writes a stub source file, plus an optional sidecar, with fixed mtimes and returns the newest mtime as UnixNano; despite the name it performs no database writes */ + t.Helper() + + require.NoError(t, os.MkdirAll(filepath.Dir(sourcePath), 0o755)) + require.NoError(t, 
os.WriteFile(sourcePath, []byte("{}\n"), 0o644)) + sourceTime := time.Unix(1_781_475_210, 0) /* fixed timestamp so the expected mtime is deterministic */ + require.NoError(t, os.Chtimes(sourcePath, sourceTime, sourceTime)) + mtime := sourceTime.UnixNano() + if mtimeSidecar != nil { + sidecarTime := sourceTime.Add(time.Second) + require.NoError(t, os.WriteFile(*mtimeSidecar, []byte("name: Fresh\n"), 0o644)) + require.NoError(t, os.Chtimes(*mtimeSidecar, sidecarTime, sidecarTime)) + mtime = sidecarTime.UnixNano() /* the sidecar is one second newer, so its mtime is the one returned */ + } + + return mtime +} + +func requireFreshProviderSession( + t *testing.T, + database *db.DB, + agent parser.AgentType, + sourcePath string, + sourceMtime int64, +) { /* upserts a session row whose path, size and mtime describe sourcePath, then stamps it with the current data version */ + t.Helper() + + info, err := os.Stat(sourcePath) + require.NoError(t, err) + sourceSize := info.Size() + fullSessionID := string(agent) + ":fresh" + require.NoError(t, database.UpsertSession(db.Session{ + ID: fullSessionID, + Project: "provider-project", + Machine: "devbox", + Agent: string(agent), + FilePath: &sourcePath, + FileSize: &sourceSize, + FileMtime: &sourceMtime, + })) + require.NoError(t, database.SetSessionDataVersion( + fullSessionID, db.CurrentDataVersion(), + )) +}