diff --git a/internal/parser/copilot.go b/internal/parser/copilot.go index 57ee77d36..0c4c35be2 100644 --- a/internal/parser/copilot.go +++ b/internal/parser/copilot.go @@ -333,10 +333,12 @@ func readCopilotWorkspaceName(eventsPath string) string { return "" } -// ParseCopilotSession parses a Copilot JSONL session file. -// Returns (nil, nil, nil, nil) if the file doesn't exist or -// contains no user/assistant messages. -func ParseCopilotSession( +// parseSession parses a Copilot JSONL session file into the session, messages, +// and usage events the provider consumes. Returns (nil, nil, nil, nil) if the +// file doesn't exist or contains no user/assistant messages. This is the +// provider-owned parse entrypoint; the package-level free function was folded +// onto the provider. +func (p *copilotProvider) parseSession( path, machine string, ) (*ParsedSession, []ParsedMessage, []ParsedUsageEvent, error) { info, err := os.Stat(path) diff --git a/internal/parser/copilot_provider.go b/internal/parser/copilot_provider.go new file mode 100644 index 000000000..0ec855939 --- /dev/null +++ b/internal/parser/copilot_provider.go @@ -0,0 +1,489 @@ +package parser + +import ( + "context" + "crypto/sha256" + "fmt" + "hash" + "io" + "os" + "path/filepath" + "strings" +) + +var _ Provider = (*copilotProvider)(nil) + +type copilotProviderFactory struct { + def AgentDef +} + +func newCopilotProviderFactory(def AgentDef) ProviderFactory { + return copilotProviderFactory{def: cloneAgentDef(def)} +} + +func (f copilotProviderFactory) Definition() AgentDef { + return cloneAgentDef(f.def) +} + +func (f copilotProviderFactory) Capabilities() Capabilities { + return copilotProviderCapabilities() +} + +func (f copilotProviderFactory) NewProvider(cfg ProviderConfig) Provider { + cfg = cfg.Clone() + return &copilotProvider{ + ProviderBase: ProviderBase{ + Def: cloneAgentDef(f.def), + Caps: copilotProviderCapabilities(), + Config: cfg, + }, + sources: newCopilotSourceSet(cfg.Roots), + } +} + 
+type copilotProvider struct { + ProviderBase + sources copilotSourceSet +} + +func (p *copilotProvider) Discover(ctx context.Context) ([]SourceRef, error) { + return p.sources.Discover(ctx) +} + +func (p *copilotProvider) WatchPlan(ctx context.Context) (WatchPlan, error) { + return p.sources.WatchPlan(ctx) +} + +func (p *copilotProvider) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + return p.sources.SourcesForChangedPath(ctx, req) +} + +func (p *copilotProvider) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + req = providerFindRequestWithRawSessionID(p.Def, req) + return p.sources.FindSource(ctx, req) +} + +func (p *copilotProvider) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + return p.sources.Fingerprint(ctx, source) +} + +func (p *copilotProvider) Parse( + ctx context.Context, + req ParseRequest, +) (ParseOutcome, error) { + if err := ctx.Err(); err != nil { + return ParseOutcome{}, err + } + path, ok := p.sources.pathFromSource(req.Source) + if !ok { + return ParseOutcome{}, fmt.Errorf("copilot source path unavailable") + } + machine := firstNonEmptyJSONLString(req.Machine, p.Config.Machine) + sess, msgs, usage, err := p.parseSession(path, machine) + if err != nil { + return ParseOutcome{}, err + } + if sess == nil { + return ParseOutcome{ + ResultSetComplete: true, + SkipReason: SkipNoSession, + }, nil + } + if req.Fingerprint.Hash != "" { + sess.File.Hash = req.Fingerprint.Hash + } + if req.Fingerprint.Size > 0 { + sess.File.Size = req.Fingerprint.Size + } + if req.Fingerprint.MTimeNS > 0 { + sess.File.Mtime = req.Fingerprint.MTimeNS + } + sess.UsageEvents = usage + return ParseOutcome{ + Results: []ParseResultOutcome{{ + Result: ParseResult{ + Session: *sess, + Messages: msgs, + UsageEvents: usage, + }, + DataVersion: DataVersionCurrent, + }}, + ResultSetComplete: true, + }, nil +} + +type copilotSource struct { + 
Root string + Path string +} + +type copilotSourceSet struct { + roots []string +} + +func newCopilotSourceSet(roots []string) copilotSourceSet { + return copilotSourceSet{roots: cleanJSONLRoots(roots)} +} + +func (s copilotSourceSet) Discover(ctx context.Context) ([]SourceRef, error) { + var sources []SourceRef + seen := make(map[string]struct{}) + for _, root := range s.roots { + if err := ctx.Err(); err != nil { + return nil, err + } + for _, path := range s.discoverSessionPaths(root) { + source, ok := s.sourceRef(root, path) + if !ok { + continue + } + addJSONLSource(source, &sources, seen) + } + } + sortJSONLSources(sources) + return sources, nil +} + +// discoverSessionPaths finds all Copilot session file paths under +// /session-state/. It supports both the bare layout (.jsonl) and +// the directory layout (/events.jsonl); when both exist for the same +// session, the directory layout wins and the bare file is dropped so a session +// is not discovered twice. +func (s copilotSourceSet) discoverSessionPaths(root string) []string { + if root == "" { + return nil + } + + stateDir := filepath.Join(root, copilotStateDir) + entries, err := os.ReadDir(stateDir) + if err != nil { + return nil + } + + dirs := make(map[string]struct{}) + for _, entry := range entries { + if !entry.IsDir() { + continue + } + eventsPath := filepath.Join(stateDir, entry.Name(), "events.jsonl") + if _, err := os.Stat(eventsPath); err == nil { + dirs[entry.Name()] = struct{}{} + } + } + + var paths []string + for _, entry := range entries { + name := entry.Name() + if entry.IsDir() { + candidate := filepath.Join(stateDir, name, "events.jsonl") + if _, err := os.Stat(candidate); err == nil { + paths = append(paths, candidate) + } + continue + } + if stem, ok := strings.CutSuffix(name, ".jsonl"); ok { + if _, dup := dirs[stem]; dup { + continue + } + paths = append(paths, filepath.Join(stateDir, name)) + } + } + return paths +} + +func (s copilotSourceSet) WatchPlan(context.Context) 
(WatchPlan, error) { + roots := make([]WatchRoot, 0, len(s.roots)) + for _, root := range s.roots { + stateDir := filepath.Join(root, copilotStateDir) + roots = append(roots, WatchRoot{ + Path: stateDir, + Recursive: true, + IncludeGlobs: []string{"*.jsonl", "workspace.yaml"}, + DebounceKey: string(AgentCopilot) + ":state:" + stateDir, + }) + } + return WatchPlan{Roots: roots}, nil +} + +func (s copilotSourceSet) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + for _, root := range s.roots { + source, ok := s.sourceForChangedPath(root, req) + if ok { + return []SourceRef{source}, nil + } + } + return nil, nil +} + +func (s copilotSourceSet) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + if err := ctx.Err(); err != nil { + return SourceRef{}, false, err + } + for _, path := range []string{req.StoredFilePath, req.FingerprintKey} { + if path == "" { + continue + } + for _, root := range s.roots { + if source, ok := s.sourceRef(root, path); ok { + return source, true, nil + } + } + } + if req.RawSessionID == "" { + return SourceRef{}, false, nil + } + for _, root := range s.roots { + path := s.findSourceFile(root, req.RawSessionID) + if path == "" { + continue + } + if source, ok := s.sourceRef(root, path); ok { + return source, true, nil + } + } + return SourceRef{}, false, nil +} + +// findSourceFile locates a Copilot session file by UUID under root. It checks +// the directory layout (/events.jsonl) first, then the bare layout +// (.jsonl), so the richer directory form takes precedence. Returns "" for +// invalid IDs or when no file resolves. 
+func (s copilotSourceSet) findSourceFile(root, rawID string) string { + if root == "" || !IsValidSessionID(rawID) { + return "" + } + + stateDir := filepath.Join(root, copilotStateDir) + + dirFmt := filepath.Join(stateDir, rawID, "events.jsonl") + if _, err := os.Stat(dirFmt); err == nil { + return dirFmt + } + + bare := filepath.Join(stateDir, rawID+".jsonl") + if _, err := os.Stat(bare); err == nil { + return bare + } + + return "" +} + +func (s copilotSourceSet) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + if err := ctx.Err(); err != nil { + return SourceFingerprint{}, err + } + path, ok := s.pathFromSource(source) + if !ok { + return SourceFingerprint{}, fmt.Errorf("copilot source path unavailable") + } + info, err := os.Stat(path) + if err != nil { + return SourceFingerprint{}, fmt.Errorf("stat %s: %w", path, err) + } + if info.IsDir() { + return SourceFingerprint{}, fmt.Errorf("stat %s: source is a directory", path) + } + size := info.Size() + mtime := info.ModTime().UnixNano() + if workspace := copilotWorkspacePath(path); workspace != "" { + if wsInfo, err := os.Stat(workspace); err == nil { + size += wsInfo.Size() + if wsMtime := wsInfo.ModTime().UnixNano(); wsMtime > mtime { + mtime = wsMtime + } + } + } + fingerprint := SourceFingerprint{ + Key: firstNonEmptyJSONLString(source.FingerprintKey, source.Key, path), + Size: size, + MTimeNS: mtime, + } + h := sha256.New() + if err := addCopilotFingerprintPart(h, "events", path, info); err != nil { + return SourceFingerprint{}, err + } + if workspace := copilotWorkspacePath(path); workspace != "" { + if wsInfo, err := os.Stat(workspace); err == nil && !wsInfo.IsDir() { + if err := addCopilotFingerprintPart(h, "workspace", workspace, wsInfo); err != nil { + return SourceFingerprint{}, err + } + } + } + fingerprint.Hash = fmt.Sprintf("%x", h.Sum(nil)) + return fingerprint, nil +} + +func (s copilotSourceSet) pathFromSource(source SourceRef) (string, bool) { + switch 
src := source.Opaque.(type) { + case copilotSource: + return src.Path, src.Path != "" + case *copilotSource: + if src != nil && src.Path != "" { + return src.Path, true + } + } + for _, candidate := range []string{source.DisplayPath, source.FingerprintKey, source.Key} { + for _, root := range s.roots { + if ref, ok := s.sourceRef(root, candidate); ok { + src := ref.Opaque.(copilotSource) + return src.Path, true + } + } + } + return "", false +} + +func (s copilotSourceSet) sourceForChangedPath( + root string, + req ChangedPathRequest, +) (SourceRef, bool) { + path := req.Path + if filepath.Base(path) == "workspace.yaml" { + return s.sourceRef(root, filepath.Join(filepath.Dir(path), "events.jsonl")) + } + if source, ok := s.sourceRef(root, path); ok { + return source, true + } + if !jsonlMissingPathFallbackAllowed(req) { + return SourceRef{}, false + } + if filepath.Base(path) == "events.jsonl" { + barePath := filepath.Join( + root, + copilotStateDir, + filepath.Base(filepath.Dir(path))+".jsonl", + ) + if source, ok := s.sourceRef(root, barePath); ok { + return source, true + } + } + return s.sourceRefForPath(root, path, false) +} + +func (s copilotSourceSet) sourceRef(root, path string) (SourceRef, bool) { + return s.sourceRefForPath(root, path, true) +} + +func (s copilotSourceSet) sourceRefForPath( + root, path string, + requireRegular bool, +) (SourceRef, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + rel, ok := relUnder(root, path) + if !ok || (requireRegular && !IsRegularFile(path)) { + return SourceRef{}, false + } + parts := strings.Split(filepath.ToSlash(rel), "/") + if len(parts) == 3 && + parts[0] == copilotStateDir && + parts[2] == "events.jsonl" { + return s.newSourceRef(root, path), true + } + if len(parts) == 2 && + parts[0] == copilotStateDir && + strings.HasSuffix(parts[1], ".jsonl") { + stem := strings.TrimSuffix(parts[1], ".jsonl") + if dirPath := s.findSourceFile(root, stem); dirPath != "" && + dirPath != path { + return 
s.sourceRef(root, dirPath) + } + return s.newSourceRef(root, path), true + } + return SourceRef{}, false +} + +func (s copilotSourceSet) newSourceRef(root, path string) SourceRef { + return SourceRef{ + Provider: AgentCopilot, + Key: path, + DisplayPath: path, + FingerprintKey: path, + Opaque: copilotSource{ + Root: root, + Path: path, + }, + } +} + +func copilotWorkspacePath(eventsPath string) string { + if filepath.Base(eventsPath) != "events.jsonl" { + return "" + } + return filepath.Join(filepath.Dir(eventsPath), "workspace.yaml") +} + +func addCopilotFingerprintPart( + h hash.Hash, + label string, + path string, + info os.FileInfo, +) error { + if _, err := fmt.Fprintf( + h, + "%s\x00%s\x00%d\x00%d\x00", + label, + path, + info.Size(), + info.ModTime().UnixNano(), + ); err != nil { + return err + } + f, err := os.Open(path) + if err != nil { + return fmt.Errorf("open %s: %w", path, err) + } + defer f.Close() + if _, err := io.Copy(h, f); err != nil { + return fmt.Errorf("hash %s: %w", path, err) + } + return nil +} + +func copilotProviderCapabilities() Capabilities { + return Capabilities{ + Source: SourceCapabilities{ + DiscoverSources: CapabilitySupported, + WatchSources: CapabilitySupported, + ClassifyChangedPath: CapabilitySupported, + FindSource: CapabilitySupported, + CompositeFingerprint: CapabilitySupported, + IncrementalAppend: CapabilityNotApplicable, + MultiSessionSource: CapabilityNotApplicable, + PerSessionErrors: CapabilityNotApplicable, + ExcludedSessions: CapabilityNotApplicable, + ForceReplaceOnParse: CapabilityNotApplicable, + }, + Content: ContentCapabilities{ + FirstMessage: CapabilitySupported, + Cwd: CapabilitySupported, + Thinking: CapabilitySupported, + ToolCalls: CapabilitySupported, + ToolResults: CapabilitySupported, + PerMessageTokenUsage: CapabilitySupported, + AggregateUsageEvents: CapabilitySupported, + Model: CapabilitySupported, + }, + } +} diff --git a/internal/parser/copilot_test.go b/internal/parser/copilot_test.go index 
26a6d2f82..c5dc9cc72 100644 --- a/internal/parser/copilot_test.go +++ b/internal/parser/copilot_test.go @@ -1,6 +1,7 @@ package parser import ( + "context" "os" "path/filepath" "strings" @@ -10,6 +11,60 @@ import ( "github.com/stretchr/testify/require" ) +// newCopilotTestProvider builds a concrete copilotProvider for the given roots +// so package tests can exercise the folded parse, discovery, and source-lookup +// behavior directly through provider methods, replacing the removed +// package-level entrypoints. +func newCopilotTestProvider(t *testing.T, roots ...string) *copilotProvider { + t.Helper() + provider, ok := NewProvider(AgentCopilot, ProviderConfig{ + Roots: roots, + Machine: "local", + }) + require.True(t, ok) + cp, ok := provider.(*copilotProvider) + require.True(t, ok) + return cp +} + +// parseCopilotTestSession parses a Copilot JSONL session file at path through +// the provider-owned parse method, replacing the removed package-level +// ParseCopilotSession entrypoint. +func parseCopilotTestSession( + t *testing.T, path, machine string, +) (*ParsedSession, []ParsedMessage, []ParsedUsageEvent, error) { + t.Helper() + return newCopilotTestProvider(t).parseSession(path, machine) +} + +// discoverCopilotTestSessions discovers Copilot sessions under root through the +// provider, returning the legacy DiscoveredFile shape (path) the tests assert +// against. 
+func discoverCopilotTestSessions(t *testing.T, root string) []DiscoveredFile { + t.Helper() + provider := newCopilotTestProvider(t, root) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + if len(sources) == 0 { + return nil + } + files := make([]DiscoveredFile, 0, len(sources)) + for _, source := range sources { + files = append(files, DiscoveredFile{ + Path: source.DisplayPath, + Agent: AgentCopilot, + }) + } + return files +} + +// findCopilotTestSourceFile resolves a Copilot session ID to a session file +// path through the provider, replacing the removed FindCopilotSourceFile. +func findCopilotTestSourceFile(t *testing.T, root, rawID string) string { + t.Helper() + return newCopilotTestProvider(t, root).sources.findSourceFile(root, rawID) +} + // writeCopilotJSONL writes JSONL lines to a temp file and // returns the file path. func writeCopilotJSONL( @@ -28,7 +83,7 @@ func writeCopilotJSONL( // parseAndValidateHelper parses the session and fails the test on basic errors. 
func parseAndValidateHelper(t *testing.T, path string, machine string, wantMsgs int) (*ParsedSession, []ParsedMessage) { t.Helper() - sess, msgs, _, err := ParseCopilotSession(path, machine) + sess, msgs, _, err := parseCopilotTestSession(t, path, machine) require.NoError(t, err) require.NotNil(t, sess, "expected non-nil session") require.Len(t, msgs, wantMsgs) @@ -349,7 +404,7 @@ func TestParseCopilotSession_EmptySession(t *testing.T) { `{"type":"session.start","data":{"sessionId":"empty"},"timestamp":"2025-01-15T10:00:00Z"}`, ) - sess, msgs, _, err := ParseCopilotSession(path, "m") + sess, msgs, _, err := parseCopilotTestSession(t, path, "m") require.NoError(t, err) assert.Nil(t, sess, "expected nil session for empty") assert.Nil(t, msgs, "expected nil messages for empty") @@ -358,7 +413,7 @@ func TestParseCopilotSession_EmptySession(t *testing.T) { func TestParseCopilotSession_NonexistentFile(t *testing.T) { path := filepath.Join(t.TempDir(), "nonexistent.jsonl") - sess, msgs, _, err := ParseCopilotSession(path, "m") + sess, msgs, _, err := parseCopilotTestSession(t, path, "m") require.NoError(t, err, "expected nil error") assert.Nil(t, sess, "expected nil session for nonexistent file") assert.Nil(t, msgs, "expected nil messages for nonexistent file") @@ -588,7 +643,7 @@ func parseCopilotFull( t *testing.T, path, machine string, ) (*ParsedSession, []ParsedMessage, []ParsedUsageEvent) { t.Helper() - sess, msgs, usage, err := ParseCopilotSession(path, machine) + sess, msgs, usage, err := parseCopilotTestSession(t, path, machine) require.NoError(t, err) return sess, msgs, usage } diff --git a/internal/parser/discovery.go b/internal/parser/discovery.go index a72d951f6..928e18bcd 100644 --- a/internal/parser/discovery.go +++ b/internal/parser/discovery.go @@ -23,6 +23,11 @@ var uuidRe = regexp.MustCompile( `[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12})$`, ) +const ( + copilotStateDir = "session-state" + geminiChatsDir = "chats" +) + // isDirOrSymlink reports whether 
the entry is a directory or a // symlink that resolves to a directory. parentDir is needed to // build the full path for symlink resolution. @@ -49,7 +54,7 @@ type DiscoveredFile struct { Path string Project string // pre-extracted project name Agent AgentType // which agent this file belongs to - ForceParse bool // caller requires a full source reparse + ForceParse bool // bypass stored-state skips for sidecar-driven refreshes ProviderSource *SourceRef // provider-owned source identity, when known ProviderProcess bool // true when this caller may parse via ProviderSource } @@ -732,117 +737,6 @@ func isGeminiSessionFilename(name string) bool { strings.HasSuffix(name, ".jsonl")) } -// DiscoverGeminiSessions finds all Gemini session files under -// the Gemini directory (~/.gemini/tmp/*/chats/session-*). -func DiscoverGeminiSessions( - geminiDir string, -) []DiscoveredFile { - if geminiDir == "" { - return nil - } - - tmpDir := filepath.Join(geminiDir, "tmp") - hashDirs, err := os.ReadDir(tmpDir) - if err != nil { - return nil - } - - projectMap := BuildGeminiProjectMap(geminiDir) - - var files []DiscoveredFile - for _, hd := range hashDirs { - if !isDirOrSymlink(hd, tmpDir) { - continue - } - hash := hd.Name() - chatsDir := filepath.Join(tmpDir, hash, "chats") - entries, err := os.ReadDir(chatsDir) - if err != nil { - continue - } - - project := ResolveGeminiProject(hash, projectMap) - - for _, sf := range entries { - if sf.IsDir() { - continue - } - name := sf.Name() - if !isGeminiSessionFilename(name) { - continue - } - files = append(files, DiscoveredFile{ - Path: filepath.Join(chatsDir, name), - Project: project, - Agent: AgentGemini, - }) - } - } - - sort.Slice(files, func(i, j int) bool { - return files[i].Path < files[j].Path - }) - return files -} - -// FindGeminiSourceFile locates a Gemini session file by its -// session UUID. Searches all project hash directories. 
-func FindGeminiSourceFile( - geminiDir, sessionID string, -) string { - if geminiDir == "" || !IsValidSessionID(sessionID) || - len(sessionID) < 8 { - return "" - } - - tmpDir := filepath.Join(geminiDir, "tmp") - hashDirs, err := os.ReadDir(tmpDir) - if err != nil { - return "" - } - - for _, hd := range hashDirs { - if !isDirOrSymlink(hd, tmpDir) { - continue - } - chatsDir := filepath.Join(tmpDir, hd.Name(), "chats") - entries, err := os.ReadDir(chatsDir) - if err != nil { - continue - } - for _, sf := range entries { - if sf.IsDir() { - continue - } - name := sf.Name() - if !isGeminiSessionFilename(name) { - continue - } - if strings.Contains(name, sessionID[:8]) { - path := filepath.Join(chatsDir, name) - if confirmGeminiSessionID( - path, sessionID, - ) { - return path - } - } - } - } - return "" -} - -// confirmGeminiSessionID reads the sessionId field from a -// Gemini file to confirm it matches the expected ID. -func confirmGeminiSessionID( - path, sessionID string, -) bool { - data, err := os.ReadFile(path) - if err != nil { - return false - } - return GeminiSessionID(data) == sessionID -} - // geminiProjectsFile holds the structure of // ~/.gemini/projects.json. type geminiProjectsFile struct { @@ -958,92 +852,6 @@ func ResolveGeminiProject( return NormalizeName(dirName) } -// DiscoverCopilotSessions finds all JSONL files under -// /session-state/. Supports both bare format -// (.jsonl) and directory format (/events.jsonl). 
-func DiscoverCopilotSessions( - copilotDir string, -) []DiscoveredFile { - if copilotDir == "" { - return nil - } - - stateDir := filepath.Join(copilotDir, "session-state") - entries, err := os.ReadDir(stateDir) - if err != nil { - return nil - } - - dirs := make(map[string]struct{}) - for _, entry := range entries { - if !entry.IsDir() { - continue - } - eventsPath := filepath.Join( - stateDir, entry.Name(), "events.jsonl", - ) - if _, err := os.Stat(eventsPath); err == nil { - dirs[entry.Name()] = struct{}{} - } - } - - var files []DiscoveredFile - for _, entry := range entries { - name := entry.Name() - if entry.IsDir() { - candidate := filepath.Join( - stateDir, name, "events.jsonl", - ) - if _, err := os.Stat(candidate); err == nil { - files = append(files, DiscoveredFile{ - Path: candidate, - Agent: AgentCopilot, - }) - } - continue - } - if stem, ok := strings.CutSuffix(name, ".jsonl"); ok { - if _, dup := dirs[stem]; dup { - continue - } - files = append(files, DiscoveredFile{ - Path: filepath.Join(stateDir, name), - Agent: AgentCopilot, - }) - } - } - - sort.Slice(files, func(i, j int) bool { - return files[i].Path < files[j].Path - }) - return files -} - -// FindCopilotSourceFile locates a Copilot session file by -// UUID. Checks both bare (.jsonl) and directory -// (/events.jsonl) layouts. -func FindCopilotSourceFile( - copilotDir, rawID string, -) string { - if copilotDir == "" || !IsValidSessionID(rawID) { - return "" - } - - stateDir := filepath.Join(copilotDir, "session-state") - - dirFmt := filepath.Join(stateDir, rawID, "events.jsonl") - if _, err := os.Stat(dirFmt); err == nil { - return dirFmt - } - - bare := filepath.Join(stateDir, rawID+".jsonl") - if _, err := os.Stat(bare); err == nil { - return bare - } - - return "" -} - // IsPiSessionFile reads the first non-blank line of path and returns true // when the JSON type field equals "session". The scanner buffer grows up to // 64 MiB to match parser.maxLineSize. 
Leading blank lines are skipped to diff --git a/internal/parser/discovery_test.go b/internal/parser/discovery_test.go index 85c782fc6..3170a9b20 100644 --- a/internal/parser/discovery_test.go +++ b/internal/parser/discovery_test.go @@ -11,11 +11,6 @@ import ( "github.com/stretchr/testify/require" ) -const ( - copilotStateDir = "session-state" - geminiChatsDir = "chats" -) - // setupFileSystem creates a temporary directory and populates // it with the given relative file paths and contents. func setupFileSystem(t *testing.T, dir string, files map[string]string) { @@ -583,7 +578,7 @@ func TestDiscoverGeminiSessions(t *testing.T) { dir := t.TempDir() setupFileSystem(t, dir, tt.files) - files := DiscoverGeminiSessions(dir) + files := discoverGeminiTestSessions(t, dir) require.Len(t, files, len(tt.wantFiles), "files count") @@ -603,17 +598,17 @@ func TestDiscoverGeminiSessions(t *testing.T) { t.Run("EmptyChatDir", func(t *testing.T) { dir := t.TempDir() require.NoError(t, os.MkdirAll(filepath.Join(dir, "tmp", "hash1", geminiChatsDir), 0o755), "mkdir") - files := DiscoverGeminiSessions(dir) + files := discoverGeminiTestSessions(t, dir) assert.Nil(t, files, "expected nil") }) t.Run("Nonexistent", func(t *testing.T) { - files := DiscoverGeminiSessions(filepath.Join(t.TempDir(), "does-not-exist")) + files := discoverGeminiTestSessions(t, filepath.Join(t.TempDir(), "does-not-exist")) assert.Nil(t, files, "expected nil") }) t.Run("EmptyDir", func(t *testing.T) { - files := DiscoverGeminiSessions("") + files := discoverGeminiTestSessions(t, "") assert.Nil(t, files, "expected nil") }) } @@ -656,7 +651,7 @@ func TestFindGeminiSourceFile(t *testing.T) { dir := t.TempDir() setupFileSystem(t, dir, tt.files) - got := FindGeminiSourceFile(dir, tt.targetID) + got := findGeminiTestSourceFile(t, dir, tt.targetID) want := "" if tt.wantFile != "" { want = filepath.Join(dir, tt.wantFile) @@ -669,13 +664,13 @@ func TestFindGeminiSourceFile(t *testing.T) { t.Run("ShortID", func(t *testing.T) 
{ dir := t.TempDir() for _, id := range []string{"", "a", "abc", "1234567"} { - got := FindGeminiSourceFile(dir, id) + got := findGeminiTestSourceFile(t, dir, id) assert.Emptyf(t, got, "FindGeminiSourceFile(%q)", id) } }) t.Run("EmptyDir", func(t *testing.T) { - got := FindGeminiSourceFile("", "b0a4eadd-cb99-4165-94d9-64cad5a66d24") + got := findGeminiTestSourceFile(t, "", "b0a4eadd-cb99-4165-94d9-64cad5a66d24") assert.Empty(t, got, "expected empty") }) } @@ -893,7 +888,7 @@ func TestDiscoverCopilotSessions(t *testing.T) { dir := t.TempDir() setupFileSystem(t, dir, tt.files) - files := DiscoverCopilotSessions(dir) + files := discoverCopilotTestSessions(t, dir) require.Len(t, files, len(tt.wantFiles), "files count") @@ -911,12 +906,12 @@ func TestDiscoverCopilotSessions(t *testing.T) { } t.Run("EmptyDir", func(t *testing.T) { - files := DiscoverCopilotSessions("") + files := discoverCopilotTestSessions(t, "") assert.Nil(t, files, "expected nil") }) t.Run("Nonexistent", func(t *testing.T) { - files := DiscoverCopilotSessions(filepath.Join(t.TempDir(), "does-not-exist")) + files := discoverCopilotTestSessions(t, filepath.Join(t.TempDir(), "does-not-exist")) assert.Nil(t, files, "expected nil") }) } @@ -962,7 +957,7 @@ func TestFindCopilotSourceFile(t *testing.T) { dir := t.TempDir() setupFileSystem(t, dir, tt.files) - got := FindCopilotSourceFile(dir, tt.targetID) + got := findCopilotTestSourceFile(t, dir, tt.targetID) want := "" if tt.wantFile != "" { want = filepath.Join(dir, tt.wantFile) @@ -975,13 +970,13 @@ func TestFindCopilotSourceFile(t *testing.T) { t.Run("InvalidID", func(t *testing.T) { dir := t.TempDir() for _, id := range []string{"", "../etc/passwd", "a/b", "a b"} { - got := FindCopilotSourceFile(dir, id) + got := findCopilotTestSourceFile(t, dir, id) assert.Emptyf(t, got, "FindCopilotSourceFile(%q)", id) } }) t.Run("EmptyDir", func(t *testing.T) { - got := FindCopilotSourceFile("", "abc-123") + got := findCopilotTestSourceFile(t, "", "abc-123") 
assert.Empty(t, got, "expected empty") }) } diff --git a/internal/parser/gemini.go b/internal/parser/gemini.go index 432fad42b..60bb98519 100644 --- a/internal/parser/gemini.go +++ b/internal/parser/gemini.go @@ -53,10 +53,12 @@ func normalizedGeminiTokenUsage(tok geminiTokens) json.RawMessage { return raw } -// ParseGeminiSession parses a Gemini CLI session JSON file. -// Unlike Claude/Codex JSONL, each Gemini file is a single JSON -// document containing all messages. -func ParseGeminiSession( +// parseSession parses a Gemini CLI session JSON file into the session and +// messages the provider consumes. Unlike Claude/Codex JSONL, each Gemini file +// is a single JSON document containing all messages. This is the provider-owned +// parse entrypoint; the package-level free function was folded onto the +// provider. +func (p *geminiProvider) parseSession( path, project, machine string, ) (*ParsedSession, []ParsedMessage, error) { info, err := os.Stat(path) diff --git a/internal/parser/gemini_copilot_provider_test.go b/internal/parser/gemini_copilot_provider_test.go new file mode 100644 index 000000000..c2d9d1344 --- /dev/null +++ b/internal/parser/gemini_copilot_provider_test.go @@ -0,0 +1,321 @@ +package parser + +import ( + "context" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.kenn.io/agentsview/internal/testjsonl" +) + +func TestGeminiCopilotProviderFactoriesReplaceLegacyAdapter(t *testing.T) { + for _, agent := range []AgentType{AgentGemini, AgentCopilot} { + t.Run(string(agent), func(t *testing.T) { + factory, ok := ProviderFactoryByType(agent) + require.True(t, ok) + require.NotNil(t, factory) + + provider, ok := NewProvider(agent, ProviderConfig{ + Roots: []string{t.TempDir()}, + Machine: "devbox", + }) + require.True(t, ok) + require.NotNil(t, provider) + }) + } +} + +func TestGeminiProviderSourceMethods(t *testing.T) { + root := t.TempDir() + sessionID := 
"gemini-provider" + sourcePath := filepath.Join( + root, + "tmp", + "my-project", + geminiChatsDir, + "session-2026-06-19T12-00-gemini-provider.json", + ) + writeSourceFile(t, sourcePath, testjsonl.GeminiSessionJSON( + sessionID, + "my-project", + tsEarly, + tsEarlyS5, + []map[string]any{ + testjsonl.GeminiUserMsg("u1", tsEarly, "hello gemini"), + testjsonl.GeminiAssistantMsg("a1", tsEarlyS5, "hi", nil), + }, + )) + + provider, ok := NewProvider(AgentGemini, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 2) + assert.Equal(t, filepath.Join(root, "tmp"), plan.Roots[0].Path) + assert.True(t, plan.Roots[0].Recursive) + assert.Equal(t, []string{"session-*.json", "session-*.jsonl"}, plan.Roots[0].IncludeGlobs) + assert.Equal(t, root, plan.Roots[1].Path) + assert.False(t, plan.Roots[1].Recursive) + assert.Equal(t, []string{"projects.json", "trustedFolders.json"}, plan.Roots[1].IncludeGlobs) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, sourcePath, discovered[0].DisplayPath) + assert.Equal(t, "my_project", discovered[0].ProjectHint) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "host~gemini:" + sessionID, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sourcePath, found.DisplayPath) + + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: sourcePath, EventKind: "write", WatchRoot: filepath.Join(root, "tmp")}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, sourcePath, changed[0].DisplayPath) + + require.NoError(t, os.Remove(sourcePath)) + changed, err = provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: sourcePath, EventKind: "remove", WatchRoot: 
filepath.Join(root, "tmp")}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, sourcePath, changed[0].DisplayPath) + assert.Equal(t, "my_project", changed[0].ProjectHint) + + fingerprint, err := provider.Fingerprint(context.Background(), found) + require.Error(t, err) + require.Empty(t, fingerprint) +} + +func TestGeminiProviderProjectMetadataChangesClassifyAndFingerprint(t *testing.T) { + root := t.TempDir() + sessionID := "gemini-project-metadata" + projectsPath := filepath.Join(root, "projects.json") + writeSourceFile(t, projectsPath, `{"projects":{"/Users/alice/code/one":"alias"}}`) + sourcePath := filepath.Join( + root, + "tmp", + "alias", + geminiChatsDir, + "session-2026-06-19T12-00-gemini-project-metadata.json", + ) + writeSourceFile(t, sourcePath, testjsonl.GeminiSessionJSON( + sessionID, + "alias", + tsEarly, + tsEarlyS5, + []map[string]any{ + testjsonl.GeminiUserMsg("u1", tsEarly, "hello gemini"), + testjsonl.GeminiAssistantMsg("a1", tsEarlyS5, "hi", nil), + }, + )) + + provider, ok := NewProvider(AgentGemini, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "host~gemini:" + sessionID, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, "one", found.ProjectHint) + + fingerprintOne, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + + writeSourceFile(t, projectsPath, `{"projects":{"/Users/alice/code/two":"alias"}}`) + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: projectsPath, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, sourcePath, changed[0].DisplayPath) + assert.Equal(t, "two", changed[0].ProjectHint) + + fingerprintTwo, err := provider.Fingerprint(context.Background(), changed[0]) + require.NoError(t, err) + 
assert.NotEqual(t, fingerprintOne.Hash, fingerprintTwo.Hash) +} + +func TestGeminiProviderParse(t *testing.T) { + root := t.TempDir() + sessionID := "gemini-provider" + sourcePath := filepath.Join( + root, + "tmp", + "my-project", + geminiChatsDir, + "session-2026-06-19T12-00-gemini-provider.json", + ) + writeSourceFile(t, sourcePath, testjsonl.GeminiSessionJSON( + sessionID, + "my-project", + tsEarly, + tsEarlyS5, + []map[string]any{ + testjsonl.GeminiUserMsg("u1", tsEarly, "hello gemini"), + testjsonl.GeminiAssistantMsg("a1", tsEarlyS5, "hi", nil), + }, + )) + + provider, ok := NewProvider(AgentGemini, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "host~gemini:" + sessionID, + }) + require.NoError(t, err) + require.True(t, ok) + + fingerprint, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + assert.Equal(t, sourcePath, fingerprint.Key) + assert.Positive(t, fingerprint.Size) + assert.Positive(t, fingerprint.MTimeNS) + assert.NotEmpty(t, fingerprint.Hash) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: found, + Fingerprint: fingerprint, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.Len(t, outcome.Results, 1) + result := outcome.Results[0] + assert.Equal(t, DataVersionCurrent, result.DataVersion) + assert.Equal(t, "gemini:"+sessionID, result.Result.Session.ID) + assert.Equal(t, AgentGemini, result.Result.Session.Agent) + assert.Equal(t, "my_project", result.Result.Session.Project) + assert.Equal(t, "devbox", result.Result.Session.Machine) + assert.Equal(t, fingerprint.Hash, result.Result.Session.File.Hash) + assert.Len(t, result.Result.Messages, 2) +} + +func TestCopilotProviderSourceMethods(t *testing.T) { + root := t.TempDir() + barePath := filepath.Join(root, copilotStateDir, "copilot-provider.jsonl") + dirEvents := 
filepath.Join(root, copilotStateDir, "copilot-provider", "events.jsonl") + workspacePath := filepath.Join(root, copilotStateDir, "copilot-provider", "workspace.yaml") + content := strings.Join([]string{ + `{"type":"session.start","data":{"sessionId":"copilot-provider","context":{"cwd":"/home/user/code/copilot-app","branch":"main"}},"timestamp":"2025-01-15T10:00:00Z"}`, + `{"type":"user.message","data":{"content":"hello copilot"},"timestamp":"2025-01-15T10:00:01Z"}`, + `{"type":"assistant.message","data":{"content":"hi"},"timestamp":"2025-01-15T10:00:02Z"}`, + `{"type":"session.shutdown","data":{"modelMetrics":{"gpt-5":{"usage":{"inputTokens":100,"outputTokens":20,"cacheReadTokens":30,"cacheWriteTokens":10,"reasoningTokens":5}}}},"timestamp":"2025-01-15T10:00:03Z"}`, + }, "\n") + "\n" + writeSourceFile(t, barePath, content) + writeSourceFile(t, dirEvents, content) + writeSourceFile(t, workspacePath, "name: Workspace title\n") + + provider, ok := NewProvider(AgentCopilot, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 1) + assert.Equal(t, filepath.Join(root, copilotStateDir), plan.Roots[0].Path) + assert.True(t, plan.Roots[0].Recursive) + assert.Equal(t, []string{"*.jsonl", "workspace.yaml"}, plan.Roots[0].IncludeGlobs) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, dirEvents, discovered[0].DisplayPath) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "copilot-provider", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, dirEvents, found.DisplayPath) + + for _, path := range []string{dirEvents, workspacePath} { + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: path, EventKind: "write", WatchRoot: 
filepath.Join(root, copilotStateDir)}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, dirEvents, changed[0].DisplayPath) + } + + require.NoError(t, os.Remove(dirEvents)) + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: dirEvents, EventKind: "remove", WatchRoot: filepath.Join(root, copilotStateDir)}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, barePath, changed[0].DisplayPath) + writeSourceFile(t, dirEvents, content) + + require.NoError(t, os.Remove(workspacePath)) + changed, err = provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: workspacePath, EventKind: "remove", WatchRoot: filepath.Join(root, copilotStateDir)}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, dirEvents, changed[0].DisplayPath) + writeSourceFile(t, workspacePath, "name: Workspace title\n") + + fingerprint, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + assert.Equal(t, dirEvents, fingerprint.Key) + assert.Positive(t, fingerprint.Size) + assert.Positive(t, fingerprint.MTimeNS) + assert.NotEmpty(t, fingerprint.Hash) + + writeSourceFile(t, workspacePath, "name: Workspace other\n") + renamedFingerprint, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + assert.NotEqual(t, fingerprint.Hash, renamedFingerprint.Hash) + writeSourceFile(t, workspacePath, "name: Workspace title\n") + fingerprint, err = provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: found, + Fingerprint: fingerprint, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.Len(t, outcome.Results, 1) + result := outcome.Results[0] + assert.Equal(t, DataVersionCurrent, result.DataVersion) + assert.Equal(t, "copilot:copilot-provider", result.Result.Session.ID) + 
assert.Equal(t, AgentCopilot, result.Result.Session.Agent) + assert.Equal(t, "copilot_app", result.Result.Session.Project) + assert.Equal(t, "Workspace title", result.Result.Session.FirstMessage) + assert.Equal(t, "devbox", result.Result.Session.Machine) + assert.Equal(t, fingerprint.Hash, result.Result.Session.File.Hash) + assert.Equal(t, fingerprint.Size, result.Result.Session.File.Size) + assert.Equal(t, fingerprint.MTimeNS, result.Result.Session.File.Mtime) + assert.Len(t, result.Result.Messages, 2) + require.Len(t, result.Result.UsageEvents, 1) + assert.Equal(t, "gpt-5", result.Result.UsageEvents[0].Model) +} diff --git a/internal/parser/gemini_parser_test.go b/internal/parser/gemini_parser_test.go index 547f80d86..5178f4db4 100644 --- a/internal/parser/gemini_parser_test.go +++ b/internal/parser/gemini_parser_test.go @@ -1,6 +1,7 @@ package parser import ( + "context" "strings" "testing" "time" @@ -11,10 +12,65 @@ import ( "go.kenn.io/agentsview/internal/testjsonl" ) +// newGeminiTestProvider builds a concrete geminiProvider for the given roots so +// package tests can exercise the folded parse, discovery, and source-lookup +// behavior directly through provider methods, replacing the removed +// package-level entrypoints. +func newGeminiTestProvider(t *testing.T, roots ...string) *geminiProvider { + t.Helper() + provider, ok := NewProvider(AgentGemini, ProviderConfig{ + Roots: roots, + Machine: "local", + }) + require.True(t, ok) + gp, ok := provider.(*geminiProvider) + require.True(t, ok) + return gp +} + +// parseGeminiTestSession parses a Gemini session file at path through the +// provider-owned parse method, replacing the removed package-level +// ParseGeminiSession entrypoint. 
+func parseGeminiTestSession(
+	t *testing.T, path, project, machine string,
+) (*ParsedSession, []ParsedMessage, error) {
+	t.Helper()
+	// The provider is built without roots; parseSession receives the file
+	// path explicitly.
+	provider := newGeminiTestProvider(t)
+	return provider.parseSession(path, project, machine)
+}
+
+// discoverGeminiTestSessions runs provider discovery over root and converts
+// each discovered SourceRef into the legacy DiscoveredFile shape (Path,
+// Project, Agent) that the tests assert against. It returns nil when
+// discovery yields no sources.
+func discoverGeminiTestSessions(t *testing.T, root string) []DiscoveredFile {
+	t.Helper()
+	sources, err := newGeminiTestProvider(t, root).Discover(context.Background())
+	require.NoError(t, err)
+	if len(sources) == 0 {
+		return nil
+	}
+	files := make([]DiscoveredFile, len(sources))
+	for i := range sources {
+		files[i] = DiscoveredFile{
+			Path:    sources[i].DisplayPath,
+			Project: sources[i].ProjectHint,
+			Agent:   AgentGemini,
+		}
+	}
+	return files
+}
+
+// findGeminiTestSourceFile resolves a Gemini session ID to a session file path
+// through the provider, replacing the removed FindGeminiSourceFile.
+func findGeminiTestSourceFile(t *testing.T, root, sessionID string) string { + t.Helper() + return newGeminiTestProvider(t, root).sources.findSourceFile(root, sessionID) +} + func runGeminiParserTest(t *testing.T, content string) (*ParsedSession, []ParsedMessage) { t.Helper() path := createTestFile(t, "session.json", content) - sess, msgs, err := ParseGeminiSession(path, "my_project", "local") + sess, msgs, err := parseGeminiTestSession(t, path, "my_project", "local") require.NoError(t, err) return sess, msgs } @@ -45,7 +101,7 @@ func TestParseGeminiSession_JSONLStream(t *testing.T) { `{"$set":{"lastUpdated":"2026-04-23T16:12:50.158Z"}}`, }, "\n") path := createTestFile(t, "session.jsonl", content) - sess, msgs, err := ParseGeminiSession(path, "my_project", "local") + sess, msgs, err := parseGeminiTestSession(t, path, "my_project", "local") require.NoError(t, err) require.NotNil(t, sess) @@ -74,7 +130,7 @@ func TestParseGeminiSession_JSONLStreamLargeRecord(t *testing.T) { `{"id":"u1","timestamp":"2026-04-23T16:12:43.085Z","type":"user","content":[{"text":"` + largeContent + `"}]}`, }, "\n") path := createTestFile(t, "large-session.jsonl", content) - sess, msgs, err := ParseGeminiSession(path, "my_project", "local") + sess, msgs, err := parseGeminiTestSession(t, path, "my_project", "local") require.NoError(t, err) require.NotNil(t, sess) @@ -91,7 +147,7 @@ func TestParseGeminiSession_JSONLStreamTolerantOfPartialLines(t *testing.T) { `{"id":"a1","timestamp":"2026-04-23T16:12:50.158Z","type":"gemini","content":"reply"`, }, "\n") path := createTestFile(t, "session.jsonl", content) - sess, msgs, err := ParseGeminiSession(path, "my_project", "local") + sess, msgs, err := parseGeminiTestSession(t, path, "my_project", "local") require.NoError(t, err) require.NotNil(t, sess) @@ -108,7 +164,7 @@ func TestParseGeminiSession_JSONLStreamTolerantOfPartialLines(t *testing.T) { "", }, "\n") path := createTestFile(t, "session.jsonl", content) - sess, msgs, err := 
ParseGeminiSession(path, "my_project", "local") + sess, msgs, err := parseGeminiTestSession(t, path, "my_project", "local") require.NoError(t, err) require.NotNil(t, sess) @@ -452,12 +508,12 @@ func TestParseGeminiSession_EdgeCases(t *testing.T) { t.Run("malformed JSON", func(t *testing.T) { path := createTestFile(t, "session.json", "not valid json {{{") - _, _, err := ParseGeminiSession(path, "my_project", "local") + _, _, err := parseGeminiTestSession(t, path, "my_project", "local") assert.Error(t, err) }) t.Run("missing file", func(t *testing.T) { - _, _, err := ParseGeminiSession("/nonexistent.json", "my_project", "local") + _, _, err := parseGeminiTestSession(t, "/nonexistent.json", "my_project", "local") assert.Error(t, err) }) @@ -500,7 +556,7 @@ func TestParseGeminiSession_EdgeCases(t *testing.T) { t.Run("missing sessionId", func(t *testing.T) { content := `{"projectHash":"abc","startTime":"2024-01-01T00:00:00Z","lastUpdated":"2024-01-01T00:00:00Z","messages":[]}` path := createTestFile(t, "session.json", content) - _, _, err := ParseGeminiSession(path, "my_project", "local") + _, _, err := parseGeminiTestSession(t, path, "my_project", "local") assert.Error(t, err) }) } diff --git a/internal/parser/gemini_provider.go b/internal/parser/gemini_provider.go new file mode 100644 index 000000000..7a18b0123 --- /dev/null +++ b/internal/parser/gemini_provider.go @@ -0,0 +1,509 @@ +package parser + +import ( + "context" + "crypto/sha256" + "fmt" + "hash" + "io" + "os" + "path/filepath" + "strings" +) + +var _ Provider = (*geminiProvider)(nil) + +type geminiProviderFactory struct { + def AgentDef +} + +func newGeminiProviderFactory(def AgentDef) ProviderFactory { + return geminiProviderFactory{def: cloneAgentDef(def)} +} + +func (f geminiProviderFactory) Definition() AgentDef { + return cloneAgentDef(f.def) +} + +func (f geminiProviderFactory) Capabilities() Capabilities { + return geminiProviderCapabilities() +} + +func (f geminiProviderFactory) NewProvider(cfg 
ProviderConfig) Provider { + cfg = cfg.Clone() + return &geminiProvider{ + ProviderBase: ProviderBase{ + Def: cloneAgentDef(f.def), + Caps: geminiProviderCapabilities(), + Config: cfg, + }, + sources: newGeminiSourceSet(cfg.Roots), + } +} + +type geminiProvider struct { + ProviderBase + sources geminiSourceSet +} + +func (p *geminiProvider) Discover(ctx context.Context) ([]SourceRef, error) { + return p.sources.Discover(ctx) +} + +func (p *geminiProvider) WatchPlan(ctx context.Context) (WatchPlan, error) { + return p.sources.WatchPlan(ctx) +} + +func (p *geminiProvider) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + return p.sources.SourcesForChangedPath(ctx, req) +} + +func (p *geminiProvider) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + req = providerFindRequestWithRawSessionID(p.Def, req) + return p.sources.FindSource(ctx, req) +} + +func (p *geminiProvider) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + return p.sources.Fingerprint(ctx, source) +} + +func (p *geminiProvider) Parse( + ctx context.Context, + req ParseRequest, +) (ParseOutcome, error) { + if err := ctx.Err(); err != nil { + return ParseOutcome{}, err + } + path, ok := p.sources.pathFromSource(req.Source) + if !ok { + return ParseOutcome{}, fmt.Errorf("gemini source path unavailable") + } + machine := firstNonEmptyJSONLString(req.Machine, p.Config.Machine) + sess, msgs, err := p.parseSession(path, req.Source.ProjectHint, machine) + if err != nil { + return ParseOutcome{}, err + } + if sess == nil { + return ParseOutcome{ + ResultSetComplete: true, + SkipReason: SkipNoSession, + }, nil + } + if req.Fingerprint.Hash != "" { + sess.File.Hash = req.Fingerprint.Hash + } + return ParseOutcome{ + Results: []ParseResultOutcome{{ + Result: ParseResult{ + Session: *sess, + Messages: msgs, + }, + DataVersion: DataVersionCurrent, + }}, + ResultSetComplete: true, + }, 
nil +} + +type geminiSource struct { + Root string + Path string +} + +type geminiSourceSet struct { + roots []string +} + +func newGeminiSourceSet(roots []string) geminiSourceSet { + return geminiSourceSet{roots: cleanJSONLRoots(roots)} +} + +func (s geminiSourceSet) Discover(ctx context.Context) ([]SourceRef, error) { + var sources []SourceRef + seen := make(map[string]struct{}) + for _, root := range s.roots { + rootSources, err := s.discoverRoot(ctx, root) + if err != nil { + return nil, err + } + for _, source := range rootSources { + addJSONLSource(source, &sources, seen) + } + } + sortJSONLSources(sources) + return sources, nil +} + +func (s geminiSourceSet) discoverRoot( + ctx context.Context, + root string, +) ([]SourceRef, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + sources := make([]SourceRef, 0) + seen := make(map[string]struct{}) + for _, path := range s.discoverSessionPaths(root) { + source, ok := s.sourceRef(root, path) + if !ok { + continue + } + addJSONLSource(source, &sources, seen) + } + sortJSONLSources(sources) + return sources, nil +} + +// discoverSessionPaths finds all Gemini session file paths under the Gemini +// directory (/tmp//chats/session-*.json[l]). is either a +// SHA-256 project hash (old layout) or a project name (new layout); symlinked +// hash directories are followed (matching the watcher). Project resolution is +// applied by sourceRef via BuildGeminiProjectMap/ResolveGeminiProject, so this +// helper only enumerates source paths. 
+func (s geminiSourceSet) discoverSessionPaths(root string) []string {
+	var paths []string
+	walkGeminiSessionFiles(root, func(path, _ string) bool {
+		paths = append(paths, path)
+		return true
+	})
+	return paths
+}
+
+// walkGeminiSessionFiles calls visit for every session file found at
+// root/tmp/<dir>/<geminiChatsDir>/<name>, in the order os.ReadDir returns the
+// entries. <dir> must satisfy isDirOrSymlink and <name> must satisfy
+// isGeminiSessionFilename; sub-directories of the chats directory are ignored.
+// Directories that cannot be read are skipped rather than reported, and an
+// empty root visits nothing. visit returns false to stop the walk early.
+//
+// Discovery and session-ID lookup both go through this walk, so they always
+// agree on which files count as session files.
+func walkGeminiSessionFiles(root string, visit func(path, name string) bool) {
+	if root == "" {
+		return
+	}
+
+	tmpDir := filepath.Join(root, "tmp")
+	projectDirs, err := os.ReadDir(tmpDir)
+	if err != nil {
+		return
+	}
+
+	for _, dir := range projectDirs {
+		if !isDirOrSymlink(dir, tmpDir) {
+			continue
+		}
+		chatsDir := filepath.Join(tmpDir, dir.Name(), geminiChatsDir)
+		entries, err := os.ReadDir(chatsDir)
+		if err != nil {
+			continue
+		}
+		for _, entry := range entries {
+			if entry.IsDir() {
+				continue
+			}
+			name := entry.Name()
+			if !isGeminiSessionFilename(name) {
+				continue
+			}
+			if !visit(filepath.Join(chatsDir, name), name) {
+				return
+			}
+		}
+	}
+}
+
+// WatchPlan returns two watch roots per configured root: a recursive watch on
+// root/tmp limited to session-*.json / session-*.jsonl files, and a
+// non-recursive watch on root itself limited to projects.json and
+// trustedFolders.json. The context is unused and the error is always nil.
+func (s geminiSourceSet) WatchPlan(context.Context) (WatchPlan, error) {
+	// Two entries are appended for every configured root.
+	roots := make([]WatchRoot, 0, 2*len(s.roots))
+	for _, root := range s.roots {
+		tmp := filepath.Join(root, "tmp")
+		roots = append(roots, WatchRoot{
+			Path:         tmp,
+			Recursive:    true,
+			IncludeGlobs: []string{"session-*.json", "session-*.jsonl"},
+			DebounceKey:  string(AgentGemini) + ":tmp:" + tmp,
+		})
+		roots = append(roots, WatchRoot{
+			Path:         root,
+			Recursive:    false,
+			IncludeGlobs: []string{"projects.json", "trustedFolders.json"},
+			DebounceKey:  string(AgentGemini) + ":projects:" + root,
+		})
+	}
+	return WatchPlan{Roots: roots}, nil
+}
+
+// SourcesForChangedPath maps a changed filesystem path to the sources that
+// need to be re-examined. Roots are tried in configuration order and the first
+// root that recognizes the path wins:
+//
+//   - a project metadata file of the root (see geminiProjectMetadataPath)
+//     re-discovers every source under that root;
+//   - an existing session file yields just that source;
+//   - when jsonlMissingPathFallbackAllowed(req) reports true, the path is
+//     classified by shape alone, without requiring a regular file on disk.
+//
+// It returns (nil, nil) when no root recognizes the path, and ctx.Err() when
+// the context is already done.
+func (s geminiSourceSet) SourcesForChangedPath(
+	ctx context.Context,
+	req ChangedPathRequest,
+) ([]SourceRef, error) {
+	if err := ctx.Err(); err != nil {
+		return nil, err
+	}
+	for _, root := range s.roots {
+		if geminiProjectMetadataPath(root, req.Path) {
+			return s.discoverRoot(ctx, root)
+		}
+		source, ok := s.sourceRef(root, req.Path)
+		if ok {
+			return []SourceRef{source}, nil
+		}
+		if jsonlMissingPathFallbackAllowed(req) {
+			source, ok = s.sourceRefForPath(root, req.Path, false)
+			if ok {
+				return []SourceRef{source}, nil
+			}
+		}
+	}
+	return nil, nil
+}
+
+// FindSource resolves a source for req. It first tries req.StoredFilePath and
+// then req.FingerprintKey as literal paths against every root; if neither
+// resolves, it falls back to locating a session file by req.RawSessionID. The
+// boolean result reports whether a source was found. It returns ctx.Err()
+// when the context is already done.
+func (s geminiSourceSet) FindSource(
+	ctx context.Context,
+	req FindSourceRequest,
+) (SourceRef, bool, error) {
+	if err := ctx.Err(); err != nil {
+		return SourceRef{}, false, err
+	}
+	for _, path := range []string{req.StoredFilePath, req.FingerprintKey} {
+		if path == "" {
+			continue
+		}
+		for _, root := range s.roots {
+			if source, ok := s.sourceRef(root, path); ok {
+				return source, true, nil
+			}
+		}
+	}
+	if req.RawSessionID == "" {
+		return SourceRef{}, false, nil
+	}
+	for _, root := range s.roots {
+		path := s.findSourceFile(root, req.RawSessionID)
+		if path == "" {
+			continue
+		}
+		if source, ok := s.sourceRef(root, path); ok {
+			return source, true, nil
+		}
+	}
+	return SourceRef{}, false, nil
+}
+
+// findSourceFile locates a Gemini session file by its session UUID under root,
+// searching all project hash directories. The session filename embeds the first
+// eight characters of the UUID, so candidates are pre-filtered on that prefix
+// before confirming the recorded sessionId matches. It returns "" when root is
+// empty, when sessionID is invalid or shorter than eight bytes, or when no
+// file matches.
+func (s geminiSourceSet) findSourceFile(root, sessionID string) string {
+	if root == "" || !IsValidSessionID(sessionID) ||
+		len(sessionID) < 8 {
+		return ""
+	}
+
+	prefix := sessionID[:8]
+	var found string
+	walkGeminiSessionFiles(root, func(path, name string) bool {
+		// Cheap filename check first; only matching candidates are read.
+		if !strings.Contains(name, prefix) ||
+			!confirmGeminiSessionID(path, sessionID) {
+			return true
+		}
+		found = path
+		return false // first confirmed match wins; stop walking
+	})
+	return found
+}
+
+// confirmGeminiSessionID reads the sessionId field from a Gemini file to
+// confirm it matches the expected ID.
+func confirmGeminiSessionID(path, sessionID string) bool { + data, err := os.ReadFile(path) + if err != nil { + return false + } + return GeminiSessionID(data) == sessionID +} + +func (s geminiSourceSet) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + if err := ctx.Err(); err != nil { + return SourceFingerprint{}, err + } + root, path, ok := s.rootPathFromSource(source) + if !ok { + return SourceFingerprint{}, fmt.Errorf("gemini source path unavailable") + } + info, err := os.Stat(path) + if err != nil { + return SourceFingerprint{}, fmt.Errorf("stat %s: %w", path, err) + } + if info.IsDir() { + return SourceFingerprint{}, fmt.Errorf("stat %s: source is a directory", path) + } + fingerprint := SourceFingerprint{ + Key: firstNonEmptyJSONLString(source.FingerprintKey, source.Key, path), + Size: info.Size(), + MTimeNS: info.ModTime().UnixNano(), + } + h := sha256.New() + if err := addGeminiFingerprintPart(h, "session", path, info); err != nil { + return SourceFingerprint{}, err + } + for _, metadataPath := range geminiProjectMetadataPaths(root) { + metadataInfo, err := os.Stat(metadataPath) + if err != nil || metadataInfo.IsDir() { + continue + } + fingerprint.Size += metadataInfo.Size() + if mtime := metadataInfo.ModTime().UnixNano(); mtime > fingerprint.MTimeNS { + fingerprint.MTimeNS = mtime + } + if err := addGeminiFingerprintPart(h, "project", metadataPath, metadataInfo); err != nil { + return SourceFingerprint{}, err + } + } + fingerprint.Hash = fmt.Sprintf("%x", h.Sum(nil)) + return fingerprint, nil +} + +func (s geminiSourceSet) pathFromSource(source SourceRef) (string, bool) { + _, path, ok := s.rootPathFromSource(source) + return path, ok +} + +func (s geminiSourceSet) rootPathFromSource(source SourceRef) (string, string, bool) { + switch src := source.Opaque.(type) { + case geminiSource: + return src.Root, src.Path, src.Path != "" + case *geminiSource: + if src != nil && src.Path != "" { + return src.Root, 
src.Path, true + } + } + for _, candidate := range []string{source.DisplayPath, source.FingerprintKey, source.Key} { + for _, root := range s.roots { + if ref, ok := s.sourceRef(root, candidate); ok { + src := ref.Opaque.(geminiSource) + return src.Root, src.Path, true + } + } + } + return "", "", false +} + +func (s geminiSourceSet) sourceRef(root, path string) (SourceRef, bool) { + return s.sourceRefForPath(root, path, true) +} + +func (s geminiSourceSet) sourceRefForPath( + root, path string, + requireRegular bool, +) (SourceRef, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + rel, ok := relUnder(root, path) + if !ok || (requireRegular && !IsRegularFile(path)) { + return SourceRef{}, false + } + sepParts := strings.Split(filepath.ToSlash(rel), "/") + if len(sepParts) != 4 || + sepParts[0] != "tmp" || + sepParts[2] != geminiChatsDir || + !isGeminiSessionFilename(sepParts[3]) { + return SourceRef{}, false + } + project := ResolveGeminiProject(sepParts[1], BuildGeminiProjectMap(root)) + return SourceRef{ + Provider: AgentGemini, + Key: path, + DisplayPath: path, + FingerprintKey: path, + ProjectHint: project, + Opaque: geminiSource{ + Root: root, + Path: path, + }, + }, true +} + +func geminiProjectMetadataPaths(root string) []string { + return []string{ + filepath.Join(root, "projects.json"), + filepath.Join(root, "trustedFolders.json"), + } +} + +func geminiProjectMetadataPath(root, path string) bool { + root = filepath.Clean(root) + path = filepath.Clean(path) + rel, ok := relUnder(root, path) + if !ok { + return false + } + rel = filepath.ToSlash(rel) + return rel == "projects.json" || rel == "trustedFolders.json" +} + +func addGeminiFingerprintPart( + h hash.Hash, + label string, + path string, + info os.FileInfo, +) error { + if _, err := fmt.Fprintf( + h, + "%s\x00%s\x00%d\x00%d\x00", + label, + path, + info.Size(), + info.ModTime().UnixNano(), + ); err != nil { + return err + } + f, err := os.Open(path) + if err != nil { + return 
fmt.Errorf("open %s: %w", path, err) + } + defer f.Close() + if _, err := io.Copy(h, f); err != nil { + return fmt.Errorf("hash %s: %w", path, err) + } + return nil +} + +func geminiProviderCapabilities() Capabilities { + return Capabilities{ + Source: SourceCapabilities{ + DiscoverSources: CapabilitySupported, + WatchSources: CapabilitySupported, + ClassifyChangedPath: CapabilitySupported, + FindSource: CapabilitySupported, + CompositeFingerprint: CapabilitySupported, + IncrementalAppend: CapabilityNotApplicable, + MultiSessionSource: CapabilityNotApplicable, + PerSessionErrors: CapabilityNotApplicable, + ExcludedSessions: CapabilityNotApplicable, + ForceReplaceOnParse: CapabilityNotApplicable, + }, + Content: ContentCapabilities{ + FirstMessage: CapabilitySupported, + Thinking: CapabilitySupported, + ToolCalls: CapabilitySupported, + ToolResults: CapabilitySupported, + PerMessageTokenUsage: CapabilitySupported, + Model: CapabilitySupported, + }, + } +} diff --git a/internal/parser/provider.go b/internal/parser/provider.go index 87d978fcf..f750a668e 100644 --- a/internal/parser/provider.go +++ b/internal/parser/provider.go @@ -360,6 +360,8 @@ func providerFactoryForDef(def AgentDef) ProviderFactory { return newCommandCodeProviderFactory(def) case AgentCodex: return newCodexProviderFactory(def) + case AgentCopilot: + return newCopilotProviderFactory(def) case AgentCowork: return newCoworkProviderFactory(def) case AgentCortex: @@ -374,6 +376,8 @@ func providerFactoryForDef(def AgentDef) ProviderFactory { return newIflowProviderFactory(def) case AgentGptme: return newGptmeProviderFactory(def) + case AgentGemini: + return newGeminiProviderFactory(def) case AgentKimi: return newKimiProviderFactory(def) case AgentKilo: diff --git a/internal/parser/provider_migration.go b/internal/parser/provider_migration.go index 0bdc02698..9c1a903ee 100644 --- a/internal/parser/provider_migration.go +++ b/internal/parser/provider_migration.go @@ -20,8 +20,8 @@ var 
providerMigrationModes = map[AgentType]ProviderMigrationMode{ AgentClaude: ProviderMigrationProviderAuthoritative, AgentCowork: ProviderMigrationProviderAuthoritative, AgentCodex: ProviderMigrationProviderAuthoritative, - AgentCopilot: ProviderMigrationLegacyOnly, - AgentGemini: ProviderMigrationLegacyOnly, + AgentCopilot: ProviderMigrationProviderAuthoritative, + AgentGemini: ProviderMigrationProviderAuthoritative, AgentMiMoCode: ProviderMigrationProviderAuthoritative, AgentOpenCode: ProviderMigrationProviderAuthoritative, AgentKilo: ProviderMigrationProviderAuthoritative, diff --git a/internal/parser/provider_shim_scan_test.go b/internal/parser/provider_shim_scan_test.go index 28683f39d..9a2a6c310 100644 --- a/internal/parser/provider_shim_scan_test.go +++ b/internal/parser/provider_shim_scan_test.go @@ -50,10 +50,9 @@ var pendingShimProviderFiles = map[string]bool{ "antigravity_cli_provider.go": true, "antigravity_provider.go": true, "claude_provider.go": true, - "copilot_provider.go": true, + "codex_provider.go": true, "cowork_provider.go": true, "db_backed_provider.go": true, - "gemini_provider.go": true, "hermes_provider.go": true, "kiro_ide_provider.go": true, "kiro_provider.go": true, diff --git a/internal/parser/provider_test.go b/internal/parser/provider_test.go index d3b701e58..65b689c5e 100644 --- a/internal/parser/provider_test.go +++ b/internal/parser/provider_test.go @@ -149,7 +149,10 @@ func TestProviderRegistryMirrorsAgentRegistry(t *testing.T) { } func TestLegacyProviderCapabilitiesMatchBaseDefaults(t *testing.T) { - provider, ok := NewProvider(AgentGemini, ProviderConfig{ + legacyAgent := legacyProviderTestAgent(t) + def, ok := AgentByType(legacyAgent) + require.True(t, ok) + provider, ok := NewProvider(legacyAgent, ProviderConfig{ Roots: []string{t.TempDir()}, Machine: "devbox", }) @@ -177,7 +180,7 @@ func TestLegacyProviderCapabilitiesMatchBaseDefaults(t *testing.T) { source, found, err := provider.FindSource(ctx, FindSourceRequest{ 
RawSessionID: "session", - FullSessionID: "gemini:session", + FullSessionID: def.IDPrefix + "session", StoredFilePath: "/tmp/session.jsonl", FingerprintKey: "/tmp/session.jsonl", }) @@ -186,7 +189,7 @@ func TestLegacyProviderCapabilitiesMatchBaseDefaults(t *testing.T) { assert.Empty(t, source) _, err = provider.Fingerprint(ctx, SourceRef{ - Provider: AgentGemini, + Provider: legacyAgent, Key: "session", DisplayPath: "/tmp/session.jsonl", FingerprintKey: "/tmp/session.jsonl", @@ -195,9 +198,9 @@ func TestLegacyProviderCapabilitiesMatchBaseDefaults(t *testing.T) { assert.True(t, errors.Is(err, ErrUnsupportedProviderFeature)) incremental, status, err := provider.ParseIncremental(ctx, IncrementalRequest{ - Source: SourceRef{Provider: AgentGemini, Key: "session"}, + Source: SourceRef{Provider: legacyAgent, Key: "session"}, Fingerprint: SourceFingerprint{Key: "/tmp/session.jsonl"}, - SessionID: "gemini:session", + SessionID: def.IDPrefix + "session", StartOrdinal: 1, Machine: "devbox", }) @@ -211,12 +214,13 @@ func TestProviderFactoryLookupAndConfigSnapshot(t *testing.T) { Roots: []string{"/tmp/one", "/tmp/two"}, Machine: "devbox", } + legacyAgent := legacyProviderTestAgent(t) - factory, ok := ProviderFactoryByType(AgentGemini) + factory, ok := ProviderFactoryByType(legacyAgent) require.True(t, ok) - assert.Equal(t, AgentGemini, factory.Definition().Type) + assert.Equal(t, legacyAgent, factory.Definition().Type) - provider, ok := NewProvider(AgentGemini, cfg) + provider, ok := NewProvider(legacyAgent, cfg) require.True(t, ok) require.NotNil(t, provider) @@ -233,7 +237,8 @@ func TestProviderFactoryLookupAndConfigSnapshot(t *testing.T) { } func TestLegacyProviderParseReturnsUnsupported(t *testing.T) { - provider, ok := NewProvider(AgentGemini, ProviderConfig{ + legacyAgent := legacyProviderTestAgent(t) + provider, ok := NewProvider(legacyAgent, ProviderConfig{ Roots: []string{t.TempDir()}, Machine: "devbox", }) @@ -241,7 +246,7 @@ func 
TestLegacyProviderParseReturnsUnsupported(t *testing.T) { outcome, err := provider.Parse(context.Background(), ParseRequest{ Source: SourceRef{ - Provider: AgentGemini, + Provider: legacyAgent, Key: "source", DisplayPath: "/tmp/source.jsonl", FingerprintKey: "/tmp/source.jsonl", @@ -257,7 +262,7 @@ func TestLegacyProviderParseReturnsUnsupported(t *testing.T) { assert.True(t, errors.Is(err, ErrUnsupportedProviderFeature)) var unsupported UnsupportedProviderFeatureError require.ErrorAs(t, err, &unsupported) - assert.Equal(t, AgentGemini, unsupported.Provider) + assert.Equal(t, legacyAgent, unsupported.Provider) assert.Equal(t, ProviderFeatureParse, unsupported.Feature) } @@ -349,6 +354,18 @@ func (p *testProvider) Parse(context.Context, ParseRequest) (ParseOutcome, error return ParseOutcome{}, nil } +func legacyProviderTestAgent(t *testing.T) AgentType { + t.Helper() + for _, def := range Registry { + factory := providerFactoryForDef(def) + if _, ok := factory.(legacyProviderFactory); ok { + return def.Type + } + } + t.Fatal("expected at least one legacy provider for fallback tests") + return "" +} + func assertAgentDefMetadataEqual(t *testing.T, want, got AgentDef) { t.Helper() diff --git a/internal/parser/types.go b/internal/parser/types.go index 732b6b3d7..7f4d4c750 100644 --- a/internal/parser/types.go +++ b/internal/parser/types.go @@ -128,28 +128,24 @@ var Registry = []AgentDef{ ShallowWatchRootsFunc: ResolveCodexShallowWatchRoots, }, { - Type: AgentCopilot, - DisplayName: "Copilot", - EnvVar: "COPILOT_DIR", - ConfigKey: "copilot_dirs", - DefaultDirs: []string{".copilot"}, - IDPrefix: "copilot:", - WatchSubdirs: []string{"session-state"}, - FileBased: true, - DiscoverFunc: DiscoverCopilotSessions, - FindSourceFunc: FindCopilotSourceFile, + Type: AgentCopilot, + DisplayName: "Copilot", + EnvVar: "COPILOT_DIR", + ConfigKey: "copilot_dirs", + DefaultDirs: []string{".copilot"}, + IDPrefix: "copilot:", + WatchSubdirs: []string{"session-state"}, + FileBased: true, }, 
{ - Type: AgentGemini, - DisplayName: "Gemini", - EnvVar: "GEMINI_DIR", - ConfigKey: "gemini_dirs", - DefaultDirs: []string{".gemini"}, - IDPrefix: "gemini:", - WatchSubdirs: []string{"tmp"}, - FileBased: true, - DiscoverFunc: DiscoverGeminiSessions, - FindSourceFunc: FindGeminiSourceFile, + Type: AgentGemini, + DisplayName: "Gemini", + EnvVar: "GEMINI_DIR", + ConfigKey: "gemini_dirs", + DefaultDirs: []string{".gemini"}, + IDPrefix: "gemini:", + WatchSubdirs: []string{"tmp"}, + FileBased: true, }, { Type: AgentMiMoCode, diff --git a/internal/sync/classify_antigravity_cli_test.go b/internal/sync/classify_antigravity_cli_test.go index f99a7d1ea..be05aba3e 100644 --- a/internal/sync/classify_antigravity_cli_test.go +++ b/internal/sync/classify_antigravity_cli_test.go @@ -67,7 +67,6 @@ func TestClassifyOnePath_AntigravityCLI(t *testing.T) { parser.AgentAntigravityCLI: {dir}, }, } - geminiMap := make(map[string]map[string]string) tests := []struct { name string @@ -131,7 +130,7 @@ func TestClassifyOnePath_AntigravityCLI(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got, ok := eng.classifyOnePath(tt.path, geminiMap) + got, ok := eng.classifyOnePath(tt.path) assert.Equal(t, tt.want, ok) if ok { assert.Equal(t, parser.AgentAntigravityCLI, got.Agent) @@ -146,7 +145,7 @@ func TestClassifyOnePath_AntigravityCLI(t *testing.T) { orphanTraj := filepath.Join(convDir, orphanUUID+".trajectory.json") require.NoError(t, os.WriteFile(orphanTraj, []byte("orphan"), 0o644)) - _, ok := eng.classifyOnePath(orphanTraj, geminiMap) + _, ok := eng.classifyOnePath(orphanTraj) assert.False(t, ok, "should not classify sidecar when pb file does not exist") }) diff --git a/internal/sync/engine.go b/internal/sync/engine.go index 023053f6a..3cb7596bd 100644 --- a/internal/sync/engine.go +++ b/internal/sync/engine.go @@ -508,7 +508,6 @@ func (e *Engine) SyncPaths(paths []string) { func (e *Engine) classifyPaths( paths []string, ) []parser.DiscoveredFile { - 
geminiProjectsByDir := make(map[string]map[string]string) seen := make(map[string]int, len(paths)) files := make([]parser.DiscoveredFile, 0, len(paths)) for _, p := range paths { @@ -520,9 +519,7 @@ func (e *Engine) classifyPaths( dfs = e.classifyCodexIndexPath(p) } if len(dfs) == 0 { - if df, ok := e.classifyOnePath( - p, geminiProjectsByDir, - ); ok { + if df, ok := e.classifyOnePath(p); ok { dfs = []parser.DiscoveredFile{df} } } @@ -950,7 +947,6 @@ func (e *Engine) classifyContainerPath( func (e *Engine) classifyOnePath( path string, - geminiProjectsByDir map[string]map[string]string, ) (parser.DiscoveredFile, bool) { sep := string(filepath.Separator) pathExists := true @@ -985,97 +981,6 @@ func (e *Engine) classifyOnePath( // shapes, so the legacy block was removed when Claude was folded // onto its provider. - // Copilot: /session-state/.jsonl - // or: /session-state//events.jsonl - for _, copilotDir := range e.agentDirs[parser.AgentCopilot] { - if copilotDir == "" { - continue - } - stateDir := filepath.Join( - copilotDir, "session-state", - ) - if rel, ok := isUnder(stateDir, path); ok { - parts := strings.Split(rel, sep) - switch len(parts) { - case 1: - stem, ok := strings.CutSuffix( - parts[0], ".jsonl", - ) - if !ok { - continue - } - dirEvents := filepath.Join( - stateDir, stem, "events.jsonl", - ) - if _, err := os.Stat(dirEvents); err == nil { - continue - } - return parser.DiscoveredFile{ - Path: path, - Agent: parser.AgentCopilot, - }, true - case 2: - if parts[1] == "events.jsonl" { - return parser.DiscoveredFile{ - Path: path, - Agent: parser.AgentCopilot, - }, true - } - // workspace.yaml changes should trigger a re-parse - // of the sibling events.jsonl. 
- if parts[1] == "workspace.yaml" { - eventsPath := filepath.Join( - stateDir, parts[0], "events.jsonl", - ) - if _, err := os.Stat(eventsPath); err == nil { - return parser.DiscoveredFile{ - Path: eventsPath, - Agent: parser.AgentCopilot, - }, true - } - } - continue - default: - continue - } - } - } - - // Gemini: /tmp//chats/session-*.json(.l) - // is either a SHA-256 hash (old) or project name (new). - for _, geminiDir := range e.agentDirs[parser.AgentGemini] { - if geminiDir == "" { - continue - } - if rel, ok := isUnder(geminiDir, path); ok { - parts := strings.Split(rel, sep) - if len(parts) != 4 || - parts[0] != "tmp" || - parts[2] != "chats" { - continue - } - name := parts[3] - if !strings.HasPrefix(name, "session-") || - (!strings.HasSuffix(name, ".json") && - !strings.HasSuffix(name, ".jsonl")) { - continue - } - dirName := parts[1] - if _, ok := geminiProjectsByDir[geminiDir]; !ok { - geminiProjectsByDir[geminiDir] = - parser.BuildGeminiProjectMap(geminiDir) - } - project := parser.ResolveGeminiProject( - dirName, geminiProjectsByDir[geminiDir], - ) - return parser.DiscoveredFile{ - Path: path, - Project: project, - Agent: parser.AgentGemini, - }, true - } - } - // VSCode Copilot: /workspaceStorage//chatSessions/.{json,jsonl} // or: /globalStorage/emptyWindowChatSessions/.{json,jsonl} for _, vscDir := range e.agentDirs[parser.AgentVSCodeCopilot] { @@ -3882,12 +3787,8 @@ func (e *Engine) processFile( var res processResult switch file.Agent { - case parser.AgentCopilot: - res = e.processCopilot(file, info) case parser.AgentReasonix: res = e.processReasonix(file, info) - case parser.AgentGemini: - res = e.processGemini(file, info) case parser.AgentVSCodeCopilot: res = e.processVSCodeCopilot(file, info) case parser.AgentVSCopilot: @@ -4012,7 +3913,7 @@ func (e *Engine) processProviderFile( mtime: mtime, }, true } - if freshMtime, fresh := e.providerCoworkSourceFresh(source, file); fresh { + if freshMtime, fresh := 
e.providerSourceFreshBeforeFingerprint(source, file); fresh { return processResult{ skip: true, mtime: freshMtime, @@ -4706,11 +4607,11 @@ func (e *Engine) providerSingleSessionFresh( !parser.NeedsProjectReparse(sess.Project) } -func (e *Engine) providerCoworkSourceFresh( +func (e *Engine) providerSourceFreshBeforeFingerprint( source parser.SourceRef, file parser.DiscoveredFile, ) (int64, bool) { - if e.forceParse || file.ForceParse || file.Agent != parser.AgentCowork { + if e.forceParse || file.ForceParse { return 0, false } path := providerDiscoveredPath(source) @@ -4728,15 +4629,31 @@ func (e *Engine) providerCoworkSourceFresh( return 0, false } } - mtime := parser.CoworkSessionMtime(path, info.ModTime().UnixNano()) - effectiveInfo := fakeSnapshotInfo{ - fSize: info.Size(), - fMtime: mtime, - } - if !e.shouldSkipByPath(path, effectiveInfo) { - return 0, false + switch file.Agent { + case parser.AgentCowork: + mtime := parser.CoworkSessionMtime(path, info.ModTime().UnixNano()) + effectiveInfo := fakeSnapshotInfo{ + fSize: info.Size(), + fMtime: mtime, + } + if e.shouldSkipByPath(path, effectiveInfo) { + return mtime, true + } + case parser.AgentGemini: + if e.shouldSkipByPath(path, info) { + return info.ModTime().UnixNano(), true + } + case parser.AgentCopilot: + mtime := copilotEffectiveMtime(path, info) + effectiveInfo := fakeSnapshotInfo{ + fSize: info.Size(), + fMtime: mtime, + } + if e.shouldSkipByPath(path, effectiveInfo) { + return mtime, true + } } - return mtime, true + return 0, false } // stampProviderFileIdentity copies the source file's inode and device onto @@ -5363,44 +5280,6 @@ func pickPreferredCodexDiscoveredFile( return chosen } -func (e *Engine) processCopilot( - file parser.DiscoveredFile, info os.FileInfo, -) processResult { - // Use effective mtime = max(events.jsonl, workspace.yaml) so - // that a new or updated workspace.yaml triggers a re-parse and - // the stored mtime stays consistent with what we compare against - // on subsequent 
syncs (preventing oscillation). - effectiveMtime := copilotEffectiveMtime(file.Path, info) - if e.shouldSkipCopilot(file.Path, info, effectiveMtime) { - return processResult{skip: true} - } - - sess, msgs, usageEvents, err := parser.ParseCopilotSession( - file.Path, e.machine, - ) - if err != nil { - return processResult{err: err} - } - if sess == nil { - return processResult{} - } - - if effectiveMtime > sess.File.Mtime { - sess.File.Mtime = effectiveMtime - } - - hash, err := ComputeFileHash(file.Path) - if err == nil { - sess.File.Hash = hash - } - - return processResult{ - results: []parser.ParseResult{ - {Session: *sess, Messages: msgs, UsageEvents: usageEvents}, - }, - } -} - // copilotEffectiveMtime returns max(events.jsonl mtime, // workspace.yaml mtime). For flat .jsonl sessions (no // workspace.yaml sibling) it returns the events.jsonl mtime. @@ -5553,64 +5432,6 @@ func reasonixEffectiveInfo(path string, info os.FileInfo) os.FileInfo { return fakeSnapshotInfo{fSize: size, fMtime: mtime} } -// shouldSkipCopilot is like shouldSkipByPath but uses the -// pre-computed effectiveMtime (max of events.jsonl and -// workspace.yaml) for the mtime comparison, keeping the stored -// value consistent with what we compare against on next sync. -func (e *Engine) shouldSkipCopilot( - path string, info os.FileInfo, effectiveMtime int64, -) bool { - if e.forceParse { // parse-diff: always re-parse - return false - } - lookupPath := path - if e.pathRewriter != nil { - lookupPath = e.pathRewriter(path) - } - storedSize, storedMtime, ok := e.db.GetFileInfoByPath(lookupPath) - if !ok { - return false - } - if storedSize != info.Size() || storedMtime != effectiveMtime { - return false - } - if e.db.GetDataVersionByPath(lookupPath) < - db.CurrentDataVersion() { - return false - } - return true -} - -func (e *Engine) processGemini( - file parser.DiscoveredFile, info os.FileInfo, -) processResult { - // Fast path: skip by file_path + mtime before parsing. 
- if e.shouldSkipByPath(file.Path, info) { - return processResult{skip: true} - } - - sess, msgs, err := parser.ParseGeminiSession( - file.Path, file.Project, e.machine, - ) - if err != nil { - return processResult{err: err} - } - if sess == nil { - return processResult{} - } - - hash, err := ComputeFileHash(file.Path) - if err == nil { - sess.File.Hash = hash - } - - return processResult{ - results: []parser.ParseResult{ - {Session: *sess, Messages: msgs}, - }, - } -} - func (e *Engine) processVSCodeCopilot( file parser.DiscoveredFile, info os.FileInfo, ) processResult { diff --git a/internal/sync/engine_integration_test.go b/internal/sync/engine_integration_test.go index ff496138a..3cf7b8ab8 100644 --- a/internal/sync/engine_integration_test.go +++ b/internal/sync/engine_integration_test.go @@ -2117,6 +2117,97 @@ func TestSyncPathsGeminiJSONL(t *testing.T) { assertSessionMessageCount(t, env.db, "gemini:"+sessionID, 2) } +func TestSyncPathsGeminiProjectMetadataEventRefreshesProject(t *testing.T) { + env := setupTestEnv(t) + + sessionID := "gem-project-refresh" + projectsPath := filepath.Join(env.geminiDir, "projects.json") + writeProject := func(name string) { + t.Helper() + require.NoError(t, os.WriteFile( + projectsPath, + fmt.Appendf(nil, + `{"projects":{"/Users/alice/code/%s":"alias"}}`, + name, + ), + 0o644, + ), "write projects") + } + writeProject("one") + path := env.writeGeminiSession( + t, + filepath.Join( + "tmp", "alias", "chats", + "session-001.json", + ), + testjsonl.GeminiSessionJSON( + sessionID, "alias", tsEarly, tsEarlyS5, + []map[string]any{ + testjsonl.GeminiUserMsg( + "m1", tsEarly, "Hello Gemini", + ), + testjsonl.GeminiAssistantMsg( + "m2", tsEarlyS5, "Hi there!", nil, + ), + }, + ), + ) + + env.engine.SyncPaths([]string{path}) + assertSessionState( + t, env.db, "gemini:"+sessionID, + func(sess *db.Session) { + assert.Equal(t, "one", sess.Project) + }, + ) + + info, err := os.Stat(path) + require.NoError(t, err, "stat gemini session") + 
env.engine.InjectSkipCache(map[string]int64{ + path: info.ModTime().UnixNano(), + }) + + writeProject("two") + env.engine.SyncPaths([]string{projectsPath}) + + assertSessionState( + t, env.db, "gemini:"+sessionID, + func(sess *db.Session) { + assert.Equal(t, "two", sess.Project) + }, + ) + + writeProject("three") + env.engine.SyncPaths([]string{path, projectsPath}) + + assertSessionState( + t, env.db, "gemini:"+sessionID, + func(sess *db.Session) { + assert.Equal(t, "three", sess.Project) + }, + ) + + writeProject("four") + env.engine.SyncPaths([]string{projectsPath, path}) + + assertSessionState( + t, env.db, "gemini:"+sessionID, + func(sess *db.Session) { + assert.Equal(t, "four", sess.Project) + }, + ) + + require.NoError(t, os.Remove(projectsPath), "remove projects") + env.engine.SyncPaths([]string{projectsPath}) + + assertSessionState( + t, env.db, "gemini:"+sessionID, + func(sess *db.Session) { + assert.Equal(t, "alias", sess.Project) + }, + ) +} + func TestSyncPathsCodexAcceptsFlatArchived(t *testing.T) { env := setupTestEnv(t) @@ -5788,18 +5879,15 @@ func TestResyncAllReplacesMessageContent(t *testing.T) { }) require.NoError(t, err, "update message content") - // Normal SyncAll should skip (file unchanged on disk). - stats := env.engine.SyncAll(context.Background(), nil) - require.Equal(t, 1, stats.Skipped, "expected 1 skip, got %d", stats.Skipped) - msgs = fetchMessages(t, env.db, fullID) - require.True(t, strings.Contains(msgs[1].Content, "stale content"), "SyncAll should not have replaced content") - // Capture FTS state before resync so a regression that // breaks FTS isn't masked by HasFTS() returning false // post-resync. hadFTS := env.db.HasFTS() - // ResyncAll should re-parse and replace message content. + // ResyncAll should re-parse and replace message content. Gemini is + // provider-authoritative, so it has no DB-backed mtime skip; a plain + // SyncAll would also re-parse the unchanged file. 
ResyncAll additionally + // drops and rebuilds the FTS index, which is what this test guards. env.engine.ResyncAll(context.Background(), nil) msgs = fetchMessages(t, env.db, fullID) require.Equal(t, 2, len(msgs), "got %d messages after resync, want 2", len(msgs)) diff --git a/internal/sync/engine_test.go b/internal/sync/engine_test.go index 181672c71..c357bbe96 100644 --- a/internal/sync/engine_test.go +++ b/internal/sync/engine_test.go @@ -1684,6 +1684,7 @@ func TestProcessCodexAppendedStaleProjectDoesFullReparse(t *testing.T) { ) + "\n") require.NoError(t, err, "append codex fixture") require.NoError(t, f.Close(), "close codex fixture") + e := &Engine{ db: database, idPrefix: "host~", @@ -1778,6 +1779,7 @@ func TestProcessCodexAppendedStaleProjectCarriesForceReplace(t *testing.T) { ) + "\n") require.NoError(t, err, "append codex fixture") require.NoError(t, f.Close(), "close codex fixture") + e := &Engine{ db: database, idPrefix: "host~", @@ -3523,7 +3525,7 @@ func TestEngine_ClassifyOnePathReasonixProjectBareMeta(t *testing.T) { dbtest.WriteTestFile(t, sessionPath, []byte(`{"role":"user","content":"hi"}`)) dbtest.WriteTestFile(t, metaPath, []byte(`{"model":"claude"}`)) - got, ok := engine.classifyOnePath(metaPath, nil) + got, ok := engine.classifyOnePath(metaPath) require.True(t, ok, "expected Reasonix sidecar to classify") assert.Equal(t, sessionPath, got.Path) assert.Equal(t, "proj", got.Project) @@ -3546,7 +3548,7 @@ func TestEngine_ClassifyOnePathReasonixDeletedMeta(t *testing.T) { metaPath := sessionPath + ".meta" dbtest.WriteTestFile(t, sessionPath, []byte(`{"role":"user","content":"hi"}`)) - got, ok := engine.classifyOnePath(metaPath, nil) + got, ok := engine.classifyOnePath(metaPath) require.True(t, ok, "expected deleted Reasonix sidecar to classify") assert.Equal(t, sessionPath, got.Path) assert.Equal(t, "proj", got.Project) @@ -3567,7 +3569,7 @@ func TestEngine_ClassifyOnePathReasonixDeletedTranscriptIgnored(t *testing.T) { reasonixDir, "projects", 
"proj", "sessions", "session-123.jsonl", ) - _, ok := engine.classifyOnePath(sessionPath, nil) + _, ok := engine.classifyOnePath(sessionPath) assert.False(t, ok, "expected deleted Reasonix transcript to be ignored") } @@ -4222,3 +4224,55 @@ func TestWriteIncrementalKeepsPlausibleEndedAt(t *testing.T) { "plausible appended ended_at must update the column: got %q want %s", *after.EndedAt, newEnd.Format(time.RFC3339Nano)) } + +func TestEngine_ClassifyPathsProviderRemoveSkipsMissingGeminiSource( + t *testing.T, +) { + db := openTestDB(t) + geminiDir := t.TempDir() + engine := NewEngine(db, EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentGemini: {geminiDir}, + }, + Machine: "local", + }) + + sessionPath := filepath.Join( + geminiDir, "tmp", "alias", "chats", "session-001.json", + ) + dbtest.WriteTestFile(t, sessionPath, []byte("{}")) + require.NoError(t, os.Remove(sessionPath), "Remove(%q)", sessionPath) + + files := engine.classifyPaths([]string{sessionPath}) + assert.Empty(t, files) +} + +func TestEngine_ClassifyPathsProviderSidecarKeepsExistingGeminiSources( + t *testing.T, +) { + db := openTestDB(t) + geminiDir := t.TempDir() + engine := NewEngine(db, EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentGemini: {geminiDir}, + }, + Machine: "local", + }) + + projectsPath := filepath.Join(geminiDir, "projects.json") + dbtest.WriteTestFile( + t, + projectsPath, + []byte(`{"projects":{"/Users/alice/code/sample":"alias"}}`), + ) + sessionPath := filepath.Join( + geminiDir, "tmp", "alias", "chats", "session-001.json", + ) + dbtest.WriteTestFile(t, sessionPath, []byte("{}")) + + files := engine.classifyPaths([]string{projectsPath}) + require.Len(t, files, 1) + assert.Equal(t, sessionPath, files[0].Path) + assert.Equal(t, parser.AgentGemini, files[0].Agent) + assert.True(t, files[0].ForceParse) +} diff --git a/internal/sync/provider_shadow_caller_test.go b/internal/sync/provider_shadow_caller_test.go index 024e27f13..ac1d7070f 
100644 --- a/internal/sync/provider_shadow_caller_test.go +++ b/internal/sync/provider_shadow_caller_test.go @@ -649,6 +649,55 @@ func TestProcessFileProviderAuthoritativeSkipsFreshCoworkBeforeFingerprint(t *te assert.Empty(t, provider.calls) } +func TestProcessFileProviderAuthoritativeSkipsFreshGeminiBeforeFingerprint(t *testing.T) { + root := t.TempDir() + sourcePath := filepath.Join( + root, "tmp", "alias", "chats", "session-001.json", + ) + sourceMtime := writeFreshProviderDBSession( + t, sourcePath, nil, + ) + provider := &shadowCallerProvider{ + shadowTestProvider: shadowTestProvider{ + ProviderBase: parser.ProviderBase{ + Def: parser.AgentDef{ + Type: parser.AgentGemini, + DisplayName: "Gemini CLI", + }, + }, + }, + source: parser.SourceRef{ + Provider: parser.AgentGemini, + Key: sourcePath, + DisplayPath: sourcePath, + FingerprintKey: sourcePath, + }, + } + engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentGemini: {root}, + }, + Machine: "devbox", + ProviderFactories: []parser.ProviderFactory{ + shadowCallerFactory{provider: provider}, + }, + ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentGemini: parser.ProviderMigrationProviderAuthoritative, + }, + }) + requireFreshProviderSession(t, engine.db, parser.AgentGemini, sourcePath, sourceMtime) + + result := engine.processFile(context.Background(), parser.DiscoveredFile{ + Path: sourcePath, + Agent: parser.AgentGemini, + }) + + require.NoError(t, result.err) + assert.True(t, result.skip) + assert.Equal(t, sourceMtime, result.mtime) + assert.Empty(t, provider.calls) +} + func TestProcessFileProviderAuthoritativeForceParseBypassesFreshCoworkSkip(t *testing.T) { root := t.TempDir() database := dbtest.OpenTestDB(t) @@ -703,6 +752,56 @@ func TestProcessFileProviderAuthoritativeForceParseBypassesFreshCoworkSkip(t *te assert.True(t, provider.parseRequest.ForceParse) } +func 
TestProcessFileProviderAuthoritativeSkipsFreshCopilotBeforeFingerprint(t *testing.T) { + root := t.TempDir() + sourcePath := filepath.Join( + root, "session-state", "copilot-fresh", "events.jsonl", + ) + workspacePath := filepath.Join(filepath.Dir(sourcePath), "workspace.yaml") + sourceMtime := writeFreshProviderDBSession( + t, sourcePath, &workspacePath, + ) + provider := &shadowCallerProvider{ + shadowTestProvider: shadowTestProvider{ + ProviderBase: parser.ProviderBase{ + Def: parser.AgentDef{ + Type: parser.AgentCopilot, + DisplayName: "Copilot CLI", + }, + }, + }, + source: parser.SourceRef{ + Provider: parser.AgentCopilot, + Key: sourcePath, + DisplayPath: sourcePath, + FingerprintKey: sourcePath, + }, + } + engine := NewEngine(dbtest.OpenTestDB(t), EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentCopilot: {root}, + }, + Machine: "devbox", + ProviderFactories: []parser.ProviderFactory{ + shadowCallerFactory{provider: provider}, + }, + ProviderMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentCopilot: parser.ProviderMigrationProviderAuthoritative, + }, + }) + requireFreshProviderSession(t, engine.db, parser.AgentCopilot, sourcePath, sourceMtime) + + result := engine.processFile(context.Background(), parser.DiscoveredFile{ + Path: sourcePath, + Agent: parser.AgentCopilot, + }) + + require.NoError(t, result.err) + assert.True(t, result.skip) + assert.Equal(t, sourceMtime, result.mtime) + assert.Empty(t, provider.calls) +} + func TestProcessFileProviderAuthoritativeKeepsRetryStatePerResult(t *testing.T) { root := t.TempDir() sourcePath := filepath.Join(root, "multi-provider-owned.jsonl") @@ -1293,3 +1392,52 @@ func writeFreshCoworkProviderSource( return sourcePath, sourceMtime } + +func writeFreshProviderDBSession( + t *testing.T, + sourcePath string, + mtimeSidecar *string, +) int64 { + t.Helper() + + require.NoError(t, os.MkdirAll(filepath.Dir(sourcePath), 0o755)) + require.NoError(t, 
os.WriteFile(sourcePath, []byte("{}\n"), 0o644)) + sourceTime := time.Unix(1_781_475_210, 0) + require.NoError(t, os.Chtimes(sourcePath, sourceTime, sourceTime)) + mtime := sourceTime.UnixNano() + if mtimeSidecar != nil { + sidecarTime := sourceTime.Add(time.Second) + require.NoError(t, os.WriteFile(*mtimeSidecar, []byte("name: Fresh\n"), 0o644)) + require.NoError(t, os.Chtimes(*mtimeSidecar, sidecarTime, sidecarTime)) + mtime = sidecarTime.UnixNano() + } + + return mtime +} + +func requireFreshProviderSession( + t *testing.T, + database *db.DB, + agent parser.AgentType, + sourcePath string, + sourceMtime int64, +) { + t.Helper() + + info, err := os.Stat(sourcePath) + require.NoError(t, err) + sourceSize := info.Size() + fullSessionID := string(agent) + ":fresh" + require.NoError(t, database.UpsertSession(db.Session{ + ID: fullSessionID, + Project: "provider-project", + Machine: "devbox", + Agent: string(agent), + FilePath: &sourcePath, + FileSize: &sourceSize, + FileMtime: &sourceMtime, + })) + require.NoError(t, database.SetSessionDataVersion( + fullSessionID, db.CurrentDataVersion(), + )) +}