From da18de0e9f5123d79b709eb44ed3489d4a35e634 Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Thu, 25 Jun 2026 12:06:10 -0400 Subject: [PATCH 01/24] feat(parser): add reusable source-set bases and functional options Introduce the SourceSet bases (JSONL, directory JSONL, single-file, multi-session container, sibling-metadata, SQLite fan-out), the functional with*() option set, the generic SourceSet provider/factory plumbing, and the virtual-path and source-identity helpers up front, so every provider migration constructs its source set through options instead of a struct literal. --- internal/parser/directory_jsonl_source_set.go | 59 ++ internal/parser/file_identity_unix.go | 15 + internal/parser/file_identity_windows.go | 9 + internal/parser/jsonl_source_set.go | 793 ++++++++++++++++++ internal/parser/jsonl_source_set_options.go | 139 +++ internal/parser/multi_session_container.go | 447 ++++++++++ internal/parser/provider.go | 1 + internal/parser/provider_lookup.go | 37 + .../parser/sibling_metadata_source_set.go | 206 +++++ internal/parser/single_file_source_set.go | 355 ++++++++ internal/parser/source_set.go | 126 +++ internal/parser/sqlite_fanout_source_set.go | 353 ++++++++ internal/parser/virtual_source_path.go | 38 + 13 files changed, 2578 insertions(+) create mode 100644 internal/parser/directory_jsonl_source_set.go create mode 100644 internal/parser/file_identity_unix.go create mode 100644 internal/parser/file_identity_windows.go create mode 100644 internal/parser/jsonl_source_set.go create mode 100644 internal/parser/jsonl_source_set_options.go create mode 100644 internal/parser/multi_session_container.go create mode 100644 internal/parser/provider_lookup.go create mode 100644 internal/parser/sibling_metadata_source_set.go create mode 100644 internal/parser/single_file_source_set.go create mode 100644 internal/parser/source_set.go create mode 100644 internal/parser/sqlite_fanout_source_set.go create mode 100644 
internal/parser/virtual_source_path.go diff --git a/internal/parser/directory_jsonl_source_set.go b/internal/parser/directory_jsonl_source_set.go new file mode 100644 index 000000000..6a6aa9877 --- /dev/null +++ b/internal/parser/directory_jsonl_source_set.go @@ -0,0 +1,59 @@ +package parser + +import ( + "path/filepath" + "strings" +) + +// DirectoryJSONLSourceSet constrains JSONL sources to the common +// {root}/{project}/{session}.{ext} shape while keeping JSONLSourceSet's source +// methods available through embedding. +type DirectoryJSONLSourceSet struct { + JSONLSourceSet +} + +// newDirectoryJSONLSourceSet returns a JSONL source helper for providers whose +// transcripts live one project directory below each configured root. The +// returned helper is always recursive enough to classify watched project files, +// but it rejects root-level and deeper nested files through IncludePath. +func newDirectoryJSONLSourceSet( + provider AgentType, + roots []string, + opts ...jsonlOption, +) DirectoryJSONLSourceSet { + var options JSONLSourceSetOptions + for _, opt := range opts { + opt(&options) + } + userIncludePath := options.IncludePath + options.Recursive = true + options.IncludePath = func(root, path string) bool { + if !isDirectoryJSONLPath(root, path) { + return false + } + return userIncludePath == nil || userIncludePath(root, path) + } + if options.ProjectHint == nil { + options.ProjectHint = func(root, path string) string { + return directoryJSONLProjectFromPath(path) + } + } + return DirectoryJSONLSourceSet{ + JSONLSourceSet: jsonlSourceSetFromOptions(provider, roots, options), + } +} + +func isDirectoryJSONLPath(root, path string) bool { + rel, err := filepath.Rel(root, path) + if err != nil { + return false + } + parts := strings.Split(rel, string(filepath.Separator)) + return len(parts) == 2 && + parts[0] != "" && parts[0] != "." && parts[0] != ".." && + parts[1] != "" && parts[1] != "." && parts[1] != ".." 
+} + +func directoryJSONLProjectFromPath(path string) string { + return filepath.Base(filepath.Dir(path)) +} diff --git a/internal/parser/file_identity_unix.go b/internal/parser/file_identity_unix.go new file mode 100644 index 000000000..1af447416 --- /dev/null +++ b/internal/parser/file_identity_unix.go @@ -0,0 +1,15 @@ +//go:build unix + +package parser + +import ( + "os" + "syscall" +) + +func sourceFileIdentity(info os.FileInfo) (inode, device uint64) { + if stat, ok := info.Sys().(*syscall.Stat_t); ok { + return uint64(stat.Ino), uint64(stat.Dev) + } + return 0, 0 +} diff --git a/internal/parser/file_identity_windows.go b/internal/parser/file_identity_windows.go new file mode 100644 index 000000000..a0f0ff983 --- /dev/null +++ b/internal/parser/file_identity_windows.go @@ -0,0 +1,9 @@ +//go:build windows + +package parser + +import "os" + +func sourceFileIdentity(info os.FileInfo) (inode, device uint64) { + return 0, 0 +} diff --git a/internal/parser/jsonl_source_set.go b/internal/parser/jsonl_source_set.go new file mode 100644 index 000000000..c46a84248 --- /dev/null +++ b/internal/parser/jsonl_source_set.go @@ -0,0 +1,793 @@ +package parser + +import ( + "context" + "crypto/sha256" + "fmt" + "io" + "os" + "path/filepath" + "slices" + "sort" + "strings" +) + +// JSONLSource is the in-memory payload JSONLSourceSet stores in SourceRef. +type JSONLSource struct { + Root string + Path string + RelPath string +} + +// JSONLSourceSetOptions configures the reusable JSONL source helper. +type JSONLSourceSetOptions struct { + // Recursive enables traversal and changed-path classification below each + // configured root. When false, only direct child files are sources. + Recursive bool + // Extensions defaults to .jsonl. Matching is case-sensitive to mirror + // legacy parser discovery. + Extensions []string + // Hash includes a full content hash in SourceFingerprint. Providers should + // leave this false unless size/mtime freshness is insufficient. 
+ Hash bool + // FollowSymlinkDirs treats symlinks to directories as directories while + // discovering recursive roots. Providers should enable it only when legacy + // discovery followed symlinked session directories; targets may be outside + // the configured root, so provider IncludePath filters should constrain the + // accepted source shape when that matters. + FollowSymlinkDirs bool + // FollowSymlinkFiles treats symlinks to regular files as sources. Providers + // should enable it when legacy discovery accepted matching symlinked files + // and the parser reads through the symlink target. + FollowSymlinkFiles bool + // DescendPath is a directory predicate for recursive discovery. It is also + // applied to source ancestors during direct source classification so + // changed-path events cannot accept paths discovery would have pruned. + DescendPath func(root, path string) bool + // IncludePath is a path-only source predicate. It runs before Include and is + // also used for deleted/renamed changed paths where os.FileInfo is + // unavailable. + IncludePath func(root, path string) bool + // Include is a source predicate for existing files. It is not called for + // deleted/renamed changed paths. + Include func(path string, info os.FileInfo) bool + // Key must be stable across process restarts and unique within a provider + // when every physical source should be parsed. If duplicates exist, + // discovery keeps the first configured root/traversal result. + Key func(root, path string) string + // DisplayPath is human-readable. When FingerprintKey is not set, it also + // becomes the persisted freshness key. + DisplayPath func(root, path string) string + // FingerprintKey is the persisted lookup and freshness identity. Override it + // when DisplayPath is not the stable value that should survive a provider + // migration. + FingerprintKey func(root, path string) string + // ProjectHint is display metadata only. 
+ ProjectHint func(root, path string) string + // SessionIDFromPath returns the raw session ID used by FindSource fallback + // lookups. It should not include the provider ID prefix. + SessionIDFromPath func(root, path string) string + // LookupIDValid reports whether a raw session ID is shaped like an ID this + // provider could resolve, gating the FindSource discovery fallback. It + // defaults to IsValidSessionID. Providers whose SessionIDFromPath emits + // composite IDs (for example subagent IDs containing separators that + // IsValidSessionID rejects) supply their own validator so those lookups are + // not dropped before the comparison loop. + LookupIDValid func(rawID string) bool + // RawSessionIDForLookup normalizes a raw session ID before the FindSource + // discovery comparison. Providers whose stored IDs carry a suffix the + // discovered filename stem lacks (for example iFlow subagent IDs) reduce it + // to the base ID here so the comparison still matches. It runs after + // providerFindRequestWithRawSessionID and before the LookupIDValid gate. + RawSessionIDForLookup func(rawID string) string + // RawSessionIDSourceFiles reconstructs candidate file paths from a raw + // session ID for providers whose IDs encode the on-disk layout rather than + // being a discoverable filename stem. FindSource resolves each candidate + // through the same path->SourceRef machinery as a stored path and returns + // the first that exists, before falling through to the discovery scan. The + // closure iterates the provided roots itself and applies its own ID + // validation. + RawSessionIDSourceFiles func(roots []string, rawID string) []string + // StoredPathFallbackRoot resolves the configured root for a stored source + // path that is not under any current root, returning false to decline. 
It + // lets a provider honor a DB-recorded file_path whose root was removed or + // was a custom location by reconstructing the implicit root so the path + // still resolves to a SourceRef. FindSource consults it after the in-root + // path lookup misses. + StoredPathFallbackRoot func(storedPath string) (string, bool) + // ParseFile parses one discovered source file into zero or more sessions + // plus the IDs of any sessions to exclude. Empty results with no exclusions + // is a clean no-session. It is what makes JSONLSourceSet a full SourceSet + // (its Parse method); leave it nil for discovery-only embedders that supply + // their own Parse. ctx and req.Machine are supplied by sourceSetProvider. + ParseFile jsonlParseFileFunc + // ForceReplace marks every non-empty parse outcome from ParseFile as a full + // replacement of the source's existing sessions, for providers whose + // transcripts are rewritten wholesale rather than appended. + ForceReplace bool +} + +// JSONLSourceSet discovers, watches, locates, and fingerprints JSONL-like +// transcript files. With a ParseFile option it is also a full SourceSet; +// without one it is a discovery helper that providers compose as a named field +// and forward the methods they support. Missing or unreadable roots and +// subdirectories are treated as empty, matching legacy discovery's lenient +// local-filesystem behavior. +type JSONLSourceSet struct { + provider AgentType + roots []string + options JSONLSourceSetOptions + extensions []string +} + +// newJSONLSourceSet builds a JSONL source set for a provider's roots from +// functional options. Every option has a zero-value default, so callers state +// only what differs. 
+func newJSONLSourceSet( + provider AgentType, + roots []string, + opts ...jsonlOption, +) JSONLSourceSet { + var options JSONLSourceSetOptions + for _, opt := range opts { + opt(&options) + } + return jsonlSourceSetFromOptions(provider, roots, options) +} + +// jsonlSourceSetFromOptions is the shared constructor used by newJSONLSourceSet +// and newDirectoryJSONLSourceSet once options have been resolved. +func jsonlSourceSetFromOptions( + provider AgentType, + roots []string, + options JSONLSourceSetOptions, +) JSONLSourceSet { + return JSONLSourceSet{ + provider: provider, + roots: cleanJSONLRoots(roots), + options: options, + extensions: normalizeJSONLExtensions(options.Extensions), + } +} + +// Discover returns stable, deduped source references for configured roots. +func (s JSONLSourceSet) Discover(ctx context.Context) ([]SourceRef, error) { + var sources []SourceRef + seen := make(map[string]struct{}) + for _, root := range s.roots { + if err := ctx.Err(); err != nil { + return nil, err + } + info, err := os.Stat(root) + if err != nil || !info.IsDir() { + continue + } + if err := s.discoverDir(ctx, root, root, &sources, seen); err != nil { + return nil, err + } + } + sortJSONLSources(sources) + return sources, nil +} + +// WatchPlan returns one watch root for each configured JSONL root. +func (s JSONLSourceSet) WatchPlan(context.Context) (WatchPlan, error) { + roots := make([]WatchRoot, 0, len(s.roots)) + globs := s.includeGlobs() + for _, root := range s.roots { + roots = append(roots, WatchRoot{ + Path: root, + Recursive: s.options.Recursive, + IncludeGlobs: append([]string(nil), globs...), + DebounceKey: string(s.provider) + ":jsonl:" + root, + }) + } + return WatchPlan{Roots: roots}, nil +} + +// SourcesForChangedPath maps a filesystem event path back to JSONL sources. 
+func (s JSONLSourceSet) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + source, ok, err := s.sourceForPath(ctx, req.Path) + if err != nil { + return nil, err + } + if !ok { + if !jsonlMissingPathFallbackAllowed(req) { + return nil, nil + } + source, ok, err = s.sourceForMissingPath(ctx, req.Path) + if err != nil { + return nil, err + } + if !ok { + return nil, nil + } + } + if req.WatchRoot != "" { + root := filepath.Clean(req.WatchRoot) + src := source.Opaque.(JSONLSource) + if !samePath(root, src.Root) { + return nil, nil + } + } + return []SourceRef{source}, nil +} + +// FindSource resolves persisted source hints or a raw filename-stem session ID. +func (s JSONLSourceSet) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + if err := ctx.Err(); err != nil { + return SourceRef{}, false, err + } + for _, stored := range []string{req.StoredFilePath, req.FingerprintKey} { + if stored == "" { + continue + } + source, ok, err := s.sourceForPath(ctx, stored) + if err != nil { + return SourceRef{}, false, err + } + if ok { + return source, true, nil + } + if s.options.StoredPathFallbackRoot != nil { + if root, ok := s.options.StoredPathFallbackRoot(stored); ok { + if source, ok := s.sourceRefFromPath( + root, filepath.Clean(stored), + ); ok { + return source, true, nil + } + } + } + } + if s.options.RawSessionIDForLookup != nil && req.RawSessionID != "" { + req.RawSessionID = s.options.RawSessionIDForLookup(req.RawSessionID) + } + if req.RawSessionID != "" && s.options.RawSessionIDSourceFiles != nil { + for _, candidate := range s.options.RawSessionIDSourceFiles( + s.roots, req.RawSessionID, + ) { + source, ok, err := s.sourceForPath(ctx, candidate) + if err != nil { + return SourceRef{}, false, err + } + if ok { + return source, true, nil + } + } + } + validRawID := req.RawSessionID != "" && 
s.lookupIDValid(req.RawSessionID) + if req.FingerprintKey == "" && !validRawID { + return SourceRef{}, false, nil + } + sources, err := s.Discover(ctx) + if err != nil { + return SourceRef{}, false, err + } + for _, source := range sources { + if req.FingerprintKey != "" && source.FingerprintKey == req.FingerprintKey { + return source, true, nil + } + if !validRawID { + continue + } + src := source.Opaque.(JSONLSource) + if s.sessionID(src.Root, src.Path) == req.RawSessionID { + return source, true, nil + } + } + return SourceRef{}, false, nil +} + +// Fingerprint returns the filesystem freshness identity for a JSONL source. +func (s JSONLSourceSet) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + if err := ctx.Err(); err != nil { + return SourceFingerprint{}, err + } + path, ok, err := s.pathFromSource(ctx, source) + if err != nil { + return SourceFingerprint{}, err + } + if !ok { + return SourceFingerprint{}, fmt.Errorf("jsonl source path unavailable") + } + info, err := os.Stat(path) + if err != nil { + return SourceFingerprint{}, fmt.Errorf("stat %s: %w", path, err) + } + if info.IsDir() { + return SourceFingerprint{}, fmt.Errorf("stat %s: source is a directory", path) + } + inode, device := sourceFileIdentity(info) + fingerprint := SourceFingerprint{ + Key: firstNonEmptyJSONLString( + source.FingerprintKey, + source.Key, + path, + ), + Size: info.Size(), + MTimeNS: info.ModTime().UnixNano(), + Inode: inode, + Device: device, + } + if s.options.Hash { + hash, err := hashJSONLSourceFile(path) + if err != nil { + return SourceFingerprint{}, err + } + fingerprint.Hash = hash + } + return fingerprint, nil +} + +// Parse resolves the request's source to a file and parses it via the ParseFile +// option, making JSONLSourceSet a full SourceSet. It mirrors the single-file +// base's parse semantics: empty results with no exclusions is a clean +// no-session skip. sourceSetProvider resolves req.Machine before calling in. 
+func (s JSONLSourceSet) Parse( + ctx context.Context, + req ParseRequest, +) (ParseOutcome, error) { + if err := ctx.Err(); err != nil { + return ParseOutcome{}, err + } + if s.options.ParseFile == nil { + return ParseOutcome{}, fmt.Errorf( + "%s: JSONLSourceSet has no ParseFile configured", s.provider, + ) + } + path, ok, err := s.pathFromSource(ctx, req.Source) + if err != nil { + return ParseOutcome{}, err + } + if !ok { + return ParseOutcome{}, fmt.Errorf( + "%s source path unavailable", s.provider, + ) + } + results, excluded, err := s.options.ParseFile(ctx, path, req) + if err != nil { + return ParseOutcome{}, err + } + if len(results) == 0 && len(excluded) == 0 { + return ParseOutcome{ + ResultSetComplete: true, + SkipReason: SkipNoSession, + }, nil + } + out := make([]ParseResultOutcome, 0, len(results)) + for i := range results { + out = append(out, ParseResultOutcome{ + Result: results[i], + DataVersion: DataVersionCurrent, + }) + } + return ParseOutcome{ + Results: out, + ExcludedSessionIDs: excluded, + ResultSetComplete: true, + ForceReplace: s.options.ForceReplace, + }, nil +} + +var ( + _ SourceSet = JSONLSourceSet{} + _ SourceSet = DirectoryJSONLSourceSet{} +) + +func (s JSONLSourceSet) discoverDir( + ctx context.Context, + root string, + dir string, + sources *[]SourceRef, + seen map[string]struct{}, +) error { + entries, err := os.ReadDir(dir) + if err != nil { + if ctxErr := ctx.Err(); ctxErr != nil { + return ctxErr + } + return nil + } + for _, entry := range entries { + if err := ctx.Err(); err != nil { + return err + } + path := filepath.Join(dir, entry.Name()) + if s.shouldDescend(entry, dir) { + if s.options.Recursive && s.descendPathIncluded(root, path) { + if err := s.discoverDir( + ctx, root, path, sources, seen, + ); err != nil { + return err + } + } + continue + } + info, err := s.sourceFileInfo(entry, path) + if err != nil || !info.Mode().IsRegular() { + continue + } + source, ok := s.sourceRef(root, path, info) + if !ok { + continue 
+ } + addJSONLSource(source, sources, seen) + } + return nil +} + +func (s JSONLSourceSet) shouldDescend(entry os.DirEntry, dir string) bool { + if entry.IsDir() { + return true + } + return s.options.FollowSymlinkDirs && isDirOrSymlink(entry, dir) +} + +func (s JSONLSourceSet) sourceFileInfo( + entry os.DirEntry, + path string, +) (os.FileInfo, error) { + info, err := entry.Info() + if err != nil { + return nil, err + } + if !s.options.FollowSymlinkFiles || info.Mode()&os.ModeSymlink == 0 { + return info, nil + } + return os.Stat(path) +} + +func (s JSONLSourceSet) sourceForPath( + ctx context.Context, + path string, +) (SourceRef, bool, error) { + path = filepath.Clean(path) + info, err := s.sourcePathInfo(path) + if err != nil || !info.Mode().IsRegular() { + return SourceRef{}, false, nil + } + for _, root := range s.roots { + if !s.pathAllowedByRoot(root, path) { + continue + } + if !s.sourcePathAllowedByDescendPath(root, path) { + continue + } + if !s.pathIncluded(root, path) { + continue + } + source, ok := s.sourceRef(root, path, info) + if !ok { + return SourceRef{}, false, nil + } + return s.discoveredSourceForCandidate(ctx, source) + } + return SourceRef{}, false, nil +} + +func (s JSONLSourceSet) sourcePathInfo(path string) (os.FileInfo, error) { + info, err := os.Lstat(path) + if err != nil { + return nil, err + } + if !s.options.FollowSymlinkFiles || info.Mode()&os.ModeSymlink == 0 { + return info, nil + } + return os.Stat(path) +} + +func (s JSONLSourceSet) sourceForMissingPath( + ctx context.Context, + path string, +) (SourceRef, bool, error) { + path = filepath.Clean(path) + for _, root := range s.roots { + if !s.pathAllowedByRoot(root, path) { + continue + } + if !s.sourcePathAllowedByDescendPath(root, path) { + continue + } + if !s.matchesExtension(path) || !s.pathIncluded(root, path) { + continue + } + source, ok := s.sourceRefFromPath(root, path) + if !ok { + return SourceRef{}, false, nil + } + return s.discoveredSourceForCandidate(ctx, source) 
+ } + return SourceRef{}, false, nil +} + +func jsonlMissingPathFallbackAllowed(req ChangedPathRequest) bool { + if req.Path == "" { + return false + } + if _, err := os.Lstat(req.Path); err == nil { + return false + } else if os.IsNotExist(err) { + return true + } + switch strings.ToLower(req.EventKind) { + case "remove", "removed", "delete", "deleted", "rename", "renamed": + return true + default: + return false + } +} + +func (s JSONLSourceSet) pathAllowedByRoot(root, path string) bool { + if s.options.Recursive { + return pathIsUnderRoot(path, root) + } + return samePath(filepath.Dir(path), root) +} + +func (s JSONLSourceSet) sourceRef( + root string, + path string, + info os.FileInfo, +) (SourceRef, bool) { + if !s.matchesExtension(path) { + return SourceRef{}, false + } + if !s.pathIncluded(root, path) { + return SourceRef{}, false + } + if s.options.Include != nil && !s.options.Include(path, info) { + return SourceRef{}, false + } + return s.sourceRefFromPath(root, path) +} + +func (s JSONLSourceSet) sourceRefFromPath( + root string, + path string, +) (SourceRef, bool) { + rel, err := filepath.Rel(root, path) + if err != nil { + return SourceRef{}, false + } + displayPath := firstNonEmptyJSONLString( + callPathFunc(s.options.DisplayPath, root, path), + path, + ) + fingerprintKey := firstNonEmptyJSONLString( + callPathFunc(s.options.FingerprintKey, root, path), + displayPath, + ) + key := firstNonEmptyJSONLString( + callPathFunc(s.options.Key, root, path), + displayPath, + ) + return SourceRef{ + Provider: s.provider, + Key: key, + DisplayPath: displayPath, + FingerprintKey: fingerprintKey, + ProjectHint: callPathFunc(s.options.ProjectHint, root, path), + Opaque: JSONLSource{ + Root: root, + Path: path, + RelPath: rel, + }, + }, true +} + +func (s JSONLSourceSet) discoveredSourceForCandidate( + ctx context.Context, + candidate SourceRef, +) (SourceRef, bool, error) { + discovered, err := s.Discover(ctx) + if err != nil { + return SourceRef{}, false, err + } + 
for _, source := range discovered { + if source.Provider == candidate.Provider && source.Key == candidate.Key { + return source, true, nil + } + } + return candidate, true, nil +} + +func (s JSONLSourceSet) pathIncluded(root, path string) bool { + return s.options.IncludePath == nil || s.options.IncludePath(root, path) +} + +func (s JSONLSourceSet) descendPathIncluded(root, path string) bool { + return s.options.DescendPath == nil || s.options.DescendPath(root, path) +} + +func (s JSONLSourceSet) sourcePathAllowedByDescendPath(root, path string) bool { + if s.options.DescendPath == nil { + return true + } + rel, err := filepath.Rel(root, path) + if err != nil { + return false + } + dir := filepath.Dir(rel) + if dir == "." { + return true + } + current := root + for part := range strings.SplitSeq(dir, string(filepath.Separator)) { + if part == "" || part == "." || part == ".." { + return false + } + current = filepath.Join(current, part) + if !s.descendPathIncluded(root, current) { + return false + } + } + return true +} + +func (s JSONLSourceSet) matchesExtension(path string) bool { + ext := filepath.Ext(path) + return slices.Contains(s.extensions, ext) +} + +func (s JSONLSourceSet) includeGlobs() []string { + globs := make([]string, 0, len(s.extensions)) + for _, ext := range s.extensions { + globs = append(globs, "*"+ext) + } + return globs +} + +func (s JSONLSourceSet) sessionID(root, path string) string { + if s.options.SessionIDFromPath != nil { + return s.options.SessionIDFromPath(root, path) + } + return strings.TrimSuffix(filepath.Base(path), filepath.Ext(path)) +} + +func (s JSONLSourceSet) lookupIDValid(rawID string) bool { + if s.options.LookupIDValid != nil { + return s.options.LookupIDValid(rawID) + } + return IsValidSessionID(rawID) +} + +func (s JSONLSourceSet) pathFromSource( + ctx context.Context, + source SourceRef, +) (string, bool, error) { + switch src := source.Opaque.(type) { + case JSONLSource: + if src.Path != "" { + return src.Path, true, 
nil + } + case *JSONLSource: + if src != nil && src.Path != "" { + return src.Path, true, nil + } + } + for _, candidate := range []string{ + source.DisplayPath, + source.FingerprintKey, + source.Key, + } { + if ref, ok, err := s.sourceForPath(ctx, candidate); err != nil { + return "", false, err + } else if ok { + src := ref.Opaque.(JSONLSource) + return src.Path, true, nil + } + } + return "", false, nil +} + +func cleanJSONLRoots(roots []string) []string { + cleaned := make([]string, 0, len(roots)) + for _, root := range roots { + if root == "" { + continue + } + cleaned = append(cleaned, filepath.Clean(root)) + } + return cleaned +} + +func normalizeJSONLExtensions(exts []string) []string { + if len(exts) == 0 { + return []string{".jsonl"} + } + seen := make(map[string]struct{}, len(exts)) + normalized := make([]string, 0, len(exts)) + for _, ext := range exts { + if ext == "" { + continue + } + if !strings.HasPrefix(ext, ".") { + ext = "." + ext + } + if _, ok := seen[ext]; ok { + continue + } + seen[ext] = struct{}{} + normalized = append(normalized, ext) + } + if len(normalized) == 0 { + return []string{".jsonl"} + } + sort.Strings(normalized) + return normalized +} + +func addJSONLSource( + source SourceRef, + sources *[]SourceRef, + seen map[string]struct{}, +) bool { + key := string(source.Provider) + "\x00" + source.Key + if _, ok := seen[key]; ok { + return false + } + seen[key] = struct{}{} + *sources = append(*sources, source) + return true +} + +func sortJSONLSources(sources []SourceRef) { + sort.Slice(sources, func(i, j int) bool { + if sources[i].DisplayPath != sources[j].DisplayPath { + return sources[i].DisplayPath < sources[j].DisplayPath + } + return sources[i].Key < sources[j].Key + }) +} + +func callPathFunc(fn func(root, path string) string, root, path string) string { + if fn == nil { + return "" + } + return fn(root, path) +} + +func pathIsUnderRoot(path, root string) bool { + rel, err := filepath.Rel(root, path) + return err == nil && rel 
!= "." && rel != ".." && + !strings.HasPrefix(rel, ".."+string(filepath.Separator)) +} + +func samePath(a, b string) bool { + return filepath.Clean(a) == filepath.Clean(b) +} + +func firstNonEmptyJSONLString(values ...string) string { + for _, value := range values { + if value != "" { + return value + } + } + return "" +} + +func hashJSONLSourceFile(path string) (string, error) { + f, err := os.Open(path) + if err != nil { + return "", fmt.Errorf("open %s: %w", path, err) + } + defer f.Close() + + h := sha256.New() + if _, err := io.Copy(h, f); err != nil { + return "", fmt.Errorf("hash %s: %w", path, err) + } + return fmt.Sprintf("%x", h.Sum(nil)), nil +} diff --git a/internal/parser/jsonl_source_set_options.go b/internal/parser/jsonl_source_set_options.go new file mode 100644 index 000000000..1ff3256d9 --- /dev/null +++ b/internal/parser/jsonl_source_set_options.go @@ -0,0 +1,139 @@ +package parser + +import ( + "context" + "os" +) + +// jsonlParseFileFunc parses one discovered source file into zero or more +// sessions plus the IDs of any sessions to exclude. +type jsonlParseFileFunc func( + ctx context.Context, path string, req ParseRequest, +) ([]ParseResult, []string, error) + +// jsonlOption configures a JSONLSourceSet (or DirectoryJSONLSourceSet) at +// construction. Options compose left to right; a later option of the same kind +// overwrites an earlier one. Every field has a sensible zero value, so a source +// set only states what differs from the default. +type jsonlOption func(*JSONLSourceSetOptions) + +// --- discovery shape --- + +// withRecursive traverses subdirectories below each root rather than only the +// direct children. +func withRecursive() jsonlOption { + return func(o *JSONLSourceSetOptions) { o.Recursive = true } +} + +// withExtensions restricts sources to the given file extensions (default +// .jsonl). Matching is case-sensitive to mirror legacy discovery. 
+func withExtensions(exts ...string) jsonlOption { + return func(o *JSONLSourceSetOptions) { o.Extensions = exts } +} + +// withContentHashing includes a full content hash in the source fingerprint. +// Use only when size/mtime freshness is insufficient. +func withContentHashing() jsonlOption { + return func(o *JSONLSourceSetOptions) { o.Hash = true } +} + +// withSymlinkFollowing treats symlinks to both directories and regular files as +// traversable/source candidates. It is the common bundle for providers whose +// legacy discovery followed symlinked session trees. +func withSymlinkFollowing() jsonlOption { + return func(o *JSONLSourceSetOptions) { + o.FollowSymlinkDirs = true + o.FollowSymlinkFiles = true + } +} + +// withFollowSymlinkFiles treats symlinks to regular files as sources. +func withFollowSymlinkFiles() jsonlOption { + return func(o *JSONLSourceSetOptions) { o.FollowSymlinkFiles = true } +} + +// withDescendPath gates which directories recursive discovery descends into and +// which source ancestors a changed path may sit under. +func withDescendPath(fn func(root, path string) bool) jsonlOption { + return func(o *JSONLSourceSetOptions) { o.DescendPath = fn } +} + +// withIncludePath sets the path-only source predicate, also used for +// deleted/renamed changed paths where os.FileInfo is unavailable. +func withIncludePath(fn func(root, path string) bool) jsonlOption { + return func(o *JSONLSourceSetOptions) { o.IncludePath = fn } +} + +// withInclude sets a source predicate for existing files that also sees the +// os.FileInfo. It is not called for deleted/renamed changed paths. +func withInclude(fn func(path string, info os.FileInfo) bool) jsonlOption { + return func(o *JSONLSourceSetOptions) { o.Include = fn } +} + +// --- identity / metadata --- + +// withKey sets the stable per-source dedup key. 
+func withKey(fn func(root, path string) string) jsonlOption { + return func(o *JSONLSourceSetOptions) { o.Key = fn } +} + +// withFingerprintKey overrides the persisted lookup/freshness identity when the +// display path is not the value that should survive a provider migration. +func withFingerprintKey(fn func(root, path string) string) jsonlOption { + return func(o *JSONLSourceSetOptions) { o.FingerprintKey = fn } +} + +// withProjectHint sets display-only project metadata for a source. +func withProjectHint(fn func(root, path string) string) jsonlOption { + return func(o *JSONLSourceSetOptions) { o.ProjectHint = fn } +} + +// withSessionIDFromPath sets the raw (unprefixed) session ID used by FindSource +// fallback lookups. +func withSessionIDFromPath(fn func(root, path string) string) jsonlOption { + return func(o *JSONLSourceSetOptions) { o.SessionIDFromPath = fn } +} + +// --- lookup --- + +// withLookupIDValid overrides the IsValidSessionID gate for the FindSource +// discovery fallback, for providers whose IDs carry separators it rejects. +func withLookupIDValid(fn func(rawID string) bool) jsonlOption { + return func(o *JSONLSourceSetOptions) { o.LookupIDValid = fn } +} + +// withRawSessionIDForLookup normalizes a raw session ID before the FindSource +// discovery comparison. +func withRawSessionIDForLookup(fn func(rawID string) string) jsonlOption { + return func(o *JSONLSourceSetOptions) { o.RawSessionIDForLookup = fn } +} + +// withRawSessionIDSourceFiles reconstructs candidate file paths from a raw +// session ID for providers whose IDs encode the on-disk layout. +func withRawSessionIDSourceFiles( + fn func(roots []string, rawID string) []string, +) jsonlOption { + return func(o *JSONLSourceSetOptions) { o.RawSessionIDSourceFiles = fn } +} + +// withStoredPathFallbackRoot resolves the configured root for a stored source +// path that is not under any current root. 
+func withStoredPathFallbackRoot( + fn func(storedPath string) (string, bool), +) jsonlOption { + return func(o *JSONLSourceSetOptions) { o.StoredPathFallbackRoot = fn } +} + +// --- parse --- + +// withParseFile makes the source set a full SourceSet by supplying its parse +// step. Leave it unset for discovery-only embedders that supply their own Parse. +func withParseFile(fn jsonlParseFileFunc) jsonlOption { + return func(o *JSONLSourceSetOptions) { o.ParseFile = fn } +} + +// withForceReplace marks every non-empty ParseFile outcome as a full +// replacement of the source's existing sessions. +func withForceReplace() jsonlOption { + return func(o *JSONLSourceSetOptions) { o.ForceReplace = true } +} diff --git a/internal/parser/multi_session_container.go b/internal/parser/multi_session_container.go new file mode 100644 index 000000000..9e9c79091 --- /dev/null +++ b/internal/parser/multi_session_container.go @@ -0,0 +1,447 @@ +package parser + +import ( + "context" + "fmt" +) + +// multi_session_container.go provides a reusable source-set, provider, and +// factory for agents whose physical source is a *container* of many sessions +// surfaced to the engine as virtual per-member paths. Shelley (one SQLite DB -> +// many conversations) and Aider (one history file -> many runs) are the first +// two providers built on it; Zed, Kiro, OpenCode, and the other multi-session +// containers can follow. +// +// All agent-specific behavior is supplied through functional options (with*()), +// so a new special case is added as a new option rather than by widening a +// constructor or growing an interface. + +// multiSessionSource is the engine-visible Opaque payload for a container +// source. MemberID == "" means the source is the whole container (fan out every +// member on parse); a non-empty MemberID identifies a single member. 
+type multiSessionSource struct {
+	// Root is the configured root the source was resolved under.
+	Root string
+	// Path is the canonical source path: a container path or a virtual member
+	// path.
+	Path string
+	// Container is the physical container backing the source.
+	Container string
+	// MemberID names one member; it is empty for a whole-container source.
+	MemberID string
+}
+
+// multiSessionMatch is the result of classifying a path or looking up a
+// member. It carries the canonical source path (container or virtual member
+// path), the physical container, and the member ID when the match is a single
+// member. ProjectHint is copied onto the SourceRef for providers that attribute
+// a project at discovery time.
+type multiSessionMatch struct {
+	Path        string
+	Container   string
+	MemberID    string
+	ProjectHint string
+}
+
+// multiSessionConfig collects the provider-specific callbacks and flags that
+// the with*() options fill in for a multiSessionContainerSourceSet.
+type multiSessionConfig struct {
+	// discoverContainers lists the physical container paths under one root.
+	// Every path becomes a whole-container source that fans out on parse.
+	discoverContainers func(root string) []string
+	// discoverSources lists fully-formed matches under one root. It serves
+	// providers that surface individual members (or a mix of members and
+	// containers) at discovery time instead of one source per container.
+	// Mutually exclusive with discoverContainers; when both are set,
+	// discoverMatches consults only discoverSources.
+	discoverSources func(root string) []multiSessionMatch
+	// watchRoots builds the provider WatchPlan roots from the configured roots.
+	watchRoots func(roots []string) []WatchRoot
+	// classifyPath resolves a stored or changed path to its container/member.
+	// allowMissing loosens existence checks so that a deleted container (or a
+	// sibling such as a SQLite WAL file) still classifies for changed-path
+	// tombstones.
+	classifyPath func(root, path string, allowMissing bool) (multiSessionMatch, bool)
+	// findMember resolves a raw session ID to its member match under one root.
+	findMember func(root, rawID string) (multiSessionMatch, bool)
+	// storedPathFallback resolves a stored path that classifyPath could not
+	// match directly, for example a canonical remote-sync path that has to be
+	// mapped back onto a local container. Optional.
+	storedPathFallback func(root, path string) (multiSessionMatch, bool)
+	// fingerprint produces the freshness fingerprint (Size/MTime/Hash) of a
+	// source. The base fills in the Key afterwards.
+	fingerprint func(src multiSessionSource) (SourceFingerprint, error)
+	// parseContainer parses every member of a container, one result per
+	// member. It receives the full ParseRequest so a closure can read
+	// req.Machine and per-request hints such as req.Source.ProjectHint.
+	parseContainer func(src multiSessionSource, req ParseRequest) ([]ParseResult, error)
+	// parseMember parses one member. A nil result is a clean no-session.
+	parseMember func(src multiSessionSource, req ParseRequest) (*ParseResult, error)
+	// memberPresent tells RequireFreshSource lookups whether a source still
+	// exists. Optional; without it every source counts as present.
+	memberPresent func(src multiSessionSource) bool
+	// stampContainerHash copies the request fingerprint hash onto every fanned
+	// out container result. It is meant for containers whose members all share
+	// the container's content hash. Member parses are stamped regardless.
+	stampContainerHash bool
+}
+
+// multiSessionOption mutates a multiSessionConfig while the source set is
+// being constructed.
+type multiSessionOption func(*multiSessionConfig)
+
+// withContainerDiscovery sets the per-root container lister
+// (multiSessionConfig.discoverContainers).
+func withContainerDiscovery(fn func(root string) []string) multiSessionOption {
+	return func(cfg *multiSessionConfig) {
+		cfg.discoverContainers = fn
+	}
+}
+
+// withSourceDiscovery sets the per-root match lister
+// (multiSessionConfig.discoverSources).
+func withSourceDiscovery(
+	fn func(root string) []multiSessionMatch,
+) multiSessionOption {
+	return func(cfg *multiSessionConfig) {
+		cfg.discoverSources = fn
+	}
+}
+
+// withWatchRoots sets the WatchPlan root builder
+// (multiSessionConfig.watchRoots).
+func withWatchRoots(fn func(roots []string) []WatchRoot) multiSessionOption {
+	return func(cfg *multiSessionConfig) {
+		cfg.watchRoots = fn
+	}
+}
+
+// withChangedPathClassifier sets the stored/changed path classifier
+// (multiSessionConfig.classifyPath).
+func withChangedPathClassifier(
+	fn func(root, path string, allowMissing bool) (multiSessionMatch, bool),
+) multiSessionOption {
+	return func(cfg *multiSessionConfig) {
+		cfg.classifyPath = fn
+	}
+}
+
+// withMemberLookup sets the raw-session-ID resolver
+// (multiSessionConfig.findMember).
+func withMemberLookup(
+	fn func(root, rawID string) (multiSessionMatch, bool),
+) multiSessionOption {
+	return func(cfg *multiSessionConfig) {
+		cfg.findMember = fn
+	}
+}
+
+// withStoredPathFallback sets the optional stored-path fallback resolver
+// (multiSessionConfig.storedPathFallback).
+func withStoredPathFallback(
+	fn func(root, path string) (multiSessionMatch, bool),
+) multiSessionOption {
+	return func(cfg *multiSessionConfig) {
+		cfg.storedPathFallback = fn
+	}
+}
+
+// withFingerprint sets the freshness fingerprint function
+// (multiSessionConfig.fingerprint).
+func withFingerprint(
+	fn func(src multiSessionSource) (SourceFingerprint, error),
+) multiSessionOption {
+	return func(cfg *multiSessionConfig) {
+		cfg.fingerprint = fn
+	}
+}
+
+// withContainerParse sets the whole-container parser
+// (multiSessionConfig.parseContainer).
+func withContainerParse(
+	fn func(src multiSessionSource, req ParseRequest) ([]ParseResult, error),
+) multiSessionOption {
+	return func(cfg *multiSessionConfig) {
+		cfg.parseContainer = fn
+	}
+}
+
+// withMemberParse sets the single-member parser
+// (multiSessionConfig.parseMember).
+func withMemberParse(
+	fn func(src multiSessionSource, req ParseRequest) (*ParseResult, error),
+) multiSessionOption {
+	return func(cfg *multiSessionConfig) {
+		cfg.parseMember = fn
+	}
+}
+
+// withMemberPresence sets the optional presence probe
+// (multiSessionConfig.memberPresent).
+func withMemberPresence(fn func(src multiSessionSource) bool) multiSessionOption {
+	return func(cfg *multiSessionConfig) {
+		cfg.memberPresent = fn
+	}
+}
+
+// withContainerHashStamping turns on multiSessionConfig.stampContainerHash.
+func withContainerHashStamping() multiSessionOption {
+	return func(cfg *multiSessionConfig) {
+		cfg.stampContainerHash = true
+	}
+}
+
+// newMultiSessionContainerSourceSet applies opts in order and returns the
+// source set for agent over the cleaned roots. It panics with a message naming
+// the missing option when a required callback was not supplied; the checks run
+// in the order listed below and the first failure wins.
+func newMultiSessionContainerSourceSet(
+	agent AgentType,
+	roots []string,
+	opts ...multiSessionOption,
+) multiSessionContainerSourceSet {
+	var cfg multiSessionConfig
+	for _, apply := range opts {
+		apply(&cfg)
+	}
+	required := []struct {
+		missing bool
+		message string
+	}{
+		{
+			cfg.discoverContainers == nil && cfg.discoverSources == nil,
+			"multi-session container: missing withContainerDiscovery or withSourceDiscovery",
+		},
+		{cfg.watchRoots == nil, "multi-session container: missing withWatchRoots"},
+		{cfg.classifyPath == nil, "multi-session container: missing withChangedPathClassifier"},
+		{cfg.findMember == nil, "multi-session container: missing withMemberLookup"},
+		{cfg.fingerprint == nil, "multi-session container: missing withFingerprint"},
+		{cfg.parseContainer == nil, "multi-session container: missing withContainerParse"},
+		{cfg.parseMember == nil, "multi-session container: missing withMemberParse"},
+	}
+	for _, check := range required {
+		if check.missing {
+			panic(check.message)
+		}
+	}
+	return multiSessionContainerSourceSet{
+		agent: agent,
+		roots: cleanJSONLRoots(roots),
+		cfg:   cfg,
+	}
+}
+
+// multiSessionContainerSourceSet is the SourceSet base for multi-session
+// container providers: an agent, its cleaned roots, and the callbacks that
+// carry the agent-specific behavior.
+type multiSessionContainerSourceSet struct {
+	agent AgentType
+	roots []string
+	cfg   multiSessionConfig
+}
+
+// Discover collects the sources under every configured root, skips matches
+// with an empty Path, deduplicates through addJSONLSource, and returns them
+// sorted by sortJSONLSources. Cancellation is checked once per root.
+func (s multiSessionContainerSourceSet) Discover(
+	ctx context.Context,
+) ([]SourceRef, error) {
+	var (
+		refs []SourceRef
+		seen = map[string]struct{}{}
+	)
+	for _, root := range s.roots {
+		if err := ctx.Err(); err != nil {
+			return nil, err
+		}
+		for _, match := range s.discoverMatches(root) {
+			if match.Path != "" {
+				addJSONLSource(s.sourceRef(root, match), &refs, seen)
+			}
+		}
+	}
+	sortJSONLSources(refs)
+	return refs, nil
+}
+
+// discoverMatches yields the discovery matches for one root: either the
+// member-level matches from withSourceDiscovery, or one whole-container match
+// per path from withContainerDiscovery.
+func (s multiSessionContainerSourceSet) discoverMatches(
+	root string,
+) []multiSessionMatch {
+	if discover := s.cfg.discoverSources; discover != nil {
+		return discover(root)
+	}
+	paths := s.cfg.discoverContainers(root)
+	matches := make([]multiSessionMatch, 0, len(paths))
+	for _, p := range paths {
+		if p != "" {
+			// A whole-container match: Path and Container coincide and
+			// MemberID stays empty.
+			matches = append(matches, multiSessionMatch{Path: p, Container: p})
+		}
+	}
+	return matches
+}
+
+// WatchPlan returns the watch roots produced by the provider's watchRoots
+// callback for the configured roots. The context is not consulted.
+func (s multiSessionContainerSourceSet) WatchPlan(
+	context.Context,
+) (WatchPlan, error) {
+	roots := s.cfg.watchRoots(s.roots)
+	return WatchPlan{Roots: roots}, nil
+}
+
+// SourcesForChangedPath classifies req.Path against each configured root in
+// order and returns the first match as a single-element slice. Classification
+// always runs with allowMissing set, so a path that no longer exists can still
+// resolve. No match yields (nil, nil).
+func (s multiSessionContainerSourceSet) SourcesForChangedPath(
+	ctx context.Context,
+	req ChangedPathRequest,
+) ([]SourceRef, error) {
+	if err := ctx.Err(); err != nil {
+		return nil, err
+	}
+	for _, root := range s.roots {
+		match, ok := s.cfg.classifyPath(root, req.Path, true)
+		if ok {
+			return []SourceRef{s.sourceRef(root, match)}, nil
+		}
+	}
+	return nil, nil
+}
+
+// FindSource resolves a stored session back to its source. It tries
+// req.StoredFilePath and then req.FingerprintKey; for each non-empty path it
+// first classifies the path against every root (honoring
+// req.RequireFreshSource through memberPresent) and then, if configured, asks
+// storedPathFallback. When neither path resolves it falls back to
+// req.RawSessionID via findMember.
+func (s multiSessionContainerSourceSet) FindSource(
+	ctx context.Context,
+	req FindSourceRequest,
+) (SourceRef, bool, error) {
+	if err := ctx.Err(); err != nil {
+		return SourceRef{}, false, err
+	}
+	stored := []string{req.StoredFilePath, req.FingerprintKey}
+	for _, path := range stored {
+		if path == "" {
+			continue
+		}
+		for _, root := range s.roots {
+			match, ok := s.cfg.classifyPath(root, path, false)
+			if !ok {
+				continue
+			}
+			stale := req.RequireFreshSource && !s.memberPresent(match.toSource(root))
+			if !stale {
+				return s.sourceRef(root, match), true, nil
+			}
+		}
+		// NOTE(review): fallback matches are returned without the
+		// RequireFreshSource presence gate applied above -- confirm that is
+		// intended.
+		if fallback := s.cfg.storedPathFallback; fallback != nil {
+			for _, root := range s.roots {
+				match, ok := fallback(root, path)
+				if ok {
+					return s.sourceRef(root, match), true, nil
+				}
+			}
+		}
+	}
+	if req.RawSessionID == "" {
+		return SourceRef{}, false, nil
+	}
+	for _, root := range s.roots {
+		if err := ctx.Err(); err != nil {
+			return SourceRef{}, false, err
+		}
+		match, ok := s.cfg.findMember(root, req.RawSessionID)
+		if ok {
+			return s.sourceRef(root, match), true, nil
+		}
+	}
+	return SourceRef{}, false, nil
+}
+
+// Fingerprint resolves source to its container payload, asks the provider's
+// fingerprint callback for the freshness values, and then sets the Key to the
+// first non-empty of source.FingerprintKey, source.Key, and the resolved path.
+func (s multiSessionContainerSourceSet) Fingerprint(
+	ctx context.Context,
+	source SourceRef,
+) (SourceFingerprint, error) {
+	if err := ctx.Err(); err != nil {
+		return SourceFingerprint{}, err
+	}
+	src, resolved := s.sourceFromRef(source)
+	if !resolved {
+		return SourceFingerprint{}, fmt.Errorf("%s source path unavailable", s.agent)
+	}
+	fp, err := s.cfg.fingerprint(src)
+	if err != nil {
+		return SourceFingerprint{}, err
+	}
+	fp.Key = firstNonEmptyJSONLString(source.FingerprintKey, source.Key, src.Path)
+	return fp, nil
+}
+
+// parse runs the member or container parser for src. A whole-container source
+// (empty MemberID) fans out one outcome per member and stamps the request
+// fingerprint hash only when stampContainerHash is set; a member source yields
+// a single outcome that is stamped whenever the hash is non-empty. An empty
+// container result or a nil member result becomes the skip outcome. Every
+// outcome is marked complete and force-replacing.
+func (s multiSessionContainerSourceSet) parse(
+	src multiSessionSource, req ParseRequest,
+) (ParseOutcome, error) {
+	hash := req.Fingerprint.Hash
+
+	if src.MemberID == "" {
+		members, err := s.cfg.parseContainer(src, req)
+		if err != nil {
+			return ParseOutcome{}, err
+		}
+		if len(members) == 0 {
+			return multiSessionSkipOutcome(), nil
+		}
+		stamp := hash != "" && s.cfg.stampContainerHash
+		outcomes := make([]ParseResultOutcome, len(members))
+		for i := range members {
+			if stamp {
+				members[i].Session.File.Hash = hash
+			}
+			outcomes[i] = ParseResultOutcome{
+				Result:      members[i],
+				DataVersion: DataVersionCurrent,
+			}
+		}
+		return ParseOutcome{
+			Results:           outcomes,
+			ResultSetComplete: true,
+			ForceReplace:      true,
+		}, nil
+	}
+
+	member, err := s.cfg.parseMember(src, req)
+	switch {
+	case err != nil:
+		return ParseOutcome{}, err
+	case member == nil:
+		return multiSessionSkipOutcome(), nil
+	}
+	if hash != "" {
+		member.Session.File.Hash = hash
+	}
+	single := ParseResultOutcome{
+		Result:      *member,
+		DataVersion: DataVersionCurrent,
+	}
+	return ParseOutcome{
+		Results:           []ParseResultOutcome{single},
+		ResultSetComplete: true,
+		ForceReplace:      true,
+	}, nil
+}
+
+// multiSessionSkipOutcome is the outcome for a parse that produced no session:
+// complete, force-replacing, and flagged SkipNoSession.
+func multiSessionSkipOutcome() ParseOutcome {
+	var outcome ParseOutcome
+	outcome.ResultSetComplete = true
+	outcome.ForceReplace = true
+	outcome.SkipReason = SkipNoSession
+	return outcome
+}
+
+// memberPresent reports whether src still exists according to the optional
+// provider probe; without a probe every source is considered present.
+func (s multiSessionContainerSourceSet) memberPresent(src multiSessionSource) bool {
+	if probe := s.cfg.memberPresent; probe != nil {
+		return probe(src)
+	}
+	return true
+}
+
+// sourceRef builds the engine-facing SourceRef for match under root. Key,
+// DisplayPath, and FingerprintKey all carry match.Path, and Opaque carries the
+// multiSessionSource payload.
+func (s multiSessionContainerSourceSet) sourceRef(
+	root string, match multiSessionMatch,
+) SourceRef {
+	path := match.Path
+	return SourceRef{
+		Provider:       s.agent,
+		Key:            path,
+		DisplayPath:    path,
+		FingerprintKey: path,
+		ProjectHint:    match.ProjectHint,
+		Opaque:         match.toSource(root),
+	}
+}
+
+// toSource converts the match into the Opaque payload for root. ProjectHint is
+// not part of the payload.
+func (m multiSessionMatch) toSource(root string) multiSessionSource {
+	var src multiSessionSource
+	src.Root = root
+	src.Path = m.Path
+	src.Container = m.Container
+	src.MemberID = m.MemberID
+	return src
+}
+
+// sourceFromRef recovers the multiSessionSource behind a SourceRef. A value
+// payload is returned as-is and is valid only when Container and Path are both
+// set. A usable pointer payload is dereferenced. Otherwise the ref's
+// DisplayPath, FingerprintKey, and Key are classified against each root, in
+// that order, without allowMissing.
+func (s multiSessionContainerSourceSet) sourceFromRef(
+	source SourceRef,
+) (multiSessionSource, bool) {
+	switch payload := source.Opaque.(type) {
+	case multiSessionSource:
+		// NOTE(review): an incomplete value payload returns false right here,
+		// whereas an incomplete pointer payload falls through to path
+		// classification -- confirm the asymmetry is intended.
+		usable := payload.Container != "" && payload.Path != ""
+		return payload, usable
+	case *multiSessionSource:
+		if payload != nil && payload.Container != "" && payload.Path != "" {
+			return *payload, true
+		}
+	}
+	candidates := []string{source.DisplayPath, source.FingerprintKey, source.Key}
+	for _, candidate := range candidates {
+		if candidate == "" {
+			continue
+		}
+		for _, root := range s.roots {
+			match, ok := s.cfg.classifyPath(root, candidate, false)
+			if ok {
+				return match.toSource(root), true
+			}
+		}
+	}
+	return multiSessionSource{}, false
+}
+
+var _ SourceSet = multiSessionContainerSourceSet{}
+
+// Parse resolves the request's source and parses it: a member source yields one
+// result, a container source fans out every member. It satisfies the SourceSet
+// interface; sourceSetProvider applies the request/config machine fallback
+// before calling in, so req.Machine is already resolved here.
+func (s multiSessionContainerSourceSet) Parse( + ctx context.Context, + req ParseRequest, +) (ParseOutcome, error) { + if err := ctx.Err(); err != nil { + return ParseOutcome{}, err + } + src, ok := s.sourceFromRef(req.Source) + if !ok { + return ParseOutcome{}, fmt.Errorf("%s source path unavailable", s.agent) + } + return s.parse(src, req) +} + +// newMultiSessionProviderFactory builds a ProviderFactory for a multi-session +// container provider. It is a thin adapter over the generic sourceSetFactory; +// the build closure constructs the agent's configured source set. +func newMultiSessionProviderFactory( + def AgentDef, + caps Capabilities, + build func(cfg ProviderConfig) multiSessionContainerSourceSet, +) ProviderFactory { + return newSourceSetFactory( + def, caps, + func(cfg ProviderConfig) SourceSet { return build(cfg) }, + ) +} diff --git a/internal/parser/provider.go b/internal/parser/provider.go index d8c992a79..f71a5c242 100644 --- a/internal/parser/provider.go +++ b/internal/parser/provider.go @@ -199,6 +199,7 @@ type FindSourceRequest struct { StoredFilePath string FingerprintKey string RequireFreshSource bool + PreferStoredSource bool } // SourceFingerprint is the provider-normalized source freshness identity. 
diff --git a/internal/parser/provider_lookup.go b/internal/parser/provider_lookup.go
new file mode 100644
index 000000000..5013ce400
--- /dev/null
+++ b/internal/parser/provider_lookup.go
@@ -0,0 +1,37 @@
+package parser
+
+import "strings"
+
+// providerFindRequestWithRawSessionID returns a copy of req whose RawSessionID
+// is filled in for def. A raw ID already on the request is normalized;
+// otherwise the raw ID is derived from req.FullSessionID.
+func providerFindRequestWithRawSessionID(
+	def AgentDef,
+	req FindSourceRequest,
+) FindSourceRequest {
+	if req.RawSessionID == "" {
+		req.RawSessionID = providerRawSessionIDFromFull(def, req.FullSessionID)
+	} else {
+		req.RawSessionID = providerNormalizeRawSessionID(def, req.RawSessionID)
+	}
+	return req
+}
+
+// providerNormalizeRawSessionID removes the host prefix from id and then the
+// provider's IDPrefix when the remainder starts with it. An ID without that
+// prefix is returned with only the host prefix removed.
+func providerNormalizeRawSessionID(def AgentDef, id string) string {
+	_, id = StripHostPrefix(id)
+	// TrimPrefix leaves id untouched when the prefix is empty or absent, which
+	// covers both the "no IDPrefix" and the "prefix not present" cases.
+	return strings.TrimPrefix(id, def.IDPrefix)
+}
+
+// providerRawSessionIDFromFull extracts the raw session ID from a full session
+// ID. It returns "" for an empty id and for an id that lacks the provider's
+// non-empty IDPrefix once the host prefix is removed.
+func providerRawSessionIDFromFull(def AgentDef, id string) string {
+	if id == "" {
+		return ""
+	}
+	_, rest := StripHostPrefix(id)
+	switch {
+	case def.IDPrefix == "":
+		return rest
+	case strings.HasPrefix(rest, def.IDPrefix):
+		return strings.TrimPrefix(rest, def.IDPrefix)
+	default:
+		return ""
+	}
+}
diff --git a/internal/parser/sibling_metadata_source_set.go b/internal/parser/sibling_metadata_source_set.go
new file mode 100644
index 000000000..fe9c6a355
--- /dev/null
+++ b/internal/parser/sibling_metadata_source_set.go
@@ -0,0 +1,206 @@
+package parser
+
+import (
+	"context"
+	"crypto/sha256"
+	"fmt"
+	"os"
+	"path/filepath"
+)
+
+// SiblingMetadataSourceSetOptions describes the companion files that map back
+// to a primary JSONL source.
+type SiblingMetadataSourceSetOptions struct {
+	// SiblingGlobs is appended to the IncludeGlobs of every watch root.
+	SiblingGlobs []string
+	// SiblingPaths lists the sibling files of a source; Fingerprint folds each
+	// existing one into the source fingerprint. Optional.
+	SiblingPaths func(root, sourcePath string) []string
+	// SourcePathForSibling maps a changed sibling path to the path of its
+	// primary source. Optional; without it sibling events resolve to nothing.
+	SourcePathForSibling func(root, siblingPath string) (string, bool)
+}
+
+// SiblingMetadataSourceSet extends JSONLSourceSet for source layouts where a
+// primary transcript file has sibling metadata files that affect freshness.
+type SiblingMetadataSourceSet struct {
+	JSONLSourceSet
+	siblingOptions SiblingMetadataSourceSetOptions
+}
+
+// NewSiblingMetadataSourceSet returns a JSONL source helper with sibling
+// metadata event and fingerprint support. options configures the embedded
+// JSONLSourceSet; siblingOptions configures the companion-file handling.
+func NewSiblingMetadataSourceSet(
+	provider AgentType,
+	roots []string,
+	options JSONLSourceSetOptions,
+	siblingOptions SiblingMetadataSourceSetOptions,
+) SiblingMetadataSourceSet {
+	return SiblingMetadataSourceSet{
+		JSONLSourceSet: jsonlSourceSetFromOptions(provider, roots, options),
+		siblingOptions: siblingOptions,
+	}
+}
+
+// WatchPlan returns the embedded JSONL watch plan with SiblingGlobs added to
+// the IncludeGlobs of every root, so sibling file events reach
+// SourcesForChangedPath.
+func (s SiblingMetadataSourceSet) WatchPlan(ctx context.Context) (WatchPlan, error) {
+	plan, err := s.JSONLSourceSet.WatchPlan(ctx)
+	if err != nil {
+		return WatchPlan{}, err
+	}
+	extra := s.siblingOptions.SiblingGlobs
+	if len(extra) == 0 {
+		return plan, nil
+	}
+	for i := range plan.Roots {
+		base := plan.Roots[i].IncludeGlobs
+		// Cap the capacity at the length so append always allocates a fresh
+		// array. The base plan's glob slices are not owned here and may share
+		// a backing array across roots or with the source set's options;
+		// appending in place could write into that shared storage.
+		plan.Roots[i].IncludeGlobs = append(base[:len(base):len(base)], extra...)
+	}
+	return plan, nil
+}
+
+// SourcesForChangedPath first lets the embedded JSONLSourceSet classify the
+// changed path. Only when that yields nothing does it treat the path as a
+// sibling: SourcePathForSibling maps it to a primary source path under each
+// root (restricted to req.WatchRoot when set), and the first primary path that
+// resolves to a source is returned.
+func (s SiblingMetadataSourceSet) SourcesForChangedPath(
+	ctx context.Context,
+	req ChangedPathRequest,
+) ([]SourceRef, error) {
+	sources, err := s.JSONLSourceSet.SourcesForChangedPath(ctx, req)
+	if err != nil || len(sources) > 0 {
+		return sources, err
+	}
+	if err := ctx.Err(); err != nil {
+		return nil, err
+	}
+	if s.siblingOptions.SourcePathForSibling == nil {
+		return nil, nil
+	}
+	for _, root := range s.roots {
+		if req.WatchRoot != "" && !samePath(req.WatchRoot, root) {
+			continue
+		}
+		sourcePath, ok := s.siblingOptions.SourcePathForSibling(root, req.Path)
+		if !ok {
+			continue
+		}
+		source, ok, err := s.sourceForPath(ctx, sourcePath)
+		if err != nil {
+			return nil, err
+		}
+		if ok {
+			return []SourceRef{source}, nil
+		}
+	}
+	return nil, nil
+}
+
+// Fingerprint computes a composite fingerprint over the primary source file
+// and every existing sibling reported by SiblingPaths. Size is the sum of the
+// file sizes, MTimeNS is the newest modification time, and Hash is a SHA-256
+// over one record per file (label, base name, size, mtime, content hash). It
+// returns an error when the source cannot be resolved, is missing, or is a
+// directory; missing siblings and sibling directories are skipped.
+func (s SiblingMetadataSourceSet) Fingerprint(
+	ctx context.Context,
+	source SourceRef,
+) (SourceFingerprint, error) {
+	if err := ctx.Err(); err != nil {
+		return SourceFingerprint{}, err
+	}
+	resolved, ok, err := s.sourceFromRef(ctx, source)
+	if err != nil {
+		return SourceFingerprint{}, err
+	}
+	if !ok {
+		return SourceFingerprint{}, fmt.Errorf("sibling metadata source path unavailable")
+	}
+	// Use the checked assertion: a ref resolved through sourceForPath is only
+	// presumed to carry a JSONLSource, and an unchecked assertion would panic
+	// instead of reporting the problem.
+	src, ok := resolved.Opaque.(JSONLSource)
+	if !ok {
+		return SourceFingerprint{}, fmt.Errorf(
+			"sibling metadata source has unexpected payload type %T",
+			resolved.Opaque,
+		)
+	}
+	path := src.Path
+	info, err := os.Stat(path)
+	if err != nil {
+		return SourceFingerprint{}, fmt.Errorf("stat %s: %w", path, err)
+	}
+	if info.IsDir() {
+		return SourceFingerprint{}, fmt.Errorf("stat %s: source is a directory", path)
+	}
+	fingerprint := SourceFingerprint{
+		Key:     firstNonEmptyJSONLString(source.FingerprintKey, source.Key, path),
+		Size:    info.Size(),
+		MTimeNS: info.ModTime().UnixNano(),
+	}
+	h := sha256.New()
+	if err := addSiblingMetadataFingerprintPart(h, "source", path, info); err != nil {
+		return SourceFingerprint{}, err
+	}
+	if s.siblingOptions.SiblingPaths != nil {
+		for _, siblingPath := range s.siblingOptions.SiblingPaths(src.Root, path) {
+			siblingInfo, err := siblingMetadataFileInfo(siblingPath)
+			if err != nil {
+				return SourceFingerprint{}, err
+			}
+			if siblingInfo == nil {
+				// Missing sibling or a directory: contributes nothing.
+				continue
+			}
+			fingerprint.Size += siblingInfo.Size()
+			if siblingMTime := siblingInfo.ModTime().UnixNano(); siblingMTime > fingerprint.MTimeNS {
+				fingerprint.MTimeNS = siblingMTime
+			}
+			if err := addSiblingMetadataFingerprintPart(
+				h, "sibling", siblingPath, siblingInfo,
+			); err != nil {
+				return SourceFingerprint{}, err
+			}
+		}
+	}
+	fingerprint.Hash = fmt.Sprintf("%x", h.Sum(nil))
+	return fingerprint, nil
+}
+
+// sourceFromRef returns a SourceRef whose Opaque is a usable JSONLSource value.
+// A value or pointer payload with both Root and Path set is accepted directly
+// (the pointer is dereferenced). Otherwise the ref's DisplayPath,
+// FingerprintKey, and Key are resolved through sourceForPath, in that order.
+func (s SiblingMetadataSourceSet) sourceFromRef(
+	ctx context.Context,
+	source SourceRef,
+) (SourceRef, bool, error) {
+	switch src := source.Opaque.(type) {
+	case JSONLSource:
+		if src.Root != "" && src.Path != "" {
+			return source, true, nil
+		}
+	case *JSONLSource:
+		if src != nil && src.Root != "" && src.Path != "" {
+			source.Opaque = *src
+			return source, true, nil
+		}
+	}
+	for _, candidate := range []string{
+		source.DisplayPath,
+		source.FingerprintKey,
+		source.Key,
+	} {
+		// An unset field cannot name a source. Skipping it matches the other
+		// source-set bases and avoids resolving the empty path.
+		if candidate == "" {
+			continue
+		}
+		ref, ok, err := s.sourceForPath(ctx, candidate)
+		if err != nil {
+			return SourceRef{}, false, err
+		}
+		if ok {
+			return ref, true, nil
+		}
+	}
+	return SourceRef{}, false, nil
+}
+
+// siblingMetadataFileInfo stats path. It returns (nil, nil) when the path does
+// not exist or is a directory, so callers can skip it, and a wrapped error for
+// any other stat failure.
+func siblingMetadataFileInfo(path string) (os.FileInfo, error) {
+	info, err := os.Stat(path)
+	if os.IsNotExist(err) {
+		return nil, nil
+	}
+	if err != nil {
+		return nil, fmt.Errorf("stat %s: %w", path, err)
+	}
+	if info.IsDir() {
+		return nil, nil
+	}
+	return info, nil
+}
+
+// addSiblingMetadataFingerprintPart writes one record for the file at path
+// into h: label, base name, size, modification time, and the file's content
+// hash from hashJSONLSourceFile. It returns that helper's error, if any.
+func addSiblingMetadataFingerprintPart(
+	h interface{ Write([]byte) (int, error) },
+	label string,
+	path string,
+	info os.FileInfo,
+) error {
+	hash, err := hashJSONLSourceFile(path)
+	if err != nil {
+		return err
+	}
+	// The write error is deliberately dropped: the only caller in this file
+	// passes a SHA-256 hash, whose Write never fails.
+	_, _ = fmt.Fprintf(
+		h,
+		"%s:%s:%d:%d:%s\n",
+		label,
+		filepath.Base(path),
+		info.Size(),
+		info.ModTime().UnixNano(),
+		hash,
+	)
+	return nil
+}
diff --git a/internal/parser/single_file_source_set.go b/internal/parser/single_file_source_set.go
new file mode 100644
index 000000000..6ded0f3e0
--- /dev/null
+++ b/internal/parser/single_file_source_set.go
@@ -0,0 +1,355 @@
+package parser
+
+import (
+	"context"
+	"fmt"
+	"path/filepath"
+)
+
+// single_file_source_set.go provides a reusable SourceSet for providers whose
+// physical source is a single file that parses into exactly one session: no
+// virtual member paths and no fan-out. Reasonix (transcript + .jsonl.meta
+// sidecar) is the first provider built on it; the other sidecar-fingerprint
+// providers (vibe, commandcode, ...) can follow.
+//
+// Like multiSessionContainerSourceSet, all agent-specific behavior is supplied
+// through functional options (withFile*()), and the type implements SourceSet
+// so it plugs into newSourceSetFactory. The composite/sidecar fingerprint
+// variance lives entirely inside each provider's withFileFingerprint closure,
+// so the base stays agnostic about sidecars until a shared helper is warranted.
+
+// singleFileSource is the engine-visible Opaque payload for a single-file
+// source: one physical file under a configured root.
+type singleFileSource struct {
+	// Root is the configured root the file was resolved under.
+	Root string
+	// Path is the canonical path of the source file.
+	Path string
+}
+
+// singleFileMatch is the result of discovery, classification, and lookup. It
+// holds the canonical source path and an optional project hint that is copied
+// onto the SourceRef.
+type singleFileMatch struct {
+	Path        string
+	ProjectHint string
+}
+
+// toSource converts the match into the Opaque payload for root. ProjectHint is
+// not part of the payload.
+func (m singleFileMatch) toSource(root string) singleFileSource {
+	var src singleFileSource
+	src.Root = root
+	src.Path = m.Path
+	return src
+}
+
+// singleFileConfig collects the provider-specific callbacks and flags that the
+// withFile*() options fill in for a singleFileSourceSet.
+type singleFileConfig struct {
+	// discoverFiles lists the source files under one root.
+	discoverFiles func(root string) []singleFileMatch
+	// watchRoots builds the provider WatchPlan roots from the configured roots.
+	watchRoots func(roots []string) []WatchRoot
+	// classifyPath resolves a stored or changed path (a sidecar event
+	// included) to its source. allowMissing loosens existence checks for
+	// changed-path tombstones.
+	classifyPath func(root, path string, allowMissing bool) (singleFileMatch, bool)
+	// findFile resolves a raw session ID to its source under one root.
+	findFile func(root, rawID string) (singleFileMatch, bool)
+	// fingerprint produces the freshness fingerprint (Size/MTime/Hash) of a
+	// source; the base fills in the Key afterwards. Any sidecar/composite
+	// folding belongs in this callback.
+	fingerprint func(src singleFileSource) (SourceFingerprint, error)
+	// parseFile parses the file into zero or more sessions and also returns
+	// the IDs of sessions to exclude (remove). No results together with no
+	// exclusions is a clean no-session. It receives the full ParseRequest so
+	// the closure can do its own fingerprint stamping and project-hint
+	// fallback.
+	parseFile func(src singleFileSource, req ParseRequest) ([]ParseResult, []string, error)
+	// alwaysComplete makes Parse report a complete result set even when
+	// parseFile yields nothing, rather than emitting SkipNoSession. Providers
+	// whose parse drives session removal through exclusions (cowork) set it.
+	alwaysComplete bool
+}
+
+// singleFileOption mutates a singleFileConfig while the source set is being
+// constructed.
+type singleFileOption func(*singleFileConfig)
+
+// withFileDiscovery sets the per-root file lister
+// (singleFileConfig.discoverFiles).
+func withFileDiscovery(
+	fn func(root string) []singleFileMatch,
+) singleFileOption {
+	return func(cfg *singleFileConfig) {
+		cfg.discoverFiles = fn
+	}
+}
+
+// withFileWatchRoots sets the WatchPlan root builder
+// (singleFileConfig.watchRoots).
+func withFileWatchRoots(
+	fn func(roots []string) []WatchRoot,
+) singleFileOption {
+	return func(cfg *singleFileConfig) {
+		cfg.watchRoots = fn
+	}
+}
+
+// withFileChangedPathClassifier sets the stored/changed path classifier
+// (singleFileConfig.classifyPath).
+func withFileChangedPathClassifier(
+	fn func(root, path string, allowMissing bool) (singleFileMatch, bool),
+) singleFileOption {
+	return func(cfg *singleFileConfig) {
+		cfg.classifyPath = fn
+	}
+}
+
+// withFileLookup sets the raw-session-ID resolver (singleFileConfig.findFile).
+func withFileLookup(
+	fn func(root, rawID string) (singleFileMatch, bool),
+) singleFileOption {
+	return func(cfg *singleFileConfig) {
+		cfg.findFile = fn
+	}
+}
+
+// withFileFingerprint sets the freshness fingerprint function
+// (singleFileConfig.fingerprint).
+func withFileFingerprint(
+	fn func(src singleFileSource) (SourceFingerprint, error),
+) singleFileOption {
+	return func(cfg *singleFileConfig) {
+		cfg.fingerprint = fn
+	}
+}
+
+// withFileParse sets the file parser (singleFileConfig.parseFile).
+func withFileParse(
+	fn func(src singleFileSource, req ParseRequest) ([]ParseResult, []string, error),
+) singleFileOption {
+	return func(cfg *singleFileConfig) {
+		cfg.parseFile = fn
+	}
+}
+
+// withAlwaysCompleteResultSet reports the result set as complete even when a
+// parse yields no sessions, instead of skipping. Used by providers whose parse
+// removes sessions via exclusions.
+func withAlwaysCompleteResultSet() singleFileOption {
+	return func(cfg *singleFileConfig) {
+		cfg.alwaysComplete = true
+	}
+}
+
+// newSingleFileSourceSet applies opts in order and returns the source set for
+// agent over the cleaned roots. It panics with a message naming the missing
+// option when a required callback was not supplied; the checks run in the
+// order listed below and the first failure wins.
+func newSingleFileSourceSet(
+	agent AgentType,
+	roots []string,
+	opts ...singleFileOption,
+) singleFileSourceSet {
+	var cfg singleFileConfig
+	for _, apply := range opts {
+		apply(&cfg)
+	}
+	required := []struct {
+		missing bool
+		message string
+	}{
+		{cfg.discoverFiles == nil, "single-file source set: missing withFileDiscovery"},
+		{cfg.watchRoots == nil, "single-file source set: missing withFileWatchRoots"},
+		{cfg.classifyPath == nil, "single-file source set: missing withFileChangedPathClassifier"},
+		{cfg.findFile == nil, "single-file source set: missing withFileLookup"},
+		{cfg.fingerprint == nil, "single-file source set: missing withFileFingerprint"},
+		{cfg.parseFile == nil, "single-file source set: missing withFileParse"},
+	}
+	for _, check := range required {
+		if check.missing {
+			panic(check.message)
+		}
+	}
+	return singleFileSourceSet{
+		agent: agent,
+		roots: cleanJSONLRoots(roots),
+		cfg:   cfg,
+	}
+}
+
+// singleFileSourceSet is the SourceSet base for single-file providers: an
+// agent, its cleaned roots, and the callbacks that carry the agent-specific
+// behavior.
+type singleFileSourceSet struct {
+	agent AgentType
+	roots []string
+	cfg   singleFileConfig
+}
+
+var _ SourceSet = singleFileSourceSet{}
+
+// Discover collects the source files under every configured root, skips
+// matches with an empty Path, deduplicates through addJSONLSource, and returns
+// them sorted by sortJSONLSources. Cancellation is checked once per root.
+func (s singleFileSourceSet) Discover(
+	ctx context.Context,
+) ([]SourceRef, error) {
+	var (
+		refs []SourceRef
+		seen = map[string]struct{}{}
+	)
+	for _, root := range s.roots {
+		if err := ctx.Err(); err != nil {
+			return nil, err
+		}
+		for _, match := range s.cfg.discoverFiles(root) {
+			if match.Path != "" {
+				addJSONLSource(s.sourceRef(root, match), &refs, seen)
+			}
+		}
+	}
+	sortJSONLSources(refs)
+	return refs, nil
+}
+
+// WatchPlan returns the watch roots produced by the provider's watchRoots
+// callback for the configured roots. The context is not consulted.
+func (s singleFileSourceSet) WatchPlan(
+	context.Context,
+) (WatchPlan, error) {
+	roots := s.cfg.watchRoots(s.roots)
+	return WatchPlan{Roots: roots}, nil
+}
+
+// SourcesForChangedPath classifies req.Path and returns the first match as a
+// single-element slice, or (nil, nil) when nothing matches. With a WatchRoot
+// on the request, only the first configured root that owns that watch root is
+// consulted; without one, every configured root is tried in order.
+func (s singleFileSourceSet) SourcesForChangedPath(
+	ctx context.Context,
+	req ChangedPathRequest,
+) ([]SourceRef, error) {
+	if err := ctx.Err(); err != nil {
+		return nil, err
+	}
+	allowMissing := jsonlMissingPathFallbackAllowed(req)
+
+	candidates := s.roots
+	if req.WatchRoot != "" {
+		// A watch event may originate from a configured root or one of its
+		// watched subdirectories; resolve it back to the owning configured
+		// root before classifying, so per-subdir watches attribute correctly.
+		// A watch root owned by no configured root leaves nothing to try.
+		candidates = nil
+		watchRoot := filepath.Clean(req.WatchRoot)
+		for _, configured := range s.roots {
+			owns := watchRoot == configured ||
+				samePath(watchRoot, configured) ||
+				pathUnderRoot(configured, watchRoot)
+			if owns {
+				candidates = []string{configured}
+				break
+			}
+		}
+	}
+	for _, root := range candidates {
+		match, ok := s.cfg.classifyPath(root, req.Path, allowMissing)
+		if ok {
+			return []SourceRef{s.sourceRef(root, match)}, nil
+		}
+	}
+	return nil, nil
+}
+
+// FindSource resolves a stored session back to its source. It classifies
+// req.StoredFilePath and then req.FingerprintKey against every root, and when
+// neither resolves it falls back to req.RawSessionID via findFile.
+func (s singleFileSourceSet) FindSource(
+	ctx context.Context,
+	req FindSourceRequest,
+) (SourceRef, bool, error) {
+	if err := ctx.Err(); err != nil {
+		return SourceRef{}, false, err
+	}
+	stored := []string{req.StoredFilePath, req.FingerprintKey}
+	for _, path := range stored {
+		if path == "" {
+			continue
+		}
+		for _, root := range s.roots {
+			match, ok := s.cfg.classifyPath(root, path, false)
+			if ok {
+				return s.sourceRef(root, match), true, nil
+			}
+		}
+	}
+	if req.RawSessionID == "" {
+		return SourceRef{}, false, nil
+	}
+	for _, root := range s.roots {
+		if err := ctx.Err(); err != nil {
+			return SourceRef{}, false, err
+		}
+		match, ok := s.cfg.findFile(root, req.RawSessionID)
+		if ok {
+			return s.sourceRef(root, match), true, nil
+		}
+	}
+	return SourceRef{}, false, nil
+}
+
+// Fingerprint resolves source to its file payload, asks the provider's
+// fingerprint callback for the freshness values, and then sets the Key to the
+// first non-empty of source.FingerprintKey, source.Key, and the resolved path.
+func (s singleFileSourceSet) Fingerprint(
+	ctx context.Context,
+	source SourceRef,
+) (SourceFingerprint, error) {
+	if err := ctx.Err(); err != nil {
+		return SourceFingerprint{}, err
+	}
+	src, resolved := s.sourceFromRef(source)
+	if !resolved {
+		return SourceFingerprint{}, fmt.Errorf(
+			"%s source path unavailable", s.agent,
+		)
+	}
+	fp, err := s.cfg.fingerprint(src)
+	if err != nil {
+		return SourceFingerprint{}, err
+	}
+	fp.Key = firstNonEmptyJSONLString(source.FingerprintKey, source.Key, src.Path)
+	return fp, nil
+}
+
+// Parse resolves the request's source and runs the provider's parseFile on it.
+// The outcome carries one entry per returned result plus the excluded session
+// IDs and is marked complete. When parseFile returns neither results nor
+// exclusions and alwaysComplete is off, the outcome is a SkipNoSession skip
+// instead. It satisfies the SourceSet interface; sourceSetProvider applies the
+// request/config machine fallback before calling in, so req.Machine is already
+// resolved here.
+func (s singleFileSourceSet) Parse(
+	ctx context.Context,
+	req ParseRequest,
+) (ParseOutcome, error) {
+	if err := ctx.Err(); err != nil {
+		return ParseOutcome{}, err
+	}
+	src, resolved := s.sourceFromRef(req.Source)
+	if !resolved {
+		return ParseOutcome{}, fmt.Errorf("%s source path unavailable", s.agent)
+	}
+	results, excluded, err := s.cfg.parseFile(src, req)
+	if err != nil {
+		return ParseOutcome{}, err
+	}
+	nothingParsed := len(results) == 0 && len(excluded) == 0
+	if nothingParsed && !s.cfg.alwaysComplete {
+		return ParseOutcome{
+			ResultSetComplete: true,
+			SkipReason:        SkipNoSession,
+		}, nil
+	}
+	outcomes := make([]ParseResultOutcome, len(results))
+	for i := range results {
+		outcomes[i] = ParseResultOutcome{
+			Result:      results[i],
+			DataVersion: DataVersionCurrent,
+		}
+	}
+	return ParseOutcome{
+		Results:            outcomes,
+		ExcludedSessionIDs: excluded,
+		ResultSetComplete:  true,
+	}, nil
+}
+
+// sourceRef builds the engine-facing SourceRef for match under root. Key,
+// DisplayPath, and FingerprintKey all carry match.Path, and Opaque carries the
+// singleFileSource payload.
+func (s singleFileSourceSet) sourceRef(
+	root string, match singleFileMatch,
+) SourceRef {
+	path := match.Path
+	return SourceRef{
+		Provider:       s.agent,
+		Key:            path,
+		DisplayPath:    path,
+		FingerprintKey: path,
+		ProjectHint:    match.ProjectHint,
+		Opaque:         match.toSource(root),
+	}
+}
+
+// sourceFromRef recovers the singleFileSource behind a SourceRef. A value
+// payload is returned as-is and is valid only when its Path is set. A usable
+// pointer payload is dereferenced. Otherwise the ref's DisplayPath,
+// FingerprintKey, and Key are classified against each root, in that order,
+// without allowMissing.
+func (s singleFileSourceSet) sourceFromRef(
+	source SourceRef,
+) (singleFileSource, bool) {
+	switch payload := source.Opaque.(type) {
+	case singleFileSource:
+		// NOTE(review): a value payload without a Path returns false right
+		// here, whereas a pointer payload without one falls through to path
+		// classification -- confirm the asymmetry is intended.
+		return payload, payload.Path != ""
+	case *singleFileSource:
+		if payload != nil && payload.Path != "" {
+			return *payload, true
+		}
+	}
+	candidates := []string{source.DisplayPath, source.FingerprintKey, source.Key}
+	for _, candidate := range candidates {
+		if candidate == "" {
+			continue
+		}
+		for _, root := range s.roots {
+			match, ok := s.cfg.classifyPath(root, candidate, false)
+			if ok {
+				return match.toSource(root), true
+			}
+		}
+	}
+	return singleFileSource{}, false
+}
+
+// newSingleFileProviderFactory returns the ProviderFactory for a single-file
+// provider. It only adapts build, which constructs the agent's configured
+// source set, to the generic newSourceSetFactory.
+func newSingleFileProviderFactory(
+	def AgentDef,
+	caps Capabilities,
+	build func(cfg ProviderConfig) singleFileSourceSet,
+) ProviderFactory {
+	// Widen the concrete return type to the SourceSet interface that the
+	// generic factory expects.
+	asSourceSet := func(cfg ProviderConfig) SourceSet {
+		return build(cfg)
+	}
+	return newSourceSetFactory(def, caps, asSourceSet)
+}
+
+// pathUnderRoot reports whether candidate is root itself or nested under it.
+// Both paths are cleaned before relUnder compares them.
+func pathUnderRoot(root, candidate string) bool {
+	cleanRoot := filepath.Clean(root)
+	cleanCandidate := filepath.Clean(candidate)
+	_, under := relUnder(cleanRoot, cleanCandidate)
+	return under
+}
diff --git a/internal/parser/source_set.go b/internal/parser/source_set.go
new file mode 100644
index 000000000..4d8fff320
--- /dev/null
+++ b/internal/parser/source_set.go
@@ -0,0 +1,126 @@
+package parser
+
+import "context"
+
+// source_set.go provides the generic plumbing shared by every reusable
+// source-set base. A SourceSet owns source resolution and parsing for one
+// provider; sourceSetProvider wraps any SourceSet into a full Provider, and
+// sourceSetFactory builds those providers from an AgentDef + Capabilities + a
+// per-config constructor.
+//
+// The point is that a base such as multiSessionContainerSourceSet (or
+// singleFileSourceSet) implements SourceSet once and reuses this factory and
+// this delegating provider, instead of each base re-hand-rolling the
+// Definition/Capabilities/NewProvider factory and the six forwarding provider
+// methods. Provider-level concerns that every provider shares -- folding the
+// raw session ID into FindSource requests, and the request/config machine
+// fallback for Parse -- live here so the SourceSet implementations stay focused
+// on agent-specific source logic.
+ +// SourceSet is the source-resolution and parse core that a Provider delegates +// to. It is the Provider interface minus the Definition/Capabilities/config +// plumbing (supplied by sourceSetProvider) and minus ParseIncremental (which +// falls through to the ProviderBase "unsupported" default until a base needs +// it). +type SourceSet interface { + Discover(context.Context) ([]SourceRef, error) + WatchPlan(context.Context) (WatchPlan, error) + SourcesForChangedPath( + context.Context, ChangedPathRequest, + ) ([]SourceRef, error) + FindSource(context.Context, FindSourceRequest) (SourceRef, bool, error) + Fingerprint(context.Context, SourceRef) (SourceFingerprint, error) + Parse(context.Context, ParseRequest) (ParseOutcome, error) +} + +// sourceSetProvider adapts a SourceSet to the Provider interface. It supplies +// the AgentDef/Capabilities/config carried by ProviderBase, forwards the source +// methods to the SourceSet, and applies the two provider-level normalizations +// every provider performs: raw-session-ID injection on FindSource and the +// machine fallback on Parse. ParseIncremental is inherited from ProviderBase +// (unsupported) until a base opts in. 
+type sourceSetProvider struct { + ProviderBase + sources SourceSet +} + +func (p *sourceSetProvider) Discover(ctx context.Context) ([]SourceRef, error) { + return p.sources.Discover(ctx) +} + +func (p *sourceSetProvider) WatchPlan(ctx context.Context) (WatchPlan, error) { + return p.sources.WatchPlan(ctx) +} + +func (p *sourceSetProvider) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + return p.sources.SourcesForChangedPath(ctx, req) +} + +func (p *sourceSetProvider) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + return p.sources.FindSource( + ctx, providerFindRequestWithRawSessionID(p.Def, req), + ) +} + +func (p *sourceSetProvider) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + return p.sources.Fingerprint(ctx, source) +} + +func (p *sourceSetProvider) Parse( + ctx context.Context, + req ParseRequest, +) (ParseOutcome, error) { + req.Machine = firstNonEmptyJSONLString(req.Machine, p.Config.Machine) + return p.sources.Parse(ctx, req) +} + +// sourceSetFactory is the generic ProviderFactory for any SourceSet-backed +// provider. build constructs the SourceSet from the cloned per-provider config +// (roots, machine, path rewriter), so a base captures whatever config it needs +// in a closure rather than threading it through a struct. 
+type sourceSetFactory struct { + def AgentDef + caps Capabilities + build func(cfg ProviderConfig) SourceSet +} + +func newSourceSetFactory( + def AgentDef, + caps Capabilities, + build func(cfg ProviderConfig) SourceSet, +) ProviderFactory { + return sourceSetFactory{ + def: cloneAgentDef(def), + caps: caps, + build: build, + } +} + +func (f sourceSetFactory) Definition() AgentDef { + return cloneAgentDef(f.def) +} + +func (f sourceSetFactory) Capabilities() Capabilities { + return f.caps +} + +func (f sourceSetFactory) NewProvider(cfg ProviderConfig) Provider { + cfg = cfg.Clone() + return &sourceSetProvider{ + ProviderBase: ProviderBase{ + Def: cloneAgentDef(f.def), + Caps: f.caps, + Config: cfg, + }, + sources: f.build(cfg), + } +} diff --git a/internal/parser/sqlite_fanout_source_set.go b/internal/parser/sqlite_fanout_source_set.go new file mode 100644 index 000000000..b0310c415 --- /dev/null +++ b/internal/parser/sqlite_fanout_source_set.go @@ -0,0 +1,353 @@ +package parser + +import ( + "context" + "fmt" + "os" + "path/filepath" +) + +// SQLiteFanoutSessionMeta describes one logical session inside a shared SQLite +// database source. +type SQLiteFanoutSessionMeta struct { + SessionID string + VirtualPath string + FileMtime int64 +} + +// SQLiteFanoutSourceSetOptions configures SQLite fan-out source discovery. +// FindDB returns the canonical SQLite database path for a root. Discovery, +// virtual source lookup, changed-path fan-out, and watch planning all use that +// path as the source of truth. If FindDB returns an empty path, the helper falls +// back to <root>/<DBName> so stored tombstone virtual paths still resolve. +type SQLiteFanoutSourceSetOptions struct { + DBName string + FindDB func(root string) string + ListMeta func(dbPath string) ([]SQLiteFanoutSessionMeta, error) +} + +// SQLiteFanoutSource is the in-memory payload stored in SQLite fan-out +// SourceRefs.
+type SQLiteFanoutSource struct { + Root string + DBPath string + SessionID string +} + +// SQLiteFanoutSourceSet discovers one SourceRef per logical session inside a +// shared SQLite database file. +type SQLiteFanoutSourceSet struct { + provider AgentType + roots []string + options SQLiteFanoutSourceSetOptions +} + +func NewSQLiteFanoutSourceSet( + provider AgentType, + roots []string, + options SQLiteFanoutSourceSetOptions, +) SQLiteFanoutSourceSet { + return SQLiteFanoutSourceSet{ + provider: provider, + roots: cleanJSONLRoots(roots), + options: options, + } +} + +func (s SQLiteFanoutSourceSet) Discover(ctx context.Context) ([]SourceRef, error) { + var sources []SourceRef + seen := make(map[string]struct{}) + for _, root := range s.roots { + if err := ctx.Err(); err != nil { + return nil, err + } + dbPath := s.findDB(root) + if dbPath == "" { + continue + } + metas, err := s.listMeta(dbPath) + if err != nil { + return nil, err + } + for _, meta := range metas { + addJSONLSource(s.newSourceRef(root, dbPath, meta), &sources, seen) + } + } + sortJSONLSources(sources) + return sources, nil +} + +func (s SQLiteFanoutSourceSet) WatchPlan(context.Context) (WatchPlan, error) { + roots := make([]WatchRoot, 0, len(s.roots)) + for _, root := range s.roots { + dbPath := s.canonicalDBPath(root) + if dbPath == "" { + continue + } + dbName := filepath.Base(dbPath) + roots = append(roots, WatchRoot{ + Path: filepath.Dir(dbPath), + Recursive: false, + IncludeGlobs: []string{dbName, dbName + "-wal", dbName + "-shm"}, + DebounceKey: string(s.provider) + ":sqlite:" + dbPath, + }) + } + return WatchPlan{Roots: roots}, nil +} + +func (s SQLiteFanoutSourceSet) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + for _, root := range s.roots { + if req.WatchRoot != "" && !s.watchRootMatches(root, req.WatchRoot) { + continue + } + if ref, ok := s.sourceRef(root, req.Path, true); ok 
{ + return []SourceRef{ref}, nil + } + dbPath, ok := s.dbPathForEvent(root, req.Path) + if !ok { + continue + } + metas, err := s.listMeta(dbPath) + if err != nil { + return nil, err + } + sources := make([]SourceRef, 0, len(metas)) + seen := make(map[string]struct{}, len(metas)) + for _, meta := range metas { + addJSONLSource(s.newSourceRef(root, dbPath, meta), &sources, seen) + } + for _, storedPath := range req.StoredSourcePaths { + ref, ok := s.sourceRef(root, storedPath, true) + if !ok { + continue + } + src := ref.Opaque.(SQLiteFanoutSource) + if samePath(src.DBPath, dbPath) { + addJSONLSource(ref, &sources, seen) + } + } + sortJSONLSources(sources) + return sources, nil + } + return nil, nil +} + +func (s SQLiteFanoutSourceSet) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + if err := ctx.Err(); err != nil { + return SourceRef{}, false, err + } + freshStoredSource := req.RequireFreshSource && + (req.StoredFilePath != "" || req.FingerprintKey != "") + for _, path := range []string{req.StoredFilePath, req.FingerprintKey} { + if path == "" { + continue + } + for _, root := range s.roots { + source, ok := s.sourceRef(root, path, true) + if !ok { + continue + } + src := source.Opaque.(SQLiteFanoutSource) + if req.RawSessionID != "" && src.SessionID != req.RawSessionID { + continue + } + if req.RequireFreshSource { + fresh, err := s.sourceExists(src) + if err != nil { + return SourceRef{}, false, err + } + if !fresh { + continue + } + } + return source, true, nil + } + } + if freshStoredSource || req.RawSessionID == "" { + return SourceRef{}, false, nil + } + for _, root := range s.roots { + dbPath := s.findDB(root) + if dbPath == "" { + continue + } + metas, err := s.listMeta(dbPath) + if err != nil { + return SourceRef{}, false, err + } + for _, meta := range metas { + if meta.SessionID == req.RawSessionID { + return s.newSourceRef(root, dbPath, meta), true, nil + } + } + } + return SourceRef{}, false, nil +} + +func 
(s SQLiteFanoutSourceSet) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + if err := ctx.Err(); err != nil { + return SourceFingerprint{}, err + } + src, ok := s.sourceFromRef(source) + if !ok { + return SourceFingerprint{}, fmt.Errorf("%s sqlite fan-out source path unavailable", s.provider) + } + key := firstNonEmptyJSONLString(source.FingerprintKey, source.Key, src.virtualPath()) + if _, err := os.Stat(src.DBPath); err != nil { + if os.IsNotExist(err) { + return SourceFingerprint{Key: key}, nil + } + return SourceFingerprint{}, fmt.Errorf("stat %s: %w", src.DBPath, err) + } + metas, err := s.listMeta(src.DBPath) + if err != nil { + return SourceFingerprint{}, err + } + for _, meta := range metas { + if meta.SessionID == src.SessionID { + return SourceFingerprint{Key: key, MTimeNS: meta.FileMtime}, nil + } + } + return SourceFingerprint{Key: key}, nil +} + +func (s SQLiteFanoutSourceSet) sourceFromRef( + source SourceRef, +) (SQLiteFanoutSource, bool) { + switch src := source.Opaque.(type) { + case SQLiteFanoutSource: + return src, src.DBPath != "" && src.SessionID != "" + case *SQLiteFanoutSource: + if src != nil && src.DBPath != "" && src.SessionID != "" { + return *src, true + } + } + for _, candidate := range []string{source.DisplayPath, source.FingerprintKey, source.Key} { + for _, root := range s.roots { + if ref, ok := s.sourceRef(root, candidate, true); ok { + return ref.Opaque.(SQLiteFanoutSource), true + } + } + } + return SQLiteFanoutSource{}, false +} + +func (s SQLiteFanoutSourceSet) sourceExists(src SQLiteFanoutSource) (bool, error) { + if !IsRegularFile(src.DBPath) { + return false, nil + } + metas, err := s.listMeta(src.DBPath) + if err != nil { + return false, err + } + for _, meta := range metas { + if meta.SessionID == src.SessionID { + return true, nil + } + } + return false, nil +} + +func (s SQLiteFanoutSourceSet) sourceRef( + root, path string, + allowMissing bool, +) (SourceRef, bool) { + root = 
filepath.Clean(root) + dbPath, sessionID, ok := ParseVirtualSourcePathForBase(path, s.options.DBName) + canonicalDBPath := s.canonicalDBPath(root) + if !ok || canonicalDBPath == "" || !samePath(dbPath, canonicalDBPath) { + return SourceRef{}, false + } + if !allowMissing && !IsRegularFile(dbPath) { + return SourceRef{}, false + } + return s.newSourceRef(root, dbPath, SQLiteFanoutSessionMeta{ + SessionID: sessionID, + VirtualPath: VirtualSourcePath(dbPath, sessionID), + }), true +} + +func (s SQLiteFanoutSourceSet) dbPathForEvent(root, path string) (string, bool) { + path = filepath.Clean(path) + dbPath := s.canonicalDBPath(root) + if dbPath == "" || !samePath(filepath.Dir(path), filepath.Dir(dbPath)) { + return "", false + } + base := filepath.Base(path) + dbName := filepath.Base(dbPath) + if base == dbName || base == dbName+"-wal" || base == dbName+"-shm" { + return dbPath, true + } + return "", false +} + +func (s SQLiteFanoutSourceSet) watchRootMatches(root, watchRoot string) bool { + if samePath(watchRoot, root) { + return true + } + dbPath := s.canonicalDBPath(root) + return dbPath != "" && samePath(watchRoot, filepath.Dir(dbPath)) +} + +func (s SQLiteFanoutSourceSet) newSourceRef( + root, dbPath string, + meta SQLiteFanoutSessionMeta, +) SourceRef { + virtualPath := firstNonEmptyJSONLString( + meta.VirtualPath, + VirtualSourcePath(dbPath, meta.SessionID), + ) + return SourceRef{ + Provider: s.provider, + Key: virtualPath, + DisplayPath: virtualPath, + FingerprintKey: virtualPath, + Opaque: SQLiteFanoutSource{ + Root: root, + DBPath: dbPath, + SessionID: meta.SessionID, + }, + } +} + +func (s SQLiteFanoutSourceSet) findDB(root string) string { + if s.options.FindDB == nil { + return "" + } + return s.options.FindDB(root) +} + +func (s SQLiteFanoutSourceSet) canonicalDBPath(root string) string { + if dbPath := s.findDB(root); dbPath != "" { + return filepath.Clean(dbPath) + } + if s.options.DBName == "" { + return "" + } + return 
filepath.Join(filepath.Clean(root), s.options.DBName) +} + +func (s SQLiteFanoutSourceSet) listMeta( + dbPath string, +) ([]SQLiteFanoutSessionMeta, error) { + if s.options.ListMeta == nil { + return nil, nil + } + return s.options.ListMeta(dbPath) +} + +func (s SQLiteFanoutSource) virtualPath() string { + return VirtualSourcePath(s.DBPath, s.SessionID) +} diff --git a/internal/parser/virtual_source_path.go b/internal/parser/virtual_source_path.go new file mode 100644 index 000000000..5ec572ce1 --- /dev/null +++ b/internal/parser/virtual_source_path.go @@ -0,0 +1,38 @@ +package parser + +import ( + "path/filepath" + "strings" +) + +// VirtualSourcePath identifies one logical source inside a shared physical +// container path, for example one conversation row inside a SQLite database. +func VirtualSourcePath(containerPath, sourceID string) string { + return containerPath + "#" + sourceID +} + +// ParseVirtualSourcePath splits a path created by VirtualSourcePath. +func ParseVirtualSourcePath(path string) (string, string, bool) { + idx := strings.LastIndex(path, "#") + if idx < 0 { + return "", "", false + } + containerPath, sourceID := path[:idx], path[idx+1:] + if containerPath == "" || sourceID == "" { + return "", "", false + } + return containerPath, sourceID, true +} + +// ParseVirtualSourcePathForBase splits a virtual source path and verifies that +// the physical container has the expected base filename. 
+func ParseVirtualSourcePathForBase( + path string, + baseName string, +) (string, string, bool) { + containerPath, sourceID, ok := ParseVirtualSourcePath(path) + if !ok || filepath.Base(containerPath) != baseName { + return "", "", false + } + return containerPath, sourceID, true +} From b85ee5afdbaebe637d29b36eea646a623d5d68aa Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Thu, 25 Jun 2026 17:33:37 -0400 Subject: [PATCH 02/24] refactor(parser): remove dead SQLiteFanoutSourceSet SQLiteFanoutSourceSet had no production callers -- it was referenced only by its own definition and duplicated multiSessionContainerSourceSet. The package-level helpers it relied on (cleanJSONLRoots, addJSONLSource, sortJSONLSources, and friends) are defined and used elsewhere, so removing it orphans nothing. --- internal/parser/sqlite_fanout_source_set.go | 353 -------------------- 1 file changed, 353 deletions(-) delete mode 100644 internal/parser/sqlite_fanout_source_set.go diff --git a/internal/parser/sqlite_fanout_source_set.go b/internal/parser/sqlite_fanout_source_set.go deleted file mode 100644 index b0310c415..000000000 --- a/internal/parser/sqlite_fanout_source_set.go +++ /dev/null @@ -1,353 +0,0 @@ -package parser - -import ( - "context" - "fmt" - "os" - "path/filepath" -) - -// SQLiteFanoutSessionMeta describes one logical session inside a shared SQLite -// database source. -type SQLiteFanoutSessionMeta struct { - SessionID string - VirtualPath string - FileMtime int64 -} - -// SQLiteFanoutSourceSetOptions configures SQLite fan-out source discovery. -// FindDB returns the canonical SQLite database path for a root. Discovery, -// virtual source lookup, changed-path fan-out, and watch planning all use that -// path as the source of truth. If FindDB returns an empty path, the helper falls -// back to <root>/<DBName> so stored tombstone virtual paths still resolve.
-type SQLiteFanoutSourceSetOptions struct { - DBName string - FindDB func(root string) string - ListMeta func(dbPath string) ([]SQLiteFanoutSessionMeta, error) -} - -// SQLiteFanoutSource is the in-memory payload stored in SQLite fan-out -// SourceRefs. -type SQLiteFanoutSource struct { - Root string - DBPath string - SessionID string -} - -// SQLiteFanoutSourceSet discovers one SourceRef per logical session inside a -// shared SQLite database file. -type SQLiteFanoutSourceSet struct { - provider AgentType - roots []string - options SQLiteFanoutSourceSetOptions -} - -func NewSQLiteFanoutSourceSet( - provider AgentType, - roots []string, - options SQLiteFanoutSourceSetOptions, -) SQLiteFanoutSourceSet { - return SQLiteFanoutSourceSet{ - provider: provider, - roots: cleanJSONLRoots(roots), - options: options, - } -} - -func (s SQLiteFanoutSourceSet) Discover(ctx context.Context) ([]SourceRef, error) { - var sources []SourceRef - seen := make(map[string]struct{}) - for _, root := range s.roots { - if err := ctx.Err(); err != nil { - return nil, err - } - dbPath := s.findDB(root) - if dbPath == "" { - continue - } - metas, err := s.listMeta(dbPath) - if err != nil { - return nil, err - } - for _, meta := range metas { - addJSONLSource(s.newSourceRef(root, dbPath, meta), &sources, seen) - } - } - sortJSONLSources(sources) - return sources, nil -} - -func (s SQLiteFanoutSourceSet) WatchPlan(context.Context) (WatchPlan, error) { - roots := make([]WatchRoot, 0, len(s.roots)) - for _, root := range s.roots { - dbPath := s.canonicalDBPath(root) - if dbPath == "" { - continue - } - dbName := filepath.Base(dbPath) - roots = append(roots, WatchRoot{ - Path: filepath.Dir(dbPath), - Recursive: false, - IncludeGlobs: []string{dbName, dbName + "-wal", dbName + "-shm"}, - DebounceKey: string(s.provider) + ":sqlite:" + dbPath, - }) - } - return WatchPlan{Roots: roots}, nil -} - -func (s SQLiteFanoutSourceSet) SourcesForChangedPath( - ctx context.Context, - req ChangedPathRequest, -) 
([]SourceRef, error) { - if err := ctx.Err(); err != nil { - return nil, err - } - for _, root := range s.roots { - if req.WatchRoot != "" && !s.watchRootMatches(root, req.WatchRoot) { - continue - } - if ref, ok := s.sourceRef(root, req.Path, true); ok { - return []SourceRef{ref}, nil - } - dbPath, ok := s.dbPathForEvent(root, req.Path) - if !ok { - continue - } - metas, err := s.listMeta(dbPath) - if err != nil { - return nil, err - } - sources := make([]SourceRef, 0, len(metas)) - seen := make(map[string]struct{}, len(metas)) - for _, meta := range metas { - addJSONLSource(s.newSourceRef(root, dbPath, meta), &sources, seen) - } - for _, storedPath := range req.StoredSourcePaths { - ref, ok := s.sourceRef(root, storedPath, true) - if !ok { - continue - } - src := ref.Opaque.(SQLiteFanoutSource) - if samePath(src.DBPath, dbPath) { - addJSONLSource(ref, &sources, seen) - } - } - sortJSONLSources(sources) - return sources, nil - } - return nil, nil -} - -func (s SQLiteFanoutSourceSet) FindSource( - ctx context.Context, - req FindSourceRequest, -) (SourceRef, bool, error) { - if err := ctx.Err(); err != nil { - return SourceRef{}, false, err - } - freshStoredSource := req.RequireFreshSource && - (req.StoredFilePath != "" || req.FingerprintKey != "") - for _, path := range []string{req.StoredFilePath, req.FingerprintKey} { - if path == "" { - continue - } - for _, root := range s.roots { - source, ok := s.sourceRef(root, path, true) - if !ok { - continue - } - src := source.Opaque.(SQLiteFanoutSource) - if req.RawSessionID != "" && src.SessionID != req.RawSessionID { - continue - } - if req.RequireFreshSource { - fresh, err := s.sourceExists(src) - if err != nil { - return SourceRef{}, false, err - } - if !fresh { - continue - } - } - return source, true, nil - } - } - if freshStoredSource || req.RawSessionID == "" { - return SourceRef{}, false, nil - } - for _, root := range s.roots { - dbPath := s.findDB(root) - if dbPath == "" { - continue - } - metas, err := 
s.listMeta(dbPath) - if err != nil { - return SourceRef{}, false, err - } - for _, meta := range metas { - if meta.SessionID == req.RawSessionID { - return s.newSourceRef(root, dbPath, meta), true, nil - } - } - } - return SourceRef{}, false, nil -} - -func (s SQLiteFanoutSourceSet) Fingerprint( - ctx context.Context, - source SourceRef, -) (SourceFingerprint, error) { - if err := ctx.Err(); err != nil { - return SourceFingerprint{}, err - } - src, ok := s.sourceFromRef(source) - if !ok { - return SourceFingerprint{}, fmt.Errorf("%s sqlite fan-out source path unavailable", s.provider) - } - key := firstNonEmptyJSONLString(source.FingerprintKey, source.Key, src.virtualPath()) - if _, err := os.Stat(src.DBPath); err != nil { - if os.IsNotExist(err) { - return SourceFingerprint{Key: key}, nil - } - return SourceFingerprint{}, fmt.Errorf("stat %s: %w", src.DBPath, err) - } - metas, err := s.listMeta(src.DBPath) - if err != nil { - return SourceFingerprint{}, err - } - for _, meta := range metas { - if meta.SessionID == src.SessionID { - return SourceFingerprint{Key: key, MTimeNS: meta.FileMtime}, nil - } - } - return SourceFingerprint{Key: key}, nil -} - -func (s SQLiteFanoutSourceSet) sourceFromRef( - source SourceRef, -) (SQLiteFanoutSource, bool) { - switch src := source.Opaque.(type) { - case SQLiteFanoutSource: - return src, src.DBPath != "" && src.SessionID != "" - case *SQLiteFanoutSource: - if src != nil && src.DBPath != "" && src.SessionID != "" { - return *src, true - } - } - for _, candidate := range []string{source.DisplayPath, source.FingerprintKey, source.Key} { - for _, root := range s.roots { - if ref, ok := s.sourceRef(root, candidate, true); ok { - return ref.Opaque.(SQLiteFanoutSource), true - } - } - } - return SQLiteFanoutSource{}, false -} - -func (s SQLiteFanoutSourceSet) sourceExists(src SQLiteFanoutSource) (bool, error) { - if !IsRegularFile(src.DBPath) { - return false, nil - } - metas, err := s.listMeta(src.DBPath) - if err != nil { - return 
false, err - } - for _, meta := range metas { - if meta.SessionID == src.SessionID { - return true, nil - } - } - return false, nil -} - -func (s SQLiteFanoutSourceSet) sourceRef( - root, path string, - allowMissing bool, -) (SourceRef, bool) { - root = filepath.Clean(root) - dbPath, sessionID, ok := ParseVirtualSourcePathForBase(path, s.options.DBName) - canonicalDBPath := s.canonicalDBPath(root) - if !ok || canonicalDBPath == "" || !samePath(dbPath, canonicalDBPath) { - return SourceRef{}, false - } - if !allowMissing && !IsRegularFile(dbPath) { - return SourceRef{}, false - } - return s.newSourceRef(root, dbPath, SQLiteFanoutSessionMeta{ - SessionID: sessionID, - VirtualPath: VirtualSourcePath(dbPath, sessionID), - }), true -} - -func (s SQLiteFanoutSourceSet) dbPathForEvent(root, path string) (string, bool) { - path = filepath.Clean(path) - dbPath := s.canonicalDBPath(root) - if dbPath == "" || !samePath(filepath.Dir(path), filepath.Dir(dbPath)) { - return "", false - } - base := filepath.Base(path) - dbName := filepath.Base(dbPath) - if base == dbName || base == dbName+"-wal" || base == dbName+"-shm" { - return dbPath, true - } - return "", false -} - -func (s SQLiteFanoutSourceSet) watchRootMatches(root, watchRoot string) bool { - if samePath(watchRoot, root) { - return true - } - dbPath := s.canonicalDBPath(root) - return dbPath != "" && samePath(watchRoot, filepath.Dir(dbPath)) -} - -func (s SQLiteFanoutSourceSet) newSourceRef( - root, dbPath string, - meta SQLiteFanoutSessionMeta, -) SourceRef { - virtualPath := firstNonEmptyJSONLString( - meta.VirtualPath, - VirtualSourcePath(dbPath, meta.SessionID), - ) - return SourceRef{ - Provider: s.provider, - Key: virtualPath, - DisplayPath: virtualPath, - FingerprintKey: virtualPath, - Opaque: SQLiteFanoutSource{ - Root: root, - DBPath: dbPath, - SessionID: meta.SessionID, - }, - } -} - -func (s SQLiteFanoutSourceSet) findDB(root string) string { - if s.options.FindDB == nil { - return "" - } - return 
s.options.FindDB(root) -} - -func (s SQLiteFanoutSourceSet) canonicalDBPath(root string) string { - if dbPath := s.findDB(root); dbPath != "" { - return filepath.Clean(dbPath) - } - if s.options.DBName == "" { - return "" - } - return filepath.Join(filepath.Clean(root), s.options.DBName) -} - -func (s SQLiteFanoutSourceSet) listMeta( - dbPath string, -) ([]SQLiteFanoutSessionMeta, error) { - if s.options.ListMeta == nil { - return nil, nil - } - return s.options.ListMeta(dbPath) -} - -func (s SQLiteFanoutSource) virtualPath() string { - return VirtualSourcePath(s.DBPath, s.SessionID) -} From 3d58be37b18f020e76ad25b1650f2b4ef887feb0 Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Thu, 25 Jun 2026 16:39:36 -0400 Subject: [PATCH 03/24] refactor(parser): unify path-under-root containment on root-first arg order pathIsUnderRoot took (path, root), reversing the root-first convention used by pathUnderRoot(root, candidate) and relUnder(dir, child). Its only caller already had root and path available, and pathUnderRoot has the same containment semantics (root==path is not "under"; rejects ".." escape; handles trailing separators via filepath.Clean). Drop the duplicate and repoint the caller so all containment checks share one root-first helper. 
--- internal/parser/jsonl_source_set.go | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/internal/parser/jsonl_source_set.go b/internal/parser/jsonl_source_set.go index c46a84248..465748765 100644 --- a/internal/parser/jsonl_source_set.go +++ b/internal/parser/jsonl_source_set.go @@ -531,7 +531,7 @@ func jsonlMissingPathFallbackAllowed(req ChangedPathRequest) bool { func (s JSONLSourceSet) pathAllowedByRoot(root, path string) bool { if s.options.Recursive { - return pathIsUnderRoot(path, root) + return pathUnderRoot(root, path) } return samePath(filepath.Dir(path), root) } @@ -759,12 +759,6 @@ func callPathFunc(fn func(root, path string) string, root, path string) string { return fn(root, path) } -func pathIsUnderRoot(path, root string) bool { - rel, err := filepath.Rel(root, path) - return err == nil && rel != "." && rel != ".." && - !strings.HasPrefix(rel, ".."+string(filepath.Separator)) -} - func samePath(a, b string) bool { return filepath.Clean(a) == filepath.Clean(b) } From 511beb056b061b1ed7b6bf1141a1716dca191138 Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Thu, 25 Jun 2026 17:27:38 -0400 Subject: [PATCH 04/24] feat(parser): add withCompanionFiles sidecar option to jsonl source set Providers whose transcript freshness depends on a sidecar file had no base-level hook: only the SiblingMetadataSourceSet wrapper handled companions, forcing a separate wrapper type instead of a plain option. Add withCompanionFiles(transcriptPath -> companions) so the JSONLSourceSet base folds companions into the three places they matter: their basenames join the watch-plan include globs, their size/mtime (and content when hashing is enabled) fold into the SourceFingerprint, and a changed companion path maps back to its owning transcript so a sidecar write re-parses the session. The wiring reuses the existing sibling-metadata helpers rather than adding a third independent sidecar mechanism. 
--- internal/parser/jsonl_source_set.go | 165 +++++++++++++++++- .../parser/jsonl_source_set_companion_test.go | 156 +++++++++++++++++ internal/parser/jsonl_source_set_options.go | 11 ++ 3 files changed, 324 insertions(+), 8 deletions(-) create mode 100644 internal/parser/jsonl_source_set_companion_test.go diff --git a/internal/parser/jsonl_source_set.go b/internal/parser/jsonl_source_set.go index 465748765..6b6f4923f 100644 --- a/internal/parser/jsonl_source_set.go +++ b/internal/parser/jsonl_source_set.go @@ -105,6 +105,15 @@ type JSONLSourceSetOptions struct { // replacement of the source's existing sessions, for providers whose // transcripts are rewritten wholesale rather than appended. ForceReplace bool + // CompanionFiles returns the sidecar files that belong to a transcript + // source, given the transcript's path. The base folds each existing + // companion's basename into the watch plan globs, its size/mtime (and hash + // when Hash is set) into the SourceFingerprint, and maps a changed companion + // path back to its owning transcript in SourcesForChangedPath. It reuses the + // sibling-metadata helpers rather than introducing a separate mechanism, so + // providers describe companions once as transcript->companions and the base + // drives watch, freshness, and changed-path mapping from that single hook. + CompanionFiles func(transcriptPath string) []string } // JSONLSourceSet discovers, watches, locates, and fingerprints JSONL-like @@ -170,21 +179,63 @@ func (s JSONLSourceSet) Discover(ctx context.Context) ([]SourceRef, error) { return sources, nil } -// WatchPlan returns one watch root for each configured JSONL root. -func (s JSONLSourceSet) WatchPlan(context.Context) (WatchPlan, error) { +// WatchPlan returns one watch root for each configured JSONL root. 
When a +// CompanionFiles hook is configured, each discovered transcript's companion +// basenames are added to every root's include globs so sidecar events are not +// filtered out before SourcesForChangedPath can map them back. +func (s JSONLSourceSet) WatchPlan(ctx context.Context) (WatchPlan, error) { roots := make([]WatchRoot, 0, len(s.roots)) globs := s.includeGlobs() + companionGlobs, err := s.companionGlobs(ctx) + if err != nil { + return WatchPlan{}, err + } for _, root := range s.roots { + includeGlobs := append([]string(nil), globs...) + includeGlobs = append(includeGlobs, companionGlobs...) roots = append(roots, WatchRoot{ Path: root, Recursive: s.options.Recursive, - IncludeGlobs: append([]string(nil), globs...), + IncludeGlobs: includeGlobs, DebounceKey: string(s.provider) + ":jsonl:" + root, }) } return WatchPlan{Roots: roots}, nil } +// companionGlobs enumerates the distinct sidecar basenames across all +// discovered transcripts so they can be added to every root's include globs. +func (s JSONLSourceSet) companionGlobs(ctx context.Context) ([]string, error) { + if s.options.CompanionFiles == nil { + return nil, nil + } + sources, err := s.Discover(ctx) + if err != nil { + return nil, err + } + seen := make(map[string]struct{}) + var globs []string + for _, source := range sources { + src, ok := source.Opaque.(JSONLSource) + if !ok { + continue + } + for _, companion := range s.options.CompanionFiles(src.Path) { + base := filepath.Base(companion) + if base == "" || base == "." { + continue + } + if _, ok := seen[base]; ok { + continue + } + seen[base] = struct{}{} + globs = append(globs, base) + } + } + sort.Strings(globs) + return globs, nil +} + // SourcesForChangedPath maps a filesystem event path back to JSONL sources. 
func (s JSONLSourceSet) SourcesForChangedPath( ctx context.Context, @@ -198,15 +249,25 @@ func (s JSONLSourceSet) SourcesForChangedPath( return nil, err } if !ok { - if !jsonlMissingPathFallbackAllowed(req) { - return nil, nil - } - source, ok, err = s.sourceForMissingPath(ctx, req.Path) + // The changed path is not itself a source. A configured CompanionFiles + // hook lets an existing sidecar map back to its owning transcript; this + // runs before the missing-path fallback because a present companion file + // is not eligible for the tombstone path. + source, ok, err = s.sourceForCompanionPath(ctx, req.Path) if err != nil { return nil, err } if !ok { - return nil, nil + if !jsonlMissingPathFallbackAllowed(req) { + return nil, nil + } + source, ok, err = s.sourceForMissingPath(ctx, req.Path) + if err != nil { + return nil, err + } + if !ok { + return nil, nil + } } } if req.WatchRoot != "" { @@ -328,9 +389,67 @@ func (s JSONLSourceSet) Fingerprint( } fingerprint.Hash = hash } + if err := s.foldCompanionFingerprint(path, &fingerprint); err != nil { + return SourceFingerprint{}, err + } return fingerprint, nil } +// foldCompanionFingerprint folds each existing companion file's size and mtime +// into the transcript fingerprint, and when content hashing is enabled mixes the +// companion contents into the hash. It reuses the sibling-metadata helpers so a +// companion change is reflected in the source's freshness identity. Missing +// companions are ignored, matching sibling-metadata behavior. 
+func (s JSONLSourceSet) foldCompanionFingerprint( + transcriptPath string, + fingerprint *SourceFingerprint, +) error { + if s.options.CompanionFiles == nil { + return nil + } + companions := s.options.CompanionFiles(transcriptPath) + if len(companions) == 0 { + return nil + } + var hasher interface { + Write([]byte) (int, error) + Sum([]byte) []byte + } + if s.options.Hash { + h := sha256.New() + // Seed with the transcript's existing content hash so companion mixing + // stays anchored to the transcript while preserving its contribution. + _, _ = io.WriteString(h, fingerprint.Hash) + hasher = h + } + folded := false + for _, companion := range companions { + info, err := siblingMetadataFileInfo(companion) + if err != nil { + return err + } + if info == nil { + continue + } + fingerprint.Size += info.Size() + if mtime := info.ModTime().UnixNano(); mtime > fingerprint.MTimeNS { + fingerprint.MTimeNS = mtime + } + if hasher != nil { + if err := addSiblingMetadataFingerprintPart( + hasher, "companion", companion, info, + ); err != nil { + return err + } + } + folded = true + } + if hasher != nil && folded { + fingerprint.Hash = fmt.Sprintf("%x", hasher.Sum(nil)) + } + return nil +} + // Parse resolves the request's source to a file and parses it via the ParseFile // option, making JSONLSourceSet a full SourceSet. It mirrors the single-file // base's parse semantics: empty results with no exclusions is a clean @@ -512,6 +631,36 @@ func (s JSONLSourceSet) sourceForMissingPath( return SourceRef{}, false, nil } +// sourceForCompanionPath resolves a changed sidecar path back to the transcript +// source that owns it. It scans discovered transcripts and returns the one whose +// CompanionFiles list contains the changed path, so a companion write triggers a +// re-parse of its transcript. 
+func (s JSONLSourceSet) sourceForCompanionPath( + ctx context.Context, + path string, +) (SourceRef, bool, error) { + if s.options.CompanionFiles == nil { + return SourceRef{}, false, nil + } + path = filepath.Clean(path) + sources, err := s.Discover(ctx) + if err != nil { + return SourceRef{}, false, err + } + for _, source := range sources { + src, ok := source.Opaque.(JSONLSource) + if !ok { + continue + } + for _, companion := range s.options.CompanionFiles(src.Path) { + if samePath(companion, path) { + return source, true, nil + } + } + } + return SourceRef{}, false, nil +} + func jsonlMissingPathFallbackAllowed(req ChangedPathRequest) bool { if req.Path == "" { return false diff --git a/internal/parser/jsonl_source_set_companion_test.go b/internal/parser/jsonl_source_set_companion_test.go new file mode 100644 index 000000000..79614d205 --- /dev/null +++ b/internal/parser/jsonl_source_set_companion_test.go @@ -0,0 +1,156 @@ +package parser + +import ( + "context" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// companionFor maps a transcript path to its sibling ".meta" companion. It is +// the shape a provider passes to withCompanionFiles. 
+func companionFor(transcriptPath string) []string { + return []string{transcriptPath + ".meta"} +} + +func writeFile(t *testing.T, path, contents string) { + t.Helper() + require.NoError(t, os.WriteFile(path, []byte(contents), 0o644)) +} + +func TestJSONLSourceSetCompanionFingerprintReflectsCompanionChange(t *testing.T) { + ctx := context.Background() + root := t.TempDir() + transcript := filepath.Join(root, "session.jsonl") + companion := transcript + ".meta" + writeFile(t, transcript, `{"line":1}`+"\n") + writeFile(t, companion, "v1") + + set := newJSONLSourceSet( + AgentClaude, + []string{root}, + withCompanionFiles(companionFor), + ) + + sources, err := set.Discover(ctx) + require.NoError(t, err) + require.Len(t, sources, 1) + source := sources[0] + + before, err := set.Fingerprint(ctx, source) + require.NoError(t, err) + + // Changing only the companion must change the source fingerprint, since the + // companion size/mtime are folded into the transcript's freshness identity. + writeFile(t, companion, "v2-larger-contents") + after, err := set.Fingerprint(ctx, source) + require.NoError(t, err) + + assert.NotEqual(t, before.Size, after.Size, + "companion size should be folded into the fingerprint size") + assert.NotEqual(t, before, after, + "a companion change must alter the source fingerprint") +} + +func TestJSONLSourceSetCompanionFingerprintHashChanges(t *testing.T) { + ctx := context.Background() + root := t.TempDir() + transcript := filepath.Join(root, "session.jsonl") + companion := transcript + ".meta" + writeFile(t, transcript, `{"line":1}`+"\n") + writeFile(t, companion, "v1") + + set := newJSONLSourceSet( + AgentClaude, + []string{root}, + withContentHashing(), + withCompanionFiles(companionFor), + ) + sources, err := set.Discover(ctx) + require.NoError(t, err) + require.Len(t, sources, 1) + + before, err := set.Fingerprint(ctx, sources[0]) + require.NoError(t, err) + require.NotEmpty(t, before.Hash) + + // Rewrite the companion to the same length so 
size and mtime resolution are + // not the only signals; the content hash must still change. + writeFile(t, companion, "v2") + after, err := set.Fingerprint(ctx, sources[0]) + require.NoError(t, err) + assert.NotEqual(t, before.Hash, after.Hash, + "companion content must be mixed into the fingerprint hash") +} + +func TestJSONLSourceSetCompanionChangedPathMapsToTranscript(t *testing.T) { + ctx := context.Background() + root := t.TempDir() + transcript := filepath.Join(root, "session.jsonl") + companion := transcript + ".meta" + writeFile(t, transcript, `{"line":1}`+"\n") + writeFile(t, companion, "meta") + + set := newJSONLSourceSet( + AgentClaude, + []string{root}, + withCompanionFiles(companionFor), + ) + + changed, err := set.SourcesForChangedPath(ctx, ChangedPathRequest{ + Path: companion, + EventKind: "write", + WatchRoot: root, + }) + require.NoError(t, err) + require.Len(t, changed, 1, + "a companion change must map back to its owning transcript") + + src, ok := changed[0].Opaque.(JSONLSource) + require.True(t, ok) + assert.Equal(t, transcript, src.Path) +} + +func TestJSONLSourceSetCompanionWatchPlanIncludesCompanionGlob(t *testing.T) { + ctx := context.Background() + root := t.TempDir() + transcript := filepath.Join(root, "session.jsonl") + writeFile(t, transcript, `{"line":1}`+"\n") + writeFile(t, transcript+".meta", "meta") + + set := newJSONLSourceSet( + AgentClaude, + []string{root}, + withCompanionFiles(companionFor), + ) + + plan, err := set.WatchPlan(ctx) + require.NoError(t, err) + require.Len(t, plan.Roots, 1) + assert.Contains(t, plan.Roots[0].IncludeGlobs, "session.jsonl.meta") + assert.Contains(t, plan.Roots[0].IncludeGlobs, "*.jsonl") +} + +func TestJSONLSourceSetWithoutCompanionsUnaffected(t *testing.T) { + ctx := context.Background() + root := t.TempDir() + transcript := filepath.Join(root, "session.jsonl") + writeFile(t, transcript, `{"line":1}`+"\n") + + set := newJSONLSourceSet(AgentClaude, []string{root}) + sources, err := 
set.Discover(ctx) + require.NoError(t, err) + require.Len(t, sources, 1) + + fp, err := set.Fingerprint(ctx, sources[0]) + require.NoError(t, err) + + info, err := os.Stat(transcript) + require.NoError(t, err) + // Without a companion hook the fingerprint size is exactly the transcript + // size, confirming the companion folding is inert when unconfigured. + assert.Equal(t, info.Size(), fp.Size) +} diff --git a/internal/parser/jsonl_source_set_options.go b/internal/parser/jsonl_source_set_options.go index 1ff3256d9..863417b30 100644 --- a/internal/parser/jsonl_source_set_options.go +++ b/internal/parser/jsonl_source_set_options.go @@ -137,3 +137,14 @@ func withParseFile(fn jsonlParseFileFunc) jsonlOption { func withForceReplace() jsonlOption { return func(o *JSONLSourceSetOptions) { o.ForceReplace = true } } + +// --- companions --- + +// withCompanionFiles registers a sidecar hook that returns the companion files +// belonging to a transcript, given the transcript's path. The base folds the +// companions into the watch plan globs, the SourceFingerprint, and changed-path +// mapping, so a companion change re-parses its transcript. It reuses the +// sibling-metadata plumbing rather than adding an independent mechanism. +func withCompanionFiles(fn func(transcriptPath string) []string) jsonlOption { + return func(o *JSONLSourceSetOptions) { o.CompanionFiles = fn } +} From ca69b1f7d03247b0ddbb6f8ca610f5c0248bf5a1 Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Thu, 25 Jun 2026 18:52:15 -0400 Subject: [PATCH 05/24] test(parser): relocate shared jsonl source-set test helpers to framework writeSourceFile and the generic source-set tests were introduced by the qwenpaw migration but are framework-level helpers used by ~20 provider test files. Placing them on the source-set-framework branch lets every family branch build its tests. 
--- internal/parser/jsonl_source_set_test.go | 446 +++++++++++++++++++++++ 1 file changed, 446 insertions(+) create mode 100644 internal/parser/jsonl_source_set_test.go diff --git a/internal/parser/jsonl_source_set_test.go b/internal/parser/jsonl_source_set_test.go new file mode 100644 index 000000000..e2a8ab813 --- /dev/null +++ b/internal/parser/jsonl_source_set_test.go @@ -0,0 +1,446 @@ +package parser + +import ( + "context" + "crypto/sha256" + "fmt" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestJSONLSourceSetDiscoverRecursiveStableSources(t *testing.T) { + root := t.TempDir() + writeSourceFile(t, filepath.Join(root, "b.jsonl"), "{}\n") + writeSourceFile(t, filepath.Join(root, "a.jsonl"), "{}\n") + writeSourceFile(t, filepath.Join(root, "nested", "c.jsonl"), "{}\n") + writeSourceFile(t, filepath.Join(root, "nested", "ignored.txt"), "{}\n") + writeSourceFile(t, filepath.Join(root, "nested", "upper.JSONL"), "{}\n") + + roots := []string{root} + sources := newJSONLSourceSet(AgentCodex, roots, + withRecursive(), + withKey(func(root, path string) string { + return mustRelSlash(t, root, path) + }), + withProjectHint(func(root, path string) string { + rel := mustRelSlash(t, root, filepath.Dir(path)) + if rel == "." 
{ + return "" + } + return rel + }), + ) + roots[0] = filepath.Join(root, "mutated") + + discovered, err := sources.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 3) + + assert.Equal(t, []string{ + "a.jsonl", + "b.jsonl", + "nested/c.jsonl", + }, sourceKeys(discovered)) + assert.Equal(t, []string{"", "", "nested"}, sourceProjects(discovered)) + for _, source := range discovered { + assert.Equal(t, AgentCodex, source.Provider) + assert.Equal(t, source.DisplayPath, source.FingerprintKey) + assert.NotEmpty(t, source.DisplayPath) + assert.IsType(t, JSONLSource{}, source.Opaque) + } +} + +func TestJSONLSourceSetShallowDiscoveryAndFilters(t *testing.T) { + root := t.TempDir() + writeSourceFile(t, filepath.Join(root, "keep.jsonl"), "{}\n") + writeSourceFile(t, filepath.Join(root, "keep.ndjson"), "{}\n") + writeSourceFile(t, filepath.Join(root, "drop.jsonl"), "{}\n") + writeSourceFile(t, filepath.Join(root, "nested", "skip.jsonl"), "{}\n") + + sources := newJSONLSourceSet(AgentGptme, []string{root}, + withExtensions(".jsonl", ".ndjson"), + withInclude(func(path string, _ os.FileInfo) bool { + return filepath.Base(path) != "drop.jsonl" + }), + ) + + discovered, err := sources.Discover(context.Background()) + require.NoError(t, err) + + assert.Equal(t, []string{ + filepath.Join(root, "keep.jsonl"), + filepath.Join(root, "keep.ndjson"), + }, sourceDisplayPaths(discovered)) +} + +func TestJSONLSourceSetWatchChangedPathFindAndFingerprint(t *testing.T) { + root := t.TempDir() + path := filepath.Join(root, "nested", "session-1.jsonl") + content := "{\"role\":\"user\"}\n" + writeSourceFile(t, path, content) + writeSourceFile(t, filepath.Join(root, "nested", "notes.txt"), "{}\n") + + sources := newJSONLSourceSet(AgentCodex, []string{root}, + withRecursive(), + withContentHashing(), + ) + + plan, err := sources.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 1) + assert.Equal(t, root, plan.Roots[0].Path) + 
assert.True(t, plan.Roots[0].Recursive) + assert.Equal(t, []string{"*.jsonl"}, plan.Roots[0].IncludeGlobs) + assert.NotEmpty(t, plan.Roots[0].DebounceKey) + + changed, err := sources.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: path, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, path, changed[0].Key) + assert.Equal(t, path, changed[0].DisplayPath) + assert.Equal(t, path, changed[0].FingerprintKey) + + ignored, err := sources.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{ + Path: filepath.Join(root, "nested", "notes.txt"), + EventKind: "write", + WatchRoot: root, + }, + ) + require.NoError(t, err) + assert.Empty(t, ignored) + + outside, err := sources.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{ + Path: filepath.Join(t.TempDir(), "session-1.jsonl"), + EventKind: "write", + WatchRoot: root, + }, + ) + require.NoError(t, err) + assert.Empty(t, outside) + + found, ok, err := sources.FindSource(context.Background(), FindSourceRequest{ + StoredFilePath: path, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, path, found.DisplayPath) + + foundByID, ok, err := sources.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "session-1", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, found.DisplayPath, foundByID.DisplayPath) + + withoutOpaque := found + withoutOpaque.Opaque = nil + fingerprint, err := sources.Fingerprint(context.Background(), withoutOpaque) + require.NoError(t, err) + + info, err := os.Stat(path) + require.NoError(t, err) + assert.Equal(t, path, fingerprint.Key) + assert.Equal(t, info.Size(), fingerprint.Size) + assert.Equal(t, info.ModTime().UnixNano(), fingerprint.MTimeNS) + assert.Equal(t, fmt.Sprintf("%x", sha256.Sum256([]byte(content))), fingerprint.Hash) +} + +func TestJSONLSourceSetFindSourceUsesFingerprintKey(t *testing.T) { + root := t.TempDir() + path 
:= filepath.Join(root, "nested", "session-1.jsonl") + writeSourceFile(t, path, "{}\n") + + defaultSources := newJSONLSourceSet( + AgentCodex, []string{root}, withRecursive(), + ) + found, ok, err := defaultSources.FindSource( + context.Background(), + FindSourceRequest{FingerprintKey: path}, + ) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, path, found.DisplayPath) + + customSources := newJSONLSourceSet(AgentCodex, []string{root}, + withRecursive(), + withFingerprintKey(func(root, path string) string { + return "fingerprint:" + mustRelSlash(t, root, path) + }), + ) + found, ok, err = customSources.FindSource( + context.Background(), + FindSourceRequest{FingerprintKey: "fingerprint:nested/session-1.jsonl"}, + ) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, path, found.DisplayPath) + assert.Equal(t, "fingerprint:nested/session-1.jsonl", found.FingerprintKey) +} + +func TestJSONLSourceSetChangedPathClassifiesDeletedFiles(t *testing.T) { + root := t.TempDir() + path := filepath.Join(root, "nested", "deleted.jsonl") + sources := newJSONLSourceSet(AgentCodex, []string{root}, + withRecursive(), + ) + + changed, err := sources.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: path, EventKind: "remove", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, path, changed[0].Key) + assert.Equal(t, path, changed[0].DisplayPath) + assert.Equal(t, path, changed[0].FingerprintKey) + assert.Equal(t, "nested/deleted.jsonl", changed[0].Opaque.(JSONLSource).RelPath) + + shallowPath := filepath.Join(root, "nested", "ignored.jsonl") + shallowSources := newJSONLSourceSet(AgentCodex, []string{root}) + changed, err = shallowSources.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: shallowPath, EventKind: "remove", WatchRoot: root}, + ) + require.NoError(t, err) + assert.Empty(t, changed) +} + +func TestJSONLSourceSetChangedPathRejectsExistingNonRegularPath(t 
*testing.T) { + root := t.TempDir() + path := filepath.Join(root, "nested", "not-a-source.jsonl") + require.NoError(t, os.MkdirAll(path, 0o755)) + + sources := newJSONLSourceSet(AgentCodex, []string{root}, + withRecursive(), + ) + + changed, err := sources.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: path, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + assert.Empty(t, changed) +} + +func TestJSONLSourceSetChangedPathUsesPathOnlyFilterForDeletedFiles(t *testing.T) { + root := t.TempDir() + sources := newJSONLSourceSet(AgentCodex, []string{root}, + withRecursive(), + withIncludePath(func(root, path string) bool { + return filepath.Base(path) == "events.jsonl" + }), + ) + + ignored, err := sources.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{ + Path: filepath.Join(root, "session", "notes.jsonl"), + EventKind: "remove", + WatchRoot: root, + }, + ) + require.NoError(t, err) + assert.Empty(t, ignored) + + changed, err := sources.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{ + Path: filepath.Join(root, "session", "events.jsonl"), + EventKind: "remove", + WatchRoot: root, + }, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, filepath.Join(root, "session", "events.jsonl"), changed[0].DisplayPath) +} + +func TestJSONLSourceSetDescendPathPrunesSources(t *testing.T) { + root := t.TempDir() + keepPath := filepath.Join(root, "keep", "session.jsonl") + skipPath := filepath.Join(root, "skip", "session.jsonl") + writeSourceFile(t, keepPath, "{}\n") + writeSourceFile(t, skipPath, "{}\n") + + sources := newJSONLSourceSet(AgentCodex, []string{root}, + withRecursive(), + withDescendPath(func(root, path string) bool { + return filepath.Base(path) != "skip" + }), + ) + + discovered, err := sources.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, keepPath, discovered[0].DisplayPath) + + changed, err := 
sources.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: skipPath, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + assert.Empty(t, changed) + + removed, err := sources.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{ + Path: filepath.Join(root, "skip", "removed.jsonl"), + EventKind: "remove", + WatchRoot: root, + }, + ) + require.NoError(t, err) + assert.Empty(t, removed) +} + +func TestJSONLSourceSetDuplicateKeysKeepFirstConfiguredRoot(t *testing.T) { + firstRoot := t.TempDir() + secondRoot := t.TempDir() + firstPath := filepath.Join(firstRoot, "session.jsonl") + secondPath := filepath.Join(secondRoot, "session.jsonl") + writeSourceFile(t, firstPath, "{}\n") + writeSourceFile(t, secondPath, "{}\n") + + sources := newJSONLSourceSet(AgentCodex, []string{firstRoot, secondRoot}, + withKey(func(_, path string) string { + return filepath.Base(path) + }), + ) + + discovered, err := sources.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, firstPath, discovered[0].DisplayPath) + + found, ok, err := sources.FindSource( + context.Background(), + FindSourceRequest{StoredFilePath: secondPath}, + ) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, firstPath, found.DisplayPath) + + changed, err := sources.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{ + Path: secondPath, + EventKind: "write", + WatchRoot: secondRoot, + }, + ) + require.NoError(t, err) + assert.Empty(t, changed) +} + +func TestJSONLSourceSetFindSourceNormalizesRawSessionID(t *testing.T) { + root := t.TempDir() + path := filepath.Join(root, "session-1.jsonl") + writeSourceFile(t, path, "{}\n") + + // LookupIDValid rejects the raw, un-normalized form, so a lookup only + // succeeds when RawSessionIDForLookup runs before the validity gate and + // before the SessionIDFromPath comparison in the discovery loop. 
The + // on-disk session ID is "session-1" (base name without extension), which + // the raw "raw:session-1" only matches once normalized. + rejectsRaw := func(rawID string) bool { + return rawID != "" && !strings.HasPrefix(rawID, "raw:") + } + + normalizing := newJSONLSourceSet(AgentCodex, []string{root}, + withRawSessionIDForLookup(func(rawID string) string { + return strings.TrimPrefix(rawID, "raw:") + }), + withLookupIDValid(rejectsRaw), + ) + + found, ok, err := normalizing.FindSource( + context.Background(), + FindSourceRequest{RawSessionID: "raw:session-1"}, + ) + require.NoError(t, err) + require.True(t, ok, "normalized raw session ID must resolve its source") + assert.Equal(t, path, found.DisplayPath) + + // Without the normalizer the identical request is gated out: the raw form + // fails LookupIDValid and never matches the on-disk session ID. This locks + // in that the normalization step is what enables both checks. + unnormalized := newJSONLSourceSet(AgentCodex, []string{root}, + withLookupIDValid(rejectsRaw), + ) + + _, ok, err = unnormalized.FindSource( + context.Background(), + FindSourceRequest{RawSessionID: "raw:session-1"}, + ) + require.NoError(t, err) + assert.False(t, ok, "un-normalized raw session ID must not resolve") +} + +func TestJSONLSourceSetMissingRootAndInvalidLookupAreNoops(t *testing.T) { + root := t.TempDir() + sources := newJSONLSourceSet(AgentCodex, []string{ + filepath.Join(root, "missing"), + }) + + discovered, err := sources.Discover(context.Background()) + require.NoError(t, err) + assert.Empty(t, discovered) + + found, ok, err := sources.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "../session", + }) + require.NoError(t, err) + assert.False(t, ok) + assert.Empty(t, found) +} + +func writeSourceFile(t *testing.T, path, content string) { + t.Helper() + + require.NoError(t, os.MkdirAll(filepath.Dir(path), 0o755)) + require.NoError(t, os.WriteFile(path, []byte(content), 0o644)) +} + +func mustRelSlash(t 
*testing.T, root, path string) string { + t.Helper() + + rel, err := filepath.Rel(root, path) + require.NoError(t, err) + return filepath.ToSlash(rel) +} + +func sourceKeys(sources []SourceRef) []string { + keys := make([]string, 0, len(sources)) + for _, source := range sources { + keys = append(keys, source.Key) + } + return keys +} + +func sourceProjects(sources []SourceRef) []string { + projects := make([]string, 0, len(sources)) + for _, source := range sources { + projects = append(projects, source.ProjectHint) + } + return projects +} + +func sourceDisplayPaths(sources []SourceRef) []string { + paths := make([]string, 0, len(sources)) + for _, source := range sources { + paths = append(paths, source.DisplayPath) + } + return paths +} From 0ac2edffad112a97e88c85ad2a579ee63588294e Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Thu, 25 Jun 2026 19:03:59 -0400 Subject: [PATCH 06/24] refactor(parser): export the source-set framework API The reusable source-set scaffolding (functional options, source-set constructors, and the generic source-set provider/factory) was unexported. Because providers don't consume it until higher branches in the stack, staticcheck's unused linter flagged ~62 of these symbols as dead at every mid-stack branch, and the always-run golangci-lint pre-commit hook failed on each such commit. Exporting the API makes the unused analyzer ignore them (it never reports exported identifiers), eliminating the spurious findings stack-wide with no import cycle. 
--- internal/parser/directory_jsonl_source_set.go | 14 ++-- internal/parser/jsonl_source_set.go | 16 ++-- .../parser/jsonl_source_set_companion_test.go | 22 ++--- internal/parser/jsonl_source_set_options.go | 84 +++++++++---------- internal/parser/jsonl_source_set_test.go | 68 +++++++-------- internal/parser/multi_session_container.go | 70 ++++++++-------- internal/parser/provider_lookup.go | 10 +-- internal/parser/single_file_source_set.go | 60 ++++++------- internal/parser/source_set.go | 40 ++++----- 9 files changed, 192 insertions(+), 192 deletions(-) diff --git a/internal/parser/directory_jsonl_source_set.go b/internal/parser/directory_jsonl_source_set.go index 6a6aa9877..6b441b564 100644 --- a/internal/parser/directory_jsonl_source_set.go +++ b/internal/parser/directory_jsonl_source_set.go @@ -12,14 +12,14 @@ type DirectoryJSONLSourceSet struct { JSONLSourceSet } -// newDirectoryJSONLSourceSet returns a JSONL source helper for providers whose +// NewDirectoryJSONLSourceSet returns a JSONL source helper for providers whose // transcripts live one project directory below each configured root. The // returned helper is always recursive enough to classify watched project files, // but it rejects root-level and deeper nested files through IncludePath. 
-func newDirectoryJSONLSourceSet( +func NewDirectoryJSONLSourceSet( provider AgentType, roots []string, - opts ...jsonlOption, + opts ...JSONLOption, ) DirectoryJSONLSourceSet { var options JSONLSourceSetOptions for _, opt := range opts { @@ -28,14 +28,14 @@ func newDirectoryJSONLSourceSet( userIncludePath := options.IncludePath options.Recursive = true options.IncludePath = func(root, path string) bool { - if !isDirectoryJSONLPath(root, path) { + if !IsDirectoryJSONLPath(root, path) { return false } return userIncludePath == nil || userIncludePath(root, path) } if options.ProjectHint == nil { options.ProjectHint = func(root, path string) string { - return directoryJSONLProjectFromPath(path) + return DirectoryJSONLProjectFromPath(path) } } return DirectoryJSONLSourceSet{ @@ -43,7 +43,7 @@ func newDirectoryJSONLSourceSet( } } -func isDirectoryJSONLPath(root, path string) bool { +func IsDirectoryJSONLPath(root, path string) bool { rel, err := filepath.Rel(root, path) if err != nil { return false @@ -54,6 +54,6 @@ func isDirectoryJSONLPath(root, path string) bool { parts[1] != "" && parts[1] != "." && parts[1] != ".." } -func directoryJSONLProjectFromPath(path string) string { +func DirectoryJSONLProjectFromPath(path string) string { return filepath.Base(filepath.Dir(path)) } diff --git a/internal/parser/jsonl_source_set.go b/internal/parser/jsonl_source_set.go index 6b6f4923f..2e8062059 100644 --- a/internal/parser/jsonl_source_set.go +++ b/internal/parser/jsonl_source_set.go @@ -78,7 +78,7 @@ type JSONLSourceSetOptions struct { // discovery comparison. Providers whose stored IDs carry a suffix the // discovered filename stem lacks (for example iFlow subagent IDs) reduce it // to the base ID here so the comparison still matches. It runs after - // providerFindRequestWithRawSessionID and before the LookupIDValid gate. + // ProviderFindRequestWithRawSessionID and before the LookupIDValid gate. 
RawSessionIDForLookup func(rawID string) string // RawSessionIDSourceFiles reconstructs candidate file paths from a raw // session ID for providers whose IDs encode the on-disk layout rather than @@ -99,7 +99,7 @@ type JSONLSourceSetOptions struct { // plus the IDs of any sessions to exclude. Empty results with no exclusions // is a clean no-session. It is what makes JSONLSourceSet a full SourceSet // (its Parse method); leave it nil for discovery-only embedders that supply - // their own Parse. ctx and req.Machine are supplied by sourceSetProvider. + // their own Parse. ctx and req.Machine are supplied by SourceSetProvider. ParseFile jsonlParseFileFunc // ForceReplace marks every non-empty parse outcome from ParseFile as a full // replacement of the source's existing sessions, for providers whose @@ -129,13 +129,13 @@ type JSONLSourceSet struct { extensions []string } -// newJSONLSourceSet builds a JSONL source set for a provider's roots from +// NewJSONLSourceSet builds a JSONL source set for a provider's roots from // functional options. Every option has a zero-value default, so callers state // only what differs. -func newJSONLSourceSet( +func NewJSONLSourceSet( provider AgentType, roots []string, - opts ...jsonlOption, + opts ...JSONLOption, ) JSONLSourceSet { var options JSONLSourceSetOptions for _, opt := range opts { @@ -144,8 +144,8 @@ func newJSONLSourceSet( return jsonlSourceSetFromOptions(provider, roots, options) } -// jsonlSourceSetFromOptions is the shared constructor used by newJSONLSourceSet -// and newDirectoryJSONLSourceSet once options have been resolved. +// jsonlSourceSetFromOptions is the shared constructor used by NewJSONLSourceSet +// and NewDirectoryJSONLSourceSet once options have been resolved. 
func jsonlSourceSetFromOptions( provider AgentType, roots []string, @@ -453,7 +453,7 @@ func (s JSONLSourceSet) foldCompanionFingerprint( // Parse resolves the request's source to a file and parses it via the ParseFile // option, making JSONLSourceSet a full SourceSet. It mirrors the single-file // base's parse semantics: empty results with no exclusions is a clean -// no-session skip. sourceSetProvider resolves req.Machine before calling in. +// no-session skip. SourceSetProvider resolves req.Machine before calling in. func (s JSONLSourceSet) Parse( ctx context.Context, req ParseRequest, diff --git a/internal/parser/jsonl_source_set_companion_test.go b/internal/parser/jsonl_source_set_companion_test.go index 79614d205..83fdb08a4 100644 --- a/internal/parser/jsonl_source_set_companion_test.go +++ b/internal/parser/jsonl_source_set_companion_test.go @@ -11,7 +11,7 @@ import ( ) // companionFor maps a transcript path to its sibling ".meta" companion. It is -// the shape a provider passes to withCompanionFiles. +// the shape a provider passes to WithCompanionFiles. 
func companionFor(transcriptPath string) []string { return []string{transcriptPath + ".meta"} } @@ -29,10 +29,10 @@ func TestJSONLSourceSetCompanionFingerprintReflectsCompanionChange(t *testing.T) writeFile(t, transcript, `{"line":1}`+"\n") writeFile(t, companion, "v1") - set := newJSONLSourceSet( + set := NewJSONLSourceSet( AgentClaude, []string{root}, - withCompanionFiles(companionFor), + WithCompanionFiles(companionFor), ) sources, err := set.Discover(ctx) @@ -63,11 +63,11 @@ func TestJSONLSourceSetCompanionFingerprintHashChanges(t *testing.T) { writeFile(t, transcript, `{"line":1}`+"\n") writeFile(t, companion, "v1") - set := newJSONLSourceSet( + set := NewJSONLSourceSet( AgentClaude, []string{root}, - withContentHashing(), - withCompanionFiles(companionFor), + WithContentHashing(), + WithCompanionFiles(companionFor), ) sources, err := set.Discover(ctx) require.NoError(t, err) @@ -94,10 +94,10 @@ func TestJSONLSourceSetCompanionChangedPathMapsToTranscript(t *testing.T) { writeFile(t, transcript, `{"line":1}`+"\n") writeFile(t, companion, "meta") - set := newJSONLSourceSet( + set := NewJSONLSourceSet( AgentClaude, []string{root}, - withCompanionFiles(companionFor), + WithCompanionFiles(companionFor), ) changed, err := set.SourcesForChangedPath(ctx, ChangedPathRequest{ @@ -121,10 +121,10 @@ func TestJSONLSourceSetCompanionWatchPlanIncludesCompanionGlob(t *testing.T) { writeFile(t, transcript, `{"line":1}`+"\n") writeFile(t, transcript+".meta", "meta") - set := newJSONLSourceSet( + set := NewJSONLSourceSet( AgentClaude, []string{root}, - withCompanionFiles(companionFor), + WithCompanionFiles(companionFor), ) plan, err := set.WatchPlan(ctx) @@ -140,7 +140,7 @@ func TestJSONLSourceSetWithoutCompanionsUnaffected(t *testing.T) { transcript := filepath.Join(root, "session.jsonl") writeFile(t, transcript, `{"line":1}`+"\n") - set := newJSONLSourceSet(AgentClaude, []string{root}) + set := NewJSONLSourceSet(AgentClaude, []string{root}) sources, err := set.Discover(ctx) 
require.NoError(t, err) require.Len(t, sources, 1) diff --git a/internal/parser/jsonl_source_set_options.go b/internal/parser/jsonl_source_set_options.go index 863417b30..08ddd9bdc 100644 --- a/internal/parser/jsonl_source_set_options.go +++ b/internal/parser/jsonl_source_set_options.go @@ -11,140 +11,140 @@ type jsonlParseFileFunc func( ctx context.Context, path string, req ParseRequest, ) ([]ParseResult, []string, error) -// jsonlOption configures a JSONLSourceSet (or DirectoryJSONLSourceSet) at +// JSONLOption configures a JSONLSourceSet (or DirectoryJSONLSourceSet) at // construction. Options compose left to right; a later option of the same kind // overwrites an earlier one. Every field has a sensible zero value, so a source // set only states what differs from the default. -type jsonlOption func(*JSONLSourceSetOptions) +type JSONLOption func(*JSONLSourceSetOptions) // --- discovery shape --- -// withRecursive traverses subdirectories below each root rather than only the +// WithRecursive traverses subdirectories below each root rather than only the // direct children. -func withRecursive() jsonlOption { +func WithRecursive() JSONLOption { return func(o *JSONLSourceSetOptions) { o.Recursive = true } } -// withExtensions restricts sources to the given file extensions (default +// WithExtensions restricts sources to the given file extensions (default // .jsonl). Matching is case-sensitive to mirror legacy discovery. -func withExtensions(exts ...string) jsonlOption { +func WithExtensions(exts ...string) JSONLOption { return func(o *JSONLSourceSetOptions) { o.Extensions = exts } } -// withContentHashing includes a full content hash in the source fingerprint. +// WithContentHashing includes a full content hash in the source fingerprint. // Use only when size/mtime freshness is insufficient. 
-func withContentHashing() jsonlOption { +func WithContentHashing() JSONLOption { return func(o *JSONLSourceSetOptions) { o.Hash = true } } -// withSymlinkFollowing treats symlinks to both directories and regular files as +// WithSymlinkFollowing treats symlinks to both directories and regular files as // traversable/source candidates. It is the common bundle for providers whose // legacy discovery followed symlinked session trees. -func withSymlinkFollowing() jsonlOption { +func WithSymlinkFollowing() JSONLOption { return func(o *JSONLSourceSetOptions) { o.FollowSymlinkDirs = true o.FollowSymlinkFiles = true } } -// withFollowSymlinkFiles treats symlinks to regular files as sources. -func withFollowSymlinkFiles() jsonlOption { +// WithFollowSymlinkFiles treats symlinks to regular files as sources. +func WithFollowSymlinkFiles() JSONLOption { return func(o *JSONLSourceSetOptions) { o.FollowSymlinkFiles = true } } -// withDescendPath gates which directories recursive discovery descends into and +// WithDescendPath gates which directories recursive discovery descends into and // which source ancestors a changed path may sit under. -func withDescendPath(fn func(root, path string) bool) jsonlOption { +func WithDescendPath(fn func(root, path string) bool) JSONLOption { return func(o *JSONLSourceSetOptions) { o.DescendPath = fn } } -// withIncludePath sets the path-only source predicate, also used for +// WithIncludePath sets the path-only source predicate, also used for // deleted/renamed changed paths where os.FileInfo is unavailable. -func withIncludePath(fn func(root, path string) bool) jsonlOption { +func WithIncludePath(fn func(root, path string) bool) JSONLOption { return func(o *JSONLSourceSetOptions) { o.IncludePath = fn } } -// withInclude sets a source predicate for existing files that also sees the +// WithInclude sets a source predicate for existing files that also sees the // os.FileInfo. It is not called for deleted/renamed changed paths. 
-func withInclude(fn func(path string, info os.FileInfo) bool) jsonlOption { +func WithInclude(fn func(path string, info os.FileInfo) bool) JSONLOption { return func(o *JSONLSourceSetOptions) { o.Include = fn } } // --- identity / metadata --- -// withKey sets the stable per-source dedup key. -func withKey(fn func(root, path string) string) jsonlOption { +// WithKey sets the stable per-source dedup key. +func WithKey(fn func(root, path string) string) JSONLOption { return func(o *JSONLSourceSetOptions) { o.Key = fn } } -// withFingerprintKey overrides the persisted lookup/freshness identity when the +// WithFingerprintKey overrides the persisted lookup/freshness identity when the // display path is not the value that should survive a provider migration. -func withFingerprintKey(fn func(root, path string) string) jsonlOption { +func WithFingerprintKey(fn func(root, path string) string) JSONLOption { return func(o *JSONLSourceSetOptions) { o.FingerprintKey = fn } } -// withProjectHint sets display-only project metadata for a source. -func withProjectHint(fn func(root, path string) string) jsonlOption { +// WithProjectHint sets display-only project metadata for a source. +func WithProjectHint(fn func(root, path string) string) JSONLOption { return func(o *JSONLSourceSetOptions) { o.ProjectHint = fn } } -// withSessionIDFromPath sets the raw (unprefixed) session ID used by FindSource +// WithSessionIDFromPath sets the raw (unprefixed) session ID used by FindSource // fallback lookups. -func withSessionIDFromPath(fn func(root, path string) string) jsonlOption { +func WithSessionIDFromPath(fn func(root, path string) string) JSONLOption { return func(o *JSONLSourceSetOptions) { o.SessionIDFromPath = fn } } // --- lookup --- -// withLookupIDValid overrides the IsValidSessionID gate for the FindSource +// WithLookupIDValid overrides the IsValidSessionID gate for the FindSource // discovery fallback, for providers whose IDs carry separators it rejects. 
-func withLookupIDValid(fn func(rawID string) bool) jsonlOption { +func WithLookupIDValid(fn func(rawID string) bool) JSONLOption { return func(o *JSONLSourceSetOptions) { o.LookupIDValid = fn } } -// withRawSessionIDForLookup normalizes a raw session ID before the FindSource +// WithRawSessionIDForLookup normalizes a raw session ID before the FindSource // discovery comparison. -func withRawSessionIDForLookup(fn func(rawID string) string) jsonlOption { +func WithRawSessionIDForLookup(fn func(rawID string) string) JSONLOption { return func(o *JSONLSourceSetOptions) { o.RawSessionIDForLookup = fn } } -// withRawSessionIDSourceFiles reconstructs candidate file paths from a raw +// WithRawSessionIDSourceFiles reconstructs candidate file paths from a raw // session ID for providers whose IDs encode the on-disk layout. -func withRawSessionIDSourceFiles( +func WithRawSessionIDSourceFiles( fn func(roots []string, rawID string) []string, -) jsonlOption { +) JSONLOption { return func(o *JSONLSourceSetOptions) { o.RawSessionIDSourceFiles = fn } } -// withStoredPathFallbackRoot resolves the configured root for a stored source +// WithStoredPathFallbackRoot resolves the configured root for a stored source // path that is not under any current root. -func withStoredPathFallbackRoot( +func WithStoredPathFallbackRoot( fn func(storedPath string) (string, bool), -) jsonlOption { +) JSONLOption { return func(o *JSONLSourceSetOptions) { o.StoredPathFallbackRoot = fn } } // --- parse --- -// withParseFile makes the source set a full SourceSet by supplying its parse +// WithParseFile makes the source set a full SourceSet by supplying its parse // step. Leave it unset for discovery-only embedders that supply their own Parse. 
-func withParseFile(fn jsonlParseFileFunc) jsonlOption { +func WithParseFile(fn jsonlParseFileFunc) JSONLOption { return func(o *JSONLSourceSetOptions) { o.ParseFile = fn } } -// withForceReplace marks every non-empty ParseFile outcome as a full +// WithForceReplace marks every non-empty ParseFile outcome as a full // replacement of the source's existing sessions. -func withForceReplace() jsonlOption { +func WithForceReplace() JSONLOption { return func(o *JSONLSourceSetOptions) { o.ForceReplace = true } } // --- companions --- -// withCompanionFiles registers a sidecar hook that returns the companion files +// WithCompanionFiles registers a sidecar hook that returns the companion files // belonging to a transcript, given the transcript's path. The base folds the // companions into the watch plan globs, the SourceFingerprint, and changed-path // mapping, so a companion change re-parses its transcript. It reuses the // sibling-metadata plumbing rather than adding an independent mechanism. 
-func withCompanionFiles(fn func(transcriptPath string) []string) jsonlOption { +func WithCompanionFiles(fn func(transcriptPath string) []string) JSONLOption { return func(o *JSONLSourceSetOptions) { o.CompanionFiles = fn } } diff --git a/internal/parser/jsonl_source_set_test.go b/internal/parser/jsonl_source_set_test.go index e2a8ab813..ef59f181a 100644 --- a/internal/parser/jsonl_source_set_test.go +++ b/internal/parser/jsonl_source_set_test.go @@ -22,12 +22,12 @@ func TestJSONLSourceSetDiscoverRecursiveStableSources(t *testing.T) { writeSourceFile(t, filepath.Join(root, "nested", "upper.JSONL"), "{}\n") roots := []string{root} - sources := newJSONLSourceSet(AgentCodex, roots, - withRecursive(), - withKey(func(root, path string) string { + sources := NewJSONLSourceSet(AgentCodex, roots, + WithRecursive(), + WithKey(func(root, path string) string { return mustRelSlash(t, root, path) }), - withProjectHint(func(root, path string) string { + WithProjectHint(func(root, path string) string { rel := mustRelSlash(t, root, filepath.Dir(path)) if rel == "." 
{ return "" @@ -62,9 +62,9 @@ func TestJSONLSourceSetShallowDiscoveryAndFilters(t *testing.T) { writeSourceFile(t, filepath.Join(root, "drop.jsonl"), "{}\n") writeSourceFile(t, filepath.Join(root, "nested", "skip.jsonl"), "{}\n") - sources := newJSONLSourceSet(AgentGptme, []string{root}, - withExtensions(".jsonl", ".ndjson"), - withInclude(func(path string, _ os.FileInfo) bool { + sources := NewJSONLSourceSet(AgentGptme, []string{root}, + WithExtensions(".jsonl", ".ndjson"), + WithInclude(func(path string, _ os.FileInfo) bool { return filepath.Base(path) != "drop.jsonl" }), ) @@ -85,9 +85,9 @@ func TestJSONLSourceSetWatchChangedPathFindAndFingerprint(t *testing.T) { writeSourceFile(t, path, content) writeSourceFile(t, filepath.Join(root, "nested", "notes.txt"), "{}\n") - sources := newJSONLSourceSet(AgentCodex, []string{root}, - withRecursive(), - withContentHashing(), + sources := NewJSONLSourceSet(AgentCodex, []string{root}, + WithRecursive(), + WithContentHashing(), ) plan, err := sources.WatchPlan(context.Background()) @@ -162,8 +162,8 @@ func TestJSONLSourceSetFindSourceUsesFingerprintKey(t *testing.T) { path := filepath.Join(root, "nested", "session-1.jsonl") writeSourceFile(t, path, "{}\n") - defaultSources := newJSONLSourceSet( - AgentCodex, []string{root}, withRecursive(), + defaultSources := NewJSONLSourceSet( + AgentCodex, []string{root}, WithRecursive(), ) found, ok, err := defaultSources.FindSource( context.Background(), @@ -173,9 +173,9 @@ func TestJSONLSourceSetFindSourceUsesFingerprintKey(t *testing.T) { require.True(t, ok) assert.Equal(t, path, found.DisplayPath) - customSources := newJSONLSourceSet(AgentCodex, []string{root}, - withRecursive(), - withFingerprintKey(func(root, path string) string { + customSources := NewJSONLSourceSet(AgentCodex, []string{root}, + WithRecursive(), + WithFingerprintKey(func(root, path string) string { return "fingerprint:" + mustRelSlash(t, root, path) }), ) @@ -192,8 +192,8 @@ func 
TestJSONLSourceSetFindSourceUsesFingerprintKey(t *testing.T) { func TestJSONLSourceSetChangedPathClassifiesDeletedFiles(t *testing.T) { root := t.TempDir() path := filepath.Join(root, "nested", "deleted.jsonl") - sources := newJSONLSourceSet(AgentCodex, []string{root}, - withRecursive(), + sources := NewJSONLSourceSet(AgentCodex, []string{root}, + WithRecursive(), ) changed, err := sources.SourcesForChangedPath( @@ -208,7 +208,7 @@ func TestJSONLSourceSetChangedPathClassifiesDeletedFiles(t *testing.T) { assert.Equal(t, "nested/deleted.jsonl", changed[0].Opaque.(JSONLSource).RelPath) shallowPath := filepath.Join(root, "nested", "ignored.jsonl") - shallowSources := newJSONLSourceSet(AgentCodex, []string{root}) + shallowSources := NewJSONLSourceSet(AgentCodex, []string{root}) changed, err = shallowSources.SourcesForChangedPath( context.Background(), ChangedPathRequest{Path: shallowPath, EventKind: "remove", WatchRoot: root}, @@ -222,8 +222,8 @@ func TestJSONLSourceSetChangedPathRejectsExistingNonRegularPath(t *testing.T) { path := filepath.Join(root, "nested", "not-a-source.jsonl") require.NoError(t, os.MkdirAll(path, 0o755)) - sources := newJSONLSourceSet(AgentCodex, []string{root}, - withRecursive(), + sources := NewJSONLSourceSet(AgentCodex, []string{root}, + WithRecursive(), ) changed, err := sources.SourcesForChangedPath( @@ -236,9 +236,9 @@ func TestJSONLSourceSetChangedPathRejectsExistingNonRegularPath(t *testing.T) { func TestJSONLSourceSetChangedPathUsesPathOnlyFilterForDeletedFiles(t *testing.T) { root := t.TempDir() - sources := newJSONLSourceSet(AgentCodex, []string{root}, - withRecursive(), - withIncludePath(func(root, path string) bool { + sources := NewJSONLSourceSet(AgentCodex, []string{root}, + WithRecursive(), + WithIncludePath(func(root, path string) bool { return filepath.Base(path) == "events.jsonl" }), ) @@ -274,9 +274,9 @@ func TestJSONLSourceSetDescendPathPrunesSources(t *testing.T) { writeSourceFile(t, keepPath, "{}\n") writeSourceFile(t, 
skipPath, "{}\n") - sources := newJSONLSourceSet(AgentCodex, []string{root}, - withRecursive(), - withDescendPath(func(root, path string) bool { + sources := NewJSONLSourceSet(AgentCodex, []string{root}, + WithRecursive(), + WithDescendPath(func(root, path string) bool { return filepath.Base(path) != "skip" }), ) @@ -313,8 +313,8 @@ func TestJSONLSourceSetDuplicateKeysKeepFirstConfiguredRoot(t *testing.T) { writeSourceFile(t, firstPath, "{}\n") writeSourceFile(t, secondPath, "{}\n") - sources := newJSONLSourceSet(AgentCodex, []string{firstRoot, secondRoot}, - withKey(func(_, path string) string { + sources := NewJSONLSourceSet(AgentCodex, []string{firstRoot, secondRoot}, + WithKey(func(_, path string) string { return filepath.Base(path) }), ) @@ -358,11 +358,11 @@ func TestJSONLSourceSetFindSourceNormalizesRawSessionID(t *testing.T) { return rawID != "" && !strings.HasPrefix(rawID, "raw:") } - normalizing := newJSONLSourceSet(AgentCodex, []string{root}, - withRawSessionIDForLookup(func(rawID string) string { + normalizing := NewJSONLSourceSet(AgentCodex, []string{root}, + WithRawSessionIDForLookup(func(rawID string) string { return strings.TrimPrefix(rawID, "raw:") }), - withLookupIDValid(rejectsRaw), + WithLookupIDValid(rejectsRaw), ) found, ok, err := normalizing.FindSource( @@ -376,8 +376,8 @@ func TestJSONLSourceSetFindSourceNormalizesRawSessionID(t *testing.T) { // Without the normalizer the identical request is gated out: the raw form // fails LookupIDValid and never matches the on-disk session ID. This locks // in that the normalization step is what enables both checks. 
- unnormalized := newJSONLSourceSet(AgentCodex, []string{root}, - withLookupIDValid(rejectsRaw), + unnormalized := NewJSONLSourceSet(AgentCodex, []string{root}, + WithLookupIDValid(rejectsRaw), ) _, ok, err = unnormalized.FindSource( @@ -390,7 +390,7 @@ func TestJSONLSourceSetFindSourceNormalizesRawSessionID(t *testing.T) { func TestJSONLSourceSetMissingRootAndInvalidLookupAreNoops(t *testing.T) { root := t.TempDir() - sources := newJSONLSourceSet(AgentCodex, []string{ + sources := NewJSONLSourceSet(AgentCodex, []string{ filepath.Join(root, "missing"), }) diff --git a/internal/parser/multi_session_container.go b/internal/parser/multi_session_container.go index 9e9c79091..968b34daf 100644 --- a/internal/parser/multi_session_container.go +++ b/internal/parser/multi_session_container.go @@ -76,70 +76,70 @@ type multiSessionConfig struct { stampContainerHash bool } -type multiSessionOption func(*multiSessionConfig) +type MultiSessionOption func(*multiSessionConfig) -func withContainerDiscovery(fn func(root string) []string) multiSessionOption { +func WithContainerDiscovery(fn func(root string) []string) MultiSessionOption { return func(c *multiSessionConfig) { c.discoverContainers = fn } } -func withSourceDiscovery( +func WithSourceDiscovery( fn func(root string) []multiSessionMatch, -) multiSessionOption { +) MultiSessionOption { return func(c *multiSessionConfig) { c.discoverSources = fn } } -func withWatchRoots(fn func(roots []string) []WatchRoot) multiSessionOption { +func WithWatchRoots(fn func(roots []string) []WatchRoot) MultiSessionOption { return func(c *multiSessionConfig) { c.watchRoots = fn } } -func withChangedPathClassifier( +func WithChangedPathClassifier( fn func(root, path string, allowMissing bool) (multiSessionMatch, bool), -) multiSessionOption { +) MultiSessionOption { return func(c *multiSessionConfig) { c.classifyPath = fn } } -func withMemberLookup( +func WithMemberLookup( fn func(root, rawID string) (multiSessionMatch, bool), -) 
multiSessionOption { +) MultiSessionOption { return func(c *multiSessionConfig) { c.findMember = fn } } -func withStoredPathFallback( +func WithStoredPathFallback( fn func(root, path string) (multiSessionMatch, bool), -) multiSessionOption { +) MultiSessionOption { return func(c *multiSessionConfig) { c.storedPathFallback = fn } } -func withFingerprint( +func WithFingerprint( fn func(src multiSessionSource) (SourceFingerprint, error), -) multiSessionOption { +) MultiSessionOption { return func(c *multiSessionConfig) { c.fingerprint = fn } } -func withContainerParse( +func WithContainerParse( fn func(src multiSessionSource, req ParseRequest) ([]ParseResult, error), -) multiSessionOption { +) MultiSessionOption { return func(c *multiSessionConfig) { c.parseContainer = fn } } -func withMemberParse( +func WithMemberParse( fn func(src multiSessionSource, req ParseRequest) (*ParseResult, error), -) multiSessionOption { +) MultiSessionOption { return func(c *multiSessionConfig) { c.parseMember = fn } } -func withMemberPresence(fn func(src multiSessionSource) bool) multiSessionOption { +func WithMemberPresence(fn func(src multiSessionSource) bool) MultiSessionOption { return func(c *multiSessionConfig) { c.memberPresent = fn } } -func withContainerHashStamping() multiSessionOption { +func WithContainerHashStamping() MultiSessionOption { return func(c *multiSessionConfig) { c.stampContainerHash = true } } -func newMultiSessionContainerSourceSet( +func NewMultiSessionContainerSourceSet( agent AgentType, roots []string, - opts ...multiSessionOption, + opts ...MultiSessionOption, ) multiSessionContainerSourceSet { cfg := multiSessionConfig{} for _, opt := range opts { @@ -147,19 +147,19 @@ func newMultiSessionContainerSourceSet( } switch { case cfg.discoverContainers == nil && cfg.discoverSources == nil: - panic("multi-session container: missing withContainerDiscovery or withSourceDiscovery") + panic("multi-session container: missing WithContainerDiscovery or 
WithSourceDiscovery") case cfg.watchRoots == nil: - panic("multi-session container: missing withWatchRoots") + panic("multi-session container: missing WithWatchRoots") case cfg.classifyPath == nil: - panic("multi-session container: missing withChangedPathClassifier") + panic("multi-session container: missing WithChangedPathClassifier") case cfg.findMember == nil: - panic("multi-session container: missing withMemberLookup") + panic("multi-session container: missing WithMemberLookup") case cfg.fingerprint == nil: - panic("multi-session container: missing withFingerprint") + panic("multi-session container: missing WithFingerprint") case cfg.parseContainer == nil: - panic("multi-session container: missing withContainerParse") + panic("multi-session container: missing WithContainerParse") case cfg.parseMember == nil: - panic("multi-session container: missing withMemberParse") + panic("multi-session container: missing WithMemberParse") } return multiSessionContainerSourceSet{ agent: agent, @@ -195,8 +195,8 @@ func (s multiSessionContainerSourceSet) Discover( } // discoverMatches yields the discovery matches for one root: either the -// member-level matches from withSourceDiscovery, or one whole-container match -// per path from withContainerDiscovery. +// member-level matches from WithSourceDiscovery, or one whole-container match +// per path from WithContainerDiscovery. func (s multiSessionContainerSourceSet) discoverMatches( root string, ) []multiSessionMatch { @@ -416,7 +416,7 @@ var _ SourceSet = multiSessionContainerSourceSet{} // Parse resolves the request's source and parses it: a member source yields one // result, a container source fans out every member. It satisfies the SourceSet -// interface; sourceSetProvider applies the request/config machine fallback +// interface; SourceSetProvider applies the request/config machine fallback // before calling in, so req.Machine is already resolved here. 
func (s multiSessionContainerSourceSet) Parse( ctx context.Context, @@ -432,15 +432,15 @@ func (s multiSessionContainerSourceSet) Parse( return s.parse(src, req) } -// newMultiSessionProviderFactory builds a ProviderFactory for a multi-session -// container provider. It is a thin adapter over the generic sourceSetFactory; +// NewMultiSessionProviderFactory builds a ProviderFactory for a multi-session +// container provider. It is a thin adapter over the generic SourceSetFactory; // the build closure constructs the agent's configured source set. -func newMultiSessionProviderFactory( +func NewMultiSessionProviderFactory( def AgentDef, caps Capabilities, build func(cfg ProviderConfig) multiSessionContainerSourceSet, ) ProviderFactory { - return newSourceSetFactory( + return NewSourceSetFactory( def, caps, func(cfg ProviderConfig) SourceSet { return build(cfg) }, ) diff --git a/internal/parser/provider_lookup.go b/internal/parser/provider_lookup.go index 5013ce400..4de0e33b2 100644 --- a/internal/parser/provider_lookup.go +++ b/internal/parser/provider_lookup.go @@ -2,19 +2,19 @@ package parser import "strings" -func providerFindRequestWithRawSessionID( +func ProviderFindRequestWithRawSessionID( def AgentDef, req FindSourceRequest, ) FindSourceRequest { if req.RawSessionID != "" { - req.RawSessionID = providerNormalizeRawSessionID(def, req.RawSessionID) + req.RawSessionID = ProviderNormalizeRawSessionID(def, req.RawSessionID) return req } - req.RawSessionID = providerRawSessionIDFromFull(def, req.FullSessionID) + req.RawSessionID = ProviderRawSessionIDFromFull(def, req.FullSessionID) return req } -func providerNormalizeRawSessionID(def AgentDef, id string) string { +func ProviderNormalizeRawSessionID(def AgentDef, id string) string { _, id = StripHostPrefix(id) if def.IDPrefix != "" && strings.HasPrefix(id, def.IDPrefix) { return strings.TrimPrefix(id, def.IDPrefix) @@ -22,7 +22,7 @@ func providerNormalizeRawSessionID(def AgentDef, id string) string { return id } -func 
providerRawSessionIDFromFull(def AgentDef, id string) string { +func ProviderRawSessionIDFromFull(def AgentDef, id string) string { if id == "" { return "" } diff --git a/internal/parser/single_file_source_set.go b/internal/parser/single_file_source_set.go index 6ded0f3e0..1144d1a7b 100644 --- a/internal/parser/single_file_source_set.go +++ b/internal/parser/single_file_source_set.go @@ -14,8 +14,8 @@ import ( // // Like multiSessionContainerSourceSet, all agent-specific behavior is supplied // through functional options (withFile*()), and the type implements SourceSet -// so it plugs into newSourceSetFactory. The composite/sidecar fingerprint -// variance lives entirely inside each provider's withFileFingerprint closure, +// so it plugs into NewSourceSetFactory. The composite/sidecar fingerprint +// variance lives entirely inside each provider's WithFileFingerprint closure, // so the base stays agnostic about sidecars until a shared helper is warranted. // singleFileSource is the engine-visible Opaque payload for a single-file @@ -61,55 +61,55 @@ type singleFileConfig struct { alwaysComplete bool } -type singleFileOption func(*singleFileConfig) +type SingleFileOption func(*singleFileConfig) -func withFileDiscovery( +func WithFileDiscovery( fn func(root string) []singleFileMatch, -) singleFileOption { +) SingleFileOption { return func(c *singleFileConfig) { c.discoverFiles = fn } } -func withFileWatchRoots( +func WithFileWatchRoots( fn func(roots []string) []WatchRoot, -) singleFileOption { +) SingleFileOption { return func(c *singleFileConfig) { c.watchRoots = fn } } -func withFileChangedPathClassifier( +func WithFileChangedPathClassifier( fn func(root, path string, allowMissing bool) (singleFileMatch, bool), -) singleFileOption { +) SingleFileOption { return func(c *singleFileConfig) { c.classifyPath = fn } } -func withFileLookup( +func WithFileLookup( fn func(root, rawID string) (singleFileMatch, bool), -) singleFileOption { +) SingleFileOption { return func(c 
*singleFileConfig) { c.findFile = fn } } -func withFileFingerprint( +func WithFileFingerprint( fn func(src singleFileSource) (SourceFingerprint, error), -) singleFileOption { +) SingleFileOption { return func(c *singleFileConfig) { c.fingerprint = fn } } -func withFileParse( +func WithFileParse( fn func(src singleFileSource, req ParseRequest) ([]ParseResult, []string, error), -) singleFileOption { +) SingleFileOption { return func(c *singleFileConfig) { c.parseFile = fn } } -// withAlwaysCompleteResultSet reports the result set as complete even when a +// WithAlwaysCompleteResultSet reports the result set as complete even when a // parse yields no sessions, instead of skipping. Used by providers whose parse // removes sessions via exclusions. -func withAlwaysCompleteResultSet() singleFileOption { +func WithAlwaysCompleteResultSet() SingleFileOption { return func(c *singleFileConfig) { c.alwaysComplete = true } } -func newSingleFileSourceSet( +func NewSingleFileSourceSet( agent AgentType, roots []string, - opts ...singleFileOption, + opts ...SingleFileOption, ) singleFileSourceSet { cfg := singleFileConfig{} for _, opt := range opts { @@ -117,17 +117,17 @@ func newSingleFileSourceSet( } switch { case cfg.discoverFiles == nil: - panic("single-file source set: missing withFileDiscovery") + panic("single-file source set: missing WithFileDiscovery") case cfg.watchRoots == nil: - panic("single-file source set: missing withFileWatchRoots") + panic("single-file source set: missing WithFileWatchRoots") case cfg.classifyPath == nil: - panic("single-file source set: missing withFileChangedPathClassifier") + panic("single-file source set: missing WithFileChangedPathClassifier") case cfg.findFile == nil: - panic("single-file source set: missing withFileLookup") + panic("single-file source set: missing WithFileLookup") case cfg.fingerprint == nil: - panic("single-file source set: missing withFileFingerprint") + panic("single-file source set: missing WithFileFingerprint") case 
cfg.parseFile == nil: - panic("single-file source set: missing withFileParse") + panic("single-file source set: missing WithFileParse") } return singleFileSourceSet{ agent: agent, @@ -257,7 +257,7 @@ func (s singleFileSourceSet) Fingerprint( } // Parse resolves the request's source and parses its single file into one -// session. It satisfies the SourceSet interface; sourceSetProvider applies the +// session. It satisfies the SourceSet interface; SourceSetProvider applies the // request/config machine fallback before calling in, so req.Machine is already // resolved here. func (s singleFileSourceSet) Parse( @@ -334,15 +334,15 @@ func (s singleFileSourceSet) sourceFromRef( return singleFileSource{}, false } -// newSingleFileProviderFactory builds a ProviderFactory for a single-file -// provider. It is a thin adapter over the generic sourceSetFactory; the build +// NewSingleFileProviderFactory builds a ProviderFactory for a single-file +// provider. It is a thin adapter over the generic SourceSetFactory; the build // closure constructs the agent's configured source set. -func newSingleFileProviderFactory( +func NewSingleFileProviderFactory( def AgentDef, caps Capabilities, build func(cfg ProviderConfig) singleFileSourceSet, ) ProviderFactory { - return newSourceSetFactory( + return NewSourceSetFactory( def, caps, func(cfg ProviderConfig) SourceSet { return build(cfg) }, ) diff --git a/internal/parser/source_set.go b/internal/parser/source_set.go index 4d8fff320..047699241 100644 --- a/internal/parser/source_set.go +++ b/internal/parser/source_set.go @@ -4,8 +4,8 @@ import "context" // source_set.go provides the generic plumbing shared by every reusable // source-set base. 
A SourceSet owns source resolution and parsing for one -// provider; sourceSetProvider wraps any SourceSet into a full Provider, and -// sourceSetFactory builds those providers from an AgentDef + Capabilities + a +// provider; SourceSetProvider wraps any SourceSet into a full Provider, and +// SourceSetFactory builds those providers from an AgentDef + Capabilities + a // per-config constructor. // // The point is that a base such as multiSessionContainerSourceSet (or @@ -19,7 +19,7 @@ import "context" // SourceSet is the source-resolution and parse core that a Provider delegates // to. It is the Provider interface minus the Definition/Capabilities/config -// plumbing (supplied by sourceSetProvider) and minus ParseIncremental (which +// plumbing (supplied by SourceSetProvider) and minus ParseIncremental (which // falls through to the ProviderBase "unsupported" default until a base needs // it). type SourceSet interface { @@ -33,49 +33,49 @@ type SourceSet interface { Parse(context.Context, ParseRequest) (ParseOutcome, error) } -// sourceSetProvider adapts a SourceSet to the Provider interface. It supplies +// SourceSetProvider adapts a SourceSet to the Provider interface. It supplies // the AgentDef/Capabilities/config carried by ProviderBase, forwards the source // methods to the SourceSet, and applies the two provider-level normalizations // every provider performs: raw-session-ID injection on FindSource and the // machine fallback on Parse. ParseIncremental is inherited from ProviderBase // (unsupported) until a base opts in. 
-type sourceSetProvider struct { +type SourceSetProvider struct { ProviderBase sources SourceSet } -func (p *sourceSetProvider) Discover(ctx context.Context) ([]SourceRef, error) { +func (p *SourceSetProvider) Discover(ctx context.Context) ([]SourceRef, error) { return p.sources.Discover(ctx) } -func (p *sourceSetProvider) WatchPlan(ctx context.Context) (WatchPlan, error) { +func (p *SourceSetProvider) WatchPlan(ctx context.Context) (WatchPlan, error) { return p.sources.WatchPlan(ctx) } -func (p *sourceSetProvider) SourcesForChangedPath( +func (p *SourceSetProvider) SourcesForChangedPath( ctx context.Context, req ChangedPathRequest, ) ([]SourceRef, error) { return p.sources.SourcesForChangedPath(ctx, req) } -func (p *sourceSetProvider) FindSource( +func (p *SourceSetProvider) FindSource( ctx context.Context, req FindSourceRequest, ) (SourceRef, bool, error) { return p.sources.FindSource( - ctx, providerFindRequestWithRawSessionID(p.Def, req), + ctx, ProviderFindRequestWithRawSessionID(p.Def, req), ) } -func (p *sourceSetProvider) Fingerprint( +func (p *SourceSetProvider) Fingerprint( ctx context.Context, source SourceRef, ) (SourceFingerprint, error) { return p.sources.Fingerprint(ctx, source) } -func (p *sourceSetProvider) Parse( +func (p *SourceSetProvider) Parse( ctx context.Context, req ParseRequest, ) (ParseOutcome, error) { @@ -83,39 +83,39 @@ func (p *sourceSetProvider) Parse( return p.sources.Parse(ctx, req) } -// sourceSetFactory is the generic ProviderFactory for any SourceSet-backed +// SourceSetFactory is the generic ProviderFactory for any SourceSet-backed // provider. build constructs the SourceSet from the cloned per-provider config // (roots, machine, path rewriter), so a base captures whatever config it needs // in a closure rather than threading it through a struct. 
-type sourceSetFactory struct { +type SourceSetFactory struct { def AgentDef caps Capabilities build func(cfg ProviderConfig) SourceSet } -func newSourceSetFactory( +func NewSourceSetFactory( def AgentDef, caps Capabilities, build func(cfg ProviderConfig) SourceSet, ) ProviderFactory { - return sourceSetFactory{ + return SourceSetFactory{ def: cloneAgentDef(def), caps: caps, build: build, } } -func (f sourceSetFactory) Definition() AgentDef { +func (f SourceSetFactory) Definition() AgentDef { return cloneAgentDef(f.def) } -func (f sourceSetFactory) Capabilities() Capabilities { +func (f SourceSetFactory) Capabilities() Capabilities { return f.caps } -func (f sourceSetFactory) NewProvider(cfg ProviderConfig) Provider { +func (f SourceSetFactory) NewProvider(cfg ProviderConfig) Provider { cfg = cfg.Clone() - return &sourceSetProvider{ + return &SourceSetProvider{ ProviderBase: ProviderBase{ Def: cloneAgentDef(f.def), Caps: f.caps, From b77564d4518e1dbe67feb298529834b7ab37c137 Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Fri, 26 Jun 2026 15:52:18 -0400 Subject: [PATCH 07/24] fix(parser): normalize JSONL RelPath to forward slashes filepath.Rel returns OS-native separators, so on Windows the JSONLSource RelPath came back with backslashes (nested\deleted.jsonl) while the rest of the parser keys, display paths, and tests use forward-slash relative paths. Normalize with filepath.ToSlash so RelPath is platform-stable; this is a no-op on Unix and fixes the Windows Go Test failure in TestJSONLSourceSetChangedPathClassifiesDeletedFiles. 
--- internal/parser/jsonl_source_set.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/internal/parser/jsonl_source_set.go b/internal/parser/jsonl_source_set.go index 2e8062059..ab10ac176 100644 --- a/internal/parser/jsonl_source_set.go +++ b/internal/parser/jsonl_source_set.go @@ -710,6 +710,11 @@ func (s JSONLSourceSet) sourceRefFromPath( if err != nil { return SourceRef{}, false } + // RelPath is a forward-slash key by convention, matching how the rest of + // the parser builds source keys and display paths. filepath.Rel returns + // OS-native separators, so normalize here; this is a no-op on Unix and + // keeps RelPath stable for Windows callers and tests. + rel = filepath.ToSlash(rel) displayPath := firstNonEmptyJSONLString( callPathFunc(s.options.DisplayPath, root, path), path, From 05c62d922124acc5737dbdd4ebdcc767f286da8a Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Sun, 28 Jun 2026 00:22:17 -0400 Subject: [PATCH 08/24] feat(parser,sync): add S3 discovery scaffolding to the source-set framework The provider migration replaced each agent's DiscoverFunc, which handled both local and s3:// roots, with provider.Discover. The source-set framework this branch introduces had no way to carry an s3:// object's durable metadata through discovery, so a migrated agent that enumerated remote objects would lose the machine, size, mtime, and fingerprint the S3 sync path depends on. Add the shared pieces here, ahead of any agent that uses them: an S3DiscoveredSource Opaque payload plus s3SourceRefFromDiscoveredFile to build an s3:// SourceRef, and engine threading that copies that metadata back into the DiscoveredFile so the existing S3 sync path (object fetch, fingerprinting, machine-ID namespacing, freshness, dedup, mtime cutoff) sees the same identity legacy discovery emitted directly. Nothing produces these refs yet; the Claude and Codex source sets wire them up on their own migration branches. 
Also stop cleanJSONLRoots from running filepath.Clean on s3:// roots: Clean collapses the scheme to s3:/ and defeats the HasPrefix("s3://") checks that route discovery to the object store. --- internal/parser/jsonl_source_set.go | 7 ++++ internal/parser/jsonl_source_set_test.go | 41 ++++++++++++++++++++++++ internal/parser/s3source.go | 39 ++++++++++++++++++++++ internal/parser/s3source_test.go | 36 +++++++++++++++++++++ internal/sync/engine.go | 21 ++++++++++-- 5 files changed, 142 insertions(+), 2 deletions(-) diff --git a/internal/parser/jsonl_source_set.go b/internal/parser/jsonl_source_set.go index ab10ac176..155234452 100644 --- a/internal/parser/jsonl_source_set.go +++ b/internal/parser/jsonl_source_set.go @@ -852,6 +852,13 @@ func cleanJSONLRoots(roots []string) []string { if root == "" { continue } + // Preserve s3:// roots verbatim: filepath.Clean collapses the "//" in the + // scheme to "s3:/", which breaks the s3:// prefix checks that route + // discovery to the object store instead of the local filesystem. 
+ if strings.HasPrefix(root, "s3://") { + cleaned = append(cleaned, root) + continue + } cleaned = append(cleaned, filepath.Clean(root)) } return cleaned diff --git a/internal/parser/jsonl_source_set_test.go b/internal/parser/jsonl_source_set_test.go index ef59f181a..d4c4367a1 100644 --- a/internal/parser/jsonl_source_set_test.go +++ b/internal/parser/jsonl_source_set_test.go @@ -444,3 +444,44 @@ func sourceDisplayPaths(sources []SourceRef) []string { } return paths } + +func TestCleanJSONLRootsPreservesS3Scheme(t *testing.T) { + tests := []struct { + name string + roots []string + want []string + }{ + { + name: "s3 root kept verbatim", + roots: []string{"s3://bucket/laptop/raw/claude"}, + want: []string{"s3://bucket/laptop/raw/claude"}, + }, + { + name: "s3 root with trailing slash kept verbatim", + roots: []string{"s3://bucket/laptop/raw/claude/"}, + want: []string{"s3://bucket/laptop/raw/claude/"}, + }, + { + name: "local roots still cleaned", + roots: []string{"/tmp/foo/../bar", "/tmp/baz/"}, + want: []string{"/tmp/bar", "/tmp/baz"}, + }, + { + name: "empty roots dropped", + roots: []string{"", "s3://bucket/x", ""}, + want: []string{"s3://bucket/x"}, + }, + { + name: "mixed local and s3", + roots: []string{"/tmp/a/./b", "s3://bucket/y/"}, + want: []string{"/tmp/a/b", "s3://bucket/y/"}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // filepath.Clean would collapse "s3://" to "s3:/", which defeats the + // HasPrefix("s3://") checks that route discovery to the object store. 
+ assert.Equal(t, tt.want, cleanJSONLRoots(tt.roots)) + }) + } +} diff --git a/internal/parser/s3source.go b/internal/parser/s3source.go index b3cbbd6cf..76764aa57 100644 --- a/internal/parser/s3source.go +++ b/internal/parser/s3source.go @@ -335,6 +335,45 @@ func combineS3Fingerprints(values ...string) string { return prefix + strings.Join(entries, sep) } +// S3DiscoveredSource is the Opaque payload an S3-aware source set attaches to a +// discovered s3:// SourceRef. It carries the durable object metadata the sync +// engine threads back into the DiscoveredFile so S3 freshness, dedup, mtime +// cutoff, and machine-ID namespacing operate on a provider-discovered S3 source +// exactly as they did when discovery emitted these fields directly. Providers +// read local files, so an S3 source is never fingerprinted or parsed through the +// provider feature path; the engine routes s3:// objects to the dedicated S3 +// sync path, which re-stats and fetches the object itself. +type S3DiscoveredSource struct { + URI string + Project string + Machine string + Size int64 + MtimeNS int64 + Fingerprint string +} + +// s3SourceRefFromDiscoveredFile builds the SourceRef for an s3:// session object +// enumerated by a source set's discovery. The s3 URI is the stable identity +// across Key, DisplayPath, and FingerprintKey, and the durable object metadata +// rides in the Opaque payload for the engine to thread into the DiscoveredFile. 
+func s3SourceRefFromDiscoveredFile(file DiscoveredFile) SourceRef { + return SourceRef{ + Provider: file.Agent, + Key: file.Path, + DisplayPath: file.Path, + FingerprintKey: file.Path, + ProjectHint: file.Project, + Opaque: S3DiscoveredSource{ + URI: file.Path, + Project: file.Project, + Machine: file.Machine, + Size: file.SourceSize, + MtimeNS: file.SourceMtime, + Fingerprint: file.SourceFingerprint, + }, + } +} + // discoverClaudeS3 lists Claude session JSONL under an s3:// projects root, // mirroring DiscoverClaudeProjects' selection rules: // - top-level /.jsonl (skip names starting "agent-") diff --git a/internal/parser/s3source_test.go b/internal/parser/s3source_test.go index 098b53ca3..48d6557cf 100644 --- a/internal/parser/s3source_test.go +++ b/internal/parser/s3source_test.go @@ -242,3 +242,39 @@ func TestDiscoverClaudeS3RequiresSubagentsUnderParentSession(t *testing.T) { got[0].Path, ) } + +func TestS3SourceRefFromDiscoveredFile(t *testing.T) { + uri := "s3://bucket/laptop/raw/codex/sessions/2026/06/abc.jsonl" + file := DiscoveredFile{ + Path: uri, + Agent: AgentCodex, + Project: "proj", + Machine: "laptop", + SourceSize: 4096, + SourceMtime: 1718900000000000000, + SourceFingerprint: "fp-1", + } + + ref := s3SourceRefFromDiscoveredFile(file) + + // The s3 URI is the stable identity across every key field so dedup and + // fingerprinting agree on one source. + assert.Equal(t, AgentCodex, ref.Provider) + assert.Equal(t, uri, ref.Key) + assert.Equal(t, uri, ref.DisplayPath) + assert.Equal(t, uri, ref.FingerprintKey) + assert.Equal(t, "proj", ref.ProjectHint) + + // The durable object metadata rides in the Opaque payload for the engine to + // thread back into the DiscoveredFile. 
+ opaque, ok := ref.Opaque.(S3DiscoveredSource) + require.True(t, ok, "Opaque must be an S3DiscoveredSource") + assert.Equal(t, S3DiscoveredSource{ + URI: uri, + Project: "proj", + Machine: "laptop", + Size: 4096, + MtimeNS: 1718900000000000000, + Fingerprint: "fp-1", + }, opaque) +} diff --git a/internal/sync/engine.go b/internal/sync/engine.go index e72f33010..e437968a3 100644 --- a/internal/sync/engine.go +++ b/internal/sync/engine.go @@ -3557,13 +3557,30 @@ func (e *Engine) discoverProviderSources( agent = def.Type } sourceCopy := source - files = append(files, parser.DiscoveredFile{ + discovered := parser.DiscoveredFile{ Path: sourcePath, Project: source.ProjectHint, Agent: agent, ProviderSource: &sourceCopy, ProviderProcess: true, - }) + } + // S3-aware source sets carry the durable object metadata in the + // Opaque payload. Thread it into the DiscoveredFile so the S3 sync + // path (object fetch, fingerprinting, machine-ID namespacing) and the + // freshness/dedup/mtime-cutoff logic see the same source identity the + // legacy s3:// discovery emitted directly. Providers read local files, + // so the engine still routes s3:// objects through processS3Session + // rather than the provider parse path. + if s3, ok := source.Opaque.(parser.S3DiscoveredSource); ok { + discovered.Machine = s3.Machine + discovered.SourceSize = s3.Size + discovered.SourceMtime = s3.MtimeNS + discovered.SourceFingerprint = s3.Fingerprint + if discovered.Project == "" { + discovered.Project = s3.Project + } + } + files = append(files, discovered) } } return files, failures From d61c47e7021fd12a0cc9c16beb62ec3f15eba540 Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Sun, 28 Jun 2026 10:00:27 -0400 Subject: [PATCH 09/24] test(parser): make cleanJSONLRoots test OS-agnostic The local-root cases hard-coded forward-slash expectations, but filepath.Clean emits OS-native separators -- backslashes on Windows -- so the test failed the Windows CI job (\tmp\bar vs /tmp/bar). 
Build the expected local paths with filepath.FromSlash so the assertion matches on every platform; the s3:// cases stay verbatim because their preservation is separator-independent. --- internal/parser/jsonl_source_set_test.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/internal/parser/jsonl_source_set_test.go b/internal/parser/jsonl_source_set_test.go index d4c4367a1..f9a2b6f23 100644 --- a/internal/parser/jsonl_source_set_test.go +++ b/internal/parser/jsonl_source_set_test.go @@ -464,7 +464,10 @@ func TestCleanJSONLRootsPreservesS3Scheme(t *testing.T) { { name: "local roots still cleaned", roots: []string{"/tmp/foo/../bar", "/tmp/baz/"}, - want: []string{"/tmp/bar", "/tmp/baz"}, + // filepath.Clean yields OS-native separators (backslashes on + // Windows), so build the expectation with FromSlash rather than + // hard-coding forward slashes. + want: []string{filepath.FromSlash("/tmp/bar"), filepath.FromSlash("/tmp/baz")}, }, { name: "empty roots dropped", @@ -474,7 +477,7 @@ func TestCleanJSONLRootsPreservesS3Scheme(t *testing.T) { { name: "mixed local and s3", roots: []string{"/tmp/a/./b", "s3://bucket/y/"}, - want: []string{"/tmp/a/b", "s3://bucket/y/"}, + want: []string{filepath.FromSlash("/tmp/a/b"), "s3://bucket/y/"}, }, } for _, tt := range tests { From 4a971f9dc07d5254f5e987b212e5159d284ea5fd Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Sun, 28 Jun 2026 10:00:29 -0400 Subject: [PATCH 10/24] fix(sync): clear ProviderProcess for discovered s3 sources discoverProviderSources stamped ProviderProcess: true on every provider source, including s3:// objects, even though the accompanying comment says s3 objects route through processS3Session. Providers read local files, so the provider Fingerprint/Parse path cannot service a remote object. The s3:// guard in processProviderFile already declines these once a provider emits them, but the flag contradicted the intent and relied solely on that downstream guard. 
Clear ProviderProcess for s3 sources at the point the metadata is threaded, so processProviderFile declines them via its ProviderSource-without-process check and they route through the dedicated S3 sync path regardless of the later HasPrefix guard. --- internal/sync/engine.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/internal/sync/engine.go b/internal/sync/engine.go index e437968a3..37bce5a60 100644 --- a/internal/sync/engine.go +++ b/internal/sync/engine.go @@ -3569,13 +3569,15 @@ func (e *Engine) discoverProviderSources( // path (object fetch, fingerprinting, machine-ID namespacing) and the // freshness/dedup/mtime-cutoff logic see the same source identity the // legacy s3:// discovery emitted directly. Providers read local files, - // so the engine still routes s3:// objects through processS3Session - // rather than the provider parse path. + // so clear ProviderProcess for s3:// objects: processProviderFile must + // decline them so they route through processS3Session rather than the + // provider Fingerprint/Parse path, which cannot read a remote object. if s3, ok := source.Opaque.(parser.S3DiscoveredSource); ok { discovered.Machine = s3.Machine discovered.SourceSize = s3.Size discovered.SourceMtime = s3.MtimeNS discovered.SourceFingerprint = s3.Fingerprint + discovered.ProviderProcess = false if discovered.Project == "" { discovered.Project = s3.Project } From 033316d5c8e76e668a5310a22b71e1c5e887798f Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Sun, 28 Jun 2026 10:47:05 -0400 Subject: [PATCH 11/24] fix(parser): re-resolve stale stored paths in single-file FindSource singleFileSourceSet.FindSource returned any stored-path/fingerprint-key hit the moment classifyPath accepted it by shape, ignoring RequireFreshSource and never checking that the path still exists. 
Single-file providers like Reasonix and Cowork classify purely on path shape, so a moved or deleted stored file_path was returned as found, short-circuiting the raw-ID re-resolution that would have located the live transcript -- causing single-session resync/source lookup to fail on a stale path. Mirror the multiSessionContainerSourceSet.FindSource freshness guard: under RequireFreshSource, skip a classified stored path that is not a regular file so the lookup falls through to raw-ID re-resolution. PreferStoredSource semantics for still-present paths are unchanged, since only RequireFreshSource gates it. --- internal/parser/single_file_source_set.go | 15 +++- .../parser/single_file_source_set_test.go | 85 +++++++++++++++++++ 2 files changed, 98 insertions(+), 2 deletions(-) create mode 100644 internal/parser/single_file_source_set_test.go diff --git a/internal/parser/single_file_source_set.go b/internal/parser/single_file_source_set.go index 1144d1a7b..da89d58aa 100644 --- a/internal/parser/single_file_source_set.go +++ b/internal/parser/single_file_source_set.go @@ -214,9 +214,20 @@ func (s singleFileSourceSet) FindSource( continue } for _, root := range s.roots { - if match, ok := s.cfg.classifyPath(root, path, false); ok { - return s.sourceRef(root, match), true, nil + match, ok := s.cfg.classifyPath(root, path, false) + if !ok { + continue + } + // classifyPath accepts a stored single-file path by shape, without + // confirming it still exists. A fresh-source lookup must not return a + // moved or deleted transcript: fall through to raw-ID re-resolution + // instead, mirroring the multiSessionContainerSourceSet.FindSource + // freshness guard. Only RequireFreshSource gates this, so + // PreferStoredSource semantics for still-present paths are unchanged. 
+ if req.RequireFreshSource && !IsRegularFile(match.Path) { + continue } + return s.sourceRef(root, match), true, nil } } if req.RawSessionID == "" { diff --git a/internal/parser/single_file_source_set_test.go b/internal/parser/single_file_source_set_test.go new file mode 100644 index 000000000..656e9c07f --- /dev/null +++ b/internal/parser/single_file_source_set_test.go @@ -0,0 +1,85 @@ +package parser + +import ( + "context" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// newShapeOnlyTestSingleFileSourceSet builds a single-file source set whose +// classifyPath accepts a stored path by SHAPE alone (no on-disk check, as +// Reasonix and Cowork do) and whose findFile re-resolves a raw ID to the live +// file. Only the FindSource-relevant hooks carry real behavior; the rest are +// inert stubs required by the constructor. +func newShapeOnlyTestSingleFileSourceSet(root, livePath string) singleFileSourceSet { + return NewSingleFileSourceSet( + AgentReasonix, + []string{root}, + WithFileDiscovery(func(string) []singleFileMatch { return nil }), + WithFileWatchRoots(func([]string) []WatchRoot { return nil }), + WithFileChangedPathClassifier( + func(_, path string, _ bool) (singleFileMatch, bool) { + if path == "" { + return singleFileMatch{}, false + } + return singleFileMatch{Path: path}, true + }, + ), + WithFileLookup(func(_, rawID string) (singleFileMatch, bool) { + if rawID != "" && IsRegularFile(livePath) { + return singleFileMatch{Path: livePath}, true + } + return singleFileMatch{}, false + }), + WithFileFingerprint( + func(singleFileSource) (SourceFingerprint, error) { + return SourceFingerprint{}, nil + }, + ), + WithFileParse( + func(singleFileSource, ParseRequest) ([]ParseResult, []string, error) { + return nil, nil, nil + }, + ), + ) +} + +// TestSingleFileFindSourceRejectsStaleStoredPath verifies the fresh-source guard +// in singleFileSourceSet.FindSource: a stored path that 
classifies by shape but +// no longer exists must not be returned under RequireFreshSource; the lookup +// falls through to raw-ID re-resolution to the live file. Without +// RequireFreshSource the stored path is honored, preserving prior behavior. +func TestSingleFileFindSourceRejectsStaleStoredPath(t *testing.T) { + root := t.TempDir() + livePath := filepath.Join(root, "archive", "sess.jsonl") + require.NoError(t, os.MkdirAll(filepath.Dir(livePath), 0o755)) + require.NoError(t, os.WriteFile(livePath, []byte("{}\n"), 0o644)) + stalePath := filepath.Join(root, "sessions", "sess.jsonl") // never created + + s := newShapeOnlyTestSingleFileSourceSet(root, livePath) + + src, ok, err := s.FindSource(context.Background(), FindSourceRequest{ + StoredFilePath: stalePath, + FingerprintKey: stalePath, + RawSessionID: "sess", + RequireFreshSource: true, + }) + require.NoError(t, err) + require.True(t, ok, "raw-ID re-resolution should still find the live file") + assert.Equal(t, livePath, src.DisplayPath, + "a stale stored path must re-resolve to the live file under RequireFreshSource") + + src2, ok2, err := s.FindSource(context.Background(), FindSourceRequest{ + StoredFilePath: stalePath, + FingerprintKey: stalePath, + RawSessionID: "sess", + }) + require.NoError(t, err) + require.True(t, ok2) + assert.Equal(t, stalePath, src2.DisplayPath, + "without RequireFreshSource the stored-path hint is honored unchanged") +} From 0521aab98ab37b89e1359cfa9c3333d1b3364c57 Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Wed, 24 Jun 2026 21:34:50 -0400 Subject: [PATCH 12/24] feat(parser): migrate commandcode and iflow providers Command Code and iFlow both fit the directory JSONL source shape, so moving them together proves the helper against real providers without mixing in nested layouts like Qwen or composite providers like WorkBuddy. 
The providers keep source discovery, changed-path classification, persisted lookup, fingerprinting, and parse normalization behind concrete facade implementations while preserving the legacy parser functions for current runtime callers. fix(parser): preserve JSONL provider symlink discovery Command Code and iFlow legacy discovery followed symlinked project directories. The migrated providers should keep that behavior so users with linked project roots do not silently lose discovery or raw-session lookup after moving onto the provider facade. test(parser): opt commandcode iflow into provider shadow CommandCode and iFlow now have concrete facade providers on this branch, so keeping them legacy-only would let the migration branch remain additive instead of exercised by the shared provider harness. This makes the migration manifest fail closed for the providers introduced here while leaving unrelated providers for their own stack branches. Validation: go test -tags "fts5" ./internal/parser -run TestProviderMigrationModes -count=1; go test -tags "fts5" ./internal/parser -count=1; go vet ./...; git diff --check test(sync): compare commandcode iflow shadow parity Command Code and iFlow now opt into shadow comparison, so their provider branch should prove more than provider-local parsing. Add source-level migration tests that run ObserveProviderSource and compare the normalized provider output against the legacy parser functions for both agents. Validation: go fmt ./...; go test -tags "fts5" ./internal/parser ./internal/sync -count=1; go vet ./...; git diff --check; ./custom-gcl run --config .golangci.nilaway.yml ./internal/parser/... ./internal/sync/... refactor(parser): fold commandcode and iflow into provider Move Command Code and iFlow parse ownership onto their concrete providers and delete the package-level discover/find/parse entrypoints plus the legacy sync dispatch for both agents. 
Both agents become provider-authoritative so runtime sync routes through provider changed-path classification and processProviderFile instead of the removed processCommandCode/processIflow methods. Command Code: - parseSession moves onto the provider; DiscoverCommandCodeSessions, FindCommandCodeSourceFile, and ParseCommandCodeSession are removed. - The provider reproduces the legacy .meta.json companion behavior: WatchPlan includes *.meta.json, SourcesForChangedPath remaps a changed .meta.json back to its .jsonl transcript, the composite Fingerprint folds the companion size, mtime, and content into the freshness identity, and Parse overrides File.Size/File.Mtime with the combined transcript+meta effective info. commandCodeEffectiveInfo stays in the engine for the SourceMtime watcher fallback. iFlow: - parseSession moves onto the provider; DiscoverIflowProjects, FindIflowSourceFile, and ParseIflowSession are removed. - Parse mirrors the legacy sync path: it resolves the project from the recorded cwd and git branch (falling back to GetProjectName of the project directory), applies InferRelationshipTypes to derive continuation/subagent links, and enables source content hashing so File.Hash matches the legacy ComputeFileHash value. Tests move from the deleted free functions to provider API coverage, add guard tests asserting the legacy entrypoints stay gone, drop the shadow comparison test, and remove both provider files from the pending-shim scan list. fix(parser): preserve commandcode file hash parity Command Code needs a composite provider fingerprint so metadata-only edits invalidate freshness, but that value should not replace the persisted transcript content hash. The legacy sync path stored the SHA-256 of the transcript file in file_hash, and changing that semantic would make metadata-only edits look like transcript content changes. Keep the composite value scoped to SourceFingerprint and recompute Session.File.Hash from the transcript during provider parse.
The provider test now exercises Fingerprint -> Parse with a .meta.json companion to prove the two hashes remain distinct. Validation: go test -tags "fts5" ./internal/parser -run TestCommandCodeProvider -count=1; go test -tags "fts5" ./internal/parser -count=1; go test -tags "fts5" ./internal/sync -run 'Test.*CommandCode|Test.*Iflow' -count=1; go vet ./...; git diff --check fix(parser): thread ctx through commandcode and iflow source lookups --- internal/parser/commandcode.go | 80 +---- internal/parser/commandcode_provider.go | 321 +++++++++++++++++++ internal/parser/commandcode_provider_test.go | 171 ++++++++++ internal/parser/commandcode_test.go | 60 +++- internal/parser/discovery.go | 76 ----- internal/parser/iflow.go | 4 +- internal/parser/iflow_parser_test.go | 25 +- internal/parser/iflow_provider.go | 105 ++++++ internal/parser/iflow_provider_test.go | 147 +++++++++ internal/parser/provider.go | 16 +- internal/parser/provider_capabilities.go | 15 + internal/parser/provider_migration.go | 4 +- internal/parser/provider_shim_scan_test.go | 2 - internal/parser/types.go | 32 +- internal/parser/types_test.go | 7 +- internal/sync/engine.go | 148 --------- internal/sync/engine_test.go | 6 +- internal/sync/iflow_discovery_test.go | 64 ++-- 18 files changed, 912 insertions(+), 371 deletions(-) create mode 100644 internal/parser/commandcode_provider.go create mode 100644 internal/parser/commandcode_provider_test.go create mode 100644 internal/parser/iflow_provider.go create mode 100644 internal/parser/iflow_provider_test.go create mode 100644 internal/parser/provider_capabilities.go diff --git a/internal/parser/commandcode.go b/internal/parser/commandcode.go index 314d13f0b..a952fc937 100644 --- a/internal/parser/commandcode.go +++ b/internal/parser/commandcode.go @@ -5,7 +5,6 @@ import ( "fmt" "os" "path/filepath" - "sort" "strconv" "strings" "time" @@ -20,83 +19,8 @@ type commandCodeMeta struct { Cwd string `json:"cwd"` } -// DiscoverCommandCodeSessions finds
Command Code transcripts under -// ~/.commandcode/projects//.jsonl. -func DiscoverCommandCodeSessions(projectsDir string) []DiscoveredFile { - if projectsDir == "" { - return nil - } - - projectEntries, err := os.ReadDir(projectsDir) - if err != nil { - return nil - } - - var files []DiscoveredFile - for _, entry := range projectEntries { - if !isDirOrSymlink(entry, projectsDir) { - continue - } - - projectDir := filepath.Join(projectsDir, entry.Name()) - sessionEntries, err := os.ReadDir(projectDir) - if err != nil { - continue - } - - for _, sessionEntry := range sessionEntries { - if sessionEntry.IsDir() { - continue - } - name := sessionEntry.Name() - if !strings.HasSuffix(name, ".jsonl") || - strings.HasSuffix(name, ".checkpoints.jsonl") || - strings.HasSuffix(name, ".prompts.jsonl") { - continue - } - id := strings.TrimSuffix(name, ".jsonl") - if !IsValidSessionID(id) { - continue - } - files = append(files, DiscoveredFile{ - Path: filepath.Join(projectDir, name), - Agent: AgentCommandCode, - }) - } - } - - sort.Slice(files, func(i, j int) bool { - return files[i].Path < files[j].Path - }) - return files -} - -// FindCommandCodeSourceFile locates a Command Code transcript by -// its raw session ID (without the "commandcode:" prefix). -func FindCommandCodeSourceFile(projectsDir, rawID string) string { - if projectsDir == "" || !IsValidSessionID(rawID) { - return "" - } - - projectEntries, err := os.ReadDir(projectsDir) - if err != nil { - return "" - } - for _, entry := range projectEntries { - if !isDirOrSymlink(entry, projectsDir) { - continue - } - - candidate := filepath.Join(projectsDir, entry.Name(), rawID+".jsonl") - if info, err := os.Stat(candidate); err == nil && !info.IsDir() { - return candidate - } - } - return "" -} - -// ParseCommandCodeSession parses a Command Code JSONL transcript. -func ParseCommandCodeSession( +// parseSession parses a Command Code JSONL transcript. 
+func (p *commandCodeProvider) parseSession( path, machine string, ) (*ParsedSession, []ParsedMessage, error) { info, err := os.Stat(path) diff --git a/internal/parser/commandcode_provider.go b/internal/parser/commandcode_provider.go new file mode 100644 index 000000000..2125a5ed7 --- /dev/null +++ b/internal/parser/commandcode_provider.go @@ -0,0 +1,321 @@ +package parser + +import ( + "context" + "crypto/sha256" + "fmt" + "os" + "path/filepath" + "strings" +) + +var _ Provider = (*commandCodeProvider)(nil) + +type commandCodeProviderFactory struct { + def AgentDef +} + +func newCommandCodeProviderFactory(def AgentDef) ProviderFactory { + return commandCodeProviderFactory{def: cloneAgentDef(def)} +} + +func (f commandCodeProviderFactory) Definition() AgentDef { + return cloneAgentDef(f.def) +} + +func (f commandCodeProviderFactory) Capabilities() Capabilities { + return commandCodeProviderCapabilities() +} + +func (f commandCodeProviderFactory) NewProvider(cfg ProviderConfig) Provider { + cfg = cfg.Clone() + return &commandCodeProvider{ + ProviderBase: ProviderBase{ + Def: cloneAgentDef(f.def), + Caps: commandCodeProviderCapabilities(), + Config: cfg, + }, + sources: newCommandCodeSourceSet(cfg.Roots), + } +} + +type commandCodeProvider struct { + ProviderBase + sources DirectoryJSONLSourceSet +} + +func (p *commandCodeProvider) Discover(ctx context.Context) ([]SourceRef, error) { + return p.sources.Discover(ctx) +} + +func (p *commandCodeProvider) WatchPlan(ctx context.Context) (WatchPlan, error) { + plan, err := p.sources.WatchPlan(ctx) + if err != nil { + return WatchPlan{}, err + } + for i := range plan.Roots { + plan.Roots[i].IncludeGlobs = append( + plan.Roots[i].IncludeGlobs, + "*.meta.json", + ) + } + return plan, nil +} + +func (p *commandCodeProvider) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + sources, err := p.sources.SourcesForChangedPath(ctx, req) + if err != nil || len(sources) > 0 { + return 
sources, err + } + if source, ok, err := p.sourceForMetaCompanion(ctx, req); err != nil { + return nil, err + } else if ok { + return []SourceRef{source}, nil + } + return nil, nil +} + +func (p *commandCodeProvider) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + return p.sources.FindSource(ctx, providerFindRequestWithRawSessionID(p.Def, req)) +} + +func (p *commandCodeProvider) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + if err := ctx.Err(); err != nil { + return SourceFingerprint{}, err + } + path, ok, err := p.sources.pathFromSource(ctx, source) + if err != nil { + return SourceFingerprint{}, err + } + if !ok { + return SourceFingerprint{}, fmt.Errorf("commandcode source path unavailable") + } + info, err := os.Stat(path) + if err != nil { + return SourceFingerprint{}, fmt.Errorf("stat %s: %w", path, err) + } + if info.IsDir() { + return SourceFingerprint{}, fmt.Errorf("stat %s: source is a directory", path) + } + fingerprint := SourceFingerprint{ + Key: firstNonEmptyJSONLString( + source.FingerprintKey, + source.Key, + path, + ), + Size: info.Size(), + MTimeNS: info.ModTime().UnixNano(), + } + + h := sha256.New() + if err := addCommandCodeFingerprintPart(h, "transcript", path, info); err != nil { + return SourceFingerprint{}, err + } + metaPath := commandCodeMetaCompanionPath(path) + if metaInfo, ok, err := commandCodeCompanionInfo(metaPath); err != nil { + return SourceFingerprint{}, err + } else if ok && metaInfo != nil { + fingerprint.Size += metaInfo.Size() + if mtime := metaInfo.ModTime().UnixNano(); mtime > fingerprint.MTimeNS { + fingerprint.MTimeNS = mtime + } + if err := addCommandCodeFingerprintPart(h, "meta", metaPath, metaInfo); err != nil { + return SourceFingerprint{}, err + } + } + fingerprint.Hash = fmt.Sprintf("%x", h.Sum(nil)) + return fingerprint, nil +} + +func (p *commandCodeProvider) Parse( + ctx context.Context, + req ParseRequest, +) (ParseOutcome, 
error) { + if err := ctx.Err(); err != nil { + return ParseOutcome{}, err + } + path, ok, err := p.sources.pathFromSource(ctx, req.Source) + if err != nil { + return ParseOutcome{}, err + } + if !ok { + return ParseOutcome{}, fmt.Errorf("commandcode source path unavailable") + } + machine := firstNonEmptyJSONLString(req.Machine, p.Config.Machine) + sess, msgs, err := p.parseSession(path, machine) + if err != nil { + return ParseOutcome{}, err + } + if sess == nil { + return ParseOutcome{ + ResultSetComplete: true, + SkipReason: SkipNoSession, + }, nil + } + if hash, err := hashJSONLSourceFile(path); err == nil { + sess.File.Hash = hash + } + // Mirror the legacy effective-info behavior: the transcript's + // freshness identity (size and mtime) includes the .meta.json + // companion so a title-only rename triggers a reparse. + if size, mtime, ok := commandCodeEffectiveFileInfo(path); ok { + sess.File.Size = size + sess.File.Mtime = mtime + } + return ParseOutcome{ + Results: []ParseResultOutcome{{ + Result: ParseResult{ + Session: *sess, + Messages: msgs, + }, + DataVersion: DataVersionCurrent, + }}, + ResultSetComplete: true, + }, nil +} + +func newCommandCodeSourceSet(roots []string) DirectoryJSONLSourceSet { + return newDirectoryJSONLSourceSet(AgentCommandCode, roots, + withSymlinkFollowing(), + withIncludePath(isCommandCodeSourcePath), + withProjectHint(func(root, path string) string { return "" }), + withSessionIDFromPath(commandCodeSessionIDFromPath), + ) +} + +func (p *commandCodeProvider) sourceForMetaCompanion( + ctx context.Context, + req ChangedPathRequest, +) (SourceRef, bool, error) { + if req.Path == "" { + return SourceRef{}, false, nil + } + path := filepath.Clean(req.Path) + stem, ok := strings.CutSuffix(filepath.Base(path), ".meta.json") + if !ok || !IsValidSessionID(stem) { + return SourceRef{}, false, nil + } + transcriptPath := filepath.Join(filepath.Dir(path), stem+".jsonl") + if _, err := os.Stat(transcriptPath); err != nil { + return 
SourceRef{}, false, nil + } + source, ok, err := p.sources.sourceForPath(ctx, transcriptPath) + if err != nil { + return SourceRef{}, false, err + } + if !ok { + return SourceRef{}, false, nil + } + if req.WatchRoot != "" { + root := filepath.Clean(req.WatchRoot) + src := source.Opaque.(JSONLSource) + if !samePath(root, src.Root) { + return SourceRef{}, false, nil + } + } + return source, true, nil +} + +func isCommandCodeSourcePath(root, path string) bool { + name := filepath.Base(path) + if !strings.HasSuffix(name, ".jsonl") || + strings.HasSuffix(name, ".checkpoints.jsonl") || + strings.HasSuffix(name, ".prompts.jsonl") { + return false + } + return IsValidSessionID(strings.TrimSuffix(name, ".jsonl")) +} + +func commandCodeSessionIDFromPath(root, path string) string { + name := filepath.Base(path) + if !isCommandCodeSourcePath(root, path) { + return "" + } + return strings.TrimSuffix(name, ".jsonl") +} + +func commandCodeMetaCompanionPath(path string) string { + return strings.TrimSuffix(path, ".jsonl") + ".meta.json" +} + +// commandCodeEffectiveFileInfo returns the combined size and mtime of the +// transcript and its optional .meta.json companion. The bool is false only +// when the transcript itself cannot be stat'd. 
+func commandCodeEffectiveFileInfo(path string) (int64, int64, bool) { + info, err := os.Stat(path) + if err != nil { + return 0, 0, false + } + size := info.Size() + mtime := info.ModTime().UnixNano() + if metaInfo, ok, err := commandCodeCompanionInfo( + commandCodeMetaCompanionPath(path), + ); err == nil && ok && metaInfo != nil { + size += metaInfo.Size() + if metaMtime := metaInfo.ModTime().UnixNano(); metaMtime > mtime { + mtime = metaMtime + } + } + return size, mtime, true +} + +func commandCodeCompanionInfo(path string) (os.FileInfo, bool, error) { + info, err := os.Stat(path) + if os.IsNotExist(err) { + return nil, false, nil + } + if err != nil { + return nil, false, fmt.Errorf("stat %s: %w", path, err) + } + if info.IsDir() { + return nil, false, nil + } + return info, true, nil +} + +func addCommandCodeFingerprintPart( + h interface{ Write([]byte) (int, error) }, + label string, + path string, + info os.FileInfo, +) error { + hash, err := hashJSONLSourceFile(path) + if err != nil { + return err + } + _, _ = fmt.Fprintf( + h, + "%s:%s:%d:%d:%s\n", + label, + filepath.Base(path), + info.Size(), + info.ModTime().UnixNano(), + hash, + ) + return nil +} + +func commandCodeProviderCapabilities() Capabilities { + return Capabilities{ + Source: jsonlFileProviderSourceCapabilities(), + Content: ContentCapabilities{ + FirstMessage: CapabilitySupported, + SessionName: CapabilitySupported, + Cwd: CapabilitySupported, + GitBranch: CapabilitySupported, + Thinking: CapabilitySupported, + ToolCalls: CapabilitySupported, + ToolResults: CapabilitySupported, + MalformedLineCount: CapabilitySupported, + }, + } +} diff --git a/internal/parser/commandcode_provider_test.go b/internal/parser/commandcode_provider_test.go new file mode 100644 index 000000000..465699ad7 --- /dev/null +++ b/internal/parser/commandcode_provider_test.go @@ -0,0 +1,171 @@ +package parser + +import ( + "context" + "crypto/sha256" + "fmt" + "os" + "path/filepath" + "testing" + + 
"github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestCommandCodeProviderFactoryReplacesLegacyAdapter(t *testing.T) { + factory, ok := ProviderFactoryByType(AgentCommandCode) + require.True(t, ok) + require.NotNil(t, factory) + + provider, ok := NewProvider(AgentCommandCode, ProviderConfig{ + Roots: []string{t.TempDir()}, + Machine: "devbox", + }) + require.True(t, ok) + require.NotNil(t, provider) +} + +func TestCommandCodeProviderSourceMethods(t *testing.T) { + root := t.TempDir() + projectDir := filepath.Join(root, "users-alice-code-sample-project") + sourcePath := filepath.Join(projectDir, "sess_123.jsonl") + writeSourceFile(t, sourcePath, commandCodeProviderFixture()) + writeSourceFile(t, filepath.Join(projectDir, "sess_123.checkpoints.jsonl"), "{}\n") + writeSourceFile(t, filepath.Join(projectDir, "sess_123.prompts.jsonl"), "{}\n") + + provider, ok := NewProvider(AgentCommandCode, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, AgentCommandCode, discovered[0].Provider) + assert.Equal(t, sourcePath, discovered[0].DisplayPath) + assert.Empty(t, discovered[0].ProjectHint) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "host~commandcode:sess_123", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sourcePath, found.DisplayPath) + + found, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + FingerprintKey: sourcePath, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sourcePath, found.DisplayPath) + + require.NoError(t, os.Remove(sourcePath)) + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: sourcePath, EventKind: "remove", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, 
changed, 1) + assert.Equal(t, sourcePath, changed[0].DisplayPath) +} + +func TestCommandCodeProviderDiscoversSymlinkedProjectDirectory(t *testing.T) { + root := t.TempDir() + realProjectDir := filepath.Join(t.TempDir(), "real-project") + linkProjectDir := filepath.Join(root, "linked-project") + if err := os.Symlink(realProjectDir, linkProjectDir); err != nil { + t.Skipf("symlink not supported: %v", err) + } + sourcePath := filepath.Join(linkProjectDir, "sess_123.jsonl") + writeSourceFile(t, filepath.Join(realProjectDir, "sess_123.jsonl"), commandCodeProviderFixture()) + + provider, ok := NewProvider(AgentCommandCode, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, sourcePath, discovered[0].DisplayPath) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "sess_123", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sourcePath, found.DisplayPath) +} + +func TestCommandCodeProviderParse(t *testing.T) { + root := t.TempDir() + sourcePath := filepath.Join(root, "project", "sess_123.jsonl") + transcript := commandCodeProviderFixture() + writeSourceFile(t, sourcePath, transcript) + + provider, ok := NewProvider(AgentCommandCode, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: sources[0], + Fingerprint: SourceFingerprint{ + Key: sourcePath, + }, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.Len(t, outcome.Results, 1) + assert.Equal(t, DataVersionCurrent, outcome.Results[0].DataVersion) + assert.Equal(t, "commandcode:sess_123", 
outcome.Results[0].Result.Session.ID) + assert.Equal(t, "devbox", outcome.Results[0].Result.Session.Machine) + assert.Equal(t, + fmt.Sprintf("%x", sha256.Sum256([]byte(transcript))), + outcome.Results[0].Result.Session.File.Hash, + ) + assert.Len(t, outcome.Results[0].Result.Messages, 2) +} + +func TestCommandCodeProviderParsePreservesTranscriptFileHash(t *testing.T) { + root := t.TempDir() + sourcePath := filepath.Join(root, "project", "sess_123.jsonl") + transcript := commandCodeProviderFixture() + writeSourceFile(t, sourcePath, transcript) + writeSourceFile(t, commandCodeMetaCompanionPath(sourcePath), `{"title":"Renamed"}`) + + provider, ok := NewProvider(AgentCommandCode, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + + fingerprint, err := provider.Fingerprint(context.Background(), sources[0]) + require.NoError(t, err) + transcriptHash := fmt.Sprintf("%x", sha256.Sum256([]byte(transcript))) + require.NotEqual(t, transcriptHash, fingerprint.Hash, + "fixture must prove metadata participates in freshness separately") + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: sources[0], + Fingerprint: fingerprint, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.Len(t, outcome.Results, 1) + assert.Equal(t, transcriptHash, outcome.Results[0].Result.Session.File.Hash) +} + +func commandCodeProviderFixture() string { + return `{"id":"m1","timestamp":"2026-06-01T10:00:00Z","sessionId":"sess_123","role":"user","content":[{"type":"text","text":"Inspect server logs"}],"gitBranch":"feature/command-code","metadata":{"version":2,"cwd":"/Users/alice/code/sample-project"}} +{"id":"m2","timestamp":"2026-06-01T10:00:03Z","sessionId":"sess_123","role":"assistant","content":[{"type":"text","text":"The error is in the startup 
path."}],"gitBranch":"feature/command-code","metadata":{"version":2}}` +} diff --git a/internal/parser/commandcode_test.go b/internal/parser/commandcode_test.go index f3aa5d0e6..1fce65f91 100644 --- a/internal/parser/commandcode_test.go +++ b/internal/parser/commandcode_test.go @@ -1,6 +1,7 @@ package parser import ( + "context" "os" "path/filepath" "strings" @@ -10,7 +11,7 @@ import ( "github.com/stretchr/testify/require" ) -func TestDiscoverCommandCodeSessions(t *testing.T) { +func TestCommandCodeProviderDiscoversSessions(t *testing.T) { t.Parallel() root := t.TempDir() @@ -22,13 +23,17 @@ func TestDiscoverCommandCodeSessions(t *testing.T) { require.NoError(t, os.WriteFile(filepath.Join(projectDir, "sess_a.prompts.jsonl"), []byte("{}\n"), 0o644)) require.NoError(t, os.WriteFile(filepath.Join(projectDir, "notes.txt"), []byte("ignore"), 0o644)) - files := DiscoverCommandCodeSessions(root) - require.Len(t, files, 1) - assert.Equal(t, AgentCommandCode, files[0].Agent) - assert.Equal(t, filepath.Join(projectDir, "sess_a.jsonl"), files[0].Path) + provider, ok := NewProvider(AgentCommandCode, ProviderConfig{Roots: []string{root}}) + require.True(t, ok) + + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + assert.Equal(t, AgentCommandCode, sources[0].Provider) + assert.Equal(t, filepath.Join(projectDir, "sess_a.jsonl"), sources[0].DisplayPath) } -func TestFindCommandCodeSourceFile(t *testing.T) { +func TestCommandCodeProviderFindsSourceFile(t *testing.T) { t.Parallel() root := t.TempDir() @@ -37,11 +42,24 @@ func TestFindCommandCodeSourceFile(t *testing.T) { path := filepath.Join(projectDir, "sess_123.jsonl") require.NoError(t, os.WriteFile(path, []byte("{}\n"), 0o644)) - assert.Equal(t, path, FindCommandCodeSourceFile(root, "sess_123")) - assert.Empty(t, FindCommandCodeSourceFile(root, "sess_missing")) + provider, ok := NewProvider(AgentCommandCode, ProviderConfig{Roots: []string{root}}) + require.True(t, 
ok) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "sess_123", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, path, found.DisplayPath) + + _, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "sess_missing", + }) + require.NoError(t, err) + assert.False(t, ok) } -func TestParseCommandCodeSession(t *testing.T) { +func TestCommandCodeProviderParsesSession(t *testing.T) { t.Parallel() content := `{"id":"m1","timestamp":"2026-06-01T10:00:00Z","sessionId":"sess_123","role":"user","content":[{"type":"text","text":"Inspect server logs"}],"gitBranch":"feature/command-code","metadata":{"version":2,"cwd":"/Users/alice/code/sample-project"}} @@ -49,13 +67,31 @@ func TestParseCommandCodeSession(t *testing.T) { {"id":"m3","timestamp":"2026-06-01T10:00:02Z","sessionId":"sess_123","role":"tool","content":[{"type":"tool-result","toolCallId":"tc1","toolName":"Read","output":{"type":"text","value":"error: boom"}}],"gitBranch":"feature/command-code","metadata":{"version":2}} {"id":"m4","timestamp":"2026-06-01T10:00:03Z","sessionId":"sess_123","role":"assistant","content":[{"type":"text","text":"The error is in the startup path."}],"gitBranch":"feature/command-code","metadata":{"version":2}}` - path := createTestFile(t, "commandcode.jsonl", content) + root := t.TempDir() + path := filepath.Join(root, "project", "sess_123.jsonl") + writeSourceFile(t, path, content) metaPath := strings.TrimSuffix(path, ".jsonl") + ".meta.json" require.NoError(t, os.WriteFile(metaPath, []byte(`{"title":"Startup investigation"}`), 0o644)) - sess, msgs, err := ParseCommandCodeSession(path, "local") + provider, ok := NewProvider(AgentCommandCode, ProviderConfig{ + Roots: []string{root}, + Machine: "local", + }) + require.True(t, ok) + source, found, err := provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "sess_123", + }) require.NoError(t, err) - require.NotNil(t, 
sess) + require.True(t, found) + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: source, + Machine: "local", + }) + require.NoError(t, err) + require.Len(t, outcome.Results, 1) + + sess := outcome.Results[0].Result.Session + msgs := outcome.Results[0].Result.Messages require.Len(t, msgs, 4) assert.Equal(t, "commandcode:sess_123", sess.ID) diff --git a/internal/parser/discovery.go b/internal/parser/discovery.go index 016690792..565bdbba3 100644 --- a/internal/parser/discovery.go +++ b/internal/parser/discovery.go @@ -2354,49 +2354,6 @@ func FindQClawSourceFile(agentsDir, rawID string) string { return "" } -// DiscoverIflowProjects finds all project directories under the -// iFlow projects dir and returns their JSONL session files. -// iFlow stores sessions in .iflow/projects//session-.jsonl -func DiscoverIflowProjects(projectsDir string) []DiscoveredFile { - entries, err := os.ReadDir(projectsDir) - if err != nil { - return nil - } - - var files []DiscoveredFile - for _, entry := range entries { - if !isDirOrSymlink(entry, projectsDir) { - continue - } - - projDir := filepath.Join(projectsDir, entry.Name()) - sessionFiles, err := os.ReadDir(projDir) - if err != nil { - continue - } - - for _, sf := range sessionFiles { - if sf.IsDir() { - continue - } - name := sf.Name() - if !strings.HasPrefix(name, "session-") || !strings.HasSuffix(name, ".jsonl") { - continue - } - files = append(files, DiscoveredFile{ - Path: filepath.Join(projDir, name), - Project: entry.Name(), - Agent: AgentIflow, - }) - } - } - - sort.Slice(files, func(i, j int) bool { - return files[i].Path < files[j].Path - }) - return files -} - // extractIflowBaseSessionID extracts the base session ID from an iFlow // session ID. Fork IDs are formatted as -, so we // remove the child UUID suffix to get the base session ID for file lookup. 
@@ -2424,39 +2381,6 @@ func extractIflowBaseSessionID(sessionID string) string { return sessionID } -// FindIflowSourceFile finds the original JSONL file for an iFlow -// session ID by searching all project directories. -func FindIflowSourceFile( - projectsDir, sessionID string, -) string { - if !IsValidSessionID(sessionID) { - return "" - } - - // For fork IDs, extract the base session ID to find the source file - baseID := extractIflowBaseSessionID(sessionID) - - entries, err := os.ReadDir(projectsDir) - if err != nil { - return "" - } - - target := "session-" + strings.TrimPrefix(baseID, "iflow:") + ".jsonl" - for _, entry := range entries { - if !isDirOrSymlink(entry, projectsDir) { - continue - } - candidate := filepath.Join( - projectsDir, entry.Name(), target, - ) - if _, err := os.Stat(candidate); err == nil { - return candidate - } - } - - return "" -} - // DiscoverVibeSessions finds all Vibe session files under the given root directory. // Vibe stores sessions in: ~/.vibe/logs/session/session_YYYYMMDD_HHMMSS_uuid/ // Each session directory contains messages.jsonl diff --git a/internal/parser/iflow.go b/internal/parser/iflow.go index eb502871a..f9c08e5cf 100644 --- a/internal/parser/iflow.go +++ b/internal/parser/iflow.go @@ -24,12 +24,12 @@ type dagEntryIflow struct { timestamp time.Time } -// ParseIflowSession parses an iFlow JSONL session file. +// parseIflowSession parses an iFlow JSONL session file. // Returns a single ParseResult. Unlike Claude, iFlow's // uuid/parentUuid DAG represents streaming incremental updates // (sliding-window snapshots), not conversation forks, so fork // splitting is intentionally not applied.
-func ParseIflowSession( +func parseIflowSession( path, project, machine string, ) ([]ParseResult, error) { info, err := os.Stat(path) diff --git a/internal/parser/iflow_parser_test.go b/internal/parser/iflow_parser_test.go index 8cf23170e..f78d504d2 100644 --- a/internal/parser/iflow_parser_test.go +++ b/internal/parser/iflow_parser_test.go @@ -10,6 +10,16 @@ import ( "github.com/stretchr/testify/require" ) +// parseIflowSessionForTest exercises the iFlow provider's transcript parsing +// in isolation, passing the project directly so tests can assert parse +// behavior without project-resolution enrichment. +func parseIflowSessionForTest( + t *testing.T, path, project, machine string, +) ([]ParseResult, error) { + t.Helper() + return parseIflowSession(path, project, machine) +} + func TestParseIflowSession(t *testing.T) { tests := []struct { name string @@ -29,7 +39,8 @@ func TestParseIflowSession(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - results, err := ParseIflowSession( + results, err := parseIflowSessionForTest( + t, tt.filename, "test-project", "local", @@ -68,7 +79,8 @@ func TestExtractIflowProjectHints(t *testing.T) { } func TestIflowSystemMessageFiltering(t *testing.T) { - results, err := ParseIflowSession( + results, err := parseIflowSessionForTest( + t, "testdata/iflow/session-5de701fc-7454-4858-a249-95cac4fd3b51.jsonl", "test-project", "local", @@ -90,7 +102,8 @@ func TestIflowSystemMessageFiltering(t *testing.T) { } func TestIflowToolCallParsing(t *testing.T) { - results, err := ParseIflowSession( + results, err := parseIflowSessionForTest( + t, "testdata/iflow/session-5de701fc-7454-4858-a249-95cac4fd3b51.jsonl", "test-project", "local", @@ -121,7 +134,8 @@ func TestIflowToolCallParsing(t *testing.T) { } func TestIflowBurstMerge(t *testing.T) { - results, err := ParseIflowSession( + results, err := parseIflowSessionForTest( + t, "testdata/iflow/session-5de701fc-7454-4858-a249-95cac4fd3b51.jsonl", "test-project", 
"local", @@ -283,7 +297,8 @@ func TestIflowBurstBoundary(t *testing.T) { } func TestIflowTimestampParsing(t *testing.T) { - results, err := ParseIflowSession( + results, err := parseIflowSessionForTest( + t, "testdata/iflow/session-5de701fc-7454-4858-a249-95cac4fd3b51.jsonl", "test-project", "local", diff --git a/internal/parser/iflow_provider.go b/internal/parser/iflow_provider.go new file mode 100644 index 000000000..7a28b7ef7 --- /dev/null +++ b/internal/parser/iflow_provider.go @@ -0,0 +1,105 @@ +package parser + +import ( + "context" + "path/filepath" + "strings" +) + +// iFlow stores each chat as a JSONL transcript named session-.jsonl in a +// per-project directory. It is a directory-of-files provider: discovery, +// watching, change classification, lookup, and fingerprinting come from +// DirectoryJSONLSourceSet. The ParseFile option makes that source set a full +// SourceSet so it rides the generic factory; RawSessionIDForLookup strips the +// subagent suffix from stored IDs so FindSource still matches the base file. 
+func newIflowProviderFactory(def AgentDef) ProviderFactory { + return newSourceSetFactory( + def, + iflowProviderCapabilities(), + func(cfg ProviderConfig) SourceSet { return newIflowSourceSet(cfg.Roots) }, + ) +} + +func newIflowSourceSet(roots []string) DirectoryJSONLSourceSet { + return newDirectoryJSONLSourceSet(AgentIflow, roots, + withContentHashing(), + withSymlinkFollowing(), + withIncludePath(isIflowSourcePath), + withSessionIDFromPath(iflowSessionIDFromPath), + withRawSessionIDForLookup(extractIflowBaseSessionID), + withParseFile(iflowParseFile), + ) +} + +func iflowParseFile( + ctx context.Context, path string, req ParseRequest, +) ([]ParseResult, []string, error) { + project := iflowResolveProject(ctx, req.Source, path) + results, err := parseIflowSession(path, project, req.Machine) + if err != nil { + return nil, nil, err + } + if len(results) == 0 { + return nil, nil, nil + } + // Mirror the legacy sync path: derive continuation/subagent + // relationship types from parent linkage before emitting. + InferRelationshipTypes(results) + if req.Fingerprint.Hash != "" { + for i := range results { + results[i].Session.File.Hash = req.Fingerprint.Hash + } + } + return results, nil, nil +} + +// iflowResolveProject mirrors the legacy sync project resolution for iFlow: +// start from the project directory name, then prefer a canonical project +// derived from the session's recorded cwd and git branch when available. 
+func iflowResolveProject( + ctx context.Context, + source SourceRef, + path string, +) string { + dirName := firstNonEmptyJSONLString( + source.ProjectHint, + directoryJSONLProjectFromPath(path), + ) + project := GetProjectName(dirName) + + cwd, gitBranch := ExtractIflowProjectHints(path) + if cwd != "" { + if p := ExtractProjectFromCwdWithBranchContext( + ctx, cwd, gitBranch, + ); p != "" { + project = p + } + } + return project +} + +func isIflowSourcePath(root, path string) bool { + name := filepath.Base(path) + return strings.HasPrefix(name, "session-") && + strings.HasSuffix(name, ".jsonl") +} + +func iflowSessionIDFromPath(root, path string) string { + if !isIflowSourcePath(root, path) { + return "" + } + stem := strings.TrimSuffix(filepath.Base(path), ".jsonl") + return strings.TrimPrefix(stem, "session-") +} + +func iflowProviderCapabilities() Capabilities { + return Capabilities{ + Source: jsonlFileProviderSourceCapabilities(), + Content: ContentCapabilities{ + FirstMessage: CapabilitySupported, + Thinking: CapabilitySupported, + ToolCalls: CapabilitySupported, + ToolResults: CapabilitySupported, + }, + } +} diff --git a/internal/parser/iflow_provider_test.go b/internal/parser/iflow_provider_test.go new file mode 100644 index 000000000..6bb17fb06 --- /dev/null +++ b/internal/parser/iflow_provider_test.go @@ -0,0 +1,147 @@ +package parser + +import ( + "context" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestIflowProviderFactoryReplacesLegacyAdapter(t *testing.T) { + factory, ok := ProviderFactoryByType(AgentIflow) + require.True(t, ok) + require.NotNil(t, factory) + + provider, ok := NewProvider(AgentIflow, ProviderConfig{ + Roots: []string{t.TempDir()}, + Machine: "devbox", + }) + require.True(t, ok) + require.NotNil(t, provider) +} + +func TestIflowProviderSourceMethods(t *testing.T) { + root := t.TempDir() + projectDir := filepath.Join(root, "test-project") + rawID := 
"5de701fc-7454-4858-a249-95cac4fd3b51" + sourcePath := filepath.Join(projectDir, "session-"+rawID+".jsonl") + copyFixtureFile(t, "testdata/iflow/session-"+rawID+".jsonl", sourcePath) + writeSourceFile(t, filepath.Join(projectDir, rawID+".jsonl"), "{}\n") + + provider, ok := NewProvider(AgentIflow, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, AgentIflow, discovered[0].Provider) + assert.Equal(t, sourcePath, discovered[0].DisplayPath) + assert.Equal(t, "test-project", discovered[0].ProjectHint) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "host~iflow:" + rawID, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sourcePath, found.DisplayPath) + + forkID := rawID + "-6f5d8718-7a95-4bb8-965f-faa23246c82d" + found, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: forkID, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sourcePath, found.DisplayPath) + + require.NoError(t, os.Remove(sourcePath)) + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: sourcePath, EventKind: "remove", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, sourcePath, changed[0].DisplayPath) +} + +func TestIflowProviderDiscoversSymlinkedProjectDirectory(t *testing.T) { + root := t.TempDir() + realProjectDir := filepath.Join(t.TempDir(), "real-project") + linkProjectDir := filepath.Join(root, "linked-project") + if err := os.Symlink(realProjectDir, linkProjectDir); err != nil { + t.Skipf("symlink not supported: %v", err) + } + rawID := "5de701fc-7454-4858-a249-95cac4fd3b51" + sourcePath := filepath.Join(linkProjectDir, "session-"+rawID+".jsonl") + copyFixtureFile( + t, + 
"testdata/iflow/session-"+rawID+".jsonl", + filepath.Join(realProjectDir, "session-"+rawID+".jsonl"), + ) + + provider, ok := NewProvider(AgentIflow, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, sourcePath, discovered[0].DisplayPath) + assert.Equal(t, "linked-project", discovered[0].ProjectHint) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: rawID, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sourcePath, found.DisplayPath) +} + +func TestIflowProviderParse(t *testing.T) { + root := t.TempDir() + project := "test-project" + rawID := "5de701fc-7454-4858-a249-95cac4fd3b51" + sourcePath := filepath.Join(root, project, "session-"+rawID+".jsonl") + copyFixtureFile(t, "testdata/iflow/session-"+rawID+".jsonl", sourcePath) + + provider, ok := NewProvider(AgentIflow, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: sources[0], + Fingerprint: SourceFingerprint{Key: sourcePath, Hash: "abc123"}, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.Len(t, outcome.Results, 1) + assert.Equal(t, DataVersionCurrent, outcome.Results[0].DataVersion) + assert.Equal(t, "iflow:"+rawID, outcome.Results[0].Result.Session.ID) + // The provider mirrors the legacy sync project resolution, deriving the + // canonical project from the session's recorded cwd rather than the raw + // project directory name. 
+ assert.Equal(t, "docker_image_retagger", outcome.Results[0].Result.Session.Project) + assert.Equal(t, "devbox", outcome.Results[0].Result.Session.Machine) + assert.Equal(t, "abc123", outcome.Results[0].Result.Session.File.Hash) + assert.Len(t, outcome.Results[0].Result.Messages, 11) +} + +func copyFixtureFile(t *testing.T, src, dst string) { + t.Helper() + + data, err := os.ReadFile(src) + require.NoError(t, err) + writeSourceFile(t, dst, string(data)) +} diff --git a/internal/parser/provider.go b/internal/parser/provider.go index f71a5c242..64745e779 100644 --- a/internal/parser/provider.go +++ b/internal/parser/provider.go @@ -339,13 +339,23 @@ func (p *legacyProvider) Parse(context.Context, ParseRequest) (ParseOutcome, err func ProviderFactories() []ProviderFactory { factories := make([]ProviderFactory, 0, len(Registry)) for _, def := range Registry { - factories = append(factories, legacyProviderFactory{ - def: cloneAgentDef(def), - }) + factories = append(factories, providerFactoryForDef(def)) } return factories } +func providerFactoryForDef(def AgentDef) ProviderFactory { + def = cloneAgentDef(def) + switch def.Type { + case AgentCommandCode: + return newCommandCodeProviderFactory(def) + case AgentIflow: + return newIflowProviderFactory(def) + default: + return legacyProviderFactory{def: def} + } +} + // ProviderFactoryByType returns the factory for an agent type. 
func ProviderFactoryByType(t AgentType) (ProviderFactory, bool) { for _, factory := range ProviderFactories() { diff --git a/internal/parser/provider_capabilities.go b/internal/parser/provider_capabilities.go new file mode 100644 index 000000000..f95270b4f --- /dev/null +++ b/internal/parser/provider_capabilities.go @@ -0,0 +1,15 @@ +package parser + +func jsonlFileProviderSourceCapabilities() SourceCapabilities { + return SourceCapabilities{ + DiscoverSources: CapabilitySupported, + WatchSources: CapabilitySupported, + ClassifyChangedPath: CapabilitySupported, + FindSource: CapabilitySupported, + CompositeFingerprint: CapabilitySupported, + MultiSessionSource: CapabilityNotApplicable, + PerSessionErrors: CapabilityNotApplicable, + ExcludedSessions: CapabilityNotApplicable, + ForceReplaceOnParse: CapabilityNotApplicable, + } +} diff --git a/internal/parser/provider_migration.go b/internal/parser/provider_migration.go index e213cbf00..439ec5334 100644 --- a/internal/parser/provider_migration.go +++ b/internal/parser/provider_migration.go @@ -27,14 +27,14 @@ var providerMigrationModes = map[AgentType]ProviderMigrationMode{ AgentKilo: ProviderMigrationLegacyOnly, AgentOpenHands: ProviderMigrationLegacyOnly, AgentCursor: ProviderMigrationLegacyOnly, - AgentIflow: ProviderMigrationLegacyOnly, + AgentIflow: ProviderMigrationProviderAuthoritative, AgentAmp: ProviderMigrationLegacyOnly, AgentZencoder: ProviderMigrationLegacyOnly, AgentVSCodeCopilot: ProviderMigrationLegacyOnly, AgentVSCopilot: ProviderMigrationLegacyOnly, AgentPi: ProviderMigrationLegacyOnly, AgentQwen: ProviderMigrationLegacyOnly, - AgentCommandCode: ProviderMigrationLegacyOnly, + AgentCommandCode: ProviderMigrationProviderAuthoritative, AgentDeepSeekTUI: ProviderMigrationLegacyOnly, AgentOpenClaw: ProviderMigrationLegacyOnly, AgentQClaw: ProviderMigrationLegacyOnly, diff --git a/internal/parser/provider_shim_scan_test.go b/internal/parser/provider_shim_scan_test.go index b0231d1b3..19023bf20 100644 --- 
a/internal/parser/provider_shim_scan_test.go +++ b/internal/parser/provider_shim_scan_test.go @@ -46,14 +46,12 @@ var pendingShimProviderFiles = map[string]bool{ "antigravity_provider.go": true, "claude_provider.go": true, "codex_provider.go": true, - "commandcode_provider.go": true, "copilot_provider.go": true, "cowork_provider.go": true, "cursor_provider.go": true, "db_backed_provider.go": true, "gemini_provider.go": true, "hermes_provider.go": true, - "iflow_provider.go": true, "kiro_ide_provider.go": true, "kiro_provider.go": true, "opencode_provider.go": true, diff --git a/internal/parser/types.go b/internal/parser/types.go index b1179ceb6..4cb6fe4df 100644 --- a/internal/parser/types.go +++ b/internal/parser/types.go @@ -257,15 +257,13 @@ var Registry = []AgentDef{ FindSourceFunc: FindZencoderSourceFile, }, { - Type: AgentIflow, - DisplayName: "iFlow", - EnvVar: "IFLOW_DIR", - ConfigKey: "iflow_dirs", - DefaultDirs: []string{".iflow/projects"}, - IDPrefix: "iflow:", - FileBased: true, - DiscoverFunc: DiscoverIflowProjects, - FindSourceFunc: FindIflowSourceFile, + Type: AgentIflow, + DisplayName: "iFlow", + EnvVar: "IFLOW_DIR", + ConfigKey: "iflow_dirs", + DefaultDirs: []string{".iflow/projects"}, + IDPrefix: "iflow:", + FileBased: true, }, { Type: AgentVSCodeCopilot, @@ -350,15 +348,13 @@ var Registry = []AgentDef{ FindSourceFunc: FindQwenSourceFile, }, { - Type: AgentCommandCode, - DisplayName: "Command Code", - EnvVar: "COMMANDCODE_PROJECTS_DIR", - ConfigKey: "commandcode_project_dirs", - DefaultDirs: []string{".commandcode/projects"}, - IDPrefix: "commandcode:", - FileBased: true, - DiscoverFunc: DiscoverCommandCodeSessions, - FindSourceFunc: FindCommandCodeSourceFile, + Type: AgentCommandCode, + DisplayName: "Command Code", + EnvVar: "COMMANDCODE_PROJECTS_DIR", + ConfigKey: "commandcode_project_dirs", + DefaultDirs: []string{".commandcode/projects"}, + IDPrefix: "commandcode:", + FileBased: true, }, { Type: AgentDeepSeekTUI, diff --git 
a/internal/parser/types_test.go b/internal/parser/types_test.go index 5ef958ea0..7d2f12ee6 100644 --- a/internal/parser/types_test.go +++ b/internal/parser/types_test.go @@ -559,8 +559,11 @@ func TestCommandCodeRegistryEntry(t *testing.T) { def, ok := AgentByType(AgentCommandCode) require.True(t, ok, "AgentCommandCode missing from Registry") require.True(t, def.FileBased, "Command Code FileBased") - require.NotNil(t, def.DiscoverFunc, "Command Code DiscoverFunc") - require.NotNil(t, def.FindSourceFunc, "Command Code FindSourceFunc") + // Command Code is a migrated, provider-authoritative agent: source + // discovery and lookup live on the concrete provider, not on legacy + // AgentDef hooks. + require.Nil(t, def.DiscoverFunc, "Command Code DiscoverFunc") + require.Nil(t, def.FindSourceFunc, "Command Code FindSourceFunc") assert.Equal(t, []string{".commandcode/projects"}, def.DefaultDirs) assert.Equal(t, "commandcode:", def.IDPrefix) } diff --git a/internal/sync/engine.go b/internal/sync/engine.go index 37bce5a60..b33f2d427 100644 --- a/internal/sync/engine.go +++ b/internal/sync/engine.go @@ -1216,27 +1216,6 @@ func (e *Engine) classifyOnePath( } } - // iFlow: //session-.jsonl - for _, iflowDir := range e.agentDirs[parser.AgentIflow] { - if iflowDir == "" { - continue - } - if rel, ok := isUnder(iflowDir, path); ok { - parts := strings.Split(rel, sep) - if len(parts) != 2 { - continue - } - if !strings.HasPrefix(parts[1], "session-") || !strings.HasSuffix(parts[1], ".jsonl") { - continue - } - return parser.DiscoveredFile{ - Path: path, - Project: parts[0], - Agent: parser.AgentIflow, - }, true - } - } - // Kimi: ///wire.jsonl (legacy) // or ///agents//wire.jsonl (.kimi-code) // Components that cannot round-trip through the ':'-delimited @@ -1520,46 +1499,6 @@ func (e *Engine) classifyOnePath( return df, true } - // Command Code: //.jsonl - for _, commandCodeDir := range e.agentDirs[parser.AgentCommandCode] { - if commandCodeDir == "" { - continue - } - if rel, ok 
:= isUnder(commandCodeDir, path); ok { - parts := strings.Split(rel, sep) - if len(parts) != 2 { - continue - } - if sessionID, ok := strings.CutSuffix(parts[1], ".meta.json"); ok { - if !parser.IsValidSessionID(sessionID) { - continue - } - jsonlPath := filepath.Join(commandCodeDir, parts[0], sessionID+".jsonl") - if _, err := os.Stat(jsonlPath); err != nil { - continue - } - return parser.DiscoveredFile{ - Path: jsonlPath, - Project: parser.NormalizeName(parts[0]), - Agent: parser.AgentCommandCode, - }, true - } - if strings.HasSuffix(parts[1], ".checkpoints.jsonl") || - strings.HasSuffix(parts[1], ".prompts.jsonl") { - continue - } - sessionID, ok := strings.CutSuffix(parts[1], ".jsonl") - if !ok || !parser.IsValidSessionID(sessionID) { - continue - } - return parser.DiscoveredFile{ - Path: path, - Project: parser.NormalizeName(parts[0]), - Agent: parser.AgentCommandCode, - }, true - } - } - // OpenClaw: //sessions/.jsonl // or: //sessions/.jsonl. for _, ocDir := range e.agentDirs[parser.AgentOpenClaw] { @@ -4834,8 +4773,6 @@ func (e *Engine) processFile( res = e.processOpenHands(file, info) case parser.AgentCursor: res = e.processCursor(file, info) - case parser.AgentIflow: - res = e.processIflow(ctx, file, info) case parser.AgentAmp: res = e.processAmp(file, info) case parser.AgentDeepSeekTUI: @@ -4850,8 +4787,6 @@ func (e *Engine) processFile( res = e.processPi(file, info) case parser.AgentQwen: res = e.processQwen(file, info) - case parser.AgentCommandCode: - res = e.processCommandCode(file, info) case parser.AgentOpenClaw: res = e.processOpenClaw(file, info) case parser.AgentQClaw: @@ -7670,39 +7605,6 @@ func (e *Engine) processQwen( } } -func (e *Engine) processCommandCode( - file parser.DiscoveredFile, info os.FileInfo, -) processResult { - effectiveInfo := commandCodeEffectiveInfo(file.Path, info) - if e.shouldSkipByPath(file.Path, effectiveInfo) { - return processResult{skip: true} - } - - sess, msgs, err := parser.ParseCommandCodeSession( - file.Path, 
e.machine, - ) - if err != nil { - return processResult{err: err} - } - if sess == nil { - return processResult{} - } - - hash, err := ComputeFileHash(file.Path) - if err == nil { - sess.File.Hash = hash - } - sess.File.Size = effectiveInfo.Size() - sess.File.Mtime = effectiveInfo.ModTime().UnixNano() - - return processResult{ - results: []parser.ParseResult{{ - Session: *sess, - Messages: msgs, - }}, - } -} - func commandCodeEffectiveInfo(path string, info os.FileInfo) os.FileInfo { size := info.Size() mtime := info.ModTime().UnixNano() @@ -7741,56 +7643,6 @@ func validateCursorContainment( return nil } -func (e *Engine) processIflow( - ctx context.Context, - file parser.DiscoveredFile, info os.FileInfo, -) processResult { - // Extract session ID from filename: session-.jsonl - sessionID := "iflow:" + strings.TrimPrefix(strings.TrimSuffix(info.Name(), ".jsonl"), "session-") - - if e.shouldSkipFile(sessionID, info) { - sess, _ := e.db.GetSession( - ctx, e.idPrefix+sessionID, - ) - if sess != nil && - sess.Project != "" && - !parser.NeedsProjectReparse(sess.Project) { - return processResult{skip: true} - } - } - - // Determine project name from cwd if possible - project := parser.GetProjectName(file.Project) - cwd, gitBranch := parser.ExtractIflowProjectHints( - file.Path, - ) - if cwd != "" { - if p := parser.ExtractProjectFromCwdWithBranchContext( - ctx, cwd, gitBranch, - ); p != "" { - project = p - } - } - - results, err := parser.ParseIflowSession( - file.Path, project, e.machine, - ) - if err != nil { - return processResult{err: err} - } - - hash, err := ComputeFileHash(file.Path) - if err == nil { - for i := range results { - results[i].Session.File.Hash = hash - } - } - - parser.InferRelationshipTypes(results) - - return processResult{results: results} -} - // computeFinalStreak counts trailing consecutive failures // from the end of the tool call list. 
func computeFinalStreak(calls []signals.ToolCallRow) int { diff --git a/internal/sync/engine_test.go b/internal/sync/engine_test.go index c43529966..f88056c28 100644 --- a/internal/sync/engine_test.go +++ b/internal/sync/engine_test.go @@ -3144,7 +3144,11 @@ func TestEngine_ClassifyPathsCommandCodeSession(t *testing.T) { require.Len(t, files, 1, "len(files) = %d, want 1 (%v)", len(files), files) assert.Equal(t, sessionPath, files[0].Path) assert.Equal(t, parser.AgentCommandCode, files[0].Agent) - assert.Equal(t, "users_alice_code_sample_project", files[0].Project) + // Command Code is provider-authoritative: classification attaches a + // provider source and recomputes the project during parse, so the + // classification carries no informational project hint. + assert.Empty(t, files[0].Project) + require.NotNil(t, files[0].ProviderSource) bogus := []string{ filepath.Join(commandCodeDir, "stray.jsonl"), diff --git a/internal/sync/iflow_discovery_test.go b/internal/sync/iflow_discovery_test.go index eb7749d5a..5bf654956 100644 --- a/internal/sync/iflow_discovery_test.go +++ b/internal/sync/iflow_discovery_test.go @@ -1,6 +1,7 @@ package sync import ( + "context" "os" "path/filepath" "testing" @@ -10,7 +11,7 @@ import ( "go.kenn.io/agentsview/internal/parser" ) -func TestDiscoverIflowProjects(t *testing.T) { +func TestIflowProviderDiscoversProjects(t *testing.T) { // Create a temporary directory structure for testing tmpDir := t.TempDir() @@ -40,16 +41,19 @@ func TestDiscoverIflowProjects(t *testing.T) { subDir := filepath.Join(proj1, "subdir") require.NoError(t, os.MkdirAll(subDir, 0o755)) - // Run discovery - files := parser.DiscoverIflowProjects(tmpDir) + provider, ok := parser.NewProvider(parser.AgentIflow, parser.ProviderConfig{ + Roots: []string{tmpDir}, + }) + require.True(t, ok) - // Verify results - assert.Len(t, files, 3) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + assert.Len(t, sources, 3) - // Verify file paths + 
// Verify source paths paths := make(map[string]bool) - for _, f := range files { - paths[f.Path] = true + for _, s := range sources { + paths[s.DisplayPath] = true } assert.True(t, paths[session1], "session1 not found in results") @@ -57,22 +61,22 @@ func TestDiscoverIflowProjects(t *testing.T) { assert.True(t, paths[session3], "session3 not found in results") assert.False(t, paths[otherFile], "other.txt should not be in results") - // Verify project names + // Verify project hints projects := make(map[string]bool) - for _, f := range files { - projects[f.Project] = true + for _, s := range sources { + projects[s.ProjectHint] = true } assert.True(t, projects["project1"], "project1 not found in projects") assert.True(t, projects["project2"], "project2 not found in projects") - // Verify agent type - for _, f := range files { - assert.Equal(t, parser.AgentType("iflow"), f.Agent) + // Verify provider type + for _, s := range sources { + assert.Equal(t, parser.AgentIflow, s.Provider) } } -func TestFindIflowSourceFile(t *testing.T) { +func TestIflowProviderFindsSourceFile(t *testing.T) { tmpDir := t.TempDir() // Create a project directory @@ -84,23 +88,39 @@ func TestFindIflowSourceFile(t *testing.T) { sessionFile := filepath.Join(proj, "session-"+sessionID+".jsonl") require.NoError(t, os.WriteFile(sessionFile, []byte(`{"test":"data"}`), 0o644)) + provider, ok := parser.NewProvider(parser.AgentIflow, parser.ProviderConfig{ + Roots: []string{tmpDir}, + }) + require.True(t, ok) + // Test finding the file - found := parser.FindIflowSourceFile(tmpDir, sessionID) - assert.Equal(t, sessionFile, found) + found, ok, err := provider.FindSource(context.Background(), parser.FindSourceRequest{ + RawSessionID: sessionID, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sessionFile, found.DisplayPath) // Test finding a non-existent file - notFound := parser.FindIflowSourceFile(tmpDir, "nonexistent") - assert.Empty(t, notFound) + _, ok, err = 
provider.FindSource(context.Background(), parser.FindSourceRequest{ + RawSessionID: "nonexistent", + }) + require.NoError(t, err) + assert.False(t, ok) // Test finding a fork ID (should extract base session ID) // Fork IDs have format: <base-uuid>-<fork-uuid> - // The file lookup should use only the base UUID + // The source lookup should use only the base UUID baseSessionID := "96e6d875-92eb-40b9-b193-a9ba99f0f709" forkSessionID := baseSessionID + "-12345678-1234-5678-9abc-def012345678" forkSessionFile := filepath.Join(proj, "session-"+baseSessionID+".jsonl") require.NoError(t, os.WriteFile(forkSessionFile, []byte(`{"test":"fork"}`), 0o644)) // Test finding the fork session - should find the base file - foundFork := parser.FindIflowSourceFile(tmpDir, forkSessionID) - assert.Equal(t, forkSessionFile, foundFork, "for fork ID %s", forkSessionID) + foundFork, ok, err := provider.FindSource(context.Background(), parser.FindSourceRequest{ + RawSessionID: forkSessionID, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, forkSessionFile, foundFork.DisplayPath, "for fork ID %s", forkSessionID) } From 49397971cbed9e47866f8c69c0cac9372297b117 Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Wed, 24 Jun 2026 21:34:41 -0400 Subject: [PATCH 13/24] feat(parser): migrate gptme to provider facade The facade needs at least one real provider implementation before caller migration can prove the contract. Gptme is a narrow first target because it has a single-session JSONL source layout and an existing parser path that can be wrapped without changing runtime sync dispatch. This keeps gptme source behavior explicit: the provider composes JSONLSourceSet for filesystem mechanics, filters to the legacy one-level conversation.jsonl layout, and returns complete current parse outcomes while the rest of the registry remains on legacy adapters. 
fix(parser): preserve gptme provider source parity The gptme provider is intended to be a no-behavior-change facade migration, so it needs to preserve the legacy source semantics before sync callers can safely move to it. Symlinked session directories, deleted source events, and persisted lookup hints are all observable through the current discovery and session lookup paths. This keeps provider-backed gptme discovery and changed-path classification compatible with those legacy expectations while leaving runtime dispatch unchanged. test(parser): opt gptme into provider shadow Gptme now has a concrete facade provider on this branch, so the migration manifest should force it through the shared shadow-compare harness instead of leaving the provider implementation additive and unexercised. Lower branch opt-ins remain inherited and later provider families stay legacy-only until their own branches introduce concrete providers. Validation: go test -tags "fts5" ./internal/parser -run TestProviderMigrationModes -count=1; go test -tags "fts5" ./internal/parser -count=1; go vet ./...; git diff --check test(sync): compare gptme shadow parity GPTMe is marked shadow-compare on this branch, so add the shared source-level migration proof beside the concrete provider. The test runs ObserveProviderSource and compares normalized provider output with ParseGptmeSession while preserving the provider-computed content hash. Validation: go fmt ./...; go test -tags "fts5" ./internal/parser ./internal/sync -count=1; go vet ./...; git diff --check; ./custom-gcl run --config .golangci.nilaway.yml ./internal/parser/... ./internal/sync/... refactor(parser): fold gptme into provider GPTMe should be a migrated provider on this branch, not a provider wrapper around exported legacy parser entrypoints. Keeping DiscoverGptmeSessions, FindGptmeSourceFile, ParseGptmeSession, and the engine processGptme path made the stack additive and left two public shapes to maintain. 
Move GPTMe parsing behind the concrete provider, make GPTMe provider-authoritative at this branch, remove its legacy AgentDef hooks and engine dispatch, and replace shadow-baseline tests with provider API coverage plus a guard that the legacy symbols stay gone. Validation: go test -tags "fts5" ./internal/parser ./internal/sync ./cmd/agentsview -count=1; go vet ./...; git diff --check fix(parser): thread ctx through gptme source lookups --- internal/parser/gptme.go | 54 +---- internal/parser/gptme_provider.go | 323 +++++++++++++++++++++++++ internal/parser/gptme_provider_test.go | 230 ++++++++++++++++++ internal/parser/gptme_test.go | 63 +++-- internal/parser/provider.go | 2 + internal/parser/provider_migration.go | 2 +- internal/parser/types.go | 16 +- internal/sync/engine.go | 48 ---- 8 files changed, 610 insertions(+), 128 deletions(-) create mode 100644 internal/parser/gptme_provider.go create mode 100644 internal/parser/gptme_provider_test.go diff --git a/internal/parser/gptme.go b/internal/parser/gptme.go index 166df2d94..2900fa4cb 100644 --- a/internal/parser/gptme.go +++ b/internal/parser/gptme.go @@ -5,62 +5,16 @@ import ( "fmt" "os" "path/filepath" - "sort" "strings" "time" "github.com/tidwall/gjson" ) -// DiscoverGptmeSessions finds gptme session files under the -// given logs directory. Each session is a subdirectory containing -// a conversation.jsonl file (e.g. ~/.local/share/gptme/logs/). 
-func DiscoverGptmeSessions(logsDir string) []DiscoveredFile { - if logsDir == "" { - return nil - } - entries, err := os.ReadDir(logsDir) - if err != nil { - return nil - } - var files []DiscoveredFile - for _, entry := range entries { - if !isDirOrSymlink(entry, logsDir) { - continue - } - convPath := filepath.Join(logsDir, entry.Name(), "conversation.jsonl") - if _, err := os.Stat(convPath); err != nil { - continue - } - files = append(files, DiscoveredFile{ - Path: convPath, - Agent: AgentGptme, - }) - } - sort.Slice(files, func(i, j int) bool { - return files[i].Path < files[j].Path - }) - return files -} - -// FindGptmeSourceFile locates a gptme session by its raw session ID -// (the directory name, without the "gptme:" prefix). -func FindGptmeSourceFile(logsDir, rawID string) string { - if logsDir == "" || rawID == "" { - return "" - } - candidate := filepath.Join(logsDir, rawID, "conversation.jsonl") - if info, err := os.Stat(candidate); err == nil && !info.IsDir() { - return candidate - } - return "" -} - -// ParseGptmeSession parses a gptme conversation.jsonl file. -// gptme stores one message per line with role/content/timestamp -// fields. Assistant messages carry an optional metadata object -// with model and usage sub-fields. -func ParseGptmeSession( +// parseSession parses a gptme conversation.jsonl file. gptme stores one +// message per line with role/content/timestamp fields. Assistant messages +// carry an optional metadata object with model and usage sub-fields. 
+func (p *gptmeProvider) parseSession( path, machine string, ) (*ParsedSession, []ParsedMessage, error) { info, err := os.Stat(path) diff --git a/internal/parser/gptme_provider.go b/internal/parser/gptme_provider.go new file mode 100644 index 000000000..a5711ca51 --- /dev/null +++ b/internal/parser/gptme_provider.go @@ -0,0 +1,323 @@ +package parser + +import ( + "context" + "fmt" + "os" + "path/filepath" + "strings" +) + +type gptmeProviderFactory struct { + def AgentDef +} + +func newGptmeProviderFactory(def AgentDef) ProviderFactory { + return gptmeProviderFactory{def: cloneAgentDef(def)} +} + +func (f gptmeProviderFactory) Definition() AgentDef { + return cloneAgentDef(f.def) +} + +func (f gptmeProviderFactory) Capabilities() Capabilities { + return gptmeProviderCapabilities() +} + +func (f gptmeProviderFactory) NewProvider(cfg ProviderConfig) Provider { + cfg = cfg.Clone() + return &gptmeProvider{ + ProviderBase: ProviderBase{ + Def: cloneAgentDef(f.def), + Caps: gptmeProviderCapabilities(), + Config: cfg, + }, + sources: newGptmeSourceSet(cfg.Roots), + } +} + +type gptmeProvider struct { + ProviderBase + sources JSONLSourceSet +} + +func (p *gptmeProvider) Discover(ctx context.Context) ([]SourceRef, error) { + sources, err := p.sources.Discover(ctx) + if err != nil { + return nil, err + } + return p.filterSources(sources), nil +} + +func (p *gptmeProvider) WatchPlan(ctx context.Context) (WatchPlan, error) { + return p.sources.WatchPlan(ctx) +} + +func (p *gptmeProvider) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + sources, err := p.sources.SourcesForChangedPath(ctx, req) + if err != nil { + return nil, err + } + filtered := p.filterSources(sources) + if len(filtered) > 0 { + return filtered, nil + } + source, ok := p.sourceForEventPath(req) + if !ok { + return nil, nil + } + return []SourceRef{source}, nil +} + +func (p *gptmeProvider) FindSource( + ctx context.Context, + req FindSourceRequest, +) 
(SourceRef, bool, error) { + if err := ctx.Err(); err != nil { + return SourceRef{}, false, err + } + for _, path := range []string{ + req.StoredFilePath, + req.FingerprintKey, + } { + if path == "" { + continue + } + if source, ok, err := p.sourceForExistingPath(ctx, path); err != nil { + return SourceRef{}, false, err + } else if ok { + return source, true, nil + } + } + for _, id := range []string{ + req.RawSessionID, + p.rawSessionIDFromFull(req.FullSessionID), + } { + if id == "" { + continue + } + if source, ok, err := p.sourceForSessionID(ctx, id); err != nil { + return SourceRef{}, false, err + } else if ok { + return source, true, nil + } + } + return SourceRef{}, false, nil +} + +func (p *gptmeProvider) sourceForExistingPath( + ctx context.Context, + path string, +) (SourceRef, bool, error) { + source, ok, err := p.sources.sourceForPath(ctx, path) + if err != nil { + return SourceRef{}, false, err + } + if ok && p.isSource(source) { + return source, true, nil + } + return SourceRef{}, false, nil +} + +func (p *gptmeProvider) sourceForSessionID( + ctx context.Context, + id string, +) (SourceRef, bool, error) { + for _, root := range p.Config.Roots { + path := filepath.Join(root, id, "conversation.jsonl") + if source, ok, err := p.sourceForExistingPath(ctx, path); err != nil { + return SourceRef{}, false, err + } else if ok { + return source, true, nil + } + } + return SourceRef{}, false, nil +} + +func (p *gptmeProvider) rawSessionIDFromFull(id string) string { + if id == "" { + return "" + } + _, rawID := StripHostPrefix(id) + if !strings.HasPrefix(rawID, p.Def.IDPrefix) { + return "" + } + return strings.TrimPrefix(rawID, p.Def.IDPrefix) +} + +func (p *gptmeProvider) sourceForEventPath(req ChangedPathRequest) (SourceRef, bool) { + if req.Path == "" { + return SourceRef{}, false + } + if req.WatchRoot != "" { + root := filepath.Clean(req.WatchRoot) + if !p.hasRoot(root) { + return SourceRef{}, false + } + return gptmeSourceRef(root, 
filepath.Clean(req.Path)) + } + for _, root := range p.Config.Roots { + if source, ok := gptmeSourceRef(root, filepath.Clean(req.Path)); ok { + return source, true + } + } + return SourceRef{}, false +} + +func (p *gptmeProvider) hasRoot(root string) bool { + for _, configured := range p.Config.Roots { + if samePath(configured, root) { + return true + } + } + return false +} + +func (p *gptmeProvider) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + return p.sources.Fingerprint(ctx, source) +} + +func (p *gptmeProvider) Parse( + ctx context.Context, + req ParseRequest, +) (ParseOutcome, error) { + if err := ctx.Err(); err != nil { + return ParseOutcome{}, err + } + path, ok, err := p.sources.pathFromSource(ctx, req.Source) + if err != nil { + return ParseOutcome{}, err + } + if !ok { + return ParseOutcome{}, fmt.Errorf("gptme source path unavailable") + } + machine := firstNonEmptyJSONLString(req.Machine, p.Config.Machine) + sess, msgs, err := p.parseSession(path, machine) + if err != nil { + return ParseOutcome{}, err + } + if sess == nil { + return ParseOutcome{ + ResultSetComplete: true, + SkipReason: SkipNoSession, + }, nil + } + if req.Fingerprint.Hash != "" { + sess.File.Hash = req.Fingerprint.Hash + } + return ParseOutcome{ + Results: []ParseResultOutcome{{ + Result: ParseResult{ + Session: *sess, + Messages: msgs, + }, + DataVersion: DataVersionCurrent, + }}, + ResultSetComplete: true, + }, nil +} + +func (p *gptmeProvider) filterSources(sources []SourceRef) []SourceRef { + if len(sources) == 0 { + return nil + } + filtered := sources[:0] + for _, source := range sources { + if p.isSource(source) { + filtered = append(filtered, source) + } + } + return filtered +} + +func (p *gptmeProvider) isSource(source SourceRef) bool { + src, ok := source.Opaque.(JSONLSource) + if !ok { + return false + } + return isGptmeConversationPath(src.Root, src.Path) +} + +func newGptmeSourceSet(roots []string) JSONLSourceSet { + return 
newJSONLSourceSet(AgentGptme, roots, + withRecursive(), + withContentHashing(), + withSymlinkFollowing(), + withInclude(func(path string, info os.FileInfo) bool { + return !info.IsDir() && filepath.Base(path) == "conversation.jsonl" + }), + withProjectHint(func(root, path string) string { + sessionID := gptmeSessionIDFromPath(root, path) + if sessionID == "" { + return "" + } + return gptmeProjectFromSessionName(sessionID) + }), + withSessionIDFromPath(gptmeSessionIDFromPath), + ) +} + +func gptmeProviderCapabilities() Capabilities { + return Capabilities{ + Source: SourceCapabilities{ + DiscoverSources: CapabilitySupported, + WatchSources: CapabilitySupported, + ClassifyChangedPath: CapabilitySupported, + FindSource: CapabilitySupported, + CompositeFingerprint: CapabilitySupported, + MultiSessionSource: CapabilityNotApplicable, + PerSessionErrors: CapabilityNotApplicable, + ExcludedSessions: CapabilityNotApplicable, + ForceReplaceOnParse: CapabilityNotApplicable, + }, + Content: ContentCapabilities{ + FirstMessage: CapabilitySupported, + Model: CapabilitySupported, + PerMessageTokenUsage: CapabilitySupported, + }, + } +} + +func isGptmeConversationPath(root, path string) bool { + rel, err := filepath.Rel(root, path) + if err != nil { + return false + } + parts := strings.Split(rel, string(filepath.Separator)) + return len(parts) == 2 && parts[1] == "conversation.jsonl" && + parts[0] != "." && parts[0] != ".." 
&& parts[0] != "" +} + +func gptmeSessionIDFromPath(root, path string) string { + if !isGptmeConversationPath(root, path) { + return "" + } + return filepath.Base(filepath.Dir(path)) +} + +func gptmeSourceRef(root, path string) (SourceRef, bool) { + root = filepath.Clean(root) + path = filepath.Clean(path) + if !isGptmeConversationPath(root, path) { + return SourceRef{}, false + } + sessionID := gptmeSessionIDFromPath(root, path) + return SourceRef{ + Provider: AgentGptme, + Key: path, + DisplayPath: path, + FingerprintKey: path, + ProjectHint: gptmeProjectFromSessionName(sessionID), + Opaque: JSONLSource{ + Root: root, + Path: path, + RelPath: filepath.Join(sessionID, "conversation.jsonl"), + }, + }, true +} diff --git a/internal/parser/gptme_provider_test.go b/internal/parser/gptme_provider_test.go new file mode 100644 index 000000000..327be7e23 --- /dev/null +++ b/internal/parser/gptme_provider_test.go @@ -0,0 +1,230 @@ +package parser + +import ( + "context" + "errors" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestGptmeProviderFactoryReplacesLegacyAdapter(t *testing.T) { + factory, ok := ProviderFactoryByType(AgentGptme) + require.True(t, ok) + require.NotNil(t, factory) + + caps := factory.Capabilities() + assert.Equal(t, CapabilitySupported, caps.Source.DiscoverSources) + assert.Equal(t, CapabilitySupported, caps.Source.WatchSources) + assert.Equal(t, CapabilitySupported, caps.Source.ClassifyChangedPath) + assert.Equal(t, CapabilitySupported, caps.Source.FindSource) + assert.Equal(t, CapabilitySupported, caps.Source.CompositeFingerprint) + assert.Equal(t, CapabilityNotApplicable, caps.Source.MultiSessionSource) + assert.Equal(t, CapabilitySupported, caps.Content.FirstMessage) + assert.Equal(t, CapabilitySupported, caps.Content.Model) + assert.Equal(t, CapabilitySupported, caps.Content.PerMessageTokenUsage) + + provider, ok := NewProvider(AgentGptme, ProviderConfig{ + Roots: 
[]string{t.TempDir()}, + Machine: "devbox", + }) + require.True(t, ok) + require.NotNil(t, provider) +} + +func TestGptmeProviderSourceMethods(t *testing.T) { + root := t.TempDir() + sessionID := "2026-06-13-write-hello-world" + sourcePath := filepath.Join(root, sessionID, "conversation.jsonl") + writeSourceFile(t, sourcePath, gptmeProviderFixture()) + writeSourceFile(t, filepath.Join(root, "conversation.jsonl"), "{}\n") + writeSourceFile(t, filepath.Join(root, "nested", sessionID, "conversation.jsonl"), "{}\n") + writeSourceFile(t, filepath.Join(root, "other", "notes.jsonl"), "{}\n") + + provider, ok := NewProvider(AgentGptme, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, AgentGptme, discovered[0].Provider) + assert.Equal(t, sourcePath, discovered[0].Key) + assert.Equal(t, sourcePath, discovered[0].FingerprintKey) + assert.Equal(t, "write-hello-world", discovered[0].ProjectHint) + + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: sourcePath, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, discovered[0].Key, changed[0].Key) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: sessionID, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, discovered[0].Key, found.Key) + + fingerprint, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + assert.Equal(t, sourcePath, fingerprint.Key) + assert.NotZero(t, fingerprint.Size) + assert.NotZero(t, fingerprint.MTimeNS) + assert.NotEmpty(t, fingerprint.Hash) +} + +func TestGptmeProviderDiscoversSymlinkSessionDirectories(t *testing.T) { + root := t.TempDir() + targetRoot := t.TempDir() + sessionID := "2026-06-13-write-hello-world" + 
targetDir := filepath.Join(targetRoot, sessionID) + writeSourceFile( + t, + filepath.Join(targetDir, "conversation.jsonl"), + gptmeProviderFixture(), + ) + linkDir := filepath.Join(root, sessionID) + if err := os.Symlink(targetDir, linkDir); err != nil { + t.Skipf("creating directory symlink: %v", err) + } + + provider, ok := NewProvider(AgentGptme, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, filepath.Join(linkDir, "conversation.jsonl"), discovered[0].DisplayPath) +} + +func TestGptmeProviderClassifiesDeletedConversationPath(t *testing.T) { + root := t.TempDir() + sessionID := "2026-06-13-write-hello-world" + sourcePath := filepath.Join(root, sessionID, "conversation.jsonl") + writeSourceFile(t, sourcePath, gptmeProviderFixture()) + + provider, ok := NewProvider(AgentGptme, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + require.NoError(t, os.Remove(sourcePath)) + + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{ + Path: sourcePath, + EventKind: "remove", + WatchRoot: root, + }, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, sourcePath, changed[0].Key) + assert.Equal(t, sourcePath, changed[0].DisplayPath) + assert.Equal(t, "write-hello-world", changed[0].ProjectHint) +} + +func TestGptmeProviderFindSourceUsesPersistedFallbacks(t *testing.T) { + root := t.TempDir() + sessionID := "2026-06-13-write-hello-world" + sourcePath := filepath.Join(root, sessionID, "conversation.jsonl") + writeSourceFile(t, sourcePath, gptmeProviderFixture()) + + provider, ok := NewProvider(AgentGptme, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + for _, req := range []FindSourceRequest{ + {FingerprintKey: sourcePath}, + {FullSessionID: "gptme:" + 
sessionID}, + {FullSessionID: "host~gptme:" + sessionID}, + } { + found, ok, err := provider.FindSource(context.Background(), req) + require.NoError(t, err) + require.Truef(t, ok, "request %#v", req) + assert.Equal(t, sourcePath, found.DisplayPath) + } +} + +func TestGptmeProviderParse(t *testing.T) { + root := t.TempDir() + sessionID := "2026-06-13-write-hello-world" + sourcePath := filepath.Join(root, sessionID, "conversation.jsonl") + writeSourceFile(t, sourcePath, gptmeProviderFixture()) + + provider, ok := NewProvider(AgentGptme, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + source, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + StoredFilePath: sourcePath, + }) + require.NoError(t, err) + require.True(t, ok) + fingerprint, err := provider.Fingerprint(context.Background(), source) + require.NoError(t, err) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: source, + Fingerprint: fingerprint, + Machine: "devbox", + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + assert.False(t, outcome.ForceReplace) + assert.Empty(t, outcome.SourceErrors) + require.Len(t, outcome.Results, 1) + + result := outcome.Results[0] + assert.Equal(t, DataVersionCurrent, result.DataVersion) + assert.Empty(t, result.RetryReason) + assert.Equal(t, "gptme:"+sessionID, result.Result.Session.ID) + assert.Equal(t, "write-hello-world", result.Result.Session.Project) + assert.Equal(t, "devbox", result.Result.Session.Machine) + assert.Equal(t, fingerprint.Hash, result.Result.Session.File.Hash) + require.Len(t, result.Result.Messages, 2) + assert.Equal(t, RoleUser, result.Result.Messages[0].Role) + assert.Equal(t, RoleAssistant, result.Result.Messages[1].Role) +} + +func TestGptmeProviderParseMissingSourceIsWholeSourceError(t *testing.T) { + provider, ok := NewProvider(AgentGptme, ProviderConfig{ + Roots: []string{t.TempDir()}, + Machine: "devbox", + }) + 
require.True(t, ok) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: SourceRef{ + Provider: AgentGptme, + Key: "/tmp/missing/conversation.jsonl", + DisplayPath: "/tmp/missing/conversation.jsonl", + FingerprintKey: "/tmp/missing/conversation.jsonl", + }, + Machine: "devbox", + }) + require.Error(t, err) + assert.Empty(t, outcome) + assert.False(t, errors.Is(err, ErrUnsupportedProviderFeature)) +} + +func gptmeProviderFixture() string { + return `{"role":"user","content":"Write hello world.","timestamp":"2026-06-13T10:00:01.000000"}` + "\n" + + `{"role":"assistant","content":"Hello from gptme.","timestamp":"2026-06-13T10:00:02.000000","metadata":{"model":"demo-model","usage":{"input_tokens":10,"output_tokens":4}}}` + "\n" +} diff --git a/internal/parser/gptme_test.go b/internal/parser/gptme_test.go index 0394a20e2..f0525bc6e 100644 --- a/internal/parser/gptme_test.go +++ b/internal/parser/gptme_test.go @@ -1,6 +1,7 @@ package parser import ( + "context" "path/filepath" "testing" "time" @@ -9,18 +10,28 @@ import ( "github.com/stretchr/testify/require" ) -func TestParseGptmeSession(t *testing.T) { - path := filepath.Join( - "testdata", "gptme", - "2026-06-13-write-hello-world", - "conversation.jsonl", - ) +func TestGptmeProviderParsesFixture(t *testing.T) { + logsDir := filepath.Join("testdata", "gptme") - sess, msgs, err := ParseGptmeSession(path, "testmachine") + provider, ok := NewProvider(AgentGptme, ProviderConfig{ + Roots: []string{logsDir}, + Machine: "testmachine", + }) + require.True(t, ok) + source, found, err := provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "2026-06-13-write-hello-world", + }) + require.NoError(t, err) + require.True(t, found) + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: source, + Machine: "testmachine", + }) require.NoError(t, err) - require.NotNil(t, sess) - require.NotEmpty(t, msgs) + require.Len(t, outcome.Results, 1) + sess := 
outcome.Results[0].Result.Session + msgs := outcome.Results[0].Result.Messages assert.Equal(t, "gptme:2026-06-13-write-hello-world", sess.ID) assert.Equal(t, "write-hello-world", sess.Project) assert.Equal(t, "testmachine", sess.Machine) @@ -60,23 +71,35 @@ func TestParseGptmeSession(t *testing.T) { assert.Equal(t, 2, sess.UserMessageCount) } -func TestDiscoverGptmeSessions(t *testing.T) { +func TestGptmeProviderDiscoversFixture(t *testing.T) { logsDir := filepath.Join("testdata", "gptme") - files := DiscoverGptmeSessions(logsDir) - require.Len(t, files, 1) - assert.Equal(t, AgentGptme, files[0].Agent) - assert.Contains(t, files[0].Path, "conversation.jsonl") + provider, ok := NewProvider(AgentGptme, ProviderConfig{Roots: []string{logsDir}}) + require.True(t, ok) + + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + assert.Equal(t, AgentGptme, sources[0].Provider) + assert.Contains(t, sources[0].DisplayPath, "conversation.jsonl") } -func TestFindGptmeSourceFile(t *testing.T) { +func TestGptmeProviderFindsFixtureSource(t *testing.T) { logsDir := filepath.Join("testdata", "gptme") + provider, ok := NewProvider(AgentGptme, ProviderConfig{Roots: []string{logsDir}}) + require.True(t, ok) - found := FindGptmeSourceFile(logsDir, "2026-06-13-write-hello-world") - assert.NotEmpty(t, found) - assert.Contains(t, found, "conversation.jsonl") + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "2026-06-13-write-hello-world", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Contains(t, found.DisplayPath, "conversation.jsonl") - notFound := FindGptmeSourceFile(logsDir, "nonexistent-session") - assert.Empty(t, notFound) + _, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "nonexistent-session", + }) + require.NoError(t, err) + assert.False(t, ok) } func TestGptmeProjectFromSessionName(t *testing.T) { diff --git 
a/internal/parser/provider.go b/internal/parser/provider.go index 64745e779..7426f70ee 100644 --- a/internal/parser/provider.go +++ b/internal/parser/provider.go @@ -351,6 +351,8 @@ func providerFactoryForDef(def AgentDef) ProviderFactory { return newCommandCodeProviderFactory(def) case AgentIflow: return newIflowProviderFactory(def) + case AgentGptme: + return newGptmeProviderFactory(def) default: return legacyProviderFactory{def: def} } diff --git a/internal/parser/provider_migration.go b/internal/parser/provider_migration.go index 439ec5334..513a64d87 100644 --- a/internal/parser/provider_migration.go +++ b/internal/parser/provider_migration.go @@ -55,7 +55,7 @@ var providerMigrationModes = map[AgentType]ProviderMigrationMode{ AgentVibe: ProviderMigrationLegacyOnly, AgentZed: ProviderMigrationLegacyOnly, AgentQwenPaw: ProviderMigrationLegacyOnly, - AgentGptme: ProviderMigrationLegacyOnly, + AgentGptme: ProviderMigrationProviderAuthoritative, AgentShelley: ProviderMigrationLegacyOnly, AgentAider: ProviderMigrationLegacyOnly, AgentOMP: ProviderMigrationLegacyOnly, diff --git a/internal/parser/types.go b/internal/parser/types.go index 4cb6fe4df..3a2b92d6b 100644 --- a/internal/parser/types.go +++ b/internal/parser/types.go @@ -585,15 +585,13 @@ var Registry = []AgentDef{ FindSourceFunc: FindQwenPawSourceFile, }, { - Type: AgentGptme, - DisplayName: "gptme", - EnvVar: "GPTME_DIR", - ConfigKey: "gptme_dirs", - DefaultDirs: []string{".local/share/gptme/logs"}, - IDPrefix: "gptme:", - FileBased: true, - DiscoverFunc: DiscoverGptmeSessions, - FindSourceFunc: FindGptmeSourceFile, + Type: AgentGptme, + DisplayName: "gptme", + EnvVar: "GPTME_DIR", + ConfigKey: "gptme_dirs", + DefaultDirs: []string{".local/share/gptme/logs"}, + IDPrefix: "gptme:", + FileBased: true, }, { // Shelley (exe.dev) stores all conversations in a single diff --git a/internal/sync/engine.go b/internal/sync/engine.go index b33f2d427..40d4251da 100644 --- a/internal/sync/engine.go +++ 
b/internal/sync/engine.go @@ -1478,23 +1478,6 @@ func (e *Engine) classifyOnePath( } } - // gptme: //conversation.jsonl - for _, gptmeDir := range e.agentDirs[parser.AgentGptme] { - if gptmeDir == "" { - continue - } - if rel, ok := isUnder(gptmeDir, path); ok { - parts := strings.Split(rel, sep) - if len(parts) != 2 || parts[1] != "conversation.jsonl" { - continue - } - return parser.DiscoveredFile{ - Path: path, - Agent: parser.AgentGptme, - }, true - } - } - if df, ok := e.classifyAiderPath(path); ok { return df, true } @@ -4817,8 +4800,6 @@ func (e *Engine) processFile( res = e.processAntigravityCLI(file, info) case parser.AgentQwenPaw: res = e.processQwenPaw(file, info) - case parser.AgentGptme: - res = e.processGptme(file, info) case parser.AgentAider: res = e.processAider(file, info) default: @@ -7235,35 +7216,6 @@ func (e *Engine) processPositron( } } -func (e *Engine) processGptme( - file parser.DiscoveredFile, info os.FileInfo, -) processResult { - if e.shouldSkipByPath(file.Path, info) { - return processResult{skip: true} - } - - sess, msgs, err := parser.ParseGptmeSession( - file.Path, e.machine, - ) - if err != nil { - return processResult{err: err} - } - if sess == nil { - return processResult{} - } - - hash, err := ComputeFileHash(file.Path) - if err == nil { - sess.File.Hash = hash - } - - return processResult{ - results: []parser.ParseResult{ - {Session: *sess, Messages: msgs}, - }, - } -} - // aiderFileUnchanged reports whether a physical aider history file is // unchanged since the last sync. 
Aider sessions are stored under virtual // "#" paths, so the generic shouldSkipByPath (which looks the From 51b2f04f5491c4352a41de5b4349c1b59b2e5c0b Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Wed, 24 Jun 2026 21:34:29 -0400 Subject: [PATCH 14/24] feat(parser): migrate deepseek tui provider DeepSeek TUI has a shallow one-file-per-session JSON layout, so moving it next keeps the provider migration incremental while exercising the JSON source helper with non-JSONL extensions. The provider preserves legacy discovery filters for latest and offline queue files, raw/full ID lookup, changed-path classification, fingerprint propagation, and parse normalization without changing runtime sync dispatch. fix(parser): preserve deepseek tui symlink files DeepSeek TUI legacy lookup and parsing followed direct symlinks to session JSON files, so the facade provider needs an explicit way to preserve that source shape instead of silently dropping linked archives. The JSONL source helper keeps symlink-file following opt-in, DeepSeek TUI enables it, and the branch manifest opts the concrete provider into shadow comparison so the migration is exercised rather than additive. Validation: go test -tags "fts5" ./internal/parser -run 'Test(DeepSeekTUIProvider|JSONLSourceSet|ProviderMigrationModes)' -count=1; go test -tags "fts5" ./internal/parser -count=1; go vet ./...; git diff --check test(parser): skip deepseek symlink test when unsupported Some test environments deny symlink creation even though the provider behavior is valid when links are available. The regression should skip in that environment instead of failing for host permissions. 
Validation: go test -tags "fts5" ./internal/parser -run 'Test(DeepSeekTUIProvider|ProviderMigrationModes)' -count=1; go test -tags "fts5" ./internal/parser -count=1; go vet ./...; git diff --check test(sync): compare deepseek tui shadow parity DeepSeek TUI is shadow-compared on this branch, so add the shared source-level proof that provider observation matches the existing ParseDeepSeekTUISession output. This keeps the branch review focused on an actual migration surface rather than only provider-local parser tests. Validation: go fmt ./...; go test -tags "fts5" ./internal/parser ./internal/sync -count=1; go vet ./...; git diff --check; ./custom-gcl run --config .golangci.nilaway.yml ./internal/parser/... ./internal/sync/... refactor(parser): fold deepseek tui into provider DeepSeek TUI should have one maintained parser shape on this branch. Leaving exported discover, lookup, and parse functions beside the concrete provider kept the migration additive and forced sync to preserve a second dispatch path. Make the concrete provider authoritative, move parsing onto the provider, remove the AgentDef legacy hooks and engine dispatch, and replace shadow-baseline tests with provider API coverage plus a guard that the old symbols stay gone. 
Validation: go test -tags "fts5" ./internal/parser ./internal/sync ./cmd/agentsview -count=1; go vet ./...; git diff --check fix(parser): preserve deepseek tui file hash DeepSeek TUI legacy sync stored the transcript content hash, but the migrated provider source set did not request hashing, so provider-authoritative Parse left Session.File.Hash empty when using the real Fingerprint path. Enable source hashing for DeepSeek TUI and make the provider parse test use Fingerprint -> Parse to assert the persisted file_hash value comes from the session JSON content. Validation: go test -tags "fts5" ./internal/parser -run TestDeepSeekTUIProvider -count=1; go test -tags "fts5" ./internal/parser -count=1; go vet ./...; git diff --check --- internal/parser/deepseek_tui.go | 43 +---- internal/parser/deepseek_tui_provider.go | 66 ++++++++ internal/parser/deepseek_tui_provider_test.go | 160 ++++++++++++++++++ internal/parser/deepseek_tui_test.go | 110 +++++++++--- internal/parser/provider.go | 2 + internal/parser/provider_migration.go | 2 +- internal/parser/types.go | 6 +- internal/parser/types_test.go | 4 +- internal/sync/engine.go | 58 ------- internal/sync/engine_test.go | 3 + 10 files changed, 322 insertions(+), 132 deletions(-) create mode 100644 internal/parser/deepseek_tui_provider.go create mode 100644 internal/parser/deepseek_tui_provider_test.go diff --git a/internal/parser/deepseek_tui.go b/internal/parser/deepseek_tui.go index d1fd2314a..534e95230 100644 --- a/internal/parser/deepseek_tui.go +++ b/internal/parser/deepseek_tui.go @@ -5,7 +5,6 @@ import ( "fmt" "os" "path/filepath" - "sort" "strings" "github.com/tidwall/gjson" @@ -13,47 +12,7 @@ import ( const deepSeekTUIPrefix = "deepseek-tui:" -// DiscoverDeepSeekTUISessions finds DeepSeek TUI / CodeWhale session -// JSON documents under a sessions directory. 
-func DiscoverDeepSeekTUISessions(root string) []DiscoveredFile { - entries, err := os.ReadDir(root) - if err != nil { - return nil - } - - files := make([]DiscoveredFile, 0) - for _, entry := range entries { - if entry.IsDir() || !isDeepSeekTUISessionFile(entry.Name()) { - continue - } - files = append(files, DiscoveredFile{ - Path: filepath.Join(root, entry.Name()), - Agent: AgentDeepSeekTUI, - }) - } - - sort.Slice(files, func(i, j int) bool { - return files[i].Path < files[j].Path - }) - return files -} - -// FindDeepSeekTUISourceFile locates a DeepSeek TUI / CodeWhale session -// JSON document by raw session ID. -func FindDeepSeekTUISourceFile(root, rawID string) string { - if !IsValidSessionID(rawID) { - return "" - } - path := filepath.Join(root, rawID+".json") - if info, err := os.Stat(path); err == nil && !info.IsDir() { - return path - } - return "" -} - -// ParseDeepSeekTUISession parses a DeepSeek TUI / CodeWhale saved -// session JSON file. -func ParseDeepSeekTUISession( +func parseDeepSeekTUISession( path, machine string, ) (*ParsedSession, []ParsedMessage, error) { info, err := os.Stat(path) diff --git a/internal/parser/deepseek_tui_provider.go b/internal/parser/deepseek_tui_provider.go new file mode 100644 index 000000000..6246c5e09 --- /dev/null +++ b/internal/parser/deepseek_tui_provider.go @@ -0,0 +1,66 @@ +package parser + +import ( + "context" + "path/filepath" +) + +// DeepSeek TUI stores each session as a single JSON file in a directory. It is +// a directory-of-files provider: discovery, watching, change classification, +// lookup, and fingerprinting come from JSONLSourceSet, and the ParseFile option +// makes that source set a full SourceSet so it rides the generic factory. 
+func newDeepSeekTUIProviderFactory(def AgentDef) ProviderFactory { + return newSourceSetFactory( + def, + deepSeekTUIProviderCapabilities(), + func(cfg ProviderConfig) SourceSet { return newDeepSeekTUISourceSet(cfg.Roots) }, + ) +} + +func newDeepSeekTUISourceSet(roots []string) JSONLSourceSet { + return newJSONLSourceSet(AgentDeepSeekTUI, roots, + withExtensions(".json"), + withFollowSymlinkFiles(), + withContentHashing(), + withIncludePath(isDeepSeekTUISourcePath), + withSessionIDFromPath(func(root, path string) string { + return deepSeekTUISessionIDFromPath(path) + }), + withParseFile(deepSeekTUIParseFile), + ) +} + +func deepSeekTUIParseFile( + _ context.Context, path string, req ParseRequest, +) ([]ParseResult, []string, error) { + sess, msgs, err := parseDeepSeekTUISession(path, req.Machine) + if err != nil { + return nil, nil, err + } + if sess == nil { + return nil, nil, nil + } + if req.Fingerprint.Hash != "" { + sess.File.Hash = req.Fingerprint.Hash + } + return []ParseResult{{Session: *sess, Messages: msgs}}, nil, nil +} + +func isDeepSeekTUISourcePath(root, path string) bool { + return isDeepSeekTUISessionFile(filepath.Base(path)) +} + +func deepSeekTUIProviderCapabilities() Capabilities { + return Capabilities{ + Source: jsonlFileProviderSourceCapabilities(), + Content: ContentCapabilities{ + FirstMessage: CapabilitySupported, + SessionName: CapabilitySupported, + Cwd: CapabilitySupported, + Model: CapabilitySupported, + Thinking: CapabilitySupported, + ToolCalls: CapabilitySupported, + ToolResults: CapabilitySupported, + }, + } +} diff --git a/internal/parser/deepseek_tui_provider_test.go b/internal/parser/deepseek_tui_provider_test.go new file mode 100644 index 000000000..f3ac4f109 --- /dev/null +++ b/internal/parser/deepseek_tui_provider_test.go @@ -0,0 +1,160 @@ +package parser + +import ( + "context" + "crypto/sha256" + "fmt" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + 
+func TestDeepSeekTUIProviderFactoryReplacesLegacyAdapter(t *testing.T) { + factory, ok := ProviderFactoryByType(AgentDeepSeekTUI) + require.True(t, ok) + require.NotNil(t, factory) + + provider, ok := NewProvider(AgentDeepSeekTUI, ProviderConfig{ + Roots: []string{t.TempDir()}, + Machine: "devbox", + }) + require.True(t, ok) + require.NotNil(t, provider) +} + +func TestDeepSeekTUIProviderSourceMethods(t *testing.T) { + root := t.TempDir() + sourcePath := filepath.Join(root, "session_123.json") + writeSourceFile(t, sourcePath, deepSeekTUIProviderFixture()) + writeSourceFile(t, filepath.Join(root, "latest.json"), "{}\n") + writeSourceFile(t, filepath.Join(root, "offline_queue.json"), "{}\n") + writeSourceFile(t, filepath.Join(root, "nested", "session_456.json"), "{}\n") + + provider, ok := NewProvider(AgentDeepSeekTUI, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, AgentDeepSeekTUI, discovered[0].Provider) + assert.Equal(t, sourcePath, discovered[0].DisplayPath) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "host~deepseek-tui:session_123", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sourcePath, found.DisplayPath) + + found, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + FingerprintKey: sourcePath, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sourcePath, found.DisplayPath) + + require.NoError(t, os.Remove(sourcePath)) + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: sourcePath, EventKind: "remove", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, sourcePath, changed[0].DisplayPath) +} + +func TestDeepSeekTUIProviderSourceMethodsFollowSymlinkedSessionFile(t 
*testing.T) { + root := t.TempDir() + targetDir := t.TempDir() + targetPath := filepath.Join(targetDir, "session_123.json") + sourcePath := filepath.Join(root, "session_123.json") + writeSourceFile(t, targetPath, deepSeekTUIProviderFixture()) + if err := os.Symlink(targetPath, sourcePath); err != nil { + t.Skipf("symlink not supported: %v", err) + } + + provider, ok := NewProvider(AgentDeepSeekTUI, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, sourcePath, discovered[0].DisplayPath) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "host~deepseek-tui:session_123", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sourcePath, found.DisplayPath) + + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: sourcePath, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, sourcePath, changed[0].DisplayPath) +} + +func TestDeepSeekTUIProviderParse(t *testing.T) { + root := t.TempDir() + sourcePath := filepath.Join(root, "session_123.json") + content := deepSeekTUIProviderFixture() + writeSourceFile(t, sourcePath, content) + + provider, ok := NewProvider(AgentDeepSeekTUI, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + + fingerprint, err := provider.Fingerprint(context.Background(), sources[0]) + require.NoError(t, err) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: sources[0], + Fingerprint: fingerprint, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.Len(t, outcome.Results, 1) + assert.Equal(t, 
DataVersionCurrent, outcome.Results[0].DataVersion) + assert.Equal(t, "deepseek-tui:session_123", outcome.Results[0].Result.Session.ID) + assert.Equal(t, "sample_project", outcome.Results[0].Result.Session.Project) + assert.Equal(t, "devbox", outcome.Results[0].Result.Session.Machine) + assert.Equal(t, + fmt.Sprintf("%x", sha256.Sum256([]byte(content))), + outcome.Results[0].Result.Session.File.Hash, + ) + assert.Len(t, outcome.Results[0].Result.Messages, 2) +} + +func deepSeekTUIProviderFixture() string { + return `{ + "metadata": { + "id": "session_123", + "title": "Investigate DeepSeek TUI", + "created_at": "2026-06-01T10:00:00Z", + "updated_at": "2026-06-01T10:02:00Z", + "model": "deepseek-chat", + "workspace": "/Users/alice/code/sample-project" + }, + "messages": [ + {"role": "user", "content": "Inspect server logs", "timestamp": "2026-06-01T10:00:05Z"}, + {"role": "assistant", "content": [{"type": "text", "text": "The server failed during startup."}], "timestamp": "2026-06-01T10:00:10Z"} + ] +}` +} diff --git a/internal/parser/deepseek_tui_test.go b/internal/parser/deepseek_tui_test.go index 40dede71c..285978c5a 100644 --- a/internal/parser/deepseek_tui_test.go +++ b/internal/parser/deepseek_tui_test.go @@ -1,6 +1,7 @@ package parser import ( + "context" "os" "path/filepath" "testing" @@ -9,7 +10,7 @@ import ( "github.com/stretchr/testify/require" ) -func TestDiscoverDeepSeekTUISessions(t *testing.T) { +func TestDeepSeekTUIProviderDiscoversSessions(t *testing.T) { t.Parallel() root := t.TempDir() @@ -22,27 +23,57 @@ func TestDiscoverDeepSeekTUISessions(t *testing.T) { require.NoError(t, os.MkdirAll(checkpointDir, 0o755)) require.NoError(t, os.WriteFile(filepath.Join(checkpointDir, "nested.json"), []byte(`{}`), 0o644)) - files := DiscoverDeepSeekTUISessions(root) + provider, ok := NewProvider(AgentDeepSeekTUI, ProviderConfig{ + Roots: []string{root}, + Machine: "local", + }) + require.True(t, ok) + files, err := provider.Discover(context.Background()) + 
require.NoError(t, err) require.Len(t, files, 2) - assert.Equal(t, filepath.Join(root, "session_a.json"), files[0].Path) - assert.Equal(t, AgentDeepSeekTUI, files[0].Agent) - assert.Equal(t, filepath.Join(root, "session_b.json"), files[1].Path) - assert.Equal(t, AgentDeepSeekTUI, files[1].Agent) + assert.Equal(t, filepath.Join(root, "session_a.json"), files[0].DisplayPath) + assert.Equal(t, AgentDeepSeekTUI, files[0].Provider) + assert.Equal(t, filepath.Join(root, "session_b.json"), files[1].DisplayPath) + assert.Equal(t, AgentDeepSeekTUI, files[1].Provider) } -func TestFindDeepSeekTUISourceFile(t *testing.T) { +func TestDeepSeekTUIProviderFindsSourceFile(t *testing.T) { t.Parallel() root := t.TempDir() path := filepath.Join(root, "session_123.json") require.NoError(t, os.WriteFile(path, []byte(`{}`), 0o644)) - assert.Equal(t, path, FindDeepSeekTUISourceFile(root, "session_123")) - assert.Empty(t, FindDeepSeekTUISourceFile(root, "missing")) - assert.Empty(t, FindDeepSeekTUISourceFile(root, "../session_123")) + provider, ok := NewProvider(AgentDeepSeekTUI, ProviderConfig{ + Roots: []string{root}, + Machine: "local", + }) + require.True(t, ok) + + found, ok, err := provider.FindSource( + context.Background(), + FindSourceRequest{RawSessionID: "session_123"}, + ) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, path, found.DisplayPath) + + _, ok, err = provider.FindSource( + context.Background(), + FindSourceRequest{RawSessionID: "missing"}, + ) + require.NoError(t, err) + assert.False(t, ok) + + _, ok, err = provider.FindSource( + context.Background(), + FindSourceRequest{RawSessionID: "../session_123"}, + ) + require.NoError(t, err) + assert.False(t, ok) } -func TestParseDeepSeekTUISessionBasic(t *testing.T) { +func TestDeepSeekTUIProviderParsesBasicSession(t *testing.T) { t.Parallel() content := `{ @@ -61,9 +92,9 @@ func TestParseDeepSeekTUISessionBasic(t *testing.T) { {"role": "assistant", "content": [{"type": "text", "text": "The server failed 
during startup."}], "timestamp": "2026-06-01T10:00:10Z"} ] }` - path := createTestFile(t, "deepseek_tui.json", content) + path := createTestFile(t, "session_123.json", content) - sess, msgs, err := ParseDeepSeekTUISession(path, "local") + sess, msgs, err := parseDeepSeekTUITestSession(t, path, "local") require.NoError(t, err) require.NotNil(t, sess) require.Len(t, msgs, 2) @@ -89,7 +120,7 @@ func TestParseDeepSeekTUISessionBasic(t *testing.T) { assert.Equal(t, "The server failed during startup.", msgs[1].Content) } -func TestParseDeepSeekTUISessionToolUseAndThinking(t *testing.T) { +func TestDeepSeekTUIProviderParsesToolUseAndThinking(t *testing.T) { t.Parallel() content := `{ @@ -106,9 +137,9 @@ func TestParseDeepSeekTUISessionToolUseAndThinking(t *testing.T) { {"role": "assistant", "content": [{"type": "text", "text": "It is a Go file."}]} ] }` - path := createTestFile(t, "deepseek_tui_tools.json", content) + path := createTestFile(t, "session_tools.json", content) - sess, msgs, err := ParseDeepSeekTUISession(path, "local") + sess, msgs, err := parseDeepSeekTUITestSession(t, path, "local") require.NoError(t, err) require.NotNil(t, sess) require.Len(t, msgs, 4) @@ -129,7 +160,7 @@ func TestParseDeepSeekTUISessionToolUseAndThinking(t *testing.T) { assert.Equal(t, "package main", DecodeContent(msgs[2].ToolResults[0].ContentRaw)) } -func TestParseDeepSeekTUISessionObjectToolResult(t *testing.T) { +func TestDeepSeekTUIProviderParsesObjectToolResult(t *testing.T) { t.Parallel() content := `{ @@ -144,9 +175,9 @@ func TestParseDeepSeekTUISessionObjectToolResult(t *testing.T) { ]} ] }` - path := createTestFile(t, "deepseek_tui_obj.json", content) + path := createTestFile(t, "session_obj.json", content) - _, msgs, err := ParseDeepSeekTUISession(path, "local") + _, msgs, err := parseDeepSeekTUITestSession(t, path, "local") require.NoError(t, err) require.Len(t, msgs, 3) @@ -156,7 +187,7 @@ func TestParseDeepSeekTUISessionObjectToolResult(t *testing.T) { assert.Equal(t, 
"file1.go\nfile2.go", DecodeContent(result.ContentRaw)) } -func TestParseDeepSeekTUISessionEmptyObjectToolResult(t *testing.T) { +func TestDeepSeekTUIProviderParsesEmptyObjectToolResult(t *testing.T) { t.Parallel() content := `{ @@ -171,9 +202,9 @@ func TestParseDeepSeekTUISessionEmptyObjectToolResult(t *testing.T) { ]} ] }` - path := createTestFile(t, "deepseek_tui_empty_obj.json", content) + path := createTestFile(t, "session_empty_obj.json", content) - _, msgs, err := ParseDeepSeekTUISession(path, "local") + _, msgs, err := parseDeepSeekTUITestSession(t, path, "local") require.NoError(t, err) require.Len(t, msgs, 3) @@ -183,16 +214,45 @@ func TestParseDeepSeekTUISessionEmptyObjectToolResult(t *testing.T) { assert.Empty(t, DecodeContent(result.ContentRaw)) } -func TestParseDeepSeekTUISessionSkipsEmpty(t *testing.T) { +func TestDeepSeekTUIProviderSkipsEmptySession(t *testing.T) { t.Parallel() - path := createTestFile(t, "deepseek_tui_empty.json", `{ + path := createTestFile(t, "empty_session.json", `{ "metadata": {"id": "empty_session"}, "messages": [] }`) - sess, msgs, err := ParseDeepSeekTUISession(path, "local") + sess, msgs, err := parseDeepSeekTUITestSession(t, path, "local") require.NoError(t, err) assert.Nil(t, sess) assert.Nil(t, msgs) } + +func parseDeepSeekTUITestSession( + t *testing.T, + path string, + machine string, +) (*ParsedSession, []ParsedMessage, error) { + t.Helper() + + provider, ok := NewProvider(AgentDeepSeekTUI, ProviderConfig{ + Roots: []string{filepath.Dir(path)}, + Machine: machine, + }) + require.True(t, ok) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: SourceRef{ + Provider: AgentDeepSeekTUI, + Key: path, + DisplayPath: path, + FingerprintKey: path, + }, + Machine: machine, + }) + if err != nil || len(outcome.Results) == 0 { + return nil, nil, err + } + result := outcome.Results[0].Result + return &result.Session, result.Messages, nil +} diff --git a/internal/parser/provider.go 
b/internal/parser/provider.go index 7426f70ee..9032857b2 100644 --- a/internal/parser/provider.go +++ b/internal/parser/provider.go @@ -349,6 +349,8 @@ func providerFactoryForDef(def AgentDef) ProviderFactory { switch def.Type { case AgentCommandCode: return newCommandCodeProviderFactory(def) + case AgentDeepSeekTUI: + return newDeepSeekTUIProviderFactory(def) case AgentIflow: return newIflowProviderFactory(def) case AgentGptme: diff --git a/internal/parser/provider_migration.go b/internal/parser/provider_migration.go index 513a64d87..5e917ad81 100644 --- a/internal/parser/provider_migration.go +++ b/internal/parser/provider_migration.go @@ -35,7 +35,7 @@ var providerMigrationModes = map[AgentType]ProviderMigrationMode{ AgentPi: ProviderMigrationLegacyOnly, AgentQwen: ProviderMigrationLegacyOnly, AgentCommandCode: ProviderMigrationProviderAuthoritative, - AgentDeepSeekTUI: ProviderMigrationLegacyOnly, + AgentDeepSeekTUI: ProviderMigrationProviderAuthoritative, AgentOpenClaw: ProviderMigrationLegacyOnly, AgentQClaw: ProviderMigrationLegacyOnly, AgentKimi: ProviderMigrationLegacyOnly, diff --git a/internal/parser/types.go b/internal/parser/types.go index 3a2b92d6b..2621a1371 100644 --- a/internal/parser/types.go +++ b/internal/parser/types.go @@ -365,10 +365,8 @@ var Registry = []AgentDef{ ".codewhale/sessions", ".deepseek/sessions", }, - IDPrefix: "deepseek-tui:", - FileBased: true, - DiscoverFunc: DiscoverDeepSeekTUISessions, - FindSourceFunc: FindDeepSeekTUISourceFile, + IDPrefix: "deepseek-tui:", + FileBased: true, }, { Type: AgentOpenClaw, diff --git a/internal/parser/types_test.go b/internal/parser/types_test.go index 7d2f12ee6..8a3c630da 100644 --- a/internal/parser/types_test.go +++ b/internal/parser/types_test.go @@ -572,8 +572,8 @@ func TestDeepSeekTUIRegistryEntry(t *testing.T) { def, ok := AgentByType(AgentDeepSeekTUI) require.True(t, ok, "AgentDeepSeekTUI missing from Registry") require.True(t, def.FileBased, "DeepSeek TUI FileBased") - require.NotNil(t, 
def.DiscoverFunc, "DeepSeek TUI DiscoverFunc") - require.NotNil(t, def.FindSourceFunc, "DeepSeek TUI FindSourceFunc") + assert.Nil(t, def.DiscoverFunc, "DeepSeek TUI DiscoverFunc") + assert.Nil(t, def.FindSourceFunc, "DeepSeek TUI FindSourceFunc") assert.Equal(t, "DeepSeek TUI", def.DisplayName) assert.Equal(t, "DEEPSEEK_TUI_SESSIONS_DIR", def.EnvVar) assert.Equal(t, "deepseek_tui_sessions_dirs", def.ConfigKey) diff --git a/internal/sync/engine.go b/internal/sync/engine.go index 40d4251da..fc8ead815 100644 --- a/internal/sync/engine.go +++ b/internal/sync/engine.go @@ -1336,30 +1336,6 @@ func (e *Engine) classifyOnePath( } } - // DeepSeek TUI / CodeWhale: /.json - for _, dsDir := range e.agentDirs[parser.AgentDeepSeekTUI] { - if dsDir == "" { - continue - } - if rel, ok := isUnder(dsDir, path); ok { - if strings.Count(rel, sep) != 0 { - continue - } - name := filepath.Base(rel) - if name == "latest.json" || name == "offline_queue.json" { - continue - } - sessionID, ok := strings.CutSuffix(name, ".json") - if !ok || !parser.IsValidSessionID(sessionID) { - continue - } - return parser.DiscoveredFile{ - Path: path, - Agent: parser.AgentDeepSeekTUI, - }, true - } - } - // Zencoder: /.jsonl for _, zenDir := range e.agentDirs[parser.AgentZencoder] { if zenDir == "" { @@ -4758,8 +4734,6 @@ func (e *Engine) processFile( res = e.processCursor(file, info) case parser.AgentAmp: res = e.processAmp(file, info) - case parser.AgentDeepSeekTUI: - res = e.processDeepSeekTUI(file, info) case parser.AgentZencoder: res = e.processZencoder(file, info) case parser.AgentVSCodeCopilot: @@ -6404,38 +6378,6 @@ func (e *Engine) processAmp( } } -func (e *Engine) processDeepSeekTUI( - file parser.DiscoveredFile, info os.FileInfo, -) processResult { - if e.shouldSkipByPath(file.Path, info) { - return processResult{skip: true} - } - - sess, msgs, err := parser.ParseDeepSeekTUISession( - file.Path, e.machine, - ) - if err != nil { - return processResult{err: err} - } - if sess == nil { - return 
processResult{} - } - - hash, err := ComputeFileHash(file.Path) - if err == nil { - sess.File.Hash = hash - } - inode, device := getFileIdentity(info) - sess.File.Inode = inode - sess.File.Device = device - - return processResult{ - results: []parser.ParseResult{ - {Session: *sess, Messages: msgs}, - }, - } -} - func (e *Engine) processZencoder( file parser.DiscoveredFile, info os.FileInfo, ) processResult { diff --git a/internal/sync/engine_test.go b/internal/sync/engine_test.go index f88056c28..cacb62c9a 100644 --- a/internal/sync/engine_test.go +++ b/internal/sync/engine_test.go @@ -3108,6 +3108,9 @@ func TestEngine_ClassifyPathsDeepSeekTUISession(t *testing.T) { require.Len(t, files, 1, "len(files) = %d, want 1 (%v)", len(files), files) assert.Equal(t, sessionPath, files[0].Path) assert.Equal(t, parser.AgentDeepSeekTUI, files[0].Agent) + assert.True(t, files[0].ProviderProcess) + require.NotNil(t, files[0].ProviderSource) + assert.Equal(t, sessionPath, files[0].ProviderSource.DisplayPath) bogus := []string{ filepath.Join(deepSeekDir, "stray.jsonl"), From e1d8c90685654880ee6fb033285a9dc46e1b6296 Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Wed, 24 Jun 2026 21:34:17 -0400 Subject: [PATCH 15/24] feat(parser): migrate amp and zencoder providers Amp and Zencoder both use shallow session-file roots, so migrating them together keeps the provider stack moving without introducing another source helper. The concrete providers preserve legacy filename filters, raw/full ID lookup, deleted-path classification, fingerprint propagation, and parse normalization while continuing to compose the shared JSON source mechanics explicitly. fix(parser): preserve JSONL symlink file sources Migrated providers are intended to preserve legacy source discovery while moving behind the provider facade. 
Several legacy JSON/JSONL discoveries accepted matching symlinked session files and the parsers read through those symlink targets, so the shared source helper needs an explicit opt-in for that source shape instead of treating every symlink as non-regular metadata. This keeps the default helper behavior strict while allowing shallow and directory JSONL providers to opt into the compatibility path they already had before the migration. Validation: go test -tags "fts5" ./internal/parser -run 'Test(Amp|Zencoder)ProviderSourceMethodsFollowSymlinkedSessionFile' -count=1; go test -tags "fts5" ./internal/parser -run 'Test(Amp|Zencoder|DeepSeekTUI)ProviderSourceMethodsFollowSymlinkedSessionFile|Test(CommandCode|Iflow)ProviderDiscoversSymlinkedProjectDirectory|TestGptmeProvider' -count=1; go test -tags "fts5" ./internal/parser -count=1; go vet ./...; make test-short; make nilaway; git diff --check test(parser): opt amp zencoder into provider shadow Amp and Zencoder now have concrete facade providers on this branch, so their migration modes should fail closed through the shared shadow-compare harness instead of leaving those implementations additive. Earlier provider opt-ins stay inherited from lower stack branches, and later provider families remain legacy-only until their own branches introduce concrete providers. Validation: go test -tags "fts5" ./internal/parser -run TestProviderMigrationModes -count=1; go test -tags "fts5" ./internal/parser -count=1; go vet ./...; git diff --check test(sync): compare amp zencoder shadow parity Amp and Zencoder are shadow-compared on this branch, so add source-level migration tests that run ObserveProviderSource and compare provider output to the legacy ParseAmpSession and ParseZencoderSession functions. Validation: go fmt ./...; go test -tags "fts5" ./internal/parser ./internal/sync -count=1; go vet ./...; git diff --check; ./custom-gcl run --config .golangci.nilaway.yml ./internal/parser/... ./internal/sync/... 
refactor(parser): fold amp zencoder into providers Amp and Zencoder should stop carrying two public parser shapes once their concrete providers exist. Keeping exported parser entrypoints and legacy sync dispatch made this branch additive instead of a real migration. Make both providers authoritative, move parsing behind provider methods, remove source callbacks and engine dispatch, and replace shadow-baseline tests with provider API coverage plus guards that the old symbols stay gone. Validation: go test -tags "fts5" ./internal/parser ./internal/sync ./cmd/agentsview -count=1; go vet ./...; git diff --check fix(parser): preserve amp zencoder file hashes Amp and Zencoder legacy sync stored the source content hash, but the migrated providers did not request hashed source fingerprints. Provider-authoritative writes would therefore clear file_hash when running through the real provider path. Enable source hashing for both providers and update their provider tests to exercise Fingerprint -> Parse instead of passing manually injected hashes. Validation: go test -tags "fts5" ./internal/parser -run 'Test(Amp|Zencoder)ProviderParse' -count=1; go test -tags "fts5" ./internal/parser -count=1; go vet ./...; git diff --check --- internal/parser/amp.go | 4 +- internal/parser/amp_provider.go | 63 ++++ internal/parser/amp_test.go | 75 +++-- internal/parser/amp_zencoder_provider_test.go | 286 ++++++++++++++++++ internal/parser/discovery.go | 46 --- internal/parser/discovery_test.go | 96 +++++- internal/parser/provider.go | 4 + internal/parser/provider_migration.go | 4 +- internal/parser/types.go | 32 +- internal/parser/zencoder.go | 56 +--- internal/parser/zencoder_provider.go | 68 +++++ internal/parser/zencoder_test.go | 155 +++++++--- internal/sync/engine.go | 95 ------ 13 files changed, 685 insertions(+), 299 deletions(-) create mode 100644 internal/parser/amp_provider.go create mode 100644 internal/parser/amp_zencoder_provider_test.go create mode 100644 
internal/parser/zencoder_provider.go diff --git a/internal/parser/amp.go b/internal/parser/amp.go index 625ca9bcd..a1ce5e304 100644 --- a/internal/parser/amp.go +++ b/internal/parser/amp.go @@ -12,9 +12,7 @@ import ( "github.com/tidwall/gjson" ) -// ParseAmpSession parses an Amp thread JSON file. -// Each thread is a single JSON document at ~/.local/share/amp/threads/T-*.json. -func ParseAmpSession( +func parseAmpSession( path, machine string, ) (*ParsedSession, []ParsedMessage, error) { info, err := os.Stat(path) diff --git a/internal/parser/amp_provider.go b/internal/parser/amp_provider.go new file mode 100644 index 000000000..e23ed1ab2 --- /dev/null +++ b/internal/parser/amp_provider.go @@ -0,0 +1,63 @@ +package parser + +import ( + "context" + "path/filepath" +) + +// Amp stores each thread as a single JSON file in a directory. It is a +// directory-of-files provider: discovery, watching, change classification, +// lookup, and fingerprinting come from JSONLSourceSet, and the ParseFile option +// makes that source set a full SourceSet so it rides the generic factory. 
+func newAmpProviderFactory(def AgentDef) ProviderFactory { + return newSourceSetFactory( + def, + ampProviderCapabilities(), + func(cfg ProviderConfig) SourceSet { return newAmpSourceSet(cfg.Roots) }, + ) +} + +func newAmpSourceSet(roots []string) JSONLSourceSet { + return newJSONLSourceSet(AgentAmp, roots, + withExtensions(".json"), + withFollowSymlinkFiles(), + withContentHashing(), + withIncludePath(isAmpSourcePath), + withSessionIDFromPath(func(root, path string) string { + return ampThreadIDFromPath(path) + }), + withParseFile(ampParseFile), + ) +} + +func ampParseFile( + _ context.Context, path string, req ParseRequest, +) ([]ParseResult, []string, error) { + sess, msgs, err := parseAmpSession(path, req.Machine) + if err != nil { + return nil, nil, err + } + if sess == nil { + return nil, nil, nil + } + if req.Fingerprint.Hash != "" { + sess.File.Hash = req.Fingerprint.Hash + } + return []ParseResult{{Session: *sess, Messages: msgs}}, nil, nil +} + +func isAmpSourcePath(root, path string) bool { + return IsAmpThreadFileName(filepath.Base(path)) +} + +func ampProviderCapabilities() Capabilities { + return Capabilities{ + Source: jsonlFileProviderSourceCapabilities(), + Content: ContentCapabilities{ + FirstMessage: CapabilitySupported, + Thinking: CapabilitySupported, + ToolCalls: CapabilitySupported, + ToolResults: CapabilitySupported, + }, + } +} diff --git a/internal/parser/amp_test.go b/internal/parser/amp_test.go index 3c33adbe1..c7ba658c5 100644 --- a/internal/parser/amp_test.go +++ b/internal/parser/amp_test.go @@ -1,6 +1,8 @@ package parser import ( + "context" + "path/filepath" "strings" "testing" "time" @@ -15,10 +17,43 @@ func runAmpParserTest( ) (*ParsedSession, []ParsedMessage, error) { t.Helper() path := createTestFile(t, "T-test.json", content) - return ParseAmpSession(path, "local") + return parseAmpTestSession(t, path, "local") } -func TestParseAmpSession_Basic(t *testing.T) { +func parseAmpTestSession( + t *testing.T, + path string, + machine 
string, +) (*ParsedSession, []ParsedMessage, error) { + t.Helper() + + provider, ok := NewProvider(AgentAmp, ProviderConfig{ + Roots: []string{filepath.Dir(path)}, + Machine: machine, + }) + require.True(t, ok) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: SourceRef{ + Provider: AgentAmp, + Key: path, + DisplayPath: path, + FingerprintKey: path, + Opaque: JSONLSource{ + Root: filepath.Dir(path), + Path: path, + }, + }, + Machine: machine, + }) + if err != nil || len(outcome.Results) == 0 { + return nil, nil, err + } + result := outcome.Results[0].Result + return &result.Session, result.Messages, nil +} + +func TestAmpProviderParsesBasic(t *testing.T) { threadID := "T-019ca26f-aaaa-bbbb-cccc-dddddddddddd" content := `{ "v": 1, @@ -43,7 +78,7 @@ func TestParseAmpSession_Basic(t *testing.T) { }` path := createTestFile(t, threadID+".json", content) - sess, msgs, err := ParseAmpSession(path, "local") + sess, msgs, err := parseAmpTestSession(t, path, "local") require.NoError(t, err) require.NotNil(t, sess) @@ -72,7 +107,7 @@ func TestParseAmpSession_Basic(t *testing.T) { assert.Equal(t, 1, msgs[1].Ordinal) } -func TestParseAmpSession_ToolUseAndThinking(t *testing.T) { +func TestAmpProviderParsesToolUseAndThinking(t *testing.T) { content := `{ "v": 1, "id": "T-tooluse", @@ -322,7 +357,7 @@ func TestExtractAmpToolResults(t *testing.T) { } } -func TestParseAmpSession_AmpToolResultSchema(t *testing.T) { +func TestAmpProviderParsesAmpToolResultSchema(t *testing.T) { content := `{ "v": 1, "id": "T-amp-tool-result-schema", @@ -347,7 +382,7 @@ func TestParseAmpSession_AmpToolResultSchema(t *testing.T) { assert.Equal(t, "Here is a complete breakdown", DecodeContent(msgs[1].ToolResults[0].ContentRaw)) } -func TestParseAmpSession_AmpToolResultDict(t *testing.T) { +func TestAmpProviderParsesAmpToolResultDict(t *testing.T) { content := `{ "v": 1, "id": "T-amp-tool-result-dict", @@ -370,7 +405,7 @@ func TestParseAmpSession_AmpToolResultDict(t 
*testing.T) { assert.Equal(t, "cmd output", DecodeContent(msgs[1].ToolResults[0].ContentRaw)) } -func TestParseAmpSession_NoEnv(t *testing.T) { +func TestAmpProviderParsesNoEnv(t *testing.T) { content := `{ "v": 1, "id": "T-noenv", @@ -389,7 +424,7 @@ func TestParseAmpSession_NoEnv(t *testing.T) { require.Equal(t, 1, len(msgs)) } -func TestParseAmpSession_NoTitle(t *testing.T) { +func TestAmpProviderParsesNoTitle(t *testing.T) { content := `{ "v": 1, "id": "T-notitle", @@ -408,7 +443,7 @@ func TestParseAmpSession_NoTitle(t *testing.T) { assert.Equal(t, "Fix the bug in main.go please.", sess.FirstMessage) } -func TestParseAmpSession_NoMetaTraces(t *testing.T) { +func TestAmpProviderParsesNoMetaTraces(t *testing.T) { content := `{ "v": 1, "id": "T-notraces", @@ -427,7 +462,7 @@ func TestParseAmpSession_NoMetaTraces(t *testing.T) { assertZeroTimestamp(t, sess.EndedAt, "EndedAt") } -func TestParseAmpSession_LastTraceWithoutEndTime(t *testing.T) { +func TestAmpProviderParsesLastTraceWithoutEndTime(t *testing.T) { content := `{ "v": 1, "id": "T-trace-end-missing", @@ -451,7 +486,7 @@ func TestParseAmpSession_LastTraceWithoutEndTime(t *testing.T) { assert.Equal(t, "2024-01-01T00:00:02Z", sess.EndedAt.UTC().Format(time.RFC3339)) } -func TestParseAmpSession_EmptyThread(t *testing.T) { +func TestAmpProviderParsesEmptyThread(t *testing.T) { content := `{ "v": 1, "id": "T-empty", @@ -466,7 +501,7 @@ func TestParseAmpSession_EmptyThread(t *testing.T) { assert.Nil(t, msgs) } -func TestParseAmpSession_FirstMessageTruncation(t *testing.T) { +func TestAmpProviderParsesFirstMessageTruncation(t *testing.T) { longText := strings.Repeat("a", 400) content := `{"v":1,"id":"T-trunc","created":1704067200000,"messages":[` + `{"role":"user","content":[{"type":"text","text":"` + longText + `"}]}]}` @@ -478,7 +513,7 @@ func TestParseAmpSession_FirstMessageTruncation(t *testing.T) { assert.Equal(t, 303, len(sess.FirstMessage)) } -func TestParseAmpSession_InvalidCreated(t *testing.T) { +func 
TestAmpProviderParsesInvalidCreated(t *testing.T) { t.Run("missing created", func(t *testing.T) { content := `{ "v": 1, @@ -539,9 +574,9 @@ func TestParseAmpSession_InvalidCreated(t *testing.T) { }) } -func TestParseAmpSession_Errors(t *testing.T) { +func TestAmpProviderParsesErrors(t *testing.T) { t.Run("missing file", func(t *testing.T) { - _, _, err := ParseAmpSession("/nonexistent/T-xxx.json", "local") + _, _, err := parseAmpTestSession(t, "/nonexistent/T-xxx.json", "local") assert.Error(t, err) }) @@ -563,13 +598,13 @@ func TestParseAmpSession_Errors(t *testing.T) { t.Run("missing id and invalid filename", func(t *testing.T) { content := `{"v":1,"created":1704067200000,"messages":[]}` path := createTestFile(t, "bad-name.json", content) - _, _, err := ParseAmpSession(path, "local") + _, _, err := parseAmpTestSession(t, path, "local") assert.Error(t, err) assert.Contains(t, err.Error(), "missing or invalid id") }) } -func TestParseAmpSession_MismatchedID(t *testing.T) { +func TestAmpProviderParsesMismatchedID(t *testing.T) { t.Run("invalid JSON id", func(t *testing.T) { content := `{ "v": 1, @@ -581,7 +616,7 @@ func TestParseAmpSession_MismatchedID(t *testing.T) { }` path := createTestFile(t, "T-fallback-uuid.json", content) - sess, _, err := ParseAmpSession(path, "local") + sess, _, err := parseAmpTestSession(t, path, "local") require.NoError(t, err) require.NotNil(t, sess) assert.Equal(t, "amp:T-fallback-uuid", sess.ID) @@ -600,7 +635,7 @@ func TestParseAmpSession_MismatchedID(t *testing.T) { }` path := createTestFile(t, "bad-name.json", content) - sess, _, err := ParseAmpSession(path, "local") + sess, _, err := parseAmpTestSession(t, path, "local") require.NoError(t, err) require.NotNil(t, sess) assert.Equal(t, "amp:T-from-json", sess.ID) @@ -620,7 +655,7 @@ func TestParseAmpSession_MismatchedID(t *testing.T) { }` path := createTestFile(t, "T-from-file.json", content) - sess, _, err := ParseAmpSession(path, "local") + sess, _, err := parseAmpTestSession(t, 
path, "local") require.NoError(t, err) require.NotNil(t, sess) assert.Equal(t, "amp:T-from-file", sess.ID) diff --git a/internal/parser/amp_zencoder_provider_test.go b/internal/parser/amp_zencoder_provider_test.go new file mode 100644 index 000000000..a8d50c036 --- /dev/null +++ b/internal/parser/amp_zencoder_provider_test.go @@ -0,0 +1,286 @@ +package parser + +import ( + "context" + "crypto/sha256" + "fmt" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestAmpProviderFactoryReplacesLegacyAdapter(t *testing.T) { + factory, ok := ProviderFactoryByType(AgentAmp) + require.True(t, ok) + require.NotNil(t, factory) + + provider, ok := NewProvider(AgentAmp, ProviderConfig{ + Roots: []string{t.TempDir()}, + Machine: "devbox", + }) + require.True(t, ok) + require.NotNil(t, provider) +} + +func TestAmpProviderSourceMethods(t *testing.T) { + root := t.TempDir() + threadID := "T-019ca26f-aaaa-bbbb-cccc-dddddddddddd" + sourcePath := filepath.Join(root, threadID+".json") + writeSourceFile(t, sourcePath, ampProviderFixture(threadID)) + writeSourceFile(t, filepath.Join(root, "T-.json"), "{}\n") + writeSourceFile(t, filepath.Join(root, "notes.json"), "{}\n") + writeSourceFile(t, filepath.Join(root, "nested", threadID+".json"), "{}\n") + + provider, ok := NewProvider(AgentAmp, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, AgentAmp, discovered[0].Provider) + assert.Equal(t, sourcePath, discovered[0].DisplayPath) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "host~amp:" + threadID, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sourcePath, found.DisplayPath) + + require.NoError(t, os.Remove(sourcePath)) + changed, err := 
provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: sourcePath, EventKind: "remove", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, sourcePath, changed[0].DisplayPath) +} + +func TestAmpProviderSourceMethodsFollowSymlinkedSessionFile(t *testing.T) { + root := t.TempDir() + targetDir := t.TempDir() + threadID := "T-019ca26f-aaaa-bbbb-cccc-dddddddddddd" + targetPath := filepath.Join(targetDir, threadID+".json") + sourcePath := filepath.Join(root, threadID+".json") + writeSourceFile(t, targetPath, ampProviderFixture(threadID)) + if err := os.Symlink(targetPath, sourcePath); err != nil { + t.Skipf("symlink not supported: %v", err) + } + + provider, ok := NewProvider(AgentAmp, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, sourcePath, discovered[0].DisplayPath) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "host~amp:" + threadID, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sourcePath, found.DisplayPath) + + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: sourcePath, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, sourcePath, changed[0].DisplayPath) +} + +func TestAmpProviderParse(t *testing.T) { + root := t.TempDir() + threadID := "T-019ca26f-aaaa-bbbb-cccc-dddddddddddd" + sourcePath := filepath.Join(root, threadID+".json") + content := ampProviderFixture(threadID) + writeSourceFile(t, sourcePath, content) + + provider, ok := NewProvider(AgentAmp, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + 
require.Len(t, sources, 1) + + fingerprint, err := provider.Fingerprint(context.Background(), sources[0]) + require.NoError(t, err) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: sources[0], + Fingerprint: fingerprint, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.Len(t, outcome.Results, 1) + assert.Equal(t, DataVersionCurrent, outcome.Results[0].DataVersion) + assert.Equal(t, "amp:"+threadID, outcome.Results[0].Result.Session.ID) + assert.Equal(t, "amp-project", outcome.Results[0].Result.Session.Project) + assert.Equal(t, "devbox", outcome.Results[0].Result.Session.Machine) + assert.Equal(t, + fmt.Sprintf("%x", sha256.Sum256([]byte(content))), + outcome.Results[0].Result.Session.File.Hash, + ) + assert.Len(t, outcome.Results[0].Result.Messages, 2) +} + +func TestZencoderProviderFactoryReplacesLegacyAdapter(t *testing.T) { + factory, ok := ProviderFactoryByType(AgentZencoder) + require.True(t, ok) + require.NotNil(t, factory) + + provider, ok := NewProvider(AgentZencoder, ProviderConfig{ + Roots: []string{t.TempDir()}, + Machine: "devbox", + }) + require.True(t, ok) + require.NotNil(t, provider) +} + +func TestZencoderProviderSourceMethods(t *testing.T) { + root := t.TempDir() + sourcePath := filepath.Join(root, "abc-def-123.jsonl") + writeSourceFile(t, sourcePath, zencoderProviderFixture("abc-def-123")) + writeSourceFile(t, filepath.Join(root, "notes.txt"), "{}\n") + writeSourceFile(t, filepath.Join(root, "nested", "abc-def-123.jsonl"), "{}\n") + + provider, ok := NewProvider(AgentZencoder, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, AgentZencoder, discovered[0].Provider) + assert.Equal(t, sourcePath, discovered[0].DisplayPath) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + 
FullSessionID: "host~zencoder:abc-def-123", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sourcePath, found.DisplayPath) + + require.NoError(t, os.Remove(sourcePath)) + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: sourcePath, EventKind: "remove", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, sourcePath, changed[0].DisplayPath) +} + +func TestZencoderProviderSourceMethodsFollowSymlinkedSessionFile(t *testing.T) { + root := t.TempDir() + targetDir := t.TempDir() + targetPath := filepath.Join(targetDir, "abc-def-123.jsonl") + sourcePath := filepath.Join(root, "abc-def-123.jsonl") + writeSourceFile(t, targetPath, zencoderProviderFixture("abc-def-123")) + if err := os.Symlink(targetPath, sourcePath); err != nil { + t.Skipf("symlink not supported: %v", err) + } + + provider, ok := NewProvider(AgentZencoder, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, sourcePath, discovered[0].DisplayPath) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "host~zencoder:abc-def-123", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sourcePath, found.DisplayPath) + + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: sourcePath, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, sourcePath, changed[0].DisplayPath) +} + +func TestZencoderProviderParse(t *testing.T) { + root := t.TempDir() + sourcePath := filepath.Join(root, "abc-def-123.jsonl") + content := zencoderProviderFixture("abc-def-123") + writeSourceFile(t, sourcePath, content) + + provider, ok := NewProvider(AgentZencoder, ProviderConfig{ + 
Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + + fingerprint, err := provider.Fingerprint(context.Background(), sources[0]) + require.NoError(t, err) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: sources[0], + Fingerprint: fingerprint, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.Len(t, outcome.Results, 1) + assert.Equal(t, DataVersionCurrent, outcome.Results[0].DataVersion) + assert.Equal(t, "zencoder:abc-def-123", outcome.Results[0].Result.Session.ID) + assert.Equal(t, "sample_project", outcome.Results[0].Result.Session.Project) + assert.Equal(t, "devbox", outcome.Results[0].Result.Session.Machine) + assert.Equal(t, + fmt.Sprintf("%x", sha256.Sum256([]byte(content))), + outcome.Results[0].Result.Session.File.Hash, + ) + assert.Len(t, outcome.Results[0].Result.Messages, 3) +} + +func ampProviderFixture(threadID string) string { + return `{ + "v": 1, + "id": "` + threadID + `", + "created": 1704067200000, + "title": "Migrate database schema", + "messages": [ + {"role": "user", "content": [{"type": "text", "text": "Migrate the DB schema."}]}, + {"role": "assistant", "content": [{"type": "text", "text": "Sure, I will help."}]} + ], + "env": {"initial": {"trees": [{"displayName": "amp-project"}]}}, + "meta": {"traces": []} +}` +} + +func zencoderProviderFixture(sessionID string) string { + return strings.Join([]string{ + `{"id":"` + sessionID + `","createdAt":"2024-01-01T00:00:00Z","updatedAt":"2024-01-01T00:01:00Z"}`, + `{"role":"system","content":"Working directory: /Users/alice/code/sample-project"}`, + `{"role":"user","content":[{"type":"text","text":"hello"}]}`, + `{"role":"assistant","content":[{"type":"text","text":"OK."}]}`, + }, "\n") +} diff --git a/internal/parser/discovery.go b/internal/parser/discovery.go index 565bdbba3..2b4bf8a3c 100644 --- 
a/internal/parser/discovery.go +++ b/internal/parser/discovery.go @@ -1363,52 +1363,6 @@ func ResolveGeminiProject( return NormalizeName(dirName) } -// DiscoverAmpSessions finds all thread JSON files under -// the Amp threads directory (~/.local/share/amp/threads/T-*.json). -func DiscoverAmpSessions(threadsDir string) []DiscoveredFile { - if threadsDir == "" { - return nil - } - - entries, err := os.ReadDir(threadsDir) - if err != nil { - return nil - } - - var files []DiscoveredFile - for _, entry := range entries { - if entry.IsDir() { - continue - } - name := entry.Name() - if !IsAmpThreadFileName(name) { - continue - } - files = append(files, DiscoveredFile{ - Path: filepath.Join(threadsDir, name), - Agent: AgentAmp, - }) - } - - sort.Slice(files, func(i, j int) bool { - return files[i].Path < files[j].Path - }) - return files -} - -// FindAmpSourceFile locates an Amp thread file by its raw -// thread ID (without the "amp:" prefix). -func FindAmpSourceFile(threadsDir, threadID string) string { - if threadsDir == "" || !isValidAmpThreadID(threadID) { - return "" - } - candidate := filepath.Join(threadsDir, threadID+".json") - if _, err := os.Stat(candidate); err == nil { - return candidate - } - return "" -} - // DiscoverCopilotSessions finds all JSONL files under // /session-state/. Supports both bare format // (.jsonl) and directory format (/events.jsonl). 
diff --git a/internal/parser/discovery_test.go b/internal/parser/discovery_test.go index 1a1b50bb4..72f4c98ba 100644 --- a/internal/parser/discovery_test.go +++ b/internal/parser/discovery_test.go @@ -1,6 +1,7 @@ package parser import ( + "context" "os" "path/filepath" "strings" @@ -57,6 +58,31 @@ func assertDiscoveredFiles(t *testing.T, got []DiscoveredFile, wantFilenames []s } } +func assertSourceRefs(t *testing.T, got []SourceRef, wantFilenames []string, wantAgent AgentType) { + t.Helper() + + want := make(map[string]bool) + for _, f := range wantFilenames { + want[f] = true + } + + gotMap := make(map[string]bool) + for _, f := range got { + base := filepath.Base(f.DisplayPath) + gotMap[base] = true + assert.Equalf(t, wantAgent, f.Provider, "file %q: provider", base) + } + + assert.Equal(t, len(want), len(got), "files total") + + for file := range want { + assert.Truef(t, gotMap[file], "missing expected file: %q", file) + } + for file := range gotMap { + assert.Truef(t, want[file], "got unexpected file: %q", file) + } +} + func TestDiscoverClaudeProjects(t *testing.T) { tests := []struct { name string @@ -179,7 +205,7 @@ func TestDiscoverCodexSessions(t *testing.T) { } } -func TestDiscoverAmpSessions(t *testing.T) { +func TestAmpProviderDiscoversSessions(t *testing.T) { tests := []struct { name string files map[string]string @@ -211,17 +237,27 @@ func TestDiscoverAmpSessions(t *testing.T) { t.Run(tt.name, func(t *testing.T) { dir := t.TempDir() setupFileSystem(t, dir, tt.files) - files := DiscoverAmpSessions(dir) - assertDiscoveredFiles( - t, files, tt.wantFiles, AgentAmp, - ) + provider, ok := NewProvider(AgentAmp, ProviderConfig{ + Roots: []string{dir}, + Machine: "local", + }) + require.True(t, ok) + files, err := provider.Discover(context.Background()) + require.NoError(t, err) + assertSourceRefs(t, files, tt.wantFiles, AgentAmp) }) } t.Run("Nonexistent", func(t *testing.T) { dir := filepath.Join(t.TempDir(), "does-not-exist") - files := 
DiscoverAmpSessions(dir) - assert.Nil(t, files, "expected nil") + provider, ok := NewProvider(AgentAmp, ProviderConfig{ + Roots: []string{dir}, + Machine: "local", + }) + require.True(t, ok) + files, err := provider.Discover(context.Background()) + require.NoError(t, err) + assert.Empty(t, files, "expected empty") }) } @@ -291,30 +327,54 @@ func TestFindClaudeSourceFile(t *testing.T) { }) } -func TestFindAmpSourceFile(t *testing.T) { +func TestAmpProviderFindsSourceFile(t *testing.T) { t.Run("Found", func(t *testing.T) { dir := t.TempDir() rel := "T-019ca26f-aaaa-bbbb-cccc-dddddddddddd.json" setupFileSystem(t, dir, map[string]string{ rel: "{}", }) - got := FindAmpSourceFile( - dir, "T-019ca26f-aaaa-bbbb-cccc-dddddddddddd", + provider, ok := NewProvider(AgentAmp, ProviderConfig{ + Roots: []string{dir}, + Machine: "local", + }) + require.True(t, ok) + got, ok, err := provider.FindSource( + context.Background(), + FindSourceRequest{ + RawSessionID: "T-019ca26f-aaaa-bbbb-cccc-dddddddddddd", + }, ) + require.NoError(t, err) + require.True(t, ok) want := filepath.Join(dir, rel) - assert.Equal(t, want, got) + assert.Equal(t, want, got.DisplayPath) }) t.Run("Nonexistent", func(t *testing.T) { dir := t.TempDir() - got := FindAmpSourceFile( - dir, "T-019ca26f-aaaa-bbbb-cccc-dddddddddddd", + provider, ok := NewProvider(AgentAmp, ProviderConfig{ + Roots: []string{dir}, + Machine: "local", + }) + require.True(t, ok) + _, ok, err := provider.FindSource( + context.Background(), + FindSourceRequest{ + RawSessionID: "T-019ca26f-aaaa-bbbb-cccc-dddddddddddd", + }, ) - assert.Empty(t, got, "expected empty") + require.NoError(t, err) + assert.False(t, ok, "expected empty") }) t.Run("Validation", func(t *testing.T) { dir := t.TempDir() + provider, ok := NewProvider(AgentAmp, ProviderConfig{ + Roots: []string{dir}, + Machine: "local", + }) + require.True(t, ok) tests := []string{ "", "../bad", @@ -323,8 +383,12 @@ func TestFindAmpSourceFile(t *testing.T) { "T-", } for _, id := range 
tests { - got := FindAmpSourceFile(dir, id) - assert.Emptyf(t, got, "FindAmpSourceFile(%q)", id) + _, ok, err := provider.FindSource( + context.Background(), + FindSourceRequest{RawSessionID: id}, + ) + require.NoError(t, err) + assert.Falsef(t, ok, "Amp provider FindSource(%q)", id) } }) } diff --git a/internal/parser/provider.go b/internal/parser/provider.go index 9032857b2..27d6e88ca 100644 --- a/internal/parser/provider.go +++ b/internal/parser/provider.go @@ -347,6 +347,8 @@ func ProviderFactories() []ProviderFactory { func providerFactoryForDef(def AgentDef) ProviderFactory { def = cloneAgentDef(def) switch def.Type { + case AgentAmp: + return newAmpProviderFactory(def) case AgentCommandCode: return newCommandCodeProviderFactory(def) case AgentDeepSeekTUI: @@ -355,6 +357,8 @@ func providerFactoryForDef(def AgentDef) ProviderFactory { return newIflowProviderFactory(def) case AgentGptme: return newGptmeProviderFactory(def) + case AgentZencoder: + return newZencoderProviderFactory(def) default: return legacyProviderFactory{def: def} } diff --git a/internal/parser/provider_migration.go b/internal/parser/provider_migration.go index 5e917ad81..6778a3304 100644 --- a/internal/parser/provider_migration.go +++ b/internal/parser/provider_migration.go @@ -28,8 +28,8 @@ var providerMigrationModes = map[AgentType]ProviderMigrationMode{ AgentOpenHands: ProviderMigrationLegacyOnly, AgentCursor: ProviderMigrationLegacyOnly, AgentIflow: ProviderMigrationProviderAuthoritative, - AgentAmp: ProviderMigrationLegacyOnly, - AgentZencoder: ProviderMigrationLegacyOnly, + AgentAmp: ProviderMigrationProviderAuthoritative, + AgentZencoder: ProviderMigrationProviderAuthoritative, AgentVSCodeCopilot: ProviderMigrationLegacyOnly, AgentVSCopilot: ProviderMigrationLegacyOnly, AgentPi: ProviderMigrationLegacyOnly, diff --git a/internal/parser/types.go b/internal/parser/types.go index 2621a1371..3a4c04f6e 100644 --- a/internal/parser/types.go +++ b/internal/parser/types.go @@ -235,26 +235,22 
@@ var Registry = []AgentDef{ FindSourceFunc: FindCursorSourceFile, }, { - Type: AgentAmp, - DisplayName: "Amp", - EnvVar: "AMP_DIR", - ConfigKey: "amp_dirs", - DefaultDirs: []string{".local/share/amp/threads"}, - IDPrefix: "amp:", - FileBased: true, - DiscoverFunc: DiscoverAmpSessions, - FindSourceFunc: FindAmpSourceFile, + Type: AgentAmp, + DisplayName: "Amp", + EnvVar: "AMP_DIR", + ConfigKey: "amp_dirs", + DefaultDirs: []string{".local/share/amp/threads"}, + IDPrefix: "amp:", + FileBased: true, }, { - Type: AgentZencoder, - DisplayName: "Zencoder", - EnvVar: "ZENCODER_DIR", - ConfigKey: "zencoder_dirs", - DefaultDirs: []string{".zencoder/sessions"}, - IDPrefix: "zencoder:", - FileBased: true, - DiscoverFunc: DiscoverZencoderSessions, - FindSourceFunc: FindZencoderSourceFile, + Type: AgentZencoder, + DisplayName: "Zencoder", + EnvVar: "ZENCODER_DIR", + ConfigKey: "zencoder_dirs", + DefaultDirs: []string{".zencoder/sessions"}, + IDPrefix: "zencoder:", + FileBased: true, }, { Type: AgentIflow, diff --git a/internal/parser/zencoder.go b/internal/parser/zencoder.go index 62e6fcec1..84fe74705 100644 --- a/internal/parser/zencoder.go +++ b/internal/parser/zencoder.go @@ -5,7 +5,6 @@ import ( "os" "path/filepath" "regexp" - "sort" "strings" "time" @@ -445,10 +444,7 @@ func zencoderToolResultContentLength( return total } -// ParseZencoderSession parses a Zencoder JSONL session file. -// Returns (nil, nil, nil) if the file doesn't exist or -// contains no user/assistant messages. -func ParseZencoderSession( +func parseZencoderSession( path, machine string, ) (*ParsedSession, []ParsedMessage, error) { info, err := os.Stat(path) @@ -567,53 +563,3 @@ func ParseZencoderSession( func IsZencoderSessionFileName(name string) bool { return strings.HasSuffix(name, ".jsonl") } - -// DiscoverZencoderSessions finds all JSONL files under -// the Zencoder sessions directory (~/.zencoder/sessions/*.jsonl). 
-func DiscoverZencoderSessions( - sessionsDir string, -) []DiscoveredFile { - if sessionsDir == "" { - return nil - } - - entries, err := os.ReadDir(sessionsDir) - if err != nil { - return nil - } - - var files []DiscoveredFile - for _, entry := range entries { - if entry.IsDir() { - continue - } - name := entry.Name() - if !IsZencoderSessionFileName(name) { - continue - } - files = append(files, DiscoveredFile{ - Path: filepath.Join(sessionsDir, name), - Agent: AgentZencoder, - }) - } - - sort.Slice(files, func(i, j int) bool { - return files[i].Path < files[j].Path - }) - return files -} - -// FindZencoderSourceFile locates a Zencoder session file by -// its raw session ID (without the "zencoder:" prefix). -func FindZencoderSourceFile( - sessionsDir, rawID string, -) string { - if sessionsDir == "" || !IsValidSessionID(rawID) { - return "" - } - candidate := filepath.Join(sessionsDir, rawID+".jsonl") - if _, err := os.Stat(candidate); err == nil { - return candidate - } - return "" -} diff --git a/internal/parser/zencoder_provider.go b/internal/parser/zencoder_provider.go new file mode 100644 index 000000000..35888d350 --- /dev/null +++ b/internal/parser/zencoder_provider.go @@ -0,0 +1,68 @@ +package parser + +import ( + "context" + "path/filepath" + "strings" +) + +// Zencoder stores each session as a single JSONL file in a directory. It is a +// directory-of-files provider: discovery, watching, change classification, +// lookup, and fingerprinting come from JSONLSourceSet, and the ParseFile option +// makes that source set a full SourceSet so it rides the generic factory. 
+func newZencoderProviderFactory(def AgentDef) ProviderFactory { + return newSourceSetFactory( + def, + zencoderProviderCapabilities(), + func(cfg ProviderConfig) SourceSet { return newZencoderSourceSet(cfg.Roots) }, + ) +} + +func newZencoderSourceSet(roots []string) JSONLSourceSet { + return newJSONLSourceSet(AgentZencoder, roots, + withFollowSymlinkFiles(), + withContentHashing(), + withIncludePath(isZencoderSourcePath), + withSessionIDFromPath(zencoderSessionIDFromPath), + withParseFile(zencoderParseFile), + ) +} + +func zencoderParseFile( + _ context.Context, path string, req ParseRequest, +) ([]ParseResult, []string, error) { + sess, msgs, err := parseZencoderSession(path, req.Machine) + if err != nil { + return nil, nil, err + } + if sess == nil { + return nil, nil, nil + } + if req.Fingerprint.Hash != "" { + sess.File.Hash = req.Fingerprint.Hash + } + return []ParseResult{{Session: *sess, Messages: msgs}}, nil, nil +} + +func isZencoderSourcePath(root, path string) bool { + return IsZencoderSessionFileName(filepath.Base(path)) +} + +func zencoderSessionIDFromPath(root, path string) string { + return strings.TrimSuffix(filepath.Base(path), ".jsonl") +} + +func zencoderProviderCapabilities() Capabilities { + return Capabilities{ + Source: jsonlFileProviderSourceCapabilities(), + Content: ContentCapabilities{ + FirstMessage: CapabilitySupported, + Cwd: CapabilitySupported, + Relationships: CapabilitySupported, + Subagents: CapabilitySupported, + Thinking: CapabilitySupported, + ToolCalls: CapabilitySupported, + ToolResults: CapabilitySupported, + }, + } +} diff --git a/internal/parser/zencoder_test.go b/internal/parser/zencoder_test.go index 38b6261b0..749383862 100644 --- a/internal/parser/zencoder_test.go +++ b/internal/parser/zencoder_test.go @@ -1,6 +1,7 @@ package parser import ( + "context" "os" "path/filepath" "strings" @@ -16,10 +17,43 @@ func runZencoderParserTest( ) (*ParsedSession, []ParsedMessage, error) { t.Helper() path := createTestFile(t, 
"test-uuid.jsonl", content) - return ParseZencoderSession(path, "local") + return parseZencoderTestSession(t, path, "local") } -func TestParseZencoderSession_Basic(t *testing.T) { +func parseZencoderTestSession( + t *testing.T, + path string, + machine string, +) (*ParsedSession, []ParsedMessage, error) { + t.Helper() + + provider, ok := NewProvider(AgentZencoder, ProviderConfig{ + Roots: []string{filepath.Dir(path)}, + Machine: machine, + }) + require.True(t, ok) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: SourceRef{ + Provider: AgentZencoder, + Key: path, + DisplayPath: path, + FingerprintKey: path, + Opaque: JSONLSource{ + Root: filepath.Dir(path), + Path: path, + }, + }, + Machine: machine, + }) + if err != nil || len(outcome.Results) == 0 { + return nil, nil, err + } + result := outcome.Results[0].Result + return &result.Session, result.Messages, nil +} + +func TestZencoderProviderParsesBasic(t *testing.T) { header := `{"id":"abc-123","chatId":"chat-1","modelId":"model-1","parentId":"","creationReason":"newChat","createdAt":"2024-01-01T00:00:00Z","updatedAt":"2024-01-01T00:01:00Z","version":"1"}` system := `{"role":"system","content":"You are an AI assistant.\n\n# Environment\n\nWorking directory: /home/user/myproject\n\nOS: linux"}` user := `{"role":"user","content":[{"type":"text","text":"Fix the bug.","tag":"user-input"}]}` @@ -70,7 +104,7 @@ func TestParseZencoderSession_Basic(t *testing.T) { assert.Equal(t, RelNone, sess.RelationshipType) } -func TestParseZencoderSession_ToolCallAndReasoning(t *testing.T) { +func TestZencoderProviderParsesToolCallAndReasoning(t *testing.T) { header := `{"id":"tc-123","createdAt":"2024-01-01T00:00:00Z","updatedAt":"2024-01-01T00:01:00Z"}` user := `{"role":"user","content":[{"type":"text","text":"Read the file."}]}` assistant := `{"role":"assistant","content":[` + @@ -105,7 +139,7 @@ func TestParseZencoderSession_ToolCallAndReasoning(t *testing.T) { assert.Equal(t, "tc1", 
msgs[1].ToolCalls[0].ToolUseID) } -func TestParseZencoderSession_ToolResults(t *testing.T) { +func TestZencoderProviderParsesToolResults(t *testing.T) { header := `{"id":"tr-123","createdAt":"2024-01-01T00:00:00Z","updatedAt":"2024-01-01T00:01:00Z"}` user := `{"role":"user","content":[{"type":"text","text":"Read it."}]}` assistant := `{"role":"assistant","content":[` + @@ -139,7 +173,7 @@ func TestParseZencoderSession_ToolResults(t *testing.T) { "package main") } -func TestParseZencoderSession_UserInputTagFiltering(t *testing.T) { +func TestZencoderProviderParsesUserInputTagFiltering(t *testing.T) { header := `{"id":"tag-123","createdAt":"2024-01-01T00:00:00Z","updatedAt":"2024-01-01T00:01:00Z"}` user := `{"role":"user","content":[` + `{"type":"text","text":"system instructions","tag":"instructions"},` + @@ -177,7 +211,7 @@ func TestParseZencoderSession_UserInputTagFiltering(t *testing.T) { assert.Equal(t, "actual user input", sess.FirstMessage) } -func TestParseZencoderSession_DirectContinuation(t *testing.T) { +func TestZencoderProviderParsesDirectContinuation(t *testing.T) { header := `{"id":"child-123","parentId":"parent-456","creationReason":"directContinuation","createdAt":"2024-01-01T00:00:00Z","updatedAt":"2024-01-01T00:01:00Z"}` user := `{"role":"user","content":[{"type":"text","text":"Continue."}]}` assistant := `{"role":"assistant","content":[{"type":"text","text":"Continuing."}]}` @@ -194,7 +228,7 @@ func TestParseZencoderSession_DirectContinuation(t *testing.T) { assert.Equal(t, RelContinuation, sess.RelationshipType) } -func TestParseZencoderSession_SummarizedContinuation(t *testing.T) { +func TestZencoderProviderParsesSummarizedContinuation(t *testing.T) { header := `{"id":"child-789","parentId":"parent-012","creationReason":"summarizedContinuation","createdAt":"2024-01-01T00:00:00Z","updatedAt":"2024-01-01T00:01:00Z"}` user := `{"role":"user","content":[{"type":"text","text":"Continue."}]}` assistant := 
`{"role":"assistant","content":[{"type":"text","text":"OK."}]}` @@ -211,7 +245,7 @@ func TestParseZencoderSession_SummarizedContinuation(t *testing.T) { assert.Equal(t, RelContinuation, sess.RelationshipType) } -func TestParseZencoderSession_ProjectExtraction(t *testing.T) { +func TestZencoderProviderParsesProjectExtraction(t *testing.T) { header := `{"id":"proj-123","createdAt":"2024-01-01T00:00:00Z","updatedAt":"2024-01-01T00:01:00Z"}` system := `{"role":"system","content":"You are helpful.\n\nWorking directory: /home/user/workspace/coolproject\n"}` user := `{"role":"user","content":[{"type":"text","text":"hello"}]}` @@ -232,7 +266,7 @@ func TestParseZencoderSession_ProjectExtraction(t *testing.T) { assert.False(t, msgs[1].IsSystem) } -func TestParseZencoderSession_EmptySession(t *testing.T) { +func TestZencoderProviderParsesEmptySession(t *testing.T) { // Header only, no messages. header := `{"id":"empty-123","createdAt":"2024-01-01T00:00:00Z","updatedAt":"2024-01-01T00:01:00Z"}` @@ -242,7 +276,7 @@ func TestParseZencoderSession_EmptySession(t *testing.T) { assert.Nil(t, msgs) } -func TestParseZencoderSession_PermissionSkippedFinishStored(t *testing.T) { +func TestZencoderProviderParsesPermissionSkippedFinishStored(t *testing.T) { header := `{"id":"skip-123","createdAt":"2024-01-01T00:00:00Z","updatedAt":"2024-01-01T00:01:00Z"}` user := `{"role":"user","content":[{"type":"text","text":"Do it."}]}` permission := `{"role":"permission","data":{"allowed":true}}` @@ -268,7 +302,7 @@ func TestParseZencoderSession_PermissionSkippedFinishStored(t *testing.T) { assert.Equal(t, 1, sess.UserMessageCount) } -func TestParseZencoderSession_FirstMessageTruncation(t *testing.T) { +func TestZencoderProviderParsesFirstMessageTruncation(t *testing.T) { header := `{"id":"trunc-123","createdAt":"2024-01-01T00:00:00Z","updatedAt":"2024-01-01T00:01:00Z"}` longText := strings.Repeat("a", 400) user := `{"role":"user","content":[{"type":"text","text":"` + longText + `"}]}` @@ -282,16 
+316,14 @@ func TestParseZencoderSession_FirstMessageTruncation(t *testing.T) { assert.Equal(t, 303, len(sess.FirstMessage)) } -func TestParseZencoderSession_MissingFile(t *testing.T) { - sess, msgs, err := ParseZencoderSession( - "/nonexistent/test.jsonl", "local", - ) +func TestZencoderProviderParsesMissingFile(t *testing.T) { + sess, msgs, err := parseZencoderTestSession(t, "/nonexistent/test.jsonl", "local") require.NoError(t, err) assert.Nil(t, sess) assert.Nil(t, msgs) } -func TestParseZencoderSession_FallbackSessionID(t *testing.T) { +func TestZencoderProviderParsesFallbackSessionID(t *testing.T) { // Header with no id field -> falls back to filename. header := `{"createdAt":"2024-01-01T00:00:00Z","updatedAt":"2024-01-01T00:01:00Z"}` user := `{"role":"user","content":[{"type":"text","text":"hello"}]}` @@ -305,7 +337,7 @@ func TestParseZencoderSession_FallbackSessionID(t *testing.T) { assert.Equal(t, "zencoder:test-uuid", sess.ID) } -func TestDiscoverZencoderSessions(t *testing.T) { +func TestZencoderProviderDiscoversSessions(t *testing.T) { dir := t.TempDir() // Create some session files. 
@@ -324,39 +356,74 @@ func TestDiscoverZencoderSessions(t *testing.T) { filepath.Join(dir, "subdir"), 0o755, )) - files := DiscoverZencoderSessions(dir) + provider, ok := NewProvider(AgentZencoder, ProviderConfig{ + Roots: []string{dir}, + Machine: "local", + }) + require.True(t, ok) + files, err := provider.Discover(context.Background()) + require.NoError(t, err) assert.Equal(t, 2, len(files)) for _, f := range files { - assert.Equal(t, AgentZencoder, f.Agent) - assert.True(t, strings.HasSuffix(f.Path, ".jsonl")) + assert.Equal(t, AgentZencoder, f.Provider) + assert.True(t, strings.HasSuffix(f.DisplayPath, ".jsonl")) } } -func TestDiscoverZencoderSessions_EmptyDir(t *testing.T) { - files := DiscoverZencoderSessions("") - assert.Nil(t, files) +func TestZencoderProviderDiscoversEmptyDir(t *testing.T) { + provider, ok := NewProvider(AgentZencoder, ProviderConfig{ + Roots: []string{""}, + Machine: "local", + }) + require.True(t, ok) + files, err := provider.Discover(context.Background()) + require.NoError(t, err) + assert.Empty(t, files) } -func TestFindZencoderSourceFile(t *testing.T) { +func TestZencoderProviderFindsSourceFile(t *testing.T) { dir := t.TempDir() name := "abc-def-123.jsonl" f, err := os.Create(filepath.Join(dir, name)) require.NoError(t, err) f.Close() - result := FindZencoderSourceFile(dir, "abc-def-123") - assert.Equal(t, filepath.Join(dir, name), result) + provider, ok := NewProvider(AgentZencoder, ProviderConfig{ + Roots: []string{dir}, + Machine: "local", + }) + require.True(t, ok) + found, ok, err := provider.FindSource( + context.Background(), + FindSourceRequest{RawSessionID: "abc-def-123"}, + ) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, filepath.Join(dir, name), found.DisplayPath) // Non-existent ID. 
- result = FindZencoderSourceFile(dir, "nonexistent") - assert.Empty(t, result) + _, ok, err = provider.FindSource( + context.Background(), + FindSourceRequest{RawSessionID: "nonexistent"}, + ) + require.NoError(t, err) + assert.False(t, ok) // Empty dir. - result = FindZencoderSourceFile("", "abc-def-123") - assert.Empty(t, result) + emptyProvider, ok := NewProvider(AgentZencoder, ProviderConfig{ + Roots: []string{""}, + Machine: "local", + }) + require.True(t, ok) + _, ok, err = emptyProvider.FindSource( + context.Background(), + FindSourceRequest{RawSessionID: "abc-def-123"}, + ) + require.NoError(t, err) + assert.False(t, ok) } -func TestParseZencoderSession_UserContentWithoutTag(t *testing.T) { +func TestZencoderProviderParsesUserContentWithoutTag(t *testing.T) { header := `{"id":"notag-123","createdAt":"2024-01-01T00:00:00Z","updatedAt":"2024-01-01T00:01:00Z"}` user := `{"role":"user","content":[{"type":"text","text":"no tag input"}]}` assistant := `{"role":"assistant","content":[{"type":"text","text":"OK."}]}` @@ -373,7 +440,7 @@ func TestParseZencoderSession_UserContentWithoutTag(t *testing.T) { assert.Equal(t, "no tag input", msgs[0].Content) } -func TestParseZencoderSession_NewChatNoRelationship(t *testing.T) { +func TestZencoderProviderParsesNewChatNoRelationship(t *testing.T) { header := `{"id":"new-123","parentId":"some-parent","creationReason":"newChat","createdAt":"2024-01-01T00:00:00Z","updatedAt":"2024-01-01T00:01:00Z"}` user := `{"role":"user","content":[{"type":"text","text":"hello"}]}` @@ -388,7 +455,7 @@ func TestParseZencoderSession_NewChatNoRelationship(t *testing.T) { assert.Equal(t, RelNone, sess.RelationshipType) } -func TestParseZencoderSession_SubagentSessionID(t *testing.T) { +func TestZencoderProviderParsesSubagentSessionID(t *testing.T) { header := `{"id":"parent-123","createdAt":"2024-01-01T00:00:00Z","updatedAt":"2024-01-01T00:01:00Z"}` user := `{"role":"user","content":[{"type":"text","text":"Use subagent."}]}` assistant := 
`{"role":"assistant","content":[` + @@ -415,7 +482,7 @@ func TestParseZencoderSession_SubagentSessionID(t *testing.T) { ) } -func TestParseZencoderSession_SubagentMultiple(t *testing.T) { +func TestZencoderProviderParsesSubagentMultiple(t *testing.T) { header := `{"id":"parent-456","createdAt":"2024-01-01T00:00:00Z","updatedAt":"2024-01-01T00:01:00Z"}` user := `{"role":"user","content":[{"type":"text","text":"Use subagents."}]}` assistant := `{"role":"assistant","content":[` + @@ -447,7 +514,7 @@ func TestParseZencoderSession_SubagentMultiple(t *testing.T) { ) } -func TestParseZencoderSession_NoSessionIDTag(t *testing.T) { +func TestZencoderProviderParsesNoSessionIDTag(t *testing.T) { header := `{"id":"parent-789","createdAt":"2024-01-01T00:00:00Z","updatedAt":"2024-01-01T00:01:00Z"}` user := `{"role":"user","content":[{"type":"text","text":"Read file."}]}` assistant := `{"role":"assistant","content":[` + @@ -469,7 +536,7 @@ func TestParseZencoderSession_NoSessionIDTag(t *testing.T) { assert.Empty(t, msgs[1].ToolCalls[0].SubagentSessionID) } -func TestParseZencoderSession_SkillBlocks(t *testing.T) { +func TestZencoderProviderParsesSkillBlocks(t *testing.T) { header := `{"id":"skill-123","createdAt":"2024-01-01T00:00:00Z","updatedAt":"2024-01-01T00:01:00Z"}` user := `{"role":"user","content":[` + `{"type":"text","text":"Do the thing.","tag":"user-input"},` + @@ -505,7 +572,7 @@ func TestParseZencoderSession_SkillBlocks(t *testing.T) { assert.Equal(t, "Do the thing.", sess.FirstMessage) } -func TestParseZencoderSession_ToolResultSystemTags(t *testing.T) { +func TestZencoderProviderParsesToolResultSystemTags(t *testing.T) { header := `{"id":"trsys-123","createdAt":"2024-01-01T00:00:00Z","updatedAt":"2024-01-01T00:01:00Z"}` user := `{"role":"user","content":[{"type":"text","text":"Run it."}]}` assistant := `{"role":"assistant","content":[` + @@ -544,7 +611,7 @@ func TestParseZencoderSession_ToolResultSystemTags(t *testing.T) { assert.Contains(t, msgs[3].Content, "Extra 
context") } -func TestParseZencoderSession_ToolResultTaggedBlocksFilteredFromContentRaw(t *testing.T) { +func TestZencoderProviderParsesToolResultTaggedBlocksFilteredFromContentRaw(t *testing.T) { // Verify that tagged text blocks in tool-result content are // stripped from ContentRaw (to avoid double-rendering) and // emitted as a separate system message instead. @@ -663,7 +730,7 @@ func TestParseZencoderSession_ToolResultTaggedBlocksFilteredFromContentRaw(t *te } } -func TestParseZencoderSession_SystemOnlySession(t *testing.T) { +func TestZencoderProviderParsesSystemOnlySession(t *testing.T) { // A session with only a header and a system message (e.g. // environment banner) should be filtered out as empty. header := `{"id":"sysonly-123","createdAt":"2024-01-01T00:00:00Z","updatedAt":"2024-01-01T00:01:00Z"}` @@ -677,7 +744,7 @@ func TestParseZencoderSession_SystemOnlySession(t *testing.T) { assert.Nil(t, msgs, "system-only session should produce no messages") } -func TestParseZencoderSession_SystemAndFinishOnlySession(t *testing.T) { +func TestZencoderProviderParsesSystemAndFinishOnlySession(t *testing.T) { // A session with system + finish but no real user/assistant // messages should also be filtered out. header := `{"id":"sysfin-123","createdAt":"2024-01-01T00:00:00Z","updatedAt":"2024-01-01T00:01:00Z"}` @@ -692,7 +759,7 @@ func TestParseZencoderSession_SystemAndFinishOnlySession(t *testing.T) { assert.Nil(t, msgs) } -func TestParseZencoderSession_TimestampBoundsFromMessages(t *testing.T) { +func TestZencoderProviderParsesTimestampBoundsFromMessages(t *testing.T) { // When header timestamps are missing, session bounds should // be derived from per-message timestamps. 
header := `{"id":"bounds-123"}` @@ -714,7 +781,7 @@ func TestParseZencoderSession_TimestampBoundsFromMessages(t *testing.T) { assertTimestamp(t, sess.EndedAt, wantEnd) } -func TestParseZencoderSession_TimestampBoundsStaleHeader(t *testing.T) { +func TestZencoderProviderParsesTimestampBoundsStaleHeader(t *testing.T) { // When header has timestamps but messages have more // recent ones, endedAt should be updated. header := `{"id":"stale-123","createdAt":"2024-01-01T00:00:00Z","updatedAt":"2024-01-01T00:01:00Z"}` @@ -738,7 +805,7 @@ func TestParseZencoderSession_TimestampBoundsStaleHeader(t *testing.T) { assertTimestamp(t, sess.EndedAt, wantEnd) } -func TestParseZencoderSession_MessageTimestamps(t *testing.T) { +func TestZencoderProviderParsesMessageTimestamps(t *testing.T) { header := `{"id":"ts-123","createdAt":"2024-01-01T00:00:00Z","updatedAt":"2024-01-01T00:05:00Z"}` system := `{"role":"system","content":"You are an AI.\n\nWorking directory: /home/user/proj","createdAt":"2024-01-01T00:00:01Z"}` user := `{"role":"user","content":[{"type":"text","text":"Hello."}],"createdAt":"2024-01-01T00:00:02Z"}` @@ -777,7 +844,7 @@ func TestParseZencoderSession_MessageTimestamps(t *testing.T) { assert.Equal(t, wantFinish, msgs[4].Timestamp) } -func TestParseZencoderSession_MessageTimestamps_Missing(t *testing.T) { +func TestZencoderProviderParsesMessageTimestamps_Missing(t *testing.T) { header := `{"id":"ts-miss-123","createdAt":"2024-01-01T00:00:00Z","updatedAt":"2024-01-01T00:01:00Z"}` // Lines without createdAt field. 
user := `{"role":"user","content":[{"type":"text","text":"No timestamp."}]}` diff --git a/internal/sync/engine.go b/internal/sync/engine.go index fc8ead815..60f74372c 100644 --- a/internal/sync/engine.go +++ b/internal/sync/engine.go @@ -1320,38 +1320,6 @@ func (e *Engine) classifyOnePath( } } - // Amp: /T-*.json - for _, ampDir := range e.agentDirs[parser.AgentAmp] { - if ampDir == "" { - continue - } - if rel, ok := isUnder(ampDir, path); ok { - if strings.Count(rel, sep) == 0 && - parser.IsAmpThreadFileName(filepath.Base(rel)) { - return parser.DiscoveredFile{ - Path: path, - Agent: parser.AgentAmp, - }, true - } - } - } - - // Zencoder: /.jsonl - for _, zenDir := range e.agentDirs[parser.AgentZencoder] { - if zenDir == "" { - continue - } - if rel, ok := isUnder(zenDir, path); ok { - if strings.Count(rel, sep) == 0 && - parser.IsZencoderSessionFileName(filepath.Base(rel)) { - return parser.DiscoveredFile{ - Path: path, - Agent: parser.AgentZencoder, - }, true - } - } - } - // VSCode Copilot: /workspaceStorage//chatSessions/.{json,jsonl} // or: /globalStorage/emptyWindowChatSessions/.{json,jsonl} for _, vscDir := range e.agentDirs[parser.AgentVSCodeCopilot] { @@ -4732,10 +4700,6 @@ func (e *Engine) processFile( res = e.processOpenHands(file, info) case parser.AgentCursor: res = e.processCursor(file, info) - case parser.AgentAmp: - res = e.processAmp(file, info) - case parser.AgentZencoder: - res = e.processZencoder(file, info) case parser.AgentVSCodeCopilot: res = e.processVSCodeCopilot(file, info) case parser.AgentVSCopilot: @@ -6348,65 +6312,6 @@ func (e *Engine) processGemini( } } -func (e *Engine) processAmp( - file parser.DiscoveredFile, info os.FileInfo, -) processResult { - // Fast path: skip by file_path + mtime before parsing. 
- if e.shouldSkipByPath(file.Path, info) { - return processResult{skip: true} - } - - sess, msgs, err := parser.ParseAmpSession( - file.Path, e.machine, - ) - if err != nil { - return processResult{err: err} - } - if sess == nil { - return processResult{} - } - - hash, err := ComputeFileHash(file.Path) - if err == nil { - sess.File.Hash = hash - } - - return processResult{ - results: []parser.ParseResult{ - {Session: *sess, Messages: msgs}, - }, - } -} - -func (e *Engine) processZencoder( - file parser.DiscoveredFile, info os.FileInfo, -) processResult { - if e.shouldSkipByPath(file.Path, info) { - return processResult{skip: true} - } - - sess, msgs, err := parser.ParseZencoderSession( - file.Path, e.machine, - ) - if err != nil { - return processResult{err: err} - } - if sess == nil { - return processResult{} - } - - hash, err := ComputeFileHash(file.Path) - if err == nil { - sess.File.Hash = hash - } - - return processResult{ - results: []parser.ParseResult{ - {Session: *sess, Messages: msgs}, - }, - } -} - func (e *Engine) processVSCodeCopilot( file parser.DiscoveredFile, info os.FileInfo, ) processResult { From aa2927d8f761c04c6793cd8e2ddd6f81b7130f5a Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Wed, 24 Jun 2026 21:34:07 -0400 Subject: [PATCH 16/24] feat(parser): migrate pi provider Pi is the next JSONL-shaped parser that can move behind the provider facade without introducing a new source framework. Its source layout is still simple enough to compose the directory JSONL helper, but it needs provider-owned filtering because legacy discovery validates the session header while raw session lookup only checks the expected filename under encoded-cwd directories. This keeps that discovery-versus-lookup asymmetry explicit in the provider and preserves symlinked encoded-cwd directory support while parse output continues to come from the existing Pi parser. 
Validation: go test -tags "fts5" ./internal/parser -run TestPiProvider -count=1; go test -tags "fts5" ./internal/parser -count=1; go vet ./...; make test-short; git diff --check fix(parser): preserve pi header-based discovery Pi discovery has historically treated the filename as source shape only: any one-level JSONL file under an encoded-cwd directory can be a session if its header has type=session. The provider migration accidentally applied raw session ID filename validation before header validation, which would drop valid files whose session ID comes from the header instead of the filename. Raw-ID lookup still validates the requested ID before reconstructing .jsonl, so the legacy discovery-versus-lookup asymmetry remains explicit without broadening lookup inputs. Validation: go test -tags "fts5" ./internal/parser -run TestPiProviderDiscoveryAcceptsSessionHeaderInNonSessionIDFilename -count=1; go test -tags "fts5" ./internal/parser -run 'TestPiProvider(DiscoveryAcceptsSessionHeaderInNonSessionIDFilename|SourceMethods|Parse|DiscoversSymlinkedCWDDirectory|FactoryReplacesLegacyAdapter)' -count=1; go test -tags "fts5" ./internal/parser -count=1; go vet ./...; make test-short; git diff --check test(parser): opt pi into provider shadow Pi now has a concrete facade provider on this branch, so its migration mode should enter the shared shadow-compare harness instead of remaining an additive implementation behind legacy-only dispatch. The stack keeps lower provider opt-ins inherited and leaves later provider branches legacy-only until their own migrations land. Validation: go test -tags "fts5" ./internal/parser -run TestProviderMigrationModes -count=1; go test -tags "fts5" ./internal/parser -count=1; go vet ./...; git diff --check test(sync): compare pi shadow parity Pi is shadow-compared on this branch, so add the shared source-level proof that provider observation matches ParsePiSession output for a representative session file. 
Validation: go fmt ./...; go test -tags "fts5" ./internal/parser ./internal/sync -count=1; go vet ./...; git diff --check; ./custom-gcl run --config .golangci.nilaway.yml ./internal/parser/... ./internal/sync/... refactor(parser): fold pi into provider Pi should not keep exported parser and source callback APIs after its concrete provider exists. Removing those hooks also exposed that full sync and single-session lookup still assumed AgentDef callbacks, so provider-authoritative agents were not actually runnable without legacy callbacks. Move Pi parsing behind the provider, remove its legacy discovery and sync dispatch, add provider discovery and provider lookup to the sync root path, and replace shadow-baseline coverage with provider API tests plus a guard that the old symbols stay gone. Validation: go test -tags "fts5" ./internal/parser ./internal/sync ./cmd/agentsview -count=1; go vet ./...; git diff --check fix(parser): preserve pi family provider capabilities OMP shared the Pi on-disk format but was left legacy-only after the legacy registry hooks were removed, so full sync and changed-path sync could no longer reach it through the migrated provider path. Parse-diff had the same shape of regression for provider-authoritative agents because it only trusted AgentDef discovery callbacks.

Fold OMP into the concrete Pi-family provider, derive parse identity from the provider definition, and teach parse-diff plus CLI validation to accept provider-authoritative on-disk sources.
This keeps the branch as an actual migration rather than a shim around removed legacy functions.\n\nValidation: go test -tags "fts5" ./internal/parser -count=1; go test -tags "fts5" ./cmd/agentsview -run 'TestParseDiff' -count=1; go test -tags "fts5" ./internal/sync -run 'Test(ParseDiff|OMPSyncAllAndChangedPathUseProvider)' -count=1; go test -tags "fts5" ./internal/sync -count=1; go vet ./...; git diff --check fix(parser): thread ctx through pi source lookups --- cmd/agentsview/parse_diff.go | 23 +- cmd/agentsview/parse_diff_test.go | 10 + internal/parser/discovery.go | 94 -------- internal/parser/pi.go | 18 +- internal/parser/pi_provider.go | 228 ++++++++++++++++++++ internal/parser/pi_provider_test.go | 227 +++++++++++++++++++ internal/parser/pi_test.go | 196 ++++++++--------- internal/parser/provider.go | 2 + internal/parser/provider_migration.go | 4 +- internal/parser/types.go | 32 ++- internal/sync/engine.go | 67 ------ internal/sync/engine_integration_test.go | 39 ++++ internal/sync/parsediff.go | 83 ++++++- internal/sync/parsediff_integration_test.go | 29 +++ 14 files changed, 741 insertions(+), 311 deletions(-) create mode 100644 internal/parser/pi_provider.go create mode 100644 internal/parser/pi_provider_test.go diff --git a/cmd/agentsview/parse_diff.go b/cmd/agentsview/parse_diff.go index 12f32c349..5af7aeb6b 100644 --- a/cmd/agentsview/parse_diff.go +++ b/cmd/agentsview/parse_diff.go @@ -238,7 +238,7 @@ func parseDiffAgentTypes(names []string) ([]parser.AgentType, error) { strings.Join(parseDiffSupportedAgents(), ", "), ) } - if !def.FileBased || def.DiscoverFunc == nil { + if !parseDiffAgentSupported(def) { return nil, fmt.Errorf( "agent %q is not supported by parse-diff "+ "(no on-disk source to re-parse)", @@ -253,18 +253,33 @@ func parseDiffAgentTypes(names []string) ([]parser.AgentType, error) { return out, nil } -// parseDiffSupportedAgents lists the agent types parse-diff can -// re-parse: file-based agents with a discovery function. 
+// parseDiffSupportedAgents lists the agent types parse-diff can re-parse. func parseDiffSupportedAgents() []string { var names []string for _, def := range parser.Registry { - if def.FileBased && def.DiscoverFunc != nil { + if parseDiffAgentSupported(def) { names = append(names, string(def.Type)) } } return names } +func parseDiffAgentSupported(def parser.AgentDef) bool { + if !def.FileBased { + return false + } + if def.DiscoverFunc != nil { + return true + } + switch parser.ProviderMigrationModes()[def.Type] { + case parser.ProviderMigrationProviderAuthoritative: + _, ok := parser.ProviderFactoryByType(def.Type) + return ok + default: + return false + } +} + // renderParseDiffReport writes the human-readable report. An empty // archive renders a zero-count summary with no tables. Every value // that originates in session files or archive rows (IDs, paths, diff --git a/cmd/agentsview/parse_diff_test.go b/cmd/agentsview/parse_diff_test.go index 99a358c55..3b9567167 100644 --- a/cmd/agentsview/parse_diff_test.go +++ b/cmd/agentsview/parse_diff_test.go @@ -110,6 +110,16 @@ func TestParseDiffAgentTypes(t *testing.T) { in: []string{"claude"}, want: []string{"claude"}, }, + { + name: "provider authoritative agent", + in: []string{"pi"}, + want: []string{"pi"}, + }, + { + name: "provider authoritative shared provider family agent", + in: []string{"omp"}, + want: []string{"omp"}, + }, { name: "trims and lowercases", in: []string{" Claude "}, diff --git a/internal/parser/discovery.go b/internal/parser/discovery.go index 2b4bf8a3c..1cf207076 100644 --- a/internal/parser/discovery.go +++ b/internal/parser/discovery.go @@ -1471,100 +1471,6 @@ func IsPiSessionFile(path string) bool { return false } -// DiscoverPiSessions finds JSONL files under piDir that are -// valid pi sessions. Pi sessions live in -// //.jsonl; the encoded-cwd -// format is ambiguous between pi versions, so discovery -// validates by reading the session header rather than parsing -// the directory name. 
Project is left empty so ParsePiSession -// can derive it from the header cwd field. -func DiscoverPiSessions(piDir string) []DiscoveredFile { - return discoverPiLikeSessions(piDir, AgentPi) -} - -// DiscoverOMPSessions finds JSONL files under an OhMyPi session root. -// OMP uses the same layout and file format as Pi, rooted by default at -// ~/.omp/agent/sessions. -func DiscoverOMPSessions(ompDir string) []DiscoveredFile { - return discoverPiLikeSessions(ompDir, AgentOMP) -} - -func discoverPiLikeSessions(piDir string, agent AgentType) []DiscoveredFile { - if piDir == "" { - return nil - } - entries, err := os.ReadDir(piDir) - if err != nil { - return nil - } - var files []DiscoveredFile - for _, entry := range entries { - if !isDirOrSymlink(entry, piDir) { - continue - } - cwdDir := filepath.Join(piDir, entry.Name()) - sessionFiles, err := os.ReadDir(cwdDir) - if err != nil { - continue - } - for _, sf := range sessionFiles { - if sf.IsDir() { - continue - } - if !strings.HasSuffix(sf.Name(), ".jsonl") { - continue - } - path := filepath.Join(cwdDir, sf.Name()) - if !IsPiSessionFile(path) { - continue - } - files = append(files, DiscoveredFile{ - Path: path, - Agent: agent, - // Project intentionally empty; ParsePiSession - // derives project from the header cwd field. - }) - } - } - sort.Slice(files, func(i, j int) bool { - return files[i].Path < files[j].Path - }) - return files -} - -// FindPiSourceFile finds the original JSONL file for a pi -// session ID by searching all encoded-cwd subdirectories -// under piDir for a file named .jsonl. -func FindPiSourceFile(piDir, sessionID string) string { - return findPiLikeSourceFile(piDir, sessionID) -} - -// FindOMPSourceFile finds the original JSONL file for an OMP session ID. 
-func FindOMPSourceFile(ompDir, sessionID string) string { - return findPiLikeSourceFile(ompDir, sessionID) -} - -func findPiLikeSourceFile(piDir, sessionID string) string { - if piDir == "" || !IsValidSessionID(sessionID) { - return "" - } - entries, err := os.ReadDir(piDir) - if err != nil { - return "" - } - target := sessionID + ".jsonl" - for _, entry := range entries { - if !isDirOrSymlink(entry, piDir) { - continue - } - candidate := filepath.Join(piDir, entry.Name(), target) - if _, err := os.Stat(candidate); err == nil { - return candidate - } - } - return "" -} - // isRegularFile returns true if path exists and is a regular // file (not a symlink, directory, or other special file). // IsRegularFile reports whether path is a regular file (not diff --git a/internal/parser/pi.go b/internal/parser/pi.go index f9b6e965e..b8fada33e 100644 --- a/internal/parser/pi.go +++ b/internal/parser/pi.go @@ -11,22 +11,12 @@ import ( "github.com/tidwall/gjson" ) -// ParsePiSession parses a pi-agent JSONL session file. -// The file format uses a leading session-header entry followed by -// message, model_change, and compaction entries. -func ParsePiSession( +func (p *piProvider) parseSession( path, project, machine string, ) (*ParsedSession, []ParsedMessage, error) { - return parsePiLikeSession(path, project, machine, AgentPi, "pi:") -} - -// ParseOMPSession parses an OhMyPi JSONL session file. OMP uses the -// same on-disk session format as Pi, but sessions are identified with -// the omp agent type and omp: session ID prefix. 
-func ParseOMPSession( - path, project, machine string, -) (*ParsedSession, []ParsedMessage, error) { - return parsePiLikeSession(path, project, machine, AgentOMP, "omp:") + return parsePiLikeSession( + path, project, machine, p.Def.Type, p.Def.IDPrefix, + ) } func parsePiLikeSession( diff --git a/internal/parser/pi_provider.go b/internal/parser/pi_provider.go new file mode 100644 index 000000000..501b6cd74 --- /dev/null +++ b/internal/parser/pi_provider.go @@ -0,0 +1,228 @@ +package parser + +import ( + "context" + "fmt" + "os" + "path/filepath" + "strings" +) + +var _ Provider = (*piProvider)(nil) + +type piProviderFactory struct { + def AgentDef +} + +func newPiProviderFactory(def AgentDef) ProviderFactory { + return piProviderFactory{def: cloneAgentDef(def)} +} + +func (f piProviderFactory) Definition() AgentDef { + return cloneAgentDef(f.def) +} + +func (f piProviderFactory) Capabilities() Capabilities { + return piProviderCapabilities() +} + +func (f piProviderFactory) NewProvider(cfg ProviderConfig) Provider { + cfg = cfg.Clone() + return &piProvider{ + ProviderBase: ProviderBase{ + Def: cloneAgentDef(f.def), + Caps: piProviderCapabilities(), + Config: cfg, + }, + sources: newPiSourceSet(f.def.Type, cfg.Roots), + } +} + +type piProvider struct { + ProviderBase + sources DirectoryJSONLSourceSet +} + +func (p *piProvider) Discover(ctx context.Context) ([]SourceRef, error) { + sources, err := p.sources.Discover(ctx) + if err != nil { + return nil, err + } + return p.filterDiscoveredSources(sources), nil +} + +func (p *piProvider) WatchPlan(ctx context.Context) (WatchPlan, error) { + return p.sources.WatchPlan(ctx) +} + +func (p *piProvider) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + sources, err := p.sources.SourcesForChangedPath(ctx, req) + if err != nil || len(sources) == 0 { + return sources, err + } + if jsonlMissingPathFallbackAllowed(req) { + return sources, nil + } + return 
p.filterDiscoveredSources(sources), nil +} + +func (p *piProvider) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + if err := ctx.Err(); err != nil { + return SourceRef{}, false, err + } + req = providerFindRequestWithRawSessionID(p.Def, req) + for _, path := range []string{ + req.StoredFilePath, + req.FingerprintKey, + } { + if path == "" { + continue + } + if source, ok, err := p.sources.sourceForPath(ctx, path); err != nil { + return SourceRef{}, false, err + } else if ok { + return source, true, nil + } + } + if req.RawSessionID == "" || !IsValidSessionID(req.RawSessionID) { + return SourceRef{}, false, nil + } + for _, root := range p.Config.Roots { + source, ok, err := p.sourceForSessionID(ctx, root, req.RawSessionID) + if err != nil || ok { + return source, ok, err + } + } + return SourceRef{}, false, nil +} + +func (p *piProvider) sourceForSessionID( + ctx context.Context, + root string, + sessionID string, +) (SourceRef, bool, error) { + entries, err := os.ReadDir(root) + if err != nil { + return SourceRef{}, false, nil + } + target := sessionID + ".jsonl" + for _, entry := range entries { + if err := ctx.Err(); err != nil { + return SourceRef{}, false, err + } + if !isDirOrSymlink(entry, root) { + continue + } + candidate := filepath.Join(root, entry.Name(), target) + source, ok, err := p.sources.sourceForPath(ctx, candidate) + if err != nil { + return SourceRef{}, false, err + } + if ok { + return source, true, nil + } + } + return SourceRef{}, false, nil +} + +func (p *piProvider) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + return p.sources.Fingerprint(ctx, source) +} + +func (p *piProvider) Parse( + ctx context.Context, + req ParseRequest, +) (ParseOutcome, error) { + if err := ctx.Err(); err != nil { + return ParseOutcome{}, err + } + path, ok, err := p.sources.pathFromSource(ctx, req.Source) + if err != nil { + return ParseOutcome{}, err + } + if !ok { + return 
ParseOutcome{}, fmt.Errorf("pi source path unavailable") + } + machine := firstNonEmptyJSONLString(req.Machine, p.Config.Machine) + sess, msgs, err := p.parseSession(path, req.Source.ProjectHint, machine) + if err != nil { + return ParseOutcome{}, err + } + if sess == nil { + return ParseOutcome{ + ResultSetComplete: true, + SkipReason: SkipNoSession, + }, nil + } + if req.Fingerprint.Hash != "" { + sess.File.Hash = req.Fingerprint.Hash + } + return ParseOutcome{ + Results: []ParseResultOutcome{{ + Result: ParseResult{ + Session: *sess, + Messages: msgs, + }, + DataVersion: DataVersionCurrent, + }}, + ResultSetComplete: true, + }, nil +} + +func (p *piProvider) filterDiscoveredSources(sources []SourceRef) []SourceRef { + filtered := sources[:0] + for _, source := range sources { + src, ok := source.Opaque.(JSONLSource) + if !ok || !IsPiSessionFile(src.Path) { + continue + } + filtered = append(filtered, source) + } + return filtered +} + +func newPiSourceSet(agent AgentType, roots []string) DirectoryJSONLSourceSet { + return newDirectoryJSONLSourceSet(agent, roots, + withSymlinkFollowing(), + withIncludePath(isPiSourcePath), + withProjectHint(func(root, path string) string { return "" }), + withSessionIDFromPath(piSessionIDFromPath), + ) +} + +func isPiSourcePath(root, path string) bool { + return strings.HasSuffix(filepath.Base(path), ".jsonl") +} + +func piSessionIDFromPath(root, path string) string { + if !isPiSourcePath(root, path) { + return "" + } + return strings.TrimSuffix(filepath.Base(path), ".jsonl") +} + +func piProviderCapabilities() Capabilities { + return Capabilities{ + Source: jsonlFileProviderSourceCapabilities(), + Content: ContentCapabilities{ + FirstMessage: CapabilitySupported, + SessionName: CapabilitySupported, + Cwd: CapabilitySupported, + Relationships: CapabilitySupported, + Thinking: CapabilitySupported, + ToolCalls: CapabilitySupported, + ToolResults: CapabilitySupported, + PerMessageTokenUsage: CapabilitySupported, + Model: 
CapabilitySupported, + }, + } +} diff --git a/internal/parser/pi_provider_test.go b/internal/parser/pi_provider_test.go new file mode 100644 index 000000000..041f408cc --- /dev/null +++ b/internal/parser/pi_provider_test.go @@ -0,0 +1,227 @@ +package parser + +import ( + "context" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestPiProviderFactoryReplacesLegacyAdapter(t *testing.T) { + factory, ok := ProviderFactoryByType(AgentPi) + require.True(t, ok) + require.NotNil(t, factory) + + provider, ok := NewProvider(AgentPi, ProviderConfig{ + Roots: []string{t.TempDir()}, + Machine: "devbox", + }) + require.True(t, ok) + require.NotNil(t, provider) +} + +func TestOMPProviderSourceMethods(t *testing.T) { + root := t.TempDir() + sourcePath := filepath.Join(root, "encoded-cwd", "session-123.jsonl") + writeSourceFile(t, sourcePath, piProviderFixture("session-123")) + + provider, ok := NewProvider(AgentOMP, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, AgentOMP, discovered[0].Provider) + assert.Equal(t, sourcePath, discovered[0].DisplayPath) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 1) + assert.Equal(t, root, plan.Roots[0].Path) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "host~omp:session-123", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, AgentOMP, found.Provider) + assert.Equal(t, sourcePath, found.DisplayPath) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: discovered[0], + Fingerprint: SourceFingerprint{Key: sourcePath, Hash: "abc123"}, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + 
require.Len(t, outcome.Results, 1) + assert.Equal(t, "omp:session-123", outcome.Results[0].Result.Session.ID) + assert.Equal(t, AgentOMP, outcome.Results[0].Result.Session.Agent) + assert.Equal(t, "abc123", outcome.Results[0].Result.Session.File.Hash) + + require.NoError(t, os.Remove(sourcePath)) + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: sourcePath, EventKind: "remove", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, AgentOMP, changed[0].Provider) + assert.Equal(t, sourcePath, changed[0].DisplayPath) +} + +func TestPiProviderSourceMethods(t *testing.T) { + root := t.TempDir() + sourcePath := filepath.Join(root, "encoded-cwd", "session-123.jsonl") + lookupOnlyPath := filepath.Join(root, "encoded-cwd", "lookup-only.jsonl") + writeSourceFile(t, sourcePath, piProviderFixture("session-123")) + writeSourceFile(t, lookupOnlyPath, `{"type":"message"}`+"\n") + writeSourceFile(t, filepath.Join(root, "encoded-cwd", "notes.txt"), "{}\n") + writeSourceFile(t, filepath.Join(root, "root-session.jsonl"), piProviderFixture("root-session")) + writeSourceFile(t, filepath.Join(root, "encoded-cwd", "nested", "deep.jsonl"), piProviderFixture("deep")) + + provider, ok := NewProvider(AgentPi, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, AgentPi, discovered[0].Provider) + assert.Equal(t, sourcePath, discovered[0].DisplayPath) + assert.Empty(t, discovered[0].ProjectHint) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 1) + assert.Equal(t, root, plan.Roots[0].Path) + assert.True(t, plan.Roots[0].Recursive) + assert.Equal(t, []string{"*.jsonl"}, plan.Roots[0].IncludeGlobs) + + found, ok, err := provider.FindSource(context.Background(), 
FindSourceRequest{ + FullSessionID: "host~pi:session-123", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sourcePath, found.DisplayPath) + + found, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "pi:lookup-only", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, lookupOnlyPath, found.DisplayPath) + + require.NoError(t, os.Remove(sourcePath)) + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: sourcePath, EventKind: "remove", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, sourcePath, changed[0].DisplayPath) +} + +func TestPiProviderDiscoveryAcceptsSessionHeaderInNonSessionIDFilename(t *testing.T) { + root := t.TempDir() + sourcePath := filepath.Join(root, "encoded-cwd", "2025.01.01.jsonl") + writeSourceFile(t, sourcePath, piProviderFixture("header-session-id")) + + provider, ok := NewProvider(AgentPi, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, sourcePath, discovered[0].DisplayPath) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: discovered[0], + }) + require.NoError(t, err) + require.Len(t, outcome.Results, 1) + assert.Equal(t, "pi:header-session-id", outcome.Results[0].Result.Session.ID) + + _, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "2025.01.01", + }) + require.NoError(t, err) + assert.False(t, ok) +} + +func TestPiProviderDiscoversSymlinkedCWDDirectory(t *testing.T) { + root := t.TempDir() + targetDir := t.TempDir() + sourcePath := filepath.Join(root, "linked-cwd", "session-123.jsonl") + targetPath := filepath.Join(targetDir, "session-123.jsonl") + writeSourceFile(t, targetPath, piProviderFixture("session-123")) + if err := 
os.Symlink(targetDir, filepath.Join(root, "linked-cwd")); err != nil { + t.Skipf("symlink not supported: %v", err) + } + + provider, ok := NewProvider(AgentPi, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, sourcePath, discovered[0].DisplayPath) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "host~pi:session-123", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sourcePath, found.DisplayPath) +} + +func TestPiProviderParse(t *testing.T) { + root := t.TempDir() + sourcePath := filepath.Join(root, "encoded-cwd", "session-123.jsonl") + writeSourceFile(t, sourcePath, piProviderFixture("session-123")) + + provider, ok := NewProvider(AgentPi, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: sources[0], + Fingerprint: SourceFingerprint{Key: sourcePath, Hash: "abc123"}, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.Len(t, outcome.Results, 1) + assert.Equal(t, DataVersionCurrent, outcome.Results[0].DataVersion) + assert.Equal(t, "pi:session-123", outcome.Results[0].Result.Session.ID) + assert.Equal(t, "pi_project", outcome.Results[0].Result.Session.Project) + assert.Equal(t, "devbox", outcome.Results[0].Result.Session.Machine) + assert.Equal(t, "abc123", outcome.Results[0].Result.Session.File.Hash) + assert.Len(t, outcome.Results[0].Result.Messages, 2) +} + +func piProviderFixture(sessionID string) string { + return strings.Join([]string{ + `{"type":"session","version":3,"id":"` + sessionID + 
`","timestamp":"2025-01-01T10:00:00Z","cwd":"/Users/alice/code/pi-project"}`, + `{"type":"message","id":"msg-1","timestamp":"2025-01-01T10:00:01Z","message":{"role":"user","content":"Inspect the Pi source."}}`, + `{"type":"message","id":"msg-2","timestamp":"2025-01-01T10:00:02Z","message":{"role":"assistant","content":"Looks ready.","model":"claude-opus-4-5","usage":{"input_tokens":10,"output_tokens":5}}}`, + }, "\n") +} diff --git a/internal/parser/pi_test.go b/internal/parser/pi_test.go index 84d66800e..572a961e8 100644 --- a/internal/parser/pi_test.go +++ b/internal/parser/pi_test.go @@ -1,9 +1,9 @@ package parser import ( + "context" "errors" "fmt" - "os" "path/filepath" "strings" "testing" @@ -19,19 +19,54 @@ import ( func runPiParserTest(t *testing.T, content string) (*ParsedSession, []ParsedMessage) { t.Helper() path := createTestFile(t, "pi-session.jsonl", content) - sess, msgs, err := ParsePiSession(path, "my_project", "local") + sess, msgs, err := parsePiTestSession(t, path, "my_project", "local") require.NoError(t, err) return sess, msgs } -// TestParsePiSession_SessionHeader verifies that the session-level fields are +func parsePiTestSession( + t *testing.T, + path string, + project string, + machine string, +) (*ParsedSession, []ParsedMessage, error) { + t.Helper() + + provider, ok := NewProvider(AgentPi, ProviderConfig{ + Roots: []string{filepath.Dir(filepath.Dir(path))}, + Machine: machine, + }) + require.True(t, ok) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: SourceRef{ + Provider: AgentPi, + Key: path, + DisplayPath: path, + FingerprintKey: path, + ProjectHint: project, + Opaque: JSONLSource{ + Root: filepath.Dir(filepath.Dir(path)), + Path: path, + }, + }, + Machine: machine, + }) + if err != nil || len(outcome.Results) == 0 { + return nil, nil, err + } + result := outcome.Results[0].Result + return &result.Session, result.Messages, nil +} + +// TestPiProviderParsesSessionHeader verifies that the session-level 
fields are // populated correctly from the pi fixture header (PRSR-01, PRSR-11, PRSR-10). -func TestParsePiSession_SessionHeader(t *testing.T) { +func TestPiProviderParsesSessionHeader(t *testing.T) { fixturePath := createTestFile( t, "pi-test-session-uuid.jsonl", loadFixture(t, "pi/session.jsonl"), ) - sess, msgs, err := ParsePiSession(fixturePath, "", "local") + sess, msgs, err := parsePiTestSession(t, fixturePath, "", "local") require.NoError(t, err) assert.Equal(t, "pi:pi-test-session-uuid", sess.ID, "PRSR-01: session ID") @@ -57,54 +92,7 @@ func TestParsePiSession_SessionHeader(t *testing.T) { _ = msgs // not the focus of this sub-test } -func TestOMPRegistryMetadata(t *testing.T) { - def, ok := AgentByType(AgentOMP) - require.True(t, ok) - - assert.Equal(t, AgentOMP, def.Type) - assert.Equal(t, "OhMyPi", def.DisplayName) - assert.Equal(t, "OMP_DIR", def.EnvVar) - assert.Equal(t, "omp_dirs", def.ConfigKey) - assert.Equal(t, []string{".omp/agent/sessions"}, def.DefaultDirs) - assert.Equal(t, "omp:", def.IDPrefix) - assert.True(t, def.FileBased) - require.NotNil(t, def.DiscoverFunc) - require.NotNil(t, def.FindSourceFunc) -} - -func TestParseOMPSession_SessionIdentity(t *testing.T) { - fixturePath := createTestFile( - t, "omp-test-session-uuid.jsonl", - loadFixture(t, "pi/session.jsonl"), - ) - sess, msgs, err := ParseOMPSession(fixturePath, "", "local") - require.NoError(t, err) - require.NotNil(t, sess) - - assert.Equal(t, "omp:pi-test-session-uuid", sess.ID) - assert.Equal(t, AgentOMP, sess.Agent) - assert.Equal(t, "omp:2025-01-01T09-00-00-000Z_parent-uuid", sess.ParentSessionID) - assert.Equal(t, "/Users/alice/code/my-project", sess.Cwd) - assert.Equal(t, "my_project", sess.Project) - require.NotEmpty(t, msgs) -} - -func TestDiscoverOMPSessions(t *testing.T) { - root := t.TempDir() - projectDir := filepath.Join(root, "-Users-alice-code-my-project") - require.NoError(t, os.MkdirAll(projectDir, 0o755)) - path := filepath.Join(projectDir, 
"omp-test-session-uuid.jsonl") - require.NoError(t, os.WriteFile(path, []byte(loadFixture(t, "pi/session.jsonl")), 0o644)) - - files := DiscoverOMPSessions(root) - require.Len(t, files, 1) - assert.Equal(t, path, files[0].Path) - assert.Equal(t, AgentOMP, files[0].Agent) - assert.Empty(t, files[0].Project) - assert.Equal(t, path, FindOMPSourceFile(root, "omp-test-session-uuid")) -} - -func TestParsePiSession_SessionInfoName(t *testing.T) { +func TestPiProviderParsesSessionInfoName(t *testing.T) { content := strings.Join([]string{ `{"type":"session","version":3,"id":"named-sess","timestamp":"2025-01-01T10:00:00Z","cwd":"/Users/alice/code/my-project"}`, `{"type":"session_info","id":"info-1","parentId":null,"timestamp":"2025-01-01T10:00:01Z","name":"Original name"}`, @@ -122,7 +110,7 @@ func TestParsePiSession_SessionInfoName(t *testing.T) { assert.Equal(t, RoleUser, msgs[0].Role) } -func TestParsePiSession_SessionInfoLastNameWins(t *testing.T) { +func TestPiProviderParsesSessionInfoLastNameWins(t *testing.T) { content := strings.Join([]string{ `{"type":"session","version":3,"id":"renamed-sess","timestamp":"2025-01-01T10:00:00Z","cwd":"/Users/alice/code/my-project"}`, `{"type":"session_info","id":"info-1","parentId":null,"timestamp":"2025-01-01T10:00:01Z","name":"Initial name"}`, @@ -137,14 +125,14 @@ func TestParsePiSession_SessionInfoLastNameWins(t *testing.T) { assert.Equal(t, "Final name", sess.SessionName) } -// TestParsePiSession_UserMessages verifies user message content and ordinals +// TestPiProviderParsesUserMessages verifies user message content and ordinals // (PRSR-02, PRSR-01). 
-func TestParsePiSession_UserMessages(t *testing.T) { +func TestPiProviderParsesUserMessages(t *testing.T) { fixturePath := createTestFile( t, "pi-session.jsonl", loadFixture(t, "pi/session.jsonl"), ) - sess, msgs, err := ParsePiSession(fixturePath, "", "local") + sess, msgs, err := parsePiTestSession(t, fixturePath, "", "local") require.NoError(t, err) // First non-toolResult user message at index 0. @@ -156,14 +144,14 @@ func TestParsePiSession_UserMessages(t *testing.T) { assert.Contains(t, sess.FirstMessage, "Fix the login bug", "PRSR-01: FirstMessage") } -// TestParsePiSession_AssistantMessages verifies the assistant message with +// TestPiProviderParsesAssistantMessages verifies the assistant message with // thinking, text, and tool call (PRSR-03, PRSR-04, PRSR-06). -func TestParsePiSession_AssistantMessages(t *testing.T) { +func TestPiProviderParsesAssistantMessages(t *testing.T) { fixturePath := createTestFile( t, "pi-session.jsonl", loadFixture(t, "pi/session.jsonl"), ) - _, msgs, err := ParsePiSession(fixturePath, "", "local") + _, msgs, err := parsePiTestSession(t, fixturePath, "", "local") require.NoError(t, err) // entry-2 is the second entry overall (index 1 in messages). @@ -195,14 +183,14 @@ func TestParsePiSession_AssistantMessages(t *testing.T) { assert.Contains(t, assistantMsg.Content, "[Read: auth.go]", "tool use marker in Content") } -// TestParsePiSession_ToolResults verifies tool result entries are parsed +// TestPiProviderParsesToolResults verifies tool result entries are parsed // correctly (PRSR-05). 
-func TestParsePiSession_ToolResults(t *testing.T) { +func TestPiProviderParsesToolResults(t *testing.T) { fixturePath := createTestFile( t, "pi-session.jsonl", loadFixture(t, "pi/session.jsonl"), ) - _, msgs, err := ParsePiSession(fixturePath, "", "local") + _, msgs, err := parsePiTestSession(t, fixturePath, "", "local") require.NoError(t, err) var toolResultMsg *ParsedMessage @@ -223,7 +211,7 @@ func TestParsePiSession_ToolResults(t *testing.T) { assert.Contains(t, decoded, "package auth", "ContentRaw must decode to tool output text") } -func TestParsePiSession_StringContent(t *testing.T) { +func TestPiProviderParsesStringContent(t *testing.T) { header := `{"type":"session","id":"str-sess","timestamp":"2025-01-01T10:00:00Z","cwd":"/tmp"}` + "\n" t.Run("assistant string content", func(t *testing.T) { @@ -250,14 +238,14 @@ func TestParsePiSession_StringContent(t *testing.T) { }) } -// TestParsePiSession_ThinkingBlocks verifies both explicit and redacted +// TestPiProviderParsesThinkingBlocks verifies both explicit and redacted // thinking blocks (PRSR-06). -func TestParsePiSession_ThinkingBlocks(t *testing.T) { +func TestPiProviderParsesThinkingBlocks(t *testing.T) { fixturePath := createTestFile( t, "pi-session.jsonl", loadFixture(t, "pi/session.jsonl"), ) - _, msgs, err := ParsePiSession(fixturePath, "", "local") + _, msgs, err := parsePiTestSession(t, fixturePath, "", "local") require.NoError(t, err) t.Run("explicit thinking", func(t *testing.T) { @@ -291,14 +279,14 @@ func TestParsePiSession_ThinkingBlocks(t *testing.T) { }) } -// TestParsePiSession_UserMessageCount verifies that metadata entries do -// not inflate user counts even when compactions persist as system rows. -func TestParsePiSession_UserMessageCount(t *testing.T) { +// TestPiProviderParsesUserMessageCount verifies that model_change and +// compaction entries are skipped entirely and do not inflate user counts. 
+func TestPiProviderParsesUserMessageCount(t *testing.T) { fixturePath := createTestFile( t, "pi-session.jsonl", loadFixture(t, "pi/session.jsonl"), ) - sess, _, err := ParsePiSession(fixturePath, "", "local") + sess, _, err := parsePiTestSession(t, fixturePath, "", "local") require.NoError(t, err) // The fixture has 2 real user messages. Metadata rows must not count @@ -307,9 +295,9 @@ func TestParsePiSession_UserMessageCount(t *testing.T) { "UserMessageCount must only count real user messages") } -// TestParsePiSession_UserMessageCountEmptyContent verifies that user messages +// TestPiProviderParsesUserMessageCountEmptyContent verifies that user messages // with non-text or empty payloads are still counted. -func TestParsePiSession_UserMessageCountEmptyContent(t *testing.T) { +func TestPiProviderParsesUserMessageCountEmptyContent(t *testing.T) { fixture := `{"type":"session","id":"sess-1","cwd":"/tmp","timestamp":"2025-01-01T10:00:00Z"} {"type":"message","timestamp":"2025-01-01T10:00:00Z","message":{"role":"user","content":[{"type":"text","text":"hello"}]},"id":"1"} {"type":"message","timestamp":"2025-01-01T10:00:01Z","message":{"role":"user","content":[{"type":"image","source":{"data":"abc"}}]},"id":"2"} @@ -317,7 +305,7 @@ func TestParsePiSession_UserMessageCountEmptyContent(t *testing.T) { {"type":"message","timestamp":"2025-01-01T10:00:03Z","message":{"role":"assistant","content":[{"type":"text","text":"response"}]},"id":"4"}` fixturePath := createTestFile(t, "pi-empty-content.jsonl", fixture) - sess, _, err := ParsePiSession(fixturePath, "", "local") + sess, _, err := parsePiTestSession(t, fixturePath, "", "local") require.NoError(t, err) // All 3 user messages should be counted, even those without text content. 
@@ -325,21 +313,21 @@ func TestParsePiSession_UserMessageCountEmptyContent(t *testing.T) { "UserMessageCount must count user messages with empty or non-text content") } -// TestParsePiSession_SilentSkips verifies that the parser silently ignores +// TestPiProviderParsesSilentSkips verifies that the parser silently ignores // malformed JSON, thinking_level_change entries, and unknown future entry types // without returning an error. -func TestParsePiSession_SilentSkips(t *testing.T) { +func TestPiProviderParsesSilentSkips(t *testing.T) { fixturePath := createTestFile( t, "pi-session.jsonl", loadFixture(t, "pi/session.jsonl"), ) - _, _, err := ParsePiSession(fixturePath, "", "local") + _, _, err := parsePiTestSession(t, fixturePath, "", "local") require.NoError(t, err, "parser must succeed despite malformed/unknown lines") } -// TestParsePiSession_V1Session verifies that a session without an id field +// TestPiProviderParsesV1Session verifies that a session without an id field // derives its session ID from the filename (PRSR-09). 
-func TestParsePiSession_V1Session(t *testing.T) { +func TestPiProviderParsesV1Session(t *testing.T) { v1Content := strings.Join([]string{ `{"type":"session","timestamp":"2025-01-01T10:00:00Z","cwd":"/Users/alice/code/v1-project"}`, `{"type":"message","timestamp":"2025-01-01T10:00:01Z","message":{"role":"user","content":[{"type":"text","text":"hello"}]}}`, @@ -347,7 +335,7 @@ func TestParsePiSession_V1Session(t *testing.T) { }, "\n") path := createTestFile(t, "v1-session.jsonl", v1Content) - sess, _, err := ParsePiSession(path, "v1_project", "local") + sess, _, err := parsePiTestSession(t, path, "v1_project", "local") require.NoError(t, err) assert.Equal(t, "pi:v1-session", sess.ID, "PRSR-09: V1 session ID from filename") @@ -362,7 +350,7 @@ func TestParsePiSession_V1MessageLineageStaysEmpty(t *testing.T) { }, "\n") path := createTestFile(t, "v1-lineage.jsonl", content) - sess, msgs, err := ParsePiSession(path, "v1_project", "local") + sess, msgs, err := parsePiTestSession(t, path, "v1_project", "local") require.NoError(t, err) assert.Equal(t, "pi:v1-lineage", sess.ID) @@ -373,14 +361,14 @@ func TestParsePiSession_V1MessageLineageStaysEmpty(t *testing.T) { } } -// TestParsePiSession_BranchedFrom verifies the exact ParentSessionID value +// TestPiProviderParsesBranchedFrom verifies the exact ParentSessionID value // extracted from the branchedFrom field (PRSR-10). 
-func TestParsePiSession_BranchedFrom(t *testing.T) { +func TestPiProviderParsesBranchedFrom(t *testing.T) { fixturePath := createTestFile( t, "pi-session.jsonl", loadFixture(t, "pi/session.jsonl"), ) - sess, _, err := ParsePiSession(fixturePath, "", "local") + sess, _, err := parsePiTestSession(t, fixturePath, "", "local") require.NoError(t, err) t.Run("parent session ID from branchedFrom", func(t *testing.T) { @@ -443,9 +431,9 @@ func TestParsePiSession_MessageLineageContinuity(t *testing.T) { assert.Equal(t, "assistant", msgs[5].SourceType) } -// TestParsePiSession_IOError verifies that I/O errors encountered after the +// TestPiProviderParsesIOError verifies that I/O errors encountered after the // session header are surfaced and that the error string contains "reading pi". -func TestParsePiSession_IOError(t *testing.T) { +func TestPiProviderParsesIOError(t *testing.T) { t.Run("error message format contains reading pi", func(t *testing.T) { ioErr := errors.New("disk read failed") err := fmt.Errorf("reading pi %s: %w", "/some/path/session.jsonl", ioErr) @@ -458,7 +446,7 @@ func TestParsePiSession_IOError(t *testing.T) { msg := `{"type":"message","id":"entry-1","timestamp":"2025-01-01T10:00:01Z","message":{"role":"user","content":[{"type":"text","text":"hello"}]}}` + "\n" path := createTestFile(t, "pi-clean-read.jsonl", header+msg) - sess, msgs, parseErr := ParsePiSession(path, "my_project", "local") + sess, msgs, parseErr := parsePiTestSession(t, path, "my_project", "local") require.NoError(t, parseErr, "clean read must not produce an error") require.NotNil(t, sess) @@ -591,7 +579,7 @@ func TestParsePiAssistantMessage_IntentInToolMarker(t *testing.T) { "agent__intent must be normalized to description for tool marker") } -// TestParsePiSession_ErrorCases verifies error handling for missing, empty, +// TestPiProviderParsesErrorCases verifies error handling for missing, empty, // and invalid session files. 
func TestNormalizePiIntent(t *testing.T) { tests := []struct { @@ -652,22 +640,22 @@ func TestNormalizePiIntent(t *testing.T) { } } -func TestParsePiSession_ErrorCases(t *testing.T) { +func TestPiProviderParsesErrorCases(t *testing.T) { t.Run("missing file", func(t *testing.T) { - _, _, err := ParsePiSession("/nonexistent/path/session.jsonl", "proj", "local") + _, _, err := parsePiTestSession(t, "/nonexistent/path/session.jsonl", "proj", "local") assert.Error(t, err, "missing file must return error") }) t.Run("empty file", func(t *testing.T) { path := createTestFile(t, "empty.jsonl", "") - _, _, err := ParsePiSession(path, "proj", "local") + _, _, err := parsePiTestSession(t, path, "proj", "local") assert.Error(t, err, "empty file (no session header) must return error") }) t.Run("not a pi session", func(t *testing.T) { content := `{"type":"message","id":"entry-1","timestamp":"2025-01-01T10:00:00Z","message":{"role":"user","content":[{"type":"text","text":"hello"}]}}` + "\n" path := createTestFile(t, "not-pi.jsonl", content) - _, _, err := ParsePiSession(path, "proj", "local") + _, _, err := parsePiTestSession(t, path, "proj", "local") assert.Error(t, err, "file without session header must return error") }) @@ -678,23 +666,23 @@ func TestParsePiSession_ErrorCases(t *testing.T) { msg := `{"type":"message","id":"m1","timestamp":"2025-06-01T10:01:00Z","message":{"role":"user","content":"hello"}}` content := " \n\t\n" + header + "\n" + msg + "\n" path := createTestFile(t, "ws-leading.jsonl", content) - sess, msgs, err := ParsePiSession(path, "proj", "local") + sess, msgs, err := parsePiTestSession(t, path, "proj", "local") require.NoError(t, err, "whitespace-only leading lines must not cause parse failure") assert.Equal(t, "pi:ws-sess", sess.ID) assert.Len(t, msgs, 1) }) } -// TestParsePiSession_TokenUsageFromFixture verifies that assistant +// TestPiProviderParsesTokenUsageFromFixture verifies that assistant // messages in the standard pi fixture get Model and 
TokenUsage // populated from the inline message.model and message.usage fields. // Without this, the usage dashboard reports $0 for pi sessions. -func TestParsePiSession_TokenUsageFromFixture(t *testing.T) { +func TestPiProviderParsesTokenUsageFromFixture(t *testing.T) { fixturePath := createTestFile( t, "pi-session.jsonl", loadFixture(t, "pi/session.jsonl"), ) - sess, msgs, err := ParsePiSession(fixturePath, "", "local") + sess, msgs, err := parsePiTestSession(t, fixturePath, "", "local") require.NoError(t, err) var assistants []ParsedMessage @@ -736,10 +724,10 @@ func TestParsePiSession_TokenUsageFromFixture(t *testing.T) { "session PeakContextTokens = max(100, 200)") } -// TestParsePiSession_ModelFromModelChange verifies that when an +// TestPiProviderParsesModelFromModelChange verifies that when an // assistant message has no inline model field, the parser falls // back to the most recent model_change entry's modelId. -func TestParsePiSession_ModelFromModelChange(t *testing.T) { +func TestPiProviderParsesModelFromModelChange(t *testing.T) { header := `{"type":"session","id":"mc-sess","timestamp":"2025-01-01T10:00:00Z","cwd":"/tmp"}` + "\n" mc := `{"type":"model_change","id":"mc1","timestamp":"2025-01-01T10:00:00.5Z","provider":"openai","modelId":"gpt-5.4"}` + "\n" user := `{"type":"message","id":"u1","timestamp":"2025-01-01T10:00:01Z","message":{"role":"user","content":"hi"}}` + "\n" @@ -762,12 +750,12 @@ func TestParsePiSession_ModelFromModelChange(t *testing.T) { "token usage extracted from message.usage") } -// TestParsePiSession_UnknownUsageShape verifies that a present +// TestPiProviderParsesUnknownUsageShape verifies that a present // but unrecognized usage object (empty {} or a foreign schema // with none of the keys we know about) leaves TokenUsage empty // so the usage query filter skips the row, rather than // fabricating a zero-valued record. 
-func TestParsePiSession_UnknownUsageShape(t *testing.T) { +func TestPiProviderParsesUnknownUsageShape(t *testing.T) { header := `{"type":"session","id":"uu-sess","timestamp":"2025-01-01T10:00:00Z","cwd":"/tmp"}` + "\n" cases := []struct { @@ -795,14 +783,14 @@ func TestParsePiSession_UnknownUsageShape(t *testing.T) { } } -// TestParsePiSession_ZeroUsage verifies that an explicit usage +// TestPiProviderParsesZeroUsage verifies that an explicit usage // block with every counter at zero is preserved as "known // zero" rather than collapsed to "unknown". The normalized // token_usage is still written and coverage flags follow field // presence, matching the claude parser contract and letting // downstream rollups distinguish an errored request from a // missing usage blob. -func TestParsePiSession_ZeroUsage(t *testing.T) { +func TestPiProviderParsesZeroUsage(t *testing.T) { header := `{"type":"session","id":"zu-sess","timestamp":"2025-01-01T10:00:00Z","cwd":"/tmp"}` + "\n" asst := `{"type":"message","id":"a1","timestamp":"2025-01-01T10:00:01Z","message":{"role":"assistant","content":"oops","model":"gpt-5.4","usage":{"input":0,"output":0}}}` @@ -827,10 +815,10 @@ func TestParsePiSession_ZeroUsage(t *testing.T) { assert.Equal(t, 0, m.ContextTokens) } -// TestParsePiSession_NoUsageNoTokenUsage verifies that messages +// TestPiProviderParsesNoUsageNoTokenUsage verifies that messages // without a usage block do not write an empty token_usage row, // since the eligibility filter requires token_usage != ''. 
-func TestParsePiSession_NoUsageNoTokenUsage(t *testing.T) { +func TestPiProviderParsesNoUsageNoTokenUsage(t *testing.T) { header := `{"type":"session","id":"nu-sess","timestamp":"2025-01-01T10:00:00Z","cwd":"/tmp"}` + "\n" asst := `{"type":"message","id":"a1","timestamp":"2025-01-01T10:00:01Z","message":{"role":"assistant","content":"hello","model":"claude-opus-4-5"}}` diff --git a/internal/parser/provider.go b/internal/parser/provider.go index 27d6e88ca..ed3975317 100644 --- a/internal/parser/provider.go +++ b/internal/parser/provider.go @@ -357,6 +357,8 @@ func providerFactoryForDef(def AgentDef) ProviderFactory { return newIflowProviderFactory(def) case AgentGptme: return newGptmeProviderFactory(def) + case AgentOMP, AgentPi: + return newPiProviderFactory(def) case AgentZencoder: return newZencoderProviderFactory(def) default: diff --git a/internal/parser/provider_migration.go b/internal/parser/provider_migration.go index 6778a3304..eb7ea76d8 100644 --- a/internal/parser/provider_migration.go +++ b/internal/parser/provider_migration.go @@ -32,7 +32,7 @@ var providerMigrationModes = map[AgentType]ProviderMigrationMode{ AgentZencoder: ProviderMigrationProviderAuthoritative, AgentVSCodeCopilot: ProviderMigrationLegacyOnly, AgentVSCopilot: ProviderMigrationLegacyOnly, - AgentPi: ProviderMigrationLegacyOnly, + AgentPi: ProviderMigrationProviderAuthoritative, AgentQwen: ProviderMigrationLegacyOnly, AgentCommandCode: ProviderMigrationProviderAuthoritative, AgentDeepSeekTUI: ProviderMigrationProviderAuthoritative, @@ -58,7 +58,7 @@ var providerMigrationModes = map[AgentType]ProviderMigrationMode{ AgentGptme: ProviderMigrationProviderAuthoritative, AgentShelley: ProviderMigrationLegacyOnly, AgentAider: ProviderMigrationLegacyOnly, - AgentOMP: ProviderMigrationLegacyOnly, + AgentOMP: ProviderMigrationProviderAuthoritative, AgentReasonix: ProviderMigrationLegacyOnly, } diff --git a/internal/parser/types.go b/internal/parser/types.go index 3a4c04f6e..9beddd96e 100644 --- 
a/internal/parser/types.go +++ b/internal/parser/types.go @@ -308,26 +308,22 @@ var Registry = []AgentDef{ FindSourceFunc: FindVisualStudioCopilotSourceFile, }, { - Type: AgentPi, - DisplayName: "Pi", - EnvVar: "PI_DIR", - ConfigKey: "pi_dirs", - DefaultDirs: []string{".pi/agent/sessions"}, - IDPrefix: "pi:", - FileBased: true, - DiscoverFunc: DiscoverPiSessions, - FindSourceFunc: FindPiSourceFile, + Type: AgentPi, + DisplayName: "Pi", + EnvVar: "PI_DIR", + ConfigKey: "pi_dirs", + DefaultDirs: []string{".pi/agent/sessions"}, + IDPrefix: "pi:", + FileBased: true, }, { - Type: AgentOMP, - DisplayName: "OhMyPi", - EnvVar: "OMP_DIR", - ConfigKey: "omp_dirs", - DefaultDirs: []string{".omp/agent/sessions"}, - IDPrefix: "omp:", - FileBased: true, - DiscoverFunc: DiscoverOMPSessions, - FindSourceFunc: FindOMPSourceFile, + Type: AgentOMP, + DisplayName: "OhMyPi", + EnvVar: "OMP_DIR", + ConfigKey: "omp_dirs", + DefaultDirs: []string{".omp/agent/sessions"}, + IDPrefix: "omp:", + FileBased: true, }, { Type: AgentQwen, diff --git a/internal/sync/engine.go b/internal/sync/engine.go index 60f74372c..e60b5110c 100644 --- a/internal/sync/engine.go +++ b/internal/sync/engine.go @@ -1374,32 +1374,6 @@ func (e *Engine) classifyOnePath( return df, true } - // Pi/OMP: //.jsonl - for _, agent := range []parser.AgentType{parser.AgentPi, parser.AgentOMP} { - for _, piDir := range e.agentDirs[agent] { - if piDir == "" { - continue - } - if rel, ok := isUnder(piDir, path); ok { - parts := strings.Split(rel, sep) - if len(parts) != 2 { - continue - } - if !strings.HasSuffix(parts[1], ".jsonl") { - continue - } - if !parser.IsPiSessionFile(path) { - continue - } - return parser.DiscoveredFile{ - Path: path, - Agent: agent, - // Project left empty; parser derives from header cwd. 
- }, true - } - } - } - // Qwen: //chats/.jsonl for _, qwenDir := range e.agentDirs[parser.AgentQwen] { if qwenDir == "" { @@ -4704,8 +4678,6 @@ func (e *Engine) processFile( res = e.processVSCodeCopilot(file, info) case parser.AgentVSCopilot: res = e.processVisualStudioCopilot(file, info) - case parser.AgentPi, parser.AgentOMP: - res = e.processPi(file, info) case parser.AgentQwen: res = e.processQwen(file, info) case parser.AgentOpenClaw: @@ -7335,45 +7307,6 @@ func (e *Engine) processCursor( } } -// processPi parses a pi session file and returns the result -// for batching. Modeled on processClaude. -func (e *Engine) processPi( - file parser.DiscoveredFile, info os.FileInfo, -) processResult { - if e.shouldSkipByPath(file.Path, info) { - return processResult{skip: true} - } - - var ( - sess *parser.ParsedSession - msgs []parser.ParsedMessage - err error - ) - if file.Agent == parser.AgentOMP { - sess, msgs, err = parser.ParseOMPSession(file.Path, file.Project, e.machine) - } else { - sess, msgs, err = parser.ParsePiSession(file.Path, file.Project, e.machine) - } - if err != nil { - return processResult{err: err} - } - if sess == nil { - return processResult{} - } - - hash, err := ComputeFileHash(file.Path) - if err == nil { - sess.File.Hash = hash - } - - return processResult{ - results: []parser.ParseResult{{ - Session: *sess, - Messages: msgs, - }}, - } -} - func (e *Engine) processQwen( file parser.DiscoveredFile, info os.FileInfo, ) processResult { diff --git a/internal/sync/engine_integration_test.go b/internal/sync/engine_integration_test.go index 08e7ca34e..41a0446c6 100644 --- a/internal/sync/engine_integration_test.go +++ b/internal/sync/engine_integration_test.go @@ -37,6 +37,7 @@ type testEnv struct { iflowDir string ampDir string piDir string + ompDir string kiroDir string shelleyDir string antigravityCLIDir string @@ -117,6 +118,7 @@ func setupTestEnv(t *testing.T, opts ...TestEnvOption) *testEnv { iflowDir: t.TempDir(), ampDir: t.TempDir(), piDir: 
t.TempDir(), + ompDir: t.TempDir(), shelleyDir: t.TempDir(), antigravityCLIDir: t.TempDir(), db: dbtest.OpenTestDB(t), @@ -184,6 +186,7 @@ func setupTestEnv(t *testing.T, opts ...TestEnvOption) *testEnv { parser.AgentIflow: {env.iflowDir}, parser.AgentAmp: {env.ampDir}, parser.AgentPi: {env.piDir}, + parser.AgentOMP: {env.ompDir}, parser.AgentKiro: kiroDirs, parser.AgentShelley: {env.shelleyDir}, parser.AgentAntigravityCLI: {env.antigravityCLIDir}, @@ -6963,6 +6966,42 @@ func TestPiSessionIntegration(t *testing.T) { ) } +func TestOMPSyncAllAndChangedPathUseProvider(t *testing.T) { + env := setupTestEnv(t) + path := env.writeSession( + t, + env.ompDir, + filepath.Join("encoded-cwd", "omp-sync.jsonl"), + piLikeProviderFixture("omp-sync", "/Users/alice/code/omp-app"), + ) + + runSyncAndAssert(t, env.engine, sync.SyncStats{ + TotalSessions: 1, Synced: 1, + }) + assertSessionState(t, env.db, "omp:omp-sync", func(sess *db.Session) { + assert.Equal(t, "omp", sess.Agent) + assert.Equal(t, "omp_app", sess.Project) + }) + assert.Equal(t, path, env.engine.FindSourceFile("omp:omp-sync")) + + updated := piLikeProviderFixture("omp-sync", "/Users/alice/code/omp-renamed") + dbtest.WriteTestFile(t, path, []byte(updated)) + env.engine.SyncPaths([]string{path}) + + assertSessionState(t, env.db, "omp:omp-sync", func(sess *db.Session) { + assert.Equal(t, "omp", sess.Agent) + assert.Equal(t, "omp_renamed", sess.Project) + }) +} + +func piLikeProviderFixture(sessionID, cwd string) string { + return strings.Join([]string{ + `{"type":"session","version":3,"id":"` + sessionID + `","timestamp":"2025-01-01T10:00:00Z","cwd":"` + cwd + `"}`, + `{"type":"message","id":"msg-1","timestamp":"2025-01-01T10:00:01Z","message":{"role":"user","content":"Inspect the source."}}`, + `{"type":"message","id":"msg-2","timestamp":"2025-01-01T10:00:02Z","message":{"role":"assistant","content":"Ready.","model":"claude-opus-4-5"}}`, + }, "\n") +} + func TestIncrementalSync_ClaudeAppend(t *testing.T) { env := 
setupTestEnv(t) diff --git a/internal/sync/parsediff.go b/internal/sync/parsediff.go index a28ca47b5..52be8cb33 100644 --- a/internal/sync/parsediff.go +++ b/internal/sync/parsediff.go @@ -3,6 +3,7 @@ package sync import ( "context" "fmt" + "log" "os" "path/filepath" "sort" @@ -48,7 +49,7 @@ func (e *Engine) ParseDiff(ctx context.Context, opts ParseDiffOptions) (*ParseDi e.syncMu.Lock() defer e.syncMu.Unlock() - resolved, err := resolveParseDiffAgents(opts.Agents) + resolved, err := e.resolveParseDiffAgents(opts.Agents) if err != nil { return nil, err } @@ -68,13 +69,18 @@ func (e *Engine) ParseDiff(ctx context.Context, opts ParseDiffOptions) (*ParseDi } // Discovery mirrors syncAllLocked's file phase: per-agent - // DiscoverFunc over the configured dirs, then dedupe and the + // DiscoverFunc over the configured dirs, or provider discovery for + // agents that have dropped legacy discovery, then dedupe and the // legacy-Kiro shadow filter. var files []parser.DiscoveredFile for _, def := range resolved { - for _, d := range e.agentDirs[def.Type] { - files = append(files, def.DiscoverFunc(d)...) + if def.DiscoverFunc != nil { + for _, d := range e.agentDirs[def.Type] { + files = append(files, def.DiscoverFunc(d)...) + } + continue } + files = append(files, e.parseDiffProviderSources(ctx, def.Type)...) } // DiscoverFunc does not emit the shared-SQLite source for Kiro // (data.sqlite3) or db-mode OpenCode (opencode.db) — normal sync @@ -204,18 +210,79 @@ func (e *Engine) ParseDiff(ctx context.Context, opts ParseDiffOptions) (*ParseDi return report, nil } +// parseDiffProviderSources discovers an agent's on-disk sources through +// the provider facade for agents that have dropped their DiscoverFunc. 
+func (e *Engine) parseDiffProviderSources( + ctx context.Context, + agentType parser.AgentType, +) []parser.DiscoveredFile { + factory, ok := e.providerFactories[agentType] + if !ok || factory == nil { + return nil + } + roots := e.agentDirs[agentType] + if len(roots) == 0 { + return nil + } + provider := factory.NewProvider(parser.ProviderConfig{ + Roots: roots, + Machine: e.machine, + }) + sources, err := provider.Discover(ctx) + if err != nil { + log.Printf("parse-diff %s provider discovery: %v", agentType, err) + return nil + } + def := provider.Definition() + var files []parser.DiscoveredFile + for _, source := range sources { + sourcePath := providerDiscoveredPath(source) + if sourcePath == "" { + continue + } + agent := source.Provider + if agent == "" { + agent = def.Type + } + sourceCopy := source + files = append(files, parser.DiscoveredFile{ + Path: sourcePath, + Project: source.ProjectHint, + Agent: agent, + ProviderSource: &sourceCopy, + ProviderProcess: true, + }) + } + return files +} + +func (e *Engine) parseDiffAgentDiscoverable(def parser.AgentDef) bool { + if !def.FileBased { + return false + } + if def.DiscoverFunc != nil { + return true + } + switch e.providerMigrationModes[def.Type] { + case parser.ProviderMigrationProviderAuthoritative: + factory, ok := e.providerFactories[def.Type] + return ok && factory != nil + default: + return false + } +} + // resolveParseDiffAgents validates the requested agent set against // the registry and returns the matching defs in registry order. Only -// file-based agents with a DiscoverFunc have an on-disk source to -// re-parse. -func resolveParseDiffAgents( +// file-based agents with an on-disk source can be re-parsed. 
+func (e *Engine) resolveParseDiffAgents( requested []parser.AgentType, ) ([]parser.AgentDef, error) { var allowed []parser.AgentDef allowedSet := make(map[parser.AgentType]bool) var names []string for _, def := range parser.Registry { - if def.FileBased && def.DiscoverFunc != nil { + if e.parseDiffAgentDiscoverable(def) { allowed = append(allowed, def) allowedSet[def.Type] = true names = append(names, string(def.Type)) diff --git a/internal/sync/parsediff_integration_test.go b/internal/sync/parsediff_integration_test.go index 8060baa9a..faef85da3 100644 --- a/internal/sync/parsediff_integration_test.go +++ b/internal/sync/parsediff_integration_test.go @@ -40,6 +40,7 @@ func newParseDiffEngine(env *testEnv) *sync.Engine { parser.AgentIflow: {env.iflowDir}, parser.AgentAmp: {env.ampDir}, parser.AgentPi: {env.piDir}, + parser.AgentOMP: {env.ompDir}, parser.AgentKiro: {env.kiroDir}, parser.AgentKilo: {env.kiloDir}, parser.AgentShelley: {env.shelleyDir}, @@ -807,6 +808,34 @@ func TestParseDiffAgentScope(t *testing.T) { "ParseDiff must reject database-backed agents") } +func TestParseDiffCoversProviderAuthoritativePiFamily(t *testing.T) { + env := setupTestEnv(t) + env.writeSession( + t, + env.piDir, + filepath.Join("encoded-cwd", "pd-pi.jsonl"), + piLikeProviderFixture("pd-pi", "/Users/alice/code/pi-app"), + ) + env.writeSession( + t, + env.ompDir, + filepath.Join("encoded-cwd", "pd-omp.jsonl"), + piLikeProviderFixture("pd-omp", "/Users/alice/code/omp-app"), + ) + runSyncAndAssert(t, env.engine, sync.SyncStats{ + TotalSessions: 2, Synced: 2, + }) + + report := runParseDiff(t, env, sync.ParseDiffOptions{ + Agents: []parser.AgentType{parser.AgentPi, parser.AgentOMP}, + }) + assert.Equal(t, []string{"pi", "omp"}, report.Agents) + assert.Equal(t, 2, report.FilesExamined) + assert.Equal(t, sync.ParseDiffTotals{ + Examined: 2, Identical: 2, + }, report.Totals) +} + // TestParseDiffCoversKiroSQLite proves that Kiro's shared data.sqlite3 // store — which DiscoverFunc never 
emits and which normal sync reaches // through a dedicated phase — is actually re-parsed by parse-diff. A From e892b4dd4997d4df90c153a73fbfe1b62f098381 Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Wed, 24 Jun 2026 21:33:52 -0400 Subject: [PATCH 17/24] feat(parser): migrate workbuddy provider WorkBuddy is still JSONL-backed, but its source layout has two valid shapes: project-level session files and nested subagent files. Moving it behind a concrete provider keeps that provider-specific shape explicit while continuing to reuse the shared JSONL filesystem mechanics. The provider preserves legacy discovery and lookup behavior, including symlinked project directories and files, compound subagent raw IDs, deleted-path classification, source fingerprinting, and existing parser normalization for parent/subagent relationships. Validation: go fmt ./...; go test -tags "fts5" ./internal/parser -run TestWorkBuddyProvider -count=1; go test -tags "fts5" ./internal/parser -count=1; go vet ./...; make test-short; git diff --check test(parser): document workbuddy subagent discovery WorkBuddy legacy discovery accepts any JSONL filename under a valid parent session's subagents directory, while raw subagent lookup still validates the requested ID. The provider migration intentionally preserves that asymmetry rather than tightening discovery and dropping sources that older code would import. Validation: go fmt ./...; go test -tags "fts5" ./internal/parser -run TestWorkBuddyProviderSourceMethods -count=1; go test -tags "fts5" ./internal/parser -count=1; go vet ./...; git diff --check; make nilaway test(parser): opt workbuddy into provider shadow WorkBuddy now has a concrete facade provider on this branch, so its migration mode should enter the shared shadow-compare harness rather than remaining legacy-only and additive. Lower provider opt-ins stay inherited and later provider branches remain responsible for their own concrete providers. 
Validation: go test -tags "fts5" ./internal/parser -run TestProviderMigrationModes -count=1; go test -tags "fts5" ./internal/parser -count=1; go vet ./...; git diff --check test(sync): compare workbuddy shadow parity WorkBuddy is shadow-compared on this branch, so add source-level migration coverage that compares provider observation with ParseWorkBuddySession. The test covers both the main session file and nested subagent file shape so parent relationship parity stays visible while the stack migrates provider by provider. Validation: go test -tags "fts5" ./internal/parser ./internal/sync -run 'TestObserveProviderSourceMatchesWorkBuddyLegacyParser|TestWorkBuddyProvider|TestParseWorkBuddy' -count=1; go test -tags "fts5" ./internal/parser ./internal/sync -count=1; go fmt ./...; go vet ./...; ./custom-gcl run --config .golangci.nilaway.yml ./internal/parser/... ./internal/sync/...; git diff --check; go test -tags "fts5" ./internal/sync -run TestObserveProviderSourceMatchesWorkBuddyLegacyParser -count=1 refactor(parser): fold workbuddy into provider WorkBuddy already had a concrete provider, but it still depended on exported legacy parser/source functions and legacy sync dispatch. That kept the branch additive and let the old shape remain authoritative.\n\nMove parsing and composite subagent source lookup behind the provider, remove registry callbacks and sync dispatch, and convert the WorkBuddy tests to provider-backed helpers plus a guard that the old entrypoints stay gone.\n\nValidation: go test -tags "fts5" ./internal/parser ./internal/sync -run 'TestWorkBuddy|TestDiscoverWorkBuddy|TestParseWorkBuddy|TestFindWorkBuddy|TestEngineClassifyWorkBuddy|TestWorkBuddyRegistry' -count=1 -v; go test -tags "fts5" ./internal/parser ./internal/sync ./cmd/agentsview -count=1; go vet ./...; git diff --check fix(parser): preserve workbuddy file hashes WorkBuddy legacy sync stored the transcript content hash for both main sessions and subagent transcripts. 
The provider migration kept copying Fingerprint.Hash into Session.File.Hash, but the recursive source set did not request hashed fingerprints, so provider-authoritative writes would clear file_hash.\n\nEnable source hashing and make the provider parse test exercise Fingerprint -> Parse for both main and subagent sources.\n\nValidation: go test -tags "fts5" ./internal/parser -run TestWorkBuddyProvider -count=1; go test -tags "fts5" ./internal/parser -count=1; go test -tags "fts5" ./internal/sync -run 'Test.*WorkBuddy' -count=1; go vet ./...; git diff --check fix(parser): thread ctx through workbuddy source lookups --- internal/parser/provider.go | 2 + internal/parser/provider_migration.go | 2 +- internal/parser/types.go | 16 +- internal/parser/workbuddy.go | 97 +-------- internal/parser/workbuddy_provider.go | 129 +++++++++++ internal/parser/workbuddy_provider_test.go | 238 +++++++++++++++++++++ internal/parser/workbuddy_test.go | 66 ++++-- internal/sync/engine.go | 75 ------- internal/sync/workbuddy_test.go | 19 +- 9 files changed, 443 insertions(+), 201 deletions(-) create mode 100644 internal/parser/workbuddy_provider.go create mode 100644 internal/parser/workbuddy_provider_test.go diff --git a/internal/parser/provider.go b/internal/parser/provider.go index ed3975317..817381435 100644 --- a/internal/parser/provider.go +++ b/internal/parser/provider.go @@ -359,6 +359,8 @@ func providerFactoryForDef(def AgentDef) ProviderFactory { return newGptmeProviderFactory(def) case AgentOMP, AgentPi: return newPiProviderFactory(def) + case AgentWorkBuddy: + return newWorkBuddyProviderFactory(def) case AgentZencoder: return newZencoderProviderFactory(def) default: diff --git a/internal/parser/provider_migration.go b/internal/parser/provider_migration.go index eb7ea76d8..6da788779 100644 --- a/internal/parser/provider_migration.go +++ b/internal/parser/provider_migration.go @@ -45,7 +45,7 @@ var providerMigrationModes = map[AgentType]ProviderMigrationMode{ AgentKiroIDE: 
ProviderMigrationLegacyOnly, AgentCortex: ProviderMigrationLegacyOnly, AgentHermes: ProviderMigrationLegacyOnly, - AgentWorkBuddy: ProviderMigrationLegacyOnly, + AgentWorkBuddy: ProviderMigrationProviderAuthoritative, AgentForge: ProviderMigrationLegacyOnly, AgentPiebald: ProviderMigrationLegacyOnly, AgentWarp: ProviderMigrationLegacyOnly, diff --git a/internal/parser/types.go b/internal/parser/types.go index 9beddd96e..019bb3a76 100644 --- a/internal/parser/types.go +++ b/internal/parser/types.go @@ -461,15 +461,13 @@ var Registry = []AgentDef{ FindSourceFunc: FindHermesSourceFile, }, { - Type: AgentWorkBuddy, - DisplayName: "WorkBuddy", - EnvVar: "WORKBUDDY_PROJECTS_DIR", - ConfigKey: "workbuddy_project_dirs", - DefaultDirs: []string{".workbuddy/projects"}, - IDPrefix: "workbuddy:", - FileBased: true, - DiscoverFunc: DiscoverWorkBuddySessions, - FindSourceFunc: FindWorkBuddySourceFile, + Type: AgentWorkBuddy, + DisplayName: "WorkBuddy", + EnvVar: "WORKBUDDY_PROJECTS_DIR", + ConfigKey: "workbuddy_project_dirs", + DefaultDirs: []string{".workbuddy/projects"}, + IDPrefix: "workbuddy:", + FileBased: true, }, { Type: AgentForge, diff --git a/internal/parser/workbuddy.go b/internal/parser/workbuddy.go index 135f939f4..847a98393 100644 --- a/internal/parser/workbuddy.go +++ b/internal/parser/workbuddy.go @@ -5,108 +5,13 @@ import ( "fmt" "os" "path/filepath" - "sort" "strings" "time" "github.com/tidwall/gjson" ) -func DiscoverWorkBuddySessions(projectsDir string) []DiscoveredFile { - if projectsDir == "" { - return nil - } - - projects, err := os.ReadDir(projectsDir) - if err != nil { - return nil - } - - var files []DiscoveredFile - for _, projEntry := range projects { - if !isDirOrSymlink(projEntry, projectsDir) { - continue - } - project := projEntry.Name() - projectDir := filepath.Join(projectsDir, project) - entries, err := os.ReadDir(projectDir) - if err != nil { - continue - } - for _, entry := range entries { - name := entry.Name() - if !entry.IsDir() && 
strings.HasSuffix(name, ".jsonl") { - stem := strings.TrimSuffix(name, ".jsonl") - if IsValidSessionID(stem) { - files = append(files, DiscoveredFile{ - Path: filepath.Join(projectDir, name), - Project: project, - Agent: AgentWorkBuddy, - }) - } - continue - } - if !isDirOrSymlink(entry, projectDir) || !IsValidSessionID(name) { - continue - } - subagentsDir := filepath.Join(projectDir, name, "subagents") - subagents, err := os.ReadDir(subagentsDir) - if err != nil { - continue - } - for _, sub := range subagents { - if sub.IsDir() || !strings.HasSuffix(sub.Name(), ".jsonl") { - continue - } - files = append(files, DiscoveredFile{ - Path: filepath.Join(subagentsDir, sub.Name()), - Project: project, - Agent: AgentWorkBuddy, - }) - } - } - } - - sort.Slice(files, func(i, j int) bool { - return files[i].Path < files[j].Path - }) - return files -} - -func FindWorkBuddySourceFile(projectsDir, rawID string) string { - if projectsDir == "" { - return "" - } - rawID = strings.TrimPrefix(rawID, "workbuddy:") - sessionID, subagentID, hasSubagent := strings.Cut(rawID, ":subagent:") - if !IsValidSessionID(sessionID) { - return "" - } - if hasSubagent && !IsValidSessionID(subagentID) { - return "" - } - - projects, err := os.ReadDir(projectsDir) - if err != nil { - return "" - } - for _, projEntry := range projects { - if !isDirOrSymlink(projEntry, projectsDir) { - continue - } - projectDir := filepath.Join(projectsDir, projEntry.Name()) - candidate := filepath.Join(projectDir, sessionID+".jsonl") - if hasSubagent { - candidate = filepath.Join(projectDir, sessionID, "subagents", subagentID+".jsonl") - } - if _, err := os.Stat(candidate); err == nil { - return candidate - } - } - return "" -} - -func ParseWorkBuddySession(path, project, machine string) (*ParsedSession, []ParsedMessage, error) { +func parseWorkBuddySession(path, project, machine string) (*ParsedSession, []ParsedMessage, error) { info, err := os.Stat(path) if err != nil { return nil, nil, fmt.Errorf("stat %s: %w", 
path, err) diff --git a/internal/parser/workbuddy_provider.go b/internal/parser/workbuddy_provider.go new file mode 100644 index 000000000..d88ca5593 --- /dev/null +++ b/internal/parser/workbuddy_provider.go @@ -0,0 +1,129 @@ +package parser + +import ( + "context" + "path/filepath" + "strings" +) + +// WorkBuddy stores each session as a JSONL file in a project directory, with +// subagent transcripts nested under a "subagents" subdirectory. It is a +// directory-of-files provider: discovery, watching, change classification, +// lookup, and fingerprinting come from JSONLSourceSet, and the ParseFile option +// makes that source set a full SourceSet so it rides the generic factory. +func newWorkBuddyProviderFactory(def AgentDef) ProviderFactory { + return newSourceSetFactory( + def, + workBuddyProviderCapabilities(), + func(cfg ProviderConfig) SourceSet { return newWorkBuddySourceSet(cfg.Roots) }, + ) +} + +func newWorkBuddySourceSet(roots []string) JSONLSourceSet { + return newJSONLSourceSet(AgentWorkBuddy, roots, + withRecursive(), + withSymlinkFollowing(), + withContentHashing(), + withIncludePath(isWorkBuddySourcePath), + withProjectHint(workBuddyProjectHintFromPath), + withSessionIDFromPath(workBuddySessionIDFromPath), + withLookupIDValid(isWorkBuddyLookupID), + withParseFile(workBuddyParseFile), + ) +} + +func workBuddyParseFile( + _ context.Context, path string, req ParseRequest, +) ([]ParseResult, []string, error) { + sess, msgs, err := parseWorkBuddySession(path, req.Source.ProjectHint, req.Machine) + if err != nil { + return nil, nil, err + } + if sess == nil { + return nil, nil, nil + } + if req.Fingerprint.Hash != "" { + sess.File.Hash = req.Fingerprint.Hash + } + return []ParseResult{{Session: *sess, Messages: msgs}}, nil, nil +} + +func isWorkBuddySourcePath(root, path string) bool { + parts, ok := workBuddyPathParts(root, path) + if !ok { + return false + } + switch len(parts) { + case 2: + stem, ok := strings.CutSuffix(parts[1], ".jsonl") + return ok 
&& IsValidSessionID(stem) + case 4: + return IsValidSessionID(parts[1]) && + parts[2] == "subagents" && + strings.HasSuffix(parts[3], ".jsonl") + default: + return false + } +} + +func workBuddyProjectHintFromPath(root, path string) string { + parts, ok := workBuddyPathParts(root, path) + if !ok || len(parts) < 2 { + return "" + } + return parts[0] +} + +func workBuddySessionIDFromPath(root, path string) string { + if !isWorkBuddySourcePath(root, path) { + return "" + } + parts, _ := workBuddyPathParts(root, path) + stem := strings.TrimSuffix(filepath.Base(path), ".jsonl") + if len(parts) == 4 { + return parts[1] + ":subagent:" + stem + } + return stem +} + +func isWorkBuddyLookupID(rawID string) bool { + if rawID == "" { + return false + } + sessionID, subagentID, hasSubagent := strings.Cut(rawID, ":subagent:") + if !IsValidSessionID(sessionID) { + return false + } + return !hasSubagent || IsValidSessionID(subagentID) +} + +func workBuddyPathParts(root, path string) ([]string, bool) { + rel, err := filepath.Rel(root, path) + if err != nil { + return nil, false + } + parts := strings.Split(rel, string(filepath.Separator)) + for _, part := range parts { + if part == "" || part == "." || part == ".." 
{ + return nil, false + } + } + return parts, true +} + +func workBuddyProviderCapabilities() Capabilities { + return Capabilities{ + Source: jsonlFileProviderSourceCapabilities(), + Content: ContentCapabilities{ + FirstMessage: CapabilitySupported, + Cwd: CapabilitySupported, + Relationships: CapabilitySupported, + Subagents: CapabilitySupported, + ToolCalls: CapabilitySupported, + ToolResults: CapabilitySupported, + PerMessageTokenUsage: CapabilitySupported, + MalformedLineCount: CapabilitySupported, + Model: CapabilitySupported, + }, + } +} diff --git a/internal/parser/workbuddy_provider_test.go b/internal/parser/workbuddy_provider_test.go new file mode 100644 index 000000000..c411801c1 --- /dev/null +++ b/internal/parser/workbuddy_provider_test.go @@ -0,0 +1,238 @@ +package parser + +import ( + "context" + "crypto/sha256" + "fmt" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestWorkBuddyProviderFactoryReplacesLegacyAdapter(t *testing.T) { + factory, ok := ProviderFactoryByType(AgentWorkBuddy) + require.True(t, ok) + require.NotNil(t, factory) + + caps := factory.Capabilities() + assert.Equal(t, CapabilitySupported, caps.Source.DiscoverSources) + assert.Equal(t, CapabilitySupported, caps.Source.WatchSources) + assert.Equal(t, CapabilitySupported, caps.Source.ClassifyChangedPath) + assert.Equal(t, CapabilitySupported, caps.Source.FindSource) + assert.Equal(t, CapabilitySupported, caps.Source.CompositeFingerprint) + assert.Equal(t, CapabilitySupported, caps.Content.FirstMessage) + assert.Equal(t, CapabilitySupported, caps.Content.Cwd) + assert.Equal(t, CapabilitySupported, caps.Content.Relationships) + assert.Equal(t, CapabilitySupported, caps.Content.Subagents) + assert.Equal(t, CapabilitySupported, caps.Content.ToolCalls) + assert.Equal(t, CapabilitySupported, caps.Content.ToolResults) + assert.Equal(t, CapabilitySupported, caps.Content.PerMessageTokenUsage) + assert.Equal(t, 
CapabilitySupported, caps.Content.Model) + assert.Equal(t, CapabilitySupported, caps.Content.MalformedLineCount) + + provider, ok := NewProvider(AgentWorkBuddy, ProviderConfig{ + Roots: []string{t.TempDir()}, + Machine: "devbox", + }) + require.True(t, ok) + require.NotNil(t, provider) +} + +func TestWorkBuddyProviderSourceMethods(t *testing.T) { + root := t.TempDir() + sessionID := "11111111-1111-4111-8111-111111111111" + subagentID := "agent-123" + projectDir := filepath.Join(root, "proj") + sourcePath := filepath.Join(projectDir, sessionID+".jsonl") + subagentPath := filepath.Join( + projectDir, sessionID, "subagents", subagentID+".jsonl", + ) + nonIDSubagentPath := filepath.Join( + projectDir, sessionID, "subagents", "2025.01.01.jsonl", + ) + writeSourceFile(t, sourcePath, workBuddyProviderFixture("hello")) + writeSourceFile(t, subagentPath, workBuddyProviderFixture("sub task")) + writeSourceFile(t, nonIDSubagentPath, workBuddyProviderFixture("dated sub task")) + writeSourceFile(t, filepath.Join(projectDir, "2025.01.01.jsonl"), "{}\n") + writeSourceFile(t, filepath.Join(projectDir, sessionID, "tool-results", "tool_123.txt"), "{}\n") + writeSourceFile(t, filepath.Join(root, sessionID+".jsonl"), "{}\n") + writeSourceFile(t, filepath.Join(projectDir, sessionID, "subagents", "nested", "deep.jsonl"), "{}\n") + + provider, ok := NewProvider(AgentWorkBuddy, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 3) + assert.Equal( + t, + []string{sourcePath, nonIDSubagentPath, subagentPath}, + sourceDisplayPaths(discovered), + ) + assert.Equal(t, []string{"proj", "proj", "proj"}, sourceProjects(discovered)) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 1) + assert.Equal(t, root, plan.Roots[0].Path) + assert.True(t, plan.Roots[0].Recursive) + 
assert.Equal(t, []string{"*.jsonl"}, plan.Roots[0].IncludeGlobs) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "host~workbuddy:" + sessionID, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sourcePath, found.DisplayPath) + + fingerprint, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + assert.Equal(t, sourcePath, fingerprint.Key) + assert.NotZero(t, fingerprint.Size) + assert.NotZero(t, fingerprint.MTimeNS) + + found, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: sessionID + ":subagent:" + subagentID, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, subagentPath, found.DisplayPath) + + _, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: sessionID + ":subagent:../agent-123", + }) + require.NoError(t, err) + assert.False(t, ok) + + _, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: sessionID + ":subagent:2025.01.01", + }) + require.NoError(t, err) + assert.False(t, ok) + + found, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + StoredFilePath: subagentPath, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, subagentPath, found.DisplayPath) + + require.NoError(t, os.Remove(subagentPath)) + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: subagentPath, EventKind: "remove", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, subagentPath, changed[0].DisplayPath) +} + +func TestWorkBuddyProviderDiscoversSymlinkedProjectDirectory(t *testing.T) { + root := t.TempDir() + targetDir := t.TempDir() + sessionID := "11111111-1111-4111-8111-111111111111" + linkDir := filepath.Join(root, "proj") + sourcePath := filepath.Join(linkDir, sessionID+".jsonl") + writeSourceFile(t, filepath.Join(targetDir, 
sessionID+".jsonl"), workBuddyProviderFixture("hello")) + if err := os.Symlink(targetDir, linkDir); err != nil { + t.Skipf("symlink not supported: %v", err) + } + + provider, ok := NewProvider(AgentWorkBuddy, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, sourcePath, discovered[0].DisplayPath) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "host~workbuddy:" + sessionID, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sourcePath, found.DisplayPath) +} + +func TestWorkBuddyProviderParseMainAndSubagent(t *testing.T) { + root := t.TempDir() + sessionID := "11111111-1111-4111-8111-111111111111" + subagentID := "agent-123" + sourcePath := filepath.Join(root, "proj", sessionID+".jsonl") + subagentPath := filepath.Join(root, "proj", sessionID, "subagents", subagentID+".jsonl") + mainContent := workBuddyProviderFixture("hello") + subContent := workBuddyProviderFixture("sub task") + writeSourceFile(t, sourcePath, mainContent) + writeSourceFile(t, subagentPath, subContent) + + provider, ok := NewProvider(AgentWorkBuddy, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 2) + + mainFingerprint, err := provider.Fingerprint(context.Background(), sources[0]) + require.NoError(t, err) + mainOutcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: sources[0], + Fingerprint: mainFingerprint, + }) + require.NoError(t, err) + require.True(t, mainOutcome.ResultSetComplete) + require.Len(t, mainOutcome.Results, 1) + mainResult := mainOutcome.Results[0] + assert.Equal(t, DataVersionCurrent, mainResult.DataVersion) + assert.Equal(t, "workbuddy:"+sessionID, 
mainResult.Result.Session.ID) + assert.Equal(t, "devbox", mainResult.Result.Session.Machine) + assert.Equal(t, + fmt.Sprintf("%x", sha256.Sum256([]byte(mainContent))), + mainResult.Result.Session.File.Hash, + ) + assert.Len(t, mainResult.Result.Messages, 3) + assert.Equal(t, "hello", mainResult.Result.Session.FirstMessage) + assert.True(t, mainResult.Result.Session.HasTotalOutputTokens) + + subFingerprint, err := provider.Fingerprint(context.Background(), sources[1]) + require.NoError(t, err) + subOutcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: sources[1], + Fingerprint: subFingerprint, + }) + require.NoError(t, err) + require.True(t, subOutcome.ResultSetComplete) + require.Len(t, subOutcome.Results, 1) + subResult := subOutcome.Results[0] + assert.Equal(t, DataVersionCurrent, subResult.DataVersion) + assert.Equal( + t, + "workbuddy:"+sessionID+":subagent:"+subagentID, + subResult.Result.Session.ID, + ) + assert.Equal(t, "workbuddy:"+sessionID, subResult.Result.Session.ParentSessionID) + assert.Equal(t, RelSubagent, subResult.Result.Session.RelationshipType) + assert.Equal(t, + fmt.Sprintf("%x", sha256.Sum256([]byte(subContent))), + subResult.Result.Session.File.Hash, + ) +} + +func workBuddyProviderFixture(firstMessage string) string { + return fmt.Sprintf( + `{"id":"u1","timestamp":1778749186168,"type":"message","role":"user","content":[{"type":"input_text","text":%q}],"cwd":"/tmp/cwd-project"} +{"id":"a1","timestamp":1778749187168,"type":"message","role":"assistant","content":[{"type":"output_text","text":"hi"}],"providerData":{"model":"gpt-5.5","usage":{"inputTokens":20,"outputTokens":4,"cacheReadInputTokens":5}}} +{"id":"fc1","timestamp":1778749188168,"type":"function_call","name":"Bash","callId":"call_1","arguments":"{\"command\":\"pwd\"}"} +`, firstMessage) +} diff --git a/internal/parser/workbuddy_test.go b/internal/parser/workbuddy_test.go index d42024d78..19890e17c 100644 --- a/internal/parser/workbuddy_test.go +++ 
b/internal/parser/workbuddy_test.go @@ -1,6 +1,7 @@ package parser import ( + "context" "fmt" "os" "path/filepath" @@ -11,6 +12,47 @@ import ( "github.com/tidwall/gjson" ) +func parseWorkBuddyTestSession( + t testing.TB, + path, project, machine string, +) (*ParsedSession, []ParsedMessage, error) { + t.Helper() + return parseWorkBuddySession(path, project, machine) +} + +func discoverWorkBuddyTestSessions(t testing.TB, root string) []DiscoveredFile { + t.Helper() + provider, ok := NewProvider(AgentWorkBuddy, ProviderConfig{Roots: []string{root}}) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + + files := make([]DiscoveredFile, 0, len(sources)) + for _, source := range sources { + files = append(files, DiscoveredFile{ + Path: source.DisplayPath, + Project: source.ProjectHint, + Agent: source.Provider, + }) + } + return files +} + +func findWorkBuddyTestSourceFile(t testing.TB, root, rawID string) string { + t.Helper() + provider, ok := NewProvider(AgentWorkBuddy, ProviderConfig{Roots: []string{root}}) + require.True(t, ok) + source, found, err := provider.FindSource( + context.Background(), + FindSourceRequest{RawSessionID: rawID}, + ) + require.NoError(t, err) + if !found { + return "" + } + return source.DisplayPath +} + func TestDiscoverWorkBuddySessions(t *testing.T) { root := t.TempDir() mainPath := filepath.Join(root, "proj", "11111111-1111-4111-8111-111111111111.jsonl") @@ -21,7 +63,7 @@ func TestDiscoverWorkBuddySessions(t *testing.T) { require.NoError(t, os.WriteFile(path, []byte("{}\n"), 0o644), "WriteFile(%q)", path) } - files := DiscoverWorkBuddySessions(root) + files := discoverWorkBuddyTestSessions(t, root) require.Len(t, files, 2) assert.Equal(t, mainPath, files[0].Path) assert.Equal(t, "proj", files[0].Project) @@ -44,7 +86,7 @@ func TestParseWorkBuddySession(t *testing.T) { `, cwd, cwd) require.NoError(t, os.WriteFile(path, []byte(content), 0o644)) - sess, msgs, err := 
ParseWorkBuddySession(path, "proj", "local") + sess, msgs, err := parseWorkBuddyTestSession(t, path, "proj", "local") require.NoError(t, err) require.NotNil(t, sess, "session nil") assert.Equal(t, "workbuddy:11111111-1111-4111-8111-111111111111", sess.ID) @@ -76,7 +118,7 @@ func TestParseWorkBuddySessionDoesNotDoubleCountOpenAICachedTokens(t *testing.T) ` require.NoError(t, os.WriteFile(path, []byte(content), 0o644)) - sess, msgs, err := ParseWorkBuddySession(path, "proj", "local") + sess, msgs, err := parseWorkBuddyTestSession(t, path, "proj", "local") require.NoError(t, err) require.NotNil(t, sess, "session nil") require.Len(t, msgs, 1) @@ -101,7 +143,7 @@ func TestParseWorkBuddySessionUsesCwdProjectAndFileSessionID(t *testing.T) { `, cwd) require.NoError(t, os.WriteFile(path, []byte(content), 0o644)) - sess, _, err := ParseWorkBuddySession(path, "stored-project", "local") + sess, _, err := parseWorkBuddyTestSession(t, path, "stored-project", "local") require.NoError(t, err) assert.Equal(t, "workbuddy:22222222-2222-4222-8222-222222222222", sess.ID) assert.Equal(t, "cwd_project", sess.Project) @@ -114,7 +156,7 @@ func TestParseWorkBuddySessionNormalizesWindowsCwdProject(t *testing.T) { ` require.NoError(t, os.WriteFile(path, []byte(content), 0o644)) - sess, _, err := ParseWorkBuddySession(path, "stored", "local") + sess, _, err := parseWorkBuddyTestSession(t, path, "stored", "local") require.NoError(t, err) assert.Equal(t, "report_builder", sess.Project) } @@ -126,7 +168,7 @@ func TestParseWorkBuddySessionFallsBackToDiscoveredProjectWhenCwdHasNoProject(t ` require.NoError(t, os.WriteFile(path, []byte(content), 0o644)) - sess, _, err := ParseWorkBuddySession(path, "discovered-proj", "local") + sess, _, err := parseWorkBuddyTestSession(t, path, "discovered-proj", "local") require.NoError(t, err) assert.Equal(t, "discovered-proj", sess.Project) } @@ -138,7 +180,7 @@ func TestParseWorkBuddySessionOmitsAbsentTokenUsageKeys(t *testing.T) { ` require.NoError(t, 
os.WriteFile(path, []byte(content), 0o644)) - _, msgs, err := ParseWorkBuddySession(path, "proj", "local") + _, msgs, err := parseWorkBuddyTestSession(t, path, "proj", "local") require.NoError(t, err) require.Len(t, msgs, 1) m := msgs[0] @@ -160,7 +202,7 @@ func TestParseWorkBuddySubagentSession(t *testing.T) { ` require.NoError(t, os.WriteFile(path, []byte(content), 0o644)) - sess, _, err := ParseWorkBuddySession(path, "proj", "local") + sess, _, err := parseWorkBuddyTestSession(t, path, "proj", "local") require.NoError(t, err) assert.Equal(t, "workbuddy:11111111-1111-4111-8111-111111111111:subagent:agent-123", sess.ID) assert.Equal(t, "workbuddy:11111111-1111-4111-8111-111111111111", sess.ParentSessionID) @@ -172,7 +214,7 @@ func TestFindWorkBuddySourceFile(t *testing.T) { path := filepath.Join(root, "proj", "11111111-1111-4111-8111-111111111111.jsonl") require.NoError(t, os.MkdirAll(filepath.Dir(path), 0o755)) require.NoError(t, os.WriteFile(path, []byte("{}\n"), 0o644)) - got := FindWorkBuddySourceFile(root, "workbuddy:11111111-1111-4111-8111-111111111111") + got := findWorkBuddyTestSourceFile(t, root, "workbuddy:11111111-1111-4111-8111-111111111111") assert.Equal(t, path, got) } @@ -182,7 +224,7 @@ func TestFindWorkBuddySourceFileRejectsInvalidSubagentID(t *testing.T) { require.NoError(t, os.MkdirAll(filepath.Dir(path), 0o755)) require.NoError(t, os.WriteFile(path, []byte("{}\n"), 0o644)) - got := FindWorkBuddySourceFile(root, "workbuddy:11111111-1111-4111-8111-111111111111:subagent:../agent-123") + got := findWorkBuddyTestSourceFile(t, root, "workbuddy:11111111-1111-4111-8111-111111111111:subagent:../agent-123") assert.Empty(t, got, "want empty path") } @@ -193,7 +235,7 @@ func TestParseWorkBuddyProjectNamedSubagentsIsNotSubagent(t *testing.T) { ` require.NoError(t, os.WriteFile(path, []byte(content), 0o644)) - sess, _, err := ParseWorkBuddySession(path, "subagents", "local") + sess, _, err := parseWorkBuddyTestSession(t, path, "subagents", "local") 
require.NoError(t, err) assert.Equal(t, "workbuddy:11111111-1111-4111-8111-111111111111", sess.ID) assert.Empty(t, sess.ParentSessionID) @@ -207,7 +249,7 @@ func TestParseWorkBuddySessionDecodesObjectToolResultText(t *testing.T) { ` require.NoError(t, os.WriteFile(path, []byte(content), 0o644)) - _, msgs, err := ParseWorkBuddySession(path, "proj", "local") + _, msgs, err := parseWorkBuddyTestSession(t, path, "proj", "local") require.NoError(t, err) require.Len(t, msgs, 1) require.Len(t, msgs[0].ToolResults, 1) diff --git a/internal/sync/engine.go b/internal/sync/engine.go index e60b5110c..3b10aba02 100644 --- a/internal/sync/engine.go +++ b/internal/sync/engine.go @@ -1288,38 +1288,6 @@ func (e *Engine) classifyOnePath( } } - // WorkBuddy: //.jsonl - // or: ///subagents/*.jsonl - for _, workBuddyDir := range e.agentDirs[parser.AgentWorkBuddy] { - if workBuddyDir == "" { - continue - } - if rel, ok := isUnder(workBuddyDir, path); ok { - if !strings.HasSuffix(path, ".jsonl") { - continue - } - parts := strings.Split(rel, sep) - if len(parts) == 2 { - stem := strings.TrimSuffix(parts[1], ".jsonl") - if !parser.IsValidSessionID(stem) { - continue - } - return parser.DiscoveredFile{ - Path: path, - Project: parts[0], - Agent: parser.AgentWorkBuddy, - }, true - } - if len(parts) == 4 && parts[2] == "subagents" { - return parser.DiscoveredFile{ - Path: path, - Project: parts[0], - Agent: parser.AgentWorkBuddy, - }, true - } - } - } - // VSCode Copilot: /workspaceStorage//chatSessions/.{json,jsonl} // or: /globalStorage/emptyWindowChatSessions/.{json,jsonl} for _, vscDir := range e.agentDirs[parser.AgentVSCodeCopilot] { @@ -4694,8 +4662,6 @@ func (e *Engine) processFile( res = e.processCortex(file, info) case parser.AgentHermes: res = e.processHermes(file, info) - case parser.AgentWorkBuddy: - res = e.processWorkBuddy(file, info) case parser.AgentVibe: res = e.processVibe(file, info) case parser.AgentPositron: @@ -6866,35 +6832,6 @@ func (e *Engine) processHermes( } } 
-func (e *Engine) processWorkBuddy( - file parser.DiscoveredFile, info os.FileInfo, -) processResult { - if e.shouldSkipByPath(file.Path, info) { - return processResult{skip: true} - } - - sess, msgs, err := parser.ParseWorkBuddySession( - file.Path, file.Project, e.machine, - ) - if err != nil { - return processResult{err: err} - } - if sess == nil { - return processResult{} - } - - hash, err := ComputeFileHash(file.Path) - if err == nil { - sess.File.Hash = hash - } - - return processResult{ - results: []parser.ParseResult{ - {Session: *sess, Messages: msgs}, - }, - } -} - func (e *Engine) processVibe( file parser.DiscoveredFile, info os.FileInfo, ) processResult { @@ -9866,18 +9803,6 @@ func (e *Engine) SyncSingleSessionContext( file.Project = parser.GetProjectName( filepath.Base(filepath.Dir(filepath.Dir(path))), ) - case parser.AgentWorkBuddy: - for _, workBuddyDir := range e.agentDirs[parser.AgentWorkBuddy] { - rel, ok := isUnder(workBuddyDir, path) - if !ok { - continue - } - parts := strings.Split(rel, string(filepath.Separator)) - if len(parts) == 2 || len(parts) == 4 && parts[2] == "subagents" { - file.Project = parts[0] - break - } - } } res := e.processFile(ctx, file) diff --git a/internal/sync/workbuddy_test.go b/internal/sync/workbuddy_test.go index 2b5ac9c79..18f0e312d 100644 --- a/internal/sync/workbuddy_test.go +++ b/internal/sync/workbuddy_test.go @@ -34,20 +34,22 @@ func TestEngineClassifyWorkBuddyPaths(t *testing.T) { require.NoError(t, os.WriteFile(path, []byte("{}\n"), 0o644)) } - got, ok := engine.classifyOnePath(mainPath, nil) - require.True(t, ok, "main path did not classify") + files := engine.classifyPaths([]string{mainPath}) + require.Len(t, files, 1, "main path did not classify") + got := files[0] assert.Equal(t, mainPath, got.Path) assert.Equal(t, "proj", got.Project) assert.Equal(t, parser.AgentWorkBuddy, got.Agent) - got, ok = engine.classifyOnePath(subPath, nil) - require.True(t, ok, "subagent path did not classify") + files = 
engine.classifyPaths([]string{subPath}) + require.Len(t, files, 1, "subagent path did not classify") + got = files[0] assert.Equal(t, subPath, got.Path) assert.Equal(t, "proj", got.Project) assert.Equal(t, parser.AgentWorkBuddy, got.Agent) - got, ok = engine.classifyOnePath(toolPath, nil) - assert.False(t, ok, "tool result classified as %+v", got) + files = engine.classifyPaths([]string{toolPath}) + assert.Empty(t, files, "tool result classified as %+v", files) } func TestEngineClassifyWorkBuddyProjectNamedSubagentsAsMainSession(t *testing.T) { @@ -64,8 +66,9 @@ func TestEngineClassifyWorkBuddyProjectNamedSubagentsAsMainSession(t *testing.T) require.NoError(t, os.MkdirAll(filepath.Dir(path), 0o755)) require.NoError(t, os.WriteFile(path, []byte("{}\n"), 0o644)) - got, ok := engine.classifyOnePath(path, nil) - require.True(t, ok, "path did not classify") + files := engine.classifyPaths([]string{path}) + require.Len(t, files, 1, "path did not classify") + got := files[0] assert.Equal(t, path, got.Path) assert.Equal(t, "subagents", got.Project) assert.Equal(t, parser.AgentWorkBuddy, got.Agent) From 3e1b7c703c8abbf8e48bd4e6a2d689e51ac15648 Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Wed, 24 Jun 2026 21:33:45 -0400 Subject: [PATCH 18/24] feat(parser): migrate cortex provider Cortex has a shallow metadata-file source shape, with optional companion history JSONL handled inside the existing parser. Moving it behind a concrete provider keeps source discovery and lookup explicit without adding a new source abstraction. The provider preserves the legacy Cortex session-file predicate, backup/history companion exclusions, symlinked file behavior, deleted-path classification, source fingerprinting, and parse normalization for session names, cwd, and tool content. 
Validation: go fmt ./...; go test -tags "fts5" ./internal/parser -run TestCortexProvider -count=1; go test -tags "fts5" ./internal/parser -count=1; go vet ./...; make test-short; git diff --check fix(parser): include cortex history companions Cortex split-history sessions parse messages from a sibling .history.jsonl file, so provider-backed live sync has to treat that companion as part of the same source. Otherwise a history-only append can be watched but never mapped back to the metadata session, or can keep the same freshness identity and skip reparsing. This keeps the persisted source key on the .json metadata file while adding companion watch classification and a composite fingerprint over the metadata and history files when the companion exists. Validation: go fmt ./...; go test -tags "fts5" ./internal/parser -run TestCortexProviderClassifiesAndFingerprintsHistoryCompanion -count=1; go test -tags "fts5" ./internal/parser -run TestCortexProvider -count=1; go test -tags "fts5" ./internal/parser -count=1; go vet ./...; make test-short; git diff --check; make nilaway test(parser): opt cortex into provider shadow Cortex now has a concrete facade provider on this branch, so its migration mode should enter shadow comparison instead of staying legacy-only and additive. Lower provider opt-ins stay inherited and later provider branches own their own manifest changes. Validation: go test -tags "fts5" ./internal/parser -run TestProviderMigrationModes -count=1; go test -tags "fts5" ./internal/parser -count=1; go vet ./...; git diff --check test(sync): compare cortex shadow parity Cortex is shadow-compared on this branch, so add source-level migration coverage that compares provider observation with ParseCortexSession. The fixture uses Cortex's split metadata/history format so the test proves the provider path preserves companion-history parse behavior while still planning the primary session ID. 
Validation: go test -tags "fts5" ./internal/parser ./internal/sync -run 'TestObserveProviderSourceMatchesCortexLegacyParser|TestCortexProvider|TestParseCortex' -count=1; go test -tags "fts5" ./internal/parser ./internal/sync -count=1; go fmt ./...; go vet ./...; git diff --check; ./custom-gcl run --config .golangci.nilaway.yml ./internal/parser/... ./internal/sync/... refactor(parser): fold cortex into provider Move Cortex parse ownership onto the concrete provider and remove the package-level discover/find/parse entrypoints. Route Cortex sync classification and processing through provider changed-path handling so this branch migrates the provider instead of adding another shim. fix(sync): include cortex companion mtimes in quick sync Provider-authoritative Cortex discovery emits the metadata JSON as the source, but its freshness identity also includes the split .history.jsonl companion. SyncAllSince was still filtering on the metadata file mtime before provider fingerprinting, so history-only updates could be dropped during quick sync. Use provider fingerprint mtimes for provider-process discovered files before applying the since cutoff, falling back to the existing per-agent stat logic when the provider has no mtime. 
Cortex full parses now replace messages as well, because split history rewrites can change existing ordinals rather than only append.\n\nValidation: go test -tags "fts5" ./internal/sync -run TestSyncAllSinceCortexHistoryUpdateTriggersResync -count=1; go test -tags "fts5" ./internal/parser -run Cortex -count=1; go test -tags "fts5" ./internal/sync -run 'Cortex|TestClassifyOnePath_Cortex|TestSyncAllSinceCortexHistoryUpdateTriggersResync' -count=1; go test -tags "fts5" ./internal/sync -count=1; go fmt ./...; go vet ./...; git diff --check fix(parser): thread ctx through cortex source lookups --- internal/parser/cortex.go | 67 +---- internal/parser/cortex_provider.go | 301 +++++++++++++++++++++++ internal/parser/cortex_provider_test.go | 218 ++++++++++++++++ internal/parser/cortex_test.go | 69 ++++-- internal/parser/provider.go | 2 + internal/parser/provider_migration.go | 2 +- internal/parser/types.go | 6 +- internal/sync/classify_cortex_test.go | 17 +- internal/sync/cortex_integration_test.go | 70 ++++++ internal/sync/engine.go | 124 ++++------ 10 files changed, 717 insertions(+), 159 deletions(-) create mode 100644 internal/parser/cortex_provider.go create mode 100644 internal/parser/cortex_provider_test.go create mode 100644 internal/sync/cortex_integration_test.go diff --git a/internal/parser/cortex.go b/internal/parser/cortex.go index 03ddf537d..c8dce0331 100644 --- a/internal/parser/cortex.go +++ b/internal/parser/cortex.go @@ -5,7 +5,6 @@ import ( "errors" "fmt" "os" - "path/filepath" "regexp" "strings" "time" @@ -335,11 +334,11 @@ func parseCortexTimestamps(_ string) map[string]time.Time { return make(map[string]time.Time) } -// ParseCortexSession parses a Cortex session from its .json metadata -// file. If the file contains an embedded "history" array, it is used -// directly. If no history is embedded (the split-file format), the -// companion .history.jsonl file is read instead. 
-func ParseCortexSession( +// parseSession parses a Cortex session from its .json metadata file. If the +// file contains an embedded "history" array, it is used directly. If no history +// is embedded (the split-file format), the companion .history.jsonl file is +// read instead. +func (p *cortexProvider) parseSession( path, machine string, ) (*ParsedSession, []ParsedMessage, error) { info, err := os.Stat(path) @@ -511,59 +510,3 @@ func IsCortexSessionFile(name string) bool { stem := strings.TrimSuffix(name, ".json") return IsValidSessionID(stem) } - -// DiscoverCortexSessions finds all primary session metadata files -// in the Cortex conversations directory (~/.snowflake/cortex/conversations). -// Backup files (*.back.*.json) are silently skipped. Both embedded-history -// sessions (.json with a "history" key) and split sessions -// (.json + .history.jsonl) are returned as a single entry -// pointing to the .json metadata file. -func DiscoverCortexSessions( - conversationsDir string, -) []DiscoveredFile { - if conversationsDir == "" { - return nil - } - - entries, err := os.ReadDir(conversationsDir) - if err != nil { - return nil - } - - var files []DiscoveredFile - for _, entry := range entries { - if entry.IsDir() { - continue - } - name := entry.Name() - if !IsCortexSessionFile(name) { - continue - } - files = append(files, DiscoveredFile{ - Path: filepath.Join(conversationsDir, name), - Agent: AgentCortex, - }) - } - - return files -} - -// FindCortexSourceFile locates a Cortex session file by UUID. Accepts -// both the raw UUID and the prefixed "cortex:" form. Returns the -// path to the .json metadata file if found, otherwise "". -func FindCortexSourceFile( - conversationsDir, sessionID string, -) string { - // Strip "cortex:" prefix before validation — callers may - // pass the full prefixed ID. 
- sessionID = strings.TrimPrefix(sessionID, "cortex:") - if conversationsDir == "" || !IsValidSessionID(sessionID) { - return "" - } - - candidate := filepath.Join(conversationsDir, sessionID+".json") - if _, err := os.Stat(candidate); err == nil { - return candidate - } - return "" -} diff --git a/internal/parser/cortex_provider.go b/internal/parser/cortex_provider.go new file mode 100644 index 000000000..d201f2761 --- /dev/null +++ b/internal/parser/cortex_provider.go @@ -0,0 +1,301 @@ +package parser + +import ( + "context" + "crypto/sha256" + "fmt" + "os" + "path/filepath" + "strings" +) + +var _ Provider = (*cortexProvider)(nil) + +type cortexProviderFactory struct { + def AgentDef +} + +func newCortexProviderFactory(def AgentDef) ProviderFactory { + return cortexProviderFactory{def: cloneAgentDef(def)} +} + +func (f cortexProviderFactory) Definition() AgentDef { + return cloneAgentDef(f.def) +} + +func (f cortexProviderFactory) Capabilities() Capabilities { + return cortexProviderCapabilities() +} + +func (f cortexProviderFactory) NewProvider(cfg ProviderConfig) Provider { + cfg = cfg.Clone() + return &cortexProvider{ + ProviderBase: ProviderBase{ + Def: cloneAgentDef(f.def), + Caps: cortexProviderCapabilities(), + Config: cfg, + }, + sources: newCortexSourceSet(cfg.Roots), + } +} + +type cortexProvider struct { + ProviderBase + sources JSONLSourceSet +} + +func (p *cortexProvider) Discover(ctx context.Context) ([]SourceRef, error) { + return p.sources.Discover(ctx) +} + +func (p *cortexProvider) WatchPlan(ctx context.Context) (WatchPlan, error) { + plan, err := p.sources.WatchPlan(ctx) + if err != nil { + return WatchPlan{}, err + } + for i := range plan.Roots { + plan.Roots[i].IncludeGlobs = append( + plan.Roots[i].IncludeGlobs, + "*.history.jsonl", + ) + } + return plan, nil +} + +func (p *cortexProvider) SourcesForChangedPath( + ctx context.Context, + req ChangedPathRequest, +) ([]SourceRef, error) { + sources, err := p.sources.SourcesForChangedPath(ctx, 
req) + if err != nil || len(sources) > 0 { + return sources, err + } + if source, ok, err := p.sourceForHistoryCompanion(ctx, req); err != nil { + return nil, err + } else if ok { + return []SourceRef{source}, nil + } + return nil, nil +} + +func (p *cortexProvider) FindSource( + ctx context.Context, + req FindSourceRequest, +) (SourceRef, bool, error) { + return p.sources.FindSource(ctx, providerFindRequestWithRawSessionID(p.Def, req)) +} + +func (p *cortexProvider) Fingerprint( + ctx context.Context, + source SourceRef, +) (SourceFingerprint, error) { + if err := ctx.Err(); err != nil { + return SourceFingerprint{}, err + } + path, ok, err := p.sources.pathFromSource(ctx, source) + if err != nil { + return SourceFingerprint{}, err + } + if !ok { + return SourceFingerprint{}, fmt.Errorf("cortex source path unavailable") + } + info, err := os.Stat(path) + if err != nil { + return SourceFingerprint{}, fmt.Errorf("stat %s: %w", path, err) + } + if info.IsDir() { + return SourceFingerprint{}, fmt.Errorf("stat %s: source is a directory", path) + } + fingerprint := SourceFingerprint{ + Key: firstNonEmptyJSONLString( + source.FingerprintKey, + source.Key, + path, + ), + Size: info.Size(), + MTimeNS: info.ModTime().UnixNano(), + } + + h := sha256.New() + if err := addCortexFingerprintPart(h, "metadata", path, info); err != nil { + return SourceFingerprint{}, err + } + historyPath := cortexHistoryCompanionPath(path) + if historyInfo, ok, err := cortexCompanionInfo(historyPath); err != nil { + return SourceFingerprint{}, err + } else if ok && historyInfo != nil { + fingerprint.Size += historyInfo.Size() + mtime := historyInfo.ModTime().UnixNano() + if mtime > fingerprint.MTimeNS { + fingerprint.MTimeNS = mtime + } + if err := addCortexFingerprintPart(h, "history", historyPath, historyInfo); err != nil { + return SourceFingerprint{}, err + } + } + fingerprint.Hash = fmt.Sprintf("%x", h.Sum(nil)) + return fingerprint, nil +} + +func (p *cortexProvider) Parse( + ctx 
context.Context, + req ParseRequest, +) (ParseOutcome, error) { + if err := ctx.Err(); err != nil { + return ParseOutcome{}, err + } + path, ok, err := p.sources.pathFromSource(ctx, req.Source) + if err != nil { + return ParseOutcome{}, err + } + if !ok { + return ParseOutcome{}, fmt.Errorf("cortex source path unavailable") + } + machine := firstNonEmptyJSONLString(req.Machine, p.Config.Machine) + sess, msgs, err := p.parseSession(path, machine) + if err != nil { + return ParseOutcome{}, err + } + if sess == nil { + return ParseOutcome{ + ResultSetComplete: true, + SkipReason: SkipNoSession, + }, nil + } + if req.Fingerprint.Hash != "" { + sess.File.Hash = req.Fingerprint.Hash + } + return ParseOutcome{ + Results: []ParseResultOutcome{{ + Result: ParseResult{ + Session: *sess, + Messages: msgs, + }, + DataVersion: DataVersionCurrent, + }}, + ResultSetComplete: true, + }, nil +} + +func newCortexSourceSet(roots []string) JSONLSourceSet { + return newJSONLSourceSet(AgentCortex, roots, + withExtensions(".json"), + withFollowSymlinkFiles(), + withIncludePath(isCortexSourcePath), + withSessionIDFromPath(cortexSessionIDFromPath), + withProjectHint(func(root, path string) string { return "" }), + ) +} + +func (p *cortexProvider) sourceForHistoryCompanion( + ctx context.Context, + req ChangedPathRequest, +) (SourceRef, bool, error) { + if req.Path == "" { + return SourceRef{}, false, nil + } + path := filepath.Clean(req.Path) + for _, root := range p.sources.roots { + if req.WatchRoot != "" && !samePath(req.WatchRoot, root) { + continue + } + source, ok, err := cortexSourceForHistoryCompanion(ctx, p.sources, root, path) + if err != nil { + return SourceRef{}, false, err + } + if ok { + return source, true, nil + } + } + return SourceRef{}, false, nil +} + +func cortexSourceForHistoryCompanion( + ctx context.Context, + sources JSONLSourceSet, + root string, + path string, +) (SourceRef, bool, error) { + root = filepath.Clean(root) + if !samePath(filepath.Dir(path), root) { 
+ return SourceRef{}, false, nil + } + stem, ok := strings.CutSuffix(filepath.Base(path), ".history.jsonl") + if !ok || !IsCortexSessionFile(stem+".json") { + return SourceRef{}, false, nil + } + metadataPath := filepath.Join(root, stem+".json") + if source, ok, err := sources.sourceForPath(ctx, metadataPath); err != nil { + return SourceRef{}, false, err + } else if ok { + return source, true, nil + } + return SourceRef{}, false, nil +} + +func isCortexSourcePath(root, path string) bool { + if !samePath(filepath.Dir(path), filepath.Clean(root)) { + return false + } + return IsCortexSessionFile(filepath.Base(path)) +} + +func cortexSessionIDFromPath(root, path string) string { + if !isCortexSourcePath(root, path) { + return "" + } + return strings.TrimSuffix(filepath.Base(path), ".json") +} + +func cortexHistoryCompanionPath(path string) string { + return strings.TrimSuffix(path, ".json") + ".history.jsonl" +} + +func cortexCompanionInfo(path string) (os.FileInfo, bool, error) { + info, err := os.Stat(path) + if os.IsNotExist(err) { + return nil, false, nil + } + if err != nil { + return nil, false, fmt.Errorf("stat %s: %w", path, err) + } + if info.IsDir() { + return nil, false, nil + } + return info, true, nil +} + +func addCortexFingerprintPart( + h interface{ Write([]byte) (int, error) }, + label string, + path string, + info os.FileInfo, +) error { + hash, err := hashJSONLSourceFile(path) + if err != nil { + return err + } + _, _ = fmt.Fprintf( + h, + "%s:%s:%d:%d:%s\n", + label, + filepath.Base(path), + info.Size(), + info.ModTime().UnixNano(), + hash, + ) + return nil +} + +func cortexProviderCapabilities() Capabilities { + return Capabilities{ + Source: jsonlFileProviderSourceCapabilities(), + Content: ContentCapabilities{ + FirstMessage: CapabilitySupported, + SessionName: CapabilitySupported, + Cwd: CapabilitySupported, + ToolCalls: CapabilitySupported, + ToolResults: CapabilitySupported, + }, + } +} diff --git a/internal/parser/cortex_provider_test.go 
b/internal/parser/cortex_provider_test.go new file mode 100644 index 000000000..3d89c137c --- /dev/null +++ b/internal/parser/cortex_provider_test.go @@ -0,0 +1,218 @@ +package parser + +import ( + "context" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestCortexProviderFactoryReplacesLegacyAdapter(t *testing.T) { + factory, ok := ProviderFactoryByType(AgentCortex) + require.True(t, ok) + require.NotNil(t, factory) + + caps := factory.Capabilities() + assert.Equal(t, CapabilitySupported, caps.Source.DiscoverSources) + assert.Equal(t, CapabilitySupported, caps.Source.WatchSources) + assert.Equal(t, CapabilitySupported, caps.Source.ClassifyChangedPath) + assert.Equal(t, CapabilitySupported, caps.Source.FindSource) + assert.Equal(t, CapabilitySupported, caps.Source.CompositeFingerprint) + assert.Equal(t, CapabilitySupported, caps.Content.FirstMessage) + assert.Equal(t, CapabilitySupported, caps.Content.SessionName) + assert.Equal(t, CapabilitySupported, caps.Content.Cwd) + assert.Equal(t, CapabilitySupported, caps.Content.ToolCalls) + assert.Equal(t, CapabilitySupported, caps.Content.ToolResults) + + provider, ok := NewProvider(AgentCortex, ProviderConfig{ + Roots: []string{t.TempDir()}, + Machine: "devbox", + }) + require.True(t, ok) + require.NotNil(t, provider) +} + +func TestCortexProviderSourceMethods(t *testing.T) { + root := t.TempDir() + otherID := "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee" + sourcePath := filepath.Join(root, cortexTestUUID+".json") + otherPath := filepath.Join(root, otherID+".json") + writeSourceFile(t, sourcePath, minimalCortexSession(cortexTestUUID)) + writeSourceFile(t, otherPath, minimalCortexSession(otherID)) + writeSourceFile(t, filepath.Join(root, cortexTestUUID+".history.jsonl"), "{}\n") + writeSourceFile(t, filepath.Join(root, cortexTestUUID+".back.123.json"), "{}\n") + writeSourceFile(t, filepath.Join(root, "has spaces.json"), "{}\n") + 
writeSourceFile(t, filepath.Join(root, "nested", cortexTestUUID+".json"), "{}\n") + + provider, ok := NewProvider(AgentCortex, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 2) + assert.Equal(t, []string{sourcePath, otherPath}, sourceDisplayPaths(discovered)) + assert.Equal(t, []string{"", ""}, sourceProjects(discovered)) + + plan, err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 1) + assert.Equal(t, root, plan.Roots[0].Path) + assert.False(t, plan.Roots[0].Recursive) + assert.Equal(t, []string{"*.json", "*.history.jsonl"}, plan.Roots[0].IncludeGlobs) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "host~cortex:" + cortexTestUUID, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sourcePath, found.DisplayPath) + + fingerprint, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + assert.Equal(t, sourcePath, fingerprint.Key) + assert.NotZero(t, fingerprint.Size) + assert.NotZero(t, fingerprint.MTimeNS) + + found, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + StoredFilePath: otherPath, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, otherPath, found.DisplayPath) + + _, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "../" + cortexTestUUID, + }) + require.NoError(t, err) + assert.False(t, ok) + + require.NoError(t, os.Remove(sourcePath)) + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: sourcePath, EventKind: "remove", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, sourcePath, changed[0].DisplayPath) +} + +func TestCortexProviderClassifiesAndFingerprintsHistoryCompanion(t 
*testing.T) { + root := t.TempDir() + sourcePath := filepath.Join(root, cortexTestUUID+".json") + historyPath := filepath.Join(root, cortexTestUUID+".history.jsonl") + writeSourceFile(t, sourcePath, `{ + "session_id":"`+cortexTestUUID+`", + "working_directory":"/home/user/project" + }`) + writeSourceFile( + t, + historyPath, + `{"role":"user","id":"m1","content":[{"type":"text","text":"from history"}]}`+"\n", + ) + + provider, ok := NewProvider(AgentCortex, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: historyPath, EventKind: "write", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, sourcePath, changed[0].DisplayPath) + assert.Equal(t, sourcePath, changed[0].FingerprintKey) + + before, err := provider.Fingerprint(context.Background(), changed[0]) + require.NoError(t, err) + assert.Equal(t, sourcePath, before.Key) + assert.NotEmpty(t, before.Hash) + + writeSourceFile( + t, + historyPath, + `{"role":"user","id":"m1","content":[{"type":"text","text":"updated history"}]}`+"\n", + ) + after, err := provider.Fingerprint(context.Background(), changed[0]) + require.NoError(t, err) + assert.Equal(t, sourcePath, after.Key) + assert.NotEqual(t, before.Hash, after.Hash) + assert.NotEqual(t, before.Size, after.Size) + + require.NoError(t, os.Remove(historyPath)) + changed, err = provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: historyPath, EventKind: "remove", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, sourcePath, changed[0].DisplayPath) +} + +func TestCortexProviderSourceMethodsFollowSymlinkedSessionFile(t *testing.T) { + root := t.TempDir() + targetRoot := t.TempDir() + sourcePath := filepath.Join(root, cortexTestUUID+".json") + targetPath := filepath.Join(targetRoot, cortexTestUUID+".json") + 
writeSourceFile(t, targetPath, minimalCortexSession(cortexTestUUID)) + if err := os.Symlink(targetPath, sourcePath); err != nil { + t.Skipf("symlink not supported: %v", err) + } + + provider, ok := NewProvider(AgentCortex, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, sourcePath, discovered[0].DisplayPath) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "host~cortex:" + cortexTestUUID, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sourcePath, found.DisplayPath) +} + +func TestCortexProviderParse(t *testing.T) { + root := t.TempDir() + sourcePath := filepath.Join(root, cortexTestUUID+".json") + writeSourceFile(t, sourcePath, minimalCortexSession(cortexTestUUID)) + + provider, ok := NewProvider(AgentCortex, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: sources[0], + Fingerprint: SourceFingerprint{Key: sourcePath, Hash: "abc123"}, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.Len(t, outcome.Results, 1) + result := outcome.Results[0] + assert.Equal(t, DataVersionCurrent, result.DataVersion) + assert.Equal(t, "cortex:"+cortexTestUUID, result.Result.Session.ID) + assert.Equal(t, "project", result.Result.Session.Project) + assert.Equal(t, "devbox", result.Result.Session.Machine) + assert.Equal(t, "abc123", result.Result.Session.File.Hash) + assert.Equal(t, "Test session", result.Result.Session.SessionName) + assert.Len(t, result.Result.Messages, 2) +} diff --git a/internal/parser/cortex_test.go b/internal/parser/cortex_test.go index 
a1191619f..061038a61 100644 --- a/internal/parser/cortex_test.go +++ b/internal/parser/cortex_test.go @@ -1,6 +1,7 @@ package parser import ( + "context" "os" "path/filepath" "runtime" @@ -37,11 +38,23 @@ func minimalCortexSession(sessionID string) string { }` } +func parseCortexSessionForTest( + t *testing.T, + path, machine string, +) (*ParsedSession, []ParsedMessage, error) { + t.Helper() + provider, ok := NewProvider(AgentCortex, ProviderConfig{Machine: machine}) + require.True(t, ok) + cortex, ok := provider.(*cortexProvider) + require.True(t, ok) + return cortex.parseSession(path, machine) +} + func TestParseCortexSession_Basic(t *testing.T) { content := minimalCortexSession(cortexTestUUID) path := createTestFile(t, cortexTestUUID+".json", content) - sess, msgs, err := ParseCortexSession(path, "local") + sess, msgs, err := parseCortexSessionForTest(t, path, "local") require.NoError(t, err) require.NotNil(t, sess) @@ -61,7 +74,7 @@ func TestParseCortexSession_EmptySessionID(t *testing.T) { content := `{"session_id": "", "history": []}` path := createTestFile(t, "empty.json", content) - sess, msgs, err := ParseCortexSession(path, "local") + sess, msgs, err := parseCortexSessionForTest(t, path, "local") require.NoError(t, err) assert.Nil(t, sess) assert.Nil(t, msgs) @@ -92,7 +105,7 @@ func TestParseCortexSession_SkipsInternalBlocks(t *testing.T) { }` path := createTestFile(t, cortexTestUUID+".json", content) - sess, msgs, err := ParseCortexSession(path, "local") + sess, msgs, err := parseCortexSessionForTest(t, path, "local") require.NoError(t, err) require.NotNil(t, sess) @@ -137,7 +150,7 @@ func TestParseCortexSession_ToolUse(t *testing.T) { }` path := createTestFile(t, cortexTestUUID+".json", content) - sess, msgs, err := ParseCortexSession(path, "local") + sess, msgs, err := parseCortexSessionForTest(t, path, "local") require.NoError(t, err) require.NotNil(t, sess) @@ -177,7 +190,7 @@ func TestParseCortexSession_SplitHistoryJSONL(t *testing.T) { histPath := 
filepath.Join(dir, uuid+".history.jsonl") require.NoError(t, os.WriteFile(histPath, []byte(lines), 0o644)) - sess, msgs, err := ParseCortexSession(metaPath, "local") + sess, msgs, err := parseCortexSessionForTest(t, metaPath, "local") require.NoError(t, err) require.NotNil(t, sess) @@ -210,7 +223,7 @@ func TestParseCortexSession_SplitHistoryReadError(t *testing.T) { require.NoError(t, os.Chmod(histPath, 0o000)) t.Cleanup(func() { os.Chmod(histPath, 0o644) }) - _, _, err := ParseCortexSession(metaPath, "local") + _, _, err := parseCortexSessionForTest(t, metaPath, "local") require.Error(t, err, "non-ENOENT read error should propagate") assert.Contains(t, err.Error(), "read history") } @@ -229,7 +242,7 @@ func TestParseCortexSession_SplitHistoryMissing(t *testing.T) { metaPath := filepath.Join(dir, uuid+".json") require.NoError(t, os.WriteFile(metaPath, []byte(meta), 0o644)) - sess, msgs, err := ParseCortexSession(metaPath, "local") + sess, msgs, err := parseCortexSessionForTest(t, metaPath, "local") require.NoError(t, err) assert.Nil(t, sess, "missing JSONL should silently skip") assert.Nil(t, msgs) @@ -260,7 +273,7 @@ func TestParseCortexSession_FirstUserTurnSystemOnly(t *testing.T) { }` path := createTestFile(t, cortexTestUUID+".json", content) - sess, msgs, err := ParseCortexSession(path, "local") + sess, msgs, err := parseCortexSessionForTest(t, path, "local") require.NoError(t, err) require.NotNil(t, sess) @@ -318,16 +331,25 @@ func TestDiscoverCortexSessions(t *testing.T) { filepath.Join(dir, name), []byte(""), 0o644)) } - files := DiscoverCortexSessions(dir) - require.Len(t, files, 2) - for _, f := range files { - assert.Equal(t, AgentCortex, f.Agent) - } + provider, ok := NewProvider(AgentCortex, ProviderConfig{Roots: []string{dir}}) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 2) + assert.Equal(t, []string{ + filepath.Join(dir, cortexTestUUID+".json"), + 
filepath.Join(dir, uuid2+".json"), + }, sourceDisplayPaths(sources)) } func TestDiscoverCortexSessions_EmptyDir(t *testing.T) { - assert.Nil(t, DiscoverCortexSessions("")) - assert.Nil(t, DiscoverCortexSessions("/nonexistent")) + provider, ok := NewProvider(AgentCortex, ProviderConfig{ + Roots: []string{"", "/nonexistent"}, + }) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + assert.Empty(t, sources) } func TestFindCortexSourceFile(t *testing.T) { @@ -349,8 +371,21 @@ func TestFindCortexSourceFile(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := FindCortexSourceFile(tt.dir, tt.sessionID) - assert.Equal(t, tt.want, got) + provider, ok := NewProvider(AgentCortex, ProviderConfig{ + Roots: []string{tt.dir}, + }) + require.True(t, ok) + source, ok, err := provider.FindSource( + context.Background(), + FindSourceRequest{RawSessionID: tt.sessionID}, + ) + require.NoError(t, err) + if tt.want == "" { + assert.False(t, ok) + return + } + require.True(t, ok) + assert.Equal(t, tt.want, source.DisplayPath) }) } } diff --git a/internal/parser/provider.go b/internal/parser/provider.go index 817381435..dead326ec 100644 --- a/internal/parser/provider.go +++ b/internal/parser/provider.go @@ -351,6 +351,8 @@ func providerFactoryForDef(def AgentDef) ProviderFactory { return newAmpProviderFactory(def) case AgentCommandCode: return newCommandCodeProviderFactory(def) + case AgentCortex: + return newCortexProviderFactory(def) case AgentDeepSeekTUI: return newDeepSeekTUIProviderFactory(def) case AgentIflow: diff --git a/internal/parser/provider_migration.go b/internal/parser/provider_migration.go index 6da788779..7678e33bb 100644 --- a/internal/parser/provider_migration.go +++ b/internal/parser/provider_migration.go @@ -43,7 +43,7 @@ var providerMigrationModes = map[AgentType]ProviderMigrationMode{ AgentChatGPT: ProviderMigrationLegacyOnly, AgentKiro: ProviderMigrationLegacyOnly, 
AgentKiroIDE: ProviderMigrationLegacyOnly, - AgentCortex: ProviderMigrationLegacyOnly, + AgentCortex: ProviderMigrationProviderAuthoritative, AgentHermes: ProviderMigrationLegacyOnly, AgentWorkBuddy: ProviderMigrationProviderAuthoritative, AgentForge: ProviderMigrationLegacyOnly, diff --git a/internal/parser/types.go b/internal/parser/types.go index 019bb3a76..91662d489 100644 --- a/internal/parser/types.go +++ b/internal/parser/types.go @@ -444,10 +444,8 @@ var Registry = []AgentDef{ DefaultDirs: []string{ ".snowflake/cortex/conversations", }, - IDPrefix: "cortex:", - FileBased: true, - DiscoverFunc: DiscoverCortexSessions, - FindSourceFunc: FindCortexSourceFile, + IDPrefix: "cortex:", + FileBased: true, }, { Type: AgentHermes, diff --git a/internal/sync/classify_cortex_test.go b/internal/sync/classify_cortex_test.go index 68e2f64d7..6f3ddc9da 100644 --- a/internal/sync/classify_cortex_test.go +++ b/internal/sync/classify_cortex_test.go @@ -21,11 +21,15 @@ func TestClassifyOnePath_Cortex(t *testing.T) { require.NoError(t, os.WriteFile(jsonlPath, []byte("{}"), 0o644)) eng := &Engine{ + db: openTestDB(t), agentDirs: map[parser.AgentType][]string{ parser.AgentCortex: {dir}, }, + providerFactories: providerFactoryMap(parser.ProviderFactories()), + providerMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentCortex: parser.ProviderMigrationProviderAuthoritative, + }, } - geminiMap := make(map[string]map[string]string) tests := []struct { name string @@ -71,9 +75,14 @@ func TestClassifyOnePath_Cortex(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got, ok := eng.classifyOnePath(tt.path, geminiMap) - assert.Equal(t, tt.want, ok) - if ok { + files := eng.classifyPaths([]string{tt.path}) + if !tt.want { + assert.Empty(t, files) + return + } + require.Len(t, files, 1) + got := files[0] + if tt.want { assert.Equal(t, tt.agent, got.Agent) assert.Equal(t, tt.retPath, got.Path) } diff --git 
a/internal/sync/cortex_integration_test.go b/internal/sync/cortex_integration_test.go new file mode 100644 index 000000000..abbe36ee4 --- /dev/null +++ b/internal/sync/cortex_integration_test.go @@ -0,0 +1,70 @@ +package sync_test + +import ( + "context" + "os" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/require" + + "go.kenn.io/agentsview/internal/dbtest" + "go.kenn.io/agentsview/internal/parser" + "go.kenn.io/agentsview/internal/sync" +) + +func TestSyncAllSinceCortexHistoryUpdateTriggersResync(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test") + } + + cortexDir := t.TempDir() + testDB := dbtest.OpenTestDB(t) + engine := sync.NewEngine(testDB, sync.EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentCortex: {cortexDir}, + }, + Machine: "local", + }) + + uuid := "11111111-2222-3333-4444-555555555555" + metaPath := filepath.Join(cortexDir, uuid+".json") + historyPath := filepath.Join(cortexDir, uuid+".history.jsonl") + require.NoError(t, os.WriteFile(metaPath, []byte(cortexSyncMeta(uuid)), 0o644)) + require.NoError(t, os.WriteFile(historyPath, []byte(cortexSyncHistory("Before cutoff")), 0o644)) + + baseTime := time.Unix(1_781_475_200, 0) + require.NoError(t, os.Chtimes(metaPath, baseTime, baseTime)) + require.NoError(t, os.Chtimes(historyPath, baseTime, baseTime)) + + engine.SyncPaths([]string{metaPath}) + assertMessageContent(t, testDB, "cortex:"+uuid, "Before cutoff", "ack") + + cutoff := baseTime.Add(500 * time.Millisecond) + historyTime := baseTime.Add(time.Second) + require.NoError(t, os.WriteFile(historyPath, []byte(cortexSyncHistory("After cutoff")), 0o644)) + require.NoError(t, os.Chtimes(historyPath, historyTime, historyTime)) + + stats := engine.SyncAllSince(context.Background(), cutoff, nil) + require.Equal(t, 1, stats.Synced, "synced = %d, want 1", stats.Synced) + assertMessageContent(t, testDB, "cortex:"+uuid, "After cutoff", "ack") +} + +func cortexSyncMeta(uuid 
string) string { + return `{ + "session_id": "` + uuid + `", + "title": "Cortex split history", + "working_directory": "/home/user/cortex-project", + "created_at": "2024-06-01T10:00:00Z", + "last_updated": "2024-06-01T10:05:00Z" +}` +} + +func cortexSyncHistory(prompt string) string { + return strings.Join([]string{ + `{"role":"user","id":"m1","content":[{"type":"text","text":"` + prompt + `"}]}`, + `{"role":"assistant","id":"m2","content":[{"type":"text","text":"ack"}]}`, + }, "\n") + "\n" +} diff --git a/internal/sync/engine.go b/internal/sync/engine.go index 3b10aba02..1d8066b5e 100644 --- a/internal/sync/engine.go +++ b/internal/sync/engine.go @@ -1466,43 +1466,6 @@ func (e *Engine) classifyOnePath( } } - // Cortex: /.json - // or: /.history.jsonl → remap to .json - for _, cortexDir := range e.agentDirs[parser.AgentCortex] { - if cortexDir == "" { - continue - } - if rel, ok := isUnder(cortexDir, path); ok { - if strings.Count(rel, sep) != 0 { - continue - } - name := filepath.Base(rel) - - // .history.jsonl companion → remap to .json metadata. - if stem, ok := strings.CutSuffix( - name, ".history.jsonl", - ); ok { - jsonPath := filepath.Join( - cortexDir, stem+".json", - ) - if parser.IsCortexSessionFile(stem + ".json") { - return parser.DiscoveredFile{ - Path: jsonPath, - Agent: parser.AgentCortex, - }, true - } - continue - } - - if parser.IsCortexSessionFile(name) { - return parser.DiscoveredFile{ - Path: path, - Agent: parser.AgentCortex, - }, true - } - } - } - // Antigravity IDE: /conversations/.db (+ -wal, -shm). // annotations/.pbtxt and brain//* sidecar events are // handled in classifyPaths via classifyAntigravitySidecarPath, @@ -2887,7 +2850,7 @@ func (e *Engine) syncAllLocked( if !since.IsZero() { all = e.dedupeClaudeDiscoveredFiles(all) - all = e.filterFilesByMtime(all, since) + all = e.filterFilesByMtime(ctx, all, since) } all = dedupeDiscoveredFiles(all) @@ -3428,13 +3391,15 @@ func (e *Engine) recordSyncFinished() { // dropped). 
The cost is one stat per file — acceptable for // polling use cases where most files will be skipped. func (e *Engine) filterFilesByMtime( - files []parser.DiscoveredFile, cutoff time.Time, + ctx context.Context, + files []parser.DiscoveredFile, + cutoff time.Time, ) []parser.DiscoveredFile { cutoffNs := cutoff.UnixNano() out := files[:0] codexIndexRefresh := make(map[string][]parser.DiscoveredFile) for _, f := range files { - mtime, err := discoveredFileMtime(f) + mtime, err := e.discoveredFileEffectiveMtime(ctx, f) if err != nil { out = append(out, f) continue @@ -3483,6 +3448,53 @@ func (e *Engine) filterFilesByMtime( return out } +func (e *Engine) discoveredFileEffectiveMtime( + ctx context.Context, + file parser.DiscoveredFile, +) (int64, error) { + if file.ProviderSource != nil && file.ProviderProcess { + if mtime, ok, err := e.providerFingerprintMtime(ctx, file); err != nil { + return 0, err + } else if ok { + return mtime, nil + } + } + return discoveredFileMtime(file) +} + +func (e *Engine) providerFingerprintMtime( + ctx context.Context, + file parser.DiscoveredFile, +) (int64, bool, error) { + if file.ProviderSource == nil { + return 0, false, nil + } + factory, ok := e.providerFactories[file.Agent] + if !ok || factory == nil { + return 0, false, nil + } + source := *file.ProviderSource + if source.Provider != "" && source.Provider != file.Agent { + return 0, false, fmt.Errorf( + "provider source mismatch for %s: %s", + file.Agent, + source.Provider, + ) + } + provider := factory.NewProvider(parser.ProviderConfig{ + Roots: e.agentDirs[file.Agent], + Machine: e.machine, + }) + fingerprint, err := provider.Fingerprint(ctx, source) + if err != nil { + return 0, false, err + } + if fingerprint.MTimeNS == 0 { + return 0, false, nil + } + return fingerprint.MTimeNS, true, nil +} + func discoveredFileMtime( file parser.DiscoveredFile, ) (int64, error) { @@ -4658,8 +4670,6 @@ func (e *Engine) processFile( res = e.processKiro(file, info) case parser.AgentKiroIDE: 
res = e.processKiroIDE(file, info) - case parser.AgentCortex: - res = e.processCortex(file, info) case parser.AgentHermes: res = e.processHermes(file, info) case parser.AgentVibe: @@ -6764,35 +6774,6 @@ func (e *Engine) processKiroIDE( } } -func (e *Engine) processCortex( - file parser.DiscoveredFile, info os.FileInfo, -) processResult { - if e.shouldSkipByPath(file.Path, info) { - return processResult{skip: true} - } - - sess, msgs, err := parser.ParseCortexSession( - file.Path, e.machine, - ) - if err != nil { - return processResult{err: err} - } - if sess == nil { - return processResult{} - } - - hash, err := ComputeFileHash(file.Path) - if err == nil { - sess.File.Hash = hash - } - - return processResult{ - results: []parser.ParseResult{ - {Session: *sess, Messages: msgs}, - }, - } -} - func (e *Engine) processHermes( file parser.DiscoveredFile, info os.FileInfo, ) processResult { @@ -8395,6 +8376,7 @@ func shouldReplaceFullParseMessages( pw.sess.Agent == parser.AgentAntigravity || pw.sess.Agent == parser.AgentAntigravityCLI || pw.sess.Agent == parser.AgentQwenPaw || + pw.sess.Agent == parser.AgentCortex || // Vibe pairs later tool-result carrier records back to an // earlier assistant tool call. An incremental append would // only add the new ordinals and leave the existing tool call's From c772e2a15fbcbbcfed9ba53965cf1a890a2cb1a5 Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Sun, 21 Jun 2026 14:02:54 -0400 Subject: [PATCH 19/24] feat(parser): migrate kimi provider Kimi uses two wire.jsonl layouts whose raw IDs include colon-delimited path components, so it cannot rely entirely on the generic JSONL raw-ID lookup. 
Moving it behind a concrete provider keeps discovery and source classification on the shared JSONL helper while preserving Kimi-specific layout validation and lookup semantics.

The provider keeps legacy support for both the .kimi project/session layout and the .kimi-code workdir/session/agents layout, including symlinked directories, invalid component filtering, project hints, deleted-path classification, and parser output normalization. test(parser): cover kimi new-layout provider parse The roborev design review questioned whether the provider-backed Kimi migration proved the newer .kimi-code layout could round-trip through lookup and parsing. The existing parser and lookup code already handled that raw ID shape, but the provider tests only parsed the legacy layout.

This adds provider-level coverage for the .kimi-code workdir/session/agents layout so the branch itself documents the persisted session ID, project hint, source path, machine, hash propagation, and message output expected from that source shape. test(parser): opt kimi into provider shadow Kimi now has a concrete facade provider on this branch, so its migration mode should enter shadow comparison instead of remaining legacy-only and additive. Lower provider opt-ins stay inherited and later branches own their provider modes. Validation: go test -tags "fts5" ./internal/parser -run TestProviderMigrationModes -count=1; go test -tags "fts5" ./internal/parser -count=1; go vet ./...; git diff --check test(sync): compare kimi shadow parity Kimi is shadow-compared on this branch, so add source-level migration coverage that compares provider observation with ParseKimiSession. The test covers both the legacy project/session wire.jsonl layout and the newer .kimi-code agents layout, keeping the fragile path-derived ID and project behavior visible during review.
Validation: go test -tags "fts5" ./internal/parser ./internal/sync -run 'TestObserveProviderSourceMatchesKimiLegacyParser|TestKimiProvider|TestParseKimi|TestSyncPathsAndSingleSession_KimiNewLayout|TestClassifyOnePath_Kimi' -count=1; go test -tags "fts5" ./internal/parser ./internal/sync -count=1; go fmt ./...; go vet ./...; git diff --check; ./custom-gcl run --config .golangci.nilaway.yml ./internal/parser/... ./internal/sync/...; make nilaway refactor(parser): fold kimi into provider Move Kimi parse and raw-ID source lookup onto the concrete provider and remove package-level discover/find/parse entrypoints. Route Kimi sync classification and processing through provider changed-path handling so the branch migrates the provider instead of preserving legacy dispatch. --- internal/parser/kimi.go | 144 +---------------- internal/parser/kimi_provider.go | 141 +++++++++++++++++ internal/parser/kimi_provider_test.go | 214 ++++++++++++++++++++++++++ internal/parser/kimi_test.go | 166 +++++++++++++------- internal/parser/provider.go | 2 + internal/parser/provider_migration.go | 2 +- internal/parser/types.go | 6 +- internal/sync/classify_kimi_test.go | 33 ++-- internal/sync/engine.go | 85 ---------- 9 files changed, 492 insertions(+), 301 deletions(-) create mode 100644 internal/parser/kimi_provider.go create mode 100644 internal/parser/kimi_provider_test.go diff --git a/internal/parser/kimi.go b/internal/parser/kimi.go index 0514fe38a..90ab6a658 100644 --- a/internal/parser/kimi.go +++ b/internal/parser/kimi.go @@ -5,7 +5,6 @@ import ( "fmt" "os" "path/filepath" - "sort" "strings" "time" @@ -52,143 +51,6 @@ import ( // Kimi model so the estimate keeps tracking a current rate. const defaultKimiModel = "moonshot/kimi-k2.6" -// DiscoverKimiSessions finds all wire.jsonl files under the Kimi -// sessions directory. 
It supports two layouts: -// -// Legacy (".kimi/sessions"): -// -// ///wire.jsonl -// -// New (".kimi-code/sessions"): -// -// /_/session_/agents//wire.jsonl -func DiscoverKimiSessions(sessionsDir string) []DiscoveredFile { - if sessionsDir == "" { - return nil - } - - projDirs, err := os.ReadDir(sessionsDir) - if err != nil { - return nil - } - - var files []DiscoveredFile - for _, projEntry := range projDirs { - if !isDirOrSymlink(projEntry, sessionsDir) { - continue - } - - projDir := filepath.Join(sessionsDir, projEntry.Name()) - sessionDirs, err := os.ReadDir(projDir) - if err != nil { - continue - } - - for _, sessEntry := range sessionDirs { - if !isDirOrSymlink(sessEntry, projDir) { - continue - } - - sessDir := filepath.Join(projDir, sessEntry.Name()) - - // Legacy layout. - wirePath := filepath.Join(sessDir, "wire.jsonl") - if _, err := os.Stat(wirePath); err == nil { - // The project and session names become ':'-delimited - // session-ID components; skip sessions whose names - // cannot round-trip through FindKimiSourceFile. - if kimiIDComponentsValid( - projEntry.Name(), sessEntry.Name(), - ) { - files = append(files, DiscoveredFile{ - Path: wirePath, - Project: DecodeKimiProjectDir(projEntry.Name()), - Agent: AgentKimi, - }) - } - continue - } - - // New .kimi-code layout. 
- agentsDir := filepath.Join(sessDir, "agents") - agentEntries, err := os.ReadDir(agentsDir) - if err != nil { - continue - } - for _, agentEntry := range agentEntries { - if !isDirOrSymlink(agentEntry, agentsDir) { - continue - } - wirePath = filepath.Join( - agentsDir, agentEntry.Name(), "wire.jsonl", - ) - if _, err := os.Stat(wirePath); err == nil && - kimiIDComponentsValid( - projEntry.Name(), - sessEntry.Name(), - agentEntry.Name(), - ) { - files = append(files, DiscoveredFile{ - Path: wirePath, - Project: DecodeKimiProjectDir(projEntry.Name()), - Agent: AgentKimi, - }) - } - } - } - } - - sort.Slice(files, func(i, j int) bool { - return files[i].Path < files[j].Path - }) - return files -} - -// FindKimiSourceFile locates a Kimi session file by its raw -// session ID (without the "kimi:" prefix). Supported raw ID formats: -// -// Legacy: -// -// : -// → ///wire.jsonl -// -// New (.kimi-code): -// -// _:: -// → /_//agents//wire.jsonl -func FindKimiSourceFile(sessionsDir, rawID string) string { - if sessionsDir == "" { - return "" - } - - parts := strings.Split(rawID, ":") - for _, p := range parts { - if !IsValidSessionID(p) { - return "" - } - } - - switch len(parts) { - case 2: - // Legacy layout. - candidate := filepath.Join( - sessionsDir, parts[0], parts[1], "wire.jsonl", - ) - if _, err := os.Stat(candidate); err == nil { - return candidate - } - case 3: - // New .kimi-code layout. - candidate := filepath.Join( - sessionsDir, parts[0], parts[2], "agents", parts[1], "wire.jsonl", - ) - if _, err := os.Stat(candidate); err == nil { - return candidate - } - } - return "" -} - // kimiSessionIDFromPath extracts the raw Kimi session ID from its // wire.jsonl path. Legacy paths yield ":"; .kimi-code // paths yield "::". 
@@ -248,7 +110,7 @@ func isKimiHash(s string) bool { } // kimiIDComponentsValid reports whether the given path-derived -// components can form a session ID that FindKimiSourceFile can +// components can form a session ID that provider raw-ID lookup can // round-trip back to the source file. Each component must itself be a // valid session ID (alphanumeric, '-', '_'); a ':' or any other // character outside that set would break the ':'-delimited ID split @@ -263,11 +125,11 @@ func kimiIDComponentsValid(components ...string) bool { return true } -// ParseKimiSession parses a Kimi wire.jsonl file. Legacy Kimi CLI +// parseKimiSession parses a Kimi wire.jsonl file. Legacy Kimi CLI // sessions store nested message.type records (TurnBegin, ContentPart, // ToolCall, ToolResult, StatusUpdate, TurnEnd). Kimi Code sessions store // top-level records (turn.prompt, context.append_loop_event, usage.record). -func ParseKimiSession( +func parseKimiSession( path, project, machine string, ) (*ParsedSession, []ParsedMessage, error) { info, err := os.Stat(path) diff --git a/internal/parser/kimi_provider.go b/internal/parser/kimi_provider.go new file mode 100644 index 000000000..d9007fbe8 --- /dev/null +++ b/internal/parser/kimi_provider.go @@ -0,0 +1,141 @@ +package parser + +import ( + "context" + "path/filepath" + "strings" +) + +// Kimi stores each session as a wire.jsonl transcript under a per-workspace +// directory, with subagent transcripts nested under an "agents" subdirectory. +// It is a directory-of-files provider: discovery, watching, change +// classification, and fingerprinting come from JSONLSourceSet. The ParseFile +// option makes that source set a full SourceSet so it rides the generic +// factory; RawSessionIDSourceFiles reconstructs the wire.jsonl path from a +// colon-joined raw ID, which the standard filename-stem lookup cannot match.
+func newKimiProviderFactory(def AgentDef) ProviderFactory { + return newSourceSetFactory( + def, + kimiProviderCapabilities(), + func(cfg ProviderConfig) SourceSet { return newKimiSourceSet(cfg.Roots) }, + ) +} + +func newKimiSourceSet(roots []string) JSONLSourceSet { + return newJSONLSourceSet(AgentKimi, roots, + withRecursive(), + withSymlinkFollowing(), + withIncludePath(isKimiSourcePath), + withProjectHint(kimiProjectHintFromPath), + withSessionIDFromPath(func(root, path string) string { + if !isKimiSourcePath(root, path) { + return "" + } + return kimiSessionIDFromPath(path) + }), + withRawSessionIDSourceFiles(kimiRawSessionIDSourceFiles), + withParseFile(kimiParseFile), + ) +} + +func kimiParseFile( + _ context.Context, path string, req ParseRequest, +) ([]ParseResult, []string, error) { + sess, msgs, err := parseKimiSession(path, req.Source.ProjectHint, req.Machine) + if err != nil { + return nil, nil, err + } + if sess == nil { + return nil, nil, nil + } + if req.Fingerprint.Hash != "" { + sess.File.Hash = req.Fingerprint.Hash + } + // Kimi sessions with only session-level token aggregates emit a + // session-level usage event (parseKimiSession sets sess.UsageEvents); + // carry it through so the cost engine can price the session. + return []ParseResult{{ + Session: *sess, + Messages: msgs, + UsageEvents: sess.UsageEvents, + }}, nil, nil +} + +// kimiRawSessionIDSourceFiles reconstructs wire.jsonl candidate paths from a +// colon-joined raw ID. A two-part ID maps to {root}/{workdir}/{session}/ +// wire.jsonl; a three-part ID adds the agents/{agent} subagent layout +// {root}/{workdir}/{session}/agents/{agent}/wire.jsonl. +func kimiRawSessionIDSourceFiles(roots []string, rawID string) []string { + parts := strings.Split(rawID, ":") + if !kimiIDComponentsValid(parts...)
{ + return nil + } + var candidates []string + for _, root := range roots { + if root == "" { + continue + } + switch len(parts) { + case 2: + candidates = append( + candidates, + filepath.Join(root, parts[0], parts[1], "wire.jsonl"), + ) + case 3: + candidates = append(candidates, filepath.Join( + root, parts[0], parts[2], "agents", parts[1], "wire.jsonl", + )) + } + } + return candidates +} + +func isKimiSourcePath(root, path string) bool { + parts, ok := kimiSourceRelParts(root, path) + if !ok || len(parts) == 0 || parts[len(parts)-1] != "wire.jsonl" { + return false + } + switch len(parts) { + case 3: + return kimiIDComponentsValid(parts[0], parts[1]) + case 5: + return parts[2] == "agents" && + kimiIDComponentsValid(parts[0], parts[1], parts[3]) + default: + return false + } +} + +func kimiProjectHintFromPath(root, path string) string { + parts, ok := kimiSourceRelParts(root, path) + if !ok || len(parts) == 0 { + return "" + } + return DecodeKimiProjectDir(parts[0]) +} + +func kimiSourceRelParts(root, path string) ([]string, bool) { + rel, err := filepath.Rel(filepath.Clean(root), filepath.Clean(path)) + if err != nil { + return nil, false + } + parts := strings.Split(rel, string(filepath.Separator)) + for _, part := range parts { + if part == "" || part == "." || part == ".." 
{ + return nil, false + } + } + return parts, true +} + +func kimiProviderCapabilities() Capabilities { + return Capabilities{ + Source: jsonlFileProviderSourceCapabilities(), + Content: ContentCapabilities{ + FirstMessage: CapabilitySupported, + Thinking: CapabilitySupported, + ToolCalls: CapabilitySupported, + ToolResults: CapabilitySupported, + }, + } +} diff --git a/internal/parser/kimi_provider_test.go b/internal/parser/kimi_provider_test.go new file mode 100644 index 000000000..88b811fa2 --- /dev/null +++ b/internal/parser/kimi_provider_test.go @@ -0,0 +1,214 @@ +package parser + +import ( + "context" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestKimiProviderFactoryReplacesLegacyAdapter(t *testing.T) { + factory, ok := ProviderFactoryByType(AgentKimi) + require.True(t, ok) + require.NotNil(t, factory) + + provider, ok := NewProvider(AgentKimi, ProviderConfig{ + Roots: []string{t.TempDir()}, + Machine: "devbox", + }) + require.True(t, ok) + require.NotNil(t, provider) +} + +func TestKimiProviderSourceMethods(t *testing.T) { + root := t.TempDir() + legacyPath := filepath.Join(root, "abc123", "uuid-1", "wire.jsonl") + newPath := filepath.Join( + root, + "wd_kimi-code_057f5c09ee3f", + "session_uuid-2", + "agents", + "main", + "wire.jsonl", + ) + invalidPath := filepath.Join( + root, + "wd_kimi-code_057f5c09ee3f", + "session_uuid-3", + "agents", + "sub agent", + "wire.jsonl", + ) + writeSourceFile(t, legacyPath, kimiProviderFixture("legacy question")) + writeSourceFile(t, newPath, kimiProviderFixture("new layout question")) + writeSourceFile(t, invalidPath, kimiProviderFixture("bad agent")) + writeSourceFile(t, filepath.Join(root, "abc123", "uuid-1", "other.jsonl"), "{}\n") + writeSourceFile(t, filepath.Join(root, "wire.jsonl"), "{}\n") + + provider, ok := NewProvider(AgentKimi, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + plan, 
err := provider.WatchPlan(context.Background()) + require.NoError(t, err) + require.Len(t, plan.Roots, 1) + assert.Equal(t, root, plan.Roots[0].Path) + assert.True(t, plan.Roots[0].Recursive) + assert.Equal(t, []string{"*.jsonl"}, plan.Roots[0].IncludeGlobs) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 2) + assert.Equal(t, AgentKimi, discovered[0].Provider) + assert.Equal(t, legacyPath, discovered[0].DisplayPath) + assert.Equal(t, "abc123", discovered[0].ProjectHint) + assert.Equal(t, newPath, discovered[1].DisplayPath) + assert.Equal(t, "kimi-code", discovered[1].ProjectHint) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "host~kimi:abc123:uuid-1", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, legacyPath, found.DisplayPath) + + fingerprint, err := provider.Fingerprint(context.Background(), found) + require.NoError(t, err) + assert.Equal(t, legacyPath, fingerprint.Key) + assert.Positive(t, fingerprint.Size) + assert.Positive(t, fingerprint.MTimeNS) + + found, ok, err = provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "wd_kimi-code_057f5c09ee3f:main:session_uuid-2", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, newPath, found.DisplayPath) + + require.NoError(t, os.Remove(legacyPath)) + changed, err := provider.SourcesForChangedPath( + context.Background(), + ChangedPathRequest{Path: legacyPath, EventKind: "remove", WatchRoot: root}, + ) + require.NoError(t, err) + require.Len(t, changed, 1) + assert.Equal(t, legacyPath, changed[0].DisplayPath) +} + +func TestKimiProviderDiscoversSymlinkedProjectDirectory(t *testing.T) { + root := t.TempDir() + targetRoot := t.TempDir() + targetProject := filepath.Join(targetRoot, "abc123") + sourceProject := filepath.Join(root, "abc123") + sourcePath := filepath.Join(sourceProject, "uuid-1", "wire.jsonl") + writeSourceFile( + t, + 
filepath.Join(targetProject, "uuid-1", "wire.jsonl"), + kimiProviderFixture("from symlink"), + ) + if err := os.Symlink(targetProject, sourceProject); err != nil { + t.Skipf("symlink not supported: %v", err) + } + + provider, ok := NewProvider(AgentKimi, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + discovered, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, discovered, 1) + assert.Equal(t, sourcePath, discovered[0].DisplayPath) + + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "host~kimi:abc123:uuid-1", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sourcePath, found.DisplayPath) +} + +func TestKimiProviderParse(t *testing.T) { + root := t.TempDir() + sourcePath := filepath.Join(root, "abc123", "uuid-1", "wire.jsonl") + writeSourceFile(t, sourcePath, kimiProviderFixture("provider question")) + + provider, ok := NewProvider(AgentKimi, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: sources[0], + Fingerprint: SourceFingerprint{Key: sourcePath, Hash: "abc123"}, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.Len(t, outcome.Results, 1) + assert.Equal(t, DataVersionCurrent, outcome.Results[0].DataVersion) + assert.Equal(t, "kimi:abc123:uuid-1", outcome.Results[0].Result.Session.ID) + assert.Equal(t, "abc123", outcome.Results[0].Result.Session.Project) + assert.Equal(t, "devbox", outcome.Results[0].Result.Session.Machine) + assert.Equal(t, "abc123", outcome.Results[0].Result.Session.File.Hash) + assert.Len(t, outcome.Results[0].Result.Messages, 2) +} + +func TestKimiProviderParseNewLayoutRoundTrip(t *testing.T) { + root := t.TempDir() + rawID 
:= "wd_kimi-code_057f5c09ee3f:main:session_uuid-2" + sourcePath := filepath.Join( + root, + "wd_kimi-code_057f5c09ee3f", + "session_uuid-2", + "agents", + "main", + "wire.jsonl", + ) + writeSourceFile(t, sourcePath, kimiProviderFixture("new layout provider question")) + + provider, ok := NewProvider(AgentKimi, ProviderConfig{ + Roots: []string{root}, + Machine: "devbox", + }) + require.True(t, ok) + + source, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + FullSessionID: "host~kimi:" + rawID, + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, sourcePath, source.DisplayPath) + assert.Equal(t, "kimi-code", source.ProjectHint) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: source, + Fingerprint: SourceFingerprint{Key: sourcePath, Hash: "abc123"}, + }) + require.NoError(t, err) + require.True(t, outcome.ResultSetComplete) + require.Len(t, outcome.Results, 1) + session := outcome.Results[0].Result.Session + assert.Equal(t, "kimi:"+rawID, session.ID) + assert.Equal(t, "kimi-code", session.Project) + assert.Equal(t, "devbox", session.Machine) + assert.Equal(t, sourcePath, session.File.Path) + assert.Equal(t, "abc123", session.File.Hash) + assert.Len(t, outcome.Results[0].Result.Messages, 2) +} + +func kimiProviderFixture(firstMessage string) string { + return `{"type":"metadata","protocol_version":"1.3"}` + "\n" + + `{"timestamp":1704067200.0,"message":{"type":"TurnBegin","payload":{"user_input":[{"type":"text","text":"` + firstMessage + `"}]}}}` + "\n" + + `{"timestamp":1704067201.0,"message":{"type":"ContentPart","payload":{"type":"text","text":"Done."}}}` + "\n" + + `{"timestamp":1704067202.0,"message":{"type":"TurnEnd","payload":{}}}` + "\n" +} diff --git a/internal/parser/kimi_test.go b/internal/parser/kimi_test.go index fa5288af3..276d99890 100644 --- a/internal/parser/kimi_test.go +++ b/internal/parser/kimi_test.go @@ -1,6 +1,7 @@ package parser import ( + "context" "os" "path/filepath" 
"strings" @@ -43,6 +44,14 @@ func writeKimiCodeWireJSONL( return path } +func parseKimiSessionForTest( + t *testing.T, + path, project, machine string, +) (*ParsedSession, []ParsedMessage, error) { + t.Helper() + return parseKimiSession(path, project, machine) +} + func TestParseKimiSession_Basic(t *testing.T) { path := writeKimiWireJSONL(t, "abc123", "sess-uuid-1234", @@ -54,7 +63,7 @@ func TestParseKimiSession_Basic(t *testing.T) { }, ) - sess, msgs, err := ParseKimiSession( + sess, msgs, err := parseKimiSessionForTest(t, path, "myproject", "local", ) require.NoError(t, err) @@ -94,7 +103,7 @@ func TestParseKimiSession_ThinkingAndToolUse(t *testing.T) { }, ) - sess, msgs, err := ParseKimiSession( + sess, msgs, err := parseKimiSessionForTest(t, path, "testproj", "local", ) require.NoError(t, err) @@ -139,7 +148,7 @@ func TestParseKimiSession_Empty(t *testing.T) { }, ) - sess, msgs, err := ParseKimiSession( + sess, msgs, err := parseKimiSessionForTest(t, path, "testproj", "local", ) require.NoError(t, err) @@ -159,7 +168,7 @@ func TestParseKimiSession_ErrorToolResult(t *testing.T) { }, ) - sess, msgs, err := ParseKimiSession( + sess, msgs, err := parseKimiSessionForTest(t, path, "testproj", "local", ) require.NoError(t, err) @@ -184,7 +193,7 @@ func TestParseKimiSession_ArrayToolResult(t *testing.T) { }, ) - sess, msgs, err := ParseKimiSession( + sess, msgs, err := parseKimiSessionForTest(t, path, "testproj", "local", ) require.NoError(t, err) @@ -216,7 +225,7 @@ func TestParseKimiSession_MultipleStatusUpdates(t *testing.T) { }, ) - sess, _, err := ParseKimiSession( + sess, _, err := parseKimiSessionForTest(t, path, "testproj", "local", ) require.NoError(t, err) @@ -239,7 +248,7 @@ func TestParseKimiSession_StatusUpdate(t *testing.T) { }, ) - sess, _, err := ParseKimiSession( + sess, _, err := parseKimiSessionForTest(t, path, "testproj", "local", ) require.NoError(t, err) @@ -330,7 +339,7 @@ func TestParseKimiSession_ZeroValuedStatusUpdatePreservesCoverage(t 
*testing.T) }, ) - sess, _, err := ParseKimiSession( + sess, _, err := parseKimiSessionForTest(t, path, "testproj", "local", ) require.NoError(t, err) @@ -352,7 +361,7 @@ func TestParseKimiSession_NoProject(t *testing.T) { }, ) - sess, _, err := ParseKimiSession(path, "", "local") + sess, _, err := parseKimiSessionForTest(t, path, "", "local") require.NoError(t, err) require.NotNil(t, sess) assert.Equal(t, "kimi", sess.Project) @@ -372,7 +381,7 @@ func TestParseKimiSession_MessageTimestamps(t *testing.T) { }, ) - _, msgs, err := ParseKimiSession( + _, msgs, err := parseKimiSessionForTest(t, path, "testproj", "local", ) require.NoError(t, err) @@ -413,7 +422,7 @@ func TestParseKimiSession_EmptyFragmentTimestamp(t *testing.T) { }, ) - _, msgs, err := ParseKimiSession( + _, msgs, err := parseKimiSessionForTest(t, path, "testproj", "local", ) require.NoError(t, err) @@ -437,7 +446,7 @@ func TestParseKimiSession_EmptyFragmentTimestamp(t *testing.T) { }, ) - _, msgs, err := ParseKimiSession( + _, msgs, err := parseKimiSessionForTest(t, path, "testproj", "local", ) require.NoError(t, err) @@ -448,7 +457,7 @@ func TestParseKimiSession_EmptyFragmentTimestamp(t *testing.T) { } func TestParseKimiSession_MissingFile(t *testing.T) { - _, _, err := ParseKimiSession( + _, _, err := parseKimiSessionForTest(t, "/nonexistent/wire.jsonl", "proj", "local", ) assert.Error(t, err) @@ -466,7 +475,7 @@ func TestParseKimiSession_FirstMessageTruncation(t *testing.T) { }, ) - sess, _, err := ParseKimiSession( + sess, _, err := parseKimiSessionForTest(t, path, "testproj", "local", ) require.NoError(t, err) @@ -492,18 +501,26 @@ func TestDiscoverKimiSessions(t *testing.T) { []byte(`{"type":"metadata"}`+"\n"), 0o644, )) - files := DiscoverKimiSessions(dir) - require.Equal(t, 2, len(files)) - assert.Equal(t, AgentKimi, files[0].Agent) - assert.Equal(t, "abc123", files[0].Project) + provider, ok := NewProvider(AgentKimi, ProviderConfig{Roots: []string{dir}}) + require.True(t, ok) + sources, err 
:= provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 2) + assert.Equal(t, []string{ + filepath.Join(sessDir, "wire.jsonl"), + filepath.Join(sessDir2, "wire.jsonl"), + }, sourceDisplayPaths(sources)) + assert.Equal(t, []string{"abc123", "abc123"}, sourceProjects(sources)) } func TestDiscoverKimiSessions_Empty(t *testing.T) { - files := DiscoverKimiSessions("") - assert.Nil(t, files) - - files = DiscoverKimiSessions("/nonexistent") - assert.Nil(t, files) + provider, ok := NewProvider(AgentKimi, ProviderConfig{ + Roots: []string{"", "/nonexistent"}, + }) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + assert.Empty(t, sources) } func TestFindKimiSourceFile(t *testing.T) { @@ -517,15 +534,29 @@ func TestFindKimiSourceFile(t *testing.T) { wirePath, []byte("{}"), 0o644, )) - found := FindKimiSourceFile(dir, "abc123:uuid-1") - assert.Equal(t, wirePath, found) + provider, ok := NewProvider(AgentKimi, ProviderConfig{Roots: []string{dir}}) + require.True(t, ok) + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "abc123:uuid-1", + }) + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, wirePath, found.DisplayPath) - assert.Equal(t, "", - FindKimiSourceFile(dir, "abc123:nonexistent")) - assert.Equal(t, "", - FindKimiSourceFile(dir, "invalid")) - assert.Equal(t, "", - FindKimiSourceFile("", "abc123:uuid-1")) + for _, rawID := range []string{"abc123:nonexistent", "invalid"} { + _, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: rawID, + }) + require.NoError(t, err) + assert.False(t, ok) + } + emptyProvider, ok := NewProvider(AgentKimi, ProviderConfig{}) + require.True(t, ok) + _, ok, err = emptyProvider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: "abc123:uuid-1", + }) + require.NoError(t, err) + assert.False(t, ok) } func TestDiscoverKimiSessions_NewLayout(t 
*testing.T) { @@ -540,12 +571,14 @@ func TestDiscoverKimiSessions_NewLayout(t *testing.T) { wirePath, []byte(`{"type":"metadata"}`+"\n"), 0o644, )) - files := DiscoverKimiSessions(dir) - require.Equal(t, 1, len(files)) - assert.Equal(t, AgentKimi, files[0].Agent) - assert.Equal(t, wirePath, files[0].Path) + provider, ok := NewProvider(AgentKimi, ProviderConfig{Roots: []string{dir}}) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + assert.Equal(t, wirePath, sources[0].DisplayPath) // Project is decoded from "wd__". - assert.Equal(t, "claude-code", files[0].Project) + assert.Equal(t, "claude-code", sources[0].ProjectHint) } func TestDiscoverKimiSessions_NewLayout_NonMainAgent(t *testing.T) { @@ -560,9 +593,12 @@ func TestDiscoverKimiSessions_NewLayout_NonMainAgent(t *testing.T) { wirePath, []byte(`{"type":"metadata"}`+"\n"), 0o644, )) - files := DiscoverKimiSessions(dir) - require.Equal(t, 1, len(files)) - assert.Equal(t, wirePath, files[0].Path) + provider, ok := NewProvider(AgentKimi, ProviderConfig{Roots: []string{dir}}) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + assert.Equal(t, wirePath, sources[0].DisplayPath) } func TestFindKimiSourceFile_NewLayout(t *testing.T) { @@ -577,14 +613,26 @@ func TestFindKimiSourceFile_NewLayout(t *testing.T) { wirePath, []byte("{}"), 0o644, )) + provider, ok := NewProvider(AgentKimi, ProviderConfig{Roots: []string{dir}}) + require.True(t, ok) rawID := workdirDir + ":main:" + sessionDir - found := FindKimiSourceFile(dir, rawID) - assert.Equal(t, wirePath, found) - - assert.Equal(t, "", - FindKimiSourceFile(dir, workdirDir+":main:nonexistent")) - assert.Equal(t, "", - FindKimiSourceFile(dir, workdirDir+":"+sessionDir)) + found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: rawID, + }) + require.NoError(t, err) + 
require.True(t, ok) + assert.Equal(t, wirePath, found.DisplayPath) + + for _, rawID := range []string{ + workdirDir + ":main:nonexistent", + workdirDir + ":" + sessionDir, + } { + _, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ + RawSessionID: rawID, + }) + require.NoError(t, err) + assert.False(t, ok) + } } func TestParseKimiSession_NewLayoutSessionID(t *testing.T) { @@ -598,7 +646,7 @@ func TestParseKimiSession_NewLayoutSessionID(t *testing.T) { }, ) - sess, msgs, err := ParseKimiSession(path, "myproject", "local") + sess, msgs, err := parseKimiSessionForTest(t, path, "myproject", "local") require.NoError(t, err) require.NotNil(t, sess) @@ -629,7 +677,7 @@ func TestParseKimiSession_NativeKimiCodeEvents(t *testing.T) { }, ) - sess, msgs, err := ParseKimiSession(path, "myproject", "local") + sess, msgs, err := parseKimiSessionForTest(t, path, "myproject", "local") require.NoError(t, err) require.NotNil(t, sess) @@ -687,7 +735,7 @@ func TestParseKimiSession_NativeKimiCodeToolCall(t *testing.T) { }, ) - sess, msgs, err := ParseKimiSession(path, "myproject", "local") + sess, msgs, err := parseKimiSessionForTest(t, path, "myproject", "local") require.NoError(t, err) require.NotNil(t, sess) @@ -729,7 +777,7 @@ func TestParseKimiSession_NewLayout_AgentZero(t *testing.T) { }, ) - sess, _, err := ParseKimiSession(path, "myproject", "local") + sess, _, err := parseKimiSessionForTest(t, path, "myproject", "local") require.NoError(t, err) require.NotNil(t, sess) @@ -760,9 +808,12 @@ func TestDiscoverKimiSessions_MixedLayouts(t *testing.T) { newPath, []byte(`{"type":"metadata"}`+"\n"), 0o644, )) - files := DiscoverKimiSessions(dir) - require.Equal(t, 2, len(files)) - paths := []string{files[0].Path, files[1].Path} + provider, ok := NewProvider(AgentKimi, ProviderConfig{Roots: []string{dir}}) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 2) + paths := 
sourceDisplayPaths(sources) assert.Contains(t, paths, legacyPath) assert.Contains(t, paths, newPath) } @@ -837,7 +888,10 @@ func TestDiscoverKimiSessions_NewLayout_RejectsInvalidComponent(t *testing.T) { goodPath, []byte(`{"type":"metadata"}`+"\n"), 0o644, )) - files := DiscoverKimiSessions(dir) - require.Len(t, files, 1) - assert.Equal(t, goodPath, files[0].Path) + provider, ok := NewProvider(AgentKimi, ProviderConfig{Roots: []string{dir}}) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + assert.Equal(t, goodPath, sources[0].DisplayPath) } diff --git a/internal/parser/provider.go b/internal/parser/provider.go index dead326ec..c62f626f0 100644 --- a/internal/parser/provider.go +++ b/internal/parser/provider.go @@ -359,6 +359,8 @@ func providerFactoryForDef(def AgentDef) ProviderFactory { return newIflowProviderFactory(def) case AgentGptme: return newGptmeProviderFactory(def) + case AgentKimi: + return newKimiProviderFactory(def) case AgentOMP, AgentPi: return newPiProviderFactory(def) case AgentWorkBuddy: diff --git a/internal/parser/provider_migration.go b/internal/parser/provider_migration.go index 7678e33bb..40bc701d5 100644 --- a/internal/parser/provider_migration.go +++ b/internal/parser/provider_migration.go @@ -38,7 +38,7 @@ var providerMigrationModes = map[AgentType]ProviderMigrationMode{ AgentDeepSeekTUI: ProviderMigrationProviderAuthoritative, AgentOpenClaw: ProviderMigrationLegacyOnly, AgentQClaw: ProviderMigrationLegacyOnly, - AgentKimi: ProviderMigrationLegacyOnly, + AgentKimi: ProviderMigrationProviderAuthoritative, AgentClaudeAI: ProviderMigrationLegacyOnly, AgentChatGPT: ProviderMigrationLegacyOnly, AgentKiro: ProviderMigrationLegacyOnly, diff --git a/internal/parser/types.go b/internal/parser/types.go index 91662d489..a127300fc 100644 --- a/internal/parser/types.go +++ b/internal/parser/types.go @@ -394,10 +394,8 @@ var Registry = []AgentDef{ ".kimi/sessions", 
".kimi-code/sessions", }, - IDPrefix: "kimi:", - FileBased: true, - DiscoverFunc: DiscoverKimiSessions, - FindSourceFunc: FindKimiSourceFile, + IDPrefix: "kimi:", + FileBased: true, }, { Type: AgentClaudeAI, diff --git a/internal/sync/classify_kimi_test.go b/internal/sync/classify_kimi_test.go index 4d01ad180..cfbd6533b 100644 --- a/internal/sync/classify_kimi_test.go +++ b/internal/sync/classify_kimi_test.go @@ -10,13 +10,11 @@ import ( "go.kenn.io/agentsview/internal/parser" ) -// TestClassifyOnePath_Kimi covers the file-watcher classification -// gate for both Kimi session layouts. The new .kimi-code layout has -// a 5-segment relative path and must be classified (and its project -// decoded) just like the legacy 3-segment layout. classifyOnePath is -// the sole consumer of SyncPaths / watcher events, so this is what -// guards live updates for new-layout files. -func TestClassifyOnePath_Kimi(t *testing.T) { +// TestEngineClassifyKimiPaths covers provider changed-path classification for +// both Kimi session layouts. The new .kimi-code layout has a 5-segment relative +// path and must be classified with its decoded project just like the legacy +// 3-segment layout. 
+func TestEngineClassifyKimiPaths(t *testing.T) { dir := t.TempDir() // Legacy: ///wire.jsonl @@ -51,11 +49,15 @@ func TestClassifyOnePath_Kimi(t *testing.T) { } eng := &Engine{ + db: openTestDB(t), agentDirs: map[parser.AgentType][]string{ parser.AgentKimi: {dir}, }, + providerFactories: providerFactoryMap(parser.ProviderFactories()), + providerMigrationModes: map[parser.AgentType]parser.ProviderMigrationMode{ + parser.AgentKimi: parser.ProviderMigrationProviderAuthoritative, + }, } - geminiMap := make(map[string]map[string]string) tests := []struct { name string @@ -95,13 +97,16 @@ func TestClassifyOnePath_Kimi(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got, ok := eng.classifyOnePath(tt.path, geminiMap) - assert.Equal(t, tt.want, ok) - if ok { - assert.Equal(t, parser.AgentKimi, got.Agent) - assert.Equal(t, tt.project, got.Project) - assert.Equal(t, tt.path, got.Path) + files := eng.classifyPaths([]string{tt.path}) + if !tt.want { + assert.Empty(t, files) + return } + require.Len(t, files, 1) + got := files[0] + assert.Equal(t, parser.AgentKimi, got.Agent) + assert.Equal(t, tt.project, got.Project) + assert.Equal(t, tt.path, got.Path) }) } } diff --git a/internal/sync/engine.go b/internal/sync/engine.go index 1d8066b5e..2849d36f8 100644 --- a/internal/sync/engine.go +++ b/internal/sync/engine.go @@ -1216,40 +1216,6 @@ func (e *Engine) classifyOnePath( } } - // Kimi: ///wire.jsonl (legacy) - // or ///agents//wire.jsonl (.kimi-code) - // Components that cannot round-trip through the ':'-delimited - // session ID (per IsValidSessionID) are left unclassified so they - // are never imported in a non-resyncable state. 
- for _, kimiDir := range e.agentDirs[parser.AgentKimi] { - if kimiDir == "" { - continue - } - if rel, ok := isUnder(kimiDir, path); ok { - parts := strings.Split(rel, sep) - switch { - case len(parts) == 3 && parts[2] == "wire.jsonl" && - parser.IsValidSessionID(parts[0]) && - parser.IsValidSessionID(parts[1]): - return parser.DiscoveredFile{ - Path: path, - Project: parser.DecodeKimiProjectDir(parts[0]), - Agent: parser.AgentKimi, - }, true - case len(parts) == 5 && parts[2] == "agents" && - parts[4] == "wire.jsonl" && - parser.IsValidSessionID(parts[0]) && - parser.IsValidSessionID(parts[1]) && - parser.IsValidSessionID(parts[3]): - return parser.DiscoveredFile{ - Path: path, - Project: parser.DecodeKimiProjectDir(parts[0]), - Agent: parser.AgentKimi, - }, true - } - } - } - // QwenPaw: //sessions/.json // or //sessions//.json for _, qwenpawDir := range e.agentDirs[parser.AgentQwenPaw] { @@ -4664,8 +4630,6 @@ func (e *Engine) processFile( res = e.processOpenClaw(file, info) case parser.AgentQClaw: res = e.processQClaw(file, info) - case parser.AgentKimi: - res = e.processKimi(file, info) case parser.AgentKiro: res = e.processKiro(file, info) case parser.AgentKiroIDE: @@ -6424,35 +6388,6 @@ func (e *Engine) processVisualStudioCopilot( } } -func (e *Engine) processKimi( - file parser.DiscoveredFile, info os.FileInfo, -) processResult { - if e.shouldSkipByPath(file.Path, info) { - return processResult{skip: true} - } - - sess, msgs, err := parser.ParseKimiSession( - file.Path, file.Project, e.machine, - ) - if err != nil { - return processResult{err: err} - } - if sess == nil { - return processResult{} - } - - hash, err := ComputeFileHash(file.Path) - if err == nil { - sess.File.Hash = hash - } - - return processResult{ - results: []parser.ParseResult{ - {Session: *sess, Messages: msgs, UsageEvents: sess.UsageEvents}, - }, - } -} - func (e *Engine) processQwenPaw( file parser.DiscoveredFile, info os.FileInfo, ) processResult { @@ -9711,26 +9646,6 @@ func (e 
*Engine) SyncSingleSessionContext( } else { file.Project = filepath.Base(filepath.Dir(path)) } - case parser.AgentKimi: - // path is ///wire.jsonl (legacy) - // or ///agents//wire.jsonl (.kimi-code) - // In both layouts the project is the first path segment relative - // to the sessions dir. Deriving two levels up (the old approach) - // mis-resolves to "agents" under the .kimi-code layout. - for _, kimiDir := range e.agentDirs[parser.AgentKimi] { - rel, ok := isUnder(kimiDir, path) - if !ok { - continue - } - parts := strings.Split(rel, string(filepath.Separator)) - if len(parts) > 0 { - file.Project = parser.DecodeKimiProjectDir(parts[0]) - } - break - } - if file.Project == "" { - file.Project = "kimi" - } case parser.AgentQwenPaw: // path is //sessions/.json or // //sessions//.json From 524136b60bee729118de18efebdc7dcbd77532ec Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Thu, 25 Jun 2026 19:05:06 -0400 Subject: [PATCH 20/24] refactor(parser): adopt exported source-set API in cli-jsonl providers Update the cli-jsonl provider call sites to the exported source-set framework API (WithRecursive, NewJSONLSourceSet, etc.) renamed on the source-set-framework branch. 
--- internal/parser/amp_provider.go | 16 ++++++++-------- internal/parser/commandcode_provider.go | 12 ++++++------ internal/parser/cortex_provider.go | 14 +++++++------- internal/parser/deepseek_tui_provider.go | 16 ++++++++-------- internal/parser/gptme_provider.go | 14 +++++++------- internal/parser/iflow_provider.go | 18 +++++++++--------- internal/parser/kimi_provider.go | 18 +++++++++--------- internal/parser/pi_provider.go | 12 ++++++------ internal/parser/workbuddy_provider.go | 20 ++++++++++---------- internal/parser/zencoder_provider.go | 14 +++++++------- 10 files changed, 77 insertions(+), 77 deletions(-) diff --git a/internal/parser/amp_provider.go b/internal/parser/amp_provider.go index e23ed1ab2..89e1fc331 100644 --- a/internal/parser/amp_provider.go +++ b/internal/parser/amp_provider.go @@ -10,7 +10,7 @@ import ( // lookup, and fingerprinting come from JSONLSourceSet, and the ParseFile option // makes that source set a full SourceSet so it rides the generic factory. func newAmpProviderFactory(def AgentDef) ProviderFactory { - return newSourceSetFactory( + return NewSourceSetFactory( def, ampProviderCapabilities(), func(cfg ProviderConfig) SourceSet { return newAmpSourceSet(cfg.Roots) }, @@ -18,15 +18,15 @@ func newAmpProviderFactory(def AgentDef) ProviderFactory { } func newAmpSourceSet(roots []string) JSONLSourceSet { - return newJSONLSourceSet(AgentAmp, roots, - withExtensions(".json"), - withFollowSymlinkFiles(), - withContentHashing(), - withIncludePath(isAmpSourcePath), - withSessionIDFromPath(func(root, path string) string { + return NewJSONLSourceSet(AgentAmp, roots, + WithExtensions(".json"), + WithFollowSymlinkFiles(), + WithContentHashing(), + WithIncludePath(isAmpSourcePath), + WithSessionIDFromPath(func(root, path string) string { return ampThreadIDFromPath(path) }), - withParseFile(ampParseFile), + WithParseFile(ampParseFile), ) } diff --git a/internal/parser/commandcode_provider.go b/internal/parser/commandcode_provider.go index 
2125a5ed7..ad94f816b 100644 --- a/internal/parser/commandcode_provider.go +++ b/internal/parser/commandcode_provider.go @@ -82,7 +82,7 @@ func (p *commandCodeProvider) FindSource( ctx context.Context, req FindSourceRequest, ) (SourceRef, bool, error) { - return p.sources.FindSource(ctx, providerFindRequestWithRawSessionID(p.Def, req)) + return p.sources.FindSource(ctx, ProviderFindRequestWithRawSessionID(p.Def, req)) } func (p *commandCodeProvider) Fingerprint( @@ -184,11 +184,11 @@ func (p *commandCodeProvider) Parse( } func newCommandCodeSourceSet(roots []string) DirectoryJSONLSourceSet { - return newDirectoryJSONLSourceSet(AgentCommandCode, roots, - withSymlinkFollowing(), - withIncludePath(isCommandCodeSourcePath), - withProjectHint(func(root, path string) string { return "" }), - withSessionIDFromPath(commandCodeSessionIDFromPath), + return NewDirectoryJSONLSourceSet(AgentCommandCode, roots, + WithSymlinkFollowing(), + WithIncludePath(isCommandCodeSourcePath), + WithProjectHint(func(root, path string) string { return "" }), + WithSessionIDFromPath(commandCodeSessionIDFromPath), ) } diff --git a/internal/parser/cortex_provider.go b/internal/parser/cortex_provider.go index d201f2761..7ce7bfb75 100644 --- a/internal/parser/cortex_provider.go +++ b/internal/parser/cortex_provider.go @@ -82,7 +82,7 @@ func (p *cortexProvider) FindSource( ctx context.Context, req FindSourceRequest, ) (SourceRef, bool, error) { - return p.sources.FindSource(ctx, providerFindRequestWithRawSessionID(p.Def, req)) + return p.sources.FindSource(ctx, ProviderFindRequestWithRawSessionID(p.Def, req)) } func (p *cortexProvider) Fingerprint( @@ -178,12 +178,12 @@ func (p *cortexProvider) Parse( } func newCortexSourceSet(roots []string) JSONLSourceSet { - return newJSONLSourceSet(AgentCortex, roots, - withExtensions(".json"), - withFollowSymlinkFiles(), - withIncludePath(isCortexSourcePath), - withSessionIDFromPath(cortexSessionIDFromPath), - withProjectHint(func(root, path string) string { 
return "" }), + return NewJSONLSourceSet(AgentCortex, roots, + WithExtensions(".json"), + WithFollowSymlinkFiles(), + WithIncludePath(isCortexSourcePath), + WithSessionIDFromPath(cortexSessionIDFromPath), + WithProjectHint(func(root, path string) string { return "" }), ) } diff --git a/internal/parser/deepseek_tui_provider.go b/internal/parser/deepseek_tui_provider.go index 6246c5e09..ae9ec7ad2 100644 --- a/internal/parser/deepseek_tui_provider.go +++ b/internal/parser/deepseek_tui_provider.go @@ -10,7 +10,7 @@ import ( // lookup, and fingerprinting come from JSONLSourceSet, and the ParseFile option // makes that source set a full SourceSet so it rides the generic factory. func newDeepSeekTUIProviderFactory(def AgentDef) ProviderFactory { - return newSourceSetFactory( + return NewSourceSetFactory( def, deepSeekTUIProviderCapabilities(), func(cfg ProviderConfig) SourceSet { return newDeepSeekTUISourceSet(cfg.Roots) }, @@ -18,15 +18,15 @@ func newDeepSeekTUIProviderFactory(def AgentDef) ProviderFactory { } func newDeepSeekTUISourceSet(roots []string) JSONLSourceSet { - return newJSONLSourceSet(AgentDeepSeekTUI, roots, - withExtensions(".json"), - withFollowSymlinkFiles(), - withContentHashing(), - withIncludePath(isDeepSeekTUISourcePath), - withSessionIDFromPath(func(root, path string) string { + return NewJSONLSourceSet(AgentDeepSeekTUI, roots, + WithExtensions(".json"), + WithFollowSymlinkFiles(), + WithContentHashing(), + WithIncludePath(isDeepSeekTUISourcePath), + WithSessionIDFromPath(func(root, path string) string { return deepSeekTUISessionIDFromPath(path) }), - withParseFile(deepSeekTUIParseFile), + WithParseFile(deepSeekTUIParseFile), ) } diff --git a/internal/parser/gptme_provider.go b/internal/parser/gptme_provider.go index a5711ca51..5a8e23727 100644 --- a/internal/parser/gptme_provider.go +++ b/internal/parser/gptme_provider.go @@ -245,21 +245,21 @@ func (p *gptmeProvider) isSource(source SourceRef) bool { } func newGptmeSourceSet(roots []string) 
JSONLSourceSet { - return newJSONLSourceSet(AgentGptme, roots, - withRecursive(), - withContentHashing(), - withSymlinkFollowing(), - withInclude(func(path string, info os.FileInfo) bool { + return NewJSONLSourceSet(AgentGptme, roots, + WithRecursive(), + WithContentHashing(), + WithSymlinkFollowing(), + WithInclude(func(path string, info os.FileInfo) bool { return !info.IsDir() && filepath.Base(path) == "conversation.jsonl" }), - withProjectHint(func(root, path string) string { + WithProjectHint(func(root, path string) string { sessionID := gptmeSessionIDFromPath(root, path) if sessionID == "" { return "" } return gptmeProjectFromSessionName(sessionID) }), - withSessionIDFromPath(gptmeSessionIDFromPath), + WithSessionIDFromPath(gptmeSessionIDFromPath), ) } diff --git a/internal/parser/iflow_provider.go b/internal/parser/iflow_provider.go index 7a28b7ef7..781cf5aba 100644 --- a/internal/parser/iflow_provider.go +++ b/internal/parser/iflow_provider.go @@ -13,7 +13,7 @@ import ( // SourceSet so it rides the generic factory; RawSessionIDForLookup strips the // subagent suffix from stored IDs so FindSource still matches the base file. 
func newIflowProviderFactory(def AgentDef) ProviderFactory { - return newSourceSetFactory( + return NewSourceSetFactory( def, iflowProviderCapabilities(), func(cfg ProviderConfig) SourceSet { return newIflowSourceSet(cfg.Roots) }, @@ -21,13 +21,13 @@ func newIflowProviderFactory(def AgentDef) ProviderFactory { } func newIflowSourceSet(roots []string) DirectoryJSONLSourceSet { - return newDirectoryJSONLSourceSet(AgentIflow, roots, - withContentHashing(), - withSymlinkFollowing(), - withIncludePath(isIflowSourcePath), - withSessionIDFromPath(iflowSessionIDFromPath), - withRawSessionIDForLookup(extractIflowBaseSessionID), - withParseFile(iflowParseFile), + return NewDirectoryJSONLSourceSet(AgentIflow, roots, + WithContentHashing(), + WithSymlinkFollowing(), + WithIncludePath(isIflowSourcePath), + WithSessionIDFromPath(iflowSessionIDFromPath), + WithRawSessionIDForLookup(extractIflowBaseSessionID), + WithParseFile(iflowParseFile), ) } @@ -63,7 +63,7 @@ func iflowResolveProject( ) string { dirName := firstNonEmptyJSONLString( source.ProjectHint, - directoryJSONLProjectFromPath(path), + DirectoryJSONLProjectFromPath(path), ) project := GetProjectName(dirName) diff --git a/internal/parser/kimi_provider.go b/internal/parser/kimi_provider.go index d9007fbe8..c457f9dc0 100644 --- a/internal/parser/kimi_provider.go +++ b/internal/parser/kimi_provider.go @@ -14,7 +14,7 @@ import ( // factory; RawSessionIDSourceFiles reconstructs the wire.jsonl path from a // colon-joined raw ID, which the standard filename-stem lookup cannot match. 
func newKimiProviderFactory(def AgentDef) ProviderFactory { - return newSourceSetFactory( + return NewSourceSetFactory( def, kimiProviderCapabilities(), func(cfg ProviderConfig) SourceSet { return newKimiSourceSet(cfg.Roots) }, @@ -22,19 +22,19 @@ func newKimiProviderFactory(def AgentDef) ProviderFactory { } func newKimiSourceSet(roots []string) JSONLSourceSet { - return newJSONLSourceSet(AgentKimi, roots, - withRecursive(), - withSymlinkFollowing(), - withIncludePath(isKimiSourcePath), - withProjectHint(kimiProjectHintFromPath), - withSessionIDFromPath(func(root, path string) string { + return NewJSONLSourceSet(AgentKimi, roots, + WithRecursive(), + WithSymlinkFollowing(), + WithIncludePath(isKimiSourcePath), + WithProjectHint(kimiProjectHintFromPath), + WithSessionIDFromPath(func(root, path string) string { if !isKimiSourcePath(root, path) { return "" } return kimiSessionIDFromPath(path) }), - withRawSessionIDSourceFiles(kimiRawSessionIDSourceFiles), - withParseFile(kimiParseFile), + WithRawSessionIDSourceFiles(kimiRawSessionIDSourceFiles), + WithParseFile(kimiParseFile), ) } diff --git a/internal/parser/pi_provider.go b/internal/parser/pi_provider.go index 501b6cd74..519d300a1 100644 --- a/internal/parser/pi_provider.go +++ b/internal/parser/pi_provider.go @@ -76,7 +76,7 @@ func (p *piProvider) FindSource( if err := ctx.Err(); err != nil { return SourceRef{}, false, err } - req = providerFindRequestWithRawSessionID(p.Def, req) + req = ProviderFindRequestWithRawSessionID(p.Def, req) for _, path := range []string{ req.StoredFilePath, req.FingerprintKey, @@ -191,11 +191,11 @@ func (p *piProvider) filterDiscoveredSources(sources []SourceRef) []SourceRef { } func newPiSourceSet(agent AgentType, roots []string) DirectoryJSONLSourceSet { - return newDirectoryJSONLSourceSet(agent, roots, - withSymlinkFollowing(), - withIncludePath(isPiSourcePath), - withProjectHint(func(root, path string) string { return "" }), - withSessionIDFromPath(piSessionIDFromPath), + return 
NewDirectoryJSONLSourceSet(agent, roots, + WithSymlinkFollowing(), + WithIncludePath(isPiSourcePath), + WithProjectHint(func(root, path string) string { return "" }), + WithSessionIDFromPath(piSessionIDFromPath), ) } diff --git a/internal/parser/workbuddy_provider.go b/internal/parser/workbuddy_provider.go index d88ca5593..618d8e724 100644 --- a/internal/parser/workbuddy_provider.go +++ b/internal/parser/workbuddy_provider.go @@ -12,7 +12,7 @@ import ( // lookup, and fingerprinting come from JSONLSourceSet, and the ParseFile option // makes that source set a full SourceSet so it rides the generic factory. func newWorkBuddyProviderFactory(def AgentDef) ProviderFactory { - return newSourceSetFactory( + return NewSourceSetFactory( def, workBuddyProviderCapabilities(), func(cfg ProviderConfig) SourceSet { return newWorkBuddySourceSet(cfg.Roots) }, @@ -20,15 +20,15 @@ func newWorkBuddyProviderFactory(def AgentDef) ProviderFactory { } func newWorkBuddySourceSet(roots []string) JSONLSourceSet { - return newJSONLSourceSet(AgentWorkBuddy, roots, - withRecursive(), - withSymlinkFollowing(), - withContentHashing(), - withIncludePath(isWorkBuddySourcePath), - withProjectHint(workBuddyProjectHintFromPath), - withSessionIDFromPath(workBuddySessionIDFromPath), - withLookupIDValid(isWorkBuddyLookupID), - withParseFile(workBuddyParseFile), + return NewJSONLSourceSet(AgentWorkBuddy, roots, + WithRecursive(), + WithSymlinkFollowing(), + WithContentHashing(), + WithIncludePath(isWorkBuddySourcePath), + WithProjectHint(workBuddyProjectHintFromPath), + WithSessionIDFromPath(workBuddySessionIDFromPath), + WithLookupIDValid(isWorkBuddyLookupID), + WithParseFile(workBuddyParseFile), ) } diff --git a/internal/parser/zencoder_provider.go b/internal/parser/zencoder_provider.go index 35888d350..add93300e 100644 --- a/internal/parser/zencoder_provider.go +++ b/internal/parser/zencoder_provider.go @@ -11,7 +11,7 @@ import ( // lookup, and fingerprinting come from JSONLSourceSet, and the 
ParseFile option // makes that source set a full SourceSet so it rides the generic factory. func newZencoderProviderFactory(def AgentDef) ProviderFactory { - return newSourceSetFactory( + return NewSourceSetFactory( def, zencoderProviderCapabilities(), func(cfg ProviderConfig) SourceSet { return newZencoderSourceSet(cfg.Roots) }, @@ -19,12 +19,12 @@ func newZencoderProviderFactory(def AgentDef) ProviderFactory { } func newZencoderSourceSet(roots []string) JSONLSourceSet { - return newJSONLSourceSet(AgentZencoder, roots, - withFollowSymlinkFiles(), - withContentHashing(), - withIncludePath(isZencoderSourcePath), - withSessionIDFromPath(zencoderSessionIDFromPath), - withParseFile(zencoderParseFile), + return NewJSONLSourceSet(AgentZencoder, roots, + WithFollowSymlinkFiles(), + WithContentHashing(), + WithIncludePath(isZencoderSourcePath), + WithSessionIDFromPath(zencoderSessionIDFromPath), + WithParseFile(zencoderParseFile), ) } From a8a54751f1449a0d90226e7998ca5854724e3dfc Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Fri, 26 Jun 2026 15:22:05 -0400 Subject: [PATCH 21/24] test(sync): pass context to filterFilesByMtime in S3 tests This branch adds a context.Context parameter to filterFilesByMtime but left the S3 source tests calling the old two-argument form, so internal/sync failed to compile on this branch and every branch above it until a later commit happened to fix the calls. Update the seven call sites here, where the signature changed, so the branch builds on its own. 
--- internal/sync/s3_source_test.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/internal/sync/s3_source_test.go b/internal/sync/s3_source_test.go index ff5412805..ae73ec7fd 100644 --- a/internal/sync/s3_source_test.go +++ b/internal/sync/s3_source_test.go @@ -244,7 +244,7 @@ func TestFilterFilesByMtimeKeepsS3ChangedFingerprint(t *testing.T) { })) e := &Engine{db: database} - got := e.filterFilesByMtime([]parser.DiscoveredFile{{ + got := e.filterFilesByMtime(context.Background(), []parser.DiscoveredFile{{ Agent: parser.AgentClaude, Path: path, Project: "test-proj", @@ -273,7 +273,7 @@ func TestFilterFilesByMtimeKeepsS3ChangedSize(t *testing.T) { })) e := &Engine{db: database} - got := e.filterFilesByMtime([]parser.DiscoveredFile{{ + got := e.filterFilesByMtime(context.Background(), []parser.DiscoveredFile{{ Agent: parser.AgentClaude, Path: path, Project: "test-proj", @@ -360,7 +360,7 @@ func TestFilterFilesByMtimeKeepsOnlyS3CodexChangedIndexTitle(t *testing.T) { } e := &Engine{db: database} - got := e.filterFilesByMtime([]parser.DiscoveredFile{ + got := e.filterFilesByMtime(context.Background(), []parser.DiscoveredFile{ { Agent: parser.AgentCodex, Path: renamedPath, @@ -435,7 +435,7 @@ func TestFilterFilesByMtimeDoesNotFetchOldS3CodexIndex(t *testing.T) { } e := &Engine{db: database} - got := e.filterFilesByMtime([]parser.DiscoveredFile{{ + got := e.filterFilesByMtime(context.Background(), []parser.DiscoveredFile{{ Agent: parser.AgentCodex, Path: path, Machine: "laptop", @@ -497,7 +497,7 @@ func TestFilterFilesByMtimeKeepsS3CodexIndexFetchError(t *testing.T) { } e := &Engine{db: database} - got := e.filterFilesByMtime([]parser.DiscoveredFile{{ + got := e.filterFilesByMtime(context.Background(), []parser.DiscoveredFile{{ Agent: parser.AgentCodex, Path: path, Machine: "laptop", @@ -570,7 +570,7 @@ func TestFilterFilesByMtimeKeepsS3CodexClearedIndexTitle(t *testing.T) { } e := &Engine{db: database} - got := 
e.filterFilesByMtime([]parser.DiscoveredFile{{ + got := e.filterFilesByMtime(context.Background(), []parser.DiscoveredFile{{ Agent: parser.AgentCodex, Path: path, Machine: "laptop", @@ -626,7 +626,7 @@ func TestFilterFilesByMtimeKeepsS3CodexMissingIndexWhenTitleStored(t *testing.T) } e := &Engine{db: database} - got := e.filterFilesByMtime([]parser.DiscoveredFile{{ + got := e.filterFilesByMtime(context.Background(), []parser.DiscoveredFile{{ Agent: parser.AgentCodex, Path: path, Machine: "laptop", From 2ea488d7c8ac7d921b3dde864d4b29662c1fad35 Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Fri, 26 Jun 2026 16:31:44 -0400 Subject: [PATCH 22/24] test(parser): create symlink targets before linking for Windows TestCommandCodeProviderDiscoversSymlinkedProjectDirectory and the iflow equivalent created the directory symlink before the target directory existed. On Windows os.Symlink to a missing target produces a file symlink, so discovery could not descend into the linked project directory and found zero sources. Populate the target directory first, matching the claude/kimi/qwen/workbuddy symlink tests that already pass on Windows; this is order-only and unchanged on Unix. --- internal/parser/commandcode_provider_test.go | 5 ++++- internal/parser/iflow_provider_test.go | 11 +++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/internal/parser/commandcode_provider_test.go b/internal/parser/commandcode_provider_test.go index 465699ad7..a344b14d6 100644 --- a/internal/parser/commandcode_provider_test.go +++ b/internal/parser/commandcode_provider_test.go @@ -74,11 +74,14 @@ func TestCommandCodeProviderDiscoversSymlinkedProjectDirectory(t *testing.T) { root := t.TempDir() realProjectDir := filepath.Join(t.TempDir(), "real-project") linkProjectDir := filepath.Join(root, "linked-project") + // Populate the target directory before symlinking so Windows records a + // directory symlink. 
Symlinking a not-yet-existent target yields a file + // symlink there, which discovery cannot descend into. + writeSourceFile(t, filepath.Join(realProjectDir, "sess_123.jsonl"), commandCodeProviderFixture()) if err := os.Symlink(realProjectDir, linkProjectDir); err != nil { t.Skipf("symlink not supported: %v", err) } sourcePath := filepath.Join(linkProjectDir, "sess_123.jsonl") - writeSourceFile(t, filepath.Join(realProjectDir, "sess_123.jsonl"), commandCodeProviderFixture()) provider, ok := NewProvider(AgentCommandCode, ProviderConfig{ Roots: []string{root}, diff --git a/internal/parser/iflow_provider_test.go b/internal/parser/iflow_provider_test.go index 6bb17fb06..30e8fc071 100644 --- a/internal/parser/iflow_provider_test.go +++ b/internal/parser/iflow_provider_test.go @@ -73,16 +73,19 @@ func TestIflowProviderDiscoversSymlinkedProjectDirectory(t *testing.T) { root := t.TempDir() realProjectDir := filepath.Join(t.TempDir(), "real-project") linkProjectDir := filepath.Join(root, "linked-project") - if err := os.Symlink(realProjectDir, linkProjectDir); err != nil { - t.Skipf("symlink not supported: %v", err) - } rawID := "5de701fc-7454-4858-a249-95cac4fd3b51" - sourcePath := filepath.Join(linkProjectDir, "session-"+rawID+".jsonl") + // Populate the target directory before symlinking so Windows records a + // directory symlink. Symlinking a not-yet-existent target yields a file + // symlink there, which discovery cannot descend into. 
copyFixtureFile( t, "testdata/iflow/session-"+rawID+".jsonl", filepath.Join(realProjectDir, "session-"+rawID+".jsonl"), ) + if err := os.Symlink(realProjectDir, linkProjectDir); err != nil { + t.Skipf("symlink not supported: %v", err) + } + sourcePath := filepath.Join(linkProjectDir, "session-"+rawID+".jsonl") provider, ok := NewProvider(AgentIflow, ProviderConfig{ Roots: []string{root}, From 426cf68f31b2cb6c3ed18b328e7d48d56112c103 Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Sun, 28 Jun 2026 13:59:28 -0400 Subject: [PATCH 23/24] fix(parser): use shared content hashing for pi/kimi/cortex/commandcode The provider migration dropped the per-agent file_hash that the legacy parse computed for these file-based agents: their JSONL source sets never enabled content hashing, so a normal sync fingerprint carried an empty hash and a resync cleared the stored file_hash to NULL. Enable WithContentHashing on all four. Cortex and Command Code also hand-rolled a bespoke Fingerprint (plus WatchPlan and SourcesForChangedPath overrides) solely to fold their .history.jsonl / .meta.json sidecar into the freshness identity. The shared JSONLSourceSet already does this through the WithCompanionFiles hook, which folds companion size, mtime, and content into the fingerprint, watches the sidecar, and maps a changed sidecar back to its transcript. Route both providers through that hook and delete the bespoke methods so file_hash and the companion fold come from one place; Command Code's parse now threads the shared fingerprint hash rather than a separate transcript-only hash. The companion size/mtime fold is arithmetically identical to the old bespoke fold, and the DB-freshness skip keys on size and mtime (not hash), so no unchanged session reparses. The shared watch plan tracks each discovered session's specific sidecar rather than a wildcard. 
--- internal/parser/commandcode_provider.go | 141 ++-------------- internal/parser/commandcode_provider_test.go | 54 ++++-- internal/parser/cortex_provider.go | 163 ++----------------- internal/parser/cortex_provider_test.go | 36 +++- internal/parser/kimi_provider.go | 4 + internal/parser/kimi_provider_test.go | 29 ++++ internal/parser/pi_provider.go | 4 + internal/parser/pi_provider_test.go | 29 ++++ 8 files changed, 173 insertions(+), 287 deletions(-) diff --git a/internal/parser/commandcode_provider.go b/internal/parser/commandcode_provider.go index ad94f816b..46eb8af9a 100644 --- a/internal/parser/commandcode_provider.go +++ b/internal/parser/commandcode_provider.go @@ -2,7 +2,6 @@ package parser import ( "context" - "crypto/sha256" "fmt" "os" "path/filepath" @@ -49,33 +48,14 @@ func (p *commandCodeProvider) Discover(ctx context.Context) ([]SourceRef, error) } func (p *commandCodeProvider) WatchPlan(ctx context.Context) (WatchPlan, error) { - plan, err := p.sources.WatchPlan(ctx) - if err != nil { - return WatchPlan{}, err - } - for i := range plan.Roots { - plan.Roots[i].IncludeGlobs = append( - plan.Roots[i].IncludeGlobs, - "*.meta.json", - ) - } - return plan, nil + return p.sources.WatchPlan(ctx) } func (p *commandCodeProvider) SourcesForChangedPath( ctx context.Context, req ChangedPathRequest, ) ([]SourceRef, error) { - sources, err := p.sources.SourcesForChangedPath(ctx, req) - if err != nil || len(sources) > 0 { - return sources, err - } - if source, ok, err := p.sourceForMetaCompanion(ctx, req); err != nil { - return nil, err - } else if ok { - return []SourceRef{source}, nil - } - return nil, nil + return p.sources.SourcesForChangedPath(ctx, req) } func (p *commandCodeProvider) FindSource( @@ -89,51 +69,7 @@ func (p *commandCodeProvider) Fingerprint( ctx context.Context, source SourceRef, ) (SourceFingerprint, error) { - if err := ctx.Err(); err != nil { - return SourceFingerprint{}, err - } - path, ok, err := p.sources.pathFromSource(ctx, source) - if 
err != nil { - return SourceFingerprint{}, err - } - if !ok { - return SourceFingerprint{}, fmt.Errorf("commandcode source path unavailable") - } - info, err := os.Stat(path) - if err != nil { - return SourceFingerprint{}, fmt.Errorf("stat %s: %w", path, err) - } - if info.IsDir() { - return SourceFingerprint{}, fmt.Errorf("stat %s: source is a directory", path) - } - fingerprint := SourceFingerprint{ - Key: firstNonEmptyJSONLString( - source.FingerprintKey, - source.Key, - path, - ), - Size: info.Size(), - MTimeNS: info.ModTime().UnixNano(), - } - - h := sha256.New() - if err := addCommandCodeFingerprintPart(h, "transcript", path, info); err != nil { - return SourceFingerprint{}, err - } - metaPath := commandCodeMetaCompanionPath(path) - if metaInfo, ok, err := commandCodeCompanionInfo(metaPath); err != nil { - return SourceFingerprint{}, err - } else if ok && metaInfo != nil { - fingerprint.Size += metaInfo.Size() - if mtime := metaInfo.ModTime().UnixNano(); mtime > fingerprint.MTimeNS { - fingerprint.MTimeNS = mtime - } - if err := addCommandCodeFingerprintPart(h, "meta", metaPath, metaInfo); err != nil { - return SourceFingerprint{}, err - } - } - fingerprint.Hash = fmt.Sprintf("%x", h.Sum(nil)) - return fingerprint, nil + return p.sources.Fingerprint(ctx, source) } func (p *commandCodeProvider) Parse( @@ -161,8 +97,11 @@ func (p *commandCodeProvider) Parse( SkipReason: SkipNoSession, }, nil } - if hash, err := hashJSONLSourceFile(path); err == nil { - sess.File.Hash = hash + // Use the shared fingerprint hash (which folds the .meta.json companion via + // WithCompanionFiles + WithContentHashing) rather than recomputing a bespoke + // transcript-only hash, so file_hash stays consistent with the fingerprint. 
+ if req.Fingerprint.Hash != "" { + sess.File.Hash = req.Fingerprint.Hash } // Mirror the legacy effective-info behavior: the transcript's // freshness identity (size and mtime) includes the .meta.json @@ -189,42 +128,18 @@ func newCommandCodeSourceSet(roots []string) DirectoryJSONLSourceSet { WithIncludePath(isCommandCodeSourcePath), WithProjectHint(func(root, path string) string { return "" }), WithSessionIDFromPath(commandCodeSessionIDFromPath), + // Command Code's .meta.json sidecar participates in source freshness, so + // fold it into the shared fingerprint (size, mtime, and content hash) via + // the framework companion hook instead of a bespoke Fingerprint. + // WithContentHashing preserves the legacy per-agent file_hash, which a + // resync would otherwise clear to NULL. + WithCompanionFiles(func(transcriptPath string) []string { + return []string{commandCodeMetaCompanionPath(transcriptPath)} + }), + WithContentHashing(), ) } -func (p *commandCodeProvider) sourceForMetaCompanion( - ctx context.Context, - req ChangedPathRequest, -) (SourceRef, bool, error) { - if req.Path == "" { - return SourceRef{}, false, nil - } - path := filepath.Clean(req.Path) - stem, ok := strings.CutSuffix(filepath.Base(path), ".meta.json") - if !ok || !IsValidSessionID(stem) { - return SourceRef{}, false, nil - } - transcriptPath := filepath.Join(filepath.Dir(path), stem+".jsonl") - if _, err := os.Stat(transcriptPath); err != nil { - return SourceRef{}, false, nil - } - source, ok, err := p.sources.sourceForPath(ctx, transcriptPath) - if err != nil { - return SourceRef{}, false, err - } - if !ok { - return SourceRef{}, false, nil - } - if req.WatchRoot != "" { - root := filepath.Clean(req.WatchRoot) - src := source.Opaque.(JSONLSource) - if !samePath(root, src.Root) { - return SourceRef{}, false, nil - } - } - return source, true, nil -} - func isCommandCodeSourcePath(root, path string) bool { name := filepath.Base(path) if !strings.HasSuffix(name, ".jsonl") || @@ -282,28 +197,6 
@@ func commandCodeCompanionInfo(path string) (os.FileInfo, bool, error) { return info, true, nil } -func addCommandCodeFingerprintPart( - h interface{ Write([]byte) (int, error) }, - label string, - path string, - info os.FileInfo, -) error { - hash, err := hashJSONLSourceFile(path) - if err != nil { - return err - } - _, _ = fmt.Fprintf( - h, - "%s:%s:%d:%d:%s\n", - label, - filepath.Base(path), - info.Size(), - info.ModTime().UnixNano(), - hash, - ) - return nil -} - func commandCodeProviderCapabilities() Capabilities { return Capabilities{ Source: jsonlFileProviderSourceCapabilities(), diff --git a/internal/parser/commandcode_provider_test.go b/internal/parser/commandcode_provider_test.go index a344b14d6..a9c16551d 100644 --- a/internal/parser/commandcode_provider_test.go +++ b/internal/parser/commandcode_provider_test.go @@ -117,11 +117,11 @@ func TestCommandCodeProviderParse(t *testing.T) { require.NoError(t, err) require.Len(t, sources, 1) + fingerprint, err := provider.Fingerprint(context.Background(), sources[0]) + require.NoError(t, err) outcome, err := provider.Parse(context.Background(), ParseRequest{ - Source: sources[0], - Fingerprint: SourceFingerprint{ - Key: sourcePath, - }, + Source: sources[0], + Fingerprint: fingerprint, }) require.NoError(t, err) require.True(t, outcome.ResultSetComplete) @@ -129,14 +129,15 @@ func TestCommandCodeProviderParse(t *testing.T) { assert.Equal(t, DataVersionCurrent, outcome.Results[0].DataVersion) assert.Equal(t, "commandcode:sess_123", outcome.Results[0].Result.Session.ID) assert.Equal(t, "devbox", outcome.Results[0].Result.Session.Machine) - assert.Equal(t, - fmt.Sprintf("%x", sha256.Sum256([]byte(transcript))), - outcome.Results[0].Result.Session.File.Hash, - ) + assert.Equal(t, fingerprint.Hash, outcome.Results[0].Result.Session.File.Hash) assert.Len(t, outcome.Results[0].Result.Messages, 2) } -func TestCommandCodeProviderParsePreservesTranscriptFileHash(t *testing.T) { +// 
TestCommandCodeProviderParseUsesSharedFingerprintHash verifies that file_hash +// is the shared fingerprint hash, which folds the .meta.json companion via +// WithCompanionFiles, rather than a bespoke transcript-only hash. A title-only +// .meta.json change therefore moves both the fingerprint and the stored hash. +func TestCommandCodeProviderParseUsesSharedFingerprintHash(t *testing.T) { root := t.TempDir() sourcePath := filepath.Join(root, "project", "sess_123.jsonl") transcript := commandCodeProviderFixture() @@ -154,9 +155,10 @@ func TestCommandCodeProviderParsePreservesTranscriptFileHash(t *testing.T) { fingerprint, err := provider.Fingerprint(context.Background(), sources[0]) require.NoError(t, err) + require.NotEmpty(t, fingerprint.Hash) transcriptHash := fmt.Sprintf("%x", sha256.Sum256([]byte(transcript))) require.NotEqual(t, transcriptHash, fingerprint.Hash, - "fixture must prove metadata participates in freshness separately") + "the .meta.json companion must participate in the fingerprint hash") outcome, err := provider.Parse(context.Background(), ParseRequest{ Source: sources[0], @@ -165,10 +167,40 @@ func TestCommandCodeProviderParsePreservesTranscriptFileHash(t *testing.T) { require.NoError(t, err) require.True(t, outcome.ResultSetComplete) require.Len(t, outcome.Results, 1) - assert.Equal(t, transcriptHash, outcome.Results[0].Result.Session.File.Hash) + assert.Equal(t, fingerprint.Hash, outcome.Results[0].Result.Session.File.Hash, + "parse threads the shared fingerprint hash, not a transcript-only hash") } func commandCodeProviderFixture() string { return `{"id":"m1","timestamp":"2026-06-01T10:00:00Z","sessionId":"sess_123","role":"user","content":[{"type":"text","text":"Inspect server logs"}],"gitBranch":"feature/command-code","metadata":{"version":2,"cwd":"/Users/alice/code/sample-project"}} {"id":"m2","timestamp":"2026-06-01T10:00:03Z","sessionId":"sess_123","role":"assistant","content":[{"type":"text","text":"The error is in the startup 
path."}],"gitBranch":"feature/command-code","metadata":{"version":2}}` } + +// TestCommandCodeProviderFingerprintIncludesContentHash guards that the Command +// Code provider computes a full-file content hash. The legacy per-agent parse +// stored a file_hash; without WithContentHashing the provider fingerprint hash +// is empty and a resync clears the stored file_hash to NULL. Toggle-provable: +// removing WithContentHashing from newCommandCodeSourceSet fails here. +func TestCommandCodeProviderFingerprintIncludesContentHash(t *testing.T) { + root := t.TempDir() + sourcePath := filepath.Join(root, "users-alice-code-sample-project", "sess_123.jsonl") + writeSourceFile(t, sourcePath, commandCodeProviderFixture()) + + provider, ok := NewProvider(AgentCommandCode, ProviderConfig{Roots: []string{root}}) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + + fp, err := provider.Fingerprint(context.Background(), sources[0]) + require.NoError(t, err) + require.NotEmpty(t, fp.Hash) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: sources[0], + Fingerprint: fp, + }) + require.NoError(t, err) + require.Len(t, outcome.Results, 1) + assert.Equal(t, fp.Hash, outcome.Results[0].Result.Session.File.Hash) +} diff --git a/internal/parser/cortex_provider.go b/internal/parser/cortex_provider.go index 7ce7bfb75..2dba8bd2e 100644 --- a/internal/parser/cortex_provider.go +++ b/internal/parser/cortex_provider.go @@ -2,9 +2,7 @@ package parser import ( "context" - "crypto/sha256" "fmt" - "os" "path/filepath" "strings" ) @@ -49,33 +47,14 @@ func (p *cortexProvider) Discover(ctx context.Context) ([]SourceRef, error) { } func (p *cortexProvider) WatchPlan(ctx context.Context) (WatchPlan, error) { - plan, err := p.sources.WatchPlan(ctx) - if err != nil { - return WatchPlan{}, err - } - for i := range plan.Roots { - plan.Roots[i].IncludeGlobs = append( - 
plan.Roots[i].IncludeGlobs, - "*.history.jsonl", - ) - } - return plan, nil + return p.sources.WatchPlan(ctx) } func (p *cortexProvider) SourcesForChangedPath( ctx context.Context, req ChangedPathRequest, ) ([]SourceRef, error) { - sources, err := p.sources.SourcesForChangedPath(ctx, req) - if err != nil || len(sources) > 0 { - return sources, err - } - if source, ok, err := p.sourceForHistoryCompanion(ctx, req); err != nil { - return nil, err - } else if ok { - return []SourceRef{source}, nil - } - return nil, nil + return p.sources.SourcesForChangedPath(ctx, req) } func (p *cortexProvider) FindSource( @@ -89,52 +68,7 @@ func (p *cortexProvider) Fingerprint( ctx context.Context, source SourceRef, ) (SourceFingerprint, error) { - if err := ctx.Err(); err != nil { - return SourceFingerprint{}, err - } - path, ok, err := p.sources.pathFromSource(ctx, source) - if err != nil { - return SourceFingerprint{}, err - } - if !ok { - return SourceFingerprint{}, fmt.Errorf("cortex source path unavailable") - } - info, err := os.Stat(path) - if err != nil { - return SourceFingerprint{}, fmt.Errorf("stat %s: %w", path, err) - } - if info.IsDir() { - return SourceFingerprint{}, fmt.Errorf("stat %s: source is a directory", path) - } - fingerprint := SourceFingerprint{ - Key: firstNonEmptyJSONLString( - source.FingerprintKey, - source.Key, - path, - ), - Size: info.Size(), - MTimeNS: info.ModTime().UnixNano(), - } - - h := sha256.New() - if err := addCortexFingerprintPart(h, "metadata", path, info); err != nil { - return SourceFingerprint{}, err - } - historyPath := cortexHistoryCompanionPath(path) - if historyInfo, ok, err := cortexCompanionInfo(historyPath); err != nil { - return SourceFingerprint{}, err - } else if ok && historyInfo != nil { - fingerprint.Size += historyInfo.Size() - mtime := historyInfo.ModTime().UnixNano() - if mtime > fingerprint.MTimeNS { - fingerprint.MTimeNS = mtime - } - if err := addCortexFingerprintPart(h, "history", historyPath, historyInfo); err != 
nil { - return SourceFingerprint{}, err - } - } - fingerprint.Hash = fmt.Sprintf("%x", h.Sum(nil)) - return fingerprint, nil + return p.sources.Fingerprint(ctx, source) } func (p *cortexProvider) Parse( @@ -184,55 +118,18 @@ func newCortexSourceSet(roots []string) JSONLSourceSet { WithIncludePath(isCortexSourcePath), WithSessionIDFromPath(cortexSessionIDFromPath), WithProjectHint(func(root, path string) string { return "" }), + // Cortex's .history.jsonl sidecar participates in source freshness, so + // fold it into the shared fingerprint (size, mtime, and content hash) + // via the framework companion hook instead of a bespoke Fingerprint. + // WithContentHashing preserves the legacy per-agent file_hash, which a + // resync would otherwise clear to NULL. + WithCompanionFiles(func(transcriptPath string) []string { + return []string{cortexHistoryCompanionPath(transcriptPath)} + }), + WithContentHashing(), ) } -func (p *cortexProvider) sourceForHistoryCompanion( - ctx context.Context, - req ChangedPathRequest, -) (SourceRef, bool, error) { - if req.Path == "" { - return SourceRef{}, false, nil - } - path := filepath.Clean(req.Path) - for _, root := range p.sources.roots { - if req.WatchRoot != "" && !samePath(req.WatchRoot, root) { - continue - } - source, ok, err := cortexSourceForHistoryCompanion(ctx, p.sources, root, path) - if err != nil { - return SourceRef{}, false, err - } - if ok { - return source, true, nil - } - } - return SourceRef{}, false, nil -} - -func cortexSourceForHistoryCompanion( - ctx context.Context, - sources JSONLSourceSet, - root string, - path string, -) (SourceRef, bool, error) { - root = filepath.Clean(root) - if !samePath(filepath.Dir(path), root) { - return SourceRef{}, false, nil - } - stem, ok := strings.CutSuffix(filepath.Base(path), ".history.jsonl") - if !ok || !IsCortexSessionFile(stem+".json") { - return SourceRef{}, false, nil - } - metadataPath := filepath.Join(root, stem+".json") - if source, ok, err := 
sources.sourceForPath(ctx, metadataPath); err != nil { - return SourceRef{}, false, err - } else if ok { - return source, true, nil - } - return SourceRef{}, false, nil -} - func isCortexSourcePath(root, path string) bool { if !samePath(filepath.Dir(path), filepath.Clean(root)) { return false @@ -251,42 +148,6 @@ func cortexHistoryCompanionPath(path string) string { return strings.TrimSuffix(path, ".json") + ".history.jsonl" } -func cortexCompanionInfo(path string) (os.FileInfo, bool, error) { - info, err := os.Stat(path) - if os.IsNotExist(err) { - return nil, false, nil - } - if err != nil { - return nil, false, fmt.Errorf("stat %s: %w", path, err) - } - if info.IsDir() { - return nil, false, nil - } - return info, true, nil -} - -func addCortexFingerprintPart( - h interface{ Write([]byte) (int, error) }, - label string, - path string, - info os.FileInfo, -) error { - hash, err := hashJSONLSourceFile(path) - if err != nil { - return err - } - _, _ = fmt.Fprintf( - h, - "%s:%s:%d:%d:%s\n", - label, - filepath.Base(path), - info.Size(), - info.ModTime().UnixNano(), - hash, - ) - return nil -} - func cortexProviderCapabilities() Capabilities { return Capabilities{ Source: jsonlFileProviderSourceCapabilities(), diff --git a/internal/parser/cortex_provider_test.go b/internal/parser/cortex_provider_test.go index 3d89c137c..8322b7810 100644 --- a/internal/parser/cortex_provider_test.go +++ b/internal/parser/cortex_provider_test.go @@ -64,7 +64,12 @@ func TestCortexProviderSourceMethods(t *testing.T) { require.Len(t, plan.Roots, 1) assert.Equal(t, root, plan.Roots[0].Path) assert.False(t, plan.Roots[0].Recursive) - assert.Equal(t, []string{"*.json", "*.history.jsonl"}, plan.Roots[0].IncludeGlobs) + // The shared companion mechanism watches each discovered session's specific + // .history.jsonl sidecar (not a wildcard), so a sidecar change on a known + // session is observed live; new sessions are picked up on rediscovery. 
+ assert.Contains(t, plan.Roots[0].IncludeGlobs, "*.json") + assert.Contains(t, plan.Roots[0].IncludeGlobs, cortexTestUUID+".history.jsonl") + assert.Contains(t, plan.Roots[0].IncludeGlobs, otherID+".history.jsonl") found, ok, err := provider.FindSource(context.Background(), FindSourceRequest{ FullSessionID: "host~cortex:" + cortexTestUUID, @@ -216,3 +221,32 @@ func TestCortexProviderParse(t *testing.T) { assert.Equal(t, "Test session", result.Result.Session.SessionName) assert.Len(t, result.Result.Messages, 2) } + +// TestCortexProviderFingerprintIncludesContentHash guards that the Cortex +// provider computes a full-file content hash. The legacy per-agent parse stored +// a file_hash; without WithContentHashing the provider fingerprint hash is empty +// and a resync clears the stored file_hash to NULL. Toggle-provable: removing +// WithContentHashing from newCortexSourceSet makes fp.Hash empty and fails here. +func TestCortexProviderFingerprintIncludesContentHash(t *testing.T) { + root := t.TempDir() + sourcePath := filepath.Join(root, cortexTestUUID+".json") + writeSourceFile(t, sourcePath, minimalCortexSession(cortexTestUUID)) + + provider, ok := NewProvider(AgentCortex, ProviderConfig{Roots: []string{root}}) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + + fp, err := provider.Fingerprint(context.Background(), sources[0]) + require.NoError(t, err) + require.NotEmpty(t, fp.Hash) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: sources[0], + Fingerprint: fp, + }) + require.NoError(t, err) + require.Len(t, outcome.Results, 1) + assert.Equal(t, fp.Hash, outcome.Results[0].Result.Session.File.Hash) +} diff --git a/internal/parser/kimi_provider.go b/internal/parser/kimi_provider.go index c457f9dc0..bb332cd14 100644 --- a/internal/parser/kimi_provider.go +++ b/internal/parser/kimi_provider.go @@ -35,6 +35,10 @@ func newKimiSourceSet(roots []string) 
JSONLSourceSet { }), WithRawSessionIDSourceFiles(kimiRawSessionIDSourceFiles), WithParseFile(kimiParseFile), + // Kimi persisted a full-file content hash (file_hash) in the legacy + // per-agent parse. Without this the provider fingerprint hash is empty + // and a resync clears the stored file_hash to NULL. + WithContentHashing(), ) } diff --git a/internal/parser/kimi_provider_test.go b/internal/parser/kimi_provider_test.go index 88b811fa2..7617c9f18 100644 --- a/internal/parser/kimi_provider_test.go +++ b/internal/parser/kimi_provider_test.go @@ -212,3 +212,32 @@ func kimiProviderFixture(firstMessage string) string { `{"timestamp":1704067201.0,"message":{"type":"ContentPart","payload":{"type":"text","text":"Done."}}}` + "\n" + `{"timestamp":1704067202.0,"message":{"type":"TurnEnd","payload":{}}}` + "\n" } + +// TestKimiProviderFingerprintIncludesContentHash guards that the Kimi provider +// computes a full-file content hash. The legacy per-agent parse stored a +// file_hash; without WithContentHashing the provider fingerprint hash is empty +// and a resync clears the stored file_hash to NULL. Toggle-provable: removing +// WithContentHashing from newKimiSourceSet makes fp.Hash empty and fails here. 
+func TestKimiProviderFingerprintIncludesContentHash(t *testing.T) { + root := t.TempDir() + sourcePath := filepath.Join(root, "abc123", "uuid-1", "wire.jsonl") + writeSourceFile(t, sourcePath, kimiProviderFixture("inspect logs")) + + provider, ok := NewProvider(AgentKimi, ProviderConfig{Roots: []string{root}}) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + + fp, err := provider.Fingerprint(context.Background(), sources[0]) + require.NoError(t, err) + require.NotEmpty(t, fp.Hash) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: sources[0], + Fingerprint: fp, + }) + require.NoError(t, err) + require.Len(t, outcome.Results, 1) + assert.Equal(t, fp.Hash, outcome.Results[0].Result.Session.File.Hash) +} diff --git a/internal/parser/pi_provider.go b/internal/parser/pi_provider.go index 519d300a1..d794a3401 100644 --- a/internal/parser/pi_provider.go +++ b/internal/parser/pi_provider.go @@ -196,6 +196,10 @@ func newPiSourceSet(agent AgentType, roots []string) DirectoryJSONLSourceSet { WithIncludePath(isPiSourcePath), WithProjectHint(func(root, path string) string { return "" }), WithSessionIDFromPath(piSessionIDFromPath), + // Pi/OMP persisted a full-file content hash (file_hash) in the legacy + // per-agent parse. Without this the provider fingerprint hash is empty + // and a resync clears the stored file_hash to NULL. 
+ WithContentHashing(), ) } diff --git a/internal/parser/pi_provider_test.go b/internal/parser/pi_provider_test.go index 041f408cc..a3fb7682c 100644 --- a/internal/parser/pi_provider_test.go +++ b/internal/parser/pi_provider_test.go @@ -225,3 +225,32 @@ func piProviderFixture(sessionID string) string { `{"type":"message","id":"msg-2","timestamp":"2025-01-01T10:00:02Z","message":{"role":"assistant","content":"Looks ready.","model":"claude-opus-4-5","usage":{"input_tokens":10,"output_tokens":5}}}`, }, "\n") } + +// TestPiProviderFingerprintIncludesContentHash guards that the Pi provider +// computes a full-file content hash. The legacy per-agent parse stored a +// file_hash; without WithContentHashing the provider fingerprint hash is empty +// and a resync clears the stored file_hash to NULL. Toggle-provable: removing +// WithContentHashing from newPiSourceSet makes fp.Hash empty and fails here. +func TestPiProviderFingerprintIncludesContentHash(t *testing.T) { + root := t.TempDir() + sourcePath := filepath.Join(root, "encoded-cwd", "session-123.jsonl") + writeSourceFile(t, sourcePath, piProviderFixture("session-123")) + + provider, ok := NewProvider(AgentPi, ProviderConfig{Roots: []string{root}}) + require.True(t, ok) + sources, err := provider.Discover(context.Background()) + require.NoError(t, err) + require.Len(t, sources, 1) + + fp, err := provider.Fingerprint(context.Background(), sources[0]) + require.NoError(t, err) + require.NotEmpty(t, fp.Hash) + + outcome, err := provider.Parse(context.Background(), ParseRequest{ + Source: sources[0], + Fingerprint: fp, + }) + require.NoError(t, err) + require.Len(t, outcome.Results, 1) + assert.Equal(t, fp.Hash, outcome.Results[0].Result.Session.File.Hash) +} From c69d636d4790d1e633eaff38cbe5bde5b8e5c4b5 Mon Sep 17 00:00:00 2001 From: Marius van Niekerk Date: Sun, 28 Jun 2026 16:14:20 -0400 Subject: [PATCH 24/24] test(parser): call renamed parseKimiSession in Kimi cost tests The Kimi aggregate-cost tests carried in from 
main call the exported ParseKimiSession, which the provider migration renames to the unexported parseKimiSession. Point them at the renamed helper so the parser package test binary compiles after the migration. --- internal/parser/kimi_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/internal/parser/kimi_test.go b/internal/parser/kimi_test.go index 276d99890..55900287c 100644 --- a/internal/parser/kimi_test.go +++ b/internal/parser/kimi_test.go @@ -276,7 +276,7 @@ func TestParseKimiSession_SessionLevelTokensEmitUsageEvent(t *testing.T) { }, ) - sess, _, err := ParseKimiSession(path, "testproj", "local") + sess, _, err := parseKimiSession(path, "testproj", "local") require.NoError(t, err) require.NotNil(t, sess) @@ -304,7 +304,7 @@ func TestParseKimiSession_PerMessageTokensSkipUsageEvent(t *testing.T) { }, ) - sess, _, err := ParseKimiSession(path, "testproj", "local") + sess, _, err := parseKimiSession(path, "testproj", "local") require.NoError(t, err) require.NotNil(t, sess) assert.Empty(t, sess.UsageEvents) @@ -321,7 +321,7 @@ func TestParseKimiSession_StepEndModelOverridesDefault(t *testing.T) { }, ) - _, msgs, err := ParseKimiSession(path, "testproj", "local") + _, msgs, err := parseKimiSession(path, "testproj", "local") require.NoError(t, err) require.Len(t, msgs, 2) assert.Equal(t, "moonshot/kimi-k2", msgs[1].Model)