From 4f5da6ac781bb694ba88a98d3e6e7a7d1dd94158 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Thu, 25 Jun 2026 23:45:51 +0000 Subject: [PATCH 1/4] MCP: windowed get_message body reading Return body_text as a paginated slice instead of the full message body. Adds offset, center_at, max_chars (default 2000, max 4000), body_length, body_returned, and has_more so agents can page through long messages or jump to a match via center_at (byte offset from search_in_message). Values above maxBodyChars clamp to 4000; zero or negative max_chars use the default. Guard against Engine.GetMessage returning (nil, nil) for not-found. Return UTF-8-adjusted offset, body_returned, and has_more so sequential paging (offset += body_returned) does not skip multibyte characters. Co-authored-by: endolith --- internal/mcp/handlers.go | 172 +++++++++++++++++++++++++++- internal/mcp/server.go | 15 ++- internal/mcp/server_test.go | 216 +++++++++++++++++++++++++++++++++++- 3 files changed, 398 insertions(+), 5 deletions(-) diff --git a/internal/mcp/handlers.go b/internal/mcp/handlers.go index 1940aeab..e83bb438 100644 --- a/internal/mcp/handlers.go +++ b/internal/mcp/handlers.go @@ -13,6 +13,7 @@ import ( "path/filepath" "strings" "time" + "unicode/utf8" "github.com/mark3labs/mcp-go/mcp" "go.kenn.io/msgvault/internal/deletion" @@ -27,6 +28,11 @@ const ( maxLimit = 1000 maxSearchMessagesLimit = 50 defaultSearchLimit = 20 + defaultBodyChars = 2000 + // maxBodyChars caps the body slice returned by get_message regardless of what + // the caller requests via max_chars. Prevents a single tool call from flooding + // the context window; callers page forward using offset. + maxBodyChars = 4000 // totalCountUnknown is returned when the backend cannot report a full match // count (body FTS fallback, hybrid/vector ranking depth, or list_messages // without a separate count query). Clients should use has_more for paging. @@ -644,6 +650,108 @@ func (h *handlers) filterFromFindSimilarArgs(ctx context.Context, args map[strin return f, nil } +// bodyByteSliceRange returns a UTF-8-safe subslice of body[start:end] and the +// adjusted byte offsets actually used. adjEnd is exclusive; callers use it for +// has_more and sequential paging via offset += body_returned. +func bodyByteSliceRange(body string, start, end int) (text string, adjStart, adjEnd int) { + if start < 0 { + start = 0 + } + if end > len(body) { + end = len(body) + } + if start >= len(body) { + return "", len(body), len(body) + } + if start >= end { + return oneRuneSlice(body, start) + } + + adjStart, adjEnd = start, end + for adjStart < adjEnd && !utf8.RuneStart(body[adjStart]) { + adjStart++ + } + for adjEnd > adjStart && adjEnd < len(body) && !utf8.RuneStart(body[adjEnd]) { + adjEnd-- + } + for adjEnd > adjStart { + s := body[adjStart:adjEnd] + if utf8.ValidString(s) { + return s, adjStart, adjEnd + } + adjEnd-- + } + return oneRuneSlice(body, adjStart) +} + +// oneRuneSlice returns a single rune starting at or after start so tiny windows +// and mid-rune offsets still advance sequential paging. +func oneRuneSlice(body string, start int) (text string, adjStart, adjEnd int) { + adjStart = start + for adjStart < len(body) && !utf8.RuneStart(body[adjStart]) { + adjStart++ + } + if adjStart >= len(body) { + return "", len(body), len(body) + } + _, size := utf8.DecodeRuneInString(body[adjStart:]) + if size <= 0 { + return "", adjStart, adjStart + } + adjEnd = min(len(body), adjStart+size) + return body[adjStart:adjEnd], adjStart, adjEnd +} + +// bodyByteSlice returns body[start:end], nudging boundaries inward so the +// result is always valid UTF-8. MCP body APIs use byte offsets; without +// this, a window can split a multibyte rune (emoji, CJK, accented letters). +func bodyByteSlice(body string, start, end int) string { + text, _, _ := bodyByteSliceRange(body, start, end) + return text +} + +// contextWindow returns byte offsets [start, end) for a window of up to +// contextChars bytes centered on a match at pos with byte length termLen. +func contextWindow(bodyLen, pos, termLen, contextChars int) (start, end int) { + start = pos - (contextChars-termLen)/2 + end = start + contextChars + if start < 0 { + start = 0 + end = min(bodyLen, contextChars) + } else if end > bodyLen { + end = bodyLen + start = max(0, end-contextChars) + } + return start, end +} + +type getMessageResponse struct { + ID int64 `json:"id"` + SourceMessageID string `json:"source_message_id"` + ConversationID int64 `json:"conversation_id"` + SourceConversationID string `json:"source_conversation_id"` + Subject string `json:"subject"` + MessageType string `json:"message_type,omitempty"` + Snippet string `json:"snippet"` + SentAt time.Time `json:"sent_at"` + ReceivedAt *time.Time `json:"received_at,omitempty"` + DeletedAt *time.Time `json:"deleted_at,omitempty"` + SizeEstimate int64 `json:"size_estimate"` + HasAttachments bool `json:"has_attachments"` + From []query.Address `json:"from"` + To []query.Address `json:"to"` + Cc []query.Address `json:"cc"` + Bcc []query.Address `json:"bcc"` + BodyText string `json:"body_text"` + BodyHTML string `json:"body_html"` + BodyLength int `json:"body_length"` + BodyReturned int `json:"body_returned"` + Offset int `json:"offset"` + HasMore bool `json:"has_more"` + Labels []string `json:"labels"` + Attachments []query.AttachmentInfo `json:"attachments"` +} + func (h *handlers) getMessage(ctx context.Context, req mcp.CallToolRequest) (*mcp.CallToolResult, error) { args := req.GetArguments() @@ -656,8 +764,58 @@ func (h *handlers) getMessage(ctx context.Context, req mcp.CallToolRequest) (*mc if err != nil { return mcp.NewToolResultError(fmt.Sprintf("message not found: %v", err)), nil } + if msg == nil { + return mcp.NewToolResultError("message not found"), nil + } - return jsonResult(msg) + maxChars := intArg(args, "max_chars", defaultBodyChars) + if maxChars <= 0 { + maxChars = defaultBodyChars + } else if maxChars > maxBodyChars { + maxChars = maxBodyChars + } + + fullBody := msg.BodyText + bodyLen := len(fullBody) + + var start, end int + if centerAt := intArg(args, "center_at", -1); centerAt >= 0 { + // Center the window on the given byte offset (e.g. char_offset from + // search_in_message). contextWindow handles clamping to body boundaries. + start, end = contextWindow(bodyLen, centerAt, 0, maxChars) + } else { + start = min(intArg(args, "offset", 0), bodyLen) + end = min(start+maxChars, bodyLen) + } + + bodyText, sliceStart, sliceEnd := bodyByteSliceRange(fullBody, start, end) + + return jsonResult(getMessageResponse{ + ID: msg.ID, + SourceMessageID: msg.SourceMessageID, + ConversationID: msg.ConversationID, + SourceConversationID: msg.SourceConversationID, + Subject: msg.Subject, + MessageType: msg.MessageType, + Snippet: msg.Snippet, + SentAt: msg.SentAt, + ReceivedAt: msg.ReceivedAt, + DeletedAt: msg.DeletedAt, + SizeEstimate: msg.SizeEstimate, + HasAttachments: msg.HasAttachments, + From: msg.From, + To: msg.To, + Cc: msg.Cc, + Bcc: msg.Bcc, + BodyText: bodyText, + BodyHTML: "", + BodyLength: bodyLen, + BodyReturned: len(bodyText), + Offset: sliceStart, + HasMore: sliceEnd < bodyLen, + Labels: msg.Labels, + Attachments: msg.Attachments, + }) } const maxAttachmentSize = 50 * 1024 * 1024 // 50MB @@ -943,6 +1101,18 @@ func (h *handlers) aggregate(ctx context.Context, req mcp.CallToolRequest) (*mcp return jsonResult(rows) } +// intArg extracts a non-negative integer from a map, with a default. +func intArg(args map[string]any, key string, def int) int { + v, ok := args[key].(float64) + if !ok { + return def + } + if math.IsNaN(v) || v < 0 || math.IsInf(v, 1) || v > float64(math.MaxInt) { + return def + } + return int(v) +} + // limitArg extracts a non-negative integer limit from a map, with a default. // JSON numbers arrive as float64. Clamps to maxLimit to prevent excessive // result sets. diff --git a/internal/mcp/server.go b/internal/mcp/server.go index 58a3c23e..84052dab 100644 --- a/internal/mcp/server.go +++ b/internal/mcp/server.go @@ -234,12 +234,25 @@ func searchMessagesTool(vectorAvailable bool) mcp.Tool { func getMessageTool() mcp.Tool { return mcp.NewTool(ToolGetMessage, - mcp.WithDescription("Get full message details including body text, recipients, labels, and attachments by message ID."), + mcp.WithDescription("Get message details including recipients, labels, attachments, and a slice of body text (HTML omitted). "+ + "Body paging mirrors search pagination: body_length=total bytes, offset=where this chunk starts, body_returned=bytes in this chunk, has_more=more body follows. "+ + "To read sequentially: call again with offset += body_returned. "+ + "To jump to a match: use center_at= to center the window on that location. "+ + "Note: snippet is pre-stored source metadata (may be empty for non-Gmail sources)."), mcp.WithReadOnlyHintAnnotation(true), mcp.WithNumber("id", mcp.Required(), mcp.Description("Message ID"), ), + mcp.WithNumber("offset", + mcp.Description("Byte offset from the start of body_text to begin reading (default 0). Ignored when center_at is provided."), + ), + mcp.WithNumber("center_at", + mcp.Description("Byte offset from the start of body_text to center the window on (e.g. char_offset from search_in_message). Takes precedence over offset."), + ), + mcp.WithNumber("max_chars", + mcp.Description("Maximum body_text bytes to return (default 2000, max 4000). Values above 4000 are clamped to 4000; zero or negative values use the default."), + ), ) } diff --git a/internal/mcp/server_test.go b/internal/mcp/server_test.go index 5cdd97ac..c69f768e 100644 --- a/internal/mcp/server_test.go +++ b/internal/mcp/server_test.go @@ -11,6 +11,7 @@ import ( "strings" "testing" "time" + "unicode/utf8" "github.com/mark3labs/mcp-go/mcp" assertpkg "github.com/stretchr/testify/assert" @@ -65,6 +66,18 @@ type paginatedListMessages struct { HasMore bool `json:"has_more"` } +type getMessageResp struct { + ID int64 `json:"id"` + Subject string `json:"subject"` + BodyText string `json:"body_text"` + BodyHTML string `json:"body_html"` + BodyLength int `json:"body_length"` + BodyReturned int `json:"body_returned"` + Offset int `json:"offset"` + HasMore bool `json:"has_more"` + ConversationID int64 `json:"conversation_id"` +} + // newTestHandlers creates a handlers instance with the given mock engine. func newTestHandlers(eng *querytest.MockEngine) *handlers { return &handlers{engine: eng} @@ -600,6 +613,54 @@ func TestSearchMessages_HybridPagination_ProbeRowDetectsMore(t *testing.T) { assert.False(resp2.HasMore, "has_more page 2") } +func TestBodyByteSlice(t *testing.T) { + t.Run("ascii unchanged", func(t *testing.T) { + body := "hello world" + assertpkg.Equal(t, "hello", bodyByteSlice(body, 0, 5)) + }) + + t.Run("does not split multibyte rune", func(t *testing.T) { + body := "café" + s := bodyByteSlice(body, 0, 4) + assertpkg.True(t, utf8.ValidString(s), "result must be valid UTF-8: %q", s) + assertpkg.Equal(t, "caf", s) + }) + + t.Run("emoji not bisected", func(t *testing.T) { + body := strings.Repeat("a", 10) + "😀" + strings.Repeat("b", 10) + emojiStart := 10 + s := bodyByteSlice(body, emojiStart, emojiStart+2) + assertpkg.True(t, utf8.ValidString(s), "result must be valid UTF-8: %q", s) + wide := bodyByteSlice(body, emojiStart, emojiStart+4) + assertpkg.True(t, utf8.ValidString(wide)) + assertpkg.Equal(t, "😀", wide) + }) + + t.Run("returns adjusted offsets for paging", func(t *testing.T) { + assert := assertpkg.New(t) + body := "aaa😀bbb" + text, adjStart, adjEnd := bodyByteSliceRange(body, 0, 5) + assert.Equal("aaa", text) + assert.Equal(0, adjStart) + assert.Equal(3, adjEnd) + + text2, adjStart2, adjEnd2 := bodyByteSliceRange(body, 3, 8) + assert.True(utf8.ValidString(text2)) + assert.Equal(3, adjStart2) + assert.Equal("😀b", text2) + assert.Equal(8, adjEnd2) + }) + + t.Run("tiny window returns one rune", func(t *testing.T) { + assert := assertpkg.New(t) + body := "aaa😀bbb" + text, adjStart, adjEnd := bodyByteSliceRange(body, 3, 4) + assert.Equal("😀", text) + assert.Equal(3, adjStart) + assert.Equal(7, adjEnd) + }) +} + func TestSearchMessages_UnknownMode(t *testing.T) { h := newTestHandlers(&querytest.MockEngine{}) @@ -620,9 +681,158 @@ func TestGetMessage(t *testing.T) { h := newTestHandlers(eng) t.Run("found", func(t *testing.T) { - msg := runTool[query.MessageDetail](t, "get_message", h.getMessage, map[string]any{"id": float64(42)}) - assertpkg.Equal(t, "Test Message", msg.Subject, "subject") - assertpkg.Equal(t, "thread-xyz", msg.SourceConversationID, "SourceConversationID") + assert := assertpkg.New(t) + msg := runTool[getMessageResp](t, "get_message", h.getMessage, map[string]any{"id": float64(42)}) + assert.Equal("Test Message", msg.Subject, "subject") + assert.Equal("Hello world", msg.BodyText, "body_text") + assert.Empty(msg.BodyHTML, "body_html stripped") + assert.Equal(11, msg.BodyLength, "body_length") + assert.Equal(11, msg.BodyReturned, "body_returned") + assert.False(msg.HasMore, "has_more") + }) + + t.Run("truncates long body", func(t *testing.T) { + assert := assertpkg.New(t) + longBody := strings.Repeat("x", 5000) + eng2 := &querytest.MockEngine{ + Messages: map[int64]*query.MessageDetail{ + 50: testutil.NewMessageDetail(50).WithBodyText(longBody).BuildPtr(), + }, + } + h2 := newTestHandlers(eng2) + msg := runTool[getMessageResp](t, "get_message", h2.getMessage, map[string]any{"id": float64(50)}) + assert.Equal(5000, msg.BodyLength, "body_length") + assert.Equal(2000, msg.BodyReturned, "body_returned") + assert.Len(msg.BodyText, 2000, "truncated body_text") + assert.True(msg.HasMore, "has_more") + }) + + t.Run("offset pagination", func(t *testing.T) { + assert := assertpkg.New(t) + body := strings.Repeat("a", 3000) + eng2 := &querytest.MockEngine{ + Messages: map[int64]*query.MessageDetail{ + 51: testutil.NewMessageDetail(51).WithBodyText(body).BuildPtr(), + }, + } + h2 := newTestHandlers(eng2) + msg := runTool[getMessageResp](t, "get_message", h2.getMessage, map[string]any{ + "id": float64(51), + "offset": float64(2000), + }) + assert.Equal(2000, msg.Offset, "offset") + assert.Equal(1000, msg.BodyReturned, "body_returned") + assert.Len(msg.BodyText, 1000, "second page length") + assert.False(msg.HasMore, "has_more") + }) + + t.Run("center_at mid-body", func(t *testing.T) { + body := strings.Repeat("a", 1000) + "KEYWORD" + strings.Repeat("z", 1000) + matchOffset := 1000 + eng2 := &querytest.MockEngine{ + Messages: map[int64]*query.MessageDetail{ + 52: testutil.NewMessageDetail(52).WithBodyText(body).BuildPtr(), + }, + } + h2 := newTestHandlers(eng2) + msg := runTool[getMessageResp](t, "get_message", h2.getMessage, map[string]any{ + "id": float64(52), + "center_at": float64(matchOffset), + "max_chars": float64(200), + }) + assertpkg.Contains(t, msg.BodyText, "KEYWORD") + assertpkg.LessOrEqual(t, msg.Offset, matchOffset, "window starts before match") + assertpkg.LessOrEqual(t, len(msg.BodyText), 200, "respects max_chars") + }) + + t.Run("center_at near start", func(t *testing.T) { + body := "KEYWORD" + strings.Repeat("z", 1000) + eng2 := &querytest.MockEngine{ + Messages: map[int64]*query.MessageDetail{ + 53: testutil.NewMessageDetail(53).WithBodyText(body).BuildPtr(), + }, + } + h2 := newTestHandlers(eng2) + msg := runTool[getMessageResp](t, "get_message", h2.getMessage, map[string]any{ + "id": float64(53), + "center_at": float64(0), + "max_chars": float64(200), + }) + assertpkg.Contains(t, msg.BodyText, "KEYWORD") + assertpkg.Equal(t, 0, msg.Offset, "starts at body start") + }) + + t.Run("max_chars above cap clamps to 4000", func(t *testing.T) { + assert := assertpkg.New(t) + longBody := strings.Repeat("x", 5000) + eng2 := &querytest.MockEngine{ + Messages: map[int64]*query.MessageDetail{ + 54: testutil.NewMessageDetail(54).WithBodyText(longBody).BuildPtr(), + }, + } + h2 := newTestHandlers(eng2) + msg := runTool[getMessageResp](t, "get_message", h2.getMessage, map[string]any{ + "id": float64(54), + "max_chars": float64(5000), + }) + assert.Equal(4000, msg.BodyReturned, "body_returned") + assert.Len(msg.BodyText, 4000, "clamped body_text") + assert.True(msg.HasMore, "has_more") + }) + + t.Run("max_chars zero uses default", func(t *testing.T) { + assert := assertpkg.New(t) + longBody := strings.Repeat("x", 5000) + eng2 := &querytest.MockEngine{ + Messages: map[int64]*query.MessageDetail{ + 55: testutil.NewMessageDetail(55).WithBodyText(longBody).BuildPtr(), + }, + } + h2 := newTestHandlers(eng2) + msg := runTool[getMessageResp](t, "get_message", h2.getMessage, map[string]any{ + "id": float64(55), + "max_chars": float64(0), + }) + assert.Equal(2000, msg.BodyReturned, "body_returned") + assert.Len(msg.BodyText, 2000, "default body_text") + assert.True(msg.HasMore, "has_more") + }) + + t.Run("nil message without error", func(t *testing.T) { + eng2 := &querytest.MockEngine{ + GetMessageFunc: func(context.Context, int64) (*query.MessageDetail, error) { + return nil, nil //nolint:nilnil // mirrors Engine.GetMessage not-found contract + }, + } + h2 := newTestHandlers(eng2) + runToolExpectError(t, "get_message", h2.getMessage, map[string]any{"id": float64(42)}) + }) + + t.Run("utf8 sequential paging", func(t *testing.T) { + assert := assertpkg.New(t) + body := strings.Repeat("a", 10) + "😀" + strings.Repeat("b", 10) + eng2 := &querytest.MockEngine{ + Messages: map[int64]*query.MessageDetail{ + 56: testutil.NewMessageDetail(56).WithBodyText(body).BuildPtr(), + }, + } + h2 := newTestHandlers(eng2) + + var parts []string + offset := 0 + for { + msg := runTool[getMessageResp](t, "get_message", h2.getMessage, map[string]any{ + "id": float64(56), + "offset": float64(offset), + "max_chars": float64(5), + }) + parts = append(parts, msg.BodyText) + if !msg.HasMore { + break + } + offset += msg.BodyReturned + } + assert.Equal(body, strings.Join(parts, ""), "rejoined pages") }) errorCases := []struct { From 52ed0ea62d148f204fd355966e1e6c198c75d620 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sat, 27 Jun 2026 20:10:34 -0500 Subject: [PATCH 2/4] fix(mcp): keep html-only messages readable Windowed get_message responses are meant to protect MCP context, but blanking HTML made messages without a plain-text body appear unreadable. Preserve the context guard while falling back to a windowed body_html slice for HTML-only messages and advertise the selected body format so clients can interpret the response correctly. The get_message tool description also referred to a follow-up search_in_message tool that is not part of this branch, so keep the current schema self-contained until that tool lands. Generated with Codex (GPT-5) Co-authored-by: Codex --- internal/mcp/handlers.go | 19 ++++++++++++++++--- internal/mcp/server.go | 7 ++++--- internal/mcp/server_test.go | 29 +++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 6 deletions(-) diff --git a/internal/mcp/handlers.go b/internal/mcp/handlers.go index e83bb438..c04bfae4 100644 --- a/internal/mcp/handlers.go +++ b/internal/mcp/handlers.go @@ -744,6 +744,7 @@ type getMessageResponse struct { Bcc []query.Address `json:"bcc"` BodyText string `json:"body_text"` BodyHTML string `json:"body_html"` + BodyFormat string `json:"body_format,omitempty"` BodyLength int `json:"body_length"` BodyReturned int `json:"body_returned"` Offset int `json:"offset"` @@ -776,6 +777,11 @@ func (h *handlers) getMessage(ctx context.Context, req mcp.CallToolRequest) (*mc } fullBody := msg.BodyText + bodyFormat := "text" + if fullBody == "" && msg.BodyHTML != "" { + fullBody = msg.BodyHTML + bodyFormat = "html" + } bodyLen := len(fullBody) var start, end int @@ -788,7 +794,13 @@ func (h *handlers) getMessage(ctx context.Context, req mcp.CallToolRequest) (*mc end = min(start+maxChars, bodyLen) } - bodyText, sliceStart, sliceEnd := bodyByteSliceRange(fullBody, start, end) + bodySlice, sliceStart, sliceEnd := bodyByteSliceRange(fullBody, start, end) + bodyText := bodySlice + bodyHTML := "" + if bodyFormat == "html" { + bodyText = "" + bodyHTML = bodySlice + } return jsonResult(getMessageResponse{ ID: msg.ID, @@ -808,9 +820,10 @@ func (h *handlers) getMessage(ctx context.Context, req mcp.CallToolRequest) (*mc Cc: msg.Cc, Bcc: msg.Bcc, BodyText: bodyText, - BodyHTML: "", + BodyHTML: bodyHTML, + BodyFormat: bodyFormat, BodyLength: bodyLen, - BodyReturned: len(bodyText), + BodyReturned: len(bodySlice), Offset: sliceStart, HasMore: sliceEnd < bodyLen, Labels: msg.Labels, diff --git a/internal/mcp/server.go b/internal/mcp/server.go index 84052dab..c9a08e70 100644 --- a/internal/mcp/server.go +++ b/internal/mcp/server.go @@ -234,10 +234,11 @@ func searchMessagesTool(vectorAvailable bool) mcp.Tool { func getMessageTool() mcp.Tool { return mcp.NewTool(ToolGetMessage, - mcp.WithDescription("Get message details including recipients, labels, attachments, and a slice of body text (HTML omitted). "+ + mcp.WithDescription("Get message details including recipients, labels, attachments, and a slice of the message body. "+ + "Returns plain text when available; HTML-only messages return a body_html slice with body_format=html. "+ "Body paging mirrors search pagination: body_length=total bytes, offset=where this chunk starts, body_returned=bytes in this chunk, has_more=more body follows. "+ "To read sequentially: call again with offset += body_returned. "+ - "To jump to a match: use center_at= to center the window on that location. "+ + "To jump to a known match location: use center_at= to center the window on that location. "+ "Note: snippet is pre-stored source metadata (may be empty for non-Gmail sources)."), mcp.WithReadOnlyHintAnnotation(true), mcp.WithNumber("id", @@ -248,7 +249,7 @@ func getMessageTool() mcp.Tool { mcp.Description("Byte offset from the start of body_text to begin reading (default 0). Ignored when center_at is provided."), ), mcp.WithNumber("center_at", - mcp.Description("Byte offset from the start of body_text to center the window on (e.g. char_offset from search_in_message). Takes precedence over offset."), + mcp.Description("Byte offset from the start of the selected body to center the window on. Takes precedence over offset."), ), mcp.WithNumber("max_chars", mcp.Description("Maximum body_text bytes to return (default 2000, max 4000). Values above 4000 are clamped to 4000; zero or negative values use the default."), diff --git a/internal/mcp/server_test.go b/internal/mcp/server_test.go index c69f768e..67ffb9ee 100644 --- a/internal/mcp/server_test.go +++ b/internal/mcp/server_test.go @@ -71,6 +71,7 @@ type getMessageResp struct { Subject string `json:"subject"` BodyText string `json:"body_text"` BodyHTML string `json:"body_html"` + BodyFormat string `json:"body_format"` BodyLength int `json:"body_length"` BodyReturned int `json:"body_returned"` Offset int `json:"offset"` @@ -686,11 +687,30 @@ func TestGetMessage(t *testing.T) { assert.Equal("Test Message", msg.Subject, "subject") assert.Equal("Hello world", msg.BodyText, "body_text") assert.Empty(msg.BodyHTML, "body_html stripped") + assert.Equal("text", msg.BodyFormat, "body_format") assert.Equal(11, msg.BodyLength, "body_length") assert.Equal(11, msg.BodyReturned, "body_returned") assert.False(msg.HasMore, "has_more") }) + t.Run("html-only body returns html slice", func(t *testing.T) { + assert := assertpkg.New(t) + htmlBody := "

Hello world

" + eng2 := &querytest.MockEngine{ + Messages: map[int64]*query.MessageDetail{ + 57: testutil.NewMessageDetail(57).WithBodyText("").WithBodyHTML(htmlBody).BuildPtr(), + }, + } + h2 := newTestHandlers(eng2) + msg := runTool[getMessageResp](t, "get_message", h2.getMessage, map[string]any{"id": float64(57)}) + assert.Empty(msg.BodyText, "body_text") + assert.Equal(htmlBody, msg.BodyHTML, "body_html") + assert.Equal("html", msg.BodyFormat, "body_format") + assert.Equal(len(htmlBody), msg.BodyLength, "body_length") + assert.Equal(len(htmlBody), msg.BodyReturned, "body_returned") + assert.False(msg.HasMore, "has_more") + }) + t.Run("truncates long body", func(t *testing.T) { assert := assertpkg.New(t) longBody := strings.Repeat("x", 5000) @@ -852,6 +872,15 @@ func TestGetMessage(t *testing.T) { } } +func TestGetMessageToolDescriptionDoesNotReferenceFutureTools(t *testing.T) { + tool := getMessageTool() + assertpkg.NotContains(t, tool.Description, "search_in_message") + centerAt := tool.InputSchema.Properties["center_at"] + raw, err := json.Marshal(centerAt) + requirepkg.NoError(t, err, "marshal center_at schema") + assertpkg.NotContains(t, string(raw), "search_in_message") +} + func TestGetStats_VectorDisabled(t *testing.T) { assert := assertpkg.New(t) eng := &querytest.MockEngine{ From 74b8a60afe9f5e41bda8d03df3c3d9b7310e53b8 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sat, 27 Jun 2026 21:12:29 -0500 Subject: [PATCH 3/4] fix(mcp): allow html body selection Windowed get_message should default to plain text for context safety, but mixed text/html messages still need a discoverable way to read the HTML representation. Add an explicit body_format selector so clients can page either representation without restoring unbounded full-body responses. Generated with Codex (GPT-5) Co-authored-by: Codex --- internal/mcp/handlers.go | 29 +++++++++++++++++++++++------ internal/mcp/server.go | 8 ++++++-- internal/mcp/server_test.go | 24 ++++++++++++++++++++++++ 3 files changed, 53 insertions(+), 8 deletions(-) diff --git a/internal/mcp/handlers.go b/internal/mcp/handlers.go index c04bfae4..714b4631 100644 --- a/internal/mcp/handlers.go +++ b/internal/mcp/handlers.go @@ -29,6 +29,9 @@ const ( maxSearchMessagesLimit = 50 defaultSearchLimit = 20 defaultBodyChars = 2000 + bodyFormatAuto = "auto" + bodyFormatText = "text" + bodyFormatHTML = "html" // maxBodyChars caps the body slice returned by get_message regardless of what // the caller requests via max_chars. Prevents a single tool call from flooding // the context window; callers page forward using offset. @@ -776,18 +779,32 @@ func (h *handlers) getMessage(ctx context.Context, req mcp.CallToolRequest) (*mc maxChars = maxBodyChars } + requestedBodyFormat, _ := args["body_format"].(string) + if requestedBodyFormat == "" { + requestedBodyFormat = bodyFormatAuto + } + fullBody := msg.BodyText - bodyFormat := "text" - if fullBody == "" && msg.BodyHTML != "" { + bodyFormat := bodyFormatText + switch requestedBodyFormat { + case bodyFormatAuto: + if fullBody == "" && msg.BodyHTML != "" { + fullBody = msg.BodyHTML + bodyFormat = bodyFormatHTML + } + case bodyFormatText: + case bodyFormatHTML: fullBody = msg.BodyHTML - bodyFormat = "html" + bodyFormat = bodyFormatHTML + default: + return mcp.NewToolResultError("body_format must be one of auto, text, html"), nil } bodyLen := len(fullBody) var start, end int if centerAt := intArg(args, "center_at", -1); centerAt >= 0 { - // Center the window on the given byte offset (e.g. char_offset from - // search_in_message). contextWindow handles clamping to body boundaries. + // Center the window on the given byte offset. contextWindow handles + // clamping to body boundaries. start, end = contextWindow(bodyLen, centerAt, 0, maxChars) } else { start = min(intArg(args, "offset", 0), bodyLen) @@ -797,7 +814,7 @@ func (h *handlers) getMessage(ctx context.Context, req mcp.CallToolRequest) (*mc bodySlice, sliceStart, sliceEnd := bodyByteSliceRange(fullBody, start, end) bodyText := bodySlice bodyHTML := "" - if bodyFormat == "html" { + if bodyFormat == bodyFormatHTML { bodyText = "" bodyHTML = bodySlice } diff --git a/internal/mcp/server.go b/internal/mcp/server.go index c9a08e70..c0c95d88 100644 --- a/internal/mcp/server.go +++ b/internal/mcp/server.go @@ -246,13 +246,17 @@ func getMessageTool() mcp.Tool { mcp.Description("Message ID"), ), mcp.WithNumber("offset", - mcp.Description("Byte offset from the start of body_text to begin reading (default 0). Ignored when center_at is provided."), + mcp.Description("Byte offset from the start of the selected body to begin reading (default 0). Ignored when center_at is provided."), ), mcp.WithNumber("center_at", mcp.Description("Byte offset from the start of the selected body to center the window on. Takes precedence over offset."), ), mcp.WithNumber("max_chars", - mcp.Description("Maximum body_text bytes to return (default 2000, max 4000). Values above 4000 are clamped to 4000; zero or negative values use the default."), + mcp.Description("Maximum selected-body bytes to return (default 2000, max 4000). Values above 4000 are clamped to 4000; zero or negative values use the default."), + ), + mcp.WithString("body_format", + mcp.Description("Which body representation to page: auto (default, plain text when available, HTML fallback), text, or html."), + mcp.Enum(bodyFormatAuto, bodyFormatText, bodyFormatHTML), ), ) } diff --git a/internal/mcp/server_test.go b/internal/mcp/server_test.go index 67ffb9ee..95de48c7 100644 --- a/internal/mcp/server_test.go +++ b/internal/mcp/server_test.go @@ -711,6 +711,30 @@ func TestGetMessage(t *testing.T) { assert.False(msg.HasMore, "has_more") }) + t.Run("html format selects html from mixed body", func(t *testing.T) { + assert := assertpkg.New(t) + htmlBody := "

Hello HTML

" + eng2 := &querytest.MockEngine{ + Messages: map[int64]*query.MessageDetail{ + 58: testutil.NewMessageDetail(58). + WithBodyText("Hello text"). + WithBodyHTML(htmlBody). + BuildPtr(), + }, + } + h2 := newTestHandlers(eng2) + msg := runTool[getMessageResp](t, "get_message", h2.getMessage, map[string]any{ + "id": float64(58), + "body_format": "html", + }) + assert.Empty(msg.BodyText, "body_text") + assert.Equal(htmlBody, msg.BodyHTML, "body_html") + assert.Equal("html", msg.BodyFormat, "body_format") + assert.Equal(len(htmlBody), msg.BodyLength, "body_length") + assert.Equal(len(htmlBody), msg.BodyReturned, "body_returned") + assert.False(msg.HasMore, "has_more") + }) + t.Run("truncates long body", func(t *testing.T) { assert := assertpkg.New(t) longBody := strings.Repeat("x", 5000) From 3a4f0a3bba556f7ec17d2be956f790d1186ed4bc Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sun, 28 Jun 2026 09:06:16 -0500 Subject: [PATCH 4/4] fix(mcp): add explicit full body mode Windowed get_message defaults protect MCP context, but reading a whole long message through repeated pages reloads the same hydrated body each time. Add an explicit full_body escape hatch so clients can intentionally trade context budget for one full selected-body response until backend-side body slicing exists. Generated with Codex (GPT-5) Co-authored-by: Codex --- internal/mcp/handlers.go | 5 ++++- internal/mcp/server.go | 3 +++ internal/mcp/server_test.go | 24 ++++++++++++++++++++++++ 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/internal/mcp/handlers.go b/internal/mcp/handlers.go index 714b4631..6a893e4c 100644 --- a/internal/mcp/handlers.go +++ b/internal/mcp/handlers.go @@ -802,7 +802,10 @@ func (h *handlers) getMessage(ctx context.Context, req mcp.CallToolRequest) (*mc bodyLen := len(fullBody) var start, end int - if centerAt := intArg(args, "center_at", -1); centerAt >= 0 { + fullBodyRequested, _ := args["full_body"].(bool) + if fullBodyRequested { + start, end = 0, bodyLen + } else if centerAt := intArg(args, "center_at", -1); centerAt >= 0 { // Center the window on the given byte offset. contextWindow handles // clamping to body boundaries. start, end = contextWindow(bodyLen, centerAt, 0, maxChars) diff --git a/internal/mcp/server.go b/internal/mcp/server.go index c0c95d88..7d4dc720 100644 --- a/internal/mcp/server.go +++ b/internal/mcp/server.go @@ -258,6 +258,9 @@ func getMessageTool() mcp.Tool { mcp.Description("Which body representation to page: auto (default, plain text when available, HTML fallback), text, or html."), mcp.Enum(bodyFormatAuto, bodyFormatText, bodyFormatHTML), ), + mcp.WithBoolean("full_body", + mcp.Description("Return the complete selected body in one response, ignoring offset, center_at, and max_chars. Use only when the full content is explicitly needed."), + ), ) } diff --git a/internal/mcp/server_test.go b/internal/mcp/server_test.go index 95de48c7..0e9ab0a3 100644 --- a/internal/mcp/server_test.go +++ b/internal/mcp/server_test.go @@ -751,6 +751,30 @@ func TestGetMessage(t *testing.T) { assert.True(msg.HasMore, "has_more") }) + t.Run("full_body returns complete selected body", func(t *testing.T) { + assert := assertpkg.New(t) + longBody := strings.Repeat("x", 5000) + eng2 := &querytest.MockEngine{ + Messages: map[int64]*query.MessageDetail{ + 59: testutil.NewMessageDetail(59).WithBodyText(longBody).BuildPtr(), + }, + } + h2 := newTestHandlers(eng2) + msg := runTool[getMessageResp](t, "get_message", h2.getMessage, map[string]any{ + "id": float64(59), + "full_body": true, + "max_chars": float64(10), + "offset": float64(2000), + "center_at": float64(3000), + }) + assert.Equal(longBody, msg.BodyText, "body_text") + assert.Equal("text", msg.BodyFormat, "body_format") + assert.Equal(5000, msg.BodyLength, "body_length") + assert.Equal(5000, msg.BodyReturned, "body_returned") + assert.Equal(0, msg.Offset, "offset") + assert.False(msg.HasMore, "has_more") + }) + t.Run("offset pagination", func(t *testing.T) { assert := assertpkg.New(t) body := strings.Repeat("a", 3000)