Skip to content

Commit e069ef4

Browse files
authored
feat: session affinity + prompt cache for multi-turn conversations (#242)
* feat: session affinity for multi-turn conversations Route subsequent turns of a conversation to the same account that created the initial response. This fixes two issues caused by account rotation breaking conversation chains: 1. previous_response_id becoming invalid across accounts — the backend stores conversation state per-account, so switching accounts meant losing server-side history 2. Prompt cache misses — cache is per-account on the backend, rotating accounts forced full context reprocessing every turn Implementation: - SessionAffinityMap: responseId → entryId mapping with 4h TTL - acquire() accepts preferredEntryId hint, falls back to normal rotation - proxy-handler captures responseId from both streaming and non-streaming paths (onResponseId callback was previously discarded) - Request logs now show affinity=hit/miss, payload size, and usage stats * feat: prompt_cache_key + missing WebSocket fields Send prompt_cache_key (per-conversation UUID) in every request to enable backend prompt caching. The conversation ID is inherited across the previous_response_id chain via SessionAffinityMap. Also: - Forward service_tier on both WebSocket and HTTP paths (was dropped) - Send include: ["reasoning.encrypted_content"] when reasoning is active - Extend SessionAffinityMap with conversationId tracking --------- Co-authored-by: icebear0828 <icebear0828@users.noreply.github.com>
1 parent fa2d01f commit e069ef4

12 files changed

Lines changed: 269 additions & 14 deletions

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,17 @@
88

99
### Added
1010

11+
- Session affinity:同一对话链路由到同一账号,修复 `previous_response_id` 跨账号失效问题
12+
- `prompt_cache_key`:每个对话链生成唯一 UUID 传递给后端,启用 prompt cache
13+
- WebSocket 请求新增 `include: ["reasoning.encrypted_content"]`(reasoning 开启时自动设置)
14+
- 请求级监控日志:affinity hit/miss、payload 大小、usage 统计、大 payload 告警
1115
- E2E 测试:proxy-routes(36 cases)、dashboard-auth(9)、batch-label(11)、admin-general(11)、debug-routes(5)—— 覆盖率从 51% 提升至 ~75%
1216
- 单元测试:config-loader(16 cases)、config-schema(10)、codex-models(9)
1317
- account-import service 测试补充 RT rotation/fallback 2 cases
1418

1519
### Fixed
1620

21+
- 修复 `service_tier` 在 WebSocket 和 HTTP 两条路径均被丢弃的 bug — 现在正确转发给后端
1722
- 修复 `PUT /api/proxies/settings``PUT /api/proxies/:id` 路由参数 shadow 的 bug(Hono 按注册顺序匹配)
1823

1924
### Changed
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
import { describe, it, expect, afterEach } from "vitest";
2+
import { SessionAffinityMap } from "../session-affinity.js";
3+
4+
describe("SessionAffinityMap", () => {
5+
let map: SessionAffinityMap;
6+
7+
afterEach(() => {
8+
map?.dispose();
9+
});
10+
11+
it("records and looks up a mapping", () => {
12+
map = new SessionAffinityMap();
13+
map.record("resp_abc", "entry_123", "conv_1");
14+
expect(map.lookup("resp_abc")).toBe("entry_123");
15+
});
16+
17+
it("returns null for unknown response IDs", () => {
18+
map = new SessionAffinityMap();
19+
expect(map.lookup("resp_unknown")).toBeNull();
20+
});
21+
22+
it("overwrites previous mapping for same response ID", () => {
23+
map = new SessionAffinityMap();
24+
map.record("resp_abc", "entry_1", "conv_1");
25+
map.record("resp_abc", "entry_2", "conv_2");
26+
expect(map.lookup("resp_abc")).toBe("entry_2");
27+
});
28+
29+
it("expires entries after TTL", () => {
30+
map = new SessionAffinityMap(50); // 50ms TTL
31+
map.record("resp_abc", "entry_123", "conv_1");
32+
expect(map.lookup("resp_abc")).toBe("entry_123");
33+
34+
const start = Date.now();
35+
while (Date.now() - start < 60) {
36+
// busy wait
37+
}
38+
expect(map.lookup("resp_abc")).toBeNull();
39+
});
40+
41+
it("tracks size correctly", () => {
42+
map = new SessionAffinityMap();
43+
expect(map.size).toBe(0);
44+
map.record("resp_1", "entry_1", "conv_1");
45+
map.record("resp_2", "entry_2", "conv_2");
46+
expect(map.size).toBe(2);
47+
});
48+
49+
it("cleans up on dispose", () => {
50+
map = new SessionAffinityMap();
51+
map.record("resp_1", "entry_1", "conv_1");
52+
map.dispose();
53+
expect(map.size).toBe(0);
54+
});
55+
56+
// Conversation ID tracking
57+
it("looks up conversation ID for a response", () => {
58+
map = new SessionAffinityMap();
59+
map.record("resp_abc", "entry_1", "conv_xyz");
60+
expect(map.lookupConversationId("resp_abc")).toBe("conv_xyz");
61+
});
62+
63+
it("returns null conversation ID for unknown response", () => {
64+
map = new SessionAffinityMap();
65+
expect(map.lookupConversationId("resp_unknown")).toBeNull();
66+
});
67+
68+
it("conversation ID is inherited across response chain", () => {
69+
map = new SessionAffinityMap();
70+
// Turn 1: new conversation
71+
map.record("resp_1", "entry_1", "conv_abc");
72+
// Turn 2: inherit conv ID from turn 1
73+
const convId = map.lookupConversationId("resp_1");
74+
expect(convId).toBe("conv_abc");
75+
map.record("resp_2", "entry_1", convId!);
76+
// Turn 3: inherit from turn 2
77+
expect(map.lookupConversationId("resp_2")).toBe("conv_abc");
78+
});
79+
80+
it("expires conversation ID along with entry", () => {
81+
map = new SessionAffinityMap(50);
82+
map.record("resp_abc", "entry_1", "conv_1");
83+
84+
const start = Date.now();
85+
while (Date.now() - start < 60) {
86+
// busy wait
87+
}
88+
expect(map.lookupConversationId("resp_abc")).toBeNull();
89+
});
90+
});

src/auth/account-lifecycle.ts

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ export class AccountLifecycle {
4646
if (slots.length === 0) this.acquireLocks.delete(entryId);
4747
}
4848

49-
acquire(options?: { model?: string; excludeIds?: string[] }): AcquiredAccount | null {
49+
acquire(options?: { model?: string; excludeIds?: string[]; preferredEntryId?: string }): AcquiredAccount | null {
5050
const nowMs = Date.now();
5151
const now = new Date(nowMs);
5252

@@ -100,7 +100,14 @@ export class AccountLifecycle {
100100
}
101101
}
102102

103-
const selected = this.strategy.select(candidates, this.rotationState);
103+
// Session affinity: prefer the account that owns the conversation
104+
let selected: AccountEntry;
105+
if (options?.preferredEntryId) {
106+
const preferred = candidates.find((a) => a.id === options.preferredEntryId);
107+
selected = preferred ?? this.strategy.select(candidates, this.rotationState);
108+
} else {
109+
selected = this.strategy.select(candidates, this.rotationState);
110+
}
104111
const prevSlots = this.acquireLocks.get(selected.id);
105112
const prevSlotMs = prevSlots?.[prevSlots.length - 1] ?? null;
106113
this.pushSlot(selected.id);

src/auth/account-pool.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ export class AccountPool {
6464

6565
// ── Lifecycle (acquire/release) ───────────────────────────────────
6666

67-
acquire(options?: { model?: string; excludeIds?: string[] }): AcquiredAccount | null {
67+
acquire(options?: { model?: string; excludeIds?: string[]; preferredEntryId?: string }): AcquiredAccount | null {
6868
return this.lifecycle.acquire(options);
6969
}
7070

src/auth/session-affinity.ts

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
/**
2+
* Session affinity — maps Codex response IDs to account entry IDs.
3+
*
4+
* When a request includes `previous_response_id`, the proxy looks up which
5+
* account created that response and routes to the same account. This enables:
6+
* - Server-side conversation history reuse (previous_response_id chain)
7+
* - Prompt cache hits (cache is per-account on the backend)
8+
*/
9+
10+
interface AffinityEntry {
11+
entryId: string;
12+
conversationId: string;
13+
createdAt: number;
14+
}
15+
16+
const DEFAULT_TTL_MS = 4 * 60 * 60 * 1000; // 4 hours
17+
const CLEANUP_INTERVAL_MS = 10 * 60 * 1000; // 10 minutes
18+
19+
export class SessionAffinityMap {
20+
private map = new Map<string, AffinityEntry>();
21+
private ttlMs: number;
22+
private cleanupTimer: ReturnType<typeof setInterval> | null = null;
23+
24+
constructor(ttlMs: number = DEFAULT_TTL_MS) {
25+
this.ttlMs = ttlMs;
26+
this.cleanupTimer = setInterval(() => this.cleanup(), CLEANUP_INTERVAL_MS);
27+
}
28+
29+
/** Record that a response was created by a specific account in a conversation. */
30+
record(responseId: string, entryId: string, conversationId: string): void {
31+
this.map.set(responseId, { entryId, conversationId, createdAt: Date.now() });
32+
}
33+
34+
/** Look up which account created a given response. */
35+
lookup(responseId: string): string | null {
36+
const entry = this.getEntry(responseId);
37+
return entry?.entryId ?? null;
38+
}
39+
40+
/** Look up the conversation ID for a given response. */
41+
lookupConversationId(responseId: string): string | null {
42+
const entry = this.getEntry(responseId);
43+
return entry?.conversationId ?? null;
44+
}
45+
46+
private getEntry(responseId: string): AffinityEntry | null {
47+
const entry = this.map.get(responseId);
48+
if (!entry) return null;
49+
if (Date.now() - entry.createdAt > this.ttlMs) {
50+
this.map.delete(responseId);
51+
return null;
52+
}
53+
return entry;
54+
}
55+
56+
/** Remove expired entries. */
57+
private cleanup(): void {
58+
const now = Date.now();
59+
for (const [key, entry] of this.map) {
60+
if (now - entry.createdAt > this.ttlMs) {
61+
this.map.delete(key);
62+
}
63+
}
64+
}
65+
66+
get size(): number {
67+
return this.map.size;
68+
}
69+
70+
dispose(): void {
71+
if (this.cleanupTimer) {
72+
clearInterval(this.cleanupTimer);
73+
this.cleanupTimer = null;
74+
}
75+
this.map.clear();
76+
}
77+
}
78+
79+
/** Singleton instance. */
80+
let instance: SessionAffinityMap | null = null;
81+
82+
export function getSessionAffinityMap(): SessionAffinityMap {
83+
if (!instance) {
84+
instance = new SessionAffinityMap();
85+
}
86+
return instance;
87+
}

src/proxy/codex-api.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,9 @@ export class CodexApi {
210210
if (request.tools?.length) wsRequest.tools = request.tools;
211211
if (request.tool_choice) wsRequest.tool_choice = request.tool_choice;
212212
if (request.text) wsRequest.text = request.text;
213+
if (request.service_tier) wsRequest.service_tier = request.service_tier;
214+
if (request.prompt_cache_key) wsRequest.prompt_cache_key = request.prompt_cache_key;
215+
if (request.include?.length) wsRequest.include = request.include;
213216

214217
return createWebSocketResponse(wsUrl, headers, wsRequest, signal, this.proxyUrl);
215218
}
@@ -233,7 +236,7 @@ export class CodexApi {
233236
headers["Accept"] = "text/event-stream";
234237
headers["OpenAI-Beta"] = "responses_websockets=2026-02-06";
235238

236-
const { service_tier: _st, previous_response_id: _pid, useWebSocket: _ws, ...bodyFields } = request;
239+
const { previous_response_id: _pid, useWebSocket: _ws, ...bodyFields } = request;
237240
const body = JSON.stringify(bodyFields);
238241

239242
let transportRes;

src/proxy/codex-types.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@ export interface CodexResponsesRequest {
2828
};
2929
/** Optional: reference a previous response for multi-turn (WebSocket only). */
3030
previous_response_id?: string;
31+
/** Prompt cache key — stable per-conversation UUID for backend prompt caching. */
32+
prompt_cache_key?: string;
33+
/** Include additional response data (e.g. "reasoning.encrypted_content"). */
34+
include?: string[];
3135
/** When true, use WebSocket transport (enables previous_response_id and server-side storage). */
3236
useWebSocket?: boolean;
3337
}

src/proxy/ws-transport.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@ export interface WsCreateRequest {
4949
strict?: boolean;
5050
};
5151
};
52+
service_tier?: string | null;
53+
prompt_cache_key?: string;
54+
include?: string[];
5255
// NOTE: `store` and `stream` are intentionally omitted.
5356
// The backend defaults to storing via WebSocket and always streams.
5457
}

src/routes/shared/__tests__/account-acquisition.test.ts

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,18 @@ describe("acquireAccount", () => {
2828

2929
const result = acquireAccount(pool as never, "gpt-5.4", ["x1"], "OpenAI");
3030

31-
expect(pool.acquire).toHaveBeenCalledWith({ model: "gpt-5.4", excludeIds: ["x1"] });
31+
expect(pool.acquire).toHaveBeenCalledWith({ model: "gpt-5.4", excludeIds: ["x1"], preferredEntryId: undefined });
3232
expect(result).toEqual({ entryId: "e1", token: "t1", accountId: "a1" });
3333
});
3434

35+
it("passes preferredEntryId for session affinity", () => {
36+
pool.acquire.mockReturnValue({ entryId: "e1", token: "t1", accountId: "a1" });
37+
38+
acquireAccount(pool as never, "gpt-5.4", undefined, "OpenAI", "e1");
39+
40+
expect(pool.acquire).toHaveBeenCalledWith({ model: "gpt-5.4", excludeIds: undefined, preferredEntryId: "e1" });
41+
});
42+
3543
it("returns null when pool has no available account", () => {
3644
pool.acquire.mockReturnValue(null);
3745

@@ -45,7 +53,7 @@ describe("acquireAccount", () => {
4553

4654
acquireAccount(pool as never, "gpt-5.4", undefined, "OpenAI");
4755

48-
expect(pool.acquire).toHaveBeenCalledWith({ model: "gpt-5.4", excludeIds: undefined });
56+
expect(pool.acquire).toHaveBeenCalledWith({ model: "gpt-5.4", excludeIds: undefined, preferredEntryId: undefined });
4957
});
5058
});
5159

src/routes/shared/account-acquisition.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,9 @@ export function acquireAccount(
1717
model: string,
1818
excludeIds?: string[],
1919
tag?: string,
20+
preferredEntryId?: string,
2021
): AcquiredAccount | null {
21-
const acquired = pool.acquire({ model, excludeIds });
22+
const acquired = pool.acquire({ model, excludeIds, preferredEntryId });
2223
if (!acquired && tag) {
2324
console.warn(`[${tag}] No available account for model "${model}"`);
2425
}

0 commit comments

Comments
 (0)