From 0d0a26360cdf559a59b1af4e5b839e3614210fc8 Mon Sep 17 00:00:00 2001 From: MarioCadenas Date: Thu, 18 Jun 2026 20:43:24 -0700 Subject: [PATCH] feat: attribute outbound Databricks requests to AppKit consistently Only the service and user WorkspaceClients carried the AppKit User-Agent (@databricks/appkit/<version>). The cache client sent unknown/0.0.0, and the two raw-fetch sites (files upload, MCP connector) sent no AppKit User-Agent at all, since they bypass the SDK apiClient. Extract getClientOptions() into a shared context/client-options module (adding APPKIT_USER_AGENT for fetch sites with no apiClient), pass it to the cache client, and stamp the User-Agent on the files upload fetch (via apiClient.userAgent()) and both MCP fetches. Signed-off-by: MarioCadenas --- packages/appkit/src/cache/index.ts | 3 +- .../appkit/src/connectors/files/client.ts | 3 ++ .../src/connectors/files/tests/client.test.ts | 12 +++++++ packages/appkit/src/connectors/mcp/client.ts | 7 ++++ .../src/connectors/mcp/tests/client.test.ts | 4 +++ packages/appkit/src/context/client-options.ts | 34 +++++++++++++++++++ .../appkit/src/context/service-context.ts | 19 +---------- 7 files changed, 63 insertions(+), 19 deletions(-) create mode 100644 packages/appkit/src/context/client-options.ts diff --git a/packages/appkit/src/cache/index.ts b/packages/appkit/src/cache/index.ts index 7a948a822..641b59fac 100644 --- a/packages/appkit/src/cache/index.ts +++ b/packages/appkit/src/cache/index.ts @@ -2,6 +2,7 @@ import { createHash } from "node:crypto"; import { ApiError, WorkspaceClient } from "@databricks/sdk-experimental"; import type { CacheConfig, CacheEntry, CacheStorage } from "shared"; import { createLakebasePool } from "../connectors/lakebase"; +import { getClientOptions } from "../context/client-options"; import { AppKitError, ExecutionError, InitializationError } from "../errors"; import { createLogger } from "../logging/logger"; import type { Counter, TelemetryProvider } from "../telemetry"; @@ -170,7 
+171,7 @@ export class CacheManager { // try to use lakebase storage try { - const workspaceClient = new WorkspaceClient({}); + const workspaceClient = new WorkspaceClient({}, getClientOptions()); const pool = createLakebasePool({ workspaceClient }); const persistentStorage = new PersistentStorage(config, pool); diff --git a/packages/appkit/src/connectors/files/client.ts b/packages/appkit/src/connectors/files/client.ts index 93203fdb6..78b3b2de3 100644 --- a/packages/appkit/src/connectors/files/client.ts +++ b/packages/appkit/src/connectors/files/client.ts @@ -311,6 +311,9 @@ export class FilesConnector { const headers = new Headers({ "Content-Type": "application/octet-stream", + // This raw fetch bypasses apiClient, which would otherwise stamp the + // User-Agent; set it explicitly so the upload is attributed to AppKit. + "User-Agent": client.apiClient.userAgent(), }); const fetchOptions: RequestInit = { method: "PUT", headers, body }; diff --git a/packages/appkit/src/connectors/files/tests/client.test.ts b/packages/appkit/src/connectors/files/tests/client.test.ts index e7a4264c3..560764703 100644 --- a/packages/appkit/src/connectors/files/tests/client.test.ts +++ b/packages/appkit/src/connectors/files/tests/client.test.ts @@ -20,9 +20,13 @@ const { mockFilesApi, mockConfig, mockClient, MockApiError } = vi.hoisted( authenticate: vi.fn(), }; + const mockApiClient = { + userAgent: vi.fn(() => "@databricks/appkit/9.9.9"), + }; const mockClient = { files: mockFilesApi, config: mockConfig, + apiClient: mockApiClient, } as unknown as WorkspaceClient; class MockApiError extends Error { @@ -538,6 +542,14 @@ describe("FilesConnector", () => { expect(mockConfig.authenticate).toHaveBeenCalledWith(expect.any(Headers)); }); + test("stamps the AppKit User-Agent from the SDK apiClient", async () => { + await connector.upload(mockClient, "file.txt", "data"); + + const init = fetchSpy.mock.calls[0][1] as RequestInit; + const headers = init.headers as Headers; + 
expect(headers.get("User-Agent")).toBe("@databricks/appkit/9.9.9"); + }); + test("builds URL from client.config.host", async () => { await connector.upload(mockClient, "file.txt", "data"); diff --git a/packages/appkit/src/connectors/mcp/client.ts b/packages/appkit/src/connectors/mcp/client.ts index 5b80997c9..75baf531c 100644 --- a/packages/appkit/src/connectors/mcp/client.ts +++ b/packages/appkit/src/connectors/mcp/client.ts @@ -23,6 +23,7 @@ * transport. */ import type { AgentToolDefinition } from "shared"; +import { APPKIT_USER_AGENT } from "../../context/client-options"; import { createLogger } from "../../logging/logger"; import { assertResolvedHostSafe, @@ -423,6 +424,9 @@ export class AppKitMcpClient { const authHeaders = await this.resolveAuthHeaders(options); const headers: Record = { + // Raw fetch bypasses the SDK's apiClient; stamp the AppKit User-Agent so + // MCP traffic is attributed to AppKit. + "User-Agent": APPKIT_USER_AGENT, "Content-Type": "application/json", Accept: "application/json, text/event-stream", ...authHeaders, @@ -497,6 +501,9 @@ export class AppKitMcpClient { const authHeaders = await this.resolveAuthHeaders(options); const headers: Record = { + // Raw fetch bypasses the SDK's apiClient; stamp the AppKit User-Agent so + // MCP traffic is attributed to AppKit. 
+ "User-Agent": APPKIT_USER_AGENT, "Content-Type": "application/json", Accept: "application/json, text/event-stream", ...authHeaders, diff --git a/packages/appkit/src/connectors/mcp/tests/client.test.ts b/packages/appkit/src/connectors/mcp/tests/client.test.ts index 839d14f95..b6e1dd849 100644 --- a/packages/appkit/src/connectors/mcp/tests/client.test.ts +++ b/packages/appkit/src/connectors/mcp/tests/client.test.ts @@ -1,4 +1,5 @@ import { beforeEach, describe, expect, test, vi } from "vitest"; +import { APPKIT_USER_AGENT } from "../../../context/client-options"; import { AppKitMcpClient } from "../client"; import type { DnsLookup, McpHostPolicy } from "../host-policy"; @@ -143,7 +144,10 @@ describe("AppKitMcpClient — host allowlist", () => { for (const call of calls) { const headers = call.init.headers as Record; expect(headers.Authorization).toBe("Bearer SP-TOKEN"); + // Every MCP request is attributed to AppKit via User-Agent. + expect(headers["User-Agent"]).toBe(APPKIT_USER_AGENT); } + expect(APPKIT_USER_AGENT).toMatch(/^@databricks\/appkit\//); expect(client.canForwardWorkspaceAuth("genie-1")).toBe(true); }); diff --git a/packages/appkit/src/context/client-options.ts b/packages/appkit/src/context/client-options.ts new file mode 100644 index 000000000..0e41daa25 --- /dev/null +++ b/packages/appkit/src/context/client-options.ts @@ -0,0 +1,34 @@ +import type { ClientOptions } from "@databricks/sdk-experimental"; +import { coerce } from "semver"; +import { + name as productName, + version as productVersion, +} from "../../package.json"; + +/** + * SDK client options that stamp every `apiClient.request()` with an AppKit + * User-Agent (`@databricks/appkit/<version>`), so outbound Databricks traffic + * is attributable to AppKit. Use this for every `WorkspaceClient` AppKit + * constructs at runtime. 
+ */ +export function getClientOptions(): ClientOptions { + const isDev = process.env.NODE_ENV === "development"; + const semver = coerce(productVersion); + const normalizedVersion = (semver?.version ?? + productVersion) as ClientOptions["productVersion"]; + + return { + product: productName, + productVersion: normalizedVersion, + ...(isDev && { userAgentExtra: { mode: "dev" } }), + }; +} + +/** + * Product/version User-Agent string matching the SDK stamp, for raw `fetch` + * call sites that bypass the SDK's `apiClient` and have no client to derive it + * from (e.g. the MCP connector). + */ +export const APPKIT_USER_AGENT = `${productName}/${ + coerce(productVersion)?.version ?? productVersion +}`; diff --git a/packages/appkit/src/context/service-context.ts b/packages/appkit/src/context/service-context.ts index fa2f9c3ef..b3c13f422 100644 --- a/packages/appkit/src/context/service-context.ts +++ b/packages/appkit/src/context/service-context.ts @@ -5,16 +5,12 @@ import { type sql, WorkspaceClient, } from "@databricks/sdk-experimental"; -import { coerce } from "semver"; -import { - name as productName, - version as productVersion, -} from "../../package.json"; import { AuthenticationError, ConfigurationError, InitializationError, } from "../errors"; +import { getClientOptions } from "./client-options"; import type { UserContext } from "./user-context"; /** @@ -32,19 +28,6 @@ export interface ServiceContextState { workspaceId: Promise; } -function getClientOptions(): ClientOptions { - const isDev = process.env.NODE_ENV === "development"; - const semver = coerce(productVersion); - const normalizedVersion = (semver?.version ?? - productVersion) as ClientOptions["productVersion"]; - - return { - product: productName, - productVersion: normalizedVersion, - ...(isDev && { userAgentExtra: { mode: "dev" } }), - }; -} - /** * ServiceContext is a singleton that manages the service principal's * WorkspaceClient and shared resources like warehouse/workspace IDs.