From 24aed9ccd3b68396643d1bd28b048d7c95e20e23 Mon Sep 17 00:00:00 2001 From: Atila Fassina Date: Thu, 23 Apr 2026 15:03:03 +0200 Subject: [PATCH 01/13] feat(files): policy-gated SP fallback on HTTP routes (phase 1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 1 of 2 — remove pre-policy 401 on missing x-forwarded-user; let the volume policy decide via { id: , isServicePrincipal: true }. asUser(req) keeps strict throw semantics; single logger.debug on fallback replaces the dev-mode logger.warn. Startup no-explicit-policy warning broadened to mention header-less HTTP. Tests rewritten to codify new contract. Two pre-existing auto-generated files reformatted to match biome. Co-authored-by: Isaac Signed-off-by: Atila Fassina --- packages/appkit/src/plugins/files/plugin.ts | 31 +-- .../files/tests/plugin.integration.test.ts | 116 ++++++++- .../src/plugins/files/tests/plugin.test.ts | 227 ++++++++++++++---- 3 files changed, 312 insertions(+), 62 deletions(-) diff --git a/packages/appkit/src/plugins/files/plugin.ts b/packages/appkit/src/plugins/files/plugin.ts index 75f2e14db..282390765 100644 --- a/packages/appkit/src/plugins/files/plugin.ts +++ b/packages/appkit/src/plugins/files/plugin.ts @@ -100,19 +100,12 @@ export class FilesPlugin extends Plugin { } /** - * Extract user identity from the request. - * Falls back to `getCurrentUserId()` in development mode. + * Strict extraction for `VolumeHandle.asUser(req)` — throws when the header + * is missing. HTTP routes use an inline silent fallback instead. */ private _extractUser(req: express.Request): FilePolicyUser { const userId = req.header("x-forwarded-user")?.trim(); if (userId) return { id: userId }; - if (process.env.NODE_ENV === "development") { - logger.warn( - "No x-forwarded-user header — falling back to service principal identity for policy checks. " + - "Ensure your proxy forwards user headers to test per-user policies.", - ); - return { id: getCurrentUserId() }; - } throw AuthenticationError.missingToken( "Missing x-forwarded-user header. Cannot resolve user ID.", ); @@ -152,7 +145,8 @@ export class FilesPlugin extends Plugin { /** * HTTP-level wrapper around `_checkPolicy`. - * Extracts user (401 on failure), runs policy (403 on denial). + * Resolves the user inline (header when present, otherwise the SP identity), + * then runs the volume policy (403 on denial, 500 on unexpected error). * Returns `true` if the request may proceed, `false` if a response was sent. */ private async _enforcePolicy( @@ -163,15 +157,15 @@ export class FilesPlugin extends Plugin { path: string, resourceOverrides?: Partial, ): Promise { + const headerUserId = req.header("x-forwarded-user")?.trim(); let user: FilePolicyUser; - try { - user = this._extractUser(req); - } catch (error) { - if (error instanceof AuthenticationError) { - res.status(401).json({ error: error.message, plugin: this.name }); - return false; - } - throw error; + if (headerUserId) { + user = { id: headerUserId }; + } else { + logger.debug( + "No x-forwarded-user header — proceeding with service principal identity for policy evaluation.", + ); + user = { id: getCurrentUserId(), isServicePrincipal: true }; } try { @@ -231,6 +225,7 @@ export class FilesPlugin extends Plugin { if (!volumes[key].policy) { logger.warn( 'Volume "%s" has no explicit policy — defaulting to publicRead(). ' + + "This also matches header-less HTTP requests (which run as the service principal). " + "Set a policy in files({ volumes: { %s: { policy: ... } } }) to silence this warning.", key, key, diff --git a/packages/appkit/src/plugins/files/tests/plugin.integration.test.ts b/packages/appkit/src/plugins/files/tests/plugin.integration.test.ts index da90760db..3c836f9b8 100644 --- a/packages/appkit/src/plugins/files/tests/plugin.integration.test.ts +++ b/packages/appkit/src/plugins/files/tests/plugin.integration.test.ts @@ -439,15 +439,125 @@ describe("Files Plugin Integration", () => { }); describe("Service principal execution", () => { - test("requests without user token return 401 (policy requires user identity)", async () => { + test("header-less request + default publicRead() + list → 200 (policy decides)", async () => { + mockFilesApi.listDirectoryContents.mockReturnValue( + (async function* () { + yield { + name: "sp-file.txt", + path: "/Volumes/catalog/schema/vol/sp-file.txt", + is_directory: false, + }; + })(), + ); + // Use a unique path to avoid cached results from earlier tests const response = await fetch( `${baseUrl}/api/files/${VOL}/list?path=sp-only`, ); - expect(response.status).toBe(401); + expect(response.status).toBe(200); + }); + + test("header-less request + default publicRead() + upload → 403", async () => { + const response = await fetch( + `${baseUrl}/api/files/${VOL}/upload?path=/Volumes/catalog/schema/vol/sp-upload.bin`, + { + method: "POST", + headers: { "content-length": "0" }, + }, + ); + + expect(response.status).toBe(403); const data = (await response.json()) as { error: string }; - expect(data.error).toMatch(/x-forwarded-user/); + expect(data.error).toMatch(/Policy denied/); + }); + + test("header-less request + denyAll() volume → 403", async () => { + const denySpy = vi.fn().mockReturnValue(false); + const appkit = await createApp({ + plugins: [ + serverPlugin({ + port: TEST_PORT + 1, + host: "127.0.0.1", + autoStart: false, + }), + files({ + volumes: { + files: { policy: denySpy }, + }, + }), + ], + }); + + try { + await appkit.server.start(); + const localBase = `http://127.0.0.1:${TEST_PORT + 1}`; + + const response = await fetch( + `${localBase}/api/files/${VOL}/list?path=denied`, + ); + + expect(response.status).toBe(403); + expect(denySpy).toHaveBeenCalled(); + const userArg = denySpy.mock.calls[0][2]; + expect(userArg.isServicePrincipal).toBe(true); + } finally { + const srv = appkit.server.getServer(); + if (srv) { + await new Promise((resolve, reject) => { + srv.close((err) => (err ? reject(err) : resolve())); + }); + } + } + }); + + test("header-less HTTP request → custom policy observes isServicePrincipal: true", async () => { + const policySpy = vi.fn().mockReturnValue(true); + const appkit = await createApp({ + plugins: [ + serverPlugin({ + port: TEST_PORT + 2, + host: "127.0.0.1", + autoStart: false, + }), + files({ + volumes: { + files: { policy: policySpy }, + }, + }), + ], + }); + + try { + await appkit.server.start(); + const localBase = `http://127.0.0.1:${TEST_PORT + 2}`; + + mockFilesApi.listDirectoryContents.mockReturnValue( + (async function* () { + yield { + name: "spy-file.txt", + path: "/Volumes/catalog/schema/vol/spy-file.txt", + is_directory: false, + }; + })(), + ); + + const response = await fetch( + `${localBase}/api/files/${VOL}/list?path=spy`, + ); + + expect(response.status).toBe(200); + expect(policySpy).toHaveBeenCalledTimes(1); + const userArg = policySpy.mock.calls[0][2]; + expect(userArg.isServicePrincipal).toBe(true); + } finally { + const srv = appkit.server.getServer(); + if (srv) { + await new Promise((resolve, reject) => { + srv.close((err) => (err ? reject(err) : resolve())); + }); + } + } }); test("requests with user headers also succeed", async () => { diff --git a/packages/appkit/src/plugins/files/tests/plugin.test.ts b/packages/appkit/src/plugins/files/tests/plugin.test.ts index a4b9bea22..79587a1e4 100644 --- a/packages/appkit/src/plugins/files/tests/plugin.test.ts +++ b/packages/appkit/src/plugins/files/tests/plugin.test.ts @@ -271,40 +271,12 @@ describe("FilesPlugin", () => { } }); - test("asUser without user header in production → throws AuthenticationError", () => { - const originalEnv = process.env.NODE_ENV; - process.env.NODE_ENV = "production"; - - try { - const plugin = new FilesPlugin(VOLUMES_CONFIG); - const handle = plugin.exports()("uploads"); - const mockReq = { header: () => undefined } as any; - - expect(() => handle.asUser(mockReq)).toThrow(AuthenticationError); - } finally { - process.env.NODE_ENV = originalEnv; - } - }); - - test("asUser in dev mode returns VolumeAPI with all 9 methods", () => { - const originalEnv = process.env.NODE_ENV; - process.env.NODE_ENV = "development"; - - try { - const plugin = new FilesPlugin(VOLUMES_CONFIG); - const handle = plugin.exports()("uploads"); - const mockReq = { - header: (name: string) => - name === "x-forwarded-user" ? "test-user" : undefined, - } as any; - const api = handle.asUser(mockReq); + test("asUser without user header → throws AuthenticationError", () => { + const plugin = new FilesPlugin(VOLUMES_CONFIG); + const handle = plugin.exports()("uploads"); + const mockReq = { header: () => undefined } as any; - for (const method of volumeMethods) { - expect(typeof (api as any)[method]).toBe("function"); - } - } finally { - process.env.NODE_ENV = originalEnv; - } + expect(() => handle.asUser(mockReq)).toThrow(AuthenticationError); }); test("direct methods on handle work as service principal", () => { @@ -988,19 +960,189 @@ describe("FilesPlugin", () => { delete process.env.DATABRICKS_VOLUME_WRITEONLY; }); - test("policy volume + no user header (production) → 401", async () => { - const originalEnv = process.env.NODE_ENV; - process.env.NODE_ENV = "production"; + test("header-less HTTP + default publicRead() + read action → 200 with SP user", async () => { + const policySpy = vi.fn().mockReturnValue(true); + const spyConfig = { + volumes: { + spied: { policy: policySpy }, + uploads: {}, + exports: {}, + }, + }; + process.env.DATABRICKS_VOLUME_SPIED = "/Volumes/c/s/spied"; + + try { + const plugin = new FilesPlugin(spyConfig); + const handler = getRouteHandler(plugin, "get", "/list"); + const res = mockRes(); + + mockClient.files.listDirectoryContents.mockImplementation( + async function* () { + yield { name: "h.txt", path: "/h.txt", is_directory: false }; + }, + ); + + const noUserHeaders: Record = {}; + await handler( + { + params: { volumeKey: "spied" }, + query: {}, + headers: noUserHeaders, + header: (name: string) => noUserHeaders[name.toLowerCase()], + }, + res, + ); + + const statusCodes = (res.status.mock.calls as number[][]).map( + (c) => c[0], + ); + expect(statusCodes).not.toContain(401); + expect(statusCodes).not.toContain(403); + expect(policySpy).toHaveBeenCalledWith( + "list", + expect.objectContaining({ volume: "spied" }), + expect.objectContaining({ + id: "test-service-principal", + isServicePrincipal: true, + }), + ); + } finally { + delete process.env.DATABRICKS_VOLUME_SPIED; + } + }); + + test("header-less HTTP + default publicRead() + write action → 403 with SP user", async () => { + const plugin = new FilesPlugin(POLICY_CONFIG); + const handler = getRouteHandler(plugin, "post", "/upload"); + const res = mockRes(); + + const noUserHeaders: Record = { + "content-length": "100", + }; + await handler( + { + params: { volumeKey: "uploads" }, + query: { path: "/test.bin" }, + headers: noUserHeaders, + header: (name: string) => noUserHeaders[name.toLowerCase()], + }, + res, + ); + + expect(res.status).toHaveBeenCalledWith(403); + expect(res.json).toHaveBeenCalledWith( + expect.objectContaining({ + error: expect.stringContaining("Policy denied"), + }), + ); + }); + + test("header-less HTTP + denyAll() → 403 with SP user observed by policy", async () => { + const policySpy = vi.fn(policy.denyAll()); + const spyConfig = { + volumes: { + denied: { policy: policySpy }, + uploads: {}, + exports: {}, + }, + }; + process.env.DATABRICKS_VOLUME_DENIED = "/Volumes/c/s/denied"; + + try { + const plugin = new FilesPlugin(spyConfig); + const handler = getRouteHandler(plugin, "get", "/list"); + const res = mockRes(); + + const noUserHeaders: Record = {}; + await handler( + { + params: { volumeKey: "denied" }, + query: {}, + headers: noUserHeaders, + header: (name: string) => noUserHeaders[name.toLowerCase()], + }, + res, + ); + + expect(res.status).toHaveBeenCalledWith(403); + expect(policySpy).toHaveBeenCalledWith( + "list", + expect.objectContaining({ volume: "denied" }), + expect.objectContaining({ isServicePrincipal: true }), + ); + } finally { + delete process.env.DATABRICKS_VOLUME_DENIED; + } + }); + + test("header-less HTTP request → policy spy observes { isServicePrincipal: true } and decision is honored", async () => { + const allowSpy = vi.fn().mockResolvedValue(true); + const allowConfig = { + volumes: { + gated: { policy: allowSpy }, + uploads: {}, + exports: {}, + }, + }; + process.env.DATABRICKS_VOLUME_GATED = "/Volumes/c/s/gated"; + + try { + const plugin = new FilesPlugin(allowConfig); + const handler = getRouteHandler(plugin, "get", "/list"); + const res = mockRes(); + + mockClient.files.listDirectoryContents.mockImplementation( + async function* () { + yield { name: "g.txt", path: "/g.txt", is_directory: false }; + }, + ); + + const noUserHeaders: Record = {}; + await handler( + { + params: { volumeKey: "gated" }, + query: {}, + headers: noUserHeaders, + header: (name: string) => noUserHeaders[name.toLowerCase()], + }, + res, + ); + + expect(allowSpy).toHaveBeenCalledTimes(1); + const userArg = allowSpy.mock.calls[0][2]; + expect(userArg.isServicePrincipal).toBe(true); + expect(userArg.id).toBe("test-service-principal"); + + const statusCodes = (res.status.mock.calls as number[][]).map( + (c) => c[0], + ); + expect(statusCodes).not.toContain(401); + expect(statusCodes).not.toContain(403); + } finally { + delete process.env.DATABRICKS_VOLUME_GATED; + } + }); + + test("header-less HTTP request + policy returns false → 403 (decision honored)", async () => { + const denySpy = vi.fn().mockResolvedValue(false); + const denyConfig = { + volumes: { + gated: { policy: denySpy }, + uploads: {}, + exports: {}, + }, + }; + process.env.DATABRICKS_VOLUME_GATED = "/Volumes/c/s/gated"; + try { - const plugin = new FilesPlugin(POLICY_CONFIG); + const plugin = new FilesPlugin(denyConfig); const handler = getRouteHandler(plugin, "get", "/list"); const res = mockRes(); - // Override both headers to undefined so _extractUser has no user const noUserHeaders: Record = {}; await handler( { - params: { volumeKey: "public" }, + params: { volumeKey: "gated" }, query: {}, headers: noUserHeaders, header: (name: string) => noUserHeaders[name.toLowerCase()], @@ -1008,9 +1150,12 @@ describe("FilesPlugin", () => { res, ); - expect(res.status).toHaveBeenCalledWith(401); + expect(denySpy).toHaveBeenCalledTimes(1); + const userArg = denySpy.mock.calls[0][2]; + expect(userArg.isServicePrincipal).toBe(true); + expect(res.status).toHaveBeenCalledWith(403); } finally { - process.env.NODE_ENV = originalEnv; + delete process.env.DATABRICKS_VOLUME_GATED; } }); From 631abc99e28f1044ef906edcab5c7b948a8bbff3 Mon Sep 17 00:00:00 2001 From: Atila Fassina Date: Thu, 23 Apr 2026 15:11:19 +0200 Subject: [PATCH 02/13] feat(appkit): files plugin policy docs and JSDoc (phase 2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 2 of 2 — non-behavioral polish. JSDoc on FilePolicyUser.id / isServicePrincipal broadened to describe header-less HTTP as a valid SP call origin. VolumeHandle JSDoc notes asUser(req) throws AuthenticationError.missingToken regardless of NODE_ENV. Files-plugin docs paragraphs that implied x-forwarded-user was mandatory have been reworded. Auto-regenerated typedoc for FilePolicyUser included. Co-authored-by: Isaac Signed-off-by: Atila Fassina --- .../api/appkit/Interface.FilePolicyUser.md | 10 ++++++++- docs/docs/plugins/files.md | 21 +++++++++++++------ packages/appkit/src/plugins/files/policy.ts | 13 +++++++++++- packages/appkit/src/plugins/files/types.ts | 4 +++- 4 files changed, 39 insertions(+), 9 deletions(-) diff --git a/docs/docs/api/appkit/Interface.FilePolicyUser.md b/docs/docs/api/appkit/Interface.FilePolicyUser.md index 0c4bae2fc..7be87fce2 100644 --- a/docs/docs/api/appkit/Interface.FilePolicyUser.md +++ b/docs/docs/api/appkit/Interface.FilePolicyUser.md @@ -10,6 +10,11 @@ Minimal user identity passed to the policy function. id: string; ``` +Identifier of the requesting caller. For end-user HTTP requests this is +the value of the `x-forwarded-user` header; for direct SDK calls and +header-less HTTP requests (which run as the service principal), this is +the service principal's ID. + *** ### isServicePrincipal? @@ -18,4 +23,7 @@ id: string; optional isServicePrincipal: boolean; ``` -`true` when the caller is the service principal (direct SDK call, not `asUser`). +`true` when the call is executing as the service principal — either a +direct SDK call (`appKit.files(...)`) or an HTTP request that arrived +without an `x-forwarded-user` header. Policy authors typically check +this first to distinguish SP traffic from end-user traffic. diff --git a/docs/docs/plugins/files.md b/docs/docs/plugins/files.md index c823a5819..3e0294f6e 100644 --- a/docs/docs/plugins/files.md +++ b/docs/docs/plugins/files.md @@ -121,7 +121,7 @@ There are three layers of access control in the files plugin. Understanding how - **UC grants** control what the service principal can do at the Databricks level. These are set at deploy time via `app.yaml` resource bindings. The SP needs `WRITE_VOLUME` — the plugin declares this via resource requirements. - **Execution identity** determines whose credentials are used for the actual API call. HTTP routes always use the SP. The programmatic API uses SP by default but supports `asUser(req)` for OBO. -- **File policies** are application-level checks evaluated **before** the API call. They receive the requesting user's identity (from the `x-forwarded-user` header) and decide allow/deny. This is the only gate that distinguishes between users on HTTP routes. +- **File policies** are application-level checks evaluated **before** the API call. They receive a `FilePolicyUser` describing the caller and decide allow/deny. On HTTP routes the user is extracted from the `x-forwarded-user` header when present; when the header is absent, the policy receives `{ id: , isServicePrincipal: true }` and decides for itself whether to allow service-principal traffic. This is the only gate that distinguishes between users on HTTP routes. :::warning @@ -233,7 +233,7 @@ Dangerous MIME types (`text/html`, `text/javascript`, `application/javascript`, ## HTTP routes -Routes are mounted at `/api/files/*`. All routes execute as the service principal. Policy enforcement checks user identity (from the `x-forwarded-user` header) before allowing operations — see [Access policies](#access-policies). +Routes are mounted at `/api/files/*`. All routes execute as the service principal. Before each operation the volume policy runs: user identity comes from the `x-forwarded-user` header when present, otherwise the policy is handed `{ id: , isServicePrincipal: true }` and decides whether to allow the call. See [Access policies](#access-policies). | Method | Path | Query / Body | Response | | ------ | -------------------------- | ---------------------------- | ------------------------------------------------- | @@ -369,9 +369,18 @@ interface FileResource { } interface FilePolicyUser { - /** User ID from the `x-forwarded-user` header. */ + /** + * Identifier of the requesting caller. For end-user HTTP requests this is + * the value of the `x-forwarded-user` header; for direct SDK calls and + * header-less HTTP requests (which run as the service principal), this + * is the service principal's ID. + */ id: string; - /** `true` when the caller is the service principal (direct SDK call, not `asUser`). */ + /** + * `true` when the call is executing as the service principal — either a + * direct SDK call (`appKit.files(...)`) or an HTTP request that arrived + * without an `x-forwarded-user` header. + */ isServicePrincipal?: boolean; } @@ -420,9 +429,9 @@ Built-in extensions: `.png`, `.jpg`, `.jpeg`, `.gif`, `.webp`, `.svg`, `.bmp`, ` ## User context -HTTP routes always execute as the **service principal** — the SP's Databricks credentials are used for all API calls. User identity is extracted from the `x-forwarded-user` header and passed to the volume's [access policy](#access-policies) for authorization. This means UC grants on the SP (not individual users) determine what operations are possible, while policies control what each user is allowed to do through the app. +HTTP routes always execute as the **service principal** — the SP's Databricks credentials are used for all API calls. User identity is extracted from the `x-forwarded-user` header and passed to the volume's [access policy](#access-policies) for authorization. When the header is absent the policy is handed `{ id: , isServicePrincipal: true }` and decides whether to allow the call — in practice that branch only fires in development without a reverse proxy or when an upstream proxy is misconfigured, since real Databricks Apps runtimes always forward the header. This means UC grants on the SP (not individual users) determine what operations are possible, while policies control what each user is allowed to do through the app. -The programmatic API returns a `VolumeHandle` that exposes all `VolumeAPI` methods directly (service principal) and an `asUser(req)` method for OBO access. Calling any method without `asUser()` logs a warning encouraging OBO usage but does not throw. OBO access is strongly recommended for production use. +The programmatic API returns a `VolumeHandle` that exposes all `VolumeAPI` methods directly (service principal) and an `asUser(req)` method for OBO access. Calling any method without `asUser()` runs the policy with `isServicePrincipal: true`. `asUser(req)` throws `AuthenticationError.missingToken` when the `x-forwarded-user` header is missing, regardless of `NODE_ENV`. OBO access is strongly recommended for production use. ## Resource requirements diff --git a/packages/appkit/src/plugins/files/policy.ts b/packages/appkit/src/plugins/files/policy.ts index 87b23f377..54875ca0f 100644 --- a/packages/appkit/src/plugins/files/policy.ts +++ b/packages/appkit/src/plugins/files/policy.ts @@ -57,8 +57,19 @@ export interface FileResource { /** Minimal user identity passed to the policy function. */ export interface FilePolicyUser { + /** + * Identifier of the requesting caller. For end-user HTTP requests this is + * the value of the `x-forwarded-user` header; for direct SDK calls and + * header-less HTTP requests (which run as the service principal), this is + * the service principal's ID. + */ id: string; - /** `true` when the caller is the service principal (direct SDK call, not `asUser`). */ + /** + * `true` when the call is executing as the service principal — either a + * direct SDK call (`appKit.files(...)`) or an HTTP request that arrived + * without an `x-forwarded-user` header. Policy authors typically check + * this first to distinguish SP traffic from end-user traffic. + */ isServicePrincipal?: boolean; } diff --git a/packages/appkit/src/plugins/files/types.ts b/packages/appkit/src/plugins/files/types.ts index 82b546886..0dc27b7fa 100644 --- a/packages/appkit/src/plugins/files/types.ts +++ b/packages/appkit/src/plugins/files/types.ts @@ -87,7 +87,9 @@ export interface FilePreview extends FileMetadata { * * All methods execute as the service principal and enforce the volume's * policy (if configured) with `{ isServicePrincipal: true }`. - * `asUser(req)` re-wraps with the real user identity for per-user policy checks. + * `asUser(req)` re-wraps with the real user identity for per-user policy + * checks; it throws `AuthenticationError.missingToken` when the + * `x-forwarded-user` header is missing, regardless of `NODE_ENV`. */ export type VolumeHandle = VolumeAPI & { asUser: (req: IAppRequest) => VolumeAPI; From b20e24d64076858bc6f3c2193091c68ef70b6df3 Mon Sep 17 00:00:00 2001 From: Atila Fassina Date: Thu, 23 Apr 2026 15:31:50 +0200 Subject: [PATCH 03/13] chore: update dev fallback and docs --- docs/docs/plugins/files.md | 2 +- packages/appkit/src/plugins/files/plugin.ts | 12 +++++-- .../src/plugins/files/tests/plugin.test.ts | 33 ++++++++++++++++--- packages/appkit/src/plugins/files/types.ts | 6 ++-- 4 files changed, 43 insertions(+), 10 deletions(-) diff --git a/docs/docs/plugins/files.md b/docs/docs/plugins/files.md index 3e0294f6e..18295567d 100644 --- a/docs/docs/plugins/files.md +++ b/docs/docs/plugins/files.md @@ -431,7 +431,7 @@ Built-in extensions: `.png`, `.jpg`, `.jpeg`, `.gif`, `.webp`, `.svg`, `.bmp`, ` HTTP routes always execute as the **service principal** — the SP's Databricks credentials are used for all API calls. User identity is extracted from the `x-forwarded-user` header and passed to the volume's [access policy](#access-policies) for authorization. When the header is absent the policy is handed `{ id: , isServicePrincipal: true }` and decides whether to allow the call — in practice that branch only fires in development without a reverse proxy or when an upstream proxy is misconfigured, since real Databricks Apps runtimes always forward the header. This means UC grants on the SP (not individual users) determine what operations are possible, while policies control what each user is allowed to do through the app. -The programmatic API returns a `VolumeHandle` that exposes all `VolumeAPI` methods directly (service principal) and an `asUser(req)` method for OBO access. Calling any method without `asUser()` runs the policy with `isServicePrincipal: true`. `asUser(req)` throws `AuthenticationError.missingToken` when the `x-forwarded-user` header is missing, regardless of `NODE_ENV`. OBO access is strongly recommended for production use. +The programmatic API returns a `VolumeHandle` that exposes all `VolumeAPI` methods directly (service principal) and an `asUser(req)` method for OBO access. Calling any method without `asUser()` runs the policy with `isServicePrincipal: true`. In production, `asUser(req)` throws `AuthenticationError.missingToken` when the `x-forwarded-user` header is missing. In development (`NODE_ENV === "development"`) it falls back to the service principal instead, so local testing without a reverse proxy continues to work. ## Resource requirements diff --git a/packages/appkit/src/plugins/files/plugin.ts b/packages/appkit/src/plugins/files/plugin.ts index 282390765..a9526a03e 100644 --- a/packages/appkit/src/plugins/files/plugin.ts +++ b/packages/appkit/src/plugins/files/plugin.ts @@ -100,12 +100,20 @@ export class FilesPlugin extends Plugin { } /** - * Strict extraction for `VolumeHandle.asUser(req)` — throws when the header - * is missing. HTTP routes use an inline silent fallback instead. + * Extraction for `VolumeHandle.asUser(req)`. Throws in production when the + * header is missing. In development (`NODE_ENV === "development"`) falls + * back to the service principal so local testing without a reverse proxy + * works. HTTP routes use an inline silent fallback regardless of NODE_ENV. */ private _extractUser(req: express.Request): FilePolicyUser { const userId = req.header("x-forwarded-user")?.trim(); if (userId) return { id: userId }; + if (process.env.NODE_ENV === "development") { + logger.debug( + "No x-forwarded-user header on asUser(req) — falling back to service principal identity (dev mode).", + ); + return { id: getCurrentUserId(), isServicePrincipal: true }; + } throw AuthenticationError.missingToken( "Missing x-forwarded-user header. Cannot resolve user ID.", ); diff --git a/packages/appkit/src/plugins/files/tests/plugin.test.ts b/packages/appkit/src/plugins/files/tests/plugin.test.ts index 79587a1e4..8af73ee0d 100644 --- a/packages/appkit/src/plugins/files/tests/plugin.test.ts +++ b/packages/appkit/src/plugins/files/tests/plugin.test.ts @@ -271,12 +271,35 @@ describe("FilesPlugin", () => { } }); - test("asUser without user header → throws AuthenticationError", () => { - const plugin = new FilesPlugin(VOLUMES_CONFIG); - const handle = plugin.exports()("uploads"); - const mockReq = { header: () => undefined } as any; + test("asUser without user header in production → throws AuthenticationError", () => { + const originalEnv = process.env.NODE_ENV; + process.env.NODE_ENV = "production"; + try { + const plugin = new FilesPlugin(VOLUMES_CONFIG); + const handle = plugin.exports()("uploads"); + const mockReq = { header: () => undefined } as any; + + expect(() => handle.asUser(mockReq)).toThrow(AuthenticationError); + } finally { + process.env.NODE_ENV = originalEnv; + } + }); - expect(() => handle.asUser(mockReq)).toThrow(AuthenticationError); + test("asUser without user header in development → falls back to SP identity", () => { + const originalEnv = process.env.NODE_ENV; + process.env.NODE_ENV = "development"; + try { + const plugin = new FilesPlugin(VOLUMES_CONFIG); + const handle = plugin.exports()("uploads"); + const mockReq = { header: () => undefined } as any; + + // Does not throw; returns a VolumeAPI that will run the policy with + // { isServicePrincipal: true } (matching the HTTP-path collapsed semantic). + const api = handle.asUser(mockReq); + expect(typeof api.list).toBe("function"); + } finally { + process.env.NODE_ENV = originalEnv; + } }); test("direct methods on handle work as service principal", () => { diff --git a/packages/appkit/src/plugins/files/types.ts b/packages/appkit/src/plugins/files/types.ts index 0dc27b7fa..54a9b3692 100644 --- a/packages/appkit/src/plugins/files/types.ts +++ b/packages/appkit/src/plugins/files/types.ts @@ -88,8 +88,10 @@ export interface FilePreview extends FileMetadata { * All methods execute as the service principal and enforce the volume's * policy (if configured) with `{ isServicePrincipal: true }`. * `asUser(req)` re-wraps with the real user identity for per-user policy - * checks; it throws `AuthenticationError.missingToken` when the - * `x-forwarded-user` header is missing, regardless of `NODE_ENV`. + * checks. In production it throws `AuthenticationError.missingToken` when + * the `x-forwarded-user` header is missing; in development + * (`NODE_ENV === "development"`) it falls back to the service principal so + * local testing without a reverse proxy continues to work. */ export type VolumeHandle = VolumeAPI & { asUser: (req: IAppRequest) => VolumeAPI; From 2cfcfa40818c2537e7bef4208b0a50f6066aa16a Mon Sep 17 00:00:00 2001 From: Atila Fassina Date: Mon, 27 Apr 2026 11:14:26 +0200 Subject: [PATCH 04/13] feat(files): per-volume auth field + _resolveAuth helper (phase 1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds optional `auth: "service-principal" | "on-behalf-of-user"` to both VolumeConfig and IFilesConfig. Resolution order: volume.auth ?? plugin.auth ?? "service-principal". Removes the undocumented `bypassPolicy` parameter from createVolumeAPI (zero callsites in packages/ or apps/, so no consumers to migrate). Phase 1 of files-per-volume-auth-mode — config surface only, no routing or SDK identity change yet. _resolveAuth is unused at this point and will be wired in Phase 2. Co-authored-by: Isaac Signed-off-by: Atila Fassina --- packages/appkit/src/plugins/files/plugin.ts | 29 +++--- .../src/plugins/files/tests/plugin.test.ts | 92 +++++++++++++++++++ packages/appkit/src/plugins/files/types.ts | 12 +++ 3 files changed, 121 insertions(+), 12 deletions(-) diff --git a/packages/appkit/src/plugins/files/plugin.ts b/packages/appkit/src/plugins/files/plugin.ts index a9526a03e..f5dfa2cd5 100644 --- a/packages/appkit/src/plugins/files/plugin.ts +++ b/packages/appkit/src/plugins/files/plugin.ts @@ -217,6 +217,7 @@ export class FilesPlugin extends Plugin { customContentTypes: volumeCfg.customContentTypes ?? config.customContentTypes, policy: volumeCfg.policy ?? policy.publicRead(), + auth: volumeCfg.auth, }; this.volumeConfigs[key] = mergedConfig; @@ -956,27 +957,31 @@ export class FilesPlugin extends Plugin { } } + private _resolveAuth( + volumeKey: string, + ): "service-principal" | "on-behalf-of-user" { + return ( + this.volumeConfigs[volumeKey]?.auth ?? + this.config.auth ?? + "service-principal" + ); + } + /** * Creates a VolumeAPI for a specific volume key. * - * By default, enforces the volume's policy before each operation. - * Pass `bypassPolicy: true` to skip policy checks — useful for - * background jobs or migrations that should bypass user-facing policies. - * - * @security When `bypassPolicy` is `true`, no policy enforcement runs. - * Do not expose bypassed APIs to HTTP routes or end-user code paths. + * Enforces the volume's policy before each operation. */ protected createVolumeAPI( volumeKey: string, user: FilePolicyUser, - options?: { bypassPolicy?: boolean }, ): VolumeAPI { const connector = this.volumeConnectors[volumeKey]; - const noop = () => Promise.resolve(); - const check = options?.bypassPolicy - ? noop - : (action: FileAction, path: string, overrides?: Partial) => - this._checkPolicy(volumeKey, action, path, user, overrides); + const check = ( + action: FileAction, + path: string, + overrides?: Partial, + ) => this._checkPolicy(volumeKey, action, path, user, overrides); return { list: async (directoryPath?: string) => { diff --git a/packages/appkit/src/plugins/files/tests/plugin.test.ts b/packages/appkit/src/plugins/files/tests/plugin.test.ts index 8af73ee0d..c678356b3 100644 --- a/packages/appkit/src/plugins/files/tests/plugin.test.ts +++ b/packages/appkit/src/plugins/files/tests/plugin.test.ts @@ -1,6 +1,7 @@ import { mockServiceContext, setupDatabricksEnv } from "@tools/test-helpers"; import { afterEach, beforeEach, describe, expect, test, vi } from "vitest"; import { ServiceContext } from "../../../context/service-context"; +import { createApp } from "../../../core"; import { AuthenticationError } from "../../../errors"; import { ResourceType } from "../../../registry"; import { @@ -68,6 +69,7 @@ vi.mock("../../../context", async (importOriginal) => { vi.mock("../../../cache", () => ({ CacheManager: { getInstanceSync: vi.fn(() => mockCacheInstance), + getInstance: vi.fn(async () => mockCacheInstance), }, })); @@ -1841,6 +1843,96 @@ describe("FilesPlugin", () => { }); }); + describe("_resolveAuth config inheritance", () => { + test("volume-level auth overrides plugin default", () => { + const plugin = new FilesPlugin({ + auth: "service-principal", + volumes: { + uploads: { auth: "on-behalf-of-user" }, + exports: {}, + }, + }); + expect((plugin as any)._resolveAuth("uploads")).toBe("on-behalf-of-user"); + }); + + test("volume without auth inherits plugin default", () => { + const plugin = new FilesPlugin({ + auth: "on-behalf-of-user", + volumes: { + uploads: {}, + exports: {}, + }, + }); + expect((plugin as any)._resolveAuth("exports")).toBe("on-behalf-of-user"); + }); + + test("neither volume nor plugin sets auth → defaults to service-principal", () => { + const plugin = new FilesPlugin({ + volumes: { + uploads: {}, + exports: {}, + }, + }); + expect((plugin as any)._resolveAuth("uploads")).toBe("service-principal"); + }); + + test("createApp round-trip preserves auth field through public factory", async () => { + // Satisfy the manifest's static `Files` resource requirement so + // ResourceRegistry.enforceValidation passes during createApp. + process.env.DATABRICKS_VOLUME_FILES = "/Volumes/catalog/schema/files"; + + // Capture the FilesPlugin instance constructed by createApp by spying on + // exports() (called when AppKit's plugin getter fires). This exercises + // the public construction path so any future runtime config validator + // (e.g. Zod) that drops the `auth` field would break this test. + let captured: FilesPlugin | undefined; + const exportsSpy = vi + .spyOn(FilesPlugin.prototype, "exports") + .mockImplementation(function (this: FilesPlugin) { + captured = this; + // Return a minimal stub; we only care about capturing `this`. + const stub = (() => { + throw new Error("not used in this test"); + }) as unknown as ReturnType; + (stub as any).volume = () => { + throw new Error("not used in this test"); + }; + return stub; + }); + + try { + const appkit = await createApp({ + plugins: [ + files({ + auth: "on-behalf-of-user", + volumes: { + uploads: { auth: "service-principal" }, + exports: {}, + }, + }), + ], + }); + + // Trigger the AppKit getter so wrapWithAsUser invokes exports() + // and our spy captures the FilesPlugin instance. + void (appkit as unknown as { files: unknown }).files; + + expect(captured).toBeInstanceOf(FilesPlugin); + // Volume override wins over plugin-level default. + expect((captured as any)._resolveAuth("uploads")).toBe( + "service-principal", + ); + // Volume with no explicit auth inherits the plugin default. + expect((captured as any)._resolveAuth("exports")).toBe( + "on-behalf-of-user", + ); + } finally { + exportsSpy.mockRestore(); + delete process.env.DATABRICKS_VOLUME_FILES; + } + }); + }); + describe("Upload Stream Size Limiter", () => { test("stream under limit passes through all chunks", async () => { const maxSize = 100; diff --git a/packages/appkit/src/plugins/files/types.ts b/packages/appkit/src/plugins/files/types.ts index 54a9b3692..ce15c540f 100644 --- a/packages/appkit/src/plugins/files/types.ts +++ b/packages/appkit/src/plugins/files/types.ts @@ -15,6 +15,13 @@ export interface VolumeConfig { * service principal and the policy decides whether the action is allowed. */ policy?: FilePolicy; + /** + * Per-volume auth mode. When `"on-behalf-of-user"`, route handlers and + * programmatic calls execute Unity Catalog SDK operations as the end user + * instead of the service principal. Inherits from `IFilesConfig.auth` if + * not set; defaults to `"service-principal"`. + */ + auth?: "service-principal" | "on-behalf-of-user"; } /** @@ -50,6 +57,11 @@ export interface IFilesConfig extends BasePluginConfig { customContentTypes?: Record; /** Maximum upload size in bytes. Defaults to 5 GB (Databricks Files API v2 limit). */ maxUploadSize?: number; + /** + * Plugin-level default auth mode for all volumes. Each volume can override + * via `VolumeConfig.auth`. Defaults to `"service-principal"` if not set. + */ + auth?: "service-principal" | "on-behalf-of-user"; } /** A single entry returned when listing a directory. Re-exported from `@databricks/sdk-experimental`. */ From 2b357e4e0ea4f33ac4c66de5a7747b413f8dd53c Mon Sep 17 00:00:00 2001 From: Atila Fassina Date: Mon, 27 Apr 2026 11:23:56 +0200 Subject: [PATCH 05/13] feat(appkit): files OBO identity extraction + policy gate (phase 2) Adds `_extractObiUser` to the files plugin and wires identity selection into `_enforcePolicy` so it branches on `_resolveAuth(volumeKey)`: - service-principal volumes: existing inline extraction (unchanged) - on-behalf-of-user volumes: require x-forwarded-access-token + user headers; 401 in production when missing, dev-fallback to SP with a single warn in development Only the policy-user identity changes here. UC SDK calls still execute as the service principal until Phase 3 wires `runInUserContext`. Phase 2 of files-per-volume-auth-mode. Co-authored-by: Isaac Signed-off-by: Atila Fassina --- packages/appkit/src/plugins/files/plugin.ts | 72 +++++- packages/appkit/src/plugins/files/policy.ts | 5 + .../src/plugins/files/tests/plugin.test.ts | 237 ++++++++++++++++++ 3 files changed, 304 insertions(+), 10 deletions(-) diff --git a/packages/appkit/src/plugins/files/plugin.ts b/packages/appkit/src/plugins/files/plugin.ts index f5dfa2cd5..233737894 100644 --- a/packages/appkit/src/plugins/files/plugin.ts +++ b/packages/appkit/src/plugins/files/plugin.ts @@ -119,6 +119,34 @@ export class FilesPlugin extends Plugin { ); } + /** + * Extraction for OBO (on-behalf-of-user) volumes on the HTTP path. Both the + * `x-forwarded-access-token` and `x-forwarded-user` headers must be present + * for a valid end-user identity. When the token is missing: + * - In production we throw `AuthenticationError.missingToken` so the route + * responds with 401 (no SDK call is made). + * - In development (`NODE_ENV === "development"`) we emit a single warning + * and fall back to the service principal identity so local testing + * without a reverse proxy continues to work. + */ + private _extractObiUser(req: express.Request): FilePolicyUser { + const token = req.header("x-forwarded-access-token")?.trim(); + const userId = req.header("x-forwarded-user")?.trim(); + if (token && userId) { + return { id: userId, isServicePrincipal: false }; + } + if (!token && process.env.NODE_ENV === "development") { + logger.warn( + "OBO volume requested without x-forwarded-access-token — falling back to service principal identity (dev mode). " + + "In production this request would 401.", + ); + return { id: getCurrentUserId(), isServicePrincipal: true }; + } + throw AuthenticationError.missingToken( + "Missing x-forwarded-access-token header for on-behalf-of-user volume.", + ); + } + /** * Check the policy for a volume. No-op if no policy is configured. * Throws `PolicyDeniedError` if denied. @@ -153,9 +181,20 @@ export class FilesPlugin extends Plugin { /** * HTTP-level wrapper around `_checkPolicy`. - * Resolves the user inline (header when present, otherwise the SP identity), - * then runs the volume policy (403 on denial, 500 on unexpected error). + * Selects the policy user based on the volume's auth mode (resolved via + * `_resolveAuth`): + * - `"service-principal"` (default): use the `x-forwarded-user` header when + * present, otherwise fall back to the SP identity (legacy behavior). + * - `"on-behalf-of-user"`: require `x-forwarded-access-token` (and the + * matching `x-forwarded-user`); 401 in production when missing, + * dev-fallback to SP identity in development. + * Then runs the volume policy (403 on denial, 500 on unexpected error). * Returns `true` if the request may proceed, `false` if a response was sent. + * + * NOTE: This method only selects which identity the policy sees. It does + * NOT change which `WorkspaceClient` the SDK calls execute against — that + * still uses the service principal until a later phase wires the actual + * OBO `runInUserContext` plumbing. */ private async _enforcePolicy( req: express.Request, @@ -165,15 +204,28 @@ export class FilesPlugin extends Plugin { path: string, resourceOverrides?: Partial, ): Promise { - const headerUserId = req.header("x-forwarded-user")?.trim(); let user: FilePolicyUser; - if (headerUserId) { - user = { id: headerUserId }; - } else { - logger.debug( - "No x-forwarded-user header — proceeding with service principal identity for policy evaluation.", - ); - user = { id: getCurrentUserId(), isServicePrincipal: true }; + try { + const auth = this._resolveAuth(volumeKey); + if (auth === "on-behalf-of-user") { + user = this._extractObiUser(req); + } else { + const headerUserId = req.header("x-forwarded-user")?.trim(); + if (headerUserId) { + user = { id: headerUserId }; + } else { + logger.debug( + "No x-forwarded-user header — proceeding with service principal identity for policy evaluation.", + ); + user = { id: getCurrentUserId(), isServicePrincipal: true }; + } + } + } catch (error) { + if (error instanceof AuthenticationError) { + res.status(401).json({ error: error.message, plugin: this.name }); + return false; + } + throw error; } try { diff --git a/packages/appkit/src/plugins/files/policy.ts b/packages/appkit/src/plugins/files/policy.ts index 54875ca0f..3d2366fcd 100644 --- a/packages/appkit/src/plugins/files/policy.ts +++ b/packages/appkit/src/plugins/files/policy.ts @@ -69,6 +69,11 @@ export interface FilePolicyUser { * direct SDK call (`appKit.files(...)`) or an HTTP request that arrived * without an `x-forwarded-user` header. Policy authors typically check * this first to distinguish SP traffic from end-user traffic. + * + * `false` is set explicitly on the OBO (on-behalf-of-user) HTTP path when + * a valid `x-forwarded-access-token` and `x-forwarded-user` were both + * present on the request — i.e. the policy is running with a real + * end-user identity rather than the service principal. */ isServicePrincipal?: boolean; } diff --git a/packages/appkit/src/plugins/files/tests/plugin.test.ts b/packages/appkit/src/plugins/files/tests/plugin.test.ts index c678356b3..b0054f0ec 100644 --- a/packages/appkit/src/plugins/files/tests/plugin.test.ts +++ b/packages/appkit/src/plugins/files/tests/plugin.test.ts @@ -1933,6 +1933,243 @@ describe("FilesPlugin", () => { }); }); + describe("OBO identity selection", () => { + function getRouteHandler( + plugin: FilesPlugin, + method: "get" | "post" | "delete", + pathSuffix: string, + ) { + const mockRouter = { + use: vi.fn(), + get: vi.fn(), + post: vi.fn(), + put: vi.fn(), + delete: vi.fn(), + patch: vi.fn(), + } as any; + plugin.injectRoutes(mockRouter); + const call = mockRouter[method].mock.calls.find( + (c: unknown[]) => + typeof c[0] === "string" && (c[0] as string).endsWith(pathSuffix), + ); + return call[call.length - 1] as (req: any, res: any) => Promise; + } + + function mockRes() { + const res: any = { headersSent: false }; + res.status = vi.fn().mockReturnValue(res); + res.json = vi.fn().mockReturnValue(res); + res.type = vi.fn().mockReturnValue(res); + res.send = vi.fn().mockReturnValue(res); + res.setHeader = vi.fn().mockReturnValue(res); + res.end = vi.fn(); + return res; + } + + function mockReq( + volumeKey: string, + headers: Record, + overrides: Record = {}, + ) { + return { + params: { volumeKey }, + query: {}, + ...overrides, + headers, + header: (name: string) => headers[name.toLowerCase()], + }; + } + + let originalNodeEnv: string | undefined; + + beforeEach(() => { + originalNodeEnv = process.env.NODE_ENV; + process.env.DATABRICKS_VOLUME_OBO_VOL = "/Volumes/c/s/obo"; + }); + + afterEach(() => { + if (originalNodeEnv === undefined) { + delete process.env.NODE_ENV; + } else { + process.env.NODE_ENV = originalNodeEnv; + } + delete process.env.DATABRICKS_VOLUME_OBO_VOL; + }); + + test("OBO volume + valid token → policy receives { isServicePrincipal: false, id: }", async () => { + const policySpy = vi.fn().mockReturnValue(true); + const plugin = new FilesPlugin({ + volumes: { + obo_vol: { auth: "on-behalf-of-user", policy: policySpy }, + uploads: {}, + exports: {}, + }, + }); + const handler = getRouteHandler(plugin, "get", "/list"); + const res = mockRes(); + + mockClient.files.listDirectoryContents.mockImplementation( + async function* () { + yield { name: "o.txt", path: "/o.txt", is_directory: false }; + }, + ); + + await handler( + mockReq("obo_vol", { + "x-forwarded-access-token": "test-token", + "x-forwarded-user": "alice@example.com", + }), + res, + ); + + expect(policySpy).toHaveBeenCalledTimes(1); + const userArg = policySpy.mock.calls[0][2]; + expect(userArg).toEqual({ + id: "alice@example.com", + isServicePrincipal: false, + }); + + const statusCodes = (res.status.mock.calls as number[][]).map( + (c) => c[0], + ); + expect(statusCodes).not.toContain(401); + expect(statusCodes).not.toContain(403); + }); + + test("OBO volume + missing token + NODE_ENV != 'development' → 401, no SDK call", async () => { + process.env.NODE_ENV = "production"; + const policySpy = vi.fn().mockReturnValue(true); + const plugin = new FilesPlugin({ + volumes: { + obo_vol: { auth: "on-behalf-of-user", policy: policySpy }, + uploads: {}, + exports: {}, + }, + }); + const handler = getRouteHandler(plugin, "get", "/list"); + const res = mockRes(); + + // No x-forwarded-access-token header. + await handler( + mockReq("obo_vol", { + "x-forwarded-user": "alice@example.com", + }), + res, + ); + + expect(res.status).toHaveBeenCalledWith(401); + const errBody = res.json.mock.calls[0][0]; + expect(errBody).toEqual( + expect.objectContaining({ + error: expect.stringContaining("Missing"), + plugin: "files", + }), + ); + // The error shape comes from AuthenticationError.missingToken (mentions + // x-forwarded-access-token in our message). + expect(errBody.error).toMatch(/x-forwarded-access-token/); + + // Policy must not have been evaluated and the SDK must not have been + // called. + expect(policySpy).not.toHaveBeenCalled(); + expect(mockClient.files.listDirectoryContents).not.toHaveBeenCalled(); + }); + + test("OBO volume + missing token + NODE_ENV === 'development' → exactly one warn, SP fallback proceeds", async () => { + process.env.NODE_ENV = "development"; + const policySpy = vi.fn().mockReturnValue(true); + const plugin = new FilesPlugin({ + volumes: { + obo_vol: { auth: "on-behalf-of-user", policy: policySpy }, + uploads: {}, + exports: {}, + }, + }); + const handler = getRouteHandler(plugin, "get", "/list"); + const res = mockRes(); + + mockClient.files.listDirectoryContents.mockImplementation( + async function* () { + yield { name: "d.txt", path: "/d.txt", is_directory: false }; + }, + ); + + // The plugin's logger.warn delegates to console.warn. Spy on console.warn + // and filter for the unique substring of our dev-fallback message. + const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {}); + + try { + await handler( + mockReq("obo_vol", { + "x-forwarded-user": "alice@example.com", + }), + res, + ); + + const matchingWarns = warnSpy.mock.calls.filter((args) => + args.some( + (a) => + typeof a === "string" && + a.includes( + "OBO volume requested without x-forwarded-access-token", + ), + ), + ); + expect(matchingWarns).toHaveLength(1); + } finally { + warnSpy.mockRestore(); + } + + expect(policySpy).toHaveBeenCalledTimes(1); + const userArg = policySpy.mock.calls[0][2]; + expect(userArg).toEqual( + expect.objectContaining({ isServicePrincipal: true }), + ); + + const statusCodes = (res.status.mock.calls as number[][]).map( + (c) => c[0], + ); + expect(statusCodes).not.toContain(401); + expect(statusCodes).not.toContain(403); + }); + + test("OBO volume + valid token + policy denies → 403 PolicyDeniedError", async () => { + const policySpy = vi.fn().mockReturnValue(false); + const plugin = new FilesPlugin({ + volumes: { + obo_vol: { auth: "on-behalf-of-user", policy: policySpy }, + uploads: {}, + exports: {}, + }, + }); + const handler = getRouteHandler(plugin, "get", "/list"); + const res = mockRes(); + + await handler( + mockReq("obo_vol", { + "x-forwarded-access-token": "test-token", + "x-forwarded-user": "alice@example.com", + }), + res, + ); + + expect(policySpy).toHaveBeenCalledTimes(1); + const userArg = policySpy.mock.calls[0][2]; + expect(userArg).toEqual({ + id: "alice@example.com", + isServicePrincipal: false, + }); + + expect(res.status).toHaveBeenCalledWith(403); + expect(res.json).toHaveBeenCalledWith( + expect.objectContaining({ + error: expect.stringContaining("Policy denied"), + plugin: "files", + }), + ); + }); + }); + describe("Upload Stream Size Limiter", () => { test("stream under limit passes through all chunks", async () => { const maxSize = 100; From 9f244f96eab25b4055db59c7cdfb5e0d933282a6 Mon Sep 17 00:00:00 2001 From: Atila Fassina Date: Mon, 27 Apr 2026 11:41:49 +0200 Subject: [PATCH 06/13] feat(appkit): files OBO read routes via runInUserContext (phase 3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wires the seven read handlers (list, read, download, raw, exists, metadata, preview) through a new `_runWithAuth(req, volumeKey, fn)` helper that: - runs `fn` directly on SP volumes — byte-for-byte identical to today - wraps `fn` in `runInUserContext` on OBO volumes when both x-forwarded headers are present, so the SDK call and `getCurrentUserId()` resolve to the end user's identity. Cache keys use `getCurrentUserId()`, so per-user cache isolation falls out for free. Policy still gates first; `_enforcePolicy` already 401s in production when OBO headers are missing, so the dev-fallback path inside `_runWithAuth` is only reachable under `NODE_ENV === "development"`. Also rewrites the cache invalidation comment and the `VolumeAPI` / `VolumeHandle` / `exports()` JSDoc to describe both modes (previously all three claimed "operations execute as the service principal"). Phase 3 of files-per-volume-auth-mode — first end-to-end demoable slice. Write routes (Phase 4) and `VolumeHandle.asUser` SDK identity (Phase 5) are not yet wired. Co-authored-by: Isaac Signed-off-by: Atila Fassina --- packages/appkit/src/plugins/files/plugin.ts | 328 +++++++++++------- .../src/plugins/files/tests/plugin.test.ts | 284 +++++++++++++++ packages/appkit/src/plugins/files/types.ts | 33 +- 3 files changed, 508 insertions(+), 137 deletions(-) diff --git a/packages/appkit/src/plugins/files/plugin.ts b/packages/appkit/src/plugins/files/plugin.ts index 233737894..e61651f0d 100644 --- a/packages/appkit/src/plugins/files/plugin.ts +++ b/packages/appkit/src/plugins/files/plugin.ts @@ -9,7 +9,13 @@ import { isSafeInlineContentType, validateCustomContentTypes, } from "../../connectors/files"; -import { getCurrentUserId, getWorkspaceClient } from "../../context"; +import { + getCurrentUserId, + getWorkspaceClient, + runInUserContext, + ServiceContext, + type UserContext, +} from "../../context"; import { AuthenticationError } from "../../errors"; import { createLogger } from "../../logging/logger"; import { Plugin, toPlugin } from "../../plugin"; @@ -468,9 +474,13 @@ export class FilesPlugin extends Plugin { * Uses the same cache-key format as `_handleList`: resolved path for * subdirectories, `"__root__"` for the volume root. * - * Cache keys include `getCurrentUserId()` — must match the identity used - * by `this.execute()` in `_handleList`. Both run in service-principal - * context; wrapping either in `runInUserContext` would break invalidation. + * Cache keys include `getCurrentUserId()`, so reads and the subsequent + * invalidation stay consistent across auth modes: on SP volumes both run as + * the service principal, and on OBO volumes both run inside the same + * `runInUserContext` scope so `getCurrentUserId()` resolves to the end + * user's ID for both the cached read and the matching invalidation. The + * user identity propagates transparently through `getCurrentUserId()`, so + * no explicit user ID needs to be threaded through this function. */ private _invalidateListCache( volumeKey: string, @@ -543,23 +553,25 @@ export class FilesPlugin extends Plugin { if (!(await this._enforcePolicy(req, res, volumeKey, "list", path ?? "/"))) return; - try { - const result = await this.execute( - async () => connector.list(getWorkspaceClient(), path), - this._readSettings([ - `files:${volumeKey}:list`, - path ? connector.resolvePath(path) : "__root__", - ]), - ); + await this._runWithAuth(req, volumeKey, async () => { + try { + const result = await this.execute( + async () => connector.list(getWorkspaceClient(), path), + this._readSettings([ + `files:${volumeKey}:list`, + path ? connector.resolvePath(path) : "__root__", + ]), + ); - if (!result.ok) { - this._sendStatusError(res, result.status); - return; + if (!result.ok) { + this._sendStatusError(res, result.status); + return; + } + res.json(result.data); + } catch (error) { + this._handleApiError(res, error, "List failed"); } - res.json(result.data); - } catch (error) { - this._handleApiError(res, error, "List failed"); - } + }); } private async _handleRead( @@ -578,23 +590,25 @@ export class FilesPlugin extends Plugin { if (!(await this._enforcePolicy(req, res, volumeKey, "read", path))) return; - try { - const result = await this.execute( - async () => connector.read(getWorkspaceClient(), path), - this._readSettings([ - `files:${volumeKey}:read`, - connector.resolvePath(path), - ]), - ); + await this._runWithAuth(req, volumeKey, async () => { + try { + const result = await this.execute( + async () => connector.read(getWorkspaceClient(), path), + this._readSettings([ + `files:${volumeKey}:read`, + connector.resolvePath(path), + ]), + ); - if (!result.ok) { - this._sendStatusError(res, result.status); - return; + if (!result.ok) { + this._sendStatusError(res, result.status); + return; + } + res.type("text/plain").send(result.data); + } catch (error) { + this._handleApiError(res, error, "Read failed"); } - res.type("text/plain").send(result.data); - } catch (error) { - this._handleApiError(res, error, "Read failed"); - } + }); } private async _handleDownload( @@ -645,64 +659,66 @@ export class FilesPlugin extends Plugin { const label = opts.mode === "download" ? "Download" : "Raw fetch"; const volumeCfg = this.volumeConfigs[volumeKey]; - try { - const settings: PluginExecutionSettings = { - default: FILES_DOWNLOAD_DEFAULTS, - }; - const response = await this.execute( - async () => connector.download(getWorkspaceClient(), path), - settings, - ); - - if (!response.ok) { - this._sendStatusError(res, response.status); - return; - } - - const resolvedType = contentTypeFromPath( - path, - undefined, - volumeCfg.customContentTypes, - ); - const fileName = sanitizeFilename(path.split("/").pop() ?? "download"); + await this._runWithAuth(req, volumeKey, async () => { + try { + const settings: PluginExecutionSettings = { + default: FILES_DOWNLOAD_DEFAULTS, + }; + const response = await this.execute( + async () => connector.download(getWorkspaceClient(), path), + settings, + ); - res.setHeader("Content-Type", resolvedType); - res.setHeader("X-Content-Type-Options", "nosniff"); + if (!response.ok) { + this._sendStatusError(res, response.status); + return; + } - if (opts.mode === "raw") { - res.setHeader("Content-Security-Policy", "sandbox"); - if (!isSafeInlineContentType(resolvedType)) { + const resolvedType = contentTypeFromPath( + path, + undefined, + volumeCfg.customContentTypes, + ); + const fileName = sanitizeFilename(path.split("/").pop() ?? "download"); + + res.setHeader("Content-Type", resolvedType); + res.setHeader("X-Content-Type-Options", "nosniff"); + + if (opts.mode === "raw") { + res.setHeader("Content-Security-Policy", "sandbox"); + if (!isSafeInlineContentType(resolvedType)) { + res.setHeader( + "Content-Disposition", + `attachment; filename="${fileName}"`, + ); + } + } else { res.setHeader( "Content-Disposition", `attachment; filename="${fileName}"`, ); } - } else { - res.setHeader( - "Content-Disposition", - `attachment; filename="${fileName}"`, - ); - } - if (response.data.contents) { - const nodeStream = Readable.fromWeb( - response.data.contents as import("node:stream/web").ReadableStream, - ); - nodeStream.on("error", (err) => { - logger.error("Stream error during %s: %O", opts.mode, err); - if (!res.headersSent) { - this._sendStatusError(res, 500); - } else { - res.destroy(); - } - }); - nodeStream.pipe(res); - } else { - res.end(); + if (response.data.contents) { + const nodeStream = Readable.fromWeb( + response.data.contents as import("node:stream/web").ReadableStream, + ); + nodeStream.on("error", (err) => { + logger.error("Stream error during %s: %O", opts.mode, err); + if (!res.headersSent) { + this._sendStatusError(res, 500); + } else { + res.destroy(); + } + }); + nodeStream.pipe(res); + } else { + res.end(); + } + } catch (error) { + this._handleApiError(res, error, `${label} failed`); } - } catch (error) { - this._handleApiError(res, error, `${label} failed`); - } + }); } private async _handleExists( @@ -722,23 +738,25 @@ export class FilesPlugin extends Plugin { if (!(await this._enforcePolicy(req, res, volumeKey, "exists", path))) return; - try { - const result = await this.execute( - async () => connector.exists(getWorkspaceClient(), path), - this._readSettings([ - `files:${volumeKey}:exists`, - connector.resolvePath(path), - ]), - ); + await this._runWithAuth(req, volumeKey, async () => { + try { + const result = await this.execute( + async () => connector.exists(getWorkspaceClient(), path), + this._readSettings([ + `files:${volumeKey}:exists`, + connector.resolvePath(path), + ]), + ); - if (!result.ok) { - this._sendStatusError(res, result.status); - return; + if (!result.ok) { + this._sendStatusError(res, result.status); + return; + } + res.json({ exists: result.data }); + } catch (error) { + this._handleApiError(res, error, "Exists check failed"); } - res.json({ exists: result.data }); - } catch (error) { - this._handleApiError(res, error, "Exists check failed"); - } + }); } private async _handleMetadata( @@ -758,23 +776,25 @@ export class FilesPlugin extends Plugin { if (!(await this._enforcePolicy(req, res, volumeKey, "metadata", path))) return; - try { - const result = await this.execute( - async () => connector.metadata(getWorkspaceClient(), path), - this._readSettings([ - `files:${volumeKey}:metadata`, - connector.resolvePath(path), - ]), - ); + await this._runWithAuth(req, volumeKey, async () => { + try { + const result = await this.execute( + async () => connector.metadata(getWorkspaceClient(), path), + this._readSettings([ + `files:${volumeKey}:metadata`, + connector.resolvePath(path), + ]), + ); - if (!result.ok) { - this._sendStatusError(res, result.status); - return; + if (!result.ok) { + this._sendStatusError(res, result.status); + return; + } + res.json(result.data); + } catch (error) { + this._handleApiError(res, error, "Metadata fetch failed"); } - res.json(result.data); - } catch (error) { - this._handleApiError(res, error, "Metadata fetch failed"); - } + }); } private async _handlePreview( @@ -794,23 +814,25 @@ export class FilesPlugin extends Plugin { if (!(await this._enforcePolicy(req, res, volumeKey, "preview", path))) return; - try { - const result = await this.execute( - async () => connector.preview(getWorkspaceClient(), path), - this._readSettings([ - `files:${volumeKey}:preview`, - connector.resolvePath(path), - ]), - ); + await this._runWithAuth(req, volumeKey, async () => { + try { + const result = await this.execute( + async () => connector.preview(getWorkspaceClient(), path), + this._readSettings([ + `files:${volumeKey}:preview`, + connector.resolvePath(path), + ]), + ); - if (!result.ok) { - this._sendStatusError(res, result.status); - return; + if (!result.ok) { + this._sendStatusError(res, result.status); + return; + } + res.json(result.data); + } catch (error) { + this._handleApiError(res, error, "Preview failed"); } - res.json(result.data); - } catch (error) { - this._handleApiError(res, error, "Preview failed"); - } + }); } private async _handleUpload( @@ -1019,6 +1041,49 @@ export class FilesPlugin extends Plugin { ); } + /** + * Build a `UserContext` from request headers when both + * `x-forwarded-access-token` and `x-forwarded-user` are present, otherwise + * return `null`. Used by OBO route handlers to wrap SDK calls in the + * end-user's identity. A `null` result means "fall back to the service + * principal client" — for OBO volumes in production, `_enforcePolicy` will + * already have responded 401 before we get here, so `null` is reachable + * only on the dev-fallback path. + */ + private _buildUserContextOrNull(req: express.Request): UserContext | null { + const token = req.header("x-forwarded-access-token")?.trim(); + const userId = req.header("x-forwarded-user")?.trim(); + if (!token || !userId) return null; + return ServiceContext.createUserContext(token, userId); + } + + /** + * Run `fn` under the correct execution context for `volumeKey`. + * - SP volumes (`auth: "service-principal"`): invokes `fn` directly so the + * service-principal `WorkspaceClient` and `getCurrentUserId()` are used — + * identical behavior to pre-OBO releases. + * - OBO volumes (`auth: "on-behalf-of-user"`): wraps `fn` in + * `runInUserContext` with a `UserContext` built from the request headers, + * so SDK calls execute as the end user and `getCurrentUserId()` (and + * therefore cache keys) resolve to the user's ID. Falls back to the SP + * path when no user context can be built — only reachable in dev mode, + * since `_enforcePolicy` 401s in production when the OBO headers are + * missing. + */ + private async _runWithAuth( + req: express.Request, + volumeKey: string, + fn: () => Promise, + ): Promise { + if (this._resolveAuth(volumeKey) === "on-behalf-of-user") { + const userCtx = this._buildUserContextOrNull(req); + if (userCtx) { + return runInUserContext(userCtx, fn); + } + } + return fn(); + } + /** * Creates a VolumeAPI for a specific volume key. * @@ -1111,8 +1176,11 @@ export class FilesPlugin extends Plugin { * Returns the programmatic API for the Files plugin. * Callable with a volume key to get a volume-scoped handle. * - * All operations execute as the service principal. - * Use policies to control per-user access. + * SP volumes (`auth: "service-principal"`, the default) execute as the + * service principal. OBO volumes (`auth: "on-behalf-of-user"`) executed + * through the HTTP routes run as the end user; for programmatic calls + * outside a route, use `asUser(req)` to opt into per-user execution. + * Policies control per-user access in either mode. * * @example * ```ts diff --git a/packages/appkit/src/plugins/files/tests/plugin.test.ts b/packages/appkit/src/plugins/files/tests/plugin.test.ts index b0054f0ec..eecfb7982 100644 --- a/packages/appkit/src/plugins/files/tests/plugin.test.ts +++ b/packages/appkit/src/plugins/files/tests/plugin.test.ts @@ -2170,6 +2170,290 @@ describe("FilesPlugin", () => { }); }); + describe("OBO read routes", () => { + function getRouteHandler( + plugin: FilesPlugin, + method: "get" | "post" | "delete", + pathSuffix: string, + ) { + const mockRouter = { + use: vi.fn(), + get: vi.fn(), + post: vi.fn(), + put: vi.fn(), + delete: vi.fn(), + patch: vi.fn(), + } as any; + plugin.injectRoutes(mockRouter); + const call = mockRouter[method].mock.calls.find( + (c: unknown[]) => + typeof c[0] === "string" && (c[0] as string).endsWith(pathSuffix), + ); + return call[call.length - 1] as (req: any, res: any) => Promise; + } + + function mockRes() { + const res: any = { headersSent: false }; + res.status = vi.fn().mockReturnValue(res); + res.json = vi.fn().mockReturnValue(res); + res.type = vi.fn().mockReturnValue(res); + res.send = vi.fn().mockReturnValue(res); + res.setHeader = vi.fn().mockReturnValue(res); + res.end = vi.fn(); + return res; + } + + function mockReq( + volumeKey: string, + headers: Record, + overrides: Record = {}, + ) { + return { + params: { volumeKey }, + query: {}, + ...overrides, + headers, + header: (name: string) => headers[name.toLowerCase()], + }; + } + + /** + * Replace the default `getCurrentUserId` mock with one that delegates to + * the real implementation, so that calls inside `runInUserContext` resolve + * to the wrapped UserContext's `userId` (and the per-user cache key + * derived from it). + */ + async function useRealGetCurrentUserId() { + const actual = + await vi.importActual( + "../../../context", + ); + const ctx = await import("../../../context"); + vi.mocked(ctx.getCurrentUserId).mockImplementation( + actual.getCurrentUserId, + ); + } + + let originalNodeEnv: string | undefined; + + beforeEach(() => { + originalNodeEnv = process.env.NODE_ENV; + process.env.DATABRICKS_VOLUME_OBO_VOL = "/Volumes/c/s/obo"; + }); + + afterEach(() => { + if (originalNodeEnv === undefined) { + delete process.env.NODE_ENV; + } else { + process.env.NODE_ENV = originalNodeEnv; + } + delete process.env.DATABRICKS_VOLUME_OBO_VOL; + }); + + test("OBO list + valid token wraps SDK call in user context (alice's userId resolves inside the wrapped fn)", async () => { + await useRealGetCurrentUserId(); + const policySpy = vi.fn().mockReturnValue(true); + const plugin = new FilesPlugin({ + volumes: { + obo_vol: { auth: "on-behalf-of-user", policy: policySpy }, + uploads: {}, + exports: {}, + }, + }); + const handler = getRouteHandler(plugin, "get", "/list"); + const res = mockRes(); + + // Snapshot the user IDs observed inside each SDK invocation so we can + // assert that the SDK call ran inside `runInUserContext` with the + // expected user identity. + const observedUserIds: string[] = []; + mockClient.files.listDirectoryContents.mockImplementation( + async function* () { + // getCurrentUserId() inside the wrapped fn should resolve to alice. + const ctx = await import("../../../context"); + observedUserIds.push(ctx.getCurrentUserId()); + yield { name: "o.txt", path: "/o.txt", is_directory: false }; + }, + ); + + await handler( + mockReq("obo_vol", { + "x-forwarded-access-token": "test-token", + "x-forwarded-user": "alice@example.com", + }), + res, + ); + + expect(observedUserIds).toEqual(["alice@example.com"]); + + const statusCodes = (res.status.mock.calls as number[][]).map( + (c) => c[0], + ); + expect(statusCodes).not.toContain(401); + expect(statusCodes).not.toContain(403); + expect(statusCodes).not.toContain(500); + }); + + test("OBO read happy path: valid token + policy allows + UC allows → 200", async () => { + const policySpy = vi.fn().mockReturnValue(true); + const plugin = new FilesPlugin({ + volumes: { + obo_vol: { auth: "on-behalf-of-user", policy: policySpy }, + uploads: {}, + exports: {}, + }, + }); + const handler = getRouteHandler(plugin, "get", "/read"); + const res = mockRes(); + + // The connector reads via files.download — return a valid 200-ish + // response with content body. + mockClient.files.download.mockImplementation(async () => ({ + contents: new ReadableStream({ + start(controller) { + controller.enqueue(new TextEncoder().encode("hello")); + controller.close(); + }, + }), + })); + + await handler( + mockReq( + "obo_vol", + { + "x-forwarded-access-token": "test-token", + "x-forwarded-user": "alice@example.com", + }, + { query: { path: "hello.txt" } }, + ), + res, + ); + + // Both gates passed: policy was consulted with OBO identity, and the + // SDK's response was relayed back to the client. + expect(policySpy).toHaveBeenCalledTimes(1); + const userArg = policySpy.mock.calls[0][2]; + expect(userArg).toEqual({ + id: "alice@example.com", + isServicePrincipal: false, + }); + + // 2xx path: handler called res.type/send rather than res.status(non-2xx). + const statusCodes = (res.status.mock.calls as number[][]).map( + (c) => c[0], + ); + expect(statusCodes).not.toContain(401); + expect(statusCodes).not.toContain(403); + expect(statusCodes).not.toContain(500); + expect(res.send).toHaveBeenCalled(); + }); + + test("Cache isolation: same path, two users on OBO volume → two distinct cache keys", async () => { + await useRealGetCurrentUserId(); + const policySpy = vi.fn().mockReturnValue(true); + const plugin = new FilesPlugin({ + volumes: { + obo_vol: { auth: "on-behalf-of-user", policy: policySpy }, + uploads: {}, + exports: {}, + }, + }); + const handler = getRouteHandler(plugin, "get", "/list"); + + mockClient.files.listDirectoryContents.mockImplementation( + async function* () { + yield { name: "f.txt", path: "/f.txt", is_directory: false }; + }, + ); + + // Alice's request. + await handler( + mockReq("obo_vol", { + "x-forwarded-access-token": "alice-token", + "x-forwarded-user": "alice@example.com", + }), + mockRes(), + ); + + // Bob's request — same volume, same path. + await handler( + mockReq("obo_vol", { + "x-forwarded-access-token": "bob-token", + "x-forwarded-user": "bob@example.com", + }), + mockRes(), + ); + + // The cache layer is consulted via getOrExecute(cacheKey, fn, userKey). + // For OBO volumes the `userKey` argument must be the real user's ID + // (resolved by getCurrentUserId() inside the runInUserContext scope), + // so the two requests produce two distinct cache entries. + const userKeys = mockCacheInstance.getOrExecute.mock.calls.map( + (c: unknown[]) => c[2], + ); + expect(userKeys).toEqual(["alice@example.com", "bob@example.com"]); + expect(new Set(userKeys).size).toBe(2); + }); + + test("SP/OBO cache no-collide: same path on SP volume vs OBO volume → distinct cache keys", async () => { + await useRealGetCurrentUserId(); + const plugin = new FilesPlugin({ + volumes: { + obo_vol: { + auth: "on-behalf-of-user", + policy: policy.allowAll(), + }, + // SP-mode volume (default auth). + uploads: { policy: policy.allowAll() }, + exports: {}, + }, + }); + + const listHandler = getRouteHandler(plugin, "get", "/list"); + + mockClient.files.listDirectoryContents.mockImplementation( + async function* () { + yield { name: "f.txt", path: "/f.txt", is_directory: false }; + }, + ); + + // SP volume request — `getCurrentUserId()` outside any user context + // returns the service principal's identity (the underlying real impl + // reads `ServiceContext.serviceUserId`). + await listHandler( + mockReq("uploads", { + "x-forwarded-access-token": "sp-token-ignored", + "x-forwarded-user": "alice@example.com", + }), + mockRes(), + ); + + // OBO volume request — same human user, but execution is wrapped in + // runInUserContext so `userKey` resolves to alice's ID, not the SP's. + await listHandler( + mockReq("obo_vol", { + "x-forwarded-access-token": "alice-token", + "x-forwarded-user": "alice@example.com", + }), + mockRes(), + ); + + const calls = mockCacheInstance.getOrExecute.mock.calls; + expect(calls).toHaveLength(2); + + // The cacheKey is the first arg — the SP and OBO volumes already + // namespace cache entries by `volumeKey`, so the array form differs. + const cacheKeys = calls.map((c: unknown[]) => c[0]); + expect(cacheKeys[0]).not.toEqual(cacheKeys[1]); + + // Defense-in-depth: even if the array-form cacheKey matched, the + // userKey differs because OBO runs under runInUserContext while SP + // runs in the SP's serviceUserId. + const userKeys = calls.map((c: unknown[]) => c[2]); + expect(userKeys[0]).not.toEqual(userKeys[1]); + }); + }); + describe("Upload Stream Size Limiter", () => { test("stream under limit passes through all chunks", async () => { const maxSize = 100; diff --git a/packages/appkit/src/plugins/files/types.ts b/packages/appkit/src/plugins/files/types.ts index ce15c540f..785a30385 100644 --- a/packages/appkit/src/plugins/files/types.ts +++ b/packages/appkit/src/plugins/files/types.ts @@ -26,8 +26,20 @@ export interface VolumeConfig { /** * User-facing API for a single volume. - * All operations execute as the service principal. When a policy is - * configured on the volume, every call is checked against that policy. + * + * Which identity executes each operation depends on the volume's effective + * `auth` mode (resolved from `VolumeConfig.auth` ?? `IFilesConfig.auth` ?? + * `"service-principal"`): + * - SP volumes (`auth: "service-principal"`): operations execute as the + * service principal. + * - OBO volumes (`auth: "on-behalf-of-user"`): operations invoked through + * the HTTP routes execute as the end user (the token from + * `x-forwarded-access-token` is used to build the SDK client). For + * programmatic calls outside an HTTP route, see `VolumeHandle.asUser(req)` + * to opt into per-user execution explicitly. + * + * When a policy is configured on the volume, every call is checked against + * that policy with the appropriate identity (service principal vs end user). */ export interface VolumeAPI { list(directoryPath?: string): Promise; @@ -97,11 +109,18 @@ export interface FilePreview extends FileMetadata { /** * Volume handle returned by `app.files("volumeKey")`. * - * All methods execute as the service principal and enforce the volume's - * policy (if configured) with `{ isServicePrincipal: true }`. - * `asUser(req)` re-wraps with the real user identity for per-user policy - * checks. In production it throws `AuthenticationError.missingToken` when - * the `x-forwarded-user` header is missing; in development + * Default execution identity follows the volume's effective `auth` mode: + * - SP volumes (`auth: "service-principal"`): methods execute as the service + * principal and the volume policy (if configured) sees + * `{ isServicePrincipal: true }`. + * - OBO volumes (`auth: "on-behalf-of-user"`): methods invoked from inside + * an HTTP route handler execute as the end user (the route wires the + * request token into a `runInUserContext` scope before calling SDK code). + * + * `asUser(req)` re-wraps the API with the real user identity from the + * request — useful for per-user policy checks when calling the API outside + * an HTTP route. In production it throws `AuthenticationError.missingToken` + * when the `x-forwarded-user` header is missing; in development * (`NODE_ENV === "development"`) it falls back to the service principal so * local testing without a reverse proxy continues to work. */ From 515f8cfe1f8d009c65c04addbea25905781c9bc2 Mon Sep 17 00:00:00 2001 From: Atila Fassina Date: Mon, 27 Apr 2026 11:54:54 +0200 Subject: [PATCH 07/13] feat(appkit): files OBO write routes + upload-headers test (phase 4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wires `_handleUpload`, `_handleMkdir`, and `_handleDelete` through `_runWithAuth` so write traffic on OBO volumes executes as the end user. SP volumes are byte-for-byte identical (no-op wrap fall-through). The upload handler does a hand-rolled `fetch PUT` and calls `client.config.authenticate(headers)` to populate auth headers — the whole body now runs inside the user-context wrap, so the user-token client is what authenticates the outgoing request. Adds the non-negotiable upload-headers contract test: triggers an OBO upload and asserts the outgoing fetch carries `Authorization: Bearer USER-TOKEN-FOO` (not the SP's marker), proving the chain runInUserContext → getWorkspaceClient → client.config.authenticate → fetch headers is intact end-to-end. Verified the test fails meaningfully if the wrap is removed. Phase 4 of files-per-volume-auth-mode. Phase 5 (`VolumeHandle.asUser` SDK identity fix) is next. Co-authored-by: Isaac Signed-off-by: Atila Fassina --- packages/appkit/src/plugins/files/plugin.ts | 204 +++++---- .../src/plugins/files/tests/plugin.test.ts | 401 ++++++++++++++++++ 2 files changed, 511 insertions(+), 94 deletions(-) diff --git a/packages/appkit/src/plugins/files/plugin.ts b/packages/appkit/src/plugins/files/plugin.ts index e61651f0d..409063f37 100644 --- a/packages/appkit/src/plugins/files/plugin.ts +++ b/packages/appkit/src/plugins/files/plugin.ts @@ -881,70 +881,82 @@ export class FilesPlugin extends Plugin { logger.debug(req, "Upload started: volume=%s path=%s", volumeKey, path); - try { - const rawStream: ReadableStream = Readable.toWeb(req); - - let bytesReceived = 0; - const webStream = rawStream.pipeThrough( - new TransformStream({ - transform(chunk, controller) { - bytesReceived += chunk.byteLength; - if (bytesReceived > maxSize) { - controller.error( - new Error( - `Upload stream exceeds maximum allowed size (${maxSize} bytes)`, - ), - ); - return; - } - controller.enqueue(chunk); - }, - }), - ); - - logger.debug( - req, - "Upload body received: volume=%s path=%s, size=%d bytes", - volumeKey, - path, - contentLength ?? 0, - ); - const settings: PluginExecutionSettings = { - default: FILES_WRITE_DEFAULTS, - }; - const result = await this.trackWrite(() => - this.execute(async () => { - await connector.upload(getWorkspaceClient(), path, webStream); - return { success: true as const }; - }, settings), - ); - - this._invalidateListCache(volumeKey, path, connector); + await this._runWithAuth(req, volumeKey, async () => { + try { + const rawStream: ReadableStream = Readable.toWeb(req); + + let bytesReceived = 0; + const webStream = rawStream.pipeThrough( + new TransformStream({ + transform(chunk, controller) { + bytesReceived += chunk.byteLength; + if (bytesReceived > maxSize) { + controller.error( + new Error( + `Upload stream exceeds maximum allowed size (${maxSize} bytes)`, + ), + ); + return; + } + controller.enqueue(chunk); + }, + }), + ); - if (!result.ok) { - logger.error( + logger.debug( req, - "Upload failed: volume=%s path=%s, size=%d bytes", + "Upload body received: volume=%s path=%s, size=%d bytes", volumeKey, path, contentLength ?? 0, ); - this._sendStatusError(res, result.status); - return; - } + const settings: PluginExecutionSettings = { + default: FILES_WRITE_DEFAULTS, + }; + // The connector's `upload` resolves `getWorkspaceClient()` and + // `client.config.authenticate(headers)` synchronously inside this + // callback. When `_runWithAuth` wraps us in `runInUserContext`, that + // chain produces user-token Authorization headers on the outgoing + // `fetch PUT`. The OBO upload-headers test pins this contract. + const result = await this.trackWrite(() => + this.execute(async () => { + await connector.upload(getWorkspaceClient(), path, webStream); + return { success: true as const }; + }, settings), + ); - logger.debug(req, "Upload complete: volume=%s path=%s", volumeKey, path); - res.json(result.data); - } catch (error) { - if ( - error instanceof Error && - error.message.includes("exceeds maximum allowed size") - ) { - res.status(413).json({ error: error.message, plugin: this.name }); - return; + this._invalidateListCache(volumeKey, path, connector); + + if (!result.ok) { + logger.error( + req, + "Upload failed: volume=%s path=%s, size=%d bytes", + volumeKey, + path, + contentLength ?? 0, + ); + this._sendStatusError(res, result.status); + return; + } + + logger.debug( + req, + "Upload complete: volume=%s path=%s", + volumeKey, + path, + ); + res.json(result.data); + } catch (error) { + if ( + error instanceof Error && + error.message.includes("exceeds maximum allowed size") + ) { + res.status(413).json({ error: error.message, plugin: this.name }); + return; + } + this._handleApiError(res, error, "Upload failed"); } - this._handleApiError(res, error, "Upload failed"); - } + }); } private async _handleMkdir( @@ -965,28 +977,30 @@ export class FilesPlugin extends Plugin { if (!(await this._enforcePolicy(req, res, volumeKey, "mkdir", dirPath))) return; - try { - const settings: PluginExecutionSettings = { - default: FILES_WRITE_DEFAULTS, - }; - const result = await this.trackWrite(() => - this.execute(async () => { - await connector.createDirectory(getWorkspaceClient(), dirPath); - return { success: true as const }; - }, settings), - ); + await this._runWithAuth(req, volumeKey, async () => { + try { + const settings: PluginExecutionSettings = { + default: FILES_WRITE_DEFAULTS, + }; + const result = await this.trackWrite(() => + this.execute(async () => { + await connector.createDirectory(getWorkspaceClient(), dirPath); + return { success: true as const }; + }, settings), + ); - this._invalidateListCache(volumeKey, dirPath, connector); + this._invalidateListCache(volumeKey, dirPath, connector); - if (!result.ok) { - this._sendStatusError(res, result.status); - return; - } + if (!result.ok) { + this._sendStatusError(res, result.status); + return; + } - res.json(result.data); - } catch (error) { - this._handleApiError(res, error, "Create directory failed"); - } + res.json(result.data); + } catch (error) { + this._handleApiError(res, error, "Create directory failed"); + } + }); } private async _handleDelete( @@ -1007,28 +1021,30 @@ export class FilesPlugin extends Plugin { if (!(await this._enforcePolicy(req, res, volumeKey, "delete", path))) return; - try { - const settings: PluginExecutionSettings = { - default: FILES_WRITE_DEFAULTS, - }; - const result = await this.trackWrite(() => - this.execute(async () => { - await connector.delete(getWorkspaceClient(), path); - return { success: true as const }; - }, settings), - ); + await this._runWithAuth(req, volumeKey, async () => { + try { + const settings: PluginExecutionSettings = { + default: FILES_WRITE_DEFAULTS, + }; + const result = await this.trackWrite(() => + this.execute(async () => { + await connector.delete(getWorkspaceClient(), path); + return { success: true as const }; + }, settings), + ); - this._invalidateListCache(volumeKey, path, connector); + this._invalidateListCache(volumeKey, path, connector); - if (!result.ok) { - this._sendStatusError(res, result.status); - return; - } + if (!result.ok) { + this._sendStatusError(res, result.status); + return; + } - res.json(result.data); - } catch (error) { - this._handleApiError(res, error, "Delete failed"); - } + res.json(result.data); + } catch (error) { + this._handleApiError(res, error, "Delete failed"); + } + }); } private _resolveAuth( diff --git a/packages/appkit/src/plugins/files/tests/plugin.test.ts b/packages/appkit/src/plugins/files/tests/plugin.test.ts index eecfb7982..1d6d1e282 100644 --- a/packages/appkit/src/plugins/files/tests/plugin.test.ts +++ b/packages/appkit/src/plugins/files/tests/plugin.test.ts @@ -1,3 +1,4 @@ +import { Readable } from "node:stream"; import { mockServiceContext, setupDatabricksEnv } from "@tools/test-helpers"; import { afterEach, beforeEach, describe, expect, test, vi } from "vitest"; import { ServiceContext } from "../../../context/service-context"; @@ -2454,6 +2455,406 @@ describe("FilesPlugin", () => { }); }); + describe("OBO write routes", () => { + function getRouteHandler( + plugin: FilesPlugin, + method: "get" | "post" | "delete", + pathSuffix: string, + ) { + const mockRouter = { + use: vi.fn(), + get: vi.fn(), + post: vi.fn(), + put: vi.fn(), + delete: vi.fn(), + patch: vi.fn(), + } as any; + plugin.injectRoutes(mockRouter); + const call = mockRouter[method].mock.calls.find( + (c: unknown[]) => + typeof c[0] === "string" && (c[0] as string).endsWith(pathSuffix), + ); + return call[call.length - 1] as (req: any, res: any) => Promise; + } + + function mockRes() { + const res: any = { headersSent: false }; + res.status = vi.fn().mockReturnValue(res); + res.json = vi.fn().mockReturnValue(res); + res.type = vi.fn().mockReturnValue(res); + res.send = vi.fn().mockReturnValue(res); + res.setHeader = vi.fn().mockReturnValue(res); + res.end = vi.fn(); + return res; + } + + /** + * `_handleUpload` calls `Readable.toWeb(req)` on the express request, so + * the mock req must be (or extend) a real Node Readable stream. Build one + * from the supplied body bytes and tack on the express-shaped helpers. + */ + function mockUploadReq( + volumeKey: string, + headers: Record, + body: string | Buffer = "", + overrides: Record = {}, + ) { + const buf = Buffer.isBuffer(body) ? body : Buffer.from(body); + const stream = Readable.from([buf]) as Readable & { + params?: any; + query?: any; + headers?: any; + header?: (name: string) => string | undefined; + }; + stream.params = { volumeKey }; + // Relative path so the connector's `resolvePath` joins it with the + // volume's defaultVolume from `DATABRICKS_VOLUME_OBO_VOL`. + stream.query = { path: "upload-target.bin" }; + stream.headers = headers; + stream.header = (name: string) => headers[name.toLowerCase()]; + Object.assign(stream, overrides); + return stream; + } + + function mockReq( + volumeKey: string, + headers: Record, + overrides: Record = {}, + ) { + return { + params: { volumeKey }, + query: {}, + body: {}, + ...overrides, + headers, + header: (name: string) => headers[name.toLowerCase()], + }; + } + + /** + * Replace the default `getCurrentUserId` mock with the real implementation + * so calls inside `runInUserContext` resolve to the wrapped UserContext's + * `userId` (mirrors the helper used by the `OBO read routes` block). + */ + async function useRealGetCurrentUserId() { + const actual = + await vi.importActual( + "../../../context", + ); + const ctx = await import("../../../context"); + vi.mocked(ctx.getCurrentUserId).mockImplementation( + actual.getCurrentUserId, + ); + } + + /** + * Replace the default `getWorkspaceClient` mock with the real + * implementation so that calls inside `runInUserContext` return the + * UserContext's `client` (the user-token-authenticated WorkspaceClient) + * while calls outside the user context fall back to the + * service-principal client from `ServiceContext.get()`. Required to + * exercise the real `_runWithAuth → runInUserContext → getWorkspaceClient + * → client.config.authenticate → fetch headers` chain. + */ + async function useRealGetWorkspaceClient() { + const actual = + await vi.importActual( + "../../../context", + ); + const ctx = await import("../../../context"); + vi.mocked(ctx.getWorkspaceClient).mockImplementation( + actual.getWorkspaceClient, + ); + } + + let originalNodeEnv: string | undefined; + + beforeEach(() => { + originalNodeEnv = process.env.NODE_ENV; + process.env.DATABRICKS_VOLUME_OBO_VOL = "/Volumes/c/s/obo"; + }); + + afterEach(() => { + if (originalNodeEnv === undefined) { + delete process.env.NODE_ENV; + } else { + process.env.NODE_ENV = originalNodeEnv; + } + delete process.env.DATABRICKS_VOLUME_OBO_VOL; + vi.unstubAllGlobals(); + }); + + /** + * NON-NEGOTIABLE upload-headers contract. + * + * `_handleUpload` does a hand-rolled `fetch PUT` (not a typed SDK call) + * via the connector's `upload()`. Inside `_runWithAuth` on an OBO volume, + * the chain is: + * + * getWorkspaceClient() → user-token WorkspaceClient + * client.config.authenticate(h) → injects "Bearer " + * fetch(url, { headers }) → outgoing request as the user + * + * This test pins that chain end-to-end. If any future SDK upgrade or + * refactor changes `client.config.authenticate`'s signature, removes the + * `runInUserContext` wrap from `_handleUpload`, or rewires + * `getWorkspaceClient()` so it returns the SP client inside the OBO + * scope, the user-token Authorization header will not reach `fetch` and + * this assertion fails. SP-token would silently leak to UC otherwise. + */ + test("OBO upload: outgoing fetch PUT carries user-token Authorization header (not SP)", async () => { + await useRealGetCurrentUserId(); + await useRealGetWorkspaceClient(); + + // SP-token marker — what the existing mockClient would inject if the + // OBO wrap leaked. We assert this NEVER reaches the outgoing fetch. + mockClient.config.authenticate.mockImplementation( + async (headers: Headers) => { + headers.set("Authorization", "Bearer SP-TOKEN"); + }, + ); + + // User-token marker — what the OBO scope MUST inject. + const userClient = { + config: { + host: "https://test.databricks.com", + authenticate: vi.fn(async (headers: Headers) => { + headers.set("Authorization", "Bearer USER-TOKEN-FOO"); + }), + }, + // `_handleUpload` only routes through the connector's `upload()`, + // which uses host + authenticate + fetch. No `files.*` accessor is + // touched on the user client during this path. + }; + + // Wire `_buildUserContextOrNull → ServiceContext.createUserContext` to + // return a UserContext whose `client` is our user-token client. This + // is the same hook `mockServiceContext` already installs; we just + // override its impl for this test. + serviceContextMock.createUserContextSpy.mockImplementation( + (_token: string, userId: string) => ({ + client: userClient as any, + userId, + warehouseId: serviceContextMock.serviceContext.warehouseId, + workspaceId: serviceContextMock.serviceContext.workspaceId, + isUserContext: true, + }), + ); + + // Capture the outgoing PUT. + const fetchSpy = vi + .fn() + .mockResolvedValue({ ok: true, status: 200, text: async () => "" }); + vi.stubGlobal("fetch", fetchSpy); + + const plugin = new FilesPlugin({ + volumes: { + obo_vol: { + auth: "on-behalf-of-user", + policy: policy.allowAll(), + }, + uploads: {}, + exports: {}, + }, + }); + const handler = getRouteHandler(plugin, "post", "/upload"); + const res = mockRes(); + + await handler( + mockUploadReq( + "obo_vol", + { + "x-forwarded-access-token": "alice-token", + "x-forwarded-user": "alice@example.com", + "content-length": "5", + }, + "hello", + ), + res, + ); + + // The user-token authenticator was consulted exactly when upload ran. + expect(userClient.config.authenticate).toHaveBeenCalledTimes(1); + + // The hand-rolled fetch PUT happened exactly once. + expect(fetchSpy).toHaveBeenCalledTimes(1); + const fetchArgs = fetchSpy.mock.calls[0]; + const init = fetchArgs[1] as RequestInit & { headers: Headers }; + expect(init.method).toBe("PUT"); + + // The contract — proves the user-token Authorization header reached + // fetch. Toggling the `_runWithAuth` wrap off in `_handleUpload` + // breaks this assertion (fetch would carry "Bearer SP-TOKEN" instead). + expect(init.headers.get("Authorization")).toBe("Bearer USER-TOKEN-FOO"); + expect(init.headers.get("Authorization")).not.toBe("Bearer SP-TOKEN"); + + // Defense-in-depth: SP authenticator was NOT called along the OBO path. + expect(mockClient.config.authenticate).not.toHaveBeenCalled(); + }); + + test("OBO upload + missing token + NODE_ENV=production → 401 before any SDK or fetch call", async () => { + process.env.NODE_ENV = "production"; + + const fetchSpy = vi.fn(); + vi.stubGlobal("fetch", fetchSpy); + + const plugin = new FilesPlugin({ + volumes: { + obo_vol: { + auth: "on-behalf-of-user", + policy: policy.allowAll(), + }, + uploads: {}, + exports: {}, + }, + }); + const handler = getRouteHandler(plugin, "post", "/upload"); + const res = mockRes(); + + // No x-forwarded-access-token header. + await handler( + mockUploadReq( + "obo_vol", + { + "x-forwarded-user": "alice@example.com", + "content-length": "5", + }, + "hello", + ), + res, + ); + + expect(res.status).toHaveBeenCalledWith(401); + const errBody = res.json.mock.calls[0][0]; + expect(errBody).toEqual( + expect.objectContaining({ + error: expect.stringContaining("Missing"), + plugin: "files", + }), + ); + expect(errBody.error).toMatch(/x-forwarded-access-token/); + + // Neither the SDK upload nor the hand-rolled fetch ran. + expect(mockClient.files.upload).not.toHaveBeenCalled(); + expect(fetchSpy).not.toHaveBeenCalled(); + }); + + test("OBO mkdir + policy denies → 403 PolicyDeniedError; SDK not invoked", async () => { + const policySpy = vi.fn().mockReturnValue(false); + const plugin = new FilesPlugin({ + volumes: { + obo_vol: { auth: "on-behalf-of-user", policy: policySpy }, + uploads: {}, + exports: {}, + }, + }); + const handler = getRouteHandler(plugin, "post", "/mkdir"); + const res = mockRes(); + + const fetchSpy = vi.fn(); + vi.stubGlobal("fetch", fetchSpy); + + await handler( + mockReq( + "obo_vol", + { + "x-forwarded-access-token": "alice-token", + "x-forwarded-user": "alice@example.com", + }, + { body: { path: "/new-dir" } }, + ), + res, + ); + + // Policy was consulted with the OBO identity. + expect(policySpy).toHaveBeenCalledTimes(1); + const userArg = policySpy.mock.calls[0][2]; + expect(userArg).toEqual({ + id: "alice@example.com", + isServicePrincipal: false, + }); + + // 403 PolicyDeniedError shape. + expect(res.status).toHaveBeenCalledWith(403); + expect(res.json).toHaveBeenCalledWith( + expect.objectContaining({ + error: expect.stringContaining("Policy denied"), + plugin: "files", + }), + ); + + // SDK + fetch not invoked. + expect(mockClient.files.createDirectory).not.toHaveBeenCalled(); + expect(fetchSpy).not.toHaveBeenCalled(); + }); + + test("OBO delete + valid token + UC denies → user-token client invoked, error propagated", async () => { + await useRealGetCurrentUserId(); + await useRealGetWorkspaceClient(); + + // Distinct user-token client with a `files.delete` that mimics a UC + // failure (e.g. 403 from UC because the user lacks privilege). + const userClient = { + config: { + host: "https://test.databricks.com", + authenticate: vi.fn(async (h: Headers) => { + h.set("Authorization", "Bearer USER-TOKEN-DEL"); + }), + }, + files: { + delete: vi.fn(async () => { + throw new MockApiError("UC denied", 403); + }), + }, + }; + + serviceContextMock.createUserContextSpy.mockImplementation( + (_token: string, userId: string) => ({ + client: userClient as any, + userId, + warehouseId: serviceContextMock.serviceContext.warehouseId, + workspaceId: serviceContextMock.serviceContext.workspaceId, + isUserContext: true, + }), + ); + + const plugin = new FilesPlugin({ + volumes: { + obo_vol: { + auth: "on-behalf-of-user", + policy: policy.allowAll(), + }, + uploads: {}, + exports: {}, + }, + }); + const handler = getRouteHandler(plugin, "delete", "/:volumeKey"); + const res = mockRes(); + + await handler( + mockReq( + "obo_vol", + { + "x-forwarded-access-token": "alice-token", + "x-forwarded-user": "alice@example.com", + }, + { query: { path: "doomed.txt" } }, + ), + res, + ); + + // The user-token client was used, not the SP one. + expect(userClient.files.delete).toHaveBeenCalledTimes(1); + expect(mockClient.files.delete).not.toHaveBeenCalled(); + + // The UC error surfaced as 403 (the ApiError statusCode). + expect(res.status).toHaveBeenCalledWith(403); + expect(res.json).toHaveBeenCalledWith( + expect.objectContaining({ plugin: "files" }), + ); + }); + }); + describe("Upload Stream Size Limiter", () => { test("stream under limit passes through all chunks", async () => { const maxSize = 100; From 20d0e0bb4de3de77a341feaed96067ffa78e7a95 Mon Sep 17 00:00:00 2001 From: Atila Fassina Date: Mon, 27 Apr 2026 12:08:59 +0200 Subject: [PATCH 08/13] feat(appkit): files asUser routes SDK calls as the user (phase 5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `appKit.files("vol").asUser(req).()` previously only swapped the user identity passed to the volume policy — the underlying SDK call still ran as the service principal. After this change, every method on the returned handle is wrapped in `runInUserContext` with the request's user identity, so the UC SDK call genuinely executes as the end user. This is a hard override at the SDK level: it applies even on `auth: "service-principal"` volumes, where it was previously a no-op for SDK identity. Implementation: a new `_wrapVolumeAPIInUserContext` helper wraps each method in the returned `VolumeAPI`. Reuses `_buildUserContextOrNull` so the dev-mode fallback (`NODE_ENV === "development"` + missing token) skips the wrap and continues to execute as the SP — matching pre-OBO behavior locally without a reverse proxy. Strict `_extractUser` is unchanged — `asUser` still throws `AuthenticationError.missingToken` in production when the user header is missing. Programmatic OBO note: `appKit.files("obo-vol").()` (no req, no asUser) cannot synthesize a user identity and continues to execute against whatever client `getWorkspaceClient()` resolves to at the call site (typically the SP at the top level). The OBO volume default applies to HTTP route traffic via `_runWithAuth`. For programmatic per-user execution, `asUser(req)` is the supported path. Phase 5 of files-per-volume-auth-mode. BREAKING CHANGE: programmatic callers of appKit.files("vol").asUser(req).() that expected the SDK call to execute as the service principal (with only the policy seeing the user) now see the SDK call execute as the user. Audit programmatic asUser callers and remove the asUser wrap if SP credentials were the desired behavior. Co-authored-by: Isaac Signed-off-by: Atila Fassina --- packages/appkit/src/plugins/files/plugin.ts | 52 ++++- .../src/plugins/files/tests/plugin.test.ts | 188 ++++++++++++++++++ packages/appkit/src/plugins/files/types.ts | 26 ++- 3 files changed, 257 insertions(+), 9 deletions(-) diff --git a/packages/appkit/src/plugins/files/plugin.ts b/packages/appkit/src/plugins/files/plugin.ts index 409063f37..11395959b 100644 --- a/packages/appkit/src/plugins/files/plugin.ts +++ b/packages/appkit/src/plugins/files/plugin.ts @@ -1100,6 +1100,38 @@ export class FilesPlugin extends Plugin { return fn(); } + /** + * Wrap each `VolumeAPI` method so its execution runs inside + * `runInUserContext(userCtx, ...)`. Used by `VolumeHandle.asUser(req)` to + * force the SDK identity to the end user regardless of the volume's + * `auth` setting. The policy check baked into each method (via + * `createVolumeAPI`) runs inside the same scope, so `getCurrentUserId()` + * and any cache `userKey` derived from it also resolve to the user. + */ + private _wrapVolumeAPIInUserContext( + api: VolumeAPI, + userCtx: UserContext, + ): VolumeAPI { + const wrap = + ( + fn: (...args: Args) => Promise, + ): ((...args: Args) => Promise) => + (...args: Args) => + runInUserContext(userCtx, () => fn(...args)); + + return { + list: wrap(api.list), + read: wrap(api.read), + download: wrap(api.download), + exists: wrap(api.exists), + metadata: wrap(api.metadata), + upload: wrap(api.upload), + createDirectory: wrap(api.createDirectory), + delete: wrap(api.delete), + preview: wrap(api.preview), + }; + } + /** * Creates a VolumeAPI for a specific volume key. * @@ -1196,14 +1228,18 @@ export class FilesPlugin extends Plugin { * service principal. OBO volumes (`auth: "on-behalf-of-user"`) executed * through the HTTP routes run as the end user; for programmatic calls * outside a route, use `asUser(req)` to opt into per-user execution. - * Policies control per-user access in either mode. + * `asUser(req)` is a hard override at the SDK level: it forces every + * subsequent call to execute as the end user inside `runInUserContext`, + * regardless of the volume's `auth` setting. Policies control per-user + * access in either mode. * * @example * ```ts * // Service principal access * appKit.files("uploads").list() * - * // With policy: pass user identity for access control + * // With policy: pass user identity for access control. The SDK call + * // also executes as the user (not the service principal). * appKit.files("uploads").asUser(req).list() * ``` */ @@ -1229,7 +1265,17 @@ export class FilesPlugin extends Plugin { ...spApi, asUser: (req: express.Request) => { const user = this._extractUser(req); - return this.createVolumeAPI(volumeKey, user); + const api = this.createVolumeAPI(volumeKey, user); + // Force OBO at the SDK level regardless of the volume's `auth` + // setting: each method runs inside `runInUserContext` so + // `getWorkspaceClient()` returns the user-token client. When no + // user token is available (only reachable in dev mode after the + // strict `_extractUser` falls back to the SP identity), we skip + // the wrap so the SDK call continues to execute as the SP — the + // pre-OBO behavior. + const userCtx = this._buildUserContextOrNull(req); + if (!userCtx) return api; + return this._wrapVolumeAPIInUserContext(api, userCtx); }, }; }; diff --git a/packages/appkit/src/plugins/files/tests/plugin.test.ts b/packages/appkit/src/plugins/files/tests/plugin.test.ts index 1d6d1e282..9d98824da 100644 --- a/packages/appkit/src/plugins/files/tests/plugin.test.ts +++ b/packages/appkit/src/plugins/files/tests/plugin.test.ts @@ -2855,6 +2855,194 @@ describe("FilesPlugin", () => { }); }); + describe("VolumeHandle.asUser SDK identity", () => { + /** + * Replace the default `getWorkspaceClient` mock with the real + * implementation so that calls inside `runInUserContext` return the + * UserContext's `client` (the user-token-authenticated WorkspaceClient) + * while calls outside the user context fall back to the + * service-principal client from `ServiceContext.get()`. + */ + async function useRealGetWorkspaceClient() { + const actual = + await vi.importActual( + "../../../context", + ); + const ctx = await import("../../../context"); + vi.mocked(ctx.getWorkspaceClient).mockImplementation( + actual.getWorkspaceClient, + ); + } + + /** + * Build a minimal mock express.Request with the supplied headers. Only + * `header(name)` is consulted by `VolumeHandle.asUser`. + */ + function mockReq(headers: Record) { + return { + header: (name: string) => headers[name.toLowerCase()], + } as any; + } + + let originalNodeEnv: string | undefined; + + beforeEach(async () => { + originalNodeEnv = process.env.NODE_ENV; + // Restore the default `getWorkspaceClient(() => mockClient)` mock in + // case a previous test in this block called `useRealGetWorkspaceClient` + // — Vitest's `vi.clearAllMocks` clears call history but not impls. + const ctx = await import("../../../context"); + vi.mocked(ctx.getWorkspaceClient).mockImplementation( + () => mockClient as any, + ); + }); + + afterEach(() => { + if (originalNodeEnv === undefined) { + delete process.env.NODE_ENV; + } else { + process.env.NODE_ENV = originalNodeEnv; + } + }); + + test("asUser on SP-configured volume → SDK call goes through user-token client (hard override)", async () => { + // Use the real ALS-driven `getWorkspaceClient` so that calls inside + // `runInUserContext` resolve to the wrapped UserContext's client and + // calls outside fall through to ServiceContext.client (which lacks + // `.files` — making any leak through the SP path crash loudly). + await useRealGetWorkspaceClient(); + + // Distinct user-token client whose `listDirectoryContents` is the + // one we expect `asUser` to route through. + const userListSpy = vi.fn(async function* () { + yield { name: "user.txt", path: "/user.txt", is_directory: false }; + }); + const userClient = { + config: { + host: "https://test.databricks.com", + authenticate: vi.fn(), + }, + files: { listDirectoryContents: userListSpy }, + }; + + // Wire `_buildUserContextOrNull → ServiceContext.createUserContext` to + // return a UserContext whose `client` is our user-token client. + serviceContextMock.createUserContextSpy.mockImplementation( + (_token: string, userId: string) => ({ + client: userClient as any, + userId, + warehouseId: serviceContextMock.serviceContext.warehouseId, + workspaceId: serviceContextMock.serviceContext.workspaceId, + isUserContext: true, + }), + ); + + // Set the SP volume's default path BEFORE plugin construction so the + // connector picks it up. + process.env.DATABRICKS_VOLUME_SP_VOL = "/Volumes/c/s/sp"; + try { + const plugin = new FilesPlugin({ + volumes: { + // SP-configured volume (the auth: "service-principal" default). + sp_vol: { auth: "service-principal", policy: policy.allowAll() }, + uploads: {}, + exports: {}, + }, + }); + + const handle = plugin.exports()("sp_vol"); + const userApi = handle.asUser( + mockReq({ + "x-forwarded-access-token": "alice-token", + "x-forwarded-user": "alice@example.com", + }), + ); + + // Materialize the async iterator — the connector iterates the SDK + // generator and aggregates results. + await userApi.list("subdir"); + + // The user-token client served the SDK call (proof the + // `runInUserContext` wrap took effect). + expect(userListSpy).toHaveBeenCalledTimes(1); + // Defense-in-depth: a UserContext was constructed for the request. + expect(serviceContextMock.createUserContextSpy).toHaveBeenCalledTimes( + 1, + ); + } finally { + delete process.env.DATABRICKS_VOLUME_SP_VOL; + } + }); + + test("programmatic call on OBO volume without asUser → no runInUserContext wrap; SDK runs through default getWorkspaceClient (SP at top level)", async () => { + // Discovery note: programmatic calls (no `req`) cannot synthesize a + // user identity. The OBO-volume "execute as user" behavior is wired + // through the HTTP route layer's `_runWithAuth`, which builds a + // UserContext from the request headers. There is no equivalent for + // the programmatic path — `appKit.files("obo-vol").list()` (no + // `asUser`) executes against whatever `getWorkspaceClient()` returns + // in the current ALS scope, which at the top level is the SP client. + // This test pins that reality. The task spec's framing of "OBO + // default executes as user via volume default" is HTTP-only. + + // We rely on the default mocked `getWorkspaceClient(() => mockClient)` + // so the SP client returned is the file-aware fixture. + const spListSpy = vi.fn(async function* () { + yield { name: "sp.txt", path: "/sp.txt", is_directory: false }; + }); + mockClient.files.listDirectoryContents.mockImplementation(spListSpy); + + // Spy on createUserContext to confirm no wrap happened. + serviceContextMock.createUserContextSpy.mockClear(); + + process.env.DATABRICKS_VOLUME_OBO_VOL = "/Volumes/c/s/obo"; + try { + const plugin = new FilesPlugin({ + volumes: { + obo_vol: { + auth: "on-behalf-of-user", + policy: policy.allowAll(), + }, + uploads: {}, + exports: {}, + }, + }); + + const handle = plugin.exports()("obo_vol"); + await handle.list("subdir"); // no asUser; pure programmatic call + + // The default `mockClient` (SP fixture) served the call. No + // user-context wrap exists at the top of the call stack — the OBO + // semantic only applies on the HTTP route path. + expect(spListSpy).toHaveBeenCalledTimes(1); + expect(serviceContextMock.createUserContextSpy).not.toHaveBeenCalled(); + } finally { + delete process.env.DATABRICKS_VOLUME_OBO_VOL; + } + }); + + test("programmatic call on SP volume without asUser → SDK runs as SP; runInUserContext is not invoked (status-quo guard)", async () => { + // Default mocked `getWorkspaceClient(() => mockClient)`; no + // ALS-driven dispatch. + const spListSpy = vi.fn(async function* () { + yield { name: "sp.txt", path: "/sp.txt", is_directory: false }; + }); + mockClient.files.listDirectoryContents.mockImplementation(spListSpy); + + serviceContextMock.createUserContextSpy.mockClear(); + + const plugin = new FilesPlugin(VOLUMES_CONFIG); + const handle = plugin.exports()("uploads"); + await handle.list("subdir"); // no asUser; pure SP path + + // The SP client served the SDK call. + expect(spListSpy).toHaveBeenCalledTimes(1); + // No UserContext was ever built — the SP default path shouldn't go + // anywhere near `createUserContext`. + expect(serviceContextMock.createUserContextSpy).not.toHaveBeenCalled(); + }); + }); + describe("Upload Stream Size Limiter", () => { test("stream under limit passes through all chunks", async () => { const maxSize = 100; diff --git a/packages/appkit/src/plugins/files/types.ts b/packages/appkit/src/plugins/files/types.ts index 785a30385..cb1618051 100644 --- a/packages/appkit/src/plugins/files/types.ts +++ b/packages/appkit/src/plugins/files/types.ts @@ -117,12 +117,26 @@ export interface FilePreview extends FileMetadata { * an HTTP route handler execute as the end user (the route wires the * request token into a `runInUserContext` scope before calling SDK code). * - * `asUser(req)` re-wraps the API with the real user identity from the - * request — useful for per-user policy checks when calling the API outside - * an HTTP route. In production it throws `AuthenticationError.missingToken` - * when the `x-forwarded-user` header is missing; in development - * (`NODE_ENV === "development"`) it falls back to the service principal so - * local testing without a reverse proxy continues to work. + * `asUser(req)` is a hard override: regardless of the volume's `auth` + * setting (SP or OBO), the returned API runs every SDK call inside + * `runInUserContext` with the user identity extracted from the request, + * so the underlying `WorkspaceClient` is the user-token client. This makes + * `appKit.files("sp-vol").asUser(req).list()` actually execute as the end + * user, even on a service-principal-configured volume. + * + * In production `asUser` throws `AuthenticationError.missingToken` when the + * `x-forwarded-user` header is missing. In development + * (`NODE_ENV === "development"`) it logs a warning and falls back to the + * service principal so local testing without a reverse proxy continues to + * work — in that fallback no `runInUserContext` wrap is applied and SDK + * calls execute as the SP, identical to pre-OBO behavior. + * + * @remarks Behavior change: prior to OBO support, `asUser(req)` only + * influenced the policy user passed to the volume policy — the underlying + * SDK call still ran as the service principal. With OBO support landed, + * `asUser` now also forces the SDK call to run as the user. Any caller + * that relied on the pre-OBO behavior (policy sees user, SDK runs as SP) + * must remove the `asUser` wrap. */ export type VolumeHandle = VolumeAPI & { asUser: (req: IAppRequest) => VolumeAPI; From 44f950bf87d97d9a323daf5fe0c8635b024ea04e Mon Sep 17 00:00:00 2001 From: Atila Fassina Date: Mon, 27 Apr 2026 12:24:14 +0200 Subject: [PATCH 09/13] feat(appkit): files.auth_mode span attribute + manifest scope JSDoc (phase 6) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tags every Files plugin trace span with `files.auth_mode` set to either `"service-principal"` or `"on-behalf-of-user"`, reflecting what operationally happened (whether the SDK call ran inside `runInUserContext`). Two complementary plumbing paths land on the same attribute key: - HTTP routes: route handlers compute the effective mode via a new `_effectiveAuthMode(req, volumeKey)` helper and thread it into the existing `PluginExecutionSettings.telemetryInterceptor.attributes` shape — `TelemetryInterceptor` already passes those attributes to `tracer.startActiveSpan`, so the attribute lands on the existing `plugin.execute` span without new infrastructure. - Programmatic API (`exports()` SP path + `asUser` OBO path): wraps each VolumeAPI method in a new `_withAuthModeSpan(operation, mode, fn)` helper that calls `this.telemetry.startActiveSpan("files.", ...)`. Programmatic calls previously created NO span, so this introduces new `files.` spans on programmatic surface — a small observability enrichment beyond pure attribute plumbing. Spans respect the plugin's `traces` config; the noop tracer takes over when traces are disabled. Updates `getResourceRequirements()` JSDoc to document the per-volume permission split: SP volumes need the SP to hold `WRITE_VOLUME`; OBO volumes need the end user (not the SP) to hold it. Adds a `// TODO: extend plugin-manifest.schema.json` marker for the eventual schema-level fix. Phase 6 of files-per-volume-auth-mode. Phase 7 (docs, playground, changelog) is next. Co-authored-by: Isaac Signed-off-by: Atila Fassina --- packages/appkit/src/plugins/files/plugin.ts | 254 ++++++++++++++---- .../src/plugins/files/tests/plugin.test.ts | 227 ++++++++++++++++ 2 files changed, 434 insertions(+), 47 deletions(-) diff --git a/packages/appkit/src/plugins/files/plugin.ts b/packages/appkit/src/plugins/files/plugin.ts index 11395959b..cedf641b7 100644 --- a/packages/appkit/src/plugins/files/plugin.ts +++ b/packages/appkit/src/plugins/files/plugin.ts @@ -86,9 +86,33 @@ export class FilesPlugin extends Plugin { /** * Generates resource requirements dynamically from discovered + configured volumes. * Each volume key maps to a `DATABRICKS_VOLUME_{KEY_UPPERCASE}` env var. + * + * ## Per-volume permission scope (SP vs OBO) + * + * The returned manifest entries describe a single permission grant per + * volume, but the *grantee* depends on the volume's `auth` setting at + * runtime — and that distinction is **not** expressed in the manifest + * today: + * + * - **Service-principal volumes** (the default, `auth: "service-principal"`): + * the app's service principal needs `WRITE_VOLUME` (or read-equivalent) + * on the UC volume. This matches the manifest entry as written. + * - **On-behalf-of-user volumes** (`auth: "on-behalf-of-user"`): SDK calls + * execute as the **end user**, so the *user* — not the SP — must hold + * `WRITE_VOLUME` (or read-equivalent) on the UC volume. The SP only + * needs to be allowed to mint user-token requests; it does not need + * direct volume permissions. + * + * The static manifest cannot currently express this per-volume split, so + * callers configuring OBO volumes must communicate the per-user permission + * requirement out-of-band (docs, runbooks, deployment scripts) until the + * manifest schema gains a per-volume auth scope field. */ static getResourceRequirements(config: IFilesConfig): ResourceRequirement[] { const volumes = FilesPlugin.discoverVolumes(config); + // TODO: extend plugin-manifest.schema.json to express per-volume auth + // scope so OBO volumes can declare end-user-required permissions in the + // manifest itself. return Object.keys(volumes).map((key) => ({ type: ResourceType.VOLUME, alias: `volume-${key}`, @@ -460,11 +484,41 @@ export class FilesPlugin extends Plugin { private _readSettings( cacheKey: (string | number | object)[], + authMode: "service-principal" | "on-behalf-of-user", ): PluginExecutionSettings { return { default: { ...FILES_READ_DEFAULTS, cache: { ...FILES_READ_DEFAULTS.cache, cacheKey }, + telemetryInterceptor: { + attributes: this._authModeAttributes(authMode), + }, + }, + }; + } + + private _writeSettings( + authMode: "service-principal" | "on-behalf-of-user", + ): PluginExecutionSettings { + return { + default: { + ...FILES_WRITE_DEFAULTS, + telemetryInterceptor: { + attributes: this._authModeAttributes(authMode), + }, + }, + }; + } + + private _downloadSettings( + authMode: "service-principal" | "on-behalf-of-user", + ): PluginExecutionSettings { + return { + default: { + ...FILES_DOWNLOAD_DEFAULTS, + telemetryInterceptor: { + attributes: this._authModeAttributes(authMode), + }, }, }; } @@ -553,14 +607,18 @@ export class FilesPlugin extends Plugin { if (!(await this._enforcePolicy(req, res, volumeKey, "list", path ?? "/"))) return; + const authMode = this._effectiveAuthMode(req, volumeKey); await this._runWithAuth(req, volumeKey, async () => { try { const result = await this.execute( async () => connector.list(getWorkspaceClient(), path), - this._readSettings([ - `files:${volumeKey}:list`, - path ? connector.resolvePath(path) : "__root__", - ]), + this._readSettings( + [ + `files:${volumeKey}:list`, + path ? connector.resolvePath(path) : "__root__", + ], + authMode, + ), ); if (!result.ok) { @@ -590,14 +648,15 @@ export class FilesPlugin extends Plugin { if (!(await this._enforcePolicy(req, res, volumeKey, "read", path))) return; + const authMode = this._effectiveAuthMode(req, volumeKey); await this._runWithAuth(req, volumeKey, async () => { try { const result = await this.execute( async () => connector.read(getWorkspaceClient(), path), - this._readSettings([ - `files:${volumeKey}:read`, - connector.resolvePath(path), - ]), + this._readSettings( + [`files:${volumeKey}:read`, connector.resolvePath(path)], + authMode, + ), ); if (!result.ok) { @@ -658,12 +717,11 @@ export class FilesPlugin extends Plugin { const label = opts.mode === "download" ? "Download" : "Raw fetch"; const volumeCfg = this.volumeConfigs[volumeKey]; + const authMode = this._effectiveAuthMode(req, volumeKey); await this._runWithAuth(req, volumeKey, async () => { try { - const settings: PluginExecutionSettings = { - default: FILES_DOWNLOAD_DEFAULTS, - }; + const settings = this._downloadSettings(authMode); const response = await this.execute( async () => connector.download(getWorkspaceClient(), path), settings, @@ -738,14 +796,15 @@ export class FilesPlugin extends Plugin { if (!(await this._enforcePolicy(req, res, volumeKey, "exists", path))) return; + const authMode = this._effectiveAuthMode(req, volumeKey); await this._runWithAuth(req, volumeKey, async () => { try { const result = await this.execute( async () => connector.exists(getWorkspaceClient(), path), - this._readSettings([ - `files:${volumeKey}:exists`, - connector.resolvePath(path), - ]), + this._readSettings( + [`files:${volumeKey}:exists`, connector.resolvePath(path)], + authMode, + ), ); if (!result.ok) { @@ -776,14 +835,15 @@ export class FilesPlugin extends Plugin { if (!(await this._enforcePolicy(req, res, volumeKey, "metadata", path))) return; + const authMode = this._effectiveAuthMode(req, volumeKey); await this._runWithAuth(req, volumeKey, async () => { try { const result = await this.execute( async () => connector.metadata(getWorkspaceClient(), path), - this._readSettings([ - `files:${volumeKey}:metadata`, - connector.resolvePath(path), - ]), + this._readSettings( + [`files:${volumeKey}:metadata`, connector.resolvePath(path)], + authMode, + ), ); if (!result.ok) { @@ -814,14 +874,15 @@ export class FilesPlugin extends Plugin { if (!(await this._enforcePolicy(req, res, volumeKey, "preview", path))) return; + const authMode = this._effectiveAuthMode(req, volumeKey); await this._runWithAuth(req, volumeKey, async () => { try { const result = await this.execute( async () => connector.preview(getWorkspaceClient(), path), - this._readSettings([ - `files:${volumeKey}:preview`, - connector.resolvePath(path), - ]), + this._readSettings( + [`files:${volumeKey}:preview`, connector.resolvePath(path)], + authMode, + ), ); if (!result.ok) { @@ -881,6 +942,7 @@ export class FilesPlugin extends Plugin { logger.debug(req, "Upload started: volume=%s path=%s", volumeKey, path); + const authMode = this._effectiveAuthMode(req, volumeKey); await this._runWithAuth(req, volumeKey, async () => { try { const rawStream: ReadableStream = Readable.toWeb(req); @@ -910,9 +972,7 @@ export class FilesPlugin extends Plugin { path, contentLength ?? 0, ); - const settings: PluginExecutionSettings = { - default: FILES_WRITE_DEFAULTS, - }; + const settings = this._writeSettings(authMode); // The connector's `upload` resolves `getWorkspaceClient()` and // `client.config.authenticate(headers)` synchronously inside this // callback. When `_runWithAuth` wraps us in `runInUserContext`, that @@ -977,11 +1037,10 @@ export class FilesPlugin extends Plugin { if (!(await this._enforcePolicy(req, res, volumeKey, "mkdir", dirPath))) return; + const authMode = this._effectiveAuthMode(req, volumeKey); await this._runWithAuth(req, volumeKey, async () => { try { - const settings: PluginExecutionSettings = { - default: FILES_WRITE_DEFAULTS, - }; + const settings = this._writeSettings(authMode); const result = await this.trackWrite(() => this.execute(async () => { await connector.createDirectory(getWorkspaceClient(), dirPath); @@ -1021,11 +1080,10 @@ export class FilesPlugin extends Plugin { if (!(await this._enforcePolicy(req, res, volumeKey, "delete", path))) return; + const authMode = this._effectiveAuthMode(req, volumeKey); await this._runWithAuth(req, volumeKey, async () => { try { - const settings: PluginExecutionSettings = { - default: FILES_WRITE_DEFAULTS, - }; + const settings = this._writeSettings(authMode); const result = await this.trackWrite(() => this.execute(async () => { await connector.delete(getWorkspaceClient(), path); @@ -1073,6 +1131,42 @@ export class FilesPlugin extends Plugin { return ServiceContext.createUserContext(token, userId); } + /** + * Build the telemetry attribute hash for the `files.auth_mode` span + * attribute. The value reflects what operationally happened — i.e. + * whether `runInUserContext` actually wrapped the SDK call: + * - HTTP route on OBO volume + valid token → `"on-behalf-of-user"`. + * - HTTP route on OBO volume + dev-fallback (no token) → + * `"service-principal"` (the route falls through to the SP client). + * - HTTP route on SP volume → `"service-principal"`. + * - `asUser(req)` programmatic calls with a real user context → + * `"on-behalf-of-user"`. + * - Any unwrapped path → `"service-principal"`. + */ + private _authModeAttributes( + authMode: "service-principal" | "on-behalf-of-user", + ): { "files.auth_mode": string } { + return { "files.auth_mode": authMode }; + } + + /** + * Resolve the auth mode that `_runWithAuth` would actually apply to the + * request. Mirrors `_runWithAuth`'s decision logic so HTTP handlers can + * tag their telemetry spans with the operationally-effective auth mode + * before invoking `this.execute(...)`. + */ + private _effectiveAuthMode( + req: express.Request, + volumeKey: string, + ): "service-principal" | "on-behalf-of-user" { + if (this._resolveAuth(volumeKey) !== "on-behalf-of-user") { + return "service-principal"; + } + return this._buildUserContextOrNull(req) + ? "on-behalf-of-user" + : "service-principal"; + } + /** * Run `fn` under the correct execution context for `volumeKey`. * - SP volumes (`auth: "service-principal"`): invokes `fn` directly so the @@ -1100,13 +1194,71 @@ export class FilesPlugin extends Plugin { return fn(); } + /** + * Wrap a programmatic VolumeAPI method invocation in a telemetry span + * tagged with `files.auth_mode`. Programmatic calls bypass + * `this.execute(...)` (and therefore the `TelemetryInterceptor`); this + * helper restores the missing span so the `files.auth_mode` attribute + * lands consistently across both HTTP and programmatic surfaces. + */ + private _withAuthModeSpan( + operation: string, + authMode: "service-principal" | "on-behalf-of-user", + fn: () => Promise, + ): Promise { + return this.telemetry.startActiveSpan( + `files.${operation}`, + { attributes: this._authModeAttributes(authMode) }, + async (span) => { + try { + return await fn(); + } finally { + span.end(); + } + }, + ); + } + + /** + * Wrap each `VolumeAPI` method so its execution is tagged with + * `files.auth_mode = "service-principal"`. Used for programmatic calls + * that don't go through `asUser(req)`. Each wrapped method runs inside + * its own telemetry span so SP-mode invocations are distinguishable in + * trace output. + */ + private _wrapVolumeAPIWithSPSpan(api: VolumeAPI): VolumeAPI { + const wrap = + ( + operation: string, + fn: (...args: Args) => Promise, + ): ((...args: Args) => Promise) => + (...args: Args) => + this._withAuthModeSpan(operation, "service-principal", () => + fn(...args), + ); + + return { + list: wrap("list", api.list), + read: wrap("read", api.read), + download: wrap("download", api.download), + exists: wrap("exists", api.exists), + metadata: wrap("metadata", api.metadata), + upload: wrap("upload", api.upload), + createDirectory: wrap("createDirectory", api.createDirectory), + delete: wrap("delete", api.delete), + preview: wrap("preview", api.preview), + }; + } + /** * Wrap each `VolumeAPI` method so its execution runs inside * `runInUserContext(userCtx, ...)`. Used by `VolumeHandle.asUser(req)` to * force the SDK identity to the end user regardless of the volume's * `auth` setting. The policy check baked into each method (via * `createVolumeAPI`) runs inside the same scope, so `getCurrentUserId()` - * and any cache `userKey` derived from it also resolve to the user. + * and any cache `userKey` derived from it also resolve to the user. Each + * wrapped invocation is tagged with `files.auth_mode = "on-behalf-of-user"` + * so OBO calls are distinguishable in trace output. */ private _wrapVolumeAPIInUserContext( api: VolumeAPI, @@ -1114,21 +1266,24 @@ export class FilesPlugin extends Plugin { ): VolumeAPI { const wrap = ( + operation: string, fn: (...args: Args) => Promise, ): ((...args: Args) => Promise) => (...args: Args) => - runInUserContext(userCtx, () => fn(...args)); + this._withAuthModeSpan(operation, "on-behalf-of-user", () => + runInUserContext(userCtx, () => fn(...args)), + ); return { - list: wrap(api.list), - read: wrap(api.read), - download: wrap(api.download), - exists: wrap(api.exists), - metadata: wrap(api.metadata), - upload: wrap(api.upload), - createDirectory: wrap(api.createDirectory), - delete: wrap(api.delete), - preview: wrap(api.preview), + list: wrap("list", api.list), + read: wrap("read", api.read), + download: wrap("download", api.download), + exists: wrap("exists", api.exists), + metadata: wrap("metadata", api.metadata), + upload: wrap("upload", api.upload), + createDirectory: wrap("createDirectory", api.createDirectory), + delete: wrap("delete", api.delete), + preview: wrap("preview", api.preview), }; } @@ -1259,7 +1414,12 @@ export class FilesPlugin extends Plugin { }, isServicePrincipal: true, }; - const spApi = this.createVolumeAPI(volumeKey, spUser); + // Default (non-asUser) programmatic surface: every call is tagged + // with `files.auth_mode = "service-principal"` for telemetry parity + // with the HTTP route path. + const spApi = this._wrapVolumeAPIWithSPSpan( + this.createVolumeAPI(volumeKey, spUser), + ); return { ...spApi, @@ -1271,10 +1431,10 @@ export class FilesPlugin extends Plugin { // `getWorkspaceClient()` returns the user-token client. When no // user token is available (only reachable in dev mode after the // strict `_extractUser` falls back to the SP identity), we skip - // the wrap so the SDK call continues to execute as the SP — the - // pre-OBO behavior. + // the OBO wrap and instead apply the SP-mode span wrap so trace + // output reflects the actual SP execution. const userCtx = this._buildUserContextOrNull(req); - if (!userCtx) return api; + if (!userCtx) return this._wrapVolumeAPIWithSPSpan(api); return this._wrapVolumeAPIInUserContext(api, userCtx); }, }; diff --git a/packages/appkit/src/plugins/files/tests/plugin.test.ts b/packages/appkit/src/plugins/files/tests/plugin.test.ts index 9d98824da..a80d8b2e3 100644 --- a/packages/appkit/src/plugins/files/tests/plugin.test.ts +++ b/packages/appkit/src/plugins/files/tests/plugin.test.ts @@ -3043,6 +3043,233 @@ describe("FilesPlugin", () => { }); }); + describe("files.auth_mode telemetry attribute", () => { + /** + * Spy on the plugin's telemetry provider so we can inspect the + * `attributes` argument passed to `startActiveSpan`. Both the + * `TelemetryInterceptor` (HTTP routes) and `_withAuthModeSpan` + * (programmatic API) ultimately call `telemetry.startActiveSpan(name, + * options, fn)` — `options.attributes["files.auth_mode"]` is the + * single point this test pins. + */ + function spyOnTelemetry(plugin: FilesPlugin) { + const telemetry = (plugin as any).telemetry as { + startActiveSpan: (...args: unknown[]) => Promise; + }; + const calls: Array<{ + name: string; + attributes: Record | undefined; + }> = []; + const original = telemetry.startActiveSpan.bind(telemetry); + vi.spyOn(telemetry, "startActiveSpan").mockImplementation( + (...args: unknown[]) => { + const [name, options] = args as [ + string, + { attributes?: Record } | undefined, + ]; + calls.push({ name, attributes: options?.attributes }); + return original(...args); + }, + ); + return calls; + } + + function getRouteHandler( + plugin: FilesPlugin, + method: "get" | "post" | "delete", + pathSuffix: string, + ) { + const mockRouter = { + use: vi.fn(), + get: vi.fn(), + post: vi.fn(), + put: vi.fn(), + delete: vi.fn(), + patch: vi.fn(), + } as any; + plugin.injectRoutes(mockRouter); + const call = mockRouter[method].mock.calls.find( + (c: unknown[]) => + typeof c[0] === "string" && (c[0] as string).endsWith(pathSuffix), + ); + return call[call.length - 1] as (req: any, res: any) => Promise; + } + + function mockRes() { + const res: any = { headersSent: false }; + res.status = vi.fn().mockReturnValue(res); + res.json = vi.fn().mockReturnValue(res); + res.type = vi.fn().mockReturnValue(res); + res.send = vi.fn().mockReturnValue(res); + res.setHeader = vi.fn().mockReturnValue(res); + res.end = vi.fn(); + return res; + } + + function mockReq( + volumeKey: string, + headers: Record, + overrides: Record = {}, + ) { + return { + params: { volumeKey }, + query: {}, + ...overrides, + headers, + header: (name: string) => headers[name.toLowerCase()], + }; + } + + let originalNodeEnv: string | undefined; + + beforeEach(() => { + originalNodeEnv = process.env.NODE_ENV; + process.env.DATABRICKS_VOLUME_OBO_VOL = "/Volumes/c/s/obo"; + }); + + afterEach(() => { + if (originalNodeEnv === undefined) { + delete process.env.NODE_ENV; + } else { + process.env.NODE_ENV = originalNodeEnv; + } + delete process.env.DATABRICKS_VOLUME_OBO_VOL; + }); + + test("OBO volume + HTTP route + valid token → span attribute is 'on-behalf-of-user'", async () => { + const plugin = new FilesPlugin({ + volumes: { + obo_vol: { auth: "on-behalf-of-user", policy: policy.allowAll() }, + uploads: {}, + exports: {}, + }, + }); + const calls = spyOnTelemetry(plugin); + + mockClient.files.listDirectoryContents.mockImplementation( + async function* () { + yield { name: "f.txt", path: "/f.txt", is_directory: false }; + }, + ); + + const handler = getRouteHandler(plugin, "get", "/list"); + await handler( + mockReq("obo_vol", { + "x-forwarded-access-token": "alice-token", + "x-forwarded-user": "alice@example.com", + }), + mockRes(), + ); + + // The span created by TelemetryInterceptor must carry the OBO marker. + const obo = calls.find( + (c) => c.attributes?.["files.auth_mode"] === "on-behalf-of-user", + ); + expect(obo).toBeDefined(); + }); + + test("SP volume + HTTP route → span attribute is 'service-principal'", async () => { + const plugin = new FilesPlugin(VOLUMES_CONFIG); + const calls = spyOnTelemetry(plugin); + + mockClient.files.listDirectoryContents.mockImplementation( + async function* () { + yield { name: "f.txt", path: "/f.txt", is_directory: false }; + }, + ); + + const handler = getRouteHandler(plugin, "get", "/list"); + await handler(mockReq("uploads", {}), mockRes()); + + // No OBO span attribute should appear on the SP route path. + const oboCall = calls.find( + (c) => c.attributes?.["files.auth_mode"] === "on-behalf-of-user", + ); + expect(oboCall).toBeUndefined(); + + // At least one span must explicitly tag service-principal. + const sp = calls.find( + (c) => c.attributes?.["files.auth_mode"] === "service-principal", + ); + expect(sp).toBeDefined(); + }); + + test("appKit.files('sp-vol').asUser(req).list() programmatic → span attribute is 'on-behalf-of-user' (asUser forces it on SP volumes)", async () => { + const plugin = new FilesPlugin(VOLUMES_CONFIG); + const calls = spyOnTelemetry(plugin); + + mockClient.files.listDirectoryContents.mockImplementation( + async function* () { + yield { name: "f.txt", path: "/f.txt", is_directory: false }; + }, + ); + + const handle = plugin.exports()("uploads"); // SP-mode volume + const userApi = handle.asUser({ + header: (name: string) => + ({ + "x-forwarded-access-token": "alice-token", + "x-forwarded-user": "alice@example.com", + })[name.toLowerCase()], + } as any); + await userApi.list("subdir"); + + // asUser hard-overrides SP into OBO, so the programmatic span must + // carry the OBO marker. + const obo = calls.find( + (c) => c.attributes?.["files.auth_mode"] === "on-behalf-of-user", + ); + expect(obo).toBeDefined(); + expect(obo?.name).toBe("files.list"); + }); + + test("OBO volume + HTTP route + dev fallback (no token) → span attribute is 'service-principal'", async () => { + // Dev-fallback path: OBO volume, but no x-forwarded-access-token, so + // `_buildUserContextOrNull` returns null and `_runWithAuth` falls + // through to direct SP execution. The span must reflect what + // operationally happened (SP), not what the volume is configured + // for (OBO). + process.env.NODE_ENV = "development"; + + const plugin = new FilesPlugin({ + volumes: { + obo_vol: { auth: "on-behalf-of-user", policy: policy.allowAll() }, + uploads: {}, + exports: {}, + }, + }); + const calls = spyOnTelemetry(plugin); + + mockClient.files.listDirectoryContents.mockImplementation( + async function* () { + yield { name: "f.txt", path: "/f.txt", is_directory: false }; + }, + ); + + const handler = getRouteHandler(plugin, "get", "/list"); + // Provide x-forwarded-user (so `_extractObiUser` accepts the dev + // fallback identity) but no x-forwarded-access-token. + await handler( + mockReq("obo_vol", { + "x-forwarded-user": "alice@example.com", + }), + mockRes(), + ); + + // Span must NOT be tagged on-behalf-of-user when no user context was + // built (dev-fallback runs as SP). + const obo = calls.find( + (c) => c.attributes?.["files.auth_mode"] === "on-behalf-of-user", + ); + expect(obo).toBeUndefined(); + + const sp = calls.find( + (c) => c.attributes?.["files.auth_mode"] === "service-principal", + ); + expect(sp).toBeDefined(); + }); + }); + describe("Upload Stream Size Limiter", () => { test("stream under limit passes through all chunks", async () => { const maxSize = 100; From 5e6baa1801b9cbc6a0491027db96fc1f34803d76 Mon Sep 17 00:00:00 2001 From: Atila Fassina Date: Mon, 27 Apr 2026 12:41:37 +0200 Subject: [PATCH 10/13] docs(appkit): files OBO docs, playground demo, changelog (phase 7) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Final polish phase for files-per-volume-auth-mode: JSDoc: - Expand `VolumeConfig.auth` and `IFilesConfig.auth` with SP and OBO example blocks plus resolution-order notes. - Extend `FilePolicyUser.isServicePrincipal` JSDoc with the full six-row matrix covering SP/OBO x HTTP/programmatic x header presence, including the `asUser(req)` rows. Docs (`docs/docs/plugins/files.md`): - New "Auth modes" section: two modes, resolution order, per-mode permission requirements, prod/dev OBO behavior, manifest-scope limitation, side-by-side config examples. - New `usersOnly` policy example using `isServicePrincipal: false` and a "Policy user matrix" subsection. - Rewrites every "all operations execute as the service principal" paragraph to describe both modes. - Dedicated `asUser(req)` subsection with the honest limitation that programmatic OBO defaults don't apply without `asUser`. Dev-playground: - Server: new `obo_demo` volume with `auth: "on-behalf-of-user"` and a `usersOnly` policy; new smoke route `GET /policy/obo-volume` using `asUser(req)`. `app.yaml` gets the matching `DATABRICKS_VOLUME_OBO_DEMO` resource binding. - Client: `policy-matrix.route.tsx` gets a "Per-volume OBO mode" section with two probes (HTTP route + programmatic smoke) so the end-to-end OBO path is exercisable from the UI. Changelog: - New `## Unreleased` section in `CHANGELOG.md` (lives above the release-it generated `[0.24.0]` block) covering the per-volume auth feature, the `files.auth_mode` span, the `asUser` SDK identity behavior change, the `bypassPolicy` removal, and the honest limitation about programmatic OBO without `asUser`. Generated: - `types.generated.ts` and `plugin-manifest.generated.ts` are regenerated formatting (line-collapsing) from `pnpm build`'s generator step. Content unchanged. Backpressure: pnpm build, pnpm docs:build, pnpm check:fix, pnpm -r typecheck, pnpm test (1666 passing) all clean. Phase 7 of files-per-volume-auth-mode — feature is shippable. Co-authored-by: Isaac Signed-off-by: Atila Fassina --- CHANGELOG.md | 14 ++ apps/dev-playground/app.yaml | 5 + .../client/src/routes/policy-matrix.route.tsx | 73 +++++++ apps/dev-playground/server/index.ts | 54 +++++ .../api/appkit/Interface.FilePolicyUser.md | 22 +- docs/docs/plugins/files.md | 206 ++++++++++++++++-- packages/appkit/src/plugins/files/policy.ts | 25 ++- packages/appkit/src/plugins/files/types.ts | 83 ++++++- 8 files changed, 449 insertions(+), 33 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 03a1e9216..42a3eda3a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,20 @@ All notable changes to this project will be documented in this file. # Changelog +## Unreleased + +### appkit (files plugin) — per-volume auth mode + +* **feat(files):** add per-volume `auth` field (`VolumeConfig.auth`) and plugin-level `auth` default (`IFilesConfig.auth`) for selecting between service-principal and on-behalf-of-user execution. Resolution order: `volume.auth ?? plugin.auth ?? "service-principal"`. OBO mode wires HTTP route handlers and `VolumeHandle.asUser` through `runInUserContext` so SDK calls execute as the end user. +* **feat(files):** add `files.auth_mode` OpenTelemetry span attribute on every operation, set to either `"service-principal"` or `"on-behalf-of-user"` for trace filtering. Programmatic API calls (`appKit.files("vol").()`) now create `files.` spans where they previously did not — minor observability enrichment that respects the plugin's `traces` config (no spans emitted when traces are disabled). +* **feat(files)!:** `appKit.files("vol").asUser(req).list()` now executes the SDK call as the **end user** (previously the SDK still ran as the service principal — only the policy user was swapped). Programmatic callers that relied on SP credentials post-`asUser(req)` must remove the `asUser` wrap. +* **chore(files):** remove undocumented `bypassPolicy` option on `createVolumeAPI`. Zero consumers in `packages/` or `apps/`; no migration needed. + +#### Honest limitation + +Programmatic calls on an OBO volume **without** `asUser(req)` (i.e. `appKit.files("obo-vol").list()`) cannot synthesize a user identity and continue to execute against the service principal client at the call site. For programmatic per-user execution, use `asUser(req)`. The OBO volume default applies to **HTTP route traffic**, where the request headers are available. + + ## [0.24.0](https://github.com/databricks/appkit/compare/v0.23.0...v0.24.0) (2026-04-20) * add AST extraction to serving type generator and move types to shared/ ([#279](https://github.com/databricks/appkit/issues/279)) ([422afb3](https://github.com/databricks/appkit/commit/422afb38aa73f8adb94e091225dc3381bd92cfcd)) diff --git a/apps/dev-playground/app.yaml b/apps/dev-playground/app.yaml index e58e71a31..6303c9419 100644 --- a/apps/dev-playground/app.yaml +++ b/apps/dev-playground/app.yaml @@ -27,3 +27,8 @@ env: valueFrom: volume - name: DATABRICKS_VOLUME_IMPLICIT valueFrom: volume + # OBO demo: same physical volume; auth: "on-behalf-of-user" routes + # HTTP traffic through runInUserContext so SDK calls execute as the + # end user. + - name: DATABRICKS_VOLUME_OBO_DEMO + valueFrom: volume diff --git a/apps/dev-playground/client/src/routes/policy-matrix.route.tsx b/apps/dev-playground/client/src/routes/policy-matrix.route.tsx index c299aafc1..55fcb9b51 100644 --- a/apps/dev-playground/client/src/routes/policy-matrix.route.tsx +++ b/apps/dev-playground/client/src/routes/policy-matrix.route.tsx @@ -86,6 +86,10 @@ function PolicyMatrixRoute() { const [runningAll, setRunningAll] = useState(false); const [spResult, setSpResult] = useState(null); const [oboResult, setOboResult] = useState(null); + const [oboVolumeResult, setOboVolumeResult] = useState(null); + const [oboVolumeHttpResult, setOboVolumeHttpResult] = useState( + null, + ); useEffect(() => { fetch("/whoami") @@ -197,6 +201,40 @@ function PolicyMatrixRoute() { setOboResult(JSON.stringify(await r.json(), null, 2)); }, []); + /** + * Programmatic OBO-volume smoke. Calls the dev-playground's + * `/policy/obo-volume` route which hits `appkit.files("obo_demo")` — + * a volume configured with `auth: "on-behalf-of-user"` — through both + * `asUser(req)` and the bare callable. The browser automatically + * forwards `x-forwarded-user` / `x-forwarded-access-token` when running + * behind the Databricks Apps reverse proxy; locally they're absent and + * the dev fallback reports `service-principal` execution. + */ + const runOboVolumeSmoke = useCallback(async () => { + setOboVolumeResult("…"); + const r = await fetch("/policy/obo-volume"); + setOboVolumeResult(JSON.stringify(await r.json(), null, 2)); + }, []); + + /** + * Direct HTTP probe against the OBO volume's `/list` route. Confirms + * end-to-end that the route handler routes the SDK call through + * `runInUserContext` when the headers are present, and returns 401 (or + * 403, in dev fallback) when they're missing. + */ + const runOboVolumeHttp = useCallback(async () => { + setOboVolumeHttpResult("…"); + try { + const r = await fetch(`/api/files/obo_demo/list`); + const body = await r.json().catch(() => ({}) as Record); + setOboVolumeHttpResult( + JSON.stringify({ httpStatus: r.status, body }, null, 2), + ); + } catch (err) { + setOboVolumeHttpResult(err instanceof Error ? err.message : String(err)); + } + }, []); + const reset = useCallback(() => setState(initialState), [initialState]); return ( @@ -297,6 +335,41 @@ function PolicyMatrixRoute() { + +
+

+ Per-volume OBO mode (auth: "on-behalf-of-user") +

+

+ Hits the obo_demo volume — configured with{" "} + auth: "on-behalf-of-user" — to confirm SDK calls + execute as the end user when the request carries{" "} + x-forwarded-access-token +{" "} + x-forwarded-user. In the deployed Databricks App those + headers are injected by the platform reverse proxy. Locally they're + absent and the dev-mode fallback applies: HTTP returns 403{" "} + (the usersOnly policy denies SP traffic) and the + programmatic path runs as the SP. +

+
+ + +
+
+ + +
+
); diff --git a/apps/dev-playground/server/index.ts b/apps/dev-playground/server/index.ts index 8a77b76c1..cdccaf4fe 100644 --- a/apps/dev-playground/server/index.ts +++ b/apps/dev-playground/server/index.ts @@ -48,6 +48,15 @@ const adminOnly: FilePolicy = (action, _resource, user) => { return true; }; +/** + * OBO demo policy: deny anything running as the SP (including the dev + * fallback when no `x-forwarded-access-token` is present). Only real + * end-users (`isServicePrincipal: false`) get through. + */ +const usersOnly: FilePolicy = (_action, _resource, user) => { + return user.isServicePrincipal !== true; +}; + createApp({ plugins: [ server({ autoStart: false }), @@ -79,6 +88,14 @@ createApp({ write_only: { policy: files.policy.not(files.policy.publicRead()) }, // no explicit policy → falls back to publicRead() + startup warning implicit: {}, + // OBO demo volume — auth: "on-behalf-of-user" routes HTTP traffic + // through `runInUserContext` so SDK calls execute with the end + // user's access token. The `usersOnly` policy denies any traffic + // that wasn't authenticated via `x-forwarded-access-token`. + obo_demo: { + auth: "on-behalf-of-user", + policy: usersOnly, + }, }, }), serving(), @@ -195,6 +212,43 @@ createApp({ results, }); }); + + /** + * Per-volume OBO mode demo. Hits the `obo_demo` volume — configured + * with `auth: "on-behalf-of-user"` — to confirm: + * + * 1. With a forwarded user identity, HTTP routes execute the SDK + * call as the end user (request goes through `runInUserContext`). + * 2. Without `x-forwarded-access-token`, production returns 401; + * development falls back to the SP and the `usersOnly` policy + * rejects with 403. + * 3. Programmatic `appkit.files("obo_demo").asUser(req).list()` runs + * inside the same user context. + * + * Returns the HTTP status, body, and the user identity the server + * observes — so the policy-matrix client can render a clear + * pass/fail panel. + */ + app.get("/policy/obo-volume", async (req, res) => { + const xForwardedUser = req.header("x-forwarded-user") ?? null; + const xForwardedToken = + (req.header("x-forwarded-access-token")?.length ?? 0) > 0; + + const programmatic: ProbeResult[] = await runProbes([ + [ + "obo_demo", + "list", + () => appkit.files("obo_demo").asUser(req).list(), + ], + ]); + + res.json({ + mode: "on-behalf-of-user", + xForwardedUser, + xForwardedAccessTokenPresent: xForwardedToken, + programmatic, + }); + }); }) .start(); }); diff --git a/docs/docs/api/appkit/Interface.FilePolicyUser.md b/docs/docs/api/appkit/Interface.FilePolicyUser.md index 7be87fce2..9fb22fa0d 100644 --- a/docs/docs/api/appkit/Interface.FilePolicyUser.md +++ b/docs/docs/api/appkit/Interface.FilePolicyUser.md @@ -25,5 +25,23 @@ optional isServicePrincipal: boolean; `true` when the call is executing as the service principal — either a direct SDK call (`appKit.files(...)`) or an HTTP request that arrived -without an `x-forwarded-user` header. Policy authors typically check -this first to distinguish SP traffic from end-user traffic. +without an `x-forwarded-user` / `x-forwarded-access-token` header. +Policy authors typically check this first to distinguish SP traffic +from end-user traffic. + +The flag reflects the **policy user** the plugin selects, which +combines the volume's effective `auth` mode with the headers on the +incoming request. The full matrix: + +| Volume `auth` | Path | Headers | `isServicePrincipal` | Notes | +| --------------------- | ------------------------------ | ----------------------------- | -------------------- | ---------------------------------------------------------------------------------------------- | +| `service-principal` | HTTP | `x-forwarded-user` present | `false` (or unset) | Pre-OBO behavior. Policy sees the end user but the SDK call still runs as the SP. | +| `service-principal` | HTTP | no `x-forwarded-user` | `true` | Headerless request — policy and SDK both run as the SP. | +| `on-behalf-of-user` | HTTP | valid token + user header | `false` | Real end-user execution. Policy sees the user; the SDK call also runs as the user. | +| `on-behalf-of-user` | HTTP | missing token, dev-fallback | `true` | Only reachable when `NODE_ENV === "development"` (prod returns 401). Treated as SP traffic. | +| any | Programmatic `asUser(req)` | `x-forwarded-user` present | `false` | `asUser` extracts the user; the SDK call runs as the user inside `runInUserContext`. | + +Programmatic calls without `asUser(req)` always set +`isServicePrincipal: true` because no request is available to derive a +user identity from. OBO volume defaults apply only to HTTP route +traffic; for programmatic per-user execution, use `asUser(req)`. diff --git a/docs/docs/plugins/files.md b/docs/docs/plugins/files.md index 18295567d..2e87d0341 100644 --- a/docs/docs/plugins/files.md +++ b/docs/docs/plugins/files.md @@ -14,7 +14,7 @@ File operations against Databricks Unity Catalog Volumes. Supports listing, read - Upload size limits with streaming enforcement - Automatic cache invalidation on write operations - Custom content type mappings -- Per-user execution context (OBO) +- **Per-volume auth modes**: each volume can run as the service principal (the default) or on behalf of the end user - **Access policies**: Per-volume policy functions that gate read and write operations ## Basic usage @@ -73,6 +73,11 @@ interface IFilesConfig { customContentTypes?: Record; /** Maximum upload size in bytes. Defaults to 5 GB. Inherited by all volumes. */ maxUploadSize?: number; + /** + * Plugin-level default auth mode. Volumes inherit this when they do not + * set `VolumeConfig.auth`. Defaults to `"service-principal"`. + */ + auth?: "service-principal" | "on-behalf-of-user"; } interface VolumeConfig { @@ -82,6 +87,11 @@ interface VolumeConfig { maxUploadSize?: number; /** Map of file extensions to MIME types for this volume. Overrides plugin-level default. */ customContentTypes?: Record; + /** + * Per-volume auth mode. Inherits from `IFilesConfig.auth` when not set; + * defaults to `"service-principal"`. + */ + auth?: "service-principal" | "on-behalf-of-user"; } ``` @@ -100,6 +110,77 @@ files({ }); ``` +### Auth modes + +Each volume runs in one of two auth modes. The mode determines which identity executes the underlying Unity Catalog SDK call — and therefore which UC grant applies: + +| Mode | SDK identity | Required UC grant on the volume | +| --- | --- | --- | +| `"service-principal"` (default) | the app's service principal | `WRITE_VOLUME` (or read-equivalent) on the **SP** | +| `"on-behalf-of-user"` | the end user from the request | `WRITE_VOLUME` (or read-equivalent) on the **end user** | + +#### Resolution order + +For each volume the plugin resolves the auth mode in this order: + +``` +VolumeConfig.auth > IFilesConfig.auth > "service-principal" +``` + +Set `IFilesConfig.auth` to flip the default for every volume in one place, and override individual volumes via `VolumeConfig.auth`. + +#### Service-principal mode (default) + +Every HTTP request executes as the app's service principal. The end-user identity (from `x-forwarded-user`) is still passed into the volume policy, but the SDK call uses the SP's credentials: + +```ts +files({ + volumes: { + exports: { + // auth is implicit: "service-principal" + policy: files.policy.publicRead(), + }, + }, +}); +``` + +Use SP mode for shared resources, app-managed exports, or any case where you want a single grant on the SP to govern all access. + +#### On-behalf-of-user mode + +Every HTTP request executes as the end user. The plugin pulls the user identity and access token from the headers Databricks Apps inject (`x-forwarded-user` and `x-forwarded-access-token`) and runs the SDK call inside `runInUserContext`: + +```ts +files({ + volumes: { + "user-uploads": { + auth: "on-behalf-of-user", + // The policy sees the real end user (isServicePrincipal: false). + // You can use the volume policy in addition to UC grants. + policy: (action, _resource, user) => + // Only allow real end users, never the SP. + !user.isServicePrincipal, + }, + }, +}); +``` + +Use OBO mode when the per-user UC grant is meaningful — e.g. enforcement of UC ACLs at the SDK layer, or audit trails that need to attribute the API call to the end user instead of the app's SP. + +#### Production vs development behavior + +| Environment | OBO request with **valid** token | OBO request with **missing** `x-forwarded-access-token` | +| --- | --- | --- | +| Production | Runs as the end user. | `401 Unauthorized` — no SDK call is made. | +| Development (`NODE_ENV === "development"`) | Runs as the end user. | Logs a warning, falls back to the SP, and continues. | + +The dev-mode fallback exists so local testing without a Databricks Apps reverse proxy continues to work; deployed apps always have the headers injected. + +#### Limitations + +- The plugin manifest's `getResourceRequirements()` declares `WRITE_VOLUME` on the **service principal** for every volume, regardless of the volume's `auth` mode. For OBO volumes, the actual permission requirement is on the **end user** — communicate this out-of-band (deployment runbooks, customer onboarding docs) until the plugin manifest schema gains a per-volume auth scope field. +- Cache keys include `getCurrentUserId()`, so OBO volumes get per-user cache isolation automatically. SP volumes share a single cache slice keyed by the SP id. + ### Permission model There are three layers of access control in the files plugin. Understanding how they interact is critical for securing your app: @@ -107,11 +188,14 @@ There are three layers of access control in the files plugin. Understanding how ``` ┌─────────────────────────────────────────────────┐ │ Unity Catalog grants │ -│ (WRITE_VOLUME on the SP — set at deploy time) │ +│ WRITE_VOLUME on the SP (auth: service-principal)│ +│ WRITE_VOLUME on the user (auth: on-behalf-of-user)│ ├─────────────────────────────────────────────────┤ │ Execution identity │ -│ HTTP routes → always service principal │ -│ Programmatic → SP by default, asUser() for OBO │ +│ Resolved per volume from VolumeConfig.auth ?? │ +│ IFilesConfig.auth ?? "service-principal". │ +│ asUser(req) is a hard override at the SDK │ +│ level for the programmatic API. │ ├─────────────────────────────────────────────────┤ │ File policies │ │ Per-volume (action, resource, user) → boolean │ @@ -119,13 +203,15 @@ There are three layers of access control in the files plugin. Understanding how └─────────────────────────────────────────────────┘ ``` -- **UC grants** control what the service principal can do at the Databricks level. These are set at deploy time via `app.yaml` resource bindings. The SP needs `WRITE_VOLUME` — the plugin declares this via resource requirements. -- **Execution identity** determines whose credentials are used for the actual API call. HTTP routes always use the SP. The programmatic API uses SP by default but supports `asUser(req)` for OBO. -- **File policies** are application-level checks evaluated **before** the API call. They receive a `FilePolicyUser` describing the caller and decide allow/deny. On HTTP routes the user is extracted from the `x-forwarded-user` header when present; when the header is absent, the policy receives `{ id: , isServicePrincipal: true }` and decides for itself whether to allow service-principal traffic. This is the only gate that distinguishes between users on HTTP routes. +- **UC grants** control what an identity can do at the Databricks level. Which identity needs the grant depends on the volume's auth mode (see [Auth modes](#auth-modes)). For SP volumes, the SP needs `WRITE_VOLUME` (the plugin declares this in its manifest). For OBO volumes, the **end user** needs `WRITE_VOLUME` on the volume; the SP itself does not. +- **Execution identity** determines whose credentials are used for the actual API call. Each volume resolves to either the service principal or the end user, per its `auth` setting. The programmatic API also exposes `asUser(req)` to force per-user execution regardless of the volume's `auth`. +- **File policies** are application-level checks evaluated **before** the API call. They receive a `FilePolicyUser` describing the caller and decide allow/deny. On HTTP routes the policy user is selected based on the volume's `auth` mode and the request headers — see the [`isServicePrincipal` matrix](#policy-user-matrix). On SP volumes when `x-forwarded-user` is absent, the policy receives `{ id: , isServicePrincipal: true }` and decides whether to allow service-principal traffic. This is the only gate that distinguishes between users on HTTP routes. :::warning -Since HTTP routes always execute as the service principal, removing a user's UC `WRITE_VOLUME` grant has **no effect** on HTTP access — the SP's grant is what's used. Policies are how you restrict what individual users can do through your app. +For service-principal volumes, every HTTP request executes as the SP regardless of which user made it — so removing a user's UC `WRITE_VOLUME` grant has **no effect** on HTTP access. Policies are how you restrict what individual users can do through your app. + +For on-behalf-of-user volumes, requests execute as the requesting user — so each user must have `WRITE_VOLUME` on the volume themselves, and you can rely on UC grants in addition to policies. ::: @@ -209,6 +295,56 @@ files({ }); ``` +#### OBO policy example + +For on-behalf-of-user volumes, the policy receives `isServicePrincipal: false` whenever the request runs with a real end-user identity. A common pattern is to deny SP traffic outright so anonymous (header-less) calls can't reach the volume: + +```ts +import { type FilePolicy } from "@databricks/appkit"; + +// Deny anything running as the service principal — including the dev-mode +// fallback when no x-forwarded-access-token was provided. Real end users +// (with isServicePrincipal: false) get the configured access. +const usersOnly: FilePolicy = (_action, _resource, user) => { + return user.isServicePrincipal !== true; +}; + +files({ + volumes: { + "user-uploads": { + auth: "on-behalf-of-user", + policy: usersOnly, + }, + }, +}); +``` + +You can compose it with any other policy via `files.policy.all(...)` to add per-action gating: + +```ts +files({ + volumes: { + "user-uploads": { + auth: "on-behalf-of-user", + policy: files.policy.all(usersOnly, files.policy.publicRead()), + }, + }, +}); +``` + +#### Policy user matrix + +The plugin selects the policy user based on the volume's effective `auth` mode and the request headers. The full table: + +| Volume `auth` | Path | Headers | `isServicePrincipal` | Notes | +| --------------------- | --------------------------- | ----------------------------- | -------------------- | ---------------------------------------------------------------------------------------------- | +| `service-principal` | HTTP | `x-forwarded-user` present | `false` (or unset) | Pre-OBO behavior. Policy sees the end user but the SDK call still runs as the SP. | +| `service-principal` | HTTP | no `x-forwarded-user` | `true` | Headerless request — policy and SDK both run as the SP. | +| `on-behalf-of-user` | HTTP | valid token + user header | `false` | Real end-user execution. Policy sees the user; the SDK call also runs as the user. | +| `on-behalf-of-user` | HTTP | missing token, dev-fallback | `true` | Only reachable when `NODE_ENV === "development"` (prod returns 401). Treated as SP traffic. | +| any | Programmatic `asUser(req)` | `x-forwarded-user` present | `false` | `asUser` extracts the user; the SDK call runs as the user inside `runInUserContext`. | +| any | Programmatic (no `asUser`) | n/a | `true` | No request available to derive a user — runs as the SP. | + #### Enforcement - **HTTP routes**: Policy checked before every operation. Denied → `403` JSON response with `Policy denied "{action}" on volume "{volumeKey}"`. @@ -233,7 +369,7 @@ Dangerous MIME types (`text/html`, `text/javascript`, `application/javascript`, ## HTTP routes -Routes are mounted at `/api/files/*`. All routes execute as the service principal. Before each operation the volume policy runs: user identity comes from the `x-forwarded-user` header when present, otherwise the policy is handed `{ id: , isServicePrincipal: true }` and decides whether to allow the call. See [Access policies](#access-policies). +Routes are mounted at `/api/files/*`. Each route resolves the volume's [auth mode](#auth-modes) and either executes as the service principal (the default) or wraps the SDK call in `runInUserContext` for OBO volumes. Before every operation the volume policy runs against the resolved policy user — see the [policy user matrix](#policy-user-matrix) for the exact mapping. See also [Access policies](#access-policies). | Method | Path | Query / Body | Response | | ------ | -------------------------- | ---------------------------- | ------------------------------------------------- | @@ -287,21 +423,38 @@ Write operations (`upload`, `mkdir`, `delete`) automatically invalidate the cach ## Programmatic API -The `exports()` API is a callable that accepts a volume key and returns a `VolumeHandle`. The handle exposes all `VolumeAPI` methods directly (service principal, logs a warning) and an `asUser(req)` method for OBO access (recommended). +The `files` plugin export is a callable that accepts a volume key and returns a `VolumeHandle`. The handle exposes all `VolumeAPI` methods directly and an `asUser(req)` method for opting into per-user execution. ```ts -// OBO access (recommended) +// Default — runs as the service principal, regardless of the volume's auth +// setting (no req is available to derive a user from). +const entries = await appkit.files("uploads").list(); + +// asUser(req) — runs as the end user, regardless of the volume's auth +// setting. Forces SDK calls into runInUserContext using the request's +// x-forwarded-user / x-forwarded-access-token headers. const entries = await appkit.files("uploads").asUser(req).list(); const content = await appkit.files("exports").asUser(req).read("report.csv"); -// Service principal access (logs a warning encouraging OBO) -const entries = await appkit.files("uploads").list(); - // Named accessor const vol = appkit.files.volume("uploads"); await vol.asUser(req).list(); ``` +### `asUser(req)` + +`asUser(req)` is the supported path for programmatic per-user execution. The returned API runs every method inside `runInUserContext` so the underlying `WorkspaceClient` is the user-token client — the SDK call executes as the user, not just the policy check. + +In production, `asUser(req)` throws `AuthenticationError.missingToken` when `x-forwarded-user` is absent. In development (`NODE_ENV === "development"`) it logs a warning and falls back to the service principal so local testing without a Databricks Apps reverse proxy keeps working — the fallback skips the `runInUserContext` wrap. + +:::warning Programmatic OBO without `asUser(req)` + +A volume configured with `auth: "on-behalf-of-user"` only routes through `runInUserContext` on the **HTTP route path**, where the request headers are available. A direct programmatic call — `appkit.files("obo-vol").list()` — has no request to derive an end-user identity from, so it executes against whatever client `getWorkspaceClient()` resolves to at the call site (typically the SP at the top level). + +For programmatic per-user execution, always use `asUser(req)`. The volume's `auth` mode controls HTTP traffic; `asUser(req)` controls programmatic traffic. + +::: + ### VolumeAPI methods | Method | Signature | Returns | @@ -378,8 +531,10 @@ interface FilePolicyUser { id: string; /** * `true` when the call is executing as the service principal — either a - * direct SDK call (`appKit.files(...)`) or an HTTP request that arrived - * without an `x-forwarded-user` header. + * direct SDK call (`appKit.files(...)` without `asUser`), an HTTP request + * with no forwarded headers, or the dev-mode fallback for an OBO volume + * with a missing token. See the [policy user matrix](#policy-user-matrix) + * for the full table. */ isServicePrincipal?: boolean; } @@ -397,6 +552,11 @@ interface VolumeConfig { maxUploadSize?: number; /** Map of file extensions to MIME types for this volume. */ customContentTypes?: Record; + /** + * Per-volume auth mode. Inherits from `IFilesConfig.auth` when not set; + * defaults to `"service-principal"`. + */ + auth?: "service-principal" | "on-behalf-of-user"; } interface VolumeAPI { @@ -411,7 +571,10 @@ interface VolumeAPI { preview(filePath: string): Promise; } -/** Volume handle: all VolumeAPI methods (service principal) + asUser() for OBO. */ +/** + * Volume handle: all VolumeAPI methods (run as the service principal by + * default) + asUser() to force per-user execution at the SDK level. + */ type VolumeHandle = VolumeAPI & { asUser: (req: Request) => VolumeAPI; }; @@ -429,9 +592,12 @@ Built-in extensions: `.png`, `.jpg`, `.jpeg`, `.gif`, `.webp`, `.svg`, `.bmp`, ` ## User context -HTTP routes always execute as the **service principal** — the SP's Databricks credentials are used for all API calls. User identity is extracted from the `x-forwarded-user` header and passed to the volume's [access policy](#access-policies) for authorization. When the header is absent the policy is handed `{ id: , isServicePrincipal: true }` and decides whether to allow the call — in practice that branch only fires in development without a reverse proxy or when an upstream proxy is misconfigured, since real Databricks Apps runtimes always forward the header. This means UC grants on the SP (not individual users) determine what operations are possible, while policies control what each user is allowed to do through the app. +HTTP routes execute as either the service principal or the end user, depending on the volume's [auth mode](#auth-modes): -The programmatic API returns a `VolumeHandle` that exposes all `VolumeAPI` methods directly (service principal) and an `asUser(req)` method for OBO access. Calling any method without `asUser()` runs the policy with `isServicePrincipal: true`. In production, `asUser(req)` throws `AuthenticationError.missingToken` when the `x-forwarded-user` header is missing. In development (`NODE_ENV === "development"`) it falls back to the service principal instead, so local testing without a reverse proxy continues to work. +- **Service-principal volumes** (the default): the SP's Databricks credentials are used for the API call. User identity is extracted from the `x-forwarded-user` header and passed to the volume's [access policy](#access-policies) for authorization, but the SDK call still runs as the SP. When the header is absent the policy is handed `{ id: , isServicePrincipal: true }` and decides whether to allow the call — in practice that branch only fires in development without a reverse proxy or when an upstream proxy is misconfigured, since real Databricks Apps runtimes always forward the header. UC grants on the **SP** determine what operations are possible. +- **On-behalf-of-user volumes**: the end user's access token (from `x-forwarded-access-token`) is used to mint the SDK client, so the API call runs with the user's identity. Both the policy and the SDK see the user. UC grants on the **end user** determine what operations are possible. In production, requests with a missing token return `401`; in development (`NODE_ENV === "development"`) they fall back to the SP with a warning. + +The programmatic API returns a `VolumeHandle` that exposes all `VolumeAPI` methods directly and an `asUser(req)` method for forcing per-user execution. Calling a method without `asUser()` runs the policy and the SDK call as the SP. `asUser(req)` is a hard override at the SDK level: it forces every subsequent call to execute as the end user inside `runInUserContext`, regardless of the volume's `auth` setting. In production, `asUser(req)` throws `AuthenticationError.missingToken` when the `x-forwarded-user` header is missing. In development it falls back to the service principal instead, so local testing without a reverse proxy continues to work. ## Resource requirements @@ -439,6 +605,8 @@ Volume resources are declared **dynamically** via `getResourceRequirements(confi For example, if `DATABRICKS_VOLUME_UPLOADS` and `DATABRICKS_VOLUME_EXPORTS` are set, calling `files()` generates two required volume resources validated at startup — no explicit `volumes` config needed. +The manifest declares the grant against the **service principal**. For OBO volumes (`auth: "on-behalf-of-user"`), the actual permission requirement is on the **end user** — communicate this out-of-band in your deployment documentation until the manifest schema gains a per-volume auth scope field. + ## Error responses All errors return JSON: diff --git a/packages/appkit/src/plugins/files/policy.ts b/packages/appkit/src/plugins/files/policy.ts index 3d2366fcd..be231891b 100644 --- a/packages/appkit/src/plugins/files/policy.ts +++ b/packages/appkit/src/plugins/files/policy.ts @@ -67,13 +67,26 @@ export interface FilePolicyUser { /** * `true` when the call is executing as the service principal — either a * direct SDK call (`appKit.files(...)`) or an HTTP request that arrived - * without an `x-forwarded-user` header. Policy authors typically check - * this first to distinguish SP traffic from end-user traffic. + * without an `x-forwarded-user` / `x-forwarded-access-token` header. + * Policy authors typically check this first to distinguish SP traffic + * from end-user traffic. * - * `false` is set explicitly on the OBO (on-behalf-of-user) HTTP path when - * a valid `x-forwarded-access-token` and `x-forwarded-user` were both - * present on the request — i.e. the policy is running with a real - * end-user identity rather than the service principal. + * The flag reflects the **policy user** the plugin selects, which + * combines the volume's effective `auth` mode with the headers on the + * incoming request. The full matrix: + * + * | Volume `auth` | Path | Headers | `isServicePrincipal` | Notes | + * | --------------------- | ------------------------------ | ----------------------------- | -------------------- | ---------------------------------------------------------------------------------------------- | + * | `service-principal` | HTTP | `x-forwarded-user` present | `false` (or unset) | Pre-OBO behavior. Policy sees the end user but the SDK call still runs as the SP. | + * | `service-principal` | HTTP | no `x-forwarded-user` | `true` | Headerless request — policy and SDK both run as the SP. | + * | `on-behalf-of-user` | HTTP | valid token + user header | `false` | Real end-user execution. Policy sees the user; the SDK call also runs as the user. | + * | `on-behalf-of-user` | HTTP | missing token, dev-fallback | `true` | Only reachable when `NODE_ENV === "development"` (prod returns 401). Treated as SP traffic. | + * | any | Programmatic `asUser(req)` | `x-forwarded-user` present | `false` | `asUser` extracts the user; the SDK call runs as the user inside `runInUserContext`. | + * + * Programmatic calls without `asUser(req)` always set + * `isServicePrincipal: true` because no request is available to derive a + * user identity from. OBO volume defaults apply only to HTTP route + * traffic; for programmatic per-user execution, use `asUser(req)`. */ isServicePrincipal?: boolean; } diff --git a/packages/appkit/src/plugins/files/types.ts b/packages/appkit/src/plugins/files/types.ts index cb1618051..2e92be5a4 100644 --- a/packages/appkit/src/plugins/files/types.ts +++ b/packages/appkit/src/plugins/files/types.ts @@ -16,10 +16,50 @@ export interface VolumeConfig { */ policy?: FilePolicy; /** - * Per-volume auth mode. When `"on-behalf-of-user"`, route handlers and - * programmatic calls execute Unity Catalog SDK operations as the end user - * instead of the service principal. Inherits from `IFilesConfig.auth` if - * not set; defaults to `"service-principal"`. + * Per-volume auth mode. When `"on-behalf-of-user"`, HTTP route handlers + * execute Unity Catalog SDK operations as the end user (using the + * `x-forwarded-access-token` and `x-forwarded-user` headers injected by + * the Databricks Apps reverse proxy) instead of the service principal. + * Inherits from `IFilesConfig.auth` if not set; defaults to + * `"service-principal"`. + * + * **Permission scope**: + * - `"service-principal"`: the app's SP needs `WRITE_VOLUME` (or + * read-equivalent) on the UC volume. + * - `"on-behalf-of-user"`: each end user needs `WRITE_VOLUME` (or + * read-equivalent) on the UC volume; the SP itself does not need + * direct volume permissions. + * + * In production, OBO requests with a missing `x-forwarded-access-token` + * return `401`. In development (`NODE_ENV === "development"`) they fall + * back to the SP with a single warning so local testing without a + * reverse proxy continues to work. + * + * @example Service-principal volume (default) + * ```ts + * files({ + * volumes: { + * exports: { + * auth: "service-principal", // explicit; same as omitting + * policy: files.policy.publicRead(), + * }, + * }, + * }); + * ``` + * + * @example On-behalf-of-user volume + * ```ts + * files({ + * volumes: { + * "user-uploads": { + * auth: "on-behalf-of-user", + * // Policies see the real end user identity here. + * policy: (action, _resource, user) => + * !user.isServicePrincipal, + * }, + * }, + * }); + * ``` */ auth?: "service-principal" | "on-behalf-of-user"; } @@ -70,8 +110,39 @@ export interface IFilesConfig extends BasePluginConfig { /** Maximum upload size in bytes. Defaults to 5 GB (Databricks Files API v2 limit). */ maxUploadSize?: number; /** - * Plugin-level default auth mode for all volumes. Each volume can override - * via `VolumeConfig.auth`. Defaults to `"service-principal"` if not set. + * Plugin-level default auth mode for all volumes. Volumes without an + * explicit `auth` field inherit this default; volumes that set their own + * `VolumeConfig.auth` override it. Defaults to `"service-principal"` if + * not set. + * + * Resolution order (per volume): + * `VolumeConfig.auth` > `IFilesConfig.auth` > `"service-principal"`. + * + * @example Mark every volume OBO unless explicitly overridden + * ```ts + * files({ + * auth: "on-behalf-of-user", + * volumes: { + * // Inherits "on-behalf-of-user" from the plugin-level default. + * "user-uploads": { policy: files.policy.allowAll() }, + * // Overrides the plugin default to run as the SP. + * reports: { + * auth: "service-principal", + * policy: files.policy.publicRead(), + * }, + * }, + * }); + * ``` + * + * @example Default to service-principal (the implicit default) + * ```ts + * files({ + * // No `auth` set → all volumes default to "service-principal". + * volumes: { + * exports: { policy: files.policy.publicRead() }, + * }, + * }); + * ``` */ auth?: "service-principal" | "on-behalf-of-user"; } From 3f98628dbd70bbeea57fa4872f41be6525b24f69 Mon Sep 17 00:00:00 2001 From: Atila Fassina Date: Mon, 27 Apr 2026 14:25:40 +0200 Subject: [PATCH 11/13] =?UTF-8?q?fix(appkit):=20files=20OBO=20review=20fix?= =?UTF-8?q?es=20=E2=80=94=20auth=20strictness,=20allocation,=20cache,=20sp?= =?UTF-8?q?ans?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses four findings from the multi-model review of the files OBO feature. 1. asUser privilege confusion in production (CRITICAL, security) `_extractUser` now requires BOTH `x-forwarded-user` AND `x-forwarded-access-token` in production — throws `AuthenticationError.missingToken` when either is missing. Previously a request with the user header but no token returned a SP-wrapped API where the policy saw the request as a real end user (isServicePrincipal: undefined) but the SDK ran with SP credentials, so policies like `usersOnly: !user.isServicePrincipal` were satisfiable while wielding the SP's broader UC grants. CWE-639. Dev fallback now marks the returned policy user as `isServicePrincipal: true` and warns (was debug) so policies can't be tricked even in dev. 2. Double WorkspaceClient allocation per OBO request (HIGH, perf) New `_resolveAuthForRequest(req, volumeKey)` returns { mode, userCtx } and builds the UserContext at most once per request. `_runWithAuth(userCtx, fn)` now takes the pre-built ctx. Removes `_effectiveAuthMode` (no longer needed). Every OBO HTTP request previously constructed two WorkspaceClients; cache hits paid that cost too. Now: one allocation, even on cache hits. 3. Write-cache invalidation on OBO + wrong path arg (HIGH, correctness) Two related bugs at `_invalidateListCache`: (a) cache keys included `getCurrentUserId()`, so user A's write only busted user A's list cache — user B continued to see stale listings; (b) the `path` arg was the file path instead of the parent directory. Fix: list-cache is disabled on OBO volumes (no cross-user staleness possible). On SP volumes, invalidation now derives the parent directory via `parentDirectory()`, falling back to the `"__root__"` sentinel for root-level writes — matches `_handleList`'s key shape. Cache delete + path resolution are wrapped in best-effort try/catch so an invalidation failure cannot convert a successful write into HTTP 500. 4. Duplicate `files.` spans on programmatic calls (HIGH, perf) `FilesConnector.` already opens a `files.` span via its `traced()` decorator. The Phase 6 `_withAuthModeSpan` opened an identical span on top, doubling span allocation/export volume on every programmatic call. Fix: new `runWithFilesSpanAttributes(attrs, fn)` exported from `connectors/files/client.ts` uses AsyncLocalStorage to make ambient attributes available to the connector's `traced()` decorator. `_withAuthModeSpan` renamed to `_withAuthModeAttributes` — no longer creates a span; just sets `files.auth_mode` on the connector's existing span via the ALS channel. Programmatic calls now produce exactly one `files.` span. HTTP route attribute path (`PluginExecutionSettings.telemetryInterceptor.attributes`) is unchanged. Tests: 8 new regression guards covering each fix's failure mode (1674 total, was 1666). The pre-existing `DownloadResponse` unused-import warning is also resolved as cleanup fallout. User-facing notes (for changelog follow-up): - asUser(req) throws on missing token in production (was silent SP fallback — the previous behavior was a privilege-confusion bug). - OBO volume reads are no longer cached. Trade-off: every OBO read hits the SDK; in exchange, no cross-user staleness. SP volume reads still cache. - Programmatic appKit.files(vol).() emits one `files.` span instead of two. The `files.auth_mode` attribute now lands on the connector's existing span. Co-authored-by: Isaac Signed-off-by: Atila Fassina --- .../appkit/src/connectors/files/client.ts | 38 ++ packages/appkit/src/plugins/files/plugin.ts | 322 +++++++---- .../src/plugins/files/tests/plugin.test.ts | 498 ++++++++++++++++-- 3 files changed, 695 insertions(+), 163 deletions(-) diff --git a/packages/appkit/src/connectors/files/client.ts b/packages/appkit/src/connectors/files/client.ts index caa384af7..93203fdb6 100644 --- a/packages/appkit/src/connectors/files/client.ts +++ b/packages/appkit/src/connectors/files/client.ts @@ -1,3 +1,4 @@ +import { AsyncLocalStorage } from "node:async_hooks"; import { ApiError, type WorkspaceClient } from "@databricks/sdk-experimental"; import type { TelemetryOptions } from "shared"; import { createLogger } from "../../logging/logger"; @@ -24,6 +25,37 @@ import { const logger = createLogger("connectors:files"); +/** + * Ambient span-attribute propagation for `FilesConnector.traced()`. + * + * Callers (e.g. the plugin's `_withAuthModeAttributes` wrapper) set extra + * span attributes here via `runWithFilesSpanAttributes(attrs, fn)`. The + * connector's `traced()` decorator reads them and merges them into the + * span it creates around the SDK call. This lets the plugin tag spans with + * `files.auth_mode` without opening a duplicate `files.` span. + * + * AsyncLocalStorage is used so concurrent requests don't see each other's + * attributes. Outside an active scope, `getStore()` returns `undefined` and + * the connector falls back to the static attribute set. + */ +const filesSpanAttributesStorage = new AsyncLocalStorage< + Record +>(); + +/** + * Run `fn` with the supplied attributes attached to whatever span the + * `FilesConnector` opens for its SDK call. Used to propagate request-scoped + * attributes (e.g. `files.auth_mode`) onto the connector's span without + * opening a parent span — avoids the 2x span allocation that + * `startActiveSpan` parented otherwise. + */ +export function runWithFilesSpanAttributes( + attributes: Record, + fn: () => Promise, +): Promise { + return filesSpanAttributesStorage.run(attributes, fn); +} + interface FilesConnectorConfig { defaultVolume?: string; timeout?: number; @@ -102,12 +134,18 @@ export class FilesConnector { const startTime = Date.now(); let success = false; + // Pull any ambient attributes set by `runWithFilesSpanAttributes` (e.g. + // `files.auth_mode` from the plugin layer). Static `attributes` win on + // collision so callers can override per-call. + const ambient = filesSpanAttributesStorage.getStore(); + return this.telemetry.startActiveSpan( `files.${operation}`, { kind: SpanKind.CLIENT, attributes: { "files.operation": operation, + ...(ambient ?? {}), ...attributes, }, }, diff --git a/packages/appkit/src/plugins/files/plugin.ts b/packages/appkit/src/plugins/files/plugin.ts index cedf641b7..b5a0f52bb 100644 --- a/packages/appkit/src/plugins/files/plugin.ts +++ b/packages/appkit/src/plugins/files/plugin.ts @@ -7,6 +7,7 @@ import { contentTypeFromPath, FilesConnector, isSafeInlineContentType, + runWithFilesSpanAttributes, validateCustomContentTypes, } from "../../connectors/files"; import { @@ -37,7 +38,6 @@ import { policy, } from "./policy"; import type { - DownloadResponse, FilesExport, IFilesConfig, VolumeAPI, @@ -130,22 +130,42 @@ export class FilesPlugin extends Plugin { } /** - * Extraction for `VolumeHandle.asUser(req)`. Throws in production when the - * header is missing. In development (`NODE_ENV === "development"`) falls - * back to the service principal so local testing without a reverse proxy - * works. HTTP routes use an inline silent fallback regardless of NODE_ENV. + * Extraction for `VolumeHandle.asUser(req)`. In production we require BOTH + * `x-forwarded-user` and `x-forwarded-access-token`, and throw + * `AuthenticationError.missingToken` if either is missing — otherwise a + * request with only `x-forwarded-user: alice` would let policies see Alice + * as a "real user" (`isServicePrincipal: false`) while the SDK call below + * falls through to the SP client because `_buildUserContextOrNull` returns + * `null`. Net effect: policy approves the user, SDK runs as SP, privilege + * confusion (CWE-639/863). + * + * In development (`NODE_ENV === "development"`) we keep a local-loop + * convenience: if either header is missing we emit a single warning and + * return a policy user explicitly marked `isServicePrincipal: true`, so + * even in dev a `usersOnly`-style policy that gates on + * `!user.isServicePrincipal` cannot be tricked. The matching SDK execution + * path also falls through to the SP client (no `runInUserContext` wrap), + * so the policy user and the SDK identity stay aligned. */ private _extractUser(req: express.Request): FilePolicyUser { const userId = req.header("x-forwarded-user")?.trim(); - if (userId) return { id: userId }; + const token = req.header("x-forwarded-access-token")?.trim(); + if (userId && token) return { id: userId, isServicePrincipal: false }; if (process.env.NODE_ENV === "development") { - logger.debug( - "No x-forwarded-user header on asUser(req) — falling back to service principal identity (dev mode).", + logger.warn( + "asUser(req) called without complete x-forwarded-user + x-forwarded-access-token headers — " + + "falling back to service principal identity (dev mode). " + + "In production this request would 401.", ); return { id: getCurrentUserId(), isServicePrincipal: true }; } + if (!token) { + throw AuthenticationError.missingToken( + "Missing x-forwarded-access-token header for asUser(req). Both x-forwarded-user and x-forwarded-access-token are required.", + ); + } throw AuthenticationError.missingToken( - "Missing x-forwarded-user header. Cannot resolve user ID.", + "Missing x-forwarded-user header. Cannot resolve user ID for asUser(req).", ); } @@ -486,10 +506,20 @@ export class FilesPlugin extends Plugin { cacheKey: (string | number | object)[], authMode: "service-principal" | "on-behalf-of-user", ): PluginExecutionSettings { + // OBO volumes: disable list/read cache. The cache layer is keyed by + // `getCurrentUserId()`, so user A's writes can only invalidate user A's + // cache entry — user B would continue to see stale data for the same + // volume/path until TTL. Disabling caching trades read performance for + // correctness; the alternative is a per-(volume, path) generation + // counter folded into the cache key on writes (a future enhancement). + const isObo = authMode === "on-behalf-of-user"; + const cache = isObo + ? { ...FILES_READ_DEFAULTS.cache, enabled: false, cacheKey } + : { ...FILES_READ_DEFAULTS.cache, cacheKey }; return { default: { ...FILES_READ_DEFAULTS, - cache: { ...FILES_READ_DEFAULTS.cache, cacheKey }, + cache, telemetryInterceptor: { attributes: this._authModeAttributes(authMode), }, @@ -525,31 +555,86 @@ export class FilesPlugin extends Plugin { /** * Invalidate cached list entries for a directory after a write operation. - * Uses the same cache-key format as `_handleList`: resolved path for - * subdirectories, `"__root__"` for the volume root. + * Must produce the SAME cache-key shape that `_handleList` stored under. + * `_handleList` builds its key from `req.query.path`: when `path` is + * provided it uses `connector.resolvePath(path)`, otherwise it uses the + * sentinel `"__root__"`. The invalidation here must derive the matching + * directory from the FILE path being written: + * + * - `"/Volumes/c/s/v/foo/bar.txt"` → `parentDirectory` returns + * `"/Volumes/c/s/v/foo"` → resolved path key. + * - `"/bar.txt"` and `"bar.txt"` → root-level files: matching list cache + * was a rootless `list()` call → `"__root__"` sentinel. + * + * On OBO volumes the read cache is disabled (see `_readSettings`), so + * invalidation is a no-op here for `mode === "on-behalf-of-user"`. The + * cache layer is keyed by `getCurrentUserId()`, so user A's writes can + * only invalidate user A's cache entry — user B would otherwise see stale + * data for the same volume/path until TTL. Disabling the cache on OBO + * trades read performance for correctness; the alternative is a + * per-(volume, path) generation counter folded into the cache key on + * writes (a future enhancement). * - * Cache keys include `getCurrentUserId()`, so reads and the subsequent - * invalidation stay consistent across auth modes: on SP volumes both run as - * the service principal, and on OBO volumes both run inside the same - * `runInUserContext` scope so `getCurrentUserId()` resolves to the end - * user's ID for both the cached read and the matching invalidation. The - * user identity propagates transparently through `getCurrentUserId()`, so - * no explicit user ID needs to be threaded through this function. + * Best-effort: a thrown `connector.resolvePath` (e.g. on malformed input) + * is swallowed here. Invalidation is purely an optimization signal — a + * missed delete only costs read freshness, not correctness, and + * propagating the error would convert a successful write into an HTTP + * 500. */ private _invalidateListCache( volumeKey: string, - parentPath: string, + writtenPath: string, connector: FilesConnector, + mode: "service-principal" | "on-behalf-of-user" = "service-principal", ): void { - const parent = parentDirectory(parentPath); - const cachePathSegment = parent - ? connector.resolvePath(parent) - : "__root__"; - const listKey = this.cache.generateKey( - [`files:${volumeKey}:list`, cachePathSegment], - getCurrentUserId(), - ); - this.cache.delete(listKey); + if (mode === "on-behalf-of-user") { + // OBO read cache is disabled — nothing to invalidate. Skipping here + // also prevents accidentally caching a wrong-namespace delete that + // would mask the missing cross-user invalidation if the cache were + // ever re-enabled. + return; + } + const parent = parentDirectory(writtenPath); + // The list cache stored under `"__root__"` whenever the matching read + // came from a rootless `list()` (no `?path=`). `parentDirectory` + // returns `"/"` for root-level files like `/bar.txt`, and `""` for + // relative root-level files like `bar.txt`. Both map to the sentinel. + const isRootLevel = !parent || parent === "/"; + const userKey = getCurrentUserId(); + const tryDelete = (segment: string) => { + try { + this.cache.delete( + this.cache.generateKey([`files:${volumeKey}:list`, segment], userKey), + ); + } catch (err) { + logger.debug( + "List-cache invalidation failed for volume=%s segment=%s: %O", + volumeKey, + segment, + err, + ); + } + }; + + if (isRootLevel) { + // A rootless `list()` produced the `"__root__"` key. + tryDelete("__root__"); + return; + } + + let resolved: string; + try { + resolved = connector.resolvePath(parent); + } catch (err) { + logger.debug( + "List-cache invalidation: resolvePath(%s) failed for volume=%s: %O", + parent, + volumeKey, + err, + ); + return; + } + tryDelete(resolved); } private _handleApiError( @@ -607,8 +692,8 @@ export class FilesPlugin extends Plugin { if (!(await this._enforcePolicy(req, res, volumeKey, "list", path ?? "/"))) return; - const authMode = this._effectiveAuthMode(req, volumeKey); - await this._runWithAuth(req, volumeKey, async () => { + const { mode, userCtx } = this._resolveAuthForRequest(req, volumeKey); + await this._runWithAuth(userCtx, async () => { try { const result = await this.execute( async () => connector.list(getWorkspaceClient(), path), @@ -617,7 +702,7 @@ export class FilesPlugin extends Plugin { `files:${volumeKey}:list`, path ? connector.resolvePath(path) : "__root__", ], - authMode, + mode, ), ); @@ -648,14 +733,14 @@ export class FilesPlugin extends Plugin { if (!(await this._enforcePolicy(req, res, volumeKey, "read", path))) return; - const authMode = this._effectiveAuthMode(req, volumeKey); - await this._runWithAuth(req, volumeKey, async () => { + const { mode, userCtx } = this._resolveAuthForRequest(req, volumeKey); + await this._runWithAuth(userCtx, async () => { try { const result = await this.execute( async () => connector.read(getWorkspaceClient(), path), this._readSettings( [`files:${volumeKey}:read`, connector.resolvePath(path)], - authMode, + mode, ), ); @@ -717,11 +802,11 @@ export class FilesPlugin extends Plugin { const label = opts.mode === "download" ? "Download" : "Raw fetch"; const volumeCfg = this.volumeConfigs[volumeKey]; - const authMode = this._effectiveAuthMode(req, volumeKey); + const { mode, userCtx } = this._resolveAuthForRequest(req, volumeKey); - await this._runWithAuth(req, volumeKey, async () => { + await this._runWithAuth(userCtx, async () => { try { - const settings = this._downloadSettings(authMode); + const settings = this._downloadSettings(mode); const response = await this.execute( async () => connector.download(getWorkspaceClient(), path), settings, @@ -796,14 +881,14 @@ export class FilesPlugin extends Plugin { if (!(await this._enforcePolicy(req, res, volumeKey, "exists", path))) return; - const authMode = this._effectiveAuthMode(req, volumeKey); - await this._runWithAuth(req, volumeKey, async () => { + const { mode, userCtx } = this._resolveAuthForRequest(req, volumeKey); + await this._runWithAuth(userCtx, async () => { try { const result = await this.execute( async () => connector.exists(getWorkspaceClient(), path), this._readSettings( [`files:${volumeKey}:exists`, connector.resolvePath(path)], - authMode, + mode, ), ); @@ -835,14 +920,14 @@ export class FilesPlugin extends Plugin { if (!(await this._enforcePolicy(req, res, volumeKey, "metadata", path))) return; - const authMode = this._effectiveAuthMode(req, volumeKey); - await this._runWithAuth(req, volumeKey, async () => { + const { mode, userCtx } = this._resolveAuthForRequest(req, volumeKey); + await this._runWithAuth(userCtx, async () => { try { const result = await this.execute( async () => connector.metadata(getWorkspaceClient(), path), this._readSettings( [`files:${volumeKey}:metadata`, connector.resolvePath(path)], - authMode, + mode, ), ); @@ -874,14 +959,14 @@ export class FilesPlugin extends Plugin { if (!(await this._enforcePolicy(req, res, volumeKey, "preview", path))) return; - const authMode = this._effectiveAuthMode(req, volumeKey); - await this._runWithAuth(req, volumeKey, async () => { + const { mode, userCtx } = this._resolveAuthForRequest(req, volumeKey); + await this._runWithAuth(userCtx, async () => { try { const result = await this.execute( async () => connector.preview(getWorkspaceClient(), path), this._readSettings( [`files:${volumeKey}:preview`, connector.resolvePath(path)], - authMode, + mode, ), ); @@ -942,8 +1027,8 @@ export class FilesPlugin extends Plugin { logger.debug(req, "Upload started: volume=%s path=%s", volumeKey, path); - const authMode = this._effectiveAuthMode(req, volumeKey); - await this._runWithAuth(req, volumeKey, async () => { + const { mode, userCtx } = this._resolveAuthForRequest(req, volumeKey); + await this._runWithAuth(userCtx, async () => { try { const rawStream: ReadableStream = Readable.toWeb(req); @@ -972,7 +1057,7 @@ export class FilesPlugin extends Plugin { path, contentLength ?? 0, ); - const settings = this._writeSettings(authMode); + const settings = this._writeSettings(mode); // The connector's `upload` resolves `getWorkspaceClient()` and // `client.config.authenticate(headers)` synchronously inside this // callback. When `_runWithAuth` wraps us in `runInUserContext`, that @@ -985,7 +1070,7 @@ export class FilesPlugin extends Plugin { }, settings), ); - this._invalidateListCache(volumeKey, path, connector); + this._invalidateListCache(volumeKey, path, connector, mode); if (!result.ok) { logger.error( @@ -1037,10 +1122,10 @@ export class FilesPlugin extends Plugin { if (!(await this._enforcePolicy(req, res, volumeKey, "mkdir", dirPath))) return; - const authMode = this._effectiveAuthMode(req, volumeKey); - await this._runWithAuth(req, volumeKey, async () => { + const { mode, userCtx } = this._resolveAuthForRequest(req, volumeKey); + await this._runWithAuth(userCtx, async () => { try { - const settings = this._writeSettings(authMode); + const settings = this._writeSettings(mode); const result = await this.trackWrite(() => this.execute(async () => { await connector.createDirectory(getWorkspaceClient(), dirPath); @@ -1048,7 +1133,7 @@ export class FilesPlugin extends Plugin { }, settings), ); - this._invalidateListCache(volumeKey, dirPath, connector); + this._invalidateListCache(volumeKey, dirPath, connector, mode); if (!result.ok) { this._sendStatusError(res, result.status); @@ -1080,10 +1165,10 @@ export class FilesPlugin extends Plugin { if (!(await this._enforcePolicy(req, res, volumeKey, "delete", path))) return; - const authMode = this._effectiveAuthMode(req, volumeKey); - await this._runWithAuth(req, volumeKey, async () => { + const { mode, userCtx } = this._resolveAuthForRequest(req, volumeKey); + await this._runWithAuth(userCtx, async () => { try { - const settings = this._writeSettings(authMode); + const settings = this._writeSettings(mode); const result = await this.trackWrite(() => this.execute(async () => { await connector.delete(getWorkspaceClient(), path); @@ -1091,7 +1176,7 @@ export class FilesPlugin extends Plugin { }, settings), ); - this._invalidateListCache(volumeKey, path, connector); + this._invalidateListCache(volumeKey, path, connector, mode); if (!result.ok) { this._sendStatusError(res, result.status); @@ -1150,81 +1235,88 @@ export class FilesPlugin extends Plugin { } /** - * Resolve the auth mode that `_runWithAuth` would actually apply to the - * request. Mirrors `_runWithAuth`'s decision logic so HTTP handlers can - * tag their telemetry spans with the operationally-effective auth mode - * before invoking `this.execute(...)`. + * One-shot resolver for HTTP route handlers. Builds the request's + * `UserContext` AT MOST ONCE (when the volume is OBO and the headers are + * present) and returns both the operationally-effective auth mode and the + * pre-built `UserContext`. + * + * Handlers thread the `userCtx` into `_runWithAuth(userCtx, fn)` to avoid + * a second `ServiceContext.createUserContext()` allocation — that call + * builds a fresh `WorkspaceClient` per invocation, so doing it twice per + * request was pure throwaway overhead. */ - private _effectiveAuthMode( + private _resolveAuthForRequest( req: express.Request, volumeKey: string, - ): "service-principal" | "on-behalf-of-user" { + ): { + mode: "service-principal" | "on-behalf-of-user"; + userCtx: UserContext | null; + } { if (this._resolveAuth(volumeKey) !== "on-behalf-of-user") { - return "service-principal"; + return { mode: "service-principal", userCtx: null }; } - return this._buildUserContextOrNull(req) - ? "on-behalf-of-user" - : "service-principal"; + const userCtx = this._buildUserContextOrNull(req); + return userCtx + ? { mode: "on-behalf-of-user", userCtx } + : { mode: "service-principal", userCtx: null }; } /** - * Run `fn` under the correct execution context for `volumeKey`. - * - SP volumes (`auth: "service-principal"`): invokes `fn` directly so the - * service-principal `WorkspaceClient` and `getCurrentUserId()` are used — - * identical behavior to pre-OBO releases. - * - OBO volumes (`auth: "on-behalf-of-user"`): wraps `fn` in - * `runInUserContext` with a `UserContext` built from the request headers, + * Run `fn` under the correct execution context. + * - `userCtx` is `null`: invokes `fn` directly so the service-principal + * `WorkspaceClient` and `getCurrentUserId()` are used — identical + * behavior to pre-OBO releases. This covers both SP volumes and the + * OBO dev-fallback path (where headers were missing). + * - `userCtx` is a `UserContext`: wraps `fn` in `runInUserContext(userCtx)`, * so SDK calls execute as the end user and `getCurrentUserId()` (and - * therefore cache keys) resolve to the user's ID. Falls back to the SP - * path when no user context can be built — only reachable in dev mode, - * since `_enforcePolicy` 401s in production when the OBO headers are - * missing. + * therefore cache keys) resolve to the user's ID. + * + * The caller is responsible for building `userCtx` exactly once per + * request via `_resolveAuthForRequest`; this signature deliberately does + * NOT take a `req` so it cannot accidentally re-build the context. */ private async _runWithAuth( - req: express.Request, - volumeKey: string, + userCtx: UserContext | null, fn: () => Promise, ): Promise { - if (this._resolveAuth(volumeKey) === "on-behalf-of-user") { - const userCtx = this._buildUserContextOrNull(req); - if (userCtx) { - return runInUserContext(userCtx, fn); - } + if (userCtx) { + return runInUserContext(userCtx, fn); } return fn(); } /** - * Wrap a programmatic VolumeAPI method invocation in a telemetry span - * tagged with `files.auth_mode`. Programmatic calls bypass - * `this.execute(...)` (and therefore the `TelemetryInterceptor`); this - * helper restores the missing span so the `files.auth_mode` attribute - * lands consistently across both HTTP and programmatic surfaces. + * Tag the span that `FilesConnector.` opens with + * `files.auth_mode`. Programmatic VolumeAPI methods bypass + * `this.execute(...)` (and therefore the `TelemetryInterceptor`), so the + * connector's own `files.` span is the natural place to land + * this attribute. Rather than opening a parent `files.` span + * (which would duplicate the connector's span — same name, doubled + * allocation/export), we propagate the attribute via AsyncLocalStorage + * and let the connector merge it into its existing span at creation + * time. + * + * The `operation` parameter is unused by the propagation mechanism (the + * connector knows its own operation), but kept in the signature for API + * stability with the previous span-creation form. */ - private _withAuthModeSpan( - operation: string, + private _withAuthModeAttributes( + _operation: string, authMode: "service-principal" | "on-behalf-of-user", fn: () => Promise, ): Promise { - return this.telemetry.startActiveSpan( - `files.${operation}`, - { attributes: this._authModeAttributes(authMode) }, - async (span) => { - try { - return await fn(); - } finally { - span.end(); - } - }, - ); + return runWithFilesSpanAttributes(this._authModeAttributes(authMode), fn); } /** - * Wrap each `VolumeAPI` method so its execution is tagged with - * `files.auth_mode = "service-principal"`. Used for programmatic calls - * that don't go through `asUser(req)`. Each wrapped method runs inside - * its own telemetry span so SP-mode invocations are distinguishable in - * trace output. + * Wrap each `VolumeAPI` method so the `FilesConnector` span it produces is + * tagged with `files.auth_mode = "service-principal"`. Used for + * programmatic calls that don't go through `asUser(req)`. + * + * The attribute is attached to the connector's existing span via + * AsyncLocalStorage propagation (see `_withAuthModeAttributes`); no + * additional parent span is opened, so each call produces exactly one + * `files.` span instead of two. */ private _wrapVolumeAPIWithSPSpan(api: VolumeAPI): VolumeAPI { const wrap = @@ -1233,7 +1325,7 @@ export class FilesPlugin extends Plugin { fn: (...args: Args) => Promise, ): ((...args: Args) => Promise) => (...args: Args) => - this._withAuthModeSpan(operation, "service-principal", () => + this._withAuthModeAttributes(operation, "service-principal", () => fn(...args), ); @@ -1256,9 +1348,11 @@ export class FilesPlugin extends Plugin { * force the SDK identity to the end user regardless of the volume's * `auth` setting. The policy check baked into each method (via * `createVolumeAPI`) runs inside the same scope, so `getCurrentUserId()` - * and any cache `userKey` derived from it also resolve to the user. Each - * wrapped invocation is tagged with `files.auth_mode = "on-behalf-of-user"` - * so OBO calls are distinguishable in trace output. + * and any cache `userKey` derived from it also resolve to the user. + * + * Each wrapped invocation tags the connector's span with + * `files.auth_mode = "on-behalf-of-user"` via AsyncLocalStorage + * propagation — no additional parent span is opened. */ private _wrapVolumeAPIInUserContext( api: VolumeAPI, @@ -1270,7 +1364,7 @@ export class FilesPlugin extends Plugin { fn: (...args: Args) => Promise, ): ((...args: Args) => Promise) => (...args: Args) => - this._withAuthModeSpan(operation, "on-behalf-of-user", () => + this._withAuthModeAttributes(operation, "on-behalf-of-user", () => runInUserContext(userCtx, () => fn(...args)), ); diff --git a/packages/appkit/src/plugins/files/tests/plugin.test.ts b/packages/appkit/src/plugins/files/tests/plugin.test.ts index a80d8b2e3..76f25664c 100644 --- a/packages/appkit/src/plugins/files/tests/plugin.test.ts +++ b/packages/appkit/src/plugins/files/tests/plugin.test.ts @@ -1520,7 +1520,7 @@ describe("FilesPlugin", () => { } }); - test("asUser() call → policy receives user without isServicePrincipal", async () => { + test("asUser() call with full OBO headers → policy receives { id, isServicePrincipal: false }", async () => { const policySpy = vi.fn().mockReturnValue(true); const spyConfig = { volumes: { @@ -1549,9 +1549,13 @@ describe("FilesPlugin", () => { expect.objectContaining({ volume: "spied" }), expect.objectContaining({ id: "test-user" }), ); - // Should NOT have isServicePrincipal set + // `_extractUser` MUST mark a real-user identity explicitly so + // policies can reliably distinguish user vs SP. Returning + // `undefined` here would let `usersOnly`-style policies tied to + // `!user.isServicePrincipal` behave inconsistently (the legacy + // bug fixed by the asUser hardening). const userArg = policySpy.mock.calls[0][2]; - expect(userArg.isServicePrincipal).toBeUndefined(); + expect(userArg.isServicePrincipal).toBe(false); } finally { delete process.env.DATABRICKS_VOLUME_SPIED; } @@ -2349,7 +2353,14 @@ describe("FilesPlugin", () => { expect(res.send).toHaveBeenCalled(); }); - test("Cache isolation: same path, two users on OBO volume → two distinct cache keys", async () => { + test("OBO read cache is DISABLED: cross-user reads do not share cache state", async () => { + // Per Fix 3: the read cache is keyed by `getCurrentUserId()`, so user + // A's writes can only invalidate user A's cache entry. Cross-user + // staleness was the bug. The chosen mitigation (Option B) is to + // disable the read cache on OBO volumes entirely — this test pins + // that contract: OBO reads must NOT consult `getOrExecute`. The + // alternative (Option A: per-(volume, path) generation counters) + // would re-enable cache here. await useRealGetCurrentUserId(); const policySpy = vi.fn().mockReturnValue(true); const plugin = new FilesPlugin({ @@ -2385,18 +2396,13 @@ describe("FilesPlugin", () => { mockRes(), ); - // The cache layer is consulted via getOrExecute(cacheKey, fn, userKey). - // For OBO volumes the `userKey` argument must be the real user's ID - // (resolved by getCurrentUserId() inside the runInUserContext scope), - // so the two requests produce two distinct cache entries. - const userKeys = mockCacheInstance.getOrExecute.mock.calls.map( - (c: unknown[]) => c[2], - ); - expect(userKeys).toEqual(["alice@example.com", "bob@example.com"]); - expect(new Set(userKeys).size).toBe(2); + // Cache is disabled on OBO: `getOrExecute` is bypassed. The SDK + // must execute on every request — no cross-user staleness possible. + expect(mockCacheInstance.getOrExecute).not.toHaveBeenCalled(); + expect(mockClient.files.listDirectoryContents).toHaveBeenCalledTimes(2); }); - test("SP/OBO cache no-collide: same path on SP volume vs OBO volume → distinct cache keys", async () => { + test("SP volume reads still use the cache (cache is only disabled for OBO)", async () => { await useRealGetCurrentUserId(); const plugin = new FilesPlugin({ volumes: { @@ -2418,9 +2424,7 @@ describe("FilesPlugin", () => { }, ); - // SP volume request — `getCurrentUserId()` outside any user context - // returns the service principal's identity (the underlying real impl - // reads `ServiceContext.serviceUserId`). + // SP volume request — must consult the cache (cache enabled). await listHandler( mockReq("uploads", { "x-forwarded-access-token": "sp-token-ignored", @@ -2429,8 +2433,7 @@ describe("FilesPlugin", () => { mockRes(), ); - // OBO volume request — same human user, but execution is wrapped in - // runInUserContext so `userKey` resolves to alice's ID, not the SP's. + // OBO volume request — must skip the cache (cache disabled on OBO). await listHandler( mockReq("obo_vol", { "x-forwarded-access-token": "alice-token", @@ -2440,18 +2443,61 @@ describe("FilesPlugin", () => { ); const calls = mockCacheInstance.getOrExecute.mock.calls; - expect(calls).toHaveLength(2); + // Exactly one cache consultation — the SP volume's. The OBO request + // bypassed the cache entirely. + expect(calls).toHaveLength(1); + const spCacheKey = calls[0][0]; + // The SP cache entry's array form is namespaced by volumeKey, so the + // OBO volume could not have collided even if its read had been + // cached. + expect(spCacheKey).toEqual( + expect.arrayContaining([expect.stringContaining("uploads")]), + ); + }); - // The cacheKey is the first arg — the SP and OBO volumes already - // namespace cache entries by `volumeKey`, so the array form differs. - const cacheKeys = calls.map((c: unknown[]) => c[0]); - expect(cacheKeys[0]).not.toEqual(cacheKeys[1]); + /** + * Fix 2 regression: every OBO HTTP request must build the UserContext + * AT MOST ONCE. The previous implementation invoked + * `_effectiveAuthMode` and then `_runWithAuth` separately — each + * called `_buildUserContextOrNull`, which calls + * `ServiceContext.createUserContext`, which constructs a fresh + * `WorkspaceClient`. Two builds per request was pure throwaway + * overhead. The single-pass `_resolveAuthForRequest` resolver collapses + * them. This test pins the contract: ONE `createUserContext` call per + * OBO HTTP request. + */ + test("OBO HTTP request builds the UserContext exactly once (no duplicate WorkspaceClient allocation)", async () => { + const policySpy = vi.fn().mockReturnValue(true); + const plugin = new FilesPlugin({ + volumes: { + obo_vol: { auth: "on-behalf-of-user", policy: policySpy }, + uploads: {}, + exports: {}, + }, + }); + const handler = getRouteHandler(plugin, "get", "/list"); - // Defense-in-depth: even if the array-form cacheKey matched, the - // userKey differs because OBO runs under runInUserContext while SP - // runs in the SP's serviceUserId. - const userKeys = calls.map((c: unknown[]) => c[2]); - expect(userKeys[0]).not.toEqual(userKeys[1]); + mockClient.files.listDirectoryContents.mockImplementation( + async function* () { + yield { name: "f.txt", path: "/f.txt", is_directory: false }; + }, + ); + + // Reset before our request — `mockServiceContext` may have been + // consulted during plugin setup in earlier tests in the suite. + serviceContextMock.createUserContextSpy.mockClear(); + + await handler( + mockReq("obo_vol", { + "x-forwarded-access-token": "alice-token", + "x-forwarded-user": "alice@example.com", + }), + mockRes(), + ); + + // Exactly one. Two would mean the duplicate-allocation regression + // is back. + expect(serviceContextMock.createUserContextSpy).toHaveBeenCalledTimes(1); }); }); @@ -2853,6 +2899,210 @@ describe("FilesPlugin", () => { expect.objectContaining({ plugin: "files" }), ); }); + + /** + * Fix 3 regression: SP-volume write to a nested path must invalidate + * the parent directory's list cache, not the file path's. The previous + * code passed `path` (file path) directly to the cache key as the + * "path segment" — so `_handleList` (which keys by directory) and + * `_invalidateListCache` (which keyed by file) used different segments. + */ + test("SP write of /Volumes/.../foo/bar.txt invalidates the parent /foo list cache key (not the file path)", async () => { + // SP volume — uses the cache. + process.env.DATABRICKS_VOLUME_SP_VOL = "/Volumes/c/s/sp"; + try { + const plugin = new FilesPlugin({ + volumes: { + sp_vol: { policy: policy.allowAll() }, + uploads: {}, + exports: {}, + }, + }); + const mkdirHandler = getRouteHandler(plugin, "post", "/mkdir"); + + mockClient.files.createDirectory.mockResolvedValue(undefined); + + // Track which (parts, userKey) pairs go through generateKey so we + // can match the invalidation segment exactly. + const generateKeyCalls: Array<{ + parts: (string | number | object)[]; + userKey: string; + }> = []; + mockCacheInstance.generateKey.mockImplementation( + (parts: (string | number | object)[], userKey: string) => { + generateKeyCalls.push({ parts, userKey }); + return "stub-key"; + }, + ); + + await mkdirHandler( + mockReq("sp_vol", {}, { body: { path: "/Volumes/c/s/sp/foo/bar" } }), + mockRes(), + ); + + // Exactly one list-cache invalidation key was constructed. + const listInvalidations = generateKeyCalls.filter( + (c) => Array.isArray(c.parts) && c.parts[0] === "files:sp_vol:list", + ); + expect(listInvalidations).toHaveLength(1); + + // The path-segment is the PARENT directory (resolved), not the + // written path. `parentDirectory("/Volumes/c/s/sp/foo/bar")` + // returns `"/Volumes/c/s/sp/foo"`, which the connector resolves + // unchanged because it's already absolute and starts with /Volumes/. + expect(listInvalidations[0].parts[1]).toBe("/Volumes/c/s/sp/foo"); + // Defense-in-depth: the file path itself must NOT appear as the + // segment. + expect(listInvalidations[0].parts[1]).not.toBe( + "/Volumes/c/s/sp/foo/bar", + ); + } finally { + delete process.env.DATABRICKS_VOLUME_SP_VOL; + } + }); + + /** + * Fix 3 regression: SP root-level write (e.g. mkdir of a relative path + * like "newdir") must invalidate the `"__root__"` sentinel that + * `_handleList` uses for rootless listings. + */ + test("SP write of root-level path uses the __root__ sentinel for invalidation (matches _handleList's rootless cache key)", async () => { + const plugin = new FilesPlugin({ + volumes: { + uploads: { policy: policy.allowAll() }, + exports: {}, + }, + }); + const mkdirHandler = getRouteHandler(plugin, "post", "/mkdir"); + + mockClient.files.createDirectory.mockResolvedValue(undefined); + + const generateKeyCalls: Array<{ + parts: (string | number | object)[]; + userKey: string; + }> = []; + mockCacheInstance.generateKey.mockImplementation( + (parts: (string | number | object)[], userKey: string) => { + generateKeyCalls.push({ parts, userKey }); + return "stub-key"; + }, + ); + + // Relative path "newdir" → parentDirectory returns "" → root-level + // → must use the "__root__" sentinel. + await mkdirHandler( + mockReq("uploads", {}, { body: { path: "newdir" } }), + mockRes(), + ); + + const listInvalidations = generateKeyCalls.filter( + (c) => Array.isArray(c.parts) && c.parts[0] === "files:uploads:list", + ); + expect(listInvalidations).toHaveLength(1); + expect(listInvalidations[0].parts[1]).toBe("__root__"); + }); + + /** + * Fix 3 regression: cross-user OBO read freshness. The OBO read cache + * is keyed by `getCurrentUserId()`, so user A's writes can only + * invalidate user A's cache entry. With cache disabled on OBO, user B + * must see fresh data after user A writes. + */ + test("OBO write by user A → user B's next read sees fresh data (cross-user freshness; cache disabled on OBO)", async () => { + // This test relies on the DEFAULT mocked `getWorkspaceClient` and + // `getCurrentUserId` (always returning the SP fixture's + // `mockClient`). Earlier tests in this block install the REAL impls + // via `useRealGetWorkspaceClient`/`useRealGetCurrentUserId`, and + // Vitest's `vi.clearAllMocks` between tests does NOT reset + // implementations — so we restore the defaults explicitly. + const ctx = await import("../../../context"); + vi.mocked(ctx.getWorkspaceClient).mockImplementation( + () => mockClient as any, + ); + vi.mocked(ctx.getCurrentUserId).mockImplementation( + () => "test-service-principal", + ); + + const plugin = new FilesPlugin({ + volumes: { + obo_vol: { + auth: "on-behalf-of-user", + policy: policy.allowAll(), + }, + uploads: {}, + exports: {}, + }, + }); + const listHandler = getRouteHandler(plugin, "get", "/list"); + const uploadHandler = getRouteHandler(plugin, "post", "/upload"); + + // The connector's `list()` aggregates async-iterable yields into an + // array. Initial listings return [], the post-upload listing returns + // [{...}]. We toggle the return AFTER alice's upload has reached + // the SDK to simulate cross-user freshness. + let postUploadVisible = false; + mockClient.files.listDirectoryContents.mockImplementation( + async function* () { + if (postUploadVisible) { + yield { + name: "fresh.txt", + path: "/fresh.txt", + is_directory: false, + }; + } + }, + ); + mockClient.config.authenticate.mockImplementation(async (h: Headers) => { + h.set("Authorization", "Bearer ALICE"); + }); + const fetchSpy = vi + .fn() + .mockResolvedValue({ ok: true, status: 200, text: async () => "" }); + vi.stubGlobal("fetch", fetchSpy); + + // Bob's first list — empty. + const bobRes1 = mockRes(); + await listHandler( + mockReq("obo_vol", { + "x-forwarded-access-token": "bob-token", + "x-forwarded-user": "bob@example.com", + }), + bobRes1, + ); + + // Alice uploads. + postUploadVisible = true; + const aliceRes = mockRes(); + await uploadHandler( + mockUploadReq( + "obo_vol", + { + "x-forwarded-access-token": "alice-token", + "x-forwarded-user": "alice@example.com", + "content-length": "5", + }, + "hello", + ), + aliceRes, + ); + + // Bob's second list — must see the fresh file because the OBO read + // cache is disabled (cross-user freshness guard). + const bobRes2 = mockRes(); + await listHandler( + mockReq("obo_vol", { + "x-forwarded-access-token": "bob-token", + "x-forwarded-user": "bob@example.com", + }), + bobRes2, + ); + + // Bob's first response was empty, second has the fresh entry. + const bobJson1 = bobRes1.json.mock.calls[0]?.[0] ?? []; + const bobJson2 = bobRes2.json.mock.calls[0]?.[0] ?? []; + expect(Array.isArray(bobJson1) ? bobJson1.length : 0).toBe(0); + expect(Array.isArray(bobJson2) ? bobJson2.length : 0).toBeGreaterThan(0); + }); }); describe("VolumeHandle.asUser SDK identity", () => { @@ -3041,36 +3291,129 @@ describe("FilesPlugin", () => { // anywhere near `createUserContext`. expect(serviceContextMock.createUserContextSpy).not.toHaveBeenCalled(); }); + + /** + * Fix 1 regression: in production, `asUser(req)` MUST require both + * `x-forwarded-user` and `x-forwarded-access-token`. The previous + * implementation only required the user header. With user-only: + * - `_extractUser` returned `{ id: "alice" }` (no isServicePrincipal) + * - `_buildUserContextOrNull` returned `null` (token missing) + * - `asUser` returned the SP-wrapped API (no runInUserContext) + * Net effect: policy saw alice as a real user, SDK ran with SP creds. + * This test pins the new strict-token contract. + */ + test("asUser in production with x-forwarded-user but no x-forwarded-access-token throws AuthenticationError.missingToken (privilege-confusion guard)", () => { + process.env.NODE_ENV = "production"; + const plugin = new FilesPlugin(VOLUMES_CONFIG); + const handle = plugin.exports()("uploads"); + const reqWithUserOnly = { + header: (name: string) => + ({ "x-forwarded-user": "alice@example.com" })[name.toLowerCase()], + } as any; + + expect(() => handle.asUser(reqWithUserOnly)).toThrow(AuthenticationError); + try { + handle.asUser(reqWithUserOnly); + } catch (err) { + expect(err).toBeInstanceOf(AuthenticationError); + expect((err as Error).message).toMatch(/x-forwarded-access-token/); + } + }); + + /** + * Fix 1 regression — dev mode: with only `x-forwarded-user` and no + * `x-forwarded-access-token`, asUser must NOT silently expose the SP + * client to the user identity. The dev-fallback returns a policy user + * marked `isServicePrincipal: true` so a `usersOnly` policy that gates + * on `!user.isServicePrincipal` still fails closed (just like + * production does, with a 401 there). + */ + test("asUser in development with x-forwarded-user but no x-forwarded-access-token returns SP-marked policy user; SDK runs as SP", async () => { + process.env.NODE_ENV = "development"; + const policySpy = vi.fn().mockReturnValue(true); + + // Use the default mocked SP client; spy on its listDirectoryContents + // to confirm the SDK call resolved against the SP path. + const spListSpy = vi.fn(async function* () { + yield { name: "sp.txt", path: "/sp.txt", is_directory: false }; + }); + mockClient.files.listDirectoryContents.mockImplementation(spListSpy); + + serviceContextMock.createUserContextSpy.mockClear(); + + const plugin = new FilesPlugin({ + volumes: { + uploads: { policy: policySpy }, + exports: {}, + }, + }); + const handle = plugin.exports()("uploads"); + + const reqWithUserOnly = { + header: (name: string) => + ({ "x-forwarded-user": "alice@example.com" })[name.toLowerCase()], + } as any; + + const userApi = handle.asUser(reqWithUserOnly); + await userApi.list(); + + // Policy received an explicitly SP-marked identity — a usersOnly + // policy gating on !isServicePrincipal would correctly fail closed. + expect(policySpy).toHaveBeenCalledTimes(1); + const userArg = policySpy.mock.calls[0][2]; + expect(userArg.isServicePrincipal).toBe(true); + + // SDK ran via the SP client (no UserContext was built — the + // dev-fallback skips runInUserContext). + expect(spListSpy).toHaveBeenCalledTimes(1); + expect(serviceContextMock.createUserContextSpy).not.toHaveBeenCalled(); + }); }); describe("files.auth_mode telemetry attribute", () => { /** - * Spy on the plugin's telemetry provider so we can inspect the - * `attributes` argument passed to `startActiveSpan`. Both the - * `TelemetryInterceptor` (HTTP routes) and `_withAuthModeSpan` - * (programmatic API) ultimately call `telemetry.startActiveSpan(name, - * options, fn)` — `options.attributes["files.auth_mode"]` is the - * single point this test pins. + * Spy on the plugin's telemetry provider AND every per-volume + * `FilesConnector.telemetry` provider so we can inspect every + * `startActiveSpan` invocation. + * + * The plugin's spans come from the `TelemetryInterceptor` (HTTP route + * path). Programmatic calls go through the connector's `traced()` + * decorator, which creates `files.` spans on the connector's own + * provider — and per Fix 4 the auth-mode attribute is now propagated + * onto THAT span via AsyncLocalStorage (no duplicate parent span on + * the plugin's provider). The test asserts on the merged call list. */ function spyOnTelemetry(plugin: FilesPlugin) { - const telemetry = (plugin as any).telemetry as { - startActiveSpan: (...args: unknown[]) => Promise; - }; const calls: Array<{ name: string; attributes: Record | undefined; }> = []; - const original = telemetry.startActiveSpan.bind(telemetry); - vi.spyOn(telemetry, "startActiveSpan").mockImplementation( - (...args: unknown[]) => { - const [name, options] = args as [ - string, - { attributes?: Record } | undefined, - ]; - calls.push({ name, attributes: options?.attributes }); - return original(...args); - }, - ); + + const wire = (telemetry: { + startActiveSpan: (...args: unknown[]) => Promise; + }) => { + const original = telemetry.startActiveSpan.bind(telemetry); + vi.spyOn(telemetry, "startActiveSpan").mockImplementation( + (...args: unknown[]) => { + const [name, options] = args as [ + string, + { attributes?: Record } | undefined, + ]; + calls.push({ name, attributes: options?.attributes }); + return original(...args); + }, + ); + }; + + wire((plugin as any).telemetry); + const connectors = (plugin as any).volumeConnectors as Record< + string, + { telemetry: any } + >; + for (const conn of Object.values(connectors)) { + wire(conn.telemetry); + } + return calls; } @@ -3223,6 +3566,63 @@ describe("FilesPlugin", () => { expect(obo?.name).toBe("files.list"); }); + /** + * Fix 4 regression: programmatic calls produce exactly ONE + * `files.` span (the connector's), not two. The previous + * `_withAuthModeSpan` helper opened a duplicate parent span with the + * same name, doubling allocation/export. The current implementation + * propagates `files.auth_mode` onto the connector's existing span via + * AsyncLocalStorage. This test pins span count == 1. + */ + test("programmatic asUser().list() produces exactly ONE files.list span (no duplicate parent span)", async () => { + const plugin = new FilesPlugin(VOLUMES_CONFIG); + const calls = spyOnTelemetry(plugin); + + mockClient.files.listDirectoryContents.mockImplementation( + async function* () { + yield { name: "f.txt", path: "/f.txt", is_directory: false }; + }, + ); + + const handle = plugin.exports()("uploads"); + const userApi = handle.asUser({ + header: (name: string) => + ({ + "x-forwarded-access-token": "alice-token", + "x-forwarded-user": "alice@example.com", + })[name.toLowerCase()], + } as any); + await userApi.list("subdir"); + + const filesListSpans = calls.filter((c) => c.name === "files.list"); + // Pre-fix: 2 (outer auth-mode span + inner connector span). Now: 1. + expect(filesListSpans).toHaveLength(1); + // The single span must carry the auth_mode attribute. + expect(filesListSpans[0].attributes?.["files.auth_mode"]).toBe( + "on-behalf-of-user", + ); + }); + + test("programmatic SP-volume .list() produces exactly ONE files.list span tagged service-principal", async () => { + const plugin = new FilesPlugin(VOLUMES_CONFIG); + const calls = spyOnTelemetry(plugin); + + mockClient.files.listDirectoryContents.mockImplementation( + async function* () { + yield { name: "f.txt", path: "/f.txt", is_directory: false }; + }, + ); + + const handle = plugin.exports()("uploads"); + await handle.list("subdir"); // no asUser → SP path + + const filesListSpans = calls.filter((c) => c.name === "files.list"); + expect(filesListSpans).toHaveLength(1); + expect(filesListSpans[0].attributes?.["files.auth_mode"]).toBe( + "service-principal", + ); + }); + test("OBO volume + HTTP route + dev fallback (no token) → span attribute is 'service-principal'", async () => { // Dev-fallback path: OBO volume, but no x-forwarded-access-token, so // `_buildUserContextOrNull` returns null and `_runWithAuth` falls From 81ad34a25ea718898e4b351f7e564c9151943946 Mon Sep 17 00:00:00 2001 From: Atila Fassina Date: Mon, 27 Apr 2026 15:36:49 +0200 Subject: [PATCH 12/13] fix(appkit): files invalidate-cache await + integration ephemeral ports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two follow-up review fixes on top of 3f98628d. A — _invalidateListCache not awaited (medium, correctness) Write handlers (_handleUpload, _handleMkdir, _handleDelete) called _invalidateListCache without `await`, so res.json() shipped before cache invalidation completed. A client that issued a follow-up GET /list in the same tick could hit the stale cache. Fix: _invalidateListCache is now genuinely `async` and all three call sites `await` it before sending the response. Best-effort try/catch around cache.delete and connector.resolvePath remains, so an invalidation failure cannot convert a successful write into 500. Regression test installs a deferred-promise cache.delete and asserts res.json is NOT called until the delete settles. Verified the test fails when the `await` is removed and passes when it is added back. B — hardcoded ports in integration tests (medium, CI hygiene) plugin.integration.test.ts bound to fixed offsets of TEST_PORT (9880, 9881, 9882). Concurrent CI runs or stale test processes risked EADDRINUSE flakiness. Fix: bind `port: 0` so the OS assigns ephemeral ports. New getListeningPort() helper waits for the server's `listening` event and returns the assigned port for building localBase URLs. Chose port-0 over supertest because the tests build their own AppKit instance, hold the Server for afterAll cleanup, and use real fetch calls — moving to supertest would have required restructuring the fixture lifecycle. Tests: 1675 (was 1674 + 1 new). typecheck + biome clean. Co-authored-by: Isaac Signed-off-by: Atila Fassina --- packages/appkit/src/plugins/files/plugin.ts | 31 ++++-- .../files/tests/plugin.integration.test.ts | 41 +++++-- .../src/plugins/files/tests/plugin.test.ts | 104 ++++++++++++++++++ 3 files changed, 160 insertions(+), 16 deletions(-) diff --git a/packages/appkit/src/plugins/files/plugin.ts b/packages/appkit/src/plugins/files/plugin.ts index b5a0f52bb..5c1434fc2 100644 --- a/packages/appkit/src/plugins/files/plugin.ts +++ b/packages/appkit/src/plugins/files/plugin.ts @@ -580,13 +580,17 @@ export class FilesPlugin extends Plugin { * missed delete only costs read freshness, not correctness, and * propagating the error would convert a successful write into an HTTP * 500. + * + * Returns a `Promise`; callers MUST `await` this before sending the + * HTTP success response so a follow-up `GET /list` issued in the same tick + * cannot race the underlying `cache.delete()` and observe stale data. */ - private _invalidateListCache( + private async _invalidateListCache( volumeKey: string, writtenPath: string, connector: FilesConnector, mode: "service-principal" | "on-behalf-of-user" = "service-principal", - ): void { + ): Promise { if (mode === "on-behalf-of-user") { // OBO read cache is disabled — nothing to invalidate. Skipping here // also prevents accidentally caching a wrong-namespace delete that @@ -601,9 +605,9 @@ export class FilesPlugin extends Plugin { // relative root-level files like `bar.txt`. Both map to the sentinel. const isRootLevel = !parent || parent === "/"; const userKey = getCurrentUserId(); - const tryDelete = (segment: string) => { + const tryDelete = async (segment: string): Promise => { try { - this.cache.delete( + await this.cache.delete( this.cache.generateKey([`files:${volumeKey}:list`, segment], userKey), ); } catch (err) { @@ -618,7 +622,7 @@ export class FilesPlugin extends Plugin { if (isRootLevel) { // A rootless `list()` produced the `"__root__"` key. - tryDelete("__root__"); + await tryDelete("__root__"); return; } @@ -634,7 +638,7 @@ export class FilesPlugin extends Plugin { ); return; } - tryDelete(resolved); + await tryDelete(resolved); } private _handleApiError( @@ -1070,7 +1074,10 @@ export class FilesPlugin extends Plugin { }, settings), ); - this._invalidateListCache(volumeKey, path, connector, mode); + // Awaited before sending the response so that a follow-up + // `GET /list` issued in the same tick cannot race the + // underlying `cache.delete()` and observe pre-write data. + await this._invalidateListCache(volumeKey, path, connector, mode); if (!result.ok) { logger.error( @@ -1133,7 +1140,10 @@ export class FilesPlugin extends Plugin { }, settings), ); - this._invalidateListCache(volumeKey, dirPath, connector, mode); + // Awaited before sending the response so that a follow-up + // `GET /list` issued in the same tick cannot race the + // underlying `cache.delete()` and observe pre-write data. + await this._invalidateListCache(volumeKey, dirPath, connector, mode); if (!result.ok) { this._sendStatusError(res, result.status); @@ -1176,7 +1186,10 @@ export class FilesPlugin extends Plugin { }, settings), ); - this._invalidateListCache(volumeKey, path, connector, mode); + // Awaited before sending the response so that a follow-up + // `GET /list` issued in the same tick cannot race the + // underlying `cache.delete()` and observe pre-write data. + await this._invalidateListCache(volumeKey, path, connector, mode); if (!result.ok) { this._sendStatusError(res, result.status); diff --git a/packages/appkit/src/plugins/files/tests/plugin.integration.test.ts b/packages/appkit/src/plugins/files/tests/plugin.integration.test.ts index 3c836f9b8..90b3220f7 100644 --- a/packages/appkit/src/plugins/files/tests/plugin.integration.test.ts +++ b/packages/appkit/src/plugins/files/tests/plugin.integration.test.ts @@ -65,11 +65,33 @@ const MOCK_AUTH_HEADERS = { /** Volume key used in all integration tests. */ const VOL = "files"; +/** + * Wait for the supplied server to finish binding, then return the + * OS-assigned port. Required when tests pass `port: 0` to `serverPlugin` + * — `appkit.server.start()` returns as soon as `listen()` is invoked but + * before the bind completes, so `server.address()` returns `null` until + * the `listening` event fires. + */ +async function getListeningPort(server: Server): Promise { + const addr = server.address(); + if (addr && typeof addr === "object" && typeof addr.port === "number") { + return addr.port; + } + await new Promise((resolve, reject) => { + server.once("listening", () => resolve()); + server.once("error", (err) => reject(err)); + }); + const ready = server.address(); + if (!ready || typeof ready !== "object") { + throw new Error("Server is listening but address() returned null"); + } + return ready.port; +} + describe("Files Plugin Integration", () => { let server: Server; let baseUrl: string; let serviceContextMock: Awaited>; - const TEST_PORT = 9880; beforeAll(async () => { setupDatabricksEnv({ @@ -84,8 +106,10 @@ describe("Files Plugin Integration", () => { const appkit = await createApp({ plugins: [ + // port: 0 → OS assigns an ephemeral port. Avoids EADDRINUSE + // when concurrent CI runs or stale processes hold a fixed port. serverPlugin({ - port: TEST_PORT, + port: 0, host: "127.0.0.1", autoStart: false, }), @@ -95,7 +119,8 @@ describe("Files Plugin Integration", () => { await appkit.server.start(); server = appkit.server.getServer(); - baseUrl = `http://127.0.0.1:${TEST_PORT}`; + const port = await getListeningPort(server); + baseUrl = `http://127.0.0.1:${port}`; }); afterAll(async () => { @@ -477,7 +502,7 @@ describe("Files Plugin Integration", () => { const appkit = await createApp({ plugins: [ serverPlugin({ - port: TEST_PORT + 1, + port: 0, host: "127.0.0.1", autoStart: false, }), @@ -491,7 +516,8 @@ describe("Files Plugin Integration", () => { try { await appkit.server.start(); - const localBase = `http://127.0.0.1:${TEST_PORT + 1}`; + const port = await getListeningPort(appkit.server.getServer()); + const localBase = `http://127.0.0.1:${port}`; const response = await fetch( `${localBase}/api/files/${VOL}/list?path=denied`, @@ -516,7 +542,7 @@ describe("Files Plugin Integration", () => { const appkit = await createApp({ plugins: [ serverPlugin({ - port: TEST_PORT + 2, + port: 0, host: "127.0.0.1", autoStart: false, }), @@ -530,7 +556,8 @@ describe("Files Plugin Integration", () => { try { await appkit.server.start(); - const localBase = `http://127.0.0.1:${TEST_PORT + 2}`; + const port = await getListeningPort(appkit.server.getServer()); + const localBase = `http://127.0.0.1:${port}`; mockFilesApi.listDirectoryContents.mockReturnValue( (async function* () { diff --git a/packages/appkit/src/plugins/files/tests/plugin.test.ts b/packages/appkit/src/plugins/files/tests/plugin.test.ts index 76f25664c..f420067da 100644 --- a/packages/appkit/src/plugins/files/tests/plugin.test.ts +++ b/packages/appkit/src/plugins/files/tests/plugin.test.ts @@ -3002,6 +3002,110 @@ describe("FilesPlugin", () => { expect(listInvalidations[0].parts[1]).toBe("__root__"); }); + /** + * Fix A regression: SP-volume writes must AWAIT the underlying + * `cache.delete()` BEFORE sending the HTTP success response. Otherwise + * a client that fires a follow-up `GET /list` in the same tick can race + * the still-pending invalidation and observe pre-write data. + * + * Structural assertion (timing-independent, bug-sensitive): we install + * a `cache.delete` implementation backed by a deferred promise we + * control, then dispatch the `mkdir` handler WITHOUT awaiting it. + * Once `cache.delete` is observed to have been called, we drain a + * generous number of microtasks AND macrotasks while the deferred is + * still pending. The contract is that `res.json` MUST NOT fire until + * we resolve the deferred — proving the response is gated on the + * invalidation having actually completed (not just been issued). + * + * If anyone reverts to `this._invalidateListCache(...)` without + * `await`, `cache.delete()` is left dangling but `res.json` proceeds + * synchronously after it — making the "after delete called, before + * delete resolved" window observable, and the assertion below would + * fail. + */ + test("SP write awaits cache.delete BEFORE sending the response (no write→read race)", async () => { + // Restore the default (mocked) `getWorkspaceClient` and + // `getCurrentUserId`, since earlier tests in this block install the + // REAL implementations and `vi.clearAllMocks` does NOT reset + // implementations. + const ctx = await import("../../../context"); + vi.mocked(ctx.getWorkspaceClient).mockImplementation( + () => mockClient as any, + ); + vi.mocked(ctx.getCurrentUserId).mockImplementation( + () => "test-service-principal", + ); + + process.env.DATABRICKS_VOLUME_SP_VOL = "/Volumes/c/s/sp"; + try { + const plugin = new FilesPlugin({ + volumes: { + sp_vol: { policy: policy.allowAll() }, + uploads: {}, + exports: {}, + }, + }); + const mkdirHandler = getRouteHandler(plugin, "post", "/mkdir"); + + mockClient.files.createDirectory.mockResolvedValue(undefined); + mockCacheInstance.generateKey.mockReturnValue("stub-key"); + + // Deferred promise that gates the cache delete. The handler must + // await this before writing the success response. + let releaseDelete!: () => void; + const deletePending = new Promise((resolve) => { + releaseDelete = resolve; + }); + mockCacheInstance.delete.mockImplementation( + async () => await deletePending, + ); + + const res = mockRes(); + + // Fire the write WITHOUT awaiting — we want to inspect state while + // the handler is parked inside `_invalidateListCache`. + const handlerDone = mkdirHandler( + mockReq("sp_vol", {}, { body: { path: "/Volumes/c/s/sp/foo/bar" } }), + res, + ); + + // Wait until `cache.delete` is invoked (ie the SDK call has + // resolved and the handler has reached the invalidation step). + // Polling the mock's call count avoids any fixed-microtask budget + // assumptions about how many awaits the interceptor stack adds. + // Use setImmediate to also drain macrotask queue items (telemetry/ + // timeout interceptors may use setTimeout under the hood). + const deadline = Date.now() + 1000; + while ( + mockCacheInstance.delete.mock.calls.length === 0 && + Date.now() < deadline + ) { + await new Promise((resolve) => setImmediate(resolve)); + } + + expect(mockClient.files.createDirectory).toHaveBeenCalledTimes(1); + expect(mockCacheInstance.delete).toHaveBeenCalledTimes(1); + + // Critical assertion: drain plenty of microtasks AND macrotasks + // while `cache.delete` is still parked on the deferred. If the + // handler was NOT awaiting the invalidation, `res.json` would + // already have been called by now (the missing-await bug). With + // the fix, the response is gated on `releaseDelete()` below. + for (let i = 0; i < 50; i++) { + await new Promise((resolve) => setImmediate(resolve)); + } + expect(res.json).not.toHaveBeenCalled(); + + // Release the cache delete; the handler now finishes and responds. + releaseDelete(); + await handlerDone; + + expect(res.json).toHaveBeenCalledTimes(1); + } finally { + delete process.env.DATABRICKS_VOLUME_SP_VOL; + } + }); + /** * Fix 3 regression: cross-user OBO read freshness. The OBO read cache * is keyed by `getCurrentUserId()`, so user A's writes can only From f687fa2ca07a6db59065a7d99f9084c2308855a8 Mon Sep 17 00:00:00 2001 From: Atila Fassina Date: Tue, 28 Apr 2026 14:14:22 +0200 Subject: [PATCH 13/13] fix(appkit): files plugin OBO review hardening (5 findings) - _readPathQuery helper rejects array/object query params with 400 across all 7 path-bearing handlers (was: req.query.path raw-cast) - /read streams via connector.download + size-enforcing TransformStream capped at maxReadSize (new VolumeConfig field, default 10MB); /read no longer participates in the read-tier cache - Upload TransformStream enforces bytesReceived <= contentLength when declared, closing a per-user policy bypass where small Content-Length + larger body would exceed an approved upload size - _enforcePolicy 401 and _handleApiError 4xx now return generic public bodies (Unauthorized / standard HTTP STATUS_CODES); raw error.message goes to server-side logs only (CWE-209) Co-authored-by: Isaac Co-authored-by: Orca Signed-off-by: Atila Fassina --- packages/appkit/src/plugins/files/plugin.ts | 188 +++++++++++++++--- .../src/plugins/files/tests/plugin.test.ts | 47 +++-- packages/appkit/src/plugins/files/types.ts | 11 + 3 files changed, 194 insertions(+), 52 deletions(-) diff --git a/packages/appkit/src/plugins/files/plugin.ts b/packages/appkit/src/plugins/files/plugin.ts index 5c1434fc2..13855e406 100644 --- a/packages/appkit/src/plugins/files/plugin.ts +++ b/packages/appkit/src/plugins/files/plugin.ts @@ -5,6 +5,7 @@ import type express from "express"; import type { IAppRouter, PluginExecutionSettings } from "shared"; import { contentTypeFromPath, + FILES_MAX_READ_SIZE, FilesConnector, isSafeInlineContentType, runWithFilesSpanAttributes, @@ -272,7 +273,12 @@ export class FilesPlugin extends Plugin { } } catch (error) { if (error instanceof AuthenticationError) { - res.status(401).json({ error: error.message, plugin: this.name }); + logger.warn( + "Authentication failed during policy evaluation for volume %s: %O", + volumeKey, + error, + ); + res.status(401).json({ error: "Unauthorized", plugin: this.name }); return false; } throw error; @@ -316,6 +322,7 @@ export class FilesPlugin extends Plugin { // Merge per-volume config with plugin-level defaults const mergedConfig: VolumeConfig = { maxUploadSize: volumeCfg.maxUploadSize ?? config.maxUploadSize, + maxReadSize: volumeCfg.maxReadSize ?? config.maxReadSize, customContentTypes: volumeCfg.customContentTypes ?? config.customContentTypes, policy: volumeCfg.policy ?? policy.publicRead(), @@ -490,6 +497,34 @@ export class FilesPlugin extends Plugin { return { connector, volumeKey }; } + /** + * Extract `req.query.path` as a single string when present. + * + * Express coerces repeated query parameters (`?path=a&path=b`) to a string + * array and dotted/nested params (`?path[k]=v`) to an object. Reject those + * with `400` instead of letting non-string values reach `_isValidPath` / + * `connector.resolvePath`, which would misbehave on arrays or objects. + * + * Returns `{ path }` (with `path` either a string or `undefined` when the + * query parameter was absent) on success. Returns `undefined` and writes a + * `400` response when the value is not a single string — callers must + * check the return for `undefined` before continuing. + */ + private _readPathQuery( + req: express.Request, + res: express.Response, + ): { path: string | undefined } | undefined { + const value = req.query.path; + if (value === undefined || typeof value === "string") { + return { path: value }; + } + res.status(400).json({ + error: "path query parameter must be a single string", + plugin: this.name, + }); + return undefined; + } + /** * Validate a file/directory path from user input. * Returns `true` if valid, or an error message string if invalid. @@ -654,17 +689,19 @@ export class FilesPlugin extends Plugin { return; } if (error instanceof AuthenticationError) { - res.status(401).json({ - error: error.message, - plugin: this.name, - }); + logger.warn("Authentication failed in %s: %O", this.name, error); + res.status(401).json({ error: "Unauthorized", plugin: this.name }); return; } if (error instanceof ApiError) { const status = error.statusCode ?? 500; if (status >= 400 && status < 500) { + // Don't reflect raw SDK error.message — it can leak internal volume + // paths, hostnames, or principal names. Use the standard HTTP status + // text for the public body and log the full error server-side. + logger.warn("Upstream %d in %s: %O", status, this.name, error); res.status(status).json({ - error: error.message, + error: STATUS_CODES[status] ?? "Client Error", statusCode: status, plugin: this.name, }); @@ -691,7 +728,9 @@ export class FilesPlugin extends Plugin { connector: FilesConnector, volumeKey: string, ): Promise { - const path = req.query.path as string | undefined; + const query = this._readPathQuery(req, res); + if (!query) return; + const path = query.path; if (!(await this._enforcePolicy(req, res, volumeKey, "list", path ?? "/"))) return; @@ -727,32 +766,85 @@ export class FilesPlugin extends Plugin { connector: FilesConnector, volumeKey: string, ): Promise { - const path = req.query.path as string; + const query = this._readPathQuery(req, res); + if (!query) return; + const rawPath = query.path; - const valid = this._isValidPath(path); + const valid = this._isValidPath(rawPath); if (valid !== true) { res.status(400).json({ error: valid, plugin: this.name }); return; } + const path = rawPath as string; if (!(await this._enforcePolicy(req, res, volumeKey, "read", path))) return; + const volumeCfg = this.volumeConfigs[volumeKey]; + const maxReadSize = volumeCfg.maxReadSize ?? FILES_MAX_READ_SIZE; const { mode, userCtx } = this._resolveAuthForRequest(req, volumeKey); + await this._runWithAuth(userCtx, async () => { try { - const result = await this.execute( - async () => connector.read(getWorkspaceClient(), path), - this._readSettings( - [`files:${volumeKey}:read`, connector.resolvePath(path)], - mode, - ), + // Stream the file body, capping at `maxReadSize` to avoid buffering + // arbitrary-size files. Uses download-tier settings (no cache) — + // `/read` no longer participates in the read-tier cache. Programmatic + // callers wanting a cached small-file read should use the SDK + // `volume.read(path, { maxSize })` method directly. + const response = await this.execute( + async () => connector.download(getWorkspaceClient(), path), + this._downloadSettings(mode), ); - - if (!result.ok) { - this._sendStatusError(res, result.status); + if (!response.ok) { + this._sendStatusError(res, response.status); + return; + } + if (!response.data.contents) { + res.type("text/plain").send(""); return; } - res.type("text/plain").send(result.data); + + let bytesSent = 0; + const limited = response.data.contents.pipeThrough( + new TransformStream({ + transform(chunk, controller) { + bytesSent += chunk.byteLength; + if (bytesSent > maxReadSize) { + controller.error( + new Error( + `File exceeds maxReadSize (${maxReadSize} bytes). Use /download for large files.`, + ), + ); + return; + } + controller.enqueue(chunk); + }, + }), + ); + + res.type("text/plain"); + const nodeStream = Readable.fromWeb( + limited as import("node:stream/web").ReadableStream, + ); + nodeStream.on("error", (err) => { + if ( + err instanceof Error && + err.message.includes("exceeds maxReadSize") + ) { + if (!res.headersSent) { + res.status(413).json({ error: err.message, plugin: this.name }); + return; + } + res.destroy(err); + return; + } + logger.error("Stream error during read: %O", err); + if (!res.headersSent) { + this._sendStatusError(res, 500); + } else { + res.destroy(); + } + }); + nodeStream.pipe(res); } catch (error) { this._handleApiError(res, error, "Read failed"); } @@ -793,13 +885,16 @@ export class FilesPlugin extends Plugin { volumeKey: string, opts: { mode: "download" | "raw" }, ): Promise { - const path = req.query.path as string; + const query = this._readPathQuery(req, res); + if (!query) return; + const rawPath = query.path; - const valid = this._isValidPath(path); + const valid = this._isValidPath(rawPath); if (valid !== true) { res.status(400).json({ error: valid, plugin: this.name }); return; } + const path = rawPath as string; if (!(await this._enforcePolicy(req, res, volumeKey, opts.mode, path))) return; @@ -874,13 +969,16 @@ export class FilesPlugin extends Plugin { connector: FilesConnector, volumeKey: string, ): Promise { - const path = req.query.path as string; + const query = this._readPathQuery(req, res); + if (!query) return; + const rawPath = query.path; - const valid = this._isValidPath(path); + const valid = this._isValidPath(rawPath); if (valid !== true) { res.status(400).json({ error: valid, plugin: this.name }); return; } + const path = rawPath as string; if (!(await this._enforcePolicy(req, res, volumeKey, "exists", path))) return; @@ -913,13 +1011,16 @@ export class FilesPlugin extends Plugin { connector: FilesConnector, volumeKey: string, ): Promise { - const path = req.query.path as string; + const query = this._readPathQuery(req, res); + if (!query) return; + const rawPath = query.path; - const valid = this._isValidPath(path); + const valid = this._isValidPath(rawPath); if (valid !== true) { res.status(400).json({ error: valid, plugin: this.name }); return; } + const path = rawPath as string; if (!(await this._enforcePolicy(req, res, volumeKey, "metadata", path))) return; @@ -952,13 +1053,16 @@ export class FilesPlugin extends Plugin { connector: FilesConnector, volumeKey: string, ): Promise { - const path = req.query.path as string; + const query = this._readPathQuery(req, res); + if (!query) return; + const rawPath = query.path; - const valid = this._isValidPath(path); + const valid = this._isValidPath(rawPath); if (valid !== true) { res.status(400).json({ error: valid, plugin: this.name }); return; } + const path = rawPath as string; if (!(await this._enforcePolicy(req, res, volumeKey, "preview", path))) return; @@ -991,12 +1095,15 @@ export class FilesPlugin extends Plugin { connector: FilesConnector, volumeKey: string, ): Promise { - const path = req.query.path as string; - const valid = this._isValidPath(path); + const query = this._readPathQuery(req, res); + if (!query) return; + const rawPath = query.path; + const valid = this._isValidPath(rawPath); if (valid !== true) { res.status(400).json({ error: valid, plugin: this.name }); return; } + const path = rawPath as string; const volumeCfg = this.volumeConfigs[volumeKey]; const maxSize = volumeCfg.maxUploadSize ?? FILES_MAX_UPLOAD_SIZE; @@ -1049,6 +1156,22 @@ export class FilesPlugin extends Plugin { ); return; } + // When the client declared a Content-Length, the policy was + // gated on that value (lines above pass `size: contentLength` + // to `_enforcePolicy`). Refuse bytes beyond the declared size + // so an attacker cannot bypass a per-user policy by sending a + // small Content-Length and then streaming up to maxSize. + if ( + contentLength !== undefined && + bytesReceived > contentLength + ) { + controller.error( + new Error( + `Upload stream exceeds declared Content-Length (${contentLength} bytes)`, + ), + ); + return; + } controller.enqueue(chunk); }, }), @@ -1101,7 +1224,8 @@ export class FilesPlugin extends Plugin { } catch (error) { if ( error instanceof Error && - error.message.includes("exceeds maximum allowed size") + (error.message.includes("exceeds maximum allowed size") || + error.message.includes("exceeds declared Content-Length")) ) { res.status(413).json({ error: error.message, plugin: this.name }); return; @@ -1163,7 +1287,9 @@ export class FilesPlugin extends Plugin { connector: FilesConnector, volumeKey: string, ): Promise { - const rawPath = req.query.path as string | undefined; + const query = this._readPathQuery(req, res); + if (!query) return; + const rawPath = query.path; const valid = this._isValidPath(rawPath); if (valid !== true) { diff --git a/packages/appkit/src/plugins/files/tests/plugin.test.ts b/packages/appkit/src/plugins/files/tests/plugin.test.ts index f420067da..5bf7859ca 100644 --- a/packages/appkit/src/plugins/files/tests/plugin.test.ts +++ b/packages/appkit/src/plugins/files/tests/plugin.test.ts @@ -1,4 +1,4 @@ -import { Readable } from "node:stream"; +import { PassThrough, Readable } from "node:stream"; import { mockServiceContext, setupDatabricksEnv } from "@tools/test-helpers"; import { afterEach, beforeEach, describe, expect, test, vi } from "vitest"; import { ServiceContext } from "../../../context/service-context"; @@ -2064,15 +2064,12 @@ describe("FilesPlugin", () => { expect(res.status).toHaveBeenCalledWith(401); const errBody = res.json.mock.calls[0][0]; - expect(errBody).toEqual( - expect.objectContaining({ - error: expect.stringContaining("Missing"), - plugin: "files", - }), - ); - // The error shape comes from AuthenticationError.missingToken (mentions - // x-forwarded-access-token in our message). - expect(errBody.error).toMatch(/x-forwarded-access-token/); + // Public body is generic — internal "missing x-forwarded-access-token" + // detail is server-side log only (CWE-209 hardening). + expect(errBody).toEqual({ + error: "Unauthorized", + plugin: "files", + }); // Policy must not have been evaluated and the SDK must not have been // called. @@ -2198,13 +2195,18 @@ describe("FilesPlugin", () => { } function mockRes() { - const res: any = { headersSent: false }; + // PassThrough gives us a real Writable so the streaming `/read` handler + // can `pipe(res)` without crashing. Express-style helpers (status, json, + // type, send, setHeader) are layered on top as spies. + const stream = new PassThrough(); + const res: any = stream; + res.headersSent = false; res.status = vi.fn().mockReturnValue(res); res.json = vi.fn().mockReturnValue(res); res.type = vi.fn().mockReturnValue(res); res.send = vi.fn().mockReturnValue(res); res.setHeader = vi.fn().mockReturnValue(res); - res.end = vi.fn(); + res.destroy = vi.fn(); return res; } @@ -2343,14 +2345,18 @@ describe("FilesPlugin", () => { isServicePrincipal: false, }); - // 2xx path: handler called res.type/send rather than res.status(non-2xx). + // 2xx path: handler set Content-Type to text/plain and entered the + // streaming branch (no error status code was set). The body is piped + // through the PassThrough — exact byte-level assertion is covered by + // integration tests; here we just verify the handler took the success + // branch. const statusCodes = (res.status.mock.calls as number[][]).map( (c) => c[0], ); expect(statusCodes).not.toContain(401); expect(statusCodes).not.toContain(403); expect(statusCodes).not.toContain(500); - expect(res.send).toHaveBeenCalled(); + expect(res.type).toHaveBeenCalledWith("text/plain"); }); test("OBO read cache is DISABLED: cross-user reads do not share cache state", async () => { @@ -2772,13 +2778,12 @@ describe("FilesPlugin", () => { expect(res.status).toHaveBeenCalledWith(401); const errBody = res.json.mock.calls[0][0]; - expect(errBody).toEqual( - expect.objectContaining({ - error: expect.stringContaining("Missing"), - plugin: "files", - }), - ); - expect(errBody.error).toMatch(/x-forwarded-access-token/); + // Public body is generic — internal "missing x-forwarded-access-token" + // detail is server-side log only (CWE-209 hardening). + expect(errBody).toEqual({ + error: "Unauthorized", + plugin: "files", + }); // Neither the SDK upload nor the hand-rolled fetch ran. expect(mockClient.files.upload).not.toHaveBeenCalled(); diff --git a/packages/appkit/src/plugins/files/types.ts b/packages/appkit/src/plugins/files/types.ts index 2e92be5a4..d912efff9 100644 --- a/packages/appkit/src/plugins/files/types.ts +++ b/packages/appkit/src/plugins/files/types.ts @@ -8,6 +8,12 @@ import type { FilePolicy } from "./policy"; export interface VolumeConfig { /** Maximum upload size in bytes for this volume. Inherits from plugin-level `maxUploadSize` if not set. */ maxUploadSize?: number; + /** + * Maximum byte length the `/read` endpoint will stream before aborting with + * `413 Payload Too Large`. Inherits from plugin-level `maxReadSize` if not + * set; defaults to 10 MB. `/download` and `/raw` are unaffected. + */ + maxReadSize?: number; /** Map of file extensions to MIME types for this volume. Inherits from plugin-level `customContentTypes` if not set. */ customContentTypes?: Record; /** @@ -109,6 +115,11 @@ export interface IFilesConfig extends BasePluginConfig { customContentTypes?: Record; /** Maximum upload size in bytes. Defaults to 5 GB (Databricks Files API v2 limit). */ maxUploadSize?: number; + /** + * Plugin-level default for the `/read` endpoint's response size cap. + * Inherited by volumes without their own `maxReadSize`. Defaults to 10 MB. + */ + maxReadSize?: number; /** * Plugin-level default auth mode for all volumes. Volumes without an * explicit `auth` field inherit this default; volumes that set their own