diff --git a/src/pyodide/internal/serializeJsModule.ts b/src/pyodide/internal/serializeJsModule.ts new file mode 100644 index 00000000000..74d9f0a6575 --- /dev/null +++ b/src/pyodide/internal/serializeJsModule.ts @@ -0,0 +1,110 @@ +/* eslint-disable prefer-rest-params */ +/* eslint-disable @typescript-eslint/no-unsafe-member-access */ +/* eslint-disable @typescript-eslint/no-unsafe-argument */ +/* eslint-disable @typescript-eslint/no-unsafe-assignment */ +import { IS_CREATING_SNAPSHOT } from 'pyodide-internal:metadata'; + +export type SerializedJsModule = { + jsModule: true; + moduleName: string; + accessorList: string[]; +}; + +const importName = Symbol('importName'); +const getAccessorList = Symbol('getAccessorList'); +const getObject = Symbol('getObject'); +const getPrototypeOfKey = 'Reflect.getProtoTypeOf'; + +export function maybeSerializeJsModule( + obj_: any, + modules: Set<string> +): SerializedJsModule | undefined { + const obj = obj_ as + | { [importName]: string; [getAccessorList]: string[] } + | undefined; + const moduleName = obj?.[importName]; + if (!moduleName) { + return undefined; + } + modules.add(moduleName); + const accessorList: string[] = obj[getAccessorList]; + return { jsModule: true, moduleName, accessorList }; +} + +interface JsModules { + [a: string]: JsModules; +} + +export function deserializeJsModule( + obj: SerializedJsModule, + jsModules: JsModules +): unknown { + return ( + obj.accessorList.reduce((x: JsModules, y: string): JsModules => { + if (y === getPrototypeOfKey) { + return Reflect.getPrototypeOf(x) as JsModules; + } + return x[y]!; + }, jsModules[obj.moduleName]!) ?? null + ); +} + +// This tracks the information needed to "serialize" attributes of js modules. We need the name and +// the sequence of attribute accesses. We store the name and accessorList under the importName and +// getAccessorList symbols.
+// +// If the receiver of a function call is an import proxy, this can cause the call to crash, so we +// unwrap the receiver using the getObject symbol. +export function createImportProxy( + name: string, + mod: any, + accessorList: (string | symbol)[] = [] +): any { + if (!IS_CREATING_SNAPSHOT) { + return mod; + } + if (!mod || typeof mod !== 'object') { + return mod; + } + return new Proxy(mod, { + get(target: any, prop: string | symbol, _receiver): any { + if (prop === importName) { + return name; + } + if (prop === getAccessorList) { + return accessorList; + } + if (prop === getObject) { + return target; + } + // @ts-expect-error untyped Reflect.get + const orig = Reflect.get(...arguments); + const descr = Reflect.getOwnPropertyDescriptor(target, prop); + // We're required to return the original value unmodified if it's an own + // property with a non-writable, non-configurable data descriptor + if (descr && descr.writable === false && !descr.configurable) { + return orig; + } + // Or an accessor descriptor with a setter but no getter + if (descr && descr.set && !descr.get) { + return orig; + } + if (!['object', 'function'].includes(typeof orig)) { + return orig; + } + return createImportProxy(name, orig, [...accessorList, prop]); + }, + apply(target: any, thisArg: any, argumentList: any[]): any { + // If thisArg is an import proxy it may break APIs that expect the receiver + // to be unmodified. Unwrap any import proxy before making the call. + thisArg = thisArg?.[getObject] ??
thisArg; + return Reflect.apply(target, thisArg, argumentList); + }, + getPrototypeOf(target: object): any { + return createImportProxy(name, Reflect.getPrototypeOf(target), [ + ...accessorList, + getPrototypeOfKey, + ]); + }, + }); +} diff --git a/src/pyodide/internal/snapshot.ts b/src/pyodide/internal/snapshot.ts index 4c739b2ccc1..27bbdfe1c96 100644 --- a/src/pyodide/internal/snapshot.ts +++ b/src/pyodide/internal/snapshot.ts @@ -26,6 +26,11 @@ import { import { default as MetadataReader } from 'pyodide-internal:runtime-generated/metadata'; import type { PyodideEntrypointHelper } from 'pyodide:python-entrypoint-helper'; import { entropyAfterSnapshot } from 'pyodide-internal:topLevelEntropy/lib'; +import { + deserializeJsModule, + maybeSerializeJsModule, + type SerializedJsModule, +} from 'pyodide-internal:serializeJsModule'; // A handle is the pointer into the linear memory returned by dlopen. Multiple dlopens will return // multiple pointers. @@ -71,6 +76,7 @@ type SnapshotMeta = { readonly dsoHandles: DsoHandles; readonly settings: SnapshotSettings; readonly version: 1; + readonly jsModuleNames?: ReadonlyArray<string>; } & DsoLoadInfo; // MEMORY_SNAPSHOT_READER has type SnapshotReader | undefined @@ -106,6 +112,9 @@ const HEADER_SIZE = 4 * 4; const LOADED_SNAPSHOT_META: LoadedSnapshotMeta | undefined = decodeSnapshot( MEMORY_SNAPSHOT_READER ); +const JS_MODULES: Record<string, any> = await importJsModulesFromSnapshot( + LOADED_SNAPSHOT_META?.jsModuleNames +); const CREATED_SNAPSHOT_META: Required = { soMemoryBases: {}, soTableBases: {}, @@ -454,7 +463,6 @@ function memorySnapshotDoImports(Module: Module): string[] { // We've done all the imports for the baseline snapshot.
 return []; } - if (REQUIREMENTS.length == 0) { // Don't attempt to scan for package imports if the Worker has specified no package // requirements, as this means their code isn't going to be importing any modules that we need @@ -557,15 +565,37 @@ ${describeValue(obj)} return new PythonUserError(error); } -type CustomSerialized = { pyodide_entrypoint_helper: true }; +async function importJsModulesFromSnapshot( + jsModuleNames: ReadonlyArray<string> | undefined +): Promise<Record<string, any>> { + if (jsModuleNames === undefined) { + return {}; + } + return Object.fromEntries( + await Promise.all( + jsModuleNames.map( + async (x): Promise<[string, any]> => [x, await import(x)] + ) + ) + ); +} + +type CustomSerialized = + | { pyodide_entrypoint_helper: true } + | SerializedJsModule; function getHiwireSerializer( - pyodide_entrypoint_helper: PyodideEntrypointHelper + pyodide_entrypoint_helper: PyodideEntrypointHelper, + modules: Set<string> ): (obj: any) => CustomSerialized { return function serializer(obj: any): CustomSerialized { if (obj === pyodide_entrypoint_helper) { return { pyodide_entrypoint_helper: true }; } + const serializedModule = maybeSerializeJsModule(obj, modules); + if (serializedModule) { + return serializedModule; + } throw createUnserializableObjectError(obj); }; } @@ -577,6 +607,9 @@ function getHiwireDeserializer( if ('pyodide_entrypoint_helper' in obj) { return pyodide_entrypoint_helper; } + if ('jsModule' in obj) { + return deserializeJsModule(obj, JS_MODULES); + } unreachable(obj, `Can't deserialize ${obj}`); }; } @@ -594,9 +627,10 @@ function makeLinearMemorySnapshot( ): Uint8Array { const dsoHandles = recordDsoHandles(Module); let hiwire: SnapshotConfig | undefined; + const jsModuleNames: Set<string> = new Set(); if (Module.API.version !== '0.26.0a2') { hiwire = Module.API.serializeHiwireState( - getHiwireSerializer(pyodide_entrypoint_helper) + getHiwireSerializer(pyodide_entrypoint_helper, jsModuleNames) ); } const settings: SnapshotSettings = { @@ -609,6 +643,7 @@ function
makeLinearMemorySnapshot( dsoHandles, hiwire, importedModulesList, + jsModuleNames: Array.from(jsModuleNames), settings, ...CREATED_SNAPSHOT_META, }); @@ -686,6 +721,7 @@ function decodeSnapshot( compatFlags: {}, ...meta.settings, }, + jsModuleNames: [], ...extras, }; } diff --git a/src/pyodide/python-entrypoint-helper.ts b/src/pyodide/python-entrypoint-helper.ts index 77172ce386d..85385aa0bac 100644 --- a/src/pyodide/python-entrypoint-helper.ts +++ b/src/pyodide/python-entrypoint-helper.ts @@ -27,6 +27,7 @@ import { reportError, } from 'pyodide-internal:util'; import { LOADED_SNAPSHOT_TYPE } from 'pyodide-internal:snapshot'; +export { createImportProxy } from 'pyodide-internal:serializeJsModule'; import { patch_env_helper } from 'pyodide-internal:envHelpers'; type PyFuture = Promise & { copy(): PyFuture; destroy(): void }; @@ -86,16 +87,14 @@ function get_pyodide_entrypoint_helper(): PyodideEntrypointHelper { return _pyodide_entrypoint_helper; } -export function setDoAnImport( - func: (mod: string) => Promise, - cloudflareWorkersModule: any, - cloudflareSocketsModule: any, +export async function setDoAnImport( + doAnImport: (mod: string) => Promise, workerEntrypoint: any -): void { +): Promise { _pyodide_entrypoint_helper = { - doAnImport: func, - cloudflareWorkersModule, - cloudflareSocketsModule, + doAnImport, + cloudflareWorkersModule: await doAnImport('cloudflare:workers'), + cloudflareSocketsModule: await doAnImport('cloudflare:sockets'), workerEntrypoint, patchWaitUntil, patch_env_helper, diff --git a/src/pyodide/python-entrypoint.js b/src/pyodide/python-entrypoint.js index f2379a3dda6..22067fd654a 100644 --- a/src/pyodide/python-entrypoint.js +++ b/src/pyodide/python-entrypoint.js @@ -11,27 +11,29 @@ import { WorkerEntrypoint, WorkflowEntrypoint, } from 'cloudflare:workers'; -import * as cloudflareWorkersModule from 'cloudflare:workers'; -import * as cloudflareSocketsModule from 'cloudflare:sockets'; // The creation of `pythonDurableObjects` et al. 
has to be done here because // python-entrypoint-helper is a BUILTIN and so cannot import `DurableObject` et al. // (which are also builtins). As a workaround we call `makeEntrypointClass` here and pass it the // appropriate class. -import { setDoAnImport, initPython } from 'pyodide:python-entrypoint-helper'; +import { + setDoAnImport, + initPython, + createImportProxy, +} from 'pyodide:python-entrypoint-helper'; // Function to dynamically import JavaScript modules from Python +// We need the import "call" to occur in this file since it is the only file that is part of the +// user bundle and can see BUILTIN modules and not INTERNAL modules. If we put the import in any +// other file, it would be possible to import INTERNAL modules (not good) and not possible to import +// USER or BUILTIN modules. async function doAnImport(name) { - return await import(name); + const mod = await import(name); + return createImportProxy(name, mod); } // Pass the import function to the helper -setDoAnImport( - doAnImport, - cloudflareWorkersModule, - cloudflareSocketsModule, - WorkerEntrypoint -); +await setDoAnImport(doAnImport, WorkerEntrypoint); // Initialise Python only after the import function has been set above. const { handlers, pythonEntrypointClasses, makeEntrypointClass } = diff --git a/src/workerd/server/tests/python/BUILD.bazel b/src/workerd/server/tests/python/BUILD.bazel index 37493091382..27cfe48e1df 100644 --- a/src/workerd/server/tests/python/BUILD.bazel +++ b/src/workerd/server/tests/python/BUILD.bazel @@ -9,10 +9,7 @@ py_wd_test("env-param") py_wd_test( "top-level-env", - # TODO: snapshot for env - make_snapshot = False, skip_python_flags = ["0.26.0a2"], - use_snapshot = None, ) py_wd_test("asgi") @@ -64,11 +61,7 @@ py_wd_test( py_wd_test("js-import") -py_wd_test( - "importable-env", - # TODO: why doesn't dedicated snapshot work with this? 
- use_snapshot = None, -) +py_wd_test("importable-env") py_wd_test("python-rpc") diff --git a/src/workerd/server/tests/python/importable-env/importable-env.wd-test b/src/workerd/server/tests/python/importable-env/importable-env.wd-test index b5a50e68956..1618b55816f 100644 --- a/src/workerd/server/tests/python/importable-env/importable-env.wd-test +++ b/src/workerd/server/tests/python/importable-env/importable-env.wd-test @@ -7,7 +7,11 @@ const unitTests :Workerd.Config = ( modules = [ (name = "worker.py", pythonModule = embed "worker.py"), ], - compatibilityFlags = [%PYTHON_FEATURE_FLAGS, "disable_python_no_global_handlers", "unwrap_custom_thenables", "disable_python_dedicated_snapshot"], + compatibilityFlags = [ + %PYTHON_FEATURE_FLAGS, + "disable_python_no_global_handlers", + "unwrap_custom_thenables" + ], bindings = [ (name = "FOO", text = "BAR"), (name = "CACHE", memoryCache = (