Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 110 additions & 0 deletions src/pyodide/internal/serializeJsModule.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
/* eslint-disable prefer-rest-params */
/* eslint-disable @typescript-eslint/no-unsafe-member-access */
/* eslint-disable @typescript-eslint/no-unsafe-argument */
/* eslint-disable @typescript-eslint/no-unsafe-assignment */
import { IS_CREATING_SNAPSHOT } from 'pyodide-internal:metadata';

/**
 * Plain-data description of a value that was reached through an imported JS
 * module. It is produced by `maybeSerializeJsModule` and consumed by
 * `deserializeJsModule`.
 */
export type SerializedJsModule = {
// Tag field; always `true`. Lets consumers tell this record apart from other
// serialized shapes with an `'jsModule' in obj` check.
jsModule: true;
// Specifier of the module the value was reached from (the `name` given to
// `createImportProxy`).
moduleName: string;
// Property names to follow, in order, starting at the module object. The
// special entry 'Reflect.getProtoTypeOf' stands for "take the prototype".
accessorList: string[];
};

// Private keys answered by the proxy built in `createImportProxy`. Because they
// are module-local symbols they cannot collide with real property names.
// `importName` -> the module name the proxy was created for.
const importName = Symbol('importName');
// `getAccessorList` -> the list of property accesses that led to this proxy.
const getAccessorList = Symbol('getAccessorList');
// `getObject` -> the raw (unproxied) target object.
const getObject = Symbol('getObject');
// Marker stored in an accessor list for a "get the prototype" step, as opposed
// to a property read. The spelling differs from the real `Reflect.getPrototypeOf`;
// it is only a sentinel string, but it ends up inside serialized accessor lists,
// so changing it would change the serialized form.
const getPrototypeOfKey = 'Reflect.getProtoTypeOf';

/**
 * Describe `obj_` as a reference into an imported JS module, if it is one.
 *
 * Objects produced by `createImportProxy` answer the private `importName` and
 * `getAccessorList` symbols; anything else yields `undefined` for them.
 *
 * @param obj_ - Any value; `null`, `undefined` and primitives are accepted.
 * @param modules - Collects the names of all modules that were serialized.
 *   Only mutated when a serialized record is actually returned.
 * @returns The serialized reference, or `undefined` when `obj_` is not an
 *   import proxy or when its access path cannot be represented.
 */
export function maybeSerializeJsModule(
  obj_: any,
  modules: Set<string>
): SerializedJsModule | undefined {
  const obj = obj_ as
    | { [importName]: string; [getAccessorList]: (string | symbol)[] }
    | undefined;
  const moduleName = obj?.[importName];
  if (!moduleName) {
    return undefined;
  }
  const accessors: unknown = obj?.[getAccessorList];
  // The proxy records symbol-keyed accesses too, but `SerializedJsModule`
  // carries strings only and deserialization replays each step by name. A path
  // containing a symbol could not be replayed, so report "not serializable"
  // here instead of emitting a record that would resolve to the wrong value.
  if (
    !Array.isArray(accessors) ||
    !accessors.every((step): step is string => typeof step === 'string')
  ) {
    return undefined;
  }
  modules.add(moduleName);
  // Copy so the record does not alias the array held by the proxy.
  return { jsModule: true, moduleName, accessorList: [...accessors] };
}

// Recursive string-keyed map. Used both for the table of modules keyed by
// module name and for every value reached while following an accessor list in
// `deserializeJsModule`.
interface JsModules {
[a: string]: JsModules;
}

/**
 * Resolve a `SerializedJsModule` record back to a live value.
 *
 * Starts at `jsModules[obj.moduleName]` and replays `obj.accessorList` one
 * step at a time. A step equal to the prototype marker takes the prototype of
 * the current value; every other step is a property read.
 *
 * @param obj - The serialized reference.
 * @param jsModules - Modules keyed by module name.
 * @returns The resolved value, or `null` when the final value is `null` or
 *   `undefined`.
 * @throws TypeError when a step has to be applied to `null`/`undefined`
 *   (e.g. the module is missing from `jsModules`, or an intermediate property
 *   does not exist). The message names the module and the path walked so far.
 */
export function deserializeJsModule(
  obj: SerializedJsModule,
  jsModules: JsModules
): unknown {
  let current: JsModules | null | undefined = jsModules[obj.moduleName];
  const walked: string[] = [];
  for (const step of obj.accessorList) {
    if (current === undefined || current === null) {
      const where = [obj.moduleName, ...walked].join('.');
      throw new TypeError(
        `Can't deserialize JS module reference: cannot resolve '${step}' because '${where}' is ${String(current)}`
      );
    }
    current =
      step === getPrototypeOfKey
        ? (Reflect.getPrototypeOf(current) as JsModules | null)
        : current[step];
    walked.push(step);
  }
  return current ?? null;
}

/**
 * Wrap a value reached from an imported JS module so that it remembers where
 * it came from.
 *
 * To "serialize" an attribute of a JS module we need the module name and the
 * sequence of attribute accesses that led to the value. The returned proxy
 * exposes these under the private `importName` and `getAccessorList` symbols,
 * and exposes the unwrapped target under `getObject`.
 *
 * The value is returned unchanged when `IS_CREATING_SNAPSHOT` is false, or
 * when it is falsy or its `typeof` is not `'object'` (so primitives and
 * functions are never wrapped).
 *
 * @param name - Module name to report for this value and everything reached
 *   from it.
 * @param mod - The value to wrap.
 * @param accessorList - Accesses already taken from the module to reach `mod`.
 * @returns `mod` itself, or a proxy around it.
 */
export function createImportProxy(
  name: string,
  mod: any,
  accessorList: (string | symbol)[] = []
): any {
  if (!IS_CREATING_SNAPSHOT) {
    return mod;
  }
  if (!mod || typeof mod !== 'object') {
    return mod;
  }
  return new Proxy(mod, {
    get(target: any, prop: string | symbol, receiver: any): any {
      if (prop === importName) {
        return name;
      }
      if (prop === getAccessorList) {
        return accessorList;
      }
      if (prop === getObject) {
        return target;
      }
      const orig = Reflect.get(target, prop, receiver);
      const descr = Reflect.getOwnPropertyDescriptor(target, prop);
      // We're required to return the original value unmodified if it's an own
      // property with a non-writable, non-configurable data descriptor
      if (descr && descr.writable === false && !descr.configurable) {
        return orig;
      }
      // Or an accessor descriptor with a setter but no getter
      if (descr && descr.set && !descr.get) {
        return orig;
      }
      if (!['object', 'function'].includes(typeof orig)) {
        return orig;
      }
      // Extend the recorded path by this access. Functions and null come back
      // unwrapped because of the guards at the top of createImportProxy.
      return createImportProxy(name, orig, [...accessorList, prop]);
    },
    apply(target: any, thisArg: any, argumentList: any[]): any {
      // If thisArg is an import proxy it may break APIs that expect the
      // receiver to be unmodified. Unwrap it before making the call.
      // NOTE(review): a Proxy only honours `apply` when its target is callable,
      // and the guard above returns functions unwrapped, so with the current
      // guard this trap does not appear to be reachable - confirm intent.
      thisArg = thisArg?.[getObject] ?? thisArg;
      return Reflect.apply(target, thisArg, argumentList);
    },
    getPrototypeOf(target: object): any {
      const proto = Reflect.getPrototypeOf(target);
      // Proxy invariant: for a non-extensible target the trap must report the
      // target's real prototype, otherwise the engine throws a TypeError.
      // Frozen/sealed objects therefore get their prototype back unwrapped.
      if (!Reflect.isExtensible(target)) {
        return proto;
      }
      return createImportProxy(name, proto, [
        ...accessorList,
        getPrototypeOfKey,
      ]);
    },
  });
}
44 changes: 40 additions & 4 deletions src/pyodide/internal/snapshot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ import {
import { default as MetadataReader } from 'pyodide-internal:runtime-generated/metadata';
import type { PyodideEntrypointHelper } from 'pyodide:python-entrypoint-helper';
import { entropyAfterSnapshot } from 'pyodide-internal:topLevelEntropy/lib';
import {
deserializeJsModule,
maybeSerializeJsModule,
type SerializedJsModule,
} from 'pyodide-internal:serializeJsModule';

// A handle is the pointer into the linear memory returned by dlopen. Multiple dlopens will return
// multiple pointers.
Expand Down Expand Up @@ -71,6 +76,7 @@ type SnapshotMeta = {
readonly dsoHandles: DsoHandles;
readonly settings: SnapshotSettings;
readonly version: 1;
readonly jsModuleNames?: ReadonlyArray<string>;
} & DsoLoadInfo;

// MEMORY_SNAPSHOT_READER has type SnapshotReader | undefined
Expand Down Expand Up @@ -106,6 +112,9 @@ const HEADER_SIZE = 4 * 4;
const LOADED_SNAPSHOT_META: LoadedSnapshotMeta | undefined = decodeSnapshot(
MEMORY_SNAPSHOT_READER
);
const JS_MODULES: Record<string, any> = await importJsModulesFromSnapshot(
LOADED_SNAPSHOT_META?.jsModuleNames
);
const CREATED_SNAPSHOT_META: Required<DsoLoadInfo> = {
soMemoryBases: {},
soTableBases: {},
Expand Down Expand Up @@ -454,7 +463,6 @@ function memorySnapshotDoImports(Module: Module): string[] {
// We've done all the imports for the baseline snapshot.
return [];
}

if (REQUIREMENTS.length == 0) {
// Don't attempt to scan for package imports if the Worker has specified no package
// requirements, as this means their code isn't going to be importing any modules that we need
Expand Down Expand Up @@ -557,15 +565,37 @@ ${describeValue(obj)}
return new PythonUserError(error);
}

type CustomSerialized = { pyodide_entrypoint_helper: true };
/**
 * Dynamically import every module named in `jsModuleNames`.
 *
 * All imports are started before any of them is awaited.
 *
 * @param jsModuleNames - Module specifiers to import, or `undefined`.
 * @returns An object mapping each specifier to its imported module; an empty
 *   object when `jsModuleNames` is `undefined`.
 */
async function importJsModulesFromSnapshot(
  jsModuleNames: ReadonlyArray<string> | undefined
): Promise<Record<string, any>> {
  if (jsModuleNames === undefined) {
    return {};
  }
  const pending: Promise<any>[] = jsModuleNames.map(
    (specifier) => import(specifier)
  );
  const namespaces = await Promise.all(pending);
  const pairs = jsModuleNames.map(
    (specifier, idx): [string, any] => [specifier, namespaces[idx]]
  );
  return Object.fromEntries(pairs);
}

type CustomSerialized =
| { pyodide_entrypoint_helper: true }
| SerializedJsModule;

function getHiwireSerializer(
pyodide_entrypoint_helper: PyodideEntrypointHelper
pyodide_entrypoint_helper: PyodideEntrypointHelper,
modules: Set<string>
): (obj: any) => CustomSerialized {
return function serializer(obj: any): CustomSerialized {
if (obj === pyodide_entrypoint_helper) {
return { pyodide_entrypoint_helper: true };
}
const serializedModule = maybeSerializeJsModule(obj, modules);
if (serializedModule) {
return serializedModule;
}
throw createUnserializableObjectError(obj);
};
}
Expand All @@ -577,6 +607,9 @@ function getHiwireDeserializer(
if ('pyodide_entrypoint_helper' in obj) {
return pyodide_entrypoint_helper;
}
if ('jsModule' in obj) {
return deserializeJsModule(obj, JS_MODULES);
}
unreachable(obj, `Can't deserialize ${obj}`);
};
}
Expand All @@ -594,9 +627,10 @@ function makeLinearMemorySnapshot(
): Uint8Array {
const dsoHandles = recordDsoHandles(Module);
let hiwire: SnapshotConfig | undefined;
const jsModuleNames: Set<string> = new Set();
if (Module.API.version !== '0.26.0a2') {
hiwire = Module.API.serializeHiwireState(
getHiwireSerializer(pyodide_entrypoint_helper)
getHiwireSerializer(pyodide_entrypoint_helper, jsModuleNames)
);
}
const settings: SnapshotSettings = {
Expand All @@ -609,6 +643,7 @@ function makeLinearMemorySnapshot(
dsoHandles,
hiwire,
importedModulesList,
jsModuleNames: Array.from(jsModuleNames),
settings,
...CREATED_SNAPSHOT_META,
});
Expand Down Expand Up @@ -686,6 +721,7 @@ function decodeSnapshot(
compatFlags: {},
...meta.settings,
},
jsModuleNames: [],
...extras,
};
}
Expand Down
15 changes: 7 additions & 8 deletions src/pyodide/python-entrypoint-helper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import {
reportError,
} from 'pyodide-internal:util';
import { LOADED_SNAPSHOT_TYPE } from 'pyodide-internal:snapshot';
export { createImportProxy } from 'pyodide-internal:serializeJsModule';
import { patch_env_helper } from 'pyodide-internal:envHelpers';

type PyFuture<T> = Promise<T> & { copy(): PyFuture<T>; destroy(): void };
Expand Down Expand Up @@ -86,16 +87,14 @@ function get_pyodide_entrypoint_helper(): PyodideEntrypointHelper {
return _pyodide_entrypoint_helper;
}

export function setDoAnImport(
func: (mod: string) => Promise<any>,
cloudflareWorkersModule: any,
cloudflareSocketsModule: any,
export async function setDoAnImport(
doAnImport: (mod: string) => Promise<any>,
workerEntrypoint: any
): void {
): Promise<void> {
_pyodide_entrypoint_helper = {
doAnImport: func,
cloudflareWorkersModule,
cloudflareSocketsModule,
doAnImport,
cloudflareWorkersModule: await doAnImport('cloudflare:workers'),
cloudflareSocketsModule: await doAnImport('cloudflare:sockets'),
workerEntrypoint,
patchWaitUntil,
patch_env_helper,
Expand Down
22 changes: 12 additions & 10 deletions src/pyodide/python-entrypoint.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,27 +11,29 @@ import {
WorkerEntrypoint,
WorkflowEntrypoint,
} from 'cloudflare:workers';
import * as cloudflareWorkersModule from 'cloudflare:workers';
import * as cloudflareSocketsModule from 'cloudflare:sockets';

// The creation of `pythonDurableObjects` et al. has to be done here because
// python-entrypoint-helper is a BUILTIN and so cannot import `DurableObject` et al.
// (which are also builtins). As a workaround we call `makeEntrypointClass` here and pass it the
// appropriate class.
import { setDoAnImport, initPython } from 'pyodide:python-entrypoint-helper';
import {
setDoAnImport,
initPython,
createImportProxy,
} from 'pyodide:python-entrypoint-helper';

// Function to dynamically import JavaScript modules from Python
// We need the import "call" to occur in this file since it is the only file that is part of the
// user bundle and can see BUILTIN modules and not INTERNAL modules. If we put the import in any
// other file, it would be possible to import INTERNAL modules (not good) and not possible to import
// USER or BUILTIN modules.
async function doAnImport(name) {
return await import(name);
const mod = await import(name);
return createImportProxy(name, mod);
}

// Pass the import function to the helper
setDoAnImport(
doAnImport,
cloudflareWorkersModule,
cloudflareSocketsModule,
WorkerEntrypoint
);
await setDoAnImport(doAnImport, WorkerEntrypoint);

// Initialise Python only after the import function has been set above.
const { handlers, pythonEntrypointClasses, makeEntrypointClass } =
Expand Down
9 changes: 1 addition & 8 deletions src/workerd/server/tests/python/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,7 @@ py_wd_test("env-param")

py_wd_test(
"top-level-env",
# TODO: snapshot for env
make_snapshot = False,
skip_python_flags = ["0.26.0a2"],
use_snapshot = None,
)

py_wd_test("asgi")
Expand Down Expand Up @@ -64,11 +61,7 @@ py_wd_test(

py_wd_test("js-import")

py_wd_test(
"importable-env",
# TODO: why doesn't dedicated snapshot work with this?
use_snapshot = None,
)
py_wd_test("importable-env")

py_wd_test("python-rpc")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@ const unitTests :Workerd.Config = (
modules = [
(name = "worker.py", pythonModule = embed "worker.py"),
],
compatibilityFlags = [%PYTHON_FEATURE_FLAGS, "disable_python_no_global_handlers", "unwrap_custom_thenables", "disable_python_dedicated_snapshot"],
compatibilityFlags = [
%PYTHON_FEATURE_FLAGS,
"disable_python_no_global_handlers",
"unwrap_custom_thenables"
],
bindings = [
(name = "FOO", text = "BAR"),
(name = "CACHE", memoryCache = (
Expand Down
Loading