diff --git a/Cargo.lock b/Cargo.lock index 878616559..ecbbf57c0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1371,6 +1371,7 @@ dependencies = [ "wit-bindgen-gen-host-js", "wit-bindgen-gen-host-wasmtime-py", "wit-bindgen-gen-host-wasmtime-rust", + "wit-component", "wit-parser", ] @@ -1590,6 +1591,15 @@ dependencies = [ "leb128", ] +[[package]] +name = "wasm-encoder" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c64ac98d5d61192cc45c701b7e4bd0b9aff91e2edfc7a088406cfe2288581e2c" +dependencies = [ + "leb128", +] + [[package]] name = "wasmparser" version = "0.89.1" @@ -1601,21 +1611,21 @@ dependencies = [ [[package]] name = "wasmparser" -version = "0.91.0" +version = "0.92.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "239cdca8b8f356af8118c522d5fea23da45b60832ed4e18ef90bb3c9d8dce24a" +checksum = "7da34cec2a8c23db906cdf8b26e988d7a7f0d549eb5d51299129647af61a1b37" dependencies = [ "indexmap", ] [[package]] name = "wasmprinter" -version = "0.2.40" +version = "0.2.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85b5931cf673c4bece6299719c024c08ebe52cbac7124160487a602c81e598c8" +checksum = "ca4374ec27194a12b85aa0e1681a42d5800e97f11c036fa85dea8087c8ccb10b" dependencies = [ "anyhow", - "wasmparser 0.91.0", + "wasmparser 0.92.0", ] [[package]] @@ -1837,7 +1847,7 @@ dependencies = [ "leb128", "memchr", "unicode-width", - "wasm-encoder", + "wasm-encoder 0.17.0", ] [[package]] @@ -2154,8 +2164,8 @@ dependencies = [ "indexmap", "log", "pretty_assertions", - "wasm-encoder", - "wasmparser 0.91.0", + "wasm-encoder 0.18.0", + "wasmparser 0.92.0", "wasmprinter", "wat", "wit-parser", diff --git a/Cargo.toml b/Cargo.toml index 4081d5cee..cd27e5212 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,12 +22,13 @@ bitflags = "1.3.2" heck = "0.3" pulldown-cmark = { version = "0.8", default-features = false } clap = { version = "4.0.9", features = ["derive"] } +env_logger = "0.9.1" 
wasmtime = "1.0" wasmtime-wasi = "1.0" -wasmprinter = "0.2.40" -wasmparser = "0.91.0" -wasm-encoder = "0.17.0" +wasmprinter = "0.2.41" +wasmparser = "0.92.0" +wasm-encoder = "0.18.0" wat = "1.0.49" wit-bindgen-core = { path = 'crates/bindgen-core', version = '0.2.0' } @@ -42,6 +43,7 @@ wit-bindgen-gen-rust-lib = { path = 'crates/gen-rust-lib', version = '0.2.0' } wit-bindgen-guest-rust = { path = 'crates/guest-rust', version = '0.2.0' } wit-bindgen-host-wasmtime-rust = { path = 'crates/host-wasmtime-rust', version = '0.2.0' } wit-parser = { path = 'crates/wit-parser', version = '0.2.0' } +wit-component = { path = 'crates/wit-component', version = '0.2.0' } [[bin]] name = "wit-bindgen" diff --git a/crates/test-helpers/Cargo.toml b/crates/test-helpers/Cargo.toml index 6f596ec53..28cf904b0 100644 --- a/crates/test-helpers/Cargo.toml +++ b/crates/test-helpers/Cargo.toml @@ -30,6 +30,7 @@ filetime = "0.2" wit-bindgen-gen-guest-c = { workspace = true } wit-bindgen-gen-guest-teavm-java = { workspace = true } wit-bindgen-core = { workspace = true } +wit-component = { workspace = true } [features] default = ['guest-rust', 'guest-c', 'guest-teavm-java', 'host-js', 'host-wasmtime-py', 'host-wasmtime-rust'] diff --git a/crates/wit-component/Cargo.toml b/crates/wit-component/Cargo.toml index 9c260db16..90924eb71 100644 --- a/crates/wit-component/Cargo.toml +++ b/crates/wit-component/Cargo.toml @@ -27,7 +27,7 @@ wit-parser = { workspace = true } anyhow = { workspace = true } indexmap = "1.9.1" clap = { workspace = true, optional = true } -env_logger = { version = "0.9.1", optional = true } +env_logger = { workspace = true, optional = true } log = { version = "0.4.17", optional = true } bitflags = { workspace = true } @@ -35,6 +35,7 @@ bitflags = { workspace = true } wasmprinter = { workspace = true } glob = "0.3.0" pretty_assertions = "1.3.0" +env_logger = { workspace = true } [features] default = ["cli"] diff --git a/crates/wit-component/src/cli.rs 
b/crates/wit-component/src/cli.rs index 7c994658b..cef566ca7 100644 --- a/crates/wit-component/src/cli.rs +++ b/crates/wit-component/src/cli.rs @@ -11,11 +11,24 @@ use clap::Parser; use std::path::{Path, PathBuf}; use wit_parser::Interface; -fn parse_named_interface(s: &str) -> Result { - let (name, path) = s - .split_once('=') - .ok_or_else(|| anyhow::anyhow!("expected a value with format `NAME=INTERFACE`"))?; +fn parse_optionally_name_file(s: &str) -> (&str, &str) { + let mut parts = s.splitn(2, '='); + let name_or_path = parts.next().unwrap(); + match parts.next() { + Some(path) => (name_or_path, path), + None => { + let name = Path::new(name_or_path) + .file_stem() + .unwrap() + .to_str() + .unwrap(); + (name, name_or_path) + } + } +} +fn parse_named_interface(s: &str) -> Result { + let (name, path) = parse_optionally_name_file(s); parse_interface(Some(name.to_string()), Path::new(path)) } @@ -36,6 +49,26 @@ fn parse_interface(name: Option, path: &Path) -> Result { Ok(interface) } +fn parse_adapter(s: &str) -> Result<(String, Vec, Interface)> { + let mut parts = s.splitn(2, ':'); + let maybe_named_module = parts.next().unwrap(); + let (name, path) = parse_optionally_name_file(maybe_named_module); + let wasm = wat::parse_file(path)?; + + match parts.next() { + Some(maybe_named_interface) => { + let interface = parse_named_interface(maybe_named_interface)?; + Ok((name.to_string(), wasm, interface)) + } + None => { + // TODO: implement inferring the `interface` from the `wasm` + // specified + drop((name, wasm)); + bail!("inferring from the core wasm module is not supported at this time") + } + } +} + /// WebAssembly component encoder. /// /// Encodes a WebAssembly component from a core WebAssembly module. @@ -43,13 +76,27 @@ fn parse_interface(name: Option, path: &Path) -> Result { #[clap(name = "component-encoder", version = env!("CARGO_PKG_VERSION"))] pub struct WitComponentApp { /// The path to an interface definition file the component imports. 
- #[clap(long = "import", value_name = "NAME=INTERFACE", value_parser = parse_named_interface)] + #[clap(long = "import", value_name = "[NAME=]INTERFACE", value_parser = parse_named_interface)] pub imports: Vec, /// The path to an interface definition file the component exports. - #[clap(long = "export", value_name = "NAME=INTERFACE", value_parser = parse_named_interface)] + #[clap(long = "export", value_name = "[NAME=]INTERFACE", value_parser = parse_named_interface)] pub exports: Vec, + /// The path to an adapter module to satisfy imports. + /// + /// An adapter module can be used to translate the `wasi_snapshot_preview1` + /// ABI, for example, to one that uses the component model. The first + /// `[NAME=]` specified in the argument is inferred from the name of file + /// specified by `MODULE` if not present and is the name of the import + /// module that's being implemented (e.g. `wasi_snapshot_preview1.wasm`. + /// + /// The second part of this argument, optionally specified, is the interface + /// that this adapter module imports. If not specified then the interface + /// imported is inferred from the adapter module itself. + #[clap(long = "adapt", value_name = "[NAME=]MODULE[:[NAME=]INTERFACE]", value_parser = parse_adapter)] + pub adapters: Vec<(String, Vec, Interface)>, + /// The path of the output WebAssembly component. #[clap(long, short = 'o', value_name = "OUTPUT")] pub output: Option, @@ -97,6 +144,10 @@ impl WitComponentApp { .exports(&self.exports) .validate(!self.skip_validation); + for (name, wasm, interface) in self.adapters.iter() { + encoder = encoder.adapter(name, wasm, interface); + } + if let Some(interface) = &self.interface { encoder = encoder.interface(interface); } diff --git a/crates/wit-component/src/encoding.rs b/crates/wit-component/src/encoding.rs index 937233342..dc6dbd613 100644 --- a/crates/wit-component/src/encoding.rs +++ b/crates/wit-component/src/encoding.rs @@ -1,5 +1,62 @@ +//! 
Support for encoding a core wasm module into a component. +//! +//! This module, at a high level, is tasked with transforming a core wasm +//! module into a component. This will process the imports/exports of the core +//! wasm module and translate between the `wit-parser` AST and the component +//! model binary format, producing a final component which will import +//! `*.wit` defined interfaces and export `*.wit` defined interfaces as well +//! with everything wired up internally according to the canonical ABI and such. +//! +//! This doc block here is not currently 100% complete and doesn't cover the +//! full functionality of this module. +//! +//! # Adapter Modules +//! +//! One feature of this encoding process which is non-obvious is the support for +//! "adapter modules". The general idea here is that historical host API +//! definitions have been around for quite some time, such as +//! `wasi_snapshot_preview1`, but these host API definitions are not compatible +//! with the canonical ABI or component model exactly. These APIs, however, can +//! in most situations be roughly adapted to component-model equivalents. This +//! is where adapter modules come into play: they're converting from some +//! arbitrary API/ABI into a component-model using API. +//! +//! An adapter module is a separately compiled `*.wasm` blob which will export +//! functions matching the desired ABI (e.g. exporting functions matching the +//! `wasi_snapshot_preview1` ABI). The `*.wasm` blob will then import functions +//! in the canonical ABI and internally adapt the exported functions to the +//! imported functions. The encoding support in this module is what wires +//! everything up and makes sure that everything is imported and exported to the +//! right place. Adapter modules currently always use "indirect lowerings" +//! meaning that a shim module is created and provided as the imports to the +//! main core wasm module, and the shim module is "filled in" at a later time +//! 
during the instantiation process. +//! +//! Adapter modules are not intended to be general purpose and are currently +//! very restrictive, namely: +//! +//! * They must import a linear memory and not define their own linear memory +//! otherwise. In other words they import memory and cannot use multi-memory. +//! * They cannot define any `elem` or `data` segments since otherwise there's +//! no knowledge ahead-of-time of where their data or element segments could +//! go. This means things like no panics, no indirect calls, etc. +//! * Only one mutable global is allowed and it's assumed to be the stack +//! pointer. This stack pointer is automatically configured with an injected +//! `start` function that is allocated with `memory.grow (i32.const 1)`, +//! meaning that the shim module has 64k of stack space and no protection if +//! that overflows. +//! +//! This means that adapter modules are not meant to be written by everyone. +//! It's assumed that these will be relatively few and far between yet still a +//! crucial part of the transition process to the component model since +//! otherwise there's no way to run a `wasi_snapshot_preview1` module within the +//! component model. 
+ use crate::{ - validation::{expected_export_name, validate_module}, + validation::{ + expected_export_name, validate_adapter_module, validate_module, ValidatedAdapter, + ValidatedModule, + }, StringEncoding, }; use anyhow::{anyhow, bail, Context, Result}; @@ -471,33 +528,43 @@ impl<'a> TypeEncoder<'a> { fn encode_instance_imports( &mut self, interfaces: &'a [Interface], - required_imports: &IndexMap<&'a str, IndexSet<&'a str>>, + info: &ValidatedModule<'a>, imports: &mut ImportEncoder<'a>, ) -> Result<()> { for import in interfaces { - let required_funcs = match required_imports.get(import.name.as_str()) { + let required_funcs = match info.required_imports.get(import.name.as_str()) { Some(required) => required, None => continue, }; + self.encode_instance_import(import, required_funcs, imports)?; + } - Self::validate_interface(import)?; + Ok(()) + } - let mut instance = InstanceTypeEncoder::default(); + fn encode_instance_import( + &mut self, + import: &'a Interface, + required_funcs: &IndexSet<&'a str>, + imports: &mut ImportEncoder<'a>, + ) -> Result<()> { + Self::validate_interface(import)?; - for func in &import.functions { - if !required_funcs.contains(func.name.as_str()) { - continue; - } - Self::validate_function(func)?; + let mut instance = InstanceTypeEncoder::default(); - let index = self.encode_func_type(import, func, false)?; - instance.export(&func.name, ComponentTypeRef::Func(index))?; + for func in &import.functions { + if !required_funcs.contains(func.name.as_str()) { + continue; } + Self::validate_function(func)?; - let index = self.encode_instance_type(&instance.ty); - imports.import(import, ComponentTypeRef::Instance(index), required_funcs)?; + let index = self.encode_func_type(import, func, false)?; + instance.export(&func.name, ComponentTypeRef::Func(index))?; } + let index = self.encode_instance_type(&instance.ty); + imports.import(import, ComponentTypeRef::Instance(index), required_funcs)?; + Ok(()) } @@ -873,9 +940,17 @@ impl<'a> 
TypeEncoder<'a> { } bitflags::bitflags! { + /// Options in the `canon lower` or `canon lift` required for a particular + /// function. struct RequiredOptions: u8 { + /// A memory must be specified, typically the "main module"'s memory + /// export. const MEMORY = 1 << 0; + /// A `realloc` function must be specified, typically named + /// `cabi_realloc`. const REALLOC = 1 << 1; + /// A string encoding must be specified, which is always utf-8 for now + /// today. const STRING_ENCODING = 1 << 2; } } @@ -1016,6 +1091,8 @@ impl RequiredOptions { } bitflags::bitflags! { + /// Flags about what kinds of types are present within the recursive + /// structure of a type. struct TypeContents: u8 { const STRING = 1 << 0; const LIST = 1 << 1; @@ -1076,7 +1153,7 @@ impl TypeContents { /// State relating to encoding a component. #[derive(Default)] -struct EncodingState { +struct EncodingState<'a> { /// The component being encoded. component: ComponentEncoding, /// The index into the core module index space for the inner core module. @@ -1103,9 +1180,18 @@ struct EncodingState { /// /// If `None`, then a fixup module has not yet been encoded. fixups_module_index: Option, + + /// A map of named adapter modules and the index that the module was defined + /// at. + adapter_modules: IndexMap<&'a str, u32>, + /// A map of adapter module instances and the index of their instance. + adapter_instances: IndexMap<&'a str, u32>, + /// A map of the index of the aliased realloc function for each adapter + /// module. 
+ adapter_reallocs: IndexMap<&'a str, Option>, } -impl EncodingState { +impl<'a> EncodingState<'a> { fn encode_core_module(&mut self, module: &[u8]) -> u32 { assert!(self.module_index.is_none()); let ret = self.component.core_module_raw(module); @@ -1113,53 +1199,119 @@ impl EncodingState { ret } + fn encode_core_adapter_module(&mut self, name: &'a str, module: &[u8]) -> u32 { + let index = self.component.core_module_raw(module); + assert!(self.adapter_modules.insert(name, index).is_none()); + index + } + fn encode_core_instantiation( &mut self, encoding: StringEncoding, - imports: &ImportEncoder, - has_memory: bool, - has_realloc: bool, + imports: &ImportEncoder<'a>, + info: &ValidatedModule<'a>, ) -> Result<()> { if imports.map.is_empty() { - self.instantiate_core_module([], has_memory, has_realloc); + self.instantiate_core_module([], info); return Ok(()); } // Encode a shim instantiation if needed - self.encode_shim_instantiation(imports); + let shims = self.encode_shim_instantiation(imports, info); + + // For each instance import into the main module create a + // pseudo-core-wasm-module via a bag-of-exports. + let mut args = Vec::new(); + for name in info.required_imports.keys() { + let index = self.import_instance_to_lowered_core_instance( + CustomModule::Main, + name, + imports, + &shims, + ); + args.push((*name, ModuleArg::Instance(index))); + } - let args: Vec<_> = imports - .map - .iter() - .enumerate() - .map(|(instance_index, (name, import))| { - let mut exports = Vec::with_capacity(import.direct.len() + import.indirect.len()); - - for lowering in &import.indirect { - let index = self.component.alias_core_item( - self.shim_instance_index - .expect("shim should be instantiated"), - ExportKind::Func, - &lowering.export_name, - ); - exports.push((lowering.name, ExportKind::Func, index)); - } + // For each adapter module instance imported into the core wasm module + // the appropriate shim is packaged up into a bag-of-exports instance. 
+ // Note that adapter modules currently don't deal with + // indirect-vs-direct lowerings, everything is indirect. + for (adapter, funcs) in info.adapters_required.iter() { + let shim_instance = self + .shim_instance_index + .expect("shim should be instantiated"); + let mut exports = Vec::new(); + + for (func, _ty) in funcs { + let index = self.component.alias_core_item( + shim_instance, + ExportKind::Func, + &shims.shim_names[&ShimKind::Adapter { adapter, func }], + ); + exports.push((*func, ExportKind::Func, index)); + } - for lowering in &import.direct { - let func_index = self - .component - .alias_func(instance_index as u32, lowering.name); - let core_func_index = self.component.lower_func(func_index, []); - exports.push((lowering.name, ExportKind::Func, core_func_index)); - } + let index = self.component.instantiate_core_exports(exports); + args.push((*adapter, ModuleArg::Instance(index))); + } - let index = self.component.instantiate_core_exports(exports); - (*name, ModuleArg::Instance(index)) - }) - .collect(); + // Instantiate the main module now that all of its arguments have been + // prepared. With this we know have the main linear memory for + // liftings/lowerings later on as well as the adapter modules, if any, + // instantiated after the core wasm module. + self.instantiate_core_module(args, info); + self.instantiate_adapter_modules(imports, info, &shims); - self.instantiate_core_module(args, has_memory, has_realloc); - self.encode_indirect_lowerings(encoding, imports) + // With all the core wasm instances in play now the original shim + // module, if present, can be filled in with lowerings/adapters/etc. + self.encode_indirect_lowerings(encoding, imports, shims) + } + + /// Lowers a named imported interface a core wasm instances suitable to + /// provide as an instantiation argument to another core wasm module. 
+ /// + /// * `for_module` the module that this instance is being created for, or + /// otherwise which `realloc` option is used for the lowerings. + /// * `name` - the name of the imported interface that's being lowered. + /// * `imports` - the list of all imports known for this encoding. + /// * `shims` - the indirect/adapter shims created prior, if any. + fn import_instance_to_lowered_core_instance( + &mut self, + for_module: CustomModule<'_>, + name: &str, + imports: &ImportEncoder<'_>, + shims: &Shims<'_>, + ) -> u32 { + let (instance_index, _, import) = imports.map.get_full(name).unwrap(); + let mut exports = Vec::with_capacity(import.direct.len() + import.indirect.len()); + + // Add an entry for all indirect lowerings which come as an export of + // the shim module. + for (i, lowering) in import.indirect.iter().enumerate() { + let index = self.component.alias_core_item( + self.shim_instance_index + .expect("shim should be instantiated"), + ExportKind::Func, + &shims.shim_names[&ShimKind::IndirectLowering { + interface: name, + indirect_index: i, + realloc: for_module, + }], + ); + exports.push((lowering.name, ExportKind::Func, index)); + } + + // All direct lowerings can be `canon lower`'d here immediately and + // passed as arguments. 
+ for lowering in &import.direct { + let func_index = self + .component + .alias_func(instance_index as u32, lowering.name); + let core_func_index = self.component.lower_func(func_index, []); + exports.push((lowering.name, ExportKind::Func, core_func_index)); + } + + self.component.instantiate_core_exports(exports) } fn encode_imports(&mut self, imports: &ImportEncoder) { @@ -1168,11 +1320,11 @@ impl EncodingState { } } - fn encode_exports<'a>( + fn encode_exports<'b>( &mut self, encoding: StringEncoding, - exports: impl Iterator, - func_types: &IndexMap, u32>, + exports: impl Iterator, + func_types: &IndexMap, u32>, ) -> Result<()> { let core_instance_index = self.instance_index.expect("must be instantiated"); @@ -1239,9 +1391,60 @@ impl EncodingState { Ok(()) } - fn encode_shim_instantiation(&mut self, imports: &ImportEncoder) { - if imports.indirect_count == 0 { - return; + fn encode_shim_instantiation( + &mut self, + imports: &ImportEncoder<'a>, + info: &ValidatedModule<'a>, + ) -> Shims<'a> { + let mut signatures = Vec::new(); + let mut ret = Shims::default(); + + // For all interfaces imported into the main module record all of their + // indirect lowerings into `Shims`. + for name in info.required_imports.keys() { + let import = &imports.map[name]; + ret.append_indirect(name, CustomModule::Main, import, &mut signatures); + } + + // For all required adapter modules a shim is created for each required + // function and additionally a set of shims are created for the + // interface imported into the shim module itself. 
+ for (adapter, funcs) in info.adapters_required.iter() { + let info = &imports.adapters[adapter]; + if let Some(name) = info.required_import { + let import = &imports.map[name]; + ret.append_indirect( + name, + CustomModule::Adapter(adapter), + import, + &mut signatures, + ); + } + for (func, ty) in funcs { + let name = ret.list.len().to_string(); + log::debug!("shim {name} is adapter `{adapter}::{func}`"); + signatures.push(WasmSignature { + params: ty.params().iter().map(to_wasm_type).collect(), + results: ty.results().iter().map(to_wasm_type).collect(), + indirect_params: false, + retptr: false, + }); + ret.list.push(Shim { + name, + // Pessimistically assume that all adapters require memory + // in one form or another. While this isn't technically true + // it's true enough for WASI. + options: RequiredOptions::MEMORY, + kind: ShimKind::Adapter { adapter, func }, + }); + } + } + if ret.list.is_empty() { + return ret; + } + + for shim in ret.list.iter() { + ret.shim_names.insert(shim.kind.clone(), shim.name.clone()); } assert!(self.shim_instance_index.is_none()); @@ -1265,38 +1468,29 @@ impl EncodingState { let mut elements = ElementSection::new(); let mut func_indexes = Vec::new(); - let mut func_index = 0; - for import in imports.map.values() { - for lowering in &import.indirect { - let type_index = *sigs.entry(&lowering.sig).or_insert_with(|| { - let index = types.len(); - types.function( - lowering.sig.params.iter().map(to_val_type), - lowering.sig.results.iter().map(to_val_type), - ); - index - }); - - functions.function(type_index); - Self::encode_shim_function( - type_index, - func_index, - &mut code, - lowering.sig.params.len() as u32, + for (i, (sig, shim)) in signatures.iter().zip(&ret.list).enumerate() { + let i = i as u32; + let type_index = *sigs.entry(sig).or_insert_with(|| { + let index = types.len(); + types.function( + sig.params.iter().map(to_val_type), + sig.results.iter().map(to_val_type), ); - exports.export(&lowering.export_name, 
ExportKind::Func, func_index); + index + }); - imports_section.import("", &lowering.export_name, EntityType::Function(type_index)); - func_indexes.push(func_index); + functions.function(type_index); + Self::encode_shim_function(type_index, i, &mut code, sig.params.len() as u32); + exports.export(&shim.name, ExportKind::Func, i); - func_index += 1; - } + imports_section.import("", &shim.name, EntityType::Function(type_index)); + func_indexes.push(i); } let table_type = TableType { element_type: ValType::FuncRef, - minimum: func_index, - maximum: Some(func_index), + minimum: signatures.len() as u32, + maximum: Some(signatures.len() as u32), }; tables.table(table_type); @@ -1326,6 +1520,18 @@ impl EncodingState { let shim_module_index = self.component.core_module(&shim); self.fixups_module_index = Some(self.component.core_module(&fixups)); self.shim_instance_index = Some(self.component.instantiate(shim_module_index, [])); + + return ret; + + fn to_wasm_type(ty: &wasmparser::ValType) -> WasmType { + match ty { + wasmparser::ValType::I32 => WasmType::I32, + wasmparser::ValType::I64 => WasmType::I64, + wasmparser::ValType::F32 => WasmType::F32, + wasmparser::ValType::F64 => WasmType::F64, + _ => unreachable!(), + } + } } fn encode_shim_function( @@ -1351,8 +1557,9 @@ impl EncodingState { &mut self, encoding: StringEncoding, imports: &ImportEncoder, + shims: Shims<'_>, ) -> Result<()> { - if imports.indirect_count == 0 { + if shims.list.is_empty() { return Ok(()); } @@ -1369,25 +1576,49 @@ impl EncodingState { let mut exports = Vec::with_capacity(imports.indirect_count as usize); exports.push((INDIRECT_TABLE_NAME, ExportKind::Table, table_index)); - for (instance_index, import) in imports.map.values().enumerate() { - for lowering in &import.indirect { - let func_index = self - .component - .alias_func(instance_index as u32, lowering.name); - - let core_func_index = self.component.lower_func( - func_index, - lowering - .options - .into_iter(encoding, self.memory_index, 
self.realloc_index)?, - ); + for shim in shims.list.iter() { + let core_func_index = match &shim.kind { + // Indirect lowerings are a `canon lower`'d function with + // options specified from a previously instantiated instance. + // This previous instance could either be the main module or an + // adapter module, which affects the `realloc` option here. + // Currently only one linear memory is supported so the linear + // memory always comes from the main module. + ShimKind::IndirectLowering { + interface, + indirect_index, + realloc, + } => { + let (instance_index, _, interface) = imports.map.get_full(interface).unwrap(); + let func_index = self.component.alias_func( + instance_index as u32, + &interface.indirect[*indirect_index].name, + ); + + let realloc = match realloc { + CustomModule::Main => self.realloc_index, + CustomModule::Adapter(name) => self.adapter_reallocs[name], + }; - exports.push(( - lowering.export_name.as_str(), + self.component.lower_func( + func_index, + shim.options + .into_iter(encoding, self.memory_index, realloc)?, + ) + } + + // Adapter shims are defined by an export from and adapter + // instance, so use the specified name here and the previously + // created instances to get the core item that represents the + // shim. 
+ ShimKind::Adapter { adapter, func } => self.component.alias_core_item( + self.adapter_instances[adapter], ExportKind::Func, - core_func_index, - )); - } + func, + ), + }; + + exports.push((shim.name.as_str(), ExportKind::Func, core_func_index)); } let instance_index = self.component.instantiate_core_exports(exports); @@ -1398,9 +1629,9 @@ impl EncodingState { Ok(()) } - fn instantiate_core_module<'a, A>(&mut self, args: A, has_memory: bool, has_realloc: bool) + fn instantiate_core_module<'b, A>(&mut self, args: A, info: &ValidatedModule<'_>) where - A: IntoIterator, + A: IntoIterator, A::IntoIter: ExactSizeIterator, { assert!(self.instance_index.is_none()); @@ -1409,7 +1640,7 @@ impl EncodingState { .component .instantiate(self.module_index.expect("core module encoded"), args); - if has_memory { + if info.has_memory { self.memory_index = Some(self.component.alias_core_item( instance_index, ExportKind::Memory, @@ -1417,7 +1648,7 @@ impl EncodingState { )); } - if has_realloc { + if info.has_realloc { self.realloc_index = Some(self.component.alias_core_item( instance_index, ExportKind::Func, @@ -1427,6 +1658,173 @@ impl EncodingState { self.instance_index = Some(instance_index); } + + /// This function will instantiate all required adapter modules required by + /// the main module (specified by `info`). + /// + /// Each adapter here is instantiated with its required imported interface, + /// if any. + fn instantiate_adapter_modules( + &mut self, + imports: &ImportEncoder<'a>, + info: &ValidatedModule<'a>, + shims: &Shims<'_>, + ) { + for name in info.adapters_required.keys() { + let info = &imports.adapters[name]; + let mut args = Vec::new(); + + // If the adapter module requires a `memory` import then specify + // that here. For now assume that the module name of the memory is + // different from the imported interface. That's true enough for now + // since it's `env::memory`. 
+ if let Some((module, name)) = &info.needs_memory { + if let Some(import_name) = info.required_import { + assert!(module != import_name); + } + assert!(module != name); + let memory = self.memory_index.unwrap(); + let instance = self.component.instantiate_core_exports([( + name.as_str(), + ExportKind::Memory, + memory, + )]); + args.push((module.as_str(), ModuleArg::Instance(instance))); + } + if let Some(import_name) = info.required_import { + let instance = self.import_instance_to_lowered_core_instance( + CustomModule::Adapter(name), + import_name, + imports, + shims, + ); + args.push((import_name, ModuleArg::Instance(instance))); + } + let instance = self.component.instantiate(self.adapter_modules[name], args); + self.adapter_instances.insert(name, instance); + let realloc = if info.has_realloc { + Some( + self.component + .alias_core_item(instance, ExportKind::Func, "cabi_realloc"), + ) + } else { + None + }; + self.adapter_reallocs.insert(name, realloc); + } + } +} + +/// A list of "shims" which start out during the component instantiation process +/// as functions which immediately trap due to a `call_indirect`-to-`null` but +/// will get filled in by the time the component instantiation process +/// completes. +/// +/// Shims currently include: +/// +/// * "Indirect functions" lowered from imported instances where the lowering +/// requires an item exported from the main module. These are indirect due to +/// the circular dependency between the module needing an import and the +/// import needing the module. +/// +/// * Adapter modules which convert from a historical ABI to the component +/// model's ABI (e.g. wasi preview1 to preview2) get a shim since the adapters +/// are currently indicated as always requiring the memory of the main module. +/// +/// This structure is created by `encode_shim_instantiation`. +#[derive(Default)] +struct Shims<'a> { + /// The list of all shims that a module will require. 
+ list: Vec>, + + /// A map from a shim to the name of the shim in the shim instance. + shim_names: IndexMap, String>, +} + +struct Shim<'a> { + /// Canonical ABI options required by this shim, used during `canon lower` + /// operations. + options: RequiredOptions, + + /// The name, in the shim instance, of this shim. + /// + /// Currently this is `"0"`, `"1"`, ... + name: String, + + /// Precise information about what this shim is a lowering of. + kind: ShimKind<'a>, +} + +#[derive(Debug, Copy, Clone, Hash, Eq, PartialEq)] +enum ShimKind<'a> { + /// This shim is a late indirect lowering of an imported function in a + /// component which is only possible after prior core wasm modules are + /// instantiated so their memories and functions are available. + IndirectLowering { + /// The name of the interface that's being lowered. + interface: &'a str, + /// The index within the `indirect` array of the function being lowered. + indirect_index: usize, + /// Which instance to pull the `realloc` function from, if necessary. + realloc: CustomModule<'a>, + }, + /// This shim is a core wasm function defined in an adapter module but isn't + /// available until the adapter module is itself instantiated. + Adapter { + /// The name of the adapter module this shim comes from. + adapter: &'a str, + /// The name of the export in the adapter module this shim points to. + func: &'a str, + }, +} + +/// Indicator for which module is being used for a lowering or where options +/// like `realloc` are drawn from. +/// +/// This is necessary for situations such as an imported function being lowered +/// into the main module and additionally into an adapter module. For example an +/// adapter might adapt from preview1 to preview2 for the standard library of a +/// programming language but the main module's custom application code may also +/// explicitly import from preview2. These two different lowerings of a preview2 +/// function are parameterized by this enumeration. 
+#[derive(Debug, Copy, Clone, Hash, Eq, PartialEq)] +enum CustomModule<'a> { + /// This points to the "main module" which is generally the "output of LLVM" + /// or what a user wrote. + Main, + /// This is selecting an adapter module, identified by name here, where + /// something is being lowered into. + Adapter(&'a str), +} + +impl<'a> Shims<'a> { + /// Adds all shims necessary for the `import` provided, namely iterating + /// over its indirect lowerings and appending a shim per lowering. + fn append_indirect( + &mut self, + name: &'a str, + for_module: CustomModule<'a>, + import: &ImportedInterface<'a>, + sigs: &mut Vec, + ) { + for (indirect_index, lowering) in import.indirect.iter().enumerate() { + let shim_name = self.list.len().to_string(); + log::debug!( + "shim {shim_name} is import `{name}` lowering {indirect_index} `{}`", + lowering.name + ); + sigs.push(lowering.sig.clone()); + self.list.push(Shim { + name: shim_name, + options: lowering.options, + kind: ShimKind::IndirectLowering { + interface: name, + indirect_index, + realloc: for_module, + }, + }); + } + } } #[derive(Debug)] @@ -1439,7 +1837,6 @@ struct IndirectLowering<'a> { name: &'a str, sig: WasmSignature, options: RequiredOptions, - export_name: String, } #[derive(Debug)] @@ -1449,11 +1846,22 @@ struct ImportedInterface<'a> { indirect: Vec>, } +/// Helper type used when encoding a component to have helpers that +/// simultaneously encode an item while returning its corresponding index in the +/// generated index spaces as well. #[derive(Default)] struct ComponentEncoding { + /// The binary component as created by `wasm-encoder`. component: Component, - /// The last section which was appended to during encoding. + + /// The last section which was appended to during encoding. This type is + /// generated by the `section_accessors` macro below. 
+ /// + /// When something is encoded this is used if it matches the kind of item + /// being encoded, otherwise it's "flushed" to the output component and a + /// new section is started. last_section: LastSection, + // Core index spaces core_modules: u32, core_funcs: u32, @@ -1564,6 +1972,9 @@ impl ComponentEncoding { } } +// Helper macro to generate methods on `ComponentEncoding` to get specific +// section encoders that automatically flush and write out prior sections as +// necessary. macro_rules! section_accessors { ($($method:ident => $section:ident)*) => ( #[derive(Default)] @@ -1577,7 +1988,13 @@ macro_rules! section_accessors { $( fn $method(&mut self) -> &mut $section { match &self.last_section { + // The last encoded section matches the section that's + // being requested, so no change is necessary. LastSection::$section(_) => {} + + // Otherwise the last section didn't match this section, + // so flush any prior section if needed and start + // encoding the desired section of this method. _ => { self.flush(); self.last_section = LastSection::$section($section::new()); @@ -1590,6 +2007,8 @@ macro_rules! section_accessors { } )* + /// Writes out the last section into the final component binary if + /// there is a section specified, otherwise does nothing. 
fn flush(&mut self) { match mem::take(&mut self.last_section) { LastSection::None => {} @@ -1635,6 +2054,7 @@ fn inc(idx: &mut u32) -> u32 { #[derive(Debug, Default)] struct ImportEncoder<'a> { map: IndexMap<&'a str, ImportedInterface<'a>>, + adapters: IndexMap<&'a str, ValidatedAdapter<'a>>, direct_count: u32, indirect_count: u32, } @@ -1665,14 +2085,12 @@ impl<'a> ImportEncoder<'a> { self.direct_count += 1; direct.push(DirectLowering { name: &f.name }); } else { - let element_index = self.indirect_count; let sig = interface.wasm_signature(AbiVariant::GuestImport, f); self.indirect_count += 1; indirect.push(IndirectLowering { name: &f.name, sig, options, - export_name: element_index.to_string(), }); } } @@ -1699,6 +2117,7 @@ pub struct ComponentEncoder<'a> { exports: &'a [Interface], validate: bool, types_only: bool, + adapters: IndexMap<&'a str, (&'a [u8], &'a Interface)>, } impl<'a> ComponentEncoder<'a> { @@ -1738,12 +2157,41 @@ impl<'a> ComponentEncoder<'a> { self } + /// Specifies a new adapter which is used to translate from a historical + /// wasm ABI to the canonical ABI and the `interface` provided. + /// + /// This is primarily used to polyfill, for example, + /// `wasi_snapshot_preview1` with a component-model using interface. The + /// `name` provided is the module name of the adapter that is being + /// polyfilled, for example `"wasi_snapshot_preview1"`. + /// + /// The `bytes` provided is a core wasm module which implements the `name` + /// interface in terms of the `interface` interface. This core wasm module + /// is severely restricted in its shape, for example it cannot have any data + /// segments or element segments. + /// + /// The `interface` provided is the component-model-using-interface that the + /// wasm module specified by `bytes` imports. The `bytes` will then import + /// `interface` and export functions to get imported from the module `name` + /// in the core wasm that's being wrapped. 
+ pub fn adapter(mut self, name: &'a str, bytes: &'a [u8], interface: &'a Interface) -> Self { + self.adapters.insert(name, (bytes, interface)); + self + } + /// Encode the component and return the bytes. pub fn encode(&self) -> Result> { - let (required_imports, has_memory, has_realloc) = if !self.module.is_empty() { - validate_module(self.module, &self.interface, self.imports, self.exports)? + let info = if !self.module.is_empty() { + let adapters = self.adapters.keys().copied().collect::>(); + validate_module( + self.module, + &self.interface, + self.imports, + self.exports, + &adapters, + )? } else { - (Default::default(), false, false) + Default::default() }; let exports = self @@ -1757,21 +2205,37 @@ impl<'a> ComponentEncoder<'a> { let mut types = TypeEncoder::default(); let mut imports = ImportEncoder::default(); types.encode_func_types(exports.clone(), false)?; - types.encode_instance_imports(self.imports, &required_imports, &mut imports)?; - types.finish(&mut state.component); + types.encode_instance_imports(self.imports, &info, &mut imports)?; if self.types_only { if !self.module.is_empty() { bail!("a module cannot be specified for a types-only encoding"); } + types.finish(&mut state.component); } else { if self.module.is_empty() { bail!("a module is required when encoding a component"); } + // For all required adapters lookup the corresponding adapter + // provided to this encoder, gc it to an appropriate size, and then + // register its metadata in our data structures. 
+ for (name, required) in info.adapters_required.iter() { + let (wasm, interface) = &self.adapters[*name]; + let wasm = crate::gc::run(wasm, required) + .context("failed to reduce input adapter module to its minimal size")?; + let info = validate_adapter_module(&wasm, interface, required) + .context("failed to validate the imports of the minimized adapter module")?; + state.encode_core_adapter_module(name, &wasm); + types.encode_instance_import(interface, &info.required_funcs, &mut imports)?; + imports.adapters.insert(name, info); + } + + types.finish(&mut state.component); + state.encode_imports(&imports); state.encode_core_module(self.module); - state.encode_core_instantiation(self.encoding, &imports, has_memory, has_realloc)?; + state.encode_core_instantiation(self.encoding, &imports, &info)?; state.encode_exports(self.encoding, exports, &types.func_type_map)?; } diff --git a/crates/wit-component/src/gc.rs b/crates/wit-component/src/gc.rs new file mode 100644 index 000000000..d6e7c8bc1 --- /dev/null +++ b/crates/wit-component/src/gc.rs @@ -0,0 +1,998 @@ +use self::bitvec::BitVec; +use anyhow::{bail, Result}; +use indexmap::{IndexMap, IndexSet}; +use std::collections::HashMap; +use std::mem; +use wasm_encoder::{Encode, EntityType}; +use wasmparser::*; + +/// This function will reduce the input core `wasm` module to only the set of +/// exports `required`. +/// +/// This internally performs a "gc" pass after removing exports to ensure that +/// the resulting module imports the minimal set of functions necessary. +pub fn run(wasm: &[u8], required: &IndexMap<&str, FuncType>) -> Result> { + assert!(!required.is_empty()); + + let mut module = Module::default(); + module.parse(wasm)?; + + // Make sure that all required names are present in the module, and then + // remove all names that are not required. 
+ for (name, _ty) in required { + if !module.exports.contains_key(name) { + bail!("adapter module does not have export `{name}`") + } + } + let mut not_required = IndexSet::new(); + for name in module.exports.keys().copied() { + // Explicitly keep `cabi_realloc` if it's there in case an interface + // needs it for a lowering. + if !required.contains_key(name) && name != "cabi_realloc" { + not_required.insert(name); + } + } + for name in not_required { + module.exports.remove(name); + } + assert!(!module.exports.is_empty()); + module.liveness()?; + module.encode() +} + +// Representation of a wasm module which is used to GC a module to its minimal +// set of required items necessary to implement the `exports` +// +// Note that this is not a complete representation of a wasm module since it +// doesn't represent everything such as data and element segments. This is only +// used for adapter modules which otherwise have these restrictions and makes +// this gc pass a bit easier to write. +#[derive(Default)] +struct Module<'a> { + // Definitions found when parsing a module + types: Vec, + tables: Vec>, + globals: Vec>, + memories: Vec>, + funcs: Vec>, + exports: IndexMap<&'a str, Export<'a>>, + func_names: HashMap, + global_names: HashMap, + + // Known-live sets of indices after the `liveness` pass has run. + live_types: BitVec, + live_tables: BitVec, + live_globals: BitVec, + live_memories: BitVec, + live_funcs: BitVec, + + // Helper data structure used during the `liveness` pass to avoid recursion. + // When calculating the liveness of an item this `worklist` is pushed to and + // then processed until it's empty. An item pushed onto this list represents + // a new index that has been discovered to be live and the function is what + // walks the item's definition to find other items that it references.
+ worklist: Vec<(u32, fn(&mut Module<'a>, u32) -> Result<()>)>, +} + +struct Table<'a> { + def: Definition<'a, ()>, + ty: TableType, +} + +struct Memory<'a> { + def: Definition<'a, ()>, + ty: MemoryType, +} + +struct Global<'a> { + def: Definition<'a, ConstExpr<'a>>, + ty: GlobalType, +} + +struct Func<'a> { + def: Definition<'a, FunctionBody<'a>>, + ty: u32, +} + +enum Definition<'a, T> { + Import(&'a str, &'a str), + Local(T), +} + +impl<'a> Module<'a> { + fn parse(&mut self, wasm: &'a [u8]) -> Result<()> { + let mut next_code_index = 0; + let mut validator = Validator::new(); + for payload in Parser::new(0).parse_all(wasm) { + let payload = payload?; + validator.payload(&payload)?; + match payload { + Payload::Version { encoding, .. } => { + if encoding != Encoding::Module { + bail!("adapter must be a core wasm module, not a component"); + } + } + Payload::End(_) => {} + Payload::TypeSection(s) => { + for ty in s { + self.types.push(ty?); + } + } + Payload::ImportSection(s) => { + for i in s { + let i = i?; + match i.ty { + TypeRef::Func(ty) => self.funcs.push(Func { + def: Definition::Import(i.module, i.name), + ty, + }), + TypeRef::Table(ty) => self.tables.push(Table { + def: Definition::Import(i.module, i.name), + ty, + }), + TypeRef::Global(ty) => self.globals.push(Global { + def: Definition::Import(i.module, i.name), + ty, + }), + TypeRef::Memory(ty) => self.memories.push(Memory { + def: Definition::Import(i.module, i.name), + ty, + }), + TypeRef::Tag(_) => bail!("unsupported `tag` type"), + } + } + } + Payload::TableSection(s) => { + for ty in s { + let ty = ty?; + self.tables.push(Table { + def: Definition::Local(()), + ty, + }); + } + } + Payload::MemorySection(s) => { + for ty in s { + let ty = ty?; + self.memories.push(Memory { + def: Definition::Local(()), + ty, + }); + } + } + Payload::GlobalSection(s) => { + for g in s { + let g = g?; + self.globals.push(Global { + def: Definition::Local(g.init_expr), + ty: g.ty, + }); + } + } + + 
Payload::ExportSection(s) => { + for e in s { + let e = e?; + self.exports.insert(e.name, e); + } + } + + Payload::FunctionSection(s) => { + next_code_index = self.funcs.len(); + for ty in s { + let ty = ty?; + self.funcs.push(Func { + // Specify a dummy definition to get filled in later + // when parsing the code section. + def: Definition::Local(FunctionBody::new(0, &[])), + ty, + }); + } + } + + Payload::CodeSectionStart { .. } => {} + Payload::CodeSectionEntry(body) => { + self.funcs[next_code_index].def = Definition::Local(body); + next_code_index += 1; + } + + // Ignore all custom sections except for the `name` section + // which we parse, but ignore errors within. + Payload::CustomSection(s) => { + if s.name() == "name" { + drop(self.parse_name_section(&s)); + } + } + + // sections that shouldn't appear in the specially-crafted core wasm + // adapter module we're processing + Payload::DataCountSection { .. } + | Payload::ElementSection(_) + | Payload::DataSection(_) + | Payload::StartSection { .. } + | Payload::TagSection(_) + | Payload::UnknownSection { .. } => { + bail!("unsupported section found in adapter module") + } + + // component-model related things that shouldn't show up + Payload::ModuleSection { .. } + | Payload::ComponentSection { .. } + | Payload::InstanceSection(_) + | Payload::ComponentInstanceSection(_) + | Payload::ComponentAliasSection(_) + | Payload::ComponentCanonicalSection(_) + | Payload::ComponentStartSection(_) + | Payload::ComponentImportSection(_) + | Payload::CoreTypeSection(_) + | Payload::ComponentExportSection(_) + | Payload::ComponentTypeSection(_) => { + bail!("component section found in adapter module") + } + } + } + + Ok(()) + } + + fn parse_name_section(&mut self, section: &CustomSectionReader<'a>) -> Result<()> { + let section = NameSectionReader::new(section.data(), section.data_offset())?; + for s in section { + match s?
{ + Name::Function(map) => { + let mut map = map.get_map()?; + for _ in 0..map.get_count() { + let naming = map.read()?; + self.func_names.insert(naming.index, naming.name); + } + } + Name::Global(map) => { + let mut map = map.get_map()?; + for _ in 0..map.get_count() { + let naming = map.read()?; + self.global_names.insert(naming.index, naming.name); + } + } + _ => {} + } + } + Ok(()) + } + + /// Iteratively calculates the set of live items within this module + /// considering all exports as the root of live functions. + fn liveness(&mut self) -> Result<()> { + let exports = mem::take(&mut self.exports); + for (_, e) in exports.iter() { + match e.kind { + ExternalKind::Func => self.func(e.index), + ExternalKind::Global => self.global(e.index), + ExternalKind::Table => self.table(e.index), + ExternalKind::Memory => self.memory(e.index), + ExternalKind::Tag => bail!("unsupported exported tag"), + } + } + self.exports = exports; + + while let Some((idx, func)) = self.worklist.pop() { + func(self, idx)?; + } + Ok(()) + } + + fn func(&mut self, func: u32) { + if !self.live_funcs.insert(func) { + return; + } + self.worklist.push((func, |me, func| { + let func = &me.funcs[func as usize]; + me.live_types.insert(func.ty); + let mut body = match &func.def { + Definition::Import(..) => return Ok(()), + Definition::Local(e) => e.get_binary_reader(), + }; + let local_count = body.read_var_u32()?; + for _ in 0..local_count { + body.read_var_u32()?; + body.read_val_type()?; + } + me.operators(body) + })); + } + + fn global(&mut self, global: u32) { + if !self.live_globals.insert(global) { + return; + } + self.worklist.push((global, |me, global| { + let init = match &me.globals[global as usize].def { + Definition::Import(..) 
=> return Ok(()), + Definition::Local(e) => e, + }; + me.operators(init.get_binary_reader()) + })); + } + + fn table(&mut self, table: u32) { + self.live_tables.insert(table); + } + + fn memory(&mut self, memory: u32) { + self.live_memories.insert(memory); + } + + fn blockty(&mut self, ty: BlockType) { + if let BlockType::FuncType(ty) = ty { + self.live_types.insert(ty); + } + } + + fn operators(&mut self, mut reader: BinaryReader<'a>) -> Result<()> { + while !reader.eof() { + reader.visit_operator(self)?; + } + Ok(()) + } + + fn live_types(&self) -> impl Iterator + '_ { + live_iter(&self.live_types, self.types.iter()) + } + + fn live_funcs(&self) -> impl Iterator)> + '_ { + live_iter(&self.live_funcs, self.funcs.iter()) + } + + fn live_memories(&self) -> impl Iterator)> + '_ { + live_iter(&self.live_memories, self.memories.iter()) + } + + fn live_globals(&self) -> impl Iterator)> + '_ { + live_iter(&self.live_globals, self.globals.iter()) + } + + fn live_tables(&self) -> impl Iterator)> + '_ { + live_iter(&self.live_tables, self.tables.iter()) + } + + /// Encodes this `Module` to a new wasm module which is gc'd and only + /// contains the items that are live as calculated by the `liveness` pass. + fn encode(&mut self) -> Result> { + // Data structure used to track the mapping of old index to new index + // for all live items. + let mut map = Encoder::default(); + + // Sections that will be assembled into the final module at the end of + // this function. 
+ let mut types = wasm_encoder::TypeSection::new(); + let mut imports = wasm_encoder::ImportSection::new(); + let mut funcs = wasm_encoder::FunctionSection::new(); + let mut tables = wasm_encoder::TableSection::new(); + let mut memories = wasm_encoder::MemorySection::new(); + let mut globals = wasm_encoder::GlobalSection::new(); + let mut code = wasm_encoder::CodeSection::new(); + + let mut empty_type = None; + for (i, ty) in self.live_types() { + map.types.push(i); + match ty { + Type::Func(ty) => { + types.function( + ty.params().iter().copied().map(valty), + ty.results().iter().copied().map(valty), + ); + + // Keep track of the "empty type" to see if we can reuse an + // existing one or one needs to be injected if a `start` + // function is calculated at the end. + if ty.params().len() == 0 && ty.results().len() == 0 { + empty_type = Some(map.types.remap(i)); + } + } + } + } + + let mut num_memories = 0; + for (i, mem) in self.live_memories() { + map.memories.push(i); + let ty = wasm_encoder::MemoryType { + minimum: mem.ty.initial, + maximum: mem.ty.maximum, + shared: mem.ty.shared, + memory64: mem.ty.memory64, + }; + match &mem.def { + Definition::Import(m, n) => { + imports.import(m, n, ty); + } + Definition::Local(()) => { + memories.memory(ty); + } + } + num_memories += 1; + } + + for (i, table) in self.live_tables() { + map.tables.push(i); + let ty = wasm_encoder::TableType { + minimum: table.ty.initial, + maximum: table.ty.maximum, + element_type: valty(table.ty.element_type), + }; + match &table.def { + Definition::Import(m, n) => { + imports.import(m, n, ty); + } + Definition::Local(()) => { + tables.table(ty); + } + } + } + + for (i, global) in self.live_globals() { + map.globals.push(i); + let ty = wasm_encoder::GlobalType { + mutable: global.ty.mutable, + val_type: valty(global.ty.content_type), + }; + match &global.def { + Definition::Import(m, n) => { + imports.import(m, n, ty); + } + Definition::Local(init) => { + let mut bytes = 
map.operators(init.get_binary_reader())?; + assert_eq!(bytes.pop(), Some(0xb)); + globals.global(ty, &wasm_encoder::ConstExpr::raw(bytes)); + } + } + } + + // For functions first assign a new index to all functions and then + // afterwards actually map the body of all functions so the `map` of all + // index mappings is fully populated before instructions are mapped. + let mut num_funcs = 0; + for (i, func) in self.live_funcs() { + map.funcs.push(i); + let ty = map.types.remap(func.ty); + match &func.def { + Definition::Import(m, n) => { + imports.import(m, n, EntityType::Function(ty)); + } + Definition::Local(_) => { + funcs.function(ty); + } + } + num_funcs += 1; + } + + for (_, func) in self.live_funcs() { + let mut body = match &func.def { + Definition::Import(..) => continue, + Definition::Local(body) => body.get_binary_reader(), + }; + let mut locals = Vec::new(); + for _ in 0..body.read_var_u32()? { + let cnt = body.read_var_u32()?; + let ty = body.read_val_type()?; + locals.push((cnt, valty(ty))); + } + let mut func = wasm_encoder::Function::new(locals); + let bytes = map.operators(body)?; + func.raw(bytes); + code.function(&func); + } + + // Inject a start function to initialize the stack pointer which will be + // local to this module. This only happens if a memory is preserved and + // a stack pointer global is found. + let mut start = None; + let sp = self.find_stack_pointer()?; + if let Some(sp) = sp { + if num_memories > 0 { + use wasm_encoder::Instruction::*; + + // If there are any memories or any mutable globals there must be + // precisely one of each as otherwise we don't know how to filter + // down to the right one. + if num_memories != 1 { + bail!("adapter modules don't support multi-memory"); + } + + let sp = map.globals.remap(sp); + + // Generate a function type for this start function, adding a new + // function type to the module if necessary. 
+ let empty_type = empty_type.unwrap_or_else(|| { + types.function([], []); + types.len() - 1 + }); + funcs.function(empty_type); + + let mut func = wasm_encoder::Function::new([(1, wasm_encoder::ValType::I32)]); + // Grow the memory by 1 page to allocate ourselves some stack space. + func.instruction(&I32Const(1)); + func.instruction(&MemoryGrow(0)); + func.instruction(&LocalTee(0)); + + // Test if the return value of the growth was -1 and trap if so + // since we don't have a stack page. + func.instruction(&I32Const(-1)); + func.instruction(&I32Eq); + func.instruction(&If(wasm_encoder::BlockType::Empty)); + func.instruction(&Unreachable); + func.instruction(&End); + + // Set our stack pointer to the top of the page we were given, which + // is the page index times the page size plus the size of a page. + func.instruction(&LocalGet(0)); + func.instruction(&I32Const(1)); + func.instruction(&I32Add); + func.instruction(&I32Const(16)); + func.instruction(&I32Shl); + func.instruction(&GlobalSet(sp)); + func.instruction(&End); + code.function(&func); + + start = Some(wasm_encoder::StartSection { + function_index: num_funcs, + }); + } + } + + // Sanity-check the shape of the module since some parts won't work if + // this fails. Note that during parsing we've already validated there + // are no data segments or element segments. + + // Shouldn't have any tables if there are no element segments since + // otherwise there's no meaning to a defined or imported table. + if self.live_tables().count() != 0 { + bail!("tables should not be present in the final adapter module"); + } + + // multi-memory should not be enabled and if any memory it should be + // imported. 
+ if self.live_memories().count() > 1 { + bail!("the adapter module should not use multi-memory"); + } + if !memories.is_empty() { + bail!("locally-defined memories are not allowed define a local memory"); + } + + let mut ret = wasm_encoder::Module::default(); + if !types.is_empty() { + ret.section(&types); + } + if !imports.is_empty() { + ret.section(&imports); + } + if !funcs.is_empty() { + ret.section(&funcs); + } + if !tables.is_empty() { + ret.section(&tables); + } + if !memories.is_empty() { + ret.section(&memories); + } + if !globals.is_empty() { + ret.section(&globals); + } + + if !self.exports.is_empty() { + let mut exports = wasm_encoder::ExportSection::new(); + for (_, export) in self.exports.iter() { + let (kind, index) = match export.kind { + ExternalKind::Func => ( + wasm_encoder::ExportKind::Func, + map.funcs.remap(export.index), + ), + ExternalKind::Table => ( + wasm_encoder::ExportKind::Table, + map.tables.remap(export.index), + ), + ExternalKind::Memory => ( + wasm_encoder::ExportKind::Memory, + map.memories.remap(export.index), + ), + ExternalKind::Global => ( + wasm_encoder::ExportKind::Global, + map.globals.remap(export.index), + ), + kind => bail!("unsupported export kind {kind:?}"), + }; + exports.export(export.name, kind, index); + } + ret.section(&exports); + } + + if let Some(start) = &start { + ret.section(start); + } + + if !code.is_empty() { + ret.section(&code); + } + + // Append a custom `name` section using the names of the functions that + // were found prior to the GC pass in the original module. 
+ let mut func_names = Vec::new(); + let mut global_names = Vec::new(); + for (i, _func) in self.live_funcs() { + let name = match self.func_names.get(&i) { + Some(name) => name, + None => continue, + }; + func_names.push((map.funcs.remap(i), *name)); + } + if start.is_some() { + func_names.push((num_funcs, "initialize_stack_pointer")); + } + for (i, _global) in self.live_globals() { + let name = match self.global_names.get(&i) { + Some(name) => name, + None => continue, + }; + global_names.push((map.globals.remap(i), *name)); + } + let mut section = Vec::new(); + let mut encode_subsection = |code: u8, names: &[(u32, &str)]| { + if names.is_empty() { + return; + } + let mut subsection = Vec::new(); + names.len().encode(&mut subsection); + for (i, name) in names { + i.encode(&mut subsection); + name.encode(&mut subsection); + } + section.push(code); + subsection.encode(&mut section); + }; + encode_subsection(0x01, &func_names); + encode_subsection(0x07, &global_names); + if !section.is_empty() { + ret.section(&wasm_encoder::CustomSection { + name: "name", + data: §ion, + }); + } + + Ok(ret.finish()) + } + + fn find_stack_pointer(&self) -> Result> { + let mutable_i32_globals = self + .live_globals() + .filter(|(_, g)| g.ty.mutable && g.ty.content_type == ValType::I32) + .collect::>(); + + // If there are no 32-bit mutable globals then there's definitely no + // stack pointer in this module + if mutable_i32_globals.is_empty() { + return Ok(None); + } + + // If there are some mutable 32-bit globals then there's currently no + // great way of determining which is the stack pointer. For now a hack + // is used where we use the name section to find the name that LLVM + // injects. This hopefully can be improved in the future. 
+ let stack_pointers = mutable_i32_globals + .iter() + .filter_map(|(i, _)| { + let name = *self.global_names.get(&i)?; + if name == "__stack_pointer" { + Some(*i) + } else { + None + } + }) + .collect::>(); + + match stack_pointers.len() { + 0 => Ok(None), + 1 => Ok(Some(stack_pointers[0])), + n => bail!("found {n} globals that look like the stack pointer"), + } + } +} + +// This helper macro is used to define a visitor of all instructions with +// special handling for all payloads of instructions to mark any referenced +// items live. +// +// Currently item identification happens through the field name of the payload. +// While not exactly the most robust solution this should work well enough for +// now. +macro_rules! define_visit { + ($(@$p:ident $op:ident $({ $($arg:ident: $argty:ty),* })? => $visit:ident)*) => { + $( + fn $visit(&mut self, _offset: usize $(, $($arg: $argty),*)?) { + $( + $( + define_visit!(mark_live self $arg $arg); + )* + )? + } + )* + }; + + (mark_live $self:ident $arg:ident type_index) => {$self.live_types.insert($arg);}; + (mark_live $self:ident $arg:ident src_table) => {$self.table($arg);}; + (mark_live $self:ident $arg:ident dst_table) => {$self.table($arg);}; + (mark_live $self:ident $arg:ident table_index) => {$self.table($arg);}; + (mark_live $self:ident $arg:ident table) => {$self.table($arg);}; + (mark_live $self:ident $arg:ident table_index) => {$self.table($arg);}; + (mark_live $self:ident $arg:ident global_index) => {$self.global($arg);}; + (mark_live $self:ident $arg:ident function_index) => {$self.func($arg);}; + (mark_live $self:ident $arg:ident mem) => {$self.memory($arg);}; + (mark_live $self:ident $arg:ident src_mem) => {$self.memory($arg);}; + (mark_live $self:ident $arg:ident dst_mem) => {$self.memory($arg);}; + (mark_live $self:ident $arg:ident memarg) => {$self.memory($arg.memory);}; + (mark_live $self:ident $arg:ident blockty) => {$self.blockty($arg);}; + (mark_live $self:ident $arg:ident lane) => {}; + (mark_live 
$self:ident $arg:ident lanes) => {}; + (mark_live $self:ident $arg:ident flags) => {}; + (mark_live $self:ident $arg:ident value) => {}; + (mark_live $self:ident $arg:ident mem_byte) => {}; + (mark_live $self:ident $arg:ident table_byte) => {}; + (mark_live $self:ident $arg:ident local_index) => {}; + (mark_live $self:ident $arg:ident relative_depth) => {}; + (mark_live $self:ident $arg:ident tag_index) => {}; + (mark_live $self:ident $arg:ident targets) => {}; + (mark_live $self:ident $arg:ident ty) => {}; + (mark_live $self:ident $arg:ident data_index) => {}; + (mark_live $self:ident $arg:ident elem_index) => {}; +} + +impl<'a> VisitOperator<'a> for Module<'a> { + type Output = (); + + wasmparser::for_each_operator!(define_visit); +} + +/// Helper function to filter `iter` based on the `live` set, yielding an +/// iterator over the index of the item that's live as well as the item itself. +fn live_iter<'a, T>( + live: &'a BitVec, + iter: impl Iterator + 'a, +) -> impl Iterator + 'a { + iter.enumerate().filter_map(|(i, t)| { + let i = i as u32; + if live.contains(i) { + Some((i, t)) + } else { + None + } + }) +} + +#[derive(Default)] +struct Encoder { + types: Remap, + funcs: Remap, + memories: Remap, + globals: Remap, + tables: Remap, + buf: Vec, +} + +impl Encoder { + fn operators(&mut self, mut reader: BinaryReader<'_>) -> Result> { + assert!(self.buf.is_empty()); + while !reader.eof() { + reader.visit_operator(self)?; + } + Ok(mem::take(&mut self.buf)) + } + + fn memarg(&self, ty: MemArg) -> wasm_encoder::MemArg { + wasm_encoder::MemArg { + offset: ty.offset, + align: ty.align.into(), + memory_index: self.memories.remap(ty.memory), + } + } + + fn blockty(&self, ty: BlockType) -> wasm_encoder::BlockType { + match ty { + BlockType::Empty => wasm_encoder::BlockType::Empty, + BlockType::Type(ty) => wasm_encoder::BlockType::Result(valty(ty)), + BlockType::FuncType(ty) => wasm_encoder::BlockType::FunctionType(self.types.remap(ty)), + } + } +} + +// This is a helper 
macro to translate all `wasmparser` instructions to +// `wasm-encoder` instructions without having to list out every single +// instruction itself. +// +// The general goal of this macro is to have O(unique instruction payload) +// number of cases while also simultaneously adapting between the styles of +// wasmparser and wasm-encoder. +macro_rules! define_encode { + ($(@$p:ident $op:ident $({ $($arg:ident: $argty:ty),* })? => $visit:ident)*) => { + $( + fn $visit(&mut self, _offset: usize $(, $($arg: $argty),*)?) { + #[allow(unused_imports)] + use wasm_encoder::Instruction::*; + $( + $( + let $arg = define_encode!(map self $arg $arg); + )* + )? + let insn = define_encode!(mk $op $($($arg)*)?); + insn.encode(&mut self.buf); + } + )* + }; + + // No-payload instructions are named the same in wasmparser as they are in + // wasm-encoder + (mk $op:ident) => ($op); + + // Instructions which need "special care" to map from wasmparser to + // wasm-encoder + (mk BrTable $arg:ident) => ({ + BrTable($arg.0, $arg.1) + }); + (mk CallIndirect $ty:ident $table:ident $table_byte:ident) => ({ + drop($table_byte); + CallIndirect { ty: $ty, table: $table } + }); + (mk ReturnCallIndirect $ty:ident $table:ident) => ( + ReturnCallIndirect { ty: $ty, table: $table } + ); + (mk MemorySize $mem:ident $mem_byte:ident) => ({ + drop($mem_byte); + MemorySize($mem) + }); + (mk MemoryGrow $mem:ident $mem_byte:ident) => ({ + drop($mem_byte); + MemoryGrow($mem) + }); + (mk I32Const $v:ident) => (I32Const($v)); + (mk I64Const $v:ident) => (I64Const($v)); + (mk F32Const $v:ident) => (F32Const(f32::from_bits($v.bits()))); + (mk F64Const $v:ident) => (F64Const(f64::from_bits($v.bits()))); + (mk V128Const $v:ident) => (V128Const($v.i128())); + + // Catch-all for the translation of one payload argument which is typically + // represented as a tuple-enum in wasm-encoder. 
+ (mk $op:ident $arg:ident) => ($op($arg)); + + // Catch-all of everything else where the wasmparser fields are simply + // translated to wasm-encoder fields. + (mk $op:ident $($arg:ident)*) => ($op { $($arg),* }); + + // Individual cases of mapping one argument type to another, similar to the + // `define_visit` macro above. + (map $self:ident $arg:ident memarg) => {$self.memarg($arg)}; + (map $self:ident $arg:ident blockty) => {$self.blockty($arg)}; + (map $self:ident $arg:ident tag_index) => {$arg}; + (map $self:ident $arg:ident relative_depth) => {$arg}; + (map $self:ident $arg:ident function_index) => {$self.funcs.remap($arg)}; + (map $self:ident $arg:ident global_index) => {$self.globals.remap($arg)}; + (map $self:ident $arg:ident mem) => {$self.memories.remap($arg)}; + (map $self:ident $arg:ident src_mem) => {$self.memories.remap($arg)}; + (map $self:ident $arg:ident dst_mem) => {$self.memories.remap($arg)}; + (map $self:ident $arg:ident table) => {$self.tables.remap($arg)}; + (map $self:ident $arg:ident table_index) => {$self.tables.remap($arg)}; + (map $self:ident $arg:ident src_table) => {$self.tables.remap($arg)}; + (map $self:ident $arg:ident dst_table) => {$self.tables.remap($arg)}; + (map $self:ident $arg:ident type_index) => {$self.types.remap($arg)}; + (map $self:ident $arg:ident ty) => {valty($arg)}; + (map $self:ident $arg:ident local_index) => {$arg}; + (map $self:ident $arg:ident lane) => {$arg}; + (map $self:ident $arg:ident lanes) => {$arg}; + (map $self:ident $arg:ident elem_index) => {$arg}; + (map $self:ident $arg:ident data_index) => {$arg}; + (map $self:ident $arg:ident table_byte) => {$arg}; + (map $self:ident $arg:ident mem_byte) => {$arg}; + (map $self:ident $arg:ident value) => {$arg}; + (map $self:ident $arg:ident targets) => (( + $arg.targets().map(|i| i.unwrap()).collect::>().into(), + $arg.default(), + )); +} + +impl<'a> VisitOperator<'a> for Encoder { + type Output = (); + + wasmparser::for_each_operator!(define_encode); +} + +fn 
valty(ty: wasmparser::ValType) -> wasm_encoder::ValType { + match ty { + wasmparser::ValType::I32 => wasm_encoder::ValType::I32, + wasmparser::ValType::I64 => wasm_encoder::ValType::I64, + wasmparser::ValType::F32 => wasm_encoder::ValType::F32, + wasmparser::ValType::F64 => wasm_encoder::ValType::F64, + wasmparser::ValType::V128 => wasm_encoder::ValType::V128, + wasmparser::ValType::FuncRef => wasm_encoder::ValType::FuncRef, + wasmparser::ValType::ExternRef => wasm_encoder::ValType::ExternRef, + } +} + +// Minimal definition of a bit vector necessary for the liveness calculations +// above. +mod bitvec { + use std::mem; + + type T = u64; + + #[derive(Default)] + pub struct BitVec { + bits: Vec, + } + + impl BitVec { + /// Inserts `idx` into this bit vector, returning whether it was not + /// previously present. + pub fn insert(&mut self, idx: u32) -> bool { + let (idx, bit) = idx_bit(idx); + match self.bits.get_mut(idx) { + Some(bits) => { + if *bits & bit != 0 { + return false; + } + *bits |= bit; + } + None => { + self.bits.resize(idx + 1, 0); + self.bits[idx] = bit; + } + } + true + } + + /// Returns whether this bit vector contains the specified `idx`th bit. + pub fn contains(&self, idx: u32) -> bool { + let (idx, bit) = idx_bit(idx); + match self.bits.get(idx) { + Some(bits) => (*bits & bit) != 0, + None => false, + } + } + } + + fn idx_bit(idx: u32) -> (usize, T) { + let idx = idx as usize; + let size = mem::size_of::() * 8; + let index = idx / size; + let bit = 1 << (idx % size); + (index, bit) + } +} + +/// Small data structure used to track index mappings from an old index space to +/// a new. +#[derive(Default)] +struct Remap { + /// Map, indexed by the old index set, to the new index set. + /// + /// Placeholders of `u32::MAX` means that the old index is not present in + /// the new index space. + map: Vec, + /// The next available index in the new index space. 
+ next: u32, +} + +impl Remap { + /// Appends a new live "old index" into this remapping structure. + /// + /// This will assign a new index for the old index provided. This method + /// must be called in increasing order of old indexes. + fn push(&mut self, old: u32) { + self.map.resize(old as usize, u32::MAX); + self.map.push(self.next); + self.next += 1; + } + + /// Returns the new index corresponding to an old index. + /// + /// Panics if the `old` index was not added via `push` above. + fn remap(&self, old: u32) -> u32 { + let ret = self.map[old as usize]; + assert!(ret != u32::MAX); + return ret; + } +} diff --git a/crates/wit-component/src/lib.rs b/crates/wit-component/src/lib.rs index 2036fab43..a1f51744e 100644 --- a/crates/wit-component/src/lib.rs +++ b/crates/wit-component/src/lib.rs @@ -11,6 +11,7 @@ use wit_parser::Interface; pub mod cli; mod decoding; mod encoding; +mod gc; mod printing; mod validation; diff --git a/crates/wit-component/src/validation.rs b/crates/wit-component/src/validation.rs index 9533b7db9..d42fc16ce 100644 --- a/crates/wit-component/src/validation.rs +++ b/crates/wit-component/src/validation.rs @@ -10,10 +10,6 @@ use wit_parser::{ Interface, }; -fn is_wasi(name: &str) -> bool { - name == "wasi_unstable" || name == "wasi_snapshot_preview1" -} - fn is_canonical_function(name: &str) -> bool { name.starts_with("cabi_") } @@ -43,19 +39,58 @@ fn wasm_sig_to_func_type(signature: WasmSignature) -> FuncType { ) } +/// Metadata about a validated module and what was found internally. +/// +/// All imports to the module are described by the union of `required_imports` +/// and `adapters_required`. +/// +/// This structure is created by the `validate_module` function. +#[derive(Default)] +pub struct ValidatedModule<'a> { + /// The required imports into this module which are to be satisfied by + /// imported component model instances. 
+ /// + /// The key of this map is the name of the interface that the module imports + /// from and the value is the set of functions required from that interface. + /// This is used to generate an appropriate instance import in the generated + /// component which imports only the set of required functions. + pub required_imports: IndexMap<&'a str, IndexSet<&'a str>>, + + /// This is the set of imports into the module which were not satisfied by + /// imported interfaces but are required to be satisfied by adapter modules. + /// + /// The key of this map is the name of the adapter that was imported into + /// the module and the value is a further map from function to function type + /// as required by this module. This map is used to shrink adapter modules + /// to the precise size required for this module by ensuring it doesn't + /// export (and subsequently import) extraneous functions. + pub adapters_required: IndexMap<&'a str, IndexMap<&'a str, FuncType>>, + + /// Whether or not this module exported a linear memory. + pub has_memory: bool, + + /// Whether or not this module exported a `cabi_realloc` function. + pub has_realloc: bool, +} + /// This function validates the following: -/// * The bytes represent a core WebAssembly module. -/// * The module's imports are all satisfied by the given import interfaces. -/// * The given default and exported interfaces are satisfied by the module's exports. /// -/// Returns a tuple of the set of imported interfaces required by the module, whether -/// the module exports a memory, and whether the module exports a realloc function. +/// * The `bytes` represent a valid core WebAssembly module. +/// * The module's imports are all satisfied by the given `imports` interfaces +/// or the `adapters` set. +/// * The given default and exported interfaces are satisfied by the module's +/// exports. +/// +/// The `ValidatedModule` return value contains the metadata which describes the +/// input module on success. 
This is then further used to generate a component +/// for this module. pub fn validate_module<'a>( bytes: &'a [u8], interface: &Option<&Interface>, imports: &[Interface], exports: &[Interface], -) -> Result<(IndexMap<&'a str, IndexSet<&'a str>>, bool, bool)> { + adapters: &IndexSet<&str>, +) -> Result> { let imports: IndexMap<&str, &Interface> = imports.iter().map(|i| (i.name.as_str(), i)).collect(); let exports: IndexMap<&str, &Interface> = @@ -65,8 +100,7 @@ pub fn validate_module<'a>( let mut types = None; let mut import_funcs = IndexMap::new(); let mut export_funcs = IndexMap::new(); - let mut has_memory = false; - let mut has_realloc = false; + let mut ret = ValidatedModule::default(); for payload in Parser::new(0).parse_all(bytes) { let payload = payload?; @@ -82,9 +116,6 @@ pub fn validate_module<'a>( Payload::ImportSection(s) => { for import in s { let import = import?; - if is_wasi(import.module) { - continue; - } match import.ty { TypeRef::Func(ty) => { let map = match import_funcs.entry(import.module) { @@ -107,7 +138,7 @@ pub fn validate_module<'a>( if is_canonical_function(export.name) { if export.name == "cabi_realloc" { // TODO: validate that the cabi_realloc function is [i32, i32, i32, i32] -> [i32] - has_realloc = true; + ret.has_realloc = true; } continue; } @@ -116,7 +147,7 @@ pub fn validate_module<'a>( } ExternalKind::Memory => { if export.name == "memory" { - has_memory = true; + ret.has_memory = true; } } _ => continue, @@ -137,6 +168,16 @@ pub fn validate_module<'a>( match imports.get(name) { Some(interface) => { validate_imported_interface(interface, name, funcs, &types)?; + let funcs = funcs.into_iter().map(|(f, _ty)| *f).collect(); + let prev = ret.required_imports.insert(name, funcs); + assert!(prev.is_none()); + } + None if adapters.contains(name) => { + let map = ret.adapters_required.entry(name).or_insert(IndexMap::new()); + for (func, ty) in funcs { + let ty = types.func_type_at(*ty).unwrap(); + map.insert(func, ty.clone()); + } } 
None => bail!("module requires an import interface named `{}`", name), } @@ -154,22 +195,182 @@ pub fn validate_module<'a>( validate_exported_interface(interface, Some(name), &export_funcs, &types)?; } - Ok(( - import_funcs - .into_iter() - .map(|(name, funcs)| (name, funcs.into_iter().map(|(f, _ty)| f).collect())) - .collect(), - has_memory, - has_realloc, - )) + Ok(ret) } -fn validate_imported_interface( - interface: &Interface, +/// Validation information from an "adapter module" which is distinct from a +/// "main module" validated above. +/// +/// This is created by the `validate_adapter_module` function. +#[derive(Default, Debug)] +pub struct ValidatedAdapter<'a> { + /// If specified then this is the name of the required interface imported + /// into the adapter module. + /// + /// At this time only one interface import is supported. If this is `None` + /// then the adapter module didn't import any component model functions to + /// implement the required functionality. + pub required_import: Option<&'a str>, + + /// This is the set of required functions imported from `required_import`, + /// if `required_import` is specified. + pub required_funcs: IndexSet<&'a str>, + + /// This is the module and field name of the memory import, if one is + /// specified. + /// + /// Due to LLVM codegen this is typically `env::memory` as a totally separate + /// import from the `required_import` above. + pub needs_memory: Option<(String, String)>, + + /// Flag for whether a `cabi_realloc` function was found within this module. + pub has_realloc: bool, +} + +/// This function will validate the `bytes` provided as a wasm adapter module. +/// Notably this will validate the wasm module itself in addition to ensuring +/// that it has the "shape" of an adapter module. 
Current constraints are: +/// +/// * The adapter module can import only one memory +/// * The adapter module can only import from the name of `interface` specified, +/// and all function imports must match the `required` types which correspond +/// to the lowered types of the functions in `interface`. +/// +/// The wasm module passed into this function is the output of the GC pass of an +/// adapter module's original source. This means that the adapter module is +/// already minimized and this is a double-check that the minimization pass +/// didn't accidentally break the wasm module. +pub fn validate_adapter_module<'a>( + bytes: &[u8], + interface: &'a Interface, + required: &IndexMap<&str, FuncType>, +) -> Result> { + let mut validator = Validator::new(); + let mut import_funcs = IndexMap::new(); + let mut export_funcs = IndexMap::new(); + let mut types = None; + let mut funcs = Vec::new(); + let mut ret = ValidatedAdapter::default(); + + for payload in Parser::new(0).parse_all(bytes) { + let payload = payload?; + match validator.payload(&payload)? { + ValidPayload::End(tys) => { + types = Some(tys); + break; + } + ValidPayload::Func(validator, body) => { + funcs.push((validator, body)); + } + _ => {} + } + + match payload { + Payload::Version { encoding, .. 
} if encoding != Encoding::Module => { + bail!("data is not a WebAssembly module"); + } + + Payload::ImportSection(s) => { + for import in s { + let import = import?; + match import.ty { + TypeRef::Func(ty) => { + let map = match import_funcs.entry(import.module) { + Entry::Occupied(e) => e.into_mut(), + Entry::Vacant(e) => e.insert(IndexMap::new()), + }; + + assert!(map.insert(import.name, ty).is_none()); + } + + // A memory is allowed to be imported into the adapter + // module so that's skipped here + TypeRef::Memory(_) => { + ret.needs_memory = + Some((import.module.to_string(), import.name.to_string())); + } + + _ => { + bail!("adapter module is only allowed to import functions and memories") + } + } + } + } + Payload::ExportSection(s) => { + for export in s { + let export = export?; + + match export.kind { + ExternalKind::Func => { + export_funcs.insert(export.name, export.index); + if export.name == "cabi_realloc" { + ret.has_realloc = true; + } + } + _ => continue, + } + } + } + _ => continue, + } + } + + let mut resources = Default::default(); + for (validator, body) in funcs { + let mut validator = validator.into_validator(resources); + validator.validate(&body)?; + resources = validator.into_allocations(); + } + + let types = types.unwrap(); + let mut import_funcs = import_funcs.iter(); + if let Some((name, funcs)) = import_funcs.next() { + if *name != interface.name { + bail!( + "adapter module imports from `{name}` which does not match \ + its interface `{}`", + interface.name + ); + } + ret.required_funcs = validate_imported_interface(interface, name, funcs, &types)?; + ret.required_import = Some(interface.name.as_str()); + + if let Some((name, _)) = import_funcs.next() { + bail!("adapter module cannot import from a second interface `{name}`") + } + } + + for (name, ty) in required { + let idx = match export_funcs.get(name) { + Some(idx) => *idx, + None => bail!("adapter module did not export `{name}`"), + }; + let actual = 
types.function_at(idx).unwrap(); + if ty == actual { + continue; + } + bail!( + "adapter module export `{name}` does not match the expected signature:\n\ + expected: {:?} -> {:?}\n\ + actual: {:?} -> {:?}\n\ + ", + ty.params(), + ty.results(), + actual.params(), + actual.results(), + ); + } + + Ok(ret) +} + +fn validate_imported_interface<'a>( + interface: &'a Interface, name: &str, imports: &IndexMap<&str, u32>, types: &Types, -) -> Result<()> { +) -> Result> { + let mut funcs = IndexSet::new(); for (func_name, ty) in imports { let f = interface .functions @@ -187,18 +388,20 @@ fn validate_imported_interface( let ty = types.func_type_at(*ty).unwrap(); if ty != &expected { bail!( - "type mismatch for function `{}` on imported interface `{}`: expected `{:?} -> {:?}` but found `{:?} -> {:?}`", - func_name, - name, - expected.params(), - expected.results(), - ty.params(), - ty.results() - ); + "type mismatch for function `{}` on imported interface `{}`: expected `{:?} -> {:?}` but found `{:?} -> {:?}`", + f.name, + name, + expected.params(), + expected.results(), + ty.params(), + ty.results() + ); } + + funcs.insert(f.name.as_str()); } - Ok(()) + Ok(funcs) } fn validate_exported_interface( diff --git a/crates/wit-component/tests/components.rs b/crates/wit-component/tests/components.rs index c95429327..ded443b0b 100644 --- a/crates/wit-component/tests/components.rs +++ b/crates/wit-component/tests/components.rs @@ -27,6 +27,32 @@ fn read_interfaces(dir: &Path, pattern: &str) -> Result> { .collect::>() } +fn read_adapters(dir: &Path) -> Result, Interface)>> { + glob::glob(dir.join("adapt-*.wat").to_str().unwrap())? 
+ .map(|p| { + let p = p?; + let adapter = + wat::parse_file(&p).with_context(|| format!("expected file `{}`", p.display()))?; + let stem = p.file_stem().unwrap().to_str().unwrap(); + let glob = format!("{stem}-import-*.wit"); + let wit = match glob::glob(dir.join(&glob).to_str().unwrap())?.next() { + Some(path) => path?, + None => bail!("failed to find `{glob}` match"), + }; + let mut i = read_interface(&wit)?; + i.name = wit + .file_stem() + .unwrap() + .to_str() + .unwrap() + .trim_start_matches(stem) + .trim_start_matches("-import-") + .to_string(); + Ok((stem.trim_start_matches("adapt-").to_string(), adapter, i)) + }) + .collect::>() +} + /// Tests the encoding of components. /// /// This test looks in the `components/` directory for test cases. @@ -53,6 +79,8 @@ fn read_interfaces(dir: &Path, pattern: &str) -> Result> { /// either `component.wat` or `error.txt` depending on the outcome of the encoding. #[test] fn component_encoding() -> Result<()> { + drop(env_logger::try_init()); + for entry in fs::read_dir("tests/components")? 
{ let path = entry?.path(); if !path.is_dir() { @@ -74,6 +102,7 @@ fn component_encoding() -> Result<()> { .transpose()?; let imports = read_interfaces(&path, "import-*.wit")?; let exports = read_interfaces(&path, "export-*.wit")?; + let adapters = read_adapters(&path)?; let mut encoder = ComponentEncoder::default() .module(&module) @@ -81,6 +110,10 @@ fn component_encoding() -> Result<()> { .exports(&exports) .validate(true); + for (name, wasm, interface) in adapters.iter() { + encoder = encoder.adapter(name, wasm, interface); + } + if let Some(interface) = &interface { encoder = encoder.interface(interface); } diff --git a/crates/wit-component/tests/components/adapt-inject-stack/adapt-old-import-new.wit b/crates/wit-component/tests/components/adapt-inject-stack/adapt-old-import-new.wit new file mode 100644 index 000000000..cf41681a2 --- /dev/null +++ b/crates/wit-component/tests/components/adapt-inject-stack/adapt-old-import-new.wit @@ -0,0 +1 @@ +get-two: func() -> (a: u32, b: u32) diff --git a/crates/wit-component/tests/components/adapt-inject-stack/adapt-old.wat b/crates/wit-component/tests/components/adapt-inject-stack/adapt-old.wat new file mode 100644 index 000000000..9699792d5 --- /dev/null +++ b/crates/wit-component/tests/components/adapt-inject-stack/adapt-old.wat @@ -0,0 +1,54 @@ +(module + (import "new" "get-two" (func $get_two (param i32))) + (import "env" "memory" (memory 0)) + + (global $__stack_pointer (mut i32) i32.const 0) + (global $some_other_mutable_global (mut i32) i32.const 0) + + ;; This is a sample adapter which is adapting between ABIs. This exact function + ;; signature is imported by `module.wat` and we're implementing it here with a + ;; canonical-abi function that returns two integers. The canonical ABI for + ;; returning two integers is different than the ABI of this function, hence + ;; the adapter here. + ;; + ;; The purpose of this test case is to exercise the `$__stack_pointer` global. 
+ ;; The stack pointer here needs to be initialized to something valid for + ;; this adapter module which is done with an injected `start` function into + ;; this adapter module when it's bundled into a component. + (func (export "get_sum") (result i32) + (local i32 i32) + + ;; Allocate 8 bytes of stack space for the two u32 return values. The + ;; original stack pointer is saved in local 0 and the stack frame for this + ;; function is saved in local 1. + global.get $__stack_pointer + local.tee 0 + i32.const 8 + i32.sub + local.tee 1 + global.set $__stack_pointer + + ;; Call the imported function which will return two u32 values into the + ;; return pointer specified here, our stack frame. + local.get 1 + call $get_two + + ;; Compute the result of this function by adding together the two return + ;; values. + (i32.add + (i32.load (local.get 1)) + (i32.load offset=4 (local.get 1))) + + ;; Test that if there is another mutable global in this module that it + ;; doesn't affect the detection of the stack pointer. This extra mutable + ;; global should not be initialized or tampered with as part of the + ;; initialize-the-stack-pointer injected function + (global.set $some_other_mutable_global (global.get $some_other_mutable_global)) + + ;; Restore the stack pointer to the value it was at prior to entering this + ;; function. 
+ local.get 0 + global.set $__stack_pointer + ) + +) diff --git a/crates/wit-component/tests/components/adapt-inject-stack/component.wat b/crates/wit-component/tests/components/adapt-inject-stack/component.wat new file mode 100644 index 000000000..f6d3df924 --- /dev/null +++ b/crates/wit-component/tests/components/adapt-inject-stack/component.wat @@ -0,0 +1,124 @@ +(component + (core module (;0;) + (type (;0;) (func (param i32))) + (type (;1;) (func (result i32))) + (type (;2;) (func)) + (import "env" "memory" (memory (;0;) 0)) + (import "new" "get-two" (func $get_two (;0;) (type 0))) + (func (;1;) (type 1) (result i32) + (local i32 i32) + global.get $__stack_pointer + local.tee 0 + i32.const 8 + i32.sub + local.tee 1 + global.set $__stack_pointer + local.get 1 + call $get_two + local.get 1 + i32.load + local.get 1 + i32.load offset=4 + i32.add + global.get $some_other_mutable_global + global.set $some_other_mutable_global + local.get 0 + global.set $__stack_pointer + ) + (func $initialize_stack_pointer (;2;) (type 2) + (local i32) + i32.const 1 + memory.grow + local.tee 0 + i32.const -1 + i32.eq + if ;; label = @1 + unreachable + end + local.get 0 + i32.const 1 + i32.add + i32.const 16 + i32.shl + global.set $__stack_pointer + ) + (global $__stack_pointer (;0;) (mut i32) i32.const 0) + (global $some_other_mutable_global (;1;) (mut i32) i32.const 0) + (export "get_sum" (func 1)) + (start $initialize_stack_pointer) + ) + (type (;0;) (func (result "a" u32) (result "b" u32))) + (type (;1;) + (instance + (alias outer 1 0 (type (;0;))) + (export "get-two" (func (type 0))) + ) + ) + (import "new" (instance (;0;) (type 1))) + (core module (;1;) + (type (;0;) (func (result i32))) + (import "old" "get_sum" (func (;0;) (type 0))) + (memory (;0;) 1) + (export "memory" (memory 0)) + ) + (core module (;2;) + (type (;0;) (func (param i32))) + (type (;1;) (func (result i32))) + (func (;0;) (type 0) (param i32) + local.get 0 + i32.const 0 + call_indirect (type 0) + ) + (func (;1;) 
(type 1) (result i32) + i32.const 1 + call_indirect (type 1) + ) + (table (;0;) 2 2 funcref) + (export "0" (func 0)) + (export "1" (func 1)) + (export "$imports" (table 0)) + ) + (core module (;3;) + (type (;0;) (func (param i32))) + (type (;1;) (func (result i32))) + (import "" "0" (func (;0;) (type 0))) + (import "" "1" (func (;1;) (type 1))) + (import "" "$imports" (table (;0;) 2 2 funcref)) + (elem (;0;) (i32.const 0) func 0 1) + ) + (core instance (;0;) (instantiate 2)) + (alias core export 0 "1" (core func (;0;))) + (core instance (;1;) + (export "get_sum" (func 0)) + ) + (core instance (;2;) (instantiate 1 + (with "old" (instance 1)) + ) + ) + (alias core export 2 "memory" (core memory (;0;))) + (core instance (;3;) + (export "memory" (memory 0)) + ) + (alias core export 0 "0" (core func (;1;))) + (core instance (;4;) + (export "get-two" (func 1)) + ) + (core instance (;5;) (instantiate 0 + (with "env" (instance 3)) + (with "new" (instance 4)) + ) + ) + (alias core export 0 "$imports" (core table (;0;))) + (alias export 0 "get-two" (func (;0;))) + (core func (;2;) (canon lower (func 0) (memory 0))) + (alias core export 5 "get_sum" (core func (;3;))) + (core instance (;6;) + (export "$imports" (table 0)) + (export "0" (func 2)) + (export "1" (func 3)) + ) + (core instance (;7;) (instantiate 3 + (with "" (instance 6)) + ) + ) +) \ No newline at end of file diff --git a/crates/wit-component/tests/components/adapt-inject-stack/module.wat b/crates/wit-component/tests/components/adapt-inject-stack/module.wat new file mode 100644 index 000000000..9fd5ac0fc --- /dev/null +++ b/crates/wit-component/tests/components/adapt-inject-stack/module.wat @@ -0,0 +1,4 @@ +(module + (import "old" "get_sum" (func (result i32))) + (memory (export "memory") 1) +) diff --git a/crates/wit-component/tests/components/adapt-list-return/adapt-old-import-new.wit b/crates/wit-component/tests/components/adapt-list-return/adapt-old-import-new.wit new file mode 100644 index 
000000000..494fd104a --- /dev/null +++ b/crates/wit-component/tests/components/adapt-list-return/adapt-old-import-new.wit @@ -0,0 +1 @@ +read: func() -> list diff --git a/crates/wit-component/tests/components/adapt-list-return/adapt-old.wat b/crates/wit-component/tests/components/adapt-list-return/adapt-old.wat new file mode 100644 index 000000000..560563352 --- /dev/null +++ b/crates/wit-component/tests/components/adapt-list-return/adapt-old.wat @@ -0,0 +1,14 @@ +(module + (import "new" "read" (func $read (param i32))) + (import "env" "memory" (memory 0)) + + (func (export "read") (param i32 i32) + i32.const 8 + call $read + unreachable + ) + + (func (export "cabi_realloc") (param i32 i32 i32 i32) (result i32) + unreachable + ) +) diff --git a/crates/wit-component/tests/components/adapt-list-return/component.wat b/crates/wit-component/tests/components/adapt-list-return/component.wat new file mode 100644 index 000000000..8813f7060 --- /dev/null +++ b/crates/wit-component/tests/components/adapt-list-return/component.wat @@ -0,0 +1,92 @@ +(component + (core module (;0;) + (type (;0;) (func (param i32))) + (type (;1;) (func (param i32 i32))) + (type (;2;) (func (param i32 i32 i32 i32) (result i32))) + (import "new" "read" (func $read (;0;) (type 0))) + (func (;1;) (type 1) (param i32 i32) + i32.const 8 + call $read + unreachable + ) + (func (;2;) (type 2) (param i32 i32 i32 i32) (result i32) + unreachable + ) + (export "read" (func 1)) + (export "cabi_realloc" (func 2)) + ) + (type (;0;) (list u8)) + (type (;1;) (func (result 0))) + (type (;2;) + (instance + (alias outer 1 1 (type (;0;))) + (export "read" (func (type 0))) + ) + ) + (import "new" (instance (;0;) (type 2))) + (core module (;1;) + (type (;0;) (func (param i32 i32))) + (import "old" "read" (func (;0;) (type 0))) + (memory (;0;) 1) + (export "memory" (memory 0)) + ) + (core module (;2;) + (type (;0;) (func (param i32))) + (type (;1;) (func (param i32 i32))) + (func (;0;) (type 0) (param i32) + local.get 0 
+ i32.const 0 + call_indirect (type 0) + ) + (func (;1;) (type 1) (param i32 i32) + local.get 0 + local.get 1 + i32.const 1 + call_indirect (type 1) + ) + (table (;0;) 2 2 funcref) + (export "0" (func 0)) + (export "1" (func 1)) + (export "$imports" (table 0)) + ) + (core module (;3;) + (type (;0;) (func (param i32))) + (type (;1;) (func (param i32 i32))) + (import "" "0" (func (;0;) (type 0))) + (import "" "1" (func (;1;) (type 1))) + (import "" "$imports" (table (;0;) 2 2 funcref)) + (elem (;0;) (i32.const 0) func 0 1) + ) + (core instance (;0;) (instantiate 2)) + (alias core export 0 "1" (core func (;0;))) + (core instance (;1;) + (export "read" (func 0)) + ) + (core instance (;2;) (instantiate 1 + (with "old" (instance 1)) + ) + ) + (alias core export 2 "memory" (core memory (;0;))) + (alias core export 0 "0" (core func (;1;))) + (core instance (;3;) + (export "read" (func 1)) + ) + (core instance (;4;) (instantiate 0 + (with "new" (instance 3)) + ) + ) + (alias core export 4 "cabi_realloc" (core func (;2;))) + (alias core export 0 "$imports" (core table (;0;))) + (alias export 0 "read" (func (;0;))) + (core func (;3;) (canon lower (func 0) (memory 0) (realloc 2))) + (alias core export 4 "read" (core func (;4;))) + (core instance (;5;) + (export "$imports" (table 0)) + (export "0" (func 3)) + (export "1" (func 4)) + ) + (core instance (;6;) (instantiate 3 + (with "" (instance 5)) + ) + ) +) \ No newline at end of file diff --git a/crates/wit-component/tests/components/adapt-list-return/module.wat b/crates/wit-component/tests/components/adapt-list-return/module.wat new file mode 100644 index 000000000..e68c95b08 --- /dev/null +++ b/crates/wit-component/tests/components/adapt-list-return/module.wat @@ -0,0 +1,4 @@ +(module + (import "old" "read" (func (param i32 i32))) + (memory (export "memory") 1) +) diff --git a/crates/wit-component/tests/components/adapt-memory-simple/adapt-old-import-new.wit 
b/crates/wit-component/tests/components/adapt-memory-simple/adapt-old-import-new.wit new file mode 100644 index 000000000..7f5b4d13d --- /dev/null +++ b/crates/wit-component/tests/components/adapt-memory-simple/adapt-old-import-new.wit @@ -0,0 +1 @@ +log: func(s: string) diff --git a/crates/wit-component/tests/components/adapt-memory-simple/adapt-old.wat b/crates/wit-component/tests/components/adapt-memory-simple/adapt-old.wat new file mode 100644 index 000000000..deb5d349d --- /dev/null +++ b/crates/wit-component/tests/components/adapt-memory-simple/adapt-old.wat @@ -0,0 +1,4 @@ +(module + (import "new" "log" (func $log (param i32 i32))) + (export "log" (func $log)) +) diff --git a/crates/wit-component/tests/components/adapt-memory-simple/component.wat b/crates/wit-component/tests/components/adapt-memory-simple/component.wat new file mode 100644 index 000000000..e2799b89f --- /dev/null +++ b/crates/wit-component/tests/components/adapt-memory-simple/component.wat @@ -0,0 +1,78 @@ +(component + (core module (;0;) + (type (;0;) (func (param i32 i32))) + (import "new" "log" (func $log (;0;) (type 0))) + (export "log" (func $log)) + ) + (type (;0;) (func (param "s" string))) + (type (;1;) + (instance + (alias outer 1 0 (type (;0;))) + (export "log" (func (type 0))) + ) + ) + (import "new" (instance (;0;) (type 1))) + (core module (;1;) + (type (;0;) (func (param i32 i32))) + (import "old" "log" (func (;0;) (type 0))) + (memory (;0;) 1) + (export "memory" (memory 0)) + ) + (core module (;2;) + (type (;0;) (func (param i32 i32))) + (func (;0;) (type 0) (param i32 i32) + local.get 0 + local.get 1 + i32.const 0 + call_indirect (type 0) + ) + (func (;1;) (type 0) (param i32 i32) + local.get 0 + local.get 1 + i32.const 1 + call_indirect (type 0) + ) + (table (;0;) 2 2 funcref) + (export "0" (func 0)) + (export "1" (func 1)) + (export "$imports" (table 0)) + ) + (core module (;3;) + (type (;0;) (func (param i32 i32))) + (import "" "0" (func (;0;) (type 0))) + (import "" "1" 
(func (;1;) (type 0))) + (import "" "$imports" (table (;0;) 2 2 funcref)) + (elem (;0;) (i32.const 0) func 0 1) + ) + (core instance (;0;) (instantiate 2)) + (alias core export 0 "1" (core func (;0;))) + (core instance (;1;) + (export "log" (func 0)) + ) + (core instance (;2;) (instantiate 1 + (with "old" (instance 1)) + ) + ) + (alias core export 2 "memory" (core memory (;0;))) + (alias core export 0 "0" (core func (;1;))) + (core instance (;3;) + (export "log" (func 1)) + ) + (core instance (;4;) (instantiate 0 + (with "new" (instance 3)) + ) + ) + (alias core export 0 "$imports" (core table (;0;))) + (alias export 0 "log" (func (;0;))) + (core func (;2;) (canon lower (func 0) (memory 0) string-encoding=utf8)) + (alias core export 4 "log" (core func (;3;))) + (core instance (;5;) + (export "$imports" (table 0)) + (export "0" (func 2)) + (export "1" (func 3)) + ) + (core instance (;6;) (instantiate 3 + (with "" (instance 5)) + ) + ) +) \ No newline at end of file diff --git a/crates/wit-component/tests/components/adapt-memory-simple/module.wat b/crates/wit-component/tests/components/adapt-memory-simple/module.wat new file mode 100644 index 000000000..5bdcb0eaf --- /dev/null +++ b/crates/wit-component/tests/components/adapt-memory-simple/module.wat @@ -0,0 +1,4 @@ +(module + (import "old" "log" (func (param i32 i32))) + (memory (export "memory") 1) +) diff --git a/crates/wit-component/tests/components/adapt-missing-memory/adapt-old-import-new.wit b/crates/wit-component/tests/components/adapt-missing-memory/adapt-old-import-new.wit new file mode 100644 index 000000000..7f5b4d13d --- /dev/null +++ b/crates/wit-component/tests/components/adapt-missing-memory/adapt-old-import-new.wit @@ -0,0 +1 @@ +log: func(s: string) diff --git a/crates/wit-component/tests/components/adapt-missing-memory/adapt-old.wat b/crates/wit-component/tests/components/adapt-missing-memory/adapt-old.wat new file mode 100644 index 000000000..deb5d349d --- /dev/null +++ 
b/crates/wit-component/tests/components/adapt-missing-memory/adapt-old.wat @@ -0,0 +1,4 @@ +(module + (import "new" "log" (func $log (param i32 i32))) + (export "log" (func $log)) +) diff --git a/crates/wit-component/tests/components/adapt-missing-memory/error.txt b/crates/wit-component/tests/components/adapt-missing-memory/error.txt new file mode 100644 index 000000000..33946ad75 --- /dev/null +++ b/crates/wit-component/tests/components/adapt-missing-memory/error.txt @@ -0,0 +1 @@ +module does not export a memory named `memory` \ No newline at end of file diff --git a/crates/wit-component/tests/components/adapt-missing-memory/module.wat b/crates/wit-component/tests/components/adapt-missing-memory/module.wat new file mode 100644 index 000000000..1a705ff1a --- /dev/null +++ b/crates/wit-component/tests/components/adapt-missing-memory/module.wat @@ -0,0 +1,3 @@ +(module + (import "old" "log" (func (param i32 i32))) +) diff --git a/crates/wit-component/tests/components/adapt-preview1/adapt-wasi_snapshot_preview1-import-my_wasi.wit b/crates/wit-component/tests/components/adapt-preview1/adapt-wasi_snapshot_preview1-import-my_wasi.wit new file mode 100644 index 000000000..34edd66dc --- /dev/null +++ b/crates/wit-component/tests/components/adapt-preview1/adapt-wasi_snapshot_preview1-import-my_wasi.wit @@ -0,0 +1,6 @@ +// This is the interface imported by the `adapt-*.wat` file which is used +// to implement the `wasi_snapshot_preview1` interface. 
+ +random-get: func(size: u32) -> list +proc-exit: func(code: u32) +something-not-used: func() diff --git a/crates/wit-component/tests/components/adapt-preview1/adapt-wasi_snapshot_preview1.wat b/crates/wit-component/tests/components/adapt-preview1/adapt-wasi_snapshot_preview1.wat new file mode 100644 index 000000000..b5a214409 --- /dev/null +++ b/crates/wit-component/tests/components/adapt-preview1/adapt-wasi_snapshot_preview1.wat @@ -0,0 +1,13 @@ +;; this is a polyfill module that translates from wasi-preview1 to a different +;; interface + +(module + (import "my_wasi" "proc-exit" (func $proc_exit (param i32))) + (func (export "proc_exit") (param i32) + local.get 0 + call $proc_exit + ) + (func (export "random_get") (param i32 i32) (result i32) + i32.const 0) + (func (export "something_else")) +) diff --git a/crates/wit-component/tests/components/adapt-preview1/component.wat b/crates/wit-component/tests/components/adapt-preview1/component.wat new file mode 100644 index 000000000..0e74a9222 --- /dev/null +++ b/crates/wit-component/tests/components/adapt-preview1/component.wat @@ -0,0 +1,108 @@ +(component + (core module (;0;) + (type (;0;) (func (param i32))) + (type (;1;) (func (param i32 i32) (result i32))) + (import "my_wasi" "proc-exit" (func $proc_exit (;0;) (type 0))) + (func (;1;) (type 0) (param i32) + local.get 0 + call $proc_exit + ) + (func (;2;) (type 1) (param i32 i32) (result i32) + i32.const 0 + ) + (export "proc_exit" (func 1)) + (export "random_get" (func 2)) + ) + (type (;0;) (func)) + (type (;1;) + (instance + (alias outer 1 0 (type (;0;))) + (export "foo" (func (type 0))) + ) + ) + (type (;2;) (func (param "code" u32))) + (type (;3;) + (instance + (alias outer 1 2 (type (;0;))) + (export "proc-exit" (func (type 0))) + ) + ) + (import "foo" (instance (;0;) (type 1))) + (import "my_wasi" (instance (;1;) (type 3))) + (core module (;1;) + (type (;0;) (func)) + (type (;1;) (func (param i32))) + (type (;2;) (func (param i32 i32) (result i32))) + 
(import "foo" "foo" (func (;0;) (type 0))) + (import "wasi_snapshot_preview1" "proc_exit" (func (;1;) (type 1))) + (import "wasi_snapshot_preview1" "random_get" (func (;2;) (type 2))) + (memory (;0;) 1) + (export "memory" (memory 0)) + ) + (core module (;2;) + (type (;0;) (func (param i32))) + (type (;1;) (func (param i32 i32) (result i32))) + (func (;0;) (type 0) (param i32) + local.get 0 + i32.const 0 + call_indirect (type 0) + ) + (func (;1;) (type 1) (param i32 i32) (result i32) + local.get 0 + local.get 1 + i32.const 1 + call_indirect (type 1) + ) + (table (;0;) 2 2 funcref) + (export "0" (func 0)) + (export "1" (func 1)) + (export "$imports" (table 0)) + ) + (core module (;3;) + (type (;0;) (func (param i32))) + (type (;1;) (func (param i32 i32) (result i32))) + (import "" "0" (func (;0;) (type 0))) + (import "" "1" (func (;1;) (type 1))) + (import "" "$imports" (table (;0;) 2 2 funcref)) + (elem (;0;) (i32.const 0) func 0 1) + ) + (core instance (;0;) (instantiate 2)) + (alias export 0 "foo" (func (;0;))) + (core func (;0;) (canon lower (func 0))) + (core instance (;1;) + (export "foo" (func 0)) + ) + (alias core export 0 "0" (core func (;1;))) + (alias core export 0 "1" (core func (;2;))) + (core instance (;2;) + (export "proc_exit" (func 1)) + (export "random_get" (func 2)) + ) + (core instance (;3;) (instantiate 1 + (with "foo" (instance 1)) + (with "wasi_snapshot_preview1" (instance 2)) + ) + ) + (alias core export 3 "memory" (core memory (;0;))) + (alias export 1 "proc-exit" (func (;1;))) + (core func (;3;) (canon lower (func 1))) + (core instance (;4;) + (export "proc-exit" (func 3)) + ) + (core instance (;5;) (instantiate 0 + (with "my_wasi" (instance 4)) + ) + ) + (alias core export 0 "$imports" (core table (;0;))) + (alias core export 5 "proc_exit" (core func (;4;))) + (alias core export 5 "random_get" (core func (;5;))) + (core instance (;6;) + (export "$imports" (table 0)) + (export "0" (func 4)) + (export "1" (func 5)) + ) + (core instance (;7;) 
(instantiate 3 + (with "" (instance 6)) + ) + ) +) \ No newline at end of file diff --git a/crates/wit-component/tests/components/adapt-preview1/import-foo.wit b/crates/wit-component/tests/components/adapt-preview1/import-foo.wit new file mode 100644 index 000000000..7c4c5bfc3 --- /dev/null +++ b/crates/wit-component/tests/components/adapt-preview1/import-foo.wit @@ -0,0 +1 @@ +foo: func() diff --git a/crates/wit-component/tests/components/adapt-preview1/module.wat b/crates/wit-component/tests/components/adapt-preview1/module.wat new file mode 100644 index 000000000..370ede67a --- /dev/null +++ b/crates/wit-component/tests/components/adapt-preview1/module.wat @@ -0,0 +1,11 @@ +(module + ;; import something from an external interface + (import "foo" "foo" (func)) + + ;; import some wasi functions + (import "wasi_snapshot_preview1" "proc_exit" (func (param i32))) + (import "wasi_snapshot_preview1" "random_get" (func (param i32 i32) (result i32))) + + ;; required by wasi + (memory (export "memory") 1) +) diff --git a/crates/wit-component/tests/components/adapt-unused/adapt-old-import-new.wit b/crates/wit-component/tests/components/adapt-unused/adapt-old-import-new.wit new file mode 100644 index 000000000..7f5b4d13d --- /dev/null +++ b/crates/wit-component/tests/components/adapt-unused/adapt-old-import-new.wit @@ -0,0 +1 @@ +log: func(s: string) diff --git a/crates/wit-component/tests/components/adapt-unused/adapt-old.wat b/crates/wit-component/tests/components/adapt-unused/adapt-old.wat new file mode 100644 index 000000000..deb5d349d --- /dev/null +++ b/crates/wit-component/tests/components/adapt-unused/adapt-old.wat @@ -0,0 +1,4 @@ +(module + (import "new" "log" (func $log (param i32 i32))) + (export "log" (func $log)) +) diff --git a/crates/wit-component/tests/components/adapt-unused/component.wat b/crates/wit-component/tests/components/adapt-unused/component.wat new file mode 100644 index 000000000..3f194a21b --- /dev/null +++ 
b/crates/wit-component/tests/components/adapt-unused/component.wat @@ -0,0 +1,4 @@ +(component + (core module (;0;)) + (core instance (;0;) (instantiate 0)) +) \ No newline at end of file diff --git a/crates/wit-component/tests/components/adapt-unused/module.wat b/crates/wit-component/tests/components/adapt-unused/module.wat new file mode 100644 index 000000000..3af8f2545 --- /dev/null +++ b/crates/wit-component/tests/components/adapt-unused/module.wat @@ -0,0 +1 @@ +(module) diff --git a/crates/wit-component/tests/components/import-conflict/component.wat b/crates/wit-component/tests/components/import-conflict/component.wat index 5502d9d28..81d11445a 100644 --- a/crates/wit-component/tests/components/import-conflict/component.wat +++ b/crates/wit-component/tests/components/import-conflict/component.wat @@ -70,23 +70,23 @@ (elem (;0;) (i32.const 0) func 0 1) ) (core instance (;0;) (instantiate 1)) - (alias core export 0 "0" (core func (;0;))) + (alias export 2 "a" (func (;0;))) + (core func (;0;) (canon lower (func 0))) (core instance (;1;) (export "a" (func 0)) ) - (alias core export 0 "1" (core func (;1;))) + (alias core export 0 "0" (core func (;1;))) (core instance (;2;) - (export "baz" (func 1)) + (export "a" (func 1)) ) - (alias export 2 "a" (func (;0;))) - (core func (;2;) (canon lower (func 0))) + (alias core export 0 "1" (core func (;2;))) (core instance (;3;) - (export "a" (func 2)) + (export "baz" (func 2)) ) (core instance (;4;) (instantiate 0 - (with "bar" (instance 1)) - (with "baz" (instance 2)) - (with "foo" (instance 3)) + (with "foo" (instance 1)) + (with "bar" (instance 2)) + (with "baz" (instance 3)) ) ) (alias core export 4 "memory" (core memory (;0;))) diff --git a/crates/wit-component/tests/components/imports/component.wat b/crates/wit-component/tests/components/imports/component.wat index c6a1f36b8..48ee50866 100644 --- a/crates/wit-component/tests/components/imports/component.wat +++ 
b/crates/wit-component/tests/components/imports/component.wat @@ -88,38 +88,38 @@ (elem (;0;) (i32.const 0) func 0 1) ) (core instance (;0;) (instantiate 1)) - (alias core export 0 "0" (core func (;0;))) - (alias export 0 "bar2" (func (;0;))) - (core func (;1;) (canon lower (func 0))) + (alias export 2 "foo1" (func (;0;))) + (core func (;0;) (canon lower (func 0))) + (alias export 2 "foo2" (func (;1;))) + (core func (;1;) (canon lower (func 1))) + (alias export 2 "foo3" (func (;2;))) + (core func (;2;) (canon lower (func 2))) (core instance (;1;) - (export "bar1" (func 0)) - (export "bar2" (func 1)) + (export "foo1" (func 0)) + (export "foo2" (func 1)) + (export "foo3" (func 2)) ) - (alias core export 0 "1" (core func (;2;))) - (alias export 1 "baz2" (func (;1;))) - (core func (;3;) (canon lower (func 1))) - (alias export 1 "baz3" (func (;2;))) - (core func (;4;) (canon lower (func 2))) + (alias core export 0 "0" (core func (;3;))) + (alias export 0 "bar2" (func (;3;))) + (core func (;4;) (canon lower (func 3))) (core instance (;2;) - (export "baz1" (func 2)) - (export "baz2" (func 3)) - (export "baz3" (func 4)) + (export "bar1" (func 3)) + (export "bar2" (func 4)) ) - (alias export 2 "foo1" (func (;3;))) - (core func (;5;) (canon lower (func 3))) - (alias export 2 "foo2" (func (;4;))) + (alias core export 0 "1" (core func (;5;))) + (alias export 1 "baz2" (func (;4;))) (core func (;6;) (canon lower (func 4))) - (alias export 2 "foo3" (func (;5;))) + (alias export 1 "baz3" (func (;5;))) (core func (;7;) (canon lower (func 5))) (core instance (;3;) - (export "foo1" (func 5)) - (export "foo2" (func 6)) - (export "foo3" (func 7)) + (export "baz1" (func 5)) + (export "baz2" (func 6)) + (export "baz3" (func 7)) ) (core instance (;4;) (instantiate 0 - (with "bar" (instance 1)) - (with "baz" (instance 2)) - (with "foo" (instance 3)) + (with "foo" (instance 1)) + (with "bar" (instance 2)) + (with "baz" (instance 3)) ) ) (alias core export 4 "memory" (core memory (;0;)))