From ea065eafe49d2b83fb45593a96748dd205ea8773 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 27 Sep 2022 11:39:46 -0700 Subject: [PATCH 01/10] Add support to `wit-component` to polyfill WASI This commit is an addition to the `wit-component` tool to be able to polyfill WASI imports today using `wasi_snapshot_preview1` with a component-model-using interface in the future. This is a large extension to the functionality of `wit-component` internally since the generated component is much "fancier". The support in this commit is modeled as the addition of "adapter modules" into the `wit-component` tool. An adapter module is understood to translate from some core-wasm ABI into a component-model-using ABI. The intention is that for any previous API prior to the component model an adapter module could be written which would translate from the prior API to the new API. For example in WASI today there is: (@interface func (export "random_get") (param $buf (@witx pointer u8)) (param $buf_len $size) (result $error (expected (error $errno))) ) whereas a component-model-using API would look more like: random-get: func(size: u32) -> list<u8> This component-model version can be adapted with a module such as: (module $wasi_snapshot_preview1 (import "new-wasi" "random_get" (func $new_random_get (param i32 i32))) (import "env" "memory" (memory 0)) (global $last_ptr (mut i32) i32.const 0) (func (export "random_get") (param i32 i32) (result i32) ;; store buffer pointer in a saved global for `cabi_realloc` ;; later (global.set $last_ptr (local.get 0)) ;; 1st argument: the `size: u32` local.get 1 ;; 2nd argument: return pointer for `list<u8>` i32.const 8 call $new_random_get ;; return a "success" return code i32.const 0 ) ;; When the canonical ABI allocates space for the list return value ;; return the original buffer pointer to place it directly in the ;; target buffer (func (export "cabi_realloc") (param i32 i32 i32 i32) (result i32) global.get $last_ptr) ) Using this adapter module 
the internal structure of the generated component can be arranged such that everything is wired up in all the right places meaning that when the original module calls `wasi_snapshot_preview1::random_get` it actually calls this shim module which then calls the actual `new-wasi::random_get` import. There are a few details I'm glossing over here like the stack used by the shim module but this suffices to describe the general shape. My plan in the future is to use this support to generate a component from all test cases that this repository supports. That means that, specifically for `wit-bindgen` tests, a fresh new interface representing "future WASI" will be created and the WASI functions used by tests will be adapted via this adapter module. In this manner components will now be generated for all tests and then the next step is #314, actually ingesting these components into hosts. --- Cargo.lock | 7 +- crates/test-helpers/Cargo.toml | 1 + crates/test-helpers/build.rs | 16 +- crates/wit-component/Cargo.toml | 3 + crates/wit-component/src/adapter.rs | 19 + crates/wit-component/src/adapter/gc.rs | 884 ++++++++++++++++++ crates/wit-component/src/encoding.rs | 480 +++++++--- crates/wit-component/src/lib.rs | 1 + crates/wit-component/src/validation.rs | 199 +++- crates/wit-component/tests/components.rs | 33 + .../adapt-old-import-new.wit | 1 + .../adapt-list-return/adapt-old.wat | 14 + .../adapt-list-return/component.wat | 92 ++ .../components/adapt-list-return/module.wat | 4 + .../adapt-old-import-new.wit | 1 + .../adapt-memory-simple/adapt-old.wat | 4 + .../adapt-memory-simple/component.wat | 78 ++ .../components/adapt-memory-simple/module.wat | 4 + .../adapt-old-import-new.wit | 1 + .../adapt-missing-memory/adapt-old.wat | 4 + .../components/adapt-missing-memory/error.txt | 1 + .../adapt-missing-memory/module.wat | 3 + ...-wasi_snapshot_preview1-import-my_wasi.wit | 6 + .../adapt-wasi_snapshot_preview1.wat | 13 + .../components/adapt-preview1/component.wat | 108 +++ 
.../components/adapt-preview1/import-foo.wit | 1 + .../components/adapt-preview1/module.wat | 11 + .../components/import-conflict/component.wat | 18 +- .../tests/components/imports/component.wat | 46 +- 29 files changed, 1871 insertions(+), 182 deletions(-) create mode 100644 crates/wit-component/src/adapter.rs create mode 100644 crates/wit-component/src/adapter/gc.rs create mode 100644 crates/wit-component/tests/components/adapt-list-return/adapt-old-import-new.wit create mode 100644 crates/wit-component/tests/components/adapt-list-return/adapt-old.wat create mode 100644 crates/wit-component/tests/components/adapt-list-return/component.wat create mode 100644 crates/wit-component/tests/components/adapt-list-return/module.wat create mode 100644 crates/wit-component/tests/components/adapt-memory-simple/adapt-old-import-new.wit create mode 100644 crates/wit-component/tests/components/adapt-memory-simple/adapt-old.wat create mode 100644 crates/wit-component/tests/components/adapt-memory-simple/component.wat create mode 100644 crates/wit-component/tests/components/adapt-memory-simple/module.wat create mode 100644 crates/wit-component/tests/components/adapt-missing-memory/adapt-old-import-new.wit create mode 100644 crates/wit-component/tests/components/adapt-missing-memory/adapt-old.wat create mode 100644 crates/wit-component/tests/components/adapt-missing-memory/error.txt create mode 100644 crates/wit-component/tests/components/adapt-missing-memory/module.wat create mode 100644 crates/wit-component/tests/components/adapt-preview1/adapt-wasi_snapshot_preview1-import-my_wasi.wit create mode 100644 crates/wit-component/tests/components/adapt-preview1/adapt-wasi_snapshot_preview1.wat create mode 100644 crates/wit-component/tests/components/adapt-preview1/component.wat create mode 100644 crates/wit-component/tests/components/adapt-preview1/import-foo.wit create mode 100644 crates/wit-component/tests/components/adapt-preview1/module.wat diff --git a/Cargo.lock b/Cargo.lock 
index 878616559..49692912d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1371,6 +1371,7 @@ dependencies = [ "wit-bindgen-gen-host-js", "wit-bindgen-gen-host-wasmtime-py", "wit-bindgen-gen-host-wasmtime-rust", + "wit-component", "wit-parser", ] @@ -1584,8 +1585,7 @@ dependencies = [ [[package]] name = "wasm-encoder" version = "0.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e7ca71c70a6de5b10968ae4d298e548366d9cd9588176e6ff8866f3c49c96ee" +source = "git+https://github.com/alexcrichton/wasm-tools?branch=change-some-names#fbe9292d713440a285ae1fd0d037ada6dd502a8f" dependencies = [ "leb128", ] @@ -1602,8 +1602,7 @@ dependencies = [ [[package]] name = "wasmparser" version = "0.91.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "239cdca8b8f356af8118c522d5fea23da45b60832ed4e18ef90bb3c9d8dce24a" +source = "git+https://github.com/alexcrichton/wasm-tools?branch=change-some-names#fbe9292d713440a285ae1fd0d037ada6dd502a8f" dependencies = [ "indexmap", ] diff --git a/crates/test-helpers/Cargo.toml b/crates/test-helpers/Cargo.toml index 6f596ec53..28cf904b0 100644 --- a/crates/test-helpers/Cargo.toml +++ b/crates/test-helpers/Cargo.toml @@ -30,6 +30,7 @@ filetime = "0.2" wit-bindgen-gen-guest-c = { workspace = true } wit-bindgen-gen-guest-teavm-java = { workspace = true } wit-bindgen-core = { workspace = true } +wit-component = { workspace = true } [features] default = ['guest-rust', 'guest-c', 'guest-teavm-java', 'host-js', 'host-wasmtime-py', 'host-wasmtime-rust'] diff --git a/crates/test-helpers/build.rs b/crates/test-helpers/build.rs index 93da1f183..ea88e1601 100644 --- a/crates/test-helpers/build.rs +++ b/crates/test-helpers/build.rs @@ -61,14 +61,16 @@ fn main() { let import = Interface::parse_file(&test_dir.join("imports.wit")).unwrap(); let export = Interface::parse_file(&test_dir.join("exports.wit")).unwrap(); + let imports = &[import]; + let exports = &[export]; let mut files = Default::default(); // 
TODO: should combine this into one wit_bindgen_gen_guest_c::Opts::default() .build() - .generate_all(&[import], &[], &mut files); + .generate_all(imports, &[], &mut files); wit_bindgen_gen_guest_c::Opts::default() .build() - .generate_all(&[], &[export], &mut files); + .generate_all(&[], exports, &mut files); let out_dir = out_dir.join(format!( "c-{}", @@ -120,6 +122,16 @@ fn main() { test_dir.file_stem().unwrap().to_str().unwrap().to_string(), out_wasm.to_str().unwrap().to_string(), )); + + let wasm = std::fs::read(&out_wasm).unwrap(); + wit_component::ComponentEncoder::default() + .validate(true) + .module(&wasm) + .imports(imports) + .interface(&exports[0]) + .wasi(true) + .encode() + .unwrap(); } } diff --git a/crates/wit-component/Cargo.toml b/crates/wit-component/Cargo.toml index 9c260db16..bb2c4654f 100644 --- a/crates/wit-component/Cargo.toml +++ b/crates/wit-component/Cargo.toml @@ -31,10 +31,13 @@ env_logger = { version = "0.9.1", optional = true } log = { version = "0.4.17", optional = true } bitflags = { workspace = true } +wasmprinter = "*" + [dev-dependencies] wasmprinter = { workspace = true } glob = "0.3.0" pretty_assertions = "1.3.0" +env_logger = "0.9.1" [features] default = ["cli"] diff --git a/crates/wit-component/src/adapter.rs b/crates/wit-component/src/adapter.rs new file mode 100644 index 000000000..65525f598 --- /dev/null +++ b/crates/wit-component/src/adapter.rs @@ -0,0 +1,19 @@ +use crate::validation::ValidatedAdapter; +use anyhow::{Context, Result}; +use indexmap::IndexMap; +use wasmparser::FuncType; +use wit_parser::Interface; + +mod gc; + +pub fn adapt<'a>( + wasm: &[u8], + interface: &'a Interface, + required: &IndexMap<&str, FuncType>, +) -> Result<(Vec, ValidatedAdapter<'a>)> { + let wasm = gc::run(wasm, required) + .context("failed to reduce input adapter module to its minimal size")?; + let info = crate::validation::validate_adapter_module(&wasm, interface, required) + .context("failed to validate the imports of the minimized 
adapter module")?; + Ok((wasm, info)) +} diff --git a/crates/wit-component/src/adapter/gc.rs b/crates/wit-component/src/adapter/gc.rs new file mode 100644 index 000000000..5aa6cfbfa --- /dev/null +++ b/crates/wit-component/src/adapter/gc.rs @@ -0,0 +1,884 @@ +use self::bitvec::BitVec; +use anyhow::{bail, Result}; +use indexmap::{IndexMap, IndexSet}; +use std::collections::HashMap; +use std::mem; +use wasm_encoder::{Encode, EntityType}; +use wasmparser::*; + +/// This function will reduce the input core `wasm` module to only the set of +/// exports `required`. +/// +/// This internally performs a "gc" pass after removing exports to ensure that +/// the resulting module imports the minimal set of functions necessary. +pub fn run(wasm: &[u8], required: &IndexMap<&str, FuncType>) -> Result> { + assert!(!required.is_empty()); + + let mut module = Module::default(); + module.parse(wasm)?; + + // Make sure that all required names are present in the module, and then + // remove all names that are not required. + for (name, _ty) in required { + if !module.exports.contains_key(name) { + bail!("adapter module does not have export `{name}`") + } + } + let mut not_required = IndexSet::new(); + for name in module.exports.keys().copied() { + // Explicitly keep `cabi_realloc` if it's there in case an interface + // needs it for a lowering. 
+ if !required.contains_key(name) && name != "cabi_realloc" { + not_required.insert(name); + } + } + for name in not_required { + module.exports.remove(name); + } + assert!(!module.exports.is_empty()); + module.liveness()?; + module.encode() +} + +#[derive(Default)] +struct Module<'a> { + types: Vec, + tables: Vec>, + globals: Vec>, + memories: Vec>, + funcs: Vec>, + exports: IndexMap<&'a str, Export<'a>>, + func_names: HashMap, + + worklist: Vec<(u32, fn(&mut Module<'a>, u32) -> Result<()>)>, + live_types: BitVec, + live_tables: BitVec, + live_globals: BitVec, + live_memories: BitVec, + live_funcs: BitVec, +} + +enum Definition<'a> { + Import(&'a str, &'a str), + Local, +} + +struct Table<'a> { + def: Definition<'a>, + ty: TableType, +} + +struct Memory<'a> { + def: Definition<'a>, + ty: MemoryType, +} + +struct Global<'a> { + def: GlobalDefinition<'a>, + ty: GlobalType, +} + +enum GlobalDefinition<'a> { + Import(&'a str, &'a str), + Local(ConstExpr<'a>), +} + +struct Func<'a> { + def: FuncDefinition<'a>, + ty: u32, +} + +enum FuncDefinition<'a> { + Import(&'a str, &'a str), + Local(FunctionBody<'a>), +} + +impl<'a> Module<'a> { + fn parse(&mut self, wasm: &'a [u8]) -> Result<()> { + let mut next_code_index = 0; + let mut validator = Validator::new(); + for payload in Parser::new(0).parse_all(wasm) { + let payload = payload?; + validator.payload(&payload)?; + match payload { + Payload::Version { encoding, .. 
} => { + if encoding != Encoding::Module { + bail!("adapter must be a core wasm module, not a component"); + } + } + Payload::End(_) => {} + Payload::TypeSection(s) => { + for ty in s { + self.types.push(ty?); + } + } + Payload::ImportSection(s) => { + for i in s { + let i = i?; + match i.ty { + TypeRef::Func(ty) => self.funcs.push(Func { + def: FuncDefinition::Import(i.module, i.name), + ty, + }), + TypeRef::Table(ty) => self.tables.push(Table { + def: Definition::Import(i.module, i.name), + ty, + }), + TypeRef::Global(ty) => self.globals.push(Global { + def: GlobalDefinition::Import(i.module, i.name), + ty, + }), + TypeRef::Memory(ty) => self.memories.push(Memory { + def: Definition::Import(i.module, i.name), + ty, + }), + TypeRef::Tag(_) => bail!("unsupported `tag` type"), + } + } + } + Payload::TableSection(s) => { + for ty in s { + let ty = ty?; + self.tables.push(Table { + def: Definition::Local, + ty, + }); + } + } + Payload::MemorySection(s) => { + for ty in s { + let ty = ty?; + self.memories.push(Memory { + def: Definition::Local, + ty, + }); + } + } + Payload::GlobalSection(s) => { + for g in s { + let g = g?; + self.globals.push(Global { + def: GlobalDefinition::Local(g.init_expr), + ty: g.ty, + }); + } + } + + Payload::ExportSection(s) => { + for e in s { + let e = e?; + self.exports.insert(e.name, e); + } + } + + Payload::FunctionSection(s) => { + next_code_index = self.funcs.len(); + for ty in s { + let ty = ty?; + self.funcs.push(Func { + def: FuncDefinition::Local(FunctionBody::new(0, &[])), + ty, + }); + } + } + + Payload::CodeSectionStart { .. 
} => {} + Payload::CodeSectionEntry(body) => { + self.funcs[next_code_index].def = FuncDefinition::Local(body); + next_code_index += 1; + } + + // drop all custom sections + Payload::CustomSection(s) => { + if s.name() != "name" { + continue; + } + drop(self.parse_name_section(&s)); + } + + // sections that shouldn't appear in the specially-crafted core wasm + // adapter self we're processing + Payload::DataCountSection { .. } + | Payload::ElementSection(_) + | Payload::DataSection(_) + | Payload::StartSection { .. } + | Payload::TagSection(_) + | Payload::UnknownSection { .. } => { + bail!("unsupported section found in adapter module") + } + + // component-model related things that shouldn't show up + Payload::ModuleSection { .. } + | Payload::ComponentSection { .. } + | Payload::InstanceSection(_) + | Payload::ComponentInstanceSection(_) + | Payload::ComponentAliasSection(_) + | Payload::ComponentCanonicalSection(_) + | Payload::ComponentStartSection(_) + | Payload::ComponentImportSection(_) + | Payload::CoreTypeSection(_) + | Payload::ComponentExportSection(_) + | Payload::ComponentTypeSection(_) => { + bail!("component section found in adapter module") + } + } + } + + Ok(()) + } + + fn parse_name_section(&mut self, section: &CustomSectionReader<'a>) -> Result<()> { + let section = NameSectionReader::new(section.data(), section.data_offset())?; + for s in section { + match s? 
{ + Name::Function(map) => { + let mut map = map.get_map()?; + for _ in 0..map.get_count() { + let naming = map.read()?; + self.func_names.insert(naming.index, naming.name); + } + } + _ => {} + } + } + Ok(()) + } + + fn liveness(&mut self) -> Result<()> { + let exports = mem::take(&mut self.exports); + for (_, e) in exports.iter() { + match e.kind { + ExternalKind::Func => self.func(e.index), + ExternalKind::Global => self.global(e.index), + ExternalKind::Table => self.table(e.index), + ExternalKind::Memory => self.memory(e.index), + ExternalKind::Tag => bail!("unsupported exported tag"), + } + } + self.exports = exports; + + while let Some((idx, func)) = self.worklist.pop() { + func(self, idx)?; + } + Ok(()) + } + + fn func(&mut self, func: u32) { + if !self.live_funcs.insert(func) { + return; + } + self.worklist.push((func, |me, func| { + let func = &me.funcs[func as usize]; + me.live_types.insert(func.ty); + let mut body = match &func.def { + FuncDefinition::Import(..) => return Ok(()), + FuncDefinition::Local(e) => e.get_binary_reader(), + }; + let local_count = body.read_var_u32()?; + for _ in 0..local_count { + body.read_var_u32()?; + body.read_val_type()?; + } + me.operators(body) + })); + } + + fn global(&mut self, global: u32) { + if !self.live_globals.insert(global) { + return; + } + self.worklist.push((global, |me, global| { + let init = match &me.globals[global as usize].def { + GlobalDefinition::Import(..) 
=> return Ok(()), + GlobalDefinition::Local(e) => e, + }; + me.operators(init.get_binary_reader()) + })); + } + + fn table(&mut self, table: u32) { + self.live_tables.insert(table); + } + + fn memory(&mut self, memory: u32) { + self.live_memories.insert(memory); + } + + fn blockty(&mut self, ty: BlockType) { + if let BlockType::FuncType(ty) = ty { + self.live_types.insert(ty); + } + } + + fn operators(&mut self, mut reader: BinaryReader<'a>) -> Result<()> { + while !reader.eof() { + reader.visit_operator(self)?; + } + Ok(()) + } + + fn live_types(&self) -> impl Iterator + '_ { + live_iter(&self.live_types, self.types.iter()) + } + + fn live_funcs(&self) -> impl Iterator)> + '_ { + live_iter(&self.live_funcs, self.funcs.iter()) + } + + fn live_memories(&self) -> impl Iterator)> + '_ { + live_iter(&self.live_memories, self.memories.iter()) + } + + fn live_globals(&self) -> impl Iterator)> + '_ { + live_iter(&self.live_globals, self.globals.iter()) + } + + fn live_tables(&self) -> impl Iterator)> + '_ { + live_iter(&self.live_tables, self.tables.iter()) + } + + fn encode(&mut self) -> Result> { + let mut map = Encoder::default(); + + let mut types = wasm_encoder::TypeSection::new(); + let mut imports = wasm_encoder::ImportSection::new(); + let mut funcs = wasm_encoder::FunctionSection::new(); + let mut tables = wasm_encoder::TableSection::new(); + let mut memories = wasm_encoder::MemorySection::new(); + let mut globals = wasm_encoder::GlobalSection::new(); + let mut code = wasm_encoder::CodeSection::new(); + + let mut empty_type = None; + for (i, ty) in self.live_types() { + map.types.push(i); + match ty { + Type::Func(ty) => { + types.function( + ty.params().iter().copied().map(valty), + ty.results().iter().copied().map(valty), + ); + if ty.params().len() == 0 && ty.results().len() == 0 { + empty_type = Some(map.types.remap(i)); + } + } + } + } + + let mut num_memories = 0; + for (i, mem) in self.live_memories() { + map.memories.push(i); + let ty = 
wasm_encoder::MemoryType { + minimum: mem.ty.initial, + maximum: mem.ty.maximum, + shared: mem.ty.shared, + memory64: mem.ty.memory64, + }; + match &mem.def { + Definition::Import(m, n) => { + imports.import(m, n, ty); + } + Definition::Local => { + memories.memory(ty); + } + } + num_memories += 1; + } + + for (i, table) in self.live_tables() { + map.tables.push(i); + let ty = wasm_encoder::TableType { + minimum: table.ty.initial, + maximum: table.ty.maximum, + element_type: valty(table.ty.element_type), + }; + match &table.def { + Definition::Import(m, n) => { + imports.import(m, n, ty); + } + Definition::Local => { + tables.table(ty); + } + } + } + + for (i, global) in self.live_globals() { + map.globals.push(i); + let ty = wasm_encoder::GlobalType { + mutable: global.ty.mutable, + val_type: valty(global.ty.content_type), + }; + match &global.def { + GlobalDefinition::Import(m, n) => { + imports.import(m, n, ty); + } + GlobalDefinition::Local(init) => { + let mut bytes = map.operators(init.get_binary_reader())?; + assert_eq!(bytes.pop(), Some(0xb)); + globals.global(ty, &wasm_encoder::ConstExpr::raw(bytes)); + } + } + } + + let mut num_funcs = 0; + for (i, func) in self.live_funcs() { + map.funcs.push(i); + let ty = map.types.remap(func.ty); + match &func.def { + FuncDefinition::Import(m, n) => { + imports.import(m, n, EntityType::Function(ty)); + } + FuncDefinition::Local(_) => { + funcs.function(ty); + } + } + num_funcs += 1; + } + + for (_, func) in self.live_funcs() { + let mut body = match &func.def { + FuncDefinition::Import(..) => continue, + FuncDefinition::Local(body) => body.get_binary_reader(), + }; + let mut locals = Vec::new(); + for _ in 0..body.read_var_u32()? 
{ + let cnt = body.read_var_u32()?; + let ty = body.read_val_type()?; + locals.push((cnt, valty(ty))); + } + let mut func = wasm_encoder::Function::new(locals); + let bytes = map.operators(body)?; + func.raw(bytes); + code.function(&func); + } + + // Inject a start function to initialize the stack pointer which will be + // local to this module. This only happens if a memory is preserved and + // a stack pointer global is found. + let mut start = None; + let mutable_globals = self + .live_globals() + .filter(|(_, g)| g.ty.mutable) + .collect::>(); + if num_memories > 0 && mutable_globals.len() > 0 { + use wasm_encoder::Instruction::*; + + // If there are any memories or any mutable globals there must be + // precisely one of each as otherwise we don't know how to filter + // down to the right one. + assert_eq!(num_memories, 1); + assert_eq!(mutable_globals.len(), 1); + assert_eq!(mutable_globals[0].1.ty.content_type, ValType::I32); + let sp = map.globals.remap(mutable_globals[0].0); + + // Generate a function type for this start function, adding a new + // function type to the module if necessary. + let empty_type = empty_type.unwrap_or_else(|| { + types.function([], []); + types.len() - 1 + }); + funcs.function(empty_type); + + let mut func = wasm_encoder::Function::new([(1, wasm_encoder::ValType::I32)]); + // Grow the memory by 1 page to allocate ourselves some stack space. + func.instruction(&I32Const(1)); + func.instruction(&MemoryGrow(0)); + func.instruction(&LocalTee(0)); + + // Test if the return value of the growth was -1 and trap if so + // since we don't have a stack page. + func.instruction(&I32Const(-1)); + func.instruction(&I32Eq); + func.instruction(&If(wasm_encoder::BlockType::Empty)); + func.instruction(&Unreachable); + func.instruction(&End); + + // Set our stack pointer to the top of the page we were given, which + // is the page index times the page size plus the size of a page. 
+ func.instruction(&LocalGet(0)); + func.instruction(&I32Const(1)); + func.instruction(&I32Add); + func.instruction(&I32Const(16)); + func.instruction(&I32Shl); + func.instruction(&GlobalSet(sp)); + code.function(&func); + + start = Some(wasm_encoder::StartSection { + function_index: num_funcs, + }); + } + + // Sanity-check the shape of the module since some parts won't work if + // this fails. Note that during parsing we've already validated there + // are no data segments or element segments. + + // Shouldn't have any tables if there are no element segments since + // otherwise there's no meaning to a defined or imported table. + if self.live_tables().count() != 0 { + bail!("tables should not be present in the final adapter module"); + } + + // multi-memory should not be enabled and if any memory it should be + // imported. + if self.live_memories().count() > 1 { + bail!("the adapter module should not use multi-memory"); + } + if !memories.is_empty() { + bail!("locally-defined memories are not allowed define a local memory"); + } + + let mut ret = wasm_encoder::Module::default(); + if !types.is_empty() { + ret.section(&types); + } + if !imports.is_empty() { + ret.section(&imports); + } + if !funcs.is_empty() { + ret.section(&funcs); + } + if !tables.is_empty() { + ret.section(&tables); + } + if !memories.is_empty() { + ret.section(&memories); + } + if !globals.is_empty() { + ret.section(&globals); + } + + let mut exports = wasm_encoder::ExportSection::new(); + for (_, export) in self.exports.iter() { + let (kind, index) = match export.kind { + ExternalKind::Func => ( + wasm_encoder::ExportKind::Func, + map.funcs.remap(export.index), + ), + ExternalKind::Table => ( + wasm_encoder::ExportKind::Table, + map.tables.remap(export.index), + ), + ExternalKind::Memory => ( + wasm_encoder::ExportKind::Memory, + map.memories.remap(export.index), + ), + ExternalKind::Global => ( + wasm_encoder::ExportKind::Global, + map.globals.remap(export.index), + ), + kind => 
bail!("unsupported export kind {kind:?}"), + }; + exports.export(export.name, kind, index); + } + ret.section(&exports); + + if let Some(start) = &start { + ret.section(start); + } + + if !code.is_empty() { + ret.section(&code); + } + + // Append a custom `name` section if one is found + let mut func_names = Vec::new(); + for (i, _func) in self.live_funcs() { + let name = match self.func_names.get(&i) { + Some(name) => name, + None => continue, + }; + func_names.push((map.funcs.remap(i), *name)); + } + if start.is_some() { + func_names.push((num_funcs, "initialize_stack_pointer")); + } + if !func_names.is_empty() { + let mut subsection = Vec::new(); + func_names.len().encode(&mut subsection); + for (i, name) in func_names { + i.encode(&mut subsection); + name.encode(&mut subsection); + } + let mut section = Vec::new(); + section.push(0x01); + subsection.encode(&mut section); + ret.section(&wasm_encoder::CustomSection { + name: "name", + data: §ion, + }); + } + + Ok(ret.finish()) + } +} + +// This helper macro is used to define a visitor of all instructions with +// special handling for all payloads of instructions to mark any referenced +// items live. +// +// Currently item identification happesn through the field name of the payload. +// While not exactly the most robust solution this should work well enough for +// now. +macro_rules! define_visit { + ($(@$p:ident $op:ident $({ $($arg:ident: $argty:ty),* })? => $visit:ident)*) => { + $( + fn $visit(&mut self, _offset: usize $(, $($arg: $argty),*)?) { + $( + $( + define_visit!(mark_live self $arg $arg); + )* + )? 
+ } + )* + }; + + (mark_live $self:ident $arg:ident type_index) => {$self.live_types.insert($arg);}; + (mark_live $self:ident $arg:ident src_table) => {$self.table($arg);}; + (mark_live $self:ident $arg:ident dst_table) => {$self.table($arg);}; + (mark_live $self:ident $arg:ident table_index) => {$self.table($arg);}; + (mark_live $self:ident $arg:ident table) => {$self.table($arg);}; + (mark_live $self:ident $arg:ident global_index) => {$self.global($arg);}; + (mark_live $self:ident $arg:ident function_index) => {$self.func($arg);}; + (mark_live $self:ident $arg:ident mem) => {$self.memory($arg);}; + (mark_live $self:ident $arg:ident src_mem) => {$self.memory($arg);}; + (mark_live $self:ident $arg:ident dst_mem) => {$self.memory($arg);}; + (mark_live $self:ident $arg:ident memarg) => {$self.memory($arg.memory);}; + (mark_live $self:ident $arg:ident blockty) => {$self.blockty($arg);}; + (mark_live $self:ident $arg:ident lane) => {}; + (mark_live $self:ident $arg:ident lanes) => {}; + (mark_live $self:ident $arg:ident flags) => {}; + (mark_live $self:ident $arg:ident value) => {}; + (mark_live $self:ident $arg:ident segment) => {}; + (mark_live $self:ident $arg:ident mem_byte) => {}; + (mark_live $self:ident $arg:ident table_byte) => {}; + (mark_live $self:ident $arg:ident local_index) => {}; + (mark_live $self:ident $arg:ident relative_depth) => {}; + (mark_live $self:ident $arg:ident tag_index) => {}; + (mark_live $self:ident $arg:ident targets) => {}; + (mark_live $self:ident $arg:ident ty) => {}; +} + +impl<'a> VisitOperator<'a> for Module<'a> { + type Output = (); + + wasmparser::for_each_operator!(define_visit); +} + +/// Helper function to filter `iter` based on the `live` set, yielding an +/// iterator over the index of the item that's live as well as the item itself. 
+fn live_iter<'a, T>( + live: &'a BitVec, + iter: impl Iterator + 'a, +) -> impl Iterator + 'a { + iter.enumerate().filter_map(|(i, t)| { + let i = i as u32; + if live.contains(i) { + Some((i, t)) + } else { + None + } + }) +} + +#[derive(Default)] +struct Encoder { + types: Remap, + funcs: Remap, + memories: Remap, + globals: Remap, + tables: Remap, + buf: Vec, +} + +impl Encoder { + fn operators(&mut self, mut reader: BinaryReader<'_>) -> Result> { + assert!(self.buf.is_empty()); + while !reader.eof() { + reader.visit_operator(self)?; + } + Ok(mem::take(&mut self.buf)) + } + + fn memarg(&self, ty: MemArg) -> wasm_encoder::MemArg { + wasm_encoder::MemArg { + offset: ty.offset, + align: ty.align.into(), + memory_index: self.memories.remap(ty.memory), + } + } + + fn blockty(&self, ty: BlockType) -> wasm_encoder::BlockType { + match ty { + BlockType::Empty => wasm_encoder::BlockType::Empty, + BlockType::Type(ty) => wasm_encoder::BlockType::Result(valty(ty)), + BlockType::FuncType(ty) => wasm_encoder::BlockType::FunctionType(self.types.remap(ty)), + } + } +} + +fn unsupported_insn(x: &str) -> wasm_encoder::Instruction<'static> { + panic!("unsupported instruction {x}") +} + +// This is a helper macro to translate all `wasmparser` instructions to +// `wasm-encoder` instructions without having to list out every single +// instruction itself. +// +// The general goal of this macro is to have O(unique instruction payload) +// number of cases while also simultaneously adapting between the styles of +// wasmparser and wasm-encoder. +macro_rules! define_encode { + ($(@$p:ident $op:ident $({ $($arg:ident: $argty:ty),* })? => $visit:ident)*) => { + $( + fn $visit(&mut self, _offset: usize $(, $($arg: $argty),*)?) 
{ + #[allow(unused_imports)] + use wasm_encoder::Instruction::*; + let insn = define_encode!(mk self $op $({ $($arg: $argty),* })?); + insn.encode(&mut self.buf); + } + )* + }; + + // No-payload instructions are named the same in wasmparser as they are in + // wasm-encoder + (mk $self:ident $op:ident) => ($op); + + // Instructions supported in wasmparser but not in wasm-encoder + (mk $self:ident ReturnCall $x:tt) => (unsupported_insn("ReturnCall")); + (mk $self:ident ReturnCallIndirect $x:tt) => (unsupported_insn("ReturnCallIndirect")); + + // Instructions which need "special care" to map from wasmparser to + // wasm-encoder + (mk $self:ident BrTable { $arg:ident: $ty:ty }) => ({ + let targets = $arg.targets().map(|i| i.unwrap()).collect::>(); + BrTable(targets.into(), $arg.default()) + }); + (mk $self:ident CallIndirect { $ty:ident: $a:ty, $table:ident: $b:ty, table_byte: $c:ty }) => ({ + CallIndirect { ty: $self.types.remap($ty), table: $self.tables.remap($table) } + }); + (mk $self:ident MemorySize { $mem:ident: $a:ty, mem_byte: $b:ty }) => ({ + MemorySize($self.memories.remap($mem)) + }); + (mk $self:ident MemoryGrow { $mem:ident: $a:ty, mem_byte: $b:ty }) => ({ + MemoryGrow($self.memories.remap($mem)) + }); + (mk self AtomicFence $x:tt) => (AtomicFence); + (mk self I32Const { $v:ident: $t:ty }) => (I32Const($v)); + (mk self I64Const { $v:ident: $t:ty }) => (I64Const($v)); + (mk self F32Const { $v:ident: $t:ty }) => (F32Const(f32::from_bits($v.bits()))); + (mk self F64Const { $v:ident: $t:ty }) => (F64Const(f64::from_bits($v.bits()))); + (mk self V128Const { $v:ident: $t:ty }) => (V128Const($v.i128())); + + // Catch-all for the translation of one payload argument which is typically + // represented as a tuple-enum in wasm-encoder. + (mk $self:ident $op:ident { $arg:ident: $t:ty }) => ($op(define_encode!(map $self $arg $arg))); + + // Catch-all of everything else where the wasmparser fields are simply + // translated to wasm-encoder fields. 
+ (mk $self:ident $op:ident { $($arg:ident: $ty:ty),* }) => ($op { + $($arg: define_encode!(map $self $arg $arg)),* + }); + + // Individual cases of mapping one argument type to another, similar tot he + // `define_visit` macro above. + (map $self:ident $arg:ident memarg) => {$self.memarg($arg)}; + (map $self:ident $arg:ident blockty) => {$self.blockty($arg)}; + (map $self:ident $arg:ident tag_index) => {$arg}; + (map $self:ident $arg:ident relative_depth) => {$arg}; + (map $self:ident $arg:ident function_index) => {$self.funcs.remap($arg)}; + (map $self:ident $arg:ident global_index) => {$self.globals.remap($arg)}; + (map $self:ident $arg:ident mem) => {$self.memories.remap($arg)}; + (map $self:ident $arg:ident src_mem) => {$self.memories.remap($arg)}; + (map $self:ident $arg:ident dst_mem) => {$self.memories.remap($arg)}; + (map $self:ident $arg:ident table) => {$self.tables.remap($arg)}; + (map $self:ident $arg:ident src_table) => {$self.tables.remap($arg)}; + (map $self:ident $arg:ident dst_table) => {$self.tables.remap($arg)}; + (map $self:ident $arg:ident ty) => {valty($arg)}; + (map $self:ident $arg:ident local_index) => {$arg}; + (map $self:ident $arg:ident segment) => {$arg}; + (map $self:ident $arg:ident lane) => {$arg}; + (map $self:ident $arg:ident lanes) => {$arg}; +} + +impl<'a> VisitOperator<'a> for Encoder { + type Output = (); + + wasmparser::for_each_operator!(define_encode); +} + +fn valty(ty: wasmparser::ValType) -> wasm_encoder::ValType { + match ty { + wasmparser::ValType::I32 => wasm_encoder::ValType::I32, + wasmparser::ValType::I64 => wasm_encoder::ValType::I64, + wasmparser::ValType::F32 => wasm_encoder::ValType::F32, + wasmparser::ValType::F64 => wasm_encoder::ValType::F64, + wasmparser::ValType::V128 => wasm_encoder::ValType::V128, + wasmparser::ValType::FuncRef => wasm_encoder::ValType::FuncRef, + wasmparser::ValType::ExternRef => wasm_encoder::ValType::ExternRef, + } +} + +mod bitvec { + use std::mem; + + type T = u64; + + 
#[derive(Default)] + pub struct BitVec { + bits: Vec<T>, + } + + impl BitVec { + pub fn insert(&mut self, idx: u32) -> bool { + let (idx, bit) = idx_bit(idx); + match self.bits.get_mut(idx) { + Some(bits) => { + if *bits & bit != 0 { + return false; + } + *bits |= bit; + } + None => { + self.bits.resize(idx + 1, 0); + self.bits[idx] = bit; + } + } + true + } + + pub fn contains(&self, idx: u32) -> bool { + let (idx, bit) = idx_bit(idx); + match self.bits.get(idx) { + Some(bits) => (*bits & bit) != 0, + None => false, + } + } + } + + fn idx_bit(idx: u32) -> (usize, T) { + let idx = idx as usize; + let size = mem::size_of::<T>() * 8; + let index = idx / size; + let bit = 1 << (idx % size); + (index, bit) + } +} + +#[derive(Default)] +struct Remap { + map: Vec<u32>, + next: u32, +} + +impl Remap { + fn push(&mut self, idx: u32) { + self.map.resize(idx as usize, u32::MAX); + self.map.push(self.next); + self.next += 1; + } + + fn remap(&self, old: u32) -> u32 { + let ret = self.map[old as usize]; + assert!(ret != u32::MAX); + return ret; + } +} diff --git a/crates/wit-component/src/encoding.rs b/crates/wit-component/src/encoding.rs index 937233342..a2a520622 100644 --- a/crates/wit-component/src/encoding.rs +++ b/crates/wit-component/src/encoding.rs @@ -1,5 +1,5 @@ use crate::{ - validation::{expected_export_name, validate_module}, + validation::{expected_export_name, validate_module, ValidatedAdapter, ValidatedModule}, StringEncoding, }; use anyhow::{anyhow, bail, Context, Result}; @@ -471,33 +471,43 @@ impl<'a> TypeEncoder<'a> { fn encode_instance_imports( &mut self, interfaces: &'a [Interface], - required_imports: &IndexMap<&'a str, IndexSet<&'a str>>, + info: &ValidatedModule<'a>, imports: &mut ImportEncoder<'a>, ) -> Result<()> { for import in interfaces { - let required_funcs = match required_imports.get(import.name.as_str()) { + let required_funcs = match info.required_imports.get(import.name.as_str()) { Some(required) => required, None => continue, }; + 
self.encode_instance_import(import, required_funcs, imports)?; + } - Self::validate_interface(import)?; + Ok(()) + } - let mut instance = InstanceTypeEncoder::default(); + fn encode_instance_import( + &mut self, + import: &'a Interface, + required_funcs: &IndexSet<&'a str>, + imports: &mut ImportEncoder<'a>, + ) -> Result<()> { + Self::validate_interface(import)?; - for func in &import.functions { - if !required_funcs.contains(func.name.as_str()) { - continue; - } - Self::validate_function(func)?; + let mut instance = InstanceTypeEncoder::default(); - let index = self.encode_func_type(import, func, false)?; - instance.export(&func.name, ComponentTypeRef::Func(index))?; + for func in &import.functions { + if !required_funcs.contains(func.name.as_str()) { + continue; } + Self::validate_function(func)?; - let index = self.encode_instance_type(&instance.ty); - imports.import(import, ComponentTypeRef::Instance(index), required_funcs)?; + let index = self.encode_func_type(import, func, false)?; + instance.export(&func.name, ComponentTypeRef::Func(index))?; } + let index = self.encode_instance_type(&instance.ty); + imports.import(import, ComponentTypeRef::Instance(index), required_funcs)?; + Ok(()) } @@ -1076,7 +1086,7 @@ impl TypeContents { /// State relating to encoding a component. #[derive(Default)] -struct EncodingState { +struct EncodingState<'a> { /// The component being encoded. component: ComponentEncoding, /// The index into the core module index space for the inner core module. @@ -1103,9 +1113,18 @@ struct EncodingState { /// /// If `None`, then a fixup module has not yet been encoded. fixups_module_index: Option, + + /// A map of named adapter modules and the index that the module was defined + /// at. + adapter_modules: IndexMap<&'a str, u32>, + /// A map of adapter module instances and the index of their instance. + adapter_instances: IndexMap<&'a str, u32>, + /// A map of the index of the aliased realloc function for each adapter + /// module. 
+ adapter_reallocs: IndexMap<&'a str, Option>, } -impl EncodingState { +impl<'a> EncodingState<'a> { fn encode_core_module(&mut self, module: &[u8]) -> u32 { assert!(self.module_index.is_none()); let ret = self.component.core_module_raw(module); @@ -1113,53 +1132,89 @@ impl EncodingState { ret } + fn encode_core_adapter_module(&mut self, name: &'a str, module: &[u8]) -> u32 { + let index = self.component.core_module_raw(module); + assert!(self.adapter_modules.insert(name, index).is_none()); + index + } + fn encode_core_instantiation( &mut self, encoding: StringEncoding, - imports: &ImportEncoder, - has_memory: bool, - has_realloc: bool, + imports: &ImportEncoder<'a>, + info: &ValidatedModule<'a>, ) -> Result<()> { if imports.map.is_empty() { - self.instantiate_core_module([], has_memory, has_realloc); + self.instantiate_core_module([], info); return Ok(()); } // Encode a shim instantiation if needed - self.encode_shim_instantiation(imports); + let shims = self.encode_shim_instantiation(imports, info); + + let mut args = Vec::new(); + for name in info.required_imports.keys() { + let index = self.import_instance_to_lowered_core_instance( + CustomModule::Main, + name, + imports, + &shims, + ); + args.push((*name, ModuleArg::Instance(index))); + } + for (name, funcs) in info.adapters_required.iter() { + let shim_instance = self + .shim_instance_index + .expect("shim should be instantiated"); + let mut exports = Vec::new(); + + for (func, _ty) in funcs { + let index = self.component.alias_core_item( + shim_instance, + ExportKind::Func, + &shims.adapter_shim_names[&(*name, *func)].clone(), + ); + exports.push((*func, ExportKind::Func, index)); + } - let args: Vec<_> = imports - .map - .iter() - .enumerate() - .map(|(instance_index, (name, import))| { - let mut exports = Vec::with_capacity(import.direct.len() + import.indirect.len()); - - for lowering in &import.indirect { - let index = self.component.alias_core_item( - self.shim_instance_index - .expect("shim should be 
instantiated"), - ExportKind::Func, - &lowering.export_name, - ); - exports.push((lowering.name, ExportKind::Func, index)); - } + let index = self.component.instantiate_core_exports(exports); + args.push((*name, ModuleArg::Instance(index))); + } - for lowering in &import.direct { - let func_index = self - .component - .alias_func(instance_index as u32, lowering.name); - let core_func_index = self.component.lower_func(func_index, []); - exports.push((lowering.name, ExportKind::Func, core_func_index)); - } + self.instantiate_core_module(args, info); + self.instantiate_adapter_modules(imports, info, &shims); + self.encode_indirect_lowerings(encoding, imports, shims) + } - let index = self.component.instantiate_core_exports(exports); - (*name, ModuleArg::Instance(index)) - }) - .collect(); + fn import_instance_to_lowered_core_instance( + &mut self, + for_module: CustomModule<'_>, + name: &str, + imports: &ImportEncoder<'_>, + shims: &Shims<'_>, + ) -> u32 { + let (instance_index, _, import) = imports.map.get_full(name).unwrap(); + let mut exports = Vec::with_capacity(import.direct.len() + import.indirect.len()); + + for (i, lowering) in import.indirect.iter().enumerate() { + let index = self.component.alias_core_item( + self.shim_instance_index + .expect("shim should be instantiated"), + ExportKind::Func, + &shims.indirect_lowering_names[&(for_module, name, i)], + ); + exports.push((lowering.name, ExportKind::Func, index)); + } - self.instantiate_core_module(args, has_memory, has_realloc); - self.encode_indirect_lowerings(encoding, imports) + for lowering in &import.direct { + let func_index = self + .component + .alias_func(instance_index as u32, lowering.name); + let core_func_index = self.component.lower_func(func_index, []); + exports.push((lowering.name, ExportKind::Func, core_func_index)); + } + + self.component.instantiate_core_exports(exports) } fn encode_imports(&mut self, imports: &ImportEncoder) { @@ -1168,11 +1223,11 @@ impl EncodingState { } } - fn 
encode_exports<'a>( + fn encode_exports<'b>( &mut self, encoding: StringEncoding, - exports: impl Iterator, - func_types: &IndexMap, u32>, + exports: impl Iterator, + func_types: &IndexMap, u32>, ) -> Result<()> { let core_instance_index = self.instance_index.expect("must be instantiated"); @@ -1239,9 +1294,51 @@ impl EncodingState { Ok(()) } - fn encode_shim_instantiation(&mut self, imports: &ImportEncoder) { - if imports.indirect_count == 0 { - return; + fn encode_shim_instantiation( + &mut self, + imports: &ImportEncoder<'a>, + info: &ValidatedModule<'a>, + ) -> Shims<'a> { + let mut signatures = Vec::new(); + let mut ret = Shims::default(); + for name in info.required_imports.keys() { + let import = &imports.map[name]; + ret.append_indirect(name, CustomModule::Main, import, &mut signatures); + } + + for (adapter, funcs) in info.adapters_required.iter() { + let info = &imports.adapters[adapter]; + if let Some(name) = info.required_import { + let import = &imports.map[name]; + ret.append_indirect( + name, + CustomModule::Adapter(adapter), + import, + &mut signatures, + ); + } + for (func, ty) in funcs { + let name = ret.list.len().to_string(); + log::debug!("shim {name} is adapter `{adapter}::{func}`"); + signatures.push(WasmSignature { + params: ty.params().iter().map(to_wasm_type).collect(), + results: ty.results().iter().map(to_wasm_type).collect(), + indirect_params: false, + retptr: false, + }); + ret.list.push(Shim { + name: name.clone(), + // Pessimistically assume that all adapters require memory + // in one form or another. While this isn't technically true + // it's true enough for WASI. 
+ options: RequiredOptions::MEMORY, + kind: ShimKind::Adapter { adapter, func }, + }); + ret.adapter_shim_names.insert((adapter, func), name); + } + } + if ret.list.is_empty() { + return ret; } assert!(self.shim_instance_index.is_none()); @@ -1265,38 +1362,29 @@ impl EncodingState { let mut elements = ElementSection::new(); let mut func_indexes = Vec::new(); - let mut func_index = 0; - for import in imports.map.values() { - for lowering in &import.indirect { - let type_index = *sigs.entry(&lowering.sig).or_insert_with(|| { - let index = types.len(); - types.function( - lowering.sig.params.iter().map(to_val_type), - lowering.sig.results.iter().map(to_val_type), - ); - index - }); - - functions.function(type_index); - Self::encode_shim_function( - type_index, - func_index, - &mut code, - lowering.sig.params.len() as u32, + for (i, (sig, shim)) in signatures.iter().zip(&ret.list).enumerate() { + let i = i as u32; + let type_index = *sigs.entry(sig).or_insert_with(|| { + let index = types.len(); + types.function( + sig.params.iter().map(to_val_type), + sig.results.iter().map(to_val_type), ); - exports.export(&lowering.export_name, ExportKind::Func, func_index); + index + }); - imports_section.import("", &lowering.export_name, EntityType::Function(type_index)); - func_indexes.push(func_index); + functions.function(type_index); + Self::encode_shim_function(type_index, i, &mut code, sig.params.len() as u32); + exports.export(&shim.name, ExportKind::Func, i); - func_index += 1; - } + imports_section.import("", &shim.name, EntityType::Function(type_index)); + func_indexes.push(i); } let table_type = TableType { element_type: ValType::FuncRef, - minimum: func_index, - maximum: Some(func_index), + minimum: signatures.len() as u32, + maximum: Some(signatures.len() as u32), }; tables.table(table_type); @@ -1326,6 +1414,18 @@ impl EncodingState { let shim_module_index = self.component.core_module(&shim); self.fixups_module_index = Some(self.component.core_module(&fixups)); 
self.shim_instance_index = Some(self.component.instantiate(shim_module_index, [])); + + return ret; + + fn to_wasm_type(ty: &wasmparser::ValType) -> WasmType { + match ty { + wasmparser::ValType::I32 => WasmType::I32, + wasmparser::ValType::I64 => WasmType::I64, + wasmparser::ValType::F32 => WasmType::F32, + wasmparser::ValType::F64 => WasmType::F64, + _ => unreachable!(), + } + } } fn encode_shim_function( @@ -1351,8 +1451,9 @@ impl EncodingState { &mut self, encoding: StringEncoding, imports: &ImportEncoder, + shims: Shims<'_>, ) -> Result<()> { - if imports.indirect_count == 0 { + if shims.list.is_empty() { return Ok(()); } @@ -1369,25 +1470,38 @@ impl EncodingState { let mut exports = Vec::with_capacity(imports.indirect_count as usize); exports.push((INDIRECT_TABLE_NAME, ExportKind::Table, table_index)); - for (instance_index, import) in imports.map.values().enumerate() { - for lowering in &import.indirect { - let func_index = self - .component - .alias_func(instance_index as u32, lowering.name); - - let core_func_index = self.component.lower_func( - func_index, - lowering - .options - .into_iter(encoding, self.memory_index, self.realloc_index)?, - ); + for shim in shims.list.iter() { + let core_func_index = match &shim.kind { + ShimKind::IndirectLowering { + interface, + indirect_index, + realloc, + } => { + let (instance_index, _, interface) = imports.map.get_full(interface).unwrap(); + let func_index = self.component.alias_func( + instance_index as u32, + &interface.indirect[*indirect_index].name, + ); - exports.push(( - lowering.export_name.as_str(), + let realloc = match realloc { + CustomModule::Main => self.realloc_index, + CustomModule::Adapter(name) => self.adapter_reallocs[name], + }; + + self.component.lower_func( + func_index, + shim.options + .into_iter(encoding, self.memory_index, realloc)?, + ) + } + ShimKind::Adapter { adapter, func } => self.component.alias_core_item( + self.adapter_instances[adapter], ExportKind::Func, - core_func_index, - )); 
- } + func, + ), + }; + + exports.push((shim.name.as_str(), ExportKind::Func, core_func_index)); } let instance_index = self.component.instantiate_core_exports(exports); @@ -1398,9 +1512,9 @@ impl EncodingState { Ok(()) } - fn instantiate_core_module<'a, A>(&mut self, args: A, has_memory: bool, has_realloc: bool) + fn instantiate_core_module<'b, A>(&mut self, args: A, info: &ValidatedModule<'_>) where - A: IntoIterator, + A: IntoIterator, A::IntoIter: ExactSizeIterator, { assert!(self.instance_index.is_none()); @@ -1409,7 +1523,7 @@ impl EncodingState { .component .instantiate(self.module_index.expect("core module encoded"), args); - if has_memory { + if info.has_memory { self.memory_index = Some(self.component.alias_core_item( instance_index, ExportKind::Memory, @@ -1417,7 +1531,7 @@ impl EncodingState { )); } - if has_realloc { + if info.has_realloc { self.realloc_index = Some(self.component.alias_core_item( instance_index, ExportKind::Func, @@ -1427,6 +1541,108 @@ impl EncodingState { self.instance_index = Some(instance_index); } + + fn instantiate_adapter_modules( + &mut self, + imports: &ImportEncoder<'a>, + info: &ValidatedModule<'a>, + shims: &Shims<'_>, + ) { + for name in info.adapters_required.keys() { + let info = &imports.adapters[name]; + let mut args = Vec::new(); + if let Some((module, name)) = &info.needs_memory { + let memory = self.memory_index.unwrap(); + let instance = self.component.instantiate_core_exports([( + name.as_str(), + ExportKind::Memory, + memory, + )]); + args.push((module.as_str(), ModuleArg::Instance(instance))); + } + if let Some(import_name) = info.required_import { + let instance = self.import_instance_to_lowered_core_instance( + CustomModule::Adapter(name), + import_name, + imports, + shims, + ); + args.push((import_name, ModuleArg::Instance(instance))); + } + let instance = self.component.instantiate(self.adapter_modules[name], args); + self.adapter_instances.insert(name, instance); + let realloc = if info.has_realloc { + 
Some( + self.component + .alias_core_item(instance, ExportKind::Func, "cabi_realloc"), + ) + } else { + None + }; + self.adapter_reallocs.insert(name, realloc); + } + } +} + +#[derive(Default)] +struct Shims<'a> { + list: Vec<Shim<'a>>, + indirect_lowering_names: IndexMap<(CustomModule<'a>, &'a str, usize), String>, + adapter_shim_names: IndexMap<(&'a str, &'a str), String>, +} + +struct Shim<'a> { + options: RequiredOptions, + name: String, + kind: ShimKind<'a>, +} + +enum ShimKind<'a> { + IndirectLowering { + interface: &'a str, + indirect_index: usize, + realloc: CustomModule<'a>, + }, + Adapter { + adapter: &'a str, + func: &'a str, + }, +} + +#[derive(Debug, Copy, Clone, Hash, Eq, PartialEq)] +enum CustomModule<'a> { + Main, + Adapter(&'a str), +} + +impl<'a> Shims<'a> { + fn append_indirect( + &mut self, + name: &'a str, + for_module: CustomModule<'a>, + import: &ImportedInterface<'a>, + sigs: &mut Vec<WasmSignature>, + ) { + for (indirect_index, lowering) in import.indirect.iter().enumerate() { + let shim_name = self.list.len().to_string(); + log::debug!( + "shim {shim_name} is import `{name}` lowering {indirect_index} `{}`", + lowering.name + ); + sigs.push(lowering.sig.clone()); + self.list.push(Shim { + name: shim_name.clone(), + options: lowering.options, + kind: ShimKind::IndirectLowering { + interface: name, + indirect_index, + realloc: for_module, + }, + }); + self.indirect_lowering_names + .insert((for_module, name, indirect_index), shim_name); + } + } } #[derive(Debug)] @@ -1439,7 +1655,6 @@ struct IndirectLowering<'a> { name: &'a str, sig: WasmSignature, options: RequiredOptions, - export_name: String, } #[derive(Debug)] @@ -1635,6 +1850,7 @@ fn inc(idx: &mut u32) -> u32 { #[derive(Debug, Default)] struct ImportEncoder<'a> { map: IndexMap<&'a str, ImportedInterface<'a>>, + adapters: IndexMap<&'a str, ValidatedAdapter<'a>>, direct_count: u32, indirect_count: u32, } @@ -1665,14 +1881,12 @@ impl<'a> ImportEncoder<'a> { self.direct_count += 1; 
direct.push(DirectLowering { 
name: &f.name }); } else { - let element_index = self.indirect_count; let sig = interface.wasm_signature(AbiVariant::GuestImport, f); self.indirect_count += 1; indirect.push(IndirectLowering { name: &f.name, sig, options, - export_name: element_index.to_string(), }); } } @@ -1699,6 +1913,7 @@ pub struct ComponentEncoder<'a> { exports: &'a [Interface], validate: bool, types_only: bool, + adapters: IndexMap<&'a str, (&'a [u8], &'a Interface)>, } impl<'a> ComponentEncoder<'a> { @@ -1738,12 +1953,41 @@ impl<'a> ComponentEncoder<'a> { self } + /// Specifies a new adapter which is used to translate from a historical + /// wasm ABI to the canonical ABI and the `interface` provided. + /// + /// This is primarily used to polyfill, for example, + /// `wasi_snapshot_preview1` with a component-model using interface. The + /// `name` provided is the module name of the adapter that is being + /// polyfilled, for example `"wasi_snapshot_preview1"`. + /// + /// The `bytes` provided is a core wasm module which implements the `name` + /// interface in terms of the `interface` interface. This core wasm module + /// is severely restricted in its shape, for example it cannot have any data + /// segments or element segments. + /// + /// The `interface` provided is the component-model-using-interface that the + /// wasm module specified by `bytes` imports. The `bytes` will then import + /// `interface` and export functions to get imported from the module `name` + /// in the core wasm that's being wrapped. + pub fn adapter(mut self, name: &'a str, bytes: &'a [u8], interface: &'a Interface) -> Self { + self.adapters.insert(name, (bytes, interface)); + self + } + /// Encode the component and return the bytes. pub fn encode(&self) -> Result<Vec<u8>> { - let (required_imports, has_memory, has_realloc) = if !self.module.is_empty() { - validate_module(self.module, &self.interface, self.imports, self.exports)? 
+ let info = if !self.module.is_empty() { + let adapters = self.adapters.keys().copied().collect::<IndexSet<_>>(); + validate_module( + self.module, + &self.interface, + self.imports, + self.exports, + &adapters, + )? } else { - (Default::default(), false, false) + Default::default() }; let exports = self @@ -1757,26 +2001,38 @@ impl<'a> ComponentEncoder<'a> { let mut types = TypeEncoder::default(); let mut imports = ImportEncoder::default(); types.encode_func_types(exports.clone(), false)?; - types.encode_instance_imports(self.imports, &required_imports, &mut imports)?; - types.finish(&mut state.component); + types.encode_instance_imports(self.imports, &info, &mut imports)?; if self.types_only { if !self.module.is_empty() { bail!("a module cannot be specified for a types-only encoding"); } + types.finish(&mut state.component); } else { if self.module.is_empty() { bail!("a module is required when encoding a component"); } + for (name, required) in info.adapters_required.iter() { + let (wasm, interface) = &self.adapters[*name]; + let (min_wasm, info) = crate::adapter::adapt(wasm, interface, required)?; + state.encode_core_adapter_module(name, &min_wasm); + types.encode_instance_import(interface, &info.required_funcs, &mut imports)?; + imports.adapters.insert(name, info); + } + + types.finish(&mut state.component); + state.encode_imports(&imports); state.encode_core_module(self.module); - state.encode_core_instantiation(self.encoding, &imports, has_memory, has_realloc)?; + state.encode_core_instantiation(self.encoding, &imports, &info)?; state.encode_exports(self.encoding, exports, &types.func_type_map)?; } let bytes = state.component.finish(); + log::debug!("{}", wasmprinter::print_bytes(&bytes).unwrap()); + if self.validate { let mut validator = Validator::new_with_features(WasmFeatures { component_model: true, diff --git a/crates/wit-component/src/lib.rs b/crates/wit-component/src/lib.rs index 2036fab43..a95eebcb3 100644 --- a/crates/wit-component/src/lib.rs +++ 
b/crates/wit-component/src/lib.rs @@ -7,6 +7,7 @@ use std::str::FromStr; use wasm_encoder::CanonicalOption; use wit_parser::Interface; +mod adapter; #[cfg(feature = "cli")] pub mod cli; mod decoding; diff --git a/crates/wit-component/src/validation.rs b/crates/wit-component/src/validation.rs index 9533b7db9..8f5d92037 100644 --- a/crates/wit-component/src/validation.rs +++ b/crates/wit-component/src/validation.rs @@ -10,10 +10,6 @@ use wit_parser::{ Interface, }; -fn is_wasi(name: &str) -> bool { - name == "wasi_unstable" || name == "wasi_snapshot_preview1" -} - fn is_canonical_function(name: &str) -> bool { name.starts_with("cabi_") } @@ -43,6 +39,14 @@ fn wasm_sig_to_func_type(signature: WasmSignature) -> FuncType { ) } +#[derive(Default)] +pub struct ValidatedModule<'a> { + pub required_imports: IndexMap<&'a str, IndexSet<&'a str>>, + pub has_memory: bool, + pub has_realloc: bool, + pub adapters_required: IndexMap<&'a str, IndexMap<&'a str, FuncType>>, +} + /// This function validates the following: /// * The bytes represent a core WebAssembly module. /// * The module's imports are all satisfied by the given import interfaces. 
@@ -55,7 +59,8 @@ pub fn validate_module<'a>( interface: &Option<&Interface>, imports: &[Interface], exports: &[Interface], -) -> Result<(IndexMap<&'a str, IndexSet<&'a str>>, bool, bool)> { + adapters: &IndexSet<&str>, +) -> Result> { let imports: IndexMap<&str, &Interface> = imports.iter().map(|i| (i.name.as_str(), i)).collect(); let exports: IndexMap<&str, &Interface> = @@ -65,8 +70,7 @@ pub fn validate_module<'a>( let mut types = None; let mut import_funcs = IndexMap::new(); let mut export_funcs = IndexMap::new(); - let mut has_memory = false; - let mut has_realloc = false; + let mut ret = ValidatedModule::default(); for payload in Parser::new(0).parse_all(bytes) { let payload = payload?; @@ -82,9 +86,6 @@ pub fn validate_module<'a>( Payload::ImportSection(s) => { for import in s { let import = import?; - if is_wasi(import.module) { - continue; - } match import.ty { TypeRef::Func(ty) => { let map = match import_funcs.entry(import.module) { @@ -107,7 +108,7 @@ pub fn validate_module<'a>( if is_canonical_function(export.name) { if export.name == "cabi_realloc" { // TODO: validate that the cabi_realloc function is [i32, i32, i32, i32] -> [i32] - has_realloc = true; + ret.has_realloc = true; } continue; } @@ -116,7 +117,7 @@ pub fn validate_module<'a>( } ExternalKind::Memory => { if export.name == "memory" { - has_memory = true; + ret.has_memory = true; } } _ => continue, @@ -137,6 +138,16 @@ pub fn validate_module<'a>( match imports.get(name) { Some(interface) => { validate_imported_interface(interface, name, funcs, &types)?; + let funcs = funcs.into_iter().map(|(f, _ty)| *f).collect(); + let prev = ret.required_imports.insert(name, funcs); + assert!(prev.is_none()); + } + None if adapters.contains(name) => { + let map = ret.adapters_required.entry(name).or_insert(IndexMap::new()); + for (func, ty) in funcs { + let ty = types.func_type_at(*ty).unwrap(); + map.insert(func, ty.clone()); + } } None => bail!("module requires an import interface named `{}`", name), } 
@@ -154,22 +165,144 @@ pub fn validate_module<'a>( validate_exported_interface(interface, Some(name), &export_funcs, &types)?; } - Ok(( - import_funcs - .into_iter() - .map(|(name, funcs)| (name, funcs.into_iter().map(|(f, _ty)| f).collect())) - .collect(), - has_memory, - has_realloc, - )) + Ok(ret) } -fn validate_imported_interface( - interface: &Interface, +#[derive(Default, Debug)] +pub struct ValidatedAdapter<'a> { + pub required_funcs: IndexSet<&'a str>, + pub required_import: Option<&'a str>, + pub needs_memory: Option<(String, String)>, + pub has_realloc: bool, +} + +/// Validates that `bytes` is a core wasm adapter module which imports functions only from `interface` (plus, optionally, a memory) and exports every function in `required` with a matching signature. +pub fn validate_adapter_module<'a>( + bytes: &[u8], + interface: &'a Interface, + required: &IndexMap<&str, FuncType>, +) -> Result<ValidatedAdapter<'a>> { + let mut validator = Validator::new(); + let mut import_funcs = IndexMap::new(); + let mut export_funcs = IndexMap::new(); + let mut types = None; + let mut funcs = Vec::new(); + let mut ret = ValidatedAdapter::default(); + + for payload in Parser::new(0).parse_all(bytes) { + let payload = payload?; + match validator.payload(&payload)? { + ValidPayload::End(tys) => { + types = Some(tys); + break; + } + ValidPayload::Func(validator, body) => { + funcs.push((validator, body)); + } + _ => {} + } + + match payload { + Payload::Version { encoding, .. 
} if encoding != Encoding::Module => { + bail!("data is not a WebAssembly module"); + } + + Payload::ImportSection(s) => { + for import in s { + let import = import?; + match import.ty { + TypeRef::Func(ty) => { + let map = match import_funcs.entry(import.module) { + Entry::Occupied(e) => e.into_mut(), + Entry::Vacant(e) => e.insert(IndexMap::new()), + }; + + assert!(map.insert(import.name, ty).is_none()); + } + + // A memory is allowed to be imported into the adapter + // module so that's skipped here + TypeRef::Memory(_) => { + ret.needs_memory = + Some((import.module.to_string(), import.name.to_string())); + } + + _ => { + bail!("adapter module is only allowed to import functions and memories") + } + } + } + } + Payload::ExportSection(s) => { + for export in s { + let export = export?; + + match export.kind { + ExternalKind::Func => { + export_funcs.insert(export.name, export.index); + if export.name == "cabi_realloc" { + ret.has_realloc = true; + } + } + _ => continue, + } + } + } + _ => continue, + } + } + + let mut resources = Default::default(); + for (validator, body) in funcs { + let mut validator = validator.into_validator(resources); + validator.validate(&body)?; + resources = validator.into_allocations(); + } + + let types = types.unwrap(); + for (name, funcs) in &import_funcs { + if *name != interface.name { + bail!( + "adapter module imports from `{name}` which does not match \ + its interface `{}`", + interface.name + ); + } + ret.required_funcs = validate_imported_interface(interface, name, funcs, &types)?; + ret.required_import = Some(interface.name.as_str()); + } + + for (name, ty) in required { + let idx = match export_funcs.get(name) { + Some(idx) => *idx, + None => bail!("adapter module did not export `{name}`"), + }; + let actual = types.function_at(idx).unwrap(); + if ty == actual { + continue; + } + bail!( + "adapter module export `{name}` does not match the expected signature:\n\ + expected: {:?} -> {:?}\n\ + actual: {:?} -> {:?}\n\ + ", + 
ty.params(), + ty.results(), + actual.params(), + actual.results(), + ); + } + + Ok(ret) +} + +fn validate_imported_interface<'a>( + interface: &'a Interface, name: &str, imports: &IndexMap<&str, u32>, types: &Types, -) -> Result<()> { +) -> Result> { + let mut funcs = IndexSet::new(); for (func_name, ty) in imports { let f = interface .functions @@ -187,18 +320,20 @@ fn validate_imported_interface( let ty = types.func_type_at(*ty).unwrap(); if ty != &expected { bail!( - "type mismatch for function `{}` on imported interface `{}`: expected `{:?} -> {:?}` but found `{:?} -> {:?}`", - func_name, - name, - expected.params(), - expected.results(), - ty.params(), - ty.results() - ); + "type mismatch for function `{}` on imported interface `{}`: expected `{:?} -> {:?}` but found `{:?} -> {:?}`", + f.name, + name, + expected.params(), + expected.results(), + ty.params(), + ty.results() + ); } + + funcs.insert(f.name.as_str()); } - Ok(()) + Ok(funcs) } fn validate_exported_interface( diff --git a/crates/wit-component/tests/components.rs b/crates/wit-component/tests/components.rs index c95429327..ded443b0b 100644 --- a/crates/wit-component/tests/components.rs +++ b/crates/wit-component/tests/components.rs @@ -27,6 +27,32 @@ fn read_interfaces(dir: &Path, pattern: &str) -> Result> { .collect::>() } +fn read_adapters(dir: &Path) -> Result, Interface)>> { + glob::glob(dir.join("adapt-*.wat").to_str().unwrap())? 
+ .map(|p| { + let p = p?; + let adapter = + wat::parse_file(&p).with_context(|| format!("expected file `{}`", p.display()))?; + let stem = p.file_stem().unwrap().to_str().unwrap(); + let glob = format!("{stem}-import-*.wit"); + let wit = match glob::glob(dir.join(&glob).to_str().unwrap())?.next() { + Some(path) => path?, + None => bail!("failed to find `{glob}` match"), + }; + let mut i = read_interface(&wit)?; + i.name = wit + .file_stem() + .unwrap() + .to_str() + .unwrap() + .trim_start_matches(stem) + .trim_start_matches("-import-") + .to_string(); + Ok((stem.trim_start_matches("adapt-").to_string(), adapter, i)) + }) + .collect::>() +} + /// Tests the encoding of components. /// /// This test looks in the `components/` directory for test cases. @@ -53,6 +79,8 @@ fn read_interfaces(dir: &Path, pattern: &str) -> Result> { /// either `component.wat` or `error.txt` depending on the outcome of the encoding. #[test] fn component_encoding() -> Result<()> { + drop(env_logger::try_init()); + for entry in fs::read_dir("tests/components")? 
{ let path = entry?.path(); if !path.is_dir() { @@ -74,6 +102,7 @@ fn component_encoding() -> Result<()> { .transpose()?; let imports = read_interfaces(&path, "import-*.wit")?; let exports = read_interfaces(&path, "export-*.wit")?; + let adapters = read_adapters(&path)?; let mut encoder = ComponentEncoder::default() .module(&module) @@ -81,6 +110,10 @@ fn component_encoding() -> Result<()> { .exports(&exports) .validate(true); + for (name, wasm, interface) in adapters.iter() { + encoder = encoder.adapter(name, wasm, interface); + } + if let Some(interface) = &interface { encoder = encoder.interface(interface); } diff --git a/crates/wit-component/tests/components/adapt-list-return/adapt-old-import-new.wit b/crates/wit-component/tests/components/adapt-list-return/adapt-old-import-new.wit new file mode 100644 index 000000000..494fd104a --- /dev/null +++ b/crates/wit-component/tests/components/adapt-list-return/adapt-old-import-new.wit @@ -0,0 +1 @@ +read: func() -> list<u8> diff --git a/crates/wit-component/tests/components/adapt-list-return/adapt-old.wat b/crates/wit-component/tests/components/adapt-list-return/adapt-old.wat new file mode 100644 index 000000000..560563352 --- /dev/null +++ b/crates/wit-component/tests/components/adapt-list-return/adapt-old.wat @@ -0,0 +1,14 @@ +(module + (import "new" "read" (func $read (param i32))) + (import "env" "memory" (memory 0)) + + (func (export "read") (param i32 i32) + i32.const 8 + call $read + unreachable + ) + + (func (export "cabi_realloc") (param i32 i32 i32 i32) (result i32) + unreachable + ) +) diff --git a/crates/wit-component/tests/components/adapt-list-return/component.wat b/crates/wit-component/tests/components/adapt-list-return/component.wat new file mode 100644 index 000000000..8813f7060 --- /dev/null +++ b/crates/wit-component/tests/components/adapt-list-return/component.wat @@ -0,0 +1,92 @@ +(component + (core module (;0;) + (type (;0;) (func (param i32))) + (type (;1;) (func (param i32 i32))) + 
(type (;2;) 
(func (param i32 i32 i32 i32) (result i32))) + (import "new" "read" (func $read (;0;) (type 0))) + (func (;1;) (type 1) (param i32 i32) + i32.const 8 + call $read + unreachable + ) + (func (;2;) (type 2) (param i32 i32 i32 i32) (result i32) + unreachable + ) + (export "read" (func 1)) + (export "cabi_realloc" (func 2)) + ) + (type (;0;) (list u8)) + (type (;1;) (func (result 0))) + (type (;2;) + (instance + (alias outer 1 1 (type (;0;))) + (export "read" (func (type 0))) + ) + ) + (import "new" (instance (;0;) (type 2))) + (core module (;1;) + (type (;0;) (func (param i32 i32))) + (import "old" "read" (func (;0;) (type 0))) + (memory (;0;) 1) + (export "memory" (memory 0)) + ) + (core module (;2;) + (type (;0;) (func (param i32))) + (type (;1;) (func (param i32 i32))) + (func (;0;) (type 0) (param i32) + local.get 0 + i32.const 0 + call_indirect (type 0) + ) + (func (;1;) (type 1) (param i32 i32) + local.get 0 + local.get 1 + i32.const 1 + call_indirect (type 1) + ) + (table (;0;) 2 2 funcref) + (export "0" (func 0)) + (export "1" (func 1)) + (export "$imports" (table 0)) + ) + (core module (;3;) + (type (;0;) (func (param i32))) + (type (;1;) (func (param i32 i32))) + (import "" "0" (func (;0;) (type 0))) + (import "" "1" (func (;1;) (type 1))) + (import "" "$imports" (table (;0;) 2 2 funcref)) + (elem (;0;) (i32.const 0) func 0 1) + ) + (core instance (;0;) (instantiate 2)) + (alias core export 0 "1" (core func (;0;))) + (core instance (;1;) + (export "read" (func 0)) + ) + (core instance (;2;) (instantiate 1 + (with "old" (instance 1)) + ) + ) + (alias core export 2 "memory" (core memory (;0;))) + (alias core export 0 "0" (core func (;1;))) + (core instance (;3;) + (export "read" (func 1)) + ) + (core instance (;4;) (instantiate 0 + (with "new" (instance 3)) + ) + ) + (alias core export 4 "cabi_realloc" (core func (;2;))) + (alias core export 0 "$imports" (core table (;0;))) + (alias export 0 "read" (func (;0;))) + (core func (;3;) (canon lower (func 0) (memory 
0) (realloc 2))) + (alias core export 4 "read" (core func (;4;))) + (core instance (;5;) + (export "$imports" (table 0)) + (export "0" (func 3)) + (export "1" (func 4)) + ) + (core instance (;6;) (instantiate 3 + (with "" (instance 5)) + ) + ) +) \ No newline at end of file diff --git a/crates/wit-component/tests/components/adapt-list-return/module.wat b/crates/wit-component/tests/components/adapt-list-return/module.wat new file mode 100644 index 000000000..e68c95b08 --- /dev/null +++ b/crates/wit-component/tests/components/adapt-list-return/module.wat @@ -0,0 +1,4 @@ +(module + (import "old" "read" (func (param i32 i32))) + (memory (export "memory") 1) +) diff --git a/crates/wit-component/tests/components/adapt-memory-simple/adapt-old-import-new.wit b/crates/wit-component/tests/components/adapt-memory-simple/adapt-old-import-new.wit new file mode 100644 index 000000000..7f5b4d13d --- /dev/null +++ b/crates/wit-component/tests/components/adapt-memory-simple/adapt-old-import-new.wit @@ -0,0 +1 @@ +log: func(s: string) diff --git a/crates/wit-component/tests/components/adapt-memory-simple/adapt-old.wat b/crates/wit-component/tests/components/adapt-memory-simple/adapt-old.wat new file mode 100644 index 000000000..deb5d349d --- /dev/null +++ b/crates/wit-component/tests/components/adapt-memory-simple/adapt-old.wat @@ -0,0 +1,4 @@ +(module + (import "new" "log" (func $log (param i32 i32))) + (export "log" (func $log)) +) diff --git a/crates/wit-component/tests/components/adapt-memory-simple/component.wat b/crates/wit-component/tests/components/adapt-memory-simple/component.wat new file mode 100644 index 000000000..e2799b89f --- /dev/null +++ b/crates/wit-component/tests/components/adapt-memory-simple/component.wat @@ -0,0 +1,78 @@ +(component + (core module (;0;) + (type (;0;) (func (param i32 i32))) + (import "new" "log" (func $log (;0;) (type 0))) + (export "log" (func $log)) + ) + (type (;0;) (func (param "s" string))) + (type (;1;) + (instance + (alias outer 1 0 
(type (;0;))) + (export "log" (func (type 0))) + ) + ) + (import "new" (instance (;0;) (type 1))) + (core module (;1;) + (type (;0;) (func (param i32 i32))) + (import "old" "log" (func (;0;) (type 0))) + (memory (;0;) 1) + (export "memory" (memory 0)) + ) + (core module (;2;) + (type (;0;) (func (param i32 i32))) + (func (;0;) (type 0) (param i32 i32) + local.get 0 + local.get 1 + i32.const 0 + call_indirect (type 0) + ) + (func (;1;) (type 0) (param i32 i32) + local.get 0 + local.get 1 + i32.const 1 + call_indirect (type 0) + ) + (table (;0;) 2 2 funcref) + (export "0" (func 0)) + (export "1" (func 1)) + (export "$imports" (table 0)) + ) + (core module (;3;) + (type (;0;) (func (param i32 i32))) + (import "" "0" (func (;0;) (type 0))) + (import "" "1" (func (;1;) (type 0))) + (import "" "$imports" (table (;0;) 2 2 funcref)) + (elem (;0;) (i32.const 0) func 0 1) + ) + (core instance (;0;) (instantiate 2)) + (alias core export 0 "1" (core func (;0;))) + (core instance (;1;) + (export "log" (func 0)) + ) + (core instance (;2;) (instantiate 1 + (with "old" (instance 1)) + ) + ) + (alias core export 2 "memory" (core memory (;0;))) + (alias core export 0 "0" (core func (;1;))) + (core instance (;3;) + (export "log" (func 1)) + ) + (core instance (;4;) (instantiate 0 + (with "new" (instance 3)) + ) + ) + (alias core export 0 "$imports" (core table (;0;))) + (alias export 0 "log" (func (;0;))) + (core func (;2;) (canon lower (func 0) (memory 0) string-encoding=utf8)) + (alias core export 4 "log" (core func (;3;))) + (core instance (;5;) + (export "$imports" (table 0)) + (export "0" (func 2)) + (export "1" (func 3)) + ) + (core instance (;6;) (instantiate 3 + (with "" (instance 5)) + ) + ) +) \ No newline at end of file diff --git a/crates/wit-component/tests/components/adapt-memory-simple/module.wat b/crates/wit-component/tests/components/adapt-memory-simple/module.wat new file mode 100644 index 000000000..5bdcb0eaf --- /dev/null +++ 
b/crates/wit-component/tests/components/adapt-memory-simple/module.wat @@ -0,0 +1,4 @@ +(module + (import "old" "log" (func (param i32 i32))) + (memory (export "memory") 1) +) diff --git a/crates/wit-component/tests/components/adapt-missing-memory/adapt-old-import-new.wit b/crates/wit-component/tests/components/adapt-missing-memory/adapt-old-import-new.wit new file mode 100644 index 000000000..7f5b4d13d --- /dev/null +++ b/crates/wit-component/tests/components/adapt-missing-memory/adapt-old-import-new.wit @@ -0,0 +1 @@ +log: func(s: string) diff --git a/crates/wit-component/tests/components/adapt-missing-memory/adapt-old.wat b/crates/wit-component/tests/components/adapt-missing-memory/adapt-old.wat new file mode 100644 index 000000000..deb5d349d --- /dev/null +++ b/crates/wit-component/tests/components/adapt-missing-memory/adapt-old.wat @@ -0,0 +1,4 @@ +(module + (import "new" "log" (func $log (param i32 i32))) + (export "log" (func $log)) +) diff --git a/crates/wit-component/tests/components/adapt-missing-memory/error.txt b/crates/wit-component/tests/components/adapt-missing-memory/error.txt new file mode 100644 index 000000000..33946ad75 --- /dev/null +++ b/crates/wit-component/tests/components/adapt-missing-memory/error.txt @@ -0,0 +1 @@ +module does not export a memory named `memory` \ No newline at end of file diff --git a/crates/wit-component/tests/components/adapt-missing-memory/module.wat b/crates/wit-component/tests/components/adapt-missing-memory/module.wat new file mode 100644 index 000000000..1a705ff1a --- /dev/null +++ b/crates/wit-component/tests/components/adapt-missing-memory/module.wat @@ -0,0 +1,3 @@ +(module + (import "old" "log" (func (param i32 i32))) +) diff --git a/crates/wit-component/tests/components/adapt-preview1/adapt-wasi_snapshot_preview1-import-my_wasi.wit b/crates/wit-component/tests/components/adapt-preview1/adapt-wasi_snapshot_preview1-import-my_wasi.wit new file mode 100644 index 000000000..34edd66dc --- /dev/null +++ 
b/crates/wit-component/tests/components/adapt-preview1/adapt-wasi_snapshot_preview1-import-my_wasi.wit @@ -0,0 +1,6 @@ +// This is the interface imported by the `adapt-*.wat` file which is used +// to implement the `wasi_snapshot_preview1` interface. + +random-get: func(size: u32) -> list<u8> +proc-exit: func(code: u32) +something-not-used: func() diff --git a/crates/wit-component/tests/components/adapt-preview1/adapt-wasi_snapshot_preview1.wat b/crates/wit-component/tests/components/adapt-preview1/adapt-wasi_snapshot_preview1.wat new file mode 100644 index 000000000..b5a214409 --- /dev/null +++ b/crates/wit-component/tests/components/adapt-preview1/adapt-wasi_snapshot_preview1.wat @@ -0,0 +1,13 @@ +;; this is a polyfill module that translates from wasi-preview1 to a different +;; interface + +(module + (import "my_wasi" "proc-exit" (func $proc_exit (param i32))) + (func (export "proc_exit") (param i32) + local.get 0 + call $proc_exit + ) + (func (export "random_get") (param i32 i32) (result i32) + i32.const 0) + (func (export "something_else")) +) diff --git a/crates/wit-component/tests/components/adapt-preview1/component.wat b/crates/wit-component/tests/components/adapt-preview1/component.wat new file mode 100644 index 000000000..0e74a9222 --- /dev/null +++ b/crates/wit-component/tests/components/adapt-preview1/component.wat @@ -0,0 +1,108 @@ +(component + (core module (;0;) + (type (;0;) (func (param i32))) + (type (;1;) (func (param i32 i32) (result i32))) + (import "my_wasi" "proc-exit" (func $proc_exit (;0;) (type 0))) + (func (;1;) (type 0) (param i32) + local.get 0 + call $proc_exit + ) + (func (;2;) (type 1) (param i32 i32) (result i32) + i32.const 0 + ) + (export "proc_exit" (func 1)) + (export "random_get" (func 2)) + ) + (type (;0;) (func)) + (type (;1;) + (instance + (alias outer 1 0 (type (;0;))) + (export "foo" (func (type 0))) + ) + ) + (type (;2;) (func (param "code" u32))) + (type (;3;) + (instance + (alias outer 1 2 (type (;0;))) + (export 
"proc-exit" (func (type 0))) + ) + ) + (import "foo" (instance (;0;) (type 1))) + (import "my_wasi" (instance (;1;) (type 3))) + (core module (;1;) + (type (;0;) (func)) + (type (;1;) (func (param i32))) + (type (;2;) (func (param i32 i32) (result i32))) + (import "foo" "foo" (func (;0;) (type 0))) + (import "wasi_snapshot_preview1" "proc_exit" (func (;1;) (type 1))) + (import "wasi_snapshot_preview1" "random_get" (func (;2;) (type 2))) + (memory (;0;) 1) + (export "memory" (memory 0)) + ) + (core module (;2;) + (type (;0;) (func (param i32))) + (type (;1;) (func (param i32 i32) (result i32))) + (func (;0;) (type 0) (param i32) + local.get 0 + i32.const 0 + call_indirect (type 0) + ) + (func (;1;) (type 1) (param i32 i32) (result i32) + local.get 0 + local.get 1 + i32.const 1 + call_indirect (type 1) + ) + (table (;0;) 2 2 funcref) + (export "0" (func 0)) + (export "1" (func 1)) + (export "$imports" (table 0)) + ) + (core module (;3;) + (type (;0;) (func (param i32))) + (type (;1;) (func (param i32 i32) (result i32))) + (import "" "0" (func (;0;) (type 0))) + (import "" "1" (func (;1;) (type 1))) + (import "" "$imports" (table (;0;) 2 2 funcref)) + (elem (;0;) (i32.const 0) func 0 1) + ) + (core instance (;0;) (instantiate 2)) + (alias export 0 "foo" (func (;0;))) + (core func (;0;) (canon lower (func 0))) + (core instance (;1;) + (export "foo" (func 0)) + ) + (alias core export 0 "0" (core func (;1;))) + (alias core export 0 "1" (core func (;2;))) + (core instance (;2;) + (export "proc_exit" (func 1)) + (export "random_get" (func 2)) + ) + (core instance (;3;) (instantiate 1 + (with "foo" (instance 1)) + (with "wasi_snapshot_preview1" (instance 2)) + ) + ) + (alias core export 3 "memory" (core memory (;0;))) + (alias export 1 "proc-exit" (func (;1;))) + (core func (;3;) (canon lower (func 1))) + (core instance (;4;) + (export "proc-exit" (func 3)) + ) + (core instance (;5;) (instantiate 0 + (with "my_wasi" (instance 4)) + ) + ) + (alias core export 0 "$imports" 
(core table (;0;))) + (alias core export 5 "proc_exit" (core func (;4;))) + (alias core export 5 "random_get" (core func (;5;))) + (core instance (;6;) + (export "$imports" (table 0)) + (export "0" (func 4)) + (export "1" (func 5)) + ) + (core instance (;7;) (instantiate 3 + (with "" (instance 6)) + ) + ) +) \ No newline at end of file diff --git a/crates/wit-component/tests/components/adapt-preview1/import-foo.wit b/crates/wit-component/tests/components/adapt-preview1/import-foo.wit new file mode 100644 index 000000000..7c4c5bfc3 --- /dev/null +++ b/crates/wit-component/tests/components/adapt-preview1/import-foo.wit @@ -0,0 +1 @@ +foo: func() diff --git a/crates/wit-component/tests/components/adapt-preview1/module.wat b/crates/wit-component/tests/components/adapt-preview1/module.wat new file mode 100644 index 000000000..370ede67a --- /dev/null +++ b/crates/wit-component/tests/components/adapt-preview1/module.wat @@ -0,0 +1,11 @@ +(module + ;; import something from an external interface + (import "foo" "foo" (func)) + + ;; import some wasi functions + (import "wasi_snapshot_preview1" "proc_exit" (func (param i32))) + (import "wasi_snapshot_preview1" "random_get" (func (param i32 i32) (result i32))) + + ;; required by wasi + (memory (export "memory") 1) +) diff --git a/crates/wit-component/tests/components/import-conflict/component.wat b/crates/wit-component/tests/components/import-conflict/component.wat index 5502d9d28..81d11445a 100644 --- a/crates/wit-component/tests/components/import-conflict/component.wat +++ b/crates/wit-component/tests/components/import-conflict/component.wat @@ -70,23 +70,23 @@ (elem (;0;) (i32.const 0) func 0 1) ) (core instance (;0;) (instantiate 1)) - (alias core export 0 "0" (core func (;0;))) + (alias export 2 "a" (func (;0;))) + (core func (;0;) (canon lower (func 0))) (core instance (;1;) (export "a" (func 0)) ) - (alias core export 0 "1" (core func (;1;))) + (alias core export 0 "0" (core func (;1;))) (core instance (;2;) - (export 
"baz" (func 1)) + (export "a" (func 1)) ) - (alias export 2 "a" (func (;0;))) - (core func (;2;) (canon lower (func 0))) + (alias core export 0 "1" (core func (;2;))) (core instance (;3;) - (export "a" (func 2)) + (export "baz" (func 2)) ) (core instance (;4;) (instantiate 0 - (with "bar" (instance 1)) - (with "baz" (instance 2)) - (with "foo" (instance 3)) + (with "foo" (instance 1)) + (with "bar" (instance 2)) + (with "baz" (instance 3)) ) ) (alias core export 4 "memory" (core memory (;0;))) diff --git a/crates/wit-component/tests/components/imports/component.wat b/crates/wit-component/tests/components/imports/component.wat index c6a1f36b8..48ee50866 100644 --- a/crates/wit-component/tests/components/imports/component.wat +++ b/crates/wit-component/tests/components/imports/component.wat @@ -88,38 +88,38 @@ (elem (;0;) (i32.const 0) func 0 1) ) (core instance (;0;) (instantiate 1)) - (alias core export 0 "0" (core func (;0;))) - (alias export 0 "bar2" (func (;0;))) - (core func (;1;) (canon lower (func 0))) + (alias export 2 "foo1" (func (;0;))) + (core func (;0;) (canon lower (func 0))) + (alias export 2 "foo2" (func (;1;))) + (core func (;1;) (canon lower (func 1))) + (alias export 2 "foo3" (func (;2;))) + (core func (;2;) (canon lower (func 2))) (core instance (;1;) - (export "bar1" (func 0)) - (export "bar2" (func 1)) + (export "foo1" (func 0)) + (export "foo2" (func 1)) + (export "foo3" (func 2)) ) - (alias core export 0 "1" (core func (;2;))) - (alias export 1 "baz2" (func (;1;))) - (core func (;3;) (canon lower (func 1))) - (alias export 1 "baz3" (func (;2;))) - (core func (;4;) (canon lower (func 2))) + (alias core export 0 "0" (core func (;3;))) + (alias export 0 "bar2" (func (;3;))) + (core func (;4;) (canon lower (func 3))) (core instance (;2;) - (export "baz1" (func 2)) - (export "baz2" (func 3)) - (export "baz3" (func 4)) + (export "bar1" (func 3)) + (export "bar2" (func 4)) ) - (alias export 2 "foo1" (func (;3;))) - (core func (;5;) (canon lower 
(func 3))) - (alias export 2 "foo2" (func (;4;))) + (alias core export 0 "1" (core func (;5;))) + (alias export 1 "baz2" (func (;4;))) (core func (;6;) (canon lower (func 4))) - (alias export 2 "foo3" (func (;5;))) + (alias export 1 "baz3" (func (;5;))) (core func (;7;) (canon lower (func 5))) (core instance (;3;) - (export "foo1" (func 5)) - (export "foo2" (func 6)) - (export "foo3" (func 7)) + (export "baz1" (func 5)) + (export "baz2" (func 6)) + (export "baz3" (func 7)) ) (core instance (;4;) (instantiate 0 - (with "bar" (instance 1)) - (with "baz" (instance 2)) - (with "foo" (instance 3)) + (with "foo" (instance 1)) + (with "bar" (instance 2)) + (with "baz" (instance 3)) ) ) (alias core export 4 "memory" (core memory (;0;))) From 94afc0995027e0fcedd38b4061072cf5f44dbf9d Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 3 Oct 2022 08:21:15 -0700 Subject: [PATCH 02/10] Update the gc translation macro --- Cargo.lock | 7 ++- crates/wit-component/src/adapter/gc.rs | 73 +++++++++++++++----------- 2 files changed, 45 insertions(+), 35 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 49692912d..34c2bd010 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1585,7 +1585,7 @@ dependencies = [ [[package]] name = "wasm-encoder" version = "0.17.0" -source = "git+https://github.com/alexcrichton/wasm-tools?branch=change-some-names#fbe9292d713440a285ae1fd0d037ada6dd502a8f" +source = "git+https://github.com/alexcrichton/wasm-tools?branch=change-some-names#a65f3cbcea01437371c854cd4aea8cd44aac88f9" dependencies = [ "leb128", ] @@ -1602,7 +1602,7 @@ dependencies = [ [[package]] name = "wasmparser" version = "0.91.0" -source = "git+https://github.com/alexcrichton/wasm-tools?branch=change-some-names#fbe9292d713440a285ae1fd0d037ada6dd502a8f" +source = "git+https://github.com/alexcrichton/wasm-tools?branch=change-some-names#a65f3cbcea01437371c854cd4aea8cd44aac88f9" dependencies = [ "indexmap", ] @@ -1610,8 +1610,7 @@ dependencies = [ [[package]] name = "wasmprinter" version = 
"0.2.40" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85b5931cf673c4bece6299719c024c08ebe52cbac7124160487a602c81e598c8" +source = "git+https://github.com/alexcrichton/wasm-tools?branch=change-some-names#a65f3cbcea01437371c854cd4aea8cd44aac88f9" dependencies = [ "anyhow", "wasmparser 0.91.0", diff --git a/crates/wit-component/src/adapter/gc.rs b/crates/wit-component/src/adapter/gc.rs index 5aa6cfbfa..15990f339 100644 --- a/crates/wit-component/src/adapter/gc.rs +++ b/crates/wit-component/src/adapter/gc.rs @@ -636,6 +636,7 @@ macro_rules! define_visit { (mark_live $self:ident $arg:ident dst_table) => {$self.table($arg);}; (mark_live $self:ident $arg:ident table_index) => {$self.table($arg);}; (mark_live $self:ident $arg:ident table) => {$self.table($arg);}; + (mark_live $self:ident $arg:ident table_index) => {$self.table($arg);}; (mark_live $self:ident $arg:ident global_index) => {$self.global($arg);}; (mark_live $self:ident $arg:ident function_index) => {$self.func($arg);}; (mark_live $self:ident $arg:ident mem) => {$self.memory($arg);}; @@ -647,7 +648,6 @@ macro_rules! define_visit { (mark_live $self:ident $arg:ident lanes) => {}; (mark_live $self:ident $arg:ident flags) => {}; (mark_live $self:ident $arg:ident value) => {}; - (mark_live $self:ident $arg:ident segment) => {}; (mark_live $self:ident $arg:ident mem_byte) => {}; (mark_live $self:ident $arg:ident table_byte) => {}; (mark_live $self:ident $arg:ident local_index) => {}; @@ -655,6 +655,8 @@ macro_rules! 
define_visit { (mark_live $self:ident $arg:ident tag_index) => {}; (mark_live $self:ident $arg:ident targets) => {}; (mark_live $self:ident $arg:ident ty) => {}; + (mark_live $self:ident $arg:ident data_index) => {}; + (mark_live $self:ident $arg:ident elem_index) => {}; } impl<'a> VisitOperator<'a> for Module<'a> { @@ -715,10 +717,6 @@ impl Encoder { } } -fn unsupported_insn(x: &str) -> wasm_encoder::Instruction<'static> { - panic!("unsupported instruction {x}") -} - // This is a helper macro to translate all `wasmparser` instructions to // `wasm-encoder` instructions without having to list out every single // instruction itself. @@ -732,7 +730,12 @@ macro_rules! define_encode { fn $visit(&mut self, _offset: usize $(, $($arg: $argty),*)?) { #[allow(unused_imports)] use wasm_encoder::Instruction::*; - let insn = define_encode!(mk self $op $({ $($arg: $argty),* })?); + $( + $( + let $arg = define_encode!(map self $arg $arg); + )* + )? + let insn = define_encode!(mk $op $($($arg)*)?); insn.encode(&mut self.buf); } )* @@ -740,43 +743,41 @@ macro_rules! 
define_encode { // No-payload instructions are named the same in wasmparser as they are in // wasm-encoder - (mk $self:ident $op:ident) => ($op); - - // Instructions supported in wasmparser but not in wasm-encoder - (mk $self:ident ReturnCall $x:tt) => (unsupported_insn("ReturnCall")); - (mk $self:ident ReturnCallIndirect $x:tt) => (unsupported_insn("ReturnCallIndirect")); + (mk $op:ident) => ($op); // Instructions which need "special care" to map from wasmparser to // wasm-encoder - (mk $self:ident BrTable { $arg:ident: $ty:ty }) => ({ - let targets = $arg.targets().map(|i| i.unwrap()).collect::>(); - BrTable(targets.into(), $arg.default()) + (mk BrTable $arg:ident) => ({ + BrTable($arg.0, $arg.1) }); - (mk $self:ident CallIndirect { $ty:ident: $a:ty, $table:ident: $b:ty, table_byte: $c:ty }) => ({ - CallIndirect { ty: $self.types.remap($ty), table: $self.tables.remap($table) } + (mk CallIndirect $ty:ident $table:ident $table_byte:ident) => ({ + drop($table_byte); + CallIndirect { ty: $ty, table: $table } }); - (mk $self:ident MemorySize { $mem:ident: $a:ty, mem_byte: $b:ty }) => ({ - MemorySize($self.memories.remap($mem)) + (mk ReturnCallIndirect $ty:ident $table:ident) => ( + ReturnCallIndirect { ty: $ty, table: $table } + ); + (mk MemorySize $mem:ident $mem_byte:ident) => ({ + drop($mem_byte); + MemorySize($mem) }); - (mk $self:ident MemoryGrow { $mem:ident: $a:ty, mem_byte: $b:ty }) => ({ - MemoryGrow($self.memories.remap($mem)) + (mk MemoryGrow $mem:ident $mem_byte:ident) => ({ + drop($mem_byte); + MemoryGrow($mem) }); - (mk self AtomicFence $x:tt) => (AtomicFence); - (mk self I32Const { $v:ident: $t:ty }) => (I32Const($v)); - (mk self I64Const { $v:ident: $t:ty }) => (I64Const($v)); - (mk self F32Const { $v:ident: $t:ty }) => (F32Const(f32::from_bits($v.bits()))); - (mk self F64Const { $v:ident: $t:ty }) => (F64Const(f64::from_bits($v.bits()))); - (mk self V128Const { $v:ident: $t:ty }) => (V128Const($v.i128())); + (mk I32Const $v:ident) => (I32Const($v)); + 
(mk I64Const $v:ident) => (I64Const($v)); + (mk F32Const $v:ident) => (F32Const(f32::from_bits($v.bits()))); + (mk F64Const $v:ident) => (F64Const(f64::from_bits($v.bits()))); + (mk V128Const $v:ident) => (V128Const($v.i128())); // Catch-all for the translation of one payload argument which is typically // represented as a tuple-enum in wasm-encoder. - (mk $self:ident $op:ident { $arg:ident: $t:ty }) => ($op(define_encode!(map $self $arg $arg))); + (mk $op:ident $arg:ident) => ($op($arg)); // Catch-all of everything else where the wasmparser fields are simply // translated to wasm-encoder fields. - (mk $self:ident $op:ident { $($arg:ident: $ty:ty),* }) => ($op { - $($arg: define_encode!(map $self $arg $arg)),* - }); + (mk $op:ident $($arg:ident)*) => ($op { $($arg),* }); // Individual cases of mapping one argument type to another, similar tot he // `define_visit` macro above. @@ -790,13 +791,23 @@ macro_rules! define_encode { (map $self:ident $arg:ident src_mem) => {$self.memories.remap($arg)}; (map $self:ident $arg:ident dst_mem) => {$self.memories.remap($arg)}; (map $self:ident $arg:ident table) => {$self.tables.remap($arg)}; + (map $self:ident $arg:ident table_index) => {$self.tables.remap($arg)}; (map $self:ident $arg:ident src_table) => {$self.tables.remap($arg)}; (map $self:ident $arg:ident dst_table) => {$self.tables.remap($arg)}; + (map $self:ident $arg:ident type_index) => {$self.types.remap($arg)}; (map $self:ident $arg:ident ty) => {valty($arg)}; (map $self:ident $arg:ident local_index) => {$arg}; - (map $self:ident $arg:ident segment) => {$arg}; (map $self:ident $arg:ident lane) => {$arg}; (map $self:ident $arg:ident lanes) => {$arg}; + (map $self:ident $arg:ident elem_index) => {$arg}; + (map $self:ident $arg:ident data_index) => {$arg}; + (map $self:ident $arg:ident table_byte) => {$arg}; + (map $self:ident $arg:ident mem_byte) => {$arg}; + (map $self:ident $arg:ident value) => {$arg}; + (map $self:ident $arg:ident targets) => (( + 
$arg.targets().map(|i| i.unwrap()).collect::>().into(), + $arg.default(), + )); } impl<'a> VisitOperator<'a> for Encoder { From 0823b9b5a78c0044215128a91f16349a031c8e9d Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 3 Oct 2022 09:05:32 -0700 Subject: [PATCH 03/10] Add gc comments --- Cargo.lock | 30 ++- Cargo.toml | 12 +- crates/test-helpers/build.rs | 16 +- crates/wit-component/Cargo.toml | 6 +- crates/wit-component/src/adapter.rs | 19 -- crates/wit-component/src/cli.rs | 63 ++++- crates/wit-component/src/encoding.rs | 242 ++++++++++++++++-- crates/wit-component/src/{adapter => }/gc.rs | 176 ++++++++----- crates/wit-component/src/lib.rs | 2 +- .../adapt-unused/adapt-old-import-new.wit | 1 + .../components/adapt-unused/adapt-old.wat | 4 + .../components/adapt-unused/component.wat | 4 + .../tests/components/adapt-unused/module.wat | 1 + 13 files changed, 437 insertions(+), 139 deletions(-) delete mode 100644 crates/wit-component/src/adapter.rs rename crates/wit-component/src/{adapter => }/gc.rs (83%) create mode 100644 crates/wit-component/tests/components/adapt-unused/adapt-old-import-new.wit create mode 100644 crates/wit-component/tests/components/adapt-unused/adapt-old.wat create mode 100644 crates/wit-component/tests/components/adapt-unused/component.wat create mode 100644 crates/wit-component/tests/components/adapt-unused/module.wat diff --git a/Cargo.lock b/Cargo.lock index 34c2bd010..ecbbf57c0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1585,7 +1585,17 @@ dependencies = [ [[package]] name = "wasm-encoder" version = "0.17.0" -source = "git+https://github.com/alexcrichton/wasm-tools?branch=change-some-names#a65f3cbcea01437371c854cd4aea8cd44aac88f9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e7ca71c70a6de5b10968ae4d298e548366d9cd9588176e6ff8866f3c49c96ee" +dependencies = [ + "leb128", +] + +[[package]] +name = "wasm-encoder" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "c64ac98d5d61192cc45c701b7e4bd0b9aff91e2edfc7a088406cfe2288581e2c" dependencies = [ "leb128", ] @@ -1601,19 +1611,21 @@ dependencies = [ [[package]] name = "wasmparser" -version = "0.91.0" -source = "git+https://github.com/alexcrichton/wasm-tools?branch=change-some-names#a65f3cbcea01437371c854cd4aea8cd44aac88f9" +version = "0.92.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da34cec2a8c23db906cdf8b26e988d7a7f0d549eb5d51299129647af61a1b37" dependencies = [ "indexmap", ] [[package]] name = "wasmprinter" -version = "0.2.40" -source = "git+https://github.com/alexcrichton/wasm-tools?branch=change-some-names#a65f3cbcea01437371c854cd4aea8cd44aac88f9" +version = "0.2.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca4374ec27194a12b85aa0e1681a42d5800e97f11c036fa85dea8087c8ccb10b" dependencies = [ "anyhow", - "wasmparser 0.91.0", + "wasmparser 0.92.0", ] [[package]] @@ -1835,7 +1847,7 @@ dependencies = [ "leb128", "memchr", "unicode-width", - "wasm-encoder", + "wasm-encoder 0.17.0", ] [[package]] @@ -2152,8 +2164,8 @@ dependencies = [ "indexmap", "log", "pretty_assertions", - "wasm-encoder", - "wasmparser 0.91.0", + "wasm-encoder 0.18.0", + "wasmparser 0.92.0", "wasmprinter", "wat", "wit-parser", diff --git a/Cargo.toml b/Cargo.toml index 4081d5cee..a4e3a9028 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,13 +21,18 @@ anyhow = "1.0.65" bitflags = "1.3.2" heck = "0.3" pulldown-cmark = { version = "0.8", default-features = false } +<<<<<<< HEAD clap = { version = "4.0.9", features = ["derive"] } +======= +clap = { version = "3.2.22", features = ["derive"] } +env_logger = "0.9.1" +>>>>>>> Add gc comments wasmtime = "1.0" wasmtime-wasi = "1.0" -wasmprinter = "0.2.40" -wasmparser = "0.91.0" -wasm-encoder = "0.17.0" +wasmprinter = "0.2.41" +wasmparser = "0.92.0" +wasm-encoder = "0.18.0" wat = "1.0.49" wit-bindgen-core = { path = 'crates/bindgen-core', version = '0.2.0' } @@ -42,6 +47,7 @@ 
wit-bindgen-gen-rust-lib = { path = 'crates/gen-rust-lib', version = '0.2.0' } wit-bindgen-guest-rust = { path = 'crates/guest-rust', version = '0.2.0' } wit-bindgen-host-wasmtime-rust = { path = 'crates/host-wasmtime-rust', version = '0.2.0' } wit-parser = { path = 'crates/wit-parser', version = '0.2.0' } +wit-component = { path = 'crates/wit-component', version = '0.2.0' } [[bin]] name = "wit-bindgen" diff --git a/crates/test-helpers/build.rs b/crates/test-helpers/build.rs index ea88e1601..93da1f183 100644 --- a/crates/test-helpers/build.rs +++ b/crates/test-helpers/build.rs @@ -61,16 +61,14 @@ fn main() { let import = Interface::parse_file(&test_dir.join("imports.wit")).unwrap(); let export = Interface::parse_file(&test_dir.join("exports.wit")).unwrap(); - let imports = &[import]; - let exports = &[export]; let mut files = Default::default(); // TODO: should combine this into one wit_bindgen_gen_guest_c::Opts::default() .build() - .generate_all(imports, &[], &mut files); + .generate_all(&[import], &[], &mut files); wit_bindgen_gen_guest_c::Opts::default() .build() - .generate_all(&[], exports, &mut files); + .generate_all(&[], &[export], &mut files); let out_dir = out_dir.join(format!( "c-{}", @@ -122,16 +120,6 @@ fn main() { test_dir.file_stem().unwrap().to_str().unwrap().to_string(), out_wasm.to_str().unwrap().to_string(), )); - - let wasm = std::fs::read(&out_wasm).unwrap(); - wit_component::ComponentEncoder::default() - .validate(true) - .module(&wasm) - .imports(imports) - .interface(&exports[0]) - .wasi(true) - .encode() - .unwrap(); } } diff --git a/crates/wit-component/Cargo.toml b/crates/wit-component/Cargo.toml index bb2c4654f..90924eb71 100644 --- a/crates/wit-component/Cargo.toml +++ b/crates/wit-component/Cargo.toml @@ -27,17 +27,15 @@ wit-parser = { workspace = true } anyhow = { workspace = true } indexmap = "1.9.1" clap = { workspace = true, optional = true } -env_logger = { version = "0.9.1", optional = true } +env_logger = { workspace = true, 
optional = true } log = { version = "0.4.17", optional = true } bitflags = { workspace = true } -wasmprinter = "*" - [dev-dependencies] wasmprinter = { workspace = true } glob = "0.3.0" pretty_assertions = "1.3.0" -env_logger = "0.9.1" +env_logger = { workspace = true } [features] default = ["cli"] diff --git a/crates/wit-component/src/adapter.rs b/crates/wit-component/src/adapter.rs deleted file mode 100644 index 65525f598..000000000 --- a/crates/wit-component/src/adapter.rs +++ /dev/null @@ -1,19 +0,0 @@ -use crate::validation::ValidatedAdapter; -use anyhow::{Context, Result}; -use indexmap::IndexMap; -use wasmparser::FuncType; -use wit_parser::Interface; - -mod gc; - -pub fn adapt<'a>( - wasm: &[u8], - interface: &'a Interface, - required: &IndexMap<&str, FuncType>, -) -> Result<(Vec, ValidatedAdapter<'a>)> { - let wasm = gc::run(wasm, required) - .context("failed to reduce input adapter module to its minimal size")?; - let info = crate::validation::validate_adapter_module(&wasm, interface, required) - .context("failed to validate the imports of the minimized adapter module")?; - Ok((wasm, info)) -} diff --git a/crates/wit-component/src/cli.rs b/crates/wit-component/src/cli.rs index 7c994658b..cef566ca7 100644 --- a/crates/wit-component/src/cli.rs +++ b/crates/wit-component/src/cli.rs @@ -11,11 +11,24 @@ use clap::Parser; use std::path::{Path, PathBuf}; use wit_parser::Interface; -fn parse_named_interface(s: &str) -> Result { - let (name, path) = s - .split_once('=') - .ok_or_else(|| anyhow::anyhow!("expected a value with format `NAME=INTERFACE`"))?; +fn parse_optionally_name_file(s: &str) -> (&str, &str) { + let mut parts = s.splitn(2, '='); + let name_or_path = parts.next().unwrap(); + match parts.next() { + Some(path) => (name_or_path, path), + None => { + let name = Path::new(name_or_path) + .file_stem() + .unwrap() + .to_str() + .unwrap(); + (name, name_or_path) + } + } +} +fn parse_named_interface(s: &str) -> Result { + let (name, path) = 
parse_optionally_name_file(s); parse_interface(Some(name.to_string()), Path::new(path)) } @@ -36,6 +49,26 @@ fn parse_interface(name: Option, path: &Path) -> Result { Ok(interface) } +fn parse_adapter(s: &str) -> Result<(String, Vec, Interface)> { + let mut parts = s.splitn(2, ':'); + let maybe_named_module = parts.next().unwrap(); + let (name, path) = parse_optionally_name_file(maybe_named_module); + let wasm = wat::parse_file(path)?; + + match parts.next() { + Some(maybe_named_interface) => { + let interface = parse_named_interface(maybe_named_interface)?; + Ok((name.to_string(), wasm, interface)) + } + None => { + // TODO: implement inferring the `interface` from the `wasm` + // specified + drop((name, wasm)); + bail!("inferring from the core wasm module is not supported at this time") + } + } +} + /// WebAssembly component encoder. /// /// Encodes a WebAssembly component from a core WebAssembly module. @@ -43,13 +76,27 @@ fn parse_interface(name: Option, path: &Path) -> Result { #[clap(name = "component-encoder", version = env!("CARGO_PKG_VERSION"))] pub struct WitComponentApp { /// The path to an interface definition file the component imports. - #[clap(long = "import", value_name = "NAME=INTERFACE", value_parser = parse_named_interface)] + #[clap(long = "import", value_name = "[NAME=]INTERFACE", value_parser = parse_named_interface)] pub imports: Vec, /// The path to an interface definition file the component exports. - #[clap(long = "export", value_name = "NAME=INTERFACE", value_parser = parse_named_interface)] + #[clap(long = "export", value_name = "[NAME=]INTERFACE", value_parser = parse_named_interface)] pub exports: Vec, + /// The path to an adapter module to satisfy imports. + /// + /// An adapter module can be used to translate the `wasi_snapshot_preview1` + /// ABI, for example, to one that uses the component model. 
The first + /// `[NAME=]` specified in the argument is inferred from the name of file + /// specified by `MODULE` if not present and is the name of the import + /// module that's being implemented (e.g. `wasi_snapshot_preview1.wasm`). + /// + /// The second part of this argument, optionally specified, is the interface + /// that this adapter module imports. If not specified then the interface + /// imported is inferred from the adapter module itself. + #[clap(long = "adapt", value_name = "[NAME=]MODULE[:[NAME=]INTERFACE]", value_parser = parse_adapter)] + pub adapters: Vec<(String, Vec, Interface)>, + /// The path of the output WebAssembly component. #[clap(long, short = 'o', value_name = "OUTPUT")] pub output: Option, @@ -97,6 +144,10 @@ impl WitComponentApp { .exports(&self.exports) .validate(!self.skip_validation); + for (name, wasm, interface) in self.adapters.iter() { + encoder = encoder.adapter(name, wasm, interface); + } + if let Some(interface) = &self.interface { encoder = encoder.interface(interface); } diff --git a/crates/wit-component/src/encoding.rs b/crates/wit-component/src/encoding.rs index a2a520622..dc6dbd613 100644 --- a/crates/wit-component/src/encoding.rs +++ b/crates/wit-component/src/encoding.rs @@ -1,5 +1,62 @@ +//! Support for encoding a core wasm module into a component. +//! +//! This module, at a high level, is tasked with transforming a core wasm +//! module into a component. This will process the imports/exports of the core +//! wasm module and translate between the `wit-parser` AST and the component +//! model binary format, producing a final component which will import +//! `*.wit` defined interfaces and export `*.wit` defined interfaces as well +//! with everything wired up internally according to the canonical ABI and such. +//! +//! This doc block here is not currently 100% complete and doesn't cover the +//! full functionality of this module. +//! +//! # Adapter Modules +//! +//!
One feature of this encoding process which is non-obvious is the support for +//! "adapter modules". The general idea here is that historical host API +//! definitions have been around for quite some time, such as +//! `wasi_snapshot_preview1`, but these host API definitions are not compatible +//! with the canonical ABI or component model exactly. These APIs, however, can +//! in most situations be roughly adapted to component-model equivalents. This +//! is where adapter modules come into play, they're converting from some +//! arbitrary API/ABI into a component-model using API. +//! +//! An adapter module is a separately compiled `*.wasm` blob which will export +//! functions matching the desired ABI (e.g. exporting functions matching the +//! `wasi_snapshot_preview1` ABI). The `*.wasm` blob will then import functions +//! in the canonical ABI and internally adapt the exported functions to the +//! imported functions. The encoding support in this module is what wires +//! everything up and makes sure that everything is imported and exported to the +//! right place. Adapter modules currently always use "indirect lowerings" +//! meaning that a shim module is created and provided as the imports to the +//! main core wasm module, and the shim module is "filled in" at a later time +//! during the instantiation process. +//! +//! Adapter modules are not intended to be general purpose and are currently +//! very restrictive, namely: +//! +//! * They must import a linear memory and not define their own linear memory +//! otherwise. In other words they import memory and cannot use multi-memory. +//! * They cannot define any `elem` or `data` segments since otherwise there's +//! no knowledge ahead-of-time of where their data or element segments could +//! go. This means things like no panics, no indirect calls, etc. +//! * Only one mutable global is allowed and it's assumed to be the stack +//! pointer. This stack pointer is automatically configured with an injected +//! 
`start` function that is allocated with `memory.grow (i32.const 1)`, +//! meaning that the shim module has 64k of stack space and no protection if +//! that overflows. +//! +//! This means that adapter modules are not meant to be written by everyone. +//! It's assumed that these will be relatively few and far between yet still a +//! crucial part of the transition process to the component model since +//! otherwise there's no way to run a `wasi_snapshot_preview1` module within the +//! component model. + use crate::{ - validation::{expected_export_name, validate_module, ValidatedAdapter, ValidatedModule}, + validation::{ + expected_export_name, validate_adapter_module, validate_module, ValidatedAdapter, + ValidatedModule, + }, StringEncoding, }; use anyhow::{anyhow, bail, Context, Result}; @@ -883,9 +940,17 @@ impl<'a> TypeEncoder<'a> { } bitflags::bitflags! { + /// Options in the `canon lower` or `canon lift` required for a particular + /// function. struct RequiredOptions: u8 { + /// A memory must be specified, typically the "main module"'s memory + /// export. const MEMORY = 1 << 0; + /// A `realloc` function must be specified, typically named + /// `cabi_realloc`. const REALLOC = 1 << 1; + /// A string encoding must be specified, which is always utf-8 for now + /// today. const STRING_ENCODING = 1 << 2; } } @@ -1026,6 +1091,8 @@ impl RequiredOptions { } bitflags::bitflags! { + /// Flags about what kinds of types are present within the recursive + /// structure of a type. struct TypeContents: u8 { const STRING = 1 << 0; const LIST = 1 << 1; @@ -1152,6 +1219,8 @@ impl<'a> EncodingState<'a> { // Encode a shim instantiation if needed let shims = self.encode_shim_instantiation(imports, info); + // For each instance import into the main module create a + // pseudo-core-wasm-module via a bag-of-exports.
let mut args = Vec::new(); for name in info.required_imports.keys() { let index = self.import_instance_to_lowered_core_instance( @@ -1162,7 +1231,12 @@ impl<'a> EncodingState<'a> { ); args.push((*name, ModuleArg::Instance(index))); } - for (name, funcs) in info.adapters_required.iter() { + + // For each adapter module instance imported into the core wasm module + // the appropriate shim is packaged up into a bag-of-exports instance. + // Note that adapter modules currently don't deal with + // indirect-vs-direct lowerings, everything is indirect. + for (adapter, funcs) in info.adapters_required.iter() { let shim_instance = self .shim_instance_index .expect("shim should be instantiated"); @@ -1172,20 +1246,35 @@ impl<'a> EncodingState<'a> { let index = self.component.alias_core_item( shim_instance, ExportKind::Func, - &shims.adapter_shim_names[&(*name, *func)].clone(), + &shims.shim_names[&ShimKind::Adapter { adapter, func }], ); exports.push((*func, ExportKind::Func, index)); } let index = self.component.instantiate_core_exports(exports); - args.push((*name, ModuleArg::Instance(index))); + args.push((*adapter, ModuleArg::Instance(index))); } + // Instantiate the main module now that all of its arguments have been + // prepared. With this we now have the main linear memory for + // liftings/lowerings later on as well as the adapter modules, if any, + // instantiated after the core wasm module. self.instantiate_core_module(args, info); self.instantiate_adapter_modules(imports, info, &shims); + + // With all the core wasm instances in play now the original shim + // module, if present, can be filled in with lowerings/adapters/etc. self.encode_indirect_lowerings(encoding, imports, shims) } + /// Lowers a named imported interface to a core wasm instance suitable to + /// provide as an instantiation argument to another core wasm module.
+ /// + /// * `for_module` the module that this instance is being created for, or + /// otherwise which `realloc` option is used for the lowerings. + /// * `name` - the name of the imported interface that's being lowered. + /// * `imports` - the list of all imports known for this encoding. + /// * `shims` - the indirect/adapter shims created prior, if any. fn import_instance_to_lowered_core_instance( &mut self, for_module: CustomModule<'_>, @@ -1196,16 +1285,24 @@ impl<'a> EncodingState<'a> { let (instance_index, _, import) = imports.map.get_full(name).unwrap(); let mut exports = Vec::with_capacity(import.direct.len() + import.indirect.len()); + // Add an entry for all indirect lowerings which come as an export of + // the shim module. for (i, lowering) in import.indirect.iter().enumerate() { let index = self.component.alias_core_item( self.shim_instance_index .expect("shim should be instantiated"), ExportKind::Func, - &shims.indirect_lowering_names[&(for_module, name, i)], + &shims.shim_names[&ShimKind::IndirectLowering { + interface: name, + indirect_index: i, + realloc: for_module, + }], ); exports.push((lowering.name, ExportKind::Func, index)); } + // All direct lowerings can be `canon lower`'d here immediately and + // passed as arguments. for lowering in &import.direct { let func_index = self .component @@ -1301,11 +1398,17 @@ impl<'a> EncodingState<'a> { ) -> Shims<'a> { let mut signatures = Vec::new(); let mut ret = Shims::default(); + + // For all interfaces imported into the main module record all of their + // indirect lowerings into `Shims`. for name in info.required_imports.keys() { let import = &imports.map[name]; ret.append_indirect(name, CustomModule::Main, import, &mut signatures); } + // For all required adapter modules a shim is created for each required + // function and additionally a set of shims are created for the + // interface imported into the shim module itself. 
for (adapter, funcs) in info.adapters_required.iter() { let info = &imports.adapters[adapter]; if let Some(name) = info.required_import { @@ -1327,20 +1430,23 @@ impl<'a> EncodingState<'a> { retptr: false, }); ret.list.push(Shim { - name: name.clone(), + name, // Pessimistically assume that all adapters require memory // in one form or another. While this isn't technically true // it's true enough for WASI. options: RequiredOptions::MEMORY, kind: ShimKind::Adapter { adapter, func }, }); - ret.adapter_shim_names.insert((adapter, func), name); } } if ret.list.is_empty() { return ret; } + for shim in ret.list.iter() { + ret.shim_names.insert(shim.kind.clone(), shim.name.clone()); + } + assert!(self.shim_instance_index.is_none()); assert!(self.fixups_module_index.is_none()); @@ -1472,6 +1578,12 @@ impl<'a> EncodingState<'a> { for shim in shims.list.iter() { let core_func_index = match &shim.kind { + // Indirect lowerings are a `canon lower`'d function with + // options specified from a previously instantiated instance. + // This previous instance could either be the main module or an + // adapter module, which affects the `realloc` option here. + // Currently only one linear memory is supported so the linear + // memory always comes from the main module. ShimKind::IndirectLowering { interface, indirect_index, @@ -1494,6 +1606,11 @@ impl<'a> EncodingState<'a> { .into_iter(encoding, self.memory_index, realloc)?, ) } + + // Adapter shims are defined by an export from an adapter + // instance, so use the specified name here and the previously + // created instances to get the core item that represents the + // shim. ShimKind::Adapter { adapter, func } => self.component.alias_core_item( self.adapter_instances[adapter], ExportKind::Func, @@ -1542,6 +1659,11 @@ impl<'a> EncodingState<'a> { self.instance_index = Some(instance_index); } + /// This function will instantiate all adapter modules required by + /// the main module (specified by `info`).
+ /// + /// Each adapter here is instantiated with its required imported interface, + /// if any. fn instantiate_adapter_modules( &mut self, imports: &ImportEncoder<'a>, @@ -1551,7 +1673,16 @@ impl<'a> EncodingState<'a> { for name in info.adapters_required.keys() { let info = &imports.adapters[name]; let mut args = Vec::new(); + + // If the adapter module requires a `memory` import then specify + // that here. For now assume that the module name of the memory is + // different from the imported interface. That's true enough for now + // since it's `env::memory`. if let Some((module, name)) = &info.needs_memory { + if let Some(import_name) = info.required_import { + assert!(module != import_name); + } + assert!(module != name); let memory = self.memory_index.unwrap(); let instance = self.component.instantiate_core_exports([( name.as_str(), @@ -1584,38 +1715,91 @@ impl<'a> EncodingState<'a> { } } +/// A list of "shims" which start out during the component instantiation process +/// as functions which immediately trap due to a `call_indirect`-to-`null` but +/// will get filled in by the time the component instantiation process +/// completes. +/// +/// Shims currently include: +/// +/// * "Indirect functions" lowered from imported instances where the lowering +/// requires an item exported from the main module. These are indirect due to +/// the circular dependency between the module needing an import and the +/// import needing the module. +/// +/// * Adapter modules which convert from a historical ABI to the component +/// model's ABI (e.g. wasi preview1 to preview2) get a shim since the adapters +/// are currently indicated as always requiring the memory of the main module. +/// +/// This structure is created by `encode_shim_instantiation`. #[derive(Default)] struct Shims<'a> { + /// The list of all shims that a module will require. 
list: Vec>, - indirect_lowering_names: IndexMap<(CustomModule<'a>, &'a str, usize), String>, - adapter_shim_names: IndexMap<(&'a str, &'a str), String>, + + /// A map from a shim to the name of the shim in the shim instance. + shim_names: IndexMap, String>, } struct Shim<'a> { + /// Canonical ABI options required by this shim, used during `canon lower` + /// operations. options: RequiredOptions, + + /// The name, in the shim instance, of this shim. + /// + /// Currently this is `"0"`, `"1"`, ... name: String, + + /// Precise information about what this shim is a lowering of. kind: ShimKind<'a>, } +#[derive(Debug, Copy, Clone, Hash, Eq, PartialEq)] enum ShimKind<'a> { + /// This shim is a late indirect lowering of an imported function in a + /// component which is only possible after prior core wasm modules are + /// instantiated so their memories and functions are available. IndirectLowering { + /// The name of the interface that's being lowered. interface: &'a str, + /// The index within the `indirect` array of the function being lowered. indirect_index: usize, + /// Which instance to pull the `realloc` function from, if necessary. realloc: CustomModule<'a>, }, + /// This shim is a core wasm function defined in an adapter module but isn't + /// available until the adapter module is itself instantiated. Adapter { + /// The name of the adapter module this shim comes from. adapter: &'a str, + /// The name of the export in the adapter module this shim points to. func: &'a str, }, } +/// Indicator for which module is being used for a lowering or where options +/// like `realloc` are drawn from. +/// +/// This is necessary for situations such as an imported function being lowered +/// into the main module and additionally into an adapter module. For example an +/// adapter might adapt from preview1 to preview2 for the standard library of a +/// programming language but the main module's custom application code may also +/// explicitly import from preview2. 
These two different lowerings of a preview2 +/// function are parameterized by this enumeration. #[derive(Debug, Copy, Clone, Hash, Eq, PartialEq)] enum CustomModule<'a> { + /// This points to the "main module" which is generally the "output of LLVM" + /// or what a user wrote. Main, + /// This is selecting an adapter module, identified by name here, where + /// something is being lowered into. Adapter(&'a str), } impl<'a> Shims<'a> { + /// Adds all shims necessary for the `import` provided, namely iterating + /// over its indirect lowerings and appending a shim per lowering. fn append_indirect( &mut self, name: &'a str, @@ -1631,7 +1815,7 @@ impl<'a> Shims<'a> { ); sigs.push(lowering.sig.clone()); self.list.push(Shim { - name: shim_name.clone(), + name: shim_name, options: lowering.options, kind: ShimKind::IndirectLowering { interface: name, @@ -1639,8 +1823,6 @@ impl<'a> Shims<'a> { realloc: for_module, }, }); - self.indirect_lowering_names - .insert((for_module, name, indirect_index), shim_name); } } } @@ -1664,11 +1846,22 @@ struct ImportedInterface<'a> { indirect: Vec>, } +/// Helper type used when encoding a component to have helpers that +/// simultaneously encode an item while returning its corresponding index in the +/// generated index spaces as well. #[derive(Default)] struct ComponentEncoding { + /// The binary component as created by `wasm-encoder`. component: Component, - /// The last section which was appended to during encoding. + + /// The last section which was appended to during encoding. This type is + /// generated by the `section_accessors` macro below. + /// + /// When something is encoded this is used if it matches the kind of item + /// being encoded, otherwise it's "flushed" to the output component and a + /// new section is started. 
last_section: LastSection, + // Core index spaces core_modules: u32, core_funcs: u32, @@ -1779,6 +1972,9 @@ impl ComponentEncoding { } } +// Helper macro to generate methods on `ComponentEncoding` to get specific +// section encoders that automatically flush and write out prior sections as +// necessary. macro_rules! section_accessors { ($($method:ident => $section:ident)*) => ( #[derive(Default)] @@ -1792,7 +1988,13 @@ macro_rules! section_accessors { $( fn $method(&mut self) -> &mut $section { match &self.last_section { + // The last encoded section matches the section that's + // being requested, so no change is necessary. LastSection::$section(_) => {} + + // Otherwise the last section didn't match this section, + // so flush any prior section if needed and start + // encoding the desired section of this method. _ => { self.flush(); self.last_section = LastSection::$section($section::new()); @@ -1805,6 +2007,8 @@ macro_rules! section_accessors { } )* + /// Writes out the last section into the final component binary if + /// there is a section specified, otherwise does nothing. fn flush(&mut self) { match mem::take(&mut self.last_section) { LastSection::None => {} @@ -2013,10 +2217,16 @@ impl<'a> ComponentEncoder<'a> { bail!("a module is required when encoding a component"); } + // For all required adapters lookup the corresponding adapter + // provided to this encoder, gc it to an appropriate size, and then + // register its metadata in our data structures. 
for (name, required) in info.adapters_required.iter() { let (wasm, interface) = &self.adapters[*name]; - let (min_wasm, info) = crate::adapter::adapt(wasm, interface, required)?; - state.encode_core_adapter_module(name, &min_wasm); + let wasm = crate::gc::run(wasm, required) + .context("failed to reduce input adapter module to its minimal size")?; + let info = validate_adapter_module(&wasm, interface, required) + .context("failed to validate the imports of the minimized adapter module")?; + state.encode_core_adapter_module(name, &wasm); types.encode_instance_import(interface, &info.required_funcs, &mut imports)?; imports.adapters.insert(name, info); } @@ -2031,8 +2241,6 @@ impl<'a> ComponentEncoder<'a> { let bytes = state.component.finish(); - println!("{}", wasmprinter::print_bytes(&bytes).unwrap()); - if self.validate { let mut validator = Validator::new_with_features(WasmFeatures { component_model: true, diff --git a/crates/wit-component/src/adapter/gc.rs b/crates/wit-component/src/gc.rs similarity index 83% rename from crates/wit-component/src/adapter/gc.rs rename to crates/wit-component/src/gc.rs index 15990f339..38ea95daf 100644 --- a/crates/wit-component/src/adapter/gc.rs +++ b/crates/wit-component/src/gc.rs @@ -40,8 +40,16 @@ pub fn run(wasm: &[u8], required: &IndexMap<&str, FuncType>) -> Result> module.encode() } +// Representation of a wasm module which is used to GC a module to its minimal +// set of required items necessary to implement the `exports` +// +// Note that this is not a complete representation of a wasm module since it +// doesn't represent everything such as data and element segments. This is only +// used for adapter modules which otherwise have these restrictions and makes +// this gc pass a bit easier to write. 
#[derive(Default)] struct Module<'a> { + // Definitions found when parsing a module types: Vec, tables: Vec>, globals: Vec>, @@ -50,47 +58,44 @@ struct Module<'a> { exports: IndexMap<&'a str, Export<'a>>, func_names: HashMap, - worklist: Vec<(u32, fn(&mut Module<'a>, u32) -> Result<()>)>, + // Known-live sets of indices after the `liveness` pass has run. live_types: BitVec, live_tables: BitVec, live_globals: BitVec, live_memories: BitVec, live_funcs: BitVec, -} -enum Definition<'a> { - Import(&'a str, &'a str), - Local, + // Helper data structure used during the `liveness` pass to avoid recursion. + // When calculating the liveness of an item this `worklist` is pushed to and + // then processed until it's empty. An item pushed onto this list represents + // a new index that has been discovered to be live and the function is what + // walks the item's definition to find other items that it references. + worklist: Vec<(u32, fn(&mut Module<'a>, u32) -> Result<()>)>, } struct Table<'a> { - def: Definition<'a>, + def: Definition<'a, ()>, ty: TableType, } struct Memory<'a> { - def: Definition<'a>, + def: Definition<'a, ()>, ty: MemoryType, } struct Global<'a> { - def: GlobalDefinition<'a>, + def: Definition<'a, ConstExpr<'a>>, ty: GlobalType, } -enum GlobalDefinition<'a> { - Import(&'a str, &'a str), - Local(ConstExpr<'a>), -} - struct Func<'a> { - def: FuncDefinition<'a>, + def: Definition<'a, FunctionBody<'a>>, ty: u32, } -enum FuncDefinition<'a> { +enum Definition<'a, T> { Import(&'a str, &'a str), - Local(FunctionBody<'a>), + Local(T), } impl<'a> Module<'a> { @@ -117,7 +122,7 @@ impl<'a> Module<'a> { let i = i?; match i.ty { TypeRef::Func(ty) => self.funcs.push(Func { - def: FuncDefinition::Import(i.module, i.name), + def: Definition::Import(i.module, i.name), ty, }), TypeRef::Table(ty) => self.tables.push(Table { @@ -125,7 +130,7 @@ impl<'a> Module<'a> { ty, }), TypeRef::Global(ty) => self.globals.push(Global { - def: GlobalDefinition::Import(i.module, i.name), +
def: Definition::Import(i.module, i.name), ty, }), TypeRef::Memory(ty) => self.memories.push(Memory { @@ -140,7 +145,7 @@ impl<'a> Module<'a> { for ty in s { let ty = ty?; self.tables.push(Table { - def: Definition::Local, + def: Definition::Local(()), ty, }); } @@ -149,7 +154,7 @@ impl<'a> Module<'a> { for ty in s { let ty = ty?; self.memories.push(Memory { - def: Definition::Local, + def: Definition::Local(()), ty, }); } @@ -158,7 +163,7 @@ impl<'a> Module<'a> { for g in s { let g = g?; self.globals.push(Global { - def: GlobalDefinition::Local(g.init_expr), + def: Definition::Local(g.init_expr), ty: g.ty, }); } @@ -176,7 +181,9 @@ impl<'a> Module<'a> { for ty in s { let ty = ty?; self.funcs.push(Func { - def: FuncDefinition::Local(FunctionBody::new(0, &[])), + // Specify a dummy definition to get filled in later + // when parsing the code section. + def: Definition::Local(FunctionBody::new(0, &[])), ty, }); } @@ -184,16 +191,16 @@ impl<'a> Module<'a> { Payload::CodeSectionStart { .. } => {} Payload::CodeSectionEntry(body) => { - self.funcs[next_code_index].def = FuncDefinition::Local(body); + self.funcs[next_code_index].def = Definition::Local(body); next_code_index += 1; } - // drop all custom sections + // Ignore all custom sections except for the `name` section + // which we parse, but ignore errors within. Payload::CustomSection(s) => { - if s.name() != "name" { - continue; + if s.name() == "name" { + drop(self.parse_name_section(&s)); } - drop(self.parse_name_section(&s)); } // sections that shouldn't appear in the specially-crafted core wasm @@ -244,6 +251,8 @@ impl<'a> Module<'a> { Ok(()) } + /// Iteratively calculates the set of live items within this module + /// considering all exports as the root of live functions. 
fn liveness(&mut self) -> Result<()> { let exports = mem::take(&mut self.exports); for (_, e) in exports.iter() { @@ -271,8 +280,8 @@ impl<'a> Module<'a> { let func = &me.funcs[func as usize]; me.live_types.insert(func.ty); let mut body = match &func.def { - FuncDefinition::Import(..) => return Ok(()), - FuncDefinition::Local(e) => e.get_binary_reader(), + Definition::Import(..) => return Ok(()), + Definition::Local(e) => e.get_binary_reader(), }; let local_count = body.read_var_u32()?; for _ in 0..local_count { @@ -289,8 +298,8 @@ impl<'a> Module<'a> { } self.worklist.push((global, |me, global| { let init = match &me.globals[global as usize].def { - GlobalDefinition::Import(..) => return Ok(()), - GlobalDefinition::Local(e) => e, + Definition::Import(..) => return Ok(()), + Definition::Local(e) => e, }; me.operators(init.get_binary_reader()) })); @@ -337,9 +346,15 @@ impl<'a> Module<'a> { live_iter(&self.live_tables, self.tables.iter()) } + /// Encodes this `Module` to a new wasm module which is gc'd and only + /// contains the items that are live as calculated by the `liveness` pass. fn encode(&mut self) -> Result> { + // Data structure used to track the mapping of old index to new index + // for all live items. let mut map = Encoder::default(); + // Sections that will be assembled into the final module at the end of + // this function. let mut types = wasm_encoder::TypeSection::new(); let mut imports = wasm_encoder::ImportSection::new(); let mut funcs = wasm_encoder::FunctionSection::new(); @@ -357,6 +372,10 @@ impl<'a> Module<'a> { ty.params().iter().copied().map(valty), ty.results().iter().copied().map(valty), ); + + // Keep track of the "empty type" to see if we can reuse an + // existing one or one needs to be injected if a `start` + // function is calculated at the end. 
if ty.params().len() == 0 && ty.results().len() == 0 { empty_type = Some(map.types.remap(i)); } @@ -377,7 +396,7 @@ impl<'a> Module<'a> { Definition::Import(m, n) => { imports.import(m, n, ty); } - Definition::Local => { + Definition::Local(()) => { memories.memory(ty); } } @@ -395,7 +414,7 @@ impl<'a> Module<'a> { Definition::Import(m, n) => { imports.import(m, n, ty); } - Definition::Local => { + Definition::Local(()) => { tables.table(ty); } } @@ -408,10 +427,10 @@ impl<'a> Module<'a> { val_type: valty(global.ty.content_type), }; match &global.def { - GlobalDefinition::Import(m, n) => { + Definition::Import(m, n) => { imports.import(m, n, ty); } - GlobalDefinition::Local(init) => { + Definition::Local(init) => { let mut bytes = map.operators(init.get_binary_reader())?; assert_eq!(bytes.pop(), Some(0xb)); globals.global(ty, &wasm_encoder::ConstExpr::raw(bytes)); @@ -419,15 +438,18 @@ impl<'a> Module<'a> { } } + // For functions first assign a new index to all functions and then + // afterwards actually map the body of all functions so the `map` of all + // index mappings is fully populated before instructions are mapped. let mut num_funcs = 0; for (i, func) in self.live_funcs() { map.funcs.push(i); let ty = map.types.remap(func.ty); match &func.def { - FuncDefinition::Import(m, n) => { + Definition::Import(m, n) => { imports.import(m, n, EntityType::Function(ty)); } - FuncDefinition::Local(_) => { + Definition::Local(_) => { funcs.function(ty); } } @@ -436,8 +458,8 @@ impl<'a> Module<'a> { for (_, func) in self.live_funcs() { let mut body = match &func.def { - FuncDefinition::Import(..) => continue, - FuncDefinition::Local(body) => body.get_binary_reader(), + Definition::Import(..) => continue, + Definition::Local(body) => body.get_binary_reader(), }; let mut locals = Vec::new(); for _ in 0..body.read_var_u32()? 
{ @@ -546,30 +568,32 @@ impl<'a> Module<'a> { ret.section(&globals); } - let mut exports = wasm_encoder::ExportSection::new(); - for (_, export) in self.exports.iter() { - let (kind, index) = match export.kind { - ExternalKind::Func => ( - wasm_encoder::ExportKind::Func, - map.funcs.remap(export.index), - ), - ExternalKind::Table => ( - wasm_encoder::ExportKind::Table, - map.tables.remap(export.index), - ), - ExternalKind::Memory => ( - wasm_encoder::ExportKind::Memory, - map.memories.remap(export.index), - ), - ExternalKind::Global => ( - wasm_encoder::ExportKind::Global, - map.globals.remap(export.index), - ), - kind => bail!("unsupported export kind {kind:?}"), - }; - exports.export(export.name, kind, index); + if !self.exports.is_empty() { + let mut exports = wasm_encoder::ExportSection::new(); + for (_, export) in self.exports.iter() { + let (kind, index) = match export.kind { + ExternalKind::Func => ( + wasm_encoder::ExportKind::Func, + map.funcs.remap(export.index), + ), + ExternalKind::Table => ( + wasm_encoder::ExportKind::Table, + map.tables.remap(export.index), + ), + ExternalKind::Memory => ( + wasm_encoder::ExportKind::Memory, + map.memories.remap(export.index), + ), + ExternalKind::Global => ( + wasm_encoder::ExportKind::Global, + map.globals.remap(export.index), + ), + kind => bail!("unsupported export kind {kind:?}"), + }; + exports.export(export.name, kind, index); + } + ret.section(&exports); } - ret.section(&exports); if let Some(start) = &start { ret.section(start); @@ -579,7 +603,8 @@ impl<'a> Module<'a> { ret.section(&code); } - // Append a custom `name` section if one is found + // Append a custom `name` section using the names of the functions that + // were found prior to the GC pass in the original module. let mut func_names = Vec::new(); for (i, _func) in self.live_funcs() { let name = match self.func_names.get(&i) { @@ -779,7 +804,7 @@ macro_rules! define_encode { // translated to wasm-encoder fields. 
(mk $op:ident $($arg:ident)*) => ($op { $($arg),* }); - // Individual cases of mapping one argument type to another, similar tot he + // Individual cases of mapping one argument type to another, similar to the // `define_visit` macro above. (map $self:ident $arg:ident memarg) => {$self.memarg($arg)}; (map $self:ident $arg:ident blockty) => {$self.blockty($arg)}; @@ -828,6 +853,8 @@ fn valty(ty: wasmparser::ValType) -> wasm_encoder::ValType { } } +// Minimal definition of a bit vector necessary for the liveness calculations +// above. mod bitvec { use std::mem; @@ -839,6 +866,8 @@ mod bitvec { } impl BitVec { + /// Inserts `idx` into this bit vector, returning whether it was not + /// previously present. pub fn insert(&mut self, idx: u32) -> bool { let (idx, bit) = idx_bit(idx); match self.bits.get_mut(idx) { @@ -856,6 +885,7 @@ mod bitvec { true } + /// Returns whether this bit vector contains the specified `idx`th bit. pub fn contains(&self, idx: u32) -> bool { let (idx, bit) = idx_bit(idx); match self.bits.get(idx) { @@ -874,19 +904,33 @@ mod bitvec { } } +/// Small data structure used to track index mappings from an old index space to +/// a new. #[derive(Default)] struct Remap { + /// Map, indexed by the old index set, to the new index set. + /// + /// Placeholders of `u32::MAX` means that the old index is not present in + /// the new index space. map: Vec, + /// The next available index in the new index space. next: u32, } impl Remap { - fn push(&mut self, idx: u32) { - self.map.resize(idx as usize, u32::MAX); + /// Appends a new live "old index" into this remapping structure. + /// + /// This will assign a new index for the old index provided. This method + /// must be called in increasing order of old indexes. + fn push(&mut self, old: u32) { + self.map.resize(old as usize, u32::MAX); self.map.push(self.next); self.next += 1; } + /// Returns the new index corresponding to an old index. + /// + /// Panics if the `old` index was not added via `push` above. 
fn remap(&self, old: u32) -> u32 { let ret = self.map[old as usize]; assert!(ret != u32::MAX); diff --git a/crates/wit-component/src/lib.rs b/crates/wit-component/src/lib.rs index a95eebcb3..a1f51744e 100644 --- a/crates/wit-component/src/lib.rs +++ b/crates/wit-component/src/lib.rs @@ -7,11 +7,11 @@ use std::str::FromStr; use wasm_encoder::CanonicalOption; use wit_parser::Interface; -mod adapter; #[cfg(feature = "cli")] pub mod cli; mod decoding; mod encoding; +mod gc; mod printing; mod validation; diff --git a/crates/wit-component/tests/components/adapt-unused/adapt-old-import-new.wit b/crates/wit-component/tests/components/adapt-unused/adapt-old-import-new.wit new file mode 100644 index 000000000..7f5b4d13d --- /dev/null +++ b/crates/wit-component/tests/components/adapt-unused/adapt-old-import-new.wit @@ -0,0 +1 @@ +log: func(s: string) diff --git a/crates/wit-component/tests/components/adapt-unused/adapt-old.wat b/crates/wit-component/tests/components/adapt-unused/adapt-old.wat new file mode 100644 index 000000000..deb5d349d --- /dev/null +++ b/crates/wit-component/tests/components/adapt-unused/adapt-old.wat @@ -0,0 +1,4 @@ +(module + (import "new" "log" (func $log (param i32 i32))) + (export "log" (func $log)) +) diff --git a/crates/wit-component/tests/components/adapt-unused/component.wat b/crates/wit-component/tests/components/adapt-unused/component.wat new file mode 100644 index 000000000..3f194a21b --- /dev/null +++ b/crates/wit-component/tests/components/adapt-unused/component.wat @@ -0,0 +1,4 @@ +(component + (core module (;0;)) + (core instance (;0;) (instantiate 0)) +) \ No newline at end of file diff --git a/crates/wit-component/tests/components/adapt-unused/module.wat b/crates/wit-component/tests/components/adapt-unused/module.wat new file mode 100644 index 000000000..3af8f2545 --- /dev/null +++ b/crates/wit-component/tests/components/adapt-unused/module.wat @@ -0,0 +1 @@ +(module) From 3833f13f2683511c3cf85f1b134b2417a58e8812 Mon Sep 17 00:00:00 
2001 From: Alex Crichton Date: Mon, 3 Oct 2022 14:03:27 -0700 Subject: [PATCH 04/10] Fix a missing `end` instruction on sp initializer --- crates/wit-component/src/gc.rs | 1 + .../adapt-old-import-new.wit | 1 + .../adapt-inject-stack/adapt-old.wat | 27 ++++ .../adapt-inject-stack/component.wat | 121 ++++++++++++++++++ .../components/adapt-inject-stack/module.wat | 4 + 5 files changed, 154 insertions(+) create mode 100644 crates/wit-component/tests/components/adapt-inject-stack/adapt-old-import-new.wit create mode 100644 crates/wit-component/tests/components/adapt-inject-stack/adapt-old.wat create mode 100644 crates/wit-component/tests/components/adapt-inject-stack/component.wat create mode 100644 crates/wit-component/tests/components/adapt-inject-stack/module.wat diff --git a/crates/wit-component/src/gc.rs b/crates/wit-component/src/gc.rs index 38ea95daf..bbcbc19d9 100644 --- a/crates/wit-component/src/gc.rs +++ b/crates/wit-component/src/gc.rs @@ -522,6 +522,7 @@ impl<'a> Module<'a> { func.instruction(&I32Const(16)); func.instruction(&I32Shl); func.instruction(&GlobalSet(sp)); + func.instruction(&End); code.function(&func); start = Some(wasm_encoder::StartSection { diff --git a/crates/wit-component/tests/components/adapt-inject-stack/adapt-old-import-new.wit b/crates/wit-component/tests/components/adapt-inject-stack/adapt-old-import-new.wit new file mode 100644 index 000000000..cf41681a2 --- /dev/null +++ b/crates/wit-component/tests/components/adapt-inject-stack/adapt-old-import-new.wit @@ -0,0 +1 @@ +get-two: func() -> (a: u32, b: u32) diff --git a/crates/wit-component/tests/components/adapt-inject-stack/adapt-old.wat b/crates/wit-component/tests/components/adapt-inject-stack/adapt-old.wat new file mode 100644 index 000000000..01301b3d6 --- /dev/null +++ b/crates/wit-component/tests/components/adapt-inject-stack/adapt-old.wat @@ -0,0 +1,27 @@ +(module + (import "new" "get-two" (func $get_two (param i32))) + (import "env" "memory" (memory 0)) + + (global $sp (mut 
i32) i32.const 0) + + (func (export "get_sum") (result i32) + (local i32 i32) + global.get $sp + local.tee 0 + i32.const 8 + i32.sub + local.tee 1 + global.set $sp + + local.get 1 + call $get_two + + (i32.add + (i32.load (local.get 1)) + (i32.load offset=4 (local.get 1))) + + local.get 0 + global.set $sp + ) + +) diff --git a/crates/wit-component/tests/components/adapt-inject-stack/component.wat b/crates/wit-component/tests/components/adapt-inject-stack/component.wat new file mode 100644 index 000000000..74bf35527 --- /dev/null +++ b/crates/wit-component/tests/components/adapt-inject-stack/component.wat @@ -0,0 +1,121 @@ +(component + (core module (;0;) + (type (;0;) (func (param i32))) + (type (;1;) (func (result i32))) + (type (;2;) (func)) + (import "env" "memory" (memory (;0;) 0)) + (import "new" "get-two" (func $get_two (;0;) (type 0))) + (func (;1;) (type 1) (result i32) + (local i32 i32) + global.get 0 + local.tee 0 + i32.const 8 + i32.sub + local.tee 1 + global.set 0 + local.get 1 + call $get_two + local.get 1 + i32.load + local.get 1 + i32.load offset=4 + i32.add + local.get 0 + global.set 0 + ) + (func $initialize_stack_pointer (;2;) (type 2) + (local i32) + i32.const 1 + memory.grow + local.tee 0 + i32.const -1 + i32.eq + if ;; label = @1 + unreachable + end + local.get 0 + i32.const 1 + i32.add + i32.const 16 + i32.shl + global.set 0 + ) + (global (;0;) (mut i32) i32.const 0) + (export "get_sum" (func 1)) + (start $initialize_stack_pointer) + ) + (type (;0;) (func (result "a" u32) (result "b" u32))) + (type (;1;) + (instance + (alias outer 1 0 (type (;0;))) + (export "get-two" (func (type 0))) + ) + ) + (import "new" (instance (;0;) (type 1))) + (core module (;1;) + (type (;0;) (func (result i32))) + (import "old" "get_sum" (func (;0;) (type 0))) + (memory (;0;) 1) + (export "memory" (memory 0)) + ) + (core module (;2;) + (type (;0;) (func (param i32))) + (type (;1;) (func (result i32))) + (func (;0;) (type 0) (param i32) + local.get 0 + i32.const 0 + 
call_indirect (type 0) + ) + (func (;1;) (type 1) (result i32) + i32.const 1 + call_indirect (type 1) + ) + (table (;0;) 2 2 funcref) + (export "0" (func 0)) + (export "1" (func 1)) + (export "$imports" (table 0)) + ) + (core module (;3;) + (type (;0;) (func (param i32))) + (type (;1;) (func (result i32))) + (import "" "0" (func (;0;) (type 0))) + (import "" "1" (func (;1;) (type 1))) + (import "" "$imports" (table (;0;) 2 2 funcref)) + (elem (;0;) (i32.const 0) func 0 1) + ) + (core instance (;0;) (instantiate 2)) + (alias core export 0 "1" (core func (;0;))) + (core instance (;1;) + (export "get_sum" (func 0)) + ) + (core instance (;2;) (instantiate 1 + (with "old" (instance 1)) + ) + ) + (alias core export 2 "memory" (core memory (;0;))) + (core instance (;3;) + (export "memory" (memory 0)) + ) + (alias core export 0 "0" (core func (;1;))) + (core instance (;4;) + (export "get-two" (func 1)) + ) + (core instance (;5;) (instantiate 0 + (with "env" (instance 3)) + (with "new" (instance 4)) + ) + ) + (alias core export 0 "$imports" (core table (;0;))) + (alias export 0 "get-two" (func (;0;))) + (core func (;2;) (canon lower (func 0) (memory 0))) + (alias core export 5 "get_sum" (core func (;3;))) + (core instance (;6;) + (export "$imports" (table 0)) + (export "0" (func 2)) + (export "1" (func 3)) + ) + (core instance (;7;) (instantiate 3 + (with "" (instance 6)) + ) + ) +) \ No newline at end of file diff --git a/crates/wit-component/tests/components/adapt-inject-stack/module.wat b/crates/wit-component/tests/components/adapt-inject-stack/module.wat new file mode 100644 index 000000000..9fd5ac0fc --- /dev/null +++ b/crates/wit-component/tests/components/adapt-inject-stack/module.wat @@ -0,0 +1,4 @@ +(module + (import "old" "get_sum" (func (result i32))) + (memory (export "memory") 1) +) From 5026e4e48c0d1e68ccfec943b40c2555e9b5715a Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 3 Oct 2022 15:05:31 -0700 Subject: [PATCH 05/10] Preserve the names of globals 
in adapter modules Should help with debugging structure ideally --- crates/wit-component/src/gc.rs | 45 ++++++++++++++----- .../adapt-inject-stack/component.wat | 10 ++--- 2 files changed, 40 insertions(+), 15 deletions(-) diff --git a/crates/wit-component/src/gc.rs b/crates/wit-component/src/gc.rs index bbcbc19d9..a20d53e9e 100644 --- a/crates/wit-component/src/gc.rs +++ b/crates/wit-component/src/gc.rs @@ -57,6 +57,7 @@ struct Module<'a> { funcs: Vec>, exports: IndexMap<&'a str, Export<'a>>, func_names: HashMap, + global_names: HashMap, // Known-live sets of indices after the `liveness` pass has run. live_types: BitVec, @@ -245,6 +246,13 @@ impl<'a> Module<'a> { self.func_names.insert(naming.index, naming.name); } } + Name::Global(map) => { + let mut map = map.get_map()?; + for _ in 0..map.get_count() { + let naming = map.read()?; + self.global_names.insert(naming.index, naming.name); + } + } _ => {} } } @@ -487,7 +495,9 @@ impl<'a> Module<'a> { // If there are any memories or any mutable globals there must be // precisely one of each as otherwise we don't know how to filter // down to the right one. - assert_eq!(num_memories, 1); + if num_memories != 1 { + bail!("adapter modules don't support multi-memory"); + } assert_eq!(mutable_globals.len(), 1); assert_eq!(mutable_globals[0].1.ty.content_type, ValType::I32); let sp = map.globals.remap(mutable_globals[0].0); @@ -607,6 +617,7 @@ impl<'a> Module<'a> { // Append a custom `name` section using the names of the functions that // were found prior to the GC pass in the original module. 
let mut func_names = Vec::new(); + let mut global_names = Vec::new(); for (i, _func) in self.live_funcs() { let name = match self.func_names.get(&i) { Some(name) => name, @@ -614,19 +625,33 @@ impl<'a> Module<'a> { }; func_names.push((map.funcs.remap(i), *name)); } + for (i, _global) in self.live_globals() { + let name = match self.global_names.get(&i) { + Some(name) => name, + None => continue, + }; + global_names.push((map.globals.remap(i), *name)); + } if start.is_some() { func_names.push((num_funcs, "initialize_stack_pointer")); } - if !func_names.is_empty() { - let mut subsection = Vec::new(); - func_names.len().encode(&mut subsection); - for (i, name) in func_names { - i.encode(&mut subsection); - name.encode(&mut subsection); - } + if !func_names.is_empty() || !global_names.is_empty() { let mut section = Vec::new(); - section.push(0x01); - subsection.encode(&mut section); + let mut encode_subsection = |code: u8, names: &[(u32, &str)]| { + if names.is_empty() { + return; + } + let mut subsection = Vec::new(); + names.len().encode(&mut subsection); + for (i, name) in names { + i.encode(&mut subsection); + name.encode(&mut subsection); + } + section.push(code); + subsection.encode(&mut section); + }; + encode_subsection(0x01, &func_names); + encode_subsection(0x07, &global_names); ret.section(&wasm_encoder::CustomSection { name: "name", data: §ion, diff --git a/crates/wit-component/tests/components/adapt-inject-stack/component.wat b/crates/wit-component/tests/components/adapt-inject-stack/component.wat index 74bf35527..ca58b8ed0 100644 --- a/crates/wit-component/tests/components/adapt-inject-stack/component.wat +++ b/crates/wit-component/tests/components/adapt-inject-stack/component.wat @@ -7,12 +7,12 @@ (import "new" "get-two" (func $get_two (;0;) (type 0))) (func (;1;) (type 1) (result i32) (local i32 i32) - global.get 0 + global.get $sp local.tee 0 i32.const 8 i32.sub local.tee 1 - global.set 0 + global.set $sp local.get 1 call $get_two local.get 1 @@ -21,7 
+21,7 @@ i32.load offset=4 i32.add local.get 0 - global.set 0 + global.set $sp ) (func $initialize_stack_pointer (;2;) (type 2) (local i32) @@ -38,9 +38,9 @@ i32.add i32.const 16 i32.shl - global.set 0 + global.set $sp ) - (global (;0;) (mut i32) i32.const 0) + (global $sp (;0;) (mut i32) i32.const 0) (export "get_sum" (func 1)) (start $initialize_stack_pointer) ) From da2f7c6bd0877a8fdda378f3befb16f7797cbf6f Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 3 Oct 2022 15:06:37 -0700 Subject: [PATCH 06/10] Small refactor --- crates/wit-component/src/gc.rs | 40 +++++++++++++++++----------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/crates/wit-component/src/gc.rs b/crates/wit-component/src/gc.rs index a20d53e9e..e34c955b5 100644 --- a/crates/wit-component/src/gc.rs +++ b/crates/wit-component/src/gc.rs @@ -625,6 +625,9 @@ impl<'a> Module<'a> { }; func_names.push((map.funcs.remap(i), *name)); } + if start.is_some() { + func_names.push((num_funcs, "initialize_stack_pointer")); + } for (i, _global) in self.live_globals() { let name = match self.global_names.get(&i) { Some(name) => name, @@ -632,26 +635,23 @@ impl<'a> Module<'a> { }; global_names.push((map.globals.remap(i), *name)); } - if start.is_some() { - func_names.push((num_funcs, "initialize_stack_pointer")); - } - if !func_names.is_empty() || !global_names.is_empty() { - let mut section = Vec::new(); - let mut encode_subsection = |code: u8, names: &[(u32, &str)]| { - if names.is_empty() { - return; - } - let mut subsection = Vec::new(); - names.len().encode(&mut subsection); - for (i, name) in names { - i.encode(&mut subsection); - name.encode(&mut subsection); - } - section.push(code); - subsection.encode(&mut section); - }; - encode_subsection(0x01, &func_names); - encode_subsection(0x07, &global_names); + let mut section = Vec::new(); + let mut encode_subsection = |code: u8, names: &[(u32, &str)]| { + if names.is_empty() { + return; + } + let mut subsection = Vec::new(); + 
names.len().encode(&mut subsection); + for (i, name) in names { + i.encode(&mut subsection); + name.encode(&mut subsection); + } + section.push(code); + subsection.encode(&mut section); + }; + encode_subsection(0x01, &func_names); + encode_subsection(0x07, &global_names); + if !section.is_empty() { ret.section(&wasm_encoder::CustomSection { name: "name", data: §ion, From 0435f3f4db0a073d2d1449ad32f65d71eeb1336d Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 3 Oct 2022 15:17:38 -0700 Subject: [PATCH 07/10] Hack in more stack pointer detection This unfortunately suffers greatly from false negatives, but at this time it's unclear if this can be done better. --- Cargo.toml | 4 - crates/wit-component/src/gc.rs | 137 +++++++++++------- .../adapt-inject-stack/adapt-old.wat | 11 +- .../adapt-inject-stack/component.wat | 13 +- 4 files changed, 100 insertions(+), 65 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index a4e3a9028..cd27e5212 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,12 +21,8 @@ anyhow = "1.0.65" bitflags = "1.3.2" heck = "0.3" pulldown-cmark = { version = "0.8", default-features = false } -<<<<<<< HEAD clap = { version = "4.0.9", features = ["derive"] } -======= -clap = { version = "3.2.22", features = ["derive"] } env_logger = "0.9.1" ->>>>>>> Add gc comments wasmtime = "1.0" wasmtime-wasi = "1.0" diff --git a/crates/wit-component/src/gc.rs b/crates/wit-component/src/gc.rs index e34c955b5..d6e7c8bc1 100644 --- a/crates/wit-component/src/gc.rs +++ b/crates/wit-component/src/gc.rs @@ -485,59 +485,57 @@ impl<'a> Module<'a> { // local to this module. This only happens if a memory is preserved and // a stack pointer global is found. 
let mut start = None; - let mutable_globals = self - .live_globals() - .filter(|(_, g)| g.ty.mutable) - .collect::>(); - if num_memories > 0 && mutable_globals.len() > 0 { - use wasm_encoder::Instruction::*; - - // If there are any memories or any mutable globals there must be - // precisely one of each as otherwise we don't know how to filter - // down to the right one. - if num_memories != 1 { - bail!("adapter modules don't support multi-memory"); - } - assert_eq!(mutable_globals.len(), 1); - assert_eq!(mutable_globals[0].1.ty.content_type, ValType::I32); - let sp = map.globals.remap(mutable_globals[0].0); - - // Generate a function type for this start function, adding a new - // function type to the module if necessary. - let empty_type = empty_type.unwrap_or_else(|| { - types.function([], []); - types.len() - 1 - }); - funcs.function(empty_type); - - let mut func = wasm_encoder::Function::new([(1, wasm_encoder::ValType::I32)]); - // Grow the memory by 1 page to allocate ourselves some stack space. - func.instruction(&I32Const(1)); - func.instruction(&MemoryGrow(0)); - func.instruction(&LocalTee(0)); - - // Test if the return value of the growth was -1 and trap if so - // since we don't have a stack page. - func.instruction(&I32Const(-1)); - func.instruction(&I32Eq); - func.instruction(&If(wasm_encoder::BlockType::Empty)); - func.instruction(&Unreachable); - func.instruction(&End); - - // Set our stack pointer to the top of the page we were given, which - // is the page index times the page size plus the size of a page. 
- func.instruction(&LocalGet(0)); - func.instruction(&I32Const(1)); - func.instruction(&I32Add); - func.instruction(&I32Const(16)); - func.instruction(&I32Shl); - func.instruction(&GlobalSet(sp)); - func.instruction(&End); - code.function(&func); + let sp = self.find_stack_pointer()?; + if let Some(sp) = sp { + if num_memories > 0 { + use wasm_encoder::Instruction::*; - start = Some(wasm_encoder::StartSection { - function_index: num_funcs, - }); + // If there are any memories or any mutable globals there must be + // precisely one of each as otherwise we don't know how to filter + // down to the right one. + if num_memories != 1 { + bail!("adapter modules don't support multi-memory"); + } + + let sp = map.globals.remap(sp); + + // Generate a function type for this start function, adding a new + // function type to the module if necessary. + let empty_type = empty_type.unwrap_or_else(|| { + types.function([], []); + types.len() - 1 + }); + funcs.function(empty_type); + + let mut func = wasm_encoder::Function::new([(1, wasm_encoder::ValType::I32)]); + // Grow the memory by 1 page to allocate ourselves some stack space. + func.instruction(&I32Const(1)); + func.instruction(&MemoryGrow(0)); + func.instruction(&LocalTee(0)); + + // Test if the return value of the growth was -1 and trap if so + // since we don't have a stack page. + func.instruction(&I32Const(-1)); + func.instruction(&I32Eq); + func.instruction(&If(wasm_encoder::BlockType::Empty)); + func.instruction(&Unreachable); + func.instruction(&End); + + // Set our stack pointer to the top of the page we were given, which + // is the page index times the page size plus the size of a page. 
+ func.instruction(&LocalGet(0)); + func.instruction(&I32Const(1)); + func.instruction(&I32Add); + func.instruction(&I32Const(16)); + func.instruction(&I32Shl); + func.instruction(&GlobalSet(sp)); + func.instruction(&End); + code.function(&func); + + start = Some(wasm_encoder::StartSection { + function_index: num_funcs, + }); + } } // Sanity-check the shape of the module since some parts won't work if @@ -660,6 +658,41 @@ impl<'a> Module<'a> { Ok(ret.finish()) } + + fn find_stack_pointer(&self) -> Result> { + let mutable_i32_globals = self + .live_globals() + .filter(|(_, g)| g.ty.mutable && g.ty.content_type == ValType::I32) + .collect::>(); + + // If there are no 32-bit mutable globals then there's definitely no + // stack pointer in this module + if mutable_i32_globals.is_empty() { + return Ok(None); + } + + // If there are some mutable 32-bit globals then there's currently no + // great way of determining which is the stack pointer. For now a hack + // is used where we use the name section to find the name that LLVM + // injects. This hopefully can be improved in the future. 
+ let stack_pointers = mutable_i32_globals + .iter() + .filter_map(|(i, _)| { + let name = *self.global_names.get(&i)?; + if name == "__stack_pointer" { + Some(*i) + } else { + None + } + }) + .collect::>(); + + match stack_pointers.len() { + 0 => Ok(None), + 1 => Ok(Some(stack_pointers[0])), + n => bail!("found {n} globals that look like the stack pointer"), + } + } } // This helper macro is used to define a visitor of all instructions with diff --git a/crates/wit-component/tests/components/adapt-inject-stack/adapt-old.wat b/crates/wit-component/tests/components/adapt-inject-stack/adapt-old.wat index 01301b3d6..4d5628609 100644 --- a/crates/wit-component/tests/components/adapt-inject-stack/adapt-old.wat +++ b/crates/wit-component/tests/components/adapt-inject-stack/adapt-old.wat @@ -2,16 +2,17 @@ (import "new" "get-two" (func $get_two (param i32))) (import "env" "memory" (memory 0)) - (global $sp (mut i32) i32.const 0) + (global $__stack_pointer (mut i32) i32.const 0) + (global $some_other_mutable_global (mut i32) i32.const 0) (func (export "get_sum") (result i32) (local i32 i32) - global.get $sp + global.get $__stack_pointer local.tee 0 i32.const 8 i32.sub local.tee 1 - global.set $sp + global.set $__stack_pointer local.get 1 call $get_two @@ -20,8 +21,10 @@ (i32.load (local.get 1)) (i32.load offset=4 (local.get 1))) + (global.set $some_other_mutable_global (global.get $some_other_mutable_global)) + local.get 0 - global.set $sp + global.set $__stack_pointer ) ) diff --git a/crates/wit-component/tests/components/adapt-inject-stack/component.wat b/crates/wit-component/tests/components/adapt-inject-stack/component.wat index ca58b8ed0..f6d3df924 100644 --- a/crates/wit-component/tests/components/adapt-inject-stack/component.wat +++ b/crates/wit-component/tests/components/adapt-inject-stack/component.wat @@ -7,12 +7,12 @@ (import "new" "get-two" (func $get_two (;0;) (type 0))) (func (;1;) (type 1) (result i32) (local i32 i32) - global.get $sp + global.get 
$__stack_pointer local.tee 0 i32.const 8 i32.sub local.tee 1 - global.set $sp + global.set $__stack_pointer local.get 1 call $get_two local.get 1 @@ -20,8 +20,10 @@ local.get 1 i32.load offset=4 i32.add + global.get $some_other_mutable_global + global.set $some_other_mutable_global local.get 0 - global.set $sp + global.set $__stack_pointer ) (func $initialize_stack_pointer (;2;) (type 2) (local i32) @@ -38,9 +40,10 @@ i32.add i32.const 16 i32.shl - global.set $sp + global.set $__stack_pointer ) - (global $sp (;0;) (mut i32) i32.const 0) + (global $__stack_pointer (;0;) (mut i32) i32.const 0) + (global $some_other_mutable_global (;1;) (mut i32) i32.const 0) (export "get_sum" (func 1)) (start $initialize_stack_pointer) ) From f5abf0d3ba5aef8790b636070c2a698bf09a4d13 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 4 Oct 2022 07:57:56 -0700 Subject: [PATCH 08/10] Improve readability of validation condition --- crates/wit-component/src/validation.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/crates/wit-component/src/validation.rs b/crates/wit-component/src/validation.rs index 8f5d92037..99599abb6 100644 --- a/crates/wit-component/src/validation.rs +++ b/crates/wit-component/src/validation.rs @@ -260,7 +260,8 @@ pub fn validate_adapter_module<'a>( } let types = types.unwrap(); - for (name, funcs) in &import_funcs { + let mut import_funcs = import_funcs.iter(); + if let Some((name, funcs)) = import_funcs.next() { if *name != interface.name { bail!( "adapter module imports from `{name}` which does not match \ @@ -270,6 +271,10 @@ pub fn validate_adapter_module<'a>( } ret.required_funcs = validate_imported_interface(interface, name, funcs, &types)?; ret.required_import = Some(interface.name.as_str()); + + if let Some((name, _)) = import_funcs.next() { + bail!("adapter module cannot import from a second interface `{name}`") + } } for (name, ty) in required { From c7bc37efda7c5e0728a2c04b2fc6b3bed7e963ff Mon Sep 17 00:00:00 2001 From: Alex 
Crichton Date: Tue, 4 Oct 2022 08:06:58 -0700 Subject: [PATCH 09/10] Improve validation documentation --- crates/wit-component/src/validation.rs | 79 +++++++++++++++++++++++--- 1 file changed, 71 insertions(+), 8 deletions(-) diff --git a/crates/wit-component/src/validation.rs b/crates/wit-component/src/validation.rs index 99599abb6..d42fc16ce 100644 --- a/crates/wit-component/src/validation.rs +++ b/crates/wit-component/src/validation.rs @@ -39,21 +39,51 @@ fn wasm_sig_to_func_type(signature: WasmSignature) -> FuncType { ) } +/// Metadata about a validated module and what was found internally. +/// +/// All imports to the module are described by the union of `required_imports` +/// and `adapters_required`. +/// +/// This structure is created by the `validate_module` function. #[derive(Default)] pub struct ValidatedModule<'a> { + /// The required imports into this module which are to be satisfied by + /// imported component model instances. + /// + /// The key of this map is the name of the interface that the module imports + /// from and the value is the set of functions required from that interface. + /// This is used to generate an appropriate instance import in the generated + /// component which imports only the set of required functions. pub required_imports: IndexMap<&'a str, IndexSet<&'a str>>, + + /// This is the set of imports into the module which were not satisfied by + /// imported interfaces but are required to be satisfied by adapter modules. + /// + /// The key of this map is the name of the adapter that was imported into + /// the module and the value is a further map from function to function type + /// as required by this module. This map is used to shrink adapter modules + /// to the precise size required for this module by ensuring it doesn't + /// export (and subsequently import) extraneous functions. + pub adapters_required: IndexMap<&'a str, IndexMap<&'a str, FuncType>>, + + /// Whether or not this module exported a linear memory. 
pub has_memory: bool, + + /// Whether or not this module exported a `cabi_realloc` function. pub has_realloc: bool, - pub adapters_required: IndexMap<&'a str, IndexMap<&'a str, FuncType>>, } /// This function validates the following: -/// * The bytes represent a core WebAssembly module. -/// * The module's imports are all satisfied by the given import interfaces. -/// * The given default and exported interfaces are satisfied by the module's exports. /// -/// Returns a tuple of the set of imported interfaces required by the module, whether -/// the module exports a memory, and whether the module exports a realloc function. +/// * The `bytes` represent a valid core WebAssembly module. +/// * The module's imports are all satisfied by the given `imports` interfaces +/// or the `adapters` set. +/// * The given default and exported interfaces are satisfied by the module's +/// exports. +/// +/// The `ValidatedModule` return value contains the metadata which describes the +/// input module on success. This is then further used to generate a component +/// for this module. pub fn validate_module<'a>( bytes: &'a [u8], interface: &Option<&Interface>, @@ -168,15 +198,48 @@ pub fn validate_module<'a>( Ok(ret) } +/// Validation information from an "adapter module" which is distinct from a +/// "main module" validated above. +/// +/// This is created by the `validate_adapter_module` function. #[derive(Default, Debug)] pub struct ValidatedAdapter<'a> { - pub required_funcs: IndexSet<&'a str>, + /// If specified then this is the name of the required interface imported + /// into the adapter module. + /// + /// At this time only one interface import is supported. If this is `None` + /// then the adapter module didn't import any component model functions to + /// implement the required functionality. pub required_import: Option<&'a str>, + + /// This is the set of required functions imported from `required_import`, + /// if `required_import` is specified. 
+ pub required_funcs: IndexSet<&'a str>, + + /// This is the module and field name of the memory import, if one is + /// specified. + /// + /// Due to LLVM codegen this is typically `env::memory` as a totally separate + /// import from the `required_import` above. pub needs_memory: Option<(String, String)>, + + /// Flag for whether a `cabi_realloc` function was found within this module. pub has_realloc: bool, } -/// TODO +/// This function will validate the `bytes` provided as a wasm adapter module. +/// Notably this will validate the wasm module itself in addition to ensuring +/// that it has the "shape" of an adapter module. Current constraints are: +/// +/// * The adapter module can import only one memory +/// * The adapter module can only import from the name of `interface` specified, +/// and all function imports must match the `required` types which correspond +/// to the lowered types of the functions in `interface`. +/// +/// The wasm module passed into this function is the output of the GC pass of an +/// adapter module's original source. This means that the adapter module is +/// already minimized and this is a double-check that the minimization pass +/// didn't accidentally break the wasm module. 
pub fn validate_adapter_module<'a>( bytes: &[u8], interface: &'a Interface, From eafa5e6caf8b6ca3780c166a3e6b4b208daa5f1f Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 4 Oct 2022 08:11:06 -0700 Subject: [PATCH 10/10] Comment a test case --- .../adapt-inject-stack/adapt-old.wat | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/crates/wit-component/tests/components/adapt-inject-stack/adapt-old.wat b/crates/wit-component/tests/components/adapt-inject-stack/adapt-old.wat index 4d5628609..9699792d5 100644 --- a/crates/wit-component/tests/components/adapt-inject-stack/adapt-old.wat +++ b/crates/wit-component/tests/components/adapt-inject-stack/adapt-old.wat @@ -5,8 +5,22 @@ (global $__stack_pointer (mut i32) i32.const 0) (global $some_other_mutable_global (mut i32) i32.const 0) + ;; This is a sample adapter which is adapting between ABI. This exact function + ;; signature is imported by `module.wat` and we're implementing it here with a + ;; canonical-abi function that returns two integers. The canonical ABI for + ;; returning two integers is different than the ABI of this function, hence + ;; the adapter here. + ;; + ;; The purpose of this test case is to exercise the `$__stack_pointer` global. + ;; The stack pointer here needs to be initialized to something valid for + ;; this adapter module which is done with an injected `start` function into + ;; this adapter module when it's bundled into a component. (func (export "get_sum") (result i32) (local i32 i32) + + ;; Allocate 8 bytes of stack space for the two u32 return values. The + ;; original stack pointer is saved in local 0 and the stack frame for this + ;; function is saved in local 1. global.get $__stack_pointer local.tee 0 i32.const 8 @@ -14,15 +28,25 @@ local.tee 1 global.set $__stack_pointer + ;; Call the imported function which will return two u32 values into the + ;; return pointer specified here, our stack frame. 
local.get 1 call $get_two + ;; Compute the result of this function by adding together the two return + ;; values. (i32.add (i32.load (local.get 1)) (i32.load offset=4 (local.get 1))) + ;; Test that if there is another mutable global in this module that it + ;; doesn't affect the detection of the stack pointer. This extra mutable + ;; global should not be initialized or tampered with as part of the + ;; initialize-the-stack-pointer injected function (global.set $some_other_mutable_global (global.get $some_other_mutable_global)) + ;; Restore the stack pointer to the value it was at prior to entering this + ;; function. local.get 0 global.set $__stack_pointer )