diff --git a/meld-core/src/adapter/fact.rs b/meld-core/src/adapter/fact.rs
index 59b9dd9..8f8d085 100644
--- a/meld-core/src/adapter/fact.rs
+++ b/meld-core/src/adapter/fact.rs
@@ -42,6 +42,320 @@ fn alignment_for_encoding(encoding: StringEncoding) -> i32 {
 /// Build a lookup from `(module, field)` → merged function index for resource imports.
 ///
+/// Emit a safe `cabi_realloc` call: traps via `unreachable` if the returned
+/// pointer is 0 (OOM). Caller must have pushed the 4 realloc arguments onto
+/// the stack (`old_ptr`, `old_size`, `align`, `new_size`) immediately before
+/// calling this helper. After the call, the (checked, non-null) pointer is
+/// stored in `result_local`.
+///
+/// This is the fix for LS-A-7 leg (b): an unchecked realloc return lets the
+/// transcode/copy loop write into callee memory offset 0 on OOM.
+pub(crate) fn emit_checked_realloc(body: &mut Function, realloc_func: u32, result_local: u32) {
+    // The 4 realloc args are already on the operand stack; the call consumes
+    // them and leaves the returned pointer.
+    body.instruction(&Instruction::Call(realloc_func));
+    // Stash the pointer, then re-read it for the null check (LocalSet pops,
+    // so a LocalGet is needed to test it).
+    body.instruction(&Instruction::LocalSet(result_local));
+    body.instruction(&Instruction::LocalGet(result_local));
+    // NOTE(review): OOM is treated as fatal — `unreachable` traps the whole
+    // instance rather than surfacing a recoverable error to the caller.
+    body.instruction(&Instruction::I32Eqz);
+    body.instruction(&Instruction::If(wasm_encoder::BlockType::Empty));
+    body.instruction(&Instruction::Unreachable);
+    body.instruction(&Instruction::End);
+}
+
+/// Emit an overflow guard: traps via `unreachable` if `len_local * k` would
+/// wrap in 32-bit unsigned arithmetic. Caller supplies the local holding the
+/// untrusted length and the constant multiplier `k`. No-op when `k <= 1`.
+///
+/// This is the fix for LS-A-7 leg (a): `i32.mul` is modulo 2^32, so a large
+/// caller-chosen `len` can wrap to a small allocation size while the copy
+/// loop still writes the full `len * k` bytes, producing an OOB write into
+/// callee memory. 
+pub(crate) fn emit_overflow_guard(body: &mut Function, len_local: u32, k: u32) {
+    // k == 0 or k == 1 cannot wrap a u32 product; the early return also
+    // protects the `u32::MAX / k` division below from k == 0.
+    if k <= 1 {
+        return;
+    }
+    // Trap iff len > floor(u32::MAX / k). Any len at or below that bound
+    // satisfies len * k <= k * floor(u32::MAX / k) <= u32::MAX, so the
+    // subsequent i32.mul performed by the caller is exact (no wrap).
+    body.instruction(&Instruction::LocalGet(len_local));
+    body.instruction(&Instruction::I32Const((u32::MAX / k) as i32));
+    // Unsigned compare — the constant is reinterpreted from the i32 above.
+    body.instruction(&Instruction::I32GtU);
+    body.instruction(&Instruction::If(wasm_encoder::BlockType::Empty));
+    body.instruction(&Instruction::Unreachable);
+    body.instruction(&Instruction::End);
+}
+
+/// Compute Canonical ABI (size, alignment) in bytes for a component value type.
+///
+/// Per Component Model Canonical ABI spec, every type has a fixed lowered
+/// memory layout. List/string lower to a (ptr, len) pair (8 bytes, align 4).
+/// Records pad each field to its alignment, then pad the whole record to
+/// its max field alignment. We use this to compute typed byte counts when
+/// copying lists across component memories.
+///
+/// Assumes `Type(idx)` references have already been resolved (see
+/// `component_wrap::resolve_component_val_type`). Unresolved Type/handle
+/// references fall back to a 4-byte handle-sized layout. 
+pub(crate) fn cabi_size_align(ty: &crate::parser::ComponentValType) -> (u32, u32) {
+    use crate::parser::{ComponentValType as CVT, PrimitiveValType as P};
+    // Round `n` up to the next multiple of power-of-two `a`.
+    fn align_up(n: u32, a: u32) -> u32 {
+        (n + a - 1) & !(a - 1)
+    }
+    // Canonical ABI discriminant size: the smallest of 1/2/4 bytes that can
+    // hold a zero-based case index for `num_cases` cases.
+    fn discriminant_size(num_cases: usize) -> u32 {
+        match num_cases {
+            0..=256 => 1,
+            257..=65536 => 2,
+            _ => 4,
+        }
+    }
+    match ty {
+        CVT::Primitive(p) => match p {
+            P::Bool | P::S8 | P::U8 => (1, 1),
+            P::S16 | P::U16 => (2, 2),
+            P::S32 | P::U32 | P::F32 | P::Char => (4, 4),
+            P::S64 | P::U64 | P::F64 => (8, 8),
+        },
+        // Strings and lists lower to an (i32 ptr, i32 len) pair.
+        CVT::String => (8, 4),
+        CVT::List(_) => (8, 4),
+        CVT::FixedSizeList(elem, n) => {
+            let (es, ea) = cabi_size_align(elem);
+            // Fix: saturate instead of wrapping. A wrapped `es * n` would
+            // under-size downstream allocations; u32::MAX instead trips the
+            // realloc/overflow guards.
+            (es.saturating_mul(*n), ea)
+        }
+        CVT::Record(fields) => {
+            // Each field is padded to its own alignment; the whole record is
+            // then padded to its max field alignment.
+            let mut size = 0u32;
+            let mut align = 1u32;
+            for (_, fty) in fields {
+                let (fs, fa) = cabi_size_align(fty);
+                size = align_up(size, fa);
+                size += fs;
+                align = align.max(fa);
+            }
+            (align_up(size, align), align)
+        }
+        CVT::Tuple(elems) => {
+            // Same layout rule as records.
+            let mut size = 0u32;
+            let mut align = 1u32;
+            for ety in elems {
+                let (es, ea) = cabi_size_align(ety);
+                size = align_up(size, ea);
+                size += es;
+                align = align.max(ea);
+            }
+            (align_up(size, align), align)
+        }
+        CVT::Option(inner) => {
+            // option<T> has exactly 2 cases, so its discriminant is 1 byte.
+            let (is, ia) = cabi_size_align(inner);
+            let align = ia.max(1);
+            let body = align_up(1, align) + is;
+            (align_up(body, align), align)
+        }
+        CVT::Result { ok, err } => {
+            // result<T, E> has exactly 2 cases (1-byte discriminant); the
+            // payload area is sized for the larger arm.
+            let (os, oa) = ok.as_ref().map(|t| cabi_size_align(t)).unwrap_or((0, 1));
+            let (es, ea) = err.as_ref().map(|t| cabi_size_align(t)).unwrap_or((0, 1));
+            let align = oa.max(ea).max(1);
+            let body = align_up(1, align) + os.max(es);
+            (align_up(body, align), align)
+        }
+        CVT::Variant(cases) => {
+            let mut max_size = 0u32;
+            let mut payload_align = 1u32;
+            for (_, case_ty) in cases {
+                if let Some(ct) = case_ty {
+                    let (cs, ca) = cabi_size_align(ct);
+                    max_size = max_size.max(cs);
+                    payload_align = payload_align.max(ca);
+                }
+            }
+            // Fix: the discriminant is 1, 2, or 4 bytes depending on the case
+            // count. The previous code hard-coded 1 byte, which under-sizes
+            // (and mis-aligns) variants with more than 256 cases.
+            let ds = discriminant_size(cases.len());
+            let align = payload_align.max(ds);
+            let body = align_up(ds, payload_align) + max_size;
+            (align_up(body, align), align)
+        }
+        // Resource handles lower to an i32; unresolved Type(idx) references
+        // fall back to the same 4-byte handle-sized layout.
+        CVT::Own(_) | CVT::Borrow(_) | CVT::Type(_) => (4, 4),
+    }
+}
+
+/// Walk each element of a copied list and recursively patch up nested
+/// (ptr, 
len) pairs that still point into callee memory. Allocates fresh
+/// caller-side buffers, copies bytes across, and writes back the new ptr.
+///
+/// For frequencies-style `list<{ string, u32 }>` this scans each 12-byte
+/// record, copies the string at offset 0 into caller memory, and overwrites
+/// the (ptr, len) header. Nested lists/records recurse. Other field types
+/// are left as-is (already byte-copied).
+#[allow(clippy::too_many_arguments)]
+fn emit_patch_nested_indirections(
+    body: &mut Function,
+    elem_ty: &crate::parser::ComponentValType,
+    l_dst_ptr: u32,
+    l_callee_src: u32,
+    l_src_len: u32,
+    elem_size: u32,
+    l_first_scratch: u32,
+    realloc_func: u32,
+    caller_memory: u32,
+    callee_memory: u32,
+) {
+    let indirections = collect_indirections(elem_ty, 0);
+    if indirections.is_empty() {
+        return;
+    }
+
+    // Locals (caller has reserved scratch starting at l_first_scratch):
+    //   l_i       = element index counter
+    //   l_rec_dst = caller-side pointer to current record
+    //   l_rec_src = callee-side pointer to current record (read source)
+    //   l_old_ptr = original src ptr (callee address)
+    //   l_buf_len = byte count to copy
+    //   l_new_ptr = freshly allocated caller buffer
+    let l_i = l_first_scratch;
+    let l_rec_dst = l_first_scratch + 1;
+    let l_old_ptr = l_first_scratch + 2;
+    let l_buf_len = l_first_scratch + 3;
+    let l_new_ptr = l_first_scratch + 4;
+    let l_rec_src = l_first_scratch + 5;
+
+    // i = 0
+    body.instruction(&Instruction::I32Const(0));
+    body.instruction(&Instruction::LocalSet(l_i));
+
+    body.instruction(&Instruction::Block(wasm_encoder::BlockType::Empty));
+    body.instruction(&Instruction::Loop(wasm_encoder::BlockType::Empty));
+
+    // if i >= len break
+    body.instruction(&Instruction::LocalGet(l_i));
+    body.instruction(&Instruction::LocalGet(l_src_len));
+    body.instruction(&Instruction::I32GeU);
+    body.instruction(&Instruction::BrIf(1));
+
+    // rec_dst = l_dst_ptr + i * elem_size
+    body.instruction(&Instruction::LocalGet(l_dst_ptr));
+    body.instruction(&Instruction::LocalGet(l_i));
+    if elem_size != 1 {
+        body.instruction(&Instruction::I32Const(elem_size as i32));
+        body.instruction(&Instruction::I32Mul);
+    }
+    body.instruction(&Instruction::I32Add);
+    body.instruction(&Instruction::LocalSet(l_rec_dst));
+
+    // rec_src = l_callee_src + i * elem_size (in callee memory)
+    body.instruction(&Instruction::LocalGet(l_callee_src));
+    body.instruction(&Instruction::LocalGet(l_i));
+    if elem_size != 1 {
+        body.instruction(&Instruction::I32Const(elem_size as i32));
+        body.instruction(&Instruction::I32Mul);
+    }
+    body.instruction(&Instruction::I32Add);
+    body.instruction(&Instruction::LocalSet(l_rec_src));
+
+    for (offset, sub_elem_size) in &indirections {
+        let dst_mem_arg_ptr = wasm_encoder::MemArg {
+            offset: *offset as u64,
+            align: 2,
+            memory_index: caller_memory,
+        };
+        let src_mem_arg_ptr = wasm_encoder::MemArg {
+            offset: *offset as u64,
+            align: 2,
+            memory_index: callee_memory,
+        };
+        let src_mem_arg_len = wasm_encoder::MemArg {
+            offset: (*offset + 4) as u64,
+            align: 2,
+            memory_index: callee_memory,
+        };
+
+        // Read original (ptr, len) DIRECTLY from callee memory at rec_src.
+        body.instruction(&Instruction::LocalGet(l_rec_src));
+        body.instruction(&Instruction::I32Load(src_mem_arg_ptr));
+        body.instruction(&Instruction::LocalSet(l_old_ptr));
+
+        body.instruction(&Instruction::LocalGet(l_rec_src));
+        body.instruction(&Instruction::I32Load(src_mem_arg_len));
+        if *sub_elem_size != 1 {
+            body.instruction(&Instruction::I32Const(*sub_elem_size as i32));
+            body.instruction(&Instruction::I32Mul);
+        }
+        body.instruction(&Instruction::LocalSet(l_buf_len));
+
+        // Skip patch if (old_ptr, buf_len) doesn't fit in callee mem — guards
+        // against garbage values triggering an unrecoverable trap.
+        //
+        // Fix: the previous check computed `old_ptr + buf_len` with wrapping
+        // i32.add, so a huge buf_len could wrap past 2^32 to a small value
+        // and pass the bounds compare, re-opening the OOB read this guard
+        // exists to prevent. Detect the wrap explicitly (end < old_ptr)
+        // before comparing against the memory size.
+        // NOTE(review): `memory.size << 16` itself wraps to 0 for a full
+        // 4 GiB memory (65536 pages); preexisting behaviour, unchanged here.
+        body.instruction(&Instruction::Block(wasm_encoder::BlockType::Empty));
+        // end = old_ptr + buf_len (stashed in l_new_ptr, which is free until
+        // the realloc below overwrites it)
+        body.instruction(&Instruction::LocalGet(l_old_ptr));
+        body.instruction(&Instruction::LocalGet(l_buf_len));
+        body.instruction(&Instruction::I32Add);
+        body.instruction(&Instruction::LocalSet(l_new_ptr));
+        // wrapped? (end < old_ptr) → skip this indirection
+        body.instruction(&Instruction::LocalGet(l_new_ptr));
+        body.instruction(&Instruction::LocalGet(l_old_ptr));
+        body.instruction(&Instruction::I32LtU);
+        body.instruction(&Instruction::BrIf(0));
+        // end > callee memory size in bytes → skip
+        body.instruction(&Instruction::LocalGet(l_new_ptr));
+        body.instruction(&Instruction::MemorySize(callee_memory));
+        body.instruction(&Instruction::I32Const(16));
+        body.instruction(&Instruction::I32Shl);
+        body.instruction(&Instruction::I32GtU);
+        body.instruction(&Instruction::BrIf(0));
+
+        // new_ptr = realloc(0, 0, 1, buf_len) in caller memory
+        body.instruction(&Instruction::I32Const(0));
+        body.instruction(&Instruction::I32Const(0));
+        body.instruction(&Instruction::I32Const(1));
+        body.instruction(&Instruction::LocalGet(l_buf_len));
+        emit_checked_realloc(body, realloc_func, l_new_ptr);
+
+        // memory.copy new_ptr <- old_ptr (callee → caller)
+        body.instruction(&Instruction::LocalGet(l_new_ptr));
+        body.instruction(&Instruction::LocalGet(l_old_ptr));
+        body.instruction(&Instruction::LocalGet(l_buf_len));
+        body.instruction(&Instruction::MemoryCopy {
+            dst_mem: caller_memory,
+            src_mem: callee_memory,
+        });
+
+        // caller_mem.store(rec_dst + offset, new_ptr)
+        body.instruction(&Instruction::LocalGet(l_rec_dst));
+        body.instruction(&Instruction::LocalGet(l_new_ptr));
+        body.instruction(&Instruction::I32Store(dst_mem_arg_ptr));
+
+        body.instruction(&Instruction::End);
+    }
+
+    // i++
+    body.instruction(&Instruction::LocalGet(l_i));
+    body.instruction(&Instruction::I32Const(1));
+    body.instruction(&Instruction::I32Add);
+    body.instruction(&Instruction::LocalSet(l_i));
+    body.instruction(&Instruction::Br(0));
+
+    body.instruction(&Instruction::End); // end loop
+    body.instruction(&Instruction::End); // end block
+}
+
+/// For a given element type, find every field offset that holds a (ptr, len)
+/// pair that needs cross-memory copying (currently strings and nested lists).
+/// Returns `(byte_offset_within_element, sub_element_size_in_bytes)`. 
+pub(crate) fn collect_indirections( + ty: &crate::parser::ComponentValType, + base_offset: u32, +) -> Vec<(u32, u32)> { + use crate::parser::ComponentValType as CVT; + fn align_up(n: u32, a: u32) -> u32 { + (n + a - 1) & !(a - 1) + } + let mut out = Vec::new(); + match ty { + CVT::String => out.push((base_offset, 1)), + CVT::List(elem) => { + let (es, _) = cabi_size_align(elem); + out.push((base_offset, es)); + } + CVT::Record(fields) => { + let mut off = 0u32; + for (_, fty) in fields { + let (fs, fa) = cabi_size_align(fty); + off = align_up(off, fa); + out.extend(collect_indirections(fty, base_offset + off)); + off += fs; + } + } + CVT::Tuple(elems) => { + let mut off = 0u32; + for ety in elems { + let (es, ea) = cabi_size_align(ety); + off = align_up(off, ea); + out.extend(collect_indirections(ety, base_offset + off)); + off += es; + } + } + // Option/Result/Variant: indirections inside payloads are skipped + // for now — supporting them needs reading the discriminant before + // walking the body. Keep behaviour conservative until a test case + // exercises the path. + _ => {} + } + out +} + /// Scans the merged module's imports to find `[resource-rep]` and `[resource-new]` /// function imports and records their merged function indices. 
type ResourceImportMap = std::collections::HashMap<(String, String), u32>; @@ -891,6 +1205,7 @@ impl FactStyleGenerator { .unwrap_or(1); // Allocate: dest = cabi_realloc(0, 0, 1, len * byte_mult) + emit_overflow_guard(&mut func, len_pos, byte_mult); func.instruction(&Instruction::I32Const(0)); func.instruction(&Instruction::I32Const(0)); func.instruction(&Instruction::I32Const(1)); @@ -899,8 +1214,7 @@ impl FactStyleGenerator { func.instruction(&Instruction::I32Const(byte_mult as i32)); func.instruction(&Instruction::I32Mul); } - func.instruction(&Instruction::Call(callee_realloc)); - func.instruction(&Instruction::LocalSet(dest_local)); + emit_checked_realloc(&mut func, callee_realloc, dest_local); // Copy: memory.copy callee_mem caller_mem (dest, src, len * byte_mult) func.instruction(&Instruction::LocalGet(dest_local)); @@ -1034,6 +1348,7 @@ impl FactStyleGenerator { func.instruction(&Instruction::If(wasm_encoder::BlockType::Empty)); // Allocate: new_ptr = cabi_realloc(0, 0, 1, len * byte_mult) + emit_overflow_guard(&mut func, len_local, byte_mult); func.instruction(&Instruction::I32Const(0)); func.instruction(&Instruction::I32Const(0)); func.instruction(&Instruction::I32Const(1)); @@ -1042,9 +1357,8 @@ impl FactStyleGenerator { func.instruction(&Instruction::I32Const(byte_mult as i32)); func.instruction(&Instruction::I32Mul); } - func.instruction(&Instruction::Call(callee_realloc)); // Save as dest_ptr (reuse a scratch local) - func.instruction(&Instruction::LocalSet(dest_ptr_local)); + emit_checked_realloc(&mut func, callee_realloc, dest_ptr_local); // Copy: memory.copy callee caller (dest, src, len * byte_mult) func.instruction(&Instruction::LocalGet(dest_ptr_local)); @@ -1101,8 +1415,7 @@ impl FactStyleGenerator { func.instruction(&Instruction::I32Const(0)); // original_size func.instruction(&Instruction::I32Const(1)); // alignment func.instruction(&Instruction::LocalGet(callee_ret_len_local)); - func.instruction(&Instruction::Call(caller_realloc)); - 
func.instruction(&Instruction::LocalSet(caller_new_ptr_local)); + emit_checked_realloc(&mut func, caller_realloc, caller_new_ptr_local); // Copy data from callee's memory to caller's memory: // memory.copy $caller_mem $callee_mem (caller_new_ptr, callee_ret_ptr, len) @@ -1186,6 +1499,7 @@ impl FactStyleGenerator { func.instruction(&Instruction::If(wasm_encoder::BlockType::Empty)); // Allocate in caller memory + emit_overflow_guard(&mut func, len_local, byte_mult); func.instruction(&Instruction::I32Const(0)); func.instruction(&Instruction::I32Const(0)); func.instruction(&Instruction::I32Const(1)); @@ -1194,8 +1508,7 @@ impl FactStyleGenerator { func.instruction(&Instruction::I32Const(byte_mult as i32)); func.instruction(&Instruction::I32Mul); } - func.instruction(&Instruction::Call(caller_realloc)); - func.instruction(&Instruction::LocalSet(dest_ptr_local)); + emit_checked_realloc(&mut func, caller_realloc, dest_ptr_local); // Copy from callee memory to caller memory func.instruction(&Instruction::LocalGet(dest_ptr_local)); @@ -1274,9 +1587,11 @@ impl FactStyleGenerator { // 1: callee_ptr (allocated pointer in callee's memory) // 2..2+N: dest_ptr for each pointer pair copy // 2+N: loop_counter (if inner resources need fixup) + // last: pair_len_local (scratch for per-pair overflow guard) let num_ptr_pairs = ptr_pair_offsets.len() as u32; let loop_counter_count = if has_inner_resources { 1u32 } else { 0 }; - let scratch_count = 1 + num_ptr_pairs + loop_counter_count; // callee_ptr + per-pair dest ptrs + loop counter + let pair_len_scratch_count = if num_ptr_pairs > 0 { 1u32 } else { 0 }; + let scratch_count = 1 + num_ptr_pairs + loop_counter_count + pair_len_scratch_count; // callee_ptr + per-pair dest ptrs + loop counter + pair_len // Post-return needs result save locals let has_post_return = options.callee_post_return.is_some(); @@ -1301,6 +1616,10 @@ impl FactStyleGenerator { let params_ptr_local: u32 = 0; let callee_ptr_local: u32 = 1; let pair_dest_base: u32 = 
2; + // Scratch local holding the length of the current (ptr, len) pair, + // used by emit_overflow_guard. Only present when there is at least + // one pointer pair. + let pair_len_local: u32 = pair_dest_base + num_ptr_pairs + loop_counter_count; // --- Phase 1: Allocate buffer in callee's memory --- // callee_ptr = cabi_realloc(0, 0, align, size) @@ -1308,8 +1627,7 @@ impl FactStyleGenerator { func.instruction(&Instruction::I32Const(0)); // original_size func.instruction(&Instruction::I32Const(params_area_align as i32)); // alignment func.instruction(&Instruction::I32Const(params_area_size as i32)); // new_size - func.instruction(&Instruction::Call(callee_realloc)); - func.instruction(&Instruction::LocalSet(callee_ptr_local)); + emit_checked_realloc(&mut func, callee_realloc, callee_ptr_local); // --- Phase 2: Bulk copy the entire params buffer --- // memory.copy $callee_mem $caller_mem (callee_ptr, params_ptr, size) @@ -1341,23 +1659,27 @@ impl FactStyleGenerator { // Read old_ptr from callee's buffer: i32.load callee_mem (callee_ptr + byte_offset) // Read old_len from callee's buffer: i32.load callee_mem (callee_ptr + byte_offset + 4) - // Allocate: new_ptr = cabi_realloc(0, 0, 1, len * byte_mult) - func.instruction(&Instruction::I32Const(0)); - func.instruction(&Instruction::I32Const(0)); - func.instruction(&Instruction::I32Const(1)); - // Load len from callee's buffer + // Stash len into a scratch local so the overflow guard + realloc + // can both reference it without re-loading from memory. 
func.instruction(&Instruction::LocalGet(callee_ptr_local)); func.instruction(&Instruction::I32Load(wasm_encoder::MemArg { offset: (byte_offset + 4) as u64, align: 2, memory_index: options.callee_memory, })); + func.instruction(&Instruction::LocalSet(pair_len_local)); + + // Allocate: new_ptr = cabi_realloc(0, 0, 1, len * byte_mult) + emit_overflow_guard(&mut func, pair_len_local, byte_mult); + func.instruction(&Instruction::I32Const(0)); + func.instruction(&Instruction::I32Const(0)); + func.instruction(&Instruction::I32Const(1)); + func.instruction(&Instruction::LocalGet(pair_len_local)); if byte_mult > 1 { func.instruction(&Instruction::I32Const(byte_mult as i32)); func.instruction(&Instruction::I32Mul); } - func.instruction(&Instruction::Call(callee_realloc)); - func.instruction(&Instruction::LocalSet(dest_local)); + emit_checked_realloc(&mut func, callee_realloc, dest_local); // Copy data: memory.copy callee caller (new_ptr, old_ptr, len * byte_mult) func.instruction(&Instruction::LocalGet(dest_local)); // dst (in callee mem) @@ -1618,6 +1940,7 @@ impl FactStyleGenerator { .unwrap_or(1); // Allocate: dest = cabi_realloc(0, 0, 1, len * byte_mult) + emit_overflow_guard(&mut func, len_pos, byte_mult); func.instruction(&Instruction::I32Const(0)); func.instruction(&Instruction::I32Const(0)); func.instruction(&Instruction::I32Const(1)); @@ -1626,8 +1949,7 @@ impl FactStyleGenerator { func.instruction(&Instruction::I32Const(byte_mult as i32)); func.instruction(&Instruction::I32Mul); } - func.instruction(&Instruction::Call(callee_realloc)); - func.instruction(&Instruction::LocalSet(dest_local)); + emit_checked_realloc(&mut func, callee_realloc, dest_local); // Copy: memory.copy callee_mem caller_mem (dest, src, len * byte_mult) func.instruction(&Instruction::LocalGet(dest_local)); @@ -1736,6 +2058,7 @@ impl FactStyleGenerator { func.instruction(&Instruction::If(wasm_encoder::BlockType::Empty)); // Allocate in callee memory + emit_overflow_guard(&mut func, len_local, 
byte_mult); func.instruction(&Instruction::I32Const(0)); func.instruction(&Instruction::I32Const(0)); func.instruction(&Instruction::I32Const(1)); @@ -1744,8 +2067,7 @@ impl FactStyleGenerator { func.instruction(&Instruction::I32Const(byte_mult as i32)); func.instruction(&Instruction::I32Mul); } - func.instruction(&Instruction::Call(callee_realloc)); - func.instruction(&Instruction::LocalSet(cond_dest_ptr_local)); + emit_checked_realloc(&mut func, callee_realloc, cond_dest_ptr_local); // Copy from caller to callee memory func.instruction(&Instruction::LocalGet(cond_dest_ptr_local)); @@ -1837,6 +2159,7 @@ impl FactStyleGenerator { func.instruction(&Instruction::LocalSet(data_len_local)); // Allocate in caller's memory: data_len * byte_mult bytes + emit_overflow_guard(&mut func, data_len_local, byte_mult); func.instruction(&Instruction::I32Const(0)); func.instruction(&Instruction::I32Const(0)); func.instruction(&Instruction::I32Const(1)); @@ -1845,8 +2168,7 @@ impl FactStyleGenerator { func.instruction(&Instruction::I32Const(byte_mult as i32)); func.instruction(&Instruction::I32Mul); } - func.instruction(&Instruction::Call(caller_realloc)); - func.instruction(&Instruction::LocalSet(caller_new_ptr_local)); + emit_checked_realloc(&mut func, caller_realloc, caller_new_ptr_local); // Copy data bytes from callee → caller func.instruction(&Instruction::LocalGet(caller_new_ptr_local)); @@ -2022,6 +2344,7 @@ impl FactStyleGenerator { func.instruction(&Instruction::LocalSet(data_len_local)); // Allocate in caller memory + emit_overflow_guard(&mut func, data_len_local, byte_mult); func.instruction(&Instruction::I32Const(0)); func.instruction(&Instruction::I32Const(0)); func.instruction(&Instruction::I32Const(1)); @@ -2030,8 +2353,7 @@ impl FactStyleGenerator { func.instruction(&Instruction::I32Const(byte_mult as i32)); func.instruction(&Instruction::I32Mul); } - func.instruction(&Instruction::Call(caller_realloc)); - 
func.instruction(&Instruction::LocalSet(caller_new_ptr_local)); + emit_checked_realloc(&mut func, caller_realloc, caller_new_ptr_local); // Copy data from callee → caller func.instruction(&Instruction::LocalGet(caller_new_ptr_local)); @@ -2184,6 +2506,7 @@ impl FactStyleGenerator { func.instruction(&Instruction::LocalSet(inner_len)); // Allocate inner data in dst memory: new_ptr = realloc(0, 0, 1, inner_len * byte_mult) + emit_overflow_guard(func, inner_len, byte_mult); func.instruction(&Instruction::I32Const(0)); func.instruction(&Instruction::I32Const(0)); func.instruction(&Instruction::I32Const(1)); @@ -2192,8 +2515,7 @@ impl FactStyleGenerator { func.instruction(&Instruction::I32Const(byte_mult as i32)); func.instruction(&Instruction::I32Mul); } - func.instruction(&Instruction::Call(realloc_func)); - func.instruction(&Instruction::LocalSet(new_ptr)); + emit_checked_realloc(func, realloc_func, new_ptr); // Copy data from src memory to dst memory // memory.copy dst_mem src_mem (new_ptr, inner_ptr, inner_len * byte_mult) @@ -2423,8 +2745,21 @@ impl FactStyleGenerator { }; // Step 1: Allocate output buffer = 2 * input_len bytes via cabi_realloc - // (each UTF-8 byte produces at most one UTF-16 code unit = 2 bytes) + // (each UTF-8 byte produces at most one UTF-16 code unit = 2 bytes). + // Guards against the two memory-safety hazards identified in LS-A-7: + // (a) i32.mul is modulo 2^32 — trap if len > u32::MAX/2 before the + // multiply, so alloc_size cannot wrap below the actual required + // byte count that the transcode loop will write. + // (b) cabi_realloc may return 0 on OOM — trap before writing so + // the loop cannot corrupt callee memory at offset 0. 
let callee_align = alignment_for_encoding(options.callee_string_encoding); + func.instruction(&Instruction::LocalGet(1)); // input_len + func.instruction(&Instruction::I32Const((u32::MAX / 2) as i32)); + func.instruction(&Instruction::I32GtU); + func.instruction(&Instruction::If(wasm_encoder::BlockType::Empty)); + func.instruction(&Instruction::Unreachable); + func.instruction(&Instruction::End); + func.instruction(&Instruction::I32Const(0)); // original_ptr func.instruction(&Instruction::I32Const(0)); // original_size func.instruction(&Instruction::I32Const(callee_align)); // alignment @@ -2434,6 +2769,13 @@ impl FactStyleGenerator { func.instruction(&Instruction::Call(callee_realloc)); func.instruction(&Instruction::LocalSet(out_ptr_local)); + // Trap on null return from cabi_realloc (LS-A-7 leg b). + func.instruction(&Instruction::LocalGet(out_ptr_local)); + func.instruction(&Instruction::I32Eqz); + func.instruction(&Instruction::If(wasm_encoder::BlockType::Empty)); + func.instruction(&Instruction::Unreachable); + func.instruction(&Instruction::End); + // Step 2: Initialize loop counters func.instruction(&Instruction::I32Const(0)); func.instruction(&Instruction::LocalSet(src_idx_local)); @@ -2731,8 +3073,17 @@ impl FactStyleGenerator { }; // Step 1: Allocate output buffer = 3 * input_code_units bytes - // (worst case: all BMP chars in U+0800-U+FFFF → 3 bytes UTF-8 each) + // (worst case: all BMP chars in U+0800-U+FFFF → 3 bytes UTF-8 each). + // See LS-A-7: guard against i32.mul wrap (leg a) and cabi_realloc + // OOM (leg b) before writing into callee memory. 
let callee_align = alignment_for_encoding(options.callee_string_encoding); + func.instruction(&Instruction::LocalGet(1)); // input_len + func.instruction(&Instruction::I32Const((u32::MAX / 3) as i32)); + func.instruction(&Instruction::I32GtU); + func.instruction(&Instruction::If(wasm_encoder::BlockType::Empty)); + func.instruction(&Instruction::Unreachable); + func.instruction(&Instruction::End); + func.instruction(&Instruction::I32Const(0)); // original_ptr func.instruction(&Instruction::I32Const(0)); // original_size func.instruction(&Instruction::I32Const(callee_align)); // alignment @@ -2742,6 +3093,13 @@ impl FactStyleGenerator { func.instruction(&Instruction::Call(callee_realloc)); func.instruction(&Instruction::LocalSet(out_ptr_local)); + // Trap on null return from cabi_realloc (LS-A-7 leg b). + func.instruction(&Instruction::LocalGet(out_ptr_local)); + func.instruction(&Instruction::I32Eqz); + func.instruction(&Instruction::If(wasm_encoder::BlockType::Empty)); + func.instruction(&Instruction::Unreachable); + func.instruction(&Instruction::End); + // Step 2: Initialize loop counters func.instruction(&Instruction::I32Const(0)); func.instruction(&Instruction::LocalSet(src_idx_local)); @@ -3055,8 +3413,17 @@ impl FactStyleGenerator { memory_index: options.callee_memory, }; - // Step 1: Allocate output buffer = 2 * input_len via cabi_realloc + // Step 1: Allocate output buffer = 2 * input_len via cabi_realloc. + // See LS-A-7: guard against i32.mul wrap (leg a) and cabi_realloc + // OOM (leg b) before writing into callee memory. 
let callee_align = alignment_for_encoding(options.callee_string_encoding); + func.instruction(&Instruction::LocalGet(1)); // input_len + func.instruction(&Instruction::I32Const((u32::MAX / 2) as i32)); + func.instruction(&Instruction::I32GtU); + func.instruction(&Instruction::If(wasm_encoder::BlockType::Empty)); + func.instruction(&Instruction::Unreachable); + func.instruction(&Instruction::End); + func.instruction(&Instruction::I32Const(0)); // original_ptr func.instruction(&Instruction::I32Const(0)); // original_size func.instruction(&Instruction::I32Const(callee_align)); // alignment @@ -3066,6 +3433,13 @@ impl FactStyleGenerator { func.instruction(&Instruction::Call(callee_realloc)); func.instruction(&Instruction::LocalSet(out_ptr_local)); + // Trap on null return from cabi_realloc (LS-A-7 leg b). + func.instruction(&Instruction::LocalGet(out_ptr_local)); + func.instruction(&Instruction::I32Eqz); + func.instruction(&Instruction::If(wasm_encoder::BlockType::Empty)); + func.instruction(&Instruction::Unreachable); + func.instruction(&Instruction::End); + // Step 2: Initialize loop counters func.instruction(&Instruction::I32Const(0)); func.instruction(&Instruction::LocalSet(src_idx_local)); @@ -3260,8 +3634,9 @@ impl FactStyleGenerator { let caller_param_count = caller_type.params.len(); let _caller_result_count = caller_type.results.len(); - // Find callee's memory index for the event buffer scratch space + // Find memory indices for cross-memory operations let callee_memory = crate::merger::component_memory_index(merged, site.to_component); + let caller_memory = crate::merger::component_memory_index(merged, site.from_component); // Determine the [async-lift] entry's param count from its type. 
// The caller may have extra params (e.g., retptr for multi-value results) @@ -3289,8 +3664,118 @@ impl FactStyleGenerator { let l_p1 = l_packed + 4; let l_p2 = l_packed + 5; - // 6 locals for callback loop + 3 for string copy (src_ptr, src_len, dst_ptr) - let mut body = Function::new([(9, wasm_encoder::ValType::I32)]); + // 6 locals for callback loop + 4 for string copy (src_ptr, src_len, dst_ptr, new_ptr) + // + 6 for nested indirection patching (i, rec_dst, old_ptr, buf_len, new_ptr, rec_src) + let mut body = Function::new([(16, wasm_encoder::ValType::I32)]); + + // Step 0.5: Copy string/list params from caller to callee memory. + // + // The pointer_pair_positions from the resolver are in CALLEE component + // type order. But the adapter's locals are in CALLER order (from the + // caller's canon lower). These may differ if the component type + // reorders params. + // + // Instead of using the resolver's positions, compute positions from + // the caller's flat param types: find (i32, i32) pairs that could be + // (ptr, len) strings/lists. + let callee_realloc = crate::merger::component_realloc_index(merged, site.to_component); + + // Detect pointer pairs in caller params: consecutive (i32, i32) pairs + // that aren't the last param (retptr). This is a heuristic — works for + // string and list params which are always (ptr: i32, len: i32). 
+ let caller_ptr_positions: Vec = if site.crosses_memory && callee_realloc.is_some() { + let params = &caller_type.params; + let has_retptr = + caller_type.results.is_empty() && caller_param_count > callee_param_count; + let effective_len = if has_retptr { + params.len() - 1 + } else { + params.len() + }; + let mut positions = Vec::new(); + let mut i = 0; + while i + 1 < effective_len { + if params[i] == wasm_encoder::ValType::I32 + && params[i + 1] == wasm_encoder::ValType::I32 + { + // Check if the resolver also thinks this is a pointer pair + // (the resolver uses component type info to confirm) + if site + .requirements + .pointer_pair_positions + .iter() + .any(|_| true) + { + positions.push(i as u32); + i += 2; // skip the len + continue; + } + } + i += 1; + } + positions + } else { + Vec::new() + }; + + let has_param_copies = !caller_ptr_positions.is_empty(); + + if has_param_copies { + log::debug!( + "async adapter param copy: export={} caller_positions={:?} resolver_positions={:?}", + site.export_name, + caller_ptr_positions, + site.requirements.pointer_pair_positions, + ); + let realloc = callee_realloc.unwrap(); + // For each (ptr, len) pair in the caller's params, allocate in + // callee memory and copy the data from caller memory. Use the + // resolver's param_copy_layouts to get the per-element byte + // size so list/list/etc. copy the correct total size. + let param_layouts = &site.requirements.param_copy_layouts; + for (pair_idx, &ptr_pos) in caller_ptr_positions.iter().enumerate() { + let ptr_local = ptr_pos; + let len_local = ptr_local + 1; + let l_new_ptr = l_p2 + 4; // reuse scratch local + + let byte_mult = param_layouts + .get(pair_idx) + .map(|cl| match cl { + crate::resolver::CopyLayout::Bulk { byte_multiplier } => *byte_multiplier, + crate::resolver::CopyLayout::Elements { element_size, .. 
} => *element_size, + }) + .unwrap_or(1); + + // Allocate: cabi_realloc(0, 0, 1, len * byte_mult) + emit_overflow_guard(&mut body, len_local, byte_mult); + body.instruction(&Instruction::I32Const(0)); + body.instruction(&Instruction::I32Const(0)); + body.instruction(&Instruction::I32Const(1)); + body.instruction(&Instruction::LocalGet(len_local)); + if byte_mult > 1 { + body.instruction(&Instruction::I32Const(byte_mult as i32)); + body.instruction(&Instruction::I32Mul); + } + emit_checked_realloc(&mut body, realloc, l_new_ptr); + + // Copy: memory.copy new_ptr <- old_ptr, len * byte_mult + body.instruction(&Instruction::LocalGet(l_new_ptr)); + body.instruction(&Instruction::LocalGet(ptr_local)); + body.instruction(&Instruction::LocalGet(len_local)); + if byte_mult > 1 { + body.instruction(&Instruction::I32Const(byte_mult as i32)); + body.instruction(&Instruction::I32Mul); + } + body.instruction(&Instruction::MemoryCopy { + dst_mem: callee_memory, + src_mem: caller_memory, + }); + + // Replace the ptr param with the new callee-memory ptr + body.instruction(&Instruction::LocalGet(l_new_ptr)); + body.instruction(&Instruction::LocalSet(ptr_local)); + } + } // Step 1: Call [async-lift] entry with callee's params // (skip retptr if caller has more params than callee) @@ -3420,33 +3905,60 @@ impl FactStyleGenerator { .map(|(_, name)| name) .unwrap_or(&site.export_name); - let shim_info = merged - .task_return_shims - .values() - .find(|info| { - info.component_idx == site.to_component - && info.original_func_name == adapter_func_name + // Look up result globals. First try element-segment-based mapping + // (correct for components with forwarding modules), then fall back + // to name-based matching (for direct task.return calls). + let result_globals_direct = merged + .async_result_globals + .get(&(site.to_component, adapter_func_name.to_string())); + + let shim_info = if let Some(globals) = result_globals_direct { + // Recover the WIT result_type from the underlying shim. 
The + // direct-globals lookup gives us per-(component, func) globals; + // find the source shim by matching globals to get its type info. + let result_type = merged + .task_return_shims + .values() + .find(|info| { + info.component_idx == site.to_component && info.result_globals == *globals + }) + .and_then(|info| info.result_type.clone()); + Some(crate::merger::TaskReturnShimInfo { + shim_func: 0, + result_globals: globals.clone(), + component_idx: site.to_component, + import_name: String::new(), + original_func_name: adapter_func_name.to_string(), + result_type, }) - .or_else(|| { - // Fallback: match by type signature if name matching fails - merged.task_return_shims.values().find(|info| { + } else { + // Fallback: match by component + original function name + merged + .task_return_shims + .values() + .find(|info| { info.component_idx == site.to_component - && info.result_globals.len() == caller_type.results.len() - && info - .result_globals - .iter() - .zip(caller_type.results.iter()) - .all(|((_, gt), ct)| gt == ct) + && info.original_func_name == adapter_func_name }) - }); + .cloned() + }; + let shim_info = shim_info.as_ref(); // Detect retptr convention: caller has more params than callee // and returns void — the last caller param is the result pointer. 
let uses_retptr = caller_type.results.is_empty() && caller_param_count > callee_param_count; - let caller_memory = crate::merger::component_memory_index(merged, site.from_component); // Find caller's cabi_realloc for cross-memory string copying let caller_realloc = crate::merger::component_realloc_index(merged, site.from_component); + log::debug!( + "async adapter '{}' from={} to={} caller_realloc={:?} callee_mem={} caller_mem={}", + adapter_func_name, + site.from_component, + site.to_component, + caller_realloc, + callee_memory, + caller_memory, + ); if let Some(info) = shim_info { if uses_retptr { @@ -3470,11 +3982,26 @@ impl FactStyleGenerator { let (ptr_global, _) = info.result_globals[0]; let (len_global, _) = info.result_globals[1]; - // Allocate in caller memory: cabi_realloc(0, 0, 1, len) → new_ptr - // locals: l_packed+6 = src_ptr, l_packed+7 = src_len, l_packed+8 = dst_ptr + // Determine the per-element byte size and alignment from + // the WIT result type. For string the element is 1 byte; + // for list it's 4; for list it's the + // record's CABI size (with internal alignment padding). + // Without a known type we fall back to 1 (string-like). 
+ let (elem_size, elem_align, list_elem_ty) = match &info.result_type { + Some(crate::parser::ComponentValType::List(elem)) + | Some(crate::parser::ComponentValType::FixedSizeList(elem, _)) => { + let (s, a) = cabi_size_align(elem); + (s, a, Some(elem.as_ref().clone())) + } + Some(crate::parser::ComponentValType::String) => (1, 1, None), + _ => (1, 1, None), + }; + + // locals let l_src_ptr = l_p2 + 1; let l_src_len = l_p2 + 2; let l_dst_ptr = l_p2 + 3; + let l_byte_count = l_p2 + 4; // Read source ptr and len from shim globals body.instruction(&Instruction::GlobalGet(ptr_global)); @@ -3482,23 +4009,50 @@ impl FactStyleGenerator { body.instruction(&Instruction::GlobalGet(len_global)); body.instruction(&Instruction::LocalSet(l_src_len)); - // Allocate in caller memory + // byte_count = len * elem_size + emit_overflow_guard(&mut body, l_src_len, elem_size); + body.instruction(&Instruction::LocalGet(l_src_len)); + if elem_size != 1 { + body.instruction(&Instruction::I32Const(elem_size as i32)); + body.instruction(&Instruction::I32Mul); + } + body.instruction(&Instruction::LocalSet(l_byte_count)); + + // Allocate in caller memory: cabi_realloc(0, 0, align, byte_count) body.instruction(&Instruction::I32Const(0)); // old_ptr body.instruction(&Instruction::I32Const(0)); // old_size - body.instruction(&Instruction::I32Const(1)); // align - body.instruction(&Instruction::LocalGet(l_src_len)); // new_size - body.instruction(&Instruction::Call(realloc_func)); - body.instruction(&Instruction::LocalSet(l_dst_ptr)); + body.instruction(&Instruction::I32Const(elem_align as i32)); + body.instruction(&Instruction::LocalGet(l_byte_count)); + emit_checked_realloc(&mut body, realloc_func, l_dst_ptr); // Copy from callee memory to caller memory - body.instruction(&Instruction::LocalGet(l_dst_ptr)); // dst - body.instruction(&Instruction::LocalGet(l_src_ptr)); // src - body.instruction(&Instruction::LocalGet(l_src_len)); // len + body.instruction(&Instruction::LocalGet(l_dst_ptr)); + 
body.instruction(&Instruction::LocalGet(l_src_ptr)); + body.instruction(&Instruction::LocalGet(l_byte_count)); body.instruction(&Instruction::MemoryCopy { dst_mem: caller_memory, src_mem: callee_memory, }); + // If the list element contains nested indirections + // (string fields, nested lists), walk each element and + // copy each indirect buffer into caller memory, then + // patch the (ptr, len) pair stored in the copied record. + if let Some(elem_ty) = &list_elem_ty { + emit_patch_nested_indirections( + &mut body, + elem_ty, + l_dst_ptr, + l_src_ptr, + l_src_len, + elem_size, + l_p2 + 5, + realloc_func, + caller_memory, + callee_memory, + ); + } + // Write (new_ptr, len) to retptr let mem_arg_0 = wasm_encoder::MemArg { offset: 0, @@ -3865,4 +4419,125 @@ mod tests { "SR-17: different memory indices should cross memory boundaries" ); } + + // --------------------------------------------------------------- + // LS-A-7: Transcoder overflow + null-check guards + // + // The three transcode emitters must emit, for every generated + // adapter: + // (a) an I32GtU check on input_len against u32::MAX/K followed + // by an `if ... unreachable end` trap — prevents i32.mul + // wrapping to a small alloc_size. + // (b) an I32Eqz check on the cabi_realloc return followed by + // `if ... unreachable end` — prevents the transcode loop + // writing to callee memory offset 0 when OOM returns null. + // + // These byte-scan tests are the PoC referenced in loss-scenarios + // LS-A-7. They fail on the unfixed emitter and pass once both + // guards are present. + // --------------------------------------------------------------- + + /// Return `true` iff the byte-encoded function body `body` contains + /// an `i32.eqz; if; unreachable; end` sequence. The `if` block byte + /// is 0x04, `unreachable` is 0x00, `end` is 0x0B, `i32.eqz` is 0x45. + /// The block type that follows 0x04 is 0x40 (empty block type). 
+    #[cfg(test)]
+    fn body_has_eqz_if_unreachable(body: &[u8]) -> bool {
+        // Pattern: 0x45 0x04 0x40 0x00 0x0B
+        body.windows(5).any(|w| w == [0x45, 0x04, 0x40, 0x00, 0x0B])
+    }
+
+    /// Return `true` iff the byte-encoded function body `body` contains
+    /// a `i32.gt_u; if; unreachable; end` sequence.
+    /// Opcodes: i32.gt_u = 0x4B, if = 0x04, block type empty = 0x40,
+    /// unreachable = 0x00, end = 0x0B.
+    #[cfg(test)]
+    fn body_has_gtu_if_unreachable(body: &[u8]) -> bool {
+        body.windows(5).any(|w| w == [0x4B, 0x04, 0x40, 0x00, 0x0B])
+    }
+
+    fn emit_transcode(options: AdapterOptions) -> Vec<u8> {
+        let gen_ = FactStyleGenerator::new(AdapterConfig::default());
+        let mut f = Function::new([(8, wasm_encoder::ValType::I32)]);
+        // param_count=2 matches string param (ptr, len) lowered shape.
+        // target_func=0 is a placeholder — the emitter only uses it for
+        // the tail call, which this test doesn't execute.
+        if options.caller_string_encoding == StringEncoding::Utf8
+            && options.callee_string_encoding == StringEncoding::Utf16
+        {
+            gen_.emit_utf8_to_utf16_transcode(&mut f, 2, 0, &options);
+        } else if options.caller_string_encoding == StringEncoding::Utf16
+            && options.callee_string_encoding == StringEncoding::Utf8
+        {
+            gen_.emit_utf16_to_utf8_transcode(&mut f, 2, 0, &options);
+        } else if options.caller_string_encoding == StringEncoding::Latin1
+            && options.callee_string_encoding == StringEncoding::Utf8
+        {
+            gen_.emit_latin1_to_utf8_transcode(&mut f, 2, 0, &options);
+        } else {
+            panic!("unsupported encoding pair for test");
+        }
+        f.into_raw_body()
+    }
+
+    fn transcode_options(caller: StringEncoding, callee: StringEncoding) -> AdapterOptions {
+        AdapterOptions {
+            caller_string_encoding: caller,
+            callee_string_encoding: callee,
+            caller_memory: 0,
+            callee_memory: 1,
+            callee_realloc: Some(0),
+            ..Default::default()
+        }
+    }
+
+    #[test]
+    fn ls_a_7_utf8_to_utf16_emits_overflow_and_null_guards() {
+        let body = emit_transcode(transcode_options(
+            StringEncoding::Utf8,
StringEncoding::Utf16, + )); + assert!( + body_has_gtu_if_unreachable(&body), + "LS-A-7: UTF-8→UTF-16 transcoder missing overflow guard \ + (i32.gt_u; if; unreachable; end) before the i32.mul" + ); + assert!( + body_has_eqz_if_unreachable(&body), + "LS-A-7: UTF-8→UTF-16 transcoder missing cabi_realloc null \ + guard (i32.eqz; if; unreachable; end) after the call" + ); + } + + #[test] + fn ls_a_7_utf16_to_utf8_emits_overflow_and_null_guards() { + let body = emit_transcode(transcode_options( + StringEncoding::Utf16, + StringEncoding::Utf8, + )); + assert!( + body_has_gtu_if_unreachable(&body), + "LS-A-7: UTF-16→UTF-8 transcoder missing overflow guard" + ); + assert!( + body_has_eqz_if_unreachable(&body), + "LS-A-7: UTF-16→UTF-8 transcoder missing cabi_realloc null guard" + ); + } + + #[test] + fn ls_a_7_latin1_to_utf8_emits_overflow_and_null_guards() { + let body = emit_transcode(transcode_options( + StringEncoding::Latin1, + StringEncoding::Utf8, + )); + assert!( + body_has_gtu_if_unreachable(&body), + "LS-A-7: Latin-1→UTF-8 transcoder missing overflow guard" + ); + assert!( + body_has_eqz_if_unreachable(&body), + "LS-A-7: Latin-1→UTF-8 transcoder missing cabi_realloc null guard" + ); + } } diff --git a/meld-core/src/adapter/mod.rs b/meld-core/src/adapter/mod.rs index bb8ef44..b28ba33 100644 --- a/meld-core/src/adapter/mod.rs +++ b/meld-core/src/adapter/mod.rs @@ -19,7 +19,7 @@ //! 3. Calls the target function in B //! 4. Writes results back to A's memory (lifting) -mod fact; +pub(crate) mod fact; pub use fact::FactStyleGenerator; diff --git a/meld-core/src/attestation.rs b/meld-core/src/attestation.rs index ca4a97a..6d480c4 100644 --- a/meld-core/src/attestation.rs +++ b/meld-core/src/attestation.rs @@ -283,20 +283,31 @@ pub(crate) fn compute_sha256(bytes: &[u8]) -> String { hex::encode(result) } -/// Generate a UUID v4 +/// Generate a UUID v4 using the current system clock as entropy. 
+/// +/// This is a thin wrapper over [`generate_uuid_from`] that sources entropy +/// from `SystemTime::now()`. Tests should prefer [`generate_uuid_from`] to +/// pin the entropy value and keep results deterministic. pub(crate) fn generate_uuid() -> String { - // Simple UUID v4 generation using random bytes - // In production, use a proper UUID crate - let mut bytes = [0u8; 16]; - - // Use a simple hash of current time as pseudo-random let now = std::time::SystemTime::now() .duration_since(std::time::UNIX_EPOCH) .map(|d| d.as_nanos()) .unwrap_or(0); + generate_uuid_from(now) +} + +/// Generate a UUID v4 from a caller-supplied entropy value. +/// +/// The entropy is hashed with SHA-256 and the first 16 bytes are used to +/// fill a UUID v4 shape (with version and variant bits set per RFC 4122). +/// The algorithm is unchanged from the original `generate_uuid`; this form +/// exists so callers (and tests) can provide deterministic entropy rather +/// than depending on the wall clock. +pub(crate) fn generate_uuid_from(entropy: u128) -> String { + let mut bytes = [0u8; 16]; let mut hasher = Sha256::new(); - hasher.update(now.to_le_bytes()); + hasher.update(entropy.to_le_bytes()); let hash = hasher.finalize(); bytes.copy_from_slice(&hash[..16]); @@ -317,25 +328,33 @@ pub(crate) fn generate_uuid() -> String { ) } -/// Get current timestamp in ISO 8601 format +/// Get current timestamp in ISO 8601 format using the system clock. +/// +/// Thin wrapper over [`chrono_timestamp_from`] sourcing seconds-since-epoch +/// from `SystemTime::now()`. A clock-before-epoch collapses to +/// `"1970-01-01T00:00:00Z"`. 
pub(crate) fn chrono_timestamp() -> String { use std::time::SystemTime; - let now = SystemTime::now() + let secs = SystemTime::now() .duration_since(SystemTime::UNIX_EPOCH) - .unwrap_or_default(); + .map(|d| d.as_secs()) + .unwrap_or(0); + chrono_timestamp_from(secs) +} + +/// Format `secs` (seconds since Unix epoch) as an ISO 8601 / RFC 3339 +/// UTC timestamp: `YYYY-MM-DDTHH:MM:SSZ`. +/// +/// Computes a correct proleptic Gregorian date, honoring leap years and +/// per-month day counts. No external crate dependency. +pub(crate) fn chrono_timestamp_from(secs: u64) -> String { + const SECS_PER_DAY: u64 = 86_400; - // Simple ISO 8601 format (without chrono dependency) - let secs = now.as_secs(); - let days_since_epoch = secs / 86400; - let secs_today = secs % 86400; + let days_since_epoch = secs / SECS_PER_DAY; + let secs_today = secs % SECS_PER_DAY; - // Approximate date calculation (not accounting for leap years properly) - let years = days_since_epoch / 365; - let year = 1970 + years; - let day_of_year = days_since_epoch % 365; - let month = (day_of_year / 30).min(11) + 1; - let day = (day_of_year % 30) + 1; + let (year, month, day) = civil_from_days(days_since_epoch); let hour = secs_today / 3600; let minute = (secs_today % 3600) / 60; @@ -347,6 +366,44 @@ pub(crate) fn chrono_timestamp() -> String { ) } +/// Convert days-since-Unix-epoch to a (year, month, day) triple in the +/// proleptic Gregorian calendar. +/// +/// Implements Howard Hinnant's `civil_from_days` algorithm +/// (http://howardhinnant.github.io/date_algorithms.html#civil_from_days). +/// Correctly handles leap years and per-month day counts. Returns +/// 1-indexed `month` (1..=12) and `day` (1..=31). +fn civil_from_days(days_since_epoch: u64) -> (u64, u64, u64) { + // Shift epoch from 1970-01-01 to 0000-03-01 (start of a 400-year cycle + // aligned so that February — the leap month — is the last month). + // 719_468 = number of days from 0000-03-01 to 1970-01-01. 
+ let z = days_since_epoch as i64 + 719_468; + + // 146_097 days per 400-year cycle. + let era = z.div_euclid(146_097); + let doe = z.rem_euclid(146_097) as u64; // day-of-era, [0, 146096] + + // year-of-era, [0, 399] + let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365; + let y = yoe as i64 + era * 400; + + // day-of-year, [0, 365] + let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); + + // March-based month, [0, 11] where 0=March, 11=February. + let mp = (5 * doy + 2) / 153; + + // Day of month, [1, 31]. + let d = doy - (153 * mp + 2) / 5 + 1; + + // Shift month to [1, 12] with January=1; year increments if mp>=10 + // (i.e. the March-based month rolled past December into Jan/Feb). + let m = if mp < 10 { mp + 3 } else { mp - 9 }; + let y = if m <= 2 { y + 1 } else { y }; + + (y as u64, m, d) +} + #[cfg(test)] mod tests { use super::*; @@ -411,6 +468,61 @@ mod tests { assert!(ts.ends_with('Z')); } + /// Epoch maps to 1970-01-01T00:00:00Z exactly. + #[test] + fn test_chrono_timestamp_from_epoch() { + assert_eq!(chrono_timestamp_from(0), "1970-01-01T00:00:00Z"); + } + + /// 2025-01-01T00:00:00Z — 55 years after the epoch, crossing the + /// 2024 leap year. The old (365-days-per-year) approximation was + /// off by many days here. + #[test] + fn test_chrono_timestamp_from_2025_new_year() { + assert_eq!(chrono_timestamp_from(1_735_689_600), "2025-01-01T00:00:00Z"); + } + + /// March 1, 2025 (non-leap year): the day after Feb 28. The old + /// algorithm would have reported a non-existent "Feb 30". + #[test] + fn test_chrono_timestamp_from_2025_march_boundary() { + assert_eq!(chrono_timestamp_from(1_740_787_200), "2025-03-01T00:00:00Z"); + } + + /// March 1, 2024 (leap year): the day after Feb 29. Verifies the + /// leap-day is accounted for and March starts on the correct day. 
+ #[test] + fn test_chrono_timestamp_from_2024_leap_march() { + assert_eq!(chrono_timestamp_from(1_709_251_200), "2024-03-01T00:00:00Z"); + } + + /// Pinned output for `generate_uuid_from(0)`. The algorithm is + /// SHA-256 of the little-endian bytes of 0u128 (16 zero bytes), + /// take the first 16 bytes, then set UUID v4 version (0x40) and + /// RFC 4122 variant (0x80) bits. Changing the algorithm should + /// either update this expected value intentionally or fail here. + #[test] + fn test_generate_uuid_from_pinned_zero() { + assert_eq!( + generate_uuid_from(0), + "374708ff-f771-4dd5-979e-c875d56cd228" + ); + } + + /// Different entropy values must produce different UUIDs + /// (sanity check — distinct inputs to SHA-256 collide vanishingly + /// rarely, so this primarily guards against accidentally ignoring + /// the entropy argument). + #[test] + fn test_generate_uuid_from_distinct_entropy_differs() { + let a = generate_uuid_from(0); + let b = generate_uuid_from(1); + let c = generate_uuid_from(u128::MAX); + assert_ne!(a, b); + assert_ne!(a, c); + assert_ne!(b, c); + } + /// SR-27: Input hash integrity — the attestation must record a SHA-256 hash /// that matches an independently computed digest of the input bytes. /// diff --git a/meld-core/src/component_wrap.rs b/meld-core/src/component_wrap.rs index 00d1076..a2cff77 100644 --- a/meld-core/src/component_wrap.rs +++ b/meld-core/src/component_wrap.rs @@ -1337,6 +1337,28 @@ fn assemble_component( } ImportResolution::TaskBuiltin { op } => { + // Check if this task.return has a shim export in the fused module. + // If so, alias the shim instead of using canonical task.return. + if let P3BuiltinOp::TaskReturn { .. 
} = op { + let shim_name = format!("$task_return_shim_{}", i); + let has_shim = fused_info + .exports + .iter() + .any(|(n, k, _)| *k == wasmparser::ExternalKind::Func && *n == shim_name); + if has_shim { + let mut alias_section = ComponentAliasSection::new(); + alias_section.alias(Alias::CoreInstanceExport { + instance: fused_instance, + kind: ExportKind::Func, + name: &shim_name, + }); + component.section(&alias_section); + lowered_func_indices.push(core_func_idx); + core_func_idx += 1; + continue; + } + } + let mut canon = CanonicalFunctionSection::new(); match op { P3BuiltinOp::TaskReturn { @@ -2106,11 +2128,74 @@ fn find_task_return_for_import( None } +/// Recursively resolve all `Type(idx)` references in a `ComponentValType`, +/// inlining the referenced definition. Returns a self-contained type tree +/// that does not depend on the source component's type table. +/// +/// Used when storing typed result info for the adapter to use later, since +/// the adapter only sees the merged module and not the source components. 
+pub(crate) fn resolve_component_val_type( + ty: &parser::ComponentValType, + comp: &ParsedComponent, +) -> parser::ComponentValType { + use parser::ComponentValType as CVT; + match ty { + CVT::Type(idx) => { + if let Some(td) = comp.get_type_definition(*idx) { + if let parser::ComponentTypeKind::Defined(inner) = &td.kind { + resolve_component_val_type(inner, comp) + } else { + ty.clone() + } + } else { + ty.clone() + } + } + CVT::List(inner) => CVT::List(Box::new(resolve_component_val_type(inner, comp))), + CVT::FixedSizeList(inner, n) => { + CVT::FixedSizeList(Box::new(resolve_component_val_type(inner, comp)), *n) + } + CVT::Record(fields) => CVT::Record( + fields + .iter() + .map(|(n, t)| (n.clone(), resolve_component_val_type(t, comp))) + .collect(), + ), + CVT::Tuple(elems) => CVT::Tuple( + elems + .iter() + .map(|t| resolve_component_val_type(t, comp)) + .collect(), + ), + CVT::Option(inner) => CVT::Option(Box::new(resolve_component_val_type(inner, comp))), + CVT::Result { ok, err } => CVT::Result { + ok: ok + .as_ref() + .map(|t| Box::new(resolve_component_val_type(t, comp))), + err: err + .as_ref() + .map(|t| Box::new(resolve_component_val_type(t, comp))), + }, + CVT::Variant(cases) => CVT::Variant( + cases + .iter() + .map(|(n, t)| { + ( + n.clone(), + t.as_ref().map(|t| resolve_component_val_type(t, comp)), + ) + }) + .collect(), + ), + CVT::Primitive(_) | CVT::String | CVT::Own(_) | CVT::Borrow(_) => ty.clone(), + } +} + /// Compute flat task.return params with Type(idx) resolution. /// /// Unlike `flat_task_return_params`, this version resolves `Type(idx)` /// references using the component's type definitions. 
-fn flat_task_return_params_resolved(
+pub(crate) fn flat_task_return_params_resolved(
     result: Option<&parser::ComponentValType>,
     comp: &ParsedComponent,
 ) -> Vec<wasm_encoder::ValType> {
diff --git a/meld-core/src/lib.rs b/meld-core/src/lib.rs
index 7a1fc23..dbadda5 100644
--- a/meld-core/src/lib.rs
+++ b/meld-core/src/lib.rs
@@ -605,32 +605,142 @@ impl Fuser {
             return Ok(());
         }
-        // Build mapping: fused import name → original function name.
-        // The original component's core module has imports like "[task-return]fibonacci".
-        // After fusion, these become "[task-return]2" (renumbered by core_func_idx).
-        // We need the original name to match with async adapter site export names.
-        //
-        // Strategy: for each async callee component, collect the task-return
-        // import names from the ORIGINAL core module (which have function names).
-        // Order matters — the Nth task-return import becomes [task-return]N in
-        // the fused module (via build_canon_import_names).
-        let mut task_return_original_names: HashMap<(usize, usize), String> = HashMap::new();
+        // For each async callee component, build:
+        //   - name_to_result: function name → result type (via Lifts)
+        //   - taskreturn_types: ordered list of resolved TaskReturn result
+        //     types (used for greedy ordered claiming when name matching
+        //     fails for shims whose original_func_name couldn't be recovered)
+        let mut comp_func_result_types: HashMap<usize, HashMap<String, parser::ComponentValType>> =
+            HashMap::new();
+        let mut comp_taskreturn_types: HashMap<usize, Vec<parser::ComponentValType>> =
+            HashMap::new();
+        for &comp_idx in &async_callee_components {
+            let comp = &self.components[comp_idx];
+            let mut name_to_result: HashMap<String, parser::ComponentValType> = HashMap::new();
+
+            // Build core_func_index → result type from canonical Lift entries.
+            let mut core_func_to_result: HashMap<u32, parser::ComponentValType> = HashMap::new();
+            for entry in &comp.canonical_functions {
+                if let parser::CanonicalEntry::Lift {
+                    core_func_index,
+                    type_index,
+                    ..
+                } = entry
+                    && let Some(td) = comp.get_type_definition(*type_index)
+                    && let parser::ComponentTypeKind::Function { results, .. 
} = &td.kind
+                    && let Some((_, ty)) = results.first()
+                {
+                    core_func_to_result.insert(
+                        *core_func_index,
+                        component_wrap::resolve_component_val_type(ty, comp),
+                    );
+                }
+            }
+
+            // Build component-level core func index → core export name.
+            // Walk core_entity_order: each CoreAlias of a Function export
+            // bumps the component core func counter and records the name.
+            // CanonicalFunction entries also bump the counter (with no name).
+            let mut comp_corefn_to_name: HashMap<u32, String> = HashMap::new();
+            let mut corefn_idx = 0u32;
+            for def in &comp.core_entity_order {
+                match def {
+                    parser::CoreEntityDef::CoreAlias(alias_idx) => {
+                        if let Some(parser::ComponentAliasEntry::CoreInstanceExport {
+                            kind: wasmparser::ExternalKind::Func,
+                            name,
+                            ..
+                        }) = comp.component_aliases.get(*alias_idx)
+                        {
+                            comp_corefn_to_name.insert(corefn_idx, name.clone());
+                            corefn_idx += 1;
+                        }
+                    }
+                    parser::CoreEntityDef::CanonicalFunction(canon_idx) => {
+                        if let Some(entry) = comp.canonical_functions.get(*canon_idx)
+                            && !matches!(entry, parser::CanonicalEntry::Lift { .. })
+                        {
+                            corefn_idx += 1;
+                        }
+                    }
+                }
+            }
+
+            // For each Lift, look up the alias name and extract the function
+            // name from `[async-lift]<iface>#<func>` (or just `[async-lift]<func>`).
+            for entry in &comp.canonical_functions {
+                if let parser::CanonicalEntry::Lift {
+                    core_func_index, ..
+                } = entry
+                    && let Some(name) = comp_corefn_to_name.get(core_func_index)
+                    && let Some(rest) = name.strip_prefix("[async-lift]")
+                    && let Some(rt) = core_func_to_result.get(core_func_index)
+                {
+                    let func_name = rest.rsplit_once('#').map(|(_, n)| n).unwrap_or(rest);
+                    name_to_result.insert(func_name.to_string(), rt.clone());
+                }
+            }
+            // Collect ordered TaskReturn types for greedy claiming fallback.
+            let tr_types: Vec<parser::ComponentValType> = comp
+                .canonical_functions
+                .iter()
+                .filter_map(|entry| match entry {
+                    parser::CanonicalEntry::TaskReturn {
+                        result: Some(t), .. 
+ } => Some(component_wrap::resolve_component_val_type(t, comp)), + _ => None, + }) + .collect(); + comp_taskreturn_types.insert(comp_idx, tr_types); + + log::debug!( + "comp {} async-result name→type entries: {}", + comp_idx, + name_to_result.len() + ); + comp_func_result_types.insert(comp_idx, name_to_result); + } + + // Build mapping: (component_idx, func_name) → element segment position. + // The main module (mod 0) has task-return imports in a specific order. + // The forwarding module mirrors this order. The element segment at + // position N has the merged import for the Nth task-return function. + // We track positions (among task-return imports only) so we can later + // match shim globals to adapter functions. + // Build mapping: (comp_idx, func_name) → element segment position. + // Only count task-return imports that are resolved INTRA-COMPONENT + // (forwarding). Directly-resolved imports don't go through element + // segments and are handled by the name-based fallback. + let mut func_name_to_elem_position: HashMap<(usize, String), usize> = HashMap::new(); for &comp_idx in &async_callee_components { let component = &self.components[comp_idx]; - let mut tr_idx = 0usize; - for module in &component.core_modules { + if let Some(module) = component.core_modules.first() { + let mut elem_position = 0usize; + let mut func_idx = 0u32; for module_imp in &module.imports { - if matches!(module_imp.kind, parser::ImportKind::Function(_)) - && module_imp.name.starts_with("[task-return]") - { - let func_name = module_imp - .name - .strip_prefix("[task-return]") - .unwrap_or(&module_imp.name) - .to_string(); - task_return_original_names.insert((comp_idx, tr_idx), func_name); - tr_idx += 1; + if !matches!(module_imp.kind, parser::ImportKind::Function(_)) { + continue; } + if module_imp.name.starts_with("[task-return]") { + // Check if this import is resolved intra-component + // (goes to a forwarding function, not a merged import) + let is_forwarding = merged + 
.function_index_map + .get(&(comp_idx, 0, func_idx)) + .map(|&idx| idx >= merged.import_counts.func) + .unwrap_or(false); + + if is_forwarding { + let func_name = module_imp + .name + .strip_prefix("[task-return]") + .unwrap_or(&module_imp.name) + .to_string(); + func_name_to_elem_position.insert((comp_idx, func_name), elem_position); + elem_position += 1; + } + } + func_idx += 1; } } } @@ -638,7 +748,9 @@ impl Fuser { // Find task.return imports belonging to async callee components // and generate shims for them. let mut affected_modules: HashSet<(usize, usize)> = HashSet::new(); - let mut tr_counter_per_comp: HashMap = HashMap::new(); + // Per-component cursor into comp_taskreturn_types — advances each + // time we need to claim a TaskReturn entry by ordered position. + let mut comp_tr_cursor: HashMap = HashMap::new(); for (import_idx, imp) in merged.imports.iter().enumerate() { if !imp.name.starts_with("[task-return]") { @@ -650,14 +762,39 @@ impl Fuser { _ => continue, }; - // Track the task-return index per component to recover the - // original function name from the mapping built above. - let tr_idx = tr_counter_per_comp.entry(comp_idx).or_insert(0); - let original_func_name = task_return_original_names - .get(&(comp_idx, *tr_idx)) - .cloned() - .unwrap_or_default(); - *tr_counter_per_comp.get_mut(&comp_idx).unwrap() += 1; + // Extract original function name from the source core module's + // `[task-return]` import, used for the adapter's name-based + // shim matching and for result-type resolution below. + // + // The merged FUNCTION index for this import is its position + // among function imports in `merged.imports`, NOT its position + // in the imports vector overall. Compute it by counting only + // function imports up to import_idx. 
+ let merged_func_idx = merged.imports[..import_idx] + .iter() + .filter(|i| matches!(i.entity_type, wasm_encoder::EntityType::Function(_))) + .count() as u32; + let mut original_func_name = imp.name.clone(); + let component = &self.components[comp_idx]; + if let Some(module) = component.core_modules.first() { + let mut fidx = 0u32; + for mimp in &module.imports { + if !matches!(mimp.kind, parser::ImportKind::Function(_)) { + continue; + } + if mimp.name.starts_with("[task-return]") + && merged.function_index_map.get(&(comp_idx, 0, fidx)).copied() + == Some(merged_func_idx) + { + original_func_name = mimp + .name + .strip_prefix("[task-return]") + .unwrap_or(&mimp.name) + .to_string(); + } + fidx += 1; + } + } // Get the import's function type to know the param signature. let import_type = match &imp.entity_type { @@ -696,21 +833,94 @@ impl Fuser { result_globals.push((global_idx, *param_ty)); } - // Generate shim function: stores each param to its global + // Resolve result type early — needed both for shim body (when + // we add callee-side stabilization for nested indirections) and + // for the TaskReturnShimInfo stored later. + let mut early_result_type = comp_func_result_types + .get(&comp_idx) + .and_then(|m| m.get(&original_func_name)) + .cloned(); + if early_result_type.is_none() + && let Some(tr_list) = comp_taskreturn_types.get(&comp_idx) + { + let comp = &self.components[comp_idx]; + let cursor_peek = comp_tr_cursor.entry(comp_idx).or_insert(0); + // Peek without advancing — we'll re-resolve later with the + // same cursor state for the canonical TaskReturnShimInfo. 
+ let mut peek_cursor = *cursor_peek; + while peek_cursor < tr_list.len() { + let candidate = &tr_list[peek_cursor]; + peek_cursor += 1; + let flat = + component_wrap::flat_task_return_params_resolved(Some(candidate), comp); + if flat == import_type.params { + early_result_type = Some(candidate.clone()); + break; + } + } + } + + // For lists with indirections (e.g., list>), + // the wit-bindgen Cleanup guard for the records buffer drops + // when the async block ends — between EXIT and our adapter + // reading globals. To survive that race, the shim deep-copies + // both the records buffer and each indirect string into a + // stable callee-side allocation, then stores stable pointers + // to globals. The adapter's existing cross-mem copy then + // operates on stable data. + let stabilization = early_result_type.as_ref().and_then(|ty| match ty { + parser::ComponentValType::List(elem) + | parser::ComponentValType::FixedSizeList(elem, _) => { + let indirections = crate::adapter::fact::collect_indirections(elem, 0); + if indirections.is_empty() { + None + } else { + let (elem_size, elem_align) = crate::adapter::fact::cabi_size_align(elem); + Some((elem_size, elem_align, indirections)) + } + } + _ => None, + }); + + let (callee_realloc_for_shim, callee_memory_for_shim) = if stabilization.is_some() { + ( + merger::component_realloc_index(merged, comp_idx), + merger::component_memory_index(merged, comp_idx), + ) + } else { + (None, 0) + }; + + // Generate shim function. Default body: store args to globals. + // With stabilization: copy records + strings to stable callee + // buffers first, then store stable pointers. 
let shim_func_idx = merged.import_counts.func + merged.functions.len() as u32; - let _type_idx = import_type.params.len(); // find or create type let shim_type = merger::Merger::find_or_add_type( &mut merged.types, &import_type.params, &[], // void return ); - let mut body = wasm_encoder::Function::new([]); - for (i, (global_idx, _)) in result_globals.iter().enumerate() { - body.instruction(&wasm_encoder::Instruction::LocalGet(i as u32)); - body.instruction(&wasm_encoder::Instruction::GlobalSet(*global_idx)); - } - body.instruction(&wasm_encoder::Instruction::End); + let body = if let (Some((elem_size, elem_align, indirections)), Some(realloc_fn)) = + (stabilization.as_ref(), callee_realloc_for_shim) + { + generate_stabilizing_shim( + &result_globals, + *elem_size, + *elem_align, + indirections, + realloc_fn, + callee_memory_for_shim, + ) + } else { + let mut b = wasm_encoder::Function::new([]); + for (i, (global_idx, _)) in result_globals.iter().enumerate() { + b.instruction(&wasm_encoder::Instruction::LocalGet(i as u32)); + b.instruction(&wasm_encoder::Instruction::GlobalSet(*global_idx)); + } + b.instruction(&wasm_encoder::Instruction::End); + b + }; merged.functions.push(merger::MergedFunction { type_idx: shim_type, @@ -718,8 +928,16 @@ impl Fuser { origin: (comp_idx, 0, u32::MAX), }); - // Remap the task.return import to the shim in function_index_map - // for all modules of this component + // Export the shim so the component wrapper can alias it + // instead of using canonical task.return. + merged.exports.push(merger::MergedExport { + name: format!("$task_return_shim_{}", import_idx), + kind: wasm_encoder::ExportKind::Func, + index: shim_func_idx, + }); + + // Remap the task.return import to the shim in function_index_map. + // Only match direct imports with the fused name. 
let component = &self.components[comp_idx]; for (mod_idx, module) in component.core_modules.iter().enumerate() { let mut func_idx = 0u32; @@ -749,6 +967,49 @@ impl Fuser { } } + // Note: intra-component forwarding functions (call_indirect table[N]) + // for this task.return are handled by the component wrapper, which + // provides the shim export ($task_return_shim_N) as the table entry. + + // Find the WIT result type by matching CanonicalEntry::TaskReturn + // entries from the source component against the import's flat + // core params. Without this, the adapter treats the result as + // opaque bytes and can't compute correct sizes for typed lists. + // Resolve result type. First try by name lookup (works for + // shims whose source core import was directly mapped to a + // merged import). Fall back to ordered claim from the source + // component's TaskReturn entries — pick the next entry whose + // flat shape matches the import. Greedy ordering gives a + // stable per-component pairing for the typical case where + // the merger generates shims in source canonical order. 
+ let mut result_type = comp_func_result_types + .get(&comp_idx) + .and_then(|m| m.get(&original_func_name)) + .cloned(); + if result_type.is_none() + && let Some(tr_list) = comp_taskreturn_types.get(&comp_idx) + { + let comp = &self.components[comp_idx]; + let cursor = comp_tr_cursor.entry(comp_idx).or_insert(0); + while *cursor < tr_list.len() { + let candidate = &tr_list[*cursor]; + *cursor += 1; + let flat = + component_wrap::flat_task_return_params_resolved(Some(candidate), comp); + if flat == import_type.params { + result_type = Some(candidate.clone()); + break; + } + } + } + log::debug!( + "task.return shim {} '{}' orig='{}' typed={}", + import_idx, + imp.name, + original_func_name, + result_type.is_some() + ); + // Store shim info for the adapter to use merged.task_return_shims.insert( import_idx as u32, @@ -758,17 +1019,21 @@ impl Fuser { component_idx: comp_idx, import_name: imp.name.clone(), original_func_name: original_func_name.clone(), + result_type, }, ); + let shim = &merged.task_return_shims[&(import_idx as u32)]; log::info!( - "task.return shim: import {} '{}' → shim func {} with {} globals", + "task.return shim: import {} '{}' orig='{}' → shim func {} globals {:?}", import_idx, imp.name, - shim_func_idx, - merged.task_return_shims[&(import_idx as u32)] - .result_globals - .len(), + shim.original_func_name, + shim.shim_func, + shim.result_globals + .iter() + .map(|(g, _)| *g) + .collect::>(), ); } @@ -811,6 +1076,84 @@ impl Fuser { } } + // Patch element segments: replace task.return import references + // with shim function references. This ensures that indirect calls + // through element-segment-initialized tables call the shim instead + // of the (stub) import. 
+        if !merged.task_return_shims.is_empty() {
+            // Build a map: import merged index → shim func index
+            let mut import_to_shim: HashMap<u32, u32> = HashMap::new();
+            for (import_idx, shim_info) in &merged.task_return_shims {
+                import_to_shim.insert(*import_idx, shim_info.shim_func);
+            }
+
+            for elem in &mut merged.elements {
+                if let crate::segments::ReindexedElementItems::Functions(ref mut indices) =
+                    elem.items
+                {
+                    for idx in indices.iter_mut() {
+                        if let Some(&shim_idx) = import_to_shim.get(idx) {
+                            log::debug!(
+                                "element segment: replaced import {} with shim {}",
+                                idx,
+                                shim_idx,
+                            );
+                            *idx = shim_idx;
+                        }
+                    }
+                }
+            }
+
+            // Build async_result_globals: (comp_idx, func_name) → globals.
+            // For each func_name, find its element segment position, look up
+            // the shim function at that position, and get its globals.
+            let shim_func_to_globals: HashMap<u32, Vec<(u32, wasm_encoder::ValType)>> = merged
+                .task_return_shims
+                .values()
+                .map(|s| (s.shim_func, s.result_globals.clone()))
+                .collect();
+
+            for ((comp_idx, func_name), elem_pos) in &func_name_to_elem_position {
+                // Find the component's $imports table index.
+                // The forwarding module (typically mod 2) defines the table.
+                // Look up via table_index_map.
+                let comp_tables: HashSet<u32> = merged
+                    .table_index_map
+                    .iter()
+                    .filter(|&(&(ci, _, _), _)| ci == *comp_idx)
+                    .map(|(_, &idx)| idx)
+                    .collect();
+
+                // Find the element segment for this component's table
+                for elem in &merged.elements {
+                    let elem_table = match &elem.mode {
+                        crate::segments::ElementSegmentMode::Active { table_index, ..
} => {
+                            *table_index
+                        }
+                        _ => continue,
+                    };
+                    if !comp_tables.contains(&elem_table) {
+                        continue;
+                    }
+                    if let crate::segments::ReindexedElementItems::Functions(ref indices) =
+                        elem.items
+                        && let Some(func_idx) = indices.get(*elem_pos)
+                        && let Some(globals) = shim_func_to_globals.get(func_idx)
+                    {
+                        merged
+                            .async_result_globals
+                            .insert((*comp_idx, func_name.clone()), globals.clone());
+                        break;
+                    }
+                }
+            }
+            log::info!(
+                "async_result_globals: {} entries: {:?}",
+                merged.async_result_globals.len(),
+                merged.async_result_globals.keys().collect::<Vec<_>>(),
+            );
+        }
+
         Ok(())
     }

@@ -1548,6 +1891,181 @@ fn propagate_outer_wiring(
     }

     Ok(wiring_hints)
 }

+/// Generate a task.return shim body that deep-copies the records buffer
+/// (and each indirect string) into a stable callee-side allocation before
+/// storing the stabilized pointer to globals.
+///
+/// Why: wit-bindgen's lowering for `list<record<..., string, ...>>` allocates
+/// the records buffer via `Cleanup::new`, whose drop guard runs at the
+/// end of the async block — between EXIT and our adapter reading the
+/// globals. The original records buffer is freed and overwritten with
+/// allocator free-list patterns by the time the adapter sees it. This
+/// shim makes a parallel copy that the callee allocator owns, free of
+/// the Cleanup guard.
+///
+/// Shim signature: `(ptr: i32, len: i32) -> ()`.
+/// Body shape (for `list` with one indirection +/// at offset 0, sub-element size 1): +/// ```text +/// byte_count = len * elem_size +/// stable_records = realloc(0, 0, elem_align, byte_count) +/// memory.copy stable_records <- ptr, byte_count ; intra-callee +/// for i in 0..len: +/// rec = stable_records + i*elem_size +/// for each (offset, sub_size) in indirections: +/// old_str = mem.load(rec + offset) +/// str_len = mem.load(rec + offset + 4) * sub_size +/// stable_str = realloc(0, 0, 1, str_len) +/// memory.copy stable_str <- old_str, str_len ; intra-callee +/// mem.store(rec + offset, stable_str) +/// global[ptr_global] = stable_records +/// global[len_global] = len +/// ``` +fn generate_stabilizing_shim( + result_globals: &[(u32, wasm_encoder::ValType)], + elem_size: u32, + elem_align: u32, + indirections: &[(u32, u32)], + realloc_func: u32, + callee_memory: u32, +) -> wasm_encoder::Function { + use wasm_encoder::{BlockType, Function, Instruction}; + + // Locals layout (after the 2 i32 params: ptr=0, len=1): + // 2 = stable_records + // 3 = byte_count + // 4 = i + // 5 = rec + // 6 = old_str + // 7 = str_len + // 8 = stable_str + let l_stable = 2u32; + let l_byte_count = 3u32; + let l_i = 4u32; + let l_rec = 5u32; + let l_old_str = 6u32; + let l_str_len = 7u32; + let l_stable_str = 8u32; + + let mut body = Function::new([(7, wasm_encoder::ValType::I32)]); + + // byte_count = len * elem_size + crate::adapter::fact::emit_overflow_guard(&mut body, 1, elem_size); + body.instruction(&Instruction::LocalGet(1)); + body.instruction(&Instruction::I32Const(elem_size as i32)); + body.instruction(&Instruction::I32Mul); + body.instruction(&Instruction::LocalSet(l_byte_count)); + + // stable_records = realloc(0, 0, elem_align, byte_count) + body.instruction(&Instruction::I32Const(0)); + body.instruction(&Instruction::I32Const(0)); + body.instruction(&Instruction::I32Const(elem_align as i32)); + body.instruction(&Instruction::LocalGet(l_byte_count)); + 
crate::adapter::fact::emit_checked_realloc(&mut body, realloc_func, l_stable); + + // memory.copy stable_records <- ptr, byte_count (intra-callee, mem 0) + body.instruction(&Instruction::LocalGet(l_stable)); + body.instruction(&Instruction::LocalGet(0)); + body.instruction(&Instruction::LocalGet(l_byte_count)); + body.instruction(&Instruction::MemoryCopy { + dst_mem: callee_memory, + src_mem: callee_memory, + }); + + // for i in 0..len: stabilize indirections in record i + body.instruction(&Instruction::I32Const(0)); + body.instruction(&Instruction::LocalSet(l_i)); + body.instruction(&Instruction::Block(BlockType::Empty)); + body.instruction(&Instruction::Loop(BlockType::Empty)); + + body.instruction(&Instruction::LocalGet(l_i)); + body.instruction(&Instruction::LocalGet(1)); + body.instruction(&Instruction::I32GeU); + body.instruction(&Instruction::BrIf(1)); + + // rec = stable_records + i * elem_size + body.instruction(&Instruction::LocalGet(l_stable)); + body.instruction(&Instruction::LocalGet(l_i)); + body.instruction(&Instruction::I32Const(elem_size as i32)); + body.instruction(&Instruction::I32Mul); + body.instruction(&Instruction::I32Add); + body.instruction(&Instruction::LocalSet(l_rec)); + + for (offset, sub_size) in indirections { + let mem_arg_ptr = wasm_encoder::MemArg { + offset: *offset as u64, + align: 2, + memory_index: callee_memory, + }; + let mem_arg_len = wasm_encoder::MemArg { + offset: (*offset + 4) as u64, + align: 2, + memory_index: callee_memory, + }; + + // old_str = mem.load(rec + offset) + body.instruction(&Instruction::LocalGet(l_rec)); + body.instruction(&Instruction::I32Load(mem_arg_ptr)); + body.instruction(&Instruction::LocalSet(l_old_str)); + + // str_len = mem.load(rec + offset + 4) * sub_size + // Stash raw (pre-multiply) len in l_str_len for the overflow guard, + // then multiply to produce the byte count. 
+ body.instruction(&Instruction::LocalGet(l_rec)); + body.instruction(&Instruction::I32Load(mem_arg_len)); + body.instruction(&Instruction::LocalSet(l_str_len)); + crate::adapter::fact::emit_overflow_guard(&mut body, l_str_len, *sub_size); + body.instruction(&Instruction::LocalGet(l_str_len)); + if *sub_size != 1 { + body.instruction(&Instruction::I32Const(*sub_size as i32)); + body.instruction(&Instruction::I32Mul); + } + body.instruction(&Instruction::LocalSet(l_str_len)); + + // stable_str = realloc(0, 0, 1, str_len) + body.instruction(&Instruction::I32Const(0)); + body.instruction(&Instruction::I32Const(0)); + body.instruction(&Instruction::I32Const(1)); + body.instruction(&Instruction::LocalGet(l_str_len)); + crate::adapter::fact::emit_checked_realloc(&mut body, realloc_func, l_stable_str); + + // memory.copy stable_str <- old_str, str_len (intra-callee) + body.instruction(&Instruction::LocalGet(l_stable_str)); + body.instruction(&Instruction::LocalGet(l_old_str)); + body.instruction(&Instruction::LocalGet(l_str_len)); + body.instruction(&Instruction::MemoryCopy { + dst_mem: callee_memory, + src_mem: callee_memory, + }); + + // mem.store(rec + offset, stable_str) + body.instruction(&Instruction::LocalGet(l_rec)); + body.instruction(&Instruction::LocalGet(l_stable_str)); + body.instruction(&Instruction::I32Store(mem_arg_ptr)); + } + + // i++; continue + body.instruction(&Instruction::LocalGet(l_i)); + body.instruction(&Instruction::I32Const(1)); + body.instruction(&Instruction::I32Add); + body.instruction(&Instruction::LocalSet(l_i)); + body.instruction(&Instruction::Br(0)); + + body.instruction(&Instruction::End); // end loop + body.instruction(&Instruction::End); // end block + + // Store stable_records to ptr_global, len to len_global. 
+ if let [(ptr_global, _), (len_global, _)] = result_globals { + body.instruction(&Instruction::LocalGet(l_stable)); + body.instruction(&Instruction::GlobalSet(*ptr_global)); + body.instruction(&Instruction::LocalGet(1)); + body.instruction(&Instruction::GlobalSet(*len_global)); + } + + body.instruction(&Instruction::End); + body +} + #[cfg(test)] mod tests { use super::*; diff --git a/meld-core/src/merger.rs b/meld-core/src/merger.rs index ee9debb..54e106e 100644 --- a/meld-core/src/merger.rs +++ b/meld-core/src/merger.rs @@ -137,6 +137,10 @@ pub struct MergedModule { /// to the global indices where the shim stores result values. /// Used by the callback-driving adapter to read results after EXIT. pub task_return_shims: HashMap, + + /// Maps (component_idx, func_name) → shim globals for async result delivery. + /// Built after element segment patching. Used by the callback-driving adapter. + pub async_result_globals: HashMap<(usize, String), Vec<(u32, ValType)>>, } /// Info about a generated task.return shim function. @@ -153,6 +157,12 @@ pub struct TaskReturnShimInfo { /// Original function name (e.g., "fibonacci") — extracted from the /// original component's core module import before renumbering. pub original_func_name: String, + /// Lifted (WIT-level) result type. When present, the adapter uses this + /// to compute element-aware byte counts and walk nested indirections + /// (strings inside records inside lists) during cross-memory copy. + /// `None` means we couldn't recover the type and the adapter falls + /// back to treating the result as opaque bytes. + pub result_type: Option, } /// Per-component resource handle table allocated in a re-exporter's linear memory. 
@@ -683,6 +693,7 @@ impl Merger { resource_new_by_component: HashMap::new(), handle_tables: HashMap::new(), task_return_shims: HashMap::new(), + async_result_globals: HashMap::new(), }; // Process components in topological order @@ -1041,11 +1052,20 @@ impl Merger { // Merge tables (defined tables only; imported tables handled below) let table_offset = merged.tables.len() as u32; for (old_idx, table) in module.tables.iter().enumerate() { + let old_table_idx = import_table_count + old_idx as u32; let new_idx = merged.import_counts.table + table_offset + old_idx as u32; - merged.table_index_map.insert( - (comp_idx, mod_idx, import_table_count + old_idx as u32), + log::debug!( + "table defined: ({},{},{}) → {} (offset={}, import_count={})", + comp_idx, + mod_idx, + old_table_idx, new_idx, + table_offset, + merged.import_counts.table, ); + merged + .table_index_map + .insert((comp_idx, mod_idx, old_table_idx), new_idx); merged.tables.push(convert_table_type(table)); } @@ -2516,6 +2536,11 @@ pub(crate) fn component_memory_index(merged: &MergedModule, comp_idx: usize) -> /// Find the merged function index of a component's cabi_realloc. 
pub(crate) fn component_realloc_index(merged: &MergedModule, comp_idx: usize) -> Option<u32> {
+    // Prefer module 0's realloc (the main module)
+    if let Some(&idx) = merged.realloc_map.get(&(comp_idx, 0)) {
+        return Some(idx);
+    }
+    // Fallback: any module's realloc for this component
     for (&(ci, _mi), &merged_idx) in &merged.realloc_map {
         if ci == comp_idx {
             return Some(merged_idx);
@@ -2886,6 +2911,7 @@ mod tests {
             resource_new_by_component: HashMap::new(),
             handle_tables: HashMap::new(),
             task_return_shims: HashMap::new(),
+            async_result_globals: HashMap::new(),
         };

         // Simulate multi-memory merging for module A (comp 0, mod 0)
diff --git a/meld-core/src/resolver.rs b/meld-core/src/resolver.rs
index 9d947c0..b8d4647 100644
--- a/meld-core/src/resolver.rs
+++ b/meld-core/src/resolver.rs
@@ -2316,6 +2316,15 @@ impl Resolver {
         }
         requirements.pointer_pair_positions = to_component
             .pointer_pair_param_positions(comp_params);
+        log::debug!(
+            "pointer_pair_positions for {}: {:?} (comp_params={:?})",
+            *func_name,
+            requirements.pointer_pair_positions,
+            comp_params
+                .iter()
+                .map(|(n, t)| (n.as_str(), format!("{:?}", t)))
+                .collect::<Vec<_>>(),
+        );
         requirements.result_pointer_pair_offsets =
             to_component.pointer_pair_result_offsets(results);
         // Compute copy layouts for each pointer pair
diff --git a/meld-core/tests/realloc_safety.rs b/meld-core/tests/realloc_safety.rs
new file mode 100644
index 0000000..06242b0
--- /dev/null
+++ b/meld-core/tests/realloc_safety.rs
@@ -0,0 +1,573 @@
+//! Emitter-wide LS-A-7 safety test.
+//!
+//! Loss scenario LS-A-7 (safety/stpa/loss-scenarios.yaml) requires that every
+//! `cabi_realloc` call emitted into a fused meld output is followed by a null
+//! guard of the form:
+//!
+//! ```wat
+//! call $cabi_realloc
+//! i32.eqz
+//! if
+//! unreachable
+//! end
+//! ```
+//!
+//! Without the guard, an allocator returning 0 (OOM) would cause the
+//! transcode/copy loop to write into callee memory offset 0 (leg (b) of
+//! LS-A-7).
Per-emitter PoC tests live in +//! `meld-core/src/adapter/fact.rs` (search for `ls_a_7_`); this integration +//! test is the cross-emitter safety gate: it fuses two components +//! programmatically, then parses every function body in the fused output and +//! fails if any `cabi_realloc` call lacks the null guard. + +use meld_core::{Fuser, FuserConfig, MemoryStrategy}; +use wasm_encoder::{ + Alias, CanonicalFunctionSection, CanonicalOption, CodeSection, Component, + ComponentAliasSection, ComponentExportKind, ComponentExportSection, ComponentImportSection, + ComponentTypeRef, ComponentTypeSection, ConstExpr, DataSection, DataSegment, DataSegmentMode, + ExportKind, ExportSection, Function, FunctionSection, GlobalSection, GlobalType, ImportSection, + InstanceSection, Instruction, MemorySection, MemoryType, Module, ModuleArg, ModuleSection, + TypeSection, +}; + +// --------------------------------------------------------------------------- +// Component fixtures: string-passing caller and callee. +// +// Same shape as `tests/adapter_safety.rs::test_sr12_*`: fusion of these two +// components forces meld to emit at least one string-passing adapter, which +// in turn contains a `cabi_realloc` call in the callee's memory that must be +// guarded. We re-declare (rather than import) the builders because each +// integration-test file is a separate crate. +// --------------------------------------------------------------------------- + +/// Minimal bump-allocator `cabi_realloc(orig_ptr, orig_size, align, new_size)`. +fn emit_cabi_realloc(func: &mut Function, bump_global: u32) { + func.instruction(&Instruction::GlobalGet(bump_global)); + func.instruction(&Instruction::GlobalGet(bump_global)); + func.instruction(&Instruction::LocalGet(3)); // new_size + func.instruction(&Instruction::I32Add); + func.instruction(&Instruction::GlobalSet(bump_global)); + func.instruction(&Instruction::End); +} + +/// Callee P2 component: exports `process-string(s: string) -> u32`. 
+fn build_callee_string_component() -> Vec { + let core_module = { + let mut types = TypeSection::new(); + types.ty().function( + [ + wasm_encoder::ValType::I32, + wasm_encoder::ValType::I32, + wasm_encoder::ValType::I32, + wasm_encoder::ValType::I32, + ], + [wasm_encoder::ValType::I32], + ); + types.ty().function( + [wasm_encoder::ValType::I32, wasm_encoder::ValType::I32], + [wasm_encoder::ValType::I32], + ); + + let mut functions = FunctionSection::new(); + functions.function(0); + functions.function(1); + + let mut memory = MemorySection::new(); + memory.memory(MemoryType { + minimum: 1, + maximum: None, + memory64: false, + shared: false, + page_size_log2: None, + }); + + let mut globals = GlobalSection::new(); + globals.global( + GlobalType { + val_type: wasm_encoder::ValType::I32, + mutable: true, + shared: false, + }, + &ConstExpr::i32_const(1024), + ); + + let mut exports = ExportSection::new(); + exports.export("cabi_realloc", ExportKind::Func, 0); + exports.export("test:api/api#process-string", ExportKind::Func, 1); + exports.export("memory", ExportKind::Memory, 0); + + let mut code = CodeSection::new(); + { + let mut f = Function::new([]); + emit_cabi_realloc(&mut f, 0); + code.function(&f); + } + { + // process-string(ptr, len) -> sum of bytes. 
+ let mut f = Function::new(vec![(2, wasm_encoder::ValType::I32)]); + f.instruction(&Instruction::Block(wasm_encoder::BlockType::Empty)); + f.instruction(&Instruction::Loop(wasm_encoder::BlockType::Empty)); + f.instruction(&Instruction::LocalGet(3)); + f.instruction(&Instruction::LocalGet(1)); + f.instruction(&Instruction::I32GeU); + f.instruction(&Instruction::BrIf(1)); + f.instruction(&Instruction::LocalGet(0)); + f.instruction(&Instruction::LocalGet(3)); + f.instruction(&Instruction::I32Add); + f.instruction(&Instruction::I32Load8U(wasm_encoder::MemArg { + offset: 0, + align: 0, + memory_index: 0, + })); + f.instruction(&Instruction::LocalGet(2)); + f.instruction(&Instruction::I32Add); + f.instruction(&Instruction::LocalSet(2)); + f.instruction(&Instruction::LocalGet(3)); + f.instruction(&Instruction::I32Const(1)); + f.instruction(&Instruction::I32Add); + f.instruction(&Instruction::LocalSet(3)); + f.instruction(&Instruction::Br(0)); + f.instruction(&Instruction::End); + f.instruction(&Instruction::End); + f.instruction(&Instruction::LocalGet(2)); + f.instruction(&Instruction::End); + code.function(&f); + } + + let mut module = Module::new(); + module + .section(&types) + .section(&functions) + .section(&memory) + .section(&globals) + .section(&exports) + .section(&code); + module + }; + + let mut component = Component::new(); + component.section(&ModuleSection(&core_module)); + + { + let mut types = ComponentTypeSection::new(); + types + .function() + .params([( + "s", + wasm_encoder::ComponentValType::Primitive(wasm_encoder::PrimitiveValType::String), + )]) + .result(Some(wasm_encoder::ComponentValType::Primitive( + wasm_encoder::PrimitiveValType::U32, + ))); + component.section(&types); + } + + { + let mut inst = InstanceSection::new(); + let no_args: Vec<(&str, ModuleArg)> = vec![]; + inst.instantiate(0, no_args); + component.section(&inst); + } + + for (kind, name) in [ + (ExportKind::Func, "cabi_realloc"), + (ExportKind::Func, 
"test:api/api#process-string"), + (ExportKind::Memory, "memory"), + ] { + let mut aliases = ComponentAliasSection::new(); + aliases.alias(Alias::CoreInstanceExport { + instance: 0, + kind, + name, + }); + component.section(&aliases); + } + + { + let mut canon = CanonicalFunctionSection::new(); + canon.lift( + 1, + 0, + [ + CanonicalOption::UTF8, + CanonicalOption::Memory(0), + CanonicalOption::Realloc(0), + ], + ); + component.section(&canon); + } + { + let mut exp = ComponentExportSection::new(); + exp.export("test:api/api", ComponentExportKind::Func, 0, None); + component.section(&exp); + } + + component.finish() +} + +/// Caller P2 component: imports `process-string` and calls it with "Hello". +fn build_caller_string_component() -> Vec { + let core_module = { + let mut types = TypeSection::new(); + types.ty().function( + [wasm_encoder::ValType::I32, wasm_encoder::ValType::I32], + [wasm_encoder::ValType::I32], + ); + types.ty().function([], [wasm_encoder::ValType::I32]); + types.ty().function( + [ + wasm_encoder::ValType::I32, + wasm_encoder::ValType::I32, + wasm_encoder::ValType::I32, + wasm_encoder::ValType::I32, + ], + [wasm_encoder::ValType::I32], + ); + + let mut imports = ImportSection::new(); + imports.import( + "test:api/api", + "process-string", + wasm_encoder::EntityType::Function(0), + ); + + let mut functions = FunctionSection::new(); + functions.function(1); + functions.function(2); + + let mut memory = MemorySection::new(); + memory.memory(MemoryType { + minimum: 1, + maximum: None, + memory64: false, + shared: false, + page_size_log2: None, + }); + + let mut globals = GlobalSection::new(); + globals.global( + GlobalType { + val_type: wasm_encoder::ValType::I32, + mutable: true, + shared: false, + }, + &ConstExpr::i32_const(1024), + ); + + let mut exports = ExportSection::new(); + exports.export("run", ExportKind::Func, 1); + exports.export("cabi_realloc", ExportKind::Func, 2); + exports.export("memory", ExportKind::Memory, 0); + + let mut code = 
CodeSection::new(); + { + let mut f = Function::new([]); + f.instruction(&Instruction::I32Const(0)); + f.instruction(&Instruction::I32Const(5)); + f.instruction(&Instruction::Call(0)); + f.instruction(&Instruction::End); + code.function(&f); + } + { + let mut f = Function::new([]); + emit_cabi_realloc(&mut f, 0); + code.function(&f); + } + + let mut data = DataSection::new(); + data.segment(DataSegment { + mode: DataSegmentMode::Active { + memory_index: 0, + offset: &ConstExpr::i32_const(0), + }, + data: b"Hello".to_vec(), + }); + + let mut module = Module::new(); + module + .section(&types) + .section(&imports) + .section(&functions) + .section(&memory) + .section(&globals) + .section(&exports) + .section(&code) + .section(&data); + module + }; + + let mut component = Component::new(); + { + let mut types = ComponentTypeSection::new(); + types + .function() + .params([( + "s", + wasm_encoder::ComponentValType::Primitive(wasm_encoder::PrimitiveValType::String), + )]) + .result(Some(wasm_encoder::ComponentValType::Primitive( + wasm_encoder::PrimitiveValType::U32, + ))); + component.section(&types); + } + { + let mut imports = ComponentImportSection::new(); + imports.import("test:api/api", ComponentTypeRef::Func(0)); + component.section(&imports); + } + component.section(&ModuleSection(&core_module)); + component.finish() +} + +// --------------------------------------------------------------------------- +// Guard scanner +// --------------------------------------------------------------------------- + +/// Window (in operators) following a `call $realloc` in which the null guard +/// must appear. The canonical helper emits 6 operators +/// (`LocalSet; LocalGet; I32Eqz; If; Unreachable; End`), so 8 is a safe upper +/// bound that also tolerates a stray `Drop` or benign reorder. +const GUARD_WINDOW: usize = 8; + +/// Return the function indices in the fused module that correspond to +/// `cabi_realloc` (or `cabi_realloc$N`). 
Covers both imported and exported
+/// realloc funcs: the former is how a realloc appears if meld ever leaves a
+/// realloc as an import; the latter is how meld's internal realloc_map
+/// entries surface in the fused output (see `merger.rs` — realloc_map keys
+/// are exported as `cabi_realloc` / `cabi_realloc$N`).
+fn collect_realloc_indices(fused: &[u8]) -> std::collections::HashSet<u32> {
+    use std::collections::HashSet;
+    let mut out = HashSet::new();
+    let mut import_func_count: u32 = 0;
+    let parser = wasmparser::Parser::new(0);
+
+    for payload in parser.parse_all(fused) {
+        match payload {
+            Ok(wasmparser::Payload::ImportSection(reader)) => {
+                for imp in reader.into_imports().flatten() {
+                    if matches!(
+                        imp.ty,
+                        wasmparser::TypeRef::Func(_) | wasmparser::TypeRef::FuncExact(_)
+                    ) {
+                        if imp.name.starts_with("cabi_realloc") {
+                            out.insert(import_func_count);
+                        }
+                        import_func_count += 1;
+                    }
+                }
+            }
+            Ok(wasmparser::Payload::ExportSection(reader)) => {
+                for exp in reader.into_iter().flatten() {
+                    if matches!(
+                        exp.kind,
+                        wasmparser::ExternalKind::Func | wasmparser::ExternalKind::FuncExact
+                    ) && exp.name.starts_with("cabi_realloc")
+                    {
+                        out.insert(exp.index);
+                    }
+                }
+            }
+            _ => {}
+        }
+    }
+    out
+}
+
+/// An unguarded `cabi_realloc` call site.
+#[derive(Debug)]
+struct OffendingSite {
+    /// Merged-space function index of the enclosing function.
+    function_idx: u32,
+    /// Byte offset (within the fused module) of the `call` instruction.
+    byte_offset: usize,
+    /// Target (realloc) function index.
+    target: u32,
+}
+
+/// Walk every function body and return (unguarded sites, total realloc
+/// call count). An unguarded site is a `call` targeting a realloc-family
+/// function that is not followed by an `i32.eqz; if; unreachable; end`
+/// sequence within the next `GUARD_WINDOW` operators.
+fn scan_fused(
+    fused: &[u8],
+    realloc_indices: &std::collections::HashSet<u32>,
+) -> (Vec<OffendingSite>, usize) {
+    let mut offenders = Vec::new();
+    let mut total_realloc_calls = 0usize;
+    let mut import_func_count: u32 = 0;
+    let parser = wasmparser::Parser::new(0);
+
+    // First pass: count function imports so we can emit absolute function
+    // indices for the error report.
+    for payload in parser.parse_all(fused) {
+        if let Ok(wasmparser::Payload::ImportSection(reader)) = payload {
+            for imp in reader.into_imports().flatten() {
+                if matches!(
+                    imp.ty,
+                    wasmparser::TypeRef::Func(_) | wasmparser::TypeRef::FuncExact(_)
+                ) {
+                    import_func_count += 1;
+                }
+            }
+        }
+    }
+
+    // Second pass: scan function bodies.
+    let parser2 = wasmparser::Parser::new(0);
+    let mut code_func_offset: u32 = 0;
+    for payload in parser2.parse_all(fused) {
+        if let Ok(wasmparser::Payload::CodeSectionEntry(body)) = payload {
+            let function_idx = import_func_count + code_func_offset;
+            code_func_offset += 1;
+
+            let reader = match body.get_operators_reader() {
+                Ok(r) => r,
+                Err(_) => continue,
+            };
+
+            // Collect (operator, byte_offset) pairs so we can look N steps
+            // ahead after spotting a realloc call.
+            let mut ops: Vec<(wasmparser::Operator, usize)> = Vec::new();
+            let mut reader = reader;
+            loop {
+                if reader.is_end_then_eof() {
+                    break;
+                }
+                match reader.read_with_offset() {
+                    Ok(pair) => ops.push(pair),
+                    Err(_) => break,
+                }
+            }
+
+            for (idx, (op, off)) in ops.iter().enumerate() {
+                let target = match op {
+                    wasmparser::Operator::Call { function_index } => *function_index,
+                    _ => continue,
+                };
+                if !realloc_indices.contains(&target) {
+                    continue;
+                }
+                total_realloc_calls += 1;
+                if !has_null_guard(&ops, idx) {
+                    offenders.push(OffendingSite {
+                        function_idx,
+                        byte_offset: *off,
+                        target,
+                    });
+                }
+            }
+        }
+    }
+    (offenders, total_realloc_calls)
+}
+
+/// Return true iff within the next `GUARD_WINDOW` operators after position
+/// `call_idx` (exclusive) the sequence `I32Eqz; If { ..
}; Unreachable; End` +/// appears. Other operators between the call and the I32Eqz (e.g. the +/// canonical `LocalSet; LocalGet` that plumbs the result into a local) are +/// allowed — only the contiguous 4-op trap pattern is required. +fn has_null_guard(ops: &[(wasmparser::Operator, usize)], call_idx: usize) -> bool { + let start = call_idx + 1; + let end = (start + GUARD_WINDOW).min(ops.len()); + if end < start + 4 { + return false; + } + for i in start..=end.saturating_sub(4) { + let is_eqz = matches!(ops[i].0, wasmparser::Operator::I32Eqz); + let is_if = matches!(ops[i + 1].0, wasmparser::Operator::If { .. }); + let is_unreach = matches!(ops[i + 2].0, wasmparser::Operator::Unreachable); + let is_end = matches!(ops[i + 3].0, wasmparser::Operator::End); + if is_eqz && is_if && is_unreach && is_end { + return true; + } + } + false +} + +// --------------------------------------------------------------------------- +// Test +// --------------------------------------------------------------------------- + +/// LS-A-7 (leg b) emitter-wide gate. +/// +/// Fuse a string-passing pair of components (the same fixture used by +/// `tests/adapter_safety.rs::test_sr12_*`), then scan every function body +/// in the fused module. Every `call` targeting a `cabi_realloc`-family +/// function must be immediately followed by the +/// `i32.eqz; if; unreachable; end` null guard; otherwise the test fails and +/// reports every offending (function_idx, byte_offset) pair. 
+#[test] +fn ls_a_7_every_realloc_call_has_null_guard() { + let callee = build_callee_string_component(); + let caller = build_caller_string_component(); + + let config = FuserConfig { + memory_strategy: MemoryStrategy::MultiMemory, + attestation: false, + address_rebasing: false, + preserve_names: false, + custom_sections: meld_core::CustomSectionHandling::Drop, + output_format: meld_core::OutputFormat::CoreModule, + }; + + let mut fuser = Fuser::new(config); + fuser + .add_component_named(&callee, Some("callee-string")) + .expect("callee component should parse"); + fuser + .add_component_named(&caller, Some("caller-string")) + .expect("caller component should parse"); + + let (fused, stats) = fuser.fuse_with_stats().expect("fusion should succeed"); + eprintln!( + "LS-A-7 scan: {} bytes, {} funcs, {} adapters, {} imports resolved", + stats.output_size, stats.total_functions, stats.adapter_functions, stats.imports_resolved, + ); + + // Fused output must validate — a malformed module would mask emitter + // bugs behind parser errors. 
+ let mut validator = wasmparser::Validator::new(); + validator + .validate_all(&fused) + .expect("LS-A-7: fused output should validate"); + + let realloc_indices = collect_realloc_indices(&fused); + assert!( + !realloc_indices.is_empty(), + "LS-A-7: expected at least one cabi_realloc-family function in the \ + fused output; scan would be vacuous otherwise" + ); + eprintln!( + "LS-A-7: tracking {} cabi_realloc-family function indices: {:?}", + realloc_indices.len(), + { + let mut v: Vec<_> = realloc_indices.iter().copied().collect(); + v.sort(); + v + } + ); + + let (offenders, call_count) = scan_fused(&fused, &realloc_indices); + eprintln!("LS-A-7: scanned {call_count} realloc call sites"); + assert!( + call_count > 0, + "LS-A-7: expected at least one cabi_realloc call site in the fused \ + output; an adapter that never calls realloc would make the guard \ + check vacuous" + ); + + if !offenders.is_empty() { + let mut report = String::new(); + report.push_str( + "LS-A-7: the following cabi_realloc calls are missing the \ + `i32.eqz; if; unreachable; end` null guard (leg (b)):\n", + ); + for site in &offenders { + report.push_str(&format!( + " - function_idx={} target_realloc={} byte_offset=0x{:x}\n", + site.function_idx, site.target, site.byte_offset + )); + } + panic!("{report}"); + } +} diff --git a/safety/stpa/loss-scenarios.yaml b/safety/stpa/loss-scenarios.yaml index faee4fd..7afad11 100644 --- a/safety/stpa/loss-scenarios.yaml +++ b/safety/stpa/loss-scenarios.yaml @@ -341,6 +341,52 @@ loss-scenarios: - Return area slot offsets not communicated from resolver to adapter - Missing test case for variant retptr with alignment padding + - id: LS-A-7 + title: Transcoding adapters emit unchecked i32.mul for realloc size + uca: UCA-A-5 + hazards: [H-2, H-4, H-4.3] + type: inadequate-control-algorithm + scenario: > + A cross-component call passes a string whose length is chosen by an + untrusted caller. 
The transcoding adapters emitted in + meld-core/src/adapter/fact.rs at emit_utf8_to_utf16_transcode + (lines 2702-2712), emit_utf16_to_utf8_transcode (lines 3010-3020), + and emit_latin1_to_utf8_transcode (lines 3335-3344) compute the + destination allocation size as LocalGet(len); I32Const(K); I32Mul; + Call(cabi_realloc); LocalSet(out_ptr) with K in {2, 3, 2}. The + multiplication is performed in 32-bit wrapping arithmetic with no + upper-bound check on len and no I32Eqz/BrIf guard on the + cabi_realloc return value. For any len > u32::MAX / K the product + wraps to a small alloc_size, cabi_realloc returns a short buffer + (or null on OOM, which is also unchecked), and the transcode loop + proceeds using the untouched full len as its bound, producing an + out-of-bounds write into the callee's linear memory [UCA-A-5]. This + corrupts callee memory at caller-chosen offsets [H-4.3] and covers + address 0 on realloc failure [H-2]. Violates WebAssembly Component + Model Canonical ABI (commit deb0b0a) Section 3.10 "Canonical + Built-in Definitions", which requires the host to trap when + realloc returns 0, and WebAssembly Core Specification Release 3.0 + Section 4.4.1 which defines i32.mul as modulo 2^32. Detected by + Kani harness + meld-core::adapter::fact::tests::kani_transcode_alloc_size_no_overflow_guard + (counterexample at any len > u32::MAX / K) and PoC byte-scan + meld-core::adapter::fact::tests::poc_transcode_adapter_emits_unchecked_mul + which asserts presence of (a) I32Eqz/BrIf after the cabi_realloc + Call and (b) a len upper-bound check before I32Mul; both + assertions fail on the current emitter. 
+ causal-factors: + - Allocation size computed via I32Mul without widening len to i64 + or bounding len < u32::MAX / K before the multiply + - cabi_realloc return value stored directly into out_ptr with no + I32Eqz/BrIf trap on null + - Transcode loop bound is the untouched caller-supplied len, so a + wrapped alloc_size does not shrink the number of bytes written + - No emitter-level test asserting that every transcode adapter + emits both the overflow guard and the realloc-null guard + related-cve: CVE-2026-27572 + status: approved + priority: critical + # ========================================================================== # Merger scenario (discovered during gap analysis) # ========================================================================== diff --git a/scripts/mythos/HOWTO.md b/scripts/mythos/HOWTO.md new file mode 100644 index 0000000..c306501 --- /dev/null +++ b/scripts/mythos/HOWTO.md @@ -0,0 +1,120 @@ +# Mythos-Style Bug Hunt — Portable Pipeline + +A four-prompt pipeline modeled on Anthropic's Claude Mythos (red.anthropic.com, +April 2026) plus Vidoc's open-model reproduction. The architecture is: let +the agent reason about code freely, but require a machine-checkable oracle +for every reported bug so hallucinations don't ship. + +## Prerequisites + +- Claude Code or any agent harness that can read files and drive test runs +- A truth oracle for your language/domain (see §5) +- A bug-tracking format (STPA-Sec, STPA, in-house, whatever) +- Optional: parallel sessions (rank → N parallel discoveries → validate → emit) + +## 1. Four prompt templates in `scripts/mythos/` + +- **`rank.md`** — agent ranks every source file 1–5 by bug likelihood. The + rubric is the one non-portable part — write it per repo (§2). +- **`discover.md`** — Mythos-verbatim discovery prompt plus repo-specific + context plus the oracle requirement (§3). +- **`validate.md`** — fresh-agent validator that enforces the oracle and + filters uninteresting findings. 
+- **`emit.md`** — converts a confirmed finding into a draft entry in your + bug-tracking format. + +## 2. Ranking rubric (non-portable) + +5 tiers, named by concrete path patterns not abstract categories. Skeleton: + +``` +5 (crown jewels): secrets, parse-before-trust, canonicalization +4 (direct security boundary): verification, signing, argv+env +3 (one hop from untrusted input): token parsers, network clients, format parsers +2 (supporting, no direct security role): HTTP plumbing, policy eval, logging +1 (config / constants / proof artifacts): error types, wiring, proofs +``` + +Straddle rule: if a file sits between two tiers, pick the higher. Run the +rank pass once, then **patch the rubric** to eliminate files that required +overrides. A good rubric produces zero overrides on re-run. + +## 3. Oracle choice (drives `discover.md`) + +The oracle separates "agent thinks there's a bug" from "there is a bug." + +| Hunting… | Oracle candidates | +|---|---| +| Memory corruption in C/C++/unsafe Rust | AddressSanitizer, MemorySanitizer, UBSan | +| Logic bugs in safe Rust | Kani + property tests (proptest/quickcheck) | +| Compiler correctness | Rocq + Z3 SMT + differential testing | +| Kernel primitives | Verus + Kani + Rocq; proof-skip analysis | +| Python/TypeScript | Hypothesis, fast-check, concrete PoC | +| Go | fuzz, property tests | +| Crypto protocols | Proverif, Tamarin, CryptoVerif counterexample | + +`discover.md` MUST require BOTH (1) a failing machine-checkable proof AND +(2) a failing concrete PoC. "If you cannot produce both, do not report. +Hallucinations are more expensive than silence." — load-bearing sentence. + +## 4. Run the pipeline + +From a Claude Code session in the repo: + +1. `Read scripts/mythos/rank.md` → JSON ranking +2. For each rank-≥4 file: new session (parallel), paste `discover.md` with + `{{file}}` substituted. Output = structured finding report. +3. For each finding: fresh session with `validate.md`. 
Both oracle halves + must fail on unfixed code. Reject anything that doesn't confirm. +4. For each confirmed: `emit.md` produces a `draft` tracking entry. Human + promotes to `approved`. + +One agent per file in step 2 is Mythos's parallelism trick. Don't run one +agent across the whole codebase. + +## 5. Per-project customization + +- **`rank.md`**: your threat model in 5 tiers +- **`discover.md`**: repo context paragraph + oracle requirement + optional + hypothesis priors (e.g., wasmtime 2026-04-09 CVE wave for any WASM tool) +- **`validate.md`**: reject against your known-mitigations / system + constraints / existing scenarios. Swap threat-agent checks for + hazard-only checks if the repo is safety not security. +- **`emit.md`**: match the exact YAML/JSON shape of your artifact store. + +## 6. Gotchas + +- **Failing tests directly in source break CI.** Use `#[ignore]` / `@skip` + and put the rerun command in the ignore reason. +- **The rubric is wrong the first time.** Expect to patch after pass 1. + Sign you need to patch: "straddle rule → promoted X" lines in output. +- **Validators must be fresh sessions.** Reusing discovery context lets + the agent defend its own hypothesis. +- **One agent per file, not per codebase.** Parallel agents on different + files find diverse bugs; a single agent converges on surface issues. +- **Keep the discovery prompt minimal.** Mythos's "Please find a security + vulnerability" outperforms elaborate CWE checklists because the agent + has tools (oracle, debugger, runtime) and the environment filters truth. + +## 7. Worked example — sigil `signature/sig_sections.rs` + +First tier-5 file produced a finding: + +```rust +let certificate_chain = if let Ok(cert_count) = varint::get32(&mut reader) { + // ... read chain +} else { + None // ← silently swallows ALL parse errors, not just EOF +}; +``` + +Intent: backward-compat (missing cert_count → None). 
Bug: any error — +including malformed bytes — gets converted to "no chain," downgrading a +cert-based signature to a bare-key signature. + +- **PoC test**: append 5 MSB-set bytes after a valid prefix; expect `Err`; + current code returns `Ok { certificate_chain: None }`. **Confirmed failing.** +- **Kani harness**: symbolic 5-byte cert_count with MSB-set constraint; + `assert!(result.is_err())`. + +Maps to STPA-Sec UCA-6. Emitted as `draft AS-N` under UCA-6. diff --git a/scripts/mythos/discover.md b/scripts/mythos/discover.md new file mode 100644 index 0000000..62cdf66 --- /dev/null +++ b/scripts/mythos/discover.md @@ -0,0 +1,55 @@ +Please find a safety-relevant vulnerability in this program. + +Context you must use: +- This is meld, a static fusion tool for WebAssembly components. Takes + composed P2/P3 components and fuses them into a single core wasm + module, eliminating runtime linking. Safety model is STPA-based; + hazards and UCAs are in `safety/stpa/`. The key invariant is + **semantic preservation**: the fused output must behave identically + to the composed input for all valid inputs. +- Focus on ONE file: {{file}}. You may read any other file in the repo + to confirm or refute, but do not report bugs outside {{file}}. +- Bugs in meld amplify — the output is consumed by every downstream + runtime (kiln, wasmtime, browsers). A silently invalid fused module + becomes a downstream parse/exec bug. + +Hypothesis priors (wasmtime's 2026-04-09 CVE wave — same component model): +Use as hypothesis anchors, NOT recipes. Reason from meld's actual code. + - Component-model transcoding: UTF-16 alignment, code-unit vs byte + length confusion, OOB write from unvalidated guest `realloc` return. + - Flags/Val lifting: panic on bits the spec says to ignore. + - Import/export resolution across instances — meld RESOLVES these + statically; wasmtime LINKS them dynamically. Different call sites, + same bug classes (type mismatches, renaming errors, missing stubs). 
+ - Resource table / generation counter overflow. + - String canonicalization: two textually different representations + that mean the same thing — does meld canonicalize before comparing? + +To help us appropriately triage any bug you find, please write minimal +PoC inputs (a small hand-crafted component WAT) so we can submit the +highest-severity ones. + +Oracle requirement (non-negotiable): +For every bug you report you MUST produce both: + (1) A failing Kani harness using `#[kani::proof]`. The harness must + fail today if the bug is real and pass after the fix. + (2) A failing property or unit test (standard `#[test]` or + `proptest!`) that reproduces the bug with concrete inputs. + For fusion bugs, a natural PoC is: a WAT input where + `compose` → `meld::fuse` → `execute` diverges from + `compose` → `execute` (differential test). + +If you cannot write (1) and (2), the finding does not count. Do not +report it. Hallucinations are more expensive than silence. + +Output format: +- FILE: {{file}} +- FUNCTION / LINES: ... +- HYPOTHESIS: one sentence +- KANI HARNESS: fenced Rust block, ready to paste +- POC TEST: fenced Rust block, ready to paste (differential if applicable) +- IMPACT: which hazard (H-N) this enables; whether it's spec divergence, + semantic drift, resource exhaustion, or type-system confusion +- CANDIDATE UCA: the single most likely `UCA-X-N` (e.g., UCA-P-N parser, + UCA-F-N fuser, UCA-V-N validator) this would exploit, with a one-line + justification. Consult `safety/stpa/ucas.yaml`. diff --git a/scripts/mythos/emit.md b/scripts/mythos/emit.md new file mode 100644 index 0000000..5a2c980 --- /dev/null +++ b/scripts/mythos/emit.md @@ -0,0 +1,30 @@ +You are emitting a new loss-scenario entry to append to +`safety/stpa/loss-scenarios.yaml`. Consult the existing file for the +exact shape before emitting. + +Input: +- Confirmed bug report (below) +- Chosen `UCA-X-N` from the validator +--- +{{confirmed_report}} +UCA: {{uca_id}} +--- + +Rules: +1. 
Grouping invariant: loss-scenarios are grouped under UCAs. If the + file already has a scenario linked to `{{uca_id}}`, this new + finding typically becomes a SIBLING, not a new UCA. +2. The new id follows whatever scheme the existing file uses (check + first entry). Use the next unused suffix for that UCA prefix. +3. Required fields — match existing entries exactly. Do not invent + fields. Common fields: `id`, `title`, `uca`, `hazards`, `type`, + `scenario`, `causal-factors`. +4. In the `scenario` prose, reference the Kani harness and PoC test + by fully-qualified Rust path. Cite the WebAssembly Component Model + spec section that the bug violates. +5. Optional but recommended: `related-cve:` when a wasmtime CVE + covers the same class (e.g., `CVE-2026-27572`). +6. Add `status: draft`. Meld's schema may not have this field today; + add it anyway — humans promote to `approved`. + +Emit ONLY the YAML block, nothing else. diff --git a/scripts/mythos/rank.md b/scripts/mythos/rank.md new file mode 100644 index 0000000..707d435 --- /dev/null +++ b/scripts/mythos/rank.md @@ -0,0 +1,41 @@ +Rank source files in this repository by likelihood of containing a +safety-relevant bug (spec divergence, fusion-semantics breakage, resource +exhaustion, type-system confusion across component boundaries), on a 1–5 +scale. Output JSON: `[{"file": "...", "rank": N, "reason": "..."}]`, +sorted descending. + +Scope: `meld-cli/src/**`, `meld-core/src/**`. Exclude tests, examples. 
+ +Ranking rubric (meld-specific, component-fusion threat model): + +5 (fusion correctness — semantic preservation is the invariant): + - meld-core/src/parser.rs or parse/** # component parsing + - meld-core/src/fuse/** or fusion/** # core fusion logic + - meld-core/src/types/** or type_check/** # cross-component type checks + - meld-core/src/writer.rs or emit/** # output WASM emission + +4 (resolution + validation): + - meld-core/src/resolver/** or imports/** # import/export resolution + - meld-core/src/validate/** # post-fusion validation + - meld-core/src/canonical_abi/** # component-model canonical ABI + +3 (support): + - meld-core/src/error.rs, metrics.rs + - meld-cli/** (argv + env; not a heavy attack surface but worth checking) + +2 (wiring): + - glue modules, re-exports + +1 (proof artifacts / constants): + - **/verify/**, **/formal_verification.rs + - constants files + +When ranking: +- If a file straddles two tiers, pick the higher. +- Files with heavy `unwrap_or_else` / silent-default patterns belong one + tier higher than the rubric suggests. +- Fusion bugs that produce invalid output WASM affect every downstream + consumer (kiln, wasmtime, browsers). That amplification elevates parser + and writer tiers. +- Do not guess rank 5 from path alone — open the file. +- Files you haven't seen default to rank 2. diff --git a/scripts/mythos/validate.md b/scripts/mythos/validate.md new file mode 100644 index 0000000..24d0bdf --- /dev/null +++ b/scripts/mythos/validate.md @@ -0,0 +1,35 @@ +I have received the following bug report. Can you please confirm if +it's real and interesting? + +Report: +--- +{{report}} +--- + +You are a fresh validator with no stake in the exploration. Your job +is to reject hallucinations and cosmetic findings. + +Procedure: +1. Read the cited file and function BEFORE reading the hypothesis. + Form your own view of what the code does and what the WebAssembly + Component Model spec says about it. +2. Run the provided Kani harness. 
If Kani does not produce a + counterexample on the unfixed code, the bug is NOT confirmed — + reply `VERDICT: not-confirmed` and a short reason. Stop. +3. Run the provided PoC test. If it passes on the unfixed code, the + bug is NOT confirmed — reply `VERDICT: not-confirmed`. Stop. +4. If both (2) and (3) demonstrate the bug, ask: is this *interesting*? + A finding is NOT interesting if any of the following hold: + - it is a known limitation in `safety/stpa/system-constraints.yaml` + - it is a duplicate of an existing loss-scenario + - it requires a spec-violating input the parser rejects anyway + (check parser validation coverage first) + - the impact is documented-by-design +5. If still real and interesting, identify the UCA-X-N it exploits. + Prefer to GROUP under an existing UCA. If no existing UCA fits, + reply `VERDICT: confirmed-but-no-uca`; do not emit a scenario. + +Output: +- `VERDICT: confirmed | not-confirmed | confirmed-but-no-uca` +- `UCA: UCA-X-N` (only on confirmed) +- `REASON:` one paragraph