diff --git a/Cargo.toml b/Cargo.toml index 5737fa1fa..888cc1082 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -46,6 +46,7 @@ file-per-thread-logger = "0.1.2" default = ["disas", "wasm"] disas = ["capstone"] wasm = ["wabt", "cranelift-wasm"] +wasm-debug = ["cranelift-wasm/dwarf"] # We want debug symbols on release binaries by default since it allows profiling # tools to give more accurate information. We can always strip them out later if diff --git a/cranelift-wasm/Cargo.toml b/cranelift-wasm/Cargo.toml index 7d9504aa5..e96f9bec8 100644 --- a/cranelift-wasm/Cargo.toml +++ b/cranelift-wasm/Cargo.toml @@ -20,6 +20,9 @@ failure = { version = "0.1.1", default-features = false, features = ["derive"] } failure_derive = { version = "0.1.1", default-features = false } log = { version = "0.4.6", default-features = false } cast = { version = "0.2.2", default-features = false } +target-lexicon = { version = "0.4.0", default-features = false } +gimli = { version = "0.18.0", optional = true } +faerie = { version = "0.10.0", optional = true } [dev-dependencies] wabt = "0.7.0" @@ -29,6 +32,7 @@ target-lexicon = "0.4.0" default = ["std"] std = ["cranelift-codegen/std", "cranelift-frontend/std", "wasmparser/std", "failure/std"] core = ["hashmap_core", "cranelift-codegen/core", "cranelift-frontend/core", "wasmparser/core"] +dwarf = ["gimli", "faerie"] [badges] maintenance = { status = "experimental" } diff --git a/cranelift-wasm/src/debug/address_transform.rs b/cranelift-wasm/src/debug/address_transform.rs new file mode 100644 index 000000000..05d762e77 --- /dev/null +++ b/cranelift-wasm/src/debug/address_transform.rs @@ -0,0 +1,139 @@ +//! Utility data structures for WebAssembly address space transformation. 
+
+use crate::debug::data::ModuleAddressMap;
+use crate::debug::read_debuginfo::WasmFileInfo;
+use crate::DefinedFuncIndex;
+use cranelift_entity::{EntityRef, PrimaryMap};
+use gimli::write;
+use std::boxed::Box;
+use std::collections::BTreeMap;
+use std::ops::Bound::{Included, Unbounded};
+use std::vec::Vec;
+
+/// Offset into the generated code.
+pub type GeneratedAddress = usize;
+/// Offset of a wasm instruction, relative to the start of the code section.
+pub type WasmAddress = u64;
+/// Index of the defined function used as the symbol of a translated address.
+pub type SymbolIndex = usize;
+
+/// Pairs a generated code offset with the wasm offset it was produced from.
+#[derive(Debug)]
+pub struct AddressMap {
+    pub generated: GeneratedAddress,
+    pub wasm: WasmAddress,
+}
+
+/// Address mappings of a single function.
+#[derive(Debug)]
+pub struct FunctionMap {
+    /// Generated body offset (copied from `FunctionAddressMap::body_offset`).
+    pub offset: GeneratedAddress,
+    /// Generated body length (copied from `FunctionAddressMap::body_len`).
+    pub len: GeneratedAddress,
+    /// Instruction mappings, sorted by `AddressMap::generated`.
+    pub addresses: Box<[AddressMap]>,
+}
+
+/// Translates wasm code-section offsets into generated code addresses.
+#[derive(Debug)]
+pub struct AddressTransform {
+    /// Wasm offset -> (function index, start, end) of the generated range.
+    lookup: BTreeMap<WasmAddress, (SymbolIndex, GeneratedAddress, GeneratedAddress)>,
+    /// Per-function address maps.
+    map: PrimaryMap<DefinedFuncIndex, FunctionMap>,
+    /// Per-function `(body_offset, body_offset + body_len)` pairs.
+    func_ranges: Vec<(usize, usize)>,
+}
+
+impl AddressTransform {
+    /// Builds the lookup tables from the module address map and the wasm file
+    /// layout.
+    ///
+    /// # Panics
+    ///
+    /// Panics if a function body starts before the code section, or if an
+    /// instruction source location lies outside of its function body.
+    pub fn new(at: &ModuleAddressMap, wasm_file: &WasmFileInfo) -> Self {
+        let code_section_offset = wasm_file.code_section_offset;
+        let function_offsets = &wasm_file.function_offsets_and_sizes;
+        let mut lookup = BTreeMap::new();
+        let mut map = PrimaryMap::new();
+        let mut func_ranges = Vec::new();
+        for (i, ft) in at {
+            let index = i.index();
+            let (fn_offset, fn_size) = function_offsets[index];
+            assert!(code_section_offset <= fn_offset);
+            let fn_offset: WasmAddress = fn_offset - code_section_offset;
+            let fn_size = fn_size as WasmAddress;
+            func_ranges.push((ft.body_offset, ft.body_offset + ft.body_len));
+            // The function start maps to the start of the generated body.
+            lookup.insert(fn_offset, (index, ft.body_offset, ft.body_offset));
+            let mut fn_map = Vec::new();
+            for t in &ft.instructions {
+                if t.srcloc.is_default() {
+                    // TODO extend some range if possible
+                    continue;
+                }
+                // src_offset is a wasm bytecode offset in the code section
+                let src_offset = t.srcloc.bits() as WasmAddress - code_section_offset;
+                assert!(fn_offset <= src_offset && src_offset <= fn_offset + fn_size);
+                lookup.insert(
+                    src_offset,
+                    (index, t.code_offset, t.code_offset + t.code_len),
+                );
+                fn_map.push(AddressMap {
+                    generated: t.code_offset,
+                    wasm: src_offset,
+                });
+            }
+            // The function end maps to the end of the generated body.
+            let last_addr = ft.body_offset + ft.body_len;
+            lookup.insert(fn_offset + fn_size, (index, last_addr, last_addr));
+            fn_map.sort_by(|a, b| a.generated.cmp(&b.generated));
+            map.push(FunctionMap {
+                offset: ft.body_offset,
+                len: ft.body_len,
+                addresses: fn_map.into_boxed_slice(),
+            });
+        }
+        AddressTransform {
+            lookup,
+            map,
+            func_ranges,
+        }
+    }
+
+    /// Translates a wasm address into a symbol-relative generated address.
+    ///
+    /// The closest entry at or below `addr` is used. Returns `None` when
+    /// `addr` is 0 or when no such entry exists.
+    pub fn translate(&self, addr: u64) -> Option<write::Address> {
+        if addr == 0 {
+            // It's normally 0 for debug info without the linked code.
+            return None;
+        }
+        // `next_back` reaches the greatest key <= addr without walking the
+        // whole range the way `Iterator::last` may.
+        self.lookup
+            .range((Unbounded, Included(addr)))
+            .next_back()
+            .map(|(_, value)| write::Address::Symbol {
+                symbol: value.0,
+                addend: value.1 as i64,
+            })
+        // `None` here means the address was not found: function was not compiled?
+    }
+
+    /// Returns the distance in generated code between two wasm addresses.
+    ///
+    /// Returns `None` when either address cannot be translated, when the
+    /// addresses belong to different functions, or when the second address
+    /// translates to a location before the first one.
+    pub fn diff(&self, addr1: u64, addr2: u64) -> Option<u64> {
+        match (self.translate(addr1), self.translate(addr2)) {
+            (
+                Some(write::Address::Symbol {
+                    symbol: s1,
+                    addend: a,
+                }),
+                Some(write::Address::Symbol {
+                    symbol: s2,
+                    addend: b,
+                }),
+            ) if s1 == s2 && a <= b => Some((b - a) as u64),
+            _ => None,
+        }
+    }
+
+    /// Returns the generated-code length of the wasm range `[addr1, addr1 + u]`.
+    ///
+    /// Returns `None` when `addr1 + u` overflows or `diff` fails.
+    pub fn delta(&self, addr1: u64, u: u64) -> Option<u64> {
+        addr1
+            .checked_add(u)
+            .and_then(|addr2| self.diff(addr1, addr2))
+    }
+
+    /// Returns the per-function address maps.
+    pub fn map(&self) -> &PrimaryMap<DefinedFuncIndex, FunctionMap> {
+        &self.map
+    }
+
+    /// Returns the `(start, end)` generated range of the function `index`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `index` is out of range.
+    pub fn func_range(&self, index: usize) -> (usize, usize) {
+        self.func_ranges[index]
+    }
+}
diff --git a/cranelift-wasm/src/debug/data.rs b/cranelift-wasm/src/debug/data.rs
new file mode 100644
index 000000000..0d5bee965
--- /dev/null
+++ b/cranelift-wasm/src/debug/data.rs
@@ -0,0 +1,42 @@
+//! External data structures needed for address transform.
+
+use crate::DefinedFuncIndex;
+use cranelift_codegen::ir;
+use cranelift_entity::PrimaryMap;
+use std::vec::Vec;
+
+/// Single wasm source location to generated address mapping.
+#[derive(Debug)]
+pub struct InstructionAddressMap {
+    /// Original source location.
+    pub srcloc: ir::SourceLoc,
+
+    /// Generated instructions offset.
+    pub code_offset: usize,
+
+    /// Generated instructions length.
+    pub code_len: usize,
+}
+
+/// Function and its instructions addresses mappings.
+#[derive(Debug)]
+pub struct FunctionAddressMap {
+    /// Instructions maps.
+    /// The array is sorted by the InstructionAddressMap::code_offset field.
+    pub instructions: Vec<InstructionAddressMap>,
+
+    /// Function start source location (normally declaration).
+    pub start_srcloc: ir::SourceLoc,
+
+    /// Function end source location.
+    pub end_srcloc: ir::SourceLoc,
+
+    /// Generated function body offset if applicable, otherwise 0.
+    pub body_offset: usize,
+
+    /// Generated function body length.
+    pub body_len: usize,
+}
+
+/// Module functions addresses mappings.
+pub type ModuleAddressMap = PrimaryMap<DefinedFuncIndex, FunctionAddressMap>;
diff --git a/cranelift-wasm/src/debug/mod.rs b/cranelift-wasm/src/debug/mod.rs
new file mode 100644
index 000000000..9fd0db600
--- /dev/null
+++ b/cranelift-wasm/src/debug/mod.rs
@@ -0,0 +1,181 @@
+//! Debug utils for WebAssembly using Cranelift.
+
+use cranelift_codegen::isa::TargetFrontendConfig;
+use faerie::{Artifact, Decl};
+use failure::Error;
+use std::string::String;
+use std::vec::Vec;
+use target_lexicon::{BinaryFormat, Triple};
+
+pub use crate::debug::data::*;
+pub use crate::debug::read_debuginfo::{read_debuginfo, DebugInfoData};
+pub use crate::debug::transform::transform_dwarf;
+pub use crate::debug::write_debuginfo::{emit_dwarf, ResolvedSymbol, SymbolResolver};
+
+mod address_transform;
+mod data;
+mod read_debuginfo;
+mod transform;
+mod write_debuginfo;
+
+/// Resolves a function symbol into a relocation against the
+/// `_wasm_function_{index}` name.
+struct FunctionRelocResolver {}
+impl SymbolResolver for FunctionRelocResolver {
+    fn resolve_symbol(&self, symbol: usize, addend: i64) -> ResolvedSymbol {
+        let name = format!("_wasm_function_{}", symbol);
+        ResolvedSymbol::Reloc { name, addend }
+    }
+}
+
+/// Emits DWARF sections into the faerie `Artifact`.
+///
+/// The wasm DWARF is transformed with `transform_dwarf`; function addresses
+/// are emitted as relocations against the `_wasm_function_{index}` symbols.
+pub fn emit_debugsections(
+    obj: &mut Artifact,
+    target_config: &TargetFrontendConfig,
+    debuginfo_data: &DebugInfoData,
+    at: &ModuleAddressMap,
+) -> Result<(), Error> {
+    let dwarf = transform_dwarf(target_config, debuginfo_data, at)?;
+    let resolver = FunctionRelocResolver {};
+    emit_dwarf(obj, dwarf, &resolver)?;
+    Ok(())
+}
+
+/// Resolves a function symbol into the physical address of its code.
+struct ImageRelocResolver<'a> {
+    /// Start address of every function, indexed by symbol.
+    func_offsets: &'a Vec<u64>,
+}
+
+impl<'a> SymbolResolver for ImageRelocResolver<'a> {
+    fn resolve_symbol(&self, symbol: usize, addend: i64) -> ResolvedSymbol {
+        let func_start = self.func_offsets[symbol];
+        // Two's complement addition: stays correct for a negative addend
+        // instead of overflowing.
+        ResolvedSymbol::PhysicalAddress(func_start.wrapping_add(addend as u64))
+    }
+}
+
+/// Emits image based on function code and DWARF information.
+/// This builds a valid loadable ELF file image.
+///
+/// `funcs` holds the `(pointer, length)` of every function body. The whole
+/// memory range between the lowest body start and the highest body end is
+/// read and copied into the image, so the caller must make sure that range
+/// is readable.
+///
+/// # Panics
+///
+/// Panics if `funcs` is empty.
+pub fn emit_debugsections_image(
+    triple: Triple,
+    target_config: &TargetFrontendConfig,
+    debuginfo_data: &DebugInfoData,
+    at: &ModuleAddressMap,
+    funcs: &Vec<(*const u8, usize)>,
+) -> Result<Vec<u8>, Error> {
+    let func_offsets: Vec<u64> = funcs.iter().map(|(ptr, _)| *ptr as u64).collect();
+    let mut obj = Artifact::new(triple, String::from("module"));
+    let dwarf = transform_dwarf(target_config, debuginfo_data, at)?;
+    let resolver = ImageRelocResolver {
+        func_offsets: &func_offsets,
+    };
+
+    // Assuming all functions in the same code block, looking min/max of its range.
+    assert!(!funcs.is_empty());
+    let mut segment_body: (usize, usize) = (!0, 0);
+    for (body_ptr, body_len) in funcs.iter() {
+        segment_body.0 = ::std::cmp::min(segment_body.0, *body_ptr as usize);
+        segment_body.1 = ::std::cmp::max(segment_body.1, *body_ptr as usize + body_len);
+    }
+    let segment_body = (segment_body.0 as *const u8, segment_body.1 - segment_body.0);
+
+    // SAFETY: relies on the caller's guarantee that the functions live in one
+    // readable block of memory spanning the computed range.
+    let body = unsafe { ::std::slice::from_raw_parts(segment_body.0, segment_body.1) };
+    obj.declare_with("all", Decl::function(), body.to_vec())?;
+
+    emit_dwarf(&mut obj, dwarf, &resolver)?;
+
+    // LLDB is too "magical" about mach-o, generating elf
+    let mut bytes = obj.emit_as(BinaryFormat::Elf)?;
+    // elf is still missing details...
+    convert_faerie_elf_to_loadable_file(&mut bytes, segment_body.0);
+
+    Ok(bytes)
+}
+
+/// Reads a little-endian `u16` at `off`; panics when out of bounds.
+fn elf_read_u16(bytes: &[u8], off: usize) -> u16 {
+    let mut buf = [0u8; 2];
+    buf.copy_from_slice(&bytes[off..off + 2]);
+    u16::from_le_bytes(buf)
+}
+
+/// Reads a little-endian `u32` at `off`; panics when out of bounds.
+fn elf_read_u32(bytes: &[u8], off: usize) -> u32 {
+    let mut buf = [0u8; 4];
+    buf.copy_from_slice(&bytes[off..off + 4]);
+    u32::from_le_bytes(buf)
+}
+
+/// Reads a little-endian `u64` at `off`; panics when out of bounds.
+fn elf_read_u64(bytes: &[u8], off: usize) -> u64 {
+    let mut buf = [0u8; 8];
+    buf.copy_from_slice(&bytes[off..off + 8]);
+    u64::from_le_bytes(buf)
+}
+
+/// Writes `value` as little-endian at `off`; panics when out of bounds.
+fn elf_write_u16(bytes: &mut [u8], off: usize, value: u16) {
+    bytes[off..off + 2].copy_from_slice(&value.to_le_bytes());
+}
+
+/// Writes `value` as little-endian at `off`; panics when out of bounds.
+fn elf_write_u32(bytes: &mut [u8], off: usize, value: u32) {
+    bytes[off..off + 4].copy_from_slice(&value.to_le_bytes());
+}
+
+/// Writes `value` as little-endian at `off`; panics when out of bounds.
+fn elf_write_u64(bytes: &mut [u8], off: usize, value: u64) {
+    bytes[off..off + 8].copy_from_slice(&value.to_le_bytes());
+}
+
+/// Patches the ELF64 little-endian object in `bytes` so it can be loaded.
+///
+/// The `.text.all` section gets `code_ptr` as its virtual address and is
+/// renamed to `.text`, a single `PT_LOAD` program header describing it is
+/// appended to the file, and the ELF header is updated to reference it.
+///
+/// # Panics
+///
+/// Panics if the file is not a 64-bit little-endian ELF, already has program
+/// headers, has unexpected header entry sizes, is truncated, or does not have
+/// exactly one `.text.all` section.
+fn convert_faerie_elf_to_loadable_file(bytes: &mut Vec<u8>, code_ptr: *const u8) {
+    assert!(
+        bytes[0x4] == 2 && bytes[0x5] == 1,
+        "bits and endianess in .ELF"
+    );
+    let e_phoff = elf_read_u64(bytes, 0x20);
+    let e_phnum = elf_read_u16(bytes, 0x38);
+    assert!(
+        e_phoff == 0 && e_phnum == 0,
+        "program header table is empty"
+    );
+    let e_phentsize = elf_read_u16(bytes, 0x36);
+    assert!(e_phentsize == 0x38, "size of ph");
+    let e_shentsize = elf_read_u16(bytes, 0x3A);
+    assert!(e_shentsize == 0x40, "size of sh");
+
+    let e_shoff = elf_read_u64(bytes, 0x28) as usize;
+    let e_shnum = elf_read_u16(bytes, 0x3C) as usize;
+    // File offset of the most recently seen string table section.
+    // NOTE(review): section names are looked up in it, which assumes it is
+    // the section-name string table -- confirm against faerie's output
+    // (e_shstrndx at 0x3E would be the authoritative source).
+    let mut strtab_off = 0usize;
+    let mut segment = None;
+    for i in 0..e_shnum {
+        let off = e_shoff + i * e_shentsize as usize;
+        let sh_type = elf_read_u32(bytes, off + 0x4);
+        if sh_type == /* SHT_STRTAB */ 3 {
+            strtab_off = elf_read_u64(bytes, off + 0x18) as usize;
+        }
+        if sh_type != /* SHT_PROGBITS */ 1 {
+            continue;
+        }
+        // It is a SHT_PROGBITS, but we need to check sh_name to ensure it is our function
+        let name_off = strtab_off + elf_read_u32(bytes, off) as usize;
+        if !bytes[name_off..].starts_with(b".text.all\0") {
+            continue;
+        }
+
+        assert!(segment.is_none());
+        // Functions were added at emit_debugsections_image as .text.all.
+        // Patch vaddr (sh_addr), and save file location and its size.
+        elf_write_u64(bytes, off + 0x10, code_ptr as u64);
+        let sh_offset = elf_read_u64(bytes, off + 0x18);
+        let sh_size = elf_read_u64(bytes, off + 0x20);
+        segment = Some((sh_offset, sh_size));
+        // Fix name too: cut it to just ".text"
+        bytes[name_off + ".text".len()] = 0;
+    }
+
+    // LLDB wants segment with virtual address set, placing them at the end of ELF.
+    let ph_off = bytes.len();
+    let (sh_offset, sh_size) = segment.expect(".text.all section is present");
+    let v_offset = code_ptr as u64;
+    let mut ph = [0u8; 0x38];
+    elf_write_u32(&mut ph, 0x0, /* PT_LOAD */ 0x1);
+    elf_write_u64(&mut ph, 0x8, sh_offset); // p_offset
+    elf_write_u64(&mut ph, 0x10, v_offset); // p_vaddr
+    elf_write_u64(&mut ph, 0x18, v_offset); // p_paddr
+    elf_write_u64(&mut ph, 0x20, sh_size); // p_filesz
+    elf_write_u64(&mut ph, 0x28, sh_size); // p_memsz
+    bytes.extend_from_slice(&ph);
+
+    // It is somewhat loadable ELF file at this moment.
+    // Update e_type, e_phoff and e_phnum.
+    elf_write_u16(bytes, 0x10, /* ET_DYN */ 3);
+    elf_write_u64(bytes, 0x20, ph_off as u64);
+    elf_write_u16(bytes, 0x38, 1);
+}
diff --git a/cranelift-wasm/src/debug/read_debuginfo.rs b/cranelift-wasm/src/debug/read_debuginfo.rs
new file mode 100644
index 000000000..e34869418
--- /dev/null
+++ b/cranelift-wasm/src/debug/read_debuginfo.rs
@@ -0,0 +1,145 @@
+//! Reading of WebAssembly DWARF sections.
+
+use std::boxed::Box;
+use std::collections::HashMap;
+use std::vec::Vec;
+
+use wasmparser::{ModuleReader, SectionCode};
+
+use gimli;
+
+use gimli::{
+    DebugAbbrev, DebugAddr, DebugInfo, DebugLine, DebugLineStr, DebugLoc, DebugLocLists,
+    DebugRanges, DebugRngLists, DebugStr, DebugStrOffsets, DebugTypes, EndianSlice, LittleEndian,
+    LocationLists, RangeLists,
+};
+
+trait Reader: gimli::Reader<Offset = usize> {}
+
+impl<'input> Reader for gimli::EndianSlice<'input, LittleEndian> {}
+
+/// DWARF sections borrowed from the wasm file bytes.
+pub type Dwarf<'input> = gimli::Dwarf<gimli::EndianSlice<'input, LittleEndian>>;
+
+/// Additional wasm structure information, such as offset of code section.
+#[derive(Debug)]
+pub struct WasmFileInfo {
+    /// Start offset of the code section, as reported by the section range.
+    pub code_section_offset: u64,
+    /// `(offset, size)` of every function body in the code section.
+    pub function_offsets_and_sizes: Box<[(u64, u32)]>,
+}
+
+/// Debug information extracted from the wasm file.
+#[derive(Debug)]
+pub struct DebugInfoData<'a> {
+    /// DWARF sections.
+    pub dwarf: Dwarf<'a>,
+
+    /// Additional wasm structure information.
+    pub wasm_file: WasmFileInfo,
+}
+
+/// Wraps the raw `.debug_*` custom sections into gimli section readers.
+///
+/// # Panics
+///
+/// Panics if `.debug_str`, `.debug_abbrev`, `.debug_info` or `.debug_line`
+/// is missing, or if one of the unsupported sections (`.debug_addr`,
+/// `.debug_line_str`, `.debug_rnglists`, `.debug_loclists`,
+/// `.debug_str_offsets`, `.debug_types`) is present.
+fn convert_sections<'a>(sections: HashMap<&str, &'a [u8]>) -> Dwarf<'a> {
+    let endian = LittleEndian;
+    let debug_str = DebugStr::new(sections[".debug_str"], endian);
+    let debug_abbrev = DebugAbbrev::new(sections[".debug_abbrev"], endian);
+    let debug_info = DebugInfo::new(sections[".debug_info"], endian);
+    let debug_line = DebugLine::new(sections[".debug_line"], endian);
+
+    if sections.contains_key(".debug_addr") {
+        panic!("Unexpected .debug_addr");
+    }
+
+    let debug_addr = DebugAddr::from(EndianSlice::new(&[], endian));
+
+    if sections.contains_key(".debug_line_str") {
+        panic!("Unexpected .debug_line_str");
+    }
+
+    let debug_line_str = DebugLineStr::from(EndianSlice::new(&[], endian));
+    let debug_str_sup = DebugStr::from(EndianSlice::new(&[], endian));
+
+    if sections.contains_key(".debug_rnglists") {
+        panic!("Unexpected .debug_rnglists");
+    }
+
+    let debug_ranges = match sections.get(".debug_ranges") {
+        Some(section) => DebugRanges::new(section, endian),
+        None => DebugRanges::new(&[], endian),
+    };
+    let debug_rnglists = DebugRngLists::new(&[], endian);
+    let ranges = RangeLists::new(debug_ranges, debug_rnglists);
+
+    if sections.contains_key(".debug_loclists") {
+        panic!("Unexpected .debug_loclists");
+    }
+
+    let debug_loc = match sections.get(".debug_loc") {
+        Some(section) => DebugLoc::new(section, endian),
+        None => DebugLoc::new(&[], endian),
+    };
+    let debug_loclists = DebugLocLists::new(&[], endian);
+    let locations = LocationLists::new(debug_loc, debug_loclists);
+
+    if sections.contains_key(".debug_str_offsets") {
+        panic!("Unexpected .debug_str_offsets");
+    }
+
+    let debug_str_offsets = DebugStrOffsets::from(EndianSlice::new(&[], endian));
+
+    if sections.contains_key(".debug_types") {
+        panic!("Unexpected .debug_types");
+    }
+
+    let debug_types = DebugTypes::from(EndianSlice::new(&[], endian));
+
+    Dwarf {
+        debug_abbrev,
+        debug_addr,
+        debug_info,
+        debug_line,
+        debug_line_str,
+        debug_str,
+        debug_str_offsets,
+        debug_str_sup,
+        debug_types,
+        locations,
+        ranges,
+    }
+}
+
+/// Read debug information from the wasm file/bytecode.
+///
+/// Collects every custom section whose name starts with `.debug_`, and the
+/// offset of the code section together with the offset and size of every
+/// function body.
+///
+/// # Panics
+///
+/// Panics if the wasm module cannot be parsed, or under the conditions
+/// documented on `convert_sections`.
+pub fn read_debuginfo(data: &[u8]) -> DebugInfoData {
+    let mut reader = ModuleReader::new(data).expect("reader");
+    let mut sections = HashMap::new();
+    let mut code_section_offset = 0;
+    let mut function_offsets_and_sizes = Vec::new();
+    while !reader.eof() {
+        let section = reader.read().expect("section");
+        if let SectionCode::Custom { name, .. } = section.code {
+            if name.starts_with(".debug_") {
+                let mut reader = section.get_binary_reader();
+                let len = reader.bytes_remaining();
+                sections.insert(name, reader.read_bytes(len).expect("bytes"));
+            }
+        }
+        if let SectionCode::Code = section.code {
+            code_section_offset = section.range().start as u64;
+            // TODO remove me later
+            let mut reader = section.get_code_section_reader().expect("code reader");
+            for _ in 0..reader.get_count() {
+                let body = reader.read().expect("function body read");
+                let range = body.range();
+                let fn_body_size = range.end - range.start;
+                let fn_body_offset = range.start;
+                function_offsets_and_sizes.push((fn_body_offset as u64, fn_body_size as u32));
+            }
+        }
+    }
+    let function_offsets_and_sizes = function_offsets_and_sizes.into_boxed_slice();
+    DebugInfoData {
+        dwarf: convert_sections(sections),
+        wasm_file: WasmFileInfo {
+            code_section_offset,
+            function_offsets_and_sizes,
+        },
+    }
+}
diff --git a/cranelift-wasm/src/debug/transform.rs b/cranelift-wasm/src/debug/transform.rs
new file mode 100644
index 000000000..0845e06cd
--- /dev/null
+++ b/cranelift-wasm/src/debug/transform.rs
@@ -0,0 +1,622 @@
+//! Transformation logic of WebAssembly DWARF into native format.
+
+use crate::debug::address_transform::AddressTransform;
+use crate::debug::data::ModuleAddressMap;
+pub use crate::debug::read_debuginfo::DebugInfoData;
+use cranelift_codegen::isa::TargetFrontendConfig;
+use cranelift_entity::EntityRef;
+use failure::Error;
+use std::collections::{BTreeMap, HashMap};
+use std::ops::Bound::{Included, Unbounded};
+use std::vec::Vec;
+
+use gimli;
+
+use gimli::{
+    AttributeValue, CompilationUnitHeader, DebugAbbrev, DebugAddr, DebugAddrBase, DebugLine,
+    DebugLineOffset, DebugStr, DebuggingInformationEntry, LineEncoding, LocationLists, RangeLists,
+    UnitOffset,
+};
+
+use gimli::write;
+
+trait Reader: gimli::Reader<Offset = usize> {}
+
+impl<'input, Endian> Reader for gimli::EndianSlice<'input, Endian> where Endian: gimli::Endianity {}
+
+/// Error raised when the input debug info cannot be transformed.
+#[derive(Fail, Debug)]
+#[fail(display = "Debug info transform error: {}", _0)]
+pub struct TransformError(&'static str);
+
+/// Input DWARF sections shared by the unit cloning functions.
+struct DebugInputContext<'a, R>
+where
+    R: Reader,
+{
+    debug_abbrev: &'a DebugAbbrev<R>,
+    debug_str: &'a DebugStr<R>,
+    debug_line: &'a DebugLine<R>,
+    debug_addr: &'a DebugAddr<R>,
+    debug_addr_base: DebugAddrBase<R::Offset>,
+    rnglists: &'a RangeLists<R>,
+    loclists: &'a LocationLists<R>,
+}
+
+/// A DIE reference whose target was not cloned yet:
+/// (referencing entry, attribute name, offset of the target in the input unit).
+type PendingDieRef = (write::UnitEntryId, gimli::DwAt, UnitOffset);
+
+/// Tells how file related attributes of an entry have to be cloned.
+enum FileAttributeContext<'a> {
+    /// Unit root entry: carries the expected line program offset.
+    Root(Option<DebugLineOffset>),
+    /// Any other entry: maps input file indexes (1-based) to output file ids.
+    Children(&'a Vec<write::FileId>),
+}
+
+/// Clones the attributes of `entry` into `current_scope`.
+///
+/// Addresses are translated with `addr_tr`; when `subprogram_range` is set
+/// it replaces `DW_AT_low_pc` / `DW_AT_high_pc`. Range lists, location lists
+/// and expressions are currently dropped. References to entries that are not
+/// in `die_ref_map` yet are recorded in `pending_die_refs`.
+///
+/// # Errors
+///
+/// Returns an error when the input cannot be read, when a line program or
+/// file index attribute is inconsistent with `file_context`, or when an
+/// attribute value kind is not supported.
+fn clone_die_attributes<'a, R>(
+    entry: &DebuggingInformationEntry<R>,
+    context: &DebugInputContext<R>,
+    addr_tr: &'a AddressTransform,
+    unit_encoding: &gimli::Encoding,
+    current_scope: &mut write::DebuggingInformationEntry,
+    current_scope_id: write::UnitEntryId,
+    subprogram_range: Option<(write::Address, u64)>,
+    out_strings: &mut write::StringTable,
+    die_ref_map: &HashMap<UnitOffset, write::UnitEntryId>,
+    pending_die_refs: &mut Vec<PendingDieRef>,
+    file_context: FileAttributeContext<'a>,
+) -> Result<(), Error>
+where
+    R: Reader,
+{
+    let mut attrs = entry.attrs();
+    // Wasm low_pc and its translation; needed to translate a later high_pc.
+    let mut low_pc = None;
+    while let Some(attr) = attrs.next()? {
+        let attr_value = match attr.value() {
+            AttributeValue::Addr(_)
+                if attr.name() == gimli::DW_AT_low_pc && subprogram_range.is_some() =>
+            {
+                write::AttributeValue::Address(subprogram_range.unwrap().0)
+            }
+            AttributeValue::Udata(_)
+                if attr.name() == gimli::DW_AT_high_pc && subprogram_range.is_some() =>
+            {
+                write::AttributeValue::Udata(subprogram_range.unwrap().1)
+            }
+            AttributeValue::Addr(u) => {
+                let addr = addr_tr.translate(u).unwrap_or(write::Address::Constant(0));
+                if attr.name() == gimli::DW_AT_low_pc {
+                    low_pc = Some((u, addr));
+                }
+                write::AttributeValue::Address(addr)
+            }
+            AttributeValue::Udata(u) => match low_pc {
+                // high_pc is a length relative to low_pc: translate the range.
+                Some((low, _)) if attr.name() == gimli::DW_AT_high_pc => {
+                    write::AttributeValue::Udata(addr_tr.delta(low, u).unwrap_or(0))
+                }
+                _ => write::AttributeValue::Udata(u),
+            },
+            AttributeValue::Data1(d) => write::AttributeValue::Data1(d),
+            AttributeValue::Data2(d) => write::AttributeValue::Data2(d),
+            AttributeValue::Data4(d) => write::AttributeValue::Data4(d),
+            AttributeValue::Sdata(d) => write::AttributeValue::Sdata(d),
+            AttributeValue::Flag(f) => write::AttributeValue::Flag(f),
+            AttributeValue::DebugLineRef(line_program_offset) => {
+                if let FileAttributeContext::Root(o) = file_context {
+                    if o != Some(line_program_offset) {
+                        return Err(TransformError("invalid debug_line offset").into());
+                    }
+                    write::AttributeValue::LineProgramRef
+                } else {
+                    return Err(TransformError("unexpected debug_line index attribute").into());
+                }
+            }
+            AttributeValue::FileIndex(i) => {
+                if let FileAttributeContext::Children(file_map) = file_context {
+                    // Input indexes are 1-based; reject 0 and out of range
+                    // values instead of panicking.
+                    let file_id = i
+                        .checked_sub(1)
+                        .and_then(|index| file_map.get(index as usize))
+                        .ok_or(TransformError("invalid file index attribute"))?;
+                    write::AttributeValue::FileIndex(Some(*file_id))
+                } else {
+                    return Err(TransformError("unexpected file index attribute").into());
+                }
+            }
+            AttributeValue::DebugStrRef(str_offset) => {
+                let s = context.debug_str.get_str(str_offset)?.to_slice()?.to_vec();
+                write::AttributeValue::StringRef(out_strings.add(s))
+            }
+            AttributeValue::RangeListsRef(r) => {
+                let low_pc = 0;
+                let mut ranges = context.rnglists.ranges(
+                    r,
+                    *unit_encoding,
+                    low_pc,
+                    &context.debug_addr,
+                    context.debug_addr_base,
+                )?;
+                let mut _result = Vec::new();
+                while let Some(range) = ranges.next()? {
+                    assert!(range.begin <= range.end);
+                    _result.push((range.begin as i64, range.end as i64));
+                }
+                // FIXME _result contains invalid code offsets; translate_address
+                continue; // ignore attribute
+            }
+            AttributeValue::LocationListsRef(r) => {
+                let low_pc = 0;
+                let mut locs = context.loclists.locations(
+                    r,
+                    *unit_encoding,
+                    low_pc,
+                    &context.debug_addr,
+                    context.debug_addr_base,
+                )?;
+                let mut _result = Vec::new();
+                while let Some(loc) = locs.next()? {
+                    _result.push((loc.range.begin as i64, loc.range.end as i64, loc.data.0));
+                }
+                // FIXME _result contains invalid expressions and code offsets
+                continue; // ignore attribute
+            }
+            AttributeValue::Exprloc(ref _expr) => {
+                // FIXME _expr contains invalid expression
+                continue; // ignore attribute
+            }
+            AttributeValue::Encoding(e) => write::AttributeValue::Encoding(e),
+            AttributeValue::DecimalSign(e) => write::AttributeValue::DecimalSign(e),
+            AttributeValue::Endianity(e) => write::AttributeValue::Endianity(e),
+            AttributeValue::Accessibility(e) => write::AttributeValue::Accessibility(e),
+            AttributeValue::Visibility(e) => write::AttributeValue::Visibility(e),
+            AttributeValue::Virtuality(e) => write::AttributeValue::Virtuality(e),
+            AttributeValue::Language(e) => write::AttributeValue::Language(e),
+            AttributeValue::AddressClass(e) => write::AttributeValue::AddressClass(e),
+            AttributeValue::IdentifierCase(e) => write::AttributeValue::IdentifierCase(e),
+            AttributeValue::CallingConvention(e) => write::AttributeValue::CallingConvention(e),
+            AttributeValue::Inline(e) => write::AttributeValue::Inline(e),
+            AttributeValue::Ordering(e) => write::AttributeValue::Ordering(e),
+            AttributeValue::UnitRef(ref offset) => {
+                if let Some(unit_id) = die_ref_map.get(offset) {
+                    write::AttributeValue::ThisUnitEntryRef(*unit_id)
+                } else {
+                    // Forward reference: resolved once the whole unit is cloned.
+                    pending_die_refs.push((current_scope_id, attr.name(), *offset));
+                    continue;
+                }
+            }
+            // AttributeValue::DebugInfoRef(_) => {
+            //     continue;
+            // }
+            _ => return Err(TransformError("unsupported attribute value").into()),
+        };
+        current_scope.set(attr.name(), attr_value);
+    }
+    Ok(())
+}
+
+/// Clones a string attribute into a line program string of the given `form`.
+///
+/// # Panics
+///
+/// Panics if `attr_value` is not a string, or if `form` is neither
+/// `DW_FORM_strp` nor `DW_FORM_string`.
+fn clone_attr_string<R>(
+    attr_value: &AttributeValue<R>,
+    form: gimli::DwForm,
+    debug_str: &DebugStr<R>,
+    out_strings: &mut write::StringTable,
+) -> Result<write::LineString, gimli::Error>
+where
+    R: Reader,
+{
+    let content = match attr_value {
+        AttributeValue::DebugStrRef(str_offset) => {
+            debug_str.get_str(*str_offset)?.to_slice()?.to_vec()
+        }
+        AttributeValue::String(b) => b.to_slice()?.to_vec(),
+        _ => panic!("Unexpected attribute value"),
+    };
+    Ok(match form {
+        gimli::DW_FORM_strp => {
+            let id = out_strings.add(content);
+            write::LineString::StringRef(id)
+        }
+        gimli::DW_FORM_string => write::LineString::String(content),
+        _ => panic!("DW_FORM_line_strp or other not supported"),
+    })
+}
+
+/// Row of the input line program, kept until the output rows are generated.
+#[derive(Debug)]
+enum SavedLineProgramRow {
+    Normal {
+        address: u64,
+        op_index: u64,
+        file_index: u64,
+        line: u64,
+        column: u64,
+        discriminator: u64,
+        is_stmt: bool,
+        basic_block: bool,
+        prologue_end: bool,
+        epilogue_begin: bool,
+        isa: u64,
+    },
+    EndOfSequence(u64),
+}
+
+/// State of the input line program reader.
+#[derive(Debug, Eq, PartialEq)]
+enum ReadLineProgramState {
+    /// Between sequences: the next row starts a new sequence.
+    SequenceEnded,
+    /// Inside of a sequence which rows are saved.
+    ReadSequence,
+    /// Inside of a sequence which rows are discarded.
+    IgnoreSequence,
+}
+
+/// Clones the line program of `unit`, remapping row addresses with `addr_tr`.
+///
+/// Returns the new line program, the offset of the input line program and
+/// the output file ids in the order of the input file table.
+///
+/// # Errors
+///
+/// Returns an error when the root entry has no `DW_AT_stmt_list`,
+/// `DW_AT_comp_dir` or `DW_AT_name`, when the line program cannot be parsed,
+/// or when it refers to a directory or file index that does not exist.
+///
+/// # Panics
+///
+/// Panics if the line program version is above 4.
+fn clone_line_program<R>(
+    unit: &CompilationUnitHeader<R, R::Offset>,
+    root: &DebuggingInformationEntry<R>,
+    addr_tr: &AddressTransform,
+    out_encoding: &gimli::Encoding,
+    debug_str: &DebugStr<R>,
+    debug_line: &DebugLine<R>,
+    out_strings: &mut write::StringTable,
+) -> Result<(write::LineProgram, DebugLineOffset, Vec<write::FileId>), Error>
+where
+    R: Reader,
+{
+    let offset = match root.attr_value(gimli::DW_AT_stmt_list)? {
+        Some(gimli::AttributeValue::DebugLineRef(offset)) => offset,
+        _ => {
+            return Err(TransformError("Debug line offset is not found").into());
+        }
+    };
+    let comp_dir = root.attr_value(gimli::DW_AT_comp_dir)?;
+    let comp_name = root.attr_value(gimli::DW_AT_name)?;
+    let out_comp_dir = clone_attr_string(
+        comp_dir
+            .as_ref()
+            .ok_or(TransformError("Compilation directory is not found"))?,
+        gimli::DW_FORM_strp,
+        debug_str,
+        out_strings,
+    )?;
+    let out_comp_name = clone_attr_string(
+        comp_name
+            .as_ref()
+            .ok_or(TransformError("Compilation unit name is not found"))?,
+        gimli::DW_FORM_strp,
+        debug_str,
+        out_strings,
+    )?;
+
+    let program = debug_line
+        .program(
+            offset,
+            unit.address_size(),
+            comp_dir.and_then(|val| val.string_value(&debug_str)),
+            comp_name.and_then(|val| val.string_value(&debug_str)),
+        )
+        .map_err(|_| TransformError("Valid line program not found"))?;
+
+    let header = program.header();
+    assert!(header.version() <= 4, "not supported 5");
+    let line_encoding = LineEncoding {
+        minimum_instruction_length: header.minimum_instruction_length(),
+        maximum_operations_per_instruction: header.maximum_operations_per_instruction(),
+        default_is_stmt: header.default_is_stmt(),
+        line_base: header.line_base(),
+        line_range: header.line_range(),
+    };
+    let mut out_program = write::LineProgram::new(
+        *out_encoding,
+        line_encoding,
+        out_comp_dir,
+        out_comp_name,
+        None,
+    );
+    // Directory 0 is the compilation directory.
+    let mut dirs = Vec::new();
+    dirs.push(out_program.default_directory());
+    for dir_attr in header.include_directories() {
+        let dir_id = out_program.add_directory(clone_attr_string(
+            dir_attr,
+            gimli::DW_FORM_string,
+            debug_str,
+            out_strings,
+        )?);
+        dirs.push(dir_id);
+    }
+    let mut files = Vec::new();
+    for file_entry in header.file_names() {
+        let dir_id = *dirs
+            .get(file_entry.directory_index() as usize)
+            .ok_or(TransformError("invalid directory index in line program"))?;
+        let file_id = out_program.add_file(
+            clone_attr_string(
+                &file_entry.path_name(),
+                gimli::DW_FORM_string,
+                debug_str,
+                out_strings,
+            )?,
+            dir_id,
+            None,
+        );
+        files.push(file_id);
+    }
+
+    // Save the input rows, keyed by their wasm address.
+    let mut rows = program.rows();
+    let mut saved_rows = BTreeMap::new();
+    let mut state = ReadLineProgramState::SequenceEnded;
+    while let Some((_header, row)) = rows.next_row()? {
+        if state == ReadLineProgramState::IgnoreSequence {
+            if row.end_sequence() {
+                state = ReadLineProgramState::SequenceEnded;
+            }
+            continue;
+        }
+        let saved_row = if row.end_sequence() {
+            state = ReadLineProgramState::SequenceEnded;
+            SavedLineProgramRow::EndOfSequence(row.address())
+        } else {
+            if state == ReadLineProgramState::SequenceEnded {
+                // Discard sequences for non-existent code.
+                if row.address() == 0 {
+                    state = ReadLineProgramState::IgnoreSequence;
+                    continue;
+                }
+                state = ReadLineProgramState::ReadSequence;
+            }
+            SavedLineProgramRow::Normal {
+                address: row.address(),
+                op_index: row.op_index(),
+                file_index: row.file_index(),
+                line: row.line().unwrap_or(0),
+                column: match row.column() {
+                    gimli::ColumnType::LeftEdge => 0,
+                    gimli::ColumnType::Column(val) => val,
+                },
+                discriminator: row.discriminator(),
+                is_stmt: row.is_stmt(),
+                basic_block: row.basic_block(),
+                prologue_end: row.prologue_end(),
+                epilogue_begin: row.epilogue_begin(),
+                isa: row.isa(),
+            }
+        };
+        saved_rows.insert(row.address(), saved_row);
+    }
+
+    // Generate one output sequence per compiled function.
+    for (i, map) in addr_tr.map() {
+        let symbol = i.index();
+        let base_addr = map.offset;
+        out_program.begin_sequence(Some(write::Address::Symbol { symbol, addend: 0 }));
+        // TODO track and place function declaration line here
+        let mut last_address = None;
+        for addr_map in map.addresses.iter() {
+            let mut saved_row = saved_rows.get(&addr_map.wasm);
+            if saved_row.is_none() {
+                // No direct match -- repeat search with range.
+                saved_row = saved_rows
+                    .range((Unbounded, Included(addr_map.wasm)))
+                    .next_back()
+                    .map(|p| p.1);
+            }
+            if let Some(SavedLineProgramRow::Normal {
+                address,
+                op_index,
+                file_index,
+                line,
+                column,
+                discriminator,
+                is_stmt,
+                basic_block,
+                prologue_end,
+                epilogue_begin,
+                isa,
+            }) = saved_row
+            {
+                // Ignore duplicates
+                if Some(*address) != last_address {
+                    let address_offset = if last_address.is_none() {
+                        // Extend first entry to the function declaration
+                        // TODO use the function declaration line instead
+                        0
+                    } else {
+                        (addr_map.generated - base_addr) as u64
+                    };
+                    // Input file indexes are 1-based.
+                    let file_id = *file_index
+                        .checked_sub(1)
+                        .and_then(|index| files.get(index as usize))
+                        .ok_or(TransformError("invalid file index in line program"))?;
+                    out_program.row().address_offset = address_offset;
+                    out_program.row().op_index = *op_index;
+                    out_program.row().file = file_id;
+                    out_program.row().line = *line;
+                    out_program.row().column = *column;
+                    out_program.row().discriminator = *discriminator;
+                    out_program.row().is_statement = *is_stmt;
+                    out_program.row().basic_block = *basic_block;
+                    out_program.row().prologue_end = *prologue_end;
+                    out_program.row().epilogue_begin = *epilogue_begin;
+                    out_program.row().isa = *isa;
+                    out_program.generate_row();
+                    last_address = Some(*address);
+                }
+            }
+        }
+        let end_addr = (map.offset + map.len - 1) as u64;
+        out_program.end_sequence(end_addr);
+    }
+    Ok((out_program, offset, files))
+}
+
+/// Returns the generated address and length of the function a subprogram
+/// entry belongs to, or `None` when its `DW_AT_low_pc` cannot be translated.
+fn get_subprogram_range<'a, R>(
+    entry: &DebuggingInformationEntry<R>,
+    addr_tr: &'a AddressTransform,
+) -> Result<Option<(write::Address, u64)>, Error>
+where
+    R: Reader,
+{
+    let low_pc = entry.attr_value(gimli::DW_AT_low_pc)?;
+    if let Some(AttributeValue::Addr(addr)) = low_pc {
+        let transformed = addr_tr.translate(addr);
+        if let Some(write::Address::Symbol { symbol, .. }) = transformed {
+            let range = addr_tr.func_range(symbol);
+            let addr = write::Address::Symbol {
+                symbol,
+                addend: range.0 as i64,
+            };
+            let len = (range.1 - range.0) as u64;
+            return Ok(Some((addr, len)));
+        }
+    }
+    Ok(None)
+}
+
+/// Clones a compilation unit, with its line program and entries, into
+/// `out_units`.
+///
+/// Subprograms that have no generated code are discarded together with
+/// their children; references to discarded entries are dropped.
+///
+/// # Errors
+///
+/// Returns an error when the input cannot be read, when the first entry is
+/// not a `DW_TAG_compile_unit`, or when the line program or an entry
+/// attribute cannot be cloned.
+fn clone_unit<'a, R>(
+    unit: &CompilationUnitHeader<R, R::Offset>,
+    context: &DebugInputContext<R>,
+    addr_tr: &'a AddressTransform,
+    out_encoding: &gimli::Encoding,
+    out_units: &mut write::UnitTable,
+    out_strings: &mut write::StringTable,
+) -> Result<(), Error>
+where
+    R: Reader,
+{
+    let abbrevs = unit.abbreviations(context.debug_abbrev)?;
+
+    let mut die_ref_map = HashMap::new();
+    let mut pending_die_refs = Vec::new();
+    let mut stack = Vec::new();
+
+    // Iterate over all of this compilation unit's entries.
+    let mut entries = unit.entries(&abbrevs);
+    let (comp_unit, file_map) = if let Some((depth_delta, entry)) = entries.next_dfs()? {
+        assert!(depth_delta == 0);
+        let (out_line_program, debug_line_offset, file_map) = clone_line_program(
+            unit,
+            entry,
+            addr_tr,
+            out_encoding,
+            context.debug_str,
+            context.debug_line,
+            out_strings,
+        )?;
+
+        if entry.tag() == gimli::DW_TAG_compile_unit {
+            let unit_id = out_units.add(write::Unit::new(*out_encoding, out_line_program));
+            let comp_unit = out_units.get_mut(unit_id);
+
+            let root_id = comp_unit.root();
+            die_ref_map.insert(entry.offset(), root_id);
+
+            clone_die_attributes(
+                entry,
+                context,
+                addr_tr,
+                &unit.encoding(),
+                comp_unit.get_mut(root_id),
+                root_id,
+                None,
+                out_strings,
+                &die_ref_map,
+                &mut pending_die_refs,
+                FileAttributeContext::Root(Some(debug_line_offset)),
+            )?;
+
+            stack.push(root_id);
+            (comp_unit, file_map)
+        } else {
+            return Err(TransformError("Unexpected unit header").into());
+        }
+    } else {
+        return Ok(()); // empty
+    };
+    // Depth relative to a discarded subprogram while its subtree is skipped.
+    let mut skip_at_depth = None;
+    while let Some((depth_delta, entry)) = entries.next_dfs()? {
+        let depth_delta = if let Some(depth) = skip_at_depth {
+            let new_depth = depth + depth_delta;
+            if new_depth >= 0 {
+                skip_at_depth = Some(new_depth);
+                continue;
+            }
+            skip_at_depth = None;
+            new_depth
+        } else {
+            depth_delta
+        };
+        let range = if entry.tag() == gimli::DW_TAG_subprogram {
+            let range = get_subprogram_range(entry, addr_tr)?;
+            if range.is_none() {
+                // Subprogram was not compiled: discarding all its info.
+                skip_at_depth = Some(0);
+                continue;
+            }
+            range
+        } else {
+            None
+        };
+
+        if depth_delta <= 0 {
+            for _ in depth_delta..1 {
+                stack.pop();
+            }
+        } else {
+            assert!(depth_delta == 1);
+        }
+        let parent = stack.last().unwrap();
+        let die_id = comp_unit.add(*parent, entry.tag());
+        let current_scope = comp_unit.get_mut(die_id);
+
+        stack.push(die_id);
+        die_ref_map.insert(entry.offset(), die_id);
+
+        clone_die_attributes(
+            entry,
+            context,
+            addr_tr,
+            &unit.encoding(),
+            current_scope,
+            die_id,
+            range,
+            out_strings,
+            &die_ref_map,
+            &mut pending_die_refs,
+            FileAttributeContext::Children(&file_map),
+        )?;
+    }
+    for (die_id, attr_name, offset) in pending_die_refs {
+        // The target is missing from the map when it belongs to a discarded
+        // subprogram; the attribute is dropped in that case.
+        if let Some(unit_id) = die_ref_map.get(&offset) {
+            let die = comp_unit.get_mut(die_id);
+            die.set(attr_name, write::AttributeValue::ThisUnitEntryRef(*unit_id));
+        }
+    }
+    Ok(())
+}
+
+/// Transforms wasm debug information into native DWARF sections.
+///
+/// Builds an `AddressTransform` from `at` and the wasm file info in `di`, then clones
+/// every compilation unit of `di.dwarf.debug_info` into a new `write::Dwarf`. The
+/// output is encoded as 32-bit DWARF version 3, using the target's pointer size as the
+/// address size.
+///
+/// Unit iteration stops silently at the first unit header that fails to parse; errors
+/// raised while cloning a unit are returned.
+pub fn transform_dwarf(
+    target_config: &TargetFrontendConfig,
+    di: &DebugInfoData,
+    at: &ModuleAddressMap,
+) -> Result<write::Dwarf, Error> {
+    let context = DebugInputContext {
+        debug_abbrev: &di.dwarf.debug_abbrev,
+        debug_str: &di.dwarf.debug_str,
+        debug_line: &di.dwarf.debug_line,
+        debug_addr: &di.dwarf.debug_addr,
+        debug_addr_base: DebugAddrBase(0),
+        rnglists: &di.dwarf.ranges,
+        loclists: &di.dwarf.locations,
+    };
+
+    let out_encoding = gimli::Encoding {
+        format: gimli::Format::Dwarf32,
+        // TODO: this should be configurable
+        // macOS doesn't seem to support DWARF > 3
+        version: 3,
+        address_size: target_config.pointer_bytes(),
+    };
+
+    let addr_tr = AddressTransform::new(at, &di.wasm_file);
+
+    let mut out_strings = write::StringTable::default();
+    let mut out_units = write::UnitTable::default();
+
+    let out_line_strings = write::LineStringTable::default();
+
+    let mut iter = di.dwarf.debug_info.units();
+    // A header that fails to parse is mapped to `None`, which ends the loop.
+    while let Some(ref unit) = iter.next().unwrap_or(None) {
+        clone_unit(
+            unit,
+            &context,
+            &addr_tr,
+            &out_encoding,
+            &mut out_units,
+            &mut out_strings,
+        )?;
+    }
+
+    Ok(write::Dwarf {
+        units: out_units,
+        line_programs: vec![],
+        line_strings: out_line_strings,
+        strings: out_strings,
+    })
+}
diff --git a/cranelift-wasm/src/debug/write_debuginfo.rs b/cranelift-wasm/src/debug/write_debuginfo.rs
new file mode 100644
index 000000000..604054424
--- /dev/null
+++ b/cranelift-wasm/src/debug/write_debuginfo.rs
@@ -0,0 +1,172 @@
+//! Writing native DWARF sections.
+
+use std::string::String;
+use std::string::ToString;
+use std::vec::Vec;
+
+use gimli::write::{Address, Dwarf, EndianVec, Result, Sections, Writer};
+use gimli::{RunTimeEndian, SectionId};
+
+use faerie::artifact::Decl;
+use faerie::*;
+use std::result;
+
+/// A relocation recorded while a section is being written.
+#[derive(Clone)]
+struct DebugReloc {
+    /// Byte offset inside the section at which the relocated value was written.
+    offset: u32,
+    /// Size in bytes of the relocated value.
+    size: u8,
+    /// Name of the symbol or section the value is relative to.
+    name: String,
+    /// Value added to the address of `name`.
+    addend: i64,
+}
+
+/// Address or relocation entry of a symbol.
+pub enum ResolvedSymbol {
+    /// Symbol is physical address (in file or memory).
+    PhysicalAddress(u64),
+
+    /// Symbol is relocation entry in relation to the symbol name.
+    Reloc {
+        /// Object file symbol.
+        name: String,
+
+        /// Offset from the object file symbol.
+        addend: i64,
+    },
+}
+
+/// Utility to resolve symbols into an address or relocation entry.
+pub trait SymbolResolver {
+    /// Resolves symbols using its index and addend/offset.
+    fn resolve_symbol(&self, symbol: usize, addend: i64) -> ResolvedSymbol;
+}
+
+/// Emits DWARF sections into the faerie `Artifact`.
+///
+/// Writes the string tables and units of `dwarf` into in-memory sections, declares
+/// every non-empty section on `artifact` as a debug section, and then links every
+/// relocation recorded while writing. Symbol addresses are resolved through
+/// `symbol_resolver`. Sections are written little-endian.
+///
+/// # Errors
+///
+/// Returns an error when a DWARF table cannot be written or when `artifact` rejects a
+/// section declaration or a link.
+pub fn emit_dwarf(
+    artifact: &mut Artifact,
+    mut dwarf: Dwarf,
+    symbol_resolver: &dyn SymbolResolver,
+) -> result::Result<(), failure::Error> {
+    let endian = RunTimeEndian::Little;
+    let mut sections = Sections::new(WriterRelocate::new(endian, symbol_resolver));
+
+    // Write failures are reported to the caller instead of panicking.
+    let debug_str_offsets = dwarf
+        .strings
+        .write(&mut sections.debug_str)
+        .map_err(|e| failure::err_msg(format!("failed to write .debug_str: {:?}", e)))?;
+    let debug_line_str_offsets = dwarf
+        .line_strings
+        .write(&mut sections.debug_line_str)
+        .map_err(|e| failure::err_msg(format!("failed to write .debug_line_str: {:?}", e)))?;
+    dwarf
+        .units
+        .write(&mut sections, &debug_line_str_offsets, &debug_str_offsets)
+        .map_err(|e| failure::err_msg(format!("failed to write DWARF units: {:?}", e)))?;
+
+    // Declare all non-empty sections first so that the links below can refer to them.
+    sections.for_each_mut(|id, section| -> result::Result<(), failure::Error> {
+        if section.writer.slice().is_empty() {
+            return Ok(());
+        }
+        artifact.declare_with(id.name(), Decl::debug_section(), section.writer.take())
+    })?;
+
+    sections.for_each(|id, section| -> result::Result<(), failure::Error> {
+        for reloc in &section.relocs {
+            artifact.link_with(
+                faerie::Link {
+                    from: id.name(),
+                    to: &reloc.name,
+                    at: u64::from(reloc.offset),
+                },
+                faerie::Reloc::Debug {
+                    size: reloc.size,
+                    // NOTE(review): narrows the recorded 64-bit addend to 32 bits;
+                    // values outside the `i32` range are truncated.
+                    addend: reloc.addend as i32,
+                },
+            )?;
+        }
+        Ok(())
+    })
+}
+
+/// A section writer that records a relocation for every address or section offset
+/// it writes.
+#[derive(Clone)]
+struct WriterRelocate<'a> {
+    relocs: Vec<DebugReloc>,
+    writer: EndianVec<RunTimeEndian>,
+    symbol_resolver: &'a dyn SymbolResolver,
+}
+
+impl<'a> WriterRelocate<'a> {
+    /// Creates an empty writer with the given endianness and symbol resolver.
+    fn new(endian: RunTimeEndian, symbol_resolver: &'a dyn SymbolResolver) -> Self {
+        WriterRelocate {
+            relocs: Vec::new(),
+            writer: EndianVec::new(endian),
+            symbol_resolver,
+        }
+    }
+}
+
+impl<'a> Writer for WriterRelocate<'a> {
+    type Endian = RunTimeEndian;
+
+    fn endian(&self) -> Self::Endian {
+        self.writer.endian()
+    }
+
+    fn len(&self) -> usize {
+        self.writer.len()
+    }
+
+    fn write(&mut self, bytes: &[u8]) -> Result<()> {
+        self.writer.write(bytes)
+    }
+
+    fn write_at(&mut self, offset: usize, bytes: &[u8]) -> Result<()> {
+        self.writer.write_at(offset, bytes)
+    }
+
+    /// Writes `address`; a symbol that resolves to a relocation is recorded in
+    /// `relocs` and its addend is written in place.
+    fn write_address(&mut self, address: Address, size: u8) -> Result<()> {
+        match address {
+            Address::Constant(val) => self.write_udata(val, size),
+            Address::Symbol { symbol, addend } => {
+                match self.symbol_resolver.resolve_symbol(symbol, addend) {
+                    ResolvedSymbol::PhysicalAddress(addr) => self.write_udata(addr, size),
+                    ResolvedSymbol::Reloc { name, addend } => {
+                        // Section offsets are stored as 32-bit values.
+                        let offset = self.len() as u32;
+                        self.relocs.push(DebugReloc {
+                            offset,
+                            size,
+                            name,
+                            addend,
+                        });
+                        self.write_udata(addend as u64, size)
+                    }
+                }
+            }
+        }
+    }
+
+    /// Writes an offset into `section` at the current position and records a
+    /// relocation against that section's name.
+    fn write_offset(&mut self, val: usize, section: SectionId, size: u8) -> Result<()> {
+        let offset = self.len() as u32;
+        let name = section.name().to_string();
+        self.relocs.push(DebugReloc {
+            offset,
+            size,
+            name,
+            addend: val as i64,
+        });
+        self.write_udata(val as u64, size)
+    }
+
+    /// Same as `write_offset`, but overwrites the value at `offset` instead of
+    /// appending.
+    fn write_offset_at(
+        &mut self,
+        offset: usize,
+        val: usize,
+        section: SectionId,
+        size: u8,
+    ) -> Result<()> {
+        let name = section.name().to_string();
+        self.relocs.push(DebugReloc {
+            offset: offset as u32,
+            size,
+            name,
+            addend: val as i64,
+        });
+        self.write_udata_at(offset, val as u64, size)
+    }
+}
diff --git a/cranelift-wasm/src/lib.rs b/cranelift-wasm/src/lib.rs
index c475b342b..847667063 100644
--- a/cranelift-wasm/src/lib.rs
+++ b/cranelift-wasm/src/lib.rs
@@ -37,6 +37,10 @@ extern crate alloc as std;
 #[macro_use]
 extern crate std;
 
+#[cfg(feature = "dwarf")]
+#[macro_use]
+extern crate failure_derive;
+
 #[cfg(not(feature = "std"))]
 use hashmap_core::{
     hash_map::Entry::{Occupied, Vacant},
@@ -57,6 +61,9 @@ mod sections_translator;
 mod state;
 mod translation_utils;
 
+#[cfg(feature = "dwarf")] +pub mod debug; + pub use crate::environ::{ DummyEnvironment, FuncEnvironment, GlobalVariable, ModuleEnvironment, ReturnMode, WasmError, WasmResult,