Skip to content
This repository was archived by the owner on Jun 26, 2020. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ file-per-thread-logger = "0.1.2"
default = ["disas", "wasm"]
disas = ["capstone"]
wasm = ["wabt", "cranelift-wasm"]
wasm-debug = ["cranelift-wasm/dwarf"]

# We want debug symbols on release binaries by default since it allows profiling
# tools to give more accurate information. We can always strip them out later if
Expand Down
4 changes: 4 additions & 0 deletions cranelift-wasm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ failure = { version = "0.1.1", default-features = false, features = ["derive"] }
failure_derive = { version = "0.1.1", default-features = false }
log = { version = "0.4.6", default-features = false }
cast = { version = "0.2.2", default-features = false }
target-lexicon = { version = "0.4.0", default-features = false }
gimli = { version = "0.18.0", optional = true }
faerie = { version = "0.10.0", optional = true }

[dev-dependencies]
wabt = "0.7.0"
Expand All @@ -29,6 +32,7 @@ target-lexicon = "0.4.0"
default = ["std"]
std = ["cranelift-codegen/std", "cranelift-frontend/std", "wasmparser/std", "failure/std"]
core = ["hashmap_core", "cranelift-codegen/core", "cranelift-frontend/core", "wasmparser/core"]
dwarf = ["gimli", "faerie"]

[badges]
maintenance = { status = "experimental" }
Expand Down
139 changes: 139 additions & 0 deletions cranelift-wasm/src/debug/address_transform.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
//! Utility data structures for WebAssembly address space transformation.

use crate::debug::data::ModuleAddressMap;
use crate::debug::read_debuginfo::WasmFileInfo;
use crate::DefinedFuncIndex;
use cranelift_entity::{EntityRef, PrimaryMap};
use gimli::write;
use std::boxed::Box;
use std::collections::BTreeMap;
use std::ops::Bound::{Included, Unbounded};
use std::vec::Vec;

/// Offset into the generated (compiled) code.
pub type GeneratedAddress = usize;
/// Offset into the WebAssembly bytecode. `AddressTransform` stores these
/// relative to the start of the wasm code section.
pub type WasmAddress = u64;
/// Index of a symbol used in relocatable addresses. `AddressTransform` uses
/// the defined function index for it.
pub type SymbolIndex = usize;

/// A single pairing of a generated code offset with the wasm bytecode offset
/// it was produced from.
#[derive(Debug)]
pub struct AddressMap {
/// Offset of the generated instructions (taken from the instruction's
/// `code_offset`).
pub generated: GeneratedAddress,
/// Wasm bytecode offset, relative to the start of the code section.
pub wasm: WasmAddress,
}

/// Address mapping for one compiled function.
#[derive(Debug)]
pub struct FunctionMap {
/// Offset of the generated function body (copied from `body_offset`).
pub offset: GeneratedAddress,
/// Length of the generated function body (copied from `body_len`).
pub len: GeneratedAddress,
/// Per-instruction mappings, sorted by their `generated` offset.
/// Instructions without a source location are not included.
pub addresses: Box<[AddressMap]>,
}

/// Translates wasm code-section addresses into addresses of generated code.
#[derive(Debug)]
pub struct AddressTransform {
// Keyed by wasm address (relative to the code section). The value is
// (function index, start of the generated range, end of the generated range).
lookup: BTreeMap<WasmAddress, (SymbolIndex, GeneratedAddress, GeneratedAddress)>,
// Per-function instruction mappings, in the order of the input address map.
map: PrimaryMap<DefinedFuncIndex, FunctionMap>,
// Per-function (body_offset, body_offset + body_len) pairs, in the same order.
func_ranges: Vec<(usize, usize)>,
}

impl AddressTransform {
    /// Builds the transform from the per-function address maps in `at` and the
    /// function layout recorded in `wasm_file`.
    ///
    /// All wasm addresses stored in the transform are relative to the start of
    /// the wasm code section (`wasm_file.code_section_offset`).
    ///
    /// # Panics
    ///
    /// Panics if `wasm_file` has no offset/size entry for a function in `at`,
    /// if a function starts before the code section, or if an instruction's
    /// source location lies outside of its function's bytecode range.
    pub fn new(at: &ModuleAddressMap, wasm_file: &WasmFileInfo) -> Self {
        let code_section_offset = wasm_file.code_section_offset;
        let function_offsets = &wasm_file.function_offsets_and_sizes;
        let mut lookup = BTreeMap::new();
        let mut map = PrimaryMap::new();
        let mut func_ranges = Vec::new();
        for (i, ft) in at {
            let index = i.index();
            let (fn_offset, fn_size) = function_offsets[index];
            assert!(code_section_offset <= fn_offset);
            let fn_offset: WasmAddress = fn_offset - code_section_offset;
            let fn_size = fn_size as WasmAddress;
            let body_end = ft.body_offset + ft.body_len;
            func_ranges.push((ft.body_offset, body_end));

            // The first byte of the function maps to the start of its body.
            lookup.insert(fn_offset, (index, ft.body_offset, ft.body_offset));

            let mut fn_map = Vec::new();
            for t in &ft.instructions {
                if t.srcloc.is_default() {
                    // TODO extend some range if possible
                    continue;
                }
                // src_offset is a wasm bytecode offset in the code section
                let src_offset = t.srcloc.bits() as WasmAddress - code_section_offset;
                assert!(fn_offset <= src_offset && src_offset <= fn_offset + fn_size);
                lookup.insert(
                    src_offset,
                    (index, t.code_offset, t.code_offset + t.code_len),
                );
                fn_map.push(AddressMap {
                    generated: t.code_offset,
                    wasm: src_offset,
                });
            }

            // The address just past the function maps to the end of its body.
            lookup.insert(fn_offset + fn_size, (index, body_end, body_end));

            fn_map.sort_by_key(|entry| entry.generated);
            map.push(FunctionMap {
                offset: ft.body_offset,
                len: ft.body_len,
                addresses: fn_map.into_boxed_slice(),
            });
        }
        AddressTransform {
            lookup,
            map,
            func_ranges,
        }
    }

    /// Translates a wasm code-section address into a symbol-relative address
    /// of the generated code.
    ///
    /// The closest known wasm address that is less than or equal to `addr` is
    /// used. Returns `None` for address 0 and for addresses that precede every
    /// known entry.
    pub fn translate(&self, addr: u64) -> Option<write::Address> {
        if addr == 0 {
            // It's normally 0 for debug info without the linked code.
            return None;
        }
        // `next_back` fetches the greatest key <= addr directly instead of
        // walking the whole range from the front.
        // `None` here means the address was not found: function was not compiled?
        self.lookup
            .range((Unbounded, Included(addr)))
            .next_back()
            .map(|(_, &(symbol, start, _))| write::Address::Symbol {
                symbol,
                addend: start as i64,
            })
    }

    /// Returns the distance in generated code between the translations of
    /// `addr1` and `addr2`, or `None` if either address cannot be translated.
    ///
    /// # Panics
    ///
    /// Panics if the two addresses translate into different symbols
    /// (functions).
    pub fn diff(&self, addr1: u64, addr2: u64) -> Option<u64> {
        let from = self.translate(addr1)?;
        let to = self.translate(addr2)?;
        match (from, to) {
            (
                write::Address::Symbol {
                    symbol: s1,
                    addend: a,
                },
                write::Address::Symbol {
                    symbol: s2,
                    addend: b,
                },
            ) => {
                if s1 != s2 {
                    panic!("different symbol");
                }
                Some((b - a) as u64)
            }
            // `translate` only ever produces `Address::Symbol`.
            _ => unreachable!(),
        }
    }

    /// Returns the generated-code length that corresponds to `u` bytes of wasm
    /// code starting at `addr1`.
    ///
    /// Returns `None` if `addr1 + u` does not fit into `u64` or if either end
    /// of the range cannot be translated.
    pub fn delta(&self, addr1: u64, u: u64) -> Option<u64> {
        self.diff(addr1, addr1.checked_add(u)?)
    }

    /// Returns the per-function address maps.
    pub fn map(&self) -> &PrimaryMap<DefinedFuncIndex, FunctionMap> {
        &self.map
    }

    /// Returns the `(start, end)` generated-code range of the function with
    /// the given defined-function index.
    ///
    /// # Panics
    ///
    /// Panics if `index` is out of range.
    pub fn func_range(&self, index: usize) -> (usize, usize) {
        self.func_ranges[index]
    }
}
42 changes: 42 additions & 0 deletions cranelift-wasm/src/debug/data.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
//! External data structures needed for address transform.

use crate::DefinedFuncIndex;
use cranelift_codegen::ir;
use cranelift_entity::PrimaryMap;
use std::vec::Vec;

/// Single wasm source location to generated address mapping.
///
/// One entry ties the run of generated instructions described by
/// `code_offset` and `code_len` to the wasm source location `srcloc`.
#[derive(Debug)]
pub struct InstructionAddressMap {
/// Original source location.
///
/// NOTE(review): the address transform in this module reads `srcloc.bits()`
/// as a wasm bytecode offset and skips entries whose location is the default
/// value; confirm that producers of this data follow the same convention.
pub srcloc: ir::SourceLoc,

/// Generated instructions offset.
pub code_offset: usize,

/// Generated instructions length.
pub code_len: usize,
}

/// Address mappings for a function and its instructions.
#[derive(Debug)]
pub struct FunctionAddressMap {
/// Instructions maps.
/// The array is sorted by the InstructionAddressMap::code_offset field.
pub instructions: Vec<InstructionAddressMap>,

/// Function start source location (normally declaration).
pub start_srcloc: ir::SourceLoc,

/// Function end source location.
pub end_srcloc: ir::SourceLoc,

/// Generated function body offset if applicable, otherwise 0.
pub body_offset: usize,

/// Generated function body length.
/// Together with `body_offset` this gives the function's generated code range.
pub body_len: usize,
}

/// Address mappings for the functions of a module, keyed by `DefinedFuncIndex`.
pub type ModuleAddressMap = PrimaryMap<DefinedFuncIndex, FunctionAddressMap>;
181 changes: 181 additions & 0 deletions cranelift-wasm/src/debug/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
//! Debug utils for WebAssembly using Cranelift.

use cranelift_codegen::isa::TargetFrontendConfig;
use faerie::{Artifact, Decl};
use failure::Error;
use std::string::String;
use std::vec::Vec;
use target_lexicon::{BinaryFormat, Triple};

pub use crate::debug::data::*;
pub use crate::debug::read_debuginfo::{read_debuginfo, DebugInfoData};
pub use crate::debug::transform::transform_dwarf;
pub use crate::debug::write_debuginfo::{emit_dwarf, ResolvedSymbol, SymbolResolver};

mod address_transform;
mod data;
mod read_debuginfo;
mod transform;
mod write_debuginfo;

/// Symbol resolver that expresses every function symbol as a relocation
/// against a named `_wasm_function_<index>` symbol.
struct FunctionRelocResolver {}

impl SymbolResolver for FunctionRelocResolver {
    /// Builds a relocation against the function's symbol name, passing the
    /// addend through unchanged.
    fn resolve_symbol(&self, symbol: usize, addend: i64) -> ResolvedSymbol {
        ResolvedSymbol::Reloc {
            name: format!("_wasm_function_{}", symbol),
            addend,
        }
    }
}

/// Transforms the module's DWARF for the generated code and emits the
/// resulting sections into the faerie `Artifact`.
///
/// Function addresses are emitted as relocations against the
/// `_wasm_function_<index>` symbols.
///
/// # Errors
///
/// Propagates any error reported while transforming or emitting the DWARF.
pub fn emit_debugsections(
    obj: &mut Artifact,
    target_config: &TargetFrontendConfig,
    debuginfo_data: &DebugInfoData,
    at: &ModuleAddressMap,
) -> Result<(), Error> {
    let transformed = transform_dwarf(target_config, debuginfo_data, at)?;
    emit_dwarf(obj, transformed, &FunctionRelocResolver {})?;
    Ok(())
}

/// Symbol resolver that turns function symbols into physical addresses,
/// using the in-memory start address of every function.
struct ImageRelocResolver<'a> {
    /// Start address of each function, indexed by symbol.
    func_offsets: &'a Vec<u64>,
}

impl<'a> SymbolResolver for ImageRelocResolver<'a> {
    /// Resolves `symbol` to the function's start address plus `addend`.
    ///
    /// # Panics
    ///
    /// Panics if `symbol` is not a valid index into the function offsets.
    fn resolve_symbol(&self, symbol: usize, addend: i64) -> ResolvedSymbol {
        let func_start = self.func_offsets[symbol];
        // Two's-complement addition: a negative addend moves the address
        // backwards instead of overflowing (and panicking in debug builds).
        ResolvedSymbol::PhysicalAddress(func_start.wrapping_add(addend as u64))
    }
}

/// Emits an image based on the function code and the DWARF information.
/// This builds a loadable ELF file image and returns its bytes.
///
/// `funcs` holds the `(start pointer, length)` of every compiled function
/// body; the DWARF addresses are resolved against those start pointers.
///
/// # Errors
///
/// Propagates errors from the DWARF transformation and from faerie.
///
/// # Panics
///
/// Panics if `funcs` is empty.
pub fn emit_debugsections_image(
    triple: Triple,
    target_config: &TargetFrontendConfig,
    debuginfo_data: &DebugInfoData,
    at: &ModuleAddressMap,
    funcs: &Vec<(*const u8, usize)>,
) -> Result<Vec<u8>, Error> {
    let func_offsets: Vec<u64> = funcs.iter().map(|&(start, _)| start as u64).collect();
    let mut obj = Artifact::new(triple, String::from("module"));
    let dwarf = transform_dwarf(target_config, debuginfo_data, at)?;
    let resolver = ImageRelocResolver {
        func_offsets: &func_offsets,
    };

    // Assuming all functions in the same code block, looking min/max of its range.
    assert!(funcs.len() > 0);
    let (code_start, code_end) =
        funcs
            .iter()
            .fold((!0usize, 0usize), |(lowest, highest), &(ptr, len)| {
                let begin = ptr as usize;
                (lowest.min(begin), highest.max(begin + len))
            });
    let code_ptr = code_start as *const u8;
    let code_len = code_end - code_start;

    // SAFETY: relies on the assumption above; the caller must guarantee that
    // the whole span between the lowest and the highest function address is
    // readable memory.
    let code = unsafe { ::std::slice::from_raw_parts(code_ptr, code_len) };
    obj.declare_with("all", Decl::function(), code.to_vec())?;

    emit_dwarf(&mut obj, dwarf, &resolver)?;

    // LLDB is too "magical" about mach-o, generating elf
    let mut image = obj.emit_as(BinaryFormat::Elf)?;
    // elf is still missing details...
    convert_faerie_elf_to_loadable_file(&mut image, code_ptr);

    Ok(image)
}

/// Patches a faerie-produced ELF64 little-endian object in place so that it
/// can be treated as a loadable image.
///
/// The function:
/// * finds the `.text.all` section, sets its virtual address to `code_ptr`
///   and shortens its name to `.text`;
/// * appends a single `PT_LOAD` program header describing that section to the
///   end of `bytes`;
/// * rewrites the ELF header to type `ET_DYN` with the appended program
///   header table.
///
/// All fields are read and written as little-endian values through bounds
/// checked slices, so a truncated or malformed image panics instead of
/// touching memory outside of `bytes`.
///
/// # Panics
///
/// Panics if the image is not a 64-bit little-endian ELF, already has a
/// program header table, has unexpected header entry sizes, is truncated, or
/// does not contain exactly one `.text.all` section.
fn convert_faerie_elf_to_loadable_file(bytes: &mut Vec<u8>, code_ptr: *const u8) {
    const SHT_PROGBITS: u32 = 1;
    const SHT_STRTAB: u32 = 3;
    const PT_LOAD: u32 = 1;
    const ET_DYN: u16 = 3;
    const PH_ENTRY_SIZE: usize = 0x38;
    const SH_ENTRY_SIZE: usize = 0x40;
    const TEXT_ALL: &[u8] = b".text.all";

    /// Reads a little-endian `u16` at `off`.
    fn read_u16(buf: &[u8], off: usize) -> u16 {
        let mut raw = [0u8; 2];
        raw.copy_from_slice(&buf[off..off + 2]);
        u16::from_le_bytes(raw)
    }

    /// Reads a little-endian `u32` at `off`.
    fn read_u32(buf: &[u8], off: usize) -> u32 {
        let mut raw = [0u8; 4];
        raw.copy_from_slice(&buf[off..off + 4]);
        u32::from_le_bytes(raw)
    }

    /// Reads a little-endian `u64` at `off`.
    fn read_u64(buf: &[u8], off: usize) -> u64 {
        let mut raw = [0u8; 8];
        raw.copy_from_slice(&buf[off..off + 8]);
        u64::from_le_bytes(raw)
    }

    /// Writes `value` as little-endian `u16` at `off`.
    fn write_u16(buf: &mut [u8], off: usize, value: u16) {
        buf[off..off + 2].copy_from_slice(&value.to_le_bytes());
    }

    /// Writes `value` as little-endian `u32` at `off`.
    fn write_u32(buf: &mut [u8], off: usize, value: u32) {
        buf[off..off + 4].copy_from_slice(&value.to_le_bytes());
    }

    /// Writes `value` as little-endian `u64` at `off`.
    fn write_u64(buf: &mut [u8], off: usize, value: u64) {
        buf[off..off + 8].copy_from_slice(&value.to_le_bytes());
    }

    assert!(
        bytes[0x4] == 2 && bytes[0x5] == 1,
        "bits and endianess in .ELF"
    );
    let e_phoff = read_u64(bytes, 0x20);
    let e_phnum = read_u16(bytes, 0x38);
    assert!(
        e_phoff == 0 && e_phnum == 0,
        "program header table is empty"
    );
    let e_phentsize = usize::from(read_u16(bytes, 0x36));
    assert!(e_phentsize == PH_ENTRY_SIZE, "size of ph");
    let e_shentsize = usize::from(read_u16(bytes, 0x3A));
    assert!(e_shentsize == SH_ENTRY_SIZE, "size of sh");

    let e_shoff = read_u64(bytes, 0x28) as usize;
    let e_shnum = usize::from(read_u16(bytes, 0x3C));
    let mut strtab_off = 0usize;
    let mut segment = None;
    for i in 0..e_shnum {
        let off = e_shoff + i * e_shentsize;
        let sh_type = read_u32(bytes, off + 0x4);
        if sh_type == SHT_STRTAB {
            // Remember the file offset of the most recent string table; section
            // names below are looked up in it.
            strtab_off = read_u64(bytes, off + 0x18) as usize;
        }
        if sh_type != SHT_PROGBITS {
            continue;
        }
        // It is a SHT_PROGBITS, but we need to check sh_name to ensure it is our function
        let name_off = strtab_off + read_u32(bytes, off) as usize;
        let name_len = bytes[name_off..]
            .iter()
            .position(|&b| b == 0)
            .expect("section name is NUL-terminated");
        if &bytes[name_off..name_off + name_len] != TEXT_ALL {
            continue;
        }

        assert!(segment.is_none());
        // Functions was added at emit_debugsections_image as .text.all.
        // Patch vaddr (sh_addr), and save file location and its size.
        write_u64(bytes, off + 0x10, code_ptr as u64);
        let sh_offset = read_u64(bytes, off + 0x18);
        let sh_size = read_u64(bytes, off + 0x20);
        segment = Some((sh_offset, sh_size));
        // Fix name too: cut it to just ".text"
        bytes[name_off + ".text".len()] = 0;
    }

    let (sh_offset, sh_size) = segment.expect(".text.all section is present");

    // LLDB wants segment with virtual address set, placing them at the end of ELF.
    let ph_off = bytes.len();
    let mut phdr = [0u8; PH_ENTRY_SIZE];
    write_u32(&mut phdr, 0x0, PT_LOAD); // p_type
    write_u64(&mut phdr, 0x8, sh_offset); // p_offset
    write_u64(&mut phdr, 0x10, code_ptr as u64); // p_vaddr
    write_u64(&mut phdr, 0x18, code_ptr as u64); // p_paddr
    write_u64(&mut phdr, 0x20, sh_size); // p_filesz
    write_u64(&mut phdr, 0x28, sh_size); // p_memsz
    bytes.extend_from_slice(&phdr);

    // It is somewhat loadable ELF file at this moment.
    // Update e_type, e_phoff and e_phnum.
    write_u16(bytes, 0x10, ET_DYN);
    write_u64(bytes, 0x20, ph_off as u64);
    write_u16(bytes, 0x38, 1);
}
Loading