diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..361f8f81a --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/target +/program_artifacts diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 000000000..ec257c825 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,169 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "elf" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4445909572dbd556c457c849c4ca58623d84b27c8fff1e74b0b4227d8b90d17b" + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", + "serde", +] + +[[package]] +name = "null-vm" +version = "0.1.0" +dependencies = [ + "elf", + "hashbrown", + "thiserror", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "proc-macro2" +version = "1.0.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "syn" +version = "2.0.111" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
# Builds the guest programs (RISC-V assembly and Rust) into ELF artifacts and
# runs the VM test suites against them.

ASM_PROGRAMS_DIR=./programs/asm
ASM_ARTIFACTS_DIR=./program_artifacts/asm

RUST_PROGRAMS_DIR=./programs/rust
RUST_ARTIFACTS_DIR=./program_artifacts/rust


ASM_PROGRAMS = $(wildcard $(ASM_PROGRAMS_DIR)/*.s)
ARTIFACTS_ASM = $(patsubst $(ASM_PROGRAMS_DIR)/%.s, $(ASM_ARTIFACTS_DIR)/%.elf, $(ASM_PROGRAMS))

# Every directory under programs/rust that contains a Cargo.toml is one program;
# the directory name must match the package name (it is used as the binary name).
RUST_PROGRAM_DIRS := $(dir $(wildcard $(RUST_PROGRAMS_DIR)/*/Cargo.toml))
RUST_PROGRAMS := $(notdir $(basename $(RUST_PROGRAM_DIRS:%/=%)))
RUST_ARTIFACTS := $(addprefix $(RUST_ARTIFACTS_DIR)/, $(addsuffix .elf, $(RUST_PROGRAMS)))

# These targets are commands, not files: without .PHONY a file or directory
# named e.g. `test` or `clean` would make them silently do nothing.
.PHONY: compile-programs-asm compile-programs-rust compile-programs clean test test-asm test-rust

compile-programs-asm: clean $(ARTIFACTS_ASM)

compile-programs-rust: clean $(RUST_ARTIFACTS)

compile-programs: compile-programs-asm compile-programs-rust

# Compile assembly .s -> .o
$(ASM_ARTIFACTS_DIR)/%.o: $(ASM_PROGRAMS_DIR)/%.s
	clang --target=riscv32 -c $< -o $@

# Link assembly .o -> .elf
$(ASM_ARTIFACTS_DIR)/%.elf: $(ASM_ARTIFACTS_DIR)/%.o
	riscv64-unknown-elf-ld -m elf32lriscv $< -o $@ -e main

# Compile rust
$(RUST_ARTIFACTS_DIR)/%.elf: $(RUST_PROGRAMS_DIR)/%/Cargo.toml
	cd $(RUST_PROGRAMS_DIR)/$* && \
	cargo +nightly rustc \
		--target riscv32im-unknown-none-elf \
		-Z build-std=core,compiler_builtins \
		-- --emit asm -C debuginfo=0 -C link-arg=-e -C link-arg=main
	cp $(RUST_PROGRAMS_DIR)/$*/target/riscv32im-unknown-none-elf/debug/$* $@
	rm -rf $(RUST_PROGRAMS_DIR)/$*/target

# Wipes and recreates BOTH artifact directories (asm and rust).
clean:
	-rm -rf $(ASM_ARTIFACTS_DIR)
	mkdir -p $(ASM_ARTIFACTS_DIR)
	-rm -rf $(RUST_ARTIFACTS_DIR)
	mkdir -p $(RUST_ARTIFACTS_DIR)

test: compile-programs
	cargo test

test-asm: compile-programs-asm
	cargo test --test asm

test-rust: compile-programs-rust
	cargo test --test rust
+Then add the corresponding test under `tests/rust.rs` + +You can run it with + +`make test` diff --git a/programs/asm/basic_program.s b/programs/asm/basic_program.s new file mode 100644 index 000000000..16a368373 --- /dev/null +++ b/programs/asm/basic_program.s @@ -0,0 +1,8 @@ + .attribute 5, "rv32i2p1_m2p0_zmmul1p0" +.Lfunc_end0: + .globl main +main: + addi a0, zero, 0 + jalr zero, 0(ra) +.Lfunc_end1: + .size main, .Lfunc_end1-main diff --git a/programs/rust/basic_rust/Cargo.lock b/programs/rust/basic_rust/Cargo.lock new file mode 100644 index 000000000..475e2a1bf --- /dev/null +++ b/programs/rust/basic_rust/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "basic_rust" +version = "0.1.0" diff --git a/programs/rust/basic_rust/Cargo.toml b/programs/rust/basic_rust/Cargo.toml new file mode 100644 index 000000000..ad1faaaaf --- /dev/null +++ b/programs/rust/basic_rust/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "basic_rust" +version = "0.1.0" +edition = "2024" + +[dependencies] diff --git a/programs/rust/basic_rust/src/main.rs b/programs/rust/basic_rust/src/main.rs new file mode 100644 index 000000000..2b7adc2a9 --- /dev/null +++ b/programs/rust/basic_rust/src/main.rs @@ -0,0 +1,14 @@ +#![no_std] +#![no_main] + +use core::panic::PanicInfo; + +#[panic_handler] +fn panic(_info: &PanicInfo) -> ! 
{ + loop {} +} + +#[unsafe(export_name = "main")] +pub fn main() -> i32 { + return 0; +} diff --git a/src/elf.rs b/src/elf.rs new file mode 100644 index 000000000..91c734afe --- /dev/null +++ b/src/elf.rs @@ -0,0 +1,114 @@ +use std::collections::BTreeMap; + +use elf::{ + ElfBytes, + abi::{EM_RISCV, ET_EXEC, PT_LOAD}, + endian::LittleEndian, + file::Class, +}; + +pub struct Elf { + pub entry_point: u32, + + pub image: BTreeMap, +} +pub(crate) const WORD_SIZE: u32 = 4; +pub const MAX_MEMORY_SIZE: u32 = u32::MAX; +pub const MAX_SEGMENTS: usize = 256; + +#[derive(Debug, thiserror::Error)] +pub enum ElfError { + #[error(transparent)] + Parse(#[from] elf::ParseError), + #[error("Not a 32-bit ELF")] + Not32Bit, + #[error("Not a RISC-V ELF")] + NotRiscV, + #[error("ELF is not executable")] + NotExecutable, + #[error("Entrypoint is invalid")] + InvalidEntryPoint, + #[error("ELF has no segments")] + NoSegments, + #[error("ELF has too many segments")] + TooManySegments, + #[error("Segment file size is too large")] + FileSizeTooLarge, + #[error("Segment memory size is too large")] + MemSizeTooLarge, + #[error("Segment virtual address is too large")] + VAddrTooLarge, + #[error("Segment virtual address is unaligned")] + UnalignedVAddr, + #[error("Segment offset is too large")] + OffsetTooLarge, + #[error("Segment address is too large")] + AddrTooLarge, + #[error("Segment offset is invalid")] + InvalidOffset, +} + +impl Elf { + pub fn load(input: &[u8]) -> Result { + let mut image: BTreeMap = BTreeMap::new(); + let elf = ElfBytes::::minimal_parse(input)?; + if elf.ehdr.class != Class::ELF32 { + return Err(ElfError::Not32Bit); + } + if elf.ehdr.e_machine != EM_RISCV { + return Err(ElfError::NotRiscV); + } + if elf.ehdr.e_type != ET_EXEC { + return Err(ElfError::NotExecutable); + } + let entry_point: u32 = elf + .ehdr + .e_entry + .try_into() + .map_err(|_| ElfError::InvalidEntryPoint)?; + if !entry_point.is_multiple_of(WORD_SIZE) { + return Err(ElfError::InvalidEntryPoint); + } + 
let segments = elf.segments().ok_or(ElfError::NoSegments)?; + if segments.len() > MAX_SEGMENTS { + return Err(ElfError::TooManySegments); + } + for segment in segments.iter().filter(|segment| segment.p_type == PT_LOAD) { + let file_size: u32 = segment + .p_filesz + .try_into() + .map_err(|_| ElfError::FileSizeTooLarge)?; + let mem_size: u32 = segment + .p_memsz + .try_into() + .map_err(|_| ElfError::MemSizeTooLarge)?; + let vaddr: u32 = segment + .p_vaddr + .try_into() + .map_err(|_| ElfError::VAddrTooLarge)?; + if !vaddr.is_multiple_of(WORD_SIZE) { + return Err(ElfError::UnalignedVAddr); + } + let offset: u32 = segment + .p_offset + .try_into() + .map_err(|_| ElfError::OffsetTooLarge)?; + for i in (0..mem_size).step_by(WORD_SIZE as usize) { + let addr = vaddr.checked_add(i).ok_or(ElfError::AddrTooLarge)?; + if i >= file_size { + image.insert(addr, 0); + } else { + let mut word = 0; + let len = (file_size - i).min(WORD_SIZE); + for j in 0..len { + let offset = (offset + i + j) as usize; + let byte = input.get(offset).ok_or(ElfError::InvalidOffset)?; + word |= (*byte as u32) << (j * 8); + } + image.insert(addr, word); + } + } + } + Ok(Self { entry_point, image }) + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 000000000..cfb4f6880 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,2 @@ +pub mod elf; +pub mod vm; diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 000000000..24e74d38e --- /dev/null +++ b/src/main.rs @@ -0,0 +1,12 @@ +use null_vm::{elf::Elf, vm::execution::run_program}; + +fn main() { + println!("Reading elf"); + let elf_data = std::fs::read("./program_artifacts/asm/basic_program.elf").unwrap(); + let program = Elf::load(&elf_data).unwrap(); + println!("Program entry: 0x{:08x}", program.entry_point); + program.image.iter().for_each(|(addr, word)| { + println!("0x{addr:08x}: 0x{word:08x}"); + }); + run_program(program.image, program.entry_point); +} diff --git a/src/vm/execution.rs b/src/vm/execution.rs new file mode 100644 
index 000000000..aaa5d8f4c --- /dev/null +++ b/src/vm/execution.rs @@ -0,0 +1,139 @@ +use std::{ + collections::BTreeMap, + fmt::{Debug, Display}, +}; + +use crate::vm::instructions::{ArithOp, Comparison, Instruction, LoadStoreWidth}; + +pub fn run_program(instruction_map: BTreeMap, entrypoint: u32) -> (u32, u32) { + let mut memory = Memory::default(); + load_program(instruction_map, &mut memory); + run_from_entrypoint(&mut memory, entrypoint) +} + +fn load_program(instruction_map: BTreeMap, memory: &mut Memory) { + for (addr, instruction) in instruction_map { + memory.0.insert(addr, instruction); + } +} + +fn run_from_entrypoint(memory: &mut Memory, entrypoint: u32) -> (u32, u32) { + let mut pc = entrypoint; + let mut registers = Registers::default(); + while pc != registers.0[1] { + let next_instruction = memory.0[&pc]; + let instruction = Instruction::parse(next_instruction); + run_instruction(&instruction, &mut registers, &mut pc, memory); + } + println!("Final Register Values:\n {}", ®isters); + let return_values = (registers.0[10], registers.0[11]); + println!("Return Values: {return_values:?}"); + return_values +} + +// Toy Memory, TODO: Make expandable memory +#[derive(Default, Debug)] +struct Memory(BTreeMap); + +#[derive(Default, Debug)] +struct Registers([u32; 32]); +// Registers: +// 0x zero +// a0-ax function arguments: 0x10 -etc +// 0x1 return address (ra) +// +impl Display for Registers { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, "Registers:")?; + writeln!(f, "ReturnAddress(ra): {}", self.0[1])?; + writeln!(f, "StackPointer(sp): {}", self.0[2])?; + // Not used for now + // writeln!(f, "GlobalPointer(gp): {}", self.0[2])?; + // writeln!(f, "ThreadPointer(tp): {}", self.0[3])?; + let function_arguments = self.0[10..17] + .iter() + .enumerate() + .map(|(i, val)| match i { + i @ 0..=1 => format!("a{i} (return value {i}) : {val} "), + i => format!("a{i}: {val} "), + }) + .collect::>() + .concat(); + writeln!(f, 
"FunctionArguments: {function_arguments}")?; + // TODO: Add other registers as we use them + Ok(()) + } +} + +fn run_instruction( + inst: &Instruction, + registers: &mut Registers, + pc: &mut u32, + memory: &mut Memory, +) { + *pc += 4; + match inst { + Instruction::ArithImm { dst, src, imm, op } => { + let (a, b) = (registers.0[*src as usize], imm); + let res = match op { + ArithOp::Add => a + b, + _ => unimplemented!(), + }; + registers.0[*dst as usize] = res; + } + Instruction::JumpAndLinkRegister { dst, base, offset } => { + registers.0[*dst as usize] = *pc; + *pc = registers.0[*base as usize] + offset; + } + Instruction::JumpAndLink { dst, offset } => { + registers.0[*dst as usize] = *pc; + *pc += offset; + } + Instruction::Store { + src, + offset, + base, + width, + } => { + let value = registers.0[*src as usize]; + let value = match width { + LoadStoreWidth::Byte => todo!(), + LoadStoreWidth::Half => todo!(), + LoadStoreWidth::Word => value, + }; + memory.0.insert(*base + *offset, value); + } + Instruction::Load { + dst, + offset, + base, + width, + } => { + let value = memory.0[&(*base + *offset)]; + let value = match width { + LoadStoreWidth::Byte => todo!(), + LoadStoreWidth::Half => todo!(), + LoadStoreWidth::Word => value, + }; + registers.0[*dst as usize] = value; + } + Instruction::Branch { + src1, + src2, + cond, + offset, + } => { + let (a, b) = (registers.0[*src1 as usize], registers.0[*src2 as usize]); + let cmp_result = match cond { + Comparison::Equal => a == b, + Comparison::NotEqual => a != b, + Comparison::LessThan => a < b, + Comparison::GreaterOrEqual => a >= b, + }; + if cmp_result { + *pc += offset + } + } + _ => unimplemented!(), + } +} diff --git a/src/vm/instructions.rs b/src/vm/instructions.rs new file mode 100644 index 000000000..36e2c0fa7 --- /dev/null +++ b/src/vm/instructions.rs @@ -0,0 +1,365 @@ +// Opcodes +const ARITH_OPCODE: u32 = 0b0110011; +const ARITH_IMM_OPCODE: u32 = 0b0010011; +const LOAD_OPCODE: u32 = 0b0000011; +const 
// Opcodes (bits 6..0 of every instruction word)
const ARITH_OPCODE: u32 = 0b0110011;
const ARITH_IMM_OPCODE: u32 = 0b0010011;
const LOAD_OPCODE: u32 = 0b0000011;
const STORE_OPCODE: u32 = 0b0100011;
const BRANCH_OPCODE: u32 = 0b1100011;
const JUMP_AND_LINK_REGISTER_OPCODE: u32 = 0b1100111;
const JUMP_AND_LINK_OPCODE: u32 = 0b1101111;

// TODO: consider using num_enum dep to replace TryFrom / using the constants here
/// Major opcode groups understood by the decoder.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Opcode {
    Arith,
    ArithImm,
    Load,
    Store,
    Branch,
    JumpAndLinkRegister,
    JumpAndLink,
}

impl TryFrom<u32> for Opcode {
    type Error = ();

    /// Maps the 7-bit opcode field to an [`Opcode`]; unknown values yield `Err(())`.
    fn try_from(value: u32) -> Result<Self, Self::Error> {
        match value {
            ARITH_OPCODE => Ok(Opcode::Arith),
            ARITH_IMM_OPCODE => Ok(Opcode::ArithImm),
            LOAD_OPCODE => Ok(Opcode::Load),
            STORE_OPCODE => Ok(Opcode::Store),
            BRANCH_OPCODE => Ok(Opcode::Branch),
            JUMP_AND_LINK_REGISTER_OPCODE => Ok(Opcode::JumpAndLinkRegister),
            JUMP_AND_LINK_OPCODE => Ok(Opcode::JumpAndLink),
            _ => Err(()),
        }
    }
}

/// The base instruction encodings; each one lays out its fields differently.
enum InstructionFormat {
    R,
    I,
    S,
    B,
    U,
    J,
}

impl Opcode {
    /// Encoding used by instructions with this opcode.
    fn instruction_format(&self) -> InstructionFormat {
        // No wildcard arm: adding an `Opcode` variant must fail to compile here.
        match self {
            Opcode::Arith => InstructionFormat::R,
            Opcode::ArithImm | Opcode::Load | Opcode::JumpAndLinkRegister => InstructionFormat::I,
            Opcode::Store => InstructionFormat::S,
            Opcode::Branch => InstructionFormat::B,
            Opcode::JumpAndLink => InstructionFormat::J,
        }
    }
}

/// ALU operation selected by an arithmetic instruction.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ArithOp {
    Add,
    Sub,
    Xor,
    Or,
    And,
    ShiftLeftLogical,
    ShiftRightLogical,
    ShiftRightArith,
    SetLessThan,
    SetLessThanU,
}

/// Access width of a load or store.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LoadStoreWidth {
    Byte,
    Half,
    Word,
}

impl LoadStoreWidth {
    /// Decodes the width from a load/store `funct3` field.
    ///
    /// # Panics
    ///
    /// Panics for any `funct3` other than 0 (byte), 1 (half) or 2 (word).
    fn from_func3(func3: u32) -> LoadStoreWidth {
        const LOAD_STORE_BYTE_WIDTH: u32 = 0x0;
        const LOAD_STORE_HALF_WIDTH: u32 = 0x1;
        const LOAD_STORE_WORD_WIDTH: u32 = 0x2;
        match func3 {
            LOAD_STORE_BYTE_WIDTH => LoadStoreWidth::Byte,
            LOAD_STORE_HALF_WIDTH => LoadStoreWidth::Half,
            LOAD_STORE_WORD_WIDTH => LoadStoreWidth::Word,
            _ => panic!("Invalid Width"),
        }
    }
}

/// Condition tested by a branch instruction.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Comparison {
    Equal,
    NotEqual,
    LessThan,
    GreaterOrEqual,
}

/// A decoded instruction.
///
/// `dst`, `src*` and `base` are register numbers (0..=31). `imm` and `offset`
/// hold the sign-extended immediate as a two's-complement `u32` (so -4 is
/// `0xFFFF_FFFC`); combine them with addresses using wrapping arithmetic.
/// For shift-immediate operations `imm` is the 5-bit shift amount.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Instruction {
    Arith {
        dst: u32,
        src1: u32,
        src2: u32,
        op: ArithOp,
    },
    ArithImm {
        dst: u32,
        src: u32,
        imm: u32,
        op: ArithOp,
    },
    JumpAndLink {
        dst: u32,
        offset: u32,
    },
    JumpAndLinkRegister {
        base: u32,
        dst: u32,
        offset: u32,
    },
    Store {
        src: u32,
        offset: u32,
        base: u32,
        width: LoadStoreWidth,
    },
    Load {
        dst: u32,
        offset: u32,
        base: u32,
        width: LoadStoreWidth,
    },
    Branch {
        src1: u32,
        src2: u32,
        cond: Comparison,
        offset: u32,
    },
}

const OPCODE_MASK: u32 = 0x0000007f;
const FUNC7_MASK: u32 = 0xfe000000;
const FUNC3_MASK: u32 = 0x00007000;
const RS1_MASK: u32 = 0x000f8000;
const RS2_MASK: u32 = 0x01f00000;
const RD_MASK: u32 = 0x00000f80;
/// Low five bits of an I-type immediate: the shift amount of slli/srli/srai.
const SHAMT_MASK: u32 = 0x1f;

impl Instruction {
    /// Decodes one 32-bit instruction word.
    ///
    /// # Panics
    ///
    /// Panics when the opcode or a function field is unknown or not supported yet.
    pub fn parse(instruction: u32) -> Instruction {
        let opcode = parse_opcode(instruction);
        match opcode.instruction_format() {
            InstructionFormat::R => parse_r_instruction(instruction, opcode),
            InstructionFormat::I => parse_i_instruction(instruction, opcode),
            InstructionFormat::S => parse_s_instruction(instruction, opcode),
            InstructionFormat::B => parse_b_instruction(instruction, opcode),
            InstructionFormat::J => parse_j_instruction(instruction, opcode),
            InstructionFormat::U => unimplemented!(),
        }
    }
}

/// Extracts the opcode field; panics with the raw value when it is unknown.
fn parse_opcode(instruction: u32) -> Opcode {
    let opcode = instruction & OPCODE_MASK;
    Opcode::try_from(opcode).unwrap_or_else(|()| panic!("Unknown Opcode: {opcode}"))
}

/// Sign-extends the low `bits` bits of `value` to 32 bits (result kept as `u32`).
fn sign_extend(value: u32, bits: u32) -> u32 {
    let shift = 32 - bits;
    // Move the field's top bit into bit 31, then shift back arithmetically.
    (((value << shift) as i32) >> shift) as u32
}

// Function Identifiers, in (func3, func7) order
const ADD_FUNC_IDENTIFIERS: (u32, u32) = (0x0, 0x00);
const SUB_FUNC_IDENTIFIERS: (u32, u32) = (0x0, 0x20);
const XOR_FUNC_IDENTIFIERS: (u32, u32) = (0x4, 0x00);
const OR_FUNC_IDENTIFIERS: (u32, u32) = (0x6, 0x00);
const AND_FUNC_IDENTIFIERS: (u32, u32) = (0x7, 0x00);
const SHL_FUNC_IDENTIFIERS: (u32, u32) = (0x1, 0x00);
const SRL_FUNC_IDENTIFIERS: (u32, u32) = (0x5, 0x00);
const SRA_FUNC_IDENTIFIERS: (u32, u32) = (0x5, 0x20);
const SLT_FUNC_IDENTIFIERS: (u32, u32) = (0x2, 0x00);
const SLTU_FUNC_IDENTIFIERS: (u32, u32) = (0x3, 0x00);

// R-Type Instruction Format
// |func7 | rs2  | rs1  |funct3| rd  |opcode|
// |31..25|24..20|19..15|14..12|11..7| 6..0 |
fn parse_r_instruction(instruction: u32, opcode: Opcode) -> Instruction {
    let func7 = (instruction & FUNC7_MASK) >> 25;
    let func3 = (instruction & FUNC3_MASK) >> 12;
    let rs2 = (instruction & RS2_MASK) >> 20;
    let rs1 = (instruction & RS1_MASK) >> 15;
    let rd = (instruction & RD_MASK) >> 7;
    match opcode {
        Opcode::Arith => {
            let operation = match (func3, func7) {
                ADD_FUNC_IDENTIFIERS => ArithOp::Add,
                SUB_FUNC_IDENTIFIERS => ArithOp::Sub,
                XOR_FUNC_IDENTIFIERS => ArithOp::Xor,
                OR_FUNC_IDENTIFIERS => ArithOp::Or,
                AND_FUNC_IDENTIFIERS => ArithOp::And,
                SHL_FUNC_IDENTIFIERS => ArithOp::ShiftLeftLogical,
                SRL_FUNC_IDENTIFIERS => ArithOp::ShiftRightLogical,
                SRA_FUNC_IDENTIFIERS => ArithOp::ShiftRightArith,
                SLT_FUNC_IDENTIFIERS => ArithOp::SetLessThan,
                SLTU_FUNC_IDENTIFIERS => ArithOp::SetLessThanU,
                _ => panic!("Unknown arith opcode identifier"),
            };
            Instruction::Arith {
                dst: rd,
                src1: rs1,
                src2: rs2,
                op: operation,
            }
        }
        _ => panic!("Invalid Instruction Encoding"),
    }
}

// Function Identifiers (func3)
const ADD_FUNC_IDENTIFIER: u32 = 0x0;
const XOR_FUNC_IDENTIFIER: u32 = 0x4;
const OR_FUNC_IDENTIFIER: u32 = 0x6;
const AND_FUNC_IDENTIFIER: u32 = 0x7;
const SHL_FUNC_IDENTIFIER: u32 = 0x1;
const SR_FUNC_IDENTIFIER: u32 = 0x5;
const SLT_FUNC_IDENTIFIER: u32 = 0x2;
const SLTU_FUNC_IDENTIFIER: u32 = 0x3;

// I-Type Instruction Format
// |imm[11:0]| rs1  |funct3| rd  |opcode|
// | 31..20  |19..15|14..12|11..7| 6..0 |
fn parse_i_instruction(instruction: u32, opcode: Opcode) -> Instruction {
    let func3 = (instruction & FUNC3_MASK) >> 12;
    let rs1 = (instruction & RS1_MASK) >> 15;
    let rd = (instruction & RD_MASK) >> 7;
    // The raw 12-bit field; shifts split it into a function id and a shift amount.
    let raw_imm = instruction >> 20;
    let imm = sign_extend(raw_imm, 12);
    match opcode {
        Opcode::ArithImm => {
            let (operation, imm) = match func3 {
                ADD_FUNC_IDENTIFIER => (ArithOp::Add, imm),
                XOR_FUNC_IDENTIFIER => (ArithOp::Xor, imm),
                OR_FUNC_IDENTIFIER => (ArithOp::Or, imm),
                AND_FUNC_IDENTIFIER => (ArithOp::And, imm),
                SHL_FUNC_IDENTIFIER => {
                    // imm[11:5] must be zero for slli.
                    assert!(raw_imm >> 5 == 0);
                    (ArithOp::ShiftLeftLogical, raw_imm & SHAMT_MASK)
                }
                SR_FUNC_IDENTIFIER => {
                    // imm[11:5] tells logical (0x00) from arithmetic (0x20) shifts.
                    let operation = match raw_imm >> 5 {
                        0x00 => ArithOp::ShiftRightLogical,
                        0x20 => ArithOp::ShiftRightArith,
                        _ => unimplemented!(),
                    };
                    (operation, raw_imm & SHAMT_MASK)
                }
                SLT_FUNC_IDENTIFIER => (ArithOp::SetLessThan, imm),
                SLTU_FUNC_IDENTIFIER => (ArithOp::SetLessThanU, imm),
                _ => panic!("Unknown arith opcode identifier"),
            };
            Instruction::ArithImm {
                dst: rd,
                src: rs1,
                imm,
                op: operation,
            }
        }
        Opcode::JumpAndLinkRegister => {
            if func3 != 0x00 {
                panic!("Invalid JALR Instruction")
            };
            Instruction::JumpAndLinkRegister {
                base: rs1,
                dst: rd,
                offset: imm,
            }
        }
        Opcode::Load => Instruction::Load {
            dst: rd,
            offset: imm,
            base: rs1,
            width: LoadStoreWidth::from_func3(func3),
        },
        _ => panic!("Invalid Instruction Encoding"),
    }
}

// S-Type Instruction Format
// |imm[11:5]| rs2  | rs1  |funct3|imm[4:0]|opcode|
// | 31..25  |24..20|19..15|14..12| 11..7  | 6..0 |
fn parse_s_instruction(instruction: u32, opcode: Opcode) -> Instruction {
    let func7 = (instruction & FUNC7_MASK) >> 25;
    let func3 = (instruction & FUNC3_MASK) >> 12;
    let rs2 = (instruction & RS2_MASK) >> 20;
    let rs1 = (instruction & RS1_MASK) >> 15;
    let rd = (instruction & RD_MASK) >> 7;
    // The func7 slot carries imm[11:5] and the rd slot carries imm[4:0].
    let imm = sign_extend((func7 << 5) | rd, 12);
    match opcode {
        Opcode::Store => Instruction::Store {
            src: rs2,
            offset: imm,
            base: rs1,
            width: LoadStoreWidth::from_func3(func3),
        },
        _ => panic!("Invalid Instruction Encoding"),
    }
}

// Function Identifiers (func3)
const BRANCH_EQ_IDENTIFIER: u32 = 0x0;
const BRANCH_NEQ_IDENTIFIER: u32 = 0x1;
const BRANCH_LT_IDENTIFIER: u32 = 0x4;
const BRANCH_GE_IDENTIFIER: u32 = 0x5;

// B-Type Instruction Format
// |imm[12|10:5]| rs2  | rs1  |funct3|imm[4:1|11]|opcode|
// |   31..25   |24..20|19..15|14..12|   11..7   | 6..0 |
fn parse_b_instruction(instruction: u32, opcode: Opcode) -> Instruction {
    let func3 = (instruction & FUNC3_MASK) >> 12;
    let rs2 = (instruction & RS2_MASK) >> 20;
    let rs1 = (instruction & RS1_MASK) >> 15;
    let raw_imm = ((instruction >> 19) & 0x1000) // bit 31     -> imm[12]
        | ((instruction >> 20) & 0x7e0)          // bits 30..25 -> imm[10:5]
        | ((instruction >> 7) & 0x1e)            // bits 11..8  -> imm[4:1]
        | ((instruction << 4) & 0x800); //          bit 7      -> imm[11]
    let imm = sign_extend(raw_imm, 13);
    match opcode {
        Opcode::Branch => {
            let comparison = match func3 {
                BRANCH_EQ_IDENTIFIER => Comparison::Equal,
                BRANCH_NEQ_IDENTIFIER => Comparison::NotEqual,
                BRANCH_LT_IDENTIFIER => Comparison::LessThan,
                BRANCH_GE_IDENTIFIER => Comparison::GreaterOrEqual,
                // TODO: Missing bltu & bgeu
                _ => unimplemented!(),
            };
            Instruction::Branch {
                src1: rs1,
                src2: rs2,
                cond: comparison,
                offset: imm,
            }
        }
        _ => panic!("Unknown Opcode"),
    }
}

// J-Type Instruction Format
// |imm[20|10:1|11|19:12] | rd  |opcode|
// |        31..12        |11..7| 6..0 |
fn parse_j_instruction(instruction: u32, opcode: Opcode) -> Instruction {
    let raw_imm = ((instruction >> 11) & 0x100000) // bit 31      -> imm[20]
        | (instruction & 0xff000)                  // bits 19..12 -> imm[19:12]
        | ((instruction >> 9) & 0x800)             // bit 20      -> imm[11]
        | ((instruction >> 20) & 0x7fe); //           bits 30..21 -> imm[10:1]
    let imm = sign_extend(raw_imm, 21);
    let rd = (instruction & RD_MASK) >> 7;
    match opcode {
        Opcode::JumpAndLink => Instruction::JumpAndLink {
            dst: rd,
            offset: imm,
        },
        _ => unimplemented!(),
    }
}
/// Runs the compiled `basic_rust` guest program and checks that its `main`
/// returns 0 in `a0`.
///
/// The ELF is produced by the Makefile, so the artifact must exist before this
/// test runs.
#[test]
fn test_basic_rust() {
    println!("Testing basic_rust.elf");
    let elf_data = std::fs::read("./program_artifacts/rust/basic_rust.elf").expect(
        "cannot read ./program_artifacts/rust/basic_rust.elf; \
         build it first with `make compile-programs-rust`",
    );
    let program = Elf::load(&elf_data).expect("basic_rust.elf should be a loadable RISC-V ELF");
    println!("Program entry: 0x{:08x}", program.entry_point);
    for (addr, word) in &program.image {
        println!("0x{:08x}: 0x{:08x}", addr, word);
    }

    let results = run_program(program.image, program.entry_point);

    // assert_eq! reports the actual value on failure, unlike assert!(a == b).
    assert_eq!(results.0, 0, "basic_rust main should return 0 in a0");
}