From 6c2cc0b66fca68ed962f99cb7fc8238d367883b7 Mon Sep 17 00:00:00 2001 From: Gianbelinche <39842759+gianbelinche@users.noreply.github.com> Date: Wed, 10 Dec 2025 15:32:57 -0300 Subject: [PATCH 01/10] Add elf parsing and testing --- .gitignore | 2 + Cargo.lock | 169 +++++++++++++++++++++++++++++++++++++++ Cargo.toml | 9 +++ Makefile | 23 ++++++ programs/basic_program.s | 8 ++ src/elf.rs | 114 ++++++++++++++++++++++++++ src/lib.rs | 1 + src/main.rs | 11 +++ tests/asm.rs | 14 ++++ 9 files changed, 351 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 Makefile create mode 100644 programs/basic_program.s create mode 100644 src/elf.rs create mode 100644 src/lib.rs create mode 100644 src/main.rs create mode 100644 tests/asm.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..361f8f81a --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/target +/program_artifacts diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 000000000..ec257c825 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,169 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "elf" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4445909572dbd556c457c849c4ca58623d84b27c8fff1e74b0b4227d8b90d17b" + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", + "serde", +] + +[[package]] +name = "null-vm" +version = "0.1.0" +dependencies = [ + "elf", + "hashbrown", + "thiserror", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "proc-macro2" +version = "1.0.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "serde" +version = "1.0.228" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "syn" +version = "2.0.111" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "zerocopy" +version = "0.8.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 000000000..deea1b4bd --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "null-vm" +version = "0.1.0" +edition = "2024" + +[dependencies] +elf = "0.7.4" +hashbrown = { version = "0.14.5", features = ["serde", "inline-more"] } +thiserror = "1.0.68" diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..d39d6b09c --- /dev/null +++ b/Makefile @@ -0,0 +1,23 @@ +PROGRAMS_DIR=./programs +ARTIFACTS_DIR=./program_artifacts + + +ASM_PROGRAMS = $(wildcard $(PROGRAMS_DIR)/*.s) +ARTIFACTS_ASM = $(patsubst $(PROGRAMS_DIR)/%.s, $(ARTIFACTS_DIR)/%.elf, $(ASM_PROGRAMS)) + +compile-programs-asm: clean $(ARTIFACTS_ASM) + +# Compile assembly .s -> .o +$(ARTIFACTS_DIR)/%.o: $(PROGRAMS_DIR)/%.s + clang --target=riscv32 -c $< -o $@ + +# Link assembly .o -> .elf +$(ARTIFACTS_DIR)/%.elf: $(ARTIFACTS_DIR)/%.o + riscv64-unknown-elf-ld -m elf32lriscv $< -o $@ -e main + +clean: + -rm -rf $(ARTIFACTS_DIR) + mkdir -p $(ARTIFACTS_DIR) + +test: compile-programs-asm + cargo test diff --git a/programs/basic_program.s b/programs/basic_program.s new file mode 100644 index 000000000..16a368373 --- /dev/null +++ b/programs/basic_program.s @@ -0,0 +1,8 @@ + .attribute 5, "rv32i2p1_m2p0_zmmul1p0" +.Lfunc_end0: + .globl main +main: + addi a0, zero, 0 + jalr zero, 0(ra) +.Lfunc_end1: + .size main, .Lfunc_end1-main diff --git a/src/elf.rs b/src/elf.rs new file mode 100644 index 000000000..91c734afe --- /dev/null +++ b/src/elf.rs @@ -0,0 +1,114 @@ +use std::collections::BTreeMap; + +use elf::{ + ElfBytes, + abi::{EM_RISCV, ET_EXEC, PT_LOAD}, + 
endian::LittleEndian, + file::Class, +}; + +pub struct Elf { + pub entry_point: u32, + + pub image: BTreeMap, +} +pub(crate) const WORD_SIZE: u32 = 4; +pub const MAX_MEMORY_SIZE: u32 = u32::MAX; +pub const MAX_SEGMENTS: usize = 256; + +#[derive(Debug, thiserror::Error)] +pub enum ElfError { + #[error(transparent)] + Parse(#[from] elf::ParseError), + #[error("Not a 32-bit ELF")] + Not32Bit, + #[error("Not a RISC-V ELF")] + NotRiscV, + #[error("ELF is not executable")] + NotExecutable, + #[error("Entrypoint is invalid")] + InvalidEntryPoint, + #[error("ELF has no segments")] + NoSegments, + #[error("ELF has too many segments")] + TooManySegments, + #[error("Segment file size is too large")] + FileSizeTooLarge, + #[error("Segment memory size is too large")] + MemSizeTooLarge, + #[error("Segment virtual address is too large")] + VAddrTooLarge, + #[error("Segment virtual address is unaligned")] + UnalignedVAddr, + #[error("Segment offset is too large")] + OffsetTooLarge, + #[error("Segment address is too large")] + AddrTooLarge, + #[error("Segment offset is invalid")] + InvalidOffset, +} + +impl Elf { + pub fn load(input: &[u8]) -> Result { + let mut image: BTreeMap = BTreeMap::new(); + let elf = ElfBytes::::minimal_parse(input)?; + if elf.ehdr.class != Class::ELF32 { + return Err(ElfError::Not32Bit); + } + if elf.ehdr.e_machine != EM_RISCV { + return Err(ElfError::NotRiscV); + } + if elf.ehdr.e_type != ET_EXEC { + return Err(ElfError::NotExecutable); + } + let entry_point: u32 = elf + .ehdr + .e_entry + .try_into() + .map_err(|_| ElfError::InvalidEntryPoint)?; + if !entry_point.is_multiple_of(WORD_SIZE) { + return Err(ElfError::InvalidEntryPoint); + } + let segments = elf.segments().ok_or(ElfError::NoSegments)?; + if segments.len() > MAX_SEGMENTS { + return Err(ElfError::TooManySegments); + } + for segment in segments.iter().filter(|segment| segment.p_type == PT_LOAD) { + let file_size: u32 = segment + .p_filesz + .try_into() + .map_err(|_| 
ElfError::FileSizeTooLarge)?; + let mem_size: u32 = segment + .p_memsz + .try_into() + .map_err(|_| ElfError::MemSizeTooLarge)?; + let vaddr: u32 = segment + .p_vaddr + .try_into() + .map_err(|_| ElfError::VAddrTooLarge)?; + if !vaddr.is_multiple_of(WORD_SIZE) { + return Err(ElfError::UnalignedVAddr); + } + let offset: u32 = segment + .p_offset + .try_into() + .map_err(|_| ElfError::OffsetTooLarge)?; + for i in (0..mem_size).step_by(WORD_SIZE as usize) { + let addr = vaddr.checked_add(i).ok_or(ElfError::AddrTooLarge)?; + if i >= file_size { + image.insert(addr, 0); + } else { + let mut word = 0; + let len = (file_size - i).min(WORD_SIZE); + for j in 0..len { + let offset = (offset + i + j) as usize; + let byte = input.get(offset).ok_or(ElfError::InvalidOffset)?; + word |= (*byte as u32) << (j * 8); + } + image.insert(addr, word); + } + } + } + Ok(Self { entry_point, image }) + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 000000000..f6808c2aa --- /dev/null +++ b/src/lib.rs @@ -0,0 +1 @@ +pub mod elf; diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 000000000..5bf54c961 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,11 @@ +use null_vm::elf::Elf; + +fn main() { + println!("Reading elf"); + let elf_data = std::fs::read("./program_artifacts/basic_program.elf").unwrap(); + let program = Elf::load(&elf_data).unwrap(); + println!("Program entry: 0x{:08x}", program.entry_point); + program.image.iter().for_each(|(addr, word)| { + println!("0x{addr:08x}: 0x{word:08x}"); + }); +} diff --git a/tests/asm.rs b/tests/asm.rs new file mode 100644 index 000000000..566783081 --- /dev/null +++ b/tests/asm.rs @@ -0,0 +1,14 @@ +use null_vm::elf::Elf; + +#[test] +fn test_basic_program() { + println!("Testing basic_program.elf"); + let elf_data = std::fs::read("./program_artifacts/basic_program.elf").unwrap(); + let program = Elf::load(&elf_data).unwrap(); + println!("Program entry: 0x{:08x}", program.entry_point); + 
program.image.iter().for_each(|(addr, word)| { + println!("0x{:08x}: 0x{:08x}", addr, word); + }); + + // todo: execute and check result +} From dec414b25e67a269e218aadd12a7c67f439e8724 Mon Sep 17 00:00:00 2001 From: Gianbelinche <39842759+gianbelinche@users.noreply.github.com> Date: Wed, 10 Dec 2025 15:41:36 -0300 Subject: [PATCH 02/10] Rename programs dirs --- Makefile | 16 ++++++++-------- programs/{ => asm}/basic_program.s | 0 src/main.rs | 2 +- tests/asm.rs | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) rename programs/{ => asm}/basic_program.s (100%) diff --git a/Makefile b/Makefile index d39d6b09c..dd30e4d6f 100644 --- a/Makefile +++ b/Makefile @@ -1,23 +1,23 @@ -PROGRAMS_DIR=./programs -ARTIFACTS_DIR=./program_artifacts +ASM_PROGRAMS_DIR=./programs/asm +ASM_ARTIFACTS_DIR=./program_artifacts/asm -ASM_PROGRAMS = $(wildcard $(PROGRAMS_DIR)/*.s) -ARTIFACTS_ASM = $(patsubst $(PROGRAMS_DIR)/%.s, $(ARTIFACTS_DIR)/%.elf, $(ASM_PROGRAMS)) +ASM_PROGRAMS = $(wildcard $(ASM_PROGRAMS_DIR)/*.s) +ARTIFACTS_ASM = $(patsubst $(ASM_PROGRAMS_DIR)/%.s, $(ASM_ARTIFACTS_DIR)/%.elf, $(ASM_PROGRAMS)) compile-programs-asm: clean $(ARTIFACTS_ASM) # Compile assembly .s -> .o -$(ARTIFACTS_DIR)/%.o: $(PROGRAMS_DIR)/%.s +$(ASM_ARTIFACTS_DIR)/%.o: $(ASM_PROGRAMS_DIR)/%.s clang --target=riscv32 -c $< -o $@ # Link assembly .o -> .elf -$(ARTIFACTS_DIR)/%.elf: $(ARTIFACTS_DIR)/%.o +$(ASM_ARTIFACTS_DIR)/%.elf: $(ASM_ARTIFACTS_DIR)/%.o riscv64-unknown-elf-ld -m elf32lriscv $< -o $@ -e main clean: - -rm -rf $(ARTIFACTS_DIR) - mkdir -p $(ARTIFACTS_DIR) + -rm -rf $(ASM_ARTIFACTS_DIR) + mkdir -p $(ASM_ARTIFACTS_DIR) test: compile-programs-asm cargo test diff --git a/programs/basic_program.s b/programs/asm/basic_program.s similarity index 100% rename from programs/basic_program.s rename to programs/asm/basic_program.s diff --git a/src/main.rs b/src/main.rs index 5bf54c961..5ed68d292 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,7 +2,7 @@ use null_vm::elf::Elf; fn main() { 
println!("Reading elf"); - let elf_data = std::fs::read("./program_artifacts/basic_program.elf").unwrap(); + let elf_data = std::fs::read("./program_artifacts/asm/basic_program.elf").unwrap(); let program = Elf::load(&elf_data).unwrap(); println!("Program entry: 0x{:08x}", program.entry_point); program.image.iter().for_each(|(addr, word)| { diff --git a/tests/asm.rs b/tests/asm.rs index 566783081..12198cbd8 100644 --- a/tests/asm.rs +++ b/tests/asm.rs @@ -3,7 +3,7 @@ use null_vm::elf::Elf; #[test] fn test_basic_program() { println!("Testing basic_program.elf"); - let elf_data = std::fs::read("./program_artifacts/basic_program.elf").unwrap(); + let elf_data = std::fs::read("./program_artifacts/asm/basic_program.elf").unwrap(); let program = Elf::load(&elf_data).unwrap(); println!("Program entry: 0x{:08x}", program.entry_point); program.image.iter().for_each(|(addr, word)| { From c2e98c2ac7168e5e8eab7d3d2a84454f31b59b79 Mon Sep 17 00:00:00 2001 From: Gianbelinche <39842759+gianbelinche@users.noreply.github.com> Date: Wed, 10 Dec 2025 16:26:50 -0300 Subject: [PATCH 03/10] Add rust tests --- Makefile | 31 +++++++++++++++++++++++++++- README.md | 23 +++++++++++++++++++++ programs/rust/basic_rust/Cargo.lock | 7 +++++++ programs/rust/basic_rust/Cargo.toml | 6 ++++++ programs/rust/basic_rust/src/main.rs | 14 +++++++++++++ tests/rust.rs | 14 +++++++++++++ 6 files changed, 94 insertions(+), 1 deletion(-) create mode 100644 programs/rust/basic_rust/Cargo.lock create mode 100644 programs/rust/basic_rust/Cargo.toml create mode 100644 programs/rust/basic_rust/src/main.rs create mode 100644 tests/rust.rs diff --git a/Makefile b/Makefile index dd30e4d6f..2aa0d1343 100644 --- a/Makefile +++ b/Makefile @@ -1,12 +1,23 @@ ASM_PROGRAMS_DIR=./programs/asm ASM_ARTIFACTS_DIR=./program_artifacts/asm +RUST_PROGRAMS_DIR=./programs/rust +RUST_ARTIFACTS_DIR=./program_artifacts/rust + ASM_PROGRAMS = $(wildcard $(ASM_PROGRAMS_DIR)/*.s) ARTIFACTS_ASM = $(patsubst $(ASM_PROGRAMS_DIR)/%.s, 
$(ASM_ARTIFACTS_DIR)/%.elf, $(ASM_PROGRAMS)) +RUST_PROGRAM_DIRS := $(dir $(wildcard $(RUST_PROGRAMS_DIR)/*/Cargo.toml)) +RUST_PROGRAMS := $(notdir $(basename $(RUST_PROGRAM_DIRS:%/=%))) +RUST_ARTIFACTS := $(addprefix $(RUST_ARTIFACTS_DIR)/, $(addsuffix .elf, $(RUST_PROGRAMS))) + compile-programs-asm: clean $(ARTIFACTS_ASM) +compile-programs-rust: clean $(RUST_ARTIFACTS) + +compile-programs: compile-programs-asm compile-programs-rust + # Compile assembly .s -> .o $(ASM_ARTIFACTS_DIR)/%.o: $(ASM_PROGRAMS_DIR)/%.s clang --target=riscv32 -c $< -o $@ @@ -15,9 +26,27 @@ $(ASM_ARTIFACTS_DIR)/%.o: $(ASM_PROGRAMS_DIR)/%.s $(ASM_ARTIFACTS_DIR)/%.elf: $(ASM_ARTIFACTS_DIR)/%.o riscv64-unknown-elf-ld -m elf32lriscv $< -o $@ -e main +# Compile rust +$(RUST_ARTIFACTS_DIR)/%.elf: $(RUST_PROGRAMS_DIR)/%/Cargo.toml + cd $(RUST_PROGRAMS_DIR)/$* && \ + cargo +nightly rustc \ + --target riscv32im-unknown-none-elf \ + -Z build-std=core,compiler_builtins \ + -- --emit asm -C debuginfo=0 -C link-arg=-e -C link-arg=main + cp $(RUST_PROGRAMS_DIR)/$*/target/riscv32im-unknown-none-elf/debug/$* $@ + rm -rf $(RUST_PROGRAMS_DIR)/$*/target + clean: -rm -rf $(ASM_ARTIFACTS_DIR) mkdir -p $(ASM_ARTIFACTS_DIR) + -rm -rf $(RUST_ARTIFACTS_DIR) + mkdir -p $(RUST_ARTIFACTS_DIR) -test: compile-programs-asm +test: compile-programs cargo test + +test-asm: compile-programs-asm + cargo test --test asm + +test-rust: compile-programs-rust + cargo test --test rust diff --git a/README.md b/README.md index 2bec3c697..9097b0491 100644 --- a/README.md +++ b/README.md @@ -3,3 +3,26 @@ Verifiable VM made in collaboration with Lambdaclass and 3MI Non-null name TBD + +## Testing + +### ASM Tests + +In order to add a new asm test you should add the `.s` file under `programs/asm` +Then add the corresponding test under `tests/asm.rs` + +To run them you can use + +`make test` + +This will compile them and run the tests + +### Rust Tests + +In order to add a new rust test you should add the cargo project under 
`programs/rust` as a new directory. +The folder should have the same name as the `Cargo.toml` program name. +Then add the corresponding test under `tests/rust.rs` + +You can run it with + +`make test` diff --git a/programs/rust/basic_rust/Cargo.lock b/programs/rust/basic_rust/Cargo.lock new file mode 100644 index 000000000..475e2a1bf --- /dev/null +++ b/programs/rust/basic_rust/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "basic_rust" +version = "0.1.0" diff --git a/programs/rust/basic_rust/Cargo.toml b/programs/rust/basic_rust/Cargo.toml new file mode 100644 index 000000000..ad1faaaaf --- /dev/null +++ b/programs/rust/basic_rust/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "basic_rust" +version = "0.1.0" +edition = "2024" + +[dependencies] diff --git a/programs/rust/basic_rust/src/main.rs b/programs/rust/basic_rust/src/main.rs new file mode 100644 index 000000000..2b7adc2a9 --- /dev/null +++ b/programs/rust/basic_rust/src/main.rs @@ -0,0 +1,14 @@ +#![no_std] +#![no_main] + +use core::panic::PanicInfo; + +#[panic_handler] +fn panic(_info: &PanicInfo) -> ! 
{ + loop {} +} + +#[unsafe(export_name = "main")] +pub fn main() -> i32 { + return 0; +} diff --git a/tests/rust.rs b/tests/rust.rs new file mode 100644 index 000000000..ee11d1c97 --- /dev/null +++ b/tests/rust.rs @@ -0,0 +1,14 @@ +use null_vm::elf::Elf; + +#[test] +fn test_basic_rust() { + println!("Testing basic_rust.elf"); + let elf_data = std::fs::read("./program_artifacts/rust/basic_rust.elf").unwrap(); + let program = Elf::load(&elf_data).unwrap(); + println!("Program entry: 0x{:08x}", program.entry_point); + program.image.iter().for_each(|(addr, word)| { + println!("0x{:08x}: 0x{:08x}", addr, word); + }); + + // todo: execute and check result +} From 90b884c7af986e68e7b7076cb4f12e0d2d3e4a69 Mon Sep 17 00:00:00 2001 From: fmoletta Date: Wed, 10 Dec 2025 16:43:16 -0300 Subject: [PATCH 04/10] Add basic instruction decoding and execution --- src/lib.rs | 1 + src/main.rs | 3 +- src/vm/execution.rs | 129 ++++++++++++++ src/vm/instructions.rs | 377 +++++++++++++++++++++++++++++++++++++++++ src/vm/mod.rs | 2 + tests/asm.rs | 3 +- 6 files changed, 513 insertions(+), 2 deletions(-) create mode 100644 src/vm/execution.rs create mode 100644 src/vm/instructions.rs create mode 100644 src/vm/mod.rs diff --git a/src/lib.rs b/src/lib.rs index f6808c2aa..cfb4f6880 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1 +1,2 @@ pub mod elf; +pub mod vm; diff --git a/src/main.rs b/src/main.rs index 5bf54c961..2848a836a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,4 @@ -use null_vm::elf::Elf; +use null_vm::{elf::Elf, vm::execution::run_program}; fn main() { println!("Reading elf"); @@ -8,4 +8,5 @@ fn main() { program.image.iter().for_each(|(addr, word)| { println!("0x{addr:08x}: 0x{word:08x}"); }); + run_program(program.image, program.entry_point); } diff --git a/src/vm/execution.rs b/src/vm/execution.rs new file mode 100644 index 000000000..2dc8b267e --- /dev/null +++ b/src/vm/execution.rs @@ -0,0 +1,129 @@ +use std::{collections::BTreeMap, fmt::Debug}; + +use 
crate::vm::instructions::{ArithOp, Comparison, Instruction, LoadStoreWidth}; + +pub fn run_program(instruction_map: BTreeMap, entrypoint: u32) { + let mut memory = Memory::default(); + load_program(instruction_map, &mut memory); + run_from_entrypoint(&mut memory, entrypoint); +} + +fn load_program(instruction_map: BTreeMap, memory: &mut Memory) { + for (addr, instruction) in instruction_map { + memory.0.insert(addr, instruction); + } +} + +fn run_from_entrypoint(memory: &mut Memory, entrypoint: u32) { + let mut pc = entrypoint; + dbg!(&pc, &memory); + let mut registers = Registers::default(); + while pc != registers.0[1] { + let next_instruction = memory.0[&pc]; + let instruction = Instruction::parse(next_instruction); + run_instruction(&instruction, &mut registers, &mut pc, memory); + } + dbg!(®isters); + let return_values = (registers.0[10], registers.0[11]); + println!("Return Values: {return_values:?}"); +} + +// Toy Memory, TODO: Make expandable memory +#[derive(Default, Debug)] +struct Memory(BTreeMap); + +#[derive(Default)] +struct Registers([u32; 32]); +// Registers: +// 0x zero +// a0-ax function arguments: 0x10 -etc +// 0x1 return address (ra) +// +impl Debug for Registers { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + for (i, elem) in self.0.iter().enumerate() { + match i { + 1 => format!("ra: {elem}").fmt(f)?, + 2 => format!("sp: {elem}").fmt(f)?, + 3 => format!("gp: {elem}").fmt(f)?, + i @ 10..17 => format!("a{} : {}", i - 10, elem).fmt(f)?, + _ => {} + } + } + Ok(()) + } +} + +fn run_instruction( + inst: &Instruction, + registers: &mut Registers, + pc: &mut u32, + memory: &mut Memory, +) { + *pc += 4; + dbg!(inst); + match inst { + Instruction::ArithImm { dst, src, imm, op } => { + let (a, b) = (registers.0[*src as usize], imm); + let res = match op { + ArithOp::Add => a + b, + _ => unimplemented!(), + }; + registers.0[*dst as usize] = res; + } + Instruction::JumpAndLinkRegister { dst, base, offset } => { + registers.0[*dst 
as usize] = *pc; + *pc = registers.0[*base as usize] + offset; + } + Instruction::JumpAndLink { dst, offset } => { + registers.0[*dst as usize] = *pc; + *pc += offset; + } + Instruction::Store { + src, + offset, + base, + width, + } => { + let value = registers.0[*src as usize]; + let value = match width { + LoadStoreWidth::Byte => todo!(), + LoadStoreWidth::Half => todo!(), + LoadStoreWidth::Word => value, + }; + memory.0.insert(*base + *offset, value); + } + Instruction::Load { + dst, + offset, + base, + width, + } => { + let value = memory.0[&(*base + *offset)]; + let value = match width { + LoadStoreWidth::Byte => todo!(), + LoadStoreWidth::Half => todo!(), + LoadStoreWidth::Word => value, + }; + registers.0[*dst as usize] = value; + } + Instruction::Branch { + src1, + src2, + cond, + offset, + } => { + let (a, b) = (registers.0[*src1 as usize], registers.0[*src2 as usize]); + let cmp_result = match cond { + Comparison::Equal => a == b, + Comparison::NotEqual => a != b, + Comparison::LessThan => a < b, + Comparison::GreaterOrEqual => a >= b, + }; + if cmp_result { + *pc += offset + } + } + _ => unimplemented!(), + } +} diff --git a/src/vm/instructions.rs b/src/vm/instructions.rs new file mode 100644 index 000000000..e8c0b3b57 --- /dev/null +++ b/src/vm/instructions.rs @@ -0,0 +1,377 @@ +// Opcodes +const ARITH_OPCODE: u32 = 0b0110011; +const ARITH_IMM_OPCODE: u32 = 0b0010011; +const LOAD_OPCODE: u32 = 0b0000011; +const STORE_OPCODE: u32 = 0b0100011; +const BRANCH_OPCODE: u32 = 0b1100011; +const JUMP_AND_LINK_REGISTER_OPCCODE: u32 = 0b1100111; +const JUMP_AND_LINK_OPCCODE: u32 = 0b1101111; +const LOAD_UPPER_IMM_OPCODE: u32 = 0b0110111; +const ADD_UPPER_IMM_TO_PC_OPCODE: u32 = 0b0010111; + +// TODO: consider using num_enum dep to replace TyFrom/ using the constants here +enum Opcode { + Arith = 0b0110011, + ArithImm = 0b0010011, + Load = 0b0000011, + Store = 0b0100011, + Branch = 0b1100011, + JumpAndLinkRegister = 0b1100111, + JumpAndLink = 0b1101111, + 
LoadUpperImm = 0b0110111, + AddUpperImmToPc = 0b0010111, +} + +impl TryFrom for Opcode { + type Error = (); + + fn try_from(value: u32) -> Result { + Ok(match value { + ARITH_OPCODE => Opcode::Arith, + ARITH_IMM_OPCODE => Opcode::ArithImm, + LOAD_OPCODE => Opcode::Load, + JUMP_AND_LINK_REGISTER_OPCCODE => Opcode::JumpAndLinkRegister, + JUMP_AND_LINK_OPCCODE => Opcode::JumpAndLink, + STORE_OPCODE => Opcode::Store, + BRANCH_OPCODE => Opcode::Branch, + _ => panic!("Unknown Opcode: {value}"), + }) + } +} + +#[derive(Debug)] +enum InstructionFormat { + R, + I, + S, + B, + U, + J, +} + +impl Opcode { + fn instruction_format(&self) -> InstructionFormat { + match self { + &Opcode::Arith => InstructionFormat::R, + &Opcode::ArithImm | &Opcode::Load | &Opcode::JumpAndLinkRegister => { + InstructionFormat::I + } + &Opcode::Store => InstructionFormat::S, + &Opcode::Branch => InstructionFormat::B, + &Opcode::JumpAndLink => InstructionFormat::J, + _ => unimplemented!(), + } + } +} + +#[derive(Debug)] +pub enum ArithOp { + Add, + Sub, + Xor, + Or, + And, + ShiftLeftLogical, + ShiftRightLogical, + ShiftRightArith, + SetLessThan, + SetLessThanU, +} + +#[derive(Debug)] +pub enum LoadStoreWidth { + Byte, + Half, + Word, +} + +#[derive(Debug)] +pub enum Comparison { + Equal, + NotEqual, + LessThan, + GreaterOrEqual, +} + +#[derive(Debug)] +pub enum Instruction { + Arith { + dst: u32, + src1: u32, + src2: u32, + op: ArithOp, + }, + ArithImm { + dst: u32, + src: u32, + imm: u32, + op: ArithOp, + }, + JumpAndLink { + dst: u32, + offset: u32, + }, + JumpAndLinkRegister { + base: u32, + dst: u32, + offset: u32, + }, + Store { + src: u32, + offset: u32, + base: u32, + width: LoadStoreWidth, + }, + Load { + dst: u32, + offset: u32, + base: u32, + width: LoadStoreWidth, + }, + Branch { + src1: u32, + src2: u32, + cond: Comparison, + offset: u32, + }, +} + +const OPCODE_MASK: u32 = 0x0000007f; +const FUNC7_MASK: u32 = 0xfe000000; +const FUNC3_MASK: u32 = 0x00007000; +const RS1_MASK: u32 = 
0x000f8000; +const RS2_MASK: u32 = 0x01f00000; +const RD_MASK: u32 = 0x00000f80; + +impl Instruction { + pub fn parse(instruction: u32) -> Instruction { + let opcode = parse_opcode(instruction); + match opcode.instruction_format() { + InstructionFormat::R => parse_r_instruction(instruction, opcode), + InstructionFormat::I => parse_i_instruction(instruction, opcode), + InstructionFormat::S => parse_s_instruction(instruction, opcode), + InstructionFormat::B => parse_b_instruction(instruction, opcode), + InstructionFormat::J => parse_j_instruction(instruction, opcode), + _ => unimplemented!(), + } + } +} + +fn parse_opcode(instruction: u32) -> Opcode { + let opcode = instruction & OPCODE_MASK; + Opcode::try_from(opcode).unwrap() +} + +// Function Identifiers (func7 & func3) +const ADD_FUNC_IDENTIFIERS: (u32, u32) = (0x0, 0x00); +const SUB_FUNC_IDENTIFIERS: (u32, u32) = (0x0, 0x20); +const XOR_FUNC_IDENTIFIERS: (u32, u32) = (0x4, 0x00); +const OR_FUNC_IDENTIFIERS: (u32, u32) = (0x6, 0x00); +const AND_FUNC_IDENTIFIERS: (u32, u32) = (0x7, 0x00); +const SHL_FUNC_IDENTIFIERS: (u32, u32) = (0x1, 0x00); +const SRL_FUNC_IDENTIFIERS: (u32, u32) = (0x5, 0x00); +const SRA_FUNC_IDENTIFIERS: (u32, u32) = (0x5, 0x20); +const SLT_FUNC_IDENTIFIERS: (u32, u32) = (0x2, 0x00); +const SLTU_FUNC_IDENTIFIERS: (u32, u32) = (0x3, 0x00); + +// R-Type Instruction Format +// |func7 | rs2 | rs1 |funct3| rd |opcode| +// |31..25|24..20|19..15|14..12|11..7| 6..0 | +fn parse_r_instruction(instruction: u32, opcode: Opcode) -> Instruction { + let func7 = (instruction & FUNC7_MASK) >> 25; + let func3 = (instruction & FUNC3_MASK) >> 12; + let rs2 = (instruction & RS2_MASK) >> 20; + let rs1 = (instruction & RS1_MASK) >> 15; + let rd = (instruction & RD_MASK) >> 7; + match opcode { + Opcode::Arith => { + let operation = match (func3, func7) { + ADD_FUNC_IDENTIFIERS => ArithOp::Add, + SUB_FUNC_IDENTIFIERS => ArithOp::Sub, + XOR_FUNC_IDENTIFIERS => ArithOp::Xor, + OR_FUNC_IDENTIFIERS => ArithOp::Or, + 
AND_FUNC_IDENTIFIERS => ArithOp::And, + SHL_FUNC_IDENTIFIERS => ArithOp::ShiftLeftLogical, + SRL_FUNC_IDENTIFIERS => ArithOp::ShiftRightLogical, + SRA_FUNC_IDENTIFIERS => ArithOp::ShiftRightArith, + SLT_FUNC_IDENTIFIERS => ArithOp::SetLessThan, + SLTU_FUNC_IDENTIFIERS => ArithOp::SetLessThanU, + _ => panic!("Unknown arith opcode identifier"), + }; + Instruction::Arith { + dst: rd, + src1: rs1, + src2: rs2, + op: operation, + } + } + _ => panic!("Invalid Instruction Encoding"), + } +} + +// Function Identifiers (func3) +const ADD_FUNC_IDENTIFIER: u32 = 0x0; +const XOR_FUNC_IDENTIFIER: u32 = 0x4; +const OR_FUNC_IDENTIFIER: u32 = 0x6; +const AND_FUNC_IDENTIFIER: u32 = 0x7; +const SHL_FUNC_IDENTIFIER: u32 = 0x1; +const SR_FUNC_IDENTIFIER: u32 = 0x5; +const SLT_FUNC_IDENTIFIER: u32 = 0x2; +const SLTU_FUNC_IDENTIFIER: u32 = 0x3; + +// I-Type Instruction Format +// | imm | rs1 |funct3| rd |opcode| +// |31..20|19..15|14..12|11..7| 6..0 | +fn parse_i_instruction(instruction: u32, opcode: Opcode) -> Instruction { + let func7 = (instruction & FUNC7_MASK) >> 25; + let func3 = (instruction & FUNC3_MASK) >> 12; + let rs2 = (instruction & RS2_MASK) >> 20; + let rs1 = (instruction & RS1_MASK) >> 15; + let mut imm = func7 | rs2; + let rd = (instruction & RD_MASK) >> 7; + match opcode { + Opcode::ArithImm => { + let operation = match func3 { + ADD_FUNC_IDENTIFIER => ArithOp::Add, + XOR_FUNC_IDENTIFIER => ArithOp::Xor, + OR_FUNC_IDENTIFIER => ArithOp::Or, + AND_FUNC_IDENTIFIER => ArithOp::And, + SHL_FUNC_IDENTIFIER => { + assert!(imm >> 5 == 0); + imm = imm & 0xF; + ArithOp::ShiftLeftLogical + } + SR_FUNC_IDENTIFIER => { + let func_id = imm >> 5; + imm = imm & 0xF; + match func_id { + 0x00 => ArithOp::ShiftRightLogical, + 0x20 => ArithOp::ShiftRightArith, + _ => unimplemented!(), + } + } + SLT_FUNC_IDENTIFIER => ArithOp::SetLessThan, + SLTU_FUNC_IDENTIFIER => ArithOp::SetLessThanU, + _ => panic!("Unknown arith opcode identifier"), + }; + Instruction::ArithImm { + dst: rd, + src: rs1, 
+ imm, + op: operation, + } + } + Opcode::JumpAndLinkRegister => { + if func3 != 0x00 { + panic!("Invalid JALR Instruction") + }; + Instruction::JumpAndLinkRegister { + base: rs1, + dst: rd, + offset: imm, + } + } + Opcode::Load => { + let width = match func3 { + LOAD_STORE_BYTE_WITH => LoadStoreWidth::Byte, + LOAD_STORE_HALF_WITH => LoadStoreWidth::Half, + LOAD_STORE_WORD_WITH => LoadStoreWidth::Word, + _ => panic!("Invalid Width"), + }; + Instruction::Load { + dst: rd, + offset: imm, + base: rs1, + width, + } + } + _ => panic!("Invalid Instruction Encoding"), + } +} + +// Function Identifiers (func3) +const LOAD_STORE_BYTE_WITH: u32 = 0x0; +const LOAD_STORE_HALF_WITH: u32 = 0x1; +const LOAD_STORE_WORD_WITH: u32 = 0x2; + +// S-Type Instruction Format +// imm[11:5] rs2 rs1 funct3 imm[4:0] opcode +// |imm[11:5]| rs2 | rs1 |funct3|imm[4:0]|opcode| +// | 31..25 |24..20|19..15|14..12| 11..7 | 6..0 | +fn parse_s_instruction(instruction: u32, opcode: Opcode) -> Instruction { + let func7 = (instruction & FUNC7_MASK) >> 25; + let func3 = (instruction & FUNC3_MASK) >> 12; + let rs2 = (instruction & RS2_MASK) >> 20; + let rs1 = (instruction & RS1_MASK) >> 15; + let rd = (instruction & RD_MASK) >> 7; + let imm = func7 | rd; + match opcode { + Opcode::Store => { + let width = match func3 { + LOAD_STORE_BYTE_WITH => LoadStoreWidth::Byte, + LOAD_STORE_HALF_WITH => LoadStoreWidth::Half, + LOAD_STORE_WORD_WITH => LoadStoreWidth::Word, + _ => panic!("Invalid Width"), + }; + Instruction::Store { + src: rs2, + offset: imm, + base: rs1, + width, + } + } + _ => panic!("Invalid Instruction Encoding"), + } +} + +// Function Identifiers (func3) +const BRANCH_EQ_IDENTIFIER: u32 = 0x0; +const BRANCH_NEQ_IDENTIFIER: u32 = 0x1; +const BRANCH_LT_IDENTIFIER: u32 = 0x4; +const BRANCH_GE_IDENTIFIER: u32 = 0x5; + +// B-Type Instruction Format +// |imm[12|10:5]| rs2 | rs1 |funct3|imm[4:1|11]|opcode| +// | 31..25 |24..20|19..15|14..12| 11..7 | 6..0 | +fn parse_b_instruction(instruction: u32, opcode: 
Opcode) -> Instruction { + let func3 = (instruction & FUNC3_MASK) >> 12; + let rs2 = (instruction & RS2_MASK) >> 20; + let rs1 = (instruction & RS1_MASK) >> 15; + let imm = + ((instruction >> 20) & 0x7e0) | ((instruction >> 7) & 0x1e) | ((instruction & 0x80) << 4); + match opcode { + Opcode::Branch => { + let comparison = match func3 { + BRANCH_EQ_IDENTIFIER => Comparison::Equal, + BRANCH_NEQ_IDENTIFIER => Comparison::NotEqual, + BRANCH_LT_IDENTIFIER => Comparison::LessThan, + BRANCH_GE_IDENTIFIER => Comparison::GreaterOrEqual, + // TODO: Missing bltu & bgeu + _ => unimplemented!(), + }; + Instruction::Branch { + src1: rs1, + src2: rs2, + cond: comparison, + offset: imm, + } + } + _ => panic!("Unknown Opcode"), + } +} + +// J-Type Instruction Format +// |imm[20|10:1|11|19:12] | rd |opcode| +// | 31..12 |11..7| 6..0 | +fn parse_j_instruction(instruction: u32, opcode: Opcode) -> Instruction { + let imm = + instruction & 0xff000 | ((instruction & 0x100000) >> 9) | ((instruction >> 20) & 0x7fe); + let rd = (instruction & RD_MASK) >> 7; + match opcode { + Opcode::JumpAndLink => Instruction::JumpAndLink { + dst: rd, + offset: imm, + }, + _ => unimplemented!(), + } +} diff --git a/src/vm/mod.rs b/src/vm/mod.rs new file mode 100644 index 000000000..10edcc241 --- /dev/null +++ b/src/vm/mod.rs @@ -0,0 +1,2 @@ +pub mod execution; +pub mod instructions; diff --git a/tests/asm.rs b/tests/asm.rs index 566783081..9c03e2e0d 100644 --- a/tests/asm.rs +++ b/tests/asm.rs @@ -1,4 +1,4 @@ -use null_vm::elf::Elf; +use null_vm::{elf::Elf, vm::execution::run_program}; #[test] fn test_basic_program() { @@ -9,6 +9,7 @@ fn test_basic_program() { program.image.iter().for_each(|(addr, word)| { println!("0x{:08x}: 0x{:08x}", addr, word); }); + run_program(program.image, program.entry_point); // todo: execute and check result } From d59359cb865f8a95e1d2f6b17a77395a7146f146 Mon Sep 17 00:00:00 2001 From: Gianbelinche <39842759+gianbelinche@users.noreply.github.com> Date: Wed, 10 Dec 2025 17:07:46 
-0300 Subject: [PATCH 05/10] Add return values --- src/vm/execution.rs | 7 ++++--- tests/asm.rs | 4 ++-- tests/rust.rs | 6 ++++-- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/vm/execution.rs b/src/vm/execution.rs index 2dc8b267e..26bc5d386 100644 --- a/src/vm/execution.rs +++ b/src/vm/execution.rs @@ -2,10 +2,10 @@ use std::{collections::BTreeMap, fmt::Debug}; use crate::vm::instructions::{ArithOp, Comparison, Instruction, LoadStoreWidth}; -pub fn run_program(instruction_map: BTreeMap, entrypoint: u32) { +pub fn run_program(instruction_map: BTreeMap, entrypoint: u32) -> (u32, u32) { let mut memory = Memory::default(); load_program(instruction_map, &mut memory); - run_from_entrypoint(&mut memory, entrypoint); + run_from_entrypoint(&mut memory, entrypoint) } fn load_program(instruction_map: BTreeMap, memory: &mut Memory) { @@ -14,7 +14,7 @@ fn load_program(instruction_map: BTreeMap, memory: &mut Memory) { } } -fn run_from_entrypoint(memory: &mut Memory, entrypoint: u32) { +fn run_from_entrypoint(memory: &mut Memory, entrypoint: u32) -> (u32, u32) { let mut pc = entrypoint; dbg!(&pc, &memory); let mut registers = Registers::default(); @@ -26,6 +26,7 @@ fn run_from_entrypoint(memory: &mut Memory, entrypoint: u32) { dbg!(&registers); let return_values = (registers.0[10], registers.0[11]); println!("Return Values: {return_values:?}"); + return_values } // Toy Memory, TODO: Make expandable memory diff --git a/tests/asm.rs b/tests/asm.rs index 14c8f10e0..b98669a96 100644 --- a/tests/asm.rs +++ b/tests/asm.rs @@ -9,7 +9,7 @@ fn test_basic_program() { program.image.iter().for_each(|(addr, word)| { println!("0x{:08x}: 0x{:08x}", addr, word); }); - run_program(program.image, program.entry_point); + let results = run_program(program.image, program.entry_point); - // todo: execute and check result + assert!(results.0 == 0); } diff --git a/tests/rust.rs b/tests/rust.rs index ee11d1c97..2f455a9bc 100644 --- a/tests/rust.rs +++ b/tests/rust.rs @@ -1,4 +1,4 @@ 
-use null_vm::elf::Elf; +use null_vm::{elf::Elf, vm::execution::run_program}; #[test] fn test_basic_rust() { @@ -10,5 +10,7 @@ fn test_basic_rust() { println!("0x{:08x}: 0x{:08x}", addr, word); }); - // todo: execute and check result + let results = run_program(program.image, program.entry_point); + + assert!(results.0 == 0); } From cd2e8cb4fc78b2364f2f0f5676ec36165947d7c9 Mon Sep 17 00:00:00 2001 From: fmoletta Date: Wed, 10 Dec 2025 17:10:13 -0300 Subject: [PATCH 06/10] Improve register display --- src/vm/execution.rs | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/src/vm/execution.rs b/src/vm/execution.rs index 2dc8b267e..54a2f479d 100644 --- a/src/vm/execution.rs +++ b/src/vm/execution.rs @@ -1,4 +1,7 @@ -use std::{collections::BTreeMap, fmt::Debug}; +use std::{ + collections::BTreeMap, + fmt::{Debug, Display}, +}; use crate::vm::instructions::{ArithOp, Comparison, Instruction, LoadStoreWidth}; @@ -16,14 +19,13 @@ fn load_program(instruction_map: BTreeMap, memory: &mut Memory) { fn run_from_entrypoint(memory: &mut Memory, entrypoint: u32) { let mut pc = entrypoint; - dbg!(&pc, &memory); let mut registers = Registers::default(); while pc != registers.0[1] { let next_instruction = memory.0[&pc]; let instruction = Instruction::parse(next_instruction); run_instruction(&instruction, &mut registers, &mut pc, memory); } - dbg!(&registers); + println!("{}", &registers); let return_values = (registers.0[10], registers.0[11]); println!("Return Values: {return_values:?}"); } @@ -32,24 +34,32 @@ fn run_from_entrypoint(memory: &mut Memory, entrypoint: u32) { #[derive(Default, Debug)] struct Memory(BTreeMap); -#[derive(Default)] +#[derive(Default, Debug)] struct Registers([u32; 32]); // Registers: // 0x zero // a0-ax function arguments: 0x10 -etc // 0x1 return address (ra) // -impl Debug for Registers { +impl Display for Registers { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - for (i, elem) in 
self.0.iter().enumerate() { - match i { - 1 => format!("ra: {elem}").fmt(f)?, - 2 => format!("sp: {elem}").fmt(f)?, - 3 => format!("gp: {elem}").fmt(f)?, - i @ 10..17 => format!("a{} : {}", i - 10, elem).fmt(f)?, - _ => {} - } - } + writeln!(f, "Registers:")?; + writeln!(f, "ReturnAddress(ra): {}", self.0[1])?; + writeln!(f, "StackPointer(sp): {}", self.0[2])?; + // Not used for now + // writeln!(f, "GlobalPointer(gp): {}", self.0[2])?; + // writeln!(f, "ThreadPointer(tp): {}", self.0[3])?; + let function_arguments = self.0[10..17] + .iter() + .enumerate() + .map(|(i, val)| match i { + i @ 0..=1 => format!("a{i} (return value {i}) : {val} "), + i => format!("a{i}: {val} "), + }) + .collect::>() + .concat(); + writeln!(f, "FunctionArguments: {function_arguments}")?; + // TODO: Add other registers as we use them Ok(()) } } From dc9d146f442309769306ec874ffec14a768d4ffd Mon Sep 17 00:00:00 2001 From: fmoletta Date: Wed, 10 Dec 2025 17:12:59 -0300 Subject: [PATCH 07/10] fix some clippy lints --- src/vm/instructions.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/vm/instructions.rs b/src/vm/instructions.rs index e8c0b3b57..1ff50616b 100644 --- a/src/vm/instructions.rs +++ b/src/vm/instructions.rs @@ -39,7 +39,6 @@ impl TryFrom for Opcode { } } -#[derive(Debug)] enum InstructionFormat { R, I, @@ -238,12 +237,12 @@ fn parse_i_instruction(instruction: u32, opcode: Opcode) -> Instruction { AND_FUNC_IDENTIFIER => ArithOp::And, SHL_FUNC_IDENTIFIER => { assert!(imm >> 5 == 0); - imm = imm & 0xF; + imm &= 0xF; ArithOp::ShiftLeftLogical } SR_FUNC_IDENTIFIER => { let func_id = imm >> 5; - imm = imm & 0xF; + imm &= 0xF; match func_id { 0x00 => ArithOp::ShiftRightLogical, 0x20 => ArithOp::ShiftRightArith, From f992a3514a6da882182abc1b2cc94458e8a97227 Mon Sep 17 00:00:00 2001 From: fmoletta Date: Wed, 10 Dec 2025 17:13:45 -0300 Subject: [PATCH 08/10] remove debug print --- src/vm/execution.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff 
--git a/src/vm/execution.rs b/src/vm/execution.rs index b761afa28..aaa5d8f4c 100644 --- a/src/vm/execution.rs +++ b/src/vm/execution.rs @@ -25,7 +25,7 @@ fn run_from_entrypoint(memory: &mut Memory, entrypoint: u32) -> (u32, u32) { let instruction = Instruction::parse(next_instruction); run_instruction(&instruction, &mut registers, &mut pc, memory); } - println!("{}", &registers); + println!("Final Register Values:\n {}", &registers); let return_values = (registers.0[10], registers.0[11]); println!("Return Values: {return_values:?}"); return_values @@ -72,7 +72,6 @@ fn run_instruction( memory: &mut Memory, ) { *pc += 4; - dbg!(inst); match inst { Instruction::ArithImm { dst, src, imm, op } => { let (a, b) = (registers.0[*src as usize], imm); From 99c13b01e5ff2bf5ee345c46e49b8912846a1920 Mon Sep 17 00:00:00 2001 From: fmoletta Date: Wed, 10 Dec 2025 17:27:48 -0300 Subject: [PATCH 09/10] remove unused numeric from enum --- src/vm/instructions.rs | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/src/vm/instructions.rs b/src/vm/instructions.rs index 1ff50616b..643a34a42 100644 --- a/src/vm/instructions.rs +++ b/src/vm/instructions.rs @@ -6,20 +6,16 @@ const STORE_OPCODE: u32 = 0b0100011; const BRANCH_OPCODE: u32 = 0b1100011; const JUMP_AND_LINK_REGISTER_OPCCODE: u32 = 0b1100111; const JUMP_AND_LINK_OPCCODE: u32 = 0b1101111; -const LOAD_UPPER_IMM_OPCODE: u32 = 0b0110111; -const ADD_UPPER_IMM_TO_PC_OPCODE: u32 = 0b0010111; // TODO: consider using num_enum dep to replace TyFrom/ using the constants here enum Opcode { - Arith = 0b0110011, - ArithImm = 0b0010011, - Load = 0b0000011, - Store = 0b0100011, - Branch = 0b1100011, - JumpAndLinkRegister = 0b1100111, - JumpAndLink = 0b1101111, - LoadUpperImm = 0b0110111, - AddUpperImmToPc = 0b0010111, + Arith, + ArithImm, + Load, + Store, + Branch, + JumpAndLinkRegister, + JumpAndLink, } impl TryFrom for Opcode { @@ -30,10 +26,10 @@ impl TryFrom for Opcode { 
ARITH_IMM_OPCODE => Opcode::ArithImm, LOAD_OPCODE => Opcode::Load, - JUMP_AND_LINK_REGISTER_OPCCODE => Opcode::JumpAndLinkRegister, - JUMP_AND_LINK_OPCCODE => Opcode::JumpAndLink, STORE_OPCODE => Opcode::Store, BRANCH_OPCODE => Opcode::Branch, + JUMP_AND_LINK_REGISTER_OPCCODE => Opcode::JumpAndLinkRegister, + JUMP_AND_LINK_OPCCODE => Opcode::JumpAndLink, _ => panic!("Unknown Opcode: {value}"), }) } From 8f34af55e606bb40082a0cd8cc8fcc187a9ce66b Mon Sep 17 00:00:00 2001 From: fmoletta Date: Wed, 10 Dec 2025 17:31:38 -0300 Subject: [PATCH 10/10] Avoid repeated code --- src/vm/instructions.rs | 59 +++++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 33 deletions(-) diff --git a/src/vm/instructions.rs b/src/vm/instructions.rs index 643a34a42..36e2c0fa7 100644 --- a/src/vm/instructions.rs +++ b/src/vm/instructions.rs @@ -80,6 +80,20 @@ pub enum LoadStoreWidth { Word, } +impl LoadStoreWidth { + fn from_func3(func3: u32) -> LoadStoreWidth { + const LOAD_STORE_BYTE_WITH: u32 = 0x0; + const LOAD_STORE_HALF_WITH: u32 = 0x1; + const LOAD_STORE_WORD_WITH: u32 = 0x2; + match func3 { + LOAD_STORE_BYTE_WITH => LoadStoreWidth::Byte, + LOAD_STORE_HALF_WITH => LoadStoreWidth::Half, + LOAD_STORE_WORD_WITH => LoadStoreWidth::Word, + _ => panic!("Invalid Width"), + } + } +} + #[derive(Debug)] pub enum Comparison { Equal, @@ -266,29 +280,16 @@ fn parse_i_instruction(instruction: u32, opcode: Opcode) -> Instruction { offset: imm, } } - Opcode::Load => { - let width = match func3 { - LOAD_STORE_BYTE_WITH => LoadStoreWidth::Byte, - LOAD_STORE_HALF_WITH => LoadStoreWidth::Half, - LOAD_STORE_WORD_WITH => LoadStoreWidth::Word, - _ => panic!("Invalid Width"), - }; - Instruction::Load { - dst: rd, - offset: imm, - base: rs1, - width, - } - } + Opcode::Load => Instruction::Load { + dst: rd, + offset: imm, + base: rs1, + width: LoadStoreWidth::from_func3(func3), + }, _ => panic!("Invalid Instruction Encoding"), } } -// Function Identifiers (func3) -const 
LOAD_STORE_BYTE_WITH: u32 = 0x0; -const LOAD_STORE_HALF_WITH: u32 = 0x1; -const LOAD_STORE_WORD_WITH: u32 = 0x2; - // S-Type Instruction Format // imm[11:5] rs2 rs1 funct3 imm[4:0] opcode // |imm[11:5]| rs2 | rs1 |funct3|imm[4:0]|opcode| @@ -301,20 +302,12 @@ fn parse_s_instruction(instruction: u32, opcode: Opcode) -> Instruction { let rd = (instruction & RD_MASK) >> 7; let imm = func7 | rd; match opcode { - Opcode::Store => { - let width = match func3 { - LOAD_STORE_BYTE_WITH => LoadStoreWidth::Byte, - LOAD_STORE_HALF_WITH => LoadStoreWidth::Half, - LOAD_STORE_WORD_WITH => LoadStoreWidth::Word, - _ => panic!("Invalid Width"), - }; - Instruction::Store { - src: rs2, - offset: imm, - base: rs1, - width, - } - } + Opcode::Store => Instruction::Store { + src: rs2, + offset: imm, + base: rs1, + width: LoadStoreWidth::from_func3(func3), + }, _ => panic!("Invalid Instruction Encoding"), } }