From 429446f0cbf0d9a020fc87dc7d204ff9e0d18589 Mon Sep 17 00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Mon, 8 Dec 2025 10:38:23 -0800 Subject: [PATCH 1/2] Implement debug register abstraction Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> --- .../src/hypervisor/regs/debug_regs.rs | 269 ++++++++++++++++++ 1 file changed, 269 insertions(+) create mode 100644 src/hyperlight_host/src/hypervisor/regs/debug_regs.rs diff --git a/src/hyperlight_host/src/hypervisor/regs/debug_regs.rs b/src/hyperlight_host/src/hypervisor/regs/debug_regs.rs new file mode 100644 index 000000000..5b2127883 --- /dev/null +++ b/src/hyperlight_host/src/hypervisor/regs/debug_regs.rs @@ -0,0 +1,269 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#[cfg(kvm)] +use kvm_bindings::kvm_debugregs; +#[cfg(mshv3)] +use mshv_bindings::DebugRegisters; + +#[derive(Debug, Default, Copy, Clone, PartialEq)] +pub(crate) struct CommonDebugRegs { + pub dr0: u64, + pub dr1: u64, + pub dr2: u64, + pub dr3: u64, + pub dr6: u64, + pub dr7: u64, +} + +#[cfg(kvm)] +impl From for CommonDebugRegs { + fn from(kvm_regs: kvm_debugregs) -> Self { + Self { + dr0: kvm_regs.db[0], + dr1: kvm_regs.db[1], + dr2: kvm_regs.db[2], + dr3: kvm_regs.db[3], + dr6: kvm_regs.dr6, + dr7: kvm_regs.dr7, + } + } +} +#[cfg(kvm)] +impl From<&CommonDebugRegs> for kvm_debugregs { + fn from(common_regs: &CommonDebugRegs) -> Self { + kvm_debugregs { + db: [ + common_regs.dr0, + common_regs.dr1, + common_regs.dr2, + common_regs.dr3, + ], + dr6: common_regs.dr6, + dr7: common_regs.dr7, + ..Default::default() + } + } +} +#[cfg(mshv3)] +impl From for CommonDebugRegs { + fn from(mshv_regs: DebugRegisters) -> Self { + Self { + dr0: mshv_regs.dr0, + dr1: mshv_regs.dr1, + dr2: mshv_regs.dr2, + dr3: mshv_regs.dr3, + dr6: mshv_regs.dr6, + dr7: mshv_regs.dr7, + } + } +} +#[cfg(mshv3)] +impl From<&CommonDebugRegs> for DebugRegisters { + fn from(common_regs: &CommonDebugRegs) -> Self { + DebugRegisters { + dr0: common_regs.dr0, + dr1: common_regs.dr1, + dr2: common_regs.dr2, + dr3: common_regs.dr3, + dr6: common_regs.dr6, + dr7: common_regs.dr7, + } + } +} + +#[cfg(target_os = "windows")] +use windows::Win32::System::Hypervisor::*; + +#[cfg(target_os = "windows")] +impl From<&CommonDebugRegs> + for [(WHV_REGISTER_NAME, Align16); WHP_DEBUG_REGS_NAMES_LEN] +{ + fn from(regs: &CommonDebugRegs) -> Self { + [ + ( + WHvX64RegisterDr0, + Align16(WHV_REGISTER_VALUE { Reg64: regs.dr0 }), + ), + ( + WHvX64RegisterDr1, + Align16(WHV_REGISTER_VALUE { Reg64: regs.dr1 }), + ), + ( + WHvX64RegisterDr2, + Align16(WHV_REGISTER_VALUE { Reg64: regs.dr2 }), + ), + ( + WHvX64RegisterDr3, + Align16(WHV_REGISTER_VALUE { Reg64: regs.dr3 }), + ), + ( + WHvX64RegisterDr6, + Align16(WHV_REGISTER_VALUE { Reg64: regs.dr6 }), + ), + ( + WHvX64RegisterDr7, + Align16(WHV_REGISTER_VALUE { Reg64: regs.dr7 }), + ), + ] + } +} + +#[cfg(target_os = "windows")] +use std::collections::HashSet; + +#[cfg(target_os = "windows")] +use super::{Align16, FromWhpRegisterError}; + +#[cfg(target_os = "windows")] +pub(crate) const WHP_DEBUG_REGS_NAMES_LEN: usize = 6; +#[cfg(target_os = "windows")] +pub(crate) const WHP_DEBUG_REGS_NAMES: [WHV_REGISTER_NAME; WHP_DEBUG_REGS_NAMES_LEN] = [ + WHvX64RegisterDr0, + WHvX64RegisterDr1, + WHvX64RegisterDr2, + WHvX64RegisterDr3, + WHvX64RegisterDr6, + WHvX64RegisterDr7, +]; + +#[cfg(target_os = "windows")] +impl TryFrom<&[(WHV_REGISTER_NAME, Align16)]> for CommonDebugRegs { + type Error = FromWhpRegisterError; + + #[expect( + non_upper_case_globals, + reason = "Windows API has lowercase register names" + )] + fn try_from( + regs: &[(WHV_REGISTER_NAME, Align16)], + ) -> Result { + if regs.len() != WHP_DEBUG_REGS_NAMES_LEN { + return Err(FromWhpRegisterError::InvalidLength(regs.len())); + } + let mut registers = CommonDebugRegs::default(); + let mut seen_registers = HashSet::new(); + + for &(name, value) in regs { + let name_id = name.0; + + // Check for duplicates + if !seen_registers.insert(name_id) { + return Err(FromWhpRegisterError::DuplicateRegister(name_id)); + } + + unsafe { + match name { + WHvX64RegisterDr0 => registers.dr0 = value.0.Reg64, + WHvX64RegisterDr1 => registers.dr1 = value.0.Reg64, + WHvX64RegisterDr2 => registers.dr2 = value.0.Reg64, + WHvX64RegisterDr3 => registers.dr3 = value.0.Reg64, + WHvX64RegisterDr6 => registers.dr6 = value.0.Reg64, + WHvX64RegisterDr7 => registers.dr7 = value.0.Reg64, + _ => { + // Given unexpected register + return Err(FromWhpRegisterError::InvalidRegister(name_id)); + } + } + } + } + + // Set of all expected register names + let expected_registers: HashSet = WHP_DEBUG_REGS_NAMES + .map(|name| name.0) + .into_iter() + .collect(); + + // Technically it should not be possible to have any missing registers at this point + // since we are guaranteed to have WHP_DEBUG_REGS_NAMES_LEN (6) non-duplicate registers that have passed the match-arm above, but leaving this here for safety anyway + let missing: HashSet<_> = expected_registers + .difference(&seen_registers) + .cloned() + .collect(); + + if !missing.is_empty() { + return Err(FromWhpRegisterError::MissingRegister(missing)); + } + + Ok(registers) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn common_debug_regs() -> CommonDebugRegs { + CommonDebugRegs { + dr0: 1, + dr1: 2, + dr2: 3, + dr3: 4, + dr6: 5, + dr7: 6, + } + } + + #[cfg(kvm)] + #[test] + fn round_trip_kvm_debug_regs() { + let original = common_debug_regs(); + let kvm_regs: kvm_debugregs = (&original).into(); + let converted: CommonDebugRegs = kvm_regs.into(); + assert_eq!(original, converted); + } + + #[cfg(mshv3)] + #[test] + fn round_trip_mshv_debug_regs() { + let original = common_debug_regs(); + let mshv_regs: DebugRegisters = (&original).into(); + let converted: CommonDebugRegs = mshv_regs.into(); + assert_eq!(original, converted); + } + + #[cfg(target_os = "windows")] + #[test] + fn round_trip_whp_debug_regs() { + let original = common_debug_regs(); + let whp_regs: [(WHV_REGISTER_NAME, Align16); WHP_DEBUG_REGS_NAMES_LEN] = + (&original).into(); + let converted: CommonDebugRegs = whp_regs.as_ref().try_into().unwrap(); + assert_eq!(original, converted); + + // test for duplicate register error handling + let original = common_debug_regs(); + let mut whp_regs: [(WHV_REGISTER_NAME, Align16); + WHP_DEBUG_REGS_NAMES_LEN] = (&original).into(); + whp_regs[0].0 = WHvX64RegisterDr1; + let err = CommonDebugRegs::try_from(whp_regs.as_ref()).unwrap_err(); + assert_eq!( + err, + FromWhpRegisterError::DuplicateRegister(WHvX64RegisterDr1.0) + ); + + // test for passing non-standard register (e.g. CR8) + let original = common_debug_regs(); + let mut whp_regs: [(WHV_REGISTER_NAME, Align16); + WHP_DEBUG_REGS_NAMES_LEN] = (&original).into(); + whp_regs[0].0 = WHvX64RegisterCr8; + let err = CommonDebugRegs::try_from(whp_regs.as_ref()).unwrap_err(); + assert_eq!( + err, + FromWhpRegisterError::InvalidRegister(WHvX64RegisterCr8.0) + ); + } +} From e793129f4bce844a9ce20ab6a7bff27c1a397ce5 Mon Sep 17 00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Mon, 8 Dec 2025 10:42:33 -0800 Subject: [PATCH 2/2] Reset more vcpu state after snapshot::restore Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> --- Cargo.lock | 6 +- src/hyperlight_host/Cargo.toml | 4 +- src/hyperlight_host/src/error.rs | 12 +- .../src/hypervisor/hyperlight_vm.rs | 231 +++++++++++++- .../src/hypervisor/hyperv_linux.rs | 57 +++- .../src/hypervisor/hyperv_windows.rs | 289 +++++++++++------- src/hyperlight_host/src/hypervisor/kvm.rs | 101 +++++- src/hyperlight_host/src/hypervisor/mod.rs | 21 +- src/hyperlight_host/src/hypervisor/regs.rs | 2 + src/hyperlight_host/src/mem/layout.rs | 6 +- src/hyperlight_host/src/sandbox/config.rs | 23 ++ .../src/sandbox/initialized_multi_use.rs | 79 +++++ src/tests/rust_guests/simpleguest/src/main.rs | 34 +++ 13 files changed, 738 insertions(+), 127 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 22067ba0a..ba06eb5da 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1821,8 +1821,7 @@ dependencies = [ [[package]] name = "kvm-bindings" version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b3c06ff73c7ce03e780887ec2389d62d2a2a9ddf471ab05c2ff69207cd3f3b4" +source = "git+https://github.com/rust-vmm/kvm?rev=25f630aa384dad1306b288ec5eed3312f84e3393#25f630aa384dad1306b288ec5eed3312f84e3393" dependencies = [ "vmm-sys-util", ] @@ -1830,8 +1829,7 @@ dependencies = [ [[package]] name = "kvm-ioctls" version = "0.24.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "333f77a20344a448f3f70664918135fddeb804e938f28a99d685bd92926e0b19" +source = "git+https://github.com/rust-vmm/kvm?rev=25f630aa384dad1306b288ec5eed3312f84e3393#25f630aa384dad1306b288ec5eed3312f84e3393" dependencies = [ "bitflags 2.10.0", "kvm-bindings", diff --git a/src/hyperlight_host/Cargo.toml b/src/hyperlight_host/Cargo.toml index 8cd0ef944..084af20f0 100644 --- a/src/hyperlight_host/Cargo.toml +++ b/src/hyperlight_host/Cargo.toml @@ -75,8 +75,8 @@ windows-version = "0.1" lazy_static = "1.4.0" [target.'cfg(unix)'.dependencies] -kvm-bindings = { version = "0.14", features = ["fam-wrappers"], optional = true } -kvm-ioctls = { version = "0.24", optional = true } +kvm-bindings = { git = "https://github.com/rust-vmm/kvm", rev = "25f630aa384dad1306b288ec5eed3312f84e3393", features = ["fam-wrappers"], optional = true } +kvm-ioctls = { git = "https://github.com/rust-vmm/kvm", rev = "25f630aa384dad1306b288ec5eed3312f84e3393", optional = true } mshv-bindings = { version = "0.6", optional = true } mshv-ioctls = { version = "0.6", optional = true} diff --git a/src/hyperlight_host/src/error.rs b/src/hyperlight_host/src/error.rs index dea5ca587..deb785cb8 100644 --- a/src/hyperlight_host/src/error.rs +++ b/src/hyperlight_host/src/error.rs @@ -144,6 +144,14 @@ pub enum HyperlightError { #[error("Memory Allocation Failed with OS Error {0:?}.")] MemoryAllocationFailed(Option), + /// MSR Read Violation - Guest attempted to read from a Model-Specific Register + #[error("Guest attempted to read from MSR {0:#x}")] + MsrReadViolation(u32), + + /// MSR Write Violation - Guest attempted to write to a Model-Specific Register + #[error("Guest attempted to write {1:#x} to MSR {0:#x}")] + MsrWriteViolation(u32, u64), + /// Memory Protection Failed #[error("Memory Protection Failed with OS Error {0:?}.")] MemoryProtectionFailed(Option), @@ -322,7 +330,9 @@ impl HyperlightError { | HyperlightError::PoisonedSandbox | HyperlightError::ExecutionAccessViolation(_) | HyperlightError::StackOverflow() - | HyperlightError::MemoryAccessViolation(_, _, _) => true, + | HyperlightError::MemoryAccessViolation(_, _, _) + | HyperlightError::MsrReadViolation(_) + | HyperlightError::MsrWriteViolation(_, _) => true, // All other errors do not poison the sandbox. HyperlightError::AnyhowError(_) diff --git a/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs b/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs index e03df5fe5..ba27a8447 100644 --- a/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs +++ b/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs @@ -46,7 +46,7 @@ use crate::hypervisor::hyperv_linux::MshvVm; use crate::hypervisor::hyperv_windows::WhpVm; #[cfg(kvm)] use crate::hypervisor::kvm::KvmVm; -use crate::hypervisor::regs::CommonSpecialRegisters; +use crate::hypervisor::regs::{CommonDebugRegs, CommonSpecialRegisters}; #[cfg(target_os = "windows")] use crate::hypervisor::wrappers::HandleWrapper; use crate::hypervisor::{HyperlightExit, InterruptHandle, InterruptHandleImpl, get_max_log_level}; @@ -86,6 +86,9 @@ pub(crate) struct HyperlightVm { next_slot: u32, // Monotonically increasing slot number freed_slots: Vec, // Reusable slots from unmapped regions + // pml4 saved to be able to restore it if needed + #[cfg(feature = "init-paging")] + pml4_addr: u64, #[cfg(gdb)] gdb_conn: Option>, #[cfg(gdb)] @@ -94,6 +97,11 @@ pub(crate) struct HyperlightVm { trace_info: MemTraceInfo, #[cfg(crashdump)] rt_cfg: SandboxRuntimeConfig, + // Windows does not support just "zeroing" out the xsave area - we need to preserve the XCOMP_BV. + // Since it varies by hardware, we can't just hardcode a value, so we save it here. + // Also note that windows uses compacted format for xsave. + #[cfg(target_os = "windows")] + xcomp_bv: u64, } impl HyperlightVm { @@ -128,6 +136,11 @@ impl HyperlightVm { None => return Err(NoHypervisorFound()), }; + // Enable MSR intercepts unless explicitly allowed + if !config.get_allow_msr() { + vm.enable_msr_intercept()?; + } + for (i, region) in mem_regions.iter().enumerate() { // Safety: slots are unique and region points to valid memory since we created the regions unsafe { vm.map_memory((i as u32, region))? }; @@ -172,6 +185,18 @@ impl HyperlightVm { dropped: AtomicBool::new(false), }); + // get xsave header for debugging + #[cfg(target_os = "windows")] + { + let xsave = vm.xsave()?; + + let xcomp_bv = if xsave.len() >= 528 { + u64::from_le_bytes(xsave[520..528].try_into().unwrap()) + } else { + return Err(new_error!("XSAVE buffer too small to contain XCOMP_BV")); + }; + } + #[cfg_attr(not(gdb), allow(unused_mut))] let mut ret = Self { vm, @@ -185,6 +210,8 @@ impl HyperlightVm { mmap_regions: Vec::new(), freed_slots: Vec::new(), + #[cfg(feature = "init-paging")] + pml4_addr: _pml4_addr, #[cfg(gdb)] gdb_conn, #[cfg(gdb)] @@ -193,6 +220,8 @@ impl HyperlightVm { trace_info, #[cfg(crashdump)] rt_cfg, + #[cfg(target_os = "windows")] + xcomp_bv, }; // Send the interrupt handle to the GDB thread if debugging is enabled @@ -487,6 +516,12 @@ impl HyperlightVm { } } } + Ok(HyperlightExit::MsrRead(msr_index)) => { + break Err(HyperlightError::MsrReadViolation(msr_index)); + } + Ok(HyperlightExit::MsrWrite { msr_index, value }) => { + break Err(HyperlightError::MsrWriteViolation(msr_index, value)); + } Ok(HyperlightExit::Cancelled()) => { // If cancellation was not requested for this specific guest function call, // the vcpu was interrupted by a stale cancellation. This can occur when: @@ -581,6 +616,48 @@ impl HyperlightVm { Ok(()) } + // Resets the following vCPU state: + // - General purpose registers + // - FPU registers + // - Debug registers + // - Special registers + // - XSAVE area + pub(crate) fn reset_vcpu(&self) -> Result<()> { + self.vm.set_regs(&CommonRegisters::default())?; + self.vm.set_fpu(&CommonFpu::default())?; + self.vm.set_debug_regs(&CommonDebugRegs::default())?; + + #[cfg(feature = "init-paging")] + self.vm + .set_sregs(&CommonSpecialRegisters::standard_64bit_defaults( + self.pml4_addr, + ))?; + #[cfg(not(feature = "init-paging"))] + self.vm + .set_sregs(&CommonSpecialRegisters::standard_real_mode_defaults())?; + + // Windows does not support just "zeroing" out the xsave area - we need to preserve the XCOMP_BV. + #[cfg(target_os = "windows")] + { + // The XSAVE header starts at offset 512. + // Bytes 7:0 of the header is XSTATE_BV. + // Bytes 15:8 of the header is XCOMP_BV. + // So XCOMP_BV starts at offset 512 + 8 = 520. + // We are using a u32 array, so the index is 520 / 4 = 130. + const XCOMP_BV_OFFSET_U32: usize = 520 / std::mem::size_of::(); + + let mut xsave = [0u32; 1024]; + xsave[XCOMP_BV_OFFSET_U32] = self.xcomp_bv as u32; + xsave[XCOMP_BV_OFFSET_U32 + 1] = (self.xcomp_bv >> 32) as u32; + self.vm.set_xsave(&xsave)?; + } + #[cfg(not(target_os = "windows"))] + self.vm.set_xsave(&[0; 1024])?; + + // MSRs don't need to be reset as they cannot be modified by guest (unless unsafely-allowed) + Ok(()) + } + // Handle a debug exit #[cfg(gdb)] fn handle_debug( @@ -818,6 +895,158 @@ fn get_memory_access_violation<'a>( None } +#[cfg(test)] +mod tests { + use super::*; + use crate::is_hypervisor_present; + use crate::mem::layout::SandboxMemoryLayout; + use crate::mem::mgr::SandboxMemoryManager; + use crate::mem::ptr::RawPtr; + use crate::mem::ptr_offset::Offset; + use crate::mem::shared_mem::{ExclusiveSharedMemory, SharedMemory}; + use crate::sandbox::SandboxConfiguration; + use crate::sandbox::host_funcs::FunctionRegistry; + #[cfg(feature = "mem_profile")] + use crate::sandbox::trace::DummyUnwindInfo; + #[cfg(feature = "mem_profile")] + use crate::sandbox::trace::MemTraceInfo; + #[cfg(crashdump)] + use crate::sandbox::uninitialized::SandboxRuntimeConfig; + use std::sync::{Arc, Mutex}; + + #[test] + fn test_reset_vcpu() -> Result<()> { + if !is_hypervisor_present() { + return Ok(()); + } + + #[cfg(target_os = "windows")] + return Ok(()); + + #[cfg(not(target_os = "windows"))] + { + let mut code = Vec::new(); + // mov rax, 0x1111111111111111 + code.extend_from_slice(&[0x48, 0xb8, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11]); + // mov rbx, 0x2222222222222222 + code.extend_from_slice(&[0x48, 0xbb, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22]); + // mov rcx, 0x3333333333333333 + code.extend_from_slice(&[0x48, 0xb9, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33]); + + // hlt + code.extend_from_slice(&[0xf4]); + + let config: SandboxConfiguration = Default::default(); + #[cfg(crashdump)] + let rt_cfg: SandboxRuntimeConfig = Default::default(); + #[cfg(feature = "mem_profile")] + let trace_info = MemTraceInfo::new(Arc::new(DummyUnwindInfo {})).unwrap(); + + let layout = SandboxMemoryLayout::new( + config.clone(), + code.len(), // code size + 0, // stack + 0, // heap + 0, // init data + None, + )?; + + let mem_size = layout.get_memory_size()?; + let eshm = ExclusiveSharedMemory::new(mem_size)?; + + let stack_cookie = [0u8; 16]; + let mem_mgr = SandboxMemoryManager::new( + layout.clone(), + eshm, + RawPtr::from(0), + Offset::from(0), + stack_cookie, + ); + + let (mut mem_mgr_hshm, mut mem_mgr_gshm) = mem_mgr.build(); + + // Set up shared memory (page tables) + #[cfg(feature = "init-paging")] + let rsp = { + let mut regions = layout.get_memory_regions(&mem_mgr_gshm.shared_mem)?; + let mem_size = mem_mgr_gshm.shared_mem.mem_size() as u64; + mem_mgr_gshm.set_up_shared_memory(mem_size, &mut regions)? + }; + #[cfg(not(feature = "init-paging"))] + let rsp = 0; + + // Write code + let code_offset = layout.get_guest_code_offset(); + mem_mgr_hshm + .shared_mem + .copy_from_slice(&code, code_offset)?; + + // Get regions for VM + let regions = layout + .get_memory_regions(&mem_mgr_gshm.shared_mem)? + .into_iter() + .filter(|r| r.guest_region.len() > 0) + .collect(); + + // Calculate pml4_addr + #[cfg(feature = "init-paging")] + let pml4_addr = SandboxMemoryLayout::PML4_OFFSET as u64; + #[cfg(not(feature = "init-paging"))] + let pml4_addr = 0; + + // Entrypoint + let entrypoint = layout.get_guest_code_address() as u64; + + let mut vm = HyperlightVm::new( + regions, + pml4_addr, + entrypoint, + rsp, + &config, + #[cfg(gdb)] + None, + #[cfg(crashdump)] + rt_cfg, + #[cfg(feature = "mem_profile")] + trace_info, + )?; + + let host_funcs = Arc::new(Mutex::new(FunctionRegistry::default())); + #[cfg(gdb)] + let dbg_mem_access_fn = Arc::new(Mutex::new(mem_mgr_hshm.clone())); + + // Run the VM + vm.initialise( + RawPtr::from(0), // peb_addr + 0, // seed + 4096, // page_size + &mut mem_mgr_hshm, + &host_funcs, + None, + #[cfg(gdb)] + dbg_mem_access_fn.clone(), + )?; + + // After run, check registers + let regs = vm.vm.regs()?; + assert_eq!(regs.rax, 0x1111111111111111); + assert_eq!(regs.rbx, 0x2222222222222222); + assert_eq!(regs.rcx, 0x3333333333333333); + + // Reset vcpu + vm.reset_vcpu()?; + + // Check registers again + let regs = vm.vm.regs()?; + assert_eq!(regs.rax, 0); + assert_eq!(regs.rbx, 0); + assert_eq!(regs.rcx, 0); + } + + Ok(()) + } +} + #[cfg(gdb)] mod debug { use hyperlight_common::mem::PAGE_SIZE; diff --git a/src/hyperlight_host/src/hypervisor/hyperv_linux.rs b/src/hyperlight_host/src/hypervisor/hyperv_linux.rs index 8c6feb351..4086b9fbf 100644 --- a/src/hyperlight_host/src/hypervisor/hyperv_linux.rs +++ b/src/hyperlight_host/src/hypervisor/hyperv_linux.rs @@ -21,18 +21,23 @@ use std::sync::LazyLock; #[cfg(gdb)] use mshv_bindings::{DebugRegisters, hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT}; use mshv_bindings::{ - FloatingPointUnit, SpecialRegisters, StandardRegisters, hv_message_type, + FloatingPointUnit, HV_INTERCEPT_ACCESS_MASK_READ, HV_INTERCEPT_ACCESS_MASK_WRITE, + HV_INTERCEPT_ACCESS_READ, HV_INTERCEPT_ACCESS_WRITE, SpecialRegisters, StandardRegisters, + XSave, hv_intercept_type_HV_INTERCEPT_TYPE_X64_MSR, hv_message_type, hv_message_type_HVMSG_GPA_INTERCEPT, hv_message_type_HVMSG_UNMAPPED_GPA, hv_message_type_HVMSG_X64_HALT, hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT, + hv_message_type_HVMSG_X64_MSR_INTERCEPT, hv_partition_property_code_HV_PARTITION_PROPERTY_SYNTHETIC_PROC_FEATURES, hv_partition_synthetic_processor_features, hv_register_assoc, - hv_register_name_HV_X64_REGISTER_RIP, hv_register_value, mshv_user_mem_region, + hv_register_name_HV_X64_REGISTER_RIP, hv_register_value, mshv_install_intercept, + mshv_user_mem_region, }; use mshv_ioctls::{Mshv, VcpuFd, VmFd}; use tracing::{Span, instrument}; #[cfg(gdb)] use crate::hypervisor::gdb::DebuggableVm; +use crate::hypervisor::regs::CommonDebugRegs; use crate::hypervisor::{HyperlightExit, Hypervisor}; use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags}; use crate::{Result, new_error}; @@ -108,6 +113,7 @@ impl Hypervisor for MshvVm { hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT; const UNMAPPED_GPA_MESSAGE: hv_message_type = hv_message_type_HVMSG_UNMAPPED_GPA; const INVALID_GPA_ACCESS_MESSAGE: hv_message_type = hv_message_type_HVMSG_GPA_INTERCEPT; + const MSR_MESSAGE: hv_message_type = hv_message_type_HVMSG_X64_MSR_INTERCEPT; #[cfg(gdb)] const EXCEPTION_INTERCEPT: hv_message_type = hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT; @@ -152,6 +158,21 @@ impl Hypervisor for MshvVm { _ => HyperlightExit::Unknown("Unknown MMIO access".to_string()), } } + MSR_MESSAGE => { + let msr_message = m.to_msr_info().map_err(mshv_ioctls::MshvError::from)?; + let edx = msr_message.rdx; + let eax = msr_message.rax; + let written_value = (edx << 32) | eax; + let access = msr_message.header.intercept_access_type as u32; + match access { + HV_INTERCEPT_ACCESS_READ => HyperlightExit::MsrRead(msr_message.msr_number), + HV_INTERCEPT_ACCESS_WRITE => HyperlightExit::MsrWrite { + msr_index: msr_message.msr_number, + value: written_value, + }, + _ => HyperlightExit::Unknown(format!("Unknown MSR access type={}", access)), + } + } #[cfg(gdb)] EXCEPTION_INTERCEPT => { let ex_info = m @@ -208,11 +229,43 @@ impl Hypervisor for MshvVm { Ok(()) } + fn debug_regs(&self) -> Result { + let debug_regs = self.vcpu_fd.get_debug_regs()?; + Ok(debug_regs.into()) + } + + fn set_debug_regs(&self, drs: &CommonDebugRegs) -> Result<()> { + let mshv_debug_regs = drs.into(); + self.vcpu_fd.set_debug_regs(&mshv_debug_regs)?; + Ok(()) + } + #[cfg(crashdump)] fn xsave(&self) -> Result> { let xsave = self.vcpu_fd.get_xsave()?; Ok(xsave.buffer.to_vec()) } + + fn set_xsave(&self, xsave: &[u32; 1024]) -> Result<()> { + let mut buf = XSave::default(); + let (prefix, bytes, suffix) = unsafe { xsave.align_to() }; + assert!(prefix.is_empty()); + assert!(suffix.is_empty()); + buf.buffer.copy_from_slice(bytes); + self.vcpu_fd.set_xsave(&buf)?; + Ok(()) + } + + fn enable_msr_intercept(&mut self) -> Result<()> { + // Install MSR intercepts, will interrupt on all MSR accesses (READ & WRITE) + let intercept = mshv_install_intercept { + access_type_mask: HV_INTERCEPT_ACCESS_MASK_WRITE | HV_INTERCEPT_ACCESS_MASK_READ, + intercept_type: hv_intercept_type_HV_INTERCEPT_TYPE_X64_MSR, + intercept_parameter: Default::default(), + }; + self.vm_fd.install_intercept(intercept)?; + Ok(()) + } } #[cfg(gdb)] diff --git a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs index 76f6696bb..5996eee94 100644 --- a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs +++ b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs @@ -24,8 +24,8 @@ use windows::core::s; use windows_result::HRESULT; use super::regs::{ - Align16, WHP_FPU_NAMES, WHP_FPU_NAMES_LEN, WHP_REGS_NAMES, WHP_REGS_NAMES_LEN, WHP_SREGS_NAMES, - WHP_SREGS_NAMES_LEN, + Align16, WHP_DEBUG_REGS_NAMES, WHP_DEBUG_REGS_NAMES_LEN, WHP_FPU_NAMES, WHP_FPU_NAMES_LEN, + WHP_REGS_NAMES, WHP_REGS_NAMES_LEN, WHP_SREGS_NAMES, WHP_SREGS_NAMES_LEN, }; use super::surrogate_process::SurrogateProcess; use super::surrogate_process_manager::get_surrogate_process_manager; @@ -33,7 +33,7 @@ use super::wrappers::HandleWrapper; #[cfg(gdb)] use crate::hypervisor::gdb::DebuggableVm; use crate::hypervisor::regs::{CommonFpu, CommonRegisters, CommonSpecialRegisters}; -use crate::hypervisor::{HyperlightExit, Hypervisor}; +use crate::hypervisor::{CommonDebugRegs, HyperlightExit, Hypervisor}; use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags}; use crate::{Result, log_then_return, new_error}; @@ -254,6 +254,23 @@ impl Hypervisor for WhpVm { } // Execution was cancelled by the host. WHvRunVpExitReasonCanceled => HyperlightExit::Cancelled(), + // MSR access (read or write) - we configured all MSR writes to cause exits + WHvRunVpExitReasonX64MsrAccess => { + let msr_access = unsafe { exit_context.Anonymous.MsrAccess }; + let eax = msr_access.Rax; + let edx = msr_access.Rdx; + let written_value = (edx << 32) | eax; + let access = unsafe { msr_access.AccessInfo.AsUINT32 }; + // Missing from rust bindings for some reason, see https://github.com/MicrosoftDocs/Virtualization-Documentation/blob/265f685159dfd3b2fae8d1dcf1b7d206c31ee880/virtualization/api/hypervisor-platform/headers/WinHvPlatformDefs.h#L3020 + match access { + 0 => HyperlightExit::MsrRead(msr_access.MsrNumber), + 1 => HyperlightExit::MsrWrite { + msr_index: msr_access.MsrNumber, + value: written_value, + }, + _ => HyperlightExit::Unknown(format!("Unknown MSR access type={}", access)), + } + } #[cfg(gdb)] WHvRunVpExitReasonException => { let exception = unsafe { exit_context.Anonymous.VpException }; @@ -387,7 +404,41 @@ impl Hypervisor for WhpVm { Ok(()) } - #[cfg(crashdump)] + fn debug_regs(&self) -> Result { + let mut whp_debug_regs_values: [Align16; WHP_DEBUG_REGS_NAMES_LEN] = + unsafe { std::mem::zeroed() }; + + unsafe { + WHvGetVirtualProcessorRegisters( + self.partition, + 0, + WHP_DEBUG_REGS_NAMES.as_ptr(), + whp_debug_regs_values.len() as u32, + whp_debug_regs_values.as_mut_ptr() as *mut WHV_REGISTER_VALUE, + )?; + } + + WHP_DEBUG_REGS_NAMES + .into_iter() + .zip(whp_debug_regs_values) + .collect::)>>() + .as_slice() + .try_into() + .map_err(|e| { + new_error!( + "Failed to convert WHP registers to CommonDebugRegs: {:?}", + e + ) + }) + } + + fn set_debug_regs(&self, drs: &CommonDebugRegs) -> Result<()> { + let whp_regs: [(WHV_REGISTER_NAME, Align16); WHP_DEBUG_REGS_NAMES_LEN] = + drs.into(); + self.set_registers(&whp_regs)?; + Ok(()) + } + fn xsave(&self) -> Result> { use crate::HyperlightError; @@ -440,6 +491,113 @@ impl Hypervisor for WhpVm { Ok(xsave_buffer) } + fn set_xsave(&self, xsave: &[u32; 1024]) -> Result<()> { + use crate::HyperlightError; + + // Get the required buffer size by calling with NULL buffer. + // If the buffer is not large enough (0 won't be), WHvGetVirtualProcessorXsaveState returns + // WHV_E_INSUFFICIENT_BUFFER and sets buffer_size_needed to the required size. + let mut buffer_size_needed: u32 = 0; + + let result = unsafe { + WHvGetVirtualProcessorXsaveState( + self.partition, + 0, + std::ptr::null_mut(), + 0, + &mut buffer_size_needed, + ) + }; + + // Expect insufficient buffer error; any other error is unexpected + if let Err(e) = result + && e.code() != windows::Win32::Foundation::WHV_E_INSUFFICIENT_BUFFER + { + return Err(HyperlightError::WindowsAPIError(e)); + } + + let provided_size = std::mem::size_of_val(xsave) as u32; + if buffer_size_needed > provided_size { + return Err(new_error!( + "Xsave buffer too small: needed {} bytes, provided {} bytes", + buffer_size_needed, + provided_size + )); + } + + // Check if input is all zeros (reset request) + let is_zero_reset = xsave.iter().all(|&x| x == 0); + + if is_zero_reset { + // Create a buffer to hold the current state (to get the correct XCOMP_BV) + let mut current_state = vec![0u8; buffer_size_needed as usize]; + let mut written_bytes = 0; + unsafe { + WHvGetVirtualProcessorXsaveState( + self.partition, + 0, + current_state.as_mut_ptr() as *mut std::ffi::c_void, + buffer_size_needed, + &mut written_bytes, + ) + }?; + + // Zero out everything except XCOMP_BV (bytes 520-528) + // Legacy area (0-512) -> 0 + // XSTATE_BV (512-520) -> 0 + // XCOMP_BV (520-528) -> KEEP + // Reserved & Extended (528..) -> 0 + if buffer_size_needed >= 528 { + current_state[0..520].fill(0); + current_state[528..].fill(0); + } else { + // Should not happen for x64, but if it does, just zero everything + current_state.fill(0); + } + + unsafe { + WHvSetVirtualProcessorXsaveState( + self.partition, + 0, + current_state.as_ptr() as *const std::ffi::c_void, + buffer_size_needed, + ) + .map_err(|e| new_error!("Failed to set Xsave state (reset): {:?}", e))?; + } + } else { + unsafe { + WHvSetVirtualProcessorXsaveState( + self.partition, + 0, + xsave.as_ptr() as *const std::ffi::c_void, + buffer_size_needed, + ) + .map_err(|e| new_error!("Failed to set Xsave state: {:?}", e))?; + } + } + Ok(()) + } + + fn enable_msr_intercept(&mut self) -> Result<()> { + // Enable MSR exits through Extended VM Exits + // Note: This must be set BEFORE WHvSetupPartition for WHP, so this implementation + // is a no-op since the setup is already done in the constructor. + // For WHP, MSR intercepts must be enabled during partition creation. + // X64MsrExit bit position, missing from rust bindings for some reason. + // See https://github.com/MicrosoftDocs/Virtualization-Documentation/blob/265f685159dfd3b2fae8d1dcf1b7d206c31ee880/virtualization/api/hypervisor-platform/headers/WinHvPlatformDefs.h#L1495 + let mut extended_exits_property = WHV_PARTITION_PROPERTY::default(); + extended_exits_property.ExtendedVmExits.AsUINT64 = 1 << 1; + unsafe { + WHvSetPartitionProperty( + self.partition, + WHvPartitionPropertyCodeExtendedVmExits, + &extended_exits_property as *const _ as *const _, + std::mem::size_of::() as _, + )?; + } + Ok(()) + } + /// Mark that initial memory setup is complete. After this, map_memory will fail. fn complete_initial_memory_setup(&mut self) { self.initial_memory_setup_done = true; @@ -529,137 +687,46 @@ impl DebuggableVm for WhpVm { use crate::hypervisor::gdb::arch::MAX_NO_OF_HW_BP; // Get current debug registers - const LEN: usize = 6; - let names: [WHV_REGISTER_NAME; LEN] = [ - WHvX64RegisterDr0, - WHvX64RegisterDr1, - WHvX64RegisterDr2, - WHvX64RegisterDr3, - WHvX64RegisterDr6, - WHvX64RegisterDr7, - ]; - - let mut out: [Align16; LEN] = unsafe { std::mem::zeroed() }; - unsafe { - WHvGetVirtualProcessorRegisters( - self.partition, - 0, - names.as_ptr(), - LEN as u32, - out.as_mut_ptr() as *mut WHV_REGISTER_VALUE, - )?; - } - - let mut dr0 = unsafe { out[0].0.Reg64 }; - let mut dr1 = unsafe { out[1].0.Reg64 }; - let mut dr2 = unsafe { out[2].0.Reg64 }; - let mut dr3 = unsafe { out[3].0.Reg64 }; - let mut dr7 = unsafe { out[5].0.Reg64 }; + let mut regs = self.debug_regs()?; // Check if breakpoint already exists - if [dr0, dr1, dr2, dr3].contains(&addr) { + if [regs.dr0, regs.dr1, regs.dr2, regs.dr3].contains(&addr) { return Ok(()); } // Find the first available LOCAL (L0–L3) slot let i = (0..MAX_NO_OF_HW_BP) - .position(|i| dr7 & (1 << (i * 2)) == 0) + .position(|i| regs.dr7 & (1 << (i * 2)) == 0) .ok_or_else(|| new_error!("Tried to add more than 4 hardware breakpoints"))?; // Assign to corresponding debug register - *[&mut dr0, &mut dr1, &mut dr2, &mut dr3][i] = addr; + *[&mut regs.dr0, &mut regs.dr1, &mut regs.dr2, &mut regs.dr3][i] = addr; // Enable LOCAL bit - dr7 |= 1 << (i * 2); + regs.dr7 |= 1 << (i * 2); - // Set the debug registers - let registers = vec![ - ( - WHvX64RegisterDr0, - Align16(WHV_REGISTER_VALUE { Reg64: dr0 }), - ), - ( - WHvX64RegisterDr1, - Align16(WHV_REGISTER_VALUE { Reg64: dr1 }), - ), - ( - WHvX64RegisterDr2, - Align16(WHV_REGISTER_VALUE { Reg64: dr2 }), - ), - ( - WHvX64RegisterDr3, - Align16(WHV_REGISTER_VALUE { Reg64: dr3 }), - ), - ( - WHvX64RegisterDr7, - Align16(WHV_REGISTER_VALUE { Reg64: dr7 }), - ), - ]; - self.set_registers(®isters)?; + self.set_debug_regs(®s)?; Ok(()) } fn remove_hw_breakpoint(&mut self, addr: u64) -> Result<()> { // Get current debug registers - const LEN: usize = 6; - let names: [WHV_REGISTER_NAME; LEN] = [ - WHvX64RegisterDr0, - WHvX64RegisterDr1, - WHvX64RegisterDr2, - WHvX64RegisterDr3, - WHvX64RegisterDr6, - WHvX64RegisterDr7, - ]; - - let mut out: [Align16; LEN] = unsafe { std::mem::zeroed() }; - unsafe { - WHvGetVirtualProcessorRegisters( - self.partition, - 0, - names.as_ptr(), - LEN as u32, - out.as_mut_ptr() as *mut WHV_REGISTER_VALUE, - )?; - } + let mut debug_regs = self.debug_regs()?; - let mut dr0 = unsafe { out[0].0.Reg64 }; - let mut dr1 = unsafe { out[1].0.Reg64 }; - let mut dr2 = unsafe { out[2].0.Reg64 }; - let mut dr3 = unsafe { out[3].0.Reg64 }; - let mut dr7 = unsafe { out[5].0.Reg64 }; - - let regs = [&mut dr0, &mut dr1, &mut dr2, &mut dr3]; + let regs = [ + &mut debug_regs.dr0, + &mut debug_regs.dr1, + &mut debug_regs.dr2, + &mut debug_regs.dr3, + ]; if let Some(i) = regs.iter().position(|&&mut reg| reg == addr) { // Clear the address *regs[i] = 0; // Disable LOCAL bit - dr7 &= !(1 << (i * 2)); - - // Set the debug registers - let registers = vec![ - ( - WHvX64RegisterDr0, - Align16(WHV_REGISTER_VALUE { Reg64: dr0 }), - ), - ( - WHvX64RegisterDr1, - Align16(WHV_REGISTER_VALUE { Reg64: dr1 }), - ), - ( - WHvX64RegisterDr2, - Align16(WHV_REGISTER_VALUE { Reg64: dr2 }), - ), - ( - WHvX64RegisterDr3, - Align16(WHV_REGISTER_VALUE { Reg64: dr3 }), - ), - ( - WHvX64RegisterDr7, - Align16(WHV_REGISTER_VALUE { Reg64: dr7 }), - ), - ]; - self.set_registers(®isters)?; + debug_regs.dr7 &= !(1 << (i * 2)); + + self.set_debug_regs(&debug_regs)?; Ok(()) } else { Err(new_error!("Tried to remove non-existing hw-breakpoint")) diff --git a/src/hyperlight_host/src/hypervisor/kvm.rs b/src/hyperlight_host/src/hypervisor/kvm.rs index 037b60fc1..144b5307e 100644 --- a/src/hyperlight_host/src/hypervisor/kvm.rs +++ b/src/hyperlight_host/src/hypervisor/kvm.rs @@ -18,13 +18,19 @@ use std::sync::LazyLock; #[cfg(gdb)] use kvm_bindings::kvm_guest_debug; -use kvm_bindings::{kvm_fpu, kvm_regs, kvm_sregs, kvm_userspace_memory_region}; +use kvm_bindings::{ + KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_FILTER, KVM_MSR_FILTER_DEFAULT_DENY, + KVM_MSR_FILTER_READ, KVM_MSR_FILTER_WRITE, kvm_debugregs, kvm_enable_cap, kvm_fpu, + kvm_msr_filter, kvm_msr_filter_range, kvm_regs, kvm_sregs, kvm_userspace_memory_region, + kvm_xsave, +}; use kvm_ioctls::Cap::UserMemory; use kvm_ioctls::{Kvm, VcpuExit, VcpuFd, VmFd}; use tracing::{Span, instrument}; #[cfg(gdb)] use crate::hypervisor::gdb::DebuggableVm; +use crate::hypervisor::regs::CommonDebugRegs; use crate::hypervisor::{HyperlightExit, Hypervisor}; use crate::mem::memory_region::MemoryRegion; use crate::{Result, new_error}; @@ -109,6 +115,42 @@ impl Hypervisor for KvmVm { Ok(VcpuExit::IoOut(port, data)) => Ok(HyperlightExit::IoOut(port, data.to_vec())), Ok(VcpuExit::MmioRead(addr, _)) => Ok(HyperlightExit::MmioRead(addr)), Ok(VcpuExit::MmioWrite(addr, _)) => Ok(HyperlightExit::MmioWrite(addr)), + /* Note from KVM docs: + For KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR the corresponding operations are complete (and guest state is consistent) + only after userspace has re-entered the kernel with KVM_RUN. The kernel side will first finish incomplete operations and then check for pending signals. + The pending state of the operation is not preserved in state which is visible to userspace, thus userspace should ensure that the operation + is completed before performing a live migration. Userspace can re-enter the guest with an unmasked signal pending or with the immediate_exit field + set to complete pending operations without allowing any further instructions to be executed. + */ + Ok(VcpuExit::X86Rdmsr(msr_exit)) => { + let msr_index = msr_exit.index; + *msr_exit.error = 1; + self.vcpu_fd.set_kvm_immediate_exit(1); + let exit = self.vcpu_fd.run().unwrap_err(); + self.vcpu_fd.set_kvm_immediate_exit(0); + if exit.errno() != libc::EINTR { + return Err(new_error!( + "Expected EINTR after immediate exit run, got {:?}", + exit + )); + } + Ok(HyperlightExit::MsrRead(msr_index)) + } + Ok(VcpuExit::X86Wrmsr(msr_exit)) => { + let msr_index = msr_exit.index; + let value = msr_exit.data; + *msr_exit.error = 1; + self.vcpu_fd.set_kvm_immediate_exit(1); + let exit = self.vcpu_fd.run().unwrap_err(); + self.vcpu_fd.set_kvm_immediate_exit(0); + if exit.errno() != libc::EINTR { + return Err(new_error!( + "Expected EINTR after immediate exit run, got {:?}", + exit + )); + } + Ok(HyperlightExit::MsrWrite { msr_index, value }) + } #[cfg(gdb)] Ok(VcpuExit::Debug(debug_exit)) => Ok(HyperlightExit::Debug { dr6: debug_exit.dr6, @@ -163,6 +205,17 @@ impl Hypervisor for KvmVm { Ok(()) } + fn debug_regs(&self) -> Result { + let kvm_debug_regs = self.vcpu_fd.get_debug_regs()?; + Ok(kvm_debug_regs.into()) + } + + fn set_debug_regs(&self, drs: &CommonDebugRegs) -> Result<()> { + let kvm_debug_regs: kvm_debugregs = drs.into(); + self.vcpu_fd.set_debug_regs(&kvm_debug_regs)?; + Ok(()) + } + #[cfg(crashdump)] fn xsave(&self) -> Result> { let xsave = self.vcpu_fd.get_xsave()?; @@ -172,6 +225,52 @@ impl Hypervisor for KvmVm { .flat_map(u32::to_le_bytes) .collect()) } + + fn set_xsave(&self, xsave: &[u32; 1024]) -> Result<()> { + let xsave = kvm_xsave { + region: *xsave, + ..Default::default() + }; + // Safety: Safe because we only copy 4096 bytes + unsafe { self.vcpu_fd.set_xsave(&xsave)? }; + + Ok(()) + } + + fn enable_msr_intercept(&mut self) -> Result<()> { + let cap = kvm_enable_cap { + cap: KVM_CAP_X86_USER_SPACE_MSR, + flags: 0, + args: [KVM_MSR_EXIT_REASON_FILTER as u64, 0, 0, 0], + pad: [0; 64], + }; + self.vm_fd.enable_cap(&cap)?; + + // Deny all MSR accesses using KVM_MSR_FILTER_DEFAULT_DENY. + // We need at least one range (even a minimal dummy range) because + // "Calling this ioctl with an empty set of ranges (all nmsrs == 0) disables MSR filtering. + // In that mode, KVM_MSR_FILTER_DEFAULT_DENY is invalid and causes an error." + // So we create a minimal range covering just 1 MSR with a bitmap of 0 (deny). + // All other MSRs will be denied by the default policy. + let mut bitmap = vec![0u8; 1]; // 1 byte covers 8 MSRs, all bits set to 0 (deny) + let mut ranges = [kvm_msr_filter_range::default(); 16]; + ranges[0] = kvm_msr_filter_range { + flags: KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE, + nmsrs: 1, // Cover just 1 MSR + base: 0, // Starting at MSR index 0 + bitmap: bitmap.as_mut_ptr(), + }; + + let filter = kvm_msr_filter { + flags: KVM_MSR_FILTER_DEFAULT_DENY, + ranges, + }; + // Safety: The bitmap pointer is valid and nmsrs is set correctly + // TODO replace with safe version + unsafe { self.vm_fd.set_msr_filter_unchecked(&filter)? }; + + Ok(()) + } } #[cfg(gdb)] diff --git a/src/hyperlight_host/src/hypervisor/mod.rs b/src/hyperlight_host/src/hypervisor/mod.rs index 573e2acb8..84798beeb 100644 --- a/src/hyperlight_host/src/hypervisor/mod.rs +++ b/src/hyperlight_host/src/hypervisor/mod.rs @@ -17,7 +17,9 @@ limitations under the License. use log::LevelFilter; use crate::Result; -use crate::hypervisor::regs::{CommonFpu, CommonRegisters, CommonSpecialRegisters}; +use crate::hypervisor::regs::{ + CommonDebugRegs, CommonFpu, CommonRegisters, CommonSpecialRegisters, +}; use crate::mem::memory_region::MemoryRegion; /// HyperV-on-linux functionality @@ -73,6 +75,10 @@ pub(crate) enum HyperlightExit { MmioRead(u64), /// The vCPU tried to write to the given (unmapped) addr MmioWrite(u64), + /// The vCPU tried to read from the given MSR + MsrRead(u32), + /// The vCPU tried to write to the given MSR with the given value + MsrWrite { msr_index: u32, value: u64 }, /// The vCPU execution has been cancelled Cancelled(), /// The vCPU has exited for a reason that is not handled by Hyperlight @@ -125,8 +131,19 @@ pub(crate) trait Hypervisor: Debug + Send { fn set_sregs(&self, sregs: &CommonSpecialRegisters) -> Result<()>; /// xsave - #[cfg(crashdump)] + #[cfg(any(crashdump, target_os = "windows"))] fn xsave(&self) -> Result>; + /// Set xsave + fn set_xsave(&self, xsave: &[u32; 1024]) -> Result<()>; + + /// Get the debug registers of the vCPU + #[allow(dead_code)] + fn debug_regs(&self) -> Result; + /// Set the debug registers of the vCPU + fn set_debug_regs(&self, drs: &CommonDebugRegs) -> Result<()>; + + /// Enable MSR intercepts to trap all MSR accesses (read and write). + fn enable_msr_intercept(&mut self) -> Result<()>; /// Get partition handle #[cfg(target_os = "windows")] diff --git a/src/hyperlight_host/src/hypervisor/regs.rs b/src/hyperlight_host/src/hypervisor/regs.rs index d29edf4bf..5d940ba69 100644 --- a/src/hyperlight_host/src/hypervisor/regs.rs +++ b/src/hyperlight_host/src/hypervisor/regs.rs @@ -14,6 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ +mod debug_regs; mod fpu; mod special_regs; mod standard_regs; @@ -21,6 +22,7 @@ mod standard_regs; #[cfg(target_os = "windows")] use std::collections::HashSet; +pub(crate) use debug_regs::*; pub(crate) use fpu::*; pub(crate) use special_regs::*; pub(crate) use standard_regs::*; diff --git a/src/hyperlight_host/src/mem/layout.rs b/src/hyperlight_host/src/mem/layout.rs index e08e256af..8fd371ceb 100644 --- a/src/hyperlight_host/src/mem/layout.rs +++ b/src/hyperlight_host/src/mem/layout.rs @@ -257,7 +257,7 @@ impl SandboxMemoryLayout { /// Create a new `SandboxMemoryLayout` with the given /// `SandboxConfiguration`, code size and stack/heap size. #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] - pub(super) fn new( + pub(crate) fn new( cfg: SandboxConfiguration, code_size: usize, stack_size: usize, @@ -476,7 +476,7 @@ impl SandboxMemoryLayout { /// get the code offset /// This is the offset in the sandbox memory where the code starts #[instrument(skip_all, parent = Span::current(), level= "Trace")] - pub(super) fn get_guest_code_offset(&self) -> usize { + pub(crate) fn get_guest_code_offset(&self) -> usize { self.guest_code_offset } @@ -541,7 +541,7 @@ impl SandboxMemoryLayout { /// Get the total size of guest memory in `self`'s memory /// layout aligned to page size boundaries. #[instrument(skip_all, parent = Span::current(), level= "Trace")] - pub(super) fn get_memory_size(&self) -> Result { + pub(crate) fn get_memory_size(&self) -> Result { let total_memory = self.get_unaligned_memory_size(); // Size should be a multiple of page size. diff --git a/src/hyperlight_host/src/sandbox/config.rs b/src/hyperlight_host/src/sandbox/config.rs index dad3420bb..5c6f19fbb 100644 --- a/src/hyperlight_host/src/sandbox/config.rs +++ b/src/hyperlight_host/src/sandbox/config.rs @@ -84,6 +84,8 @@ pub struct SandboxConfiguration { /// Note: Since real-time signals can vary across platforms, ensure that the offset /// results in a signal number that is not already in use by other components of the system. interrupt_vcpu_sigrtmin_offset: u8, + /// Allow MSR (Model Specific Register) access in guest. This is disabled by default for security reasons. + allow_msr: bool, } impl SandboxConfiguration { @@ -119,6 +121,7 @@ impl SandboxConfiguration { interrupt_vcpu_sigrtmin_offset: u8, #[cfg(gdb)] guest_debug_info: Option, #[cfg(crashdump)] guest_core_dump: bool, + allow_msr: bool, ) -> Self { Self { input_data_size: max(input_data_size, Self::MIN_INPUT_SIZE), @@ -135,6 +138,7 @@ impl SandboxConfiguration { guest_debug_info, #[cfg(crashdump)] guest_core_dump, + allow_msr, } } @@ -227,6 +231,22 @@ impl SandboxConfiguration { self.guest_debug_info = Some(debug_info); } + /// Set whether MSR access is allowed. By default, MSR access is disabled for security reasons. + /// + /// # Safety + /// + /// If enabled, restoring a snapshot will not reset MSRs set by the guest. + /// This can result in data leaks between different guest executions (across a snapshot restore). + #[instrument(skip_all, parent = Span::current(), level= "Trace")] + pub unsafe fn set_allow_msr(&mut self, allow_msr: bool) { + self.allow_msr = allow_msr; + } + + #[instrument(skip_all, parent = Span::current(), level= "Trace")] + pub(crate) fn get_allow_msr(&self) -> bool { + self.allow_msr + } + #[instrument(skip_all, parent = Span::current(), level= "Trace")] pub(crate) fn get_host_function_definition_size(&self) -> usize { self.host_function_definition_size @@ -296,6 +316,7 @@ impl Default for SandboxConfiguration { None, #[cfg(crashdump)] true, + false, ) } } @@ -324,6 +345,7 @@ mod tests { None, #[cfg(crashdump)] true, + false, ); let exe_info = simple_guest_exe_info().unwrap(); @@ -358,6 +380,7 @@ mod tests { None, #[cfg(crashdump)] true, + false, ); assert_eq!(SandboxConfiguration::MIN_INPUT_SIZE, cfg.input_data_size); assert_eq!(SandboxConfiguration::MIN_OUTPUT_SIZE, cfg.output_data_size); diff --git a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs index d40ad9b2b..e83e507ac 100644 --- a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs +++ b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs @@ -287,6 +287,8 @@ impl MultiUseSandbox { unsafe { self.vm.map_region(region)? }; } + self.vm.reset_vcpu()?; + // The restored snapshot is now our most current snapshot self.snapshot = Some(snapshot.clone()); @@ -789,6 +791,7 @@ mod tests { use hyperlight_common::flatbuffer_wrappers::guest_error::ErrorCode; use hyperlight_testing::simple_guest_as_string; + use rand::seq::IteratorRandom; #[cfg(target_os = "linux")] use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags, MemoryRegionType}; @@ -1297,4 +1300,80 @@ mod tests { }; assert_ne!(sandbox3.id, sandbox_id); } + + // Makes sure that reading and writing MSRs results in appropriate errors + #[test] + fn test_read_write_msr() { + let value: u64 = 0x0; + + const N: usize = 100; + + let msr_numbers = (0x0u32..0x20u32) + .chain(0x40000000u32..0x40000100u32) + .chain(0x80000000u32..0x80002000u32) + .chain(0xC0000000u32..0xC0002000u32) + .chain(0xC0010000u32..0xC0012000u32) + .choose_multiple(&mut rand::rng(), N); + + let mut sbox = UninitializedSandbox::new( + GuestBinary::FilePath(simple_guest_as_string().expect("Guest Binary Missing")), + None, + ) + .unwrap() + .evolve() + .unwrap(); + let snapshot = sbox.snapshot().unwrap(); + + for msr_number in msr_numbers { + // read the msr + let err = sbox.call::("ReadMSR", msr_number).unwrap_err(); + assert!( + matches!(err, HyperlightError::MsrReadViolation(msr) if msr == msr_number), + "got error: {:?}", + err + ); + assert!(sbox.poisoned()); + sbox.restore(&snapshot).unwrap(); + + // write the msr + let err = sbox + .call::<()>("WriteMSR", (msr_number, value)) + .unwrap_err(); + assert!(sbox.poisoned()); + + sbox.restore(&snapshot).unwrap(); + assert!( + matches!(err, HyperlightError::MsrWriteViolation(msr_idx, value) if msr_idx == msr_number && value == value), + "got error: {:?}", + err + ); + } + + // Also try case where MSR access is allowed (should succeed) + let mut cfg = SandboxConfiguration::default(); + unsafe { cfg.set_allow_msr(true) }; + + let mut sbox = UninitializedSandbox::new( + GuestBinary::FilePath(simple_guest_as_string().expect("Guest Binary Missing")), + Some(cfg), + ) + .unwrap() + .evolve() + .unwrap(); + + let msr_index: u32 = 0xC0000102; // IA32_KERNEL_GS_BASE + let value: u64 = 0x5; + + sbox.call::<()>("WriteMSR", (msr_index, value)).unwrap(); + let read_value: u64 = sbox.call("ReadMSR", msr_index).unwrap(); + assert_eq!(read_value, value); + assert!(!sbox.poisoned()); + + let value: u64 = 0x8; + + sbox.call::<()>("WriteMSR", (msr_index, value)).unwrap(); + let read_value: u64 = sbox.call("ReadMSR", msr_index).unwrap(); + assert_eq!(read_value, value); + assert!(!sbox.poisoned()); + } } diff --git a/src/tests/rust_guests/simpleguest/src/main.rs b/src/tests/rust_guests/simpleguest/src/main.rs index 1cef4a858..592194d55 100644 --- a/src/tests/rust_guests/simpleguest/src/main.rs +++ b/src/tests/rust_guests/simpleguest/src/main.rs @@ -552,6 +552,40 @@ fn add(a: i32, b: i32) -> Result { host_add(a, b) } +#[guest_function("ReadMSR")] +fn read_msr(msr: u32) -> u64 { + let (read_eax, read_edx): (u32, u32); + unsafe { + core::arch::asm!( + "rdmsr", + in("ecx") msr, + out("eax") read_eax, + out("edx") read_edx, + options(nostack, nomem) + ); + } + + let read_value = ((read_edx as u64) << 32) | (read_eax as u64); + read_value +} + +#[guest_function("WriteMSR")] +fn write_msr(msr: u32, value: u64) { + // Split 64-bit value into EDX:EAX format for WRMSR + let eax = (value & 0xFFFFFFFF) as u32; + let edx = ((value >> 32) & 0xFFFFFFFF) as u32; + + unsafe { + core::arch::asm!( + "wrmsr", + in("ecx") msr, + in("eax") eax, + in("edx") edx, + options(nostack, nomem) + ); + } +} + // Does nothing, but used for testing large parameters #[guest_function("LargeParameters")] fn large_parameters(v: Vec, s: String) {