diff --git a/Cargo.lock b/Cargo.lock index 368f8e50..9060a5ab 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -268,6 +268,12 @@ version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + [[package]] name = "humantime" version = "2.1.0" @@ -375,6 +381,7 @@ dependencies = [ "krun-sys", "log", "nix", + "procfs", "rustix", "serde", "serde_json", @@ -440,6 +447,28 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "procfs" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc5b72d8145275d844d4b5f6d4e1eef00c8cd889edb6035c21675d1bb1f45c9f" +dependencies = [ + "bitflags", + "hex", + "procfs-core", + "rustix", +] + +[[package]] +name = "procfs-core" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "239df02d8349b06fc07398a3a1697b06418223b1c7725085e801e7c0fc6a12ec" +dependencies = [ + "bitflags", + "hex", +] + [[package]] name = "quote" version = "1.0.36" @@ -661,6 +690,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314" dependencies = [ "getrandom", + "serde", ] [[package]] diff --git a/crates/muvm/Cargo.toml b/crates/muvm/Cargo.toml index e244c959..b5442f69 100644 --- a/crates/muvm/Cargo.toml +++ b/crates/muvm/Cargo.toml @@ -16,13 +16,14 @@ env_logger = { version = "0.11.3", default-features = false, features = ["auto-c krun-sys = { path = "../krun-sys", version = "1.9.1", default-features = false, features = [] } log = { version = "0.4.21", default-features = false, features = ["kv"] } nix = { version = "0.28.0", default-features = false, features = ["user"] } +procfs = { version = "0.17.0", default-features = false, features = [] } rustix = { version = "0.38.34", default-features = false, features = ["fs", "mount", "process", "std", "stdio", "system", "use-libc-auxv"] } serde = { version = "1.0.203", default-features = false, features = ["derive"] } serde_json = { version = "1.0.117", default-features = false, features = ["std"] } tempfile = { version = "3.10.1", default-features = false, features = [] } tokio = { version = "1.38.0", default-features = false, features = ["io-util", "macros", "net", "process", "rt-multi-thread", "sync"] } tokio-stream = { version = "0.1.15", default-features = false, features = ["net", "sync"] } -uuid = { version = "1.10.0", default-features = false, features = ["std", "v7"] } +uuid = { version = "1.10.0", default-features = false, features = ["serde", "std", "v7"] } [features] default = [] diff --git a/crates/muvm/src/bin/muvm.rs b/crates/muvm/src/bin/muvm.rs index 64375725..15bc6bc6 100644 --- a/crates/muvm/src/bin/muvm.rs +++ b/crates/muvm/src/bin/muvm.rs @@ -16,6 +16,7 @@ use muvm::cli_options::options; use muvm::cpu::{get_fallback_cores, get_performance_cores}; use muvm::env::{find_muvm_exec, prepare_env_vars}; use muvm::launch::{launch_or_lock, LaunchResult}; +use muvm::monitor::spawn_monitor; use muvm::net::{connect_to_passt, start_passt}; use muvm::types::MiB; use nix::sys::sysinfo::sysinfo; @@ -67,7 +68,7 @@ fn main() -> Result<()> { let options = options().fallback_to_usage().run(); - let (_lock_file, command, command_args, env) = match launch_or_lock( + let (cookie, _lock_file, command, command_args, env) = match launch_or_lock( options.server_port, options.command, options.command_args, @@ -79,11 +80,12 @@ fn main() -> Result<()> { return Ok(()); }, LaunchResult::LockAcquired { + cookie, lock_file, command, command_args, env, - } => (lock_file, command, command_args, env), + } => (cookie, lock_file, command, command_args, env), }; { @@ -253,7 +255,7 @@ fn main() -> Result<()> { .context("Failed to connect to `passt`")? .into() } else { - start_passt(options.server_port) + start_passt(options.server_port, options.root_server_port) .context("Failed to start `passt`")? .into() }; @@ -374,6 +376,11 @@ fn main() -> Result<()> { "MUVM_SERVER_PORT".to_owned(), options.server_port.to_string(), ); + env.insert( + "MUVM_ROOT_SERVER_PORT".to_owned(), + options.root_server_port.to_string(), + ); + env.insert("MUVM_SERVER_COOKIE".to_owned(), cookie.to_string()); if options.direct_x11 { let display = @@ -429,6 +436,8 @@ fn main() -> Result<()> { } } + spawn_monitor(options.root_server_port, cookie); + { // Start and enter the microVM. Unless there is some error while creating the // microVM this function never returns. diff --git a/crates/muvm/src/cli_options.rs b/crates/muvm/src/cli_options.rs index ce3d44e2..1d5950b2 100644 --- a/crates/muvm/src/cli_options.rs +++ b/crates/muvm/src/cli_options.rs @@ -13,6 +13,7 @@ pub struct Options { pub mem: Option, pub vram: Option, pub passt_socket: Option, + pub root_server_port: u32, pub server_port: u32, pub fex_images: Vec, pub direct_x11: bool, @@ -93,6 +94,12 @@ pub fn options() -> OptionParser { .help("Instead of starting passt, connect to passt socket at PATH") .argument("PATH") .optional(); + let root_server_port = long("root-server-port") + .short('r') + .help("Set the port to be used in root server mode") + .argument("ROOT_SERVER_PORT") + .fallback(3335) + .display_fallback(); let server_port = long("server-port") .short('p') .help("Set the port to be used in server mode") @@ -117,6 +124,7 @@ pub fn options() -> OptionParser { mem, vram, passt_socket, + root_server_port, server_port, fex_images, direct_x11, diff --git a/crates/muvm/src/guest/bin/muvm-guest.rs b/crates/muvm/src/guest/bin/muvm-guest.rs index 65fb157c..887d5c84 100644 --- a/crates/muvm/src/guest/bin/muvm-guest.rs +++ b/crates/muvm/src/guest/bin/muvm-guest.rs @@ -62,6 +62,12 @@ fn main() -> Result<()> { .spawn()?; } + // Before switching to the user, start another instance of muvm-server to serve + // launch requests as root. + Command::new("muvm-server") + .spawn() + .context("Failed to execute `muvm-server` as child process")?; + let run_path = match setup_user(options.username, options.uid, options.gid) { Ok(p) => p, Err(err) => return Err(err).context("Failed to set up user, bailing out"), diff --git a/crates/muvm/src/launch.rs b/crates/muvm/src/launch.rs index d4875e9b..69e4948b 100644 --- a/crates/muvm/src/launch.rs +++ b/crates/muvm/src/launch.rs @@ -2,14 +2,14 @@ use std::collections::HashMap; use std::env; use std::error::Error; use std::fmt::{Display, Formatter}; -use std::fs::File; +use std::fs::{self, File}; use std::io::{BufRead, BufReader, Read, Write}; use std::net::TcpStream; use std::path::{Path, PathBuf}; use anyhow::{anyhow, Context, Result}; use rustix::fs::{flock, FlockOperation}; -use rustix::path::Arg; +use uuid::Uuid; use crate::env::prepare_env_vars; use crate::utils::launch::Launch; @@ -17,6 +17,7 @@ use crate::utils::launch::Launch; pub enum LaunchResult { LaunchRequested, LockAcquired { + cookie: Uuid, lock_file: File, command: PathBuf, command_args: Vec, @@ -59,53 +60,63 @@ pub fn launch_or_lock( if let Some(port) = running_server_port { let port: u32 = port.parse()?; let env = prepare_env_vars(env)?; - if let Err(err) = request_launch(port, command, command_args, env) { + let cookie = read_cookie()?; + if let Err(err) = request_launch(port, cookie, command, command_args, env) { return Err(anyhow!("could not request launch to server: {err}")); } return Ok(LaunchResult::LaunchRequested); } - let (lock_file, running_server_port) = lock_file(server_port)?; + let (lock_file, cookie) = lock_file()?; match lock_file { Some(lock_file) => Ok(LaunchResult::LockAcquired { + cookie, lock_file, command, command_args, env, }), None => { - if let Some(port) = running_server_port { - let env = prepare_env_vars(env)?; - let mut tries = 0; - loop { - match request_launch(port, command.clone(), command_args.clone(), env.clone()) { - Err(err) => match err.downcast_ref::() { - Some(&LaunchError::Connection(_)) => { - if tries == 3 { - return Err(anyhow!( - "could not request launch to server: {err}" - )); - } else { - tries += 1; - } - }, - _ => { + let env = prepare_env_vars(env)?; + let mut tries = 0; + loop { + match request_launch( + server_port, + cookie, + command.clone(), + command_args.clone(), + env.clone(), + ) { + Err(err) => match err.downcast_ref::() { + Some(&LaunchError::Connection(_)) => { + if tries == 3 { return Err(anyhow!("could not request launch to server: {err}")); - }, + } else { + tries += 1; + } }, - Ok(_) => return Ok(LaunchResult::LaunchRequested), - } + _ => { + return Err(anyhow!("could not request launch to server: {err}")); + }, + }, + Ok(_) => return Ok(LaunchResult::LaunchRequested), } - } else { - Err(anyhow!( - "muvm is already running but couldn't find its server port, bailing out" - )) } }, } } -fn lock_file(server_port: u32) -> Result<(Option, Option)> { +fn read_cookie() -> Result { + let run_path = env::var("XDG_RUNTIME_DIR") + .context("Failed to read XDG_RUNTIME_DIR environment variable")?; + let lock_path = Path::new(&run_path).join("muvm.lock"); + let data: Vec = fs::read(lock_path).context("Failed to read lock file")?; + assert!(data.len() == 16); + + Uuid::from_slice(&data).context("Failed to read cookie from lock file") +} + +fn lock_file() -> Result<(Option, Uuid)> { let run_path = env::var("XDG_RUNTIME_DIR") .context("Failed to read XDG_RUNTIME_DIR environment variable")?; let lock_path = Path::new(&run_path).join("muvm.lock"); @@ -123,30 +134,23 @@ fn lock_file(server_port: u32) -> Result<(Option, Option)> { .context("Failed to create lock file")?; let ret = flock(&lock_file, FlockOperation::NonBlockingLockExclusive); if ret.is_err() { - let mut data: Vec = Vec::with_capacity(5); + let mut data: Vec = Vec::with_capacity(16); lock_file.read_to_end(&mut data)?; - let port = match data.to_string_lossy().parse::() { - Ok(port) => { - if port > 1024 { - Some(port) - } else { - None - } - }, - Err(_) => None, - }; - return Ok((None, port)); + let cookie = Uuid::from_slice(&data).context("Failed to read cookie from lock file")?; + return Ok((None, cookie)); } lock_file }; + let cookie = Uuid::now_v7(); lock_file.set_len(0)?; - lock_file.write_all(format!("{server_port}").as_bytes())?; - Ok((Some(lock_file), None)) + lock_file.write_all(cookie.as_bytes())?; + Ok((Some(lock_file), cookie)) } -fn request_launch( +pub fn request_launch( server_port: u32, + cookie: Uuid, command: PathBuf, command_args: Vec, env: HashMap, @@ -155,6 +159,7 @@ fn request_launch( TcpStream::connect(format!("127.0.0.1:{server_port}")).map_err(LaunchError::Connection)?; let launch = Launch { + cookie, command, command_args, env, diff --git a/crates/muvm/src/lib.rs b/crates/muvm/src/lib.rs index de1edc98..4451b77e 100644 --- a/crates/muvm/src/lib.rs +++ b/crates/muvm/src/lib.rs @@ -2,6 +2,7 @@ pub mod cli_options; pub mod cpu; pub mod env; pub mod launch; +pub mod monitor; pub mod net; pub mod types; diff --git a/crates/muvm/src/monitor.rs b/crates/muvm/src/monitor.rs new file mode 100644 index 00000000..c43b53b4 --- /dev/null +++ b/crates/muvm/src/monitor.rs @@ -0,0 +1,96 @@ +use std::collections::HashMap; +use std::path::PathBuf; +use std::thread; +use std::time; + +use anyhow::Result; +use log::debug; +use procfs::{Current, Meminfo}; +use uuid::Uuid; + +use crate::launch::request_launch; + +#[derive(Clone, Debug, PartialEq)] +pub enum GuestPressure { + None, + Low, + Medium, + High, + Critical, +} + +impl From for u32 { + fn from(pressure: GuestPressure) -> u32 { + match pressure { + GuestPressure::None => 10, + GuestPressure::Low => 1000, + GuestPressure::Medium => 2000, + GuestPressure::High => 3000, + // Same waterlevel as High, but also explicitly requesting + // the guest to drop its page cache. + GuestPressure::Critical => 3000, + } + } +} + +pub fn spawn_monitor(server_port: u32, cookie: Uuid) { + thread::spawn(move || run(server_port, cookie)); +} + +fn set_guest_pressure(server_port: u32, cookie: Uuid, pressure: GuestPressure) -> Result<()> { + if pressure == GuestPressure::Critical { + debug!("requesting the guest to drop its caches"); + // This is a fake command that tells muvm-server to write to "/proc/sys/vm/drop_caches" + let command = PathBuf::from("/muvmdropcaches"); + let command_args = vec![]; + let env = HashMap::new(); + request_launch(server_port, cookie, command, command_args, env)?; + } + + let wsf: u32 = pressure.into(); + debug!("setting watermark_scale_factor to {wsf}"); + + let command = PathBuf::from("/sbin/sysctl"); + let command_args = vec![format!("vm.watermark_scale_factor={}", wsf)]; + let env = HashMap::new(); + request_launch(server_port, cookie, command, command_args, env) +} + +fn run(server_port: u32, cookie: Uuid) { + let mut guest_pressure = GuestPressure::None; + loop { + let meminfo = Meminfo::current().ok(); + if let Some(meminfo) = meminfo { + if let Some(available) = meminfo.mem_available { + let avail_ratio = (available * 100) / meminfo.mem_total; + debug!( + "avail_ratio={avail_ratio}, avail={available}, total={}", + meminfo.mem_total + ); + let new_pressure = if avail_ratio <= 10 { + GuestPressure::Critical + } else if avail_ratio <= 15 { + GuestPressure::High + } else if avail_ratio <= 20 { + GuestPressure::Medium + } else if avail_ratio <= 25 { + GuestPressure::Low + } else { + GuestPressure::None + }; + + debug!("Pressure at {:?}", new_pressure); + + if new_pressure != guest_pressure { + if let Err(err) = set_guest_pressure(server_port, cookie, new_pressure.clone()) + { + println!("Failed to set the new pressure in the guest: {err}"); + } else { + guest_pressure = new_pressure; + } + } + } + } + thread::sleep(time::Duration::from_millis(500)); + } +} diff --git a/crates/muvm/src/net.rs b/crates/muvm/src/net.rs index d6431bc6..985652b0 100644 --- a/crates/muvm/src/net.rs +++ b/crates/muvm/src/net.rs @@ -14,7 +14,7 @@ where Ok(UnixStream::connect(passt_socket_path)?) } -pub fn start_passt(server_port: u32) -> Result { +pub fn start_passt(server_port: u32, root_server_port: u32) -> Result { // SAFETY: The child process should not inherit the file descriptor of // `parent_socket`. There is no documented guarantee of this, but the // implementation as of writing atomically sets `SOCK_CLOEXEC`. @@ -40,7 +40,9 @@ pub fn start_passt(server_port: u32) -> Result { // See https://doc.rust-lang.org/std/io/index.html#io-safety let child = Command::new("passt") .args(["-q", "-f", "-t"]) - .arg(format!("{server_port}:{server_port}")) + .arg(format!( + "{server_port}:{server_port},{root_server_port}:{root_server_port}" + )) .arg("--fd") .arg(format!("{}", child_fd.into_raw_fd())) .spawn(); diff --git a/crates/muvm/src/server/bin/muvm-server.rs b/crates/muvm/src/server/bin/muvm-server.rs index e73dfe58..b5380911 100644 --- a/crates/muvm/src/server/bin/muvm-server.rs +++ b/crates/muvm/src/server/bin/muvm-server.rs @@ -1,31 +1,57 @@ +use std::env; use std::os::unix::process::ExitStatusExt as _; +use std::path::PathBuf; -use anyhow::Result; +use anyhow::{Context, Result}; use log::error; use muvm::server::cli_options::options; use muvm::server::worker::{State, Worker}; +use nix::unistd::geteuid; use tokio::net::TcpListener; use tokio::process::Command; use tokio::sync::watch; use tokio_stream::wrappers::WatchStream; use tokio_stream::StreamExt as _; +use uuid::Uuid; -#[tokio::main] -async fn main() -> Result<()> { +fn main() -> Result<()> { + let cookie = env::var("MUVM_SERVER_COOKIE") + .with_context(|| "Could find server cookie as an environment variable")?; + + let rt = tokio::runtime::Runtime::new().unwrap(); + rt.block_on(async { tokio_main(cookie).await }) +} + +async fn tokio_main(cookie: String) -> Result<()> { env_logger::init(); - let options = options().run(); + let cookie = Uuid::try_parse(&cookie).context("Couldn't parse cookie as UUID v7")?; + let uid: u32 = geteuid().into(); + + let (server_port, command, command_args) = if uid == 0 { + let server_port = if let Ok(server_port) = env::var("MUVM_ROOT_SERVER_PORT") { + server_port.parse()? + } else { + 3335 + }; + ( + server_port, + PathBuf::from("/bin/sleep"), + vec!["inf".to_string()], + ) + } else { + let options = options().run(); + (options.server_port, options.command, options.command_args) + }; - let listener = TcpListener::bind(format!("0.0.0.0:{}", options.server_port)).await?; + let listener = TcpListener::bind(format!("0.0.0.0:{}", server_port)).await?; let (state_tx, state_rx) = watch::channel(State::new()); let mut worker_handle = tokio::spawn(async move { - let mut worker = Worker::new(listener, state_tx); + let mut worker = Worker::new(cookie, listener, state_tx); worker.run().await; }); - let command_status = Command::new(&options.command) - .args(options.command_args) - .status(); + let command_status = Command::new(&command).args(command_args).status(); tokio::pin!(command_status); let mut state_rx = WatchStream::new(state_rx); @@ -53,12 +79,12 @@ async fn main() -> Result<()> { if let Some(code) = status.code() { eprintln!( "{:?} process exited with status code: {code}", - options.command + command ); } else { eprintln!( "{:?} process terminated by signal: {}", - options.command, + command, status .signal() .expect("either one of status code or signal should be set") @@ -69,7 +95,7 @@ async fn main() -> Result<()> { Err(err) => { eprintln!( "Failed to execute {:?} as child process: {err}", - options.command + command ); }, } diff --git a/crates/muvm/src/server/worker.rs b/crates/muvm/src/server/worker.rs index 4a2a3e41..6330bfb8 100644 --- a/crates/muvm/src/server/worker.rs +++ b/crates/muvm/src/server/worker.rs @@ -1,6 +1,8 @@ use std::collections::HashMap; +use std::fs::File; +use std::io::Write; use std::os::unix::process::ExitStatusExt as _; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::process::{ExitStatus, Stdio}; use std::{env, io}; @@ -13,12 +15,19 @@ use tokio::sync::watch; use tokio::task::{JoinError, JoinSet}; use tokio_stream::wrappers::TcpListenerStream; use tokio_stream::StreamExt as _; +use uuid::Uuid; use crate::utils::launch::Launch; use crate::utils::stdio::make_stdout_stderr; +pub enum ConnRequest { + DropCaches, + ExecuteCommand { command: PathBuf, child: Child }, +} + #[derive(Debug)] pub struct Worker { + cookie: Uuid, listener_stream: TcpListenerStream, state_tx: watch::Sender, child_set: JoinSet<(PathBuf, ChildResult)>, @@ -33,8 +42,9 @@ pub struct State { type ChildResult = Result; impl Worker { - pub fn new(listener: TcpListener, state_tx: watch::Sender) -> Self { + pub fn new(cookie: Uuid, listener: TcpListener, state_tx: watch::Sender) -> Self { Worker { + cookie, listener_stream: TcpListenerStream::new(listener), state_tx, child_set: JoinSet::new(), @@ -56,10 +66,13 @@ impl Worker { }; let stream = BufStream::new(stream); - match handle_connection(stream).await { - Ok((command, mut child)) => { - self.child_set.spawn(async move { (command, child.wait().await) }); - self.set_child_processes(self.child_set.len()); + match handle_connection(self.cookie, stream).await { + Ok(request) => match request { + ConnRequest::DropCaches => {}, + ConnRequest::ExecuteCommand {command, mut child } => { + self.child_set.spawn(async move { (command, child.wait().await) }); + self.set_child_processes(self.child_set.len()); + } }, Err(err) => { eprintln!("Failed to process client request: {err:?}"); @@ -158,15 +171,42 @@ async fn read_request(stream: &mut BufStream) -> Result { } } -async fn handle_connection(mut stream: BufStream) -> Result<(PathBuf, Child)> { +async fn handle_connection( + server_cookie: Uuid, + mut stream: BufStream, +) -> Result { let mut envs: HashMap = env::vars().collect(); let Launch { + cookie, command, command_args, env, } = read_request(&mut stream).await?; debug!(command:?, command_args:?, env:?; "received launch request"); + if cookie != server_cookie { + debug!("invalid cookie in launch request"); + let msg = "Invalid cookie"; + stream.write_all(msg.as_bytes()).await.ok(); + stream.flush().await.ok(); + return Err(anyhow!(msg)); + } + + if command == Path::new("/muvmdropcaches") { + let mut file = File::options() + .write(true) + .open("/proc/sys/vm/drop_caches") + .context("Failed to open /proc/sys/vm/drop_caches for writing")?; + + { + file.write_all(b"1") + .context("Failed to write to /proc/sys/vm/drop_caches")?; + } + stream.write_all(b"OK").await.ok(); + stream.flush().await.ok(); + return Ok(ConnRequest::DropCaches); + } + envs.extend(env); let (stdout, stderr) = make_stdout_stderr(&command, &envs)?; @@ -187,5 +227,5 @@ async fn handle_connection(mut stream: BufStream) -> Result<(PathBuf, } stream.flush().await.ok(); - res.map(|child| (command, child)) + res.map(|child| ConnRequest::ExecuteCommand { command, child }) } diff --git a/crates/muvm/src/utils/launch.rs b/crates/muvm/src/utils/launch.rs index 78a3dcf4..1a17bfbe 100644 --- a/crates/muvm/src/utils/launch.rs +++ b/crates/muvm/src/utils/launch.rs @@ -2,9 +2,11 @@ use std::collections::HashMap; use std::path::PathBuf; use serde::{Deserialize, Serialize}; +use uuid::Uuid; #[derive(Clone, Eq, PartialEq, Debug, Deserialize, Serialize)] pub struct Launch { + pub cookie: Uuid, pub command: PathBuf, pub command_args: Vec, pub env: HashMap,