Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 23 additions & 1 deletion crates/sandlock-core/src/netlink/handlers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,27 @@ use crate::netlink::{proxy, state::NetlinkState};
use crate::seccomp::notif::{read_child_mem, write_child_mem, NotifAction, OnInjectSuccess};
use crate::sys::structs::SeccompNotif;

// Linux socket address-family numbers, i.e. the `domain` argument of
// socket(2). They are typed `u64` so they can be compared directly against
// the raw syscall argument words carried in a seccomp notification.
const AF_UNIX: u64 = 1;
const AF_INET: u64 = 2;
const AF_INET6: u64 = 10;
const AF_NETLINK: u64 = 16;
// Netlink protocol number selecting the routing/link subsystem
// (the `protocol` argument of socket(2) for AF_NETLINK sockets).
const NETLINK_ROUTE: u64 = 0;

/// Socket families allowed to reach the kernel. Everything else returns
/// EAFNOSUPPORT — the same errno the kernel itself uses for unknown
/// families, so callers see a normal "not supported" error rather than a
/// sandbox-flavored one.
///
/// The set is intentionally tiny: an XOA agent has no legitimate need for
/// AF_ALG, AF_PACKET, AF_VSOCK, AF_XDP, AF_TIPC, AF_RDS, AF_BLUETOOTH, and
/// the rest of the niche families that have historically yielded LPEs
/// (Copy Fail / CVE-2026-31431 via AF_ALG, Dirty Pipe-adjacent splice
/// primitives, AF_PACKET PACKET_MMAP UAFs, etc.). Closing the surface
/// once is cheaper than chasing one CVE per family.
fn family_allowed(domain: u64) -> bool {
matches!(domain, AF_UNIX | AF_INET | AF_INET6 | AF_NETLINK)
}

/// Resolve `notif.pid` (which is a TID per the kernel's `task_pid_vnr`) to
/// the enclosing thread group id. fds are shared across all threads of a
/// process, so cookie entries must be keyed by TGID — otherwise a cookie
Expand Down Expand Up @@ -56,14 +74,18 @@ fn read_struct<T: Copy>(
/// Intercept `socket(AF_NETLINK, *, NETLINK_ROUTE)` and substitute one end
/// of a `socketpair(AF_UNIX, SOCK_SEQPACKET)`. A tokio task takes the
/// supervisor-side end and speaks synthesized NETLINK_ROUTE replies.
/// Other domains pass through; other netlink protocols are denied.
/// Allowed domains pass through; AF_NETLINK is virtualized; everything
/// else (and non-NETLINK_ROUTE netlink protocols) returns EAFNOSUPPORT.
pub async fn handle_socket(
notif: &SeccompNotif,
state: &Arc<NetlinkState>,
) -> NotifAction {
let domain = notif.data.args[0];
let protocol = notif.data.args[2];

if !family_allowed(domain) {
return NotifAction::Errno(libc::EAFNOSUPPORT);
}
if domain != AF_NETLINK {
return NotifAction::Continue;
}
Expand Down
75 changes: 75 additions & 0 deletions crates/sandlock-core/tests/integration/test_netlink_virt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,81 @@ async fn sys_class_net_blocked() {
assert!(result.success());
}

/// Regression test for Copy Fail (CVE-2026-31431).
///
/// The exploit opens with `socket(AF_ALG, SOCK_SEQPACKET, 0)` and then
/// `bind()`s a sockaddr_alg naming "authencesn(hmac(sha256),cbc(aes))".
/// When that very first `socket()` call is refused with EAFNOSUPPORT, the
/// page-cache corruption primitive can never be reached.
///
/// A Python probe runs inside the sandbox, attempts the call, and records
/// either `ALLOWED` or `BLOCKED:<errno>` in a temp file that is inspected
/// here afterwards.
#[tokio::test]
async fn af_alg_socket_blocked() {
    let report_path = temp_out("af-alg-blocked");
    let probe = format!(
        concat!(
            "import socket, errno\n",
            "AF_ALG = 38\n",
            "try:\n",
            "    s = socket.socket(AF_ALG, socket.SOCK_SEQPACKET, 0)\n",
            "    s.close()\n",
            "    result = 'ALLOWED'\n",
            "except OSError as e:\n",
            "    result = f'BLOCKED:{{e.errno}}'\n",
            "open('{out}', 'w').write(result)\n",
        ),
        out = report_path.display()
    );

    let policy = base_policy().build().unwrap();
    let argv = ["python3", "-c", probe.as_str()];
    let run = Sandbox::run_interactive(&policy, Some("test"), &argv)
        .await
        .unwrap();

    // A missing/unreadable report becomes an empty string, which then fails
    // the equality check below with a readable message instead of panicking
    // here. The temp file is removed before asserting so it is not leaked
    // when the assertion fails.
    let observed = std::fs::read_to_string(&report_path).unwrap_or_default();
    let _ = std::fs::remove_file(&report_path);

    // EAFNOSUPPORT == 97 on Linux. The exact errno is pinned so that an
    // accidental future switch to EPERM/EACCES (which callers would observe
    // differently) is caught.
    assert_eq!(
        observed, "BLOCKED:97",
        "AF_ALG socket() must return EAFNOSUPPORT, got: {contents}",
        contents = observed
    );
    assert!(run.success());
}

/// Guards the broader class of niche socket families. They share AF_ALG's
/// threat model (kernel LPE surface that XOA agents have no business
/// reaching); AF_ALG itself is covered by its own dedicated test.
///
/// For each family a Python probe is run inside the sandbox; it writes
/// `ALLOWED` or `BLOCKED:<errno>` to a per-family temp file, and the test
/// requires the `BLOCKED:` form.
#[tokio::test]
async fn niche_socket_families_blocked() {
    // (name, AF_* numeric value)
    const FAMILIES: [(&str, i32); 4] = [
        ("AF_PACKET", 17), // PACKET_MMAP has had UAFs
        ("AF_VSOCK", 40),  // recurring use-after-frees
        ("AF_XDP", 44),
        ("AF_TIPC", 30),
    ];

    for &(name, af) in &FAMILIES {
        let report_path = temp_out(&format!("family-blocked-{}", name));
        let probe = format!(
            concat!(
                "import socket\n",
                "try:\n",
                "    s = socket.socket({af}, socket.SOCK_RAW, 0)\n",
                "    s.close()\n",
                "    result = 'ALLOWED'\n",
                "except OSError as e:\n",
                "    result = f'BLOCKED:{{e.errno}}'\n",
                "open('{out}', 'w').write(result)\n",
            ),
            af = af,
            out = report_path.display()
        );

        let policy = base_policy().build().unwrap();
        let argv = ["python3", "-c", probe.as_str()];
        let run = Sandbox::run_interactive(&policy, Some("test"), &argv)
            .await
            .unwrap();

        // An unreadable report collapses to "", which fails the prefix check.
        let observed = std::fs::read_to_string(&report_path).unwrap_or_default();
        let _ = std::fs::remove_file(&report_path);

        // NOTE(review): only the "BLOCKED:" prefix is checked, so any OSError
        // satisfies this — including one raised by the kernel itself rather
        // than by the sandbox (presumably e.g. a permission error for raw
        // sockets). Confirm whether the errno can be pinned as in the AF_ALG
        // test.
        assert!(
            observed.strip_prefix("BLOCKED:").is_some(),
            "{name} should be blocked, got: {contents}",
            name = name,
            contents = observed
        );
        assert!(run.success());
    }
}

#[tokio::test]
async fn non_route_netlink_still_blocked() {
let out = temp_out("netlink-audit-blocked");
Expand Down
Loading