diff --git a/.gitignore b/.gitignore index ddd49b82..482ca346 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,8 @@ generated*/ */__pycache__ .idea .vscode -venv \ No newline at end of file +venv + +# for clangd +compile_commands.json +.cache diff --git a/Makefile b/Makefile index 6cb9e005..1961b96a 100644 --- a/Makefile +++ b/Makefile @@ -76,7 +76,7 @@ LDFLAGS := $(DBGFLAGS) -lz3 -lpthread -lz ## Building Targets ######################################################################## -.PHONY: clean all lib fgen pgen bins gen-func-set gen-func-set-check-ubs gen-prog-set gen-prog-set-check +.PHONY: clean all lib fgen pgen bins gen-func-set gen-func-set-check-ubs gen-prog-set gen-prog-set-check bpf_test all: lib bins @@ -107,11 +107,17 @@ $(BIN_DIR)/pgen: $(LIB_OBJ) $(OBJ_DIR)/prog_gen.o @mkdir -p $(dir $@) $(CXX) -o $@ $^ $(LDFLAGS) +$(BIN_DIR)/bpf_test: $(LIB_OBJ) $(OBJ_DIR)/bpf_test.o + @mkdir -p $(dir $@) + $(CXX) -o $@ $^ $(LDFLAGS) + fgen: $(BIN_DIR)/fgen pgen: $(BIN_DIR)/pgen -bins: fgen pgen +bpf_test: $(BIN_DIR)/bpf_test + +bins: fgen pgen bpf_test ######################################################################## diff --git a/include/global.hpp b/include/global.hpp index 04719255..e57ff776 100644 --- a/include/global.hpp +++ b/include/global.hpp @@ -427,6 +427,10 @@ static std::filesystem::path GetProgramsDir(const std::filesystem::path &output) return output / "programs"; } +static std::filesystem::path GeteBPFDir(const std::filesystem::path &output) { + return output / "ebpf_progs"; +} + static std::string GetFunctionName(const std::string &uuid, const std::string &sno) { return std::string(FUNCTION_NAME_PREFIX) + "_" + uuid + "_" + sno; } @@ -489,6 +493,15 @@ static std::filesystem::path GetProgramPath( return GetProgramsDir(output) / GetProgramNameForFunctionName(GetFunctionName(uuid, sno)); } +static std::string GeteBPFProgramNameForFunctionName(const std::string &functionName) { + return 
functionName.substr(std::string(FUNCTION_NAME_PREFIX).size() + 1) + ".bpf"; +} + +static std::filesystem::path +GeteBPFPath(const std::string &uuid, const std::string &sno, const std::filesystem::path &output) { + return GeteBPFDir(output) / GeteBPFProgramNameForFunctionName(GetFunctionName(uuid, sno)); +} + static std::filesystem::path GetGetProgramPathPathForFunctionPath(const std::filesystem::path &functionPath) { return GetProgramsDir(functionPath.parent_path().parent_path()) / diff --git a/include/lib/bpf/bpf.h b/include/lib/bpf/bpf.h new file mode 100644 index 00000000..0c7586af --- /dev/null +++ b/include/lib/bpf/bpf.h @@ -0,0 +1,920 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#ifndef _UAPI__LINUX_BPF_H__ +#define _UAPI__LINUX_BPF_H__ + +#include + +#include "lib/bpf/bpf_common.h" + +/* Extended instruction set based on top of classic BPF */ + +/* instruction classes */ +#define BPF_JMP32 0x06 /* jmp mode in word width */ +#define BPF_ALU64 0x07 /* alu mode in double word width */ + +/* ld/ldx fields */ +#define BPF_DW 0x18 /* double word (64-bit) */ +#define BPF_MEMSX 0x80 /* load with sign extension */ +#define BPF_ATOMIC 0xc0 /* atomic memory ops - op type in immediate */ +#define BPF_XADD 0xc0 /* exclusive add - legacy name */ + +/* alu/jmp fields */ +#define BPF_MOV 0xb0 /* mov reg to reg */ +#define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */ + +/* change endianness of a register */ +#define BPF_END 0xd0 /* flags for endianness conversion: */ +#define BPF_TO_LE 0x00 /* convert to little-endian */ +#define BPF_TO_BE 0x08 /* convert to big-endian */ +#define BPF_FROM_LE BPF_TO_LE +#define BPF_FROM_BE BPF_TO_BE + +/* jmp encodings */ +#define BPF_JNE 0x50 /* jump 
!= */ +#define BPF_JLT 0xa0 /* LT is unsigned, '<' */ +#define BPF_JLE 0xb0 /* LE is unsigned, '<=' */ +#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */ +#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */ +#define BPF_JSLT 0xc0 /* SLT is signed, '<' */ +#define BPF_JSLE 0xd0 /* SLE is signed, '<=' */ +#define BPF_JCOND 0xe0 /* conditional pseudo jumps: may_goto, goto_or_nop */ +#define BPF_CALL 0x80 /* function call */ +#define BPF_EXIT 0x90 /* function return */ + +/* atomic op type fields (stored in immediate) */ +#define BPF_FETCH 0x01 /* not an opcode on its own, used to build others */ +#define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */ +#define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */ + +#define BPF_LOAD_ACQ 0x100 /* load-acquire */ +#define BPF_STORE_REL 0x110 /* store-release */ + +enum bpf_cond_pseudo_jmp { + BPF_MAY_GOTO = 0, +}; + + +/* When BPF ldimm64's insn[0].src_reg != 0 then this can have + * the following extensions: + * + * insn[0].src_reg: BPF_PSEUDO_MAP_[FD|IDX] + * insn[0].imm: map fd or fd_idx + * insn[1].imm: 0 + * insn[0].off: 0 + * insn[1].off: 0 + * ldimm64 rewrite: address of map + * verifier type: CONST_PTR_TO_MAP + */ + #define BPF_PSEUDO_MAP_FD 1 + #define BPF_PSEUDO_MAP_IDX 5 + + /* insn[0].src_reg: BPF_PSEUDO_MAP_[IDX_]VALUE + * insn[0].imm: map fd or fd_idx + * insn[1].imm: offset into value + * insn[0].off: 0 + * insn[1].off: 0 + * ldimm64 rewrite: address of map[0]+offset + * verifier type: PTR_TO_MAP_VALUE + */ + #define BPF_PSEUDO_MAP_VALUE 2 + #define BPF_PSEUDO_MAP_IDX_VALUE 6 + + /* insn[0].src_reg: BPF_PSEUDO_BTF_ID + * insn[0].imm: kernel btd id of VAR + * insn[1].imm: 0 + * insn[0].off: 0 + * insn[1].off: 0 + * ldimm64 rewrite: address of the kernel variable + * verifier type: PTR_TO_BTF_ID or PTR_TO_MEM, depending on whether the var + * is struct/union. 
+ */ + #define BPF_PSEUDO_BTF_ID 3 + /* insn[0].src_reg: BPF_PSEUDO_FUNC + * insn[0].imm: insn offset to the func + * insn[1].imm: 0 + * insn[0].off: 0 + * insn[1].off: 0 + * ldimm64 rewrite: address of the function + * verifier type: PTR_TO_FUNC. + */ + #define BPF_PSEUDO_FUNC 4 + + /* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative + * offset to another bpf function + */ + #define BPF_PSEUDO_CALL 1 + /* when bpf_call->src_reg == BPF_PSEUDO_KFUNC_CALL, + * bpf_call->imm == btf_id of a BTF_KIND_FUNC in the running kernel + */ + #define BPF_PSEUDO_KFUNC_CALL 2 + +/* Register numbers */ +enum { + BPF_REG_0 = 0, + BPF_REG_1, + BPF_REG_2, + BPF_REG_3, + BPF_REG_4, + BPF_REG_5, + BPF_REG_6, + BPF_REG_7, + BPF_REG_8, + BPF_REG_9, + BPF_REG_10, + __MAX_BPF_REG, +}; + +/* BPF has 10 general purpose 64-bit registers and stack frame. */ +#define MAX_BPF_REG __MAX_BPF_REG + +struct bpf_insn { + __u8 code; /* opcode */ + __u8 dst_reg:4; /* dest register */ + __u8 src_reg:4; /* source register */ + __s16 off; /* signed offset */ + __s32 imm; /* signed immediate constant */ +}; + +enum bpf_cmd { + BPF_MAP_CREATE, + BPF_MAP_LOOKUP_ELEM, + BPF_MAP_UPDATE_ELEM, + BPF_MAP_DELETE_ELEM, + BPF_MAP_GET_NEXT_KEY, + BPF_PROG_LOAD, + BPF_OBJ_PIN, + BPF_OBJ_GET, + BPF_PROG_ATTACH, + BPF_PROG_DETACH, + BPF_PROG_TEST_RUN, + BPF_PROG_RUN = BPF_PROG_TEST_RUN, + BPF_PROG_GET_NEXT_ID, + BPF_MAP_GET_NEXT_ID, + BPF_PROG_GET_FD_BY_ID, + BPF_MAP_GET_FD_BY_ID, + BPF_OBJ_GET_INFO_BY_FD, + BPF_PROG_QUERY, + BPF_RAW_TRACEPOINT_OPEN, + BPF_BTF_LOAD, + BPF_BTF_GET_FD_BY_ID, + BPF_TASK_FD_QUERY, + BPF_MAP_LOOKUP_AND_DELETE_ELEM, + BPF_MAP_FREEZE, + BPF_BTF_GET_NEXT_ID, + BPF_MAP_LOOKUP_BATCH, + BPF_MAP_LOOKUP_AND_DELETE_BATCH, + BPF_MAP_UPDATE_BATCH, + BPF_MAP_DELETE_BATCH, + BPF_LINK_CREATE, + BPF_LINK_UPDATE, + BPF_LINK_GET_FD_BY_ID, + BPF_LINK_GET_NEXT_ID, + BPF_ENABLE_STATS, + BPF_ITER_CREATE, + BPF_LINK_DETACH, + BPF_PROG_BIND_MAP, + BPF_TOKEN_CREATE, + 
BPF_PROG_STREAM_READ_BY_FD, + __MAX_BPF_CMD, +}; + +enum bpf_prog_type { + BPF_PROG_TYPE_UNSPEC, + BPF_PROG_TYPE_SOCKET_FILTER, + BPF_PROG_TYPE_KPROBE, + BPF_PROG_TYPE_SCHED_CLS, + BPF_PROG_TYPE_SCHED_ACT, + BPF_PROG_TYPE_TRACEPOINT, + BPF_PROG_TYPE_XDP, + BPF_PROG_TYPE_PERF_EVENT, + BPF_PROG_TYPE_CGROUP_SKB, + BPF_PROG_TYPE_CGROUP_SOCK, + BPF_PROG_TYPE_LWT_IN, + BPF_PROG_TYPE_LWT_OUT, + BPF_PROG_TYPE_LWT_XMIT, + BPF_PROG_TYPE_SOCK_OPS, + BPF_PROG_TYPE_SK_SKB, + BPF_PROG_TYPE_CGROUP_DEVICE, + BPF_PROG_TYPE_SK_MSG, + BPF_PROG_TYPE_RAW_TRACEPOINT, + BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + BPF_PROG_TYPE_LWT_SEG6LOCAL, + BPF_PROG_TYPE_LIRC_MODE2, + BPF_PROG_TYPE_SK_REUSEPORT, + BPF_PROG_TYPE_FLOW_DISSECTOR, + BPF_PROG_TYPE_CGROUP_SYSCTL, + BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, + BPF_PROG_TYPE_CGROUP_SOCKOPT, + BPF_PROG_TYPE_TRACING, + BPF_PROG_TYPE_STRUCT_OPS, + BPF_PROG_TYPE_EXT, + BPF_PROG_TYPE_LSM, + BPF_PROG_TYPE_SK_LOOKUP, + BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ + BPF_PROG_TYPE_NETFILTER, + __MAX_BPF_PROG_TYPE +}; + +#define BPF_OBJ_NAME_LEN 16U + +union bpf_attr { + struct { /* anonymous struct used by BPF_MAP_CREATE command */ + __u32 map_type; /* one of enum bpf_map_type */ + __u32 key_size; /* size of key in bytes */ + __u32 value_size; /* size of value in bytes */ + __u32 max_entries; /* max number of entries in a map */ + __u32 map_flags; /* BPF_MAP_CREATE related + * flags defined above. + */ + __u32 inner_map_fd; /* fd pointing to the inner map */ + __u32 numa_node; /* numa node (effective only if + * BPF_F_NUMA_NODE is set). 
+ */ + char map_name[BPF_OBJ_NAME_LEN]; + __u32 map_ifindex; /* ifindex of netdev to create on */ + __u32 btf_fd; /* fd pointing to a BTF type data */ + __u32 btf_key_type_id; /* BTF type_id of the key */ + __u32 btf_value_type_id; /* BTF type_id of the value */ + __u32 btf_vmlinux_value_type_id;/* BTF type_id of a kernel- + * struct stored as the + * map value + */ + /* Any per-map-type extra fields + * + * BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the + * number of hash functions (if 0, the bloom filter will default + * to using 5 hash functions). + * + * BPF_MAP_TYPE_ARENA - contains the address where user space + * is going to mmap() the arena. It has to be page aligned. + */ + __u64 map_extra; + + __s32 value_type_btf_obj_fd; /* fd pointing to a BTF + * type data for + * btf_vmlinux_value_type_id. + */ + /* BPF token FD to use with BPF_MAP_CREATE operation. + * If provided, map_flags should have BPF_F_TOKEN_FD flag set. + */ + __s32 map_token_fd; + }; + + struct { /* anonymous struct used by BPF_MAP_*_ELEM and BPF_MAP_FREEZE commands */ + __u32 map_fd; + __aligned_u64 key; + union { + __aligned_u64 value; + __aligned_u64 next_key; + }; + __u64 flags; + }; + + struct { /* struct used by BPF_MAP_*_BATCH commands */ + __aligned_u64 in_batch; /* start batch, + * NULL to start from beginning + */ + __aligned_u64 out_batch; /* output: next start batch */ + __aligned_u64 keys; + __aligned_u64 values; + __u32 count; /* input/output: + * input: # of key/value + * elements + * output: # of filled elements + */ + __u32 map_fd; + __u64 elem_flags; + __u64 flags; + } batch; + + struct { /* anonymous struct used by BPF_PROG_LOAD command */ + __u32 prog_type; /* one of enum bpf_prog_type */ + __u32 insn_cnt; + __aligned_u64 insns; + __aligned_u64 license; + __u32 log_level; /* verbosity level of verifier */ + __u32 log_size; /* size of user buffer */ + __aligned_u64 log_buf; /* user supplied buffer */ + __u32 kern_version; /* not used */ + __u32 prog_flags; + 
char prog_name[BPF_OBJ_NAME_LEN]; + __u32 prog_ifindex; /* ifindex of netdev to prep for */ + /* For some prog types expected attach type must be known at + * load time to verify attach type specific parts of prog + * (context accesses, allowed helpers, etc). + */ + __u32 expected_attach_type; + __u32 prog_btf_fd; /* fd pointing to BTF type data */ + __u32 func_info_rec_size; /* userspace bpf_func_info size */ + __aligned_u64 func_info; /* func info */ + __u32 func_info_cnt; /* number of bpf_func_info records */ + __u32 line_info_rec_size; /* userspace bpf_line_info size */ + __aligned_u64 line_info; /* line info */ + __u32 line_info_cnt; /* number of bpf_line_info records */ + __u32 attach_btf_id; /* in-kernel BTF type id to attach to */ + union { + /* valid prog_fd to attach to bpf prog */ + __u32 attach_prog_fd; + /* or valid module BTF object fd or 0 to attach to vmlinux */ + __u32 attach_btf_obj_fd; + }; + __u32 core_relo_cnt; /* number of bpf_core_relo */ + __aligned_u64 fd_array; /* array of FDs */ + __aligned_u64 core_relos; + __u32 core_relo_rec_size; /* sizeof(struct bpf_core_relo) */ + /* output: actual total log contents size (including termintaing zero). + * It could be both larger than original log_size (if log was + * truncated), or smaller (if log buffer wasn't filled completely). + */ + __u32 log_true_size; + /* BPF token FD to use with BPF_PROG_LOAD operation. + * If provided, prog_flags should have BPF_F_TOKEN_FD flag set. + */ + __s32 prog_token_fd; + /* The fd_array_cnt can be used to pass the length of the + * fd_array array. In this case all the [map] file descriptors + * passed in this array will be bound to the program, even if + * the maps are not referenced directly. The functionality is + * similar to the BPF_PROG_BIND_MAP syscall, but maps can be + * used by the verifier during the program load. If provided, + * then the fd_array[0,...,fd_array_cnt-1] is expected to be + * continuous. 
+ */ + __u32 fd_array_cnt; + }; + + struct { /* anonymous struct used by BPF_OBJ_* commands */ + __aligned_u64 pathname; + __u32 bpf_fd; + __u32 file_flags; + /* Same as dirfd in openat() syscall; see openat(2) + * manpage for details of path FD and pathname semantics; + * path_fd should accompanied by BPF_F_PATH_FD flag set in + * file_flags field, otherwise it should be set to zero; + * if BPF_F_PATH_FD flag is not set, AT_FDCWD is assumed. + */ + __s32 path_fd; + }; + + struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */ + union { + __u32 target_fd; /* target object to attach to or ... */ + __u32 target_ifindex; /* target ifindex */ + }; + __u32 attach_bpf_fd; + __u32 attach_type; + __u32 attach_flags; + __u32 replace_bpf_fd; + union { + __u32 relative_fd; + __u32 relative_id; + }; + __u64 expected_revision; + }; + + struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ + __u32 prog_fd; + __u32 retval; + __u32 data_size_in; /* input: len of data_in */ + __u32 data_size_out; /* input/output: len of data_out + * returns ENOSPC if data_out + * is too small. + */ + __aligned_u64 data_in; + __aligned_u64 data_out; + __u32 repeat; + __u32 duration; + __u32 ctx_size_in; /* input: len of ctx_in */ + __u32 ctx_size_out; /* input/output: len of ctx_out + * returns ENOSPC if ctx_out + * is too small. + */ + __aligned_u64 ctx_in; + __aligned_u64 ctx_out; + __u32 flags; + __u32 cpu; + __u32 batch_size; + } test; + + struct { /* anonymous struct used by BPF_*_GET_*_ID */ + union { + __u32 start_id; + __u32 prog_id; + __u32 map_id; + __u32 btf_id; + __u32 link_id; + }; + __u32 next_id; + __u32 open_flags; + __s32 fd_by_id_token_fd; + }; + + struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */ + __u32 bpf_fd; + __u32 info_len; + __aligned_u64 info; + } info; + + struct { /* anonymous struct used by BPF_PROG_QUERY command */ + union { + __u32 target_fd; /* target object to query or ... 
*/ + __u32 target_ifindex; /* target ifindex */ + }; + __u32 attach_type; + __u32 query_flags; + __u32 attach_flags; + __aligned_u64 prog_ids; + union { + __u32 prog_cnt; + __u32 count; + }; + __u32 :32; + /* output: per-program attach_flags. + * not allowed to be set during effective query. + */ + __aligned_u64 prog_attach_flags; + __aligned_u64 link_ids; + __aligned_u64 link_attach_flags; + __u64 revision; + } query; + + struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */ + __u64 name; + __u32 prog_fd; + __u32 :32; + __aligned_u64 cookie; + } raw_tracepoint; + + struct { /* anonymous struct for BPF_BTF_LOAD */ + __aligned_u64 btf; + __aligned_u64 btf_log_buf; + __u32 btf_size; + __u32 btf_log_size; + __u32 btf_log_level; + /* output: actual total log contents size (including termintaing zero). + * It could be both larger than original log_size (if log was + * truncated), or smaller (if log buffer wasn't filled completely). + */ + __u32 btf_log_true_size; + __u32 btf_flags; + /* BPF token FD to use with BPF_BTF_LOAD operation. + * If provided, btf_flags should have BPF_F_TOKEN_FD flag set. + */ + __s32 btf_token_fd; + }; + + struct { + __u32 pid; /* input: pid */ + __u32 fd; /* input: fd */ + __u32 flags; /* input: flags */ + __u32 buf_len; /* input/output: buf len */ + __aligned_u64 buf; /* input/output: + * tp_name for tracepoint + * symbol for kprobe + * filename for uprobe + */ + __u32 prog_id; /* output: prod_id */ + __u32 fd_type; /* output: BPF_FD_TYPE_* */ + __u64 probe_offset; /* output: probe_offset */ + __u64 probe_addr; /* output: probe_addr */ + } task_fd_query; + + struct { /* struct used by BPF_LINK_CREATE command */ + union { + __u32 prog_fd; /* eBPF program to attach */ + __u32 map_fd; /* struct_ops to attach */ + }; + union { + __u32 target_fd; /* target object to attach to or ... 
*/ + __u32 target_ifindex; /* target ifindex */ + }; + __u32 attach_type; /* attach type */ + __u32 flags; /* extra flags */ + union { + __u32 target_btf_id; /* btf_id of target to attach to */ + struct { + __aligned_u64 iter_info; /* extra bpf_iter_link_info */ + __u32 iter_info_len; /* iter_info length */ + }; + struct { + /* black box user-provided value passed through + * to BPF program at the execution time and + * accessible through bpf_get_attach_cookie() BPF helper + */ + __u64 bpf_cookie; + } perf_event; + struct { + __u32 flags; + __u32 cnt; + __aligned_u64 syms; + __aligned_u64 addrs; + __aligned_u64 cookies; + } kprobe_multi; + struct { + /* this is overlaid with the target_btf_id above. */ + __u32 target_btf_id; + /* black box user-provided value passed through + * to BPF program at the execution time and + * accessible through bpf_get_attach_cookie() BPF helper + */ + __u64 cookie; + } tracing; + struct { + __u32 pf; + __u32 hooknum; + __s32 priority; + __u32 flags; + } netfilter; + struct { + union { + __u32 relative_fd; + __u32 relative_id; + }; + __u64 expected_revision; + } tcx; + struct { + __aligned_u64 path; + __aligned_u64 offsets; + __aligned_u64 ref_ctr_offsets; + __aligned_u64 cookies; + __u32 cnt; + __u32 flags; + __u32 pid; + } uprobe_multi; + struct { + union { + __u32 relative_fd; + __u32 relative_id; + }; + __u64 expected_revision; + } netkit; + struct { + union { + __u32 relative_fd; + __u32 relative_id; + }; + __u64 expected_revision; + } cgroup; + }; + } link_create; + + struct { /* struct used by BPF_LINK_UPDATE command */ + __u32 link_fd; /* link fd */ + union { + /* new program fd to update link with */ + __u32 new_prog_fd; + /* new struct_ops map fd to update link with */ + __u32 new_map_fd; + }; + __u32 flags; /* extra flags */ + union { + /* expected link's program fd; is specified only if + * BPF_F_REPLACE flag is set in flags. 
+ */ + __u32 old_prog_fd; + /* expected link's map fd; is specified only + * if BPF_F_REPLACE flag is set. + */ + __u32 old_map_fd; + }; + } link_update; + + struct { + __u32 link_fd; + } link_detach; + + struct { /* struct used by BPF_ENABLE_STATS command */ + __u32 type; + } enable_stats; + + struct { /* struct used by BPF_ITER_CREATE command */ + __u32 link_fd; + __u32 flags; + } iter_create; + + struct { /* struct used by BPF_PROG_BIND_MAP command */ + __u32 prog_fd; + __u32 map_fd; + __u32 flags; /* extra flags */ + } prog_bind_map; + + struct { /* struct used by BPF_TOKEN_CREATE command */ + __u32 flags; + __u32 bpffs_fd; + } token_create; + + struct { + __aligned_u64 stream_buf; + __u32 stream_buf_len; + __u32 stream_id; + __u32 prog_fd; + } prog_stream_read; + +} __attribute__((aligned(8))); + +/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command + * + * NONE(default): No further bpf programs allowed in the subtree. + * + * BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program, + * the program in this cgroup yields to sub-cgroup program. + * + * BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program, + * that cgroup program gets run in addition to the program in this cgroup. + * + * Only one program is allowed to be attached to a cgroup with + * NONE or BPF_F_ALLOW_OVERRIDE flag. + * Attaching another program on top of NONE or BPF_F_ALLOW_OVERRIDE will + * release old program and attach the new one. Attach flags has to match. + * + * Multiple programs are allowed to be attached to a cgroup with + * BPF_F_ALLOW_MULTI flag. They are executed in FIFO order + * (those that were attached first, run first) + * The programs of sub-cgroup are executed first, then programs of + * this cgroup and then programs of parent cgroup. + * When children program makes decision (like picking TCP CA or sock bind) + * parent program has a chance to override it. 
+ * + * With BPF_F_ALLOW_MULTI a new program is added to the end of the list of + * programs for a cgroup. Though it's possible to replace an old program at + * any position by also specifying BPF_F_REPLACE flag and position itself in + * replace_bpf_fd attribute. Old program at this position will be released. + * + * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups. + * A cgroup with NONE doesn't allow any programs in sub-cgroups. + * Ex1: + * cgrp1 (MULTI progs A, B) -> + * cgrp2 (OVERRIDE prog C) -> + * cgrp3 (MULTI prog D) -> + * cgrp4 (OVERRIDE prog E) -> + * cgrp5 (NONE prog F) + * the event in cgrp5 triggers execution of F,D,A,B in that order. + * if prog F is detached, the execution is E,D,A,B + * if prog F and D are detached, the execution is E,A,B + * if prog F, E and D are detached, the execution is C,A,B + * + * All eligible programs are executed regardless of return code from + * earlier programs. + */ +#define BPF_F_ALLOW_OVERRIDE (1U << 0) +#define BPF_F_ALLOW_MULTI (1U << 1) +/* Generic attachment flags. */ +#define BPF_F_REPLACE (1U << 2) +#define BPF_F_BEFORE (1U << 3) +#define BPF_F_AFTER (1U << 4) +#define BPF_F_ID (1U << 5) +#define BPF_F_PREORDER (1U << 6) +#define BPF_F_LINK BPF_F_LINK /* 1 << 13 */ + +/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the + * verifier will perform strict alignment checking as if the kernel + * has been built with CONFIG_EFFICIENT_UNALIGNED_ACCESS not set, + * and NET_IP_ALIGN defined to 2. + */ +#define BPF_F_STRICT_ALIGNMENT (1U << 0) + +/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROG_LOAD command, the + * verifier will allow any alignment whatsoever. On platforms + * with strict alignment requirements for loads ands stores (such + * as sparc and mips) the verifier validates that all loads and + * stores provably follow this requirement. This flag turns that + * checking and enforcement off. 
+ * + * It is mostly used for testing when we want to validate the + * context and memory access aspects of the verifier, but because + * of an unaligned access the alignment check would trigger before + * the one we are interested in. + */ +#define BPF_F_ANY_ALIGNMENT (1U << 1) + +/* BPF_F_TEST_RND_HI32 is used in BPF_PROG_LOAD command for testing purpose. + * Verifier does sub-register def/use analysis and identifies instructions whose + * def only matters for low 32-bit, high 32-bit is never referenced later + * through implicit zero extension. Therefore verifier notifies JIT back-ends + * that it is safe to ignore clearing high 32-bit for these instructions. This + * saves some back-ends a lot of code-gen. However such optimization is not + * necessary on some arches, for example x86_64, arm64 etc, whose JIT back-ends + * hence hasn't used verifier's analysis result. But, we really want to have a + * way to be able to verify the correctness of the described optimization on + * x86_64 on which testsuites are frequently exercised. + * + * So, this flag is introduced. Once it is set, verifier will randomize high + * 32-bit for those instructions who has been identified as safe to ignore them. + * Then, if verifier is not doing correct analysis, such randomization will + * regress tests to expose bugs. + */ +#define BPF_F_TEST_RND_HI32 (1U << 2) + +/* The verifier internal test flag. Behavior is undefined */ +#define BPF_F_TEST_STATE_FREQ (1U << 3) + +/* If BPF_F_SLEEPABLE is used in BPF_PROG_LOAD command, the verifier will + * restrict map and helper usage for such programs. Sleepable BPF programs can + * only be attached to hooks where kernel execution context allows sleeping. + * Such programs are allowed to use helpers that may sleep like + * bpf_copy_from_user(). + */ +#define BPF_F_SLEEPABLE (1U << 4) + +/* If BPF_F_XDP_HAS_FRAGS is used in BPF_PROG_LOAD command, the loaded program + * fully support xdp frags. 
+ */ +#define BPF_F_XDP_HAS_FRAGS (1U << 5) + +/* If BPF_F_XDP_DEV_BOUND_ONLY is used in BPF_PROG_LOAD command, the loaded + * program becomes device-bound but can access XDP metadata. + */ +#define BPF_F_XDP_DEV_BOUND_ONLY (1U << 6) + +/* The verifier internal test flag. Behavior is undefined */ +#define BPF_F_TEST_REG_INVARIANTS (1U << 7) + +/* BPF program can access up to 512 bytes of stack space. */ +#define MAX_BPF_STACK 512 + +/* Helper macros for filter block array initializers. */ + +/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ + +#define BPF_ALU64_REG_OFF(OP, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +#define BPF_ALU64_REG(OP, DST, SRC) \ + BPF_ALU64_REG_OFF(OP, DST, SRC, 0) + +#define BPF_ALU32_REG_OFF(OP, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +#define BPF_ALU32_REG(OP, DST, SRC) \ + BPF_ALU32_REG_OFF(OP, DST, SRC, 0) + +/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */ + +#define BPF_ALU64_IMM_OFF(OP, DST, IMM, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) +#define BPF_ALU64_IMM(OP, DST, IMM) \ + BPF_ALU64_IMM_OFF(OP, DST, IMM, 0) + +#define BPF_ALU32_IMM_OFF(OP, DST, IMM, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) +#define BPF_ALU32_IMM(OP, DST, IMM) \ + BPF_ALU32_IMM_OFF(OP, DST, IMM, 0) + +/* Endianess conversion, cpu_to_{l,b}e(), {l,b}e_to_cpu() */ + +#define BPF_ENDIAN(TYPE, DST, LEN) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_END | BPF_SRC(TYPE), \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = LEN }) + +/* Byte Swap, bswap16/32/64 */ + +#define BPF_BSWAP(DST, LEN) \ + 
((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_END | BPF_SRC(BPF_TO_LE), \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = LEN }) + +/* Short form of mov, dst_reg = src_reg */ + +#define BPF_MOV64_REG(DST, SRC) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_MOV | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = 0 }) + +#define BPF_MOV32_REG(DST, SRC) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_MOV | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = 0 }) + +#define BPF_MOV64_IMM(DST, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_MOV | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +#define BPF_MOV32_IMM(DST, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_MOV | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +/* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */ + +#define BPF_JMP_REG(OP, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */ + +#define BPF_JMP_IMM(OP, DST, IMM, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + +/* Like BPF_JMP_REG, but with 32-bit wide operands for comparison. */ + +#define BPF_JMP32_REG(OP, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* Like BPF_JMP_IMM, but with 32-bit wide operands for comparison. 
*/ + +#define BPF_JMP32_IMM(OP, DST, IMM, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + +/* Unconditional jumps, goto pc + off16 */ + +#define BPF_JMP_A(OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_JA, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = 0 }) + +/* Unconditional jumps, gotol pc + imm32 */ + +#define BPF_JMP32_A(IMM) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_JA, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +#define BPF_EXIT_INSN() \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_EXIT, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = 0 }) + +/* Relative call */ +#define BPF_CALL_REL(TGT) \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_CALL, \ + .dst_reg = 0, \ + .src_reg = BPF_PSEUDO_CALL, \ + .off = 0, \ + .imm = TGT }) + +#endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/include/lib/bpf/bpf_common.h b/include/lib/bpf/bpf_common.h new file mode 100644 index 00000000..ee97668b --- /dev/null +++ b/include/lib/bpf/bpf_common.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__LINUX_BPF_COMMON_H__ +#define _UAPI__LINUX_BPF_COMMON_H__ + +/* Instruction classes */ +#define BPF_CLASS(code) ((code) & 0x07) +#define BPF_LD 0x00 +#define BPF_LDX 0x01 +#define BPF_ST 0x02 +#define BPF_STX 0x03 +#define BPF_ALU 0x04 +#define BPF_JMP 0x05 +#define BPF_RET 0x06 +#define BPF_MISC 0x07 + +/* ld/ldx fields */ +#define BPF_SIZE(code) ((code) & 0x18) +#define BPF_W 0x00 /* 32-bit */ +#define BPF_H 0x08 /* 16-bit */ +#define BPF_B 0x10 /* 8-bit */ +/* eBPF BPF_DW 0x18 64-bit */ +#define BPF_MODE(code) ((code) & 0xe0) +#define BPF_IMM 0x00 +#define BPF_ABS 0x20 +#define BPF_IND 0x40 +#define BPF_MEM 0x60 +#define BPF_LEN 0x80 +#define BPF_MSH 0xa0 + +/* alu/jmp fields */ +#define BPF_OP(code) ((code) & 0xf0) +#define BPF_ADD 0x00 +#define BPF_SUB 0x10 
+#define BPF_MUL 0x20 +#define BPF_DIV 0x30 +#define BPF_OR 0x40 +#define BPF_AND 0x50 +#define BPF_LSH 0x60 +#define BPF_RSH 0x70 +#define BPF_NEG 0x80 +#define BPF_MOD 0x90 +#define BPF_XOR 0xa0 + +#define BPF_JA 0x00 +#define BPF_JEQ 0x10 +#define BPF_JGT 0x20 +#define BPF_JGE 0x30 +#define BPF_JSET 0x40 +#define BPF_SRC(code) ((code) & 0x08) +#define BPF_K 0x00 +#define BPF_X 0x08 + +#ifndef BPF_MAXINSNS +#define BPF_MAXINSNS 4096 +#endif + +#endif /* _UAPI__LINUX_BPF_COMMON_H__ */ diff --git a/include/lib/function.hpp b/include/lib/function.hpp index baef5361..70897d73 100644 --- a/include/lib/function.hpp +++ b/include/lib/function.hpp @@ -31,6 +31,7 @@ #include #include "jnif/jnif.hpp" +#include "lib/bpf/bpf.h" #include "lib/ctrlflow.hpp" #include "lib/dbgutils.hpp" #include "lib/lang.hpp" @@ -119,6 +120,11 @@ class FunPlus { // Parse the map of initialisation-finalisation and return them [[nodiscard]] static InitFinaMap ParseMappingCode(const std::string &mapPath); + // Generate the eBPF code of the function for a given execution + // If `insns` is provided, the code is appended to it; otherwise return a new vector + std::vector + GenerateFuneBPFCode(const UBFreeExec &exec, std::vector *insns = nullptr) const; + private: // Generate a new basic block with random statements and symbols void generateBasicBlock(symir::FunctBuilder *funBd, int bblId, const BblSketch &bblSkt); diff --git a/include/lib/lowers.hpp b/include/lib/lowers.hpp index 9094f4bf..c449d8c3 100644 --- a/include/lib/lowers.hpp +++ b/include/lib/lowers.hpp @@ -28,6 +28,7 @@ #include #include "jnif/jnif.hpp" +#include "lib/bpf/bpf.h" #include "lib/lang.hpp" #define SYMIR_LOWER_INDENTATION_SIZE 2 @@ -202,9 +203,92 @@ namespace symir { jnif::ConstPool::NULLENTRY; // The method index in the constant pool const Funct *fun = nullptr; // The function that we're currently lowering std::map locals{}; // Map from variable names to local variable indices - std::map labels{ - }; // Map from block labels 
to bytecode labels + std::map + labels{}; // Map from block labels to bytecode labels }; + + /// Translates SymIR to eBPF bytecode with register allocation, arithmetic operations, + /// control flow handling, and oracle-based verification for bug detection. + class eBPFLower : public SymIRLower { + public: + explicit eBPFLower(std::vector &insns) : SymIRLower(devNull), insns(insns) {} + + protected: + void Visit(const VarUse &v) override; + void Visit(const Coef &c) override; + void Visit(const Term &t) override; + void Visit(const Expr &e) override; + void Visit(const Cond &c) override; + void Visit(const AssStmt &e) override; + void Visit(const RetStmt &r) override; + void Visit(const Branch &b) override; + void Visit(const Goto &g) override; + void Visit(const Param &p) override; + void Visit(const Local &l) override; + void Visit(const Block &b) override; + void Visit(const Funct &f) override; + + private: + using u32 = std::uint32_t; + using u16 = std::uint16_t; + using u8 = std::uint8_t; + + /* BPF has 11 regs, R0~R10: + * R0: return reg + * R1: ctx pointer + * R2~R5: other function parameters + * R6~R9: locals + * R10: stack pointer + * In this translation, we use: + * R2~R5: params + * R6~R7: locals + * R8: tmp reg (for the lower) + * R9: tmp reg + * So 6 regs are available (MAX_REG), AX0 and AX1 are tmps. 
+ */ + static const u8 MAX_REG = 6; + static const u8 REG_AX0 = BPF_REG_8; // for imm result of term + static const u8 REG_AX1 = BPF_REG_9; // for expr + + template + u8 GetReg(const T *t, bool param = false) { + switch (t->GetType()) { + case SymIR::Type::I32: { + const auto name = t->GetName(); + if (regs.find(name) == regs.end()) { + Assert(regs.size() < MAX_REG, "Too many variables"); + u8 reg_n; + if (param) { + reg_n = preg_gen++; + Assert(reg_n <= BPF_REG_5, "Too many parameters"); + } else { + reg_n = lreg_gen++; + Assert(reg_n <= BPF_REG_7, "Too many locals"); + } + regs[name] = reg_n; + } + return regs[name]; + } + default: { + Panic("Unsupported var type"); + } + } + } + + void AddJmp(struct bpf_insn insn, const std::string &target) { + jmp_fixups[static_cast(insns.size())] = target; + insns.push_back(insn); + } + + std::vector &insns; + std::unordered_map regs; + u8 preg_gen = BPF_REG_2; // Next param reg, starting from R2 + u8 lreg_gen = BPF_REG_6; // Next local reg, starting from R6 + std::unordered_map labels; + std::unordered_map jmp_fixups; + }; + + } // namespace symir diff --git a/lib/function.cpp b/lib/function.cpp index d9a09302..9c5ab59b 100644 --- a/lib/function.cpp +++ b/lib/function.cpp @@ -444,3 +444,100 @@ FunPlus::InitFinaMap FunPlus::ParseMappingCode(const std::string &mapPath) { return InitFinaMap(std::move(initialisations), std::move(finalizations)); } + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wnarrowing" + +std::vector FunPlus::GenerateFuneBPFCode( + const UBFreeExec &exec, std::vector *provided_insns_buf +) const { + using u8 = std::uint8_t; + using u32 = std::uint32_t; + + Assert(exec.GetOwner() == this, "The execution does not belong to this function!"); + const auto *fun = exec.GetFun(); + Assert(fun != nullptr, "Function is not generated yet!"); + + const auto &initializations = exec.GetInitializations(); + const auto &finalizations = exec.GetFinalizations(); + Assert( + initializations.size() == 
finalizations.size(), + "Initializations and finalizations must have the same size" + ); + // The ctx pointer of R1 is preserved, so we don't generate it. + Assert(initializations.size() <= 4, "Four parameters are supported for eBPF"); + + /* eBPF Program Layout: + * + * ╭─────────────────────────────────────────────────────────╮ + * │ MAIN FUNCTION │ + * ├─────────────────────────────────────────────────────────┤ + * │ R9 = 0 (counter initialization) │ + * │ │ + * │ ┌─ FOR EACH TEST CASE ─────────────────────────────┐ │ + * │ │ • Setup params (R2, R3, R4, R5) │ │ + * │ │ • CALL generated_func │ │ + * │ │ • Oracle: if (R0 != expected_csum) skip │ │ + * │ │ • R9++ (increment counter) │ │ + * │ └──────────────────────────────────────────────────┘ │ + * │ │ + * │ Final Oracle: │ + * │ • if (R9 != num_tests) exit(normal) │ + * │ • R10 = 0 (bug detected signal) │ + * │ • exit │ + * ├─────────────────────────────────────────────────────────┤ + * │ GENERATED FUNCTION │ + * ├─────────────────────────────────────────────────────────┤ + * │ Register Layout: │ + * │ • R2-R5: function parameters │ + * │ • R6-R7: local variables │ + * │ • R8 (AX0): temp for term results │ + * │ • R9 (AX1): temp for expression results │ + * │ │ + * │ Function Body (from SymIR lowering): │ + * │ • Param/Local initialization │ + * │ • Basic blocks with control flow │ + * │ • Arithmetic operations │ + * │ • Return: XOR checksum of finals in R0 │ + * ╰─────────────────────────────────────────────────────────╯ + */ + + std::vector local_insns_buf; + std::vector *prog = provided_insns_buf ? 
provided_insns_buf : &local_insns_buf; + + prog->push_back(BPF_MOV32_IMM(BPF_REG_9, 0)); + + std::vector call_fixups; + for (size_t i = 0; i < initializations.size(); i++) { + const auto &init = initializations[i]; + const auto &fina = finalizations[i]; + const auto numParams = static_cast(init.size()); + for (auto j = 0; j < numParams; j++) { + prog->push_back(BPF_MOV32_IMM(BPF_REG_2 + j, init[j])); + } + call_fixups.push_back(prog->size()); + prog->push_back(BPF_CALL_REL(0)); + + u32 csum = 0; + for (auto x: fina) { + csum ^= x; + } + // counter + prog->push_back(BPF_JMP32_IMM(BPF_JNE, BPF_REG_0, csum, 1)); + prog->push_back(BPF_ALU32_IMM(BPF_ADD, BPF_REG_9, 1)); + } + // oracle + prog->push_back(BPF_JMP32_IMM(BPF_JNE, BPF_REG_9, initializations.size(), 1)); + prog->push_back(BPF_MOV32_IMM(BPF_REG_10, 0)); + prog->push_back(BPF_EXIT_INSN()); + + for (size_t i = 0; i < call_fixups.size(); i++) + prog->at(call_fixups[i]).imm = prog->size() - call_fixups[i] - 1; + + symir::eBPFLower lower(*prog); + lower.Lower(*fun); // append the real prog + + return local_insns_buf; // empty if `provided_insns_buf` exists +} + +#pragma GCC diagnostic pop diff --git a/lib/lowers.cpp b/lib/lowers.cpp index 4c261bb2..c620262b 100644 --- a/lib/lowers.cpp +++ b/lib/lowers.cpp @@ -24,6 +24,7 @@ // SOFTWARE. #include "lib/lowers.hpp" +#include "lib/bpf/bpf.h" #include "lib/chksum.hpp" #include "lib/logger.hpp" @@ -497,4 +498,187 @@ namespace symir { b->Accept(*this); } } + +/* C++ has the following strange warning, which is basically fp in C. + * Suppress it in the eBPFLower code area. 
+ */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wnarrowing" + + void eBPFLower::Visit(const VarUse &v) { Panic("Unreachable"); } + + void eBPFLower::Visit(const Coef &c) { + Assert(c.GetType() == SymIR::Type::I32, "Unsupported coefficient type"); + Assert(c.IsValueSet(), "Coefficient value is not set"); + } + + void eBPFLower::Visit(const Term &t) { + /* term := op coef var + * This: stores coef to AX0, and get the reg of var + * compute the result into AX0 + */ + + t.GetCoef()->Accept(*this); + int coef = std::stoi(t.GetCoef()->GetValue()); + insns.push_back(BPF_MOV32_IMM(REG_AX0, coef)); + + u8 op; + u16 off = 0; + switch (t.GetOp()) { + case Term::Op::OP_ADD: + op = BPF_ADD; + break; + case Term::Op::OP_SUB: + op = BPF_SUB; + break; + case Term::Op::OP_MUL: + op = BPF_MUL; + break; + case Term::Op::OP_DIV: + op = BPF_DIV; + off = 1; // BPF_SDIV + break; + case Term::Op::OP_REM: + op = BPF_MOD; + off = 1; // BPF_SMOD + break; + case Term::Op::OP_CST: + // Do nothing + return; + default: + Panic("Unsupported term type"); + } + if (t.GetVar() != nullptr) { + auto insn = BPF_ALU32_REG(op, REG_AX0, GetReg(t.GetVar())); + insn.off = off; + insns.push_back(insn); + } + } + + void eBPFLower::Visit(const Expr &e) { + /* expr := op term1 term2 ... 
termN + * This is computed by: + * ax1 = term1 + * for i = 2 to N: + * ax1 = op(ax1, termi) + */ + + e.GetTerm(0)->Accept(*this); + insns.push_back(BPF_MOV32_REG(REG_AX1, REG_AX0)); + + u8 op; + switch (e.GetOp()) { + case Expr::Op::OP_ADD: + op = BPF_ADD; + break; + case Expr::Op::OP_SUB: + op = BPF_SUB; + break; + default: + Panic("Unsupported expression type"); + } + + for (size_t i = 1; i < e.GetTerms().size(); ++i) { + e.GetTerm(i)->Accept(*this); + insns.push_back(BPF_ALU32_REG(op, REG_AX1, REG_AX0)); + } + } + + void eBPFLower::Visit(const Cond &c) { c.GetExpr()->Accept(*this); } + + void eBPFLower::Visit(const AssStmt &a) { + u8 reg = GetReg(a.GetVar()); + a.GetExpr()->Accept(*this); + insns.push_back(BPF_MOV32_REG(reg, REG_AX1)); + } + + void eBPFLower::Visit(const RetStmt &r) { + /* csum = v1 ^ v2 ^ ... ^ vN + * The below adds an oracle: + * if (csum == csum_computed_during_gen_exe) + * verifier_sink(); + * The sink is expected to be detected; otherwise, a false negative. + */ + + insns.push_back(BPF_MOV32_IMM(BPF_REG_0, 0)); + for (const auto &v: r.GetVars()) + insns.push_back(BPF_ALU32_REG(BPF_XOR, BPF_REG_0, GetReg(v))); + insns.push_back(BPF_EXIT_INSN()); + } + + void eBPFLower::Visit(const Branch &b) { + /* Jmp in this ir has two targets, while bpf only has one. 
+ * For `br cond l0 l1`, we translate it to: + * jmp cond l0 + * jmp l1 + */ + + b.GetCond()->Accept(*this); + + u8 op; + switch (b.GetCond()->GetOp()) { + case Cond::Op::OP_GTZ: + op = BPF_JSGT; + break; + case Cond::Op::OP_LTZ: + op = BPF_JSLT; + break; + case Cond::Op::OP_EQZ: + op = BPF_JEQ; + break; + default: + Panic("Unsupported condition type"); + } + AddJmp(BPF_JMP32_IMM(op, REG_AX1, 0, 0), b.GetTrueTarget()); + AddJmp(BPF_JMP_A(0), b.GetFalseTarget()); + } + + void eBPFLower::Visit(const Goto &g) { AddJmp(BPF_JMP_A(0), g.GetTarget()); } + + void eBPFLower::Visit(const Param &p) { GetReg(&p, true); } + + void eBPFLower::Visit(const Local &l) { + l.GetCoef()->Accept(*this); + insns.push_back(BPF_MOV32_IMM(GetReg(&l), std::stoi(l.GetCoef()->GetValue()))); + } + + void eBPFLower::Visit(const Block &b) { + u32 insn_cnt_before = insns.size(); + + labels[b.GetLabel()] = insns.size(); + for (const auto &s: b.GetStmts()) { + s->Accept(*this); + } + Assert(insn_cnt_before != insns.size(), "Empty block"); + } + + void eBPFLower::Visit(const Funct &f) { + for (const auto &p: f.GetParams()) { + p->Accept(*this); + } + + for (const auto &l: f.GetLocals()) { + l->Accept(*this); + } + + for (const auto &b: f.GetBlocks()) { + b->Accept(*this); + } + + // Fix block jump offsets + for (const auto &[insn_off, target]: jmp_fixups) { + const auto br_off = labels.at(target); + Assert(br_off != insn_off, "Dead jmp"); + int off = static_cast(br_off) - static_cast(insn_off); + // eBPF jump offset is relative to the next instruction + off -= 1; + Assert(off >= INT16_MIN && off <= INT16_MAX, "Jump offset too large"); + insns[insn_off].off = off; + } + jmp_fixups.clear(); + } + +#pragma GCC diagnostic pop + + } // namespace symir diff --git a/scripts/vm_bpf_test.sh b/scripts/vm_bpf_test.sh new file mode 100755 index 00000000..2918f708 --- /dev/null +++ b/scripts/vm_bpf_test.sh @@ -0,0 +1,416 @@ +#!/usr/bin/env bash + +set -euo pipefail + +# Default configuration +DEFAULT_MEM="64G" 
DEFAULT_CPUS="32"
DEFAULT_SSH_PORT="10080"
DEFAULT_SSH_USER="root"
DEFAULT_SSH_KEY="imgs/bookworm.id_rsa"
DEFAULT_VM_IMG="imgs/bookworm.img"
DEFAULT_KERNEL_PATH="imgs/bzImage"
# A missing virtiofsd must not abort the script here (set -e): the user may still pass
# one with -f/--fsd, and the empty value is reported by the validation step later on.
DEFAULT_VM_FSD=$(command -v virtiofsd || true)
DEFAULT_BPF_TEST_PATH="build/bin/bpf_test"
DEFAULT_OUTPUT_DIR="bpf_test_output"
DEFAULT_SHARED_DIR="output"
DEFAULT_PROCS="8"

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Logging functions: each prints its first argument with a colored severity tag.
log_info() {
    echo -e "${BLUE}[INFO]${NC} $1"
}

log_success() {
    echo -e "${GREEN}[SUCCESS]${NC} $1"
}

log_warning() {
    echo -e "${YELLOW}[WARNING]${NC} $1"
}

log_error() {
    echo -e "${RED}[ERROR]${NC} $1"
}

# Log an error and terminate the script with status 1.
fatal() {
    log_error "$1"
    exit 1
}

trap 'fatal "Error on line $LINENO"' ERR

# Function to show usage
show_usage() {
    cat << EOF
Usage: $0 [OPTIONS] -- [BPF_TEST_ARGS...]

VM-based eBPF testing script that runs bpf_test inside a QEMU virtual machine.

OPTIONS:
    -h, --help              Show this help message
    -m, --memory MEM        VM memory size (default: $DEFAULT_MEM)
    -c, --cpus CPUS         Number of VM CPUs (default: $DEFAULT_CPUS)
    -p, --ssh-port PORT     SSH port for VM access (default: $DEFAULT_SSH_PORT)
    -u, --ssh-user USER     SSH user for VM access (default: $DEFAULT_SSH_USER)
    -k, --ssh-key PATH      SSH key for VM access (default: $DEFAULT_SSH_KEY)
    -i, --image PATH        VM disk image path (default: $DEFAULT_VM_IMG)
    -K, --kernel PATH       VM kernel path (default: $DEFAULT_KERNEL_PATH)
    -f, --fsd PATH          virtiofsd binary path (default: $DEFAULT_VM_FSD)
    -b, --bpf-test PATH     Path to bpf_test binary (default: $DEFAULT_BPF_TEST_PATH)
    -s, --shared DIR        Result directory shared with VM (default: $DEFAULT_SHARED_DIR)
    -o, --output DIR        Output directory for bpf_test in VM (default: $DEFAULT_OUTPUT_DIR)
    -t, --timeout SEC       Timeout for VM boot in seconds (default: 120)
    -n, --procs NUM         Number of processes to spawn: bpf_test --procs (default: $DEFAULT_PROCS)

BPF_TEST_ARGS:
    All arguments after -- are passed directly to bpf_test inside the VM.

EXAMPLES:
    # Basic usage with default settings
    $0

    # Custom VM configuration
    $0 -m 16G -c 8 -i /path/to/vm.img -K /path/to/kernel

    # With custom bpf_test binary
    $0 -b /path/to/bpf_test

EOF
}

# Poll once per second until path $2 exists, for at most $3 seconds.
# $1 is a human-readable name used in the log messages. Exits via fatal on timeout.
wait_for() {
    local what="$1" file="$2" max="$3"
    local i # keep the loop counter out of the global scope
    log_info "Waiting for $what..."
    for ((i=1; i<=max; i++)); do
        if [[ -e "$file" ]]; then
            log_success "$what is ready"
            return 0
        fi
        sleep 1
    done
    fatal "$what did not appear within ${max}s: $file"
}

# Function to execute command in VM via SSH
_vmcmd() {
    ssh -o ConnectTimeout=1 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
        -p "$SSH_PORT" -i "$SSH_KEY" "$SSH_USER"@localhost "$@"
}

# Copy host file $1 to path $2 inside the VM.
_vmcopy() {
    local src="$1" dst="$2"
    scp -o ConnectTimeout=5 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
        -P "$SSH_PORT" -i "$SSH_KEY" "$src" "$SSH_USER@localhost:$dst"
}

# Copy VM file $1 to host path $2.
_vmget() {
    local src="$1" dst="$2"
    scp -o ConnectTimeout=5 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
        -P "$SSH_PORT" -i "$SSH_KEY" "$SSH_USER@localhost:$src" "$dst"
}
2 + ;; + -u|--ssh-user) + if [[ $# -lt 2 ]]; then fatal "Missing argument for $1"; fi + SSH_USER="$2" + shift 2 + ;; + -k|--ssh-key) + if [[ $# -lt 2 ]]; then fatal "Missing argument for $1"; fi + SSH_KEY="$2" + shift 2 + ;; + -i|--image) + if [[ $# -lt 2 ]]; then fatal "Missing argument for $1"; fi + VM_IMG="$2" + shift 2 + ;; + -K|--kernel) + if [[ $# -lt 2 ]]; then fatal "Missing argument for $1"; fi + KERNEL_PATH="$2" + shift 2 + ;; + -f|--fsd) + if [[ $# -lt 2 ]]; then fatal "Missing argument for $1"; fi + VM_FSD="$2" + shift 2 + ;; + -b|--bpf-test) + if [[ $# -lt 2 ]]; then fatal "Missing argument for $1"; fi + BPF_TEST_PATH="$2" + shift 2 + ;; + -o|--output) + if [[ $# -lt 2 ]]; then fatal "Missing argument for $1"; fi + OUTPUT_DIR="$2" + shift 2 + ;; + -s|--shared) + if [[ $# -lt 2 ]]; then fatal "Missing argument for $1"; fi + SHARED_DIR="$2" + shift 2 + ;; + -t|--timeout) + if [[ $# -lt 2 ]]; then fatal "Missing argument for $1"; fi + TIMEOUT="$2" + shift 2 + ;; + -n|--procs) + if [[ $# -lt 2 ]]; then fatal "Missing argument for $1"; fi + PROCS="$2" + shift 2 + ;; + --debug) + DEBUG=true + shift + ;; + --) + shift + break + ;; + *) + log_error "Unknown option: $1" + show_usage + exit 1 + ;; + esac +done + +# Store bpf_test arguments +BPF_TEST_ARGS=("$@") + +# Debug output +if [[ "$DEBUG" == true ]]; then + log_info "Configuration:" + log_info " Memory: $MEM" + log_info " CPUs: $CPUS" + log_info " SSH Port: $SSH_PORT" + log_info " SSH User: $SSH_USER" + log_info " SSH Key: $SSH_KEY" + log_info " VM Image: $VM_IMG" + log_info " Kernel: $KERNEL_PATH" + log_info " virtiofsd: $VM_FSD" + log_info " bpf_test: $BPF_TEST_PATH" + log_info " Output Dir: $OUTPUT_DIR" + log_info " Shared Dir: $SHARED_DIR" + log_info " Timeout: $TIMEOUT" + log_info " Procs: $PROCS" +fi + +# Check required files +require() { + if [[ ! 
# Validate required files
require "$VM_IMG" "VM image"
require "$KERNEL_PATH" "Kernel"
if [[ -z "$VM_FSD" ]]; then
    fatal "virtiofsd not found in PATH"
fi
require "$VM_FSD" "virtiofsd"
require "$SSH_KEY" "SSH key"
require "$BPF_TEST_PATH" "bpf_test binary"
command -v qemu-system-x86_64 >/dev/null 2>&1 || fatal "qemu-system-x86_64 not found in PATH"
command -v ssh >/dev/null 2>&1 || fatal "ssh not found in PATH"
command -v scp >/dev/null 2>&1 || fatal "scp not found in PATH"

mkdir -p "$SHARED_DIR"

# Everything the host needs to observe lives in the directory shared with the VM.
RESULT_DIR="$SHARED_DIR"
SOCK="$RESULT_DIR/bpf-test.sock"
VIRTIOFSD_PIDFILE="$RESULT_DIR/virtiofsd.pid"
VIRTIOFSD_LOG="$RESULT_DIR/virtiofsd.log"
VM_PIDFILE="$RESULT_DIR/vm.pid"
VM_LOG="$RESULT_DIR/vm.log"
BPF_TEST_RESULTS="$RESULT_DIR/$OUTPUT_DIR"
VM_BPF_TEST_OUTPUT="$OUTPUT_DIR/bpf_test.log"     # bpf_test output in VM (relative to the mount point)
# The VM writes the log below $OUTPUT_DIR inside the shared directory, so the host-side
# path is under $BPF_TEST_RESULTS, not directly under $RESULT_DIR.
BPF_TEST_OUTPUT="$BPF_TEST_RESULTS/bpf_test.log"  # bpf_test output in host

# Create results directory
mkdir -p "$BPF_TEST_RESULTS"
if [[ ! -d "$BPF_TEST_RESULTS" ]]; then
    fatal "Failed to create results directory: $BPF_TEST_RESULTS"
fi

# Clean up any existing files
rm -f "$SOCK" "$VM_PIDFILE" "$VIRTIOFSD_PIDFILE" "$VIRTIOFSD_LOG" "$VM_LOG"

log_info "Starting VM-based eBPF testing..."

# Start virtiofsd
log_info "Starting virtiofsd..."
"$VM_FSD" --socket-path "$SOCK" --shared-dir "$SHARED_DIR" > "$VIRTIOFSD_LOG" 2>&1 &
VIRTIOFSD_PID=$!
echo "$VIRTIOFSD_PID" > "$VIRTIOFSD_PIDFILE"

wait_for "virtiofsd socket" "$SOCK" 5
[[ -S "$SOCK" ]] || fatal "virtiofsd did not create socket $SOCK"
+qemu-system-x86_64 \ + -m "$MEM" \ + -smp "$CPUS" \ + -kernel "$KERNEL_PATH" \ + -append "console=ttyS0 root=/dev/sda earlyprintk=serial net.ifnames=0" \ + -drive file="$VM_IMG",format=raw \ + -net user,host=10.0.2.10,hostfwd=tcp:127.0.0.1:"$SSH_PORT"-:22 \ + -net nic,model=e1000 \ + -enable-kvm \ + -nographic \ + -pidfile "$VM_PIDFILE" \ + -object memory-backend-file,id=mem,size="$MEM",mem-path=/dev/shm,share=on \ + -numa node,memdev=mem \ + -chardev socket,id=char0,path="$SOCK" \ + -device vhost-user-fs-pci,queue-size=1024,chardev=char0,tag=bpf-test \ + -snapshot \ + > "$VM_LOG" 2>&1 & + +wait_for "QEMU PID file" "$VM_PIDFILE" 10 + +# Wait for VM to boot +log_info "Waiting for VM to boot..." +VM_BOOTED=false +for ((i=1; i<=TIMEOUT; i++)); do + echo -ne "\rWaiting for VM to boot $i/$TIMEOUT (s)..." + if _vmcmd "pwd" > /dev/null 2>&1; then + echo + log_success "VM is ready. SSH available on port $SSH_PORT." + VM_BOOTED=true + break + fi + sleep 1 +done + +if [[ "$VM_BOOTED" != true ]]; then + echo + fatal "VM did not become available via SSH on port $SSH_PORT within ${TIMEOUT}s" +fi + +# Mount shared directory in VM +log_info "Mounting shared directory..." +VM_WORK_DIR="/mnt/shared" +_vmcmd "mkdir -p $VM_WORK_DIR" || true +if ! _vmcmd "mount -t virtiofs bpf-test $VM_WORK_DIR"; then + fatal "Failed to mount virtiofs at $VM_WORK_DIR" +fi +log_success "Shared directory mounted: $SHARED_DIR => $VM_WORK_DIR" + +# Copy bpf_test binary to VM +log_info "Copying bpf_test binary to VM..." +VM_BPF_TEST_PATH="$VM_WORK_DIR/bpf_test" +if ! _vmcopy "$BPF_TEST_PATH" "$VM_BPF_TEST_PATH"; then + fatal "Failed to copy bpf_test binary to VM" +fi +if ! _vmcmd "chmod +x $VM_BPF_TEST_PATH"; then + fatal "Failed to chmod bpf_test binary in VM" +fi +log_success "bpf_test binary copied to VM: $VM_BPF_TEST_PATH" + +# Create results directory in VM +VM_RESULTS_DIR="$VM_WORK_DIR/$OUTPUT_DIR" +if ! 
# Execute bpf_test in VM
# If func_db.jsonl is in $SHARED_DIR, use the graph mode
if [[ -f "$SHARED_DIR/func_db.jsonl" ]]; then
    log_info "Using func graph db: $SHARED_DIR/func_db.jsonl"
    BPF_TEST_ARGS+=("--unstable-graphdb $VM_WORK_DIR/func_db.jsonl") # VM_WORK_DIR is mounted in VM
fi

log_info "Executing bpf_test in VM..."
log_info "Command: $VM_BPF_TEST_PATH --Xenable-ub-inject --Xenable-all-ops --output $VM_RESULTS_DIR --procs $PROCS ${BPF_TEST_ARGS[*]}"

# Run bpf_test in the background inside the VM and return immediately
# Use a wrapper script to properly handle background execution
# (the variables below are expanded on the host before the text is sent to the VM)
_vmcmd "cat > $VM_WORK_DIR/run_bpf_test.sh << 'EOF'
#!/bin/bash
cd $VM_WORK_DIR
exec $VM_BPF_TEST_PATH --Xenable-ub-inject --Xenable-all-ops --output $VM_RESULTS_DIR --procs $PROCS ${BPF_TEST_ARGS[*]} > $VM_BPF_TEST_OUTPUT 2>&1
EOF"

_vmcmd "chmod +x $VM_WORK_DIR/run_bpf_test.sh"

# Always defined, so the summary below can test it under `set -u` even when the launch fails.
BPF_TEST_PID=""

# Start the process in background using nohup and redirect to /dev/null to avoid SSH hanging
if _vmcmd "nohup $VM_WORK_DIR/run_bpf_test.sh > /dev/null 2>&1 & echo \$! > $VM_WORK_DIR/bpf_test.pid"; then
    # A failed read must reach the warning branch instead of aborting through errexit.
    BPF_TEST_PID=$(_vmcmd "cat $VM_WORK_DIR/bpf_test.pid" || true)
    if [[ -n "$BPF_TEST_PID" ]]; then
        log_success "bpf_test started successfully in background (PID: $BPF_TEST_PID)"
        # Verify the process is actually running
        sleep 1
        if _vmcmd "kill -0 $BPF_TEST_PID 2>/dev/null"; then
            log_success "bpf_test process confirmed running"
        else
            log_warning "bpf_test process may have failed to start properly"
        fi
    else
        log_warning "Failed to get bpf_test PID"
    fi
else
    log_warning "Failed to start bpf_test in background (check $BPF_TEST_OUTPUT for details)"
fi
+log_info "Output files:" +log_info " VM log: $VM_LOG" +log_info " bpf_test output: $BPF_TEST_OUTPUT" +log_info " VM PID: $(cat "$VM_PIDFILE" 2>/dev/null || echo 'unknown')" +if [[ -n "$BPF_TEST_PID" ]]; then + log_info " bpf_test PID: $BPF_TEST_PID" +fi +log_info " SSH port: $SSH_PORT" +log_info " To connect: ssh -p $SSH_PORT $SSH_USER@localhost -i $SSH_KEY" +log_info " To check bpf_test status: ssh -p $SSH_PORT $SSH_USER@localhost -i $SSH_KEY 'ps -p $BPF_TEST_PID'" +log_info " To view bpf_test output: ssh -p $SSH_PORT $SSH_USER@localhost -i $SSH_KEY 'tail -f $VM_BPF_TEST_OUTPUT'" diff --git a/src/bpf_test.cpp b/src/bpf_test.cpp new file mode 100644 index 00000000..fcb891d2 --- /dev/null +++ b/src/bpf_test.cpp @@ -0,0 +1,742 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "global.hpp" +#include "lib/random.hpp" +#include "lib/ubfexec.hpp" +#include "z3.h" + +using std::cerr; +using std::cout; +using std::endl; +using std::string; +using std::unique_ptr; +using std::vector; + +using u64 = std::int64_t; +using u32 = std::int32_t; + +struct eBPFTestOpts { + string uuid; + string sno; + string output; + u32 max_report; + u32 report_num; + bool verbose; + u32 num_procs; + u32 proc_id; // 0 for parent, 1-N for workers + bool no_save_prog; + bool verify_crash; + string load_prog; + bool de_oracle; + + static eBPFTestOpts Parse(int argc, char **argv) { + cxxopts::Options options("bpf_test"); + options.add_options() + ("o,output", "Directory to save the testing eBPF program", cxxopts::value()) + ("s,seed", "Seed for random sampling (negative for true random)", cxxopts::value()->default_value("-1")) + ("max_report", "Maximum number of reports to generate", cxxopts::value()->default_value("1024")) + ("procs", "Number of processes to spawn", cxxopts::value()->default_value("1")) + ("no_save_prog", "Do not save the generated prog before 
loading", cxxopts::value()->default_value("false")->implicit_value("true")) + ("verify_crash", "Verify the crash discovered", cxxopts::value()->default_value("false")->implicit_value("true")) + ("load_prog", "Load the prog from the file", cxxopts::value()->default_value("")) + ("de_oracle", "De-oracleize the prog before loading", cxxopts::value()->default_value("false")->implicit_value("true")) + ("v,verbose", "Enable verbose output", cxxopts::value()->default_value("false")->implicit_value("true")) + ("h,help", "Print help message", cxxopts::value()->default_value("false")->implicit_value("true")); + + GlobalOptions::AddFuncOpts(options); + + cxxopts::ParseResult args; + try { + args = options.parse(argc, argv); + } catch (cxxopts::exceptions::exception &e) { + std::cerr << "Error: " << e.what() << std::endl; + exit(1); + } + + if (args.count("help")) { + std::cout << options.help() << std::endl; + exit(0); + } + + std::string output; + if (!args.count("output")) { + std::cerr << "Error: The output directory (--output) is not given." << std::endl; + exit(1); + } else { + output = args["output"].as(); + } + + if (const int seed = args["seed"].as(); seed >= 0) { + Random::Get().Seed(seed); + } + + u32 num_procs = args["procs"].as(); + + GlobalOptions::Get().HandleFuncArgs(args); + + return { + .uuid = "prog_" + std::to_string(getpid()), + .sno = "generated", + .output = output, + .max_report = args["max_report"].as(), + .report_num = 0, + .verbose = args["verbose"].as(), + .num_procs = num_procs, + .proc_id = 0, + .no_save_prog = args["no_save_prog"].as(), + .verify_crash = args["verify_crash"].as(), + .load_prog = args["load_prog"].as(), + .de_oracle = args["de_oracle"].as(), + }; + } +}; + +// We don't spawn multi threads right now, but spawn multi processes. +// If this changed, make this local. 
+const size_t log_buf_size = 1024 * 1024; +static char log_buf[log_buf_size]; + +void gen_prog(const eBPFTestOpts &opts, vector &prog); + +// Fork-server for program generation with timeout +class GenForkServer { +private: + int pipe_fd[2]; // [0] for reading, [1] for writing + pid_t child_pid; + bool is_parent; + +public: + GenForkServer() : child_pid(-1), is_parent(false) { + if (pipe(pipe_fd) == -1) { + throw std::runtime_error("Failed to create pipe: " + std::string(strerror(errno))); + } + } + + ~GenForkServer() { + close_read(); + close_write(); + kill_child(); + } + + void close_write() { + if (pipe_fd[1] <= 0) + return; + close(pipe_fd[1]); + pipe_fd[1] = -1; + } + + void close_read() { + if (pipe_fd[0] <= 0) + return; + close(pipe_fd[0]); + pipe_fd[0] = -1; + } + + void kill_child() { + if (child_pid <= 0 || !is_parent) + return; + kill(child_pid, SIGKILL); + waitpid(child_pid, nullptr, 0); + child_pid = -1; + } + + bool generate_program_with_timeout( + const eBPFTestOpts &opts, vector &prog, u32 timeout_seconds + ) { + child_pid = fork(); + + if (child_pid == -1) { + throw std::runtime_error("Failed to fork: " + std::string(strerror(errno))); + } + + if (child_pid == 0) { + // Child process - generate program + is_parent = false; + close_read(); + + try { + gen_prog(opts, prog); + + // Write program size and data + u32 prog_size = prog.size(); + write(pipe_fd[1], &prog_size, sizeof(prog_size)); + write(pipe_fd[1], prog.data(), prog_size * sizeof(struct bpf_insn)); + + close(pipe_fd[1]); + exit(0); + } catch (const std::exception &e) { + // Write error indicator + u32 error_size = 0; + write(pipe_fd[1], &error_size, sizeof(error_size)); + close(pipe_fd[1]); + exit(1); + } + } else { + // Parent process - wait with timeout + is_parent = true; + close_write(); + + // Set pipe to non-blocking + int flags = fcntl(pipe_fd[0], F_GETFL, 0); + fcntl(pipe_fd[0], F_SETFL, flags | O_NONBLOCK); + + auto start_time = std::chrono::steady_clock::now(); + bool success = 
false; + while (true) { + // Check timeout + auto elapsed = std::chrono::steady_clock::now() - start_time; + if (elapsed > std::chrono::seconds(timeout_seconds)) { + cout << "[!] Generation timeout after " << timeout_seconds << " seconds" << endl; + kill_child(); + return false; + } + + // Check if child is still running + int status; + pid_t result = waitpid(child_pid, &status, WNOHANG); + if (result == child_pid) { + if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { + success = true; + } + break; + } else if (result == -1) { + cout << "[!] Error waiting for child process" << endl; + return false; + } + + // Try to read from pipe + u32 prog_size; + ssize_t bytes_read = read(pipe_fd[0], &prog_size, sizeof(prog_size)); + if (bytes_read == sizeof(prog_size)) { + if (prog_size > 0) { + prog.resize(prog_size); + bytes_read = read(pipe_fd[0], prog.data(), prog_size * sizeof(struct bpf_insn)); + if (bytes_read == prog_size * sizeof(struct bpf_insn)) { + success = true; + break; + } + } + break; + } + + // Sleep briefly before next check + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } + + close_read(); + return success; + } + } +};
+
+// Wrapper function for generation with timeout.
+//
+// Creates a GenForkServer and asks it to fill `prog` with a generated program,
+// giving up after `timeout_seconds`. Returns whatever the server reports
+// (false on timeout or failure).
+bool gen_prog_with_timeout(
+    const eBPFTestOpts &opts, vector<struct bpf_insn> &prog, u32 timeout_seconds
+) {
+    GenForkServer server;
+    return server.generate_program_with_timeout(opts, prog, timeout_seconds);
+}
+
+// Submits `prog` to the kernel with bpf(BPF_PROG_LOAD) as a GPL-licensed
+// BPF_PROG_TYPE_RAW_TRACEPOINT program. The verifier log is requested at
+// `log_level` and directed into `log_buf` (declared outside this function).
+//
+// Returns the raw syscall result; callers treat `>= 0` as a program fd and
+// `< 0` as a rejected load.
+int load_prog(
+    vector<struct bpf_insn> &prog, u32 log_level = 1, u32 prog_flags = BPF_F_TEST_REG_INVARIANTS
+) {
+    const char license[] = "GPL";
+    union bpf_attr attr;
+
+    memset(&attr, 0, sizeof(attr));
+    attr.prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT;
+    attr.insns = reinterpret_cast<__u64>(prog.data());
+    attr.insn_cnt = prog.size();
+    attr.license = reinterpret_cast<__u64>(license);
+    attr.log_level = log_level;
+    attr.log_buf = reinterpret_cast<__u64>(log_buf);
+    attr.log_size = log_buf_size;
+    attr.prog_flags = prog_flags;
+
+    return syscall(SYS_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
+}
+
+// Runs the loaded program `prog_fd` once through bpf(BPF_PROG_TEST_RUN) and
+// returns the program's return value. Terminates the process with exit(1) if
+// the syscall itself fails.
+int test_run_prog(int prog_fd) {
+    // Zero-initialized so the program always runs against the same, defined
+    // context instead of whatever happened to be on the stack.
+    u64 ctx_in[12] = {};
+    union bpf_attr attr;
+
+    memset(&attr, 0, sizeof(attr));
+    attr.test.prog_fd = prog_fd;
+    attr.test.ctx_in = reinterpret_cast<__u64>(ctx_in);
+    attr.test.ctx_size_in = sizeof(ctx_in);
+
+    int ret = syscall(SYS_bpf, BPF_PROG_TEST_RUN, &attr, sizeof(attr));
+    if (ret < 0) {
+        cerr << "[-] Failed to run the program: " << strerror(errno) << endl;
+        exit(1);
+    }
+    return attr.test.retval;
+}
+
+// Generates a random function, samples executions of it until one can be
+// solved, and emits the corresponding eBPF instructions into `prog`.
+// Starts over with a fresh function whenever no sampled execution is solvable.
+void gen_prog(const eBPFTestOpts &opts, vector<struct bpf_insn> &prog) {
+    const u32 MAX_PARAMS = 4;
+    const u32 MAX_LOCALS = 2;
+
+again:
+    FunPlus fun(
+        GetFunctionName(opts.uuid, opts.sno), MAX_PARAMS, MAX_LOCALS,
+        GlobalOptions::Get().NumBblsPerFun, GlobalOptions::Get().MaxNumLoopsPerFun,
+        GlobalOptions::Get().MaxNumBblsPerLoop
+    );
+    fun.Generate(false);
+
+    // Set Z3 timeout to 3 seconds to prevent hanging
+    z3::set_param("timeout", 3000);
+
+    // NOTE(review): the template arguments of `std::unique_ptr`, `vector` and
+    // `std::make_unique` below were lost when this patch was extracted and
+    // cannot be recovered from the visible text; restore them from the
+    // original source.
+    std::unique_ptr exec = nullptr;
+    for (int tries = 0; tries < GlobalOptions::Get().MaxNumExecsPerFun; ++tries) {
+        vector execPath = fun.SampleExec(
+            GlobalOptions::Get().MaxNumExecStepsPerFun, GlobalOptions::Get().EnableConsistentExecs
+        );
+        exec = std::make_unique(fun, execPath);
+        int numSolved = exec->Solve(
+            GlobalOptions::Get().NumInitsPerExec, GlobalOptions::Get().EnableInterestInits,
+            GlobalOptions::Get().EnableRandomInits, GlobalOptions::Get().EnableInterestCoefs,
+            /*debug=*/opts.verbose
+        );
+        if (numSolved != 0) {
+            break;
+        }
+        exec = nullptr;
+    }
+
+    // We are unable to find any available executable executions
+    if (exec == nullptr) {
+        std::cerr << "[-] Unable to obtain any UB-free solutions within "
+                  << GlobalOptions::Get().MaxNumExecsPerFun << " execution samples" << std::endl;
+        // TODO Confirm if this is necessary
+        goto again;
+    }
+
+    fun.GenerateFuneBPFCode(*exec, &prog);
+    cout << "[+] Prog size: " << prog.size() << endl;
+}
+
+// Writes the raw instructions of `prog_buf` to `<opts.output>/<name>`.
+// Reports (and skips the write) when the file cannot be opened.
+void save_prog(eBPFTestOpts &opts, vector<struct bpf_insn> &prog_buf, const string &name) {
+    string path = opts.output + "/" + name;
+    std::ofstream ebpfFile(path, std::ios::binary);
+    if (!ebpfFile) {
+        cerr << "[-] Failed to open " << path << " for writing" << endl;
+        return;
+    }
+    ebpfFile.write(
+        reinterpret_cast<const char *>(prog_buf.data()), prog_buf.size() * sizeof(struct bpf_insn)
+    );
+    ebpfFile.close();
+}
+
+// Saves a finding: the program as `<prefix><n>.bpf` and the current contents
+// of `log_buf` as `<prefix>_vlog<n>.txt`, both under `opts.output`, where <n>
+// is `opts.report_num`. Increments `opts.report_num` afterwards. Nothing is
+// written once `opts.report_num` exceeds `opts.max_report`.
+void save_report_prog(eBPFTestOpts &opts, vector<struct bpf_insn> &prog_buf, const string &prefix) {
+
+    if (opts.report_num > opts.max_report) {
+        cerr << "[+] Report suppressed" << endl;
+        return;
+    }
+
+    string prog_name = prefix + std::to_string(opts.report_num) + ".bpf";
+    save_prog(opts, prog_buf, prog_name);
+
+    string vlog_name =
+        opts.output + "/" + prefix + "_vlog" + std::to_string(opts.report_num) + ".txt";
+    std::ofstream vlogFile(vlog_name);
+    vlogFile.write(log_buf, strlen(log_buf));
+    vlogFile.close();
+
+    cout << "[+] Report saved: " << prog_name << " " << vlog_name << endl;
+    opts.report_num++;
+}
+
+// File name under which the most recently generated program is saved.
+string get_gen_prog_name() { return "gen.bpf"; }
+
+// Saves the freshly generated program under the name from get_gen_prog_name().
+void save_gen_prog(eBPFTestOpts &opts, vector<struct bpf_insn> &prog_buf) {
+    save_prog(opts, prog_buf, get_gen_prog_name());
+}
+
+// Analyze the rejection reason.
+//
+// The verifier log follows the format:
+//   nth: insn ; reg_state
+//   ...
+//   rejection reason
+//   processed .. insns (limit 1000000) max_states_per_insn xx total_states xx peak_states xx ...
+//
+// Where nth indicates the instruction index, insn is the instruction analyzed, and reg_state is
+// the abstract state after analyzing insn, e.g., R8_w=Scalar(umax=..,umin=...);
+// Then, the rejection reason follows, e.g., "frame pointer is read only" and "unreachable insn";
+// Finally, the analysis summary follows.
+//
+// What we are currently interested in is the rejection reason and the summary;
+// For different reasons, we have different post processes;
+// The summary is useful information for the test case.
+// Splits the tail of the verifier log `vlog` into its last two lines of
+// non-zero length: the final one is stored in `summary`, the one before it in
+// `reason`. Trailing whitespace is removed from both. Asserts that the log is
+// not empty and that both lines were found.
+void extract_vlog(char *vlog, string &summary, string &reason) {
+    size_t len = std::strlen(vlog);
+    Assert(len > 0, "Empty vlog");
+
+    const char *p = vlog + len;
+    int newline_count = 0;
+    int line_length = 0;
+
+    // Walk backwards from the end of the log. `line_length` counts the
+    // characters seen since the last newline, so every newline reached with a
+    // non-zero count closes one line (zero-length lines are skipped).
+    while (p != vlog) {
+        --p;
+        if (*p == '\n') {
+            if (line_length == 0)
+                continue;
+
+            newline_count++;
+            if (newline_count == 1) {
+                summary = std::string(p + 1, line_length);
+            } else if (newline_count == 2) {
+                reason = std::string(p + 1, line_length);
+                line_length = 0;
+                break;
+            } else {
+                break;
+            }
+            line_length = 0;
+            continue;
+        }
+        line_length++;
+    }
+
+    // The reason is the very first line of the log: there is no newline in
+    // front of it, so the loop ran out of input before closing it.
+    if (line_length > 0 && newline_count == 1)
+        reason = std::string(p, line_length);
+
+    Assert(summary.size() > 0, "Failed to locate the summary");
+    Assert(reason.size() > 0, "Failed to locate the reason");
+
+    summary.erase(summary.find_last_not_of(" \n\r\t") + 1);
+    reason.erase(reason.find_last_not_of(" \n\r\t") + 1);
+    return;
+}
+
+// Returns true when the rejection `reason` is worth reporting as a verifier
+// false positive, i.e. when it contains none of the known-uninteresting
+// messages listed below.
+bool fp_interesting(const string &reason) {
+    // The error reason "sequence of 8193 jumps is too complex" indicates the prog
+    // contains too many jumps, which is not interesting
+    static const std::vector<std::string> filters = {
+        "The sequence of 8193 jumps is too complex", "BPF program is too large",
+        "old state:", /* This is actually "infinite loop detected at insn ..", but it appears early
+                       * (not just the last two entries), and the extract_vlog() does not handle it
+                       * well currently.
+                       */
+    };
+    return std::none_of(filters.begin(), filters.end(), [&](const std::string &f) {
+        return reason.find(f) != std::string::npos;
+    });
+}
+
+// Value a de-oracleized program is expected to return when it runs to the
+// former oracle position.
+const u32 EXEC_MAGIC = 0xdeadbeef;
+// Instruction that replaces the oracle: loads EXEC_MAGIC into R0.
+const struct bpf_insn RETURN_MAGIC_INSN = BPF_MOV32_IMM(BPF_REG_0, EXEC_MAGIC);
+// The oracle itself: a write to R10, which the verifier rejects with
+// "frame pointer is read only" (see test_one).
+const struct bpf_insn VERIFIER_SINK_INSN = BPF_MOV32_IMM(BPF_REG_10, 0);
+
+// Overwrites the first instruction of `prog_buf` that is byte-identical to
+// VERIFIER_SINK_INSN with `new_insn`. Returns false when no such instruction
+// exists (the program is then left untouched).
+bool replace_oracle(vector<struct bpf_insn> &prog_buf, struct bpf_insn new_insn) {
+    bool found = false;
+    for (auto &insn: prog_buf) {
+        if (memcmp(&insn, &VERIFIER_SINK_INSN, sizeof(struct bpf_insn)) == 0) {
+            insn = new_insn;
+            found = true;
+            break;
+        }
+    }
+    return found;
+}
+
+// One fuzzing iteration:
+//   1. generate a program containing the oracle (with a timeout);
+//   2. load it and expect the verifier to reject it because of the oracle;
+//   3. replace the oracle with RETURN_MAGIC_INSN, load again and expect success;
+//   4. run the program and expect it to return EXEC_MAGIC.
+// Any deviation is printed and, when considered interesting, saved as a report.
+void test_one(eBPFTestOpts &opts, vector<struct bpf_insn> &prog_buf) {
+
+    prog_buf.clear();
+
+    // Use fork-server with timeout for generation
+    if (!gen_prog_with_timeout(opts, prog_buf, 15)) { // 15 second timeout
+        cout << "[!] Generation failed or timed out, skipping this iteration" << endl;
+        return; // Skip to next iteration
+    }
+
+    // In case the kernel crashes, during loading the prog
+    if (!opts.no_save_prog) {
+        save_gen_prog(opts, prog_buf);
+    }
+
+    cout << "[*] Loading prog..." << endl;
+    int prog_fd = load_prog(prog_buf);
+
+    if (prog_fd >= 0) {
+        // With the oracle embedded, we expect the prog to be rejected;
+        // Accept effectively indicates a verifier bug.
+        cout << "[!!] INTERESTING FINDING: Verifier BUG: oracle prog accepted" << endl;
+        cout << "[!!] Prog size: " << prog_buf.size() << endl;
+        save_report_prog(opts, prog_buf, "fn_oracle");
+        close(prog_fd);
+        return;
+    }
+
+    // if the reason is "frame pointer is read only", it's due to the oracle;
+    // if the reason is "unreachable insn", it's due to the generator;
+    // otherwise, it's a verifier false positive: the prog is expected to
+    // only be rejected due to the oracle.
+    string summary, reason;
+    extract_vlog(log_buf, summary, reason);
+
+    if (reason.find("frame pointer is read only") != string::npos) {
+        cout << "[+] \tResult: Rejected due to the oracle" << endl;
+    } else if (reason.find("unreachable insn") != string::npos) {
+        // unreachable blocks generated, fix the generator.
+        cout << "[!] \tWARNING: " << reason << endl;
+        return;
+    } else {
+        if (fp_interesting(reason)) {
+            cout << "[!!] \tResult: Verifier false positive: " << reason << endl;
+            save_report_prog(opts, prog_buf, "fp_oracle");
+        }
+        /* We cannot continue further: if we remove the oracle, then the prog
+         * would still be rejected due to the same error.
+         */
+        return;
+    }
+
+    cout << "[*] \tSummary: " << summary << endl;
+
+    // Replace the oracle with a return instruction, load and run the prog
+    if (!replace_oracle(prog_buf, RETURN_MAGIC_INSN)) {
+        cerr << "[!] WARNING: Oracle not found" << endl;
+        return;
+    }
+
+    prog_fd = load_prog(prog_buf);
+    if (prog_fd < 0) {
+        extract_vlog(log_buf, summary, reason);
+        if (fp_interesting(reason)) {
+            cout << "[!!] \tResult: Verifier false positive: " << reason << endl;
+            save_report_prog(opts, prog_buf, "fp_de-oracle");
+        }
+        return;
+    }
+
+    int ret = test_run_prog(prog_fd);
+    if (static_cast<u32>(ret) != EXEC_MAGIC) {
+        cout << "[!] \tWARNING: De-oracleized prog failed to run: " << ret << endl;
+        save_report_prog(opts, prog_buf, "fp_de-oracle");
+    } else {
+        // Only claim success when the magic value actually came back.
+        cout << "[*] \tde-oracleized prog executed successfully" << endl;
+    }
+
+    close(prog_fd);
+}
+
+// Worker loop: prepares the output directory and logging for this worker, then
+// calls test_one() forever, printing a throughput line every 256 iterations.
+void run_worker(eBPFTestOpts opts) {
+    std::filesystem::path outputDirectory = opts.output;
+    std::filesystem::create_directories(outputDirectory);
+    z3::set_param("parallel.enable", true);
+
+    if (opts.verbose) {
+        std::filesystem::create_directories(GetLoggingsDir(outputDirectory));
+        Log::Get().SetFout(GetGenLogPath(opts.uuid, "log", outputDirectory, /*devnull=*/false));
+    } else {
+        Log::Get().SetFout(GetGenLogPath(opts.uuid, "log", outputDirectory, /*devnull=*/true));
+    }
+
+    cout << "[+] Worker " << opts.proc_id << " started (PID: " << getpid() << ")" << endl;
+
+    vector<struct bpf_insn> prog_buf;
+    double prog_per_sec = 0;
+    u64 iterations = 0;
+    u64 last_iter = 0;
+
+    auto iter_start = std::chrono::high_resolution_clock::now();
+    while (true) {
+        cout << "[*] Worker " << opts.proc_id << " - Iterations: #" << iterations << endl;
+
+        test_one(opts, prog_buf);
+
+        if (iterations && iterations % 256 == 0) {
+            auto end = std::chrono::high_resolution_clock::now();
+            // NOTE(review): the template argument of `duration_cast` (the time
+            // unit) was lost when this patch was extracted. The variable is
+            // named `prog_per_sec` but the label below says "Progs/min" --
+            // restore the unit from the original source and make the two agree.
+            auto duration = std::chrono::duration_cast(end - iter_start);
+            if (duration.count() == 0) {
+                // Less than one time unit elapsed: keep accumulating.
+                iterations++;
+                continue;
+            }
+            prog_per_sec = (double) (iterations - last_iter) / duration.count();
+            cout << "[+] Worker " << opts.proc_id << " - Iterations: " << iterations
+                 << "\tProgs/min: " << prog_per_sec << endl;
+            last_iter = iterations;
+            iter_start = end;
+        }
+        iterations++;
+        cout << "--------------------------------" << endl;
+    }
+}
+
+// Forks `opts.num_procs` workers, each running run_worker() with its own
+// `proc_<i>` output directory, then waits for all of them and reports how each
+// one terminated.
+void run_watcher(const eBPFTestOpts &opts) {
+    cout << "[+] Spawning " << opts.num_procs << " worker processes..." << endl;
+
+    // top level output directory
+    std::filesystem::create_directories(opts.output);
+
+    // child_ids[k] is the worker id of the process child_pids[k]. The id is
+    // stored explicitly because positions in child_pids shift whenever a
+    // finished worker is erased or a fork fails.
+    vector<pid_t> child_pids;
+    vector<u32> child_ids;
+    for (u32 i = 1; i <= opts.num_procs; ++i) {
+        pid_t pid = fork();
+
+        if (pid == 0) {
+            eBPFTestOpts worker_opts = opts;
+            worker_opts.proc_id = i;
+            worker_opts.output = opts.output + "/proc_" + std::to_string(i);
+            run_worker(worker_opts);
+            exit(0);
+        } else if (pid > 0) {
+            child_pids.push_back(pid);
+            child_ids.push_back(i);
+            cout << "[+] Spawned worker " << i << " (PID: " << pid << ")" << endl;
+        } else {
+            cerr << "[-] Failed to spawn worker " << i << endl;
+        }
+    }
+
+    cout << "[+] All workers spawned. Monitoring..." << endl;
+
+    while (!child_pids.empty()) {
+        int status;
+        pid_t pid = wait(&status);
+
+        if (pid < 0) {
+            if (errno == EINTR)
+                continue;
+            // Any other failure (e.g. ECHILD) will not go away by retrying;
+            // stop instead of spinning forever.
+            cerr << "[-] wait() failed: " << strerror(errno) << endl;
+            break;
+        }
+
+        auto it = std::find(child_pids.begin(), child_pids.end(), pid);
+        if (it == child_pids.end())
+            continue;
+
+        auto id_it = child_ids.begin() + std::distance(child_pids.begin(), it);
+        const u32 worker_id = *id_it;
+        if (WIFEXITED(status)) {
+            cout << "[+] Worker " << worker_id << " (PID: " << pid << ") exited with code "
+                 << WEXITSTATUS(status) << endl;
+        } else if (WIFSIGNALED(status)) {
+            cout << "[!] Worker " << worker_id << " (PID: " << pid << ") killed by signal "
+                 << WTERMSIG(status) << endl;
+        }
+        child_pids.erase(it);
+        child_ids.erase(id_it);
+    }
+
+    cout << "[+] All workers finished." << endl;
+}
+
+// Reads a raw instruction dump (as written by save_prog) from `path` into
+// `prog_buf`. Only whole instructions are read; `prog_buf` is left empty when
+// the file cannot be opened or sized.
+void load_prog_from_file(
+    const eBPFTestOpts &opts, const string &path, vector<struct bpf_insn> &prog_buf
+) {
+    prog_buf.clear();
+
+    std::error_code ec;
+    const auto prog_size = std::filesystem::file_size(path, ec); // in bytes
+    std::ifstream prog_file(path, std::ios::binary);
+    if (ec || !prog_file) {
+        cerr << "[-] Failed to open " << path << endl;
+        return;
+    }
+    if (prog_size % sizeof(struct bpf_insn) != 0) {
+        cerr << "[!] WARNING: size of " << path
+             << " is not a multiple of the instruction size, ignoring the trailing bytes" << endl;
+    }
+
+    prog_buf.resize(prog_size / sizeof(struct bpf_insn));
+    // Read exactly what the buffer holds; reading `prog_size` bytes would
+    // overrun it whenever the file ends in a partial instruction.
+    prog_file.read(
+        reinterpret_cast<char *>(prog_buf.data()), prog_buf.size() * sizeof(struct bpf_insn)
+    );
+    prog_file.close();
+}
+
+// Re-loads the `gen.bpf` saved in `dir` with verifier log level 2, prints the
+// summary and reason lines of the log and, if the program is accepted, runs it
+// and prints its return value.
+void do_verify(const eBPFTestOpts &opts, const string &dir) {
+    cout << "[+] Verifying " << dir << endl;
+
+    if (!std::filesystem::exists(dir)) {
+        cerr << "[-] Directory does not exist: " << dir << endl;
+        return;
+    }
+
+    // Check if the `gen.bpf` file exists
+    string gen_prog_name = dir + "/" + get_gen_prog_name();
+    if (!std::filesystem::exists(gen_prog_name)) {
+        cerr << "[-] " << gen_prog_name << " does not exist" << endl;
+        return;
+    }
+
+    // Load the prog
+    vector<struct bpf_insn> prog_buf;
+    load_prog_from_file(opts, gen_prog_name, prog_buf);
+
+    cout << "[+] Loading the prog: " << gen_prog_name << " (" << prog_buf.size() << " insns)" << endl;
+
+    int prog_fd = load_prog(prog_buf, 2);
+    string summary, reason;
+    extract_vlog(log_buf, summary, reason);
+    cout << "[+] \tSummary: " << summary << endl;
+    cout << "[+] \tReason: " << reason << endl;
+    if (prog_fd >= 0) {
+        int ret = test_run_prog(prog_fd);
+        cout << "[*] \tReturn value: " << ret << endl;
+        close(prog_fd);
+    }
+    return;
+}
+
+void verify_crash(const eBPFTestOpts &opts) {
+    // Check which generated prog caused the kernel crash
+    // If proc is one, then only checks the `output` directory
+    // Otherwise, checks the `output/proc_` directory
+    if (opts.num_procs == 1) {
+        do_verify(opts, opts.output);
+    } else {
+        for (u32 i = 1; i <= opts.num_procs; ++i) {
+            do_verify(opts, opts.output + "/proc_" + std::to_string(i));
+        }
+    }
+}
+
+// Loads the program file named by `opts.load_prog` (optionally replacing the
+// oracle first when `opts.de_oracle` is set), prints the full verifier log
+// and, if the program is accepted, runs it and prints its return value.
+void load_prog_only(const eBPFTestOpts &opts) {
+    cout << "[+] Loading the prog: " << opts.load_prog << "..." << endl;
+    vector<struct bpf_insn> prog_buf;
+    load_prog_from_file(opts, opts.load_prog, prog_buf);
+
+    if (opts.de_oracle) {
+        if (!replace_oracle(prog_buf, RETURN_MAGIC_INSN)) {
+            cout << "[!] \tWARNING: Oracle not found, skipping de-oracleization" << endl;
+        } else {
+            cout << "[+] \tDe-oracleized the prog" << endl;
+        }
+    }
+
+    // Load the prog
+    int prog_fd = load_prog(prog_buf, 2, BPF_F_TEST_REG_INVARIANTS);
+    cout << "--------------------------------" << endl;
+    cout << log_buf;
+    cout << "--------------------------------" << endl;
+
+    if (prog_fd < 0)
+        return;
+
+    int ret = test_run_prog(prog_fd);
+    cout << "[*] \tProg executed, return value: 0x" << std::hex << ret << std::dec << endl;
+    if (opts.de_oracle && static_cast<u32>(ret) != EXEC_MAGIC) {
+        // Printed in hex so it can be compared directly with the value above.
+        cout << "[!] \tWARNING: De-oracleized prog failed to run, expected magic: 0x" << std::hex
+             << EXEC_MAGIC << std::dec << endl;
+    }
+
+    close(prog_fd);
+    return;
+}
+
+// Entry point. Depending on the command line: re-verify saved programs after a
+// crash, load a single program file, or start fuzzing with one or several
+// worker processes.
+int main(int argc, char **argv) {
+    auto cliOpts = eBPFTestOpts::Parse(argc, argv);
+
+    if (cliOpts.verify_crash) {
+        cout << "[+] Verifying the errors discovered..." << endl;
+        verify_crash(cliOpts);
+        return 0;
+    }
+
+    if (!cliOpts.load_prog.empty()) {
+        load_prog_only(cliOpts);
+        return 0;
+    }
+
+    if (cliOpts.num_procs == 1) {
+        run_worker(cliOpts);
+    } else {
+        run_watcher(cliOpts);
+    }
+
+    return 0;
+}
diff --git a/src/func_gen.cpp b/src/func_gen.cpp index 20e5a332..bffe32a2 100644 --- a/src/func_gen.cpp +++ b/src/func_gen.cpp @@ -84,6 +84,7 @@ struct FunGenOpts { bool main; bool sexpression; bool javaclass; + bool ebpf; bool verbose; static FunGenOpts Parse(int argc, char **argv) { @@ -98,6 +99,7 @@ struct FunGenOpts { ("m,main", "Generate a main function with all mappings", cxxopts::value()->default_value("false")->implicit_value("true")) ("S,sexpression", "Also generate the S Expression of the generated function", cxxopts::value()->default_value("false")->implicit_value("true")) ("J,unstable-javaclass", "Also generate a Java class (bytecode) identical to the generated function", cxxopts::value()->default_value("false")->implicit_value("true")) + ("E,ebpf", "Also generate an eBPF program identical to the generated function", cxxopts::value()->default_value("false")->implicit_value("true")) ("v,verbose", "Enable verbose output", cxxopts::value()->default_value("false")->implicit_value("true")) ("h,help", "Print help message", cxxopts::value()->default_value("false")->implicit_value("true")); options.parse_positional("uuid"); @@ -174,6 +176,7 @@ struct FunGenOpts { const bool main = args["main"].as(); const bool sexpression = args["sexpression"].as(); const bool javaclass = args["unstable-javaclass"].as(); + const bool ebpf = args["ebpf"].as(); const bool verbose = args["verbose"].as(); GlobalOptions::Get().HandleFuncArgs(args); @@ -186,6 +189,7 @@ struct FunGenOpts { .main = main, .sexpression = sexpression, .javaclass = javaclass, + .ebpf = ebpf, .verbose = verbose }; } @@ -200,6 +204,7 @@ int main(int argc, char **argv) { bool mainfun = cliOpts.main; bool sexpression = cliOpts.sexpression; bool javaclass =
cliOpts.javaclass; + bool ebpf = cliOpts.ebpf; bool verbose = cliOpts.verbose; std::filesystem::path outputDirectory = cliOpts.output; @@ -309,5 +314,16 @@ int main(int argc, char **argv) { ); } + if (ebpf) { + std::filesystem::create_directories(GeteBPFDir(outputDirectory)); + std::ofstream ebpfFile = + std::ofstream(GeteBPFPath(uuid, sno, outputDirectory), std::ios::binary); + auto ebpfCode = fun.GenerateFuneBPFCode(*exec); + ebpfFile.write( + reinterpret_cast(ebpfCode.data()), ebpfCode.size() * sizeof(struct bpf_insn) + ); + ebpfFile.close(); + } + return 0; }