diff --git a/.gitignore b/.gitignore index 7afd412dadd2c1..d1b48f01037a1e 100644 --- a/.gitignore +++ b/.gitignore @@ -26,6 +26,7 @@ *.gz *.i *.ko +*.ko.lds *.lex.c *.ll *.lst diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index f5a27f067db9ed..bb8804c5fa5ce6 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2235,6 +2235,12 @@ kernel and module base offset ASLR (Address Space Layout Randomization). + nofgkaslr [KNL] + When CONFIG_FG_KASLR is set, this parameter + disables kernel function granular ASLR + (Address Space Layout Randomization). + See Documentation/security/fgkaslr.rst. + kasan_multi_shot [KNL] Enforce KASAN (Kernel Address Sanitizer) to print report on every invalid memory access. Without this diff --git a/Documentation/security/fgkaslr.rst b/Documentation/security/fgkaslr.rst new file mode 100644 index 00000000000000..50dc24f675b581 --- /dev/null +++ b/Documentation/security/fgkaslr.rst @@ -0,0 +1,172 @@ +.. SPDX-License-Identifier: GPL-2.0 + +===================================================================== +Function Granular Kernel Address Space Layout Randomization (fgkaslr) +===================================================================== + +:Date: 6 April 2020 +:Author: Kristen Accardi + +Kernel Address Space Layout Randomization (KASLR) was merged into the kernel +with the objective of increasing the difficulty of code reuse attacks. Code +reuse attacks reused existing code snippets to get around existing memory +protections. They exploit software bugs which expose addresses of useful code +snippets to control the flow of execution for their own nefarious purposes. +KASLR as it was originally implemented moves the entire kernel code text as a +unit at boot time in order to make addresses less predictable. The order of the +code within the segment is unchanged - only the base address is shifted. There +are a few shortcomings to this algorithm. + +1. Low Entropy - there are only so many locations the kernel can fit in. This + means an attacker could guess without too much trouble. +2. Knowledge of a single address can reveal the offset of the base address, + exposing all other locations for a published/known kernel image. +3. Info leaks abound. + +Finer grained ASLR has been proposed as a way to make ASLR more resistant +to info leaks. It is not a new concept at all, and there are many variations +possible. Function reordering is an implementation of finer grained ASLR +which randomizes the layout of an address space on a function level +granularity. The term "fgkaslr" is used in this document to refer to the +technique of function reordering when used with KASLR, as well as finer grained +KASLR in general. + +The objective of this patch set is to improve a technology that is already +merged into the kernel (KASLR). This code will not prevent all code reuse +attacks, and should be considered as one of several tools that can be used. + +Implementation Details +====================== + +The over-arching objective of the fgkaslr implementation is incremental +improvement over the existing KASLR algorithm. It is designed to work with +the existing solution, and there are two main area where code changes occur: +Build time, and Load time. + +Build time +---------- + +GCC has had an option to place functions into individual .text sections +for many years now (-ffunction-sections). 
This option is used to implement +function reordering at load time. The final compiled vmlinux retains all the +section headers, which can be used to help find the address ranges of each +function. Using this information and an expanded table of relocation addresses, +individual text sections can be shuffled immediately after decompression. +Some data tables inside the kernel that have assumptions about order +require sorting after the update. In order to modify these tables, +a few key symbols from the objcopy symbol stripping process are preserved +for use after shuffling the text segments. Any special input sections which are +defined by the kernel build process and collected into the .text output +segment are left unmodified and will still be present inside the .text segment, +unrandomized other than normal base address randomization. + +Load time +--------- + +The boot kernel was modified to parse the vmlinux elf file after +decompression to check for symbols for modifying data tables, and to +look for any .text.* sections to randomize. The sections are then shuffled, +and tables are updated or resorted. The existing code which updated relocation +addresses was modified to account for not just a fixed delta from the load +address, but the offset that the function section was moved to. This requires +inspection of each address to see if it was impacted by a randomization. + +In order to hide the new layout, symbols reported through /proc/kallsyms will +be displayed in a random order. + +Performance Impact +================== + +There are two areas where function reordering can impact performance: boot +time latency, and run time performance. + +Boot time latency +----------------- + +This implementation of finer grained KASLR impacts the boot time of the kernel +in several places. It requires additional parsing of the kernel ELF file to +obtain the section headers of the sections to be randomized. It calls the +random number generator for each section to be randomized to determine that +section's new memory location. It copies the decompressed kernel into a new +area of memory to avoid corruption when laying out the newly randomized +sections. It increases the number of relocations the kernel has to perform at +boot time vs. standard KASLR, and it also requires a lookup on each address +that needs to be relocated to see if it was in a randomized section and needs +to be adjusted by a new offset. Finally, it re-sorts a few data tables that +are required to be sorted by address. + +Booting a test VM on a modern, well appointed system showed an increase in +latency of approximately 1 second. + +Run time +-------- + +The performance impact at run-time of function reordering varies by workload. +Randomly reordering the functions will cause an increase in cache misses +for some workloads. Some workloads perform significantly worse under FGKASLR, +while others stay the same or even improve. In general, it will depend on the +code flow whether or not finer grained KASLR will impact a workload, and how +the underlying code was designed. Because the layout changes per boot, each +time a system is rebooted the performance of a workload may change. + +Image Size +========== + +fgkaslr increases the size of the kernel binary due to the extra section +headers that are included, as well as the extra relocations that need to +be added. You can expect fgkaslr to increase the size of the resulting +vmlinux by about 3%, and the compressed image (bzImage) by 15%. 
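The extra section headers behind these numbers are straightforward to inspect on a built kernel, and walking them is essentially what the boot stub does at load time before shuffling. Below is a minimal userspace sketch, not part of this patch (the default vmlinux path and the omission of SHN_XINDEX/SHN_UNDEF handling are simplifications), that counts the .text.* randomization candidates::

    /*
     * Illustrative only: count the .text.* section headers in an
     * uncompressed vmlinux -- roughly the set that
     * layout_randomized_image() collects for shuffling.
     */
    #include <elf.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <sys/stat.h>
    #include <unistd.h>

    int main(int argc, char **argv)
    {
            const char *path = argc > 1 ? argv[1] : "vmlinux";
            struct stat st;
            int fd = open(path, O_RDONLY);

            if (fd < 0 || fstat(fd, &st) < 0) {
                    perror(path);
                    return 1;
            }

            char *map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
            if (map == MAP_FAILED) {
                    perror("mmap");
                    return 1;
            }

            Elf64_Ehdr *ehdr = (Elf64_Ehdr *)map;
            Elf64_Shdr *shdrs = (Elf64_Shdr *)(map + ehdr->e_shoff);
            const char *secstrings = map + shdrs[ehdr->e_shstrndx].sh_offset;
            unsigned long nr = 0, bytes = 0;

            for (unsigned int i = 0; i < ehdr->e_shnum; i++) {
                    const char *name = secstrings + shdrs[i].sh_name;

                    if (!strncmp(name, ".text.", 6)) {
                            nr++;
                            bytes += shdrs[i].sh_size;
                    }
            }

            printf("%lu .text.* sections, %lu bytes\n", nr, bytes);
            munmap(map, st.st_size);
            close(fd);
            return 0;
    }

Comparing the count with and without CONFIG_FG_KASLR shows where the roughly 3% vmlinux growth quoted above comes from; "readelf -SW vmlinux" reports the same headers.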
+ +Memory Usage +============ + +fgkaslr increases the amount of heap that is required at boot time, +although this extra memory is released when the kernel has finished +decompression. As a result, it may not be appropriate to use this feature +on systems without much memory. + +Building +======== + +To enable fine grained KASLR, you need to have the following config options +set (including all the ones you would use to build normal KASLR) + +``CONFIG_FG_KASLR=y`` + +fgkaslr for the kernel is only supported for the X86_64 architecture. + +Modules +======= + +Modules are randomized similarly to the rest of the kernel by shuffling +the sections at load time prior to moving them into memory. The module must +also have been build with the -ffunction-sections compiler option. + +Although fgkaslr for the kernel is only supported for the X86_64 architecture, +it is possible to use fgkaslr with modules on other architectures. To enable +this feature, select the following config option: + +``CONFIG_MODULE_FG_KASLR`` + +This option is selected automatically for X86_64 when CONFIG_FG_KASLR is set. + +Disabling +========= + +Disabling normal kaslr using the nokaslr command line option also disables +fgkaslr. In addition, it is possible to disable fgkaslr separately by booting +with "nofgkaslr" on the commandline. + +Further Information +=================== + +There are a lot of academic papers which explore finer grained ASLR. +This paper in particular contributed significantly to the implementation design. + +Selfrando: Securing the Tor Browser against De-anonymization Exploits, +M. Conti, S. Crane, T. Frassetto, et al. + +For more information on how function layout impacts performance, see: + +Optimizing Function Placement for Large-Scale Data-Center Applications, +G. Ottoni, B. Maher diff --git a/Documentation/security/index.rst b/Documentation/security/index.rst index 16335de04e8c6d..41444124090f17 100644 --- a/Documentation/security/index.rst +++ b/Documentation/security/index.rst @@ -7,6 +7,7 @@ Security Documentation credentials IMA-templates + fgkaslr keys/index lsm lsm-development diff --git a/MAINTAINERS b/MAINTAINERS index f53d30463c21c1..49a0974d32a7a8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7925,6 +7925,18 @@ L: platform-driver-x86@vger.kernel.org S: Maintained F: drivers/platform/x86/fujitsu-tablet.c +FUNCTION-GRAINED KASLR (FG-KASLR) +M: Alexander Lobakin +R: Kristen Carlson Accardi +R: Kees Cook +L: linux-hardening@vger.kernel.org +S: Supported +F: Documentation/security/fgkaslr.rst +F: arch/x86/boot/compressed/fgkaslr.c +F: arch/x86/boot/compressed/gen-symbols.h +F: arch/x86/boot/compressed/utils.c +F: scripts/generate_text_sections.pl + FUSE: FILESYSTEM IN USERSPACE M: Miklos Szeredi L: linux-fsdevel@vger.kernel.org diff --git a/Makefile b/Makefile index ceb987e5c87b94..cf7cf5cbdad9ae 100644 --- a/Makefile +++ b/Makefile @@ -871,8 +871,47 @@ ifdef CONFIG_DEBUG_SECTION_MISMATCH KBUILD_CFLAGS += -fno-inline-functions-called-once endif +# Prefer linking with the `-z unique-symbol` if available, this eliminates +# position-based search. Also is a requirement for FG-KASLR +ifeq ($(CONFIG_LD_HAS_Z_UNIQUE_SYMBOL)$(CONFIG_LIVEPATCH),yy) +KBUILD_LDFLAGS += -z unique-symbol +endif + +# Allow ASM code to generate separate sections for each function. See +# `include/linux/linkage.h` for explanation. 
This flag is to enable GAS to +# insert the name of the previous section instead of `%S` inside .pushsection +ifdef CONFIG_HAVE_ASM_FUNCTION_SECTIONS +ifneq ($(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION)$(CONFIG_LTO_CLANG)$(CONFIG_FG_KASLR),) +SECSUBST_AFLAGS := -Wa,--sectname-subst +KBUILD_AFLAGS_KERNEL += $(SECSUBST_AFLAGS) +KBUILD_CFLAGS_KERNEL += $(SECSUBST_AFLAGS) +export SECSUBST_AFLAGS +endif + +# Same for modules. LD DCE doesn't work for them, thus not checking for it +ifneq ($(CONFIG_MODULE_FG_KASLR)$(CONFIG_LTO_CLANG),) +KBUILD_AFLAGS_MODULE += -Wa,--sectname-subst +KBUILD_CFLAGS_MODULE += -Wa,--sectname-subst +endif +endif # CONFIG_HAVE_ASM_FUNCTION_SECTIONS + +# ClangLTO implies `-ffunction-sections -fdata-sections`, no need +# to specify them manually and trigger a pointless full rebuild +ifndef CONFIG_LTO_CLANG +ifdef CONFIG_MODULE_FG_KASLR +KBUILD_CFLAGS_MODULE += -ffunction-sections +endif + +ifneq ($(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION)$(CONFIG_FG_KASLR),) +KBUILD_CFLAGS_KERNEL += -ffunction-sections +endif + +ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION +KBUILD_CFLAGS_KERNEL += -fdata-sections +endif +endif # CONFIG_LTO_CLANG + ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION -KBUILD_CFLAGS_KERNEL += -ffunction-sections -fdata-sections LDFLAGS_vmlinux += --gc-sections endif diff --git a/arch/Kconfig b/arch/Kconfig index 678a80713b2133..e06aeeea39f4f3 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1322,6 +1322,16 @@ config DYNAMIC_SIGFRAME config HAVE_ARCH_NODE_DEV_GROUP bool +config ARCH_SUPPORTS_ASM_FUNCTION_SECTIONS + bool + help + An arch should select this if it can be built and run with its + asm functions placed into separate sections to improve DCE, LTO + and FG-KASLR. + +config ARCH_SUPPORTS_FG_KASLR + bool + source "kernel/gcov/Kconfig" source "scripts/gcc-plugins/Kconfig" diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 391c4cac8958f8..d5442107a915bd 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -102,8 +102,10 @@ config X86 select ARCH_MIGHT_HAVE_PC_SERIO select ARCH_STACKWALK select ARCH_SUPPORTS_ACPI + select ARCH_SUPPORTS_ASM_FUNCTION_SECTIONS select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_DEBUG_PAGEALLOC + select ARCH_SUPPORTS_FG_KASLR if X86_64 && RANDOMIZE_BASE select ARCH_SUPPORTS_PAGE_TABLE_CHECK if X86_64 select ARCH_SUPPORTS_NUMA_BALANCING if X86_64 select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP if NR_CPUS <= 4096 diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index b5aecb524a8aa6..080990b09f06c9 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -68,6 +68,7 @@ targets += cpustr.h KBUILD_CFLAGS := $(REALMODE_CFLAGS) -D_SETUP KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ +KBUILD_AFLAGS += $(SECSUBST_AFLAGS) KBUILD_CFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=) KBUILD_CFLAGS += -fno-asynchronous-unwind-tables GCOV_PROFILE := n diff --git a/arch/x86/boot/compressed/.gitignore b/arch/x86/boot/compressed/.gitignore index 25805199a50611..bc5f8436be1d7f 100644 --- a/arch/x86/boot/compressed/.gitignore +++ b/arch/x86/boot/compressed/.gitignore @@ -3,5 +3,6 @@ relocs vmlinux.bin.all vmlinux.relocs vmlinux.lds +vmlinux.symbols mkpiggy piggy.S diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 6115274fe10fc5..caa7d120f0eb3f 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -58,6 +58,7 @@ KBUILD_CFLAGS += -include $(srctree)/include/linux/hidden.h CFLAGS_sev.o += -I$(objtree)/arch/x86/lib/ KBUILD_AFLAGS := $(KBUILD_CFLAGS) 
-D__ASSEMBLY__ +KBUILD_AFLAGS += $(SECSUBST_AFLAGS) GCOV_PROFILE := n UBSAN_SANITIZE :=n @@ -92,6 +93,7 @@ vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/kernel_info.o $(obj)/head_$(BITS).o vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o +vmlinux-objs-$(CONFIG_FG_KASLR) += $(obj)/fgkaslr.o $(obj)/utils.o ifdef CONFIG_X86_64 vmlinux-objs-y += $(obj)/ident_map_64.o vmlinux-objs-y += $(obj)/idt_64.o $(obj)/idt_handlers_64.o @@ -109,14 +111,29 @@ $(obj)/vmlinux: $(vmlinux-objs-y) $(efi-obj-y) FORCE $(call if_changed,ld) OBJCOPYFLAGS_vmlinux.bin := -R .comment -S -$(obj)/vmlinux.bin: vmlinux FORCE + +targets += vmlinux.symbols + +ifdef CONFIG_FG_KASLR +quiet_cmd_vmlinux_symbols = GEN $@ + cmd_vmlinux_symbols = $(CPP) $(cpp_flags) -P -D"GEN(s)"=s -o $@ $< + +VMLINUX_SYMBOLS = $(obj)/vmlinux.symbols +$(VMLINUX_SYMBOLS): $(src)/gen-symbols.h FORCE + $(call if_changed_dep,vmlinux_symbols) + +OBJCOPYFLAGS += --keep-symbols=$(VMLINUX_SYMBOLS) +RELOCS_ARGS += --fg-kaslr +endif # CONFIG_FG_KASLR + +$(obj)/vmlinux.bin: vmlinux $(VMLINUX_SYMBOLS) FORCE $(call if_changed,objcopy) targets += $(patsubst $(obj)/%,%,$(vmlinux-objs-y)) vmlinux.bin.all vmlinux.relocs CMD_RELOCS = arch/x86/tools/relocs quiet_cmd_relocs = RELOCS $@ - cmd_relocs = $(CMD_RELOCS) $< > $@;$(CMD_RELOCS) --abs-relocs $< + cmd_relocs = $(CMD_RELOCS) $(RELOCS_ARGS) $< > $@;$(CMD_RELOCS) $(RELOCS_ARGS) --abs-relocs $< $(obj)/vmlinux.relocs: vmlinux FORCE $(call if_changed,relocs) diff --git a/arch/x86/boot/compressed/fgkaslr.c b/arch/x86/boot/compressed/fgkaslr.c new file mode 100644 index 00000000000000..85eb1ef574a21f --- /dev/null +++ b/arch/x86/boot/compressed/fgkaslr.c @@ -0,0 +1,752 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * This contains the routines needed to reorder the kernel text section + * at boot time. + * + * Copyright (C) 2020-2022, Intel Corporation. + * Author: Kristen Carlson Accardi + */ + +#include "misc.h" +#include "error.h" +#include "pgtable.h" +#include "../string.h" +#include "../voffset.h" +#include +#include +#include +#include "../../include/asm/extable.h" +#include "../../include/asm/orc_types.h" + +/* + * Use normal definitions of mem*() from string.c. There are already + * included header files which expect a definition of memset() and by + * the time we define memset macro, it is too late. 
+ */ +#undef memcpy +#undef memset +#define memmove memmove + +void *memmove(void *dest, const void *src, size_t n); + +static int nofgkaslr; + +static unsigned long percpu_start; +static unsigned long percpu_end; + +#define GEN(s) static long addr_##s; +#include "gen-symbols.h" +#undef GEN + +/* addresses in mapped address space */ +static int *kallsyms_base; +static u8 *names; +static unsigned long relative_base; +static unsigned int *markers_addr; + +struct kallsyms_name { + u8 len; + u8 indices[256]; +}; + +static struct kallsyms_name *names_table; + +/* Array of pointers to sections headers for randomized sections */ +static Elf_Shdr **sections; + +/* Number of elements in the randomized section header array (sections) */ +static int sections_size; + +/* Array of all section headers, randomized or otherwise */ +static Elf_Shdr *sechdrs; + +static bool is_orc_unwind(long addr) +{ + if (addr >= addr___start_orc_unwind_ip && + addr < addr___stop_orc_unwind_ip) + return true; + return false; +} + +static bool is_text(long addr) +{ + if ((addr >= addr__stext && addr < addr__etext) || + (addr >= addr__sinittext && addr < addr__einittext) || + (addr >= addr___altinstr_replacement && + addr < addr___altinstr_replacement_end)) + return true; + return false; +} + +bool is_percpu_addr(long pc, long offset) +{ + unsigned long ptr; + long address; + + address = pc + offset + 4; + + ptr = (unsigned long)address; + + if (ptr >= percpu_start && ptr < percpu_end) + return true; + + return false; +} + +static bool cur_addr_orc; + +static int cmp_section_addr(const void *a, const void *b) +{ + const Elf_Shdr *s = *(const Elf_Shdr **)b; + unsigned long end = s->sh_addr + s->sh_size; + unsigned long ptr = (unsigned long)a; + + if (cur_addr_orc) + /* orc relocations can be one past the end of the section */ + end++; + + if (ptr >= s->sh_addr && ptr < end) + return 0; + + if (ptr < s->sh_addr) + return -1; + + return 1; +} + +/* + * Discover if the address is in a randomized section and if so, + * adjust by the saved offset. + */ +Elf_Shdr *adjust_address(long *address) +{ + Elf_Shdr **s, *shdr; + + if (nofgkaslr) + return NULL; + + s = bsearch((const void *)*address, sections, sections_size, + sizeof(*s), cmp_section_addr); + if (!s) + return NULL; + + shdr = *s; + *address += shdr->sh_offset; + + return shdr; +} + +void adjust_relative_offset(long pc, long *value, Elf_Shdr *section) +{ + long address; + Elf_Shdr *s; + + if (nofgkaslr) + return; + + /* + * sometimes we are updating a relative offset that would + * normally be relative to the next instruction (such as a call). + * In this case to calculate the target, you need to add 32bits to + * the pc to get the next instruction value. However, sometimes + * targets are just data that was stored in a table such as ksymtab + * or cpu alternatives. In this case our target is not relative to + * the next instruction. + */ + + /* Calculate the address that this offset would call. */ + if (!is_text(pc)) + address = pc + *value; + else + address = pc + *value + 4; + + /* + * orc ip addresses are sorted at build time after relocs have + * been applied, making the relocs no longer valid. Skip any + * relocs for the orc_unwind_ip table. These will be updated + * separately. + */ + if (is_orc_unwind(pc)) + return; + + s = adjust_address(&address); + + /* + * if the address is in section that was randomized, + * we need to adjust the offset. 
+ */ + if (s) + *value += s->sh_offset; + + /* + * If the PC that this offset was calculated for was in a section + * that has been randomized, the value needs to be adjusted by the + * same amount as the randomized section was adjusted from it's original + * location. + */ + if (section) + *value -= section->sh_offset; +} + +static void kallsyms_swp(void *a, void *b, int size) +{ + struct kallsyms_name name_a; + int idx1, idx2; + + /* Determine our index into the array. */ + idx1 = (const int *)a - kallsyms_base; + idx2 = (const int *)b - kallsyms_base; + swap(kallsyms_base[idx1], kallsyms_base[idx2]); + + /* Swap the names table. */ + memcpy(&name_a, &names_table[idx1], sizeof(name_a)); + memcpy(&names_table[idx1], &names_table[idx2], + sizeof(struct kallsyms_name)); + memcpy(&names_table[idx2], &name_a, sizeof(struct kallsyms_name)); +} + +static int kallsyms_cmp(const void *a, const void *b) +{ + unsigned long uaddr_a, uaddr_b; + int addr_a, addr_b; + + addr_a = *(const int *)a; + addr_b = *(const int *)b; + + if (addr_a >= 0) + uaddr_a = addr_a; + if (addr_b >= 0) + uaddr_b = addr_b; + + if (addr_a < 0) + uaddr_a = relative_base - 1 - addr_a; + if (addr_b < 0) + uaddr_b = relative_base - 1 - addr_b; + + if (uaddr_b > uaddr_a) + return -1; + + return 0; +} + +static void deal_with_names(int num_syms) +{ + int num_bytes; + int offset; + int i, j; + + /* we should have num_syms kallsyms_name entries */ + num_bytes = num_syms * sizeof(*names_table); + names_table = malloc(num_syms * sizeof(*names_table)); + if (!names_table) { + debug_putstr("\nbytes requested: "); + debug_puthex(num_bytes); + error("\nunable to allocate space for names table\n"); + } + + /* read all the names entries */ + offset = 0; + for (i = 0; i < num_syms; i++) { + names_table[i].len = names[offset]; + offset++; + for (j = 0; j < names_table[i].len; j++) { + names_table[i].indices[j] = names[offset]; + offset++; + } + } +} + +static void write_sorted_names(int num_syms) +{ + unsigned int *markers; + int offset = 0; + int i, j; + + /* + * we are going to need to regenerate the markers table, which is a + * table of offsets into the compressed stream every 256 symbols. + * this code copied almost directly from scripts/kallsyms.c + */ + markers = malloc(sizeof(unsigned int) * ((num_syms + 255) / 256)); + if (!markers) { + debug_putstr("\nfailed to allocate heap space of "); + debug_puthex(((num_syms + 255) / 256)); + debug_putstr(" bytes\n"); + error("Unable to allocate space for markers table"); + } + + for (i = 0; i < num_syms; i++) { + if ((i & 0xFF) == 0) + markers[i >> 8] = offset; + + names[offset] = (u8)names_table[i].len; + offset++; + for (j = 0; j < names_table[i].len; j++) { + names[offset] = names_table[i].indices[j]; + offset++; + } + } + + /* write new markers table over old one */ + for (i = 0; i < ((num_syms + 255) >> 8); i++) + markers_addr[i] = markers[i]; + + free(markers); + free(names_table); +} + +static void sort_kallsyms(unsigned long map) +{ + int num_syms; + int i; + + debug_putstr("\nRe-sorting kallsyms...\n"); + + num_syms = *(int *)(addr_kallsyms_num_syms + map); + kallsyms_base = (int *)(addr_kallsyms_offsets + map); + relative_base = *(unsigned long *)(addr_kallsyms_relative_base + map); + markers_addr = (unsigned int *)(addr_kallsyms_markers + map); + names = (u8 *)(addr_kallsyms_names + map); + + /* + * the kallsyms table was generated prior to any randomization. + * it is a bunch of offsets from "relative base". 
In order for + * us to check if a symbol has an address that was in a randomized + * section, we need to reconstruct the address to it's original + * value prior to handle_relocations. + */ + for (i = 0; i < num_syms; i++) { + unsigned long addr; + + /* + * according to kernel/kallsyms.c, positive offsets are absolute + * values and negative offsets are relative to the base. + */ + if (kallsyms_base[i] >= 0) + addr = kallsyms_base[i]; + else + addr = relative_base - 1 - kallsyms_base[i]; + + if (adjust_address(&addr)) + /* here we need to recalcuate the offset */ + kallsyms_base[i] = relative_base - 1 - addr; + } + + /* + * here we need to read in all the kallsyms_names info + * so that we can regenerate it. + */ + deal_with_names(num_syms); + + sort(kallsyms_base, num_syms, sizeof(int), kallsyms_cmp, kallsyms_swp); + + /* write the newly sorted names table over the old one */ + write_sorted_names(num_syms); +} + +/* + * We need to include this file here rather than in utils.c because + * some of the helper functions in extable.c are used to update + * the extable below and are defined as "static" in extable.c + */ +#include "../../../../lib/extable.c" + +static inline unsigned long +ex_fixup_addr(const struct exception_table_entry *x) +{ + return ((unsigned long)&x->fixup + x->fixup); +} + +static void update_ex_table(unsigned long map) +{ + struct exception_table_entry *start_ex_table = + (struct exception_table_entry *)(addr___start___ex_table + map); + int num_entries = + (addr___stop___ex_table - addr___start___ex_table) / + sizeof(struct exception_table_entry); + int i; + + debug_putstr("\nUpdating exception table..."); + for (i = 0; i < num_entries; i++) { + unsigned long fixup = ex_fixup_addr(&start_ex_table[i]); + unsigned long insn = ex_to_insn(&start_ex_table[i]); + unsigned long addr; + Elf_Shdr *s; + + /* check each address to see if it needs adjusting */ + addr = insn - map; + s = adjust_address(&addr); + if (s) + start_ex_table[i].insn += s->sh_offset; + + addr = fixup - map; + s = adjust_address(&addr); + if (s) + start_ex_table[i].fixup += s->sh_offset; + } +} + +static void sort_ex_table(unsigned long map) +{ + struct exception_table_entry *start_ex_table = + (struct exception_table_entry *)(addr___start___ex_table + map); + struct exception_table_entry *stop_ex_table = + (struct exception_table_entry *)(addr___stop___ex_table + map); + + debug_putstr("\nRe-sorting exception table..."); + + sort_extable(start_ex_table, stop_ex_table); +} + +static void update_orc_table(unsigned long map) +{ + int *ip_table = (int *)(addr___start_orc_unwind_ip + map); + int num_entries, i; + + num_entries = addr___stop_orc_unwind_ip - addr___start_orc_unwind_ip; + num_entries /= sizeof(int); + + debug_putstr("\nUpdating orc tables...\n"); + cur_addr_orc = true; + + for (i = 0; i < num_entries; i++) { + unsigned long ip = orc_ip(ip_table + i); + Elf_Shdr *s; + + /* check each address to see if it needs adjusting */ + ip = ip - map; + + /* + * objtool places terminator entries just outside the end of + * the section. To identify an orc_unwind_ip address that might + * need adjusting, the address should be compared differently + * than a normal address. 
+ */ + s = adjust_address(&ip); + if (s) + ip_table[i] += s->sh_offset; + } + + cur_addr_orc = false; +} + +static void sort_orc_table(unsigned long map) +{ + struct orc_entry *orc_table; + int num_entries; + int *ip_table; + + orc_table = (struct orc_entry *)(addr___start_orc_unwind + map); + ip_table = (int *)(addr___start_orc_unwind_ip + map); + + num_entries = addr___stop_orc_unwind_ip - addr___start_orc_unwind_ip; + num_entries /= sizeof(int); + + debug_putstr("\nRe-sorting orc tables...\n"); + orc_sort(ip_table, orc_table, num_entries); +} + +void post_relocations_cleanup(unsigned long map) +{ + if (!nofgkaslr) { + update_ex_table(map); + sort_ex_table(map); + update_orc_table(map); + sort_orc_table(map); + } + + /* + * maybe one day free will do something. So, we "free" this memory + * in either case + */ + free(sections); + free(sechdrs); +} + +void pre_relocations_cleanup(unsigned long map) +{ + if (nofgkaslr) + return; + + sort_kallsyms(map); +} + +static void move_text(int num_sections, char *secstrings, Elf_Shdr *text, + void *source, void *dest, Elf64_Phdr *phdr) +{ + unsigned long adjusted_addr; + int *index_list; + int copy_bytes; + void *stash; + int i, j; + + memmove(dest, source + text->sh_offset, text->sh_size); + copy_bytes = text->sh_size; + dest += text->sh_size; + adjusted_addr = text->sh_addr + text->sh_size; + + /* + * we leave the sections sorted in their original order + * by s->sh_addr, but shuffle the indexes in a random + * order for copying. + */ + index_list = malloc(sizeof(int) * num_sections); + if (!index_list) + error("Failed to allocate space for index list"); + + for (i = 0; i < num_sections; i++) + index_list[i] = i; + +#define get_random_long() kaslr_get_random_long(NULL) + shuffle_array(index_list, num_sections); +#undef get_random_long + + /* + * to avoid overwriting earlier sections before they can get + * copied to dest, stash everything into a buffer first. + * this will cause our source address to be off by + * phdr->p_offset though, so we'll adjust s->sh_offset below. + * + * TBD: ideally we'd simply decompress higher up so that our + * copy wasn't in danger of overwriting anything important. + */ + stash = malloc(phdr->p_filesz); + if (!stash) + error("Failed to allocate space for text stash"); + + memcpy(stash, source + phdr->p_offset, phdr->p_filesz); + + /* now we'd walk through the sections. */ + for (j = 0; j < num_sections; j++) { + unsigned long aligned_addr; + int pad_bytes; + Elf_Shdr *s; + void *src; + + s = sections[index_list[j]]; + + /* align addr for this section */ + aligned_addr = ALIGN(adjusted_addr, s->sh_addralign); + + /* + * copy out of stash, so adjust offset + */ + src = stash + s->sh_offset - phdr->p_offset; + + /* + * Fill any space between sections with int3 + */ + pad_bytes = aligned_addr - adjusted_addr; + memset(dest, 0xcc, pad_bytes); + + dest = (void *)ALIGN((unsigned long)dest, s->sh_addralign); + + memmove(dest, src, s->sh_size); + + dest += s->sh_size; + copy_bytes += s->sh_size + pad_bytes; + adjusted_addr = aligned_addr + s->sh_size; + + /* we can blow away sh_offset for our own uses */ + s->sh_offset = aligned_addr - s->sh_addr; + } + + free(index_list); + + /* + * move remainder of text segment. Ok to just use original source + * here since this area is untouched. 
+ */ + memmove(dest, source + text->sh_offset + copy_bytes, + phdr->p_filesz - copy_bytes); + free(stash); +} + +static void parse_symtab(const Elf64_Sym *symtab, const char *strtab, + long num_syms) +{ + const Elf64_Sym *sym; + + if (!symtab || !strtab) + return; + + debug_putstr("\nLooking for symbols... "); + + /* + * walk through the symbol table looking for the symbols + * that we care about. + */ + for (sym = symtab; --num_syms >= 0; sym++) { + if (!sym->st_name) + continue; + +#define GEN(s) ({ \ + if (!addr_##s && !strcmp(#s, strtab + sym->st_name)) { \ + addr_##s = sym->st_value; \ + continue; \ + } \ +}); +#include "gen-symbols.h" +#undef GEN + } +} + +void layout_randomized_image(void *output, Elf64_Ehdr *ehdr, Elf64_Phdr *phdrs) +{ + Elf64_Sym *symtab = NULL; + Elf_Shdr *percpu = NULL; + Elf_Shdr *text = NULL; + unsigned int shstrndx; + int num_sections = 0; + unsigned long shnum; + char *strtab = NULL; + long num_syms = 0; + const char *sname; + char *secstrings; + Elf_Shdr shdr; + Elf_Shdr *s; + void *dest; + int i; + + debug_putstr("\nParsing ELF section headers... "); + + /* + * Even though fgkaslr may have been disabled, we still + * need to parse through the section headers to get the + * start and end of the percpu section. This is because + * if we were built with CONFIG_FG_KASLR, there are more + * relative relocations present in vmlinux.relocs than + * just the percpu, and only the percpu relocs need to be + * adjusted when using just normal base address kaslr. + */ + if (cmdline_find_option_bool("nofgkaslr")) { + warn("FG_KASLR disabled on cmdline."); + nofgkaslr = 1; + } + + /* read the first section header */ + shnum = ehdr->e_shnum; + shstrndx = ehdr->e_shstrndx; + if (shnum == SHN_UNDEF || shstrndx == SHN_XINDEX) { + memcpy(&shdr, output + ehdr->e_shoff, sizeof(shdr)); + if (shnum == SHN_UNDEF) + shnum = shdr.sh_size; + if (shstrndx == SHN_XINDEX) + shstrndx = shdr.sh_link; + } + + /* we are going to need to allocate space for the section headers */ + sechdrs = malloc(sizeof(*sechdrs) * shnum); + if (!sechdrs) + error("Failed to allocate space for shdrs"); + + sections = malloc(sizeof(*sections) * shnum); + if (!sections) + error("Failed to allocate space for section pointers"); + + memcpy(sechdrs, output + ehdr->e_shoff, + sizeof(*sechdrs) * shnum); + + /* we need to allocate space for the section string table */ + s = &sechdrs[shstrndx]; + + secstrings = malloc(s->sh_size); + if (!secstrings) + error("Failed to allocate space for shstr"); + + memcpy(secstrings, output + s->sh_offset, s->sh_size); + + /* + * now we need to walk through the section headers and collect the + * sizes of the .text sections to be randomized. 
+ */ + for (i = 0; i < shnum; i++) { + s = &sechdrs[i]; + sname = secstrings + s->sh_name; + + if (s->sh_type == SHT_SYMTAB) { + /* only one symtab per image */ + if (symtab) + error("Unexpected duplicate symtab"); + + symtab = malloc(s->sh_size); + if (!symtab) + error("Failed to allocate space for symtab"); + + memcpy(symtab, output + s->sh_offset, s->sh_size); + num_syms = s->sh_size / sizeof(*symtab); + continue; + } + + if (s->sh_type == SHT_STRTAB && i != ehdr->e_shstrndx) { + if (strtab) + error("Unexpected duplicate strtab"); + + strtab = malloc(s->sh_size); + if (!strtab) + error("Failed to allocate space for strtab"); + + memcpy(strtab, output + s->sh_offset, s->sh_size); + } + + if (!strcmp(sname, ".text")) { + if (text) + error("Unexpected duplicate .text section"); + + text = s; + continue; + } + + if (!strcmp(sname, ".data..percpu")) { + /* get start addr for later */ + percpu = s; + continue; + } + + if (!(s->sh_flags & SHF_ALLOC) || + !(s->sh_flags & SHF_EXECINSTR) || + !(strstarts(sname, ".text"))) + continue; + + sections[num_sections] = s; + + num_sections++; + } + sections[num_sections] = NULL; + sections_size = num_sections; + + parse_symtab(symtab, strtab, num_syms); + + for (i = 0; i < ehdr->e_phnum; i++) { + Elf64_Phdr *phdr = &phdrs[i]; + + switch (phdr->p_type) { + case PT_LOAD: + if ((phdr->p_align % 0x200000) != 0) + error("Alignment of LOAD segment isn't multiple of 2MB"); + dest = output; + dest += (phdr->p_paddr - LOAD_PHYSICAL_ADDR); + if (!nofgkaslr && + (text && phdr->p_offset == text->sh_offset)) { + move_text(num_sections, secstrings, text, + output, dest, phdr); + } else { + if (percpu && + phdr->p_offset == percpu->sh_offset) { + percpu_start = percpu->sh_addr; + percpu_end = percpu_start + + phdr->p_filesz; + } + memmove(dest, output + phdr->p_offset, + phdr->p_filesz); + } + break; + default: /* Ignore other PT_* */ + break; + } + } + + /* we need to keep the section info to redo relocs */ + free(secstrings); + free(phdrs); +} diff --git a/arch/x86/boot/compressed/gen-symbols.h b/arch/x86/boot/compressed/gen-symbols.h new file mode 100644 index 00000000000000..15b7ddec276265 --- /dev/null +++ b/arch/x86/boot/compressed/gen-symbols.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * List of symbols needed for both C code and objcopy when FG-KASLR is on. + * We declare them once and then just use GEN() definition. + * + * Copyright (C) 2021-2022, Intel Corporation. 
+ * Author: Alexander Lobakin + */ + +#ifdef GEN +GEN(__altinstr_replacement) +GEN(__altinstr_replacement_end) +GEN(__start___ex_table) +GEN(__start_orc_unwind) +GEN(__start_orc_unwind_ip) +GEN(__stop___ex_table) +GEN(__stop_orc_unwind_ip) +GEN(_einittext) +GEN(_etext) +GEN(_sinittext) +GEN(_stext) +GEN(kallsyms_addresses) +GEN(kallsyms_markers) +GEN(kallsyms_names) +GEN(kallsyms_num_syms) +GEN(kallsyms_offsets) +GEN(kallsyms_relative_base) +GEN(kallsyms_token_index) +GEN(kallsyms_token_table) +#endif /* GEN */ diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 659fad53ca8234..a3a667f5e5cdb3 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -158,8 +158,8 @@ SYM_FUNC_START_ALIAS(efi_stub_entry) call efi_main /* efi_main returns the possibly relocated address of startup_32 */ jmp *%eax -SYM_FUNC_END(efi32_stub_entry) SYM_FUNC_END_ALIAS(efi_stub_entry) +SYM_FUNC_END(efi32_stub_entry) #endif .text diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index fd9441f4045709..b6fb27fd7abd58 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -72,7 +72,7 @@ #define rva(X) ((X) - startup_32) .code32 -SYM_FUNC_START(startup_32) +SYM_FUNC_START_SECT(startup_32, startup) /* * 32bit entry is 0 and it is ABI so immutable! * If we come here directly from a bootloader, @@ -297,8 +297,10 @@ SYM_FUNC_START(startup_32) SYM_FUNC_END(startup_32) #ifdef CONFIG_EFI_MIXED +SYM_PUSH_SECTION(startup) .org 0x190 -SYM_FUNC_START(efi32_stub_entry) +SYM_POP_SECTION() +SYM_FUNC_START_SECT(efi32_stub_entry, startup) add $0x4, %esp /* Discard return address */ popl %ecx popl %edx @@ -332,8 +334,10 @@ SYM_FUNC_END(efi32_stub_entry) #endif .code64 +SYM_PUSH_SECTION(startup) .org 0x200 -SYM_CODE_START(startup_64) +SYM_POP_SECTION() +SYM_CODE_START_SECT(startup_64, startup) /* * 64bit entry is 0x200 and it is ABI so immutable! * We come here either from startup_32 or directly from a @@ -533,8 +537,10 @@ trampoline_return: SYM_CODE_END(startup_64) #ifdef CONFIG_EFI_STUB +SYM_PUSH_SECTION(startup) .org 0x390 -SYM_FUNC_START(efi64_stub_entry) +SYM_POP_SECTION() +SYM_FUNC_START_SECT(efi64_stub_entry, startup) SYM_FUNC_START_ALIAS(efi_stub_entry) and $~0xf, %rsp /* realign the stack */ movq %rdx, %rbx /* save boot_params pointer */ @@ -542,12 +548,12 @@ SYM_FUNC_START_ALIAS(efi_stub_entry) movq %rbx,%rsi leaq rva(startup_64)(%rax), %rax jmp *%rax -SYM_FUNC_END(efi64_stub_entry) SYM_FUNC_END_ALIAS(efi_stub_entry) +SYM_FUNC_END(efi64_stub_entry) #endif .text -SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) +SYM_FUNC_START_LOCAL_NOALIGN_SECT(.Lrelocated, relocated) /* * Clear BSS (stack is currently empty) @@ -670,20 +676,22 @@ SYM_CODE_START(trampoline_32bit_src) SYM_CODE_END(trampoline_32bit_src) .code64 -SYM_FUNC_START_LOCAL_NOALIGN(.Lpaging_enabled) +SYM_FUNC_START_LOCAL_NOALIGN_SECT(.Lpaging_enabled, trampoline_32bit_src) /* Return from the trampoline */ jmp *%rdi SYM_FUNC_END(.Lpaging_enabled) +SYM_PUSH_SECTION(trampoline_32bit_src) /* * The trampoline code has a size limit. * Make sure we fail to compile if the trampoline code grows * beyond TRAMPOLINE_32BIT_CODE_SIZE bytes. 
*/ .org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE +SYM_POP_SECTION() .code32 -SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode) +SYM_FUNC_START_LOCAL_NOALIGN_SECT(.Lno_longmode, no_longmode) /* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */ 1: hlt @@ -747,7 +755,7 @@ SYM_DATA(efi_is64, .byte 1) __HEAD .code32 -SYM_FUNC_START(efi32_pe_entry) +SYM_FUNC_START_SECT(efi32_pe_entry, startup) /* * efi_status_t efi32_pe_entry(efi_handle_t image_handle, * efi_system_table_32_t *sys_table) @@ -839,7 +847,7 @@ SYM_DATA_END(loaded_image_proto) * * Physical offset is expected in %ebp */ -SYM_FUNC_START(startup32_set_idt_entry) +SYM_FUNC_START_SECT(startup32_set_idt_entry, startup) push %ebx push %ecx @@ -872,7 +880,7 @@ SYM_FUNC_START(startup32_set_idt_entry) SYM_FUNC_END(startup32_set_idt_entry) #endif -SYM_FUNC_START(startup32_load_idt) +SYM_FUNC_START_SECT(startup32_load_idt, startup) #ifdef CONFIG_AMD_MEM_ENCRYPT /* #VC handler */ leal rva(startup32_vc_handler)(%ebp), %eax @@ -904,7 +912,7 @@ SYM_FUNC_END(startup32_load_idt) * succeed. An incorrect C-bit position will map all memory unencrypted, so that * the compare will use the encrypted random data and fail. */ -SYM_FUNC_START(startup32_check_sev_cbit) +SYM_FUNC_START_SECT(startup32_check_sev_cbit, startup) #ifdef CONFIG_AMD_MEM_ENCRYPT pushl %eax pushl %ebx diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index a4339cb2d247fd..45f3fae35b8ace 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -207,10 +207,19 @@ static void handle_relocations(void *output, unsigned long output_len, if (IS_ENABLED(CONFIG_X86_64)) delta = virt_addr - LOAD_PHYSICAL_ADDR; - if (!delta) { + /* + * it is possible to have delta be zero and still have enabled + * FG-KASLR. We need to perform relocations for it regardless + * of whether the base address has moved. + */ + if ((cmdline_find_option_bool("nokaslr") || + !IS_ENABLED(CONFIG_FG_KASLR)) && !delta) { debug_putstr("No relocation needed... "); return; } + + pre_relocations_cleanup(map); + debug_putstr("Performing relocations... "); /* @@ -234,35 +243,103 @@ static void handle_relocations(void *output, unsigned long output_len, */ for (reloc = output + output_len - sizeof(*reloc); *reloc; reloc--) { long extended = *reloc; + long value; + + /* + * if using fgkaslr, we might have moved the address + * of the relocation. Check it to see if it needs adjusting + * from the original address. + */ + adjust_address(&extended); + extended += map; ptr = (unsigned long)extended; if (ptr < min_addr || ptr > max_addr) error("32-bit relocation outside of kernel!\n"); - *(uint32_t *)ptr += delta; + value = *(s32 *)ptr; + + /* + * If using FG-KASLR, the value of the relocation + * might need to be changed because it referred + * to an address that has moved. + */ + adjust_address(&value); + + value += delta; + *(u32 *)ptr = value; } #ifdef CONFIG_X86_64 while (*--reloc) { long extended = *reloc; + long oldvalue, value; + Elf64_Shdr *s; + + /* + * if using FG-KASLR, we might have moved the address of + * the relocation. Check it to see if it needs adjusting + * from the original address. 
+ */ + s = adjust_address(&extended); + extended += map; ptr = (unsigned long)extended; if (ptr < min_addr || ptr > max_addr) error("inverse 32-bit relocation outside of kernel!\n"); - *(int32_t *)ptr -= delta; + value = *(s32 *)ptr; + oldvalue = value; + + /* + * If using fgkaslr, these relocs will contain + * relative offsets which might need to be + * changed because it referred + * to an address that has moved. + */ + adjust_relative_offset(*reloc, &value, s); + + /* + * only percpu symbols need to have their values adjusted for + * base address KASLR since relative offsets within the .text + * and .text.* sections are ok wrt each other. + */ + if (is_percpu_addr(*reloc, oldvalue)) + value -= delta; + + *(s32 *)ptr = value; } for (reloc--; *reloc; reloc--) { long extended = *reloc; + long value; + + /* + * if using FG-KASLR, we might have moved the address of the + * relocation. Check it to see if it needs adjusting from the + * original address. + */ + adjust_address(&extended); + extended += map; ptr = (unsigned long)extended; if (ptr < min_addr || ptr > max_addr) error("64-bit relocation outside of kernel!\n"); - *(uint64_t *)ptr += delta; + value = *(s64 *)ptr; + + /* + * If using fgkaslr, the value of the relocation + * might need to be changed because it referred + * to an address that has moved. + */ + adjust_address(&value); + + value += delta; + *(u64 *)ptr = value; } + post_relocations_cleanup(map); #endif } #else @@ -271,6 +348,34 @@ static inline void handle_relocations(void *output, unsigned long output_len, { } #endif +static void layout_image(void *output, Elf_Ehdr *ehdr, Elf_Phdr *phdrs) +{ + u32 i; + + for (i = 0; i < ehdr->e_phnum; i++) { + const Elf_Phdr *phdr = &phdrs[i]; + void *dest; + + switch (phdr->p_type) { + case PT_LOAD: +#ifdef CONFIG_X86_64 + if ((phdr->p_align % 0x200000) != 0) + error("Alignment of LOAD segment isn't multiple of 2MB"); +#endif +#ifdef CONFIG_RELOCATABLE + dest = output; + dest += (phdr->p_paddr - LOAD_PHYSICAL_ADDR); +#else + dest = (void *)(phdr->p_paddr); +#endif + memmove(dest, output + phdr->p_offset, phdr->p_filesz); + break; + default: /* Ignore other PT_* */ + break; + } + } +} + static void parse_elf(void *output) { #ifdef CONFIG_X86_64 @@ -280,6 +385,7 @@ static void parse_elf(void *output) Elf32_Ehdr ehdr; Elf32_Phdr *phdrs, *phdr; #endif + int nokaslr; void *dest; int i; @@ -292,6 +398,12 @@ static void parse_elf(void *output) return; } + if (IS_ENABLED(CONFIG_FG_KASLR)) { + nokaslr = cmdline_find_option_bool("nokaslr"); + if (nokaslr) + warn("FG_KASLR disabled: 'nokaslr' on cmdline."); + } + debug_putstr("Parsing ELF... 
"); phdrs = malloc(sizeof(*phdrs) * ehdr.e_phnum); @@ -300,26 +412,10 @@ static void parse_elf(void *output) memcpy(phdrs, output + ehdr.e_phoff, sizeof(*phdrs) * ehdr.e_phnum); - for (i = 0; i < ehdr.e_phnum; i++) { - phdr = &phdrs[i]; - - switch (phdr->p_type) { - case PT_LOAD: -#ifdef CONFIG_X86_64 - if ((phdr->p_align % 0x200000) != 0) - error("Alignment of LOAD segment isn't multiple of 2MB"); -#endif -#ifdef CONFIG_RELOCATABLE - dest = output; - dest += (phdr->p_paddr - LOAD_PHYSICAL_ADDR); -#else - dest = (void *)(phdr->p_paddr); -#endif - memmove(dest, output + phdr->p_offset, phdr->p_filesz); - break; - default: /* Ignore other PT_* */ break; - } - } + if (IS_ENABLED(CONFIG_FG_KASLR) && !nokaslr) + layout_randomized_image(output, &ehdr, phdrs); + else + layout_image(output, &ehdr, phdrs); free(phdrs); } diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 16ed360b6692db..1315a101c1c941 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -83,6 +83,34 @@ struct mem_vector { u64 size; }; +#ifdef CONFIG_X86_64 +#define Elf_Ehdr Elf64_Ehdr +#define Elf_Phdr Elf64_Phdr +#define Elf_Shdr Elf64_Shdr +#else +#define Elf_Ehdr Elf32_Ehdr +#define Elf_Phdr Elf32_Phdr +#define Elf_Shdr Elf32_Shdr +#endif + +#ifdef CONFIG_FG_KASLR +void layout_randomized_image(void *output, Elf_Ehdr *ehdr, Elf_Phdr *phdrs); +void pre_relocations_cleanup(unsigned long map); +void post_relocations_cleanup(unsigned long map); +Elf_Shdr *adjust_address(long *address); +void adjust_relative_offset(long pc, long *value, Elf_Shdr *section); +bool is_percpu_addr(long pc, long offset); +#else +static inline void layout_randomized_image(void *output, Elf_Ehdr *ehdr, + Elf_Phdr *phdrs) { } +static inline void pre_relocations_cleanup(unsigned long map) { } +static inline void post_relocations_cleanup(unsigned long map) { } +static inline Elf_Shdr *adjust_address(long *address) { return NULL; } +static inline void adjust_relative_offset(long pc, long *value, + Elf_Shdr *section) { } +static inline bool is_percpu_addr(long pc, long offset) { return true; } +#endif + #ifdef CONFIG_RANDOMIZE_BASE /* kaslr.c */ void choose_random_location(unsigned long input, diff --git a/arch/x86/boot/compressed/utils.c b/arch/x86/boot/compressed/utils.c new file mode 100644 index 00000000000000..0fbc2c18d0b911 --- /dev/null +++ b/arch/x86/boot/compressed/utils.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * This contains various libraries that are needed for FG-KASLR. + * + * Copyright (C) 2020-2022, Intel Corporation. 
+ * Author: Kristen Carlson Accardi + */ + +#define _LINUX_KPROBES_H +#define NOKPROBE_SYMBOL(fname) + +#include "../../../../lib/sort.c" +#include "../../../../lib/bsearch.c" + +#define ORC_COMPRESSED_BOOT +#include "../../lib/orc.c" diff --git a/arch/x86/boot/pmjump.S b/arch/x86/boot/pmjump.S index cbec8bd0841fa7..e647c17000a993 100644 --- a/arch/x86/boot/pmjump.S +++ b/arch/x86/boot/pmjump.S @@ -46,7 +46,7 @@ SYM_FUNC_END(protected_mode_jump) .code32 .section ".text32","ax" -SYM_FUNC_START_LOCAL_NOALIGN(.Lin_pm32) +SYM_FUNC_START_LOCAL_NOALIGN_SECT(.Lin_pm32, in_pm32) # Set up data segments for flat 32-bit mode movl %ecx, %ds movl %ecx, %es diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 363699dd72206e..6b92beb7820a95 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -1752,8 +1752,8 @@ SYM_FUNC_END(aesni_gcm_finalize) #endif -SYM_FUNC_START_LOCAL_ALIAS(_key_expansion_128) SYM_FUNC_START_LOCAL(_key_expansion_256a) +SYM_FUNC_START_LOCAL_ALIAS(_key_expansion_128) pshufd $0b11111111, %xmm1, %xmm1 shufps $0b00010000, %xmm0, %xmm4 pxor %xmm4, %xmm0 @@ -1763,8 +1763,8 @@ SYM_FUNC_START_LOCAL(_key_expansion_256a) movaps %xmm0, (TKEYP) add $0x10, TKEYP RET -SYM_FUNC_END(_key_expansion_256a) SYM_FUNC_END_ALIAS(_key_expansion_128) +SYM_FUNC_END(_key_expansion_256a) SYM_FUNC_START_LOCAL(_key_expansion_192a) pshufd $0b01010101, %xmm1, %xmm1 diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl index 71fae5a09e56d4..221a4596f39009 100644 --- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl +++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl @@ -421,6 +421,7 @@ sub poly1305_iteration { map("%xmm$_",(0..15)); $code.=<<___; +SYM_PUSH_SECTION(__poly1305_block) .type __poly1305_block,\@abi-omnipotent .align 32 __poly1305_block: @@ -431,7 +432,9 @@ sub poly1305_iteration { pop $ctx ret .size __poly1305_block,.-__poly1305_block +SYM_POP_SECTION() +SYM_PUSH_SECTION(__poly1305_init_avx) .type __poly1305_init_avx,\@abi-omnipotent .align 32 __poly1305_init_avx: @@ -596,6 +599,7 @@ sub poly1305_iteration { pop %rbp ret .size __poly1305_init_avx,.-__poly1305_init_avx +SYM_POP_SECTION() ___ &declare_function("poly1305_blocks_avx", 32, 4); diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 9191280d9ea316..a37be0dd3179e6 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -24,7 +24,18 @@ # error "Invalid value for CONFIG_PHYSICAL_ALIGN" #endif -#if defined(CONFIG_KERNEL_BZIP2) +#ifdef CONFIG_FG_KASLR +/* + * We need extra boot heap when using FG-KASLR because we make a copy + * of the original decompressed kernel to avoid issues with writing + * over ourselves when shuffling the sections. We also need extra + * space for resorting kallsyms after shuffling. This value could + * be decreased if free() would release memory properly, or if we + * could avoid the kernel copy. It would need to be increased if we + * find additional tables that need to be resorted. 
+ */ +# define BOOT_HEAP_SIZE 0x4800000 +#elif defined(CONFIG_KERNEL_BZIP2) # define BOOT_HEAP_SIZE 0x400000 #elif defined(CONFIG_KERNEL_ZSTD) /* diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h index 5a2baf28a1dcda..7708548713c4b2 100644 --- a/arch/x86/include/asm/orc_types.h +++ b/arch/x86/include/asm/orc_types.h @@ -67,6 +67,13 @@ struct orc_entry { #endif } __packed; +static inline unsigned long orc_ip(const int *ip) +{ + return (unsigned long)ip + *ip; +} + +void orc_sort(int *ip_table, struct orc_entry *orc_table, u32 num_orcs); + #endif /* __ASSEMBLY__ */ #endif /* _ORC_TYPES_H */ diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 0d76502cc6f5be..3efdac789bc46a 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -663,6 +663,7 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu); extern typeof(func) __raw_callee_save_##func; \ \ asm(".pushsection " section ", \"ax\";" \ + ASM_PUSH_SECTION(__raw_callee_save_##func) ";" \ ".globl " PV_THUNK_NAME(func) ";" \ ".type " PV_THUNK_NAME(func) ", @function;" \ PV_THUNK_NAME(func) ":" \ @@ -673,6 +674,7 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu); FRAME_END \ ASM_RET \ ".size " PV_THUNK_NAME(func) ", .-" PV_THUNK_NAME(func) ";" \ + ASM_POP_SECTION() ";" \ ".popsection") #define PV_CALLEE_SAVE_REGS_THUNK(func) \ diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h index 1474cf96251dd5..3fde623bbcb2c4 100644 --- a/arch/x86/include/asm/qspinlock_paravirt.h +++ b/arch/x86/include/asm/qspinlock_paravirt.h @@ -35,6 +35,7 @@ PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath); * rdx = internal variable (set to 0) */ asm (".pushsection .text;" + ASM_PUSH_SECTION(__raw_callee_save___pv_queued_spin_unlock) ";" ".globl " PV_UNLOCK ";" ".type " PV_UNLOCK ", @function;" ".align 4,0x90;" @@ -58,6 +59,7 @@ asm (".pushsection .text;" FRAME_END ASM_RET ".size " PV_UNLOCK ", .-" PV_UNLOCK ";" + ASM_POP_SECTION() ";" ".popsection"); #else /* CONFIG_64BIT */ diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index eb8656bac99b6b..d58422148481d2 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -342,7 +342,7 @@ setup_once: andl $0,setup_once_ref /* Once is enough, thanks */ RET -SYM_FUNC_START(early_idt_handler_array) +SYM_FUNC_START_SECT(early_idt_handler_array, early_idt_handler) # 36(%esp) %eflags # 32(%esp) %cs # 28(%esp) %eip @@ -359,7 +359,7 @@ SYM_FUNC_START(early_idt_handler_array) .endr SYM_FUNC_END(early_idt_handler_array) -SYM_CODE_START_LOCAL(early_idt_handler_common) +SYM_CODE_START_LOCAL_SECT(early_idt_handler_common, early_idt_handler) /* * The stack is the hardware frame, an error code or zero, and the * vector number. 
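Several of the tables the boot stub fixes up (.orc_unwind_ip here, __ex_table earlier) hold 32-bit self-relative entries rather than absolute pointers: orc_ip() above and ex_fixup_addr() in fgkaslr.c recover an address by adding the entry's own location to its stored value, which is why relocating a function also means re-adjusting and re-sorting those tables. A standalone sketch of the encoding (illustrative only; the struct and names below are hypothetical, not kernel code):

    #include <stdio.h>

    /* One object, so the pointer subtraction below is well defined. */
    static struct {
            int ip_table[3];        /* stand-in for .orc_unwind_ip entries */
            char text[48];          /* stand-in for instruction addresses  */
    } blob;

    /* Same arithmetic as orc_ip() added to asm/orc_types.h above. */
    static unsigned long decode_ip(const int *ip)
    {
            return (unsigned long)ip + *ip;
    }

    int main(void)
    {
            for (int i = 0; i < 3; i++) {
                    char *target = &blob.text[i * 16];

                    /* encode: distance from the table entry to its target */
                    blob.ip_table[i] = (int)(target - (char *)&blob.ip_table[i]);
            }

            for (int i = 0; i < 3; i++)
                    printf("entry %d decodes to %p, target %p\n", i,
                           (void *)decode_ip(&blob.ip_table[i]),
                           (void *)&blob.text[i * 16]);

            return 0;
    }

Because the stored values are relative to the entries themselves, moving the targets by per-section offsets invalidates them, which is exactly why adjust_relative_offset() skips .orc_unwind_ip relocs and update_orc_table()/sort_orc_table() redo that table once the final layout is known.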
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 9c63fc5988cdac..a19b6fa2bf8794 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -367,7 +367,7 @@ SYM_DATA(initial_stack, .quad init_thread_union + THREAD_SIZE - FRAME_SIZE) __FINITDATA __INIT -SYM_CODE_START(early_idt_handler_array) +SYM_CODE_START_SECT(early_idt_handler_array, early_idt_handler) i = 0 .rept NUM_EXCEPTION_VECTORS .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0 @@ -385,7 +385,7 @@ SYM_CODE_START(early_idt_handler_array) UNWIND_HINT_IRET_REGS offset=16 SYM_CODE_END(early_idt_handler_array) -SYM_CODE_START_LOCAL(early_idt_handler_common) +SYM_CODE_START_LOCAL_SECT(early_idt_handler_common, early_idt_handler) /* * The stack is the hardware frame, an error code or zero, and the * vector number. diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 6290712cb36d2a..e0be6bc763370e 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -1019,6 +1019,7 @@ NOKPROBE_SYMBOL(kprobe_int3_handler); */ asm( ".text\n" + ASM_PUSH_SECTION(__kretprobe_trampoline) "\n" ".global __kretprobe_trampoline\n" ".type __kretprobe_trampoline, @function\n" "__kretprobe_trampoline:\n" @@ -1053,6 +1054,7 @@ asm( #endif ASM_RET ".size __kretprobe_trampoline, .-__kretprobe_trampoline\n" + ASM_POP_SECTION() "\n" ); NOKPROBE_SYMBOL(__kretprobe_trampoline); /* diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index a438217cbface2..6a497d5647e75f 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -1021,6 +1021,7 @@ extern bool __raw_callee_save___kvm_vcpu_is_preempted(long); */ asm( ".pushsection .text;" +ASM_PUSH_SECTION(__raw_callee_save___kvm_vcpu_is_preempted) ";" ".global __raw_callee_save___kvm_vcpu_is_preempted;" ".type __raw_callee_save___kvm_vcpu_is_preempted, @function;" "__raw_callee_save___kvm_vcpu_is_preempted:" @@ -1029,6 +1030,7 @@ asm( "setne %al;" "ret;" ".size __raw_callee_save___kvm_vcpu_is_preempted, .-__raw_callee_save___kvm_vcpu_is_preempted;" +ASM_POP_SECTION() ";" ".popsection"); #endif diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index fcc8a7699103a4..ff36f21e665a2f 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S @@ -35,7 +35,7 @@ #define CP_PA_BACKUP_PAGES_MAP DATA(0x1c) .text -SYM_CODE_START_NOALIGN(relocate_kernel) +SYM_CODE_START_NOALIGN_SECT(relocate_kernel, kexec_control_code) /* Save the CPU context, used for jumping back */ pushl %ebx @@ -94,7 +94,7 @@ SYM_CODE_START_NOALIGN(relocate_kernel) RET SYM_CODE_END(relocate_kernel) -SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) +SYM_CODE_START_LOCAL_NOALIGN_SECT(identity_mapped, kexec_control_code) /* set return address to 0 if not preserving context */ pushl $0 /* store the start address on the stack */ @@ -193,7 +193,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) RET SYM_CODE_END(identity_mapped) -SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) +SYM_CODE_START_LOCAL_NOALIGN_SECT(virtual_mapped, kexec_control_code) movl CR4(%edi), %eax movl %eax, %cr4 movl CR3(%edi), %eax @@ -212,7 +212,7 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) SYM_CODE_END(virtual_mapped) /* Do the copies */ -SYM_CODE_START_LOCAL_NOALIGN(swap_pages) +SYM_CODE_START_LOCAL_NOALIGN_SECT(swap_pages, kexec_control_code) movl 8(%esp), %edx movl 4(%esp), %ecx pushl %ebp @@ -274,5 +274,7 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) RET SYM_CODE_END(swap_pages) +SYM_PUSH_SECTION(kexec_control_code) .globl 
kexec_control_code_size .set kexec_control_code_size, . - relocate_kernel +SYM_POP_SECTION() diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index 399f075ccdc461..fb8ff461436e94 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -38,9 +38,11 @@ #define CP_PA_BACKUP_PAGES_MAP DATA(0x30) .text +SYM_PUSH_SECTION(kexec_control_code) .align PAGE_SIZE .code64 -SYM_CODE_START_NOALIGN(relocate_kernel) +SYM_POP_SECTION() +SYM_CODE_START_NOALIGN_SECT(relocate_kernel, kexec_control_code) UNWIND_HINT_EMPTY /* * %rdi indirection_page @@ -107,7 +109,7 @@ SYM_CODE_START_NOALIGN(relocate_kernel) RET SYM_CODE_END(relocate_kernel) -SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) +SYM_CODE_START_LOCAL_NOALIGN_SECT(identity_mapped, kexec_control_code) UNWIND_HINT_EMPTY /* set return address to 0 if not preserving context */ pushq $0 @@ -213,7 +215,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) RET SYM_CODE_END(identity_mapped) -SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) +SYM_CODE_START_LOCAL_NOALIGN_SECT(virtual_mapped, kexec_control_code) UNWIND_HINT_EMPTY movq RSP(%r8), %rsp movq CR4(%r8), %rax @@ -235,7 +237,7 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) SYM_CODE_END(virtual_mapped) /* Do the copies */ -SYM_CODE_START_LOCAL_NOALIGN(swap_pages) +SYM_CODE_START_LOCAL_NOALIGN_SECT(swap_pages, kexec_control_code) UNWIND_HINT_EMPTY movq %rdi, %rcx /* Put the page_list in %rcx */ xorl %edi, %edi @@ -291,5 +293,7 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) RET SYM_CODE_END(swap_pages) +SYM_PUSH_SECTION(kexec_control_code) .globl kexec_control_code_size .set kexec_control_code_size, . - relocate_kernel +SYM_POP_SECTION() diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 2de3c8c5eba9fb..e5748bf15966aa 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -25,11 +25,6 @@ extern struct orc_entry __stop_orc_unwind[]; static bool orc_init __ro_after_init; static unsigned int lookup_num_blocks __ro_after_init; -static inline unsigned long orc_ip(const int *ip) -{ - return (unsigned long)ip + *ip; -} - static struct orc_entry *__orc_find(int *ip_table, struct orc_entry *u_table, unsigned int num_entries, unsigned long ip) { @@ -188,53 +183,6 @@ static struct orc_entry *orc_find(unsigned long ip) } #ifdef CONFIG_MODULES - -static DEFINE_MUTEX(sort_mutex); -static int *cur_orc_ip_table = __start_orc_unwind_ip; -static struct orc_entry *cur_orc_table = __start_orc_unwind; - -static void orc_sort_swap(void *_a, void *_b, int size) -{ - struct orc_entry *orc_a, *orc_b; - struct orc_entry orc_tmp; - int *a = _a, *b = _b, tmp; - int delta = _b - _a; - - /* Swap the .orc_unwind_ip entries: */ - tmp = *a; - *a = *b + delta; - *b = tmp - delta; - - /* Swap the corresponding .orc_unwind entries: */ - orc_a = cur_orc_table + (a - cur_orc_ip_table); - orc_b = cur_orc_table + (b - cur_orc_ip_table); - orc_tmp = *orc_a; - *orc_a = *orc_b; - *orc_b = orc_tmp; -} - -static int orc_sort_cmp(const void *_a, const void *_b) -{ - struct orc_entry *orc_a; - const int *a = _a, *b = _b; - unsigned long a_val = orc_ip(a); - unsigned long b_val = orc_ip(b); - - if (a_val > b_val) - return 1; - if (a_val < b_val) - return -1; - - /* - * The "weak" section terminator entries need to always be on the left - * to ensure the lookup code skips them in favor of real entries. - * These terminator entries exist to handle any gaps created by - * whitelisted .o files which didn't get objtool generation. 
- */ - orc_a = cur_orc_table + (a - cur_orc_ip_table); - return orc_a->sp_reg == ORC_REG_UNDEFINED && !orc_a->end ? -1 : 1; -} - void unwind_module_init(struct module *mod, void *_orc_ip, size_t orc_ip_size, void *_orc, size_t orc_size) { @@ -246,16 +194,7 @@ void unwind_module_init(struct module *mod, void *_orc_ip, size_t orc_ip_size, orc_size % sizeof(*orc) != 0 || num_entries != orc_size / sizeof(*orc)); - /* - * The 'cur_orc_*' globals allow the orc_sort_swap() callback to - * associate an .orc_unwind_ip table entry with its corresponding - * .orc_unwind entry so they can both be swapped. - */ - mutex_lock(&sort_mutex); - cur_orc_ip_table = orc_ip; - cur_orc_table = orc; - sort(orc_ip, num_entries, sizeof(int), orc_sort_cmp, orc_sort_swap); - mutex_unlock(&sort_mutex); + orc_sort(orc_ip, orc, num_entries); mod->arch.orc_unwind_ip = orc_ip; mod->arch.orc_unwind = orc; diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 27f830345b6f08..06ba33f5bc5825 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -141,14 +141,16 @@ SECTIONS #ifdef CONFIG_RETPOLINE __indirect_thunk_start = .; - *(.text.__x86.indirect_thunk) + *(SECT_WILDCARD(.text.__x86.indirect_thunk)) __indirect_thunk_end = .; #endif } :text =0xcccc + TEXT_FG_KASLR + /* End of text section, which should occupy whole number of pages */ - _etext = .; . = ALIGN(PAGE_SIZE); + _etext = .; X86_ALIGN_RODATA_BEGIN RO_DATA(PAGE_SIZE) @@ -303,7 +305,9 @@ SECTIONS * get the address and the length of them to patch the kernel safely. */ .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { + __altinstr_replacement = .; *(.altinstr_replacement) + __altinstr_replacement_end = .; } /* diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 40da8c7f3019ec..85ce58b041ac8a 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -309,6 +309,7 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt) static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); #define __FOP_FUNC(name) \ + __ASM_PUSH_SECTION(name) "\n\t" \ ".align " __stringify(FASTOP_SIZE) " \n\t" \ ".type " name ", @function \n\t" \ name ":\n\t" @@ -318,7 +319,8 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); #define __FOP_RET(name) \ "11: " ASM_RET \ - ".size " name ", .-" name "\n\t" + ".size " name ", .-" name "\n\t" \ + ASM_POP_SECTION() "\n\t" #define FOP_RET(name) \ __FOP_RET(#name) @@ -326,11 +328,13 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); #define FOP_START(op) \ extern void em_##op(struct fastop *fake); \ asm(".pushsection .text, \"ax\" \n\t" \ + ASM_PUSH_SECTION(em_##op) "\n\t" \ ".global em_" #op " \n\t" \ ".align " __stringify(FASTOP_SIZE) " \n\t" \ "em_" #op ":\n\t" #define FOP_END \ + ASM_POP_SECTION() "\n\t" \ ".popsection") #define __FOPNOP(name) \ @@ -430,6 +434,7 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); /* Special case for SETcc - 1 instruction per cc */ #define FOP_SETCC(op) \ + ASM_PUSH_SECTION(op) "\n\t" \ ".align 4 \n\t" \ ".type " #op ", @function \n\t" \ #op ": \n\t" \ diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index f76747862bd2e0..ff094cecebc496 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -50,6 +50,9 @@ lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o lib-$(CONFIG_RETPOLINE) += retpoline.o +ifdef CONFIG_MODULES 
+lib-$(CONFIG_UNWINDER_ORC) += orc.o +endif obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o obj-y += iomem.o diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 8ca5ecf16dc477..ddbd6b5b7b47e5 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -220,7 +220,7 @@ EXPORT_SYMBOL(copy_user_enhanced_fast_string) * Output: * eax uncopied bytes or 0 if successful. */ -SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail) +SYM_CODE_START_LOCAL_SECT(.Lcopy_user_handle_tail, copy_user_handle_tail) cmp $X86_TRAP_MC,%eax je 3f diff --git a/arch/x86/lib/error-inject.c b/arch/x86/lib/error-inject.c index 520897061ee09e..720a73d4ee70b8 100644 --- a/arch/x86/lib/error-inject.c +++ b/arch/x86/lib/error-inject.c @@ -8,11 +8,13 @@ asmlinkage void just_return_func(void); asm( ".text\n" + ASM_PUSH_SECTION(just_return_func) "\n" ".type just_return_func, @function\n" ".globl just_return_func\n" "just_return_func:\n" ASM_RET ".size just_return_func, .-just_return_func\n" + ASM_POP_SECTION() "\n" ); void override_function_with_return(struct pt_regs *regs) diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index b70d98d79a9da8..06d288909a6849 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -163,8 +163,7 @@ SYM_FUNC_START(__get_user_nocheck_8) SYM_FUNC_END(__get_user_nocheck_8) EXPORT_SYMBOL(__get_user_nocheck_8) - -SYM_CODE_START_LOCAL(.Lbad_get_user_clac) +SYM_CODE_START_LOCAL_SECT(.Lbad_get_user_clac, bad_get_user_clac) ASM_CLAC bad_get_user: xor %edx,%edx @@ -173,7 +172,7 @@ bad_get_user: SYM_CODE_END(.Lbad_get_user_clac) #ifdef CONFIG_X86_32 -SYM_CODE_START_LOCAL(.Lbad_get_user_8_clac) +SYM_CODE_START_LOCAL_SECT(.Lbad_get_user_8_clac, bad_get_user_8_clac) ASM_CLAC bad_get_user_8: xor %edx,%edx diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 59cf2343f3d906..2dc6033e2932d1 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -27,8 +27,8 @@ * Output: * rax original destination */ -SYM_FUNC_START_ALIAS(__memcpy) SYM_FUNC_START_WEAK(memcpy) +SYM_FUNC_START_ALIAS(__memcpy) ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \ "jmp memcpy_erms", X86_FEATURE_ERMS @@ -40,8 +40,8 @@ SYM_FUNC_START_WEAK(memcpy) movl %edx, %ecx rep movsb RET -SYM_FUNC_END(memcpy) SYM_FUNC_END_ALIAS(__memcpy) +SYM_FUNC_END(memcpy) EXPORT_SYMBOL(memcpy) EXPORT_SYMBOL(__memcpy) diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index 50ea390df7128d..0040ac38751be0 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -24,9 +24,8 @@ * Output: * rax: dest */ -SYM_FUNC_START_WEAK(memmove) SYM_FUNC_START(__memmove) - +SYM_FUNC_START_WEAK_ALIAS(memmove) mov %rdi, %rax /* Decide forward/backward copy mode */ @@ -206,7 +205,7 @@ SYM_FUNC_START(__memmove) movb %r11b, (%rdi) 13: RET -SYM_FUNC_END(__memmove) SYM_FUNC_END_ALIAS(memmove) +SYM_FUNC_END(__memmove) EXPORT_SYMBOL(__memmove) EXPORT_SYMBOL(memmove) diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index d624f2bc42f168..32cf147393e708 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -17,8 +17,9 @@ * * rax original destination */ -SYM_FUNC_START_WEAK(memset) + SYM_FUNC_START(__memset) +SYM_FUNC_START_WEAK_ALIAS(memset) /* * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended * to use it when possible. If not available, use fast string instructions. 
@@ -41,8 +42,8 @@ SYM_FUNC_START(__memset) rep stosb movq %r9,%rax RET -SYM_FUNC_END(__memset) SYM_FUNC_END_ALIAS(memset) +SYM_FUNC_END(__memset) EXPORT_SYMBOL(memset) EXPORT_SYMBOL(__memset) diff --git a/arch/x86/lib/orc.c b/arch/x86/lib/orc.c new file mode 100644 index 00000000000000..6a42842bf4ef91 --- /dev/null +++ b/arch/x86/lib/orc.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * ORC sorting shared by the compressed boot code and ORC module + * support. + */ + +#include +#include +#include + +#ifndef ORC_COMPRESSED_BOOT +static DEFINE_MUTEX(sort_mutex); + +#define sort_mutex_lock() mutex_lock(&sort_mutex) +#define sort_mutex_unlock() mutex_unlock(&sort_mutex) +#else /* ORC_COMPRESSED_BOOT */ +#define sort_mutex_lock() +#define sort_mutex_unlock() +#endif /* ORC_COMPRESSED_BOOT */ + +static int *cur_orc_ip_table; +static struct orc_entry *cur_orc_table; + +static void orc_sort_swap(void *_a, void *_b, int size) +{ + struct orc_entry *orc_a, *orc_b; + int *a = _a, *b = _b, tmp; + int delta = _b - _a; + + /* Swap the .orc_unwind_ip entries: */ + tmp = *a; + *a = *b + delta; + *b = tmp - delta; + + /* Swap the corresponding .orc_unwind entries: */ + orc_a = cur_orc_table + (a - cur_orc_ip_table); + orc_b = cur_orc_table + (b - cur_orc_ip_table); + swap(*orc_a, *orc_b); +} + +static int orc_sort_cmp(const void *_a, const void *_b) +{ + const int *a = _a, *b = _b; + unsigned long a_val = orc_ip(a); + unsigned long b_val = orc_ip(b); + struct orc_entry *orc_a; + + if (a_val > b_val) + return 1; + if (a_val < b_val) + return -1; + + /* + * The "weak" section terminator entries need to always be on the left + * to ensure the lookup code skips them in favor of real entries. + * These terminator entries exist to handle any gaps created by + * whitelisted .o files which didn't get objtool generation. + */ + orc_a = cur_orc_table + (a - cur_orc_ip_table); + + return orc_a->sp_reg == ORC_REG_UNDEFINED && !orc_a->end ? -1 : 1; +} + +void orc_sort(int *ip_table, struct orc_entry *orc_table, u32 num_orcs) +{ + /* + * The 'cur_orc_*' globals allow the orc_sort_swap() callback to + * associate an .orc_unwind_ip table entry with its corresponding + * .orc_unwind entry so they can both be swapped. 
+ */ + sort_mutex_lock(); + + cur_orc_ip_table = ip_table; + cur_orc_table = orc_table; + sort(ip_table, num_orcs, sizeof(int), orc_sort_cmp, orc_sort_swap); + + sort_mutex_unlock(); +} diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S index ecb2049c1273f6..be6b2dc0967c5a 100644 --- a/arch/x86/lib/putuser.S +++ b/arch/x86/lib/putuser.S @@ -102,7 +102,7 @@ SYM_FUNC_END(__put_user_8) EXPORT_SYMBOL(__put_user_8) EXPORT_SYMBOL(__put_user_nocheck_8) -SYM_CODE_START_LOCAL(.Lbad_put_user_clac) +SYM_CODE_START_LOCAL_SECT(.Lbad_put_user_clac, bad_put_user_clac) ASM_CLAC .Lbad_put_user: movl $-EFAULT,%ecx diff --git a/arch/x86/power/hibernate_asm_32.S b/arch/x86/power/hibernate_asm_32.S index 5606a15cf9a173..06f666e3e28a48 100644 --- a/arch/x86/power/hibernate_asm_32.S +++ b/arch/x86/power/hibernate_asm_32.S @@ -16,7 +16,7 @@ .text -SYM_FUNC_START(swsusp_arch_suspend) +SYM_FUNC_START_SECT(swsusp_arch_suspend, hibernate_page) movl %esp, saved_context_esp movl %ebx, saved_context_ebx movl %ebp, saved_context_ebp @@ -35,7 +35,7 @@ SYM_FUNC_START(swsusp_arch_suspend) RET SYM_FUNC_END(swsusp_arch_suspend) -SYM_CODE_START(restore_image) +SYM_CODE_START_SECT(restore_image, hibernate_page) /* prepare to jump to the image kernel */ movl restore_jump_address, %ebx movl restore_cr3, %ebp @@ -48,7 +48,7 @@ SYM_CODE_START(restore_image) SYM_CODE_END(restore_image) /* code below has been relocated to a safe page */ -SYM_CODE_START(core_restore_code) +SYM_CODE_START_SECT(core_restore_code, hibernate_page) movl temp_pgt, %eax movl %eax, %cr3 @@ -81,8 +81,10 @@ done: SYM_CODE_END(core_restore_code) /* code below belongs to the image kernel */ +SYM_PUSH_SECTION(hibernate_page) .align PAGE_SIZE -SYM_FUNC_START(restore_registers) +SYM_POP_SECTION() +SYM_FUNC_START_SECT(restore_registers, hibernate_page) /* go back to the original page tables */ movl %ebp, %cr3 movl mmu_cr4_features, %ecx diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S index 0a0539e1cc8142..fdca1949ad9fa4 100644 --- a/arch/x86/power/hibernate_asm_64.S +++ b/arch/x86/power/hibernate_asm_64.S @@ -24,8 +24,10 @@ #include /* code below belongs to the image kernel */ +SYM_PUSH_SECTION(hibernate_page) .align PAGE_SIZE -SYM_FUNC_START(restore_registers) +SYM_POP_SECTION() +SYM_FUNC_START_SECT(restore_registers, hibernate_page) /* go back to the original page tables */ movq %r9, %cr3 @@ -69,7 +71,7 @@ SYM_FUNC_START(restore_registers) RET SYM_FUNC_END(restore_registers) -SYM_FUNC_START(swsusp_arch_suspend) +SYM_FUNC_START_SECT(swsusp_arch_suspend, hibernate_page) movq $saved_context, %rax movq %rsp, pt_regs_sp(%rax) movq %rbp, pt_regs_bp(%rax) @@ -99,7 +101,7 @@ SYM_FUNC_START(swsusp_arch_suspend) RET SYM_FUNC_END(swsusp_arch_suspend) -SYM_FUNC_START(restore_image) +SYM_FUNC_START_SECT(restore_image, hibernate_page) /* prepare to jump to the image kernel */ movq restore_jump_address(%rip), %r8 movq restore_cr3(%rip), %r9 @@ -118,7 +120,7 @@ SYM_FUNC_START(restore_image) SYM_FUNC_END(restore_image) /* code below has been relocated to a safe page */ -SYM_FUNC_START(core_restore_code) +SYM_FUNC_START_SECT(core_restore_code, hibernate_page) /* switch to temporary page tables */ movq %rax, %cr3 /* flush TLB */ diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index e2c5b296120d65..96b6042fc76f3c 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -45,6 +45,8 @@ struct section { }; static struct section *secs; +static int fgkaslr_mode; + static const char * const 
sym_regex_kernel[S_NSYMTYPES] = { /* * Following symbols have been audited. There values are constant and do @@ -823,6 +825,24 @@ static int is_percpu_sym(ElfW(Sym) *sym, const char *symname) strncmp(symname, "init_per_cpu_", 13); } +static int is_function_section(struct section *sec) +{ + if (!fgkaslr_mode) + return 0; + + return !strncmp(sec_name(sec->shdr.sh_info), ".text.", 6); +} + +static int is_randomized_sym(ElfW(Sym) *sym) +{ + if (!fgkaslr_mode) + return 0; + + if (sym->st_shndx > shnum) + return 0; + + return !strncmp(sec_name(sym_index(sym)), ".text.", 6); +} static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym, const char *symname) @@ -848,12 +868,15 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym, case R_X86_64_PC32: case R_X86_64_PLT32: /* - * PC relative relocations don't need to be adjusted unless - * referencing a percpu symbol. + * we need to keep pc relative relocations for sections which + * might be randomized, and for the percpu section. + * We also need to keep relocations for any offset which might + * reference an address in a section which has been randomized. * * NB: R_X86_64_PLT32 can be treated as R_X86_64_PC32. */ - if (is_percpu_sym(sym, symname)) + if (is_function_section(sec) || is_randomized_sym(sym) || + is_percpu_sym(sym, symname)) add_reloc(&relocs32neg, offset); break; @@ -1168,8 +1191,9 @@ static void print_reloc_info(void) void process(FILE *fp, int use_real_mode, int as_text, int show_absolute_syms, int show_absolute_relocs, - int show_reloc_info) + int show_reloc_info, int fgkaslr) { + fgkaslr_mode = fgkaslr; regex_init(use_real_mode); read_ehdr(fp); read_shdrs(fp); diff --git a/arch/x86/tools/relocs.h b/arch/x86/tools/relocs.h index 4c49c82446eb52..269db511b2434a 100644 --- a/arch/x86/tools/relocs.h +++ b/arch/x86/tools/relocs.h @@ -32,8 +32,8 @@ enum symtype { void process_32(FILE *fp, int use_real_mode, int as_text, int show_absolute_syms, int show_absolute_relocs, - int show_reloc_info); + int show_reloc_info, int fgkaslr); void process_64(FILE *fp, int use_real_mode, int as_text, int show_absolute_syms, int show_absolute_relocs, - int show_reloc_info); + int show_reloc_info, int fgkaslr); #endif /* RELOCS_H */ diff --git a/arch/x86/tools/relocs_common.c b/arch/x86/tools/relocs_common.c index 6634352a20bc78..d6acda36575a3e 100644 --- a/arch/x86/tools/relocs_common.c +++ b/arch/x86/tools/relocs_common.c @@ -12,14 +12,13 @@ void die(char *fmt, ...) 
static void usage(void) { - die("relocs [--abs-syms|--abs-relocs|--reloc-info|--text|--realmode]" \ - " vmlinux\n"); + die("relocs [--abs-syms|--abs-relocs|--reloc-info|--text|--realmode|--fg-kaslr] vmlinux\n"); } int main(int argc, char **argv) { int show_absolute_syms, show_absolute_relocs, show_reloc_info; - int as_text, use_real_mode; + int as_text, use_real_mode, fgkaslr_opt; const char *fname; FILE *fp; int i; @@ -30,6 +29,7 @@ int main(int argc, char **argv) show_reloc_info = 0; as_text = 0; use_real_mode = 0; + fgkaslr_opt = 0; fname = NULL; for (i = 1; i < argc; i++) { char *arg = argv[i]; @@ -54,6 +54,10 @@ int main(int argc, char **argv) use_real_mode = 1; continue; } + if (strcmp(arg, "--fg-kaslr") == 0) { + fgkaslr_opt = 1; + continue; + } } else if (!fname) { fname = arg; @@ -75,11 +79,11 @@ int main(int argc, char **argv) if (e_ident[EI_CLASS] == ELFCLASS64) process_64(fp, use_real_mode, as_text, show_absolute_syms, show_absolute_relocs, - show_reloc_info); + show_reloc_info, fgkaslr_opt); else process_32(fp, use_real_mode, as_text, show_absolute_syms, show_absolute_relocs, - show_reloc_info); + show_reloc_info, fgkaslr_opt); fclose(fp); return 0; } diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 42f3866bca6978..9f67660ace180e 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -84,6 +84,9 @@ /* Align . to a 8 byte boundary equals to maximum function alignment. */ #define ALIGN_FUNCTION() . = ALIGN(8) +/* This is useful for collecting individual sections back into one main */ +#define SECT_WILDCARD(sect) sect sect.[0-9a-zA-Z_]* + /* * LD_DEAD_CODE_DATA_ELIMINATION option enables -fdata-sections, which * generates .data.identifier sections, which need to be pulled in with @@ -97,14 +100,12 @@ * sections to be brought in with rodata. */ #if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) -#define TEXT_MAIN .text .text.[0-9a-zA-Z_]* -#define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..L* .data..compoundliteral* .data.$__unnamed_* .data.$L* -#define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]* -#define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]* .rodata..L* -#define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* .bss..compoundliteral* -#define SBSS_MAIN .sbss .sbss.[0-9a-zA-Z_]* +#define DATA_MAIN SECT_WILDCARD(.data) .data..L* .data..compoundliteral* .data.$__unnamed_* .data.$L* +#define SDATA_MAIN SECT_WILDCARD(.sdata) +#define RODATA_MAIN SECT_WILDCARD(.rodata) .rodata..L* +#define BSS_MAIN SECT_WILDCARD(.bss) .bss..compoundliteral* +#define SBSS_MAIN SECT_WILDCARD(.sbss) #else -#define TEXT_MAIN .text #define DATA_MAIN .data #define SDATA_MAIN .sdata #define RODATA_MAIN .rodata @@ -112,6 +113,41 @@ #define SBSS_MAIN .sbss #endif +/* + * LTO_CLANG, LD_DEAD_CODE_DATA_ELIMINATION and FG_KASLR options enable + * -ffunction-sections, which produces separately named .text sections. In + * the case of CONFIG_FG_KASLR, they need to stay distict so they can be + * separately randomized. Without CONFIG_FG_KASLR, the separate .text + * sections can be collected back into a common section, which makes the + * resulting image slightly smaller + */ +#if (defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || \ + defined(CONFIG_LTO_CLANG)) && !defined(CONFIG_FG_KASLR) +#define TEXT_MAIN SECT_WILDCARD(.text) +#elif defined(CONFIG_FG_KASLR) +#define TEXT_MAIN .text.__unused__ +#else +#define TEXT_MAIN .text +#endif + +/* + * Same for modules. 
However, LD_DEAD_CODE_DATA_ELIMINATION doesn't touch + * them, so no need to check for it here. + */ +#if defined(CONFIG_LTO_CLANG) && !defined(CONFIG_MODULE_FG_KASLR) +#define TEXT_MAIN_MODULE SECT_WILDCARD(.text) +#elif defined(CONFIG_MODULE_FG_KASLR) +#define TEXT_MAIN_MODULE .text.__unused__ +#else +#define TEXT_MAIN_MODULE .text +#endif + +/* + * Used by scripts/generate_text_sections.pl to inject text sections, + * harmless if FG-KASLR is disabled. + */ +#define TEXT_FG_KASLR __fg_kaslr_magic = .; + /* * GCC 4.5 and later have a 32 bytes section alignment for structures. * Except GCC 4.9, that feels the need to align on 64 bytes. @@ -564,7 +600,7 @@ #define NOINSTR_TEXT \ ALIGN_FUNCTION(); \ __noinstr_text_start = .; \ - *(.noinstr.text) \ + *(SECT_WILDCARD(.noinstr.text)) \ __noinstr_text_end = .; /* @@ -621,7 +657,7 @@ #define ENTRY_TEXT \ ALIGN_FUNCTION(); \ __entry_text_start = .; \ - *(.entry.text) \ + *(SECT_WILDCARD(.entry.text)) \ __entry_text_end = .; #define IRQENTRY_TEXT \ @@ -643,7 +679,7 @@ __static_call_text_end = .; /* Section used for early init (in .S files) */ -#define HEAD_TEXT KEEP(*(.head.text)) +#define HEAD_TEXT KEEP(*(SECT_WILDCARD(.head.text))) #define HEAD_TEXT_SECTION \ .head.text : AT(ADDR(.head.text) - LOAD_OFFSET) { \ @@ -840,6 +876,7 @@ #define ELF_DETAILS \ .comment 0 : { *(.comment) } \ .symtab 0 : { *(.symtab) } \ + .symtab_shndx 0 : { *(.symtab_shndx) } \ .strtab 0 : { *(.strtab) } \ .shstrtab 0 : { *(.shstrtab) } diff --git a/include/linux/linkage.h b/include/linux/linkage.h index dbf8506decca0f..d4de269478aee6 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -73,6 +73,40 @@ #define __ALIGN_STR ".align 4,0x90" #endif +/* + * Allow ASM symbols to have their own unique sections if they are being + * generated by the compiler for C functions (DCE, FG-KASLR, LTO). Correlates + * with the presence of the `-ffunction-section` in KBUILD_CFLAGS. + */ +#if defined(CONFIG_HAVE_ASM_FUNCTION_SECTIONS) && \ + ((defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) && !defined(MODULE)) || \ + (defined(CONFIG_FG_KASLR) && !defined(MODULE)) || \ + (defined(CONFIG_MODULE_FG_KASLR) && defined(MODULE)) || \ + (defined(CONFIG_LTO_CLANG))) + +#define SYM_PUSH_SECTION(name) \ + .pushsection %S.name, "ax" + +#define SYM_POP_SECTION() \ + .popsection + +#define __ASM_PUSH_SECTION(name) \ + ".pushsection %S." name ", \"ax\"" + +#else /* !(CONFIG_HAVE_ASM_FUNCTION_SECTIONS && (DCE || FG_KASLR || LTO)) */ + +#define SYM_PUSH_SECTION(name) +#define SYM_POP_SECTION() +#define __ASM_PUSH_SECTION(name) + +#endif /* !(CONFIG_HAVE_ASM_FUNCTION_SECTIONS && (DCE || FG_KASLR || LTO)) */ + +#define ASM_PUSH_SECTION(name) \ + __ASM_PUSH_SECTION(__stringify(name)) + +#define ASM_POP_SECTION() \ + __stringify(SYM_POP_SECTION()) + #ifdef __ASSEMBLY__ /* SYM_T_FUNC -- type used by assembler to mark functions */ @@ -209,6 +243,15 @@ SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) #endif +/* + * SYM_FUNC_START_WEAK_ALIAS -- use where there are two global names for one + * function, and one of them is weak + */ +#ifndef SYM_FUNC_START_WEAK_ALIAS +#define SYM_FUNC_START_WEAK_ALIAS(name) \ + SYM_START(name, SYM_L_WEAK, SYM_A_ALIGN) +#endif + /* * SYM_FUNC_START_ALIAS -- use where there are two global names for one * function @@ -225,12 +268,24 @@ * later. 
*/ #define SYM_FUNC_START(name) \ + SYM_PUSH_SECTION(name) ASM_NL \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) +#endif + +/* + * SYM_FUNC_START_SECT -- use for global functions, will be conditionally + * placed into a section specified in the second argument + */ +#ifndef SYM_FUNC_START_SECT +#define SYM_FUNC_START_SECT(name, sect) \ + SYM_PUSH_SECTION(sect) ASM_NL \ SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) #endif /* SYM_FUNC_START_NOALIGN -- use for global functions, w/o alignment */ #ifndef SYM_FUNC_START_NOALIGN #define SYM_FUNC_START_NOALIGN(name) \ + SYM_PUSH_SECTION(name) ASM_NL \ SYM_START(name, SYM_L_GLOBAL, SYM_A_NONE) #endif @@ -238,24 +293,38 @@ #ifndef SYM_FUNC_START_LOCAL /* the same as SYM_FUNC_START_LOCAL_ALIAS, see comment near SYM_FUNC_START */ #define SYM_FUNC_START_LOCAL(name) \ + SYM_PUSH_SECTION(name) ASM_NL \ SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) #endif /* SYM_FUNC_START_LOCAL_NOALIGN -- use for local functions, w/o alignment */ #ifndef SYM_FUNC_START_LOCAL_NOALIGN #define SYM_FUNC_START_LOCAL_NOALIGN(name) \ + SYM_PUSH_SECTION(name) ASM_NL \ + SYM_START(name, SYM_L_LOCAL, SYM_A_NONE) +#endif + +/* + * SYM_FUNC_START_LOCAL_NOALIGN_SECT -- use for local functions, w/o alignment, + * will be conditionally placed into a section specified in the second argument + */ +#ifndef SYM_FUNC_START_LOCAL_NOALIGN_SECT +#define SYM_FUNC_START_LOCAL_NOALIGN_SECT(name, sect) \ + SYM_PUSH_SECTION(sect) ASM_NL \ SYM_START(name, SYM_L_LOCAL, SYM_A_NONE) #endif /* SYM_FUNC_START_WEAK -- use for weak functions */ #ifndef SYM_FUNC_START_WEAK #define SYM_FUNC_START_WEAK(name) \ + SYM_PUSH_SECTION(name) ASM_NL \ SYM_START(name, SYM_L_WEAK, SYM_A_ALIGN) #endif /* SYM_FUNC_START_WEAK_NOALIGN -- use for weak functions, w/o alignment */ #ifndef SYM_FUNC_START_WEAK_NOALIGN #define SYM_FUNC_START_WEAK_NOALIGN(name) \ + SYM_PUSH_SECTION(name) ASM_NL \ SYM_START(name, SYM_L_WEAK, SYM_A_NONE) #endif @@ -272,24 +341,59 @@ #ifndef SYM_FUNC_END /* the same as SYM_FUNC_END_ALIAS, see comment near SYM_FUNC_START */ #define SYM_FUNC_END(name) \ - SYM_END(name, SYM_T_FUNC) + SYM_END(name, SYM_T_FUNC) ASM_NL \ + SYM_POP_SECTION() #endif /* SYM_CODE_START -- use for non-C (special) functions */ #ifndef SYM_CODE_START #define SYM_CODE_START(name) \ + SYM_PUSH_SECTION(name) ASM_NL \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) +#endif + +/* + * SYM_CODE_START_SECT -- use for non-C (special) functions, will be + * conditionally placed into a section specified in the second argument + */ +#ifndef SYM_CODE_START_SECT +#define SYM_CODE_START_SECT(name, sect) \ + SYM_PUSH_SECTION(sect) ASM_NL \ SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) #endif /* SYM_CODE_START_NOALIGN -- use for non-C (special) functions, w/o alignment */ #ifndef SYM_CODE_START_NOALIGN #define SYM_CODE_START_NOALIGN(name) \ + SYM_PUSH_SECTION(name) ASM_NL \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_NONE) +#endif + +/* + * SYM_CODE_START_NOALIGN_SECT -- use for non-C (special) functions, + * w/o alignment, will be conditionally placed into a section specified + * in the second argument + */ +#ifndef SYM_CODE_START_NOALIGN_SECT +#define SYM_CODE_START_NOALIGN_SECT(name, sect) \ + SYM_PUSH_SECTION(sect) ASM_NL \ SYM_START(name, SYM_L_GLOBAL, SYM_A_NONE) #endif /* SYM_CODE_START_LOCAL -- use for local non-C (special) functions */ #ifndef SYM_CODE_START_LOCAL #define SYM_CODE_START_LOCAL(name) \ + SYM_PUSH_SECTION(name) ASM_NL \ + SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) +#endif + +/* + * SYM_CODE_START_LOCAL_SECT -- use for local non-C (special)
functions, will + * be conditionally placed into a section specified in the second argument + */ +#ifndef SYM_CODE_START_LOCAL_SECT +#define SYM_CODE_START_LOCAL_SECT(name, sect) \ + SYM_PUSH_SECTION(sect) ASM_NL \ SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) #endif @@ -299,13 +403,26 @@ */ #ifndef SYM_CODE_START_LOCAL_NOALIGN #define SYM_CODE_START_LOCAL_NOALIGN(name) \ + SYM_PUSH_SECTION(name) ASM_NL \ + SYM_START(name, SYM_L_LOCAL, SYM_A_NONE) +#endif + +/* + * SYM_CODE_START_LOCAL_NOALIGN_SECT -- use for local non-C (special) + * functions, w/o alignment, will be conditionally placed into a section + * specified in the second argument + */ +#ifndef SYM_CODE_START_LOCAL_NOALIGN_SECT +#define SYM_CODE_START_LOCAL_NOALIGN_SECT(name, sect) \ + SYM_PUSH_SECTION(sect) ASM_NL \ SYM_START(name, SYM_L_LOCAL, SYM_A_NONE) #endif /* SYM_CODE_END -- the end of SYM_CODE_START_LOCAL, SYM_CODE_START, ... */ #ifndef SYM_CODE_END #define SYM_CODE_END(name) \ - SYM_END(name, SYM_T_NONE) + SYM_END(name, SYM_T_NONE) ASM_NL \ + SYM_POP_SECTION() #endif /* === data annotations === */ diff --git a/include/linux/random.h b/include/linux/random.h index c45b2693e51fb8..596b274231508c 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -110,6 +110,22 @@ declare_get_random_var_wait(long) unsigned long randomize_page(unsigned long start, unsigned long range); +/** + * shuffle_array - use a Fisher-Yates algorithm to shuffle an array. + * @arr: pointer to the array + * @nents: the number of elements in the array + */ +#define shuffle_array(arr, nents) ({ \ + typeof(&(arr)[0]) __arr = &(arr)[0]; \ + size_t __i; \ + \ + for (__i = (nents) - 1; __i > 0; __i--) { \ + size_t __j = get_random_long() % (__i + 1); \ + \ + swap(__arr[__i], __arr[__j]); \ + } \ +}) + /* * This is designed to be standalone for just prandom * users, but for now we include it from diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 61bf4774b8f2a1..1c74d959491970 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -299,6 +299,7 @@ typedef struct elf64_phdr { #define SHN_LIVEPATCH 0xff20 #define SHN_ABS 0xfff1 #define SHN_COMMON 0xfff2 +#define SHN_XINDEX 0xffff #define SHN_HIRESERVE 0xffff typedef struct elf32_shdr { diff --git a/init/Kconfig b/init/Kconfig index e9119bf54b1f32..90951631aa0386 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -86,6 +86,9 @@ config CC_HAS_ASM_INLINE config CC_HAS_NO_PROFILE_FN_ATTR def_bool $(success,echo '__attribute__((no_profile_instrument_function)) int x();' | $(CC) -x c - -c -o /dev/null -Werror) +config LD_HAS_Z_UNIQUE_SYMBOL + def_bool $(ld-option,-z unique-symbol) + config CONSTRUCTORS bool @@ -1383,6 +1386,17 @@ config CC_OPTIMIZE_FOR_SIZE endchoice +config HAVE_ASM_FUNCTION_SECTIONS + depends on ARCH_SUPPORTS_ASM_FUNCTION_SECTIONS + depends on $(cc-option,-Wa$(comma)--sectname-subst) + def_bool y + help + This enables asm function sections if both architecture and + toolchain support it. It allows creating a separate section + for each function written in assembly in order to improve DCE, + LTO and FG-KASLR (works the same way as -ffunction-sections + for C code).
+ config HAVE_LD_DEAD_CODE_DATA_ELIMINATION bool help @@ -2048,6 +2062,34 @@ config PROFILING config TRACEPOINTS bool +config FG_KASLR + bool "Function Granular Kernel Address Space Layout Randomization" + depends on ARCH_SUPPORTS_FG_KASLR + depends on $(cc-option,-ffunction-sections) + depends on LD_HAS_Z_UNIQUE_SYMBOL || !LIVEPATCH + help + This option improves the randomness of the kernel text + over basic Kernel Address Space Layout Randomization (KASLR) + by reordering the kernel text at boot time. This feature + uses information generated at compile time to re-layout the + kernel text section at boot time at function level granularity. + + If unsure, say N. + +config FG_KASLR_SHIFT + int "FG-KASLR granularity (number of functions per section shift)" + depends on FG_KASLR + range 0 16 + default 0 + help + This sets the number of functions that will be put in each section + as a power of two. + Decreasing the value increases the randomization, but also increases + the size of the final kernel/vmlinux due to the amount of sections. + 0 means that a separate section will be created for each function. + 16 almost disables the randomization, leaving only the manual + separation. + endmenu # General setup source "arch/Kconfig" @@ -2344,6 +2386,33 @@ config UNUSED_KSYMS_WHITELIST one per line. The path can be absolute, or relative to the kernel source tree. +config MODULE_FG_KASLR + bool "Module Function Granular Layout Randomization" + depends on $(cc-option,-ffunction-sections) + depends on LD_HAS_Z_UNIQUE_SYMBOL || !LIVEPATCH + default FG_KASLR + help + This option randomizes the module text section by reordering the text + section by function at module load time. In order to use this + feature, the module must have been compiled with the + -ffunction-sections compiler flag. + + If unsure, say N. + +config MODULE_FG_KASLR_SHIFT + int "Module FG-KASLR granularity (functions per section shift)" + depends on MODULE_FG_KASLR + range 0 16 + default 0 + help + This sets the number of functions that will be put in each section + as a power of two. + Decreasing the value increases the randomization, but also increases + the size of the final kernel module due to the amount of sections. + 0 means that a separate section will be created for each function. + 16 almost disables the randomization, leaving only the manual + separation. + endif # MODULES config MODULES_TREE_LOOKUP diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 951c93216fc4d9..d991afa9b5bfaa 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -575,13 +575,15 @@ struct kallsym_iter { loff_t pos_mod_end; loff_t pos_ftrace_mod_end; loff_t pos_bpf_end; + loff_t pos_end; unsigned long value; unsigned int nameoff; /* If iterating in core kernel symbols. */ char type; char name[KSYM_NAME_LEN]; char module_name[MODULE_NAME_LEN]; int exported; - int show_value; + int show_layout:1; + loff_t shuffled_pos[]; }; int __weak arch_get_kallsym(unsigned int symnum, unsigned long *value, @@ -661,11 +663,19 @@ static int get_ksymbol_bpf(struct kallsym_iter *iter) */ static int get_ksymbol_kprobe(struct kallsym_iter *iter) { + int ret; + strlcpy(iter->module_name, "__builtin__kprobes", MODULE_NAME_LEN); iter->exported = 0; - return kprobe_get_kallsym(iter->pos - iter->pos_bpf_end, - &iter->value, &iter->type, - iter->name) < 0 ? 
0 : 1; + ret = kprobe_get_kallsym(iter->pos - iter->pos_bpf_end, + &iter->value, &iter->type, + iter->name); + if (ret < 0) { + iter->pos_end = iter->pos; + return 0; + } + + return 1; } /* Returns space to next name. */ @@ -688,11 +698,16 @@ static void reset_iter(struct kallsym_iter *iter, loff_t new_pos) iter->name[0] = '\0'; iter->nameoff = get_symbol_offset(new_pos); iter->pos = new_pos; + + if (!iter->show_layout) + return; + if (new_pos == 0) { iter->pos_arch_end = 0; iter->pos_mod_end = 0; iter->pos_ftrace_mod_end = 0; iter->pos_bpf_end = 0; + iter->pos_end = 0; } } @@ -721,13 +736,23 @@ static int update_iter_mod(struct kallsym_iter *iter, loff_t pos) get_ksymbol_bpf(iter)) return 1; - return get_ksymbol_kprobe(iter); + if ((!iter->pos_end || iter->pos_end > pos) && + get_ksymbol_kprobe(iter)) + return 1; + + return 0; } /* Returns false if pos at or past end of file. */ static int update_iter(struct kallsym_iter *iter, loff_t pos) { - /* Module symbols can be accessed randomly. */ + if (!iter->show_layout) { + if (pos > iter->pos_end) + return 0; + + pos = iter->shuffled_pos[pos]; + } + if (pos >= kallsyms_num_syms) return update_iter_mod(iter, pos); @@ -770,7 +795,7 @@ static int s_show(struct seq_file *m, void *p) if (!iter->name[0]) return 0; - value = iter->show_value ? (void *)iter->value : NULL; + value = iter->show_layout ? (void *)iter->value : NULL; if (iter->module_name[0]) { char type; @@ -807,9 +832,10 @@ static inline int kallsyms_for_perf(void) } /* - * We show kallsyms information even to normal users if we've enabled - * kernel profiling and are explicitly not paranoid (so kptr_restrict - * is clear, and sysctl_perf_event_paranoid isn't set). + * We show kallsyms information and display them sorted by address even + * to normal users if we've enabled kernel profiling and are explicitly + * not paranoid (so kptr_restrict is clear, and sysctl_perf_event_paranoid + * isn't set). * * Otherwise, require CAP_SYSLOG (assuming kptr_restrict isn't set to * block even that). @@ -839,16 +865,53 @@ static int kallsyms_open(struct inode *inode, struct file *file) * using get_symbol_offset for every symbol. */ struct kallsym_iter *iter; - iter = __seq_open_private(file, &kallsyms_op, sizeof(*iter)); - if (!iter) - return -ENOMEM; - reset_iter(iter, 0); + /* + * This fake iter is needed for the cases with unprivileged + * access. We need to know the exact number of symbols to + * randomize the display layout. + */ + struct kallsym_iter fake; + size_t size = sizeof(*iter); + loff_t pos; + + fake.show_layout = true; + reset_iter(&fake, 0); /* * Instead of checking this on every s_show() call, cache * the result here at open time. 
*/ - iter->show_value = kallsyms_show_value(file->f_cred); + fake.show_layout = kallsyms_show_value(file->f_cred); + if (!fake.show_layout) { + pos = kallsyms_num_syms; + while (update_iter_mod(&fake, pos)) + pos++; + + size = struct_size(iter, shuffled_pos, fake.pos_end + 1); + } + + iter = __seq_open_private(file, &kallsyms_op, size); + if (!iter) + return -ENOMEM; + + iter->show_layout = fake.show_layout; + reset_iter(iter, 0); + + if (iter->show_layout) + return 0; + + /* Copy the bounds since they were already discovered above */ + iter->pos_arch_end = fake.pos_arch_end; + iter->pos_mod_end = fake.pos_mod_end; + iter->pos_ftrace_mod_end = fake.pos_ftrace_mod_end; + iter->pos_bpf_end = fake.pos_bpf_end; + iter->pos_end = fake.pos_end; + + for (pos = 0; pos <= iter->pos_end; pos++) + iter->shuffled_pos[pos] = pos; + + shuffle_array(iter->shuffled_pos, iter->pos_end + 1); + return 0; } @@ -859,6 +922,7 @@ const char *kdb_walk_kallsyms(loff_t *pos) if (*pos == 0) { memset(&kdb_walk_kallsyms_iter, 0, sizeof(kdb_walk_kallsyms_iter)); + kdb_walk_kallsyms_iter.show_layout = true; reset_iter(&kdb_walk_kallsyms_iter, 0); } while (1) { diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 585494ec464f98..7a330465a8c770 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -143,11 +143,13 @@ static int klp_find_callback(void *data, const char *name, args->count++; /* - * Finish the search when the symbol is found for the desired position - * or the position is not defined for a non-unique symbol. + * Finish the search when unique symbol names are enabled + * or the symbol is found for the desired position or the + * position is not defined for a non-unique symbol. */ - if ((args->pos && (args->count == args->pos)) || - (!args->pos && (args->count > 1))) + if (IS_ENABLED(CONFIG_LD_HAS_Z_UNIQUE_SYMBOL) || + (args->pos && args->count == args->pos) || + (!args->pos && args->count > 1)) return 1; return 0; @@ -169,6 +171,13 @@ static int klp_find_object_symbol(const char *objname, const char *name, else kallsyms_on_each_symbol(klp_find_callback, &args); + /* + * If the LD's `-z unique-symbol` flag is available and enabled, + * sympos checks are not relevant. + */ + if (IS_ENABLED(CONFIG_LD_HAS_Z_UNIQUE_SYMBOL)) + sympos = 0; + /* * Ensure an address was found. If sympos is 0, ensure symbol is unique; * otherwise ensure the symbol position count matches sympos. 
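[Editor's note: the following user-space sketch is not part of the patch. It illustrates, under the assumption of a tiny 8-entry table, the Fisher-Yates walk that the shuffle_array() macro above performs and that kallsyms_open() uses to permute the /proc/kallsyms display order for unprivileged readers; rand()/srand() stand in for the kernel's get_random_long().]

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* Fisher-Yates shuffle, same walk as the kernel's shuffle_array() macro. */
static void shuffle(long *arr, size_t nents)
{
	size_t i;

	for (i = nents - 1; i > 0; i--) {
		size_t j = (size_t)rand() % (i + 1);
		long tmp = arr[i];

		arr[i] = arr[j];
		arr[j] = tmp;
	}
}

int main(void)
{
	long pos[8];
	size_t i;

	srand((unsigned int)time(NULL));

	/* Start from the identity mapping, as kallsyms_open() does. */
	for (i = 0; i < 8; i++)
		pos[i] = (long)i;

	shuffle(pos, 8);

	/* Each display slot now maps to a randomly chosen symbol index. */
	for (i = 0; i < 8; i++)
		printf("display slot %zu -> symbol index %ld\n", i, pos[i]);

	return 0;
}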
diff --git a/kernel/module.c b/kernel/module.c index 46a5c2ed192855..616a622953fa15 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include "module-internal.h" @@ -1526,7 +1527,7 @@ static void free_sect_attrs(struct module_sect_attrs *sect_attrs) for (section = 0; section < sect_attrs->nsections; section++) kfree(sect_attrs->attrs[section].battr.attr.name); - kfree(sect_attrs); + kvfree(sect_attrs); } static void add_sect_attrs(struct module *mod, const struct load_info *info) @@ -1543,7 +1544,7 @@ static void add_sect_attrs(struct module *mod, const struct load_info *info) size[0] = ALIGN(struct_size(sect_attrs, attrs, nloaded), sizeof(sect_attrs->grp.bin_attrs[0])); size[1] = (nloaded + 1) * sizeof(sect_attrs->grp.bin_attrs[0]); - sect_attrs = kzalloc(size[0] + size[1], GFP_KERNEL); + sect_attrs = kvzalloc(size[0] + size[1], GFP_KERNEL); if (sect_attrs == NULL) return; @@ -2415,6 +2416,71 @@ static bool module_init_layout_section(const char *sname) return module_init_section(sname); } +/* + * randomize_text() + * Look through the core section looking for executable code sections. + * Store sections in an array and then shuffle the sections + * to reorder the functions. + */ +static void randomize_text(struct module *mod, struct load_info *info) +{ + int max_sections = info->hdr->e_shnum; + int num_text_sections = 0; + Elf_Shdr **text_list; + int i, size; + + text_list = kvmalloc_array(max_sections, sizeof(*text_list), GFP_KERNEL); + if (!text_list) + return; + + for (i = 0; i < max_sections; i++) { + Elf_Shdr *shdr = &info->sechdrs[i]; + const char *sname = info->secstrings + shdr->sh_name; + + if (!(shdr->sh_flags & SHF_ALLOC) || + !(shdr->sh_flags & SHF_EXECINSTR) || + (shdr->sh_flags & ARCH_SHF_SMALL) || + module_init_layout_section(sname)) + continue; + + /* + * With CONFIG_CFI_CLANG, .text with __cfi_check() must come + * before any other text sections, and be aligned to PAGE_SIZE. + * Don't include it in the shuffle list. + */ + if (IS_ENABLED(CONFIG_CFI_CLANG) && !strcmp(sname, ".text")) + continue; + + if (!num_text_sections) + size = shdr->sh_entsize; + + text_list[num_text_sections] = shdr; + num_text_sections++; + } + + if (!num_text_sections) + goto exit; + + shuffle_array(text_list, num_text_sections); + + for (i = 0; i < num_text_sections; i++) { + Elf_Shdr *shdr = text_list[i]; + + /* + * get_offset has a section index for it's last + * argument, that is only used by arch_mod_section_prepend(), + * which is only defined by parisc. Since this type + * of randomization isn't supported on parisc, we can + * safely pass in zero as the last argument, as it is + * ignored. + */ + shdr->sh_entsize = get_offset(mod, &size, shdr, 0); + } + +exit: + kvfree(text_list); +} + /* * Lay out the SHF_ALLOC sections in a way not dissimilar to how ld * might -- code, read-only data, read-write data, small data. 
Tally @@ -2509,6 +2575,9 @@ static void layout_sections(struct module *mod, struct load_info *info) break; } } + + if (IS_ENABLED(CONFIG_MODULE_FG_KASLR)) + randomize_text(mod, info); } static void set_license(struct module *mod, const char *license) diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal index 7f39599e9faedd..4ca9d8fc978d17 100644 --- a/scripts/Makefile.modfinal +++ b/scripts/Makefile.modfinal @@ -28,13 +28,25 @@ quiet_cmd_cc_o_c = CC [M] $@ %.mod.o: %.mod.c FORCE $(call if_changed_dep,cc_o_c) +ifdef CONFIG_MODULE_FG_KASLR +quiet_cmd_gen_modules_lds = GEN [M] $@ + cmd_gen_modules_lds = \ + $(PERL) $(srctree)/scripts/generate_text_sections.pl \ + $(if $(CONFIG_HAVE_ASM_FUNCTION_SECTIONS),-a) \ + -s $(CONFIG_MODULE_FG_KASLR_SHIFT) $(filter %.o, $^) \ + < $(filter %.lds, $^) > $@ + +%.ko.lds: %$(mod-prelink-ext).o scripts/module.lds FORCE + $(call if_changed,gen_modules_lds) +endif + ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink) quiet_cmd_ld_ko_o = LD [M] $@ cmd_ld_ko_o += \ $(LD) -r $(KBUILD_LDFLAGS) \ $(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE) \ - -T scripts/module.lds -o $@ $(filter %.o, $^); \ + -T $(filter %.lds, $^) -o $@ $(filter %.o, $^); \ $(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true) quiet_cmd_btf_ko = BTF [M] $@ @@ -56,13 +68,15 @@ if_changed_except = $(if $(call newer_prereqs_except,$(2))$(cmd-check), \ # Re-generate module BTFs if either module's .ko or vmlinux changed -$(modules): %.ko: %$(mod-prelink-ext).o %.mod.o scripts/module.lds $(if $(KBUILD_BUILTIN),vmlinux) FORCE +$(modules): %.ko: %$(mod-prelink-ext).o %.mod.o +$(modules): %.ko: $(if $(CONFIG_MODULE_FG_KASLR),%.ko.lds,scripts/module.lds) +$(modules): %.ko: $(if $(KBUILD_BUILTIN),vmlinux) FORCE +$(call if_changed_except,ld_ko_o,vmlinux) ifdef CONFIG_DEBUG_INFO_BTF_MODULES +$(if $(newer-prereqs),$(call cmd,btf_ko)) endif -targets += $(modules) $(modules:.ko=.mod.o) +targets += $(modules) $(modules:.ko=.mod.o) $(modules:.ko=.ko.lds) # Add FORCE to the prequisites of a target to force it to be always rebuilt. # --------------------------------------------------------------------------- diff --git a/scripts/generate_text_sections.pl b/scripts/generate_text_sections.pl new file mode 100755 index 00000000000000..a05ae9fb0041fd --- /dev/null +++ b/scripts/generate_text_sections.pl @@ -0,0 +1,172 @@ +#!/usr/bin/env perl +# SPDX-License-Identifier: GPL-2.0-only +# +# Generates a new LD script with every .text.* section described for FG-KASLR +# to avoid orphan/heuristic section placement and double-checks we don't have +# any symbols in plain .text section. +# +# Copyright (C) 2021-2022, Intel Corporation. +# Author: Alexander Lobakin +# + +use strict; +use warnings; + +## parameters +my $add_assert = 0; +my $expecting = 0; +my $shift = 0; +my $file; + +foreach (@ARGV) { + if ($_ eq '-a') { + $add_assert = 1; + } elsif ($_ eq '-s') { + $expecting = 1; + } elsif ($expecting) { + $shift = $_ + 0; + if ($shift < 0) { + $shift = 0; + } elsif ($shift > 16) { + $shift = 16; + } + $expecting = 0; + } elsif (!defined($file)) { + $file = $_; + } else { + die "$0: usage: $0 [-a] [-s shift] binary < linker script"; + } +} + +if (!defined($file)) { + die "$0: usage: $0 [-a] [-s shift] binary < linker script"; +} + +## environment +my $readelf = $ENV{'READELF'} || die "$0: ERROR: READELF not set?"; + +## text sections array +my @sections = (); +my $has_ccf = 0; +my $vmlinux = 0; + +## max alignment found to reserve some space. 
It would probably be +## better to start from 64, but CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B +## (which aligns every function to 64b) would explode the $count then +my $max_align = 128; +my $count = 0; + +sub read_sections { + open(my $fh, "\"$readelf\" -SW \"$file\" 2>/dev/null |") + or die "$0: ERROR: failed to execute \"$readelf\": $!"; + + while (<$fh>) { + my $name; + my $align; + chomp; + + ($name, $align) = $_ =~ /^\s*\[[\s0-9]*\]\s*(\.\S*)\s*[A-Z]*\s*[0-9a-f]{16}\s*[0-9a-f]*\s*[0-9a-f]*\s*[0-9a-f]*\s*[0-9a-f]{2}\s*[A-Z]{2}\s*[0-9]\s*[0-9]\s*([0-9]*)$/; + + if (!defined($name)) { + next; + } + + ## Clang 13 onwards emits __cfi_check_fail only on final + ## linking, so it won't appear in .o files and will be + ## missing in @sections. Add it manually to prevent + ## spawning orphans. + if ($name eq ".text.__cfi_check_fail") { + $has_ccf = 1; + } + + ## If we're processing a module, don't reserve any space + ## at the end as its sections are being allocated separately. + if ($name eq ".sched.text") { + $vmlinux = 1; + } + + if (!($name =~ /^\.text(\.(?!hot\.|unknown\.|unlikely\.|.san\.)[0-9a-zA-Z_]*){1,2}((\.constprop|\.isra|\.part)\.[0-9]){0,2}(|\.[0-9cfi]*)$/)) { + next; + } + + if ($align > $max_align) { + $max_align = $align; + $count = 1; + } elsif ($align == $max_align) { + $count++; + } + + push(@sections, $name); + } + + close($fh); + + if (!$has_ccf) { + push(@sections, ".text.__cfi_check_fail"); + } + + @sections = sort @sections; +} + +sub print_sections { + my $fps = 1 << $shift; + my $counter = 1; + + print "\t.text.0 : ALIGN(16) {\n"; + print "\t\t*(.text)\n"; + print "\t}\n"; + + ## If we have asm function sections, we shouldn't have anything + ## in here. + if ($add_assert) { + print "\tASSERT(SIZEOF(.text.0) == 0, \"Plain .text is not empty!\")\n\n"; + } + + if (!@sections) { + return; + } + + while () { + print "\t.text.$counter : ALIGN(16) {\n"; + + my @a = (($counter - 1) * $fps .. ($counter * $fps) - 1); + for (@a) { + print "\t\t*($sections[$_])\n"; + + if ($sections[$_] eq $sections[-1]) { + print "\t}\n"; + return; + } + } + + print "\t}\n"; + $counter++; + } +} + +sub print_reserve { + ## If we have text sections aligned with 128 bytes or more, make + ## sure we reserve some space for them to not overlap _etext + ## while shuffling sections. + if (!$vmlinux or !$count) { + return; + } + + print "\n\t. 
+= $max_align * $count;\n"; +} + +sub print_lds { + while () { + if ($_ =~ /^\s*__fg_kaslr_magic = \.;$/) { + print_sections(); + print_reserve(); + } else { + print $_; + } + } +} + +## main + +read_sections(); +print_lds(); diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 666f7bbc13ebba..701cf540c12ecb 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -70,6 +70,23 @@ gen_symversions() done } +# If CONFIG_FG_KASLR is selected, generate a linker script which will +# declare all custom text sections for future boottime shuffling +gen_text_sections() +{ + local shift=$(sed -n 's/^CONFIG_FG_KASLR_SHIFT=\(.*\)$/\1/p' include/config/auto.conf) + local assert="" + + is_enabled CONFIG_HAVE_ASM_FUNCTION_SECTIONS && assert="-a" + + info GEN .tmp_vmlinux.lds + + ${PERL} ${srctree}/scripts/generate_text_sections.pl \ + ${assert} -s "${shift}" vmlinux.o \ + < "${objtree}/${KBUILD_LDS}" \ + > .tmp_vmlinux.lds +} + # Link of vmlinux.o used for section mismatch analysis # ${1} output file modpost_link() @@ -162,12 +179,19 @@ vmlinux_link() local ld local ldflags local ldlibs + local lds info LD ${output} # skip output file argument shift + if is_enabled CONFIG_FG_KASLR; then + lds=".tmp_vmlinux.lds" + else + lds="${objtree}/${KBUILD_LDS}" + fi + if is_enabled CONFIG_LTO_CLANG; then # Use vmlinux.o instead of performing the slow LTO link again. objs=vmlinux.o @@ -189,7 +213,7 @@ vmlinux_link() ldlibs= fi - ldflags="${ldflags} ${wl}--script=${objtree}/${KBUILD_LDS}" + ldflags="${ldflags} ${wl}--script=${lds}" # The kallsyms linking does not need debug symbols included. if [ "$output" != "${output#.tmp_vmlinux.kallsyms}" ] ; then @@ -346,6 +370,10 @@ info GEN modules.builtin tr '\0' '\n' < modules.builtin.modinfo | sed -n 's/^[[:alnum:]:_]*\.file=//p' | tr ' ' '\n' | uniq | sed -e 's:^:kernel/:' -e 's/$/.ko/' > modules.builtin +if is_enabled CONFIG_FG_KASLR; then + gen_text_sections +fi + btf_vmlinux_bin_o="" if is_enabled CONFIG_DEBUG_INFO_BTF; then btf_vmlinux_bin_o=.btf.vmlinux.bin.o diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 6bfa332179140b..84d2c44f93832c 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -689,11 +689,28 @@ static void handle_modversion(const struct module *mod, sym_set_crc(symname, crc); } +static char *remove_dot(char *s) +{ + size_t n = strcspn(s, "."); + + if (n && s[n]) { + size_t m = strspn(s + n + 1, "0123456789"); + + if (m && (s[n + m + 1] == '.' 
|| s[n + m + 1] == 0)) + s[n] = 0; + + /* strip trailing .lto */ + if (strends(s, ".lto")) + s[strlen(s) - 4] = '\0'; + } + + return s; +} + static void handle_symbol(struct module *mod, struct elf_info *info, const Elf_Sym *sym, const char *symname) { enum export export; - const char *name; if (strstarts(symname, "__ksymtab")) export = export_from_secname(info, get_secindex(info, sym)); @@ -734,8 +751,11 @@ static void handle_symbol(struct module *mod, struct elf_info *info, default: /* All exported symbols */ if (strstarts(symname, "__ksymtab_")) { - name = symname + strlen("__ksymtab_"); - sym_add_exported(name, mod, export); + char *name; + + name = NOFAIL(strdup(symname + strlen("__ksymtab_"))); + sym_add_exported(remove_dot(name), mod, export); + free(name); } if (strcmp(symname, "init_module") == 0) mod->has_init = 1; @@ -940,7 +960,9 @@ static void check_section(const char *modname, struct elf_info *elf, ".kprobes.text", ".cpuidle.text", ".noinstr.text" #define OTHER_TEXT_SECTIONS ".ref.text", ".head.text", ".spinlock.text", \ ".fixup", ".entry.text", ".exception.text", ".text.*", \ - ".coldtext", ".softirqentry.text" + ".coldtext", ".softirqentry.text", ".text.unlikely.*", \ + ".noinstr.text.*", ".head.text.*", ".fixup.*", \ + ".entry.text.*" #define INIT_SECTIONS ".init.*" #define MEM_INIT_SECTIONS ".meminit.*" @@ -1021,7 +1043,7 @@ enum mismatch { struct sectioncheck { const char *fromsec[20]; const char *bad_tosec[20]; - const char *good_tosec[20]; + const char *good_tosec[25]; enum mismatch mismatch; const char *symbol_white_list[20]; void (*handler)(const char *modname, struct elf_info *elf, @@ -1980,22 +2002,6 @@ static void check_sec_ref(struct module *mod, const char *modname, } } -static char *remove_dot(char *s) -{ - size_t n = strcspn(s, "."); - - if (n && s[n]) { - size_t m = strspn(s + n + 1, "0123456789"); - if (m && (s[n + m] == '.' || s[n + m] == 0)) - s[n] = 0; - - /* strip trailing .lto */ - if (strends(s, ".lto")) - s[strlen(s) - 4] = '\0'; - } - return s; -} - static void read_symbols(const char *modname) { const char *symname; diff --git a/scripts/module.lds.S b/scripts/module.lds.S index 1d0e1e4dc3d2a6..6e957aa614b118 100644 --- a/scripts/module.lds.S +++ b/scripts/module.lds.S @@ -3,6 +3,11 @@ * Archs are free to supply their own linker scripts. ld will * combine them automatically. 
*/ + +#include + +#undef SANITIZER_DISCARDS + #ifdef CONFIG_CFI_CLANG # include # define ALIGN_CFI ALIGN(PAGE_SIZE) @@ -58,9 +63,16 @@ SECTIONS { */ .text : ALIGN_CFI { *(.text.__cfi_check) - *(.text .text.[0-9a-zA-Z_]* .text..L.cfi*) + *(TEXT_MAIN_MODULE) + *(.text..L.cfi.jumptable .text..L.cfi.jumptable.*) + } +#elif defined(CONFIG_MODULE_FG_KASLR) + .text : { + *(TEXT_MAIN_MODULE) } #endif + + TEXT_FG_KASLR } /* bring in arch-specific sections */ diff --git a/scripts/sorttable.h b/scripts/sorttable.h index deb7c1d3e979d4..a6bb46f36854cf 100644 --- a/scripts/sorttable.h +++ b/scripts/sorttable.h @@ -103,11 +103,6 @@ struct orc_entry *g_orc_table; pthread_t orc_sort_thread; -static inline unsigned long orc_ip(const int *ip) -{ - return (unsigned long)ip + *ip; -} - static int orc_sort_cmp(const void *_a, const void *_b) { struct orc_entry *orc_a; diff --git a/tools/arch/x86/include/asm/orc_types.h b/tools/arch/x86/include/asm/orc_types.h index 5a2baf28a1dcda..7708548713c4b2 100644 --- a/tools/arch/x86/include/asm/orc_types.h +++ b/tools/arch/x86/include/asm/orc_types.h @@ -67,6 +67,13 @@ struct orc_entry { #endif } __packed; +static inline unsigned long orc_ip(const int *ip) +{ + return (unsigned long)ip + *ip; +} + +void orc_sort(int *ip_table, struct orc_entry *orc_table, u32 num_orcs); + #endif /* __ASSEMBLY__ */ #endif /* _ORC_TYPES_H */
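[Editor's note: the following user-space sketch is not part of the patch. It illustrates why orc_sort_swap() in the new arch/x86/lib/orc.c rebiases the .orc_unwind_ip entries by 'delta' when swapping them: each entry is a 32-bit offset relative to its own location (decoded by orc_ip() above), so an entry moved to a different slot must be adjusted to keep pointing at the same absolute address. The two-element table and the 0x100/0x200 offsets are made up for illustration.]

#include <assert.h>
#include <stdio.h>

/* Same encoding as the kernel's orc_ip(): a self-relative 32-bit offset. */
static unsigned long orc_ip(const int *ip)
{
	return (unsigned long)ip + *ip;
}

/* Mirrors the delta handling of orc_sort_swap() for the ip table only. */
static void swap_ip_entries(int *a, int *b)
{
	int delta = (int)((char *)b - (char *)a);
	int tmp = *a;

	*a = *b + delta;	/* old value of *b, rebased to slot a */
	*b = tmp - delta;	/* old value of *a, rebased to slot b */
}

int main(void)
{
	int table[2] = { 0x100, 0x200 };	/* arbitrary self-relative offsets */
	unsigned long ip0 = orc_ip(&table[0]);
	unsigned long ip1 = orc_ip(&table[1]);

	swap_ip_entries(&table[0], &table[1]);

	/* The entries changed slots but still decode to the same targets. */
	assert(orc_ip(&table[0]) == ip1);
	assert(orc_ip(&table[1]) == ip0);
	printf("targets preserved: %#lx %#lx\n",
	       orc_ip(&table[0]), orc_ip(&table[1]));

	return 0;
}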