[lvc-project] [PATCH 13/16] efi/x86: Support extracting kernel from libstub

Ard Biesheuvel ardb at kernel.org
Wed Oct 19 10:35:04 MSK 2022


On Tue, 6 Sept 2022 at 12:42, Evgeniy Baskov <baskov at ispras.ru> wrote:
>
> Doing it that way allows setting up stricter memory attributes,
> simplifies boot code path and removes potential relocation
> of kernel image.
>
> Wire up required interfaces and minimally initialize zero page
> fields needed for it to function correctly.
>
> Signed-off-by: Evgeniy Baskov <baskov at ispras.ru>
>
>  create mode 100644 arch/x86/include/asm/shared/extract.h
>  create mode 100644 drivers/firmware/efi/libstub/x86-extract-direct.c
> ---
>  arch/x86/boot/compressed/head_32.S            |   6 +-
>  arch/x86/boot/compressed/head_64.S            |  45 ++++
>  arch/x86/include/asm/shared/extract.h         |  25 ++
>  drivers/firmware/efi/Kconfig                  |  14 ++
>  drivers/firmware/efi/libstub/Makefile         |   1 +
>  drivers/firmware/efi/libstub/efistub.h        |   5 +
>  .../firmware/efi/libstub/x86-extract-direct.c | 220 ++++++++++++++++++
>  drivers/firmware/efi/libstub/x86-stub.c       |  45 ++--
>  8 files changed, 343 insertions(+), 18 deletions(-)
>  create mode 100644 arch/x86/include/asm/shared/extract.h
>  create mode 100644 drivers/firmware/efi/libstub/x86-extract-direct.c
>
> diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
> index b46a1c4109cf..d2866f06bc9f 100644
> --- a/arch/x86/boot/compressed/head_32.S
> +++ b/arch/x86/boot/compressed/head_32.S
> @@ -155,7 +155,11 @@ SYM_FUNC_START(efi32_stub_entry)
>         add     $0x4, %esp
>         movl    8(%esp), %esi   /* save boot_params pointer */
>         call    efi_main
> -       /* efi_main returns the possibly relocated address of startup_32 */
> +
> +       /*
> +        * efi_main returns the possibly
> +        * relocated address of exteracted kernel entry point.

extracted

> +        */
>         jmp     *%eax
>  SYM_FUNC_END(efi32_stub_entry)
>  SYM_FUNC_ALIAS(efi_stub_entry, efi32_stub_entry)
> diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
> index 37ce094571b5..b6bae8e7ee71 100644
> --- a/arch/x86/boot/compressed/head_64.S
> +++ b/arch/x86/boot/compressed/head_64.S
> @@ -555,9 +555,54 @@ SYM_FUNC_START(efi64_stub_entry)
>         and     $~0xf, %rsp                     /* realign the stack */
>         movq    %rdx, %rbx                      /* save boot_params pointer */
>         call    efi_main
> +
> +#ifdef CONFIG_EFI_STUB_EXTRACT_DIRECT
> +       cld
> +       cli
> +
> +       movq    %rbx, %rdi /* boot_params */
> +       movq    %rax, %rsi /* decompressed kernel address */
> +
> +       /* Make sure we have GDT with 32-bit code segment */
> +       leaq    gdt64(%rip), %rax
> +       addq    %rax, 2(%rax)
> +       lgdt    (%rax)
> +
> +       /* Setup data segments. */
> +       xorl    %eax, %eax
> +       movl    %eax, %ds
> +       movl    %eax, %es
> +       movl    %eax, %ss
> +       movl    %eax, %fs
> +       movl    %eax, %gs
> +
> +       pushq   %rsi
> +       pushq   %rdi
> +
> +       call startup32_enable_nx_if_supported
> +
> +       call    trampoline_pgtable_init
> +       movq    %rax, %rdx
> +
> +
> +       /* Swap %rsi and %rsi */
> +       popq    %rsi
> +       popq    %rdi
> +
> +       /* Save the trampoline address in RCX */
> +       movq    trampoline_32bit(%rip), %rcx
> +
> +       /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
> +       pushq   $__KERNEL32_CS
> +       leaq    TRAMPOLINE_32BIT_CODE_OFFSET(%rcx), %rax
> +       pushq   %rax
> +       lretq
> +#else
>         movq    %rbx,%rsi
>         leaq    rva(startup_64)(%rax), %rax
>         jmp     *%rax
> +#endif
> +
>  SYM_FUNC_END(efi64_stub_entry)
>  SYM_FUNC_ALIAS(efi_stub_entry, efi64_stub_entry)
>  #endif
> diff --git a/arch/x86/include/asm/shared/extract.h b/arch/x86/include/asm/shared/extract.h
> new file mode 100644
> index 000000000000..163678145884
> --- /dev/null
> +++ b/arch/x86/include/asm/shared/extract.h
> @@ -0,0 +1,25 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef ASM_SHARED_EXTRACT_H
> +#define ASM_SHARED_EXTRACT_H
> +
> +#define MAP_WRITE      0x02 /* Writable memory */
> +#define MAP_EXEC       0x04 /* Executable memory */
> +#define MAP_ALLOC      0x10 /* Range needs to be allocated */
> +#define MAP_PROTECT    0x20 /* Set exact memory attributes for memory range */
> +
> +struct efi_iofunc {
> +       void (*putstr)(const char *msg);
> +       void (*puthex)(unsigned long x);
> +       unsigned long (*map_range)(unsigned long start,
> +                                  unsigned long end,
> +                                  unsigned int flags);

This looks a bit random - having a map_range() routine as a member of
the console I/O struct. Can we make this abstraction a bit more
natural?

> +};
> +
> +void *efi_extract_kernel(struct boot_params *rmode,
> +                        struct efi_iofunc *iofunc,
> +                        unsigned char *input_data,
> +                        unsigned long input_len,
> +                        unsigned char *output,
> +                        unsigned long output_len);
> +
> +#endif /* ASM_SHARED_EXTRACT_H */
> diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig
> index 6cb7384ad2ac..2418402a0bda 100644
> --- a/drivers/firmware/efi/Kconfig
> +++ b/drivers/firmware/efi/Kconfig
> @@ -91,6 +91,20 @@ config EFI_DXE_MEM_ATTRIBUTES
>           Use DXE services to check and alter memory protection
>           attributes during boot via EFISTUB to ensure that memory
>           ranges used by the kernel are writable and executable.
> +         This option also enables stricter memory attributes
> +         on compressed kernel PE image.
> +
> +config EFI_STUB_EXTRACT_DIRECT
> +       bool "Extract kernel directly from UEFI environment"
> +       depends on EFI && EFI_STUB && X86_64
> +       default y

What is the reason for making this configurable? Couldn't we just
enable it unconditionally?

> +       help
> +         Extract kernel before exiting EFI boot services
> +         This allows maintaining W^X for kernel image for
> +         the whole execution of compressed kernel code.
> +         This also slightly improves efficiency of extraction
> +         code by removing the need to copy the kernel around
> +         and rebuild page tables.
>
>  config EFI_PARAMS_FROM_FDT
>         bool
> diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
> index d0537573501e..1cea7d913356 100644
> --- a/drivers/firmware/efi/libstub/Makefile
> +++ b/drivers/firmware/efi/libstub/Makefile
> @@ -69,6 +69,7 @@ lib-$(CONFIG_EFI_GENERIC_STUB)        += efi-stub.o fdt.o string.o \
>  lib-$(CONFIG_ARM)              += arm32-stub.o
>  lib-$(CONFIG_ARM64)            += arm64-stub.o
>  lib-$(CONFIG_X86)              += x86-stub.o
> +lib-$(CONFIG_EFI_STUB_EXTRACT_DIRECT)  += x86-extract-direct.o
>  lib-$(CONFIG_RISCV)            += riscv-stub.o
>  CFLAGS_arm32-stub.o            := -DTEXT_OFFSET=$(TEXT_OFFSET)
>
> diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
> index 22fe28385db7..cdd1bb50c786 100644
> --- a/drivers/firmware/efi/libstub/efistub.h
> +++ b/drivers/firmware/efi/libstub/efistub.h
> @@ -968,6 +968,11 @@ static inline void
>  efi_enable_reset_attack_mitigation(void) { }
>  #endif
>
> +#ifdef CONFIG_X86
> +unsigned long extract_kernel_direct(struct boot_params *boot_params);
> +void startup_32(struct boot_params *boot_params);
> +#endif
> +

Please put this somewhere else

>  void efi_retrieve_tpm2_eventlog(void);
>
>  #endif
> diff --git a/drivers/firmware/efi/libstub/x86-extract-direct.c b/drivers/firmware/efi/libstub/x86-extract-direct.c
> new file mode 100644
> index 000000000000..6076bd75cfd6
> --- /dev/null
> +++ b/drivers/firmware/efi/libstub/x86-extract-direct.c
> @@ -0,0 +1,220 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +
> +#include <linux/acpi.h>
> +#include <linux/efi.h>
> +#include <linux/elf.h>
> +#include <linux/stddef.h>
> +
> +#include <asm/efi.h>
> +#include <asm/e820/types.h>
> +#include <asm/desc.h>
> +#include <asm/boot.h>
> +#include <asm/bootparam_utils.h>
> +#include <asm/shared/extract.h>
> +#include <asm/shared/pgtable.h>
> +
> +#include "efistub.h"
> +
> +static void do_puthex(unsigned long value);
> +static void do_putstr(const char *msg);
> +

Can we get rid of these forward declarations?

> +static unsigned long do_map_range(unsigned long start,
> +                                 unsigned long end,
> +                                 unsigned int flags)
> +{
> +       efi_status_t status;
> +
> +       unsigned long size = end - start;
> +
> +       if (flags & MAP_ALLOC) {
> +               if (start == (unsigned long)startup_32)
> +                       start = LOAD_PHYSICAL_ADDR;
> +
> +               unsigned long addr;
> +
> +               status = efi_low_alloc_above(size, CONFIG_PHYSICAL_ALIGN,
> +                                            &addr, start);
> +               if (status != EFI_SUCCESS)
> +                       efi_err("Unable to allocate memory for uncompressed kernel");
> +
> +               if (start != addr) {
> +                       efi_debug("Unable to allocate at given address"
> +                                 " (desired=0x%lx, actual=0x%lx)",
> +                                 (unsigned long)start, addr);
> +                       start = addr;
> +               }
> +       }
> +
> +       if (flags & (MAP_PROTECT | MAP_ALLOC)) {
> +               unsigned long attr = 0;
> +
> +               if (!(flags & MAP_EXEC))
> +                       attr |= EFI_MEMORY_XP;
> +
> +               if (!(flags & MAP_WRITE))
> +                       attr |= EFI_MEMORY_RO;
> +
> +               status = efi_adjust_memory_range_protection(start,
> +                                                           end - start,
> +                                                           attr);
> +               if (status != EFI_SUCCESS)
> +                       efi_err("Unable to protect memory range");
> +       }
> +
> +       return start;
> +}
> +
> +/*
> + * Trampoline takes 3 pages and can be loaded in first megabyte of memory
> + * with its end placed between 0 and 640k where BIOS might start.
> + * (see arch/x86/boot/compressed/pgtable_64.c)
> + */
> +
> +#ifdef CONFIG_64BIT
> +static efi_status_t prepare_trampoline(void)
> +{
> +       efi_status_t status;
> +
> +       status = efi_allocate_pages(TRAMPOLINE_32BIT_SIZE,
> +                                   (unsigned long *)&trampoline_32bit,
> +                                   TRAMPOLINE_32BIT_PLACEMENT_MAX);
> +
> +       if (status != EFI_SUCCESS)
> +               return status;
> +
> +       unsigned long trampoline_start = (unsigned long)trampoline_32bit;
> +
> +       memset(trampoline_32bit, 0, TRAMPOLINE_32BIT_SIZE);
> +
> +       /* First page of trampoline is a top level page table */
> +       efi_adjust_memory_range_protection(trampoline_start,
> +                                          PAGE_SIZE,
> +                                          EFI_MEMORY_XP);
> +
> +       /* Second page of trampoline is the code (with a padding) */
> +
> +       void *caddr = (void *)trampoline_32bit + TRAMPOLINE_32BIT_CODE_OFFSET;
> +
> +       memcpy(caddr, trampoline_32bit_src, TRAMPOLINE_32BIT_CODE_SIZE);
> +
> +       efi_adjust_memory_range_protection((unsigned long)caddr,
> +                                          PAGE_SIZE,
> +                                          EFI_MEMORY_RO);
> +
> +       /* And the last page of trampoline is the stack */
> +
> +       efi_adjust_memory_range_protection(trampoline_start + 2 * PAGE_SIZE,
> +                                          PAGE_SIZE,
> +                                          EFI_MEMORY_XP);
> +
> +       return EFI_SUCCESS;
> +}
> +#else
> +static inline efi_status_t prepare_trampoline(void)
> +{
> +       return EFI_SUCCESS;
> +}
> +#endif
> +
> +static efi_status_t init_loader_data(struct boot_params *params)
> +{
> +       struct efi_info *efi = (void *)&params->efi_info;
> +       efi_status_t status;
> +
> +       unsigned long map_size, desc_size, buff_size;
> +       u32 desc_ver;
> +       efi_memory_desc_t *mmap;
> +
> +       struct efi_boot_memmap map = {
> +               .map            = &mmap,
> +               .map_size       = &map_size,
> +               .desc_size      = &desc_size,
> +               .desc_ver       = &desc_ver,
> +               .key_ptr        = NULL,
> +               .buff_size      = &buff_size,
> +       };
> +
> +       status = efi_get_memory_map(&map);

efi_get_memory_map() has been updated in the mean time, so this needs a rewrite.

> +
> +       if (status != EFI_SUCCESS) {
> +               efi_err("Unable to get EFI memory map...\n");
> +               return status;
> +       }
> +
> +       const char *signature = efi_is_64bit() ? EFI64_LOADER_SIGNATURE
> +                                              : EFI32_LOADER_SIGNATURE;
> +
> +       memcpy(&efi->efi_loader_signature, signature, sizeof(__u32));
> +
> +       efi->efi_memdesc_size = desc_size;
> +       efi->efi_memdesc_version = desc_ver;
> +       efi->efi_memmap_size = map_size;
> +
> +       efi_set_u64_split((unsigned long)mmap,
> +                         &efi->efi_memmap, &efi->efi_memmap_hi);
> +
> +       efi_set_u64_split((unsigned long)efi_system_table,
> +                         &efi->efi_systab, &efi->efi_systab_hi);
> +
> +       return EFI_SUCCESS;
> +}
> +
> +static void free_loader_data(struct boot_params *params)
> +{
> +       struct efi_info *efi = (void *)&params->efi_info;
> +       unsigned long mmap = efi->efi_memmap;
> +
> +#ifdef CONFIG_64BIT
> +       mmap |= ((unsigned long)efi->efi_memmap_hi << 32);
> +#endif
> +
> +       efi_bs_call(free_pool, (void *)mmap);
> +
> +       efi->efi_memdesc_size = 0;
> +       efi->efi_memdesc_version = 0;
> +       efi->efi_memmap_size = 0;
> +       efi_set_u64_split(0, &efi->efi_memmap, &efi->efi_memmap_hi);
> +}
> +
> +unsigned long extract_kernel_direct(struct boot_params *params)
> +{
> +
> +       extern unsigned char input_data[];
> +       extern unsigned int output_len, input_len;
> +
> +       void *res;
> +       efi_status_t status;
> +       struct efi_iofunc iof = { 0 };
> +
> +       status = prepare_trampoline();
> +
> +       if (status != EFI_SUCCESS)
> +               return 0;
> +
> +       /* Prepare environment for do_extract_kernel() call */
> +       status = init_loader_data(params);
> +
> +       if (status != EFI_SUCCESS)
> +               return 0;
> +
> +       iof.puthex = do_puthex;
> +       iof.putstr = do_putstr;
> +       iof.map_range = do_map_range;
> +
> +       res = efi_extract_kernel(params, &iof, input_data, input_len,
> +                                (unsigned char *)startup_32, output_len);
> +
> +       free_loader_data(params);
> +
> +       return (unsigned long)res;
> +}
> +
> +static void do_puthex(unsigned long value)
> +{
> +       efi_printk("%08lx", value);
> +}
> +
> +static void do_putstr(const char *msg)
> +{
> +       efi_printk("%s", msg);
> +}
> diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c
> index 678f9c2ccafc..680184034cb7 100644
> --- a/drivers/firmware/efi/libstub/x86-stub.c
> +++ b/drivers/firmware/efi/libstub/x86-stub.c
> @@ -230,26 +230,25 @@ static void
>  setup_memory_protection(unsigned long image_base, unsigned long image_size)
>  {
>         /*
> -        * Allow execution of possible trampoline used
> -        * for switching between 4- and 5-level page tables
> -        * and relocated kernel image.
> -        */
> +       * Allow execution of possible trampoline used
> +       * for switching between 4- and 5-level page tables
> +       * and relocated kernel image.
> +       */
>

Drop this hunk please

>         efi_adjust_memory_range_protection(TRAMPOLINE_PLACEMENT_BASE,
>                                            TRAMPOLINE_PLACEMENT_SIZE, 0);
>
>  #ifdef CONFIG_64BIT
> -       if (image_base != (unsigned long)startup_32)
> -               efi_adjust_memory_range_protection(image_base, image_size, 0);
> +       efi_adjust_memory_range_protection(image_base, image_size, 0);
>  #else
>         /*
> -        * Clear protection flags on a whole range of possible
> -        * addresses used for KASLR. We don't need to do that
> -        * on x86_64, since KASLR/extraction is performed after
> -        * dedicated identity page tables are built and we only
> -        * need to remove possible protection on relocated image
> -        * itself disregarding further relocations.
> -        */
> +       * Clear protection flags on a whole range of possible
> +       * addresses used for KASLR. We don't need to do that
> +       * on x86_64, since KASLR/extraction is performed after
> +       * dedicated identity page tables are built and we only
> +       * need to remove possible protection on relocated image
> +       * itself disregarding further relocations.
> +       */

Drop this hunk please

>         efi_adjust_memory_range_protection(LOAD_PHYSICAL_ADDR,
>                                            KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR,
>                                            0);
> @@ -270,8 +269,10 @@ static void setup_quirks(struct boot_params *boot_params,
>                         retrieve_apple_device_properties(boot_params);
>         }
>
> -       if (IS_ENABLED(CONFIG_EFI_DXE_MEM_ATTRIBUTES))
> +       if (IS_ENABLED(CONFIG_EFI_DXE_MEM_ATTRIBUTES) &&
> +           !IS_ENABLED(CONFIG_EFI_STUB_EXTRACT_DIRECT)) {
>                 setup_memory_protection(image_base, image_size);
> +       }
>  }
>
>  /*
> @@ -710,8 +711,10 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle)
>  }
>
>  /*
> - * On success, we return the address of startup_32, which has potentially been
> - * relocated by efi_relocate_kernel.
> + * On success, we return:
> + *   - the address of startup_32, which has potentially been
> + *     relocated by efi_relocate_kernel, if libstub direct extraction is disabled.
> + *   - extracted kernel entry point if libstub direct extraction is enabled.
>   * On failure, we exit to the firmware via efi_exit instead of returning.
>   */
>  unsigned long efi_main(efi_handle_t handle,
> @@ -736,6 +739,7 @@ unsigned long efi_main(efi_handle_t handle,
>                 efi_dxe_table = NULL;
>         }
>
> +#ifndef CONFIG_EFI_STUB_EXTRACT_DIRECT
>         /*
>          * If the kernel isn't already loaded at a suitable address,
>          * relocate it.
> @@ -789,6 +793,7 @@ unsigned long efi_main(efi_handle_t handle,
>                  */
>                 image_offset = 0;
>         }
> +#endif
>
>  #ifdef CONFIG_CMDLINE_BOOL
>         status = efi_parse_options(CONFIG_CMDLINE);
> @@ -845,7 +850,13 @@ unsigned long efi_main(efi_handle_t handle,
>
>         setup_efi_pci(boot_params);
>
> -       setup_quirks(boot_params, bzimage_addr, buffer_end - buffer_start);
> +       setup_quirks(boot_params, buffer_start, buffer_end - buffer_start);
> +
> +#ifdef CONFIG_EFI_STUB_EXTRACT_DIRECT
> +       bzimage_addr = extract_kernel_direct(boot_params);
> +       if (!bzimage_addr)
> +               goto fail;
> +#endif
>
>         status = exit_boot(boot_params, handle);
>         if (status != EFI_SUCCESS) {
> --
> 2.35.1
>



More information about the lvc-project mailing list