From ba4c1a10525e37d5f479f52ea79316bafbef8639 Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Fri, 14 Nov 2025 08:46:56 -0800 Subject: [PATCH 01/21] Revert "x86/hyperv/vtl: Use the wakeup mailbox to boot secondary CPUs" This reverts commit 1458b6bb9a3d95794ffe2d50337a90be15faf979. --- arch/x86/hyperv/hv_vtl.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index be5df5d513c7f..2d26a6caf855e 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -278,8 +278,7 @@ int __init hv_vtl_early_init(void) * Otherwise, use an enlightened path since SIPI is not * available for VTL2. */ - if (!((hv_isolation_type_snp() || hv_isolation_type_tdx()) - && !hyperv_paravisor_present)) + if (!(hv_isolation_type_snp() && !hyperv_paravisor_present)) apic_update_callback(wakeup_secondary_cpu_64, hv_vtl_wakeup_secondary_cpu); return 0; From eb251d869e25d27488e88b56bd4fcc2d6310501e Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Fri, 14 Nov 2025 08:49:09 -0800 Subject: [PATCH 02/21] Revert "x86/hyperv/vtl: Mark the wakeup mailbox page as private" This reverts commit fa67c9407a5fc688ffd37b5473a06cea1bf4b877. 
--- arch/x86/hyperv/hv_vtl.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index 2d26a6caf855e..1a5dacc22cb46 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -36,18 +36,6 @@ static bool __init hv_vtl_msi_ext_dest_id(void) return true; } -static inline bool within_page(u64 addr, u64 start) -{ - return addr >= start && addr < (start + PAGE_SIZE); -} - -static bool hv_vtl_is_private_mmio_tdx(u64 addr) -{ - u64 mb_addr = acpi_get_mp_wakeup_mailbox_paddr(); - - return mb_addr && within_page(addr, mb_addr); -} - /* * The `native_machine_emergency_restart` function from `reboot.c` writes * to the physical address 0x472 to indicate the type of reboot for the @@ -85,8 +73,6 @@ void __init hv_vtl_init_platform(void) /* There is no paravisor present if we are here. */ if (hv_isolation_type_tdx()) { x86_init.resources.realmode_limit = SZ_4G; - x86_platform.hyper.is_private_mmio = hv_vtl_is_private_mmio_tdx; - } else { x86_platform.realmode_reserve = x86_init_noop; x86_platform.realmode_init = x86_init_noop; From 7d6eb27af45632ff41fa10bfb876603dd9218ee5 Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Fri, 14 Nov 2025 08:49:20 -0800 Subject: [PATCH 03/21] Revert "x86/smpwakeup: Add a helper get the address of the wakeup mailbox" This reverts commit 2992c7589b3268f9f59e0fea8da15cf2a23aa6f7. 
--- arch/x86/include/asm/smp.h | 1 - arch/x86/kernel/smpwakeup.c | 5 ----- 2 files changed, 6 deletions(-) diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 6a51102d6e42f..fcdea76380de8 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -154,7 +154,6 @@ static inline struct cpumask *cpu_l2c_shared_mask(int cpu) void acpi_setup_mp_wakeup_mailbox(u64 addr); struct acpi_madt_multiproc_wakeup_mailbox *acpi_get_mp_wakeup_mailbox(void); -u64 acpi_get_mp_wakeup_mailbox_paddr(void); #else /* !CONFIG_SMP */ #define wbinvd_on_cpu(cpu) wbinvd() diff --git a/arch/x86/kernel/smpwakeup.c b/arch/x86/kernel/smpwakeup.c index f730a66b6fc81..5089bcda615d6 100644 --- a/arch/x86/kernel/smpwakeup.c +++ b/arch/x86/kernel/smpwakeup.c @@ -81,8 +81,3 @@ struct acpi_madt_multiproc_wakeup_mailbox *acpi_get_mp_wakeup_mailbox(void) { return acpi_mp_wake_mailbox; } - -u64 acpi_get_mp_wakeup_mailbox_paddr(void) -{ - return acpi_mp_wake_mailbox_paddr; -} From 887fd6e2b680116a49fad60e89db089e792148be Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Fri, 14 Nov 2025 08:49:22 -0800 Subject: [PATCH 04/21] Revert "x86/hyperv/vtl: Setup the 64-bit trampoline for TDX guests" This reverts commit 45aeed8f1ecb1e60c45a827d83ba017f04b85714. --- arch/x86/hyperv/hv_vtl.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index 1a5dacc22cb46..d5b97ad0388f4 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -70,16 +70,10 @@ void __init hv_vtl_init_platform(void) { pr_info("Linux runs in Hyper-V Virtual Trust Level\n"); - /* There is no paravisor present if we are here. 
*/ - if (hv_isolation_type_tdx()) { - x86_init.resources.realmode_limit = SZ_4G; - } else { - x86_platform.realmode_reserve = x86_init_noop; - x86_platform.realmode_init = x86_init_noop; - real_mode_header = &hv_vtl_real_mode_header; - } - x86_init.resources.probe_roms = x86_init_noop; + x86_platform.realmode_reserve = x86_init_noop; + x86_platform.realmode_init = x86_init_noop; + real_mode_header = &hv_vtl_real_mode_header; x86_init.irqs.pre_vector_init = x86_init_noop; x86_init.timers.timer_init = x86_init_noop; x86_init.resources.probe_roms = x86_init_noop; From 7b8022d8ef59eacb525999abfc3b7d84e7670352 Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Fri, 14 Nov 2025 08:49:25 -0800 Subject: [PATCH 05/21] Revert "x86/realmode: Make the location of the trampoline configurable" This reverts commit e46d6e593617f0c98f8a778af8a345197e9ccc39. --- arch/x86/include/asm/x86_init.h | 3 --- arch/x86/kernel/x86_init.c | 3 --- arch/x86/realmode/init.c | 7 ++++--- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 28b11838a6909..213cf5379a5a6 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -31,15 +31,12 @@ struct x86_init_mpparse { * platform * @memory_setup: platform specific memory setup * @dmi_setup: platform specific DMI setup - * @realmode_limit: platform specific address limit for the real mode trampoline - * (default 1M) */ struct x86_init_resources { void (*probe_roms)(void); void (*reserve_resources)(void); char *(*memory_setup)(void); void (*dmi_setup)(void); - unsigned long realmode_limit; }; /** diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index a25fd72828117..0a2bbd674a6d9 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include @@ -70,8 +69,6 @@ struct x86_init_ops x86_init __initdata = { .reserve_resources = 
reserve_standard_io_resources, .memory_setup = e820__memory_setup_default, .dmi_setup = dmi_setup, - /* Has to be under 1M so we can execute real-mode AP code. */ - .realmode_limit = SZ_1M, }, .mpparse = { diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index 9006806cc7934..f9bc444a3064d 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c @@ -45,7 +45,7 @@ void load_trampoline_pgtable(void) void __init reserve_real_mode(void) { - phys_addr_t mem, limit = x86_init.resources.realmode_limit; + phys_addr_t mem; size_t size = real_mode_size_needed(); if (!size) @@ -53,9 +53,10 @@ void __init reserve_real_mode(void) WARN_ON(slab_is_available()); - mem = memblock_phys_alloc_range(size, PAGE_SIZE, 0, limit); + /* Has to be under 1M so we can execute real-mode AP code. */ + mem = memblock_phys_alloc_range(size, PAGE_SIZE, 0, 1<<20); if (!mem) - pr_info("No memory below %pa for the real-mode trampoline\n", &limit); + pr_info("No sub-1M memory is available for the trampoline\n"); else set_real_mode_mem(mem); From 7a6ea8818953911b14db14330fd36e07866207ac Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Fri, 14 Nov 2025 08:49:27 -0800 Subject: [PATCH 06/21] Revert "x86/hyperv/vtl: Set real_mode_header in hv_vtl_init_platform()" This reverts commit 96dc08271f5f90c6d0769fdd2f1748f75f4c2b99. 
--- arch/x86/hyperv/hv_vtl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index d5b97ad0388f4..7c67f43a485e9 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -73,7 +73,6 @@ void __init hv_vtl_init_platform(void) x86_init.resources.probe_roms = x86_init_noop; x86_platform.realmode_reserve = x86_init_noop; x86_platform.realmode_init = x86_init_noop; - real_mode_header = &hv_vtl_real_mode_header; x86_init.irqs.pre_vector_init = x86_init_noop; x86_init.timers.timer_init = x86_init_noop; x86_init.resources.probe_roms = x86_init_noop; @@ -261,6 +260,9 @@ int __init hv_vtl_early_init(void) if (!(hv_isolation_type_snp() && !hyperv_paravisor_present)) apic_update_callback(wakeup_secondary_cpu_64, hv_vtl_wakeup_secondary_cpu); + if (!hv_isolation_type_tdx()) + real_mode_header = &hv_vtl_real_mode_header; + return 0; } From 88161f5e164e3c8e5d8fae7504ba61e3e51696cd Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Fri, 14 Nov 2025 08:49:29 -0800 Subject: [PATCH 07/21] Revert "x86/dt: Parse the Wakeup Mailbox for Intel processors" This reverts commit 9fbc758482bce4aea7b989affb90a48e336f86ef. 
--- arch/x86/Kconfig | 2 +- arch/x86/kernel/devicetree.c | 47 ------------------------------------ 2 files changed, 1 insertion(+), 48 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e24f247a217f3..27acb8f887cb6 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1137,7 +1137,7 @@ config X86_LOCAL_APIC config X86_MAILBOX_WAKEUP def_bool y - depends on OF || ACPI_MADT_WAKEUP + depends on ACPI_MADT_WAKEUP depends on X86_64 depends on SMP depends on X86_LOCAL_APIC diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 7c5fbed035390..94436c7f51e39 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include @@ -126,51 +125,6 @@ static void __init dtb_setup_hpet(void) #endif } -#if defined(CONFIG_X86_64) && defined(CONFIG_SMP) - -#define WAKEUP_MAILBOX_SIZE 0x1000 -#define WAKEUP_MAILBOX_ALIGN 0x1000 - -/** dtb_wakeup_mailbox_setup() - Parse the wakeup mailbox from the device tree - * - * Look for the presence of a wakeup mailbox in the DeviceTree. The mailbox is - * expected to follow the structure and operation described in the Multiprocessor - * Wakeup Structure of the ACPI specification. - */ -static void __init dtb_wakeup_mailbox_setup(void) -{ - struct device_node *node; - struct resource res; - - node = of_find_compatible_node(NULL, NULL, "intel,wakeup-mailbox"); - if (!node) - return; - - if (of_address_to_resource(node, 0, &res)) - goto done; - - /* The mailbox is a 4KB-aligned region.*/ - if (res.start & (WAKEUP_MAILBOX_ALIGN - 1)) - goto done; - - /* The mailbox has a size of 4KB. */ - if (res.end - res.start + 1 != WAKEUP_MAILBOX_SIZE) - goto done; - - /* Not supported when the mailbox is used. 
*/ - cpu_hotplug_disable_offlining(); - - acpi_setup_mp_wakeup_mailbox(res.start); -done: - of_node_put(node); -} -#else /* !CONFIG_X86_64 || !CONFIG_SMP */ -static inline int dtb_wakeup_mailbox_setup(void) -{ - return -EOPNOTSUPP; -} -#endif /* CONFIG_X86_64 && CONFIG_SMP */ - #ifdef CONFIG_X86_LOCAL_APIC static void __init dtb_cpu_setup(void) @@ -339,7 +293,6 @@ static void __init x86_dtb_parse_smp_config(void) dtb_setup_hpet(); dtb_apic_setup(); - dtb_wakeup_mailbox_setup(); } void __init x86_flattree_get_config(void) From a090dc4d4ffe7925a08bd7aaa978bc8e0a950a9e Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Fri, 14 Nov 2025 08:49:31 -0800 Subject: [PATCH 08/21] Revert "dt-bindings: reserved-memory: Wakeup Mailbox for Intel processors" This reverts commit 3a75341eba300bedabe58ac2d43c8c745246aa9d. --- .../reserved-memory/intel,wakeup-mailbox.yaml | 50 ------------------- 1 file changed, 50 deletions(-) delete mode 100644 Documentation/devicetree/bindings/reserved-memory/intel,wakeup-mailbox.yaml diff --git a/Documentation/devicetree/bindings/reserved-memory/intel,wakeup-mailbox.yaml b/Documentation/devicetree/bindings/reserved-memory/intel,wakeup-mailbox.yaml deleted file mode 100644 index a80d3bac44c23..0000000000000 --- a/Documentation/devicetree/bindings/reserved-memory/intel,wakeup-mailbox.yaml +++ /dev/null @@ -1,50 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause -%YAML 1.2 ---- -$id: http://devicetree.org/schemas/reserved-memory/intel,wakeup-mailbox.yaml# -$schema: http://devicetree.org/meta-schemas/core.yaml# - -title: Wakeup Mailbox for Intel processors - -description: | - The Wakeup Mailbox provides a mechanism for the operating system to wake up - secondary CPUs on Intel processors. It is an alternative to the INIT-!INIT- - SIPI sequence used on most x86 systems. - - The structure and operation of the mailbox is described in the Multiprocessor - Wakeup Structure of the ACPI specification version 6.6 section 5.2.12.19 [1]. 
- - The implementation of the mailbox in platform firmware is described in the - Intel TDX Virtual Firmware Design Guide section 4.3.5 [2]. - - 1: https://uefi.org/specs/ACPI/6.6/05_ACPI_Software_Programming_Model.html#multiprocessor-wakeup-structure - 2: https://www.intel.com/content/www/us/en/content-details/733585/intel-tdx-virtual-firmware-design-guide.html - - -maintainers: - - Ricardo Neri - -allOf: - - $ref: reserved-memory.yaml - -properties: - compatible: - const: intel,wakeup-mailbox - -required: - - compatible - - reg - -unevaluatedProperties: false - -examples: - - | - reserved-memory { - #address-cells = <2>; - #size-cells = <1>; - - wakeup-mailbox@ffff0000 { - compatible = "intel,wakeup-mailbox"; - reg = <0x0 0xffff0000 0x1000>; - }; - }; From 21e082faf5d5239d206d0199868c22f4c385cefd Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Fri, 14 Nov 2025 08:49:33 -0800 Subject: [PATCH 09/21] Revert "x86/acpi: Move acpi_wakeup_cpu() and helpers to smpwakeup.c" This reverts commit 672fc21ee1aef5e73a8ba630d676d6e1d966a889. 
--- arch/x86/Kconfig | 7 --- arch/x86/kernel/Makefile | 1 - arch/x86/kernel/acpi/madt_wakeup.c | 76 +++++++++++++++++++++++++++ arch/x86/kernel/smpwakeup.c | 83 ------------------------------ 4 files changed, 76 insertions(+), 91 deletions(-) delete mode 100644 arch/x86/kernel/smpwakeup.c diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 27acb8f887cb6..0ce8416858072 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1135,13 +1135,6 @@ config X86_LOCAL_APIC depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC || PCI_MSI select IRQ_DOMAIN_HIERARCHY -config X86_MAILBOX_WAKEUP - def_bool y - depends on ACPI_MADT_WAKEUP - depends on X86_64 - depends on SMP - depends on X86_LOCAL_APIC - config ACPI_MADT_WAKEUP def_bool y depends on X86_64 diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 9fdbf9cc8e8b4..f7918980667a3 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -92,7 +92,6 @@ apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_SMP) += smpboot.o -obj-$(CONFIG_X86_MAILBOX_WAKEUP) += smpwakeup.o obj-$(CONFIG_X86_TSC) += tsc_sync.o obj-$(CONFIG_SMP) += setup_percpu.o obj-$(CONFIG_X86_MPPARSE) += mpparse.o diff --git a/arch/x86/kernel/acpi/madt_wakeup.c b/arch/x86/kernel/acpi/madt_wakeup.c index d6112109cef4d..8416c4b09a500 100644 --- a/arch/x86/kernel/acpi/madt_wakeup.c +++ b/arch/x86/kernel/acpi/madt_wakeup.c @@ -2,10 +2,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include #include @@ -13,6 +15,12 @@ #include #include +/* Physical address of the Multiprocessor Wakeup Structure mailbox */ +static u64 acpi_mp_wake_mailbox_paddr __ro_after_init; + +/* Virtual address of the Multiprocessor Wakeup Structure mailbox */ +static struct acpi_madt_multiproc_wakeup_mailbox *acpi_mp_wake_mailbox; + static u64 acpi_mp_pgd __ro_after_init; static u64 acpi_mp_reset_vector_paddr __ro_after_init; @@ -162,6 +170,63 @@ static int __init 
acpi_mp_setup_reset(u64 reset_vector) return 0; } +static int acpi_wakeup_cpu(u32 apicid, unsigned long start_ip, unsigned int cpu) +{ + if (!acpi_mp_wake_mailbox_paddr) { + pr_warn_once("No MADT mailbox: cannot bringup secondary CPUs. Booting with kexec?\n"); + return -EOPNOTSUPP; + } + + /* + * Remap mailbox memory only for the first call to acpi_wakeup_cpu(). + * + * Wakeup of secondary CPUs is fully serialized in the core code. + * No need to protect acpi_mp_wake_mailbox from concurrent accesses. + */ + if (!acpi_mp_wake_mailbox) { + acpi_mp_wake_mailbox = memremap(acpi_mp_wake_mailbox_paddr, + sizeof(*acpi_mp_wake_mailbox), + MEMREMAP_WB); + } + + /* + * Mailbox memory is shared between the firmware and OS. Firmware will + * listen on mailbox command address, and once it receives the wakeup + * command, the CPU associated with the given apicid will be booted. + * + * The value of 'apic_id' and 'wakeup_vector' must be visible to the + * firmware before the wakeup command is visible. smp_store_release() + * ensures ordering and visibility. + */ + acpi_mp_wake_mailbox->apic_id = apicid; + acpi_mp_wake_mailbox->wakeup_vector = start_ip; + smp_store_release(&acpi_mp_wake_mailbox->command, + ACPI_MP_WAKE_COMMAND_WAKEUP); + + /* + * Wait for the CPU to wake up. + * + * The CPU being woken up is essentially in a spin loop waiting to be + * woken up. It should not take long for it wake up and acknowledge by + * zeroing out ->command. + * + * ACPI specification doesn't provide any guidance on how long kernel + * has to wait for a wake up acknowledgment. It also doesn't provide + * a way to cancel a wake up request if it takes too long. + * + * In TDX environment, the VMM has control over how long it takes to + * wake up secondary. It can postpone scheduling secondary vCPU + * indefinitely. Giving up on wake up request and reporting error opens + * possible attack vector for VMM: it can wake up a secondary CPU when + * kernel doesn't expect it. 
Wait until positive result of the wake up + * request. + */ + while (READ_ONCE(acpi_mp_wake_mailbox->command)) + cpu_relax(); + + return 0; +} + static void acpi_mp_disable_offlining(struct acpi_madt_multiproc_wakeup *mp_wake) { cpu_hotplug_disable_offlining(); @@ -224,3 +289,14 @@ int __init acpi_parse_mp_wake(union acpi_subtable_headers *header, return 0; } + +void __init acpi_setup_mp_wakeup_mailbox(u64 mailbox_paddr) +{ + acpi_mp_wake_mailbox_paddr = mailbox_paddr; + apic_update_callback(wakeup_secondary_cpu_64, acpi_wakeup_cpu); +} + +struct acpi_madt_multiproc_wakeup_mailbox *acpi_get_mp_wakeup_mailbox(void) +{ + return acpi_mp_wake_mailbox; +} diff --git a/arch/x86/kernel/smpwakeup.c b/arch/x86/kernel/smpwakeup.c deleted file mode 100644 index 5089bcda615d6..0000000000000 --- a/arch/x86/kernel/smpwakeup.c +++ /dev/null @@ -1,83 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later - -#include -#include -#include -#include -#include -#include -#include - -/* Physical address of the Multiprocessor Wakeup Structure mailbox */ -static u64 acpi_mp_wake_mailbox_paddr __ro_after_init; - -/* Virtual address of the Multiprocessor Wakeup Structure mailbox */ -static struct acpi_madt_multiproc_wakeup_mailbox *acpi_mp_wake_mailbox; - -static int acpi_wakeup_cpu(u32 apicid, unsigned long start_ip, unsigned int cpu) -{ - if (!acpi_mp_wake_mailbox_paddr) { - pr_warn_once("No MADT mailbox: cannot bringup secondary CPUs. Booting with kexec?\n"); - return -EOPNOTSUPP; - } - - /* - * Remap mailbox memory only for the first call to acpi_wakeup_cpu(). - * - * Wakeup of secondary CPUs is fully serialized in the core code. - * No need to protect acpi_mp_wake_mailbox from concurrent accesses. - */ - if (!acpi_mp_wake_mailbox) { - acpi_mp_wake_mailbox = memremap(acpi_mp_wake_mailbox_paddr, - sizeof(*acpi_mp_wake_mailbox), - MEMREMAP_WB); - } - - /* - * Mailbox memory is shared between the firmware and OS. 
Firmware will - * listen on mailbox command address, and once it receives the wakeup - * command, the CPU associated with the given apicid will be booted. - * - * The value of 'apic_id' and 'wakeup_vector' must be visible to the - * firmware before the wakeup command is visible. smp_store_release() - * ensures ordering and visibility. - */ - acpi_mp_wake_mailbox->apic_id = apicid; - acpi_mp_wake_mailbox->wakeup_vector = start_ip; - smp_store_release(&acpi_mp_wake_mailbox->command, - ACPI_MP_WAKE_COMMAND_WAKEUP); - - /* - * Wait for the CPU to wake up. - * - * The CPU being woken up is essentially in a spin loop waiting to be - * woken up. It should not take long for it wake up and acknowledge by - * zeroing out ->command. - * - * ACPI specification doesn't provide any guidance on how long kernel - * has to wait for a wake up acknowledgment. It also doesn't provide - * a way to cancel a wake up request if it takes too long. - * - * In TDX environment, the VMM has control over how long it takes to - * wake up secondary. It can postpone scheduling secondary vCPU - * indefinitely. Giving up on wake up request and reporting error opens - * possible attack vector for VMM: it can wake up a secondary CPU when - * kernel doesn't expect it. Wait until positive result of the wake up - * request. - */ - while (READ_ONCE(acpi_mp_wake_mailbox->command)) - cpu_relax(); - - return 0; -} - -void __init acpi_setup_mp_wakeup_mailbox(u64 mailbox_paddr) -{ - acpi_mp_wake_mailbox_paddr = mailbox_paddr; - apic_update_callback(wakeup_secondary_cpu_64, acpi_wakeup_cpu); -} - -struct acpi_madt_multiproc_wakeup_mailbox *acpi_get_mp_wakeup_mailbox(void) -{ - return acpi_mp_wake_mailbox; -} From 2cc43ff3589f51479cbb39a8181ad4bda17a5f91 Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Fri, 14 Nov 2025 08:49:35 -0800 Subject: [PATCH 10/21] Revert "x86/acpi: Add a helper functions to setup and access the wakeup mailbox" This reverts commit b20a615b554ad60131797137a8a12aa53e8acb31. 
--- arch/x86/include/asm/smp.h | 3 --- arch/x86/kernel/acpi/madt_wakeup.c | 20 +++++--------------- 2 files changed, 5 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index fcdea76380de8..ca073f40698fa 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -152,9 +152,6 @@ static inline struct cpumask *cpu_l2c_shared_mask(int cpu) return per_cpu(cpu_l2c_shared_map, cpu); } -void acpi_setup_mp_wakeup_mailbox(u64 addr); -struct acpi_madt_multiproc_wakeup_mailbox *acpi_get_mp_wakeup_mailbox(void); - #else /* !CONFIG_SMP */ #define wbinvd_on_cpu(cpu) wbinvd() static inline int wbinvd_on_all_cpus(void) diff --git a/arch/x86/kernel/acpi/madt_wakeup.c b/arch/x86/kernel/acpi/madt_wakeup.c index 8416c4b09a500..f48581888d53e 100644 --- a/arch/x86/kernel/acpi/madt_wakeup.c +++ b/arch/x86/kernel/acpi/madt_wakeup.c @@ -37,7 +37,6 @@ static void acpi_mp_play_dead(void) static void acpi_mp_cpu_die(unsigned int cpu) { - struct acpi_madt_multiproc_wakeup_mailbox *mailbox = acpi_get_mp_wakeup_mailbox(); u32 apicid = per_cpu(x86_cpu_to_apicid, cpu); unsigned long timeout; @@ -47,13 +46,13 @@ static void acpi_mp_cpu_die(unsigned int cpu) * * BIOS has to clear 'command' field of the mailbox. */ - mailbox->apic_id = apicid; - smp_store_release(&mailbox->command, + acpi_mp_wake_mailbox->apic_id = apicid; + smp_store_release(&acpi_mp_wake_mailbox->command, ACPI_MP_WAKE_COMMAND_TEST); /* Don't wait longer than a second. 
*/ timeout = USEC_PER_SEC; - while (READ_ONCE(mailbox->command) && --timeout) + while (READ_ONCE(acpi_mp_wake_mailbox->command) && --timeout) udelay(1); if (!timeout) @@ -271,7 +270,7 @@ int __init acpi_parse_mp_wake(union acpi_subtable_headers *header, acpi_table_print_madt_entry(&header->common); - acpi_setup_mp_wakeup_mailbox(mp_wake->mailbox_address); + acpi_mp_wake_mailbox_paddr = mp_wake->mailbox_address; if (mp_wake->version >= ACPI_MADT_MP_WAKEUP_VERSION_V1 && mp_wake->header.length >= ACPI_MADT_MP_WAKEUP_SIZE_V1) { @@ -287,16 +286,7 @@ int __init acpi_parse_mp_wake(union acpi_subtable_headers *header, acpi_mp_disable_offlining(mp_wake); } - return 0; -} - -void __init acpi_setup_mp_wakeup_mailbox(u64 mailbox_paddr) -{ - acpi_mp_wake_mailbox_paddr = mailbox_paddr; apic_update_callback(wakeup_secondary_cpu_64, acpi_wakeup_cpu); -} -struct acpi_madt_multiproc_wakeup_mailbox *acpi_get_mp_wakeup_mailbox(void) -{ - return acpi_mp_wake_mailbox; + return 0; } From 4ec79a544d9928b7d16ec3d99e5dfaffbdaa702c Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Fri, 14 Nov 2025 08:31:01 -0800 Subject: [PATCH 11/21] x86/acpi: Add functions to setup and access the wakeup mailbox Systems that describe hardware using DeviceTree graphs may enumerate and implement the wakeup mailbox as defined in the ACPI specification but do not otherwise depend on ACPI. Expose functions to setup and access the location of the wakeup mailbox from outside ACPI code. The function acpi_setup_mp_wakeup_mailbox() stores the physical address of the mailbox and updates the wakeup_secondary_cpu_64() APIC callback. The function acpi_get_mp_wakeup_mailbox() returns a pointer to the mailbox. 
Signed-off-by: Ricardo Neri --- arch/x86/include/asm/acpi.h | 10 ++++++++++ arch/x86/kernel/acpi/madt_wakeup.c | 11 +++++++++++ 2 files changed, 21 insertions(+) diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 5ab1a4598d00b..315f69dd45fc7 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -182,6 +182,9 @@ void __iomem *x86_acpi_os_ioremap(acpi_physical_address phys, acpi_size size); #define acpi_os_ioremap acpi_os_ioremap #endif +void acpi_setup_mp_wakeup_mailbox(u64 addr); +struct acpi_madt_multiproc_wakeup_mailbox *acpi_get_mp_wakeup_mailbox(void); + #else /* !CONFIG_ACPI */ #define acpi_lapic 0 @@ -200,6 +203,13 @@ static inline u64 x86_default_get_root_pointer(void) return 0; } +static inline void acpi_setup_mp_wakeup_mailbox(u64 addr) { } + +static inline struct acpi_madt_multiproc_wakeup_mailbox *acpi_get_mp_wakeup_mailbox(void) +{ + return NULL; +} + #endif /* !CONFIG_ACPI */ #define ARCH_HAS_POWER_INIT 1 diff --git a/arch/x86/kernel/acpi/madt_wakeup.c b/arch/x86/kernel/acpi/madt_wakeup.c index f48581888d53e..f2e8970ea9d24 100644 --- a/arch/x86/kernel/acpi/madt_wakeup.c +++ b/arch/x86/kernel/acpi/madt_wakeup.c @@ -290,3 +290,14 @@ int __init acpi_parse_mp_wake(union acpi_subtable_headers *header, return 0; } + +void __init acpi_setup_mp_wakeup_mailbox(u64 mailbox_paddr) +{ + acpi_mp_wake_mailbox_paddr = mailbox_paddr; + apic_update_callback(wakeup_secondary_cpu_64, acpi_wakeup_cpu); +} + +struct acpi_madt_multiproc_wakeup_mailbox *acpi_get_mp_wakeup_mailbox(void) +{ + return acpi_mp_wake_mailbox; +} From 5f8e80bcb14ef6c10e0c7392ffae43bbf68e1afe Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Fri, 14 Nov 2025 11:50:16 -0800 Subject: [PATCH 12/21] x86/topology: Add a missing attribute dependency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sysctl_sched_itmt_enabled is declared in asm/topology.h with the __read_mostly attribute, but the header does not include 
linux/cache.h. This causes a build failure when a file includes asm/topology.h without including linux/cache.h: ./arch/x86/include/asm/topology.h:264:27: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘sysctl_sched_itmt_enabled’ 264 | extern bool __read_mostly sysctl_sched_itmt_enabled; | ^~~~~~~~~~~~~~~~~~~~~~~~~ Include the needed header. Signed-off-by: Ricardo Neri --- arch/x86/include/asm/topology.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 92f3664dd933b..c2cb76380cb02 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -239,6 +239,7 @@ extern bool x86_topology_update; #ifdef CONFIG_SCHED_MC_PRIO #include +#include DECLARE_PER_CPU_READ_MOSTLY(int, sched_core_priority); extern unsigned int __read_mostly sysctl_sched_itmt_enabled; From 6ae8254ab31e21b770671b2b4d8a5a1155e67efd Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Fri, 14 Nov 2025 08:31:02 -0800 Subject: [PATCH 13/21] dt-bindings: reserved-memory: Wakeup Mailbox for Intel processors Add DeviceTree bindings to enumerate the wakeup mailbox used in platform firmware for Intel processors. x86 platforms commonly boot secondary CPUs using an INIT assert, de-assert followed by Start-Up IPI messages. The wakeup mailbox can be used when this mechanism is unavailable. The wakeup mailbox offers more control to the operating system to boot secondary CPUs than a spin-table. It allows the reuse of the same wakeup vector for all CPUs while maintaining control over which CPUs to boot and when. While it is possible to achieve the same level of control using a spin-table, it would require specifying a separate `cpu-release-addr` for each secondary CPU. The operation and structure of the mailbox are described in the Multiprocessor Wakeup Structure defined in the ACPI specification. 
Note that this structure does not specify how to publish the mailbox to the operating system (ACPI-based platform firmware uses a separate table). No ACPI table is needed in DeviceTree-based firmware to enumerate the mailbox. Nodes that want to refer to the reserved memory usually define a `memory-region` property. /cpus/cpu* nodes would want to refer to the mailbox, but they do not have such property defined in the DeviceTree specification. Moreover, it would imply that there is a memory region per CPU. Instead, add a `compatible` property that the operating system can use to discover the mailbox. Reviewed-by: Dexuan Cui Reviewed-by: Rob Herring (Arm) Acked-by: Rafael J. Wysocki (Intel) Co-developed-by: Yunhong Jiang Signed-off-by: Yunhong Jiang Signed-off-by: Ricardo Neri --- .../reserved-memory/intel,wakeup-mailbox.yaml | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 Documentation/devicetree/bindings/reserved-memory/intel,wakeup-mailbox.yaml diff --git a/Documentation/devicetree/bindings/reserved-memory/intel,wakeup-mailbox.yaml b/Documentation/devicetree/bindings/reserved-memory/intel,wakeup-mailbox.yaml new file mode 100644 index 0000000000000..a80d3bac44c23 --- /dev/null +++ b/Documentation/devicetree/bindings/reserved-memory/intel,wakeup-mailbox.yaml @@ -0,0 +1,50 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/reserved-memory/intel,wakeup-mailbox.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Wakeup Mailbox for Intel processors + +description: | + The Wakeup Mailbox provides a mechanism for the operating system to wake up + secondary CPUs on Intel processors. It is an alternative to the INIT-!INIT- + SIPI sequence used on most x86 systems. + + The structure and operation of the mailbox is described in the Multiprocessor + Wakeup Structure of the ACPI specification version 6.6 section 5.2.12.19 [1]. 
+ + The implementation of the mailbox in platform firmware is described in the + Intel TDX Virtual Firmware Design Guide section 4.3.5 [2]. + + 1: https://uefi.org/specs/ACPI/6.6/05_ACPI_Software_Programming_Model.html#multiprocessor-wakeup-structure + 2: https://www.intel.com/content/www/us/en/content-details/733585/intel-tdx-virtual-firmware-design-guide.html + + +maintainers: + - Ricardo Neri + +allOf: + - $ref: reserved-memory.yaml + +properties: + compatible: + const: intel,wakeup-mailbox + +required: + - compatible + - reg + +unevaluatedProperties: false + +examples: + - | + reserved-memory { + #address-cells = <2>; + #size-cells = <1>; + + wakeup-mailbox@ffff0000 { + compatible = "intel,wakeup-mailbox"; + reg = <0x0 0xffff0000 0x1000>; + }; + }; From c49d781e8c0b84271dce4f3126a81625e65b8e5a Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Fri, 14 Nov 2025 08:31:03 -0800 Subject: [PATCH 14/21] x86/dt: Parse the Wakeup Mailbox for Intel processors The Wakeup Mailbox is a mechanism to boot secondary CPUs on systems that do not want or cannot use the INIT + StartUp IPI messages. The platform firmware is expected to implement the mailbox as described in the Multiprocessor Wakeup Structure of the ACPI specification. It is also expected to publish the mailbox to the operating system as described in the corresponding DeviceTree schema that accompanies the documentation of the Linux kernel. Reuse the existing functionality to set the memory location of the mailbox and update the wakeup_secondary_cpu_64() APIC callback. Make this functionality available to DeviceTree-based systems by making CONFIG_X86_MAILBOX_WAKEUP depend on either CONFIG_OF or CONFIG_ACPI_MADT_WAKEUP. do_boot_cpu() uses wakeup_secondary_cpu_64() when set. It will be set if a wakeup mailbox is enumerated via an ACPI table or a DeviceTree node. For cases in which this behavior is not desired, this APIC callback can be updated later during boot using platform-specific hooks. 
Reviewed-by: Dexuan Cui Co-developed-by: Yunhong Jiang Signed-off-by: Yunhong Jiang Signed-off-by: Ricardo Neri --- arch/x86/kernel/devicetree.c | 47 ++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 94436c7f51e39..b457eb3c78a33 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -125,6 +126,51 @@ static void __init dtb_setup_hpet(void) #endif } +#if defined(CONFIG_X86_64) && defined(CONFIG_SMP) + +#define WAKEUP_MAILBOX_SIZE 0x1000 +#define WAKEUP_MAILBOX_ALIGN 0x1000 + +/** dtb_wakeup_mailbox_setup() - Parse the wakeup mailbox from the device tree + * + * Look for the presence of a wakeup mailbox in the DeviceTree. The mailbox is + * expected to follow the structure and operation described in the Multiprocessor + * Wakeup Structure of the ACPI specification. + */ +static void __init dtb_wakeup_mailbox_setup(void) +{ + struct device_node *node; + struct resource res; + + node = of_find_compatible_node(NULL, NULL, "intel,wakeup-mailbox"); + if (!node) + return; + + if (of_address_to_resource(node, 0, &res)) + goto done; + + /* The mailbox is a 4KB-aligned region.*/ + if (res.start & (WAKEUP_MAILBOX_ALIGN - 1)) + goto done; + + /* The mailbox has a size of 4KB. */ + if (res.end - res.start + 1 != WAKEUP_MAILBOX_SIZE) + goto done; + + /* Not supported when the mailbox is used. 
*/ + cpu_hotplug_disable_offlining(); + + acpi_setup_mp_wakeup_mailbox(res.start); +done: + of_node_put(node); +} +#else /* !CONFIG_X86_64 || !CONFIG_SMP */ +static inline int dtb_wakeup_mailbox_setup(void) +{ + return -EOPNOTSUPP; +} +#endif /* CONFIG_X86_64 && CONFIG_SMP */ + #ifdef CONFIG_X86_LOCAL_APIC static void __init dtb_cpu_setup(void) @@ -293,6 +339,7 @@ static void __init x86_dtb_parse_smp_config(void) dtb_setup_hpet(); dtb_apic_setup(); + dtb_wakeup_mailbox_setup(); } void __init x86_flattree_get_config(void) From d9294a1cccb04e6ad65d7777228aa702babd24d9 Mon Sep 17 00:00:00 2001 From: Yunhong Jiang Date: Fri, 14 Nov 2025 08:31:04 -0800 Subject: [PATCH 15/21] x86/hyperv/vtl: Set real_mode_header in hv_vtl_init_platform() Hyper-V VTL clears x86_platform.realmode_{init(), reserve()} in hv_vtl_init_platform() whereas it sets real_mode_header later in hv_vtl_early_init(). There is no need to deal with the settings of real mode memory in two places. Also, both functions are called much earlier than x86_platform.realmode_init() (via an early_initcall), where the real_mode_header is needed. Set real_mode_header in hv_vtl_init_platform() to keep all code dealing with memory for the real mode trampoline in one place. Besides making the code more readable, it prepares it for a subsequent changeset in which the behavior needs to change to support Hyper-V VTL guests in a TDX environment. 
Reviewed-by: Dexuan Cui Reviewed-by: Michael Kelley Suggested-by: Thomas Gleixner Signed-off-by: Yunhong Jiang Signed-off-by: Ricardo Neri --- arch/x86/hyperv/hv_vtl.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index 7c67f43a485e9..d5b97ad0388f4 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -73,6 +73,7 @@ void __init hv_vtl_init_platform(void) x86_init.resources.probe_roms = x86_init_noop; x86_platform.realmode_reserve = x86_init_noop; x86_platform.realmode_init = x86_init_noop; + real_mode_header = &hv_vtl_real_mode_header; x86_init.irqs.pre_vector_init = x86_init_noop; x86_init.timers.timer_init = x86_init_noop; x86_init.resources.probe_roms = x86_init_noop; @@ -260,9 +261,6 @@ int __init hv_vtl_early_init(void) if (!(hv_isolation_type_snp() && !hyperv_paravisor_present)) apic_update_callback(wakeup_secondary_cpu_64, hv_vtl_wakeup_secondary_cpu); - if (!hv_isolation_type_tdx()) - real_mode_header = &hv_vtl_real_mode_header; - return 0; } From dbe886885711bbb9ab101c9662b7cc66e2f9d3c2 Mon Sep 17 00:00:00 2001 From: Yunhong Jiang Date: Fri, 14 Nov 2025 08:31:05 -0800 Subject: [PATCH 16/21] x86/realmode: Make the location of the trampoline configurable x86 CPUs boot in real mode. This mode uses a 1MB address space. The trampoline must reside below this 1MB memory boundary. There are platforms in which the firmware boots the secondary CPUs, switches them to long mode and transfers control to the kernel. An example of such a mechanism is the ACPI Multiprocessor Wakeup Structure. In this scenario there is no restriction on locating the trampoline under 1MB memory. Moreover, certain platforms (for example, Hyper-V VTL guests) may not have memory available for allocation below 1MB. Add a new member to struct x86_init_resources to specify the upper bound for the location of the trampoline memory. Preserve the default upper bound of 1MB to conserve the current behavior. 
Reviewed-by: Dexuan Cui Reviewed-by: Michael Kelley Originally-by: Thomas Gleixner Signed-off-by: Yunhong Jiang Signed-off-by: Ricardo Neri --- arch/x86/include/asm/x86_init.h | 3 +++ arch/x86/kernel/x86_init.c | 3 +++ arch/x86/realmode/init.c | 7 +++---- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 213cf5379a5a6..28b11838a6909 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -31,12 +31,15 @@ struct x86_init_mpparse { * platform * @memory_setup: platform specific memory setup * @dmi_setup: platform specific DMI setup + * @realmode_limit: platform specific address limit for the real mode trampoline + * (default 1M) */ struct x86_init_resources { void (*probe_roms)(void); void (*reserve_resources)(void); char *(*memory_setup)(void); void (*dmi_setup)(void); + unsigned long realmode_limit; }; /** diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 0a2bbd674a6d9..a25fd72828117 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -69,6 +70,8 @@ struct x86_init_ops x86_init __initdata = { .reserve_resources = reserve_standard_io_resources, .memory_setup = e820__memory_setup_default, .dmi_setup = dmi_setup, + /* Has to be under 1M so we can execute real-mode AP code. 
*/ + .realmode_limit = SZ_1M, }, .mpparse = { diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index f9bc444a3064d..9006806cc7934 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c @@ -45,7 +45,7 @@ void load_trampoline_pgtable(void) void __init reserve_real_mode(void) { - phys_addr_t mem; + phys_addr_t mem, limit = x86_init.resources.realmode_limit; size_t size = real_mode_size_needed(); if (!size) @@ -53,10 +53,9 @@ void __init reserve_real_mode(void) WARN_ON(slab_is_available()); - /* Has to be under 1M so we can execute real-mode AP code. */ - mem = memblock_phys_alloc_range(size, PAGE_SIZE, 0, 1<<20); + mem = memblock_phys_alloc_range(size, PAGE_SIZE, 0, limit); if (!mem) - pr_info("No sub-1M memory is available for the trampoline\n"); + pr_info("No memory below %pa for the real-mode trampoline\n", &limit); else set_real_mode_mem(mem); From 2ff2b2cbc6399cfeacebaa61ac854d7d19f79512 Mon Sep 17 00:00:00 2001 From: Yunhong Jiang Date: Fri, 14 Nov 2025 08:31:06 -0800 Subject: [PATCH 17/21] x86/hyperv/vtl: Setup the 64-bit trampoline for TDX guests The hypervisor is an untrusted entity for TDX guests. It cannot be used to boot secondary CPUs - neither via hypercalls nor the INIT assert, de-assert, plus Start-Up IPI messages. Instead, the platform virtual firmware boots the secondary CPUs and puts them in a state to transfer control to the kernel. This mechanism uses the wakeup mailbox described in the Multiprocessor Wakeup Structure of the ACPI specification. The entry point to the kernel is trampoline_start64. Allocate and setup the trampoline using the default x86_platform callbacks. The platform firmware configures the secondary CPUs in long mode. It is no longer necessary to locate the trampoline under 1MB memory. After handoff from firmware, the trampoline code switches briefly to 32-bit addressing mode, which has an addressing limit of 4GB. Set the upper bound of the trampoline memory accordingly. 
Reviewed-by: Dexuan Cui Reviewed-by: Michael Kelley Signed-off-by: Yunhong Jiang Signed-off-by: Ricardo Neri --- arch/x86/hyperv/hv_vtl.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index d5b97ad0388f4..e225c84936f8f 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -71,9 +71,14 @@ void __init hv_vtl_init_platform(void) pr_info("Linux runs in Hyper-V Virtual Trust Level\n"); x86_init.resources.probe_roms = x86_init_noop; - x86_platform.realmode_reserve = x86_init_noop; - x86_platform.realmode_init = x86_init_noop; - real_mode_header = &hv_vtl_real_mode_header; + /* There is no paravisor present if we are here. */ + if (hv_isolation_type_tdx()) { + x86_init.resources.realmode_limit = SZ_4G; + } else { + x86_platform.realmode_reserve = x86_init_noop; + x86_platform.realmode_init = x86_init_noop; + real_mode_header = &hv_vtl_real_mode_header; + } x86_init.irqs.pre_vector_init = x86_init_noop; x86_init.timers.timer_init = x86_init_noop; x86_init.resources.probe_roms = x86_init_noop; From 61a21d404ec84c59be83a8ddc78ad3dd81f5028f Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Fri, 14 Nov 2025 08:31:07 -0800 Subject: [PATCH 18/21] x86/acpi: Add a helper to get the address of the wakeup mailbox A Hyper-V VTL level 2 guest in a TDX environment needs to map the physical page of the ACPI Multiprocessor Wakeup Structure as private (encrypted). It needs to know the physical address of this structure. Add a helper function to retrieve the address. 
Suggested-by: Michael Kelley Signed-off-by: Ricardo Neri --- arch/x86/include/asm/acpi.h | 6 ++++++ arch/x86/kernel/acpi/madt_wakeup.c | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 315f69dd45fc7..12072d23ba57c 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -184,6 +184,7 @@ void __iomem *x86_acpi_os_ioremap(acpi_physical_address phys, acpi_size size); void acpi_setup_mp_wakeup_mailbox(u64 addr); struct acpi_madt_multiproc_wakeup_mailbox *acpi_get_mp_wakeup_mailbox(void); +u64 acpi_get_mp_wakeup_mailbox_paddr(void); #else /* !CONFIG_ACPI */ @@ -210,6 +211,11 @@ static inline struct acpi_madt_multiproc_wakeup_mailbox *acpi_get_mp_wakeup_mail return NULL; } +static inline u64 acpi_get_mp_wakeup_mailbox_paddr(void) +{ + return 0; +} + #endif /* !CONFIG_ACPI */ #define ARCH_HAS_POWER_INIT 1 diff --git a/arch/x86/kernel/acpi/madt_wakeup.c b/arch/x86/kernel/acpi/madt_wakeup.c index f2e8970ea9d24..69091e220cf7d 100644 --- a/arch/x86/kernel/acpi/madt_wakeup.c +++ b/arch/x86/kernel/acpi/madt_wakeup.c @@ -301,3 +301,8 @@ struct acpi_madt_multiproc_wakeup_mailbox *acpi_get_mp_wakeup_mailbox(void) { return acpi_mp_wake_mailbox; } + +u64 acpi_get_mp_wakeup_mailbox_paddr(void) +{ + return acpi_mp_wake_mailbox_paddr; +} From f290b51fe66f3850247c8d2954d3399e2c51c21a Mon Sep 17 00:00:00 2001 From: Yunhong Jiang Date: Fri, 14 Nov 2025 08:31:08 -0800 Subject: [PATCH 19/21] x86/hyperv/vtl: Mark the wakeup mailbox page as private The current code maps MMIO devices as shared (decrypted) by default in a confidential computing VM. In a TDX environment, secondary CPUs are booted using the Multiprocessor Wakeup Structure defined in the ACPI specification. The virtual firmware and the operating system function in the guest context, without intervention from the VMM. Map the physical memory of the mailbox as private. Use the is_private_mmio() callback. 
Reviewed-by: Dexuan Cui Reviewed-by: Michael Kelley Signed-off-by: Yunhong Jiang Signed-off-by: Ricardo Neri --- arch/x86/hyperv/hv_vtl.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index e225c84936f8f..cf0923a37e25a 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -6,6 +6,9 @@ * Saurabh Sengar */ +#include + +#include #include #include #include @@ -66,6 +69,18 @@ static void __noreturn hv_vtl_restart(char __maybe_unused *cmd) hv_vtl_emergency_restart(); } +static inline bool within_page(u64 addr, u64 start) +{ + return addr >= start && addr < (start + PAGE_SIZE); +} + +static bool hv_vtl_is_private_mmio_tdx(u64 addr) +{ + u64 mb_addr = acpi_get_mp_wakeup_mailbox_paddr(); + + return mb_addr && within_page(addr, mb_addr); +} + void __init hv_vtl_init_platform(void) { pr_info("Linux runs in Hyper-V Virtual Trust Level\n"); @@ -74,6 +89,8 @@ void __init hv_vtl_init_platform(void) /* There is no paravisor present if we are here. */ if (hv_isolation_type_tdx()) { x86_init.resources.realmode_limit = SZ_4G; + x86_platform.hyper.is_private_mmio = hv_vtl_is_private_mmio_tdx; + } else { x86_platform.realmode_reserve = x86_init_noop; x86_platform.realmode_init = x86_init_noop; From 04a67e5ec5a9b30c3fbe52dd3d0753a01dfcdc1f Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Fri, 14 Nov 2025 08:31:09 -0800 Subject: [PATCH 20/21] x86/hyperv/vtl: Use the wakeup mailbox to boot secondary CPUs The hypervisor is an untrusted entity for TDX guests. It cannot be used to boot secondary CPUs. The function hv_vtl_wakeup_secondary_cpu() cannot be used. Instead, the virtual firmware boots the secondary CPUs and places them in a state to transfer control to the kernel using the wakeup mailbox. The firmware enumerates the mailbox via either an ACPI table or a DeviceTree node. If the wakeup mailbox is present, the kernel updates the APIC callback wakeup_secondary_cpu_64() to use it. 
Reviewed-by: Dexuan Cui Reviewed-by: Michael Kelley Signed-off-by: Ricardo Neri --- arch/x86/hyperv/hv_vtl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index cf0923a37e25a..939a897dd3b9c 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -280,7 +280,8 @@ int __init hv_vtl_early_init(void) * Otherwise, use an enlightened path since SIPI is not * available for VTL2. */ - if (!(hv_isolation_type_snp() && !hyperv_paravisor_present)) + if (!((hv_isolation_type_snp() || hv_isolation_type_tdx()) && + !hyperv_paravisor_present)) apic_update_callback(wakeup_secondary_cpu_64, hv_vtl_wakeup_secondary_cpu); return 0; From 23e4de9719eb51ce3f8ae6d07dc210fa7edf5282 Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Fri, 14 Nov 2025 14:15:33 -0800 Subject: [PATCH 21/21] hcl-x64.config: Set CONFIG_ACPI=y The wakeup mailbox that the virtual firmware implements to boot secondary CPUs is defined in the ACPI specification (see version 6.6 section 5.2.12.19). The code in the kernel that makes use of the mailbox resides in the x86 ACPI subsystem. CONFIG_ACPI needs to be set as 'y' to select it. The option CONFIG_ACPI selects or enables many other configuration options which in turn select more options that are not used with DeviceTree-based firmware. Unselect all the options that have a menuconfig prompt. The newly selected code remains dormant if acpi=off is specified in the kernel command line. The code that interacts with the mailbox remains usable for DeviceTree platform firmware. These are the options that are selected after running `make olddefconfig` with this changeset: * CONFIG_ACPI_MADT_WAKEUP=y Enables the wakeup mailbox. * CONFIG_ACPI_LPIT=y Support for ACPI low-power idle table (not used with DeviceTree FW). * CONFIG_ACPI_LEGACY_TABLES_LOOKUP=y Tweaks ACPI root table lookup (not used with DeviceTree FW). 
* CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC=y * CONFIG_ARCH_HAS_ACPI_TABLE_UPGRADE=y Architecture capabilities (unused). * CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT=y Support for ACPI-based suspend/resume (not used with DeviceTree FW). * CONFIG_ACPI_HOTPLUG_IOAPIC=y Support for IO-APIC hotplug (not used with DeviceTree FW). * CONFIG_HAVE_ACPI_APEI=y * CONFIG_HAVE_ACPI_APEI_NMI=y Architecture capabilities (not used as CONFIG_ACPI_APEI=n). * CONFIG_PCI_LABEL=y ACPI-provided PCI naming facilities (not used with DeviceTree FW). * CONFIG_PNP=y * CONFIG_PNPACPI=y Support only. Does not add drivers. * CONFIG_FIRMWARE_TABLE=y Library for parsing ACPI tables. Signed-off-by: Ricardo Neri --- Microsoft/hcl-x64.config | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/Microsoft/hcl-x64.config b/Microsoft/hcl-x64.config index 412736dca50ba..fa30d755cf82f 100644 --- a/Microsoft/hcl-x64.config +++ b/Microsoft/hcl-x64.config @@ -354,6 +354,7 @@ CONFIG_X86_DIRECT_GBPAGES=y # CONFIG_AMD_MEM_ENCRYPT is not set CONFIG_NUMA=y CONFIG_AMD_NUMA=y +# CONFIG_X86_64_ACPI_NUMA is not set CONFIG_NODES_SHIFT=6 CONFIG_ARCH_SPARSEMEM_ENABLE=y CONFIG_ARCH_SPARSEMEM_DEFAULT=y @@ -433,7 +434,33 @@ CONFIG_ARCH_HAS_ADD_PAGES=y # CONFIG_SUSPEND is not set # CONFIG_PM is not set CONFIG_ARCH_SUPPORTS_ACPI=y -# CONFIG_ACPI is not set +CONFIG_ACPI=y +# CONFIG_ACPI_DEBUGGER is not set +# CONFIG_ACPI_SPCR_TABLE is not set +# CONFIG_ACPI_FPDT is not set +# CONFIG_ACPI_REV_OVERRIDE_POSSIBLE is not set +# CONFIG_ACPI_EC_DEBUGFS is not set +# CONFIG_ACPI_AC is not set +# CONFIG_ACPI_BATTERY is not set +# CONFIG_ACPI_TINY_POWER_BUTTON is not set +# CONFIG_ACPI_DOCK is not set +# CONFIG_ACPI_PROCESSOR is not set +# CONFIG_ACPI_TABLE_UPGRADE is not set +# CONFIG_ACPI_DEBUG is not set +# CONFIG_ACPI_PCI_SLOT is not set +# CONFIG_ACPI_CONTAINER is not set +# CONFIG_ACPI_HOTPLUG_MEMORY is not set +# CONFIG_ACPI_SBS is not set +# CONFIG_ACPI_HED is not set +# 
CONFIG_ACPI_REDUCED_HARDWARE_ONLY is not set +# CONFIG_ACPI_NFIT is not set +# CONFIG_ACPI_APEI is not set +# CONFIG_ACPI_DPTF is not set +# CONFIG_ACPI_CONFIGFS is not set +# CONFIG_ACPI_PFRUT is not set +# CONFIG_ACPI_FFH is not set +# CONFIG_PMIC_OPREGION is not set +# CONFIG_X86_PM_TIMER is not set # # CPU Frequency scaling @@ -452,6 +479,7 @@ CONFIG_ARCH_SUPPORTS_ACPI=y # Bus options (PCI etc.) # CONFIG_PCI_DIRECT=y +# CONFIG_PCI_MMCONFIG is not set # CONFIG_PCI_CNB20LE_QUIRK is not set # CONFIG_ISA_BUS is not set # CONFIG_ISA_DMA_API is not set @@ -1230,6 +1258,7 @@ CONFIG_LDISC_AUTOLOAD=y CONFIG_SERIAL_EARLYCON=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_DEPRECATED_OPTIONS=y +# CONFIG_SERIAL_8250_PNP is not set CONFIG_SERIAL_8250_16550A_VARIANTS=y # CONFIG_SERIAL_8250_FINTEK is not set CONFIG_SERIAL_8250_CONSOLE=y