diff --git a/Microsoft/hcl-x64.config b/Microsoft/hcl-x64.config index 412736dca50ba..fa30d755cf82f 100644 --- a/Microsoft/hcl-x64.config +++ b/Microsoft/hcl-x64.config @@ -354,6 +354,7 @@ CONFIG_X86_DIRECT_GBPAGES=y # CONFIG_AMD_MEM_ENCRYPT is not set CONFIG_NUMA=y CONFIG_AMD_NUMA=y +# CONFIG_X86_64_ACPI_NUMA is not set CONFIG_NODES_SHIFT=6 CONFIG_ARCH_SPARSEMEM_ENABLE=y CONFIG_ARCH_SPARSEMEM_DEFAULT=y @@ -433,7 +434,33 @@ CONFIG_ARCH_HAS_ADD_PAGES=y # CONFIG_SUSPEND is not set # CONFIG_PM is not set CONFIG_ARCH_SUPPORTS_ACPI=y -# CONFIG_ACPI is not set +CONFIG_ACPI=y +# CONFIG_ACPI_DEBUGGER is not set +# CONFIG_ACPI_SPCR_TABLE is not set +# CONFIG_ACPI_FPDT is not set +# CONFIG_ACPI_REV_OVERRIDE_POSSIBLE is not set +# CONFIG_ACPI_EC_DEBUGFS is not set +# CONFIG_ACPI_AC is not set +# CONFIG_ACPI_BATTERY is not set +# CONFIG_ACPI_TINY_POWER_BUTTON is not set +# CONFIG_ACPI_DOCK is not set +# CONFIG_ACPI_PROCESSOR is not set +# CONFIG_ACPI_TABLE_UPGRADE is not set +# CONFIG_ACPI_DEBUG is not set +# CONFIG_ACPI_PCI_SLOT is not set +# CONFIG_ACPI_CONTAINER is not set +# CONFIG_ACPI_HOTPLUG_MEMORY is not set +# CONFIG_ACPI_SBS is not set +# CONFIG_ACPI_HED is not set +# CONFIG_ACPI_REDUCED_HARDWARE_ONLY is not set +# CONFIG_ACPI_NFIT is not set +# CONFIG_ACPI_APEI is not set +# CONFIG_ACPI_DPTF is not set +# CONFIG_ACPI_CONFIGFS is not set +# CONFIG_ACPI_PFRUT is not set +# CONFIG_ACPI_FFH is not set +# CONFIG_PMIC_OPREGION is not set +# CONFIG_X86_PM_TIMER is not set # # CPU Frequency scaling @@ -452,6 +479,7 @@ CONFIG_ARCH_SUPPORTS_ACPI=y # Bus options (PCI etc.) 
# CONFIG_PCI_DIRECT=y +# CONFIG_PCI_MMCONFIG is not set # CONFIG_PCI_CNB20LE_QUIRK is not set # CONFIG_ISA_BUS is not set # CONFIG_ISA_DMA_API is not set @@ -1230,6 +1258,7 @@ CONFIG_LDISC_AUTOLOAD=y CONFIG_SERIAL_EARLYCON=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_DEPRECATED_OPTIONS=y +# CONFIG_SERIAL_8250_PNP is not set CONFIG_SERIAL_8250_16550A_VARIANTS=y # CONFIG_SERIAL_8250_FINTEK is not set CONFIG_SERIAL_8250_CONSOLE=y diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e24f247a217f3..0ce8416858072 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1135,13 +1135,6 @@ config X86_LOCAL_APIC depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC || PCI_MSI select IRQ_DOMAIN_HIERARCHY -config X86_MAILBOX_WAKEUP - def_bool y - depends on OF || ACPI_MADT_WAKEUP - depends on X86_64 - depends on SMP - depends on X86_LOCAL_APIC - config ACPI_MADT_WAKEUP def_bool y depends on X86_64 diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index be5df5d513c7f..939a897dd3b9c 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -6,6 +6,9 @@ * Saurabh Sengar */ +#include + +#include #include #include #include @@ -36,18 +39,6 @@ static bool __init hv_vtl_msi_ext_dest_id(void) return true; } -static inline bool within_page(u64 addr, u64 start) -{ - return addr >= start && addr < (start + PAGE_SIZE); -} - -static bool hv_vtl_is_private_mmio_tdx(u64 addr) -{ - u64 mb_addr = acpi_get_mp_wakeup_mailbox_paddr(); - - return mb_addr && within_page(addr, mb_addr); -} - /* * The `native_machine_emergency_restart` function from `reboot.c` writes * to the physical address 0x472 to indicate the type of reboot for the @@ -78,10 +69,23 @@ static void __noreturn hv_vtl_restart(char __maybe_unused *cmd) hv_vtl_emergency_restart(); } +static inline bool within_page(u64 addr, u64 start) +{ + return addr >= start && addr < (start + PAGE_SIZE); +} + +static bool hv_vtl_is_private_mmio_tdx(u64 addr) +{ + u64 mb_addr = acpi_get_mp_wakeup_mailbox_paddr(); 
+ + return mb_addr && within_page(addr, mb_addr); +} + void __init hv_vtl_init_platform(void) { pr_info("Linux runs in Hyper-V Virtual Trust Level\n"); + x86_init.resources.probe_roms = x86_init_noop; /* There is no paravisor present if we are here. */ if (hv_isolation_type_tdx()) { x86_init.resources.realmode_limit = SZ_4G; @@ -92,8 +96,6 @@ void __init hv_vtl_init_platform(void) x86_platform.realmode_init = x86_init_noop; real_mode_header = &hv_vtl_real_mode_header; } - - x86_init.resources.probe_roms = x86_init_noop; x86_init.irqs.pre_vector_init = x86_init_noop; x86_init.timers.timer_init = x86_init_noop; x86_init.resources.probe_roms = x86_init_noop; @@ -278,8 +280,8 @@ int __init hv_vtl_early_init(void) * Otherwise, use an enlightened path since SIPI is not * available for VTL2. */ - if (!((hv_isolation_type_snp() || hv_isolation_type_tdx()) - && !hyperv_paravisor_present)) + if (!((hv_isolation_type_snp() || hv_isolation_type_tdx()) && + !hyperv_paravisor_present)) apic_update_callback(wakeup_secondary_cpu_64, hv_vtl_wakeup_secondary_cpu); return 0; diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 5ab1a4598d00b..12072d23ba57c 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -182,6 +182,10 @@ void __iomem *x86_acpi_os_ioremap(acpi_physical_address phys, acpi_size size); #define acpi_os_ioremap acpi_os_ioremap #endif +void acpi_setup_mp_wakeup_mailbox(u64 addr); +struct acpi_madt_multiproc_wakeup_mailbox *acpi_get_mp_wakeup_mailbox(void); +u64 acpi_get_mp_wakeup_mailbox_paddr(void); + #else /* !CONFIG_ACPI */ #define acpi_lapic 0 @@ -200,6 +204,18 @@ static inline u64 x86_default_get_root_pointer(void) return 0; } +static inline void acpi_setup_mp_wakeup_mailbox(u64 addr) { } + +static inline struct acpi_madt_multiproc_wakeup_mailbox *acpi_get_mp_wakeup_mailbox(void) +{ + return NULL; +} + +static inline u64 acpi_get_mp_wakeup_mailbox_paddr(void) +{ + return 0; +} + #endif /* !CONFIG_ACPI */ #define 
ARCH_HAS_POWER_INIT 1 diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 6a51102d6e42f..ca073f40698fa 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -152,10 +152,6 @@ static inline struct cpumask *cpu_l2c_shared_mask(int cpu) return per_cpu(cpu_l2c_shared_map, cpu); } -void acpi_setup_mp_wakeup_mailbox(u64 addr); -struct acpi_madt_multiproc_wakeup_mailbox *acpi_get_mp_wakeup_mailbox(void); -u64 acpi_get_mp_wakeup_mailbox_paddr(void); - #else /* !CONFIG_SMP */ #define wbinvd_on_cpu(cpu) wbinvd() static inline int wbinvd_on_all_cpus(void) diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 92f3664dd933b..c2cb76380cb02 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -239,6 +239,7 @@ extern bool x86_topology_update; #ifdef CONFIG_SCHED_MC_PRIO #include +#include DECLARE_PER_CPU_READ_MOSTLY(int, sched_core_priority); extern unsigned int __read_mostly sysctl_sched_itmt_enabled; diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 9fdbf9cc8e8b4..f7918980667a3 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -92,7 +92,6 @@ apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_SMP) += smpboot.o -obj-$(CONFIG_X86_MAILBOX_WAKEUP) += smpwakeup.o obj-$(CONFIG_X86_TSC) += tsc_sync.o obj-$(CONFIG_SMP) += setup_percpu.o obj-$(CONFIG_X86_MPPARSE) += mpparse.o diff --git a/arch/x86/kernel/acpi/madt_wakeup.c b/arch/x86/kernel/acpi/madt_wakeup.c index d6112109cef4d..69091e220cf7d 100644 --- a/arch/x86/kernel/acpi/madt_wakeup.c +++ b/arch/x86/kernel/acpi/madt_wakeup.c @@ -2,10 +2,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include #include @@ -13,6 +15,12 @@ #include #include +/* Physical address of the Multiprocessor Wakeup Structure mailbox */ +static u64 acpi_mp_wake_mailbox_paddr __ro_after_init; + +/* Virtual address of the 
Multiprocessor Wakeup Structure mailbox */ +static struct acpi_madt_multiproc_wakeup_mailbox *acpi_mp_wake_mailbox; + static u64 acpi_mp_pgd __ro_after_init; static u64 acpi_mp_reset_vector_paddr __ro_after_init; @@ -29,7 +37,6 @@ static void acpi_mp_play_dead(void) static void acpi_mp_cpu_die(unsigned int cpu) { - struct acpi_madt_multiproc_wakeup_mailbox *mailbox = acpi_get_mp_wakeup_mailbox(); u32 apicid = per_cpu(x86_cpu_to_apicid, cpu); unsigned long timeout; @@ -39,13 +46,13 @@ static void acpi_mp_cpu_die(unsigned int cpu) * * BIOS has to clear 'command' field of the mailbox. */ - mailbox->apic_id = apicid; - smp_store_release(&mailbox->command, + acpi_mp_wake_mailbox->apic_id = apicid; + smp_store_release(&acpi_mp_wake_mailbox->command, ACPI_MP_WAKE_COMMAND_TEST); /* Don't wait longer than a second. */ timeout = USEC_PER_SEC; - while (READ_ONCE(mailbox->command) && --timeout) + while (READ_ONCE(acpi_mp_wake_mailbox->command) && --timeout) udelay(1); if (!timeout) @@ -162,6 +169,63 @@ static int __init acpi_mp_setup_reset(u64 reset_vector) return 0; } +static int acpi_wakeup_cpu(u32 apicid, unsigned long start_ip, unsigned int cpu) +{ + if (!acpi_mp_wake_mailbox_paddr) { + pr_warn_once("No MADT mailbox: cannot bringup secondary CPUs. Booting with kexec?\n"); + return -EOPNOTSUPP; + } + + /* + * Remap mailbox memory only for the first call to acpi_wakeup_cpu(). + * + * Wakeup of secondary CPUs is fully serialized in the core code. + * No need to protect acpi_mp_wake_mailbox from concurrent accesses. + */ + if (!acpi_mp_wake_mailbox) { + acpi_mp_wake_mailbox = memremap(acpi_mp_wake_mailbox_paddr, + sizeof(*acpi_mp_wake_mailbox), + MEMREMAP_WB); + } + + /* + * Mailbox memory is shared between the firmware and OS. Firmware will + * listen on mailbox command address, and once it receives the wakeup + * command, the CPU associated with the given apicid will be booted. 
+ * + * The value of 'apic_id' and 'wakeup_vector' must be visible to the + * firmware before the wakeup command is visible. smp_store_release() + * ensures ordering and visibility. + */ + acpi_mp_wake_mailbox->apic_id = apicid; + acpi_mp_wake_mailbox->wakeup_vector = start_ip; + smp_store_release(&acpi_mp_wake_mailbox->command, + ACPI_MP_WAKE_COMMAND_WAKEUP); + + /* + * Wait for the CPU to wake up. + * + * The CPU being woken up is essentially in a spin loop waiting to be + * woken up. It should not take long for it to wake up and acknowledge by + * zeroing out ->command. + * + * ACPI specification doesn't provide any guidance on how long kernel + * has to wait for a wake up acknowledgment. It also doesn't provide + * a way to cancel a wake up request if it takes too long. + * + * In TDX environment, the VMM has control over how long it takes to + * wake up secondary. It can postpone scheduling secondary vCPU + * indefinitely. Giving up on wake up request and reporting error opens + * possible attack vector for VMM: it can wake up a secondary CPU when + * kernel doesn't expect it. Wait until positive result of the wake up + * request. 
+ */ + while (READ_ONCE(acpi_mp_wake_mailbox->command)) + cpu_relax(); + + return 0; +} + static void acpi_mp_disable_offlining(struct acpi_madt_multiproc_wakeup *mp_wake) { cpu_hotplug_disable_offlining(); @@ -206,7 +270,7 @@ int __init acpi_parse_mp_wake(union acpi_subtable_headers *header, acpi_table_print_madt_entry(&header->common); - acpi_setup_mp_wakeup_mailbox(mp_wake->mailbox_address); + acpi_mp_wake_mailbox_paddr = mp_wake->mailbox_address; if (mp_wake->version >= ACPI_MADT_MP_WAKEUP_VERSION_V1 && mp_wake->header.length >= ACPI_MADT_MP_WAKEUP_SIZE_V1) { @@ -222,5 +286,23 @@ int __init acpi_parse_mp_wake(union acpi_subtable_headers *header, acpi_mp_disable_offlining(mp_wake); } + apic_update_callback(wakeup_secondary_cpu_64, acpi_wakeup_cpu); + return 0; } + +void __init acpi_setup_mp_wakeup_mailbox(u64 mailbox_paddr) +{ + acpi_mp_wake_mailbox_paddr = mailbox_paddr; + apic_update_callback(wakeup_secondary_cpu_64, acpi_wakeup_cpu); +} + +struct acpi_madt_multiproc_wakeup_mailbox *acpi_get_mp_wakeup_mailbox(void) +{ + return acpi_mp_wake_mailbox; +} + +u64 acpi_get_mp_wakeup_mailbox_paddr(void) +{ + return acpi_mp_wake_mailbox_paddr; +} diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 7c5fbed035390..b457eb3c78a33 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -17,8 +17,8 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/smpwakeup.c b/arch/x86/kernel/smpwakeup.c deleted file mode 100644 index f730a66b6fc81..0000000000000 --- a/arch/x86/kernel/smpwakeup.c +++ /dev/null @@ -1,88 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later - -#include -#include -#include -#include -#include -#include -#include - -/* Physical address of the Multiprocessor Wakeup Structure mailbox */ -static u64 acpi_mp_wake_mailbox_paddr __ro_after_init; - -/* Virtual address of the Multiprocessor Wakeup Structure mailbox */ -static struct 
acpi_madt_multiproc_wakeup_mailbox *acpi_mp_wake_mailbox; - -static int acpi_wakeup_cpu(u32 apicid, unsigned long start_ip, unsigned int cpu) -{ - if (!acpi_mp_wake_mailbox_paddr) { - pr_warn_once("No MADT mailbox: cannot bringup secondary CPUs. Booting with kexec?\n"); - return -EOPNOTSUPP; - } - - /* - * Remap mailbox memory only for the first call to acpi_wakeup_cpu(). - * - * Wakeup of secondary CPUs is fully serialized in the core code. - * No need to protect acpi_mp_wake_mailbox from concurrent accesses. - */ - if (!acpi_mp_wake_mailbox) { - acpi_mp_wake_mailbox = memremap(acpi_mp_wake_mailbox_paddr, - sizeof(*acpi_mp_wake_mailbox), - MEMREMAP_WB); - } - - /* - * Mailbox memory is shared between the firmware and OS. Firmware will - * listen on mailbox command address, and once it receives the wakeup - * command, the CPU associated with the given apicid will be booted. - * - * The value of 'apic_id' and 'wakeup_vector' must be visible to the - * firmware before the wakeup command is visible. smp_store_release() - * ensures ordering and visibility. - */ - acpi_mp_wake_mailbox->apic_id = apicid; - acpi_mp_wake_mailbox->wakeup_vector = start_ip; - smp_store_release(&acpi_mp_wake_mailbox->command, - ACPI_MP_WAKE_COMMAND_WAKEUP); - - /* - * Wait for the CPU to wake up. - * - * The CPU being woken up is essentially in a spin loop waiting to be - * woken up. It should not take long for it wake up and acknowledge by - * zeroing out ->command. - * - * ACPI specification doesn't provide any guidance on how long kernel - * has to wait for a wake up acknowledgment. It also doesn't provide - * a way to cancel a wake up request if it takes too long. - * - * In TDX environment, the VMM has control over how long it takes to - * wake up secondary. It can postpone scheduling secondary vCPU - * indefinitely. Giving up on wake up request and reporting error opens - * possible attack vector for VMM: it can wake up a secondary CPU when - * kernel doesn't expect it. 
Wait until positive result of the wake up - * request. - */ - while (READ_ONCE(acpi_mp_wake_mailbox->command)) - cpu_relax(); - - return 0; -} - -void __init acpi_setup_mp_wakeup_mailbox(u64 mailbox_paddr) -{ - acpi_mp_wake_mailbox_paddr = mailbox_paddr; - apic_update_callback(wakeup_secondary_cpu_64, acpi_wakeup_cpu); -} - -struct acpi_madt_multiproc_wakeup_mailbox *acpi_get_mp_wakeup_mailbox(void) -{ - return acpi_mp_wake_mailbox; -} - -u64 acpi_get_mp_wakeup_mailbox_paddr(void) -{ - return acpi_mp_wake_mailbox_paddr; -}