~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/x86/kernel/acpi/madt_wakeup.c

Version: ~ [ linux-6.11-rc3 ] ~ [ linux-6.10.4 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.45 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.104 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.164 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.223 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.281 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.319 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 // SPDX-License-Identifier: GPL-2.0-or-later
  2 #include <linux/acpi.h>
  3 #include <linux/cpu.h>
  4 #include <linux/delay.h>
  5 #include <linux/io.h>
  6 #include <linux/kexec.h>
  7 #include <linux/memblock.h>
  8 #include <linux/pgtable.h>
  9 #include <linux/sched/hotplug.h>
 10 #include <asm/apic.h>
 11 #include <asm/barrier.h>
 12 #include <asm/init.h>
 13 #include <asm/intel_pt.h>
 14 #include <asm/nmi.h>
 15 #include <asm/processor.h>
 16 #include <asm/reboot.h>
 17 
 18 /* Physical address of the Multiprocessor Wakeup Structure mailbox */
 19 static u64 acpi_mp_wake_mailbox_paddr __ro_after_init;
 20 
 21 /* Virtual address of the Multiprocessor Wakeup Structure mailbox */
 22 static struct acpi_madt_multiproc_wakeup_mailbox *acpi_mp_wake_mailbox;
 23 
 24 static u64 acpi_mp_pgd __ro_after_init;
 25 static u64 acpi_mp_reset_vector_paddr __ro_after_init;
 26 
 27 static void acpi_mp_stop_this_cpu(void)
 28 {
 29         asm_acpi_mp_play_dead(acpi_mp_reset_vector_paddr, acpi_mp_pgd);
 30 }
 31 
 32 static void acpi_mp_play_dead(void)
 33 {
 34         play_dead_common();
 35         asm_acpi_mp_play_dead(acpi_mp_reset_vector_paddr, acpi_mp_pgd);
 36 }
 37 
 38 static void acpi_mp_cpu_die(unsigned int cpu)
 39 {
 40         u32 apicid = per_cpu(x86_cpu_to_apicid, cpu);
 41         unsigned long timeout;
 42 
 43         /*
 44          * Use TEST mailbox command to prove that BIOS got control over
 45          * the CPU before declaring it dead.
 46          *
 47          * BIOS has to clear 'command' field of the mailbox.
 48          */
 49         acpi_mp_wake_mailbox->apic_id = apicid;
 50         smp_store_release(&acpi_mp_wake_mailbox->command,
 51                           ACPI_MP_WAKE_COMMAND_TEST);
 52 
 53         /* Don't wait longer than a second. */
 54         timeout = USEC_PER_SEC;
 55         while (READ_ONCE(acpi_mp_wake_mailbox->command) && --timeout)
 56                 udelay(1);
 57 
 58         if (!timeout)
 59                 pr_err("Failed to hand over CPU %d to BIOS\n", cpu);
 60 }
 61 
 62 /* The argument is required to match type of x86_mapping_info::alloc_pgt_page */
 63 static void __init *alloc_pgt_page(void *dummy)
 64 {
 65         return memblock_alloc(PAGE_SIZE, PAGE_SIZE);
 66 }
 67 
 68 static void __init free_pgt_page(void *pgt, void *dummy)
 69 {
 70         return memblock_free(pgt, PAGE_SIZE);
 71 }
 72 
 73 /*
 74  * Make sure asm_acpi_mp_play_dead() is present in the identity mapping at
 75  * the same place as in the kernel page tables. asm_acpi_mp_play_dead() switches
 76  * to the identity mapping and the function has be present at the same spot in
 77  * the virtual address space before and after switching page tables.
 78  */
 79 static int __init init_transition_pgtable(pgd_t *pgd)
 80 {
 81         pgprot_t prot = PAGE_KERNEL_EXEC_NOENC;
 82         unsigned long vaddr, paddr;
 83         p4d_t *p4d;
 84         pud_t *pud;
 85         pmd_t *pmd;
 86         pte_t *pte;
 87 
 88         vaddr = (unsigned long)asm_acpi_mp_play_dead;
 89         pgd += pgd_index(vaddr);
 90         if (!pgd_present(*pgd)) {
 91                 p4d = (p4d_t *)alloc_pgt_page(NULL);
 92                 if (!p4d)
 93                         return -ENOMEM;
 94                 set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE));
 95         }
 96         p4d = p4d_offset(pgd, vaddr);
 97         if (!p4d_present(*p4d)) {
 98                 pud = (pud_t *)alloc_pgt_page(NULL);
 99                 if (!pud)
100                         return -ENOMEM;
101                 set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE));
102         }
103         pud = pud_offset(p4d, vaddr);
104         if (!pud_present(*pud)) {
105                 pmd = (pmd_t *)alloc_pgt_page(NULL);
106                 if (!pmd)
107                         return -ENOMEM;
108                 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
109         }
110         pmd = pmd_offset(pud, vaddr);
111         if (!pmd_present(*pmd)) {
112                 pte = (pte_t *)alloc_pgt_page(NULL);
113                 if (!pte)
114                         return -ENOMEM;
115                 set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
116         }
117         pte = pte_offset_kernel(pmd, vaddr);
118 
119         paddr = __pa(vaddr);
120         set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, prot));
121 
122         return 0;
123 }
124 
125 static int __init acpi_mp_setup_reset(u64 reset_vector)
126 {
127         struct x86_mapping_info info = {
128                 .alloc_pgt_page = alloc_pgt_page,
129                 .free_pgt_page  = free_pgt_page,
130                 .page_flag      = __PAGE_KERNEL_LARGE_EXEC,
131                 .kernpg_flag    = _KERNPG_TABLE_NOENC,
132         };
133         pgd_t *pgd;
134 
135         pgd = alloc_pgt_page(NULL);
136         if (!pgd)
137                 return -ENOMEM;
138 
139         for (int i = 0; i < nr_pfn_mapped; i++) {
140                 unsigned long mstart, mend;
141 
142                 mstart = pfn_mapped[i].start << PAGE_SHIFT;
143                 mend   = pfn_mapped[i].end << PAGE_SHIFT;
144                 if (kernel_ident_mapping_init(&info, pgd, mstart, mend)) {
145                         kernel_ident_mapping_free(&info, pgd);
146                         return -ENOMEM;
147                 }
148         }
149 
150         if (kernel_ident_mapping_init(&info, pgd,
151                                       PAGE_ALIGN_DOWN(reset_vector),
152                                       PAGE_ALIGN(reset_vector + 1))) {
153                 kernel_ident_mapping_free(&info, pgd);
154                 return -ENOMEM;
155         }
156 
157         if (init_transition_pgtable(pgd)) {
158                 kernel_ident_mapping_free(&info, pgd);
159                 return -ENOMEM;
160         }
161 
162         smp_ops.play_dead = acpi_mp_play_dead;
163         smp_ops.stop_this_cpu = acpi_mp_stop_this_cpu;
164         smp_ops.cpu_die = acpi_mp_cpu_die;
165 
166         acpi_mp_reset_vector_paddr = reset_vector;
167         acpi_mp_pgd = __pa(pgd);
168 
169         return 0;
170 }
171 
172 static int acpi_wakeup_cpu(u32 apicid, unsigned long start_ip)
173 {
174         if (!acpi_mp_wake_mailbox_paddr) {
175                 pr_warn_once("No MADT mailbox: cannot bringup secondary CPUs. Booting with kexec?\n");
176                 return -EOPNOTSUPP;
177         }
178 
179         /*
180          * Remap mailbox memory only for the first call to acpi_wakeup_cpu().
181          *
182          * Wakeup of secondary CPUs is fully serialized in the core code.
183          * No need to protect acpi_mp_wake_mailbox from concurrent accesses.
184          */
185         if (!acpi_mp_wake_mailbox) {
186                 acpi_mp_wake_mailbox = memremap(acpi_mp_wake_mailbox_paddr,
187                                                 sizeof(*acpi_mp_wake_mailbox),
188                                                 MEMREMAP_WB);
189         }
190 
191         /*
192          * Mailbox memory is shared between the firmware and OS. Firmware will
193          * listen on mailbox command address, and once it receives the wakeup
194          * command, the CPU associated with the given apicid will be booted.
195          *
196          * The value of 'apic_id' and 'wakeup_vector' must be visible to the
197          * firmware before the wakeup command is visible.  smp_store_release()
198          * ensures ordering and visibility.
199          */
200         acpi_mp_wake_mailbox->apic_id       = apicid;
201         acpi_mp_wake_mailbox->wakeup_vector = start_ip;
202         smp_store_release(&acpi_mp_wake_mailbox->command,
203                           ACPI_MP_WAKE_COMMAND_WAKEUP);
204 
205         /*
206          * Wait for the CPU to wake up.
207          *
208          * The CPU being woken up is essentially in a spin loop waiting to be
209          * woken up. It should not take long for it wake up and acknowledge by
210          * zeroing out ->command.
211          *
212          * ACPI specification doesn't provide any guidance on how long kernel
213          * has to wait for a wake up acknowledgment. It also doesn't provide
214          * a way to cancel a wake up request if it takes too long.
215          *
216          * In TDX environment, the VMM has control over how long it takes to
217          * wake up secondary. It can postpone scheduling secondary vCPU
218          * indefinitely. Giving up on wake up request and reporting error opens
219          * possible attack vector for VMM: it can wake up a secondary CPU when
220          * kernel doesn't expect it. Wait until positive result of the wake up
221          * request.
222          */
223         while (READ_ONCE(acpi_mp_wake_mailbox->command))
224                 cpu_relax();
225 
226         return 0;
227 }
228 
229 static void acpi_mp_disable_offlining(struct acpi_madt_multiproc_wakeup *mp_wake)
230 {
231         cpu_hotplug_disable_offlining();
232 
233         /*
234          * ACPI MADT doesn't allow to offline a CPU after it was onlined. This
235          * limits kexec: the second kernel won't be able to use more than one CPU.
236          *
237          * To prevent a kexec kernel from onlining secondary CPUs invalidate the
238          * mailbox address in the ACPI MADT wakeup structure which prevents a
239          * kexec kernel to use it.
240          *
241          * This is safe as the booting kernel has the mailbox address cached
242          * already and acpi_wakeup_cpu() uses the cached value to bring up the
243          * secondary CPUs.
244          *
245          * Note: This is a Linux specific convention and not covered by the
246          *       ACPI specification.
247          */
248         mp_wake->mailbox_address = 0;
249 }
250 
251 int __init acpi_parse_mp_wake(union acpi_subtable_headers *header,
252                               const unsigned long end)
253 {
254         struct acpi_madt_multiproc_wakeup *mp_wake;
255 
256         mp_wake = (struct acpi_madt_multiproc_wakeup *)header;
257 
258         /*
259          * Cannot use the standard BAD_MADT_ENTRY() to sanity check the @mp_wake
260          * entry.  'sizeof (struct acpi_madt_multiproc_wakeup)' can be larger
261          * than the actual size of the MP wakeup entry in ACPI table because the
262          * 'reset_vector' is only available in the V1 MP wakeup structure.
263          */
264         if (!mp_wake)
265                 return -EINVAL;
266         if (end - (unsigned long)mp_wake < ACPI_MADT_MP_WAKEUP_SIZE_V0)
267                 return -EINVAL;
268         if (mp_wake->header.length < ACPI_MADT_MP_WAKEUP_SIZE_V0)
269                 return -EINVAL;
270 
271         acpi_table_print_madt_entry(&header->common);
272 
273         acpi_mp_wake_mailbox_paddr = mp_wake->mailbox_address;
274 
275         if (mp_wake->version >= ACPI_MADT_MP_WAKEUP_VERSION_V1 &&
276             mp_wake->header.length >= ACPI_MADT_MP_WAKEUP_SIZE_V1) {
277                 if (acpi_mp_setup_reset(mp_wake->reset_vector)) {
278                         pr_warn("Failed to setup MADT reset vector\n");
279                         acpi_mp_disable_offlining(mp_wake);
280                 }
281         } else {
282                 /*
283                  * CPU offlining requires version 1 of the ACPI MADT wakeup
284                  * structure.
285                  */
286                 acpi_mp_disable_offlining(mp_wake);
287         }
288 
289         apic_update_callback(wakeup_secondary_cpu_64, acpi_wakeup_cpu);
290 
291         return 0;
292 }
293 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php