// SPDX-License-Identifier: GPL-2.0-only
/*
 * crash.c - kernel crash support code.
 * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/buildid.h>
#include <linux/init.h>
#include <linux/utsname.h>
#include <linux/vmalloc.h>
#include <linux/sizes.h>
#include <linux/kexec.h>
#include <linux/memory.h>
#include <linux/mm.h>
#include <linux/cpuhotplug.h>
#include <linux/memblock.h>
#include <linux/kmemleak.h>
#include <linux/crash_core.h>
#include <linux/reboot.h>
#include <linux/btf.h>
#include <linux/objtool.h>

#include <asm/page.h>
#include <asm/sections.h>

#include <crypto/sha1.h>

#include "kallsyms_internal.h"
#include "kexec_internal.h"

/* Per cpu memory for storing cpu states in case of system crash. */
note_buf_t __percpu *crash_notes;

#ifdef CONFIG_CRASH_DUMP

int kimage_crash_copy_vmcoreinfo(struct kimage *image)
{
	struct page *vmcoreinfo_page;
	void *safecopy;

	if (!IS_ENABLED(CONFIG_CRASH_DUMP))
		return 0;
	if (image->type != KEXEC_TYPE_CRASH)
		return 0;

	/*
	 * For kdump, allocate one vmcoreinfo safe copy from the
	 * crash memory. As we have arch_kexec_protect_crashkres()
	 * after the kexec syscall, we naturally protect it from write
	 * (even read) access under the kernel direct mapping. But on
	 * the other hand, we still need to operate on it when a crash
	 * happens, to generate the vmcoreinfo note; hereby we rely on
	 * vmap for this purpose.
	 */
	vmcoreinfo_page = kimage_alloc_control_pages(image, 0);
	if (!vmcoreinfo_page) {
		pr_warn("Could not allocate vmcoreinfo buffer\n");
		return -ENOMEM;
	}
	safecopy = vmap(&vmcoreinfo_page, 1, VM_MAP, PAGE_KERNEL);
	if (!safecopy) {
		pr_warn("Could not vmap vmcoreinfo buffer\n");
		return -ENOMEM;
	}

	image->vmcoreinfo_data_copy = safecopy;
	crash_update_vmcoreinfo_safecopy(safecopy);

	return 0;
}
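
/*
 * The helper above keeps a writable vmap alias of the vmcoreinfo safe copy
 * because the direct mapping of the crash memory may be protected after
 * load. A minimal sketch of that alloc-then-alias pattern follows; it is
 * illustrative only (error unwinding trimmed, function name made up) and
 * is not part of this file.
 */
#if 0 /* illustrative sketch */
#include <linux/gfp.h>
#include <linux/vmalloc.h>

static void *map_one_page_alias(struct page **pagep)
{
	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);

	if (!page)
		return NULL;
	*pagep = page;
	/* Create a kernel virtual alias independent of the direct map. */
	return vmap(&page, 1, VM_MAP, PAGE_KERNEL);
}
#endif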

int kexec_should_crash(struct task_struct *p)
{
	/*
	 * If crash_kexec_post_notifiers is enabled, don't run
	 * crash_kexec() here yet, which must be run after panic
	 * notifiers in panic().
	 */
	if (crash_kexec_post_notifiers)
		return 0;
	/*
	 * There are 4 panic() calls in the make_task_dead() path, each of
	 * which corresponds to one of these 4 conditions.
	 */
	if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops)
		return 1;
	return 0;
}

int kexec_crash_loaded(void)
{
	return !!kexec_crash_image;
}
EXPORT_SYMBOL_GPL(kexec_crash_loaded);

/*
 * No panic_cpu check version of crash_kexec().  This function is called
 * only when panic_cpu holds the current CPU number, which is the only
 * CPU which processes crash_kexec routines.
 */
void __noclone __crash_kexec(struct pt_regs *regs)
{
	/* Take the kexec_lock here to prevent sys_kexec_load
	 * running on one cpu from replacing the crash kernel
	 * we are using after a panic on a different cpu.
	 *
	 * If the crash kernel was not located in a fixed area
	 * of memory the xchg(&kexec_crash_image) would be
	 * sufficient.  But since I reuse the memory...
	 */
	if (kexec_trylock()) {
		if (kexec_crash_image) {
			struct pt_regs fixed_regs;

			crash_setup_regs(&fixed_regs, regs);
			crash_save_vmcoreinfo();
			machine_crash_shutdown(&fixed_regs);
			machine_kexec(kexec_crash_image);
		}
		kexec_unlock();
	}
}
STACK_FRAME_NON_STANDARD(__crash_kexec);

__bpf_kfunc void crash_kexec(struct pt_regs *regs)
{
	int old_cpu, this_cpu;

	/*
	 * Only one CPU is allowed to execute the crash_kexec() code as with
	 * panic().  Otherwise parallel calls of panic() and crash_kexec()
	 * may stop each other.  To exclude them, we use panic_cpu here too.
	 */
	old_cpu = PANIC_CPU_INVALID;
	this_cpu = raw_smp_processor_id();

	if (atomic_try_cmpxchg(&panic_cpu, &old_cpu, this_cpu)) {
		/* This is the 1st CPU which comes here, so go ahead. */
		__crash_kexec(regs);

		/*
		 * Reset panic_cpu to allow another panic()/crash_kexec()
		 * call.
		 */
		atomic_set(&panic_cpu, PANIC_CPU_INVALID);
	}
}
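
/*
 * crash_kexec() elects exactly one winner via a compare-and-exchange on
 * panic_cpu, so concurrent panics cannot stop each other mid-crash-kexec.
 * The same single-winner pattern in portable C11 atomics, as a userspace
 * sketch (CPU_INVALID stands in for PANIC_CPU_INVALID):
 */
#if 0 /* illustrative sketch */
#include <stdatomic.h>
#include <stdbool.h>

#define CPU_INVALID (-1)

static _Atomic int crash_winner = CPU_INVALID;

static bool try_become_crash_cpu(int this_cpu)
{
	int expected = CPU_INVALID;

	/* Only the first caller swaps CPU_INVALID -> this_cpu and wins. */
	return atomic_compare_exchange_strong(&crash_winner, &expected,
					      this_cpu);
}
#endif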

static inline resource_size_t crash_resource_size(const struct resource *res)
{
	return !res->end ? 0 : resource_size(res);
}

int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map,
				void **addr, unsigned long *sz)
{
	Elf64_Ehdr *ehdr;
	Elf64_Phdr *phdr;
	unsigned long nr_cpus = num_possible_cpus(), nr_phdr, elf_sz;
	unsigned char *buf;
	unsigned int cpu, i;
	unsigned long long notes_addr;
	unsigned long mstart, mend;

	/* extra phdr for vmcoreinfo ELF note */
	nr_phdr = nr_cpus + 1;
	nr_phdr += mem->nr_ranges;

	/*
	 * kexec-tools creates an extra PT_LOAD phdr for the kernel text
	 * mapping area (for example, ffffffff80000000 - ffffffffa0000000 on
	 * x86_64). This is required by tools like gdb, so the same physical
	 * memory will be mapped in two ELF headers: one will contain kernel
	 * text virtual addresses and the other will have __va(physical)
	 * addresses.
	 */

	nr_phdr++;
	elf_sz = sizeof(Elf64_Ehdr) + nr_phdr * sizeof(Elf64_Phdr);
	elf_sz = ALIGN(elf_sz, ELF_CORE_HEADER_ALIGN);

	buf = vzalloc(elf_sz);
	if (!buf)
		return -ENOMEM;

	ehdr = (Elf64_Ehdr *)buf;
	phdr = (Elf64_Phdr *)(ehdr + 1);
	memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
	ehdr->e_ident[EI_CLASS] = ELFCLASS64;
	ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
	ehdr->e_ident[EI_VERSION] = EV_CURRENT;
	ehdr->e_ident[EI_OSABI] = ELF_OSABI;
	memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD);
	ehdr->e_type = ET_CORE;
	ehdr->e_machine = ELF_ARCH;
	ehdr->e_version = EV_CURRENT;
	ehdr->e_phoff = sizeof(Elf64_Ehdr);
	ehdr->e_ehsize = sizeof(Elf64_Ehdr);
	ehdr->e_phentsize = sizeof(Elf64_Phdr);

	/* Prepare one phdr of type PT_NOTE for each possible CPU */
	for_each_possible_cpu(cpu) {
		phdr->p_type = PT_NOTE;
		notes_addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpu));
		phdr->p_offset = phdr->p_paddr = notes_addr;
		phdr->p_filesz = phdr->p_memsz = sizeof(note_buf_t);
		(ehdr->e_phnum)++;
		phdr++;
	}

	/* Prepare one PT_NOTE header for vmcoreinfo */
	phdr->p_type = PT_NOTE;
	phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note();
	phdr->p_filesz = phdr->p_memsz = VMCOREINFO_NOTE_SIZE;
	(ehdr->e_phnum)++;
	phdr++;

	/* Prepare PT_LOAD type program header for kernel text region */
	if (need_kernel_map) {
		phdr->p_type = PT_LOAD;
		phdr->p_flags = PF_R|PF_W|PF_X;
		phdr->p_vaddr = (unsigned long) _text;
		phdr->p_filesz = phdr->p_memsz = _end - _text;
		phdr->p_offset = phdr->p_paddr = __pa_symbol(_text);
		ehdr->e_phnum++;
		phdr++;
	}

	/* Go through all the ranges in mem->ranges[] and prepare phdr */
	for (i = 0; i < mem->nr_ranges; i++) {
		mstart = mem->ranges[i].start;
		mend = mem->ranges[i].end;

		phdr->p_type = PT_LOAD;
		phdr->p_flags = PF_R|PF_W|PF_X;
		phdr->p_offset = mstart;

		phdr->p_paddr = mstart;
		phdr->p_vaddr = (unsigned long) __va(mstart);
		phdr->p_filesz = phdr->p_memsz = mend - mstart + 1;
		phdr->p_align = 0;
		ehdr->e_phnum++;
#ifdef CONFIG_KEXEC_FILE
		kexec_dprintk("Crash PT_LOAD ELF header. phdr=%p vaddr=0x%llx, paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n",
			      phdr, phdr->p_vaddr, phdr->p_paddr, phdr->p_filesz,
			      ehdr->e_phnum, phdr->p_offset);
#endif
		phdr++;
	}

	*addr = buf;
	*sz = elf_sz;
	return 0;
}
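
/*
 * The size math above is: one Elf64_Ehdr followed by nr_phdr Elf64_Phdr
 * entries, rounded up to the core-header alignment. A userspace sketch of
 * the same computation; the 4096-byte alignment is an assumption here,
 * standing in for the arch-defined ELF_CORE_HEADER_ALIGN:
 */
#if 0 /* illustrative sketch */
#include <elf.h>
#include <stddef.h>

static size_t core_header_bytes(size_t nr_cpus, size_t nr_mem_ranges,
				int need_kernel_map)
{
	/* one PT_NOTE per CPU, one for vmcoreinfo, one PT_LOAD per range */
	size_t nr_phdr = nr_cpus + 1 + nr_mem_ranges +
			 (need_kernel_map ? 1 : 0);
	size_t sz = sizeof(Elf64_Ehdr) + nr_phdr * sizeof(Elf64_Phdr);
	size_t align = 4096;	/* assumed ELF_CORE_HEADER_ALIGN */

	return (sz + align - 1) & ~(align - 1);
}
#endif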

int crash_exclude_mem_range(struct crash_mem *mem,
			    unsigned long long mstart, unsigned long long mend)
{
	int i;
	unsigned long long start, end, p_start, p_end;

	for (i = 0; i < mem->nr_ranges; i++) {
		start = mem->ranges[i].start;
		end = mem->ranges[i].end;
		p_start = mstart;
		p_end = mend;

		if (p_start > end)
			continue;

		/*
		 * Because the memory ranges in mem->ranges are stored in
		 * ascending order, when we detect `p_end < start`, we can
		 * immediately exit the for loop, as the subsequent memory
		 * ranges will definitely be outside the range we are looking
		 * for.
		 */
		if (p_end < start)
			break;

		/* Truncate any area outside of range */
		if (p_start < start)
			p_start = start;
		if (p_end > end)
			p_end = end;

		/* Found completely overlapping range */
		if (p_start == start && p_end == end) {
			memmove(&mem->ranges[i], &mem->ranges[i + 1],
				(mem->nr_ranges - (i + 1)) * sizeof(mem->ranges[i]));
			i--;
			mem->nr_ranges--;
		} else if (p_start > start && p_end < end) {
			/* Split original range */
			if (mem->nr_ranges >= mem->max_nr_ranges)
				return -ENOMEM;

			memmove(&mem->ranges[i + 2], &mem->ranges[i + 1],
				(mem->nr_ranges - (i + 1)) * sizeof(mem->ranges[i]));

			mem->ranges[i].end = p_start - 1;
			mem->ranges[i + 1].start = p_end + 1;
			mem->ranges[i + 1].end = end;

			i++;
			mem->nr_ranges++;
		} else if (p_start != start)
			mem->ranges[i].end = p_start - 1;
		else
			mem->ranges[i].start = p_end + 1;
	}

	return 0;
}
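
/*
 * crash_exclude_mem_range() punches the hole [mstart, mend] out of a
 * sorted range list, splitting a range in two when the hole lands
 * strictly inside it. A self-contained userspace sketch of the same
 * logic (types and names here are made up for illustration):
 */
#if 0 /* illustrative sketch */
#include <stdio.h>

struct range { unsigned long long start, end; };

static int exclude_range(struct range *r, int *nr, int max,
			 unsigned long long hs, unsigned long long he)
{
	for (int i = 0; i < *nr; i++) {
		unsigned long long s = r[i].start, e = r[i].end;

		if (hs > e)		/* hole entirely above this range */
			continue;
		if (he < s)		/* sorted list: nothing further overlaps */
			break;
		if (hs <= s && he >= e) {	/* hole swallows the range */
			for (int j = i + 1; j < *nr; j++)
				r[j - 1] = r[j];
			(*nr)--;
			i--;
		} else if (hs > s && he < e) {	/* interior hole: split */
			if (*nr >= max)
				return -1;
			for (int j = *nr; j > i + 1; j--)
				r[j] = r[j - 1];
			r[i + 1].start = he + 1;
			r[i + 1].end = e;
			r[i].end = hs - 1;
			(*nr)++;
			i++;
		} else if (hs > s) {		/* clip the tail */
			r[i].end = hs - 1;
		} else {			/* clip the head */
			r[i].start = he + 1;
		}
	}
	return 0;
}

int main(void)
{
	struct range r[4] = { { 0x1000, 0x8fff } };
	int nr = 1;

	exclude_range(r, &nr, 4, 0x3000, 0x3fff);
	for (int i = 0; i < nr; i++)
		printf("%#llx-%#llx\n", r[i].start, r[i].end);
	return 0;	/* prints 0x1000-0x2fff and 0x4000-0x8fff */
}
#endif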

ssize_t crash_get_memory_size(void)
{
	ssize_t size = 0;

	if (!kexec_trylock())
		return -EBUSY;

	size += crash_resource_size(&crashk_res);
	size += crash_resource_size(&crashk_low_res);

	kexec_unlock();
	return size;
}

static int __crash_shrink_memory(struct resource *old_res,
				 unsigned long new_size)
{
	struct resource *ram_res;

	ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL);
	if (!ram_res)
		return -ENOMEM;

	ram_res->start = old_res->start + new_size;
	ram_res->end   = old_res->end;
	ram_res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
	ram_res->name  = "System RAM";

	if (!new_size) {
		release_resource(old_res);
		old_res->start = 0;
		old_res->end   = 0;
	} else {
		crashk_res.end = ram_res->start - 1;
	}

	crash_free_reserved_phys_range(ram_res->start, ram_res->end);
	insert_resource(&iomem_resource, ram_res);

	return 0;
}

int crash_shrink_memory(unsigned long new_size)
{
	int ret = 0;
	unsigned long old_size, low_size;

	if (!kexec_trylock())
		return -EBUSY;

	if (kexec_crash_image) {
		ret = -ENOENT;
		goto unlock;
	}

	low_size = crash_resource_size(&crashk_low_res);
	old_size = crash_resource_size(&crashk_res) + low_size;
	new_size = roundup(new_size, KEXEC_CRASH_MEM_ALIGN);
	if (new_size >= old_size) {
		ret = (new_size == old_size) ? 0 : -EINVAL;
		goto unlock;
	}

	/*
	 * (low_size > new_size) implies that low_size is greater than zero.
	 * This also means that if low_size is zero, only crashk_res needs
	 * to be shrunken.
	 *
	 * If low_size is greater than 0, (low_size > new_size) indicates that
	 * crashk_low_res also needs to be shrunken. Otherwise, only crashk_res
	 * needs to be shrunken.
	 */
	if (low_size > new_size) {
		ret = __crash_shrink_memory(&crashk_res, 0);
		if (ret)
			goto unlock;

		ret = __crash_shrink_memory(&crashk_low_res, new_size);
	} else {
		ret = __crash_shrink_memory(&crashk_res, new_size - low_size);
	}

	/* Swap crashk_res and crashk_low_res if needed */
	if (!crashk_res.end && crashk_low_res.end) {
		crashk_res.start = crashk_low_res.start;
		crashk_res.end   = crashk_low_res.end;
		release_resource(&crashk_low_res);
		crashk_low_res.start = 0;
		crashk_low_res.end   = 0;
		insert_resource(&iomem_resource, &crashk_res);
	}

unlock:
	kexec_unlock();
	return ret;
}
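
/*
 * crash_get_memory_size() and crash_shrink_memory() back the
 * /sys/kernel/kexec_crash_size interface: reading it reports the crash
 * kernel reservation in bytes, and writing a smaller value releases the
 * tail back to System RAM. A hedged userspace sketch of the read side:
 */
#if 0 /* illustrative sketch */
#include <stdio.h>

int main(void)
{
	unsigned long long bytes;
	FILE *f = fopen("/sys/kernel/kexec_crash_size", "r");

	if (!f)
		return 1;
	if (fscanf(f, "%llu", &bytes) == 1)
		printf("crashkernel reservation: %llu bytes\n", bytes);
	fclose(f);
	return 0;
}
#endif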

void crash_save_cpu(struct pt_regs *regs, int cpu)
{
	struct elf_prstatus prstatus;
	u32 *buf;

	if ((cpu < 0) || (cpu >= nr_cpu_ids))
		return;

	/* Using ELF notes here is opportunistic.
	 * I need a well defined structure format
	 * for the data I pass, and I need tags
	 * on the data to indicate what information I have
	 * squirrelled away.  ELF notes happen to provide
	 * all of that, so there is no need to invent something new.
	 */
	buf = (u32 *)per_cpu_ptr(crash_notes, cpu);
	if (!buf)
		return;
	memset(&prstatus, 0, sizeof(prstatus));
	prstatus.common.pr_pid = current->pid;
	elf_core_copy_regs(&prstatus.pr_reg, regs);
	buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
			      &prstatus, sizeof(prstatus));
	final_note(buf);
}
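
/*
 * append_elf_note(), used above, lays each note out as an Elf64_Nhdr
 * followed by the name and descriptor, each padded to a 4-byte boundary;
 * final_note() terminates the buffer with a zeroed header. A userspace
 * sketch of that layout (helper name made up):
 */
#if 0 /* illustrative sketch */
#include <elf.h>
#include <string.h>

/* Append one note; buf must have room. Returns the next free word. */
static Elf64_Word *put_note(Elf64_Word *buf, const char *name,
			    Elf64_Word type, const void *desc, size_t len)
{
	Elf64_Nhdr *note = (Elf64_Nhdr *)buf;

	note->n_namesz = strlen(name) + 1;
	note->n_descsz = len;
	note->n_type   = type;
	buf += sizeof(*note) / sizeof(Elf64_Word);
	memcpy(buf, name, note->n_namesz);
	buf += (note->n_namesz + 3) / 4;	/* names are 4-byte aligned */
	memcpy(buf, desc, len);
	buf += (len + 3) / 4;			/* descriptors too */
	return buf;
}
#endif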

static int __init crash_notes_memory_init(void)
{
	/* Allocate memory for saving cpu registers. */
	size_t size, align;

	/*
	 * crash_notes could be allocated across 2 vmalloc pages when percpu
	 * is vmalloc based, since vmalloc doesn't guarantee that 2 continuous
	 * vmalloc pages are also on 2 continuous physical pages. In that case
	 * the 2nd part of crash_notes in the 2nd page could be lost, since
	 * only the starting address and size of crash_notes are exported
	 * through sysfs. Here round up the size of crash_notes to the nearest
	 * power of two and pass it to __alloc_percpu as the align value, so
	 * that crash_notes is allocated inside one physical page.
	 */
	size = sizeof(note_buf_t);
	align = min(roundup_pow_of_two(sizeof(note_buf_t)), PAGE_SIZE);

	/*
	 * Break the compile if size is bigger than PAGE_SIZE, since
	 * crash_notes would then definitely span 2 pages.
	 */
	BUILD_BUG_ON(size > PAGE_SIZE);

	crash_notes = __alloc_percpu(size, align);
	if (!crash_notes) {
		pr_warn("Memory allocation for saving cpu register states failed\n");
		return -ENOMEM;
	}
	return 0;
}
subsys_initcall(crash_notes_memory_init);

#endif /* CONFIG_CRASH_DUMP */
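
/*
 * Why the power-of-two alignment above keeps the note buffer within one
 * page: an object aligned to a power of two that is >= its size (and <=
 * PAGE_SIZE) cannot straddle a page boundary. A quick check in plain C;
 * the 408-byte size is a made-up stand-in for sizeof(note_buf_t):
 */
#if 0 /* illustrative sketch */
#include <stdio.h>

static unsigned long pow2_roundup(unsigned long x)
{
	unsigned long p = 1;

	while (p < x)
		p <<= 1;
	return p;
}

int main(void)
{
	unsigned long size = 408;	/* assumed sizeof(note_buf_t) */
	unsigned long align = pow2_roundup(size);

	/* start % align == 0 and align >= size => no page crossing */
	printf("size %lu -> align %lu\n", size, align);
	return 0;
}
#endif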

#ifdef CONFIG_CRASH_HOTPLUG
#undef pr_fmt
#define pr_fmt(fmt) "crash hp: " fmt

/*
 * Different than kexec/kdump loading/unloading/jumping/shrinking, which
 * usually happen rarely, there can be many crash hotplug events notified
 * during one short period, e.g. when one memory board is hot added and its
 * memory regions are onlined. So the mutex __crash_hotplug_lock is used
 * specifically to serialize the crash hotplug handling.
 */
static DEFINE_MUTEX(__crash_hotplug_lock);
#define crash_hotplug_lock() mutex_lock(&__crash_hotplug_lock)
#define crash_hotplug_unlock() mutex_unlock(&__crash_hotplug_lock)

/*
 * This routine is utilized when the crash_hotplug sysfs node is read.
 * It reflects the kernel's ability/permission to update the kdump
 * image directly.
 */
int crash_check_hotplug_support(void)
{
	int rc = 0;

	crash_hotplug_lock();
	/* Obtain lock while reading crash information */
	if (!kexec_trylock()) {
		pr_info("kexec_trylock() failed, kdump image may be inaccurate\n");
		crash_hotplug_unlock();
		return 0;
	}
	if (kexec_crash_image)
		rc = kexec_crash_image->hotplug_support;
	/* Release lock now that update complete */
	kexec_unlock();
	crash_hotplug_unlock();

	return rc;
}

/*
 * To accurately reflect hot un/plug changes of CPU and memory resources
 * (including the onlining and offlining of those resources), the relevant
 * kexec segments must be updated with the latest CPU and memory resources.
 *
 * Architectures must ensure two things for all segments that need
 * updating during hotplug events:
 *
 * 1. Segments must be large enough to accommodate a growing number of
 *    resources.
 * 2. Exclude the segments from SHA verification.
 *
 * For example, on most architectures, the elfcorehdr (which is passed
 * to the crash kernel via the elfcorehdr= parameter) must include the
 * new list of CPUs and memory. To make changes to the elfcorehdr, it
 * should be large enough to permit a growing number of CPU and memory
 * resources. One can estimate the elfcorehdr memory size based on
 * NR_CPUS_DEFAULT and CRASH_MAX_MEMORY_RANGES. The elfcorehdr is
 * excluded from SHA verification by default if the architecture
 * supports crash hotplug.
 */
static void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu, void *arg)
{
	struct kimage *image;

	crash_hotplug_lock();
	/* Obtain lock while changing crash information */
	if (!kexec_trylock()) {
		pr_info("kexec_trylock() failed, kdump image may be inaccurate\n");
		crash_hotplug_unlock();
		return;
	}

	/* Check that kdump is loaded */
	if (!kexec_crash_image)
		goto out;

	image = kexec_crash_image;

	/* Check that a kexec segments update is permitted */
	if (!image->hotplug_support)
		goto out;

	if (hp_action == KEXEC_CRASH_HP_ADD_CPU ||
		hp_action == KEXEC_CRASH_HP_REMOVE_CPU)
		pr_debug("hp_action %u, cpu %u\n", hp_action, cpu);
	else
		pr_debug("hp_action %u\n", hp_action);

	/*
	 * The elfcorehdr_index is set to -1 when the struct kimage
	 * is allocated. Find the segment containing the elfcorehdr,
	 * if not already found.
	 */
	if (image->elfcorehdr_index < 0) {
		unsigned long mem;
		unsigned char *ptr;
		unsigned int n;

		for (n = 0; n < image->nr_segments; n++) {
			mem = image->segment[n].mem;
			ptr = kmap_local_page(pfn_to_page(mem >> PAGE_SHIFT));
			if (ptr) {
				/* The segment containing elfcorehdr */
				if (memcmp(ptr, ELFMAG, SELFMAG) == 0)
					image->elfcorehdr_index = (int)n;
				kunmap_local(ptr);
			}
		}
	}

	if (image->elfcorehdr_index < 0) {
		pr_err("unable to locate elfcorehdr segment");
		goto out;
	}

	/* Needed in order for the segments to be updated */
	arch_kexec_unprotect_crashkres();

	/* Differentiate between normal load and hotplug update */
	image->hp_action = hp_action;

	/* Now invoke arch-specific update handler */
	arch_crash_handle_hotplug_event(image, arg);

	/* No longer handling a hotplug event */
	image->hp_action = KEXEC_CRASH_HP_NONE;
	image->elfcorehdr_updated = true;

	/* Change back to read-only */
	arch_kexec_protect_crashkres();

	/* Errors in the callback are not a reason to rollback state */
out:
	/* Release lock now that update complete */
	kexec_unlock();
	crash_hotplug_unlock();
}
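
/*
 * The scan above recognizes the elfcorehdr segment purely by its ELF
 * magic bytes at the start of the segment. The same check, stated as a
 * tiny userspace helper:
 */
#if 0 /* illustrative sketch */
#include <elf.h>
#include <string.h>

static int looks_like_elf(const unsigned char *buf)
{
	return memcmp(buf, ELFMAG, SELFMAG) == 0;	/* "\177ELF" */
}
#endif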

static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, void *arg)
{
	switch (val) {
	case MEM_ONLINE:
		crash_handle_hotplug_event(KEXEC_CRASH_HP_ADD_MEMORY,
			KEXEC_CRASH_HP_INVALID_CPU, arg);
		break;

	case MEM_OFFLINE:
		crash_handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_MEMORY,
			KEXEC_CRASH_HP_INVALID_CPU, arg);
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block crash_memhp_nb = {
	.notifier_call = crash_memhp_notifier,
	.priority = 0
};

static int crash_cpuhp_online(unsigned int cpu)
{
	crash_handle_hotplug_event(KEXEC_CRASH_HP_ADD_CPU, cpu, NULL);
	return 0;
}

static int crash_cpuhp_offline(unsigned int cpu)
{
	crash_handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_CPU, cpu, NULL);
	return 0;
}

static int __init crash_hotplug_init(void)
{
	int result = 0;

	if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
		register_memory_notifier(&crash_memhp_nb);

	if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) {
		result = cpuhp_setup_state_nocalls(CPUHP_BP_PREPARE_DYN,
			"crash/cpuhp", crash_cpuhp_online, crash_cpuhp_offline);
	}

	return result;
}

subsys_initcall(crash_hotplug_init);
#endif /* CONFIG_CRASH_HOTPLUG */
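
/*
 * crash_check_hotplug_support() feeds the crash_hotplug sysfs attribute,
 * which tools such as kdump scripts can poll to learn whether the kernel
 * updates the loaded kdump image on CPU/memory hotplug by itself (so
 * userspace need not reload it). A hedged userspace sketch; the two
 * attribute paths below are assumptions based on this feature's sysfs ABI:
 */
#if 0 /* illustrative sketch */
#include <stdio.h>

static int read_flag(const char *path)
{
	int v = -1;
	FILE *f = fopen(path, "r");

	if (!f)
		return -1;
	if (fscanf(f, "%d", &v) != 1)
		v = -1;
	fclose(f);
	return v;
}

int main(void)
{
	printf("cpu crash_hotplug: %d\n",
	       read_flag("/sys/devices/system/cpu/crash_hotplug"));
	printf("memory crash_hotplug: %d\n",
	       read_flag("/sys/devices/system/memory/crash_hotplug"));
	return 0;
}
#endif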