1 // SPDX-License-Identifier: GPL-2.0-only 1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 2 /* 3 * crash.c - kernel crash support code. 3 * crash.c - kernel crash support code. 4 * Copyright (C) 2002-2004 Eric Biederman <eb 4 * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com> 5 */ 5 */ 6 6 7 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt << 8 << 9 #include <linux/buildid.h> 7 #include <linux/buildid.h> 10 #include <linux/init.h> !! 8 #include <linux/crash_core.h> 11 #include <linux/utsname.h> 9 #include <linux/utsname.h> 12 #include <linux/vmalloc.h> 10 #include <linux/vmalloc.h> 13 #include <linux/sizes.h> << 14 #include <linux/kexec.h> << 15 #include <linux/memory.h> << 16 #include <linux/mm.h> << 17 #include <linux/cpuhotplug.h> << 18 #include <linux/memblock.h> << 19 #include <linux/kmemleak.h> << 20 #include <linux/crash_core.h> << 21 #include <linux/reboot.h> << 22 #include <linux/btf.h> << 23 #include <linux/objtool.h> << 24 11 25 #include <asm/page.h> 12 #include <asm/page.h> 26 #include <asm/sections.h> 13 #include <asm/sections.h> 27 14 28 #include <crypto/sha1.h> 15 #include <crypto/sha1.h> 29 16 30 #include "kallsyms_internal.h" !! 17 /* vmcoreinfo stuff */ 31 #include "kexec_internal.h" !! 18 unsigned char *vmcoreinfo_data; >> 19 size_t vmcoreinfo_size; >> 20 u32 *vmcoreinfo_note; 32 21 33 /* Per cpu memory for storing cpu states in ca !! 22 /* trusted vmcoreinfo, e.g. we can make a copy in the crash memory */ 34 note_buf_t __percpu *crash_notes; !! 23 static unsigned char *vmcoreinfo_data_safecopy; 35 24 36 #ifdef CONFIG_CRASH_DUMP !! 25 /* >> 26 * parsing the "crashkernel" commandline >> 27 * >> 28 * this code is intended to be called from architecture specific code >> 29 */ 37 30 38 int kimage_crash_copy_vmcoreinfo(struct kimage << 39 { << 40 struct page *vmcoreinfo_page; << 41 void *safecopy; << 42 31 43 if (!IS_ENABLED(CONFIG_CRASH_DUMP)) !! 32 /* 44 return 0; !! 33 * This function parses command lines in the format 45 if (image->type != KEXEC_TYPE_CRASH) !! 34 * 46 return 0; !! 35 * crashkernel=ramsize-range:size[,...][@offset] >> 36 * >> 37 * The function returns 0 on success and -EINVAL on failure. >> 38 */ >> 39 static int __init parse_crashkernel_mem(char *cmdline, >> 40 unsigned long long system_ram, >> 41 unsigned long long *crash_size, >> 42 unsigned long long *crash_base) >> 43 { >> 44 char *cur = cmdline, *tmp; >> 45 >> 46 /* for each entry of the comma-separated list */ >> 47 do { >> 48 unsigned long long start, end = ULLONG_MAX, size; >> 49 >> 50 /* get the start of the range */ >> 51 start = memparse(cur, &tmp); >> 52 if (cur == tmp) { >> 53 pr_warn("crashkernel: Memory value expected\n"); >> 54 return -EINVAL; >> 55 } >> 56 cur = tmp; >> 57 if (*cur != '-') { >> 58 pr_warn("crashkernel: '-' expected\n"); >> 59 return -EINVAL; >> 60 } >> 61 cur++; 47 62 48 /* !! 63 /* if no ':' is here, than we read the end */ 49 * For kdump, allocate one vmcoreinfo !! 64 if (*cur != ':') { 50 * crash memory. as we have arch_kexec !! 65 end = memparse(cur, &tmp); 51 * after kexec syscall, we naturally p !! 66 if (cur == tmp) { 52 * (even read) access under kernel dir !! 67 pr_warn("crashkernel: Memory value expected\n"); 53 * the other hand, we still need to op !! 68 return -EINVAL; 54 * happens to generate vmcoreinfo note !! 69 } 55 * vmap for this purpose. !! 70 cur = tmp; 56 */ !! 71 if (end <= start) { 57 vmcoreinfo_page = kimage_alloc_control !! 72 pr_warn("crashkernel: end <= start\n"); 58 if (!vmcoreinfo_page) { !! 73 return -EINVAL; 59 pr_warn("Could not allocate vm !! 74 } 60 return -ENOMEM; !! 75 } 61 } << 62 safecopy = vmap(&vmcoreinfo_page, 1, V << 63 if (!safecopy) { << 64 pr_warn("Could not vmap vmcore << 65 return -ENOMEM; << 66 } << 67 76 68 image->vmcoreinfo_data_copy = safecopy !! 77 if (*cur != ':') { 69 crash_update_vmcoreinfo_safecopy(safec !! 78 pr_warn("crashkernel: ':' expected\n"); >> 79 return -EINVAL; >> 80 } >> 81 cur++; 70 82 71 return 0; !! 83 size = memparse(cur, &tmp); 72 } !! 84 if (cur == tmp) { >> 85 pr_warn("Memory value expected\n"); >> 86 return -EINVAL; >> 87 } >> 88 cur = tmp; >> 89 if (size >= system_ram) { >> 90 pr_warn("crashkernel: invalid size\n"); >> 91 return -EINVAL; >> 92 } 73 93 >> 94 /* match ? */ >> 95 if (system_ram >= start && system_ram < end) { >> 96 *crash_size = size; >> 97 break; >> 98 } >> 99 } while (*cur++ == ','); 74 100 >> 101 if (*crash_size > 0) { >> 102 while (*cur && *cur != ' ' && *cur != '@') >> 103 cur++; >> 104 if (*cur == '@') { >> 105 cur++; >> 106 *crash_base = memparse(cur, &tmp); >> 107 if (cur == tmp) { >> 108 pr_warn("Memory value expected after '@'\n"); >> 109 return -EINVAL; >> 110 } >> 111 } >> 112 } else >> 113 pr_info("crashkernel size resulted in zero bytes\n"); 75 114 76 int kexec_should_crash(struct task_struct *p) << 77 { << 78 /* << 79 * If crash_kexec_post_notifiers is en << 80 * crash_kexec() here yet, which must << 81 * notifiers in panic(). << 82 */ << 83 if (crash_kexec_post_notifiers) << 84 return 0; << 85 /* << 86 * There are 4 panic() calls in make_t << 87 * corresponds to each of these 4 cond << 88 */ << 89 if (in_interrupt() || !p->pid || is_gl << 90 return 1; << 91 return 0; 115 return 0; 92 } 116 } 93 117 94 int kexec_crash_loaded(void) << 95 { << 96 return !!kexec_crash_image; << 97 } << 98 EXPORT_SYMBOL_GPL(kexec_crash_loaded); << 99 << 100 /* 118 /* 101 * No panic_cpu check version of crash_kexec() !! 119 * That function parses "simple" (old) crashkernel command lines like 102 * only when panic_cpu holds the current CPU n !! 120 * 103 * which processes crash_kexec routines. !! 121 * crashkernel=size[@offset] >> 122 * >> 123 * It returns 0 on success and -EINVAL on failure. 104 */ 124 */ 105 void __noclone __crash_kexec(struct pt_regs *r !! 125 static int __init parse_crashkernel_simple(char *cmdline, 106 { !! 126 unsigned long long *crash_size, 107 /* Take the kexec_lock here to prevent !! 127 unsigned long long *crash_base) 108 * running on one cpu from replacing t !! 128 { 109 * we are using after a panic on a dif !! 129 char *cur = cmdline; 110 * !! 130 111 * If the crash kernel was not located !! 131 *crash_size = memparse(cmdline, &cur); 112 * of memory the xchg(&kexec_crash_ima !! 132 if (cmdline == cur) { 113 * sufficient. But since I reuse the !! 133 pr_warn("crashkernel: memory value expected\n"); 114 */ !! 134 return -EINVAL; 115 if (kexec_trylock()) { !! 135 } 116 if (kexec_crash_image) { !! 136 117 struct pt_regs fixed_r !! 137 if (*cur == '@') 118 !! 138 *crash_base = memparse(cur+1, &cur); 119 crash_setup_regs(&fixe !! 139 else if (*cur != ' ' && *cur != '\0') { 120 crash_save_vmcoreinfo( !! 140 pr_warn("crashkernel: unrecognized char: %c\n", *cur); 121 machine_crash_shutdown !! 141 return -EINVAL; 122 machine_kexec(kexec_cr << 123 } << 124 kexec_unlock(); << 125 } 142 } 126 } << 127 STACK_FRAME_NON_STANDARD(__crash_kexec); << 128 143 129 __bpf_kfunc void crash_kexec(struct pt_regs *r !! 144 return 0; 130 { !! 145 } 131 int old_cpu, this_cpu; << 132 146 133 /* !! 147 #define SUFFIX_HIGH 0 134 * Only one CPU is allowed to execute !! 148 #define SUFFIX_LOW 1 135 * panic(). Otherwise parallel calls !! 149 #define SUFFIX_NULL 2 136 * may stop each other. To exclude th !! 150 static __initdata char *suffix_tbl[] = { 137 */ !! 151 [SUFFIX_HIGH] = ",high", 138 old_cpu = PANIC_CPU_INVALID; !! 152 [SUFFIX_LOW] = ",low", 139 this_cpu = raw_smp_processor_id(); !! 153 [SUFFIX_NULL] = NULL, >> 154 }; 140 155 141 if (atomic_try_cmpxchg(&panic_cpu, &ol !! 156 /* 142 /* This is the 1st CPU which c !! 157 * That function parses "suffix" crashkernel command lines like 143 __crash_kexec(regs); !! 158 * 144 !! 159 * crashkernel=size,[high|low] 145 /* !! 160 * 146 * Reset panic_cpu to allow an !! 161 * It returns 0 on success and -EINVAL on failure. 147 * call. !! 162 */ 148 */ !! 163 static int __init parse_crashkernel_suffix(char *cmdline, 149 atomic_set(&panic_cpu, PANIC_C !! 164 unsigned long long *crash_size, >> 165 const char *suffix) >> 166 { >> 167 char *cur = cmdline; >> 168 >> 169 *crash_size = memparse(cmdline, &cur); >> 170 if (cmdline == cur) { >> 171 pr_warn("crashkernel: memory value expected\n"); >> 172 return -EINVAL; >> 173 } >> 174 >> 175 /* check with suffix */ >> 176 if (strncmp(cur, suffix, strlen(suffix))) { >> 177 pr_warn("crashkernel: unrecognized char: %c\n", *cur); >> 178 return -EINVAL; >> 179 } >> 180 cur += strlen(suffix); >> 181 if (*cur != ' ' && *cur != '\0') { >> 182 pr_warn("crashkernel: unrecognized char: %c\n", *cur); >> 183 return -EINVAL; 150 } 184 } 151 } << 152 185 153 static inline resource_size_t crash_resource_s !! 186 return 0; 154 { << 155 return !res->end ? 0 : resource_size(r << 156 } 187 } 157 188 >> 189 static __init char *get_last_crashkernel(char *cmdline, >> 190 const char *name, >> 191 const char *suffix) >> 192 { >> 193 char *p = cmdline, *ck_cmdline = NULL; >> 194 >> 195 /* find crashkernel and use the last one if there are more */ >> 196 p = strstr(p, name); >> 197 while (p) { >> 198 char *end_p = strchr(p, ' '); >> 199 char *q; >> 200 >> 201 if (!end_p) >> 202 end_p = p + strlen(p); >> 203 >> 204 if (!suffix) { >> 205 int i; >> 206 >> 207 /* skip the one with any known suffix */ >> 208 for (i = 0; suffix_tbl[i]; i++) { >> 209 q = end_p - strlen(suffix_tbl[i]); >> 210 if (!strncmp(q, suffix_tbl[i], >> 211 strlen(suffix_tbl[i]))) >> 212 goto next; >> 213 } >> 214 ck_cmdline = p; >> 215 } else { >> 216 q = end_p - strlen(suffix); >> 217 if (!strncmp(q, suffix, strlen(suffix))) >> 218 ck_cmdline = p; >> 219 } >> 220 next: >> 221 p = strstr(p+1, name); >> 222 } 158 223 >> 224 if (!ck_cmdline) >> 225 return NULL; 159 226 >> 227 return ck_cmdline; >> 228 } 160 229 161 int crash_prepare_elf64_headers(struct crash_m !! 230 static int __init __parse_crashkernel(char *cmdline, 162 void **addr, unsigne !! 231 unsigned long long system_ram, >> 232 unsigned long long *crash_size, >> 233 unsigned long long *crash_base, >> 234 const char *name, >> 235 const char *suffix) 163 { 236 { 164 Elf64_Ehdr *ehdr; !! 237 char *first_colon, *first_space; 165 Elf64_Phdr *phdr; !! 238 char *ck_cmdline; 166 unsigned long nr_cpus = num_possible_c << 167 unsigned char *buf; << 168 unsigned int cpu, i; << 169 unsigned long long notes_addr; << 170 unsigned long mstart, mend; << 171 239 172 /* extra phdr for vmcoreinfo ELF note !! 240 BUG_ON(!crash_size || !crash_base); 173 nr_phdr = nr_cpus + 1; !! 241 *crash_size = 0; 174 nr_phdr += mem->nr_ranges; !! 242 *crash_base = 0; 175 243 176 /* !! 244 ck_cmdline = get_last_crashkernel(cmdline, name, suffix); 177 * kexec-tools creates an extra PT_LOA << 178 * area (for example, ffffffff80000000 << 179 * I think this is required by tools l << 180 * memory will be mapped in two ELF he << 181 * text virtual addresses and other wi << 182 */ << 183 245 184 nr_phdr++; !! 246 if (!ck_cmdline) 185 elf_sz = sizeof(Elf64_Ehdr) + nr_phdr !! 247 return -EINVAL; 186 elf_sz = ALIGN(elf_sz, ELF_CORE_HEADER << 187 248 188 buf = vzalloc(elf_sz); !! 249 ck_cmdline += strlen(name); 189 if (!buf) << 190 return -ENOMEM; << 191 250 192 ehdr = (Elf64_Ehdr *)buf; !! 251 if (suffix) 193 phdr = (Elf64_Phdr *)(ehdr + 1); !! 252 return parse_crashkernel_suffix(ck_cmdline, crash_size, 194 memcpy(ehdr->e_ident, ELFMAG, SELFMAG) !! 253 suffix); 195 ehdr->e_ident[EI_CLASS] = ELFCLASS64; !! 254 /* 196 ehdr->e_ident[EI_DATA] = ELFDATA2LSB; !! 255 * if the commandline contains a ':', then that's the extended 197 ehdr->e_ident[EI_VERSION] = EV_CURRENT !! 256 * syntax -- if not, it must be the classic syntax 198 ehdr->e_ident[EI_OSABI] = ELF_OSABI; !! 257 */ 199 memset(ehdr->e_ident + EI_PAD, 0, EI_N !! 258 first_colon = strchr(ck_cmdline, ':'); 200 ehdr->e_type = ET_CORE; !! 259 first_space = strchr(ck_cmdline, ' '); 201 ehdr->e_machine = ELF_ARCH; !! 260 if (first_colon && (!first_space || first_colon < first_space)) 202 ehdr->e_version = EV_CURRENT; !! 261 return parse_crashkernel_mem(ck_cmdline, system_ram, 203 ehdr->e_phoff = sizeof(Elf64_Ehdr); !! 262 crash_size, crash_base); 204 ehdr->e_ehsize = sizeof(Elf64_Ehdr); << 205 ehdr->e_phentsize = sizeof(Elf64_Phdr) << 206 << 207 /* Prepare one phdr of type PT_NOTE fo << 208 for_each_possible_cpu(cpu) { << 209 phdr->p_type = PT_NOTE; << 210 notes_addr = per_cpu_ptr_to_ph << 211 phdr->p_offset = phdr->p_paddr << 212 phdr->p_filesz = phdr->p_memsz << 213 (ehdr->e_phnum)++; << 214 phdr++; << 215 } << 216 << 217 /* Prepare one PT_NOTE header for vmco << 218 phdr->p_type = PT_NOTE; << 219 phdr->p_offset = phdr->p_paddr = paddr << 220 phdr->p_filesz = phdr->p_memsz = VMCOR << 221 (ehdr->e_phnum)++; << 222 phdr++; << 223 << 224 /* Prepare PT_LOAD type program header << 225 if (need_kernel_map) { << 226 phdr->p_type = PT_LOAD; << 227 phdr->p_flags = PF_R|PF_W|PF_X << 228 phdr->p_vaddr = (unsigned long << 229 phdr->p_filesz = phdr->p_memsz << 230 phdr->p_offset = phdr->p_paddr << 231 ehdr->e_phnum++; << 232 phdr++; << 233 } << 234 << 235 /* Go through all the ranges in mem->r << 236 for (i = 0; i < mem->nr_ranges; i++) { << 237 mstart = mem->ranges[i].start; << 238 mend = mem->ranges[i].end; << 239 << 240 phdr->p_type = PT_LOAD; << 241 phdr->p_flags = PF_R|PF_W|PF_X << 242 phdr->p_offset = mstart; << 243 << 244 phdr->p_paddr = mstart; << 245 phdr->p_vaddr = (unsigned long << 246 phdr->p_filesz = phdr->p_memsz << 247 phdr->p_align = 0; << 248 ehdr->e_phnum++; << 249 #ifdef CONFIG_KEXEC_FILE << 250 kexec_dprintk("Crash PT_LOAD E << 251 phdr, phdr->p_va << 252 ehdr->e_phnum, p << 253 #endif << 254 phdr++; << 255 } << 256 263 257 *addr = buf; !! 264 return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base); 258 *sz = elf_sz; << 259 return 0; << 260 } 265 } 261 266 262 int crash_exclude_mem_range(struct crash_mem * !! 267 /* 263 unsigned long long !! 268 * That function is the entry point for command line parsing and should be >> 269 * called from the arch-specific code. >> 270 */ >> 271 int __init parse_crashkernel(char *cmdline, >> 272 unsigned long long system_ram, >> 273 unsigned long long *crash_size, >> 274 unsigned long long *crash_base) 264 { 275 { 265 int i; !! 276 return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, 266 unsigned long long start, end, p_start !! 277 "crashkernel=", NULL); 267 << 268 for (i = 0; i < mem->nr_ranges; i++) { << 269 start = mem->ranges[i].start; << 270 end = mem->ranges[i].end; << 271 p_start = mstart; << 272 p_end = mend; << 273 << 274 if (p_start > end) << 275 continue; << 276 << 277 /* << 278 * Because the memory ranges i << 279 * ascending order, when we de << 280 * immediately exit the for lo << 281 * ranges will definitely be o << 282 * for. << 283 */ << 284 if (p_end < start) << 285 break; << 286 << 287 /* Truncate any area outside o << 288 if (p_start < start) << 289 p_start = start; << 290 if (p_end > end) << 291 p_end = end; << 292 << 293 /* Found completely overlappin << 294 if (p_start == start && p_end << 295 memmove(&mem->ranges[i << 296 (mem->nr_range << 297 i--; << 298 mem->nr_ranges--; << 299 } else if (p_start > start && << 300 /* Split original rang << 301 if (mem->nr_ranges >= << 302 return -ENOMEM << 303 << 304 memmove(&mem->ranges[i << 305 (mem->nr_range << 306 << 307 mem->ranges[i].end = p << 308 mem->ranges[i + 1].sta << 309 mem->ranges[i + 1].end << 310 << 311 i++; << 312 mem->nr_ranges++; << 313 } else if (p_start != start) << 314 mem->ranges[i].end = p << 315 else << 316 mem->ranges[i].start = << 317 } << 318 << 319 return 0; << 320 } 278 } 321 279 322 ssize_t crash_get_memory_size(void) !! 280 int __init parse_crashkernel_high(char *cmdline, >> 281 unsigned long long system_ram, >> 282 unsigned long long *crash_size, >> 283 unsigned long long *crash_base) 323 { 284 { 324 ssize_t size = 0; !! 285 return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, 325 !! 286 "crashkernel=", suffix_tbl[SUFFIX_HIGH]); 326 if (!kexec_trylock()) << 327 return -EBUSY; << 328 << 329 size += crash_resource_size(&crashk_re << 330 size += crash_resource_size(&crashk_lo << 331 << 332 kexec_unlock(); << 333 return size; << 334 } 287 } 335 288 336 static int __crash_shrink_memory(struct resour !! 289 int __init parse_crashkernel_low(char *cmdline, 337 unsigned long !! 290 unsigned long long system_ram, >> 291 unsigned long long *crash_size, >> 292 unsigned long long *crash_base) 338 { 293 { 339 struct resource *ram_res; !! 294 return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, 340 !! 295 "crashkernel=", suffix_tbl[SUFFIX_LOW]); 341 ram_res = kzalloc(sizeof(*ram_res), GF << 342 if (!ram_res) << 343 return -ENOMEM; << 344 << 345 ram_res->start = old_res->start + new_ << 346 ram_res->end = old_res->end; << 347 ram_res->flags = IORESOURCE_BUSY | IOR << 348 ram_res->name = "System RAM"; << 349 << 350 if (!new_size) { << 351 release_resource(old_res); << 352 old_res->start = 0; << 353 old_res->end = 0; << 354 } else { << 355 crashk_res.end = ram_res->star << 356 } << 357 << 358 crash_free_reserved_phys_range(ram_res << 359 insert_resource(&iomem_resource, ram_r << 360 << 361 return 0; << 362 } 296 } 363 297 364 int crash_shrink_memory(unsigned long new_size !! 298 Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, >> 299 void *data, size_t data_len) 365 { 300 { 366 int ret = 0; !! 301 struct elf_note *note = (struct elf_note *)buf; 367 unsigned long old_size, low_size; << 368 << 369 if (!kexec_trylock()) << 370 return -EBUSY; << 371 << 372 if (kexec_crash_image) { << 373 ret = -ENOENT; << 374 goto unlock; << 375 } << 376 << 377 low_size = crash_resource_size(&crashk << 378 old_size = crash_resource_size(&crashk << 379 new_size = roundup(new_size, KEXEC_CRA << 380 if (new_size >= old_size) { << 381 ret = (new_size == old_size) ? << 382 goto unlock; << 383 } << 384 302 385 /* !! 303 note->n_namesz = strlen(name) + 1; 386 * (low_size > new_size) implies that !! 304 note->n_descsz = data_len; 387 * This also means that if low_size is !! 305 note->n_type = type; 388 * !! 306 buf += DIV_ROUND_UP(sizeof(*note), sizeof(Elf_Word)); 389 * If low_size is greater than 0, (low !! 307 memcpy(buf, name, note->n_namesz); 390 * crashk_low_res also needs to be shr !! 308 buf += DIV_ROUND_UP(note->n_namesz, sizeof(Elf_Word)); 391 * needs to be shrunken. !! 309 memcpy(buf, data, data_len); 392 */ !! 310 buf += DIV_ROUND_UP(data_len, sizeof(Elf_Word)); 393 if (low_size > new_size) { << 394 ret = __crash_shrink_memory(&c << 395 if (ret) << 396 goto unlock; << 397 << 398 ret = __crash_shrink_memory(&c << 399 } else { << 400 ret = __crash_shrink_memory(&c << 401 } << 402 311 403 /* Swap crashk_res and crashk_low_res !! 312 return buf; 404 if (!crashk_res.end && crashk_low_res. << 405 crashk_res.start = crashk_low_ << 406 crashk_res.end = crashk_low_ << 407 release_resource(&crashk_low_r << 408 crashk_low_res.start = 0; << 409 crashk_low_res.end = 0; << 410 insert_resource(&iomem_resourc << 411 } << 412 << 413 unlock: << 414 kexec_unlock(); << 415 return ret; << 416 } 313 } 417 314 418 void crash_save_cpu(struct pt_regs *regs, int !! 315 void final_note(Elf_Word *buf) 419 { 316 { 420 struct elf_prstatus prstatus; !! 317 memset(buf, 0, sizeof(struct elf_note)); 421 u32 *buf; !! 318 } 422 319 423 if ((cpu < 0) || (cpu >= nr_cpu_ids)) !! 320 static void update_vmcoreinfo_note(void) 424 return; !! 321 { >> 322 u32 *buf = vmcoreinfo_note; 425 323 426 /* Using ELF notes here is opportunist !! 324 if (!vmcoreinfo_size) 427 * I need a well defined structure for << 428 * for the data I pass, and I need tag << 429 * on the data to indicate what inform << 430 * squirrelled away. ELF notes happen << 431 * all of that, so there is no need to << 432 */ << 433 buf = (u32 *)per_cpu_ptr(crash_notes, << 434 if (!buf) << 435 return; 325 return; 436 memset(&prstatus, 0, sizeof(prstatus)) !! 326 buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data, 437 prstatus.common.pr_pid = current->pid; !! 327 vmcoreinfo_size); 438 elf_core_copy_regs(&prstatus.pr_reg, r << 439 buf = append_elf_note(buf, KEXEC_CORE_ << 440 &prstatus, sizeo << 441 final_note(buf); 328 final_note(buf); 442 } 329 } 443 330 444 !! 331 void crash_update_vmcoreinfo_safecopy(void *ptr) 445 << 446 static int __init crash_notes_memory_init(void << 447 { 332 { 448 /* Allocate memory for saving cpu regi !! 333 if (ptr) 449 size_t size, align; !! 334 memcpy(ptr, vmcoreinfo_data, vmcoreinfo_size); 450 335 451 /* !! 336 vmcoreinfo_data_safecopy = ptr; 452 * crash_notes could be allocated acro << 453 * is vmalloc based . vmalloc doesn't << 454 * pages are also on 2 continuous phys << 455 * 2nd part of crash_notes in 2nd page << 456 * starting address and size of crash_ << 457 * Here round up the size of crash_not << 458 * and pass it to __alloc_percpu as al << 459 * crash_notes is allocated inside one << 460 */ << 461 size = sizeof(note_buf_t); << 462 align = min(roundup_pow_of_two(sizeof( << 463 << 464 /* << 465 * Break compile if size is bigger tha << 466 * definitely will be in 2 pages with << 467 */ << 468 BUILD_BUG_ON(size > PAGE_SIZE); << 469 << 470 crash_notes = __alloc_percpu(size, ali << 471 if (!crash_notes) { << 472 pr_warn("Memory allocation for << 473 return -ENOMEM; << 474 } << 475 return 0; << 476 } 337 } 477 subsys_initcall(crash_notes_memory_init); << 478 << 479 #endif /*CONFIG_CRASH_DUMP*/ << 480 << 481 #ifdef CONFIG_CRASH_HOTPLUG << 482 #undef pr_fmt << 483 #define pr_fmt(fmt) "crash hp: " fmt << 484 338 485 /* !! 339 void crash_save_vmcoreinfo(void) 486 * Different than kexec/kdump loading/unloadin << 487 * usually rarely happen, there will be many c << 488 * during one short period, e.g one memory boa << 489 * regions are online. So mutex lock __crash_ << 490 * serialize the crash hotplug handling specif << 491 */ << 492 static DEFINE_MUTEX(__crash_hotplug_lock); << 493 #define crash_hotplug_lock() mutex_lock(&__cra << 494 #define crash_hotplug_unlock() mutex_unlock(&_ << 495 << 496 /* << 497 * This routine utilized when the crash_hotplu << 498 * It reflects the kernel's ability/permission << 499 * image directly. << 500 */ << 501 int crash_check_hotplug_support(void) << 502 { 340 { 503 int rc = 0; !! 341 if (!vmcoreinfo_note) >> 342 return; 504 343 505 crash_hotplug_lock(); !! 344 /* Use the safe copy to generate vmcoreinfo note if have */ 506 /* Obtain lock while reading crash inf !! 345 if (vmcoreinfo_data_safecopy) 507 if (!kexec_trylock()) { !! 346 vmcoreinfo_data = vmcoreinfo_data_safecopy; 508 pr_info("kexec_trylock() faile << 509 crash_hotplug_unlock(); << 510 return 0; << 511 } << 512 if (kexec_crash_image) { << 513 rc = kexec_crash_image->hotplu << 514 } << 515 /* Release lock now that update comple << 516 kexec_unlock(); << 517 crash_hotplug_unlock(); << 518 347 519 return rc; !! 348 vmcoreinfo_append_str("CRASHTIME=%lld\n", ktime_get_real_seconds()); >> 349 update_vmcoreinfo_note(); 520 } 350 } 521 351 522 /* !! 352 void vmcoreinfo_append_str(const char *fmt, ...) 523 * To accurately reflect hot un/plug changes o << 524 * (including onling and offlining of those re << 525 * kexec segments must be updated with latest << 526 * << 527 * Architectures must ensure two things for al << 528 * updating during hotplug events: << 529 * << 530 * 1. Segments must be large enough to accommo << 531 * resources. << 532 * 2. Exclude the segments from SHA verificati << 533 * << 534 * For example, on most architectures, the elf << 535 * to the crash kernel via the elfcorehdr= par << 536 * new list of CPUs and memory. To make change << 537 * should be large enough to permit a growing << 538 * resources. One can estimate the elfcorehdr << 539 * NR_CPUS_DEFAULT and CRASH_MAX_MEMORY_RANGES << 540 * excluded from SHA verification by default i << 541 * supports crash hotplug. << 542 */ << 543 static void crash_handle_hotplug_event(unsigne << 544 { 353 { 545 struct kimage *image; !! 354 va_list args; 546 !! 355 char buf[0x50]; 547 crash_hotplug_lock(); !! 356 size_t r; 548 /* Obtain lock while changing crash in << 549 if (!kexec_trylock()) { << 550 pr_info("kexec_trylock() faile << 551 crash_hotplug_unlock(); << 552 return; << 553 } << 554 << 555 /* Check kdump is not loaded */ << 556 if (!kexec_crash_image) << 557 goto out; << 558 << 559 image = kexec_crash_image; << 560 << 561 /* Check that kexec segments update is << 562 if (!image->hotplug_support) << 563 goto out; << 564 << 565 if (hp_action == KEXEC_CRASH_HP_ADD_CP << 566 hp_action == KEXEC_CRASH_HP_RE << 567 pr_debug("hp_action %u, cpu %u << 568 else << 569 pr_debug("hp_action %u\n", hp_ << 570 << 571 /* << 572 * The elfcorehdr_index is set to -1 w << 573 * is allocated. Find the segment cont << 574 * if not already found. << 575 */ << 576 if (image->elfcorehdr_index < 0) { << 577 unsigned long mem; << 578 unsigned char *ptr; << 579 unsigned int n; << 580 << 581 for (n = 0; n < image->nr_segm << 582 mem = image->segment[n << 583 ptr = kmap_local_page( << 584 if (ptr) { << 585 /* The segment << 586 if (memcmp(ptr << 587 image- << 588 kunmap_local(p << 589 } << 590 } << 591 } << 592 << 593 if (image->elfcorehdr_index < 0) { << 594 pr_err("unable to locate elfco << 595 goto out; << 596 } << 597 << 598 /* Needed in order for the segments to << 599 arch_kexec_unprotect_crashkres(); << 600 << 601 /* Differentiate between normal load a << 602 image->hp_action = hp_action; << 603 357 604 /* Now invoke arch-specific update han !! 358 va_start(args, fmt); 605 arch_crash_handle_hotplug_event(image, !! 359 r = vscnprintf(buf, sizeof(buf), fmt, args); >> 360 va_end(args); 606 361 607 /* No longer handling a hotplug event !! 362 r = min(r, (size_t)VMCOREINFO_BYTES - vmcoreinfo_size); 608 image->hp_action = KEXEC_CRASH_HP_NONE << 609 image->elfcorehdr_updated = true; << 610 363 611 /* Change back to read-only */ !! 364 memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r); 612 arch_kexec_protect_crashkres(); << 613 365 614 /* Errors in the callback is not a rea !! 366 vmcoreinfo_size += r; 615 out: << 616 /* Release lock now that update comple << 617 kexec_unlock(); << 618 crash_hotplug_unlock(); << 619 } << 620 << 621 static int crash_memhp_notifier(struct notifie << 622 { << 623 switch (val) { << 624 case MEM_ONLINE: << 625 crash_handle_hotplug_event(KEX << 626 KEXEC_CRASH_HP_INVALID << 627 break; << 628 << 629 case MEM_OFFLINE: << 630 crash_handle_hotplug_event(KEX << 631 KEXEC_CRASH_HP_INVALID << 632 break; << 633 } << 634 return NOTIFY_OK; << 635 } 367 } 636 368 637 static struct notifier_block crash_memhp_nb = !! 369 /* 638 .notifier_call = crash_memhp_notifier, !! 370 * provide an empty default implementation here -- architecture 639 .priority = 0 !! 371 * code may override this 640 }; !! 372 */ >> 373 void __weak arch_crash_save_vmcoreinfo(void) >> 374 {} 641 375 642 static int crash_cpuhp_online(unsigned int cpu !! 376 phys_addr_t __weak paddr_vmcoreinfo_note(void) 643 { 377 { 644 crash_handle_hotplug_event(KEXEC_CRASH !! 378 return __pa(vmcoreinfo_note); 645 return 0; << 646 } 379 } >> 380 EXPORT_SYMBOL(paddr_vmcoreinfo_note); 647 381 648 static int crash_cpuhp_offline(unsigned int cp !! 382 static int __init crash_save_vmcoreinfo_init(void) 649 { 383 { 650 crash_handle_hotplug_event(KEXEC_CRASH !! 384 vmcoreinfo_data = (unsigned char *)get_zeroed_page(GFP_KERNEL); 651 return 0; !! 385 if (!vmcoreinfo_data) { 652 } !! 386 pr_warn("Memory allocation for vmcoreinfo_data failed\n"); >> 387 return -ENOMEM; >> 388 } 653 389 654 static int __init crash_hotplug_init(void) !! 390 vmcoreinfo_note = alloc_pages_exact(VMCOREINFO_NOTE_SIZE, 655 { !! 391 GFP_KERNEL | __GFP_ZERO); 656 int result = 0; !! 392 if (!vmcoreinfo_note) { >> 393 free_page((unsigned long)vmcoreinfo_data); >> 394 vmcoreinfo_data = NULL; >> 395 pr_warn("Memory allocation for vmcoreinfo_note failed\n"); >> 396 return -ENOMEM; >> 397 } 657 398 658 if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG)) !! 399 VMCOREINFO_OSRELEASE(init_uts_ns.name.release); 659 register_memory_notifier(&cras !! 400 VMCOREINFO_BUILD_ID(); >> 401 VMCOREINFO_PAGESIZE(PAGE_SIZE); >> 402 >> 403 VMCOREINFO_SYMBOL(init_uts_ns); >> 404 VMCOREINFO_OFFSET(uts_namespace, name); >> 405 VMCOREINFO_SYMBOL(node_online_map); >> 406 #ifdef CONFIG_MMU >> 407 VMCOREINFO_SYMBOL_ARRAY(swapper_pg_dir); >> 408 #endif >> 409 VMCOREINFO_SYMBOL(_stext); >> 410 VMCOREINFO_SYMBOL(vmap_area_list); 660 411 661 if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) { !! 412 #ifndef CONFIG_NUMA 662 result = cpuhp_setup_state_noc !! 413 VMCOREINFO_SYMBOL(mem_map); 663 "crash/cpuhp", crash_c !! 414 VMCOREINFO_SYMBOL(contig_page_data); 664 } !! 415 #endif >> 416 #ifdef CONFIG_SPARSEMEM >> 417 VMCOREINFO_SYMBOL_ARRAY(mem_section); >> 418 VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS); >> 419 VMCOREINFO_STRUCT_SIZE(mem_section); >> 420 VMCOREINFO_OFFSET(mem_section, section_mem_map); >> 421 VMCOREINFO_NUMBER(SECTION_SIZE_BITS); >> 422 VMCOREINFO_NUMBER(MAX_PHYSMEM_BITS); >> 423 #endif >> 424 VMCOREINFO_STRUCT_SIZE(page); >> 425 VMCOREINFO_STRUCT_SIZE(pglist_data); >> 426 VMCOREINFO_STRUCT_SIZE(zone); >> 427 VMCOREINFO_STRUCT_SIZE(free_area); >> 428 VMCOREINFO_STRUCT_SIZE(list_head); >> 429 VMCOREINFO_SIZE(nodemask_t); >> 430 VMCOREINFO_OFFSET(page, flags); >> 431 VMCOREINFO_OFFSET(page, _refcount); >> 432 VMCOREINFO_OFFSET(page, mapping); >> 433 VMCOREINFO_OFFSET(page, lru); >> 434 VMCOREINFO_OFFSET(page, _mapcount); >> 435 VMCOREINFO_OFFSET(page, private); >> 436 VMCOREINFO_OFFSET(page, compound_dtor); >> 437 VMCOREINFO_OFFSET(page, compound_order); >> 438 VMCOREINFO_OFFSET(page, compound_head); >> 439 VMCOREINFO_OFFSET(pglist_data, node_zones); >> 440 VMCOREINFO_OFFSET(pglist_data, nr_zones); >> 441 #ifdef CONFIG_FLATMEM >> 442 VMCOREINFO_OFFSET(pglist_data, node_mem_map); >> 443 #endif >> 444 VMCOREINFO_OFFSET(pglist_data, node_start_pfn); >> 445 VMCOREINFO_OFFSET(pglist_data, node_spanned_pages); >> 446 VMCOREINFO_OFFSET(pglist_data, node_id); >> 447 VMCOREINFO_OFFSET(zone, free_area); >> 448 VMCOREINFO_OFFSET(zone, vm_stat); >> 449 VMCOREINFO_OFFSET(zone, spanned_pages); >> 450 VMCOREINFO_OFFSET(free_area, free_list); >> 451 VMCOREINFO_OFFSET(list_head, next); >> 452 VMCOREINFO_OFFSET(list_head, prev); >> 453 VMCOREINFO_OFFSET(vmap_area, va_start); >> 454 VMCOREINFO_OFFSET(vmap_area, list); >> 455 VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER); >> 456 log_buf_vmcoreinfo_setup(); >> 457 VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES); >> 458 VMCOREINFO_NUMBER(NR_FREE_PAGES); >> 459 VMCOREINFO_NUMBER(PG_lru); >> 460 VMCOREINFO_NUMBER(PG_private); >> 461 VMCOREINFO_NUMBER(PG_swapcache); >> 462 VMCOREINFO_NUMBER(PG_swapbacked); >> 463 VMCOREINFO_NUMBER(PG_slab); >> 464 #ifdef CONFIG_MEMORY_FAILURE >> 465 VMCOREINFO_NUMBER(PG_hwpoison); >> 466 #endif >> 467 VMCOREINFO_NUMBER(PG_head_mask); >> 468 #define PAGE_BUDDY_MAPCOUNT_VALUE (~PG_buddy) >> 469 VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE); >> 470 #ifdef CONFIG_HUGETLB_PAGE >> 471 VMCOREINFO_NUMBER(HUGETLB_PAGE_DTOR); >> 472 #define PAGE_OFFLINE_MAPCOUNT_VALUE (~PG_offline) >> 473 VMCOREINFO_NUMBER(PAGE_OFFLINE_MAPCOUNT_VALUE); >> 474 #endif >> 475 >> 476 arch_crash_save_vmcoreinfo(); >> 477 update_vmcoreinfo_note(); 665 478 666 return result; !! 479 return 0; 667 } 480 } 668 481 669 subsys_initcall(crash_hotplug_init); !! 482 subsys_initcall(crash_save_vmcoreinfo_init); 670 #endif << 671 483
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.