// SPDX-License-Identifier: GPL-2.0-only
/*
 * crash.c - kernel crash support code.
 * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/buildid.h>
#include <linux/init.h>
#include <linux/utsname.h>
#include <linux/vmalloc.h>
#include <linux/sizes.h>
#include <linux/kexec.h>
#include <linux/memory.h>
#include <linux/mm.h>
#include <linux/cpuhotplug.h>
#include <linux/memblock.h>
#include <linux/kmemleak.h>
#include <linux/crash_core.h>
#include <linux/reboot.h>
#include <linux/btf.h>
#include <linux/objtool.h>

#include <asm/page.h>
#include <asm/sections.h>

#include <crypto/sha1.h>

#include "kallsyms_internal.h"
#include "kexec_internal.h"

/* Per cpu memory for storing cpu states in case of system crash. */
note_buf_t __percpu *crash_notes;

#ifdef CONFIG_CRASH_DUMP

int kimage_crash_copy_vmcoreinfo(struct kimage *image)
{
	struct page *vmcoreinfo_page;
	void *safecopy;

	if (!IS_ENABLED(CONFIG_CRASH_DUMP))
		return 0;
	if (image->type != KEXEC_TYPE_CRASH)
		return 0;

	/*
	 * For kdump, allocate one vmcoreinfo safe copy from the
	 * crash memory. As we have arch_kexec_protect_crashkres()
	 * after kexec syscall, we naturally protect it from write
	 * (even read) access under kernel direct mapping. But on
	 * the other hand, we still need to operate it when crash
	 * happens to generate the vmcoreinfo note, hereby we rely on
	 * vmap for this purpose.
	 */
	vmcoreinfo_page = kimage_alloc_control_pages(image, 0);
	if (!vmcoreinfo_page) {
		pr_warn("Could not allocate vmcoreinfo buffer\n");
		return -ENOMEM;
	}
	safecopy = vmap(&vmcoreinfo_page, 1, VM_MAP, PAGE_KERNEL);
	if (!safecopy) {
		pr_warn("Could not vmap vmcoreinfo buffer\n");
		return -ENOMEM;
	}

	image->vmcoreinfo_data_copy = safecopy;
	crash_update_vmcoreinfo_safecopy(safecopy);

	return 0;
}
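/*
 * Illustrative note on the safe copy above (not from the original file):
 * once the crash kernel is loaded, arch_kexec_protect_crashkres() may
 * remove the crash memory from the kernel direct mapping, so the copy is
 * no longer reachable via __va().  At crash time crash_save_vmcoreinfo()
 * still has to append the CRASHTIME= record, and it can do so only
 * through the vmap alias installed here, which stays valid either way.
 */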
int kexec_should_crash(struct task_struct *p)
{
	/*
	 * If crash_kexec_post_notifiers is enabled, don't run
	 * crash_kexec() here yet, which must be run after panic
	 * notifiers in panic().
	 */
	if (crash_kexec_post_notifiers)
		return 0;
	/*
	 * There are 4 panic() calls in make_task_dead() path, each of which
	 * corresponds to each of these 4 conditions.
	 */
	if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops)
		return 1;
	return 0;
}

int kexec_crash_loaded(void)
{
	return !!kexec_crash_image;
}
EXPORT_SYMBOL_GPL(kexec_crash_loaded);

/*
 * No panic_cpu check version of crash_kexec().  This function is called
 * only when panic_cpu holds the current CPU number, which is the only CPU
 * that processes crash_kexec routines.
 */
void __noclone __crash_kexec(struct pt_regs *regs)
{
	/* Take the kexec_lock here to prevent sys_kexec_load
	 * running on one cpu from replacing the crash kernel
	 * we are using after a panic on a different cpu.
	 *
	 * If the crash kernel was not located in a fixed area
	 * of memory the xchg(&kexec_crash_image) would be
	 * sufficient.  But since I reuse the memory...
	 */
	if (kexec_trylock()) {
		if (kexec_crash_image) {
			struct pt_regs fixed_regs;

			crash_setup_regs(&fixed_regs, regs);
			crash_save_vmcoreinfo();
			machine_crash_shutdown(&fixed_regs);
			machine_kexec(kexec_crash_image);
		}
		kexec_unlock();
	}
}
STACK_FRAME_NON_STANDARD(__crash_kexec);

__bpf_kfunc void crash_kexec(struct pt_regs *regs)
{
	int old_cpu, this_cpu;

	/*
	 * Only one CPU is allowed to execute the crash_kexec() code as with
	 * panic().  Otherwise parallel calls of panic() and crash_kexec()
	 * may stop each other.  To exclude them, we use panic_cpu here too.
	 */
	old_cpu = PANIC_CPU_INVALID;
	this_cpu = raw_smp_processor_id();

	if (atomic_try_cmpxchg(&panic_cpu, &old_cpu, this_cpu)) {
		/* This is the 1st CPU which comes here, so go ahead. */
		__crash_kexec(regs);

		/*
		 * Reset panic_cpu to allow another panic()/crash_kexec()
		 * call.
		 */
		atomic_set(&panic_cpu, PANIC_CPU_INVALID);
	}
}
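/*
 * Worked example (illustrative): if CPU 0 and CPU 2 panic almost
 * simultaneously, both reach crash_kexec(), but only the first
 * atomic_try_cmpxchg() moves panic_cpu from PANIC_CPU_INVALID to the
 * winner's id.  The loser's cmpxchg fails and it returns immediately,
 * so exactly one CPU executes __crash_kexec() and the crash image is
 * never entered twice.
 */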
153 pr_warn("crashkernel: unrecognized char: %c\n", *cur); 121 machine_crash_shutdown !! 154 return -EINVAL; 122 machine_kexec(kexec_cr << 123 } << 124 kexec_unlock(); << 125 } 155 } 126 } << 127 STACK_FRAME_NON_STANDARD(__crash_kexec); << 128 156 129 __bpf_kfunc void crash_kexec(struct pt_regs *r !! 157 return 0; 130 { !! 158 } 131 int old_cpu, this_cpu; << 132 159 133 /* !! 160 #define SUFFIX_HIGH 0 134 * Only one CPU is allowed to execute !! 161 #define SUFFIX_LOW 1 135 * panic(). Otherwise parallel calls !! 162 #define SUFFIX_NULL 2 136 * may stop each other. To exclude th !! 163 static __initdata char *suffix_tbl[] = { 137 */ !! 164 [SUFFIX_HIGH] = ",high", 138 old_cpu = PANIC_CPU_INVALID; !! 165 [SUFFIX_LOW] = ",low", 139 this_cpu = raw_smp_processor_id(); !! 166 [SUFFIX_NULL] = NULL, >> 167 }; 140 168 141 if (atomic_try_cmpxchg(&panic_cpu, &ol !! 169 /* 142 /* This is the 1st CPU which c !! 170 * That function parses "suffix" crashkernel command lines like 143 __crash_kexec(regs); !! 171 * 144 !! 172 * crashkernel=size,[high|low] 145 /* !! 173 * 146 * Reset panic_cpu to allow an !! 174 * It returns 0 on success and -EINVAL on failure. 147 * call. !! 175 */ 148 */ !! 176 static int __init parse_crashkernel_suffix(char *cmdline, 149 atomic_set(&panic_cpu, PANIC_C !! 177 unsigned long long *crash_size, >> 178 const char *suffix) >> 179 { >> 180 char *cur = cmdline; >> 181 >> 182 *crash_size = memparse(cmdline, &cur); >> 183 if (cmdline == cur) { >> 184 pr_warn("crashkernel: memory value expected\n"); >> 185 return -EINVAL; >> 186 } >> 187 >> 188 /* check with suffix */ >> 189 if (strncmp(cur, suffix, strlen(suffix))) { >> 190 pr_warn("crashkernel: unrecognized char: %c\n", *cur); >> 191 return -EINVAL; >> 192 } >> 193 cur += strlen(suffix); >> 194 if (*cur != ' ' && *cur != '\0') { >> 195 pr_warn("crashkernel: unrecognized char: %c\n", *cur); >> 196 return -EINVAL; 150 } 197 } 151 } << 152 198 153 static inline resource_size_t crash_resource_s !! 199 return 0; 154 { << 155 return !res->end ? 0 : resource_size(r << 156 } 200 } 157 201 >> 202 static __init char *get_last_crashkernel(char *cmdline, >> 203 const char *name, >> 204 const char *suffix) >> 205 { >> 206 char *p = cmdline, *ck_cmdline = NULL; >> 207 >> 208 /* find crashkernel and use the last one if there are more */ >> 209 p = strstr(p, name); >> 210 while (p) { >> 211 char *end_p = strchr(p, ' '); >> 212 char *q; >> 213 >> 214 if (!end_p) >> 215 end_p = p + strlen(p); >> 216 >> 217 if (!suffix) { >> 218 int i; >> 219 >> 220 /* skip the one with any known suffix */ >> 221 for (i = 0; suffix_tbl[i]; i++) { >> 222 q = end_p - strlen(suffix_tbl[i]); >> 223 if (!strncmp(q, suffix_tbl[i], >> 224 strlen(suffix_tbl[i]))) >> 225 goto next; >> 226 } >> 227 ck_cmdline = p; >> 228 } else { >> 229 q = end_p - strlen(suffix); >> 230 if (!strncmp(q, suffix, strlen(suffix))) >> 231 ck_cmdline = p; >> 232 } >> 233 next: >> 234 p = strstr(p+1, name); >> 235 } 158 236 >> 237 return ck_cmdline; >> 238 } 159 239 160 !! 240 static int __init __parse_crashkernel(char *cmdline, 161 int crash_prepare_elf64_headers(struct crash_m !! 241 unsigned long long system_ram, 162 void **addr, unsigne !! 242 unsigned long long *crash_size, >> 243 unsigned long long *crash_base, >> 244 const char *name, >> 245 const char *suffix) 163 { 246 { 164 Elf64_Ehdr *ehdr; !! 247 char *first_colon, *first_space; 165 Elf64_Phdr *phdr; !! 
int crash_exclude_mem_range(struct crash_mem *mem,
			    unsigned long long mstart, unsigned long long mend)
{
	int i;
	unsigned long long start, end, p_start, p_end;

	for (i = 0; i < mem->nr_ranges; i++) {
		start = mem->ranges[i].start;
		end = mem->ranges[i].end;
		p_start = mstart;
		p_end = mend;

		if (p_start > end)
			continue;

		/*
		 * Because the memory ranges in mem->ranges are stored in
		 * ascending order, when we detect `p_end < start`, we can
		 * immediately exit the for loop, as the subsequent memory
		 * ranges will definitely be outside the range we are looking
		 * for.
		 */
		if (p_end < start)
			break;

		/* Truncate any area outside of range */
		if (p_start < start)
			p_start = start;
		if (p_end > end)
			p_end = end;

		/* Found completely overlapping range */
		if (p_start == start && p_end == end) {
			memmove(&mem->ranges[i], &mem->ranges[i + 1],
				(mem->nr_ranges - (i + 1)) * sizeof(mem->ranges[i]));
			i--;
			mem->nr_ranges--;
		} else if (p_start > start && p_end < end) {
			/* Split original range */
			if (mem->nr_ranges >= mem->max_nr_ranges)
				return -ENOMEM;

			memmove(&mem->ranges[i + 2], &mem->ranges[i + 1],
				(mem->nr_ranges - (i + 1)) * sizeof(mem->ranges[i]));

			mem->ranges[i].end = p_start - 1;
			mem->ranges[i + 1].start = p_end + 1;
			mem->ranges[i + 1].end = end;

			i++;
			mem->nr_ranges++;
		} else if (p_start != start)
			mem->ranges[i].end = p_start - 1;
		else
			mem->ranges[i].start = p_end + 1;
	}

	return 0;
}
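/*
 * Worked example (illustrative): with a single range [0x1000, 0x3fff],
 * crash_exclude_mem_range(mem, 0x2000, 0x2fff) takes the split branch
 * and leaves two ranges, [0x1000, 0x1fff] and [0x3000, 0x3fff], using
 * one extra slot in mem->ranges[] (hence the -ENOMEM check); excluding
 * exactly [0x1000, 0x3fff] would instead drop the range altogether.
 */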
ssize_t crash_get_memory_size(void)
{
	ssize_t size = 0;

	if (!kexec_trylock())
		return -EBUSY;

	size += crash_resource_size(&crashk_res);
	size += crash_resource_size(&crashk_low_res);

	kexec_unlock();
	return size;
}

static int __crash_shrink_memory(struct resource *old_res,
				 unsigned long new_size)
{
	struct resource *ram_res;

	ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL);
	if (!ram_res)
		return -ENOMEM;

	ram_res->start = old_res->start + new_size;
	ram_res->end   = old_res->end;
	ram_res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
	ram_res->name  = "System RAM";

	if (!new_size) {
		release_resource(old_res);
		old_res->start = 0;
		old_res->end   = 0;
	} else {
		crashk_res.end = ram_res->start - 1;
	}

	crash_free_reserved_phys_range(ram_res->start, ram_res->end);
	insert_resource(&iomem_resource, ram_res);

	return 0;
}
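/*
 * Usage sketch (illustrative values): these helpers back the
 * /sys/kernel/kexec_crash_size interface (see kernel/ksysfs.c), e.g.
 *
 *	# cat /sys/kernel/kexec_crash_size
 *	268435456
 *	# echo 134217728 > /sys/kernel/kexec_crash_size
 *
 * A read reports crash_get_memory_size(); writing a smaller value is
 * routed to crash_shrink_memory() below, which hands the freed tail of
 * the reservation back as "System RAM".
 */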
int crash_shrink_memory(unsigned long new_size)
{
	int ret = 0;
	unsigned long old_size, low_size;

	if (!kexec_trylock())
		return -EBUSY;

	if (kexec_crash_image) {
		ret = -ENOENT;
		goto unlock;
	}

	low_size = crash_resource_size(&crashk_low_res);
	old_size = crash_resource_size(&crashk_res) + low_size;
	new_size = roundup(new_size, KEXEC_CRASH_MEM_ALIGN);
	if (new_size >= old_size) {
		ret = (new_size == old_size) ? 0 : -EINVAL;
		goto unlock;
	}

	/*
	 * (low_size > new_size) implies that low_size is greater than zero.
	 * This also means that if low_size is zero, the else branch is taken.
	 *
	 * If low_size is greater than 0, (low_size > new_size) indicates that
	 * crashk_low_res also needs to be shrunken. Otherwise, only crashk_res
	 * needs to be shrunken.
	 */
	if (low_size > new_size) {
		ret = __crash_shrink_memory(&crashk_res, 0);
		if (ret)
			goto unlock;

		ret = __crash_shrink_memory(&crashk_low_res, new_size);
	} else {
		ret = __crash_shrink_memory(&crashk_res, new_size - low_size);
	}

	/* Swap crashk_res and crashk_low_res if needed */
	if (!crashk_res.end && crashk_low_res.end) {
		crashk_res.start = crashk_low_res.start;
		crashk_res.end   = crashk_low_res.end;
		release_resource(&crashk_low_res);
		crashk_low_res.start = 0;
		crashk_low_res.end   = 0;
		insert_resource(&iomem_resource, &crashk_res);
	}

unlock:
	kexec_unlock();
	return ret;
}
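/*
 * Worked example (illustrative sizes): with crashk_res = 256M and
 * crashk_low_res = 64M (old_size = 320M), shrinking to new_size = 32M
 * hits the low_size > new_size branch: crashk_res is released outright
 * and crashk_low_res is trimmed to 32M, after which the final swap
 * renames the surviving low region to crashk_res.  Shrinking to 128M
 * instead takes the else branch and trims only crashk_res, to 64M.
 */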
void crash_save_cpu(struct pt_regs *regs, int cpu)
{
	struct elf_prstatus prstatus;
	u32 *buf;

	if ((cpu < 0) || (cpu >= nr_cpu_ids))
		return;

	/* Using ELF notes here is opportunistic.
	 * I need a well defined structure format
	 * for the data I pass, and I need tags
	 * on the data to indicate what information I have
	 * squirrelled away.  ELF notes happen to provide
	 * all of that, so there is no need to invent something new.
	 */
	buf = (u32 *)per_cpu_ptr(crash_notes, cpu);
	if (!buf)
		return;
	memset(&prstatus, 0, sizeof(prstatus));
	prstatus.common.pr_pid = current->pid;
	elf_core_copy_regs(&prstatus.pr_reg, regs);
	buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
			      &prstatus, sizeof(prstatus));
	final_note(buf);
}
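/*
 * Resulting note layout (illustrative): append_elf_note() writes an ELF
 * note header with n_namesz = 5 ("CORE" plus NUL), n_descsz =
 * sizeof(struct elf_prstatus) and n_type = NT_PRSTATUS, followed by the
 * name and payload, each padded to a 4-byte Elf_Word boundary;
 * final_note() then terminates the per-cpu buffer with a zeroed note
 * header so that consumers know where to stop parsing.
 */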
static int __init crash_notes_memory_init(void)
{
	/* Allocate memory for saving cpu registers. */
	size_t size, align;

	/*
	 * crash_notes could be allocated across 2 vmalloc pages when percpu
	 * is vmalloc based. vmalloc doesn't guarantee that 2 continuous
	 * vmalloc pages are also on 2 continuous physical pages. In this
	 * case the 2nd part of crash_notes in the 2nd page could be lost,
	 * since only the starting address and size of crash_notes are
	 * exported through sysfs. Here round up the size of crash_notes to
	 * the nearest power of two and pass it to __alloc_percpu as the
	 * align value. This makes sure crash_notes is allocated inside one
	 * physical page.
	 */
	size = sizeof(note_buf_t);
	align = min(roundup_pow_of_two(sizeof(note_buf_t)), PAGE_SIZE);

	/*
	 * Break the compile if size is bigger than PAGE_SIZE, since
	 * crash_notes would then definitely span 2 pages.
	 */
	BUILD_BUG_ON(size > PAGE_SIZE);

	crash_notes = __alloc_percpu(size, align);
	if (!crash_notes) {
		pr_warn("Memory allocation for saving cpu register states failed\n");
		return -ENOMEM;
	}
	return 0;
}
subsys_initcall(crash_notes_memory_init);
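/*
 * Alignment sketch (illustrative size, not measured): if
 * sizeof(note_buf_t) were 1104 bytes, roundup_pow_of_two() would yield
 * a 2048-byte alignment, which keeps the whole buffer inside a single
 * physical page even when the percpu allocator is vmalloc-backed.
 */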
#endif /*CONFIG_CRASH_DUMP*/

#ifdef CONFIG_CRASH_HOTPLUG
#undef pr_fmt
#define pr_fmt(fmt) "crash hp: " fmt

/*
 * Unlike kexec/kdump loading/unloading/jumping/shrinking, which rarely
 * happen, many crash hotplug events can be notified within one short
 * period, e.g. when one memory board is hot added and its memory
 * regions come online.  So the mutex __crash_hotplug_lock is used to
 * serialize the crash hotplug handling specifically.
 */
static DEFINE_MUTEX(__crash_hotplug_lock);
#define crash_hotplug_lock() mutex_lock(&__crash_hotplug_lock)
#define crash_hotplug_unlock() mutex_unlock(&__crash_hotplug_lock)

/*
 * This routine is utilized when the crash_hotplug sysfs node is read.
 * It reflects the kernel's ability/permission to update the kdump
 * image directly.
 */
int crash_check_hotplug_support(void)
{
	int rc = 0;

	crash_hotplug_lock();
	/* Obtain lock while reading crash information */
	if (!kexec_trylock()) {
		pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n");
		crash_hotplug_unlock();
		return 0;
	}
	if (kexec_crash_image) {
		rc = kexec_crash_image->hotplug_support;
	}
	/* Release lock now that update complete */
	kexec_unlock();
	crash_hotplug_unlock();

	return rc;
}

/*
 * To accurately reflect hot un/plug changes of cpu and memory resources
 * (including the onlining and offlining of those resources), the
 * relevant kexec segments must be updated with the latest cpu and
 * memory resources.
 *
 * Architectures must ensure two things for all segments that need
 * updating during hotplug events:
 *
 * 1. Segments must be large enough to accommodate a growing number of
 *    resources.
 * 2. Exclude the segments from SHA verification.
 *
 * For example, on most architectures, the elfcorehdr (which is passed
 * to the crash kernel via the elfcorehdr= parameter) must include the
 * new list of CPUs and memory. To make changes to the elfcorehdr, it
 * should be large enough to permit a growing number of CPU and memory
 * resources. One can estimate the elfcorehdr memory size based on
 * NR_CPUS_DEFAULT and CRASH_MAX_MEMORY_RANGES. The elfcorehdr is
 * excluded from SHA verification by default if the architecture
 * supports crash hotplug.
 */
static void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu, void *arg)
{
	struct kimage *image;

	crash_hotplug_lock();
	/* Obtain lock while changing crash information */
	if (!kexec_trylock()) {
		pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n");
		crash_hotplug_unlock();
		return;
	}

	/* Check kdump is not loaded */
	if (!kexec_crash_image)
		goto out;

	image = kexec_crash_image;

	/* Check that kexec segments update is permitted */
	if (!image->hotplug_support)
		goto out;

	if (hp_action == KEXEC_CRASH_HP_ADD_CPU ||
		hp_action == KEXEC_CRASH_HP_REMOVE_CPU)
		pr_debug("hp_action %u, cpu %u\n", hp_action, cpu);
	else
		pr_debug("hp_action %u\n", hp_action);

	/*
	 * The elfcorehdr_index is set to -1 when the struct kimage
	 * is allocated. Find the segment containing the elfcorehdr,
	 * if not already found.
	 */
	if (image->elfcorehdr_index < 0) {
		unsigned long mem;
		unsigned char *ptr;
		unsigned int n;

		for (n = 0; n < image->nr_segments; n++) {
			mem = image->segment[n].mem;
			ptr = kmap_local_page(pfn_to_page(mem >> PAGE_SHIFT));
			if (ptr) {
				/* The segment containing elfcorehdr */
				if (memcmp(ptr, ELFMAG, SELFMAG) == 0)
					image->elfcorehdr_index = (int)n;
				kunmap_local(ptr);
			}
		}
	}

	if (image->elfcorehdr_index < 0) {
		pr_err("unable to locate elfcorehdr segment");
		goto out;
	}

	/* Needed in order for the segments to be updated */
	arch_kexec_unprotect_crashkres();

	/* Differentiate between normal load and hotplug update */
	image->hp_action = hp_action;

	/* Now invoke arch-specific update handler */
	arch_crash_handle_hotplug_event(image, arg);

	/* No longer handling a hotplug event */
	image->hp_action = KEXEC_CRASH_HP_NONE;
	image->elfcorehdr_updated = true;

	/* Change back to read-only */
	arch_kexec_protect_crashkres();

	/* Errors in the callback are not a reason to roll back state */
out:
	/* Release lock now that update complete */
	kexec_unlock();
	crash_hotplug_unlock();
}

static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, void *arg)
{
	switch (val) {
	case MEM_ONLINE:
		crash_handle_hotplug_event(KEXEC_CRASH_HP_ADD_MEMORY,
			KEXEC_CRASH_HP_INVALID_CPU, arg);
		break;

	case MEM_OFFLINE:
		crash_handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_MEMORY,
			KEXEC_CRASH_HP_INVALID_CPU, arg);
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block crash_memhp_nb = {
	.notifier_call = crash_memhp_notifier,
	.priority = 0
};

static int crash_cpuhp_online(unsigned int cpu)
{
	crash_handle_hotplug_event(KEXEC_CRASH_HP_ADD_CPU, cpu, NULL);
	return 0;
}

static int crash_cpuhp_offline(unsigned int cpu)
{
	crash_handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_CPU, cpu, NULL);
	return 0;
}
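/*
 * Registration note (illustrative): crash_hotplug_init() below wires
 * the handlers up.  cpuhp_setup_state_nocalls() is used rather than
 * cpuhp_setup_state() so the online/offline callbacks are not invoked
 * for CPUs already up when the initcall runs; only genuine hotplug
 * transitions should trigger an elfcorehdr update.
 */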
static int __init crash_hotplug_init(void)
{
	int result = 0;

	if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
		register_memory_notifier(&crash_memhp_nb);

	if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) {
		result = cpuhp_setup_state_nocalls(CPUHP_BP_PREPARE_DYN,
			"crash/cpuhp", crash_cpuhp_online, crash_cpuhp_offline);
	}

	return result;
}

subsys_initcall(crash_hotplug_init);
#endif