1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * kexec.c - kexec_load system call 4 * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com> 5 */ 6 7 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 8 9 #include <linux/capability.h> 10 #include <linux/mm.h> 11 #include <linux/file.h> 12 #include <linux/security.h> 13 #include <linux/kexec.h> 14 #include <linux/mutex.h> 15 #include <linux/list.h> 16 #include <linux/syscalls.h> 17 #include <linux/vmalloc.h> 18 #include <linux/slab.h> 19 #include <linux/ccsecurity.h> 20 #include "kexec_internal.h" 21 22 static int kimage_alloc_init(struct kimage **rimage, unsigned long entry, 23 unsigned long nr_segments, 24 struct kexec_segment *segments, 25 unsigned long flags) 26 { 27 int ret; 28 struct kimage *image; 29 bool kexec_on_panic = flags & KEXEC_ON_CRASH; 30 31 #ifdef CONFIG_CRASH_DUMP 32 if (kexec_on_panic) { 33 /* Verify we have a valid entry point */ 34 if ((entry < phys_to_boot_phys(crashk_res.start)) || 35 (entry > phys_to_boot_phys(crashk_res.end))) 36 return -EADDRNOTAVAIL; 37 } 38 #endif 39 40 /* Allocate and initialize a controlling structure */ 41 image = do_kimage_alloc_init(); 42 if (!image) 43 return -ENOMEM; 44 45 image->start = entry; 46 image->nr_segments = nr_segments; 47 memcpy(image->segment, segments, nr_segments * sizeof(*segments)); 48 49 #ifdef CONFIG_CRASH_DUMP 50 if (kexec_on_panic) { 51 /* Enable special crash kernel control page alloc policy. */ 52 image->control_page = crashk_res.start; 53 image->type = KEXEC_TYPE_CRASH; 54 } 55 #endif 56 57 ret = sanity_check_segment_list(image); 58 if (ret) 59 goto out_free_image; 60 61 /* 62 * Find a location for the control code buffer, and add it 63 * the vector of segments so that it's pages will also be 64 * counted as destination pages. 65 */ 66 ret = -ENOMEM; 67 image->control_code_page = kimage_alloc_control_pages(image, 68 get_order(KEXEC_CONTROL_PAGE_SIZE)); 69 if (!image->control_code_page) { 70 pr_err("Could not allocate control_code_buffer\n"); 71 goto out_free_image; 72 } 73 74 if (!kexec_on_panic) { 75 image->swap_page = kimage_alloc_control_pages(image, 0); 76 if (!image->swap_page) { 77 pr_err("Could not allocate swap buffer\n"); 78 goto out_free_control_pages; 79 } 80 } 81 82 *rimage = image; 83 return 0; 84 out_free_control_pages: 85 kimage_free_page_list(&image->control_pages); 86 out_free_image: 87 kfree(image); 88 return ret; 89 } 90 91 static int do_kexec_load(unsigned long entry, unsigned long nr_segments, 92 struct kexec_segment *segments, unsigned long flags) 93 { 94 struct kimage **dest_image, *image; 95 unsigned long i; 96 int ret; 97 98 /* 99 * Because we write directly to the reserved memory region when loading 100 * crash kernels we need a serialization here to prevent multiple crash 101 * kernels from attempting to load simultaneously. 102 */ 103 if (!kexec_trylock()) 104 return -EBUSY; 105 106 #ifdef CONFIG_CRASH_DUMP 107 if (flags & KEXEC_ON_CRASH) { 108 dest_image = &kexec_crash_image; 109 if (kexec_crash_image) 110 arch_kexec_unprotect_crashkres(); 111 } else 112 #endif 113 dest_image = &kexec_image; 114 115 if (nr_segments == 0) { 116 /* Uninstall image */ 117 kimage_free(xchg(dest_image, NULL)); 118 ret = 0; 119 goto out_unlock; 120 } 121 if (flags & KEXEC_ON_CRASH) { 122 /* 123 * Loading another kernel to switch to if this one 124 * crashes. Free any current crash dump kernel before 125 * we corrupt it. 126 */ 127 kimage_free(xchg(&kexec_crash_image, NULL)); 128 } 129 130 ret = kimage_alloc_init(&image, entry, nr_segments, segments, flags); 131 if (ret) 132 goto out_unlock; 133 134 if (flags & KEXEC_PRESERVE_CONTEXT) 135 image->preserve_context = 1; 136 137 #ifdef CONFIG_CRASH_HOTPLUG 138 if ((flags & KEXEC_ON_CRASH) && arch_crash_hotplug_support(image, flags)) 139 image->hotplug_support = 1; 140 #endif 141 142 ret = machine_kexec_prepare(image); 143 if (ret) 144 goto out; 145 146 /* 147 * Some architecture(like S390) may touch the crash memory before 148 * machine_kexec_prepare(), we must copy vmcoreinfo data after it. 149 */ 150 ret = kimage_crash_copy_vmcoreinfo(image); 151 if (ret) 152 goto out; 153 154 for (i = 0; i < nr_segments; i++) { 155 ret = kimage_load_segment(image, &image->segment[i]); 156 if (ret) 157 goto out; 158 } 159 160 kimage_terminate(image); 161 162 ret = machine_kexec_post_load(image); 163 if (ret) 164 goto out; 165 166 /* Install the new kernel and uninstall the old */ 167 image = xchg(dest_image, image); 168 169 out: 170 #ifdef CONFIG_CRASH_DUMP 171 if ((flags & KEXEC_ON_CRASH) && kexec_crash_image) 172 arch_kexec_protect_crashkres(); 173 #endif 174 175 kimage_free(image); 176 out_unlock: 177 kexec_unlock(); 178 return ret; 179 } 180 181 /* 182 * Exec Kernel system call: for obvious reasons only root may call it. 183 * 184 * This call breaks up into three pieces. 185 * - A generic part which loads the new kernel from the current 186 * address space, and very carefully places the data in the 187 * allocated pages. 188 * 189 * - A generic part that interacts with the kernel and tells all of 190 * the devices to shut down. Preventing on-going dmas, and placing 191 * the devices in a consistent state so a later kernel can 192 * reinitialize them. 193 * 194 * - A machine specific part that includes the syscall number 195 * and then copies the image to it's final destination. And 196 * jumps into the image at entry. 197 * 198 * kexec does not sync, or unmount filesystems so if you need 199 * that to happen you need to do that yourself. 200 */ 201 202 static inline int kexec_load_check(unsigned long nr_segments, 203 unsigned long flags) 204 { 205 int image_type = (flags & KEXEC_ON_CRASH) ? 206 KEXEC_TYPE_CRASH : KEXEC_TYPE_DEFAULT; 207 int result; 208 209 /* We only trust the superuser with rebooting the system. */ 210 if (!kexec_load_permitted(image_type)) 211 return -EPERM; 212 if (!ccs_capable(CCS_SYS_KEXEC_LOAD)) 213 return -EPERM; 214 215 /* Permit LSMs and IMA to fail the kexec */ 216 result = security_kernel_load_data(LOADING_KEXEC_IMAGE, false); 217 if (result < 0) 218 return result; 219 220 /* 221 * kexec can be used to circumvent module loading restrictions, so 222 * prevent loading in that case 223 */ 224 result = security_locked_down(LOCKDOWN_KEXEC); 225 if (result) 226 return result; 227 228 /* 229 * Verify we have a legal set of flags 230 * This leaves us room for future extensions. 231 */ 232 if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK)) 233 return -EINVAL; 234 235 /* Put an artificial cap on the number 236 * of segments passed to kexec_load. 237 */ 238 if (nr_segments > KEXEC_SEGMENT_MAX) 239 return -EINVAL; 240 241 return 0; 242 } 243 244 SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, 245 struct kexec_segment __user *, segments, unsigned long, flags) 246 { 247 struct kexec_segment *ksegments; 248 unsigned long result; 249 250 result = kexec_load_check(nr_segments, flags); 251 if (result) 252 return result; 253 254 /* Verify we are on the appropriate architecture */ 255 if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) && 256 ((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT)) 257 return -EINVAL; 258 259 ksegments = memdup_array_user(segments, nr_segments, sizeof(ksegments[0])); 260 if (IS_ERR(ksegments)) 261 return PTR_ERR(ksegments); 262 263 result = do_kexec_load(entry, nr_segments, ksegments, flags); 264 kfree(ksegments); 265 266 return result; 267 } 268 269 #ifdef CONFIG_COMPAT 270 COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry, 271 compat_ulong_t, nr_segments, 272 struct compat_kexec_segment __user *, segments, 273 compat_ulong_t, flags) 274 { 275 struct compat_kexec_segment in; 276 struct kexec_segment *ksegments; 277 unsigned long i, result; 278 279 result = kexec_load_check(nr_segments, flags); 280 if (result) 281 return result; 282 283 /* Don't allow clients that don't understand the native 284 * architecture to do anything. 285 */ 286 if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT) 287 return -EINVAL; 288 289 ksegments = kmalloc_array(nr_segments, sizeof(ksegments[0]), 290 GFP_KERNEL); 291 if (!ksegments) 292 return -ENOMEM; 293 294 for (i = 0; i < nr_segments; i++) { 295 result = copy_from_user(&in, &segments[i], sizeof(in)); 296 if (result) 297 goto fail; 298 299 ksegments[i].buf = compat_ptr(in.buf); 300 ksegments[i].bufsz = in.bufsz; 301 ksegments[i].mem = in.mem; 302 ksegments[i].memsz = in.memsz; 303 } 304 305 result = do_kexec_load(entry, nr_segments, ksegments, flags); 306 307 fail: 308 kfree(ksegments); 309 return result; 310 } 311 #endif 312
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.