// SPDX-License-Identifier: GPL-2.0-only
/*
 * kexec.c - kexec_load system call
 * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/mm.h>
#include <linux/file.h>
#include <linux/security.h>
#include <linux/kexec.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/syscalls.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/ccsecurity.h>
#include "kexec_internal.h"

static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
			     unsigned long nr_segments,
			     struct kexec_segment *segments,
			     unsigned long flags)
{
	int ret;
	struct kimage *image;
	bool kexec_on_panic = flags & KEXEC_ON_CRASH;

#ifdef CONFIG_CRASH_DUMP
	if (kexec_on_panic) {
		/* Verify we have a valid entry point */
		if ((entry < phys_to_boot_phys(crashk_res.start)) ||
		    (entry > phys_to_boot_phys(crashk_res.end)))
			return -EADDRNOTAVAIL;
	}
#endif

	/* Allocate and initialize a controlling structure */
	image = do_kimage_alloc_init();
	if (!image)
		return -ENOMEM;

	image->start = entry;
	image->nr_segments = nr_segments;
	memcpy(image->segment, segments, nr_segments * sizeof(*segments));

#ifdef CONFIG_CRASH_DUMP
	if (kexec_on_panic) {
		/* Enable special crash kernel control page alloc policy. */
		image->control_page = crashk_res.start;
		image->type = KEXEC_TYPE_CRASH;
	}
#endif

	ret = sanity_check_segment_list(image);
	if (ret)
		goto out_free_image;

	/*
	 * Find a location for the control code buffer, and add it to
	 * the vector of segments so that its pages will also be
	 * counted as destination pages.
	 */
	ret = -ENOMEM;
	image->control_code_page = kimage_alloc_control_pages(image,
					   get_order(KEXEC_CONTROL_PAGE_SIZE));
	if (!image->control_code_page) {
		pr_err("Could not allocate control_code_buffer\n");
		goto out_free_image;
	}

	if (!kexec_on_panic) {
		image->swap_page = kimage_alloc_control_pages(image, 0);
		if (!image->swap_page) {
			pr_err("Could not allocate swap buffer\n");
			goto out_free_control_pages;
		}
	}

	*rimage = image;
	return 0;
out_free_control_pages:
	kimage_free_page_list(&image->control_pages);
out_free_image:
	kfree(image);
	return ret;
}

static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
		struct kexec_segment *segments, unsigned long flags)
{
	struct kimage **dest_image, *image;
	unsigned long i;
	int ret;

	/*
	 * Because we write directly to the reserved memory region when loading
	 * crash kernels we need serialization here to prevent multiple crash
	 * kernels from attempting to load simultaneously.
	 */
	if (!kexec_trylock())
		return -EBUSY;

#ifdef CONFIG_CRASH_DUMP
	if (flags & KEXEC_ON_CRASH) {
		dest_image = &kexec_crash_image;
		if (kexec_crash_image)
			arch_kexec_unprotect_crashkres();
	} else
#endif
		dest_image = &kexec_image;

	if (nr_segments == 0) {
		/* Uninstall image */
		kimage_free(xchg(dest_image, NULL));
		ret = 0;
		goto out_unlock;
	}
	if (flags & KEXEC_ON_CRASH) {
		/*
		 * Loading another kernel to switch to if this one
		 * crashes.  Free any current crash dump kernel before
		 * we corrupt it.
		 */
		kimage_free(xchg(&kexec_crash_image, NULL));
	}

	ret = kimage_alloc_init(&image, entry, nr_segments, segments, flags);
	if (ret)
		goto out_unlock;

	if (flags & KEXEC_PRESERVE_CONTEXT)
		image->preserve_context = 1;

#ifdef CONFIG_CRASH_HOTPLUG
	if ((flags & KEXEC_ON_CRASH) && arch_crash_hotplug_support(image, flags))
		image->hotplug_support = 1;
#endif

	ret = machine_kexec_prepare(image);
	if (ret)
		goto out;

	/*
	 * Some architectures (like s390) may touch the crash memory before
	 * machine_kexec_prepare(), so we must copy vmcoreinfo data after it.
	 */
	ret = kimage_crash_copy_vmcoreinfo(image);
	if (ret)
		goto out;

	for (i = 0; i < nr_segments; i++) {
		ret = kimage_load_segment(image, &image->segment[i]);
		if (ret)
			goto out;
	}

	kimage_terminate(image);

	ret = machine_kexec_post_load(image);
	if (ret)
		goto out;

	/* Install the new kernel and uninstall the old */
	image = xchg(dest_image, image);

out:
#ifdef CONFIG_CRASH_DUMP
	if ((flags & KEXEC_ON_CRASH) && kexec_crash_image)
		arch_kexec_protect_crashkres();
#endif

	kimage_free(image);
out_unlock:
	kexec_unlock();
	return ret;
}

/*
 * Exec Kernel system call: for obvious reasons only root may call it.
 *
 * This call breaks up into three pieces.
 * - A generic part which loads the new kernel from the current
 *   address space, and very carefully places the data in the
 *   allocated pages.
 *
 * - A generic part that interacts with the kernel and tells all of
 *   the devices to shut down.  Preventing ongoing DMAs, and placing
 *   the devices in a consistent state so a later kernel can
 *   reinitialize them.
 *
 * - A machine specific part that includes the syscall number
 *   and then copies the image to its final destination.  And
 *   jumps into the image at entry.
 *
 * kexec does not sync or unmount filesystems, so if you need
 * that to happen you need to do it yourself.
 */
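
/*
 * Illustration only, not part of the original file: there is no glibc
 * wrapper for this syscall, so userspace (typically kexec-tools) fills
 * an array of struct kexec_segment from <linux/kexec.h> and invokes the
 * raw syscall. A minimal sketch, assuming buf/len hold image data
 * already laid out for the page-aligned physical destination dest, with
 * entry point entry:
 *
 *	struct kexec_segment seg = {
 *		.buf = buf, .bufsz = len,
 *		.mem = (void *)dest, .memsz = page_aligned_len,
 *	};
 *	if (syscall(SYS_kexec_load, entry, 1UL, &seg, KEXEC_ARCH_DEFAULT))
 *		perror("kexec_load");
 *
 * sanity_check_segment_list() rejects segments whose mem/memsz are not
 * page aligned, hence the hypothetical page_aligned_len above.
 */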

static inline int kexec_load_check(unsigned long nr_segments,
				   unsigned long flags)
{
	int image_type = (flags & KEXEC_ON_CRASH) ?
			 KEXEC_TYPE_CRASH : KEXEC_TYPE_DEFAULT;
	int result;

	/* We only trust the superuser with rebooting the system. */
	if (!kexec_load_permitted(image_type))
		return -EPERM;
	if (!ccs_capable(CCS_SYS_KEXEC_LOAD))
		return -EPERM;

	/* Permit LSMs and IMA to fail the kexec */
	result = security_kernel_load_data(LOADING_KEXEC_IMAGE, false);
	if (result < 0)
		return result;

	/*
	 * kexec can be used to circumvent module loading restrictions, so
	 * prevent loading in that case
	 */
	result = security_locked_down(LOCKDOWN_KEXEC);
	if (result)
		return result;

	/*
	 * Verify we have a legal set of flags
	 * This leaves us room for future extensions.
	 */
	if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK))
		return -EINVAL;

	/* Put an artificial cap on the number
	 * of segments passed to kexec_load.
	 */
	if (nr_segments > KEXEC_SEGMENT_MAX)
		return -EINVAL;

	return 0;
}
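
/*
 * A worked example of the flag-legality check above, using values from
 * <linux/kexec.h> (not part of the original file): with
 * flags == KEXEC_ON_CRASH | KEXEC_ARCH_X86_64, both
 * flags & ~KEXEC_ARCH_MASK and flags & KEXEC_FLAGS evaluate to
 * KEXEC_ON_CRASH, so the check passes. Any unknown bit outside
 * KEXEC_ARCH_MASK would survive the ~KEXEC_ARCH_MASK mask but be
 * cleared by the KEXEC_FLAGS mask, making the two sides differ and the
 * load fail with -EINVAL.
 */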

SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
		struct kexec_segment __user *, segments, unsigned long, flags)
{
	struct kexec_segment *ksegments;
	unsigned long result;

	result = kexec_load_check(nr_segments, flags);
	if (result)
		return result;

	/* Verify we are on the appropriate architecture */
	if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) &&
	    ((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT))
		return -EINVAL;

	ksegments = memdup_array_user(segments, nr_segments,
				      sizeof(ksegments[0]));
	if (IS_ERR(ksegments))
		return PTR_ERR(ksegments);

	result = do_kexec_load(entry, nr_segments, ksegments, flags);
	kfree(ksegments);

	return result;
}

#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry,
		       compat_ulong_t, nr_segments,
		       struct compat_kexec_segment __user *, segments,
		       compat_ulong_t, flags)
{
	struct compat_kexec_segment in;
	struct kexec_segment *ksegments;
	unsigned long i, result;

	result = kexec_load_check(nr_segments, flags);
	if (result)
		return result;

	/* Don't allow clients that don't understand the native
	 * architecture to do anything.
	 */
	if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT)
		return -EINVAL;

	ksegments = kmalloc_array(nr_segments, sizeof(ksegments[0]),
				  GFP_KERNEL);
	if (!ksegments)
		return -ENOMEM;

	for (i = 0; i < nr_segments; i++) {
		result = copy_from_user(&in, &segments[i], sizeof(in));
		if (result)
			goto fail;

		ksegments[i].buf   = compat_ptr(in.buf);
		ksegments[i].bufsz = in.bufsz;
		ksegments[i].mem   = in.mem;
		ksegments[i].memsz = in.memsz;
	}

	result = do_kexec_load(entry, nr_segments, ksegments, flags);

fail:
	kfree(ksegments);
	return result;
}
#endif
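
For reference, a minimal self-contained userspace sketch of the uninstall
path above (a hypothetical unload_crash.c, not part of the kernel tree):
passing nr_segments == 0 with KEXEC_ON_CRASH makes do_kexec_load() free the
currently loaded crash image. It assumes CAP_SYS_BOOT, an architecture that
defines __NR_kexec_load (glibc provides no wrapper), and, on a kernel built
with the ccsecurity patch shown above, TOMOYO's CCS_SYS_KEXEC_LOAD
permission.

	#include <stdio.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/kexec.h>

	int main(void)
	{
		/* entry is ignored when nr_segments == 0; KEXEC_ON_CRASH
		 * selects the crash image slot rather than the normal one.
		 */
		if (syscall(__NR_kexec_load, 0UL, 0UL, NULL,
			    (unsigned long)KEXEC_ON_CRASH)) {
			perror("kexec_load");
			return 1;
		}
		return 0;
	}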