/*
 * kexec.c - kexec_load system call
 * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
 *
 * This source code is licensed under the GNU General Public License,
 * Version 2.  See the file COPYING for more details.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/mm.h>
#include <linux/file.h>
#include <linux/security.h>
#include <linux/kexec.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/syscalls.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/ccsecurity.h>
#include "kexec_internal.h"

static int copy_user_segment_list(struct kimage *image,
                                  unsigned long nr_segments,
                                  struct kexec_segment __user *segments)
{
        int ret;
        size_t segment_bytes;

        /* Read in the segments */
        image->nr_segments = nr_segments;
        segment_bytes = nr_segments * sizeof(*segments);
        ret = copy_from_user(image->segment, segments, segment_bytes);
        if (ret)
                ret = -EFAULT;

        return ret;
}
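
/*
 * For reference, the segment descriptor copied in above is the
 * userspace-visible struct kexec_segment from include/uapi/linux/kexec.h,
 * which looks approximately like:
 *
 *	struct kexec_segment {
 *		const void *buf;	source buffer in the caller
 *		size_t bufsz;		bytes to copy from buf
 *		const void *mem;	physical destination address
 *		size_t memsz;		bytes reserved at mem
 *	};
 *
 * The direct copy_from_user() works because the kernel-internal
 * struct kexec_segment is laid out to match this for native callers;
 * 32-bit compat callers are converted in the compat syscall below.
 */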

static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
                             unsigned long nr_segments,
                             struct kexec_segment __user *segments,
                             unsigned long flags)
{
        int ret;
        struct kimage *image;
        bool kexec_on_panic = flags & KEXEC_ON_CRASH;

        if (kexec_on_panic) {
                /* Verify we have a valid entry point */
                if ((entry < phys_to_boot_phys(crashk_res.start)) ||
                    (entry > phys_to_boot_phys(crashk_res.end)))
                        return -EADDRNOTAVAIL;
        }

        /* Allocate and initialize a controlling structure */
        image = do_kimage_alloc_init();
        if (!image)
                return -ENOMEM;

        image->start = entry;

        ret = copy_user_segment_list(image, nr_segments, segments);
        if (ret)
                goto out_free_image;

        if (kexec_on_panic) {
                /* Enable special crash kernel control page alloc policy. */
                image->control_page = crashk_res.start;
                image->type = KEXEC_TYPE_CRASH;
        }

        ret = sanity_check_segment_list(image);
        if (ret)
                goto out_free_image;

        /*
         * Find a location for the control code buffer, and add it to
         * the vector of segments so that its pages will also be
         * counted as destination pages.
         */
        ret = -ENOMEM;
        image->control_code_page = kimage_alloc_control_pages(image,
                                           get_order(KEXEC_CONTROL_PAGE_SIZE));
        if (!image->control_code_page) {
                pr_err("Could not allocate control_code_buffer\n");
                goto out_free_image;
        }

        if (!kexec_on_panic) {
                image->swap_page = kimage_alloc_control_pages(image, 0);
                if (!image->swap_page) {
                        pr_err("Could not allocate swap buffer\n");
                        goto out_free_control_pages;
                }
        }

        *rimage = image;
        return 0;
out_free_control_pages:
        kimage_free_page_list(&image->control_pages);
out_free_image:
        kfree(image);
        return ret;
}

static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
                struct kexec_segment __user *segments, unsigned long flags)
{
        struct kimage **dest_image, *image;
        unsigned long i;
        int ret;

        if (flags & KEXEC_ON_CRASH) {
                dest_image = &kexec_crash_image;
                if (kexec_crash_image)
                        arch_kexec_unprotect_crashkres();
        } else {
                dest_image = &kexec_image;
        }

        if (nr_segments == 0) {
                /* Uninstall image */
                kimage_free(xchg(dest_image, NULL));
                return 0;
        }
        if (flags & KEXEC_ON_CRASH) {
                /*
                 * Loading another kernel to switch to if this one
                 * crashes.  Free any current crash dump kernel before
                 * we corrupt it.
                 */
                kimage_free(xchg(&kexec_crash_image, NULL));
        }

        ret = kimage_alloc_init(&image, entry, nr_segments, segments, flags);
        if (ret)
                return ret;

        if (flags & KEXEC_PRESERVE_CONTEXT)
                image->preserve_context = 1;

        ret = machine_kexec_prepare(image);
        if (ret)
                goto out;

        /*
         * Some architectures (like s390) may touch the crash memory before
         * machine_kexec_prepare(); we must copy the vmcoreinfo data after it.
         */
        ret = kimage_crash_copy_vmcoreinfo(image);
        if (ret)
                goto out;

        for (i = 0; i < nr_segments; i++) {
                ret = kimage_load_segment(image, &image->segment[i]);
                if (ret)
                        goto out;
        }

        kimage_terminate(image);

        /* Install the new kernel and uninstall the old */
        image = xchg(dest_image, image);

out:
        if ((flags & KEXEC_ON_CRASH) && kexec_crash_image)
                arch_kexec_protect_crashkres();

        kimage_free(image);
        return ret;
}

/*
 * Exec Kernel system call: for obvious reasons only root may call it.
 *
 * This call breaks up into three pieces.
 * - A generic part which loads the new kernel from the current
 *   address space, and very carefully places the data in the
 *   allocated pages.
 *
 * - A generic part that interacts with the kernel and tells all of
 *   the devices to shut down.  Preventing on-going DMAs, and placing
 *   the devices in a consistent state so a later kernel can
 *   reinitialize them.
 *
 * - A machine specific part that includes the syscall number
 *   and then copies the image to its final destination, and
 *   jumps into the image at entry.
 *
 * kexec does not sync or unmount filesystems, so if you need
 * that to happen you need to do that yourself.
 */
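
/*
 * Illustrative userspace sketch (a hedged example, not part of the kernel
 * sources): loading a single segment through the raw syscall.  The names
 * image_buf, image_len, dest_paddr, dest_len and entry are placeholders
 * that a loader such as kexec-tools would fill in; buf/bufsz describe the
 * image bytes in the calling process, mem/memsz the page-aligned physical
 * destination.  There is normally no C library wrapper for kexec_load(2),
 * hence the raw syscall(2).
 *
 *	#include <unistd.h>
 *	#include <sys/syscall.h>
 *	#include <linux/kexec.h>
 *
 *	struct kexec_segment seg = {
 *		.buf   = image_buf,
 *		.bufsz = image_len,
 *		.mem   = (void *)dest_paddr,
 *		.memsz = dest_len,
 *	};
 *
 *	if (syscall(SYS_kexec_load, entry, 1UL, &seg, KEXEC_ARCH_DEFAULT))
 *		perror("kexec_load");
 */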

static inline int kexec_load_check(unsigned long nr_segments,
                                   unsigned long flags)
{
        int result;

        /* We only trust the superuser with rebooting the system. */
        if (!capable(CAP_SYS_BOOT) || kexec_load_disabled)
                return -EPERM;
        if (!ccs_capable(CCS_SYS_KEXEC_LOAD))
                return -EPERM;

        /* Permit LSMs and IMA to fail the kexec */
        result = security_kernel_load_data(LOADING_KEXEC_IMAGE);
        if (result < 0)
                return result;

        /*
         * Verify we have a legal set of flags.
         * This leaves us room for future extensions.
         */
        if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK))
                return -EINVAL;

        /* Put an artificial cap on the number
         * of segments passed to kexec_load.
         */
        if (nr_segments > KEXEC_SEGMENT_MAX)
                return -EINVAL;

        return 0;
}

SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
                struct kexec_segment __user *, segments, unsigned long, flags)
{
        int result;

        result = kexec_load_check(nr_segments, flags);
        if (result)
                return result;

        /* Verify we are on the appropriate architecture */
        if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) &&
                ((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT))
                return -EINVAL;

        /* Because we write directly to the reserved memory
         * region when loading crash kernels we need a mutex here to
         * prevent multiple crash kernels from attempting to load
         * simultaneously, and to prevent a crash kernel from loading
         * over the top of an in-use crash kernel.
         *
         * KISS: always take the mutex.
         */
        if (!mutex_trylock(&kexec_mutex))
                return -EBUSY;

        result = do_kexec_load(entry, nr_segments, segments, flags);

        mutex_unlock(&kexec_mutex);

        return result;
}

#ifdef CONFIG_COMPAT
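/*
 * 32-bit callers pass struct compat_kexec_segment, whose pointer and size
 * fields use the 32-bit ABI widths; as an approximation (see the actual
 * definition in include/linux/kexec.h) it looks like:
 *
 *	struct compat_kexec_segment {
 *		compat_uptr_t  buf;
 *		compat_size_t  bufsz;
 *		compat_ulong_t mem;
 *		compat_size_t  memsz;
 *	};
 *
 * Since do_kexec_load() expects an array of native struct kexec_segment in
 * user memory, the handler below converts each entry and stages the
 * converted array via compat_alloc_user_space() before loading.
 */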
COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry,
                       compat_ulong_t, nr_segments,
                       struct compat_kexec_segment __user *, segments,
                       compat_ulong_t, flags)
{
        struct compat_kexec_segment in;
        struct kexec_segment out, __user *ksegments;
        unsigned long i, result;

        result = kexec_load_check(nr_segments, flags);
        if (result)
                return result;

        /* Don't allow clients that don't understand the native
         * architecture to do anything.
         */
        if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT)
                return -EINVAL;

        ksegments = compat_alloc_user_space(nr_segments * sizeof(out));
        for (i = 0; i < nr_segments; i++) {
                result = copy_from_user(&in, &segments[i], sizeof(in));
                if (result)
                        return -EFAULT;

                out.buf = compat_ptr(in.buf);
                out.bufsz = in.bufsz;
                out.mem = in.mem;
                out.memsz = in.memsz;

                result = copy_to_user(&ksegments[i], &out, sizeof(out));
                if (result)
                        return -EFAULT;
        }

        /* Because we write directly to the reserved memory
         * region when loading crash kernels we need a mutex here to
         * prevent multiple crash kernels from attempting to load
         * simultaneously, and to prevent a crash kernel from loading
         * over the top of an in-use crash kernel.
         *
         * KISS: always take the mutex.
         */
        if (!mutex_trylock(&kexec_mutex))
                return -EBUSY;

        result = do_kexec_load(entry, nr_segments, ksegments, flags);

        mutex_unlock(&kexec_mutex);

        return result;
}
#endif