~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/kernel/kexec_core.c

Version: ~ [ linux-6.11.5 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.58 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.114 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.169 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.228 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.284 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.322 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /kernel/kexec_core.c (Version linux-6.11.5) and /kernel/kexec_core.c (Version linux-6.5.13)


  1 // SPDX-License-Identifier: GPL-2.0-only            1 // SPDX-License-Identifier: GPL-2.0-only
  2 /*                                                  2 /*
  3  * kexec.c - kexec system call core code.           3  * kexec.c - kexec system call core code.
  4  * Copyright (C) 2002-2004 Eric Biederman  <eb      4  * Copyright (C) 2002-2004 Eric Biederman  <ebiederm@xmission.com>
  5  */                                                 5  */
  6                                                     6 
  7 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt         7 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  8                                                     8 
  9 #include <linux/btf.h>                              9 #include <linux/btf.h>
 10 #include <linux/capability.h>                      10 #include <linux/capability.h>
 11 #include <linux/mm.h>                              11 #include <linux/mm.h>
 12 #include <linux/file.h>                            12 #include <linux/file.h>
 13 #include <linux/slab.h>                            13 #include <linux/slab.h>
 14 #include <linux/fs.h>                              14 #include <linux/fs.h>
 15 #include <linux/kexec.h>                           15 #include <linux/kexec.h>
 16 #include <linux/mutex.h>                           16 #include <linux/mutex.h>
 17 #include <linux/list.h>                            17 #include <linux/list.h>
 18 #include <linux/highmem.h>                         18 #include <linux/highmem.h>
 19 #include <linux/syscalls.h>                        19 #include <linux/syscalls.h>
 20 #include <linux/reboot.h>                          20 #include <linux/reboot.h>
 21 #include <linux/ioport.h>                          21 #include <linux/ioport.h>
 22 #include <linux/hardirq.h>                         22 #include <linux/hardirq.h>
 23 #include <linux/elf.h>                             23 #include <linux/elf.h>
 24 #include <linux/elfcore.h>                         24 #include <linux/elfcore.h>
 25 #include <linux/utsname.h>                         25 #include <linux/utsname.h>
 26 #include <linux/numa.h>                            26 #include <linux/numa.h>
 27 #include <linux/suspend.h>                         27 #include <linux/suspend.h>
 28 #include <linux/device.h>                          28 #include <linux/device.h>
 29 #include <linux/freezer.h>                         29 #include <linux/freezer.h>
 30 #include <linux/panic_notifier.h>                  30 #include <linux/panic_notifier.h>
 31 #include <linux/pm.h>                              31 #include <linux/pm.h>
 32 #include <linux/cpu.h>                             32 #include <linux/cpu.h>
 33 #include <linux/uaccess.h>                         33 #include <linux/uaccess.h>
 34 #include <linux/io.h>                              34 #include <linux/io.h>
 35 #include <linux/console.h>                         35 #include <linux/console.h>
 36 #include <linux/vmalloc.h>                         36 #include <linux/vmalloc.h>
 37 #include <linux/swap.h>                            37 #include <linux/swap.h>
 38 #include <linux/syscore_ops.h>                     38 #include <linux/syscore_ops.h>
 39 #include <linux/compiler.h>                        39 #include <linux/compiler.h>
 40 #include <linux/hugetlb.h>                         40 #include <linux/hugetlb.h>
 41 #include <linux/objtool.h>                         41 #include <linux/objtool.h>
 42 #include <linux/kmsg_dump.h>                       42 #include <linux/kmsg_dump.h>
 43                                                    43 
 44 #include <asm/page.h>                              44 #include <asm/page.h>
 45 #include <asm/sections.h>                          45 #include <asm/sections.h>
 46                                                    46 
 47 #include <crypto/hash.h>                           47 #include <crypto/hash.h>
 48 #include "kexec_internal.h"                        48 #include "kexec_internal.h"
 49                                                    49 
 50 atomic_t __kexec_lock = ATOMIC_INIT(0);            50 atomic_t __kexec_lock = ATOMIC_INIT(0);
 51                                                    51 
                                                   >>  52 /* Per cpu memory for storing cpu states in case of system crash. */
                                                   >>  53 note_buf_t __percpu *crash_notes;
                                                   >>  54 
 52 /* Flag to indicate we are going to kexec a ne     55 /* Flag to indicate we are going to kexec a new kernel */
 53 bool kexec_in_progress = false;                    56 bool kexec_in_progress = false;
 54                                                    57 
 55 bool kexec_file_dbg_print;                     !!  58 
                                                   >>  59 /* Location of the reserved area for the crash kernel */
                                                   >>  60 struct resource crashk_res = {
                                                   >>  61         .name  = "Crash kernel",
                                                   >>  62         .start = 0,
                                                   >>  63         .end   = 0,
                                                   >>  64         .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
                                                   >>  65         .desc  = IORES_DESC_CRASH_KERNEL
                                                   >>  66 };
                                                   >>  67 struct resource crashk_low_res = {
                                                   >>  68         .name  = "Crash kernel",
                                                   >>  69         .start = 0,
                                                   >>  70         .end   = 0,
                                                   >>  71         .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
                                                   >>  72         .desc  = IORES_DESC_CRASH_KERNEL
                                                   >>  73 };
                                                   >>  74 
                                                   >>  75 int kexec_should_crash(struct task_struct *p)
                                                   >>  76 {
                                                   >>  77         /*
                                                   >>  78          * If crash_kexec_post_notifiers is enabled, don't run
                                                   >>  79          * crash_kexec() here yet, which must be run after panic
                                                   >>  80          * notifiers in panic().
                                                   >>  81          */
                                                   >>  82         if (crash_kexec_post_notifiers)
                                                   >>  83                 return 0;
                                                   >>  84         /*
                                                   >>  85          * There are 4 panic() calls in make_task_dead() path, each of which
                                                   >>  86          * corresponds to each of these 4 conditions.
                                                   >>  87          */
                                                   >>  88         if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops)
                                                   >>  89                 return 1;
                                                   >>  90         return 0;
                                                   >>  91 }
                                                   >>  92 
                                                   >>  93 int kexec_crash_loaded(void)
                                                   >>  94 {
                                                   >>  95         return !!kexec_crash_image;
                                                   >>  96 }
                                                   >>  97 EXPORT_SYMBOL_GPL(kexec_crash_loaded);
 56                                                    98 
 57 /*                                                 99 /*
 58  * When kexec transitions to the new kernel th    100  * When kexec transitions to the new kernel there is a one-to-one
 59  * mapping between physical and virtual addres    101  * mapping between physical and virtual addresses.  On processors
 60  * where you can disable the MMU this is trivi    102  * where you can disable the MMU this is trivial, and easy.  For
 61  * others it is still a simple predictable pag    103  * others it is still a simple predictable page table to setup.
 62  *                                                104  *
 63  * In that environment kexec copies the new ke    105  * In that environment kexec copies the new kernel to its final
 64  * resting place.  This means I can only suppo    106  * resting place.  This means I can only support memory whose
 65  * physical address can fit in an unsigned lon    107  * physical address can fit in an unsigned long.  In particular
 66  * addresses where (pfn << PAGE_SHIFT) > ULONG    108  * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled.
 67  * If the assembly stub has more restrictive r    109  * If the assembly stub has more restrictive requirements
 68  * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_ME    110  * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be
 69  * defined more restrictively in <asm/kexec.h>    111  * defined more restrictively in <asm/kexec.h>.
 70  *                                                112  *
 71  * The code for the transition from the curren    113  * The code for the transition from the current kernel to the
 72  * new kernel is placed in the control_code_bu    114  * new kernel is placed in the control_code_buffer, whose size
 73  * is given by KEXEC_CONTROL_PAGE_SIZE.  In th    115  * is given by KEXEC_CONTROL_PAGE_SIZE.  In the best case only a single
 74  * page of memory is necessary, but some archi    116  * page of memory is necessary, but some architectures require more.
 75  * Because this memory must be identity mapped    117  * Because this memory must be identity mapped in the transition from
 76  * virtual to physical addresses it must live     118  * virtual to physical addresses it must live in the range
 77  * 0 - TASK_SIZE, as only the user space mappi    119  * 0 - TASK_SIZE, as only the user space mappings are arbitrarily
 78  * modifiable.                                    120  * modifiable.
 79  *                                                121  *
 80  * The assembly stub in the control code buffe    122  * The assembly stub in the control code buffer is passed a linked list
 81  * of descriptor pages detailing the source pa    123  * of descriptor pages detailing the source pages of the new kernel,
 82  * and the destination addresses of those sour    124  * and the destination addresses of those source pages.  As this data
 83  * structure is not used in the context of the    125  * structure is not used in the context of the current OS, it must
 84  * be self-contained.                             126  * be self-contained.
 85  *                                                127  *
 86  * The code has been made to work with highmem    128  * The code has been made to work with highmem pages and will use a
 87  * destination page in its final resting place    129  * destination page in its final resting place (if it happens
 88  * to allocate it).  The end product of this i    130  * to allocate it).  The end product of this is that most of the
 89  * physical address space, and most of RAM can    131  * physical address space, and most of RAM can be used.
 90  *                                                132  *
 91  * Future directions include:                     133  * Future directions include:
 92  *  - allocating a page table with the control    134  *  - allocating a page table with the control code buffer identity
 93  *    mapped, to simplify machine_kexec and ma    135  *    mapped, to simplify machine_kexec and make kexec_on_panic more
 94  *    reliable.                                   136  *    reliable.
 95  */                                               137  */
 96                                                   138 
 97 /*                                                139 /*
 98  * KIMAGE_NO_DEST is an impossible destination    140  * KIMAGE_NO_DEST is an impossible destination address..., for
 99  * allocating pages whose destination address     141  * allocating pages whose destination address we do not care about.
100  */                                               142  */
101 #define KIMAGE_NO_DEST (-1UL)                     143 #define KIMAGE_NO_DEST (-1UL)
102 #define PAGE_COUNT(x) (((x) + PAGE_SIZE - 1) >    144 #define PAGE_COUNT(x) (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)
103                                                   145 
104 static struct page *kimage_alloc_page(struct k    146 static struct page *kimage_alloc_page(struct kimage *image,
105                                        gfp_t g    147                                        gfp_t gfp_mask,
106                                        unsigne    148                                        unsigned long dest);
107                                                   149 
108 int sanity_check_segment_list(struct kimage *i    150 int sanity_check_segment_list(struct kimage *image)
109 {                                                 151 {
110         int i;                                    152         int i;
111         unsigned long nr_segments = image->nr_    153         unsigned long nr_segments = image->nr_segments;
112         unsigned long total_pages = 0;            154         unsigned long total_pages = 0;
113         unsigned long nr_pages = totalram_page    155         unsigned long nr_pages = totalram_pages();
114                                                   156 
115         /*                                        157         /*
116          * Verify we have good destination add    158          * Verify we have good destination addresses.  The caller is
117          * responsible for making certain we d    159          * responsible for making certain we don't attempt to load
118          * the new image into invalid or reser    160          * the new image into invalid or reserved areas of RAM.  This
119          * just verifies it is an address we c    161          * just verifies it is an address we can use.
120          *                                        162          *
121          * Since the kernel does everything in    163          * Since the kernel does everything in page size chunks ensure
122          * the destination addresses are page     164          * the destination addresses are page aligned.  Too many
123          * special cases crop up when we don't    165          * special cases crop up when we don't do this.  The most
124          * insidious is getting overlapping de    166          * insidious is getting overlapping destination addresses
125          * simply because addresses are change    167          * simply because addresses are changed to page size
126          * granularity.                           168          * granularity.
127          */                                       169          */
128         for (i = 0; i < nr_segments; i++) {       170         for (i = 0; i < nr_segments; i++) {
129                 unsigned long mstart, mend;       171                 unsigned long mstart, mend;
130                                                   172 
131                 mstart = image->segment[i].mem    173                 mstart = image->segment[i].mem;
132                 mend   = mstart + image->segme    174                 mend   = mstart + image->segment[i].memsz;
133                 if (mstart > mend)                175                 if (mstart > mend)
134                         return -EADDRNOTAVAIL;    176                         return -EADDRNOTAVAIL;
135                 if ((mstart & ~PAGE_MASK) || (    177                 if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK))
136                         return -EADDRNOTAVAIL;    178                         return -EADDRNOTAVAIL;
137                 if (mend >= KEXEC_DESTINATION_    179                 if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT)
138                         return -EADDRNOTAVAIL;    180                         return -EADDRNOTAVAIL;
139         }                                         181         }
140                                                   182 
141         /* Verify our destination addresses do    183         /* Verify our destination addresses do not overlap.
142          * If we allowed overlapping destinati    184          * If we allowed overlapping destination addresses
143          * through, very weird things can happ    185          * through, very weird things can happen with no
144          * easy explanation as one segment sto    186          * easy explanation as one segment stops on another.
145          */                                       187          */
146         for (i = 0; i < nr_segments; i++) {       188         for (i = 0; i < nr_segments; i++) {
147                 unsigned long mstart, mend;       189                 unsigned long mstart, mend;
148                 unsigned long j;                  190                 unsigned long j;
149                                                   191 
150                 mstart = image->segment[i].mem    192                 mstart = image->segment[i].mem;
151                 mend   = mstart + image->segme    193                 mend   = mstart + image->segment[i].memsz;
152                 for (j = 0; j < i; j++) {         194                 for (j = 0; j < i; j++) {
153                         unsigned long pstart,     195                         unsigned long pstart, pend;
154                                                   196 
155                         pstart = image->segmen    197                         pstart = image->segment[j].mem;
156                         pend   = pstart + imag    198                         pend   = pstart + image->segment[j].memsz;
157                         /* Do the segments ove    199                         /* Do the segments overlap ? */
158                         if ((mend > pstart) &&    200                         if ((mend > pstart) && (mstart < pend))
159                                 return -EINVAL    201                                 return -EINVAL;
160                 }                                 202                 }
161         }                                         203         }
162                                                   204 
163         /* Ensure our buffer sizes are no larg    205         /* Ensure our buffer sizes are no larger than
164          * our memory sizes.  This should alwa    206          * our memory sizes.  This should always be the case,
165          * and it is easier to check up front     207          * and it is easier to check up front than to be surprised
166          * later on.                              208          * later on.
167          */                                       209          */
168         for (i = 0; i < nr_segments; i++) {       210         for (i = 0; i < nr_segments; i++) {
169                 if (image->segment[i].bufsz >     211                 if (image->segment[i].bufsz > image->segment[i].memsz)
170                         return -EINVAL;           212                         return -EINVAL;
171         }                                         213         }
172                                                   214 
173         /*                                        215         /*
174          * Verify that no more than half of me    216          * Verify that no more than half of memory will be consumed. If the
175          * request from userspace is too large    217          * request from userspace is too large, a large amount of time will be
176          * wasted allocating pages, which can     218          * wasted allocating pages, which can cause a soft lockup.
177          */                                       219          */
178         for (i = 0; i < nr_segments; i++) {       220         for (i = 0; i < nr_segments; i++) {
179                 if (PAGE_COUNT(image->segment[    221                 if (PAGE_COUNT(image->segment[i].memsz) > nr_pages / 2)
180                         return -EINVAL;           222                         return -EINVAL;
181                                                   223 
182                 total_pages += PAGE_COUNT(imag    224                 total_pages += PAGE_COUNT(image->segment[i].memsz);
183         }                                         225         }
184                                                   226 
185         if (total_pages > nr_pages / 2)           227         if (total_pages > nr_pages / 2)
186                 return -EINVAL;                   228                 return -EINVAL;
187                                                   229 
188 #ifdef CONFIG_CRASH_DUMP                       << 
189         /*                                        230         /*
190          * Verify we have good destination add    231          * Verify we have good destination addresses.  Normally
191          * the caller is responsible for makin    232          * the caller is responsible for making certain we don't
192          * attempt to load the new image into     233          * attempt to load the new image into invalid or reserved
193          * areas of RAM.  But crash kernels ar    234          * areas of RAM.  But crash kernels are preloaded into a
194          * reserved area of ram.  We must ensu    235          * reserved area of ram.  We must ensure the addresses
195          * are in the reserved area otherwise     236          * are in the reserved area otherwise preloading the
196          * kernel could corrupt things.           237          * kernel could corrupt things.
197          */                                       238          */
198                                                   239 
199         if (image->type == KEXEC_TYPE_CRASH) {    240         if (image->type == KEXEC_TYPE_CRASH) {
200                 for (i = 0; i < nr_segments; i    241                 for (i = 0; i < nr_segments; i++) {
201                         unsigned long mstart,     242                         unsigned long mstart, mend;
202                                                   243 
203                         mstart = image->segmen    244                         mstart = image->segment[i].mem;
204                         mend = mstart + image-    245                         mend = mstart + image->segment[i].memsz - 1;
205                         /* Ensure we are withi    246                         /* Ensure we are within the crash kernel limits */
206                         if ((mstart < phys_to_    247                         if ((mstart < phys_to_boot_phys(crashk_res.start)) ||
207                             (mend > phys_to_bo    248                             (mend > phys_to_boot_phys(crashk_res.end)))
208                                 return -EADDRN    249                                 return -EADDRNOTAVAIL;
209                 }                                 250                 }
210         }                                         251         }
211 #endif                                         << 
212                                                   252 
213         return 0;                                 253         return 0;
214 }                                                 254 }
215                                                   255 
216 struct kimage *do_kimage_alloc_init(void)         256 struct kimage *do_kimage_alloc_init(void)
217 {                                                 257 {
218         struct kimage *image;                     258         struct kimage *image;
219                                                   259 
220         /* Allocate a controlling structure */    260         /* Allocate a controlling structure */
221         image = kzalloc(sizeof(*image), GFP_KE    261         image = kzalloc(sizeof(*image), GFP_KERNEL);
222         if (!image)                               262         if (!image)
223                 return NULL;                      263                 return NULL;
224                                                   264 
225         image->head = 0;                          265         image->head = 0;
226         image->entry = &image->head;              266         image->entry = &image->head;
227         image->last_entry = &image->head;         267         image->last_entry = &image->head;
228         image->control_page = ~0; /* By defaul    268         image->control_page = ~0; /* By default this does not apply */
229         image->type = KEXEC_TYPE_DEFAULT;         269         image->type = KEXEC_TYPE_DEFAULT;
230                                                   270 
231         /* Initialize the list of control page    271         /* Initialize the list of control pages */
232         INIT_LIST_HEAD(&image->control_pages);    272         INIT_LIST_HEAD(&image->control_pages);
233                                                   273 
234         /* Initialize the list of destination     274         /* Initialize the list of destination pages */
235         INIT_LIST_HEAD(&image->dest_pages);       275         INIT_LIST_HEAD(&image->dest_pages);
236                                                   276 
237         /* Initialize the list of unusable pag    277         /* Initialize the list of unusable pages */
238         INIT_LIST_HEAD(&image->unusable_pages)    278         INIT_LIST_HEAD(&image->unusable_pages);
239                                                   279 
240 #ifdef CONFIG_CRASH_HOTPLUG                    << 
241         image->hp_action = KEXEC_CRASH_HP_NONE << 
242         image->elfcorehdr_index = -1;          << 
243         image->elfcorehdr_updated = false;     << 
244 #endif                                         << 
245                                                << 
246         return image;                             280         return image;
247 }                                                 281 }
248                                                   282 
249 int kimage_is_destination_range(struct kimage     283 int kimage_is_destination_range(struct kimage *image,
250                                         unsign    284                                         unsigned long start,
251                                         unsign    285                                         unsigned long end)
252 {                                                 286 {
253         unsigned long i;                          287         unsigned long i;
254                                                   288 
255         for (i = 0; i < image->nr_segments; i+    289         for (i = 0; i < image->nr_segments; i++) {
256                 unsigned long mstart, mend;       290                 unsigned long mstart, mend;
257                                                   291 
258                 mstart = image->segment[i].mem    292                 mstart = image->segment[i].mem;
259                 mend = mstart + image->segment !! 293                 mend = mstart + image->segment[i].memsz;
260                 if ((end >= mstart) && (start  !! 294                 if ((end > mstart) && (start < mend))
261                         return 1;                 295                         return 1;
262         }                                         296         }
263                                                   297 
264         return 0;                                 298         return 0;
265 }                                                 299 }
266                                                   300 
267 static struct page *kimage_alloc_pages(gfp_t g    301 static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
268 {                                                 302 {
269         struct page *pages;                       303         struct page *pages;
270                                                   304 
271         if (fatal_signal_pending(current))        305         if (fatal_signal_pending(current))
272                 return NULL;                      306                 return NULL;
273         pages = alloc_pages(gfp_mask & ~__GFP_    307         pages = alloc_pages(gfp_mask & ~__GFP_ZERO, order);
274         if (pages) {                              308         if (pages) {
275                 unsigned int count, i;            309                 unsigned int count, i;
276                                                   310 
277                 pages->mapping = NULL;            311                 pages->mapping = NULL;
278                 set_page_private(pages, order)    312                 set_page_private(pages, order);
279                 count = 1 << order;               313                 count = 1 << order;
280                 for (i = 0; i < count; i++)       314                 for (i = 0; i < count; i++)
281                         SetPageReserved(pages     315                         SetPageReserved(pages + i);
282                                                   316 
283                 arch_kexec_post_alloc_pages(pa    317                 arch_kexec_post_alloc_pages(page_address(pages), count,
284                                             gf    318                                             gfp_mask);
285                                                   319 
286                 if (gfp_mask & __GFP_ZERO)        320                 if (gfp_mask & __GFP_ZERO)
287                         for (i = 0; i < count;    321                         for (i = 0; i < count; i++)
288                                 clear_highpage    322                                 clear_highpage(pages + i);
289         }                                         323         }
290                                                   324 
291         return pages;                             325         return pages;
292 }                                                 326 }
293                                                   327 
294 static void kimage_free_pages(struct page *pag    328 static void kimage_free_pages(struct page *page)
295 {                                                 329 {
296         unsigned int order, count, i;             330         unsigned int order, count, i;
297                                                   331 
298         order = page_private(page);               332         order = page_private(page);
299         count = 1 << order;                       333         count = 1 << order;
300                                                   334 
301         arch_kexec_pre_free_pages(page_address    335         arch_kexec_pre_free_pages(page_address(page), count);
302                                                   336 
303         for (i = 0; i < count; i++)               337         for (i = 0; i < count; i++)
304                 ClearPageReserved(page + i);      338                 ClearPageReserved(page + i);
305         __free_pages(page, order);                339         __free_pages(page, order);
306 }                                                 340 }
307                                                   341 
308 void kimage_free_page_list(struct list_head *l    342 void kimage_free_page_list(struct list_head *list)
309 {                                                 343 {
310         struct page *page, *next;                 344         struct page *page, *next;
311                                                   345 
312         list_for_each_entry_safe(page, next, l    346         list_for_each_entry_safe(page, next, list, lru) {
313                 list_del(&page->lru);             347                 list_del(&page->lru);
314                 kimage_free_pages(page);          348                 kimage_free_pages(page);
315         }                                         349         }
316 }                                                 350 }
317                                                   351 
318 static struct page *kimage_alloc_normal_contro    352 static struct page *kimage_alloc_normal_control_pages(struct kimage *image,
319                                                   353                                                         unsigned int order)
320 {                                                 354 {
321         /* Control pages are special, they are    355         /* Control pages are special, they are the intermediaries
322          * that are needed while we copy the r    356          * that are needed while we copy the rest of the pages
323          * to their final resting place.  As s    357          * to their final resting place.  As such they must
324          * not conflict with either the destin    358          * not conflict with either the destination addresses
325          * or memory the kernel is already usi    359          * or memory the kernel is already using.
326          *                                        360          *
327          * The only case where we really need     361          * The only case where we really need more than one of
328          * these are for architectures where w    362          * these are for architectures where we cannot disable
329          * the MMU and must instead generate a    363          * the MMU and must instead generate an identity mapped
330          * page table for all of the memory.      364          * page table for all of the memory.
331          *                                        365          *
332          * At worst this runs in O(N) of the i    366          * At worst this runs in O(N) of the image size.
333          */                                       367          */
334         struct list_head extra_pages;             368         struct list_head extra_pages;
335         struct page *pages;                       369         struct page *pages;
336         unsigned int count;                       370         unsigned int count;
337                                                   371 
338         count = 1 << order;                       372         count = 1 << order;
339         INIT_LIST_HEAD(&extra_pages);             373         INIT_LIST_HEAD(&extra_pages);
340                                                   374 
341         /* Loop while I can allocate a page an    375         /* Loop while I can allocate a page and the page allocated
342          * is a destination page.                 376          * is a destination page.
343          */                                       377          */
344         do {                                      378         do {
345                 unsigned long pfn, epfn, addr,    379                 unsigned long pfn, epfn, addr, eaddr;
346                                                   380 
347                 pages = kimage_alloc_pages(KEX    381                 pages = kimage_alloc_pages(KEXEC_CONTROL_MEMORY_GFP, order);
348                 if (!pages)                       382                 if (!pages)
349                         break;                    383                         break;
350                 pfn   = page_to_boot_pfn(pages    384                 pfn   = page_to_boot_pfn(pages);
351                 epfn  = pfn + count;              385                 epfn  = pfn + count;
352                 addr  = pfn << PAGE_SHIFT;        386                 addr  = pfn << PAGE_SHIFT;
353                 eaddr = (epfn << PAGE_SHIFT) - !! 387                 eaddr = epfn << PAGE_SHIFT;
354                 if ((epfn >= (KEXEC_CONTROL_ME    388                 if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) ||
355                               kimage_is_destin    389                               kimage_is_destination_range(image, addr, eaddr)) {
356                         list_add(&pages->lru,     390                         list_add(&pages->lru, &extra_pages);
357                         pages = NULL;             391                         pages = NULL;
358                 }                                 392                 }
359         } while (!pages);                         393         } while (!pages);
360                                                   394 
361         if (pages) {                              395         if (pages) {
362                 /* Remember the allocated page    396                 /* Remember the allocated page... */
363                 list_add(&pages->lru, &image->    397                 list_add(&pages->lru, &image->control_pages);
364                                                   398 
365                 /* Because the page is already    399                 /* Because the page is already in it's destination
366                  * location we will never allo    400                  * location we will never allocate another page at
367                  * that address.  Therefore ki    401                  * that address.  Therefore kimage_alloc_pages
368                  * will not return it (again)     402                  * will not return it (again) and we don't need
369                  * to give it an entry in imag    403                  * to give it an entry in image->segment[].
370                  */                               404                  */
371         }                                         405         }
372         /* Deal with the destination pages I h    406         /* Deal with the destination pages I have inadvertently allocated.
373          *                                        407          *
374          * Ideally I would convert multi-page     408          * Ideally I would convert multi-page allocations into single
375          * page allocations, and add everythin    409          * page allocations, and add everything to image->dest_pages.
376          *                                        410          *
377          * For now it is simpler to just free     411          * For now it is simpler to just free the pages.
378          */                                       412          */
379         kimage_free_page_list(&extra_pages);      413         kimage_free_page_list(&extra_pages);
380                                                   414 
381         return pages;                             415         return pages;
382 }                                                 416 }
383                                                   417 
384 #ifdef CONFIG_CRASH_DUMP                       << 
385 static struct page *kimage_alloc_crash_control    418 static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
386                                                   419                                                       unsigned int order)
387 {                                                 420 {
388         /* Control pages are special, they are    421         /* Control pages are special, they are the intermediaries
389          * that are needed while we copy the r    422          * that are needed while we copy the rest of the pages
390          * to their final resting place.  As s    423          * to their final resting place.  As such they must
391          * not conflict with either the destin    424          * not conflict with either the destination addresses
392          * or memory the kernel is already usi    425          * or memory the kernel is already using.
393          *                                        426          *
394          * Control pages are also the only pag    427          * Control pages are also the only pags we must allocate
395          * when loading a crash kernel.  All o    428          * when loading a crash kernel.  All of the other pages
396          * are specified by the segments and w    429          * are specified by the segments and we just memcpy
397          * into them directly.                    430          * into them directly.
398          *                                        431          *
399          * The only case where we really need     432          * The only case where we really need more than one of
400          * these are for architectures where w    433          * these are for architectures where we cannot disable
401          * the MMU and must instead generate a    434          * the MMU and must instead generate an identity mapped
402          * page table for all of the memory.      435          * page table for all of the memory.
403          *                                        436          *
404          * Given the low demand this implement    437          * Given the low demand this implements a very simple
405          * allocator that finds the first hole    438          * allocator that finds the first hole of the appropriate
406          * size in the reserved memory region,    439          * size in the reserved memory region, and allocates all
407          * of the memory up to and including t    440          * of the memory up to and including the hole.
408          */                                       441          */
409         unsigned long hole_start, hole_end, si    442         unsigned long hole_start, hole_end, size;
410         struct page *pages;                       443         struct page *pages;
411                                                   444 
412         pages = NULL;                             445         pages = NULL;
413         size = (1 << order) << PAGE_SHIFT;        446         size = (1 << order) << PAGE_SHIFT;
414         hole_start = ALIGN(image->control_page !! 447         hole_start = (image->control_page + (size - 1)) & ~(size - 1);
415         hole_end   = hole_start + size - 1;       448         hole_end   = hole_start + size - 1;
416         while (hole_end <= crashk_res.end) {      449         while (hole_end <= crashk_res.end) {
417                 unsigned long i;                  450                 unsigned long i;
418                                                   451 
419                 cond_resched();                   452                 cond_resched();
420                                                   453 
421                 if (hole_end > KEXEC_CRASH_CON    454                 if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT)
422                         break;                    455                         break;
423                 /* See if I overlap any of the    456                 /* See if I overlap any of the segments */
424                 for (i = 0; i < image->nr_segm    457                 for (i = 0; i < image->nr_segments; i++) {
425                         unsigned long mstart,     458                         unsigned long mstart, mend;
426                                                   459 
427                         mstart = image->segmen    460                         mstart = image->segment[i].mem;
428                         mend   = mstart + imag    461                         mend   = mstart + image->segment[i].memsz - 1;
429                         if ((hole_end >= mstar    462                         if ((hole_end >= mstart) && (hole_start <= mend)) {
430                                 /* Advance the    463                                 /* Advance the hole to the end of the segment */
431                                 hole_start = A !! 464                                 hole_start = (mend + (size - 1)) & ~(size - 1);
432                                 hole_end   = h    465                                 hole_end   = hole_start + size - 1;
433                                 break;            466                                 break;
434                         }                         467                         }
435                 }                                 468                 }
436                 /* If I don't overlap any segm    469                 /* If I don't overlap any segments I have found my hole! */
437                 if (i == image->nr_segments) {    470                 if (i == image->nr_segments) {
438                         pages = pfn_to_page(ho    471                         pages = pfn_to_page(hole_start >> PAGE_SHIFT);
439                         image->control_page =  !! 472                         image->control_page = hole_end;
440                         break;                    473                         break;
441                 }                                 474                 }
442         }                                         475         }
443                                                   476 
444         /* Ensure that these pages are decrypt    477         /* Ensure that these pages are decrypted if SME is enabled. */
445         if (pages)                                478         if (pages)
446                 arch_kexec_post_alloc_pages(pa    479                 arch_kexec_post_alloc_pages(page_address(pages), 1 << order, 0);
447                                                   480 
448         return pages;                             481         return pages;
449 }                                                 482 }
450 #endif                                         << 
451                                                   483 
452                                                   484 
453 struct page *kimage_alloc_control_pages(struct    485 struct page *kimage_alloc_control_pages(struct kimage *image,
454                                          unsig    486                                          unsigned int order)
455 {                                                 487 {
456         struct page *pages = NULL;                488         struct page *pages = NULL;
457                                                   489 
458         switch (image->type) {                    490         switch (image->type) {
459         case KEXEC_TYPE_DEFAULT:                  491         case KEXEC_TYPE_DEFAULT:
460                 pages = kimage_alloc_normal_co    492                 pages = kimage_alloc_normal_control_pages(image, order);
461                 break;                            493                 break;
462 #ifdef CONFIG_CRASH_DUMP                       << 
463         case KEXEC_TYPE_CRASH:                    494         case KEXEC_TYPE_CRASH:
464                 pages = kimage_alloc_crash_con    495                 pages = kimage_alloc_crash_control_pages(image, order);
465                 break;                            496                 break;
466 #endif                                         << 
467         }                                         497         }
468                                                   498 
469         return pages;                             499         return pages;
470 }                                                 500 }
471                                                   501 
                                                   >> 502 int kimage_crash_copy_vmcoreinfo(struct kimage *image)
                                                   >> 503 {
                                                   >> 504         struct page *vmcoreinfo_page;
                                                   >> 505         void *safecopy;
                                                   >> 506 
                                                   >> 507         if (image->type != KEXEC_TYPE_CRASH)
                                                   >> 508                 return 0;
                                                   >> 509 
                                                   >> 510         /*
                                                   >> 511          * For kdump, allocate one vmcoreinfo safe copy from the
                                                   >> 512          * crash memory. as we have arch_kexec_protect_crashkres()
                                                   >> 513          * after kexec syscall, we naturally protect it from write
                                                   >> 514          * (even read) access under kernel direct mapping. But on
                                                   >> 515          * the other hand, we still need to operate it when crash
                                                   >> 516          * happens to generate vmcoreinfo note, hereby we rely on
                                                   >> 517          * vmap for this purpose.
                                                   >> 518          */
                                                   >> 519         vmcoreinfo_page = kimage_alloc_control_pages(image, 0);
                                                   >> 520         if (!vmcoreinfo_page) {
                                                   >> 521                 pr_warn("Could not allocate vmcoreinfo buffer\n");
                                                   >> 522                 return -ENOMEM;
                                                   >> 523         }
                                                   >> 524         safecopy = vmap(&vmcoreinfo_page, 1, VM_MAP, PAGE_KERNEL);
                                                   >> 525         if (!safecopy) {
                                                   >> 526                 pr_warn("Could not vmap vmcoreinfo buffer\n");
                                                   >> 527                 return -ENOMEM;
                                                   >> 528         }
                                                   >> 529 
                                                   >> 530         image->vmcoreinfo_data_copy = safecopy;
                                                   >> 531         crash_update_vmcoreinfo_safecopy(safecopy);
                                                   >> 532 
                                                   >> 533         return 0;
                                                   >> 534 }
                                                   >> 535 
472 static int kimage_add_entry(struct kimage *ima    536 static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
473 {                                                 537 {
474         if (*image->entry != 0)                   538         if (*image->entry != 0)
475                 image->entry++;                   539                 image->entry++;
476                                                   540 
477         if (image->entry == image->last_entry)    541         if (image->entry == image->last_entry) {
478                 kimage_entry_t *ind_page;         542                 kimage_entry_t *ind_page;
479                 struct page *page;                543                 struct page *page;
480                                                   544 
481                 page = kimage_alloc_page(image    545                 page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST);
482                 if (!page)                        546                 if (!page)
483                         return -ENOMEM;           547                         return -ENOMEM;
484                                                   548 
485                 ind_page = page_address(page);    549                 ind_page = page_address(page);
486                 *image->entry = virt_to_boot_p    550                 *image->entry = virt_to_boot_phys(ind_page) | IND_INDIRECTION;
487                 image->entry = ind_page;          551                 image->entry = ind_page;
488                 image->last_entry = ind_page +    552                 image->last_entry = ind_page +
489                                       ((PAGE_S    553                                       ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
490         }                                         554         }
491         *image->entry = entry;                    555         *image->entry = entry;
492         image->entry++;                           556         image->entry++;
493         *image->entry = 0;                        557         *image->entry = 0;
494                                                   558 
495         return 0;                                 559         return 0;
496 }                                                 560 }
497                                                   561 
498 static int kimage_set_destination(struct kimag    562 static int kimage_set_destination(struct kimage *image,
499                                    unsigned lo    563                                    unsigned long destination)
500 {                                                 564 {
501         destination &= PAGE_MASK;                 565         destination &= PAGE_MASK;
502                                                   566 
503         return kimage_add_entry(image, destina    567         return kimage_add_entry(image, destination | IND_DESTINATION);
504 }                                                 568 }
505                                                   569 
506                                                   570 
507 static int kimage_add_page(struct kimage *imag    571 static int kimage_add_page(struct kimage *image, unsigned long page)
508 {                                                 572 {
509         page &= PAGE_MASK;                        573         page &= PAGE_MASK;
510                                                   574 
511         return kimage_add_entry(image, page |     575         return kimage_add_entry(image, page | IND_SOURCE);
512 }                                                 576 }
513                                                   577 
514                                                   578 
515 static void kimage_free_extra_pages(struct kim    579 static void kimage_free_extra_pages(struct kimage *image)
516 {                                                 580 {
517         /* Walk through and free any extra des    581         /* Walk through and free any extra destination pages I may have */
518         kimage_free_page_list(&image->dest_pag    582         kimage_free_page_list(&image->dest_pages);
519                                                   583 
520         /* Walk through and free any unusable     584         /* Walk through and free any unusable pages I have cached */
521         kimage_free_page_list(&image->unusable    585         kimage_free_page_list(&image->unusable_pages);
522                                                   586 
523 }                                                 587 }
524                                                   588 
525 void kimage_terminate(struct kimage *image)       589 void kimage_terminate(struct kimage *image)
526 {                                                 590 {
527         if (*image->entry != 0)                   591         if (*image->entry != 0)
528                 image->entry++;                   592                 image->entry++;
529                                                   593 
530         *image->entry = IND_DONE;                 594         *image->entry = IND_DONE;
531 }                                                 595 }
532                                                   596 
533 #define for_each_kimage_entry(image, ptr, entr    597 #define for_each_kimage_entry(image, ptr, entry) \
534         for (ptr = &image->head; (entry = *ptr    598         for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \
535                 ptr = (entry & IND_INDIRECTION    599                 ptr = (entry & IND_INDIRECTION) ? \
536                         boot_phys_to_virt((ent    600                         boot_phys_to_virt((entry & PAGE_MASK)) : ptr + 1)
537                                                   601 
538 static void kimage_free_entry(kimage_entry_t e    602 static void kimage_free_entry(kimage_entry_t entry)
539 {                                                 603 {
540         struct page *page;                        604         struct page *page;
541                                                   605 
542         page = boot_pfn_to_page(entry >> PAGE_    606         page = boot_pfn_to_page(entry >> PAGE_SHIFT);
543         kimage_free_pages(page);                  607         kimage_free_pages(page);
544 }                                                 608 }
545                                                   609 
546 void kimage_free(struct kimage *image)            610 void kimage_free(struct kimage *image)
547 {                                                 611 {
548         kimage_entry_t *ptr, entry;               612         kimage_entry_t *ptr, entry;
549         kimage_entry_t ind = 0;                   613         kimage_entry_t ind = 0;
550                                                   614 
551         if (!image)                               615         if (!image)
552                 return;                           616                 return;
553                                                   617 
554 #ifdef CONFIG_CRASH_DUMP                       << 
555         if (image->vmcoreinfo_data_copy) {        618         if (image->vmcoreinfo_data_copy) {
556                 crash_update_vmcoreinfo_safeco    619                 crash_update_vmcoreinfo_safecopy(NULL);
557                 vunmap(image->vmcoreinfo_data_    620                 vunmap(image->vmcoreinfo_data_copy);
558         }                                         621         }
559 #endif                                         << 
560                                                   622 
561         kimage_free_extra_pages(image);           623         kimage_free_extra_pages(image);
562         for_each_kimage_entry(image, ptr, entr    624         for_each_kimage_entry(image, ptr, entry) {
563                 if (entry & IND_INDIRECTION) {    625                 if (entry & IND_INDIRECTION) {
564                         /* Free the previous i    626                         /* Free the previous indirection page */
565                         if (ind & IND_INDIRECT    627                         if (ind & IND_INDIRECTION)
566                                 kimage_free_en    628                                 kimage_free_entry(ind);
567                         /* Save this indirecti    629                         /* Save this indirection page until we are
568                          * done with it.          630                          * done with it.
569                          */                       631                          */
570                         ind = entry;              632                         ind = entry;
571                 } else if (entry & IND_SOURCE)    633                 } else if (entry & IND_SOURCE)
572                         kimage_free_entry(entr    634                         kimage_free_entry(entry);
573         }                                         635         }
574         /* Free the final indirection page */     636         /* Free the final indirection page */
575         if (ind & IND_INDIRECTION)                637         if (ind & IND_INDIRECTION)
576                 kimage_free_entry(ind);           638                 kimage_free_entry(ind);
577                                                   639 
578         /* Handle any machine specific cleanup    640         /* Handle any machine specific cleanup */
579         machine_kexec_cleanup(image);             641         machine_kexec_cleanup(image);
580                                                   642 
581         /* Free the kexec control pages... */     643         /* Free the kexec control pages... */
582         kimage_free_page_list(&image->control_    644         kimage_free_page_list(&image->control_pages);
583                                                   645 
584         /*                                        646         /*
585          * Free up any temporary buffers alloc    647          * Free up any temporary buffers allocated. This might hit if
586          * error occurred much later after buf    648          * error occurred much later after buffer allocation.
587          */                                       649          */
588         if (image->file_mode)                     650         if (image->file_mode)
589                 kimage_file_post_load_cleanup(    651                 kimage_file_post_load_cleanup(image);
590                                                   652 
591         kfree(image);                             653         kfree(image);
592 }                                                 654 }
593                                                   655 
594 static kimage_entry_t *kimage_dst_used(struct     656 static kimage_entry_t *kimage_dst_used(struct kimage *image,
595                                         unsign    657                                         unsigned long page)
596 {                                                 658 {
597         kimage_entry_t *ptr, entry;               659         kimage_entry_t *ptr, entry;
598         unsigned long destination = 0;            660         unsigned long destination = 0;
599                                                   661 
600         for_each_kimage_entry(image, ptr, entr    662         for_each_kimage_entry(image, ptr, entry) {
601                 if (entry & IND_DESTINATION)      663                 if (entry & IND_DESTINATION)
602                         destination = entry &     664                         destination = entry & PAGE_MASK;
603                 else if (entry & IND_SOURCE) {    665                 else if (entry & IND_SOURCE) {
604                         if (page == destinatio    666                         if (page == destination)
605                                 return ptr;       667                                 return ptr;
606                         destination += PAGE_SI    668                         destination += PAGE_SIZE;
607                 }                                 669                 }
608         }                                         670         }
609                                                   671 
610         return NULL;                              672         return NULL;
611 }                                                 673 }
612                                                   674 
613 static struct page *kimage_alloc_page(struct k    675 static struct page *kimage_alloc_page(struct kimage *image,
614                                         gfp_t     676                                         gfp_t gfp_mask,
615                                         unsign    677                                         unsigned long destination)
616 {                                                 678 {
617         /*                                        679         /*
618          * Here we implement safeguards to ens    680          * Here we implement safeguards to ensure that a source page
619          * is not copied to its destination pa    681          * is not copied to its destination page before the data on
620          * the destination page is no longer u    682          * the destination page is no longer useful.
621          *                                        683          *
622          * To do this we maintain the invarian    684          * To do this we maintain the invariant that a source page is
623          * either its own destination page, or    685          * either its own destination page, or it is not a
624          * destination page at all.               686          * destination page at all.
625          *                                        687          *
626          * That is slightly stronger than requ    688          * That is slightly stronger than required, but the proof
627          * that no problems will not occur is     689          * that no problems will not occur is trivial, and the
628          * implementation is simply to verify.    690          * implementation is simply to verify.
629          *                                        691          *
630          * When allocating all pages normally     692          * When allocating all pages normally this algorithm will run
631          * in O(N) time, but in the worst case    693          * in O(N) time, but in the worst case it will run in O(N^2)
632          * time.   If the runtime is a problem    694          * time.   If the runtime is a problem the data structures can
633          * be fixed.                              695          * be fixed.
634          */                                       696          */
635         struct page *page;                        697         struct page *page;
636         unsigned long addr;                       698         unsigned long addr;
637                                                   699 
638         /*                                        700         /*
639          * Walk through the list of destinatio    701          * Walk through the list of destination pages, and see if I
640          * have a match.                          702          * have a match.
641          */                                       703          */
642         list_for_each_entry(page, &image->dest    704         list_for_each_entry(page, &image->dest_pages, lru) {
643                 addr = page_to_boot_pfn(page)     705                 addr = page_to_boot_pfn(page) << PAGE_SHIFT;
644                 if (addr == destination) {        706                 if (addr == destination) {
645                         list_del(&page->lru);     707                         list_del(&page->lru);
646                         return page;              708                         return page;
647                 }                                 709                 }
648         }                                         710         }
649         page = NULL;                              711         page = NULL;
650         while (1) {                               712         while (1) {
651                 kimage_entry_t *old;              713                 kimage_entry_t *old;
652                                                   714 
653                 /* Allocate a page, if we run     715                 /* Allocate a page, if we run out of memory give up */
654                 page = kimage_alloc_pages(gfp_    716                 page = kimage_alloc_pages(gfp_mask, 0);
655                 if (!page)                        717                 if (!page)
656                         return NULL;              718                         return NULL;
657                 /* If the page cannot be used     719                 /* If the page cannot be used file it away */
658                 if (page_to_boot_pfn(page) >      720                 if (page_to_boot_pfn(page) >
659                                 (KEXEC_SOURCE_    721                                 (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
660                         list_add(&page->lru, &    722                         list_add(&page->lru, &image->unusable_pages);
661                         continue;                 723                         continue;
662                 }                                 724                 }
663                 addr = page_to_boot_pfn(page)     725                 addr = page_to_boot_pfn(page) << PAGE_SHIFT;
664                                                   726 
665                 /* If it is the destination pa    727                 /* If it is the destination page we want use it */
666                 if (addr == destination)          728                 if (addr == destination)
667                         break;                    729                         break;
668                                                   730 
669                 /* If the page is not a destin    731                 /* If the page is not a destination page use it */
670                 if (!kimage_is_destination_ran    732                 if (!kimage_is_destination_range(image, addr,
671                                                !! 733                                                   addr + PAGE_SIZE))
672                         break;                    734                         break;
673                                                   735 
674                 /*                                736                 /*
675                  * I know that the page is som    737                  * I know that the page is someones destination page.
676                  * See if there is already a s    738                  * See if there is already a source page for this
677                  * destination page.  And if s    739                  * destination page.  And if so swap the source pages.
678                  */                               740                  */
679                 old = kimage_dst_used(image, a    741                 old = kimage_dst_used(image, addr);
680                 if (old) {                        742                 if (old) {
681                         /* If so move it */       743                         /* If so move it */
682                         unsigned long old_addr    744                         unsigned long old_addr;
683                         struct page *old_page;    745                         struct page *old_page;
684                                                   746 
685                         old_addr = *old & PAGE    747                         old_addr = *old & PAGE_MASK;
686                         old_page = boot_pfn_to    748                         old_page = boot_pfn_to_page(old_addr >> PAGE_SHIFT);
687                         copy_highpage(page, ol    749                         copy_highpage(page, old_page);
688                         *old = addr | (*old &     750                         *old = addr | (*old & ~PAGE_MASK);
689                                                   751 
690                         /* The old page I have    752                         /* The old page I have found cannot be a
691                          * destination page, s    753                          * destination page, so return it if it's
692                          * gfp_flags honor the    754                          * gfp_flags honor the ones passed in.
693                          */                       755                          */
694                         if (!(gfp_mask & __GFP    756                         if (!(gfp_mask & __GFP_HIGHMEM) &&
695                             PageHighMem(old_pa    757                             PageHighMem(old_page)) {
696                                 kimage_free_pa    758                                 kimage_free_pages(old_page);
697                                 continue;         759                                 continue;
698                         }                         760                         }
699                         page = old_page;          761                         page = old_page;
700                         break;                    762                         break;
701                 }                                 763                 }
702                 /* Place the page on the desti    764                 /* Place the page on the destination list, to be used later */
703                 list_add(&page->lru, &image->d    765                 list_add(&page->lru, &image->dest_pages);
704         }                                         766         }
705                                                   767 
706         return page;                              768         return page;
707 }                                                 769 }
708                                                   770 
709 static int kimage_load_normal_segment(struct k    771 static int kimage_load_normal_segment(struct kimage *image,
710                                          struc    772                                          struct kexec_segment *segment)
711 {                                                 773 {
712         unsigned long maddr;                      774         unsigned long maddr;
713         size_t ubytes, mbytes;                    775         size_t ubytes, mbytes;
714         int result;                               776         int result;
715         unsigned char __user *buf = NULL;         777         unsigned char __user *buf = NULL;
716         unsigned char *kbuf = NULL;               778         unsigned char *kbuf = NULL;
717                                                   779 
718         if (image->file_mode)                     780         if (image->file_mode)
719                 kbuf = segment->kbuf;             781                 kbuf = segment->kbuf;
720         else                                      782         else
721                 buf = segment->buf;               783                 buf = segment->buf;
722         ubytes = segment->bufsz;                  784         ubytes = segment->bufsz;
723         mbytes = segment->memsz;                  785         mbytes = segment->memsz;
724         maddr = segment->mem;                     786         maddr = segment->mem;
725                                                   787 
726         result = kimage_set_destination(image,    788         result = kimage_set_destination(image, maddr);
727         if (result < 0)                           789         if (result < 0)
728                 goto out;                         790                 goto out;
729                                                   791 
730         while (mbytes) {                          792         while (mbytes) {
731                 struct page *page;                793                 struct page *page;
732                 char *ptr;                        794                 char *ptr;
733                 size_t uchunk, mchunk;            795                 size_t uchunk, mchunk;
734                                                   796 
735                 page = kimage_alloc_page(image    797                 page = kimage_alloc_page(image, GFP_HIGHUSER, maddr);
736                 if (!page) {                      798                 if (!page) {
737                         result  = -ENOMEM;        799                         result  = -ENOMEM;
738                         goto out;                 800                         goto out;
739                 }                                 801                 }
740                 result = kimage_add_page(image    802                 result = kimage_add_page(image, page_to_boot_pfn(page)
741                                                   803                                                                 << PAGE_SHIFT);
742                 if (result < 0)                   804                 if (result < 0)
743                         goto out;                 805                         goto out;
744                                                   806 
745                 ptr = kmap_local_page(page);      807                 ptr = kmap_local_page(page);
746                 /* Start with a clear page */     808                 /* Start with a clear page */
747                 clear_page(ptr);                  809                 clear_page(ptr);
748                 ptr += maddr & ~PAGE_MASK;        810                 ptr += maddr & ~PAGE_MASK;
749                 mchunk = min_t(size_t, mbytes,    811                 mchunk = min_t(size_t, mbytes,
750                                 PAGE_SIZE - (m    812                                 PAGE_SIZE - (maddr & ~PAGE_MASK));
751                 uchunk = min(ubytes, mchunk);     813                 uchunk = min(ubytes, mchunk);
752                                                   814 
753                 if (uchunk) {                  !! 815                 /* For file based kexec, source pages are in kernel memory */
754                         /* For file based kexe !! 816                 if (image->file_mode)
755                         if (image->file_mode)  !! 817                         memcpy(ptr, kbuf, uchunk);
756                                 memcpy(ptr, kb !! 818                 else
757                         else                   !! 819                         result = copy_from_user(ptr, buf, uchunk);
758                                 result = copy_ << 
759                         ubytes -= uchunk;      << 
760                         if (image->file_mode)  << 
761                                 kbuf += uchunk << 
762                         else                   << 
763                                 buf += uchunk; << 
764                 }                              << 
765                 kunmap_local(ptr);                820                 kunmap_local(ptr);
766                 if (result) {                     821                 if (result) {
767                         result = -EFAULT;         822                         result = -EFAULT;
768                         goto out;                 823                         goto out;
769                 }                                 824                 }
                                                   >> 825                 ubytes -= uchunk;
770                 maddr  += mchunk;                 826                 maddr  += mchunk;
                                                   >> 827                 if (image->file_mode)
                                                   >> 828                         kbuf += mchunk;
                                                   >> 829                 else
                                                   >> 830                         buf += mchunk;
771                 mbytes -= mchunk;                 831                 mbytes -= mchunk;
772                                                   832 
773                 cond_resched();                   833                 cond_resched();
774         }                                         834         }
775 out:                                              835 out:
776         return result;                            836         return result;
777 }                                                 837 }
778                                                   838 
779 #ifdef CONFIG_CRASH_DUMP                       << 
780 static int kimage_load_crash_segment(struct ki    839 static int kimage_load_crash_segment(struct kimage *image,
781                                         struct    840                                         struct kexec_segment *segment)
782 {                                                 841 {
783         /* For crash dumps kernels we simply c    842         /* For crash dumps kernels we simply copy the data from
784          * user space to it's destination.        843          * user space to it's destination.
785          * We do things a page at a time for t    844          * We do things a page at a time for the sake of kmap.
786          */                                       845          */
787         unsigned long maddr;                      846         unsigned long maddr;
788         size_t ubytes, mbytes;                    847         size_t ubytes, mbytes;
789         int result;                               848         int result;
790         unsigned char __user *buf = NULL;         849         unsigned char __user *buf = NULL;
791         unsigned char *kbuf = NULL;               850         unsigned char *kbuf = NULL;
792                                                   851 
793         result = 0;                               852         result = 0;
794         if (image->file_mode)                     853         if (image->file_mode)
795                 kbuf = segment->kbuf;             854                 kbuf = segment->kbuf;
796         else                                      855         else
797                 buf = segment->buf;               856                 buf = segment->buf;
798         ubytes = segment->bufsz;                  857         ubytes = segment->bufsz;
799         mbytes = segment->memsz;                  858         mbytes = segment->memsz;
800         maddr = segment->mem;                     859         maddr = segment->mem;
801         while (mbytes) {                          860         while (mbytes) {
802                 struct page *page;                861                 struct page *page;
803                 char *ptr;                        862                 char *ptr;
804                 size_t uchunk, mchunk;            863                 size_t uchunk, mchunk;
805                                                   864 
806                 page = boot_pfn_to_page(maddr     865                 page = boot_pfn_to_page(maddr >> PAGE_SHIFT);
807                 if (!page) {                      866                 if (!page) {
808                         result  = -ENOMEM;        867                         result  = -ENOMEM;
809                         goto out;                 868                         goto out;
810                 }                                 869                 }
811                 arch_kexec_post_alloc_pages(pa    870                 arch_kexec_post_alloc_pages(page_address(page), 1, 0);
812                 ptr = kmap_local_page(page);      871                 ptr = kmap_local_page(page);
813                 ptr += maddr & ~PAGE_MASK;        872                 ptr += maddr & ~PAGE_MASK;
814                 mchunk = min_t(size_t, mbytes,    873                 mchunk = min_t(size_t, mbytes,
815                                 PAGE_SIZE - (m    874                                 PAGE_SIZE - (maddr & ~PAGE_MASK));
816                 uchunk = min(ubytes, mchunk);     875                 uchunk = min(ubytes, mchunk);
817                 if (mchunk > uchunk) {            876                 if (mchunk > uchunk) {
818                         /* Zero the trailing p    877                         /* Zero the trailing part of the page */
819                         memset(ptr + uchunk, 0    878                         memset(ptr + uchunk, 0, mchunk - uchunk);
820                 }                                 879                 }
821                                                   880 
822                 if (uchunk) {                  !! 881                 /* For file based kexec, source pages are in kernel memory */
823                         /* For file based kexe !! 882                 if (image->file_mode)
824                         if (image->file_mode)  !! 883                         memcpy(ptr, kbuf, uchunk);
825                                 memcpy(ptr, kb !! 884                 else
826                         else                   !! 885                         result = copy_from_user(ptr, buf, uchunk);
827                                 result = copy_ << 
828                         ubytes -= uchunk;      << 
829                         if (image->file_mode)  << 
830                                 kbuf += uchunk << 
831                         else                   << 
832                                 buf += uchunk; << 
833                 }                              << 
834                 kexec_flush_icache_page(page);    886                 kexec_flush_icache_page(page);
835                 kunmap_local(ptr);                887                 kunmap_local(ptr);
836                 arch_kexec_pre_free_pages(page    888                 arch_kexec_pre_free_pages(page_address(page), 1);
837                 if (result) {                     889                 if (result) {
838                         result = -EFAULT;         890                         result = -EFAULT;
839                         goto out;                 891                         goto out;
840                 }                                 892                 }
                                                   >> 893                 ubytes -= uchunk;
841                 maddr  += mchunk;                 894                 maddr  += mchunk;
                                                   >> 895                 if (image->file_mode)
                                                   >> 896                         kbuf += mchunk;
                                                   >> 897                 else
                                                   >> 898                         buf += mchunk;
842                 mbytes -= mchunk;                 899                 mbytes -= mchunk;
843                                                   900 
844                 cond_resched();                   901                 cond_resched();
845         }                                         902         }
846 out:                                              903 out:
847         return result;                            904         return result;
848 }                                                 905 }
849 #endif                                         << 
850                                                   906 
851 int kimage_load_segment(struct kimage *image,     907 int kimage_load_segment(struct kimage *image,
852                                 struct kexec_s    908                                 struct kexec_segment *segment)
853 {                                                 909 {
854         int result = -ENOMEM;                     910         int result = -ENOMEM;
855                                                   911 
856         switch (image->type) {                    912         switch (image->type) {
857         case KEXEC_TYPE_DEFAULT:                  913         case KEXEC_TYPE_DEFAULT:
858                 result = kimage_load_normal_se    914                 result = kimage_load_normal_segment(image, segment);
859                 break;                            915                 break;
860 #ifdef CONFIG_CRASH_DUMP                       << 
861         case KEXEC_TYPE_CRASH:                    916         case KEXEC_TYPE_CRASH:
862                 result = kimage_load_crash_seg    917                 result = kimage_load_crash_segment(image, segment);
863                 break;                            918                 break;
864 #endif                                         << 
865         }                                         919         }
866                                                   920 
867         return result;                            921         return result;
868 }                                                 922 }
869                                                   923 
870 struct kexec_load_limit {                         924 struct kexec_load_limit {
871         /* Mutex protects the limit count. */     925         /* Mutex protects the limit count. */
872         struct mutex mutex;                       926         struct mutex mutex;
873         int limit;                                927         int limit;
874 };                                                928 };
875                                                   929 
876 static struct kexec_load_limit load_limit_rebo    930 static struct kexec_load_limit load_limit_reboot = {
877         .mutex = __MUTEX_INITIALIZER(load_limi    931         .mutex = __MUTEX_INITIALIZER(load_limit_reboot.mutex),
878         .limit = -1,                              932         .limit = -1,
879 };                                                933 };
880                                                   934 
881 static struct kexec_load_limit load_limit_pani    935 static struct kexec_load_limit load_limit_panic = {
882         .mutex = __MUTEX_INITIALIZER(load_limi    936         .mutex = __MUTEX_INITIALIZER(load_limit_panic.mutex),
883         .limit = -1,                              937         .limit = -1,
884 };                                                938 };
885                                                   939 
886 struct kimage *kexec_image;                       940 struct kimage *kexec_image;
887 struct kimage *kexec_crash_image;                 941 struct kimage *kexec_crash_image;
888 static int kexec_load_disabled;                   942 static int kexec_load_disabled;
889                                                   943 
890 #ifdef CONFIG_SYSCTL                              944 #ifdef CONFIG_SYSCTL
891 static int kexec_limit_handler(const struct ct !! 945 static int kexec_limit_handler(struct ctl_table *table, int write,
892                                void *buffer, s    946                                void *buffer, size_t *lenp, loff_t *ppos)
893 {                                                 947 {
894         struct kexec_load_limit *limit = table    948         struct kexec_load_limit *limit = table->data;
895         int val;                                  949         int val;
896         struct ctl_table tmp = {                  950         struct ctl_table tmp = {
897                 .data = &val,                     951                 .data = &val,
898                 .maxlen = sizeof(val),            952                 .maxlen = sizeof(val),
899                 .mode = table->mode,              953                 .mode = table->mode,
900         };                                        954         };
901         int ret;                                  955         int ret;
902                                                   956 
903         if (write) {                              957         if (write) {
904                 ret = proc_dointvec(&tmp, writ    958                 ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
905                 if (ret)                          959                 if (ret)
906                         return ret;               960                         return ret;
907                                                   961 
908                 if (val < 0)                      962                 if (val < 0)
909                         return -EINVAL;           963                         return -EINVAL;
910                                                   964 
911                 mutex_lock(&limit->mutex);        965                 mutex_lock(&limit->mutex);
912                 if (limit->limit != -1 && val     966                 if (limit->limit != -1 && val >= limit->limit)
913                         ret = -EINVAL;            967                         ret = -EINVAL;
914                 else                              968                 else
915                         limit->limit = val;       969                         limit->limit = val;
916                 mutex_unlock(&limit->mutex);      970                 mutex_unlock(&limit->mutex);
917                                                   971 
918                 return ret;                       972                 return ret;
919         }                                         973         }
920                                                   974 
921         mutex_lock(&limit->mutex);                975         mutex_lock(&limit->mutex);
922         val = limit->limit;                       976         val = limit->limit;
923         mutex_unlock(&limit->mutex);              977         mutex_unlock(&limit->mutex);
924                                                   978 
925         return proc_dointvec(&tmp, write, buff    979         return proc_dointvec(&tmp, write, buffer, lenp, ppos);
926 }                                                 980 }
927                                                   981 
928 static struct ctl_table kexec_core_sysctls[] =    982 static struct ctl_table kexec_core_sysctls[] = {
929         {                                         983         {
930                 .procname       = "kexec_load_    984                 .procname       = "kexec_load_disabled",
931                 .data           = &kexec_load_    985                 .data           = &kexec_load_disabled,
932                 .maxlen         = sizeof(int),    986                 .maxlen         = sizeof(int),
933                 .mode           = 0644,           987                 .mode           = 0644,
934                 /* only handle a transition fr    988                 /* only handle a transition from default "" to "1" */
935                 .proc_handler   = proc_dointve    989                 .proc_handler   = proc_dointvec_minmax,
936                 .extra1         = SYSCTL_ONE,     990                 .extra1         = SYSCTL_ONE,
937                 .extra2         = SYSCTL_ONE,     991                 .extra2         = SYSCTL_ONE,
938         },                                        992         },
939         {                                         993         {
940                 .procname       = "kexec_load_    994                 .procname       = "kexec_load_limit_panic",
941                 .data           = &load_limit_    995                 .data           = &load_limit_panic,
942                 .mode           = 0644,           996                 .mode           = 0644,
943                 .proc_handler   = kexec_limit_    997                 .proc_handler   = kexec_limit_handler,
944         },                                        998         },
945         {                                         999         {
946                 .procname       = "kexec_load_    1000                 .procname       = "kexec_load_limit_reboot",
947                 .data           = &load_limit_    1001                 .data           = &load_limit_reboot,
948                 .mode           = 0644,           1002                 .mode           = 0644,
949                 .proc_handler   = kexec_limit_    1003                 .proc_handler   = kexec_limit_handler,
950         },                                        1004         },
                                                   >> 1005         { }
951 };                                                1006 };
952                                                   1007 
953 static int __init kexec_core_sysctl_init(void)    1008 static int __init kexec_core_sysctl_init(void)
954 {                                                 1009 {
955         register_sysctl_init("kernel", kexec_c    1010         register_sysctl_init("kernel", kexec_core_sysctls);
956         return 0;                                 1011         return 0;
957 }                                                 1012 }
958 late_initcall(kexec_core_sysctl_init);            1013 late_initcall(kexec_core_sysctl_init);
959 #endif                                            1014 #endif
960                                                   1015 
961 bool kexec_load_permitted(int kexec_image_type    1016 bool kexec_load_permitted(int kexec_image_type)
962 {                                                 1017 {
963         struct kexec_load_limit *limit;           1018         struct kexec_load_limit *limit;
964                                                   1019 
965         /*                                        1020         /*
966          * Only the superuser can use the kexe    1021          * Only the superuser can use the kexec syscall and if it has not
967          * been disabled.                         1022          * been disabled.
968          */                                       1023          */
969         if (!capable(CAP_SYS_BOOT) || kexec_lo    1024         if (!capable(CAP_SYS_BOOT) || kexec_load_disabled)
970                 return false;                     1025                 return false;
971                                                   1026 
972         /* Check limit counter and decrease it    1027         /* Check limit counter and decrease it.*/
973         limit = (kexec_image_type == KEXEC_TYP    1028         limit = (kexec_image_type == KEXEC_TYPE_CRASH) ?
974                 &load_limit_panic : &load_limi    1029                 &load_limit_panic : &load_limit_reboot;
975         mutex_lock(&limit->mutex);                1030         mutex_lock(&limit->mutex);
976         if (!limit->limit) {                      1031         if (!limit->limit) {
977                 mutex_unlock(&limit->mutex);      1032                 mutex_unlock(&limit->mutex);
978                 return false;                     1033                 return false;
979         }                                         1034         }
980         if (limit->limit != -1)                   1035         if (limit->limit != -1)
981                 limit->limit--;                   1036                 limit->limit--;
982         mutex_unlock(&limit->mutex);              1037         mutex_unlock(&limit->mutex);
983                                                   1038 
984         return true;                              1039         return true;
985 }                                                 1040 }
986                                                   1041 
987 /*                                                1042 /*
                                                   >> 1043  * No panic_cpu check version of crash_kexec().  This function is called
                                                   >> 1044  * only when panic_cpu holds the current CPU number; this is the only CPU
                                                   >> 1045  * which processes crash_kexec routines.
                                                   >> 1046  */
                                                   >> 1047 void __noclone __crash_kexec(struct pt_regs *regs)
                                                   >> 1048 {
                                                   >> 1049         /* Take the kexec_lock here to prevent sys_kexec_load
                                                   >> 1050          * running on one cpu from replacing the crash kernel
                                                   >> 1051          * we are using after a panic on a different cpu.
                                                   >> 1052          *
                                                   >> 1053          * If the crash kernel was not located in a fixed area
                                                   >> 1054          * of memory the xchg(&kexec_crash_image) would be
                                                   >> 1055          * sufficient.  But since I reuse the memory...
                                                   >> 1056          */
                                                   >> 1057         if (kexec_trylock()) {
                                                   >> 1058                 if (kexec_crash_image) {
                                                   >> 1059                         struct pt_regs fixed_regs;
                                                   >> 1060 
                                                   >> 1061                         crash_setup_regs(&fixed_regs, regs);
                                                   >> 1062                         crash_save_vmcoreinfo();
                                                   >> 1063                         machine_crash_shutdown(&fixed_regs);
                                                   >> 1064                         machine_kexec(kexec_crash_image);
                                                   >> 1065                 }
                                                   >> 1066                 kexec_unlock();
                                                   >> 1067         }
                                                   >> 1068 }
                                                   >> 1069 STACK_FRAME_NON_STANDARD(__crash_kexec);
                                                   >> 1070 
                                                   >> 1071 __bpf_kfunc void crash_kexec(struct pt_regs *regs)
                                                   >> 1072 {
                                                   >> 1073         int old_cpu, this_cpu;
                                                   >> 1074 
                                                   >> 1075         /*
                                                   >> 1076          * Only one CPU is allowed to execute the crash_kexec() code as with
                                                   >> 1077          * panic().  Otherwise parallel calls of panic() and crash_kexec()
                                                   >> 1078          * may stop each other.  To exclude them, we use panic_cpu here too.
                                                   >> 1079          */
                                                   >> 1080         this_cpu = raw_smp_processor_id();
                                                   >> 1081         old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu);
                                                   >> 1082         if (old_cpu == PANIC_CPU_INVALID) {
                                                   >> 1083                 /* This is the 1st CPU which comes here, so go ahead. */
                                                   >> 1084                 __crash_kexec(regs);
                                                   >> 1085 
                                                   >> 1086                 /*
                                                   >> 1087                  * Reset panic_cpu to allow another panic()/crash_kexec()
                                                   >> 1088                  * call.
                                                   >> 1089                  */
                                                   >> 1090                 atomic_set(&panic_cpu, PANIC_CPU_INVALID);
                                                   >> 1091         }
                                                   >> 1092 }
                                                   >> 1093 
                                                   >> 1094 static inline resource_size_t crash_resource_size(const struct resource *res)
                                                   >> 1095 {
                                                   >> 1096         return !res->end ? 0 : resource_size(res);
                                                   >> 1097 }
                                                   >> 1098 
                                                   >> 1099 ssize_t crash_get_memory_size(void)
                                                   >> 1100 {
                                                   >> 1101         ssize_t size = 0;
                                                   >> 1102 
                                                   >> 1103         if (!kexec_trylock())
                                                   >> 1104                 return -EBUSY;
                                                   >> 1105 
                                                   >> 1106         size += crash_resource_size(&crashk_res);
                                                   >> 1107         size += crash_resource_size(&crashk_low_res);
                                                   >> 1108 
                                                   >> 1109         kexec_unlock();
                                                   >> 1110         return size;
                                                   >> 1111 }
                                                   >> 1112 
                                                   >> 1113 static int __crash_shrink_memory(struct resource *old_res,
                                                   >> 1114                                  unsigned long new_size)
                                                   >> 1115 {
                                                   >> 1116         struct resource *ram_res;
                                                   >> 1117 
                                                   >> 1118         ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL);
                                                   >> 1119         if (!ram_res)
                                                   >> 1120                 return -ENOMEM;
                                                   >> 1121 
                                                   >> 1122         ram_res->start = old_res->start + new_size;
                                                   >> 1123         ram_res->end   = old_res->end;
                                                   >> 1124         ram_res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
                                                   >> 1125         ram_res->name  = "System RAM";
                                                   >> 1126 
                                                   >> 1127         if (!new_size) {
                                                   >> 1128                 release_resource(old_res);
                                                   >> 1129                 old_res->start = 0;
                                                   >> 1130                 old_res->end   = 0;
                                                   >> 1131         } else {
                                                   >> 1132                 crashk_res.end = ram_res->start - 1;
                                                   >> 1133         }
                                                   >> 1134 
                                                   >> 1135         crash_free_reserved_phys_range(ram_res->start, ram_res->end);
                                                   >> 1136         insert_resource(&iomem_resource, ram_res);
                                                   >> 1137 
                                                   >> 1138         return 0;
                                                   >> 1139 }
                                                   >> 1140 
                                                   >> 1141 int crash_shrink_memory(unsigned long new_size)
                                                   >> 1142 {
                                                   >> 1143         int ret = 0;
                                                   >> 1144         unsigned long old_size, low_size;
                                                   >> 1145 
                                                   >> 1146         if (!kexec_trylock())
                                                   >> 1147                 return -EBUSY;
                                                   >> 1148 
                                                   >> 1149         if (kexec_crash_image) {
                                                   >> 1150                 ret = -ENOENT;
                                                   >> 1151                 goto unlock;
                                                   >> 1152         }
                                                   >> 1153 
                                                   >> 1154         low_size = crash_resource_size(&crashk_low_res);
                                                   >> 1155         old_size = crash_resource_size(&crashk_res) + low_size;
                                                   >> 1156         new_size = roundup(new_size, KEXEC_CRASH_MEM_ALIGN);
                                                   >> 1157         if (new_size >= old_size) {
                                                   >> 1158                 ret = (new_size == old_size) ? 0 : -EINVAL;
                                                   >> 1159                 goto unlock;
                                                   >> 1160         }
                                                   >> 1161 
                                                   >> 1162         /*
                                                   >> 1163          * (low_size > new_size) implies that low_size is greater than zero.
                                                   >> 1164          * This also means that if low_size is zero, the else branch is taken.
                                                   >> 1165          *
                                                   >> 1166          * If low_size is greater than 0, (low_size > new_size) indicates that
                                                   >> 1167          * crashk_low_res also needs to be shrunk. Otherwise, only crashk_res
                                                   >> 1168          * needs to be shrunk.
                                                   >> 1169          */
                                                   >> 1170         if (low_size > new_size) {
                                                   >> 1171                 ret = __crash_shrink_memory(&crashk_res, 0);
                                                   >> 1172                 if (ret)
                                                   >> 1173                         goto unlock;
                                                   >> 1174 
                                                   >> 1175                 ret = __crash_shrink_memory(&crashk_low_res, new_size);
                                                   >> 1176         } else {
                                                   >> 1177                 ret = __crash_shrink_memory(&crashk_res, new_size - low_size);
                                                   >> 1178         }
                                                   >> 1179 
                                                   >> 1180         /* Swap crashk_res and crashk_low_res if needed */
                                                   >> 1181         if (!crashk_res.end && crashk_low_res.end) {
                                                   >> 1182                 crashk_res.start = crashk_low_res.start;
                                                   >> 1183                 crashk_res.end   = crashk_low_res.end;
                                                   >> 1184                 release_resource(&crashk_low_res);
                                                   >> 1185                 crashk_low_res.start = 0;
                                                   >> 1186                 crashk_low_res.end   = 0;
                                                   >> 1187                 insert_resource(&iomem_resource, &crashk_res);
                                                   >> 1188         }
                                                   >> 1189 
                                                   >> 1190 unlock:
                                                   >> 1191         kexec_unlock();
                                                   >> 1192         return ret;
                                                   >> 1193 }
                                                   >> 1194 
                                                   >> 1195 void crash_save_cpu(struct pt_regs *regs, int cpu)
                                                   >> 1196 {
                                                   >> 1197         struct elf_prstatus prstatus;
                                                   >> 1198         u32 *buf;
                                                   >> 1199 
                                                   >> 1200         if ((cpu < 0) || (cpu >= nr_cpu_ids))
                                                   >> 1201                 return;
                                                   >> 1202 
                                                   >> 1203         /* Using ELF notes here is opportunistic.
                                                   >> 1204          * I need a well defined structure format
                                                   >> 1205          * for the data I pass, and I need tags
                                                   >> 1206          * on the data to indicate what information I have
                                                   >> 1207          * squirrelled away.  ELF notes happen to provide
                                                   >> 1208          * all of that, so there is no need to invent something new.
                                                   >> 1209          */
                                                   >> 1210         buf = (u32 *)per_cpu_ptr(crash_notes, cpu);
                                                   >> 1211         if (!buf)
                                                   >> 1212                 return;
                                                   >> 1213         memset(&prstatus, 0, sizeof(prstatus));
                                                   >> 1214         prstatus.common.pr_pid = current->pid;
                                                   >> 1215         elf_core_copy_regs(&prstatus.pr_reg, regs);
                                                   >> 1216         buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
                                                   >> 1217                               &prstatus, sizeof(prstatus));
                                                   >> 1218         final_note(buf);
                                                   >> 1219 }
                                                   >> 1220 
                                                   >> 1221 static int __init crash_notes_memory_init(void)
                                                   >> 1222 {
                                                   >> 1223         /* Allocate memory for saving cpu registers. */
                                                   >> 1224         size_t size, align;
                                                   >> 1225 
                                                   >> 1226         /*
                                                   >> 1227          * crash_notes could be allocated across 2 vmalloc pages when percpu
                                                   >> 1228          * is vmalloc based. vmalloc doesn't guarantee that 2 contiguous vmalloc
                                                   >> 1229          * pages are also on 2 contiguous physical pages. In this case the
                                                   >> 1230          * 2nd part of crash_notes in 2nd page could be lost since only the
                                                   >> 1231          * starting address and size of crash_notes are exported through sysfs.
                                                   >> 1232          * Here round up the size of crash_notes to the nearest power of two
                                                   >> 1233          * and pass it to __alloc_percpu as align value. This can make sure
                                                   >> 1234          * crash_notes is allocated inside one physical page.
                                                   >> 1235          */
                                                   >> 1236         size = sizeof(note_buf_t);
                                                   >> 1237         align = min(roundup_pow_of_two(sizeof(note_buf_t)), PAGE_SIZE);
                                                   >> 1238 
                                                   >> 1239         /*
                                                   >> 1240          * Break compile if size is bigger than PAGE_SIZE since crash_notes
                                                   >> 1241          * definitely will be in 2 pages with that.
                                                   >> 1242          */
                                                   >> 1243         BUILD_BUG_ON(size > PAGE_SIZE);
                                                   >> 1244 
                                                   >> 1245         crash_notes = __alloc_percpu(size, align);
                                                   >> 1246         if (!crash_notes) {
                                                   >> 1247                 pr_warn("Memory allocation for saving cpu register states failed\n");
                                                   >> 1248                 return -ENOMEM;
                                                   >> 1249         }
                                                   >> 1250         return 0;
                                                   >> 1251 }
                                                   >> 1252 subsys_initcall(crash_notes_memory_init);
                                                   >> 1253 
                                                   >> 1254 
                                                   >> 1255 /*
988  * Move into place and start executing a prelo    1256  * Move into place and start executing a preloaded standalone
989  * executable.  If nothing was preloaded retur    1257  * executable.  If nothing was preloaded return an error.
990  */                                               1258  */
991 int kernel_kexec(void)                            1259 int kernel_kexec(void)
992 {                                                 1260 {
993         int error = 0;                            1261         int error = 0;
994                                                   1262 
995         if (!kexec_trylock())                     1263         if (!kexec_trylock())
996                 return -EBUSY;                    1264                 return -EBUSY;
997         if (!kexec_image) {                       1265         if (!kexec_image) {
998                 error = -EINVAL;                  1266                 error = -EINVAL;
999                 goto Unlock;                      1267                 goto Unlock;
1000         }                                        1268         }
1001                                                  1269 
1002 #ifdef CONFIG_KEXEC_JUMP                         1270 #ifdef CONFIG_KEXEC_JUMP
1003         if (kexec_image->preserve_context) {     1271         if (kexec_image->preserve_context) {
1004                 pm_prepare_console();            1272                 pm_prepare_console();
1005                 error = freeze_processes();      1273                 error = freeze_processes();
1006                 if (error) {                     1274                 if (error) {
1007                         error = -EBUSY;          1275                         error = -EBUSY;
1008                         goto Restore_console;    1276                         goto Restore_console;
1009                 }                                1277                 }
1010                 suspend_console();               1278                 suspend_console();
1011                 error = dpm_suspend_start(PMS    1279                 error = dpm_suspend_start(PMSG_FREEZE);
1012                 if (error)                       1280                 if (error)
1013                         goto Resume_console;     1281                         goto Resume_console;
1014                 /* At this point, dpm_suspend    1282                 /* At this point, dpm_suspend_start() has been called,
1015                  * but *not* dpm_suspend_end(    1283                  * but *not* dpm_suspend_end(). We *must* call
1016                  * dpm_suspend_end() now.  Ot    1284                  * dpm_suspend_end() now.  Otherwise, drivers for
1017                  * some devices (e.g. interru    1285                  * some devices (e.g. interrupt controllers) become
1018                  * desynchronized with the ac    1286                  * desynchronized with the actual state of the
1019                  * hardware at resume time, a    1287                  * hardware at resume time, and evil weirdness ensues.
1020                  */                              1288                  */
1021                 error = dpm_suspend_end(PMSG_    1289                 error = dpm_suspend_end(PMSG_FREEZE);
1022                 if (error)                       1290                 if (error)
1023                         goto Resume_devices;     1291                         goto Resume_devices;
1024                 error = suspend_disable_secon    1292                 error = suspend_disable_secondary_cpus();
1025                 if (error)                       1293                 if (error)
1026                         goto Enable_cpus;        1294                         goto Enable_cpus;
1027                 local_irq_disable();             1295                 local_irq_disable();
1028                 error = syscore_suspend();       1296                 error = syscore_suspend();
1029                 if (error)                       1297                 if (error)
1030                         goto Enable_irqs;        1298                         goto Enable_irqs;
1031         } else                                   1299         } else
1032 #endif                                           1300 #endif
1033         {                                        1301         {
1034                 kexec_in_progress = true;        1302                 kexec_in_progress = true;
1035                 kernel_restart_prepare("kexec    1303                 kernel_restart_prepare("kexec reboot");
1036                 migrate_to_reboot_cpu();         1304                 migrate_to_reboot_cpu();
1037                 syscore_shutdown();           << 
1038                                                  1305 
1039                 /*                               1306                 /*
1040                  * migrate_to_reboot_cpu() di    1307                  * migrate_to_reboot_cpu() disables CPU hotplug assuming that
1041                  * no further code needs to u    1308                  * no further code needs to use CPU hotplug (which is true in
1042                  * the reboot case). However,    1309                  * the reboot case). However, the kexec path depends on using
1043                  * CPU hotplug again; so re-e    1310                  * CPU hotplug again; so re-enable it here.
1044                  */                              1311                  */
1045                 cpu_hotplug_enable();            1312                 cpu_hotplug_enable();
1046                 pr_notice("Starting new kerne    1313                 pr_notice("Starting new kernel\n");
1047                 machine_shutdown();              1314                 machine_shutdown();
1048         }                                        1315         }
1049                                                  1316 
1050         kmsg_dump(KMSG_DUMP_SHUTDOWN);           1317         kmsg_dump(KMSG_DUMP_SHUTDOWN);
1051         machine_kexec(kexec_image);              1318         machine_kexec(kexec_image);
1052                                                  1319 
1053 #ifdef CONFIG_KEXEC_JUMP                         1320 #ifdef CONFIG_KEXEC_JUMP
1054         if (kexec_image->preserve_context) {     1321         if (kexec_image->preserve_context) {
1055                 syscore_resume();                1322                 syscore_resume();
1056  Enable_irqs:                                    1323  Enable_irqs:
1057                 local_irq_enable();              1324                 local_irq_enable();
1058  Enable_cpus:                                    1325  Enable_cpus:
1059                 suspend_enable_secondary_cpus    1326                 suspend_enable_secondary_cpus();
1060                 dpm_resume_start(PMSG_RESTORE    1327                 dpm_resume_start(PMSG_RESTORE);
1061  Resume_devices:                                 1328  Resume_devices:
1062                 dpm_resume_end(PMSG_RESTORE);    1329                 dpm_resume_end(PMSG_RESTORE);
1063  Resume_console:                                 1330  Resume_console:
1064                 resume_console();                1331                 resume_console();
1065                 thaw_processes();                1332                 thaw_processes();
1066  Restore_console:                                1333  Restore_console:
1067                 pm_restore_console();            1334                 pm_restore_console();
1068         }                                        1335         }
1069 #endif                                           1336 #endif
1070                                                  1337 
1071  Unlock:                                         1338  Unlock:
1072         kexec_unlock();                          1339         kexec_unlock();
1073         return error;                            1340         return error;
1074 }                                                1341 }
1075                                                  1342 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php