TOMOYO Linux Cross Reference
Linux/arch/x86/boot/compressed/kaslr.c

  1 // SPDX-License-Identifier: GPL-2.0
  2 /*
  3  * kaslr.c
  4  *
  5  * This contains the routines needed to generate a reasonable level of
  6  * entropy to choose a randomized kernel base address offset in support
  7  * of Kernel Address Space Layout Randomization (KASLR). Additionally
  8  * handles walking the physical memory maps (and tracking memory regions
  9  * to avoid) in order to select a physical memory location that can
 10  * contain the entire properly aligned running kernel image.
 11  *
 12  */
 13 
 14 /*
 15  * next_arg() expects isspace() from linux/ctype.h to filter out
 16  * space/LF/tab characters. boot/ctype.h conflicts with linux/ctype.h
 17  * because isdigit() is implemented in both of them, so disable
 18  * boot/ctype.h here (by pre-defining its include guard).
 19  */
 20 #define BOOT_CTYPE_H
 21 
 22 #include "misc.h"
 23 #include "error.h"
 24 #include "../string.h"
 25 #include "efi.h"
 26 
 27 #include <generated/compile.h>
 28 #include <linux/module.h>
 29 #include <linux/uts.h>
 30 #include <linux/utsname.h>
 31 #include <linux/ctype.h>
 32 #include <generated/utsversion.h>
 33 #include <generated/utsrelease.h>
 34 
 35 #define _SETUP
 36 #include <asm/setup.h>  /* For COMMAND_LINE_SIZE */
 37 #undef _SETUP
 38 
 39 extern unsigned long get_cmd_line_ptr(void);
 40 
 41 /* Simplified build-specific string for starting entropy. */
 42 static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
 43                 LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
 44 
 45 static unsigned long rotate_xor(unsigned long hash, const void *area,
 46                                 size_t size)
 47 {
 48         size_t i;
 49         unsigned long *ptr = (unsigned long *)area;
 50 
 51         for (i = 0; i < size / sizeof(hash); i++) {
 52                 /* Rotate by odd number of bits and XOR. */
 53                 hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7);
 54                 hash ^= ptr[i];
 55         }
 56 
 57         return hash;
 58 }
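
    /*
     * Worked example for rotate_xor(), assuming a 64-bit build where
     * sizeof(unsigned long) == 8: the rotation above is
     *
     *   hash = (hash << 57) | (hash >> 7);   // rotate right by 7 bits
     *
     * Starting from hash == 0, the first iteration XORs in the first word
     * of 'area' unchanged; each later word is mixed in at a position
     * shifted by another 7 bits. Trailing bytes beyond a multiple of
     * sizeof(unsigned long) are ignored, since the loop runs
     * size / sizeof(hash) times.
     */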
 59 
 60 /* Attempt to create a simple but unpredictable starting entropy. */
 61 static unsigned long get_boot_seed(void)
 62 {
 63         unsigned long hash = 0;
 64 
 65         hash = rotate_xor(hash, build_str, sizeof(build_str));
 66         hash = rotate_xor(hash, boot_params_ptr, sizeof(*boot_params_ptr));
 67 
 68         return hash;
 69 }
 70 
 71 #define KASLR_COMPRESSED_BOOT
 72 #include "../../lib/kaslr.c"
 73 
 74 
 75 /* Only supporting at most 4 unusable memmap regions with kaslr */
 76 #define MAX_MEMMAP_REGIONS      4
 77 
 78 static bool memmap_too_large;
 79 
 80 
 81 /*
 82  * Store memory limit: MAXMEM on 64-bit and KERNEL_IMAGE_SIZE on 32-bit.
 83  * It may be reduced by "mem=nn[KMG]" or "memmap=nn[KMG]" command line options.
 84  */
 85 static u64 mem_limit;
 86 
 87 /* Number of immovable memory regions */
 88 static int num_immovable_mem;
 89 
 90 enum mem_avoid_index {
 91         MEM_AVOID_ZO_RANGE = 0,
 92         MEM_AVOID_INITRD,
 93         MEM_AVOID_CMDLINE,
 94         MEM_AVOID_BOOTPARAMS,
 95         MEM_AVOID_MEMMAP_BEGIN,
 96         MEM_AVOID_MEMMAP_END = MEM_AVOID_MEMMAP_BEGIN + MAX_MEMMAP_REGIONS - 1,
 97         MEM_AVOID_MAX,
 98 };
 99 
100 static struct mem_vector mem_avoid[MEM_AVOID_MAX];
101 
102 static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two)
103 {
104         /* Item one is entirely before item two. */
105         if (one->start + one->size <= two->start)
106                 return false;
107         /* Item one is entirely after item two. */
108         if (one->start >= two->start + two->size)
109                 return false;
110         return true;
111 }
112 
113 char *skip_spaces(const char *str)
114 {
115         while (isspace(*str))
116                 ++str;
117         return (char *)str;
118 }
119 #include "../../../../lib/ctype.c"
120 #include "../../../../lib/cmdline.c"
121 
122 static int
123 parse_memmap(char *p, u64 *start, u64 *size)
124 {
125         char *oldp;
126 
127         if (!p)
128                 return -EINVAL;
129 
130         /* We don't care about this option here */
131         if (!strncmp(p, "exactmap", 8))
132                 return -EINVAL;
133 
134         oldp = p;
135         *size = memparse(p, &p);
136         if (p == oldp)
137                 return -EINVAL;
138 
139         switch (*p) {
140         case '#':
141         case '$':
142         case '!':
143                 *start = memparse(p + 1, &p);
144                 return 0;
145         case '@':
146                 /*
147                  * memmap=nn@ss specifies usable region, should
148                  * be skipped
149                  */
150                 *size = 0;
151                 fallthrough;
152         default:
153                 /*
154                  * If no offset is given and only a size is specified,
155                  * memmap=nn[KMG] behaves the same as mem=nn[KMG]: it limits
156                  * the max address the system can use. Regions above the limit should be avoided.
157                  */
158                 *start = 0;
159                 return 0;
160         }
161 
162         return -EINVAL;
163 }
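
    /*
     * Example command-line forms handled above (illustrative values):
     *
     *   memmap=16M$0x30000000   - 16 MiB at 0x30000000; '$', '#' and '!'
     *                             all yield a (start, size) pair that the
     *                             caller records in mem_avoid[].
     *   memmap=512M@0x10000000  - usable region; size is zeroed, so
     *                             mem_avoid_memmap() skips it.
     *   memmap=2G               - no offset: treated like mem=2G, i.e. it
     *                             lowers mem_limit instead of adding an
     *                             avoid region.
     *   memmap=exactmap         - ignored here (returns -EINVAL).
     */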
164 
165 static void mem_avoid_memmap(char *str)
166 {
167         static int i;
168 
169         if (i >= MAX_MEMMAP_REGIONS)
170                 return;
171 
172         while (str && (i < MAX_MEMMAP_REGIONS)) {
173                 int rc;
174                 u64 start, size;
175                 char *k = strchr(str, ',');
176 
177                 if (k)
178                         *k++ = 0;
179 
180                 rc = parse_memmap(str, &start, &size);
181                 if (rc < 0)
182                         break;
183                 str = k;
184 
185                 if (start == 0) {
186                         /* Store the specified memory limit if size > 0 */
187                         if (size > 0 && size < mem_limit)
188                                 mem_limit = size;
189 
190                         continue;
191                 }
192 
193                 mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].start = start;
194                 mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].size = size;
195                 i++;
196         }
197 
198         /* More than 4 memmaps, fail kaslr */
199         if ((i >= MAX_MEMMAP_REGIONS) && str)
200                 memmap_too_large = true;
201 }
202 
203 /* Store the number of 1GB huge pages which users specified: */
204 static unsigned long max_gb_huge_pages;
205 
206 static void parse_gb_huge_pages(char *param, char *val)
207 {
208         static bool gbpage_sz;
209         char *p;
210 
211         if (!strcmp(param, "hugepagesz")) {
212                 p = val;
213                 if (memparse(p, &p) != PUD_SIZE) {
214                         gbpage_sz = false;
215                         return;
216                 }
217 
218                 if (gbpage_sz)
219                         warn("Repeatedly set hugeTLB page size of 1G!\n");
220                 gbpage_sz = true;
221                 return;
222         }
223 
224         if (!strcmp(param, "hugepages") && gbpage_sz) {
225                 p = val;
226                 max_gb_huge_pages = simple_strtoull(p, &p, 0);
227                 return;
228         }
229 }
230 
231 static void handle_mem_options(void)
232 {
233         char *args = (char *)get_cmd_line_ptr();
234         size_t len;
235         char *tmp_cmdline;
236         char *param, *val;
237         u64 mem_size;
238 
239         if (!args)
240                 return;
241 
242         len = strnlen(args, COMMAND_LINE_SIZE-1);
243         tmp_cmdline = malloc(len + 1);
244         if (!tmp_cmdline)
245                 error("Failed to allocate space for tmp_cmdline");
246 
247         memcpy(tmp_cmdline, args, len);
248         tmp_cmdline[len] = 0;
249         args = tmp_cmdline;
250 
251         /* Chew leading spaces */
252         args = skip_spaces(args);
253 
254         while (*args) {
255                 args = next_arg(args, &param, &val);
256                 /* Stop at -- */
257                 if (!val && strcmp(param, "--") == 0)
258                         break;
259 
260                 if (!strcmp(param, "memmap")) {
261                         mem_avoid_memmap(val);
262                 } else if (IS_ENABLED(CONFIG_X86_64) && strstr(param, "hugepages")) {
263                         parse_gb_huge_pages(param, val);
264                 } else if (!strcmp(param, "mem")) {
265                         char *p = val;
266 
267                         if (!strcmp(p, "nopentium"))
268                                 continue;
269                         mem_size = memparse(p, &p);
270                         if (mem_size == 0)
271                                 break;
272 
273                         if (mem_size < mem_limit)
274                                 mem_limit = mem_size;
275                 }
276         }
277 
278         free(tmp_cmdline);
279         return;
280 }
281 
282 /*
283  * In theory, KASLR can put the kernel anywhere in the range of [16M, MAXMEM)
284  * on 64-bit, and [16M, KERNEL_IMAGE_SIZE) on 32-bit.
285  *
286  * The mem_avoid array is used to store the ranges that need to be avoided
287  * when KASLR searches for an appropriate random address. We must avoid any
288  * regions that are unsafe to overlap with during decompression, and other
289  * things like the initrd, cmdline and boot_params. This comment seeks to
290  * explain mem_avoid as clearly as possible since incorrect mem_avoid
291  * memory ranges lead to really hard to debug boot failures.
292  *
293  * The initrd, cmdline, and boot_params are trivial to identify for
294  * avoiding. They are MEM_AVOID_INITRD, MEM_AVOID_CMDLINE, and
295  * MEM_AVOID_BOOTPARAMS respectively below.
296  *
297  * What is less obvious is how to avoid the range of memory that is used
298  * during decompression (MEM_AVOID_ZO_RANGE below). This range must cover
299  * the compressed kernel (ZO) and its run space, which is used to extract
300  * the uncompressed kernel (VO) and relocs.
301  *
302  * ZO's full run size sits against the end of the decompression buffer, so
303  * we can calculate where text, data, bss, etc of ZO are positioned more
304  * easily.
305  *
306  * For additional background, the decompression calculations can be found
307  * in header.S, and the memory diagram is based on the one found in misc.c.
308  *
309  * The following conditions are already enforced by the image layouts and
310  * associated code:
311  *  - input + input_size >= output + output_size
312  *  - kernel_total_size <= init_size
313  *  - kernel_total_size <= output_size (see Note below)
314  *  - output + init_size >= output + output_size
315  *
316  * (Note that kernel_total_size and output_size have no fundamental
317  * relationship, but output_size is passed to choose_random_location
318  * as a maximum of the two. The diagram is showing a case where
319  * kernel_total_size is larger than output_size, but this case is
320  * handled by bumping output_size.)
321  *
322  * The above conditions can be illustrated by a diagram:
323  *
324  * 0   output            input            input+input_size    output+init_size
325  * |     |                 |                             |             |
326  * |     |                 |                             |             |
327  * |-----|--------|--------|--------------|-----------|--|-------------|
328  *                |                       |           |
329  *                |                       |           |
330  * output+init_size-ZO_INIT_SIZE  output+output_size  output+kernel_total_size
331  *
332  * [output, output+init_size) is the entire memory range used for
333  * extracting the compressed image.
334  *
335  * [output, output+kernel_total_size) is the range needed for the
336  * uncompressed kernel (VO) and its run size (bss, brk, etc).
337  *
338  * [output, output+output_size) is VO plus relocs (i.e. the entire
339  * uncompressed payload contained by ZO). This is the area of the buffer
340  * written to during decompression.
341  *
342  * [output+init_size-ZO_INIT_SIZE, output+init_size) is the worst-case
343  * range of the copied ZO and decompression code. (i.e. the range
344  * covered backwards of size ZO_INIT_SIZE, starting from output+init_size.)
345  *
346  * [input, input+input_size) is the original copied compressed image (ZO)
347  * (i.e. it does not include its run size). This range must be avoided
348  * because it contains the data used for decompression.
349  *
350  * [input+input_size, output+init_size) is [_text, _end) for ZO. This
351  * range includes ZO's heap and stack, and must be avoided since it
352  * performs the decompression.
353  *
354  * Since the above two ranges need to be avoided and they are adjacent,
355  * they can be merged, resulting in: [input, output+init_size) which
356  * becomes the MEM_AVOID_ZO_RANGE below.
357  */
358 static void mem_avoid_init(unsigned long input, unsigned long input_size,
359                            unsigned long output)
360 {
361         unsigned long init_size = boot_params_ptr->hdr.init_size;
362         u64 initrd_start, initrd_size;
363         unsigned long cmd_line, cmd_line_size;
364 
365         /*
366          * Avoid the region that is unsafe to overlap during
367          * decompression.
368          */
369         mem_avoid[MEM_AVOID_ZO_RANGE].start = input;
370         mem_avoid[MEM_AVOID_ZO_RANGE].size = (output + init_size) - input;
371 
372         /* Avoid initrd. */
373         initrd_start  = (u64)boot_params_ptr->ext_ramdisk_image << 32;
374         initrd_start |= boot_params_ptr->hdr.ramdisk_image;
375         initrd_size  = (u64)boot_params_ptr->ext_ramdisk_size << 32;
376         initrd_size |= boot_params_ptr->hdr.ramdisk_size;
377         mem_avoid[MEM_AVOID_INITRD].start = initrd_start;
378         mem_avoid[MEM_AVOID_INITRD].size = initrd_size;
379         /* No need to set mapping for initrd, it will be handled in VO. */
380 
381         /* Avoid kernel command line. */
382         cmd_line = get_cmd_line_ptr();
383         /* Calculate size of cmd_line. */
384         if (cmd_line) {
385                 cmd_line_size = strnlen((char *)cmd_line, COMMAND_LINE_SIZE-1) + 1;
386                 mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line;
387                 mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size;
388         }
389 
390         /* Avoid boot parameters. */
391         mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params_ptr;
392         mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params_ptr);
393 
394         /* We don't need to set a mapping for setup_data. */
395 
396         /* Mark the memmap regions we need to avoid */
397         handle_mem_options();
398 
399         /* Enumerate the immovable memory regions */
400         num_immovable_mem = count_immovable_mem_regions();
401 }
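
    /*
     * Illustrative example of the initrd address assembly above (made-up
     * values): the 64-bit physical address is split across two boot_params
     * fields, so ext_ramdisk_image == 0x1 and hdr.ramdisk_image ==
     * 0x7f000000 describe an initrd at 0x17f000000 (just above 4 GiB).
     * Both halves must be combined before the region can be recorded in
     * mem_avoid[MEM_AVOID_INITRD].
     */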
402 
403 /*
404  * Does this memory vector overlap a known avoided area? If so, record the
405  * overlap region with the lowest address.
406  */
407 static bool mem_avoid_overlap(struct mem_vector *img,
408                               struct mem_vector *overlap)
409 {
410         int i;
411         struct setup_data *ptr;
412         u64 earliest = img->start + img->size;
413         bool is_overlapping = false;
414 
415         for (i = 0; i < MEM_AVOID_MAX; i++) {
416                 if (mem_overlaps(img, &mem_avoid[i]) &&
417                     mem_avoid[i].start < earliest) {
418                         *overlap = mem_avoid[i];
419                         earliest = overlap->start;
420                         is_overlapping = true;
421                 }
422         }
423 
424         /* Avoid all entries in the setup_data linked list. */
425         ptr = (struct setup_data *)(unsigned long)boot_params_ptr->hdr.setup_data;
426         while (ptr) {
427                 struct mem_vector avoid;
428 
429                 avoid.start = (unsigned long)ptr;
430                 avoid.size = sizeof(*ptr) + ptr->len;
431 
432                 if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) {
433                         *overlap = avoid;
434                         earliest = overlap->start;
435                         is_overlapping = true;
436                 }
437 
438                 if (ptr->type == SETUP_INDIRECT &&
439                     ((struct setup_indirect *)ptr->data)->type != SETUP_INDIRECT) {
440                         avoid.start = ((struct setup_indirect *)ptr->data)->addr;
441                         avoid.size = ((struct setup_indirect *)ptr->data)->len;
442 
443                         if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) {
444                                 *overlap = avoid;
445                                 earliest = overlap->start;
446                                 is_overlapping = true;
447                         }
448                 }
449 
450                 ptr = (struct setup_data *)(unsigned long)ptr->next;
451         }
452 
453         return is_overlapping;
454 }
455 
456 struct slot_area {
457         u64 addr;
458         unsigned long num;
459 };
460 
461 #define MAX_SLOT_AREA 100
462 
463 static struct slot_area slot_areas[MAX_SLOT_AREA];
464 static unsigned int slot_area_index;
465 static unsigned long slot_max;
466 
467 static void store_slot_info(struct mem_vector *region, unsigned long image_size)
468 {
469         struct slot_area slot_area;
470 
471         if (slot_area_index == MAX_SLOT_AREA)
472                 return;
473 
474         slot_area.addr = region->start;
475         slot_area.num = 1 + (region->size - image_size) / CONFIG_PHYSICAL_ALIGN;
476 
477         slot_areas[slot_area_index++] = slot_area;
478         slot_max += slot_area.num;
479 }
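
    /*
     * Worked example of the slot arithmetic above, assuming the common
     * default CONFIG_PHYSICAL_ALIGN of 2 MiB: a 200 MiB region that must
     * hold a 64 MiB image yields
     *
     *   num = 1 + (200 MiB - 64 MiB) / 2 MiB = 1 + 68 = 69
     *
     * candidate start addresses, i.e. every 2 MiB boundary at which the
     * image still fits entirely inside the region. Callers in this file
     * only pass regions that are already aligned and at least image_size
     * bytes long.
     */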
480 
481 /*
482  * Skip as many 1GB huge pages as possible in the passed region
483  * according to the number the user specified:
484  */
485 static void
486 process_gb_huge_pages(struct mem_vector *region, unsigned long image_size)
487 {
488         u64 pud_start, pud_end;
489         unsigned long gb_huge_pages;
490         struct mem_vector tmp;
491 
492         if (!IS_ENABLED(CONFIG_X86_64) || !max_gb_huge_pages) {
493                 store_slot_info(region, image_size);
494                 return;
495         }
496 
497         /* Are there any 1GB pages in the region? */
498         pud_start = ALIGN(region->start, PUD_SIZE);
499         pud_end = ALIGN_DOWN(region->start + region->size, PUD_SIZE);
500 
501         /* No good 1GB huge pages found: */
502         if (pud_start >= pud_end) {
503                 store_slot_info(region, image_size);
504                 return;
505         }
506 
507         /* Check if the head part of the region is usable. */
508         if (pud_start >= region->start + image_size) {
509                 tmp.start = region->start;
510                 tmp.size = pud_start - region->start;
511                 store_slot_info(&tmp, image_size);
512         }
513 
514         /* Skip the good 1GB pages. */
515         gb_huge_pages = (pud_end - pud_start) >> PUD_SHIFT;
516         if (gb_huge_pages > max_gb_huge_pages) {
517                 pud_end = pud_start + (max_gb_huge_pages << PUD_SHIFT);
518                 max_gb_huge_pages = 0;
519         } else {
520                 max_gb_huge_pages -= gb_huge_pages;
521         }
522 
523         /* Check if the tail part of the region is usable. */
524         if (region->start + region->size >= pud_end + image_size) {
525                 tmp.start = pud_end;
526                 tmp.size = region->start + region->size - pud_end;
527                 store_slot_info(&tmp, image_size);
528         }
529 }
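
    /*
     * Worked example for the carve-out above (illustrative numbers, x86_64
     * PUD_SIZE == 1 GiB): for a region [512 MiB, 4 GiB) with
     * max_gb_huge_pages == 2, pud_start is 1 GiB and pud_end is 4 GiB,
     * giving three candidate 1 GiB pages. Only two are reserved, so
     * pud_end is pulled back to 3 GiB; the head [512 MiB, 1 GiB) and the
     * tail [3 GiB, 4 GiB) are then offered to store_slot_info() if each
     * can still hold image_size.
     */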
530 
531 static u64 slots_fetch_random(void)
532 {
533         unsigned long slot;
534         unsigned int i;
535 
536         /* Handle case of no slots stored. */
537         if (slot_max == 0)
538                 return 0;
539 
540         slot = kaslr_get_random_long("Physical") % slot_max;
541 
542         for (i = 0; i < slot_area_index; i++) {
543                 if (slot >= slot_areas[i].num) {
544                         slot -= slot_areas[i].num;
545                         continue;
546                 }
547                 return slot_areas[i].addr + ((u64)slot * CONFIG_PHYSICAL_ALIGN);
548         }
549 
550         if (i == slot_area_index)
551                 debug_putstr("slots_fetch_random() failed!?\n");
552         return 0;
553 }
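
    /*
     * Example of the index walk above (illustrative counts): with two
     * recorded areas of 10 and 5 slots, a random value of 12 is reduced to
     * 2 after skipping the first area, so the chosen address is
     * slot_areas[1].addr + 2 * CONFIG_PHYSICAL_ALIGN. Real areas come from
     * store_slot_info().
     */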
554 
555 static void __process_mem_region(struct mem_vector *entry,
556                                  unsigned long minimum,
557                                  unsigned long image_size)
558 {
559         struct mem_vector region, overlap;
560         u64 region_end;
561 
562         /* Enforce minimum and memory limit. */
563         region.start = max_t(u64, entry->start, minimum);
564         region_end = min(entry->start + entry->size, mem_limit);
565 
566         /* Give up if slot area array is full. */
567         while (slot_area_index < MAX_SLOT_AREA) {
568                 /* Potentially raise address to meet alignment needs. */
569                 region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN);
570 
571                 /* Did we raise the address above the passed in memory entry? */
572                 if (region.start > region_end)
573                         return;
574 
575                 /* Reduce size by any delta from the original address. */
576                 region.size = region_end - region.start;
577 
578                 /* Return if region can't contain decompressed kernel */
579                 if (region.size < image_size)
580                         return;
581 
582                 /* If nothing overlaps, store the region and return. */
583                 if (!mem_avoid_overlap(&region, &overlap)) {
584                         process_gb_huge_pages(&region, image_size);
585                         return;
586                 }
587 
588                 /* Store beginning of region if it holds at least image_size. */
589                 if (overlap.start >= region.start + image_size) {
590                         region.size = overlap.start - region.start;
591                         process_gb_huge_pages(&region, image_size);
592                 }
593 
594                 /* Clip off the overlapping region and start over. */
595                 region.start = overlap.start + overlap.size;
596         }
597 }
598 
599 static bool process_mem_region(struct mem_vector *region,
600                                unsigned long minimum,
601                                unsigned long image_size)
602 {
603         int i;
604         /*
605          * If no immovable memory found, or MEMORY_HOTREMOVE disabled,
606          * use @region directly.
607          */
608         if (!num_immovable_mem) {
609                 __process_mem_region(region, minimum, image_size);
610 
611                 if (slot_area_index == MAX_SLOT_AREA) {
612                         debug_putstr("Aborted e820/efi memmap scan (slot_areas full)!\n");
613                         return true;
614                 }
615                 return false;
616         }
617 
618 #if defined(CONFIG_MEMORY_HOTREMOVE) && defined(CONFIG_ACPI)
619         /*
620          * If immovable memory found, filter the intersection between
621          * immovable memory and @region.
622          */
623         for (i = 0; i < num_immovable_mem; i++) {
624                 u64 start, end, entry_end, region_end;
625                 struct mem_vector entry;
626 
627                 if (!mem_overlaps(region, &immovable_mem[i]))
628                         continue;
629 
630                 start = immovable_mem[i].start;
631                 end = start + immovable_mem[i].size;
632                 region_end = region->start + region->size;
633 
634                 entry.start = clamp(region->start, start, end);
635                 entry_end = clamp(region_end, start, end);
636                 entry.size = entry_end - entry.start;
637 
638                 __process_mem_region(&entry, minimum, image_size);
639 
640                 if (slot_area_index == MAX_SLOT_AREA) {
641                         debug_putstr("Aborted e820/efi memmap scan when walking immovable regions(slot_areas full)!\n");
642                         return true;
643                 }
644         }
645 #endif
646         return false;
647 }
648 
649 #ifdef CONFIG_EFI
650 
651 /*
652  * Only EFI_CONVENTIONAL_MEMORY and EFI_UNACCEPTED_MEMORY (if supported) are
653  * guaranteed to be free.
654  *
655  * Pick free memory more conservatively than the EFI spec allows: according to
656  * the spec, EFI_BOOT_SERVICES_{CODE|DATA} are also free memory and thus
657  * available to place the kernel image into, but in practice there's firmware
658  * where using that memory leads to crashes. Buggy vendor EFI code registers
659  * for an event that triggers on SetVirtualAddressMap(). The handler assumes
660  * that EFI_BOOT_SERVICES_DATA memory has not been touched by the loader yet, which
661  * is probably true for Windows.
662  *
663  * Preserve EFI_BOOT_SERVICES_* regions until after SetVirtualAddressMap().
664  */
665 static inline bool memory_type_is_free(efi_memory_desc_t *md)
666 {
667         if (md->type == EFI_CONVENTIONAL_MEMORY)
668                 return true;
669 
670         if (IS_ENABLED(CONFIG_UNACCEPTED_MEMORY) &&
671             md->type == EFI_UNACCEPTED_MEMORY)
672                     return true;
673 
674         return false;
675 }
676 
677 /*
678  * Returns true if we processed the EFI memmap, which we prefer over the E820
679  * table if it is available.
680  */
681 static bool
682 process_efi_entries(unsigned long minimum, unsigned long image_size)
683 {
684         struct efi_info *e = &boot_params_ptr->efi_info;
685         bool efi_mirror_found = false;
686         struct mem_vector region;
687         efi_memory_desc_t *md;
688         unsigned long pmap;
689         char *signature;
690         u32 nr_desc;
691         int i;
692 
693         signature = (char *)&e->efi_loader_signature;
694         if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) &&
695             strncmp(signature, EFI64_LOADER_SIGNATURE, 4))
696                 return false;
697 
698 #ifdef CONFIG_X86_32
699         /* Can't handle data above 4GB at this time */
700         if (e->efi_memmap_hi) {
701                 warn("EFI memmap is above 4GB, can't be handled now on x86_32. EFI should be disabled.\n");
702                 return false;
703         }
704         pmap =  e->efi_memmap;
705 #else
706         pmap = (e->efi_memmap | ((__u64)e->efi_memmap_hi << 32));
707 #endif
708 
709         nr_desc = e->efi_memmap_size / e->efi_memdesc_size;
710         for (i = 0; i < nr_desc; i++) {
711                 md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i);
712                 if (md->attribute & EFI_MEMORY_MORE_RELIABLE) {
713                         efi_mirror_found = true;
714                         break;
715                 }
716         }
717 
718         for (i = 0; i < nr_desc; i++) {
719                 md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i);
720 
721                 if (!memory_type_is_free(md))
722                         continue;
723 
724                 if (efi_soft_reserve_enabled() &&
725                     (md->attribute & EFI_MEMORY_SP))
726                         continue;
727 
728                 if (efi_mirror_found &&
729                     !(md->attribute & EFI_MEMORY_MORE_RELIABLE))
730                         continue;
731 
732                 region.start = md->phys_addr;
733                 region.size = md->num_pages << EFI_PAGE_SHIFT;
734                 if (process_mem_region(&region, minimum, image_size))
735                         break;
736         }
737         return true;
738 }
739 #else
740 static inline bool
741 process_efi_entries(unsigned long minimum, unsigned long image_size)
742 {
743         return false;
744 }
745 #endif
746 
747 static void process_e820_entries(unsigned long minimum,
748                                  unsigned long image_size)
749 {
750         int i;
751         struct mem_vector region;
752         struct boot_e820_entry *entry;
753 
754         /* Verify potential e820 positions, appending to slots list. */
755         for (i = 0; i < boot_params_ptr->e820_entries; i++) {
756                 entry = &boot_params_ptr->e820_table[i];
757                 /* Skip non-RAM entries. */
758                 if (entry->type != E820_TYPE_RAM)
759                         continue;
760                 region.start = entry->addr;
761                 region.size = entry->size;
762                 if (process_mem_region(&region, minimum, image_size))
763                         break;
764         }
765 }
766 
767 static unsigned long find_random_phys_addr(unsigned long minimum,
768                                            unsigned long image_size)
769 {
770         u64 phys_addr;
771 
772         /* Bail out early if it's impossible to succeed. */
773         if (minimum + image_size > mem_limit)
774                 return 0;
775 
776         /* Check if we had too many memmaps. */
777         if (memmap_too_large) {
778                 debug_putstr("Aborted memory entries scan (more than 4 memmap= args)!\n");
779                 return 0;
780         }
781 
782         if (!process_efi_entries(minimum, image_size))
783                 process_e820_entries(minimum, image_size);
784 
785         phys_addr = slots_fetch_random();
786 
787         /* Perform a final check to make sure the address is in range. */
788         if (phys_addr < minimum || phys_addr + image_size > mem_limit) {
789                 warn("Invalid physical address chosen!\n");
790                 return 0;
791         }
792 
793         return (unsigned long)phys_addr;
794 }
795 
796 static unsigned long find_random_virt_addr(unsigned long minimum,
797                                            unsigned long image_size)
798 {
799         unsigned long slots, random_addr;
800 
801         /*
802          * How many CONFIG_PHYSICAL_ALIGN-sized slots can hold
803          * image_size within the range from minimum to
804          * KERNEL_IMAGE_SIZE?
805          */
806         slots = 1 + (KERNEL_IMAGE_SIZE - minimum - image_size) / CONFIG_PHYSICAL_ALIGN;
807 
808         random_addr = kaslr_get_random_long("Virtual") % slots;
809 
810         return random_addr * CONFIG_PHYSICAL_ALIGN + minimum;
811 }
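
    /*
     * Worked example, assuming the typical x86_64 defaults of
     * LOAD_PHYSICAL_ADDR == 16 MiB, KERNEL_IMAGE_SIZE == 1 GiB and
     * CONFIG_PHYSICAL_ALIGN == 2 MiB: for a 64 MiB image,
     *
     *   slots = 1 + (1024 - 16 - 64) MiB / 2 MiB = 473
     *
     * and the function returns (random % 473) * 2 MiB + 16 MiB.
     */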
812 
813 /*
814  * Since this function does arithmetic on addresses rather than dereferencing them,
815  * it takes the input and output pointers as 'unsigned long'.
816  */
817 void choose_random_location(unsigned long input,
818                             unsigned long input_size,
819                             unsigned long *output,
820                             unsigned long output_size,
821                             unsigned long *virt_addr)
822 {
823         unsigned long random_addr, min_addr;
824 
825         if (cmdline_find_option_bool("nokaslr")) {
826                 warn("KASLR disabled: 'nokaslr' on cmdline.");
827                 return;
828         }
829 
830         boot_params_ptr->hdr.loadflags |= KASLR_FLAG;
831 
832         if (IS_ENABLED(CONFIG_X86_32))
833                 mem_limit = KERNEL_IMAGE_SIZE;
834         else
835                 mem_limit = MAXMEM;
836 
837         /* Record the various known unsafe memory ranges. */
838         mem_avoid_init(input, input_size, *output);
839 
840         /*
841          * Low end of the randomization range should be the
842          * smaller of 512M or the initial kernel image
843          * location:
844          */
845         min_addr = min(*output, 512UL << 20);
846         /* Make sure minimum is aligned. */
847         min_addr = ALIGN(min_addr, CONFIG_PHYSICAL_ALIGN);
848 
849         /* Walk available memory entries to find a random address. */
850         random_addr = find_random_phys_addr(min_addr, output_size);
851         if (!random_addr) {
852                 warn("Physical KASLR disabled: no suitable memory region!");
853         } else {
854                 /* Update the new physical address location. */
855                 if (*output != random_addr)
856                         *output = random_addr;
857         }
858 
859 
860         /* Pick random virtual address starting from LOAD_PHYSICAL_ADDR. */
861         if (IS_ENABLED(CONFIG_X86_64))
862                 random_addr = find_random_virt_addr(LOAD_PHYSICAL_ADDR, output_size);
863         *virt_addr = random_addr;
864 }
865 
