1 // SPDX-License-Identifier: GPL-2.0-or-later 1 2 /* 3 * x86 instruction analysis 4 * 5 * Copyright (C) IBM Corporation, 2002, 2004, 6 */ 7 8 #include <linux/kernel.h> 9 #ifdef __KERNEL__ 10 #include <linux/string.h> 11 #else 12 #include <string.h> 13 #endif 14 #include <asm/inat.h> /*__ignore_sync_check__ 15 #include <asm/insn.h> /* __ignore_sync_check__ 16 #include <linux/unaligned.h> /* __ignore_sync_ 17 18 #include <linux/errno.h> 19 #include <linux/kconfig.h> 20 21 #include <asm/emulate_prefix.h> /* __ignore_sy 22 23 #define leXX_to_cpu(t, r) 24 ({ 25 __typeof__(t) v; 26 switch (sizeof(t)) { 27 case 4: v = le32_to_cpu(r); break; 28 case 2: v = le16_to_cpu(r); break; 29 case 1: v = r; break; 30 default: 31 BUILD_BUG(); break; 32 } 33 v; 34 }) 35 36 /* Verify next sizeof(t) bytes can be on the s 37 #define validate_next(t, insn, n) \ 38 ((insn)->next_byte + sizeof(t) + n <= 39 40 #define __get_next(t, insn) \ 41 ({ t r = get_unaligned((t *)(insn)->ne 42 43 #define __peek_nbyte_next(t, insn, n) \ 44 ({ t r = get_unaligned((t *)(insn)->ne 45 46 #define get_next(t, insn) \ 47 ({ if (unlikely(!validate_next(t, insn 48 49 #define peek_nbyte_next(t, insn, n) \ 50 ({ if (unlikely(!validate_next(t, insn 51 52 #define peek_next(t, insn) peek_nbyte_nex 53 54 /** 55 * insn_init() - initialize struct insn 56 * @insn: &struct insn to be initialized 57 * @kaddr: address (in kernel memory) of 58 * @buf_len: length of the insn buffer at @ 59 * @x86_64: !0 for 64-bit kernel or 64-bit 60 */ 61 void insn_init(struct insn *insn, const void * 62 { 63 /* 64 * Instructions longer than MAX_INSN_S 65 * even if the input buffer is long en 66 */ 67 if (buf_len > MAX_INSN_SIZE) 68 buf_len = MAX_INSN_SIZE; 69 70 memset(insn, 0, sizeof(*insn)); 71 insn->kaddr = kaddr; 72 insn->end_kaddr = kaddr + buf_len; 73 insn->next_byte = kaddr; 74 insn->x86_64 = x86_64; 75 insn->opnd_bytes = 4; 76 if (x86_64) 77 insn->addr_bytes = 8; 78 else 79 insn->addr_bytes = 4; 80 } 81 82 static const insn_byte_t xen_prefix[] = { __XE 83 static const insn_byte_t kvm_prefix[] = { __KV 84 85 static int __insn_get_emulate_prefix(struct in 86 const ins 87 { 88 size_t i; 89 90 for (i = 0; i < len; i++) { 91 if (peek_nbyte_next(insn_byte_ 92 goto err_out; 93 } 94 95 insn->emulate_prefix_size = len; 96 insn->next_byte += len; 97 98 return 1; 99 100 err_out: 101 return 0; 102 } 103 104 static void insn_get_emulate_prefix(struct ins 105 { 106 if (__insn_get_emulate_prefix(insn, xe 107 return; 108 109 __insn_get_emulate_prefix(insn, kvm_pr 110 } 111 112 /** 113 * insn_get_prefixes - scan x86 instruction pr 114 * @insn: &struct insn containing instru 115 * 116 * Populates the @insn->prefixes bitmap, and u 117 * to point to the (first) opcode. No effect 118 * is already set. 119 * 120 * * Returns: 121 * 0: on success 122 * < 0: on error 123 */ 124 int insn_get_prefixes(struct insn *insn) 125 { 126 struct insn_field *prefixes = &insn->p 127 insn_attr_t attr; 128 insn_byte_t b, lb; 129 int i, nb; 130 131 if (prefixes->got) 132 return 0; 133 134 insn_get_emulate_prefix(insn); 135 136 nb = 0; 137 lb = 0; 138 b = peek_next(insn_byte_t, insn); 139 attr = inat_get_opcode_attribute(b); 140 while (inat_is_legacy_prefix(attr)) { 141 /* Skip if same prefix */ 142 for (i = 0; i < nb; i++) 143 if (prefixes->bytes[i] 144 goto found; 145 if (nb == 4) 146 /* Invalid instruction 147 break; 148 prefixes->bytes[nb++] = b; 149 if (inat_is_address_size_prefi 150 /* address size switch 151 if (insn->x86_64) 152 insn->addr_byt 153 else 154 insn->addr_byt 155 } else if (inat_is_operand_siz 156 /* oprand size switche 157 insn->opnd_bytes ^= 6; 158 } 159 found: 160 prefixes->nbytes++; 161 insn->next_byte++; 162 lb = b; 163 b = peek_next(insn_byte_t, ins 164 attr = inat_get_opcode_attribu 165 } 166 /* Set the last prefix */ 167 if (lb && lb != insn->prefixes.bytes[3 168 if (unlikely(insn->prefixes.by 169 /* Swap the last prefi 170 b = insn->prefixes.byt 171 for (i = 0; i < nb; i+ 172 if (prefixes-> 173 insn_s 174 } 175 insn_set_byte(&insn->prefixes, 176 } 177 178 /* Decode REX prefix */ 179 if (insn->x86_64) { 180 b = peek_next(insn_byte_t, ins 181 attr = inat_get_opcode_attribu 182 if (inat_is_rex_prefix(attr)) 183 insn_field_set(&insn-> 184 insn->next_byte++; 185 if (X86_REX_W(b)) 186 /* REX.W overr 187 insn->opnd_byt 188 } else if (inat_is_rex2_prefix 189 insn_set_byte(&insn->r 190 b = peek_nbyte_next(in 191 insn_set_byte(&insn->r 192 insn->rex_prefix.nbyte 193 insn->next_byte += 2; 194 if (X86_REX_W(b)) 195 /* REX.W overr 196 insn->opnd_byt 197 insn->rex_prefix.got = 198 goto vex_end; 199 } 200 } 201 insn->rex_prefix.got = 1; 202 203 /* Decode VEX prefix */ 204 b = peek_next(insn_byte_t, insn); 205 attr = inat_get_opcode_attribute(b); 206 if (inat_is_vex_prefix(attr)) { 207 insn_byte_t b2 = peek_nbyte_ne 208 if (!insn->x86_64) { 209 /* 210 * In 32-bits mode, if 211 * ModRM) on the secon 212 * LDS or LES or BOUND 213 */ 214 if (X86_MODRM_MOD(b2) 215 goto vex_end; 216 } 217 insn_set_byte(&insn->vex_prefi 218 insn_set_byte(&insn->vex_prefi 219 if (inat_is_evex_prefix(attr)) 220 b2 = peek_nbyte_next(i 221 insn_set_byte(&insn->v 222 b2 = peek_nbyte_next(i 223 insn_set_byte(&insn->v 224 insn->vex_prefix.nbyte 225 insn->next_byte += 4; 226 if (insn->x86_64 && X8 227 /* VEX.W overr 228 insn->opnd_byt 229 } else if (inat_is_vex3_prefix 230 b2 = peek_nbyte_next(i 231 insn_set_byte(&insn->v 232 insn->vex_prefix.nbyte 233 insn->next_byte += 3; 234 if (insn->x86_64 && X8 235 /* VEX.W overr 236 insn->opnd_byt 237 } else { 238 /* 239 * For VEX2, fake VEX3 240 * Makes it easier to 241 * vex.L and vex.pp. M 242 */ 243 insn_set_byte(&insn->v 244 insn->vex_prefix.nbyte 245 insn->next_byte += 2; 246 } 247 } 248 vex_end: 249 insn->vex_prefix.got = 1; 250 251 prefixes->got = 1; 252 253 return 0; 254 255 err_out: 256 return -ENODATA; 257 } 258 259 /** 260 * insn_get_opcode - collect opcode(s) 261 * @insn: &struct insn containing instru 262 * 263 * Populates @insn->opcode, updates @insn->nex 264 * opcode byte(s), and set @insn->attr (except 265 * If necessary, first collects any preceding 266 * Sets @insn->opcode.value = opcode1. No eff 267 * is already 1. 268 * 269 * Returns: 270 * 0: on success 271 * < 0: on error 272 */ 273 int insn_get_opcode(struct insn *insn) 274 { 275 struct insn_field *opcode = &insn->opc 276 int pfx_id, ret; 277 insn_byte_t op; 278 279 if (opcode->got) 280 return 0; 281 282 ret = insn_get_prefixes(insn); 283 if (ret) 284 return ret; 285 286 /* Get first opcode */ 287 op = get_next(insn_byte_t, insn); 288 insn_set_byte(opcode, 0, op); 289 opcode->nbytes = 1; 290 291 /* Check if there is VEX prefix or not 292 if (insn_is_avx(insn)) { 293 insn_byte_t m, p; 294 m = insn_vex_m_bits(insn); 295 p = insn_vex_p_bits(insn); 296 insn->attr = inat_get_avx_attr 297 /* SCALABLE EVEX uses p bits t 298 if (inat_evex_scalable(insn->a 299 p == INAT_PFX_OPNDSZ) 300 insn->opnd_bytes = 2; 301 if ((inat_must_evex(insn->attr 302 (!inat_accept_vex(insn->at 303 !inat_is_group(insn->attr 304 /* This instruction is 305 insn->attr = 0; 306 return -EINVAL; 307 } 308 /* VEX has only 1 byte for opc 309 goto end; 310 } 311 312 /* Check if there is REX2 prefix or no 313 if (insn_is_rex2(insn)) { 314 if (insn_rex2_m_bit(insn)) { 315 /* map 1 is escape 0x0 316 insn_attr_t esc_attr = 317 318 pfx_id = insn_last_pre 319 insn->attr = inat_get_ 320 } else { 321 insn->attr = inat_get_ 322 } 323 goto end; 324 } 325 326 insn->attr = inat_get_opcode_attribute 327 while (inat_is_escape(insn->attr)) { 328 /* Get escaped opcode */ 329 op = get_next(insn_byte_t, ins 330 opcode->bytes[opcode->nbytes++ 331 pfx_id = insn_last_prefix_id(i 332 insn->attr = inat_get_escape_a 333 } 334 335 if (inat_must_vex(insn->attr)) { 336 /* This instruction is bad */ 337 insn->attr = 0; 338 return -EINVAL; 339 } 340 end: 341 opcode->got = 1; 342 return 0; 343 344 err_out: 345 return -ENODATA; 346 } 347 348 /** 349 * insn_get_modrm - collect ModRM byte, if any 350 * @insn: &struct insn containing instru 351 * 352 * Populates @insn->modrm and updates @insn->n 353 * ModRM byte, if any. If necessary, first co 354 * (prefixes and opcode(s)). No effect if @in 355 * 356 * Returns: 357 * 0: on success 358 * < 0: on error 359 */ 360 int insn_get_modrm(struct insn *insn) 361 { 362 struct insn_field *modrm = &insn->modr 363 insn_byte_t pfx_id, mod; 364 int ret; 365 366 if (modrm->got) 367 return 0; 368 369 ret = insn_get_opcode(insn); 370 if (ret) 371 return ret; 372 373 if (inat_has_modrm(insn->attr)) { 374 mod = get_next(insn_byte_t, in 375 insn_field_set(modrm, mod, 1); 376 if (inat_is_group(insn->attr)) 377 pfx_id = insn_last_pre 378 insn->attr = inat_get_ 379 380 if (insn_is_avx(insn) 381 /* Bad insn */ 382 insn->attr = 0 383 return -EINVAL 384 } 385 } 386 } 387 388 if (insn->x86_64 && inat_is_force64(in 389 insn->opnd_bytes = 8; 390 391 modrm->got = 1; 392 return 0; 393 394 err_out: 395 return -ENODATA; 396 } 397 398 399 /** 400 * insn_rip_relative() - Does instruction use 401 * @insn: &struct insn containing instru 402 * 403 * If necessary, first collects the instructio 404 * ModRM byte. No effect if @insn->x86_64 is 405 */ 406 int insn_rip_relative(struct insn *insn) 407 { 408 struct insn_field *modrm = &insn->modr 409 int ret; 410 411 if (!insn->x86_64) 412 return 0; 413 414 ret = insn_get_modrm(insn); 415 if (ret) 416 return 0; 417 /* 418 * For rip-relative instructions, the 419 * is zero and the r/m field (bottom 3 420 */ 421 return (modrm->nbytes && (modrm->bytes 422 } 423 424 /** 425 * insn_get_sib() - Get the SIB byte of instru 426 * @insn: &struct insn containing instru 427 * 428 * If necessary, first collects the instructio 429 * ModRM byte. 430 * 431 * Returns: 432 * 0: if decoding succeeded 433 * < 0: otherwise. 434 */ 435 int insn_get_sib(struct insn *insn) 436 { 437 insn_byte_t modrm; 438 int ret; 439 440 if (insn->sib.got) 441 return 0; 442 443 ret = insn_get_modrm(insn); 444 if (ret) 445 return ret; 446 447 if (insn->modrm.nbytes) { 448 modrm = insn->modrm.bytes[0]; 449 if (insn->addr_bytes != 2 && 450 X86_MODRM_MOD(modrm) != 3 451 insn_field_set(&insn-> 452 get_nex 453 } 454 } 455 insn->sib.got = 1; 456 457 return 0; 458 459 err_out: 460 return -ENODATA; 461 } 462 463 464 /** 465 * insn_get_displacement() - Get the displacem 466 * @insn: &struct insn containing instru 467 * 468 * If necessary, first collects the instructio 469 * SIB byte. 470 * Displacement value is sign-expanded. 471 * 472 * * Returns: 473 * 0: if decoding succeeded 474 * < 0: otherwise. 475 */ 476 int insn_get_displacement(struct insn *insn) 477 { 478 insn_byte_t mod, rm, base; 479 int ret; 480 481 if (insn->displacement.got) 482 return 0; 483 484 ret = insn_get_sib(insn); 485 if (ret) 486 return ret; 487 488 if (insn->modrm.nbytes) { 489 /* 490 * Interpreting the modrm byte 491 * mod = 00 - no displacement 492 * mod = 01 - 1-byte displacem 493 * mod = 10 - displacement fie 494 * address size = 2 (0x67 495 * mod = 11 - no memory operan 496 * 497 * If address size = 2... 498 * mod = 00, r/m = 110 - displ 499 * 500 * If address size != 2... 501 * mod != 11, r/m = 100 - SIB 502 * mod = 00, SIB base = 101 - 503 * mod = 00, r/m = 101 - rip-r 504 * field is 4 bytes 505 */ 506 mod = X86_MODRM_MOD(insn->modr 507 rm = X86_MODRM_RM(insn->modrm. 508 base = X86_SIB_BASE(insn->sib. 509 if (mod == 3) 510 goto out; 511 if (mod == 1) { 512 insn_field_set(&insn-> 513 get_nex 514 } else if (insn->addr_bytes == 515 if ((mod == 0 && rm == 516 insn_field_set 517 518 } 519 } else { 520 if ((mod == 0 && rm == 521 (mod == 0 && base 522 insn_field_set 523 524 } 525 } 526 } 527 out: 528 insn->displacement.got = 1; 529 return 0; 530 531 err_out: 532 return -ENODATA; 533 } 534 535 /* Decode moffset16/32/64. Return 0 if failed 536 static int __get_moffset(struct insn *insn) 537 { 538 switch (insn->addr_bytes) { 539 case 2: 540 insn_field_set(&insn->moffset1 541 break; 542 case 4: 543 insn_field_set(&insn->moffset1 544 break; 545 case 8: 546 insn_field_set(&insn->moffset1 547 insn_field_set(&insn->moffset2 548 break; 549 default: /* opnd_bytes must be 550 goto err_out; 551 } 552 insn->moffset1.got = insn->moffset2.go 553 554 return 1; 555 556 err_out: 557 return 0; 558 } 559 560 /* Decode imm v32(Iz). Return 0 if failed */ 561 static int __get_immv32(struct insn *insn) 562 { 563 switch (insn->opnd_bytes) { 564 case 2: 565 insn_field_set(&insn->immediat 566 break; 567 case 4: 568 case 8: 569 insn_field_set(&insn->immediat 570 break; 571 default: /* opnd_bytes must be 572 goto err_out; 573 } 574 575 return 1; 576 577 err_out: 578 return 0; 579 } 580 581 /* Decode imm v64(Iv/Ov), Return 0 if failed * 582 static int __get_immv(struct insn *insn) 583 { 584 switch (insn->opnd_bytes) { 585 case 2: 586 insn_field_set(&insn->immediat 587 break; 588 case 4: 589 insn_field_set(&insn->immediat 590 insn->immediate1.nbytes = 4; 591 break; 592 case 8: 593 insn_field_set(&insn->immediat 594 insn_field_set(&insn->immediat 595 break; 596 default: /* opnd_bytes must be 597 goto err_out; 598 } 599 insn->immediate1.got = insn->immediate 600 601 return 1; 602 err_out: 603 return 0; 604 } 605 606 /* Decode ptr16:16/32(Ap) */ 607 static int __get_immptr(struct insn *insn) 608 { 609 switch (insn->opnd_bytes) { 610 case 2: 611 insn_field_set(&insn->immediat 612 break; 613 case 4: 614 insn_field_set(&insn->immediat 615 break; 616 case 8: 617 /* ptr16:64 is not exist (no s 618 return 0; 619 default: /* opnd_bytes must be 620 goto err_out; 621 } 622 insn_field_set(&insn->immediate2, get_ 623 insn->immediate1.got = insn->immediate 624 625 return 1; 626 err_out: 627 return 0; 628 } 629 630 /** 631 * insn_get_immediate() - Get the immediate in 632 * @insn: &struct insn containing instru 633 * 634 * If necessary, first collects the instructio 635 * displacement bytes. 636 * Basically, most of immediates are sign-expa 637 * computed by bit masking with ((1 << (nbytes 638 * 639 * Returns: 640 * 0: on success 641 * < 0: on error 642 */ 643 int insn_get_immediate(struct insn *insn) 644 { 645 int ret; 646 647 if (insn->immediate.got) 648 return 0; 649 650 ret = insn_get_displacement(insn); 651 if (ret) 652 return ret; 653 654 if (inat_has_moffset(insn->attr)) { 655 if (!__get_moffset(insn)) 656 goto err_out; 657 goto done; 658 } 659 660 if (!inat_has_immediate(insn->attr)) 661 /* no immediates */ 662 goto done; 663 664 switch (inat_immediate_size(insn->attr 665 case INAT_IMM_BYTE: 666 insn_field_set(&insn->immediat 667 break; 668 case INAT_IMM_WORD: 669 insn_field_set(&insn->immediat 670 break; 671 case INAT_IMM_DWORD: 672 insn_field_set(&insn->immediat 673 break; 674 case INAT_IMM_QWORD: 675 insn_field_set(&insn->immediat 676 insn_field_set(&insn->immediat 677 break; 678 case INAT_IMM_PTR: 679 if (!__get_immptr(insn)) 680 goto err_out; 681 break; 682 case INAT_IMM_VWORD32: 683 if (!__get_immv32(insn)) 684 goto err_out; 685 break; 686 case INAT_IMM_VWORD: 687 if (!__get_immv(insn)) 688 goto err_out; 689 break; 690 default: 691 /* Here, insn must have an imm 692 goto err_out; 693 } 694 if (inat_has_second_immediate(insn->at 695 insn_field_set(&insn->immediat 696 } 697 done: 698 insn->immediate.got = 1; 699 return 0; 700 701 err_out: 702 return -ENODATA; 703 } 704 705 /** 706 * insn_get_length() - Get the length of instr 707 * @insn: &struct insn containing instru 708 * 709 * If necessary, first collects the instructio 710 * immediates bytes. 711 * 712 * Returns: 713 * - 0 on success 714 * - < 0 on error 715 */ 716 int insn_get_length(struct insn *insn) 717 { 718 int ret; 719 720 if (insn->length) 721 return 0; 722 723 ret = insn_get_immediate(insn); 724 if (ret) 725 return ret; 726 727 insn->length = (unsigned char)((unsign 728 - (unsign 729 730 return 0; 731 } 732 733 /* Ensure this instruction is decoded complete 734 static inline int insn_complete(struct insn *i 735 { 736 return insn->opcode.got && insn->modrm 737 insn->displacement.got && insn 738 } 739 740 /** 741 * insn_decode() - Decode an x86 instruction 742 * @insn: &struct insn to be initialized 743 * @kaddr: address (in kernel memory) of 744 * @buf_len: length of the insn buffer at @ 745 * @m: insn mode, see enum insn_mode 746 * 747 * Returns: 748 * 0: if decoding succeeded 749 * < 0: otherwise. 750 */ 751 int insn_decode(struct insn *insn, const void 752 { 753 int ret; 754 755 /* #define INSN_MODE_KERN -1 __ignore_sy 756 757 if (m == INSN_MODE_KERN) 758 insn_init(insn, kaddr, buf_len 759 else 760 insn_init(insn, kaddr, buf_len 761 762 ret = insn_get_length(insn); 763 if (ret) 764 return ret; 765 766 if (insn_complete(insn)) 767 return 0; 768 769 return -EINVAL; 770 } 771
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.