// SPDX-License-Identifier: GPL-2.0

#include <linux/linkage.h>
#include <asm/cache.h>
#include <asm/assembler.h>

	.text

#define state0		v0
#define state1		v1
#define state2		v2
#define state3		v3
#define copy0		v4
#define copy0_q		q4
#define copy1		v5
#define copy2		v6
#define copy3		v7
#define copy3_d		d7
#define one_d		d16
#define one_q		q16
#define one_v		v16
#define tmp		v17
#define rot8		v18

/*
 * ARM64 ChaCha20 implementation meant for vDSO.  Produces a given positive
 * number of blocks of output with nonce 0, taking an input key and 8-byte
 * counter.  Importantly does not spill to the stack.
 *
 * This implementation avoids d8-d15 because they are callee-save in user
 * space.
 *
 * void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes,
 *				       const uint8_t *key,
 *				       uint32_t *counter,
 *				       size_t nblocks)
 *
 *	x0: output bytes
 *	x1: 32-byte key input
 *	x2: 8-byte counter input/output
 *	x3: number of 64-byte blocks to write to output
 */
SYM_FUNC_START(__arch_chacha20_blocks_nostack)

	/* copy0 = "expand 32-byte k" */
	mov_q		x8, 0x3320646e61707865
	mov_q		x9, 0x6b20657479622d32
	mov		copy0.d[0], x8
	mov		copy0.d[1], x9

	/* copy1,copy2 = key */
	ld1		{ copy1.4s, copy2.4s }, [x1]
	/* copy3 = counter || zero nonce */
	ld1		{ copy3.2s }, [x2]

	/* one_v = { 1, 0, 1, 0 }, used to step the 64-bit counter lane */
	movi		one_v.2s, #1
	uzp1		one_v.4s, one_v.4s, one_v.4s

.Lblock:
	/* copy state to auxiliary vectors for the final add after the permute.  */
	mov		state0.16b, copy0.16b
	mov		state1.16b, copy1.16b
	mov		state2.16b, copy2.16b
	mov		state3.16b, copy3.16b

	mov		w4, 20
.Lpermute:
	/*
	 * Permute one 64-byte block where the state matrix is stored in the four
	 * NEON registers state0-state3.  It performs matrix operations on four
	 * words in parallel, but requires shuffling to rearrange the words after
	 * each round.
	 */

.Ldoubleround:
	/* state0 += state1, state3 = rotl32(state3 ^ state0, 16) */
	add		state0.4s, state0.4s, state1.4s
	eor		state3.16b, state3.16b, state0.16b
	rev32		state3.8h, state3.8h

	/* state2 += state3, state1 = rotl32(state1 ^ state2, 12) */
	add		state2.4s, state2.4s, state3.4s
	eor		tmp.16b, state1.16b, state2.16b
	shl		state1.4s, tmp.4s, #12
	sri		state1.4s, tmp.4s, #20

	/* state0 += state1, state3 = rotl32(state3 ^ state0, 8) */
	add		state0.4s, state0.4s, state1.4s
	eor		tmp.16b, state3.16b, state0.16b
	shl		state3.4s, tmp.4s, #8
	sri		state3.4s, tmp.4s, #24

	/* state2 += state3, state1 = rotl32(state1 ^ state2, 7) */
	add		state2.4s, state2.4s, state3.4s
	eor		tmp.16b, state1.16b, state2.16b
	shl		state1.4s, tmp.4s, #7
	sri		state1.4s, tmp.4s, #25

	/* state1[0,1,2,3] = state1[1,2,3,0] */
	ext		state1.16b, state1.16b, state1.16b, #4
	/* state2[0,1,2,3] = state2[2,3,0,1] */
	ext		state2.16b, state2.16b, state2.16b, #8
	/* state3[0,1,2,3] = state3[3,0,1,2] */
	ext		state3.16b, state3.16b, state3.16b, #12

	/* state0 += state1, state3 = rotl32(state3 ^ state0, 16) */
	add		state0.4s, state0.4s, state1.4s
	eor		state3.16b, state3.16b, state0.16b
	rev32		state3.8h, state3.8h

	/* state2 += state3, state1 = rotl32(state1 ^ state2, 12) */
	add		state2.4s, state2.4s, state3.4s
	eor		tmp.16b, state1.16b, state2.16b
	shl		state1.4s, tmp.4s, #12
	sri		state1.4s, tmp.4s, #20

	/* state0 += state1, state3 = rotl32(state3 ^ state0, 8) */
	add		state0.4s, state0.4s, state1.4s
	eor		tmp.16b, state3.16b, state0.16b
	shl		state3.4s, tmp.4s, #8
	sri		state3.4s, tmp.4s, #24

	/* state2 += state3, state1 = rotl32(state1 ^ state2, 7) */
	add		state2.4s, state2.4s, state3.4s
	eor		tmp.16b, state1.16b, state2.16b
	shl		state1.4s, tmp.4s, #7
	sri		state1.4s, tmp.4s, #25

	/* state1[0,1,2,3] = state1[3,0,1,2] */
	ext		state1.16b, state1.16b, state1.16b, #12
	/* state2[0,1,2,3] = state2[2,3,0,1] */
	ext		state2.16b, state2.16b, state2.16b, #8
	/* state3[0,1,2,3] = state3[1,2,3,0] */
	ext		state3.16b, state3.16b, state3.16b, #4

	subs		w4, w4, #2
	b.ne		.Ldoubleround

	/* output0 = state0 + copy0 */
	add		state0.4s, state0.4s, copy0.4s
	/* output1 = state1 + copy1 */
	add		state1.4s, state1.4s, copy1.4s
	/* output2 = state2 + copy2 */
	add		state2.4s, state2.4s, copy2.4s
	/* output3 = state3 + copy3 */
	add		state3.4s, state3.4s, copy3.4s
	st1		{ state0.16b - state3.16b }, [x0]

	/*
	 * ++copy3.counter, the 'add' clears the upper half of the SIMD register
	 * which is the expected behaviour here.
	 */
	add		copy3_d, copy3_d, one_d

	/* output += 64, --nblocks */
	add		x0, x0, 64
	subs		x3, x3, #1
	b.ne		.Lblock

	/* counter = copy3.counter */
	st1		{ copy3.2s }, [x2]

	/* Zero out the potentially sensitive regs, in case nothing uses these again. */
	movi		state0.16b, #0
	movi		state1.16b, #0
	movi		state2.16b, #0
	movi		state3.16b, #0
	movi		copy1.16b, #0
	movi		copy2.16b, #0
	ret
SYM_FUNC_END(__arch_chacha20_blocks_nostack)

emit_aarch64_feature_1_and
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.