// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2022-2024 Jason A. Donenfeld
 */

#include <linux/linkage.h>
#include <asm/frame.h>

.section	.rodata, "a"
/* 16-byte alignment is required because the constant is loaded with movaps. */
.align 16
/* The ASCII string "expand 32-byte k" as one little-endian 128-bit value. */
CONSTANTS:	.octa 0x6b20657479622d323320646e61707865
.text

/*
 * Very basic SSE2 implementation of ChaCha20. Produces the requested number
 * of blocks of output with a nonce of 0, taking an input key and an 8-byte
 * counter. Importantly does not spill to the stack: all working state lives
 * in registers. Syntax is GNU as, AT&T operand order (source, destination).
 *
 * Arguments:
 *	rdi: output bytes (advanced by 64 for every block written)
 *	rsi: 32-byte key input
 *	rdx: 8-byte counter input/output (the incremented counter is stored
 *	     back on return)
 *	rcx: number of 64-byte blocks to write to output; must be non-zero,
 *	     because the block loop tests the count only after writing a block
 *
 * Clobbers: rax, rcx, rdi, xmm0-xmm9, flags.
 */
SYM_FUNC_START(__arch_chacha20_blocks_nostack)

.set	output,		%rdi
.set	key,		%rsi
.set	counter,	%rdx
.set	nblocks,	%rcx
/* Round-loop counter; %al is the low byte of the %rax scratch register. */
.set	i,		%al
/* xmm registers are *not* callee-save. */
.set	temp,		%xmm0
.set	state0,		%xmm1
.set	state1,		%xmm2
.set	state2,		%xmm3
.set	state3,		%xmm4
.set	copy0,		%xmm5
.set	copy1,		%xmm6
.set	copy2,		%xmm7
.set	copy3,		%xmm8
.set	one,		%xmm9

	/* copy0 = "expand 32-byte k" */
	movaps		CONSTANTS(%rip),copy0
	/* copy1,copy2 = key (unaligned loads: no alignment is assumed) */
	movups		0x00(key),copy1
	movups		0x10(key),copy2
	/* copy3 = counter || zero nonce (movq clears the upper 64 bits) */
	movq		0x00(counter),copy3
	/* one = 1 || 0 */
	movq		$1,%rax
	movq		%rax,one

.Lblock:
	/* state0,state1,state2,state3 = copy0,copy1,copy2,copy3 */
	movdqa		copy0,state0
	movdqa		copy1,state1
	movdqa		copy2,state2
	movdqa		copy3,state3

	/* 10 iterations, each doing one column round and one diagonal round. */
	movb		$10,i
.Lpermute:
	/* state0 += state1, state3 = rotl32(state3 ^ state0, 16) */
	paddd		state1,state0
	pxor		state0,state3
	movdqa		state3,temp
	pslld		$16,temp
	psrld		$16,state3
	por		temp,state3

	/* state2 += state3, state1 = rotl32(state1 ^ state2, 12) */
	paddd		state3,state2
	pxor		state2,state1
	movdqa		state1,temp
	pslld		$12,temp
	psrld		$20,state1
	por		temp,state1

	/* state0 += state1, state3 = rotl32(state3 ^ state0, 8) */
	paddd		state1,state0
	pxor		state0,state3
	movdqa		state3,temp
	pslld		$8,temp
	psrld		$24,state3
	por		temp,state3

	/* state2 += state3, state1 = rotl32(state1 ^ state2, 7) */
	paddd		state3,state2
	pxor		state2,state1
	movdqa		state1,temp
	pslld		$7,temp
	psrld		$25,state1
	por		temp,state1

	/* state1[0,1,2,3] = state1[1,2,3,0] */
	pshufd		$0x39,state1,state1
	/* state2[0,1,2,3] = state2[2,3,0,1] */
	pshufd		$0x4e,state2,state2
	/* state3[0,1,2,3] = state3[3,0,1,2] */
	pshufd		$0x93,state3,state3

	/* state0 += state1, state3 = rotl32(state3 ^ state0, 16) */
	paddd		state1,state0
	pxor		state0,state3
	movdqa		state3,temp
	pslld		$16,temp
	psrld		$16,state3
	por		temp,state3

	/* state2 += state3, state1 = rotl32(state1 ^ state2, 12) */
	paddd		state3,state2
	pxor		state2,state1
	movdqa		state1,temp
	pslld		$12,temp
	psrld		$20,state1
	por		temp,state1

	/* state0 += state1, state3 = rotl32(state3 ^ state0, 8) */
	paddd		state1,state0
	pxor		state0,state3
	movdqa		state3,temp
	pslld		$8,temp
	psrld		$24,state3
	por		temp,state3

	/* state2 += state3, state1 = rotl32(state1 ^ state2, 7) */
	paddd		state3,state2
	pxor		state2,state1
	movdqa		state1,temp
	pslld		$7,temp
	psrld		$25,state1
	por		temp,state1

	/* Undo the lane rotation applied before the diagonal round. */
	/* state1[0,1,2,3] = state1[3,0,1,2] */
	pshufd		$0x93,state1,state1
	/* state2[0,1,2,3] = state2[2,3,0,1] */
	pshufd		$0x4e,state2,state2
	/* state3[0,1,2,3] = state3[1,2,3,0] */
	pshufd		$0x39,state3,state3

	decb		i
	jnz		.Lpermute

	/* output0 = state0 + copy0 */
	paddd		copy0,state0
	movups		state0,0x00(output)
	/* output1 = state1 + copy1 */
	paddd		copy1,state1
	movups		state1,0x10(output)
	/* output2 = state2 + copy2 */
	paddd		copy2,state2
	movups		state2,0x20(output)
	/* output3 = state3 + copy3 */
	paddd		copy3,state3
	movups		state3,0x30(output)

	/* ++copy3.counter (64-bit add on the low quadword; high quadword += 0) */
	paddq		one,copy3

	/* output += 64, --nblocks */
	addq		$64,output
	decq		nblocks
	jnz		.Lblock

	/* counter = copy3.counter */
	movq		copy3,0x00(counter)

	/*
	 * Zero out the potentially sensitive registers before returning:
	 * the working state, the key copies and the rotate scratch register.
	 */
	pxor		state0,state0
	pxor		state1,state1
	pxor		state2,state2
	pxor		state3,state3
	pxor		copy1,copy1
	pxor		copy2,copy2
	pxor		temp,temp

	ret
SYM_FUNC_END(__arch_chacha20_blocks_nostack)
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.