/*
 * NOTE(review): this copy of the file is a damaged text extraction, not
 * assemblable source. Each original source line is preceded by its original
 * line number ("1 ", "2 ", ...), many original lines were joined into one,
 * and long original lines are cut off on the right (for example the .inst
 * expressions in the four macros, the trailing operand of every
 * eor3/xar/bcax use, the .irp register list, and the .quad rows of
 * .Lsha3_rcon). The listing also stops inside the .Lsha3_rcon table.
 * Restore the missing text from a pristine copy; do not infer it from what
 * is visible here.
 *
 * What the visible code of sha3_ce_transform shows:
 *   x0 - state pointer: 25 64-bit lanes are loaded into v0-v24 on entry and
 *        stored back from v0-v24 at label 4.
 *   x1 - input pointer, advanced by the post-indexed ld1 loads.
 *   w2 - decremented once per pass at label 0; the pass repeats while it is
 *        non-zero, and its final value is returned in w0.
 *   x3 - selects how much input is XORed into the state per pass:
 *        bit 6 set                       -> 72 bytes  (marked SHA3-512);
 *        bit 6 clear, bit 4 clear        -> 136 bytes (marked SHA3-256);
 *        bit 6 clear, bits 4 and 2 set   -> 144 bytes (marked SHA3-224);
 *        bit 6 clear, bit 4 set, bit 2 clear -> 104 bytes (the comment is
 *        cut off; presumably SHA3-384 - confirm).
 *   w8 - set to 24 and counted down at label 3, so the block between
 *        label 3 and "cbnz w8, 3b" executes 24 times per pass.
 *   x9 - reset to .Lsha3_rcon at the start of each pass; one 64-bit entry
 *        is loaded into both lanes of v31 per iteration and XORed into v0.
 * cond_yield is not defined in this file (presumably it comes from
 * asm/assembler.h); here it is passed label 4 and x8/x9 - confirm its
 * semantics against that header.
 */
1 /* SPDX-License-Identifier: GPL-2.0 */ 2 /* 3 * sha3-ce-core.S - core SHA-3 transform using 4 * 5 * Copyright (C) 2018 Linaro Ltd <ard.biesheuve 6 * 7 * This program is free software; you can redi 8 * it under the terms of the GNU General Publi 9 * published by the Free Software Foundation. 10 */ 11 12 #include <linux/linkage.h> 13 #include <asm/assembler.h> 14 15 .irp b,0,1,2,3,4,5,6,7,8,9,10,11,12 16 .set .Lv\b\().2d, \b 17 .set .Lv\b\().16b, \b 18 .endr 19 20 /* 21 * ARMv8.2 Crypto Extensions instructi 22 */ 23 .macro eor3, rd, rn, rm, ra 24 .inst 0xce000000 | .L\rd | (.L\rn << 25 .endm 26 27 .macro rax1, rd, rn, rm 28 .inst 0xce608c00 | .L\rd | (.L\rn << 29 .endm 30 31 .macro bcax, rd, rn, rm, ra 32 .inst 0xce200000 | .L\rd | (.L\rn << 33 .endm 34 35 .macro xar, rd, rn, rm, imm6 36 .inst 0xce800000 | .L\rd | (.L\rn << 37 .endm 38 39 /* 40 * int sha3_ce_transform(u64 *st, cons 41 */ 42 .text 43 SYM_FUNC_START(sha3_ce_transform) 44 /* load state */ 45 add x8, x0, #32 46 ld1 { v0.1d- v3.1d}, [x0] 47 ld1 { v4.1d- v7.1d}, [x8], #32 48 ld1 { v8.1d-v11.1d}, [x8], #32 49 ld1 {v12.1d-v15.1d}, [x8], #32 50 ld1 {v16.1d-v19.1d}, [x8], #32 51 ld1 {v20.1d-v23.1d}, [x8], #32 52 ld1 {v24.1d}, [x8] 53 54 0: sub w2, w2, #1 55 mov w8, #24 56 adr_l x9, .Lsha3_rcon 57 58 /* load input */ 59 ld1 {v25.8b-v28.8b}, [x1], #32 60 ld1 {v29.8b-v31.8b}, [x1], #24 61 eor v0.8b, v0.8b, v25.8b 62 eor v1.8b, v1.8b, v26.8b 63 eor v2.8b, v2.8b, v27.8b 64 eor v3.8b, v3.8b, v28.8b 65 eor v4.8b, v4.8b, v29.8b 66 eor v5.8b, v5.8b, v30.8b 67 eor v6.8b, v6.8b, v31.8b 68 69 tbnz x3, #6, 2f // SHA 70 71 ld1 {v25.8b-v28.8b}, [x1], #32 72 ld1 {v29.8b-v30.8b}, [x1], #16 73 eor v7.8b, v7.8b, v25.8b 74 eor v8.8b, v8.8b, v26.8b 75 eor v9.8b, v9.8b, v27.8b 76 eor v10.8b, v10.8b, v28.8b 77 eor v11.8b, v11.8b, v29.8b 78 eor v12.8b, v12.8b, v30.8b 79 80 tbnz x3, #4, 1f // SHA 81 82 // SHA3-256 83 ld1 {v25.8b-v28.8b}, [x1], #32 84 eor v13.8b, v13.8b, v25.8b 85 eor v14.8b, v14.8b, v26.8b 86 eor v15.8b, v15.8b, 
v27.8b 87 eor v16.8b, v16.8b, v28.8b 88 b 3f 89 90 1: tbz x3, #2, 3f // bit 91 92 // SHA3-224 93 ld1 {v25.8b-v28.8b}, [x1], #32 94 ld1 {v29.8b}, [x1], #8 95 eor v13.8b, v13.8b, v25.8b 96 eor v14.8b, v14.8b, v26.8b 97 eor v15.8b, v15.8b, v27.8b 98 eor v16.8b, v16.8b, v28.8b 99 eor v17.8b, v17.8b, v29.8b 100 b 3f 101 102 // SHA3-512 103 2: ld1 {v25.8b-v26.8b}, [x1], #16 104 eor v7.8b, v7.8b, v25.8b 105 eor v8.8b, v8.8b, v26.8b 106 107 3: sub w8, w8, #1 108 109 eor3 v29.16b, v4.16b, v9.16b, v14 110 eor3 v26.16b, v1.16b, v6.16b, v11 111 eor3 v28.16b, v3.16b, v8.16b, v13 112 eor3 v25.16b, v0.16b, v5.16b, v10 113 eor3 v27.16b, v2.16b, v7.16b, v12 114 eor3 v29.16b, v29.16b, v19.16b, v24 115 eor3 v26.16b, v26.16b, v16.16b, v21 116 eor3 v28.16b, v28.16b, v18.16b, v23 117 eor3 v25.16b, v25.16b, v15.16b, v20 118 eor3 v27.16b, v27.16b, v17.16b, v22 119 120 rax1 v30.2d, v29.2d, v26.2d // bc[ 121 rax1 v26.2d, v26.2d, v28.2d // bc[ 122 rax1 v28.2d, v28.2d, v25.2d // bc[ 123 rax1 v25.2d, v25.2d, v27.2d // bc[ 124 rax1 v27.2d, v27.2d, v29.2d // bc[ 125 126 eor v0.16b, v0.16b, v30.16b 127 xar v29.2d, v1.2d, v25.2d, (64 128 xar v1.2d, v6.2d, v25.2d, (64 129 xar v6.2d, v9.2d, v28.2d, (64 130 xar v9.2d, v22.2d, v26.2d, (64 131 xar v22.2d, v14.2d, v28.2d, (64 132 xar v14.2d, v20.2d, v30.2d, (64 133 xar v31.2d, v2.2d, v26.2d, (64 134 xar v2.2d, v12.2d, v26.2d, (64 135 xar v12.2d, v13.2d, v27.2d, (64 136 xar v13.2d, v19.2d, v28.2d, (64 137 xar v19.2d, v23.2d, v27.2d, (64 138 xar v23.2d, v15.2d, v30.2d, (64 139 xar v15.2d, v4.2d, v28.2d, (64 140 xar v28.2d, v24.2d, v28.2d, (64 141 xar v24.2d, v21.2d, v25.2d, (64 142 xar v8.2d, v8.2d, v27.2d, (64 143 xar v4.2d, v16.2d, v25.2d, (64 144 xar v16.2d, v5.2d, v30.2d, (64 145 xar v5.2d, v3.2d, v27.2d, (64 146 xar v27.2d, v18.2d, v27.2d, (64 147 xar v3.2d, v17.2d, v26.2d, (64 148 xar v25.2d, v11.2d, v25.2d, (64 149 xar v26.2d, v7.2d, v26.2d, (64 150 xar v30.2d, v10.2d, v30.2d, (64 151 152 bcax v20.16b, v31.16b, v22.16b, v8 153 bcax v21.16b, v8.16b, 
v23.16b, v22 154 bcax v22.16b, v22.16b, v24.16b, v23 155 bcax v23.16b, v23.16b, v31.16b, v24 156 bcax v24.16b, v24.16b, v8.16b, v31 157 158 ld1r {v31.2d}, [x9], #8 159 160 bcax v17.16b, v25.16b, v19.16b, v3 161 bcax v18.16b, v3.16b, v15.16b, v19 162 bcax v19.16b, v19.16b, v16.16b, v15 163 bcax v15.16b, v15.16b, v25.16b, v16 164 bcax v16.16b, v16.16b, v3.16b, v25 165 166 bcax v10.16b, v29.16b, v12.16b, v26 167 bcax v11.16b, v26.16b, v13.16b, v12 168 bcax v12.16b, v12.16b, v14.16b, v13 169 bcax v13.16b, v13.16b, v29.16b, v14 170 bcax v14.16b, v14.16b, v26.16b, v29 171 172 bcax v7.16b, v30.16b, v9.16b, v4 173 bcax v8.16b, v4.16b, v5.16b, v9 174 bcax v9.16b, v9.16b, v6.16b, v5 175 bcax v5.16b, v5.16b, v30.16b, v6 176 bcax v6.16b, v6.16b, v4.16b, v30 177 178 bcax v3.16b, v27.16b, v0.16b, v28 179 bcax v4.16b, v28.16b, v1.16b, v0 180 bcax v0.16b, v0.16b, v2.16b, v1 181 bcax v1.16b, v1.16b, v27.16b, v2 182 bcax v2.16b, v2.16b, v28.16b, v27 183 184 eor v0.16b, v0.16b, v31.16b 185 186 cbnz w8, 3b 187 cond_yield 4f, x8, x9 188 cbnz w2, 0b 189 190 /* save state */ 191 4: st1 { v0.1d- v3.1d}, [x0], #32 192 st1 { v4.1d- v7.1d}, [x0], #32 193 st1 { v8.1d-v11.1d}, [x0], #32 194 st1 {v12.1d-v15.1d}, [x0], #32 195 st1 {v16.1d-v19.1d}, [x0], #32 196 st1 {v20.1d-v23.1d}, [x0], #32 197 st1 {v24.1d}, [x0] 198 mov w0, w2 199 ret 200 SYM_FUNC_END(sha3_ce_transform) 201 202 .section ".rodata", "a" 203 .align 8 204 .Lsha3_rcon: 205 .quad 0x0000000000000001, 0x00000000 206 .quad 0x8000000080008000, 0x00000000 207 .quad 0x8000000080008081, 0x80000000 208 .quad 0x0000000000000088, 0x00000000 209 .quad 0x000000008000808b, 0x80000000 210 .quad 0x8000000000008003, 0x80000000 211 .quad 0x000000000000800a, 0x80000000 212 .quad 0x8000000000008080, 0x00000000
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.