/* SPDX-License-Identifier: GPL-2.0 */
/*
 * NH - ε-almost-universal hash function, x86_64 SSE2 accelerated
 *
 * Copyright 2018 Google LLC
 *
 * Author: Eric Biggers <ebiggers@google.com>
 */

#include <linux/linkage.h>
#include <linux/cfi_types.h>

/* Running sums, one register (two 64-bit lanes) per pass */
#define		PASS0_SUMS	%xmm0
#define		PASS1_SUMS	%xmm1
#define		PASS2_SUMS	%xmm2
#define		PASS3_SUMS	%xmm3

/* Window of four consecutive 16-byte key strides */
#define		K0		%xmm4
#define		K1		%xmm5
#define		K2		%xmm6
#define		K3		%xmm7

/* Scratch registers */
#define		TMP0		%xmm8
#define		TMP1		%xmm9
#define		TMP2		%xmm10
#define		TMP3		%xmm11
#define		TMP4		%xmm12
#define		TMP5		%xmm13
#define		TMP6		%xmm14
#define		TMP7		%xmm15

/* Function arguments, in the order they appear in the prototype below */
#define		KEY		%rdi
#define		MESSAGE		%rsi
#define		MESSAGE_LEN	%rdx
#define		HASH		%rcx

/*
 * _nh_stride ka, kb, kc, kd, off
 *
 * Consume the 16-byte message stride at off(MESSAGE) for all four passes.
 *
 * ka, kb and kc must already hold the key strides that passes 0, 1 and 2
 * combine with this message stride.  The key stride for pass 3 is loaded
 * from off(KEY) into kd.  ka is used as a work register and is destroyed,
 * so the next invocation can receive it as its kd.
 *
 * For each pass the four 32-bit sums (message word + key word) s0..s3 are
 * formed, then s0 * s2 is added to the low 64-bit lane of the pass
 * accumulator and s1 * s3 to the high lane.
 *
 * Clobbers: ka, TMP1-TMP7.
 */
.macro _nh_stride	ka, kb, kc, kd, off

	// Bring in the newest key stride and the message stride
	movdqu		\off(KEY), \kd
	movdqu		\off(MESSAGE), TMP1

	// message + key, as 32-bit lanes, once per pass.  The pass 0 sum is
	// built directly in the key register, which saves a copy.  That add
	// has to come before TMP1 is turned into the pass 1 sum.
	movdqa		TMP1, TMP3
	movdqa		TMP1, TMP2
	paddd		\kd, TMP3
	paddd		\kc, TMP2
	paddd		TMP1, \ka
	paddd		\kb, TMP1

	// pmuludq only multiplies dwords 0 and 2 of its operands.  Put
	// (s0, s1) in those slots of one copy ...
	pshufd		$0x10, \ka, TMP4
	pshufd		$0x10, TMP1, TMP5
	pshufd		$0x10, TMP2, TMP6
	pshufd		$0x10, TMP3, TMP7

	// ... and (s2, s3) in those slots of the other, in place
	pshufd		$0x32, \ka, \ka
	pshufd		$0x32, TMP1, TMP1
	pshufd		$0x32, TMP2, TMP2
	pshufd		$0x32, TMP3, TMP3

	// 32x32 => 64 bit products: (s0 * s2, s1 * s3)
	pmuludq		TMP4, \ka
	pmuludq		TMP5, TMP1
	pmuludq		TMP6, TMP2
	pmuludq		TMP7, TMP3

	// Fold the products into the per-pass accumulators
	paddq		\ka, PASS0_SUMS
	paddq		TMP1, PASS1_SUMS
	paddq		TMP2, PASS2_SUMS
	paddq		TMP3, PASS3_SUMS
.endm

/*
 * void nh_sse2(const u32 *key, const u8 *message, size_t message_len,
 *		__le64 hash[NH_NUM_PASSES])
 *
 * It's guaranteed that message_len % 16 == 0.
 *
 * The message is consumed in 16-byte strides.  Four 64-bit results, one per
 * pass, are written to 'hash'; 32 bytes are stored in total.
 *
 * Modifies KEY, MESSAGE, MESSAGE_LEN, the flags and %xmm0-%xmm15.
 */
SYM_TYPED_FUNC_START(nh_sse2)

	// Start every pass from a zero sum
	pxor		PASS0_SUMS, PASS0_SUMS
	pxor		PASS1_SUMS, PASS1_SUMS
	pxor		PASS2_SUMS, PASS2_SUMS
	pxor		PASS3_SUMS, PASS3_SUMS

	// Preload the first three key strides.  KEY is left pointing at the
	// fourth one, which is where the first _nh_stride loads from.
	movdqu		0x00(KEY), K0
	movdqu		0x10(KEY), K1
	movdqu		0x20(KEY), K2
	add		$0x30, KEY

	// Main loop: four strides (64 bytes) per iteration.  MESSAGE_LEN is
	// kept biased by -64 so the sign of the subtraction ends the loop.
	sub		$0x40, MESSAGE_LEN
	jl		.Ltail
.Lloop_64:
	// The key registers rotate by one position per stride: the register
	// destroyed by one stride receives the fresh key load in the next.
	_nh_stride	K0, K1, K2, K3, 0x00
	_nh_stride	K1, K2, K3, K0, 0x10
	_nh_stride	K2, K3, K0, K1, 0x20
	_nh_stride	K3, K0, K1, K2, 0x30
	add		$0x40, MESSAGE
	add		$0x40, KEY
	sub		$0x40, MESSAGE_LEN
	jge		.Lloop_64

.Ltail:
	// The low six bits of the biased count are the number of bytes left
	// over (the bias is a multiple of 64).  Up to three strides remain.
	and		$0x3f, MESSAGE_LEN
	jz		.Lfinish
	_nh_stride	K0, K1, K2, K3, 0x00

	cmp		$0x10, MESSAGE_LEN	// exactly one stride left over?
	je		.Lfinish
	_nh_stride	K1, K2, K3, K0, 0x10

	cmp		$0x20, MESSAGE_LEN	// exactly two strides left over?
	je		.Lfinish
	_nh_stride	K2, K3, K0, K1, 0x20

.Lfinish:
	// Each accumulator holds two partial sums (low lane A, high lane B).
	// Add A + B for every pass and write the four totals to 'hash'.

	// Passes 0 and 1
	movdqa		PASS0_SUMS, TMP0
	punpcklqdq	PASS1_SUMS, TMP0	// => (PASS0_SUM_A PASS1_SUM_A)
	punpckhqdq	PASS1_SUMS, PASS0_SUMS	// => (PASS0_SUM_B PASS1_SUM_B)
	paddq		PASS0_SUMS, TMP0
	movdqu		TMP0, 0x00(HASH)

	// Passes 2 and 3
	movdqa		PASS2_SUMS, TMP1
	punpcklqdq	PASS3_SUMS, TMP1	// => (PASS2_SUM_A PASS3_SUM_A)
	punpckhqdq	PASS3_SUMS, PASS2_SUMS	// => (PASS2_SUM_B PASS3_SUM_B)
	paddq		PASS2_SUMS, TMP1
	movdqu		TMP1, 0x10(HASH)
	RET
SYM_FUNC_END(nh_sse2)
/*
 * Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
 * TOMOYO® is a registered trademark of NTT DATA CORPORATION.
 */