~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/arm/crypto/nh-neon-core.S

Version: ~ [ linux-6.11-rc3 ] ~ [ linux-6.10.4 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.45 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.104 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.164 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.223 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.281 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.319 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /* SPDX-License-Identifier: GPL-2.0 */
  2 /*
  3  * NH - ε-almost-universal hash function, NEON accelerated version
  4  *
  5  * Copyright 2018 Google LLC
  6  *
  7  * Author: Eric Biggers <ebiggers@google.com>
  8  */
  9 
 10 #include <linux/linkage.h>
 11 
 12         .text
 13         .fpu            neon
 14 
            // Core registers holding the arguments of nh_neon(), in the
            // order of its prototype: key, message, message_len, hash.
 15         KEY             .req    r0
 16         MESSAGE         .req    r1
 17         MESSAGE_LEN     .req    r2
 18         HASH            .req    r3
 19 
            // Per-pass accumulators.  Each PASSn_SUMS is a 128-bit q register
            // that vmlal.u32 updates as two 64-bit lanes; PASSn_SUM_A and
            // PASSn_SUM_B name its low and high 64-bit halves (d registers)
            // so the two lanes can be added together at the end.
 20         PASS0_SUMS      .req    q0
 21         PASS0_SUM_A     .req    d0
 22         PASS0_SUM_B     .req    d1
 23         PASS1_SUMS      .req    q1
 24         PASS1_SUM_A     .req    d2
 25         PASS1_SUM_B     .req    d3
 26         PASS2_SUMS      .req    q2
 27         PASS2_SUM_A     .req    d4
 28         PASS2_SUM_B     .req    d5
 29         PASS3_SUMS      .req    q3
 30         PASS3_SUM_A     .req    d6
 31         PASS3_SUM_B     .req    d7
            // Four 16-byte key strides kept live in registers; their roles
            // rotate from one _nh_stride invocation to the next.
            // NOTE(review): q4-q7 alias d8-d15, which are callee-saved under
            // the AAPCS, and nothing in this file saves them.  Presumably
            // callers bracket nh_neon() with kernel_neon_begin()/
            // kernel_neon_end() -- confirm against the C glue code.
 32         K0              .req    q4
 33         K1              .req    q5
 34         K2              .req    q6
 35         K3              .req    q7
            // Temporaries.  Tn_L / Tn_H name the low and high 64-bit halves
            // of Tn so they can be used as the two vmlal.u32 source operands.
            // T3 also receives the freshly loaded message stride.
 36         T0              .req    q8
 37         T0_L            .req    d16
 38         T0_H            .req    d17
 39         T1              .req    q9
 40         T1_L            .req    d18
 41         T1_H            .req    d19
 42         T2              .req    q10
 43         T2_L            .req    d20
 44         T2_H            .req    d21
 45         T3              .req    q11
 46         T3_L            .req    d22
 47         T3_H            .req    d23
 48 
 48 
    /*
     * _nh_stride k0, k1, k2, k3
     *
     * Process one 16-byte message stride for all four passes.
     *
     * In:   \k0, \k1, \k2 hold the three most recently loaded key strides;
     *       MESSAGE and KEY point at the next message and key stride.
     * Out:  \k3 is overwritten with the next 16 bytes of key; MESSAGE and
     *       KEY are each advanced by 16 bytes; PASSn_SUMS are updated.
     * Clobbers: T0-T3.
     *
     * Pass n adds the message words to key stride \kn, then accumulates
     * (word0 * word2, word1 * word3) of that sum as two 64-bit products.
     */
 49 .macro _nh_stride       k0, k1, k2, k3
 50 
 51         // Load next message stride
            // (byte-wise load of 16 bytes into T3, post-incrementing MESSAGE)
 52         vld1.8          {T3}, [MESSAGE]!
 53 
 54         // Load next key stride
            // (16 bytes as 32-bit elements into \k3, post-incrementing KEY)
 55         vld1.32         {\k3}, [KEY]!
 56 
 57         // Add message words to key words
            // (lane-wise 32-bit adds that wrap modulo 2^32; T3 is written
            // last because it is also the source of the message words)
 58         vadd.u32        T0, T3, \k0
 59         vadd.u32        T1, T3, \k1
 60         vadd.u32        T2, T3, \k2
 61         vadd.u32        T3, T3, \k3
 62 
 63         // Multiply 32x32 => 64 and accumulate
            // (each vmlal.u32 multiplies the two low-half words by the two
            // high-half words and adds the 64-bit products to the two lanes
            // of the pass accumulator)
 64         vmlal.u32       PASS0_SUMS, T0_L, T0_H
 65         vmlal.u32       PASS1_SUMS, T1_L, T1_H
 66         vmlal.u32       PASS2_SUMS, T2_L, T2_H
 67         vmlal.u32       PASS3_SUMS, T3_L, T3_H
 68 .endm
 69 
 70 /*
 71  * void nh_neon(const u32 *key, const u8 *message, size_t message_len,
 72  *              __le64 hash[NH_NUM_PASSES])
 73  *
 74  * It's guaranteed that message_len % 16 == 0.
     *
     * In:   r0 = key, r1 = message, r2 = message_len, r3 = hash
     * Out:  four 64-bit sums (32 bytes), one per pass, stored at 'hash'
     * Clobbers: r0-r2, flags, q0-q11
     *
     * For message stride j, pass n uses key stride j + n, so the key pointer
     * runs three strides (48 bytes) ahead of the message pointer.
 75  */
 76 ENTRY(nh_neon)
 77 
            // Preload the first three key strides and zero the four pass
            // accumulators.  The extra-indented vmov lines are interleaved
            // between the loads (presumably for scheduling).
 78         vld1.32         {K0,K1}, [KEY]!
 79           vmov.u64      PASS0_SUMS, #0
 80           vmov.u64      PASS1_SUMS, #0
 81         vld1.32         {K2}, [KEY]!
 82           vmov.u64      PASS2_SUMS, #0
 83           vmov.u64      PASS3_SUMS, #0
 84 
            // Main loop: 64 bytes (four strides) per iteration.  MESSAGE_LEN
            // is kept biased by -64 so the flags from 'subs' drive the loop.
            // NOTE(review): blt/bge are signed conditions, so this assumes
            // message_len < 2^31 -- confirm callers bound the length.
 85         subs            MESSAGE_LEN, MESSAGE_LEN, #64
 86         blt             .Lloop4_done
 87 .Lloop4:
            // The key register roles rotate by one on each stride, so after
            // four strides K0-K2 again hold the three live key strides.
 88         _nh_stride      K0, K1, K2, K3
 89         _nh_stride      K1, K2, K3, K0
 90         _nh_stride      K2, K3, K0, K1
 91         _nh_stride      K3, K0, K1, K2
 92         subs            MESSAGE_LEN, MESSAGE_LEN, #64
 93         bge             .Lloop4
 94 
 95 .Lloop4_done:
            // MESSAGE_LEN is now (bytes remaining - 64); masking with 63
            // recovers the remaining byte count, which is 0, 16, 32 or 48
            // given message_len % 16 == 0.  Handle up to three more strides.
 96         ands            MESSAGE_LEN, MESSAGE_LEN, #63
 97         beq             .Ldone
 98         _nh_stride      K0, K1, K2, K3
 99 
100         subs            MESSAGE_LEN, MESSAGE_LEN, #16
101         beq             .Ldone
102         _nh_stride      K1, K2, K3, K0
103 
104         subs            MESSAGE_LEN, MESSAGE_LEN, #16
105         beq             .Ldone
106         _nh_stride      K2, K3, K0, K1
107 
108 .Ldone:
109         // Sum the accumulators for each pass, then store the sums to 'hash'
            // (the two 64-bit lanes of each PASSn_SUMS are added into one
            // 64-bit result; T0 and T1 then hold the four results, which are
            // written out as 32 bytes with a byte-wise store)
110         vadd.u64        T0_L, PASS0_SUM_A, PASS0_SUM_B
111         vadd.u64        T0_H, PASS1_SUM_A, PASS1_SUM_B
112         vadd.u64        T1_L, PASS2_SUM_A, PASS2_SUM_B
113         vadd.u64        T1_H, PASS3_SUM_A, PASS3_SUM_B
114         vst1.8          {T0-T1}, [HASH]
115         bx              lr
116 ENDPROC(nh_neon)

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php