~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/hexagon/lib/memset.S

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /arch/hexagon/lib/memset.S (Version linux-6.12-rc7) and /arch/mips/lib/memset.S (Version linux-6.10.14)


  1 /* SPDX-License-Identifier: GPL-2.0-only */    << 
  2 /*                                                  1 /*
  3  * Copyright (c) 2011, The Linux Foundation. A !!   2  * This file is subject to the terms and conditions of the GNU General Public
                                                   >>   3  * License.  See the file "COPYING" in the main directory of this archive
                                                   >>   4  * for more details.
                                                   >>   5  *
                                                   >>   6  * Copyright (C) 1998, 1999, 2000 by Ralf Baechle
                                                   >>   7  * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
                                                   >>   8  * Copyright (C) 2007 by Maciej W. Rozycki
                                                   >>   9  * Copyright (C) 2011, 2012 MIPS Technologies, Inc.
  4  */                                                10  */
                                                   >>  11 #include <linux/export.h>
                                                   >>  12 #include <asm/asm.h>
                                                   >>  13 #include <asm/asm-offsets.h>
                                                   >>  14 #include <asm/regdef.h>
                                                   >>  15 
                                                   >>  16 #if LONGSIZE == 4
                                                   >>  17 #define LONG_S_L swl
                                                   >>  18 #define LONG_S_R swr
                                                   >>  19 #else
                                                   >>  20 #define LONG_S_L sdl
                                                   >>  21 #define LONG_S_R sdr
                                                   >>  22 #endif
  5                                                    23 
                                                   >>  24 #ifdef CONFIG_CPU_MICROMIPS
                                                   >>  25 #define STORSIZE (LONGSIZE * 2)
                                                   >>  26 #define STORMASK (STORSIZE - 1)
                                                   >>  27 #define FILL64RG t8
                                                   >>  28 #define FILLPTRG t7
                                                   >>  29 #undef  LONG_S
                                                   >>  30 #define LONG_S LONG_SP
                                                   >>  31 #else
                                                   >>  32 #define STORSIZE LONGSIZE
                                                   >>  33 #define STORMASK LONGMASK
                                                   >>  34 #define FILL64RG a1
                                                   >>  35 #define FILLPTRG t0
                                                   >>  36 #endif
  6                                                    37 
  7 /* HEXAGON assembly optimized memset */        !!  38 #define LEGACY_MODE 1
  8 /* Replaces the standard library function mems !!  39 #define EVA_MODE    2
  9                                                    40 
                                                   >>  41 /*
                                                   >>  42  * No need to protect it with EVA #ifdefery. The generated block of code
                                                   >>  43  * will never be assembled if EVA is not enabled.
                                                   >>  44  */
                                                   >>  45 #define __EVAFY(insn, reg, addr) __BUILD_EVA_INSN(insn##e, reg, addr)
                                                   >>  46 #define ___BUILD_EVA_INSN(insn, reg, addr) __EVAFY(insn, reg, addr)
 10                                                    47 
 11         .macro HEXAGON_OPT_FUNC_BEGIN name     !!  48 #define EX(insn,reg,addr,handler)                       \
 12         .text                                  !!  49         .if \mode == LEGACY_MODE;                       \
 13         .p2align 4                             !!  50 9:              insn    reg, addr;                      \
 14         .globl \name                           !!  51         .else;                                          \
 15         .type  \name, @function                !!  52 9:              ___BUILD_EVA_INSN(insn, reg, addr);     \
 16 \name:                                         !!  53         .endif;                                         \
                                                   >>  54         .section __ex_table,"a";                        \
                                                   >>  55         PTR_WD  9b, handler;                            \
                                                   >>  56         .previous
                                                   >>  57 
                                                   >>  58         .macro  f_fill64 dst, offset, val, fixup, mode
                                                   >>  59         EX(LONG_S, \val, (\offset +  0 * STORSIZE)(\dst), \fixup)
                                                   >>  60         EX(LONG_S, \val, (\offset +  1 * STORSIZE)(\dst), \fixup)
                                                   >>  61         EX(LONG_S, \val, (\offset +  2 * STORSIZE)(\dst), \fixup)
                                                   >>  62         EX(LONG_S, \val, (\offset +  3 * STORSIZE)(\dst), \fixup)
                                                   >>  63 #if ((defined(CONFIG_CPU_MICROMIPS) && (LONGSIZE == 4)) || !defined(CONFIG_CPU_MICROMIPS))
                                                   >>  64         EX(LONG_S, \val, (\offset +  4 * STORSIZE)(\dst), \fixup)
                                                   >>  65         EX(LONG_S, \val, (\offset +  5 * STORSIZE)(\dst), \fixup)
                                                   >>  66         EX(LONG_S, \val, (\offset +  6 * STORSIZE)(\dst), \fixup)
                                                   >>  67         EX(LONG_S, \val, (\offset +  7 * STORSIZE)(\dst), \fixup)
                                                   >>  68 #endif
                                                   >>  69 #if (!defined(CONFIG_CPU_MICROMIPS) && (LONGSIZE == 4))
                                                   >>  70         EX(LONG_S, \val, (\offset +  8 * STORSIZE)(\dst), \fixup)
                                                   >>  71         EX(LONG_S, \val, (\offset +  9 * STORSIZE)(\dst), \fixup)
                                                   >>  72         EX(LONG_S, \val, (\offset + 10 * STORSIZE)(\dst), \fixup)
                                                   >>  73         EX(LONG_S, \val, (\offset + 11 * STORSIZE)(\dst), \fixup)
                                                   >>  74         EX(LONG_S, \val, (\offset + 12 * STORSIZE)(\dst), \fixup)
                                                   >>  75         EX(LONG_S, \val, (\offset + 13 * STORSIZE)(\dst), \fixup)
                                                   >>  76         EX(LONG_S, \val, (\offset + 14 * STORSIZE)(\dst), \fixup)
                                                   >>  77         EX(LONG_S, \val, (\offset + 15 * STORSIZE)(\dst), \fixup)
                                                   >>  78 #endif
 17         .endm                                      79         .endm
 18                                                    80 
 19         .macro HEXAGON_OPT_FUNC_FINISH name    !!  81         .align  5
 20         .size  \name, . - \name                !!  82 
                                                   >>  83         /*
                                                   >>  84          * Macro to generate the __bzero{,_user} symbol
                                                   >>  85          * Arguments:
                                                   >>  86          * mode: LEGACY_MODE or EVA_MODE
                                                   >>  87          */
                                                   >>  88         .macro __BUILD_BZERO mode
                                                   >>  89         /* Initialize __memset if this is the first time we call this macro */
                                                   >>  90         .ifnotdef __memset
                                                   >>  91         .set __memset, 1
                                                   >>  92         .hidden __memset /* Make sure it does not leak */
                                                   >>  93         .endif
                                                   >>  94 
                                                   >>  95         sltiu           t0, a2, STORSIZE        /* very small region? */
                                                   >>  96         .set            noreorder
                                                   >>  97         bnez            t0, .Lsmall_memset\@
                                                   >>  98          andi           t0, a0, STORMASK        /* aligned? */
                                                   >>  99         .set            reorder
                                                   >> 100 
                                                   >> 101 #ifdef CONFIG_CPU_MICROMIPS
                                                   >> 102         move            t8, a1                  /* used by 'swp' instruction */
                                                   >> 103         move            t9, a1
                                                   >> 104 #endif
                                                   >> 105         .set            noreorder
                                                   >> 106 #ifndef CONFIG_CPU_DADDI_WORKAROUNDS
                                                   >> 107         beqz            t0, 1f
                                                   >> 108          PTR_SUBU       t0, STORSIZE            /* alignment in bytes */
                                                   >> 109 #else
                                                   >> 110         .set            noat
                                                   >> 111         li              AT, STORSIZE
                                                   >> 112         beqz            t0, 1f
                                                   >> 113          PTR_SUBU       t0, AT                  /* alignment in bytes */
                                                   >> 114         .set            at
                                                   >> 115 #endif
                                                   >> 116         .set            reorder
                                                   >> 117 
                                                   >> 118 #ifndef CONFIG_CPU_NO_LOAD_STORE_LR
                                                   >> 119         R10KCBARRIER(0(ra))
                                                   >> 120 #ifdef __MIPSEB__
                                                   >> 121         EX(LONG_S_L, a1, (a0), .Lfirst_fixup\@) /* make word/dword aligned */
                                                   >> 122 #else
                                                   >> 123         EX(LONG_S_R, a1, (a0), .Lfirst_fixup\@) /* make word/dword aligned */
                                                   >> 124 #endif
                                                   >> 125         PTR_SUBU        a0, t0                  /* long align ptr */
                                                   >> 126         PTR_ADDU        a2, t0                  /* correct size */
                                                   >> 127 
                                                   >> 128 #else /* CONFIG_CPU_NO_LOAD_STORE_LR */
                                                   >> 129 #define STORE_BYTE(N)                           \
                                                   >> 130         EX(sb, a1, N(a0), .Lbyte_fixup\@);      \
                                                   >> 131         .set            noreorder;              \
                                                   >> 132         beqz            t0, 0f;                 \
                                                   >> 133          PTR_ADDU       t0, 1;                  \
                                                   >> 134         .set            reorder;
                                                   >> 135 
                                                   >> 136         PTR_ADDU        a2, t0                  /* correct size */
                                                   >> 137         PTR_ADDU        t0, 1
                                                   >> 138         STORE_BYTE(0)
                                                   >> 139         STORE_BYTE(1)
                                                   >> 140 #if LONGSIZE == 4
                                                   >> 141         EX(sb, a1, 2(a0), .Lbyte_fixup\@)
                                                   >> 142 #else
                                                   >> 143         STORE_BYTE(2)
                                                   >> 144         STORE_BYTE(3)
                                                   >> 145         STORE_BYTE(4)
                                                   >> 146         STORE_BYTE(5)
                                                   >> 147         EX(sb, a1, 6(a0), .Lbyte_fixup\@)
                                                   >> 148 #endif
                                                   >> 149 0:
                                                   >> 150         ori             a0, STORMASK
                                                   >> 151         xori            a0, STORMASK
                                                   >> 152         PTR_ADDIU       a0, STORSIZE
                                                   >> 153 #endif /* CONFIG_CPU_NO_LOAD_STORE_LR */
                                                   >> 154 1:      ori             t1, a2, 0x3f            /* # of full blocks */
                                                   >> 155         xori            t1, 0x3f
                                                   >> 156         andi            t0, a2, 0x40-STORSIZE
                                                   >> 157         beqz            t1, .Lmemset_partial\@  /* no block to fill */
                                                   >> 158 
                                                   >> 159         PTR_ADDU        t1, a0                  /* end address */
                                                   >> 160 1:      PTR_ADDIU       a0, 64
                                                   >> 161         R10KCBARRIER(0(ra))
                                                   >> 162         f_fill64 a0, -64, FILL64RG, .Lfwd_fixup\@, \mode
                                                   >> 163         bne             t1, a0, 1b
                                                   >> 164 
                                                   >> 165 .Lmemset_partial\@:
                                                   >> 166         R10KCBARRIER(0(ra))
                                                   >> 167         PTR_LA          t1, 2f                  /* where to start */
                                                   >> 168 #ifdef CONFIG_CPU_MICROMIPS
                                                   >> 169         LONG_SRL        t7, t0, 1
                                                   >> 170 #endif
                                                   >> 171 #if LONGSIZE == 4
                                                   >> 172         PTR_SUBU        t1, FILLPTRG
                                                   >> 173 #else
                                                   >> 174         .set            noat
                                                   >> 175         LONG_SRL        AT, FILLPTRG, 1
                                                   >> 176         PTR_SUBU        t1, AT
                                                   >> 177         .set            at
                                                   >> 178 #endif
                                                   >> 179         PTR_ADDU        a0, t0                  /* dest ptr */
                                                   >> 180         jr              t1
                                                   >> 181 
                                                   >> 182         /* ... but first do longs ... */
                                                   >> 183         f_fill64 a0, -64, FILL64RG, .Lpartial_fixup\@, \mode
                                                   >> 184 2:      andi            a2, STORMASK            /* At most one long to go */
                                                   >> 185 
                                                   >> 186         .set            noreorder
                                                   >> 187         beqz            a2, 1f
                                                   >> 188 #ifndef CONFIG_CPU_NO_LOAD_STORE_LR
                                                   >> 189          PTR_ADDU       a0, a2                  /* What's left */
                                                   >> 190         .set            reorder
                                                   >> 191         R10KCBARRIER(0(ra))
                                                   >> 192 #ifdef __MIPSEB__
                                                   >> 193         EX(LONG_S_R, a1, -1(a0), .Llast_fixup\@)
                                                   >> 194 #else
                                                   >> 195         EX(LONG_S_L, a1, -1(a0), .Llast_fixup\@)
                                                   >> 196 #endif
                                                   >> 197 #else /* CONFIG_CPU_NO_LOAD_STORE_LR */
                                                   >> 198          PTR_SUBU       t0, $0, a2
                                                   >> 199         .set            reorder
                                                   >> 200         move            a2, zero                /* No remaining longs */
                                                   >> 201         PTR_ADDIU       t0, 1
                                                   >> 202         STORE_BYTE(0)
                                                   >> 203         STORE_BYTE(1)
                                                   >> 204 #if LONGSIZE == 4
                                                   >> 205         EX(sb, a1, 2(a0), .Lbyte_fixup\@)
                                                   >> 206 #else
                                                   >> 207         STORE_BYTE(2)
                                                   >> 208         STORE_BYTE(3)
                                                   >> 209         STORE_BYTE(4)
                                                   >> 210         STORE_BYTE(5)
                                                   >> 211         EX(sb, a1, 6(a0), .Lbyte_fixup\@)
                                                   >> 212 #endif
                                                   >> 213 0:
                                                   >> 214 #endif /* CONFIG_CPU_NO_LOAD_STORE_LR */
                                                   >> 215 1:      move            a2, zero
                                                   >> 216         jr              ra
                                                   >> 217 
                                                   >> 218 .Lsmall_memset\@:
                                                   >> 219         PTR_ADDU        t1, a0, a2
                                                   >> 220         beqz            a2, 2f
                                                   >> 221 
                                                   >> 222 1:      PTR_ADDIU       a0, 1                   /* fill bytewise */
                                                   >> 223         R10KCBARRIER(0(ra))
                                                   >> 224         .set            noreorder
                                                   >> 225         bne             t1, a0, 1b
                                                   >> 226          EX(sb, a1, -1(a0), .Lsmall_fixup\@)
                                                   >> 227         .set            reorder
                                                   >> 228 
                                                   >> 229 2:      move            a2, zero
                                                   >> 230         jr              ra                      /* done */
                                                   >> 231         .if __memset == 1
                                                   >> 232         END(memset)
                                                   >> 233         .set __memset, 0
                                                   >> 234         .hidden __memset
                                                   >> 235         .endif
                                                   >> 236 
                                                   >> 237 #ifdef CONFIG_CPU_NO_LOAD_STORE_LR
                                                   >> 238 .Lbyte_fixup\@:
                                                   >> 239         /*
                                                   >> 240          * unset_bytes = (#bytes - (#unaligned bytes)) - (-#unaligned bytes remaining + 1) + 1
                                                   >> 241          *      a2     =             a2                -              t0                   + 1
                                                   >> 242          */
                                                   >> 243         PTR_SUBU        a2, t0
                                                   >> 244         PTR_ADDIU       a2, 1
                                                   >> 245         jr              ra
                                                   >> 246 #endif /* CONFIG_CPU_NO_LOAD_STORE_LR */
                                                   >> 247 
                                                   >> 248 .Lfirst_fixup\@:
                                                   >> 249         /* unset_bytes already in a2 */
                                                   >> 250         jr      ra
                                                   >> 251 
                                                   >> 252 .Lfwd_fixup\@:
                                                   >> 253         /*
                                                   >> 254          * unset_bytes = partial_start_addr +  #bytes   -     fault_addr
                                                   >> 255          *      a2     =         t1         + (a2 & 3f) - $28->task->BUADDR
                                                   >> 256          */
                                                   >> 257         PTR_L           t0, TI_TASK($28)
                                                   >> 258         andi            a2, 0x3f
                                                   >> 259         LONG_L          t0, THREAD_BUADDR(t0)
                                                   >> 260         LONG_ADDU       a2, t1
                                                   >> 261         LONG_SUBU       a2, t0
                                                   >> 262         jr              ra
                                                   >> 263 
                                                   >> 264 .Lpartial_fixup\@:
                                                   >> 265         /*
                                                   >> 266          * unset_bytes = partial_end_addr +      #bytes     -     fault_addr
                                                   >> 267          *      a2     =       a0         + (a2 & STORMASK) - $28->task->BUADDR
                                                   >> 268          */
                                                   >> 269         PTR_L           t0, TI_TASK($28)
                                                   >> 270         andi            a2, STORMASK
                                                   >> 271         LONG_L          t0, THREAD_BUADDR(t0)
                                                   >> 272         LONG_ADDU       a2, a0
                                                   >> 273         LONG_SUBU       a2, t0
                                                   >> 274         jr              ra
                                                   >> 275 
                                                   >> 276 .Llast_fixup\@:
                                                   >> 277         /* unset_bytes already in a2 */
                                                   >> 278         jr              ra
                                                   >> 279 
                                                   >> 280 .Lsmall_fixup\@:
                                                   >> 281         /*
                                                   >> 282          * unset_bytes = end_addr - current_addr + 1
                                                   >> 283          *      a2     =    t1    -      a0      + 1
                                                   >> 284          */
                                                   >> 285         PTR_SUBU        a2, t1, a0
                                                   >> 286         PTR_ADDIU       a2, 1
                                                   >> 287         jr              ra
                                                   >> 288 
 21         .endm                                     289         .endm
 22                                                   290 
 23 /* FUNCTION: memset (v2 version) */            !! 291 /*
 24 #if __HEXAGON_ARCH__ < 3                       !! 292  * memset(void *s, int c, size_t n)
 25 HEXAGON_OPT_FUNC_BEGIN memset                  !! 293  *
 26         {                                      !! 294  * a0: start of area to clear
 27                 r6 = #8                        !! 295  * a1: char to fill with
 28                 r7 = extractu(r0, #3 , #0)     !! 296  * a2: size of area to clear
 29                 p0 = cmp.eq(r2, #0)            !! 297  */
 30                 p1 = cmp.gtu(r2, #7)           !! 298 
 31         }                                      !! 299 LEAF(memset)
 32         {                                      !! 300 EXPORT_SYMBOL(memset)
 33                 r4 = vsplatb(r1)               !! 301         move            v0, a0                  /* result */
 34                 r8 = r0           /* leave r0  !! 302         beqz            a1, 1f
 35                 r9 = sub(r6, r7)  /* bytes unt !! 303 
 36                 if p0 jumpr r31   /* count ==  !! 304         andi            a1, 0xff                /* spread fillword */
 37         }                                      !! 305         LONG_SLL                t1, a1, 8
 38         {                                      !! 306         or              a1, t1
 39                 r3 = #0                        !! 307         LONG_SLL                t1, a1, 16
 40                 r7 = #0                        !! 308 #if LONGSIZE == 8
 41                 p0 = tstbit(r9, #0)            !! 309         or              a1, t1
 42                 if p1 jump 2f /* skip byte loo !! 310         LONG_SLL                t1, a1, 32
 43         }                                      !! 311 #endif
 44                                                !! 312         or              a1, t1
 45 /* less than 8 bytes to set, so just set a byt !! 313 1:
 46                                                !! 314 #ifndef CONFIG_EVA
 47                 loop0(1f, r2) /* byte loop */  !! 315 FEXPORT(__bzero)
 48         .falign                                !! 316 EXPORT_SYMBOL(__bzero)
 49 1: /* byte loop */                             !! 317 #endif
 50         {                                      !! 318         __BUILD_BZERO LEGACY_MODE
 51                 memb(r8++#1) = r4              !! 319 
 52         }:endloop0                             !! 320 #ifdef CONFIG_EVA
 53                 jumpr r31                      !! 321 LEAF(__bzero)
 54         .falign                                !! 322 EXPORT_SYMBOL(__bzero)
 55 2: /* skip byte loop */                        !! 323         __BUILD_BZERO EVA_MODE
 56         {                                      !! 324 END(__bzero)
 57                 r6 = #1                        << 
 58                 p0 = tstbit(r9, #1)            << 
 59                 p1 = cmp.eq(r2, #1)            << 
 60                 if !p0 jump 3f /* skip initial << 
 61         }                                      << 
 62         {                                      << 
 63                 memb(r8++#1) = r4              << 
 64                 r3:2 = sub(r3:2, r7:6)         << 
 65                 if p1 jumpr r31                << 
 66         }                                      << 
 67         .falign                                << 
 68 3: /* skip initial byte store */               << 
 69         {                                      << 
 70                 r6 = #2                        << 
 71                 p0 = tstbit(r9, #2)            << 
 72                 p1 = cmp.eq(r2, #2)            << 
 73                 if !p0 jump 4f /* skip initial << 
 74         }                                      << 
 75         {                                      << 
 76                 memh(r8++#2) = r4              << 
 77                 r3:2 = sub(r3:2, r7:6)         << 
 78                 if p1 jumpr r31                << 
 79         }                                      << 
 80         .falign                                << 
 81 4: /* skip initial half store */               << 
 82         {                                      << 
 83                 r6 = #4                        << 
 84                 p0 = cmp.gtu(r2, #7)           << 
 85                 p1 = cmp.eq(r2, #4)            << 
 86                 if !p0 jump 5f /* skip initial << 
 87         }                                      << 
 88         {                                      << 
 89                 memw(r8++#4) = r4              << 
 90                 r3:2 = sub(r3:2, r7:6)         << 
 91                 p0 = cmp.gtu(r2, #11)          << 
 92                 if p1 jumpr r31                << 
 93         }                                      << 
 94         .falign                                << 
 95 5: /* skip initial word store */               << 
 96         {                                      << 
 97                 r10 = lsr(r2, #3)              << 
 98                 p1 = cmp.eq(r3, #1)            << 
 99                 if !p0 jump 7f /* skip double  << 
100         }                                      << 
101         {                                      << 
102                 r5 = r4                        << 
103                 r6 = #8                        << 
104                 loop0(6f, r10) /* double loop  << 
105         }                                      << 
106                                                << 
107 /* set bytes a double word at a time  */       << 
108                                                << 
109         .falign                                << 
110 6: /* double loop */                           << 
111         {                                      << 
112                 memd(r8++#8) = r5:4            << 
113                 r3:2 = sub(r3:2, r7:6)         << 
114                 p1 = cmp.eq(r2, #8)            << 
115         }:endloop0                             << 
116         .falign                                << 
117 7: /* skip double loop */                      << 
118         {                                      << 
119                 p0 = tstbit(r2, #2)            << 
120                 if p1 jumpr r31                << 
121         }                                      << 
122         {                                      << 
123                 r6 = #4                        << 
124                 p0 = tstbit(r2, #1)            << 
125                 p1 = cmp.eq(r2, #4)            << 
126                 if !p0 jump 8f /* skip final w << 
127         }                                      << 
128         {                                      << 
129                 memw(r8++#4) = r4              << 
130                 r3:2 = sub(r3:2, r7:6)         << 
131                 if p1 jumpr r31                << 
132         }                                      << 
133         .falign                                << 
134 8: /* skip final word store */                 << 
135         {                                      << 
136                 p1 = cmp.eq(r2, #2)            << 
137                 if !p0 jump 9f /* skip final h << 
138         }                                      << 
139         {                                      << 
140                 memh(r8++#2) = r4              << 
141                 if p1 jumpr r31                << 
142         }                                      << 
143         .falign                                << 
144 9: /* skip final half store */                 << 
145         {                                      << 
146                 memb(r8++#1) = r4              << 
147                 jumpr r31                      << 
148         }                                      << 
149 HEXAGON_OPT_FUNC_FINISH memset                 << 
150 #endif                                         << 
151                                                << 
152                                                << 
153 /*  FUNCTION: memset (v3 and higher version)  */ << 
154 #if __HEXAGON_ARCH__ >= 3                      << 
/*
 * memset, the variant assembled when __HEXAGON_ARCH__ >= 3.
 *
 * Register roles, as used by the code below:
 *   r0     destination pointer; never written in this routine
 *   r1     fill value; used directly by the single-byte stores
 *   r2     number of bytes still to be written
 *   r6     running store pointer (starts as a copy of r0)
 *   r7     vsplatb(r1): the fill byte replicated across a word
 *   r5:4   combine(r7,r7): the same pattern as a doubleword
 *   r3, r8, p0, p1   scratch
 *
 * NOTE(review): presumably r0/r1/r2 are the C arguments (s, c, n) and
 * r0 is the return value -- confirm against the Hexagon ABI.
 * NOTE(review): the comments below assume Hexagon packet semantics:
 * instructions grouped in one { } packet read the register values
 * from before the packet, and a ".new" predicate is the one produced
 * inside the same packet -- confirm in the architecture manual.
 */
155 HEXAGON_OPT_FUNC_BEGIN memset                  << 
            /* Build the word pattern, copy the pointer, leave at once if r2 == 0. */
156         {                                      << 
157                 r7=vsplatb(r1)                 << 
158                 r6 = r0                        << 
159                 if (r2==#0) jump:nt .L1        << 
160         }                                      << 
            /* Build the doubleword pattern; more than 8 bytes takes the path at .L3. */
161         {                                      << 
162                 r5:4=combine(r7,r7)            << 
163                 p0 = cmp.gtu(r2,#8)            << 
164                 if (p0.new) jump:nt .L3        << 
165         }                                      << 
            /* 1..8 bytes: plain byte loop running r2 times, then return. */
166         {                                      << 
167                 r3 = r0                        << 
168                 loop0(.L47,r2)                 << 
169         }                                      << 
170         .falign                                << 
171 .L47:                                          << 
172         {                                      << 
173                 memb(r3++#1) = r1              << 
174         }:endloop0 /* start=.L47 */            << 
175                 jumpr r31                      << 
            /* More than 8 bytes. Bit 0 of r0 clear: skip to .L8. p1 = (r2 == 1). */
176 .L3:                                           << 
177         {                                      << 
178                 p0 = tstbit(r0,#0)             << 
179                 if (!p0.new) jump:nt .L8       << 
180                 p1 = cmp.eq(r2, #1)            << 
181         }                                      << 
            /* Odd address: store one byte at r0, continue at r0+1 with one byte fewer; */
            /* return through .L1 if p1 was set by the previous packet. */
182         {                                      << 
183                 r6 = add(r0, #1)               << 
184                 r2 = add(r2,#-1)               << 
185                 memb(r0) = r1                  << 
186                 if (p1) jump .L1               << 
187         }                                      << 
            /* Bit 1 of r6 set: store one halfword so r6 becomes a multiple of 4. */
188 .L8:                                           << 
189         {                                      << 
190                 p0 = tstbit(r6,#1)             << 
191                 if (!p0.new) jump:nt .L10      << 
192         }                                      << 
            /* Return through .L1 if the halfword was everything that remained. */
193         {                                      << 
194                 r2 = add(r2,#-2)               << 
195                 memh(r6++#2) = r7              << 
196                 p0 = cmp.eq(r2, #2)            << 
197                 if (p0.new) jump:nt .L1        << 
198         }                                      << 
            /* Bit 2 of r6 set: store one word so r6 becomes a multiple of 8. */
199 .L10:                                          << 
200         {                                      << 
201                 p0 = tstbit(r6,#2)             << 
202                 if (!p0.new) jump:nt .L12      << 
203         }                                      << 
            /* Return through .L1 if the word was everything that remained. */
204         {                                      << 
205                 r2 = add(r2,#-4)               << 
206                 memw(r6++#4) = r7              << 
207                 p0 = cmp.eq(r2, #4)            << 
208                 if (p0.new) jump:nt .L1        << 
209         }                                      << 
            /* 127 bytes or fewer left: go straight to the doubleword loop at .L14. */
210 .L12:                                          << 
211         {                                      << 
212                 p0 = cmp.gtu(r2,#127)          << 
213                 if (!p0.new) jump:nt .L14      << 
214         }                                      << 
            /* 128 bytes or more: store up to three doublewords until r6 is a */
            /* multiple of 32, jumping to .L17 as soon as it is. */
215                 r3 = and(r6,#31)               << 
216                 if (r3==#0) jump:nt .L17       << 
217         {                                      << 
218                 memd(r6++#8) = r5:4            << 
219                 r2 = add(r2,#-8)               << 
220         }                                      << 
221                 r3 = and(r6,#31)               << 
222                 if (r3==#0) jump:nt .L17       << 
223         {                                      << 
224                 memd(r6++#8) = r5:4            << 
225                 r2 = add(r2,#-8)               << 
226         }                                      << 
227                 r3 = and(r6,#31)               << 
228                 if (r3==#0) jump:nt .L17       << 
229         {                                      << 
230                 memd(r6++#8) = r5:4            << 
231                 r2 = add(r2,#-8)               << 
232         }                                      << 
            /* r3 = number of whole 32-byte chunks left. A non-zero r1 takes the */
            /* store loop at .L18; the test looks at all of r1, not only its low byte. */
233 .L17:                                          << 
234         {                                      << 
235                 r3 = lsr(r2,#5)                << 
236                 if (r1!=#0) jump:nt .L18       << 
237         }                                      << 
            /* r1 == 0: set up the dczeroa-only loop. */
            /* NOTE(review): loop0 presumably takes the chunk count, i.e. the value r3 */
            /* held before this packet overwrites it -- confirm. The values written to */
            /* r8 and r3 here are not read later on this path. */
238         {                                      << 
239                 r8 = r3                        << 
240                 r3 = r6                        << 
241                 loop0(.L46,r3)                 << 
242         }                                      << 
243         .falign                                << 
            /* One dczeroa per iteration; pointer and count both move by 32. */
            /* NOTE(review): dczeroa presumably zero-fills the 32-byte line at r6 -- */
            /* confirm in the architecture manual. */
244 .L46:                                          << 
245         {                                      << 
246                 dczeroa(r6)                    << 
247                 r6 = add(r6,#32)               << 
248                 r2 = add(r2,#-32)              << 
249         }:endloop0 /* start=.L46 */            << 
            /* Fewer than 8 bytes left: skip to the tail at .L28. Otherwise run */
            /* r8 = r2 / 8 doubleword stores. */
250 .L14:                                          << 
251         {                                      << 
252                 p0 = cmp.gtu(r2,#7)            << 
253                 if (!p0.new) jump:nt .L28      << 
254                 r8 = lsr(r2,#3)                << 
255         }                                      << 
256                 loop0(.L44,r8)                 << 
257         .falign                                << 
258 .L44:                                          << 
259         {                                      << 
260                 memd(r6++#8) = r5:4            << 
261                 r2 = add(r2,#-8)               << 
262         }:endloop0 /* start=.L44 */            << 
            /* Tail: bit 2 of the remaining count set means one more word store. */
263 .L28:                                          << 
264         {                                      << 
265                 p0 = tstbit(r2,#2)             << 
266                 if (!p0.new) jump:nt .L33      << 
267         }                                      << 
268         {                                      << 
269                 r2 = add(r2,#-4)               << 
270                 memw(r6++#4) = r7              << 
271         }                                      << 
            /* Bit 1 of the remaining count set means one more halfword store. */
272 .L33:                                          << 
273         {                                      << 
274                 p0 = tstbit(r2,#1)             << 
275                 if (!p0.new) jump:nt .L35      << 
276         }                                      << 
277         {                                      << 
278                 r2 = add(r2,#-2)               << 
279                 memh(r6++#2) = r7              << 
280         }                                      << 
            /* Exactly one byte left: store it. */
281 .L35:                                          << 
282                 p0 = cmp.eq(r2,#1)             << 
283                 if (p0) memb(r6) = r1          << 
            /* Common return point. */
284 .L1:                                           << 
285                 jumpr r31                      << 
            /* Non-zero r1: for each of the r3 32-byte chunks issue dczeroa on the */
            /* line, then overwrite it with four doubleword stores of the pattern. */
286 .L18:                                          << 
287                 loop0(.L45,r3)                 << 
288         .falign                                << 
289 .L45:                                          << 
290                 dczeroa(r6)                    << 
291         {                                      << 
292                 memd(r6++#8) = r5:4            << 
293                 r2 = add(r2,#-32)              << 
294         }                                      << 
295                 memd(r6++#8) = r5:4            << 
296                 memd(r6++#8) = r5:4            << 
297         {                                      << 
298                 memd(r6++#8) = r5:4            << 
299         }:endloop0 /* start=.L45  */           << 
            /* Finish the remaining (fewer than 32) bytes in the shared code at .L14. */
300                 jump .L14                      << 
301 HEXAGON_OPT_FUNC_FINISH memset                 << 
302 #endif                                            325 #endif
                                                      

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php