// SPDX-License-Identifier: GPL-2.0-only
// Copyright (C) 2021 ARM Limited.
// Original author: Mark Brown <broonie@kernel.org>
//
// Scalable Matrix Extension ZA context switch test
// Repeatedly writes unique test patterns into each ZA tile
// and reads them back to verify integrity.
//
// for x in `seq 1 NR_CPUS`; do sve-test & pids=$pids\ $! ; done
// (leave it running for as long as you want...)
// kill $pids

#include <asm/unistd.h>
#include "assembler.h"
#include "asm-offsets.h"
#include "sme-inst.h"

.arch_extension sve

#define MAXVL     2048
#define MAXVL_B   (MAXVL / 8)

// Backing store used by the test:
//   zaref:   in-memory shadow of the whole ZA array, sized for the largest
//            supported vector length (MAXVL_B rows of MAXVL_B bytes)
//   scratch: staging buffer holding a single vector of up to MAXVL_B bytes
.pushsection .text
.data
.align 4
zaref:
	.space	MAXVL_B * MAXVL_B
scratch:
	.space	MAXVL_B
.popsection

// memcpy: minimal byte-by-byte copy.
//   x0: destination address
//   x1: source address
//   x2: byte count (zero is allowed and copies nothing)
// Clobbers x0-x3
function memcpy
	cmp	x2, #0
	b.eq	.Lmemcpy_done		// empty copy: nothing to do
.Lmemcpy_byte:
	ldrb	w3, [x1], #1		// fetch a byte, step the source
	strb	w3, [x0], #1		// store it, step the destination
	subs	x2, x2, #1
	b.ne	.Lmemcpy_byte
.Lmemcpy_done:
	ret
endfunction

// pattern: fill the scratch buffer with a test pattern destined for ZA
//   x0: pid
//   x1: row in ZA
//   x2: generation
//
// The inputs are packed into a 32-bit seed word, which is then written to
// every 32-bit slot of scratch (all MAXVL_B bytes), incrementing by one per
// slot so that the low byte acts as a lane index:
//   bits 31:28	generation number (increments once per test_loop)
//   bits 27:16	pid
//   bits 15: 8	row number
//   bits  7: 0	32-bit lane index
function pattern
	mov	w3, wzr			// start from an all-zero seed
	bfi	w3, w2, #28, #4		// Generation -> bits 31:28
	bfi	w3, w0, #16, #12	// PID        -> bits 27:16
	bfi	w3, w1, #8, #8		// Row        -> bits 15:8

	ldr	x0, =scratch		// x0 walks the scratch buffer
	mov	w1, #MAXVL_B / 4	// w1 counts the remaining 32-bit slots

.Lpattern_word:
	str	w3, [x0], #4
	add	w3, w3, #1		// Lane
	subs	w1, w1, #1
	b.ne	.Lpattern_word

	ret
endfunction

// _adrza: compute the address of the shadow copy of a ZA horizontal vector
//   xd:    destination register, receives zaref + (SVL in bytes) * xn
//   xn:    register holding the row number
//   nrtmp: bare register NUMBER of a temporary; x<nrtmp> is left holding
//          the streaming vector length in bytes
.macro _adrza xd, xn, nrtmp
	ldr	\xd, =zaref
	rdsvl	\nrtmp, 1
	madd	\xd, x\nrtmp, \xn, \xd
.endm

// setup_za: write a fresh test pattern to one ZA horizontal vector and to
// its shadow copy in zaref
//   x0: pid
//   x1: row number
//   x2: generation
// The return address is parked in x4 because the helpers called here do
// not preserve x30.
function setup_za
	mov	x4, x30			// keep the return address across the calls
	mov	x12, x1			// x12/w12 selects the ZA vector

	bl	pattern			// scratch now holds the pattern
	_adrza	x0, x12, 2		// x0 = shadow address, x2 = vector bytes
	mov	x5, x0			// second copy of the shadow pointer
	ldr	x1, =scratch
	bl	memcpy			// copy length was left in x2 by _adrza

	_ldr_za 12, 5			// load vector w12 from pointer x5

	ret	x4
endfunction

// memcmp: byte-wise comparison of two buffers
//   x0: first buffer
//   x1: second buffer
//   x2: number of bytes to compare
// Returns only when every byte matches.  On the first difference control
// is transferred to barf with the original x0, x1 and x2 restored.
// Clobbers x0-x5.
function memcmp
	cbz	x2, .Lmemcmp_ret	// zero length trivially matches

	stp	x0, x1, [sp, #-0x20]!	// stash the arguments for barf
	str	x2, [sp, #0x10]

	mov	x5, #0			// x5 = running byte offset
.Lmemcmp_byte:
	ldrb	w3, [x0, x5]
	ldrb	w4, [x1, x5]
	add	x5, x5, #1
	cmp	w3, w4
	b.ne	.Lmemcmp_finish		// difference found: leave with NE set
	subs	x2, x2, #1
	b.ne	.Lmemcmp_byte		// falls through with EQ set when done

.Lmemcmp_finish:
	// The loads below leave the flags alone, so NE still means "mismatch".
	ldr	x2, [sp, #0x10]
	ldp	x0, x1, [sp], #0x20
	b.ne	barf

.Lmemcmp_ret:
	ret
endfunction

// check_za: read one ZA vector back and compare it against its shadow copy;
// a mismatch aborts the program through memcmp
//   x0: row number
// Clobbers x0-x7 and x12.
function check_za
	mov	x3, x30			// return address, restored before the tail call

	mov	x12, x0			// x12/w12 selects the ZA vector
	_adrza	x5, x0, 6		// x5 = expected data, x6 = vector bytes
	mov	x4, x0
	ldr	x7, =scratch		// x7 = buffer receiving the live data

	mov	x0, x7			// Poison scratch before reading ZA into it
	mov	x1, x6
	bl	memfill_ae

	_str_za 12, 7			// save vector w12 to pointer x7

	mov	x0, x5			// expected
	mov	x1, x7			// actual
	mov	x2, x6			// length
	mov	x30, x3
	b	memcmp			// tail call: memcmp returns to our caller
endfunction

// Any SME register modified here can cause corruption in the main
// thread -- but *only* the locations modified here.
function irritator_handler
	// x2 is the handler's ucontext argument.  Bump the saved copy of x23
	// in it, so the interrupted thread sees its signal count go up once
	// the handler returns.
	// Increment the irritation signal count (x23):
	ldr	x0, [x2, #ucontext_regs + 8 * 23]
	add	x0, x0, #1
	str	x0, [x2, #ucontext_regs + 8 * 23]

	// Corrupt some random ZA data (currently compiled out)
#if 0
	adr	x0, .text + (irritator_handler - .text) / 16 * 16
	movi	v0.8b, #1
	movi	v9.16b, #2
	movi	v31.8b, #3
#endif

	ret
endfunction

// Signal handler that only counts the signal.
//   x2: ucontext of the interrupted thread
// Clobbers x0.
function tickle_handler
	// Increment the signal count (x23):
	ldr	x0, [x2, #ucontext_regs + 8 * 23]
	add	x0, x0, #1
	str	x0, [x2, #ucontext_regs + 8 * 23]

	ret
endfunction

// Signal handler for orderly termination: print a summary and exit(0).
//   w0: signal number
//   x2: ucontext of the interrupted thread; the saved x22 (iterations) and
//       x23 (signal count) are read from it for the report
// Does not return.
function terminate_handler
	mov	w21, w0			// signal number, kept across the calls
	mov	x20, x2			// ucontext pointer, kept across the calls

	puts	"Terminated by signal "
	mov	w0, w21
	bl	putdec
	puts	", no error, iterations="
	ldr	x0, [x20, #ucontext_regs + 8 * 22]
	bl	putdec
	puts	", signals="
	ldr	x0, [x20, #ucontext_regs + 8 * 23]
	bl	putdecn

	mov	x0, #0			// exit status 0
	mov	x8, #__NR_exit
	svc	#0
endfunction

// Install a signal handler via rt_sigaction; abort the program on failure.
// w0: signal number
// x1: sa_action
// w2: sa_flags
// Clobbers x0-x6,x8
function setsignal
	// Frame layout: [sp] = saved x30, [sp + 16] = struct sigaction
	// (sa_sz bytes, rounded up so that sp stays 16-byte aligned).
	str	x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]!

	mov	w4, w0			// park the arguments while memclr runs
	mov	x5, x1
	mov	w6, w2

	add	x0, sp, #16		// zero the whole sigaction structure
	mov	x1, #sa_sz
	bl	memclr

	mov	w0, w4			// rt_sigaction(sig, &act, NULL, sa_mask_sz)
	add	x1, sp, #16
	str	w6, [x1, #sa_flags]
	str	x5, [x1, #sa_handler]
	mov	x2, #0
	mov	x3, #sa_mask_sz
	mov	x8, #__NR_rt_sigaction
	svc	#0

	cbz	w0, .Lsetsignal_ok

	puts	"sigaction failure\n"
	b	.Labort

.Lsetsignal_ok:
	ldr	x30, [sp], #((sa_sz + 15) / 16 * 16 + 16)
	ret
endfunction

// Main program entry point
//
// Long-lived register usage:
//   x19: vector length in bits as sampled at startup
//   x20: our PID
//   x21: row countdown for the setup and verify loops
//   x22: generation number (one per pass of .Ltest_loop)
//   x23: signal count, bumped by the handlers through the saved context
//   x24: number of ZA rows (vector length in bytes) while verifying
.globl _start
function _start
	mov	x23, #0			// signal count

	mov	w0, #SIGINT
	adr	x1, terminate_handler
	mov	w2, #SA_SIGINFO
	bl	setsignal

	mov	w0, #SIGTERM
	adr	x1, terminate_handler
	mov	w2, #SA_SIGINFO
	bl	setsignal

	mov	w0, #SIGUSR1
	adr	x1, irritator_handler
	mov	w2, #SA_SIGINFO
	orr	w2, w2, #SA_NODEFER
	bl	setsignal

	mov	w0, #SIGUSR2
	adr	x1, tickle_handler
	mov	w2, #SA_SIGINFO
	orr	w2, w2, #SA_NODEFER
	bl	setsignal

	puts	"Streaming mode "
	smstart_za

	// Sanity-check and report the vector length

	rdsvl	19, 8			// x19 = vector length in bits
	cmp	x19, #128
	b.lo	.Lbad_vl
	cmp	x19, #2048
	b.hi	.Lbad_vl
	tst	x19, #(8 - 1)		// must be a whole number of bytes
	b.eq	.Lvl_ok

.Lbad_vl:
	puts	"bad vector length: "
	mov	x0, x19
	bl	putdecn
	b	.Labort

.Lvl_ok:
	puts	"vector length:\t"
	mov	x0, x19
	bl	putdec
	puts	" bits\n"

	// Obtain our PID, to ensure test pattern uniqueness between processes
	mov	x8, #__NR_getpid
	svc	#0
	mov	x20, x0

	puts	"PID:\t"
	mov	x0, x20
	bl	putdecn

	mov	x22, #0			// generation number, increments per iteration
.Ltest_loop:
	rdsvl	0, 8			// the active VL must not change under us
	cmp	x0, x19
	b.ne	vl_barf

	rdsvl	21, 1			// Set up ZA & shadow with test pattern
.Lsetup_row:				// rows are written from last to first
	mov	x0, x20
	sub	x1, x21, #1
	mov	x2, x22
	bl	setup_za
	subs	x21, x21, #1
	b.ne	.Lsetup_row

	mov	x8, #__NR_sched_yield	// encourage preemption
	svc	#0

	mrs	x0, S3_3_C4_C2_2	// SVCR should have ZA=1,SM=0
	and	x1, x0, #3
	cmp	x1, #2
	b.ne	svcr_barf

	rdsvl	21, 1			// Verify that the data made it through
	rdsvl	24, 1			// x24 = row count; row index is x24 - x21
.Lverify_row:				// rows are checked from first to last
	sub	x0, x24, x21
	bl	check_za
	subs	x21, x21, #1
	b.ne	.Lverify_row

	add	x22, x22, #1		// Everything still working
	b	.Ltest_loop

.Labort:
	// Raise SIGABRT on this process only.  A pid of 0 would signal every
	// process in our process group, so look up our own pid first, the
	// same way barf does.
	mov	x8, #__NR_getpid
	svc	#0			// x0 = our pid
	mov	x1, #SIGABRT
	mov	x8, #__NR_kill
	svc	#0
endfunction

// Report a data mismatch and abort.  Reached from memcmp, which is only
// tail-called by check_za from the verify loop in _start.
//   x0: expected data
//   x1: actual data
//   x2: data size in bytes
// Also reads x20 (PID), x22 (iteration) and x21/x24 (verify loop state).
// Does not return.
function barf
// fpsimd.c activity log dump hack
//	ldr	w0, =0xdeadc0de
//	mov	w8, #__NR_exit
//	svc	#0
// end hack

	mrs	x13, S3_3_C4_C2_2	// sample SVCR before smstop changes it

	smstop
	mov	x10, x0			// expected data
	mov	x11, x1			// actual data
	mov	x12, x2			// data size

	puts	"Mismatch: PID="
	mov	x0, x20
	bl	putdec
	puts	", iteration="
	mov	x0, x22
	bl	putdec
	puts	", row="
	// x21 is the verify loop's countdown, not the row itself; the row
	// that was handed to check_za is x24 - x21.
	sub	x0, x24, x21
	bl	putdecn
	puts	"\tExpected ["
	mov	x0, x10
	mov	x1, x12
	bl	dumphex
	puts	"]\n\tGot [" 
	mov	x0, x11
	mov	x1, x12
	bl	dumphex
	puts	"]\n"
	puts	"\tSVCR: "
	mov	x0, x13
	bl	putdecn

	mov	x8, #__NR_getpid	// kill(getpid(), SIGABRT)
	svc	#0
// fpsimd.c activity log dump hack
//	ldr	w0, =0xdeadc0de
//	mov	w8, #__NR_exit
//	svc	#0
// ^ end of hack
	mov	x1, #SIGABRT
	mov	x8, #__NR_kill
	svc	#0
//	mov	x8, #__NR_exit
//	mov	x1, #1
//	svc	#0
endfunction

// Report an unexpected change of the active vector length and exit(1).
//   x0: the vector length (in bits) that was read
// Does not return.
function vl_barf
	mov	x10, x0			// keep the value across puts

	puts	"Bad active VL: "
	mov	x0, x10
	bl	putdecn

	mov	x8, #__NR_exit
	mov	x0, #1			// exit status is the first syscall argument
	svc	#0
endfunction

// Report an unexpected SVCR value and exit(1).
//   x0: the SVCR value that was read
// Does not return.
function svcr_barf
	mov	x10, x0			// keep the value across puts

	puts	"Bad SVCR: "
	mov	x0, x10
	bl	putdecn

	mov	x8, #__NR_exit
	mov	x0, #1			// exit status is the first syscall argument
	svc	#0
endfunction
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.