#!/bin/sh
# SPDX-License-Identifier: GPL-2.0-only
#
# Copyright 2015, Daniel Axtens, IBM Corporation
#
# For every chip+core pair listed in the OPAL message log, inject a
# recoverable RegFile error into the core FIR via putscom and wait for the
# resulting "Harmless Hypervisor Maintenance interrupt" lines to show up in
# dmesg.
#
# Requires: getscom/putscom (in . or $PATH), ppc64_cpu, dmesg, and
# read access to /sys/firmware/opal/msglog.
# Exit status: non-zero if the tools are missing, a FIR is not clear before
# injection, the injection fails, or the expected HMIs are not seen in time.


# do we have ./getscom, ./putscom?
# Both tools are needed, so both are checked on $PATH as well; `command -v`
# is the POSIX way to locate them.
if [ -x ./getscom ] && [ -x ./putscom ]; then
	GETSCOM=./getscom
	PUTSCOM=./putscom
elif command -v getscom > /dev/null && command -v putscom > /dev/null; then
	GETSCOM=$(command -v getscom)
	PUTSCOM=$(command -v putscom)
else
	cat <<EOF
Can't find getscom/putscom in . or \$PATH.
See https://github.com/open-power/skiboot.
The tool is in external/xscom-utils
EOF
	exit 1
fi

# We will get 8 HMI events per injection
# todo: deal with things being offline
expected_hmis=8

# Print the number of dmesg lines that report a handled harmless HMI.
# grep -c prints 0 (and returns non-zero) when nothing matches; only the
# printed count is used by callers.
COUNT_HMIS() {
	dmesg | grep -c 'Harmless Hypervisor Maintenance interrupt'
}

# massively expand snooze delay, allowing injection on all cores
ppc64_cpu --smt-snooze-delay=1000000000

# when we exit, restore it
trap 'ppc64_cpu --smt-snooze-delay=100' EXIT HUP

# for each chip+core combination
# todo - less fragile parsing
# Each matched line has the form "OCC: Chip <chip> Core <core>", so the
# third and fifth whitespace-separated fields are split out by read itself.
# The loop is the last stage of a pipeline, so an `exit 1` inside it ends
# the pipeline with status 1, which becomes the status of the script.
grep -E -o 'OCC: Chip [0-9a-f]+ Core [0-9a-f]' < /sys/firmware/opal/msglog |
while read -r _occ _chip_label chip _core_label core; do
	fir="0x1${core}013100"

	# verify that Core FIR is zero as expected
	if [ "$("$GETSCOM" -c "0x${chip}" "$fir")" != 0 ]; then
		echo "FIR was not zero before injection for chip $chip, core $core. Aborting!"
		echo "Result of $GETSCOM -c 0x${chip} $fir:"
		"$GETSCOM" -c "0x${chip}" "$fir"
		echo "If you get a -5 error, the core may be in idle state. Try stress-ng."
		echo "Otherwise, try $PUTSCOM -c 0x${chip} $fir 0"
		exit 1
	fi

	# keep track of the number of HMIs handled
	old_hmis=$(COUNT_HMIS)
	target_hmis=$((old_hmis + expected_hmis))

	# do injection, adding a marker to dmesg for clarity
	echo "Injecting HMI on core $core, chip $chip" | tee /dev/kmsg
	# inject a RegFile recoverable error
	if ! "$PUTSCOM" -c "0x${chip}" "$fir" 2000000000000000 > /dev/null; then
		echo "Error injecting. Aborting!"
		exit 1
	fi

	# now we want to wait for all the HMIs to be processed
	# we expect one per thread on the core
	# Poll up to 12 times, 5 seconds apart (1 minute in total).
	i=0
	new_hmis=$(COUNT_HMIS)
	while [ "$new_hmis" -lt "$target_hmis" ] && [ "$i" -lt 12 ]; do
		echo "Seen $((new_hmis - old_hmis)) HMI(s) out of $expected_hmis expected, sleeping"
		sleep 5
		i=$((i + 1))
		new_hmis=$(COUNT_HMIS)
	done
	# Judge by the HMI count, not the poll counter: the last events may
	# arrive during the final sleep, which is still a success.
	if [ "$new_hmis" -lt "$target_hmis" ]; then
		echo "Haven't seen expected $expected_hmis recoveries after 1 min. Aborting."
		exit 1
	fi
	echo "Processed $expected_hmis events; presumed success. Check dmesg."
	echo ""
done
# Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
# TOMOYO® is a registered trademark of NTT DATA CORPORATION.