1 #!/bin/sh 1 #!/bin/sh 2 # SPDX-License-Identifier: GPL-2.0-only 2 # SPDX-License-Identifier: GPL-2.0-only 3 3 4 export KSELFTESTS_SKIP=4 << 5 << 6 log() { << 7 echo >/dev/stderr $* << 8 } << 9 << 10 pe_ok() { 4 pe_ok() { 11 local dev="$1" 5 local dev="$1" 12 local path="/sys/bus/pci/devices/$dev/ 6 local path="/sys/bus/pci/devices/$dev/eeh_pe_state" 13 7 14 # if a driver doesn't support the erro !! 8 if ! [ -e "$path" ] ; then 15 # device is recovered by removing and << 16 # sysfs directory to disappear so read << 17 # any potential error messages << 18 local eeh_state="$(cat $path 2>/dev/nu << 19 if [ -z "$eeh_state" ]; then << 20 return 1; 9 return 1; 21 fi 10 fi 22 11 23 local fw_state="$(echo $eeh_state | cu !! 12 local fw_state="$(cut -d' ' -f1 < $path)" 24 local sw_state="$(echo $eeh_state | cu !! 13 local sw_state="$(cut -d' ' -f2 < $path)" 25 14 26 # If EEH_PE_ISOLATED or EEH_PE_RECOVER 15 # If EEH_PE_ISOLATED or EEH_PE_RECOVERING are set then the PE is in an 27 # error state or being recovered. Eith 16 # error state or being recovered. Either way, not ok. 28 if [ "$((sw_state & 0x3))" -ne 0 ] ; t 17 if [ "$((sw_state & 0x3))" -ne 0 ] ; then 29 return 1 18 return 1 30 fi 19 fi 31 20 32 # A functioning PE should have the EEH 21 # A functioning PE should have the EEH_STATE_MMIO_ACTIVE and 33 # EEH_STATE_DMA_ACTIVE flags set. For 22 # EEH_STATE_DMA_ACTIVE flags set. For some goddamn stupid reason 34 # the platform backends set these when 23 # the platform backends set these when the PE is in reset. The 35 # RECOVERING check above should stop a 24 # RECOVERING check above should stop any false positives though. 36 if [ "$((fw_state & 0x18))" -ne "$((0x 25 if [ "$((fw_state & 0x18))" -ne "$((0x18))" ] ; then 37 return 1 26 return 1 38 fi 27 fi 39 28 40 return 0; 29 return 0; 41 } 30 } 42 31 43 eeh_supported() { 32 eeh_supported() { 44 test -e /proc/powerpc/eeh && \ 33 test -e /proc/powerpc/eeh && \ 45 grep -q 'EEH Subsystem is enabled' /pr 34 grep -q 'EEH Subsystem is enabled' /proc/powerpc/eeh 46 } 35 } 47 36 48 eeh_test_prep() { << 49 if ! eeh_supported ; then << 50 echo "EEH not supported on thi << 51 exit $KSELFTESTS_SKIP; << 52 fi << 53 << 54 if [ ! -e "/sys/kernel/debug/powerpc/e << 55 [ ! -e "/sys/kernel/debug/powerpc/e << 56 log "debugfs EEH testing files << 57 exit $KSELFTESTS_SKIP; << 58 fi << 59 << 60 # Bump the max freeze count to somethi << 61 # trip over it while breaking things. << 62 echo 5000 > /sys/kernel/debug/powerpc/ << 63 } << 64 << 65 eeh_can_break() { << 66 # skip bridges since we can't recover << 67 if [ -e "/sys/bus/pci/devices/$dev/pci << 68 log "$dev, Skipped: bridge" << 69 return 1; << 70 fi << 71 << 72 # The ahci driver doesn't support erro << 73 # happens to be hosting the root files << 74 # it the system will generally go down << 75 # at some point << 76 if [ "ahci" = "$(basename $(realpath / << 77 log "$dev, Skipped: ahci doesn << 78 return 1; << 79 fi << 80 << 81 # Don't inject errosr into an already- << 82 # PEs that contain multiple PCI device << 83 # and injecting new errors during the << 84 # result in the recovery failing and t << 85 # failed. << 86 if ! pe_ok $dev ; then << 87 log "$dev, Skipped: Bad initia << 88 return 1; << 89 fi << 90 << 91 return 0 << 92 } << 93 << 94 eeh_one_dev() { 37 eeh_one_dev() { 95 local dev="$1" 38 local dev="$1" 96 39 97 # Using this function from the command 40 # Using this function from the command line is sometimes useful for 98 # testing so check that the argument i 41 # testing so check that the argument is a well-formed sysfs device 99 # name. 42 # name. 100 if ! test -e /sys/bus/pci/devices/$dev 43 if ! test -e /sys/bus/pci/devices/$dev/ ; then 101 log "Error: '$dev' must be a s !! 44 echo "Error: '$dev' must be a sysfs device name (DDDD:BB:DD.F)" 102 return 1; 45 return 1; 103 fi 46 fi 104 47 105 # Break it 48 # Break it 106 echo $dev >/sys/kernel/debug/powerpc/e 49 echo $dev >/sys/kernel/debug/powerpc/eeh_dev_break 107 50 108 # Force an EEH device check. If the ke 51 # Force an EEH device check. If the kernel has already 109 # noticed the EEH (due to a driver pol 52 # noticed the EEH (due to a driver poll or whatever), this 110 # is a no-op. 53 # is a no-op. 111 echo $dev >/sys/kernel/debug/powerpc/e 54 echo $dev >/sys/kernel/debug/powerpc/eeh_dev_check 112 55 113 # Default to a 60s timeout when waitin 56 # Default to a 60s timeout when waiting for a device to recover. This 114 # is an arbitrary default which can be 57 # is an arbitrary default which can be overridden by setting the 115 # EEH_MAX_WAIT environmental variable 58 # EEH_MAX_WAIT environmental variable when required. 116 59 117 # The current record holder for longes 60 # The current record holder for longest recovery time is: 118 # "Adaptec Series 8 12G SAS/PCIe 3" a 61 # "Adaptec Series 8 12G SAS/PCIe 3" at 39 seconds 119 max_wait=${EEH_MAX_WAIT:=60} 62 max_wait=${EEH_MAX_WAIT:=60} 120 63 121 for i in `seq 0 ${max_wait}` ; do 64 for i in `seq 0 ${max_wait}` ; do 122 if pe_ok $dev ; then 65 if pe_ok $dev ; then 123 break; 66 break; 124 fi 67 fi 125 log "$dev, waited $i/${max_wai !! 68 echo "$dev, waited $i/${max_wait}" 126 sleep 1 69 sleep 1 127 done 70 done 128 71 129 if ! pe_ok $dev ; then 72 if ! pe_ok $dev ; then 130 log "$dev, Failed to recover!" !! 73 echo "$dev, Failed to recover!" 131 return 1; 74 return 1; 132 fi 75 fi 133 76 134 log "$dev, Recovered after $i seconds" !! 77 echo "$dev, Recovered after $i seconds" 135 return 0; 78 return 0; 136 } 79 } 137 80 138 eeh_has_driver() { << 139 test -e /sys/bus/pci/devices/$1/driver << 140 return $? << 141 } << 142 << 143 eeh_can_recover() { << 144 # we'll get an IO error if the device' << 145 # error recovery << 146 echo $1 > '/sys/kernel/debug/powerpc/e << 147 << 148 return $? << 149 } << 150 << 151 eeh_find_all_pfs() { << 152 devices="" << 153 << 154 # SR-IOV on pseries requires hyperviso << 155 is_pseries="" << 156 if grep -q pSeries /proc/cpuinfo ; the << 157 if [ ! -f /proc/device-tree/rt << 158 [ ! -f /proc/device-tree/rt << 159 return 1; << 160 fi << 161 << 162 is_pseries="true" << 163 fi << 164 << 165 for dev in `ls -1 /sys/bus/pci/devices << 166 sysfs="/sys/bus/pci/devices/$d << 167 if [ ! -e "$sysfs/sriov_numvfs << 168 continue << 169 fi << 170 << 171 # skip unsupported PFs on pser << 172 if [ -z "$is_pseries" ] && << 173 [ ! -f "$sysfs/of_node/ibm, << 174 [ ! -f "$sysfs/of_node/ibm, << 175 continue; << 176 fi << 177 << 178 # no driver, no vfs << 179 if ! eeh_has_driver $dev ; the << 180 continue << 181 fi << 182 << 183 devices="$devices $dev" << 184 done << 185 << 186 if [ -z "$devices" ] ; then << 187 return 1; << 188 fi << 189 << 190 echo $devices << 191 return 0; << 192 } << 193 << 194 # attempts to enable one VF on each PF so we c << 195 # stdout: list of enabled VFs, one per line << 196 # return code: 0 if vfs are found, 1 otherwise << 197 eeh_enable_vfs() { << 198 pf_list="$(eeh_find_all_pfs)" << 199 << 200 vfs=0 << 201 for dev in $pf_list ; do << 202 pf_sysfs="/sys/bus/pci/devices << 203 << 204 # make sure we have a single V << 205 echo 0 > "$pf_sysfs/sriov_numv << 206 echo 1 > "$pf_sysfs/sriov_numv << 207 if [ "$?" != 0 ] ; then << 208 log "Unable to enable << 209 continue; << 210 fi << 211 << 212 vf="$(basename $(realpath "$pf << 213 if [ $? != 0 ] ; then << 214 log "unable to find en << 215 echo 0 > "$pf_sysfs/sr << 216 continue; << 217 fi << 218 << 219 if ! eeh_can_break $vf ; then << 220 log "skipping " << 221 << 222 echo 0 > "$pf_sysfs/sr << 223 continue; << 224 fi << 225 << 226 vfs="$((vfs + 1))" << 227 echo $vf << 228 done << 229 << 230 test "$vfs" != 0 << 231 return $? << 232 } << 233 << 234 eeh_disable_vfs() { << 235 pf_list="$(eeh_find_all_pfs)" << 236 if [ -z "$pf_list" ] ; then << 237 return 1; << 238 fi << 239 << 240 for dev in $pf_list ; do << 241 echo 0 > "/sys/bus/pci/devices << 242 done << 243 << 244 return 0; << 245 } <<
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.