1 #!/bin/sh 1 #!/bin/sh 2 # SPDX-License-Identifier: GPL-2.0-only 2 # SPDX-License-Identifier: GPL-2.0-only 3 3 4 export KSELFTESTS_SKIP=4 << 5 << 6 log() { << 7 echo >/dev/stderr $* << 8 } << 9 << 10 pe_ok() { 4 pe_ok() { 11 local dev="$1" 5 local dev="$1" 12 local path="/sys/bus/pci/devices/$dev/ 6 local path="/sys/bus/pci/devices/$dev/eeh_pe_state" 13 7 14 # if a driver doesn't support the erro 8 # if a driver doesn't support the error handling callbacks then the 15 # device is recovered by removing and 9 # device is recovered by removing and re-probing it. This causes the 16 # sysfs directory to disappear so read 10 # sysfs directory to disappear so read the PE state once and squash 17 # any potential error messages 11 # any potential error messages 18 local eeh_state="$(cat $path 2>/dev/nu 12 local eeh_state="$(cat $path 2>/dev/null)" 19 if [ -z "$eeh_state" ]; then 13 if [ -z "$eeh_state" ]; then 20 return 1; 14 return 1; 21 fi 15 fi 22 16 23 local fw_state="$(echo $eeh_state | cu 17 local fw_state="$(echo $eeh_state | cut -d' ' -f1)" 24 local sw_state="$(echo $eeh_state | cu 18 local sw_state="$(echo $eeh_state | cut -d' ' -f2)" 25 19 26 # If EEH_PE_ISOLATED or EEH_PE_RECOVER 20 # If EEH_PE_ISOLATED or EEH_PE_RECOVERING are set then the PE is in an 27 # error state or being recovered. Eith 21 # error state or being recovered. Either way, not ok. 28 if [ "$((sw_state & 0x3))" -ne 0 ] ; t 22 if [ "$((sw_state & 0x3))" -ne 0 ] ; then 29 return 1 23 return 1 30 fi 24 fi 31 25 32 # A functioning PE should have the EEH 26 # A functioning PE should have the EEH_STATE_MMIO_ACTIVE and 33 # EEH_STATE_DMA_ACTIVE flags set. For 27 # EEH_STATE_DMA_ACTIVE flags set. For some goddamn stupid reason 34 # the platform backends set these when 28 # the platform backends set these when the PE is in reset. The 35 # RECOVERING check above should stop a 29 # RECOVERING check above should stop any false positives though. 36 if [ "$((fw_state & 0x18))" -ne "$((0x 30 if [ "$((fw_state & 0x18))" -ne "$((0x18))" ] ; then 37 return 1 31 return 1 38 fi 32 fi 39 33 40 return 0; 34 return 0; 41 } 35 } 42 36 43 eeh_supported() { 37 eeh_supported() { 44 test -e /proc/powerpc/eeh && \ 38 test -e /proc/powerpc/eeh && \ 45 grep -q 'EEH Subsystem is enabled' /pr 39 grep -q 'EEH Subsystem is enabled' /proc/powerpc/eeh 46 } 40 } 47 41 48 eeh_test_prep() { << 49 if ! eeh_supported ; then << 50 echo "EEH not supported on thi << 51 exit $KSELFTESTS_SKIP; << 52 fi << 53 << 54 if [ ! -e "/sys/kernel/debug/powerpc/e << 55 [ ! -e "/sys/kernel/debug/powerpc/e << 56 log "debugfs EEH testing files << 57 exit $KSELFTESTS_SKIP; << 58 fi << 59 << 60 # Bump the max freeze count to somethi << 61 # trip over it while breaking things. << 62 echo 5000 > /sys/kernel/debug/powerpc/ << 63 } << 64 << 65 eeh_can_break() { << 66 # skip bridges since we can't recover << 67 if [ -e "/sys/bus/pci/devices/$dev/pci << 68 log "$dev, Skipped: bridge" << 69 return 1; << 70 fi << 71 << 72 # The ahci driver doesn't support erro << 73 # happens to be hosting the root files << 74 # it the system will generally go down << 75 # at some point << 76 if [ "ahci" = "$(basename $(realpath / << 77 log "$dev, Skipped: ahci doesn << 78 return 1; << 79 fi << 80 << 81 # Don't inject errosr into an already- << 82 # PEs that contain multiple PCI device << 83 # and injecting new errors during the << 84 # result in the recovery failing and t << 85 # failed. << 86 if ! pe_ok $dev ; then << 87 log "$dev, Skipped: Bad initia << 88 return 1; << 89 fi << 90 << 91 return 0 << 92 } << 93 << 94 eeh_one_dev() { 42 eeh_one_dev() { 95 local dev="$1" 43 local dev="$1" 96 44 97 # Using this function from the command 45 # Using this function from the command line is sometimes useful for 98 # testing so check that the argument i 46 # testing so check that the argument is a well-formed sysfs device 99 # name. 47 # name. 100 if ! test -e /sys/bus/pci/devices/$dev 48 if ! test -e /sys/bus/pci/devices/$dev/ ; then 101 log "Error: '$dev' must be a s !! 49 echo "Error: '$dev' must be a sysfs device name (DDDD:BB:DD.F)" 102 return 1; 50 return 1; 103 fi 51 fi 104 52 105 # Break it 53 # Break it 106 echo $dev >/sys/kernel/debug/powerpc/e 54 echo $dev >/sys/kernel/debug/powerpc/eeh_dev_break 107 55 108 # Force an EEH device check. If the ke 56 # Force an EEH device check. If the kernel has already 109 # noticed the EEH (due to a driver pol 57 # noticed the EEH (due to a driver poll or whatever), this 110 # is a no-op. 58 # is a no-op. 111 echo $dev >/sys/kernel/debug/powerpc/e 59 echo $dev >/sys/kernel/debug/powerpc/eeh_dev_check 112 60 113 # Default to a 60s timeout when waitin 61 # Default to a 60s timeout when waiting for a device to recover. This 114 # is an arbitrary default which can be 62 # is an arbitrary default which can be overridden by setting the 115 # EEH_MAX_WAIT environmental variable 63 # EEH_MAX_WAIT environmental variable when required. 116 64 117 # The current record holder for longes 65 # The current record holder for longest recovery time is: 118 # "Adaptec Series 8 12G SAS/PCIe 3" a 66 # "Adaptec Series 8 12G SAS/PCIe 3" at 39 seconds 119 max_wait=${EEH_MAX_WAIT:=60} 67 max_wait=${EEH_MAX_WAIT:=60} 120 68 121 for i in `seq 0 ${max_wait}` ; do 69 for i in `seq 0 ${max_wait}` ; do 122 if pe_ok $dev ; then 70 if pe_ok $dev ; then 123 break; 71 break; 124 fi 72 fi 125 log "$dev, waited $i/${max_wai !! 73 echo "$dev, waited $i/${max_wait}" 126 sleep 1 74 sleep 1 127 done 75 done 128 76 129 if ! pe_ok $dev ; then 77 if ! pe_ok $dev ; then 130 log "$dev, Failed to recover!" !! 78 echo "$dev, Failed to recover!" 131 return 1; 79 return 1; 132 fi 80 fi 133 81 134 log "$dev, Recovered after $i seconds" !! 82 echo "$dev, Recovered after $i seconds" 135 return 0; 83 return 0; 136 } 84 } 137 85 138 eeh_has_driver() { << 139 test -e /sys/bus/pci/devices/$1/driver << 140 return $? << 141 } << 142 << 143 eeh_can_recover() { << 144 # we'll get an IO error if the device' << 145 # error recovery << 146 echo $1 > '/sys/kernel/debug/powerpc/e << 147 << 148 return $? << 149 } << 150 << 151 eeh_find_all_pfs() { << 152 devices="" << 153 << 154 # SR-IOV on pseries requires hyperviso << 155 is_pseries="" << 156 if grep -q pSeries /proc/cpuinfo ; the << 157 if [ ! -f /proc/device-tree/rt << 158 [ ! -f /proc/device-tree/rt << 159 return 1; << 160 fi << 161 << 162 is_pseries="true" << 163 fi << 164 << 165 for dev in `ls -1 /sys/bus/pci/devices << 166 sysfs="/sys/bus/pci/devices/$d << 167 if [ ! -e "$sysfs/sriov_numvfs << 168 continue << 169 fi << 170 << 171 # skip unsupported PFs on pser << 172 if [ -z "$is_pseries" ] && << 173 [ ! -f "$sysfs/of_node/ibm, << 174 [ ! -f "$sysfs/of_node/ibm, << 175 continue; << 176 fi << 177 << 178 # no driver, no vfs << 179 if ! eeh_has_driver $dev ; the << 180 continue << 181 fi << 182 << 183 devices="$devices $dev" << 184 done << 185 << 186 if [ -z "$devices" ] ; then << 187 return 1; << 188 fi << 189 << 190 echo $devices << 191 return 0; << 192 } << 193 << 194 # attempts to enable one VF on each PF so we c << 195 # stdout: list of enabled VFs, one per line << 196 # return code: 0 if vfs are found, 1 otherwise << 197 eeh_enable_vfs() { << 198 pf_list="$(eeh_find_all_pfs)" << 199 << 200 vfs=0 << 201 for dev in $pf_list ; do << 202 pf_sysfs="/sys/bus/pci/devices << 203 << 204 # make sure we have a single V << 205 echo 0 > "$pf_sysfs/sriov_numv << 206 echo 1 > "$pf_sysfs/sriov_numv << 207 if [ "$?" != 0 ] ; then << 208 log "Unable to enable << 209 continue; << 210 fi << 211 << 212 vf="$(basename $(realpath "$pf << 213 if [ $? != 0 ] ; then << 214 log "unable to find en << 215 echo 0 > "$pf_sysfs/sr << 216 continue; << 217 fi << 218 << 219 if ! eeh_can_break $vf ; then << 220 log "skipping " << 221 << 222 echo 0 > "$pf_sysfs/sr << 223 continue; << 224 fi << 225 << 226 vfs="$((vfs + 1))" << 227 echo $vf << 228 done << 229 << 230 test "$vfs" != 0 << 231 return $? << 232 } << 233 << 234 eeh_disable_vfs() { << 235 pf_list="$(eeh_find_all_pfs)" << 236 if [ -z "$pf_list" ] ; then << 237 return 1; << 238 fi << 239 << 240 for dev in $pf_list ; do << 241 echo 0 > "/sys/bus/pci/devices << 242 done << 243 << 244 return 0; << 245 } <<
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.