1 #!/bin/bash 2 # SPDX-License-Identifier: GPL-2.0 3 # 4 # A test for switch behavior under MC overload. An issue in Spectrum chips 5 # causes throughput of UC traffic to drop severely when a switch is under heavy 6 # MC load. This issue can be overcome by putting the switch to MC-aware mode. 7 # This test verifies that UC performance stays intact even as the switch is 8 # under MC flood, and therefore that the MC-aware mode is enabled and correctly 9 # configured. 10 # 11 # Because mlxsw throttles CPU port, the traffic can't actually reach userspace 12 # at full speed. That makes it impossible to use iperf3 to simply measure the 13 # throughput, because many packets (that reach $h3) don't get to the kernel at 14 # all even in UDP mode (the situation is even worse in TCP mode, where one can't 15 # hope to see more than a couple Mbps). 16 # 17 # So instead we send traffic with mausezahn and use RX ethtool counters at $h3. 18 # Multicast traffic is untagged, unicast traffic is tagged with PCP 1. Therefore 19 # each gets a different priority and we can use per-prio ethtool counters to 20 # measure the throughput. In order to avoid prioritizing unicast traffic, prio 21 # qdisc is installed on $swp3 and maps all priorities to the same band #7 (and 22 # thus TC 0). 23 # 24 # Mausezahn can't actually saturate the links unless it's using large frames. 25 # Thus we set MTU to 10K on all involved interfaces. Then both unicast and 26 # multicast traffic uses 8K frames. 27 # 28 # +---------------------------+ +----------------------------------+ 29 # | H1 | | H2 | 30 # | | | unicast --> + $h2.111 | 31 # | multicast | | traffic | 192.0.2.129/28 | 32 # | traffic | | | e-qos-map 0:1 | 33 # | $h1 + <----- | | | | 34 # | 192.0.2.65/28 | | | + $h2 | 35 # +---------------|-----------+ +--------------|-------------------+ 36 # | | 37 # +---------------|---------------------------------------|-------------------+ 38 # | $swp1 + + $swp2 | 39 # | >1Gbps | | >1Gbps | 40 # | +-------------|------+ +----------|----------------+ | 41 # | | $swp1.1 + | | + $swp2.111 | | 42 # | | BR1 | SW | BR111 | | 43 # | | $swp3.1 + | | + $swp3.111 | | 44 # | +-------------|------+ +----------|----------------+ | 45 # | \_______________________________________/ | 46 # | | | 47 # | + $swp3 | 48 # | | 1Gbps bottleneck | 49 # | | prio qdisc: {0..7} -> 7 | 50 # +------------------------------------|--------------------------------------+ 51 # | 52 # +--|-----------------+ 53 # | + $h3 H3 | 54 # | | 192.0.2.66/28 | 55 # | | | 56 # | + $h3.111 | 57 # | 192.0.2.130/28 | 58 # +--------------------+ 59 60 ALL_TESTS=" 61 ping_ipv4 62 test_mc_aware 63 test_uc_aware 64 " 65 66 lib_dir=$(dirname $0)/../../../net/forwarding 67 68 NUM_NETIFS=6 69 source $lib_dir/lib.sh 70 source $lib_dir/devlink_lib.sh 71 source qos_lib.sh 72 73 h1_create() 74 { 75 simple_if_init $h1 192.0.2.65/28 76 mtu_set $h1 10000 77 } 78 79 h1_destroy() 80 { 81 mtu_restore $h1 82 simple_if_fini $h1 192.0.2.65/28 83 } 84 85 h2_create() 86 { 87 simple_if_init $h2 88 mtu_set $h2 10000 89 90 vlan_create $h2 111 v$h2 192.0.2.129/28 91 ip link set dev $h2.111 type vlan egress-qos-map 0:1 92 } 93 94 h2_destroy() 95 { 96 vlan_destroy $h2 111 97 98 mtu_restore $h2 99 simple_if_fini $h2 100 } 101 102 h3_create() 103 { 104 simple_if_init $h3 192.0.2.66/28 105 mtu_set $h3 10000 106 107 vlan_create $h3 111 v$h3 192.0.2.130/28 108 } 109 110 h3_destroy() 111 { 112 vlan_destroy $h3 111 113 114 mtu_restore $h3 115 simple_if_fini $h3 192.0.2.66/28 116 } 117 118 switch_create() 119 { 120 ip link set dev $swp1 up 121 mtu_set $swp1 10000 122 123 ip link set dev $swp2 up 124 mtu_set $swp2 10000 125 126 ip link set dev $swp3 up 127 mtu_set $swp3 10000 128 129 vlan_create $swp2 111 130 vlan_create $swp3 111 131 132 tc qdisc replace dev $swp3 root handle 3: tbf rate 1gbit \ 133 burst 128K limit 1G 134 tc qdisc replace dev $swp3 parent 3:3 handle 33: \ 135 prio bands 8 priomap 7 7 7 7 7 7 7 7 136 137 ip link add name br1 type bridge vlan_filtering 0 138 ip link set dev br1 addrgenmode none 139 ip link set dev br1 up 140 ip link set dev $swp1 master br1 141 ip link set dev $swp3 master br1 142 143 ip link add name br111 type bridge vlan_filtering 0 144 ip link set dev br111 addrgenmode none 145 ip link set dev br111 up 146 ip link set dev $swp2.111 master br111 147 ip link set dev $swp3.111 master br111 148 149 # Make sure that ingress quotas are smaller than egress so that there is 150 # room for both streams of traffic to be admitted to shared buffer. 151 devlink_port_pool_th_save $swp1 0 152 devlink_port_pool_th_set $swp1 0 5 153 devlink_tc_bind_pool_th_save $swp1 0 ingress 154 devlink_tc_bind_pool_th_set $swp1 0 ingress 0 5 155 156 devlink_port_pool_th_save $swp2 0 157 devlink_port_pool_th_set $swp2 0 5 158 devlink_tc_bind_pool_th_save $swp2 1 ingress 159 devlink_tc_bind_pool_th_set $swp2 1 ingress 0 5 160 161 devlink_port_pool_th_save $swp3 4 162 devlink_port_pool_th_set $swp3 4 12 163 } 164 165 switch_destroy() 166 { 167 devlink_port_pool_th_restore $swp3 4 168 169 devlink_tc_bind_pool_th_restore $swp2 1 ingress 170 devlink_port_pool_th_restore $swp2 0 171 172 devlink_tc_bind_pool_th_restore $swp1 0 ingress 173 devlink_port_pool_th_restore $swp1 0 174 175 ip link del dev br111 176 ip link del dev br1 177 178 tc qdisc del dev $swp3 parent 3:3 handle 33: 179 tc qdisc del dev $swp3 root handle 3: 180 181 vlan_destroy $swp3 111 182 vlan_destroy $swp2 111 183 184 mtu_restore $swp3 185 ip link set dev $swp3 down 186 187 mtu_restore $swp2 188 ip link set dev $swp2 down 189 190 mtu_restore $swp1 191 ip link set dev $swp1 down 192 } 193 194 setup_prepare() 195 { 196 h1=${NETIFS[p1]} 197 swp1=${NETIFS[p2]} 198 199 swp2=${NETIFS[p3]} 200 h2=${NETIFS[p4]} 201 202 swp3=${NETIFS[p5]} 203 h3=${NETIFS[p6]} 204 205 h3mac=$(mac_get $h3) 206 207 vrf_prepare 208 209 h1_create 210 h2_create 211 h3_create 212 switch_create 213 } 214 215 cleanup() 216 { 217 pre_cleanup 218 219 switch_destroy 220 h3_destroy 221 h2_destroy 222 h1_destroy 223 224 vrf_cleanup 225 } 226 227 ping_ipv4() 228 { 229 ping_test $h2 192.0.2.130 230 } 231 232 test_mc_aware() 233 { 234 RET=0 235 236 local -a uc_rate 237 start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac 238 uc_rate=($(measure_rate $swp2 $h3 rx_octets_prio_1 "UC-only")) 239 check_err $? "Could not get high enough UC-only ingress rate" 240 stop_traffic 241 local ucth1=${uc_rate[1]} 242 243 start_traffic $h1 192.0.2.65 bc bc 244 245 local d0=$(date +%s) 246 local t0=$(ethtool_stats_get $h3 rx_octets_prio_0) 247 local u0=$(ethtool_stats_get $swp1 rx_octets_prio_0) 248 249 local -a uc_rate_2 250 start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac 251 uc_rate_2=($(measure_rate $swp2 $h3 rx_octets_prio_1 "UC+MC")) 252 check_err $? "Could not get high enough UC+MC ingress rate" 253 stop_traffic 254 local ucth2=${uc_rate_2[1]} 255 256 local d1=$(date +%s) 257 local t1=$(ethtool_stats_get $h3 rx_octets_prio_0) 258 local u1=$(ethtool_stats_get $swp1 rx_octets_prio_0) 259 260 local deg=$(bc <<< " 261 scale=2 262 ret = 100 * ($ucth1 - $ucth2) / $ucth1 263 if (ret > 0) { ret } else { 0 } 264 ") 265 266 # Minimum shaper of 200Mbps on MC TCs should cause about 20% of 267 # degradation on 1Gbps link. 268 check_err $(bc <<< "$deg < 15") "Minimum shaper not in effect" 269 check_err $(bc <<< "$deg > 25") "MC traffic degrades UC performance too much" 270 271 local interval=$((d1 - d0)) 272 local mc_ir=$(rate $u0 $u1 $interval) 273 local mc_er=$(rate $t0 $t1 $interval) 274 275 stop_traffic 276 277 log_test "UC performance under MC overload" 278 279 echo "UC-only throughput $(humanize $ucth1)" 280 echo "UC+MC throughput $(humanize $ucth2)" 281 echo "Degradation $deg %" 282 echo 283 echo "Full report:" 284 echo " UC only:" 285 echo " ingress UC throughput $(humanize ${uc_rate[0]})" 286 echo " egress UC throughput $(humanize ${uc_rate[1]})" 287 echo " UC+MC:" 288 echo " ingress UC throughput $(humanize ${uc_rate_2[0]})" 289 echo " egress UC throughput $(humanize ${uc_rate_2[1]})" 290 echo " ingress MC throughput $(humanize $mc_ir)" 291 echo " egress MC throughput $(humanize $mc_er)" 292 echo 293 } 294 295 test_uc_aware() 296 { 297 RET=0 298 299 start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac 300 301 local d0=$(date +%s) 302 local t0=$(ethtool_stats_get $h3 rx_octets_prio_1) 303 local u0=$(ethtool_stats_get $swp2 rx_octets_prio_1) 304 sleep 1 305 306 local attempts=50 307 local passes=0 308 local i 309 310 for ((i = 0; i < attempts; ++i)); do 311 if $ARPING -c 1 -I $h1 -b 192.0.2.66 -q -w 1; then 312 ((passes++)) 313 fi 314 315 sleep 0.1 316 done 317 318 local d1=$(date +%s) 319 local t1=$(ethtool_stats_get $h3 rx_octets_prio_1) 320 local u1=$(ethtool_stats_get $swp2 rx_octets_prio_1) 321 322 local interval=$((d1 - d0)) 323 local uc_ir=$(rate $u0 $u1 $interval) 324 local uc_er=$(rate $t0 $t1 $interval) 325 326 ((attempts == passes)) 327 check_err $? 328 329 stop_traffic 330 331 log_test "MC performance under UC overload" 332 echo " ingress UC throughput $(humanize ${uc_ir})" 333 echo " egress UC throughput $(humanize ${uc_er})" 334 echo " sent $attempts BC ARPs, got $passes responses" 335 } 336 337 trap cleanup EXIT 338 339 setup_prepare 340 setup_wait 341 342 tests_run 343 344 exit $EXIT_STATUS
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.