~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/tools/testing/selftests/net/msg_zerocopy.c

Version: ~ [ linux-6.11.5 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.58 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.114 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.169 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.228 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.284 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.322 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /* Evaluate MSG_ZEROCOPY
  2  *
  3  * Send traffic between two processes over one of the supported
  4  * protocols and modes:
  5  *
  6  * PF_INET/PF_INET6
  7  * - SOCK_STREAM
  8  * - SOCK_DGRAM
  9  * - SOCK_DGRAM with UDP_CORK
 10  * - SOCK_RAW
 11  * - SOCK_RAW with IP_HDRINCL
 12  *
 13  * PF_PACKET
 14  * - SOCK_DGRAM
 15  * - SOCK_RAW
 16  *
 17  * PF_RDS
 18  * - SOCK_SEQPACKET
 19  *
 20  * Start this program on two connected hosts, one in send mode and
 21  * the other with option '-r' to put it in receiver mode.
 22  *
 23  * If zerocopy mode ('-z') is enabled, the sender will verify that
 24  * the kernel queues completions on the error queue for all zerocopy
 25  * transfers.
 26  */
 27 
 28 #define _GNU_SOURCE
 29 
 30 #include <arpa/inet.h>
 31 #include <error.h>
 32 #include <errno.h>
 33 #include <limits.h>
 34 #include <linux/errqueue.h>
 35 #include <linux/if_packet.h>
 36 #include <linux/ipv6.h>
 37 #include <linux/socket.h>
 38 #include <linux/sockios.h>
 39 #include <net/ethernet.h>
 40 #include <net/if.h>
 41 #include <netinet/ip.h>
 42 #include <netinet/ip6.h>
 43 #include <netinet/tcp.h>
 44 #include <netinet/udp.h>
 45 #include <poll.h>
 46 #include <sched.h>
 47 #include <stdbool.h>
 48 #include <stdio.h>
 49 #include <stdint.h>
 50 #include <stdlib.h>
 51 #include <string.h>
 52 #include <sys/ioctl.h>
 53 #include <sys/socket.h>
 54 #include <sys/stat.h>
 55 #include <sys/time.h>
 56 #include <sys/types.h>
 57 #include <sys/wait.h>
 58 #include <unistd.h>
 59 #include <linux/rds.h>
 60 
 61 #ifndef SO_EE_ORIGIN_ZEROCOPY
 62 #define SO_EE_ORIGIN_ZEROCOPY           5
 63 #endif
 64 
 65 #ifndef SO_ZEROCOPY
 66 #define SO_ZEROCOPY     60
 67 #endif
 68 
 69 #ifndef SO_EE_CODE_ZEROCOPY_COPIED
 70 #define SO_EE_CODE_ZEROCOPY_COPIED      1
 71 #endif
 72 
 73 #ifndef MSG_ZEROCOPY
 74 #define MSG_ZEROCOPY    0x4000000
 75 #endif
 76 
 77 static int  cfg_cork;
 78 static bool cfg_cork_mixed;
 79 static int  cfg_cpu             = -1;           /* default: pin to last cpu */
 80 static int  cfg_family          = PF_UNSPEC;
 81 static int  cfg_ifindex         = 1;
 82 static int  cfg_payload_len;
 83 static int  cfg_port            = 8000;
 84 static bool cfg_rx;
 85 static int  cfg_runtime_ms      = 4200;
 86 static int  cfg_verbose;
 87 static int  cfg_waittime_ms     = 500;
 88 static int  cfg_notification_limit = 32;
 89 static bool cfg_zerocopy;
 90 
 91 static socklen_t cfg_alen;
 92 static struct sockaddr_storage cfg_dst_addr;
 93 static struct sockaddr_storage cfg_src_addr;
 94 
 95 static char payload[IP_MAXPACKET];
 96 static long packets, bytes, completions, expected_completions;
 97 static int  zerocopied = -1;
 98 static uint32_t next_completion;
 99 static uint32_t sends_since_notify;
100 
/* Return the gettimeofday() clock converted to whole milliseconds. */
static unsigned long gettimeofday_ms(void)
{
        struct timeval now;
        unsigned long msec;

        gettimeofday(&now, NULL);

        msec  = now.tv_sec * 1000;
        msec += now.tv_usec / 1000;

        return msec;
}
108 
/* Compute the one's-complement checksum over @num_words 16-bit words
 * starting at @start: sum the words, fold the carries back into the low
 * 16 bits until none remain, and return the bitwise complement.
 */
static uint16_t get_ip_csum(const uint16_t *start, int num_words)
{
        const uint16_t *word = start;
        unsigned long acc = 0;
        int remaining;

        for (remaining = num_words; remaining > 0; remaining--)
                acc += *word++;

        /* fold carries above bit 15 back into the low 16 bits */
        while ((acc >> 16) != 0)
                acc = (acc >> 16) + (acc & 0xFFFF);

        return (uint16_t)~acc;
}
122 
123 static int do_setcpu(int cpu)
124 {
125         cpu_set_t mask;
126 
127         CPU_ZERO(&mask);
128         CPU_SET(cpu, &mask);
129         if (sched_setaffinity(0, sizeof(mask), &mask))
130                 fprintf(stderr, "cpu: unable to pin, may increase variance.\n");
131         else if (cfg_verbose)
132                 fprintf(stderr, "cpu: %u\n", cpu);
133 
134         return 0;
135 }
136 
/* Set an integer socket option on @fd; exit with a diagnostic naming
 * the level, option and value if setsockopt() fails.
 */
static void do_setsockopt(int fd, int level, int optname, int val)
{
        int rc;

        rc = setsockopt(fd, level, optname, &val, sizeof(val));
        if (rc != 0)
                error(1, errno, "setsockopt %d.%d: %d", level, optname, val);
}
142 
143 static int do_poll(int fd, int events)
144 {
145         struct pollfd pfd;
146         int ret;
147 
148         pfd.events = events;
149         pfd.revents = 0;
150         pfd.fd = fd;
151 
152         ret = poll(&pfd, 1, cfg_waittime_ms);
153         if (ret == -1)
154                 error(1, errno, "poll");
155 
156         return ret && (pfd.revents & events);
157 }
158 
/* Accept one connection on listening socket @fd, then close the
 * listening socket. Returns the connected socket; exits on any failure.
 */
static int do_accept(int fd)
{
        int conn;

        conn = accept(fd, NULL, NULL);
        if (conn == -1)
                error(1, errno, "accept");

        if (close(fd) != 0)
                error(1, errno, "close listen sock");

        return conn;
}
171 
/* Write an RDS zerocopy cookie control message into @msg's control buffer.
 *
 * The caller must have pointed msg->msg_control at a buffer of at least
 * CMSG_SPACE(sizeof(cookie)) bytes. The cmsg header is filled in with
 * level SOL_RDS and type RDS_CMSG_ZCOPY_COOKIE, followed by @cookie.
 */
static void add_zcopy_cookie(struct msghdr *msg, uint32_t cookie)
{
        struct cmsghdr *cm;

        /* a missing buffer is a caller bug, not a syscall failure:
         * do not report whatever stale value errno happens to hold
         */
        if (!msg->msg_control)
                error(1, 0, "NULL cookie");
        cm = (void *)msg->msg_control;
        cm->cmsg_len = CMSG_LEN(sizeof(cookie));
        cm->cmsg_level = SOL_RDS;
        cm->cmsg_type = RDS_CMSG_ZCOPY_COOKIE;
        memcpy(CMSG_DATA(cm), &cookie, sizeof(cookie));
}
184 
185 static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain)
186 {
187         int ret, len, i, flags;
188         static uint32_t cookie;
189         char ckbuf[CMSG_SPACE(sizeof(cookie))];
190 
191         len = 0;
192         for (i = 0; i < msg->msg_iovlen; i++)
193                 len += msg->msg_iov[i].iov_len;
194 
195         flags = MSG_DONTWAIT;
196         if (do_zerocopy) {
197                 flags |= MSG_ZEROCOPY;
198                 if (domain == PF_RDS) {
199                         memset(&msg->msg_control, 0, sizeof(msg->msg_control));
200                         msg->msg_controllen = CMSG_SPACE(sizeof(cookie));
201                         msg->msg_control = (struct cmsghdr *)ckbuf;
202                         add_zcopy_cookie(msg, ++cookie);
203                 }
204         }
205 
206         ret = sendmsg(fd, msg, flags);
207         if (ret == -1 && errno == EAGAIN)
208                 return false;
209         if (ret == -1)
210                 error(1, errno, "send");
211         if (cfg_verbose && ret != len)
212                 fprintf(stderr, "send: ret=%u != %u\n", ret, len);
213         sends_since_notify++;
214 
215         if (len) {
216                 packets++;
217                 bytes += ret;
218                 if (do_zerocopy && ret)
219                         expected_completions++;
220         }
221         if (do_zerocopy && domain == PF_RDS) {
222                 msg->msg_control = NULL;
223                 msg->msg_controllen = 0;
224         }
225 
226         return true;
227 }
228 
229 static void do_sendmsg_corked(int fd, struct msghdr *msg)
230 {
231         bool do_zerocopy = cfg_zerocopy;
232         int i, payload_len, extra_len;
233 
234         /* split up the packet. for non-multiple, make first buffer longer */
235         payload_len = cfg_payload_len / cfg_cork;
236         extra_len = cfg_payload_len - (cfg_cork * payload_len);
237 
238         do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 1);
239 
240         for (i = 0; i < cfg_cork; i++) {
241 
242                 /* in mixed-frags mode, alternate zerocopy and copy frags
243                  * start with non-zerocopy, to ensure attach later works
244                  */
245                 if (cfg_cork_mixed)
246                         do_zerocopy = (i & 1);
247 
248                 msg->msg_iov[0].iov_len = payload_len + extra_len;
249                 extra_len = 0;
250 
251                 do_sendmsg(fd, msg, do_zerocopy,
252                            (cfg_dst_addr.ss_family == AF_INET ?
253                             PF_INET : PF_INET6));
254         }
255 
256         do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0);
257 }
258 
259 static int setup_iph(struct iphdr *iph, uint16_t payload_len)
260 {
261         struct sockaddr_in *daddr = (void *) &cfg_dst_addr;
262         struct sockaddr_in *saddr = (void *) &cfg_src_addr;
263 
264         memset(iph, 0, sizeof(*iph));
265 
266         iph->version    = 4;
267         iph->tos        = 0;
268         iph->ihl        = 5;
269         iph->ttl        = 2;
270         iph->saddr      = saddr->sin_addr.s_addr;
271         iph->daddr      = daddr->sin_addr.s_addr;
272         iph->protocol   = IPPROTO_EGP;
273         iph->tot_len    = htons(sizeof(*iph) + payload_len);
274         iph->check      = get_ip_csum((void *) iph, iph->ihl << 1);
275 
276         return sizeof(*iph);
277 }
278 
279 static int setup_ip6h(struct ipv6hdr *ip6h, uint16_t payload_len)
280 {
281         struct sockaddr_in6 *daddr = (void *) &cfg_dst_addr;
282         struct sockaddr_in6 *saddr = (void *) &cfg_src_addr;
283 
284         memset(ip6h, 0, sizeof(*ip6h));
285 
286         ip6h->version           = 6;
287         ip6h->payload_len       = htons(payload_len);
288         ip6h->nexthdr           = IPPROTO_EGP;
289         ip6h->hop_limit         = 2;
290         ip6h->saddr             = saddr->sin6_addr;
291         ip6h->daddr             = daddr->sin6_addr;
292 
293         return sizeof(*ip6h);
294 }
295 
296 
297 static void setup_sockaddr(int domain, const char *str_addr,
298                            struct sockaddr_storage *sockaddr)
299 {
300         struct sockaddr_in6 *addr6 = (void *) sockaddr;
301         struct sockaddr_in *addr4 = (void *) sockaddr;
302 
303         switch (domain) {
304         case PF_INET:
305                 memset(addr4, 0, sizeof(*addr4));
306                 addr4->sin_family = AF_INET;
307                 addr4->sin_port = htons(cfg_port);
308                 if (str_addr &&
309                     inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
310                         error(1, 0, "ipv4 parse error: %s", str_addr);
311                 break;
312         case PF_INET6:
313                 memset(addr6, 0, sizeof(*addr6));
314                 addr6->sin6_family = AF_INET6;
315                 addr6->sin6_port = htons(cfg_port);
316                 if (str_addr &&
317                     inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
318                         error(1, 0, "ipv6 parse error: %s", str_addr);
319                 break;
320         default:
321                 error(1, 0, "illegal domain");
322         }
323 }
324 
325 static int do_setup_tx(int domain, int type, int protocol)
326 {
327         int fd;
328 
329         fd = socket(domain, type, protocol);
330         if (fd == -1)
331                 error(1, errno, "socket t");
332 
333         do_setsockopt(fd, SOL_SOCKET, SO_SNDBUF, 1 << 21);
334         if (cfg_zerocopy)
335                 do_setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, 1);
336 
337         if (domain != PF_PACKET && domain != PF_RDS)
338                 if (connect(fd, (void *) &cfg_dst_addr, cfg_alen))
339                         error(1, errno, "connect");
340 
341         if (domain == PF_RDS) {
342                 if (bind(fd, (void *) &cfg_src_addr, cfg_alen))
343                         error(1, errno, "bind");
344         }
345 
346         return fd;
347 }
348 
349 static uint32_t do_process_zerocopy_cookies(struct rds_zcopy_cookies *ck)
350 {
351         int i;
352 
353         if (ck->num > RDS_MAX_ZCOOKIES)
354                 error(1, 0, "Returned %d cookies, max expected %d\n",
355                       ck->num, RDS_MAX_ZCOOKIES);
356         for (i = 0; i < ck->num; i++)
357                 if (cfg_verbose >= 2)
358                         fprintf(stderr, "%d\n", ck->cookies[i]);
359         return ck->num;
360 }
361 
362 static bool do_recvmsg_completion(int fd)
363 {
364         char cmsgbuf[CMSG_SPACE(sizeof(struct rds_zcopy_cookies))];
365         struct rds_zcopy_cookies *ck;
366         struct cmsghdr *cmsg;
367         struct msghdr msg;
368         bool ret = false;
369 
370         memset(&msg, 0, sizeof(msg));
371         msg.msg_control = cmsgbuf;
372         msg.msg_controllen = sizeof(cmsgbuf);
373 
374         if (recvmsg(fd, &msg, MSG_DONTWAIT))
375                 return ret;
376 
377         if (msg.msg_flags & MSG_CTRUNC)
378                 error(1, errno, "recvmsg notification: truncated");
379 
380         for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
381                 if (cmsg->cmsg_level == SOL_RDS &&
382                     cmsg->cmsg_type == RDS_CMSG_ZCOPY_COMPLETION) {
383 
384                         ck = (struct rds_zcopy_cookies *)CMSG_DATA(cmsg);
385                         completions += do_process_zerocopy_cookies(ck);
386                         ret = true;
387                         break;
388                 }
389                 error(0, 0, "ignoring cmsg at level %d type %d\n",
390                             cmsg->cmsg_level, cmsg->cmsg_type);
391         }
392         return ret;
393 }
394 
395 static bool do_recv_completion(int fd, int domain)
396 {
397         struct sock_extended_err *serr;
398         struct msghdr msg = {};
399         struct cmsghdr *cm;
400         uint32_t hi, lo, range;
401         int ret, zerocopy;
402         char control[100];
403 
404         if (domain == PF_RDS)
405                 return do_recvmsg_completion(fd);
406 
407         msg.msg_control = control;
408         msg.msg_controllen = sizeof(control);
409 
410         ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
411         if (ret == -1 && errno == EAGAIN)
412                 return false;
413         if (ret == -1)
414                 error(1, errno, "recvmsg notification");
415         if (msg.msg_flags & MSG_CTRUNC)
416                 error(1, errno, "recvmsg notification: truncated");
417 
418         cm = CMSG_FIRSTHDR(&msg);
419         if (!cm)
420                 error(1, 0, "cmsg: no cmsg");
421         if (!((cm->cmsg_level == SOL_IP && cm->cmsg_type == IP_RECVERR) ||
422               (cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_RECVERR) ||
423               (cm->cmsg_level == SOL_PACKET && cm->cmsg_type == PACKET_TX_TIMESTAMP)))
424                 error(1, 0, "serr: wrong type: %d.%d",
425                       cm->cmsg_level, cm->cmsg_type);
426 
427         serr = (void *) CMSG_DATA(cm);
428 
429         if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
430                 error(1, 0, "serr: wrong origin: %u", serr->ee_origin);
431         if (serr->ee_errno != 0)
432                 error(1, 0, "serr: wrong error code: %u", serr->ee_errno);
433 
434         hi = serr->ee_data;
435         lo = serr->ee_info;
436         range = hi - lo + 1;
437 
438         /* Detect notification gaps. These should not happen often, if at all.
439          * Gaps can occur due to drops, reordering and retransmissions.
440          */
441         if (cfg_verbose && lo != next_completion)
442                 fprintf(stderr, "gap: %u..%u does not append to %u\n",
443                         lo, hi, next_completion);
444         next_completion = hi + 1;
445 
446         zerocopy = !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED);
447         if (zerocopied == -1)
448                 zerocopied = zerocopy;
449         else if (zerocopied != zerocopy) {
450                 fprintf(stderr, "serr: inconsistent\n");
451                 zerocopied = zerocopy;
452         }
453 
454         if (cfg_verbose >= 2)
455                 fprintf(stderr, "completed: %u (h=%u l=%u)\n",
456                         range, hi, lo);
457 
458         completions += range;
459         return true;
460 }
461 
462 /* Read all outstanding messages on the errqueue */
463 static void do_recv_completions(int fd, int domain)
464 {
465         while (do_recv_completion(fd, domain)) {}
466         sends_since_notify = 0;
467 }
468 
469 /* Wait for all remaining completions on the errqueue */
470 static void do_recv_remaining_completions(int fd, int domain)
471 {
472         int64_t tstop = gettimeofday_ms() + cfg_waittime_ms;
473 
474         while (completions < expected_completions &&
475                gettimeofday_ms() < tstop) {
476                 if (do_poll(fd, domain == PF_RDS ? POLLIN : POLLERR))
477                         do_recv_completions(fd, domain);
478         }
479 
480         if (completions < expected_completions)
481                 fprintf(stderr, "missing notifications: %lu < %lu\n",
482                         completions, expected_completions);
483 }
484 
485 static void do_tx(int domain, int type, int protocol)
486 {
487         struct iovec iov[3] = { {0} };
488         struct sockaddr_ll laddr;
489         struct msghdr msg = {0};
490         struct ethhdr eth;
491         union {
492                 struct ipv6hdr ip6h;
493                 struct iphdr iph;
494         } nh;
495         uint64_t tstop;
496         int fd;
497 
498         fd = do_setup_tx(domain, type, protocol);
499 
500         if (domain == PF_PACKET) {
501                 uint16_t proto = cfg_family == PF_INET ? ETH_P_IP : ETH_P_IPV6;
502 
503                 /* sock_raw passes ll header as data */
504                 if (type == SOCK_RAW) {
505                         memset(eth.h_dest, 0x06, ETH_ALEN);
506                         memset(eth.h_source, 0x02, ETH_ALEN);
507                         eth.h_proto = htons(proto);
508                         iov[0].iov_base = &eth;
509                         iov[0].iov_len = sizeof(eth);
510                         msg.msg_iovlen++;
511                 }
512 
513                 /* both sock_raw and sock_dgram expect name */
514                 memset(&laddr, 0, sizeof(laddr));
515                 laddr.sll_family        = AF_PACKET;
516                 laddr.sll_ifindex       = cfg_ifindex;
517                 laddr.sll_protocol      = htons(proto);
518                 laddr.sll_halen         = ETH_ALEN;
519 
520                 memset(laddr.sll_addr, 0x06, ETH_ALEN);
521 
522                 msg.msg_name            = &laddr;
523                 msg.msg_namelen         = sizeof(laddr);
524         }
525 
526         /* packet and raw sockets with hdrincl must pass network header */
527         if (domain == PF_PACKET || protocol == IPPROTO_RAW) {
528                 if (cfg_family == PF_INET)
529                         iov[1].iov_len = setup_iph(&nh.iph, cfg_payload_len);
530                 else
531                         iov[1].iov_len = setup_ip6h(&nh.ip6h, cfg_payload_len);
532 
533                 iov[1].iov_base = (void *) &nh;
534                 msg.msg_iovlen++;
535         }
536 
537         if (domain == PF_RDS) {
538                 msg.msg_name = &cfg_dst_addr;
539                 msg.msg_namelen =  (cfg_dst_addr.ss_family == AF_INET ?
540                                     sizeof(struct sockaddr_in) :
541                                     sizeof(struct sockaddr_in6));
542         }
543 
544         iov[2].iov_base = payload;
545         iov[2].iov_len = cfg_payload_len;
546         msg.msg_iovlen++;
547         msg.msg_iov = &iov[3 - msg.msg_iovlen];
548 
549         tstop = gettimeofday_ms() + cfg_runtime_ms;
550         do {
551                 if (cfg_cork)
552                         do_sendmsg_corked(fd, &msg);
553                 else
554                         do_sendmsg(fd, &msg, cfg_zerocopy, domain);
555 
556                 if (cfg_zerocopy && sends_since_notify >= cfg_notification_limit)
557                         do_recv_completions(fd, domain);
558 
559                 while (!do_poll(fd, POLLOUT)) {
560                         if (cfg_zerocopy)
561                                 do_recv_completions(fd, domain);
562                 }
563 
564         } while (gettimeofday_ms() < tstop);
565 
566         if (cfg_zerocopy)
567                 do_recv_remaining_completions(fd, domain);
568 
569         if (close(fd))
570                 error(1, errno, "close");
571 
572         fprintf(stderr, "tx=%lu (%lu MB) txc=%lu zc=%c\n",
573                 packets, bytes >> 20, completions,
574                 zerocopied == 1 ? 'y' : 'n');
575 }
576 
577 static int do_setup_rx(int domain, int type, int protocol)
578 {
579         int fd;
580 
581         /* If tx over PF_PACKET, rx over PF_INET(6)/SOCK_RAW,
582          * to recv the only copy of the packet, not a clone
583          */
584         if (domain == PF_PACKET)
585                 error(1, 0, "Use PF_INET/SOCK_RAW to read");
586 
587         if (type == SOCK_RAW && protocol == IPPROTO_RAW)
588                 error(1, 0, "IPPROTO_RAW: not supported on Rx");
589 
590         fd = socket(domain, type, protocol);
591         if (fd == -1)
592                 error(1, errno, "socket r");
593 
594         do_setsockopt(fd, SOL_SOCKET, SO_RCVBUF, 1 << 21);
595         do_setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, 1 << 16);
596         do_setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, 1);
597 
598         if (bind(fd, (void *) &cfg_dst_addr, cfg_alen))
599                 error(1, errno, "bind");
600 
601         if (type == SOCK_STREAM) {
602                 if (listen(fd, 1))
603                         error(1, errno, "listen");
604                 fd = do_accept(fd);
605         }
606 
607         return fd;
608 }
609 
610 /* Flush all outstanding bytes for the tcp receive queue */
611 static void do_flush_tcp(int fd)
612 {
613         int ret;
614 
615         /* MSG_TRUNC flushes up to len bytes */
616         ret = recv(fd, NULL, 1 << 21, MSG_TRUNC | MSG_DONTWAIT);
617         if (ret == -1 && errno == EAGAIN)
618                 return;
619         if (ret == -1)
620                 error(1, errno, "flush");
621         if (!ret)
622                 return;
623 
624         packets++;
625         bytes += ret;
626 }
627 
628 /* Flush all outstanding datagrams. Verify first few bytes of each. */
629 static void do_flush_datagram(int fd, int type)
630 {
631         int ret, off = 0;
632         char buf[64];
633 
634         /* MSG_TRUNC will return full datagram length */
635         ret = recv(fd, buf, sizeof(buf), MSG_DONTWAIT | MSG_TRUNC);
636         if (ret == -1 && errno == EAGAIN)
637                 return;
638 
639         /* raw ipv4 return with header, raw ipv6 without */
640         if (cfg_family == PF_INET && type == SOCK_RAW) {
641                 off += sizeof(struct iphdr);
642                 ret -= sizeof(struct iphdr);
643         }
644 
645         if (ret == -1)
646                 error(1, errno, "recv");
647         if (ret != cfg_payload_len)
648                 error(1, 0, "recv: ret=%u != %u", ret, cfg_payload_len);
649         if (ret > sizeof(buf) - off)
650                 ret = sizeof(buf) - off;
651         if (memcmp(buf + off, payload, ret))
652                 error(1, 0, "recv: data mismatch");
653 
654         packets++;
655         bytes += cfg_payload_len;
656 }
657 
658 static void do_rx(int domain, int type, int protocol)
659 {
660         const int cfg_receiver_wait_ms = 400;
661         uint64_t tstop;
662         int fd;
663 
664         fd = do_setup_rx(domain, type, protocol);
665 
666         tstop = gettimeofday_ms() + cfg_runtime_ms + cfg_receiver_wait_ms;
667         do {
668                 if (type == SOCK_STREAM)
669                         do_flush_tcp(fd);
670                 else
671                         do_flush_datagram(fd, type);
672 
673                 do_poll(fd, POLLIN);
674 
675         } while (gettimeofday_ms() < tstop);
676 
677         if (close(fd))
678                 error(1, errno, "close");
679 
680         fprintf(stderr, "rx=%lu (%lu MB)\n", packets, bytes >> 20);
681 }
682 
683 static void do_test(int domain, int type, int protocol)
684 {
685         int i;
686 
687         if (cfg_cork && (domain == PF_PACKET || type != SOCK_DGRAM))
688                 error(1, 0, "can only cork udp sockets");
689 
690         do_setcpu(cfg_cpu);
691 
692         for (i = 0; i < IP_MAXPACKET; i++)
693                 payload[i] = 'a' + (i % 26);
694 
695         if (cfg_rx)
696                 do_rx(domain, type, protocol);
697         else
698                 do_tx(domain, type, protocol);
699 }
700 
701 static void usage(const char *filepath)
702 {
703         error(1, 0, "Usage: %s [options] <test>", filepath);
704 }
705 
706 static void parse_opts(int argc, char **argv)
707 {
708         const int max_payload_len = sizeof(payload) -
709                                     sizeof(struct ipv6hdr) -
710                                     sizeof(struct tcphdr) -
711                                     40 /* max tcp options */;
712         int c;
713         char *daddr = NULL, *saddr = NULL;
714         char *cfg_test;
715 
716         cfg_payload_len = max_payload_len;
717 
718         while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vz")) != -1) {
719                 switch (c) {
720                 case '4':
721                         if (cfg_family != PF_UNSPEC)
722                                 error(1, 0, "Pass one of -4 or -6");
723                         cfg_family = PF_INET;
724                         cfg_alen = sizeof(struct sockaddr_in);
725                         break;
726                 case '6':
727                         if (cfg_family != PF_UNSPEC)
728                                 error(1, 0, "Pass one of -4 or -6");
729                         cfg_family = PF_INET6;
730                         cfg_alen = sizeof(struct sockaddr_in6);
731                         break;
732                 case 'c':
733                         cfg_cork = strtol(optarg, NULL, 0);
734                         break;
735                 case 'C':
736                         cfg_cpu = strtol(optarg, NULL, 0);
737                         break;
738                 case 'D':
739                         daddr = optarg;
740                         break;
741                 case 'i':
742                         cfg_ifindex = if_nametoindex(optarg);
743                         if (cfg_ifindex == 0)
744                                 error(1, errno, "invalid iface: %s", optarg);
745                         break;
746                 case 'l':
747                         cfg_notification_limit = strtoul(optarg, NULL, 0);
748                         break;
749                 case 'm':
750                         cfg_cork_mixed = true;
751                         break;
752                 case 'p':
753                         cfg_port = strtoul(optarg, NULL, 0);
754                         break;
755                 case 'r':
756                         cfg_rx = true;
757                         break;
758                 case 's':
759                         cfg_payload_len = strtoul(optarg, NULL, 0);
760                         break;
761                 case 'S':
762                         saddr = optarg;
763                         break;
764                 case 't':
765                         cfg_runtime_ms = 200 + strtoul(optarg, NULL, 10) * 1000;
766                         break;
767                 case 'v':
768                         cfg_verbose++;
769                         break;
770                 case 'z':
771                         cfg_zerocopy = true;
772                         break;
773                 }
774         }
775 
776         cfg_test = argv[argc - 1];
777         if (strcmp(cfg_test, "rds") == 0) {
778                 if (!daddr)
779                         error(1, 0, "-D <server addr> required for PF_RDS\n");
780                 if (!cfg_rx && !saddr)
781                         error(1, 0, "-S <client addr> required for PF_RDS\n");
782         }
783         setup_sockaddr(cfg_family, daddr, &cfg_dst_addr);
784         setup_sockaddr(cfg_family, saddr, &cfg_src_addr);
785 
786         if (cfg_payload_len > max_payload_len)
787                 error(1, 0, "-s: payload exceeds max (%d)", max_payload_len);
788         if (cfg_cork_mixed && (!cfg_zerocopy || !cfg_cork))
789                 error(1, 0, "-m: cork_mixed requires corking and zerocopy");
790 
791         if (optind != argc - 1)
792                 usage(argv[0]);
793 }
794 
795 int main(int argc, char **argv)
796 {
797         const char *cfg_test;
798 
799         parse_opts(argc, argv);
800 
801         cfg_test = argv[argc - 1];
802 
803         if (!strcmp(cfg_test, "packet"))
804                 do_test(PF_PACKET, SOCK_RAW, 0);
805         else if (!strcmp(cfg_test, "packet_dgram"))
806                 do_test(PF_PACKET, SOCK_DGRAM, 0);
807         else if (!strcmp(cfg_test, "raw"))
808                 do_test(cfg_family, SOCK_RAW, IPPROTO_EGP);
809         else if (!strcmp(cfg_test, "raw_hdrincl"))
810                 do_test(cfg_family, SOCK_RAW, IPPROTO_RAW);
811         else if (!strcmp(cfg_test, "tcp"))
812                 do_test(cfg_family, SOCK_STREAM, 0);
813         else if (!strcmp(cfg_test, "udp"))
814                 do_test(cfg_family, SOCK_DGRAM, 0);
815         else if (!strcmp(cfg_test, "rds"))
816                 do_test(PF_RDS, SOCK_SEQPACKET, 0);
817         else
818                 error(1, 0, "unknown cfg_test %s", cfg_test);
819 
820         return 0;
821 }
822 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php