cd5385029f
xdping allows us to get latency estimates from XDP. Output looks like this: ./xdping -I eth4 192.168.55.8 Setting up XDP for eth4, please wait... XDP setup disrupts network connectivity, hit Ctrl+C to quit Normal ping RTT data [Ignore final RTT; it is distorted by XDP using the reply] PING 192.168.55.8 (192.168.55.8) from 192.168.55.7 eth4: 56(84) bytes of data. 64 bytes from 192.168.55.8: icmp_seq=1 ttl=64 time=0.302 ms 64 bytes from 192.168.55.8: icmp_seq=2 ttl=64 time=0.208 ms 64 bytes from 192.168.55.8: icmp_seq=3 ttl=64 time=0.163 ms 64 bytes from 192.168.55.8: icmp_seq=8 ttl=64 time=0.275 ms 4 packets transmitted, 4 received, 0% packet loss, time 3079ms rtt min/avg/max/mdev = 0.163/0.237/0.302/0.054 ms XDP RTT data: 64 bytes from 192.168.55.8: icmp_seq=5 ttl=64 time=0.02808 ms 64 bytes from 192.168.55.8: icmp_seq=6 ttl=64 time=0.02804 ms 64 bytes from 192.168.55.8: icmp_seq=7 ttl=64 time=0.02815 ms 64 bytes from 192.168.55.8: icmp_seq=8 ttl=64 time=0.02805 ms The xdping program loads the associated xdping_kern.o BPF program and attaches it to the specified interface. If run in client mode (the default), it will add a map entry keyed by the target IP address; this map will store RTT measurements, current sequence number etc. Finally in client mode the ping command is executed, and the xdping BPF program will use the last ICMP reply, reformulate it as an ICMP request with the next sequence number and XDP_TX it. After the reply to that request is received we can measure RTT and repeat until the desired number of measurements is made. This is why the sequence numbers in the normal ping are 1, 2, 3 and 8. We XDP_TX a modified version of ICMP reply 4 and keep doing this until we get the 4 replies we need; hence the networking stack only sees reply 8, where we have XDP_PASSed it upstream since we are done. In server mode (-s), xdping simply takes ICMP requests and replies to them in XDP rather than passing the request up to the networking stack. 
No map entry is required. xdping can be run in native XDP mode (the default, or specified via -N) or in skb mode (-S). A test program test_xdping.sh exercises some of these options. Note that native XDP does not seem to XDP_TX for veths, hence -N is not tested. Looking at the code, it looks like XDP_TX is supported so I'm not sure if that's expected. Running xdping in native mode for ixgbe as both client and server works fine. Changes since v4 - close fds on cleanup (Song Liu) Changes since v3 - fixed seq to be __be16 (Song Liu) - fixed fd checks in xdping.c (Song Liu) Changes since v2 - updated commit message to explain why seq number of last ICMP reply is 8 not 4 (Song Liu) - updated types of seq number, raddr and eliminated csum variable in xdpclient/xdpserver functions as it was not needed (Song Liu) - added XDPING_DEFAULT_COUNT definition and usage specification of default/max counts (Song Liu) Changes since v1 - moved from RFC to PATCH - removed unused variable in ipv4_csum() (Song Liu) - refactored ICMP checks into icmp_check() function called by client and server programs and reworked client and server programs due to lack of shared code (Song Liu) - added checks to ensure that SKB and native mode are not requested together (Song Liu) Signed-off-by: Alan Maguire <alan.maguire@oracle.com> Acked-by: Song Liu <songliubraving@fb.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
259 lines
5.8 KiB
C
259 lines
5.8 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. */
|
|
|
|
#include <linux/bpf.h>
|
|
#include <linux/if_link.h>
|
|
#include <arpa/inet.h>
|
|
#include <assert.h>
|
|
#include <errno.h>
|
|
#include <signal.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <unistd.h>
|
|
#include <libgen.h>
|
|
#include <sys/resource.h>
|
|
#include <net/if.h>
|
|
#include <sys/types.h>
|
|
#include <sys/socket.h>
|
|
#include <netdb.h>
|
|
|
|
#include "bpf/bpf.h"
|
|
#include "bpf/libbpf.h"
|
|
|
|
#include "xdping.h"
|
|
|
|
/* Index of the interface named by -I; set in main() and read by cleanup(). */
static int ifindex;
|
|
/*
 * Flags handed to bpf_set_link_xdp_fd() on attach and detach; main() ORs in
 * XDP_FLAGS_DRV_MODE for -N or XDP_FLAGS_SKB_MODE for -S.
 */
static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
|
|
|
|
static void cleanup(int sig)
|
|
{
|
|
bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
|
|
if (sig)
|
|
exit(1);
|
|
}
|
|
|
|
static int get_stats(int fd, __u16 count, __u32 raddr)
|
|
{
|
|
struct pinginfo pinginfo = { 0 };
|
|
char inaddrbuf[INET_ADDRSTRLEN];
|
|
struct in_addr inaddr;
|
|
__u16 i;
|
|
|
|
inaddr.s_addr = raddr;
|
|
|
|
printf("\nXDP RTT data:\n");
|
|
|
|
if (bpf_map_lookup_elem(fd, &raddr, &pinginfo)) {
|
|
perror("bpf_map_lookup elem: ");
|
|
return 1;
|
|
}
|
|
|
|
for (i = 0; i < count; i++) {
|
|
if (pinginfo.times[i] == 0)
|
|
break;
|
|
|
|
printf("64 bytes from %s: icmp_seq=%d ttl=64 time=%#.5f ms\n",
|
|
inet_ntop(AF_INET, &inaddr, inaddrbuf,
|
|
sizeof(inaddrbuf)),
|
|
count + i + 1,
|
|
(double)pinginfo.times[i]/1000000);
|
|
}
|
|
|
|
if (i < count) {
|
|
fprintf(stderr, "Expected %d samples, got %d.\n", count, i);
|
|
return 1;
|
|
}
|
|
|
|
bpf_map_delete_elem(fd, &raddr);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void show_usage(const char *prog)
|
|
{
|
|
fprintf(stderr,
|
|
"usage: %s [OPTS] -I interface destination\n\n"
|
|
"OPTS:\n"
|
|
" -c count Stop after sending count requests\n"
|
|
" (default %d, max %d)\n"
|
|
" -I interface interface name\n"
|
|
" -N Run in driver mode\n"
|
|
" -s Server mode\n"
|
|
" -S Run in skb mode\n",
|
|
prog, XDPING_DEFAULT_COUNT, XDPING_MAX_COUNT);
|
|
}
|
|
|
|
/*
 * xdping entry point.
 *
 * Parses the command line, loads "<argv[0]>_kern.o", picks the "xdpclient"
 * program (or "xdpserver" with -s) and attaches it to the interface given
 * with -I.  Then:
 *
 *  - server mode: sleeps until interrupted; the SIGINT/SIGTERM handler
 *    detaches the program and exits.
 *  - client mode: seeds the ping map entry for the destination, runs the
 *    regular ping command and finally prints the RTTs found in the map.
 *
 * Returns 0 on success and 1 on any failure.
 */
int main(int argc, char **argv)
{
	__u32 mode_flags = XDP_FLAGS_DRV_MODE | XDP_FLAGS_SKB_MODE;
	struct addrinfo *a, hints = { .ai_family = AF_INET };
	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
	__u16 count = XDPING_DEFAULT_COUNT;
	struct pinginfo pinginfo = { 0 };
	const char *optstr = "c:I:NsS";
	struct bpf_program *main_prog;
	int prog_fd = -1, map_fd = -1;
	struct sockaddr_in rin;
	struct bpf_object *obj;
	struct bpf_map *map;
	char *ifname = NULL;
	char filename[256];
	int opt, ret = 1;
	__u32 raddr = 0;
	int server = 0;
	char cmd[256];
	long count_arg;
	char *endp;
	int len;

	while ((opt = getopt(argc, argv, optstr)) != -1) {
		switch (opt) {
		case 'c':
			/*
			 * Validate in a wide type before narrowing to __u16;
			 * otherwise e.g. 65537 would wrap to 1 and pass the
			 * range check.
			 */
			errno = 0;
			count_arg = strtol(optarg, &endp, 10);
			if (errno || endp == optarg || *endp != '\0' ||
			    count_arg < 1 || count_arg > XDPING_MAX_COUNT) {
				fprintf(stderr,
					"min count is 1, max count is %d\n",
					XDPING_MAX_COUNT);
				return 1;
			}
			count = (__u16)count_arg;
			break;
		case 'I':
			ifname = optarg;
			ifindex = if_nametoindex(ifname);
			if (!ifindex) {
				fprintf(stderr, "Could not get interface %s\n",
					ifname);
				return 1;
			}
			break;
		case 'N':
			xdp_flags |= XDP_FLAGS_DRV_MODE;
			break;
		case 's':
			/* use server program */
			server = 1;
			break;
		case 'S':
			xdp_flags |= XDP_FLAGS_SKB_MODE;
			break;
		default:
			show_usage(basename(argv[0]));
			return 1;
		}
	}

	/* An interface is always required; a destination only for clients. */
	if (!ifname) {
		show_usage(basename(argv[0]));
		return 1;
	}
	if (!server && optind == argc) {
		show_usage(basename(argv[0]));
		return 1;
	}

	if ((xdp_flags & mode_flags) == mode_flags) {
		fprintf(stderr, "-N or -S can be specified, not both.\n");
		show_usage(basename(argv[0]));
		return 1;
	}

	if (!server) {
		/* Only supports IPv4; see hints initialization above. */
		if (getaddrinfo(argv[optind], NULL, &hints, &a) || !a) {
			fprintf(stderr, "Could not resolve %s\n", argv[optind]);
			return 1;
		}
		memcpy(&rin, a->ai_addr, sizeof(rin));
		raddr = rin.sin_addr.s_addr;
		freeaddrinfo(a);

		/*
		 * Build the ping command now so that an over-long command
		 * line is rejected before any XDP setup happens.
		 *
		 * NOTE(review): ifname and the destination end up in a shell
		 * command run via system().  Both have already been accepted
		 * by if_nametoindex()/getaddrinfo(), but a fork()/exec() with
		 * an explicit argv would avoid the shell altogether.
		 */
		len = snprintf(cmd, sizeof(cmd), "ping -c %d -I %s %s",
			       count, ifname, argv[optind]);
		if (len < 0 || (size_t)len >= sizeof(cmd)) {
			fprintf(stderr, "ping command line too long\n");
			return 1;
		}
	}

	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
		perror("setrlimit(RLIMIT_MEMLOCK)");
		return 1;
	}

	/* The BPF object is looked up next to the binary: <argv[0]>_kern.o */
	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);

	if (bpf_prog_load(filename, BPF_PROG_TYPE_XDP, &obj, &prog_fd)) {
		fprintf(stderr, "load of %s failed\n", filename);
		return 1;
	}

	main_prog = bpf_object__find_program_by_title(obj,
						      server ? "xdpserver" :
							       "xdpclient");
	if (main_prog)
		prog_fd = bpf_program__fd(main_prog);
	if (!main_prog || prog_fd < 0) {
		fprintf(stderr, "could not find xdping program\n");
		return 1;
	}

	/* The ping map is taken to be the first map of the object. */
	map = bpf_map__next(NULL, obj);
	if (map)
		map_fd = bpf_map__fd(map);
	if (!map || map_fd < 0) {
		fprintf(stderr, "Could not find ping map\n");
		goto done;
	}

	/* From here on make sure an interrupt detaches the XDP program. */
	signal(SIGINT, cleanup);
	signal(SIGTERM, cleanup);

	printf("Setting up XDP for %s, please wait...\n", ifname);

	printf("XDP setup disrupts network connectivity, hit Ctrl+C to quit\n");

	if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
		fprintf(stderr, "Link set xdp fd failed for %s\n", ifname);
		goto done;
	}

	if (server) {
		close(prog_fd);
		close(map_fd);
		printf("Running server on %s; press Ctrl+C to exit...\n",
		       ifname);
		/*
		 * Sleep instead of spinning; SIGINT/SIGTERM run cleanup(),
		 * which detaches the program and exits.
		 */
		for (;;)
			pause();
	}

	/* Start xdping-ing from last regular ping reply, e.g. for a count
	 * of 10 ICMP requests, we start xdping-ing using reply with seq number
	 * 10.  The reason the last "real" ping RTT is much higher is that
	 * the ping program sees the ICMP reply associated with the last
	 * XDP-generated packet, so ping doesn't get a reply until XDP is done.
	 */
	pinginfo.seq = htons(count);
	pinginfo.count = count;

	if (bpf_map_update_elem(map_fd, &raddr, &pinginfo, BPF_ANY)) {
		fprintf(stderr, "could not communicate with BPF map: %s\n",
			strerror(errno));
		cleanup(0);
		goto done;
	}

	/* We need to wait for XDP setup to complete. */
	sleep(10);

	printf("\nNormal ping RTT data\n");
	printf("[Ignore final RTT; it is distorted by XDP using the reply]\n");

	ret = system(cmd);

	/*
	 * system() yields a wait status, not an exit code: returning it
	 * unchanged would let e.g. 0x100 (ping exited with 1) be truncated
	 * to exit status 0.  Collapse any failure to 1.
	 */
	if (!ret)
		ret = get_stats(map_fd, count, raddr);
	else
		ret = 1;

	cleanup(0);

done:
	/* 0 is a valid descriptor, so only -1/negative means "not open". */
	if (prog_fd >= 0)
		close(prog_fd);
	if (map_fd >= 0)
		close(map_fd);

	return ret;
}
|