1
0
mirror of https://github.com/samba-team/samba.git synced 2025-01-03 01:18:10 +03:00
samba-mirror/ctdb/common/system_socket.c
Martin Schwenke 0536d7a98b ctdb-common: Reimplement ctdb_sys_have_ip() using new infrastructure
It can now be used when net.ipv4.ip_nonlocal_bind=1.

This makes the recovery daemon's local IP verification inefficient.
It can be optimised in a subsequent commit.

Fall back to bind() if unable to fetch IPs.

Signed-off-by: Martin Schwenke <mschwenke@ddn.com>
Reviewed-by: John Mulligan <jmulligan@redhat.com>
Reviewed-by: Anoop C S <anoopcs@samba.org>
2024-10-07 15:58:38 +00:00

1264 lines
27 KiB
C

/*
ctdb system specific code to manage raw sockets on linux
Copyright (C) Ronnie Sahlberg 2007
Copyright (C) Andrew Tridgell 2007
Copyright (C) Marc Dequènes (Duck) 2009
Copyright (C) Volker Lendecke 2012
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "replace.h"
/*
* Use BSD struct tcphdr field names for portability. Modern glibc
* makes them available by default via <netinet/tcp.h> but older glibc
* requires __FAVOR_BSD to be defined.
*
* __FAVOR_BSD is normally defined in <features.h> if _DEFAULT_SOURCE
* (new) or _BSD_SOURCE (now deprecated) is set and _GNU_SOURCE is not
* set. Including "replace.h" above causes <features.h> to be
* indirectly included and this will not set __FAVOR_BSD because
* _GNU_SOURCE is set in Samba's "config.h" (which is included by
* "replace.h").
*
* Therefore, set __FAVOR_BSD by hand below.
*/
#define __FAVOR_BSD 1
#include "system/network.h"
#ifdef HAVE_NETINET_IF_ETHER_H
#include <netinet/if_ether.h>
#endif
#ifdef HAVE_NETINET_IP6_H
#include <netinet/ip6.h>
#endif
#ifdef HAVE_NETINET_ICMP6_H
#include <netinet/icmp6.h>
#endif
#ifdef HAVE_LINUX_IF_PACKET_H
#include <linux/if_packet.h>
#endif
#ifndef ETHERTYPE_IP6
#define ETHERTYPE_IP6 0x86dd
#endif
#include <talloc.h>
#include "lib/util/debug.h"
#include "lib/util/blocking.h"
#include "protocol/protocol.h"
#include "protocol/protocol_util.h"
#include "common/logging.h"
#include "common/system_socket.h"
/*
 * Sum a buffer as a sequence of big-endian (network order) 16-bit
 * words
 *
 * data: start of the buffer
 * n:    number of bytes in the buffer
 *
 * Returns the un-folded 32-bit sum; folding the carries and taking
 * the complement is left to the caller.  A trailing odd byte is
 * treated as the high-order byte of a final word whose low-order byte
 * is zero (see RFC 1071).
 */
static uint32_t uint16_checksum(const uint8_t *data, size_t n)
{
	uint32_t sum = 0;
	uint16_t value;

	while (n >= 2) {
		/* memcpy() avoids an unaligned 16-bit load */
		memcpy(&value, data, 2);
		sum += (uint32_t)ntohs(value);
		data += 2;
		n -= 2;
	}

	if (n == 1) {
		/*
		 * Shift explicitly: ntohs() of a lone byte only
		 * produces this value on little-endian hosts
		 */
		sum += (uint32_t)*data << 8;
	}

	return sum;
}
/*
 * State for checking whether IP addresses are assigned to local
 * interfaces
 */
struct ctdb_sys_local_ips_context {
/* Interface address list: filled in by getifaddrs(), released by freeifaddrs() */
struct ifaddrs *ifa;
};
/*
 * talloc destructor for a local IPs context: release the interface
 * address list and clear the stale pointer
 */
static int ctdb_sys_local_ips_destructor(
	struct ctdb_sys_local_ips_context *ips_ctx)
{
	struct ifaddrs *list = ips_ctx->ifa;

	ips_ctx->ifa = NULL;
	freeifaddrs(list);

	return 0;
}
/*
 * Allocate a local IPs context as a talloc child of ctx and fill it
 * with the current interface address list from getifaddrs()
 *
 * On success returns 0 and stores the context in *ips_ctx; the
 * address list is released by a destructor when the context is freed.
 * On failure returns ENOMEM or the errno from getifaddrs() and leaves
 * *ips_ctx untouched.
 */
int ctdb_sys_local_ips_init(TALLOC_CTX *ctx,
			    struct ctdb_sys_local_ips_context **ips_ctx)
{
	struct ctdb_sys_local_ips_context *state;

	state = talloc(ctx, struct ctdb_sys_local_ips_context);
	if (state == NULL) {
		return ENOMEM;
	}

	if (getifaddrs(&state->ifa) != 0) {
		int err = errno;

		/* The destructor is not installed yet: there is no list to free */
		talloc_free(state);
		return err;
	}

	talloc_set_destructor(state, ctdb_sys_local_ips_destructor);
	*ips_ctx = state;

	return 0;
}
/*
 * Check whether addr matches one of the IPv4/IPv6 addresses in the
 * interface list held by ips_ctx
 *
 * Returns true on the first match.  Returns false if nothing matches
 * or if an interface address can not be converted to a
 * ctdb_sock_addr.
 */
bool ctdb_sys_local_ip_check(const struct ctdb_sys_local_ips_context *ips_ctx,
			     const ctdb_sock_addr *addr)
{
	struct ifaddrs *cur;

	for (cur = ips_ctx->ifa; cur != NULL; cur = cur->ifa_next) {
		struct sockaddr *sa = cur->ifa_addr;
		ctdb_sock_addr candidate;

		if (sa == NULL) {
			continue;
		}

		/* Ignore non-IPv4/IPv6 interfaces */
		if (sa->sa_family != AF_INET && sa->sa_family != AF_INET6) {
			continue;
		}

		if (ctdb_sock_addr_from_sockaddr(sa, &candidate) != 0) {
			return false;
		}

		if (ctdb_sock_addr_same_ip(&candidate, addr)) {
			return true;
		}
	}

	return false;
}
/*
 * Check whether an IP address can be bound on this host
 *
 * Creates a TCP socket of the address's family and tries to bind() it
 * to the address with the port cleared.  The caller's address is not
 * modified.
 *
 * Returns true if bind() succeeds.  Returns false if the socket can
 * not be created, bind() fails, or the address is neither IPv4 nor
 * IPv6.
 */
bool ctdb_sys_bind_ip_check(const ctdb_sock_addr *_addr)
{
	/* Work on a copy so that the port can be cleared */
	ctdb_sock_addr addr = *_addr;
	socklen_t addrlen;
	int ret;
	int s;

	switch (addr.sa.sa_family) {
	case AF_INET:
		addr.ip.sin_port = 0;
		addrlen = sizeof(struct sockaddr_in);
		break;
	case AF_INET6:
		addr.ip6.sin6_port = 0;
		addrlen = sizeof(struct sockaddr_in6);
		break;
	default:
		/* No meaningful address length for other families */
		return false;
	}

	s = socket(addr.sa.sa_family, SOCK_STREAM, IPPROTO_TCP);
	if (s == -1) {
		return false;
	}

	ret = bind(s, &addr.sa, addrlen);
	close(s);

	return ret == 0;
}
/*
 * See if the given IP is currently on an interface
 *
 * The local interface addresses are fetched and searched.  If they
 * can not be fetched then fall back to checking whether the address
 * can be bound.
 */
bool ctdb_sys_have_ip(const ctdb_sock_addr *addr)
{
	struct ctdb_sys_local_ips_context *ips_ctx = NULL;
	bool found;

	if (ctdb_sys_local_ips_init(NULL, &ips_ctx) != 0) {
		DBG_DEBUG("Failed to get local addresses, depending on bind\n");
		return ctdb_sys_bind_ip_check(addr);
	}

	found = ctdb_sys_local_ip_check(ips_ctx, addr);
	talloc_free(ips_ctx);

	return found;
}
/*
 * Simple TCP checksum over IPv4 - assumes data is a multiple of 2
 * bytes long
 *
 * Sums the data together with a pseudo-header made of ip's source and
 * destination addresses, its protocol number and n.  The folded,
 * complemented result is returned in network byte order, with a zero
 * result replaced by 0xFFFF.
 */
static uint16_t ip_checksum(uint8_t *data, size_t n, struct ip *ip)
{
	uint32_t acc;
	uint16_t cksum;

	/* Pseudo-header */
	acc = uint16_checksum((uint8_t *)&ip->ip_src, sizeof(ip->ip_src));
	acc += uint16_checksum((uint8_t *)&ip->ip_dst, sizeof(ip->ip_dst));
	acc += ip->ip_p + n;

	/* TCP header/payload */
	acc += uint16_checksum(data, n);

	/* Fold the carries back in; the second pass absorbs a carry from the first */
	acc = (acc >> 16) + (acc & 0xFFFF);
	acc = (acc >> 16) + (acc & 0xFFFF);

	cksum = htons(acc);
	cksum = ~cksum;

	return (cksum == 0) ? 0xFFFF : cksum;
}
/*
 * Checksum over an IPv6 pseudo-header and n bytes of data
 *
 * The pseudo-header is made of ip6's source and destination
 * addresses, n as a 32-bit length and ip6's next header value.  The
 * folded, complemented result is returned in network byte order, with
 * a zero result replaced by 0xFFFF.
 */
static uint16_t ip6_checksum(uint8_t *data, size_t n, struct ip6_hdr *ip6)
{
	uint16_t pseudo[3];
	uint32_t nlen = htonl(n);
	uint32_t acc = 0;
	uint16_t cksum;

	/* Length: the two 16-bit halves of the network-order value */
	pseudo[0] = nlen & UINT16_MAX;
	pseudo[1] = (nlen >> 16) & UINT16_MAX;
	/* ip6_nxt is only 8 bits, so fits comfortably into a uint16_t */
	pseudo[2] = htons(ip6->ip6_nxt);

	acc += uint16_checksum((uint8_t *)&ip6->ip6_src, 16);
	acc += uint16_checksum((uint8_t *)&ip6->ip6_dst, 16);
	acc += uint16_checksum((uint8_t *)pseudo, sizeof(pseudo));
	acc += uint16_checksum(data, n);

	/* Fold the carries back in; the second pass absorbs a carry from the first */
	acc = (acc >> 16) + (acc & 0xFFFF);
	acc = (acc >> 16) + (acc & 0xFFFF);

	cksum = htons(acc);
	cksum = ~cksum;

	return (cksum == 0) ? 0xFFFF : cksum;
}
/*
* Send gratuitous ARP request/reply or IPv6 neighbor advertisement
*/
#ifdef HAVE_PACKETSOCKET
/*
 * Create IPv4 ARP requests/replies or IPv6 neighbour advertisement
 * packets
 *
 * The *_STRUCT_SIZE macros are the sizes of the headers making up
 * each frame.  The *_BUFFER_SIZE macros round those up to at least 64
 * bytes.  All expansions are parenthesised so that they can be used
 * safely inside larger expressions.
 */
#define ARP_STRUCT_SIZE (sizeof(struct ether_header) + \
			 sizeof(struct ether_arp))

#define IP6_NA_STRUCT_SIZE (sizeof(struct ether_header) + \
			    sizeof(struct ip6_hdr) + \
			    sizeof(struct nd_neighbor_advert) + \
			    sizeof(struct nd_opt_hdr) + \
			    sizeof(struct ether_addr))

#define ARP_BUFFER_SIZE (MAX(ARP_STRUCT_SIZE, 64))

#define IP6_NA_BUFFER_SIZE (MAX(IP6_NA_STRUCT_SIZE, 64))
/*
 * Build an Ethernet frame containing a gratuitous ARP request or
 * reply for addr, sent from hwaddr to the Ethernet broadcast address
 *
 * buffer, buflen: output buffer and its size
 * addr:           IPv4 address being announced
 * hwaddr:         hardware address to announce it at
 * reply:          false builds an ARP request, true an ARP reply
 * ether_dhost:    set to point at the destination address in buffer
 * len:            set to the length of the frame
 *
 * Returns 0 on success, EINVAL if addr is not IPv4, EMSGSIZE if
 * buffer is too small.
 */
static int arp_build(uint8_t *buffer,
		     size_t buflen,
		     const struct sockaddr_in *addr,
		     const struct ether_addr *hwaddr,
		     bool reply,
		     struct ether_addr **ether_dhost,
		     size_t *len)
{
	const size_t frame_len = ARP_BUFFER_SIZE;
	struct ether_header *eth = NULL;
	struct ether_arp *arp = NULL;

	if (addr->sin_family != AF_INET) {
		return EINVAL;
	}
	if (buflen < frame_len) {
		return EMSGSIZE;
	}

	memset(buffer, 0, frame_len);

	/* Ethernet header: broadcast from our hardware address */
	eth = (struct ether_header *)buffer;
	memset(eth->ether_dhost, 0xff, ETH_ALEN);
	memcpy(eth->ether_shost, hwaddr, ETH_ALEN);
	eth->ether_type = htons(ETHERTYPE_ARP);

	/* ARP header */
	arp = (struct ether_arp *)(buffer + sizeof(struct ether_header));
	arp->ea_hdr.ar_hrd = htons(ARPHRD_ETHER);
	arp->ea_hdr.ar_pro = htons(ETH_P_IP);
	arp->ea_hdr.ar_hln = ETH_ALEN;
	arp->ea_hdr.ar_pln = sizeof(arp->arp_spa);
	if (reply) {
		arp->ea_hdr.ar_op = htons(ARPOP_REPLY);
	} else {
		arp->ea_hdr.ar_op = htons(ARPOP_REQUEST);
	}

	/* Sender and target protocol addresses are both the announced IP */
	memcpy(arp->arp_sha, hwaddr, ETH_ALEN);
	memcpy(arp->arp_spa, &addr->sin_addr, sizeof(arp->arp_spa));
	memcpy(arp->arp_tpa, &addr->sin_addr, sizeof(arp->arp_tpa));

	/* Target hardware address: ours in a reply, zero in a request */
	if (reply) {
		memcpy(arp->arp_tha, hwaddr, ETH_ALEN);
	} else {
		memset(arp->arp_tha, 0, ETH_ALEN);
	}

	*ether_dhost = (struct ether_addr *)eth->ether_dhost;
	*len = frame_len;

	return 0;
}
/*
 * Build an Ethernet frame containing an IPv6 neighbour advertisement
 * for addr with a target link-layer address option of hwaddr
 *
 * The frame is addressed to Ethernet multicast 33:33:00:00:00:01 and
 * the IPv6 all-nodes multicast address ff02::1.
 *
 * buffer, buflen: output buffer and its size
 * addr:           IPv6 address being advertised
 * hwaddr:         hardware address to advertise it at
 * ether_dhost:    set to point at the destination address in buffer
 * len:            set to the length of the frame
 *
 * Returns 0 on success, EINVAL if addr is not IPv6, EMSGSIZE if
 * buffer is too small, EIO if the multicast address can not be
 * parsed.
 */
static int ip6_na_build(uint8_t *buffer,
			size_t buflen,
			const struct sockaddr_in6 *addr,
			const struct ether_addr *hwaddr,
			struct ether_addr **ether_dhost,
			size_t *len)
{
	const size_t frame_len = IP6_NA_BUFFER_SIZE;
	/* Everything following the IPv6 header */
	const size_t payload_len = sizeof(struct nd_neighbor_advert) +
				   sizeof(struct nd_opt_hdr) +
				   ETH_ALEN;
	uint8_t *cursor = buffer;
	struct ether_header *eth = NULL;
	struct ip6_hdr *ip6 = NULL;
	struct nd_neighbor_advert *nd_na = NULL;
	struct nd_opt_hdr *nd_oh = NULL;

	if (addr->sin6_family != AF_INET6) {
		return EINVAL;
	}
	if (buflen < frame_len) {
		return EMSGSIZE;
	}

	memset(buffer, 0, frame_len);

	/* Ethernet header */
	eth = (struct ether_header *)cursor;
	cursor += sizeof(struct ether_header);
	/*
	 * Ethernet multicast: 33:33:00:00:00:01 (see RFC2464,
	 * section 7) - the remaining bytes rely on the memset above
	 */
	eth->ether_dhost[0] = 0x33;
	eth->ether_dhost[1] = 0x33;
	eth->ether_dhost[5] = 0x01;
	memcpy(eth->ether_shost, hwaddr, ETH_ALEN);
	eth->ether_type = htons(ETHERTYPE_IP6);

	/* IPv6 header */
	ip6 = (struct ip6_hdr *)cursor;
	cursor += sizeof(struct ip6_hdr);
	ip6->ip6_vfc = 6 << 4;
	ip6->ip6_plen = htons(payload_len);
	ip6->ip6_nxt = IPPROTO_ICMPV6;
	ip6->ip6_hlim = 255;
	ip6->ip6_src = addr->sin6_addr;
	/* all-nodes multicast */
	if (inet_pton(AF_INET6, "ff02::1", &ip6->ip6_dst) != 1) {
		return EIO;
	}

	/* Neighbour advertisement */
	nd_na = (struct nd_neighbor_advert *)cursor;
	cursor += sizeof(struct nd_neighbor_advert);
	nd_na->nd_na_type = ND_NEIGHBOR_ADVERT;
	nd_na->nd_na_code = 0;
	nd_na->nd_na_flags_reserved = ND_NA_FLAG_OVERRIDE;
	nd_na->nd_na_target = addr->sin6_addr;

	/* Option: Target link-layer address */
	nd_oh = (struct nd_opt_hdr *)cursor;
	cursor += sizeof(struct nd_opt_hdr);
	nd_oh->nd_opt_type = ND_OPT_TARGET_LINKADDR;
	nd_oh->nd_opt_len = 1; /* multiple of 8 octets */
	memcpy(cursor, hwaddr, ETH_ALEN);

	/* Checksum covers the advertisement and its option */
	nd_na->nd_na_cksum = ip6_checksum((uint8_t *)nd_na, payload_len, ip6);

	*ether_dhost = (struct ether_addr *)eth->ether_dhost;
	*len = frame_len;

	return 0;
}
int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
{
int s;
struct sockaddr_ll sall = {0};
struct ifreq if_hwaddr = {
.ifr_ifru = {
.ifru_flags = 0
},
};
uint8_t buffer[MAX(ARP_BUFFER_SIZE, IP6_NA_BUFFER_SIZE)];
struct ifreq ifr = {
.ifr_ifru = {
.ifru_flags = 0
},
};
struct ether_addr *hwaddr = NULL;
struct ether_addr *ether_dhost = NULL;
size_t len = 0;
int ret = 0;
s = socket(AF_PACKET, SOCK_RAW, 0);
if (s == -1) {
ret = errno;
DBG_ERR("Failed to open raw socket\n");
return ret;
}
DBG_DEBUG("Created SOCKET FD:%d for sending arp\n", s);
/* Find interface */
strlcpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name));
if (ioctl(s, SIOCGIFINDEX, &ifr) < 0) {
ret = errno;
DBG_ERR("Interface '%s' not found\n", iface);
goto fail;
}
/* Get MAC address */
strlcpy(if_hwaddr.ifr_name, iface, sizeof(if_hwaddr.ifr_name));
ret = ioctl(s, SIOCGIFHWADDR, &if_hwaddr);
if ( ret < 0 ) {
ret = errno;
DBG_ERR("ioctl failed\n");
goto fail;
}
if (ARPHRD_LOOPBACK == if_hwaddr.ifr_hwaddr.sa_family) {
ret = 0;
D_DEBUG("Ignoring loopback arp request\n");
goto fail;
}
if (if_hwaddr.ifr_hwaddr.sa_family != ARPHRD_ETHER) {
ret = EINVAL;
DBG_ERR("Not an ethernet address family (0x%x)\n",
if_hwaddr.ifr_hwaddr.sa_family);
goto fail;;
}
/* Set up most of destination address structure */
sall.sll_family = AF_PACKET;
sall.sll_halen = sizeof(struct ether_addr);
sall.sll_protocol = htons(ETH_P_ALL);
sall.sll_ifindex = ifr.ifr_ifindex;
/* For clarity */
hwaddr = (struct ether_addr *)if_hwaddr.ifr_hwaddr.sa_data;
switch (addr->ip.sin_family) {
case AF_INET:
/* Send gratuitous ARP */
ret = arp_build(buffer,
sizeof(buffer),
&addr->ip,
hwaddr,
false,
&ether_dhost,
&len);
if (ret != 0) {
DBG_ERR("Failed to build ARP request\n");
goto fail;
}
memcpy(&sall.sll_addr[0], ether_dhost, sall.sll_halen);
ret = sendto(s,
buffer,
len,
0,
(struct sockaddr *)&sall,
sizeof(sall));
if (ret < 0 ) {
ret = errno;
DBG_ERR("Failed sendto\n");
goto fail;
}
/* Send unsolicited ARP reply */
ret = arp_build(buffer,
sizeof(buffer),
&addr->ip,
hwaddr,
true,
&ether_dhost,
&len);
if (ret != 0) {
DBG_ERR("Failed to build ARP reply\n");
goto fail;
}
memcpy(&sall.sll_addr[0], ether_dhost, sall.sll_halen);
ret = sendto(s,
buffer,
len,
0,
(struct sockaddr *)&sall,
sizeof(sall));
if (ret < 0 ) {
ret = errno;
DBG_ERR("Failed sendto\n");
goto fail;
}
close(s);
break;
case AF_INET6:
ret = ip6_na_build(buffer,
sizeof(buffer),
&addr->ip6,
hwaddr,
&ether_dhost,
&len);
if (ret != 0) {
DBG_ERR("Failed to build IPv6 neighbor advertisement\n");
goto fail;
}
memcpy(&sall.sll_addr[0], ether_dhost, sall.sll_halen);
ret = sendto(s,
buffer,
len,
0,
(struct sockaddr *)&sall,
sizeof(sall));
if (ret < 0 ) {
ret = errno;
DBG_ERR("Failed sendto\n");
goto fail;
}
close(s);
break;
default:
ret = EINVAL;
DBG_ERR("Not an ipv4/ipv6 address (family is %u)\n",
addr->ip.sin_family);
goto fail;
}
return 0;
fail:
close(s);
return ret;
}
#else /* HAVE_PACKETSOCKET */
/*
 * Stub for builds without packet socket support: nothing is sent and
 * ENOSYS is returned
 */
int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
{
	(void)addr;
	(void)iface;

	/* Not implemented */
	return ENOSYS;
}
#endif /* HAVE_PACKETSOCKET */
/*
 * Sizes of an IPv4/IPv6 header immediately followed by a TCP header.
 * The expansions are parenthesised so that they can be used safely
 * inside larger expressions.
 */
#define IP4_TCP_BUFFER_SIZE (sizeof(struct ip) + \
			     sizeof(struct tcphdr))

#define IP6_TCP_BUFFER_SIZE (sizeof(struct ip6_hdr) + \
			     sizeof(struct tcphdr))
/*
 * Build an IPv4 packet consisting of an IP header and a bare TCP
 * header (no options, no payload) in buf
 *
 * buf, buflen: output buffer and its size
 * src, dst:    addresses and ports, copied into the headers as given
 * seq, ack:    sequence and acknowledgement numbers, stored as given
 *              (no byte-order conversion is done here)
 * rst:         non-zero to set RST in addition to ACK
 * len:         set to the number of bytes written to buf
 *
 * Returns 0 on success.  Returns EMSGSIZE if buf is too small or if
 * the two headers are not laid out back-to-back in the packet struct.
 */
static int tcp4_build(uint8_t *buf,
		      size_t buflen,
		      const struct sockaddr_in *src,
		      const struct sockaddr_in *dst,
		      uint32_t seq,
		      uint32_t ack,
		      int rst,
		      size_t *len)
{
	size_t l = IP4_TCP_BUFFER_SIZE;
	struct {
		struct ip ip;
		struct tcphdr tcp;
	} *ip4pkt;

	/* Padding between the headers would corrupt the packet */
	if (l != sizeof(*ip4pkt)) {
		return EMSGSIZE;
	}

	if (buflen < l) {
		return EMSGSIZE;
	}

	ip4pkt = (void *)buf;
	memset(ip4pkt, 0, l);

	ip4pkt->ip.ip_v = 4;
	ip4pkt->ip.ip_hl = sizeof(ip4pkt->ip)/sizeof(uint32_t);
	/* Total length is that of the whole packet, not of the pointer to it */
	ip4pkt->ip.ip_len = htons(sizeof(*ip4pkt));
	ip4pkt->ip.ip_ttl = 255;
	ip4pkt->ip.ip_p = IPPROTO_TCP;
	ip4pkt->ip.ip_src.s_addr = src->sin_addr.s_addr;
	ip4pkt->ip.ip_dst.s_addr = dst->sin_addr.s_addr;
	/* The IP header checksum is left as zero here */
	ip4pkt->ip.ip_sum = 0;

	ip4pkt->tcp.th_sport = src->sin_port;
	ip4pkt->tcp.th_dport = dst->sin_port;
	ip4pkt->tcp.th_seq = seq;
	ip4pkt->tcp.th_ack = ack;
	ip4pkt->tcp.th_flags = TH_ACK;
	if (rst) {
		ip4pkt->tcp.th_flags |= TH_RST;
	}
	ip4pkt->tcp.th_off = sizeof(ip4pkt->tcp)/sizeof(uint32_t);
	/* this makes it easier to spot in a sniffer */
	ip4pkt->tcp.th_win = htons(1234);
	ip4pkt->tcp.th_sum = ip_checksum((uint8_t *)&ip4pkt->tcp,
					 sizeof(ip4pkt->tcp),
					 &ip4pkt->ip);

	*len = l;
	return 0;
}
/*
 * Build an IPv6 packet consisting of an IPv6 header and a bare TCP
 * header (no options, no payload) in buf
 *
 * buf, buflen: output buffer and its size
 * src, dst:    addresses and ports, copied into the headers as given
 * seq, ack:    sequence and acknowledgement numbers, stored as given
 *              (no byte-order conversion is done here)
 * rst:         non-zero to set RST in addition to ACK
 * len:         set to the number of bytes written to buf
 *
 * Returns 0 on success.  Returns EMSGSIZE if buf is too small or if
 * the two headers are not laid out back-to-back in the packet struct.
 */
static int tcp6_build(uint8_t *buf,
		      size_t buflen,
		      const struct sockaddr_in6 *src,
		      const struct sockaddr_in6 *dst,
		      uint32_t seq,
		      uint32_t ack,
		      int rst,
		      size_t *len)
{
	const size_t pkt_len = IP6_TCP_BUFFER_SIZE;
	struct {
		struct ip6_hdr ip6;
		struct tcphdr tcp;
	} *pkt;
	struct ip6_hdr *ip6 = NULL;
	struct tcphdr *tcp = NULL;

	/* Padding between the headers would corrupt the packet */
	if (pkt_len != sizeof(*pkt)) {
		return EMSGSIZE;
	}
	if (buflen < pkt_len) {
		return EMSGSIZE;
	}

	pkt = (void *)buf;
	memset(pkt, 0, pkt_len);
	ip6 = &pkt->ip6;
	tcp = &pkt->tcp;

	/* IPv6 header */
	ip6->ip6_vfc = 6 << 4;
	ip6->ip6_plen = htons(sizeof(struct tcphdr));
	ip6->ip6_nxt = IPPROTO_TCP;
	ip6->ip6_hlim = 64;
	ip6->ip6_src = src->sin6_addr;
	ip6->ip6_dst = dst->sin6_addr;

	/* TCP header */
	tcp->th_sport = src->sin6_port;
	tcp->th_dport = dst->sin6_port;
	tcp->th_seq = seq;
	tcp->th_ack = ack;
	tcp->th_flags = TH_ACK;
	if (rst) {
		tcp->th_flags |= TH_RST;
	}
	tcp->th_off = sizeof(pkt->tcp)/sizeof(uint32_t);
	/* this makes it easier to spot in a sniffer */
	tcp->th_win = htons(1234);
	tcp->th_sum = ip6_checksum((uint8_t *)tcp, sizeof(pkt->tcp), ip6);

	*len = pkt_len;
	return 0;
}
/*
 * Send a TCP segment from the specified IP/port to the specified
 * destination IP/port, using a raw socket.
 *
 * This is used to trigger the receiving host into sending its own
 * ACK, which should trigger early detection of TCP reset by the
 * client after IP takeover.
 *
 * This can also be used to send RST segments (if rst is true),
 * provided that correct seq and ack numbers are given.
 *
 * Returns 0 on success.  On failure returns the error from building
 * the packet, or -1 for socket failures, short sends and addresses
 * that are neither IPv4 nor IPv6.
 */
int ctdb_sys_send_tcp(const ctdb_sock_addr *dest,
		      const ctdb_sock_addr *src,
		      uint32_t seq,
		      uint32_t ack,
		      int rst)
{
	uint8_t buf[MAX(IP4_TCP_BUFFER_SIZE, IP6_TCP_BUFFER_SIZE)];
	size_t len = 0;
	int saved_errno;
	int ret;
	int s;

	if (src->ip.sin_family == AF_INET) {
		uint32_t one = 1;

		ret = tcp4_build(buf,
				 sizeof(buf),
				 &src->ip,
				 &dest->ip,
				 seq,
				 ack,
				 rst,
				 &len);
		if (ret != 0) {
			DBG_ERR("Failed to build TCP packet (%d)\n", ret);
			return ret;
		}

		/* open a raw socket to send this segment from */
		s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
		if (s == -1) {
			DBG_ERR("Failed to open raw socket (%s)\n",
				strerror(errno));
			return -1;
		}

		/* The IP header is supplied in buf */
		ret = setsockopt(s, IPPROTO_IP, IP_HDRINCL, &one, sizeof(one));
		if (ret != 0) {
			DBG_ERR("Failed to setup IP headers (%s)\n",
				strerror(errno));
			close(s);
			return -1;
		}

		ret = sendto(s,
			     buf,
			     len,
			     0,
			     (const struct sockaddr *)&dest->ip,
			     sizeof(dest->ip));
		/* close() may change errno, so keep it for the log message */
		saved_errno = errno;
		close(s);
	} else if (src->ip.sin_family == AF_INET6) {
		struct sockaddr_in6 tmpdest;

		ret = tcp6_build(buf,
				 sizeof(buf),
				 &src->ip6,
				 &dest->ip6,
				 seq,
				 ack,
				 rst,
				 &len);
		if (ret != 0) {
			DBG_ERR("Failed to build TCP packet (%d)\n", ret);
			return ret;
		}

		s = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW);
		if (s == -1) {
			DBG_ERR("Failed to open sending socket\n");
			return -1;
		}

		/*
		 * sendto() on an IPv6 raw socket requires the port to
		 * be either 0 or a protocol value
		 */
		tmpdest = dest->ip6;
		tmpdest.sin6_port = 0;

		ret = sendto(s,
			     buf,
			     len,
			     0,
			     (const struct sockaddr *)&tmpdest,
			     sizeof(tmpdest));
		/* close() may change errno, so keep it for the log message */
		saved_errno = errno;
		close(s);
	} else {
		DBG_ERR("Not an ipv4/v6 address\n");
		return -1;
	}

	if (ret == -1) {
		D_ERR("Failed sendto (%s)\n", strerror(saved_errno));
		return -1;
	}
	if ((size_t)ret != len) {
		DBG_ERR("Failed sendto - didn't send full packet\n");
		return -1;
	}

	return 0;
}
/*
 * Extract addresses, ports and selected TCP header fields from an
 * IPv4 packet
 *
 * ip_pkt, pktlen: start of the IP header and number of bytes available
 * src, dst:       family, address and port are filled in
 * ack_seq, seq:   copied from the TCP header as they appear there (no
 *                 byte-order conversion)
 * rst:            if not NULL, set non-zero when RST is set
 * window:         if not NULL, set to the window field as it appears
 *                 in the header
 *
 * Returns 0 on success, EMSGSIZE if the packet is too short, ENOMSG
 * if it is not IPv4, has a non-zero fragment offset, or is not TCP.
 */
static int tcp4_extract(const uint8_t *ip_pkt,
			size_t pktlen,
			struct sockaddr_in *src,
			struct sockaddr_in *dst,
			uint32_t *ack_seq,
			uint32_t *seq,
			int *rst,
			uint16_t *window)
{
	const struct ip *iph = NULL;
	const struct tcphdr *tcph = NULL;
	size_t ip_hdr_len;

	if (pktlen < sizeof(struct ip)) {
		return EMSGSIZE;
	}
	iph = (const struct ip *)ip_pkt;

	/* IPv4 only, don't look at fragments, TCP only */
	if (iph->ip_v != 4 ||
	    (ntohs(iph->ip_off) & 0x1fff) != 0 ||
	    iph->ip_p != IPPROTO_TCP) {
		return ENOMSG;
	}

	/* Ensure there is enough of the packet to gather required fields */
	ip_hdr_len = iph->ip_hl * sizeof(uint32_t);
	if (pktlen < ip_hdr_len + offsetof(struct tcphdr, th_sum)) {
		return EMSGSIZE;
	}
	tcph = (const struct tcphdr *)(ip_pkt + ip_hdr_len);

	src->sin_family = AF_INET;
	src->sin_addr.s_addr = iph->ip_src.s_addr;
	src->sin_port = tcph->th_sport;

	dst->sin_family = AF_INET;
	dst->sin_addr.s_addr = iph->ip_dst.s_addr;
	dst->sin_port = tcph->th_dport;

	*ack_seq = tcph->th_ack;
	*seq = tcph->th_seq;

	if (window != NULL) {
		*window = tcph->th_win;
	}
	if (rst != NULL) {
		*rst = tcph->th_flags & TH_RST;
	}

	return 0;
}
/*
 * Extract addresses, ports and selected TCP header fields from an
 * IPv6 packet whose TCP header directly follows the IPv6 header
 *
 * ip_pkt, pktlen: start of the IPv6 header and number of bytes available
 * src, dst:       family, address and port are filled in
 * ack_seq, seq:   copied from the TCP header as they appear there (no
 *                 byte-order conversion)
 * rst:            if not NULL, set non-zero when RST is set
 * window:         if not NULL, set to the window field as it appears
 *                 in the header
 *
 * Returns 0 on success, EMSGSIZE if the packet is too short, ENOMSG
 * if it is not IPv6 or the next header is not TCP.
 */
static int tcp6_extract(const uint8_t *ip_pkt,
			size_t pktlen,
			struct sockaddr_in6 *src,
			struct sockaddr_in6 *dst,
			uint32_t *ack_seq,
			uint32_t *seq,
			int *rst,
			uint16_t *window)
{
	const size_t needed = sizeof(struct ip6_hdr) +
			      offsetof(struct tcphdr, th_sum);
	const struct ip6_hdr *ip6h = NULL;
	const struct tcphdr *tcph = NULL;

	/* Ensure there is enough of the packet to gather required fields */
	if (pktlen < needed) {
		return EMSGSIZE;
	}
	ip6h = (const struct ip6_hdr *)ip_pkt;

	/* IPv6 only, TCP only */
	if ((ip6h->ip6_vfc >> 4) != 6 || ip6h->ip6_nxt != IPPROTO_TCP) {
		return ENOMSG;
	}

	tcph = (const struct tcphdr *)(ip_pkt + sizeof(struct ip6_hdr));

	src->sin6_family = AF_INET6;
	src->sin6_port = tcph->th_sport;
	src->sin6_addr = ip6h->ip6_src;

	dst->sin6_family = AF_INET6;
	dst->sin6_port = tcph->th_dport;
	dst->sin6_addr = ip6h->ip6_dst;

	*ack_seq = tcph->th_ack;
	*seq = tcph->th_seq;

	if (window != NULL) {
		*window = tcph->th_win;
	}
	if (rst != NULL) {
		*rst = tcph->th_flags & TH_RST;
	}

	return 0;
}
/*
* Packet capture
*
* If AF_PACKET is available and pcap has not been explicitly enabled
* (ENABLE_PCAP) then use a raw socket, otherwise use pcap.  wscript
* has checked to make sure that pcap is available if needed.
*/
#if defined(HAVE_AF_PACKET) && !defined(ENABLE_PCAP)
/*
 * Open a raw packet socket to capture from
 *
 * The socket receives all protocols, is set non-blocking and is
 * passed to set_close_on_exec().  Neither iface nor private_data is
 * used by this implementation.
 *
 * Returns the socket file descriptor, or -1 on failure.
 */
int ctdb_sys_open_capture_socket(const char *iface, void **private_data)
{
	int fd;

	/* Open a socket to capture all traffic */
	fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	if (fd == -1) {
		DBG_ERR("Failed to open raw socket\n");
		return -1;
	}

	DBG_DEBUG("Opened raw socket for TCP tickle capture (fd=%d)\n", fd);

	if (set_blocking(fd, false) != 0) {
		DBG_ERR("Failed to set socket non-blocking (%s)\n",
			strerror(errno));
		close(fd);
		return -1;
	}

	set_close_on_exec(fd);

	return fd;
}
/*
 * Additional cleanup when closing a capture socket.
 *
 * The raw socket implementation keeps no private state, so there is
 * nothing to do and 0 is always returned.
 * Note that the socket itself is closed automatically in the caller.
 */
int ctdb_sys_close_capture_socket(void *private_data)
{
	(void)private_data;

	return 0;
}
/*
 * Called when the raw socket becomes readable: read one frame and
 * extract the TCP details from it
 *
 * s:              capture socket
 * private_data:   not used by this implementation
 * src, dst:       zeroed and then filled in with addresses and ports
 * ack_seq, seq:   set from the TCP header
 * rst, window:    optional (may be NULL), set from the TCP header
 *
 * Returns 0 on success, the errno from recv(), EMSGSIZE if the frame
 * is too short, ENOMSG if the frame is not IPv4/IPv6 TCP.
 */
int ctdb_sys_read_tcp_packet(int s, void *private_data,
			     ctdb_sock_addr *src,
			     ctdb_sock_addr *dst,
			     uint32_t *ack_seq,
			     uint32_t *seq,
			     int *rst,
			     uint16_t *window)
{
	uint8_t pkt[100]; /* Large enough for simple ACK/RST packets */
	const struct ether_header *eth = NULL;
	const uint8_t *ip_pkt = NULL;
	size_t caplen;
	size_t ip_len;
	ssize_t nread;

	nread = recv(s, pkt, sizeof(pkt), MSG_TRUNC);
	if (nread == -1) {
		return errno;
	}

	/*
	 * With MSG_TRUNC the return value is the real frame length,
	 * which can exceed the buffer.  Only what was actually stored
	 * may be examined.
	 */
	caplen = (size_t)nread;
	if (caplen > sizeof(pkt)) {
		caplen = sizeof(pkt);
	}

	if (caplen < sizeof(*eth)) {
		return EMSGSIZE;
	}

	ZERO_STRUCTP(src);
	ZERO_STRUCTP(dst);

	/* Ethernet */
	eth = (const struct ether_header *)pkt;
	ip_pkt = pkt + sizeof(struct ether_header);
	ip_len = caplen - sizeof(struct ether_header);

	/* we want either IPv4 or IPv6 */
	switch (ntohs(eth->ether_type)) {
	case ETHERTYPE_IP:
		return tcp4_extract(ip_pkt,
				    ip_len,
				    &src->ip,
				    &dst->ip,
				    ack_seq,
				    seq,
				    rst,
				    window);
	case ETHERTYPE_IP6:
		return tcp6_extract(ip_pkt,
				    ip_len,
				    &src->ip6,
				    &dst->ip6,
				    ack_seq,
				    seq,
				    rst,
				    window);
	default:
		return ENOMSG;
	}
}
#else /* defined(HAVE_AF_PACKET) && !defined(ENABLE_PCAP) */
#include <pcap.h>
/*
* Assume this exists if pcap.h exists - it has been around for a
* while
*/
#include <pcap/sll.h>
/*
 * Open a pcap capture on iface
 *
 * The capture uses a snapshot length of 100 bytes, is not
 * promiscuous, has no timeout and (where supported) is in immediate
 * mode.  Only the DLT_EN10MB, DLT_LINUX_SLL and (where defined)
 * DLT_LINUX_SLL2 link types are accepted.
 *
 * On success the pcap handle is stored via private_data, which is
 * treated as a pcap_t **, and a selectable file descriptor for the
 * capture is returned.  On failure the handle is closed and -1 is
 * returned.
 */
int ctdb_sys_open_capture_socket(const char *iface, void **private_data)
{
	char errbuf[PCAP_ERRBUF_SIZE];
	pcap_t *pt;
	int pcap_packet_type;
	const char *t = NULL;
	int fd;
	int ret;

	pt = pcap_create(iface, errbuf);
	if (pt == NULL) {
		DBG_ERR("Failed to open pcap capture device %s (%s)\n",
			iface,
			errbuf);
		return -1;
	}

	/*
	 * pcap isn't very clear about defaults...
	 */
	ret = pcap_set_snaplen(pt, 100);
	if (ret < 0) {
		DBG_ERR("Failed to set snaplen for pcap capture\n");
		goto fail;
	}
	ret = pcap_set_promisc(pt, 0);
	if (ret < 0) {
		DBG_ERR("Failed to unset promiscuous mode for pcap capture\n");
		goto fail;
	}
	ret = pcap_set_timeout(pt, 0);
	if (ret < 0) {
		DBG_ERR("Failed to set timeout for pcap capture\n");
		goto fail;
	}
#ifdef HAVE_PCAP_SET_IMMEDIATE_MODE
	ret = pcap_set_immediate_mode(pt, 1);
	if (ret < 0) {
		DBG_ERR("Failed to set immediate mode for pcap capture\n");
		goto fail;
	}
#endif
	ret = pcap_activate(pt);
	if (ret < 0) {
		DBG_ERR("Failed to activate pcap capture\n");
		goto fail;
	}

	pcap_packet_type = pcap_datalink(pt);
	switch (pcap_packet_type) {
	case DLT_EN10MB:
		t = "DLT_EN10MB";
		break;
	case DLT_LINUX_SLL:
		t = "DLT_LINUX_SLL";
		break;
#ifdef DLT_LINUX_SLL2
	case DLT_LINUX_SLL2:
		t = "DLT_LINUX_SLL2";
		break;
#endif /* DLT_LINUX_SLL2 */
	default:
		DBG_ERR("Unknown pcap packet type %d\n", pcap_packet_type);
		goto fail;
	}

	fd = pcap_get_selectable_fd(pt);
	if (fd < 0) {
		/*
		 * Returning this to the caller would look like a
		 * failure while leaving the handle open
		 */
		DBG_ERR("Failed to get selectable fd for pcap capture\n");
		goto fail;
	}
	DBG_DEBUG("Opened pcap capture for TCP tickle (type=%s, fd=%d)\n",
		  t,
		  fd);

	*((pcap_t **)private_data) = pt;
	return fd;

fail:
	pcap_close(pt);
	return -1;
}
/*
 * Close the pcap handle passed as private_data
 *
 * Always returns 0.
 */
int ctdb_sys_close_capture_socket(void *private_data)
{
	pcap_close((pcap_t *)private_data);

	return 0;
}
/*
 * Read the next packet from a pcap capture and extract the TCP
 * details from it
 *
 * s:              not used by this implementation
 * private_data:   the pcap handle
 * src, dst:       zeroed and then filled in with addresses and ports
 * ack_seq, seq:   set from the TCP header
 * rst, window:    optional (may be NULL), set from the TCP header
 *
 * Returns 0 on success, ENOMSG if no packet is available or it is not
 * TCP, EMSGSIZE if the captured data is too short, EPROTONOSUPPORT
 * for an unsupported link or hardware type, EPROTO for ARP and other
 * ether types.
 */
int ctdb_sys_read_tcp_packet(int s,
			     void *private_data,
			     ctdb_sock_addr *src,
			     ctdb_sock_addr *dst,
			     uint32_t *ack_seq,
			     uint32_t *seq,
			     int *rst,
			     uint16_t *window)
{
	int ret;
	struct pcap_pkthdr pkthdr;
	const u_char *buffer;
	pcap_t *pt = (pcap_t *)private_data;
	int pcap_packet_type;
	uint16_t ether_type;
	size_t ll_hdr_len;
	size_t caplen;

	buffer = pcap_next(pt, &pkthdr);
	if (buffer == NULL) {
		return ENOMSG;
	}
	/* Only this many bytes of buffer are valid */
	caplen = (size_t)pkthdr.caplen;

	ZERO_STRUCTP(src);
	ZERO_STRUCTP(dst);

	pcap_packet_type = pcap_datalink(pt);
	switch (pcap_packet_type) {
	case DLT_EN10MB: {
		const struct ether_header *eth =
			(const struct ether_header *)buffer;

		if (caplen < sizeof(struct ether_header)) {
			return EMSGSIZE;
		}
		ether_type = ntohs(eth->ether_type);
		ll_hdr_len = sizeof(struct ether_header);
		break;
	}
	case DLT_LINUX_SLL: {
		const struct sll_header *sll =
			(const struct sll_header *)buffer;
		uint16_t arphrd_type;

		if (caplen < SLL_HDR_LEN) {
			return EMSGSIZE;
		}
		arphrd_type = ntohs(sll->sll_hatype);
		switch (arphrd_type) {
		case ARPHRD_ETHER:
		case ARPHRD_INFINIBAND:
			break;
		default:
			DBG_DEBUG("SLL: Unknown arphrd_type %"PRIu16"\n",
				  arphrd_type);
			return EPROTONOSUPPORT;
		}
		ether_type = ntohs(sll->sll_protocol);
		ll_hdr_len = SLL_HDR_LEN;
		break;
	}
#ifdef DLT_LINUX_SLL2
	case DLT_LINUX_SLL2: {
		const struct sll2_header *sll2 =
			(const struct sll2_header *)buffer;
		uint16_t arphrd_type;

		if (caplen < SLL2_HDR_LEN) {
			return EMSGSIZE;
		}
		arphrd_type = ntohs(sll2->sll2_hatype);
		switch (arphrd_type) {
		case ARPHRD_ETHER:
		case ARPHRD_INFINIBAND:
			break;
		default:
			DBG_DEBUG("SLL2: Unknown arphrd_type %"PRIu16"\n",
				  arphrd_type);
			return EPROTONOSUPPORT;
		}
		ether_type = ntohs(sll2->sll2_protocol);
		ll_hdr_len = SLL2_HDR_LEN;
		break;
	}
#endif /* DLT_LINUX_SLL2 */
	default:
		DBG_DEBUG("Unknown pcap packet type %d\n", pcap_packet_type);
		return EPROTONOSUPPORT;
	}

	/* caplen >= ll_hdr_len is guaranteed by the checks above */
	switch (ether_type) {
	case ETHERTYPE_IP:
		ret = tcp4_extract(buffer + ll_hdr_len,
				   caplen - ll_hdr_len,
				   &src->ip,
				   &dst->ip,
				   ack_seq,
				   seq,
				   rst,
				   window);
		break;
	case ETHERTYPE_IP6:
		ret = tcp6_extract(buffer + ll_hdr_len,
				   caplen - ll_hdr_len,
				   &src->ip6,
				   &dst->ip6,
				   ack_seq,
				   seq,
				   rst,
				   window);
		break;
	case ETHERTYPE_ARP:
		/* Silently ignore ARP packets */
		return EPROTO;
	default:
		DBG_DEBUG("Unknown ether type %"PRIu16"\n", ether_type);
		return EPROTO;
	}

	return ret;
}
#endif /* defined(HAVE_AF_PACKET) && !defined(ENABLE_PCAP) */