mirror of
https://github.com/samba-team/samba.git
synced 2025-01-25 06:04:04 +03:00
0536d7a98b
It can now be used when net.ipv4.ip_nonlocal_bind=1. This makes the recovery daemon's local IP verification inefficient. It can be optimised in a subsequent commit. Fall back to bind() if unable to fetch IPs. Signed-off-by: Martin Schwenke <mschwenke@ddn.com> Reviewed-by: John Mulligan <jmulligan@redhat.com> Reviewed-by: Anoop C S <anoopcs@samba.org>
1264 lines
27 KiB
C
1264 lines
27 KiB
C
/*
|
|
ctdb system specific code to manage raw sockets on linux
|
|
|
|
Copyright (C) Ronnie Sahlberg 2007
|
|
Copyright (C) Andrew Tridgell 2007
|
|
Copyright (C) Marc Dequènes (Duck) 2009
|
|
Copyright (C) Volker Lendecke 2012
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#include "replace.h"
|
|
|
|
/*
|
|
* Use BSD struct tcphdr field names for portability. Modern glibc
|
|
* makes them available by default via <netinet/tcp.h> but older glibc
|
|
* requires __FAVOR_BSD to be defined.
|
|
*
|
|
* __FAVOR_BSD is normally defined in <features.h> if _DEFAULT_SOURCE
|
|
* (new) or _BSD_SOURCE (now deprecated) is set and _GNU_SOURCE is not
|
|
* set. Including "replace.h" above causes <features.h> to be
|
|
* indirectly included and this will not set __FAVOR_BSD because
|
|
* _GNU_SOURCE is set in Samba's "config.h" (which is included by
|
|
* "replace.h").
|
|
*
|
|
* Therefore, set __FAVOR_BSD by hand below.
|
|
*/
|
|
#define __FAVOR_BSD 1
|
|
#include "system/network.h"
|
|
|
|
#ifdef HAVE_NETINET_IF_ETHER_H
|
|
#include <netinet/if_ether.h>
|
|
#endif
|
|
#ifdef HAVE_NETINET_IP6_H
|
|
#include <netinet/ip6.h>
|
|
#endif
|
|
#ifdef HAVE_NETINET_ICMP6_H
|
|
#include <netinet/icmp6.h>
|
|
#endif
|
|
#ifdef HAVE_LINUX_IF_PACKET_H
|
|
#include <linux/if_packet.h>
|
|
#endif
|
|
|
|
#ifndef ETHERTYPE_IP6
|
|
#define ETHERTYPE_IP6 0x86dd
|
|
#endif
|
|
|
|
#include <talloc.h>
|
|
|
|
#include "lib/util/debug.h"
|
|
#include "lib/util/blocking.h"
|
|
|
|
#include "protocol/protocol.h"
|
|
#include "protocol/protocol_util.h"
|
|
|
|
#include "common/logging.h"
|
|
#include "common/system_socket.h"
|
|
|
|
/*
 * Sum n bytes of data as a sequence of 16-bit network-order words.
 *
 * Returns the unfolded 32-bit sum; callers fold the carries and take
 * the one's complement.  A trailing odd byte is treated as the
 * high-order byte of a final word padded with a zero byte.
 */
static uint32_t uint16_checksum(uint8_t *data, size_t n)
{
	uint32_t sum = 0;
	uint16_t value;

	while (n >= 2) {
		/* memcpy() because data need not be 16-bit aligned */
		memcpy(&value, data, 2);
		sum += (uint32_t)ntohs(value);
		data += 2;
		n -= 2;
	}
	if (n == 1) {
		/*
		 * Pad the final byte on the right.  The previous
		 * ntohs(*data) only produced this value on
		 * little-endian hosts; on big-endian hosts it left
		 * the byte in the low-order position.
		 */
		sum += (uint32_t)*data << 8;
	}
	return sum;
}
|
|
|
|
/*
 * Snapshot of the local interface addresses, as returned by
 * getifaddrs().  The list is released with freeifaddrs() by
 * ctdb_sys_local_ips_destructor().
 */
struct ctdb_sys_local_ips_context {
	/* Head of the getifaddrs() list */
	struct ifaddrs *ifa;
};
|
|
|
|
static int ctdb_sys_local_ips_destructor(
|
|
struct ctdb_sys_local_ips_context *ips_ctx)
|
|
{
|
|
freeifaddrs(ips_ctx->ifa);
|
|
ips_ctx->ifa = NULL;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Capture the current set of local interface addresses.
 *
 * A context is allocated on ctx, filled in by getifaddrs() and given a
 * destructor so that freeing the context also frees the address list.
 *
 * On success the context is stored in *ips_ctx and 0 is returned.  On
 * failure ENOMEM or the errno from getifaddrs() is returned and
 * *ips_ctx is not written.
 */
int ctdb_sys_local_ips_init(TALLOC_CTX *ctx,
			    struct ctdb_sys_local_ips_context **ips_ctx)
{
	struct ctdb_sys_local_ips_context *state = NULL;

	state = talloc(ctx, struct ctdb_sys_local_ips_context);
	if (state == NULL) {
		return ENOMEM;
	}

	if (getifaddrs(&state->ifa) != 0) {
		/* Save errno before freeing, the free may change it */
		int err = errno;

		talloc_free(state);
		return err;
	}

	/* Only now is there a list for the destructor to release */
	talloc_set_destructor(state, ctdb_sys_local_ips_destructor);
	*ips_ctx = state;

	return 0;
}
|
|
|
|
bool ctdb_sys_local_ip_check(const struct ctdb_sys_local_ips_context *ips_ctx,
|
|
const ctdb_sock_addr *addr)
|
|
{
|
|
struct ifaddrs *ifa = NULL;
|
|
int ret;
|
|
|
|
for (ifa = ips_ctx->ifa; ifa != NULL; ifa = ifa->ifa_next) {
|
|
ctdb_sock_addr sock_addr;
|
|
bool match;
|
|
|
|
if (ifa->ifa_addr == NULL)
|
|
continue;
|
|
|
|
/* Ignore non-IPv4/IPv6 interfaces */
|
|
switch (ifa->ifa_addr->sa_family) {
|
|
case AF_INET:
|
|
case AF_INET6:
|
|
break;
|
|
default:
|
|
continue;
|
|
}
|
|
|
|
ret = ctdb_sock_addr_from_sockaddr(ifa->ifa_addr, &sock_addr);
|
|
if (ret != 0) {
|
|
return false;
|
|
}
|
|
|
|
match = ctdb_sock_addr_same_ip(&sock_addr, addr);
|
|
if (match) {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
bool ctdb_sys_bind_ip_check(const ctdb_sock_addr *_addr)
|
|
{
|
|
int s;
|
|
int ret;
|
|
ctdb_sock_addr __addr = *_addr;
|
|
ctdb_sock_addr *addr = &__addr;
|
|
socklen_t addrlen = 0;
|
|
|
|
switch (addr->sa.sa_family) {
|
|
case AF_INET:
|
|
addr->ip.sin_port = 0;
|
|
addrlen = sizeof(struct sockaddr_in);
|
|
break;
|
|
case AF_INET6:
|
|
addr->ip6.sin6_port = 0;
|
|
addrlen = sizeof(struct sockaddr_in6);
|
|
break;
|
|
}
|
|
|
|
s = socket(addr->sa.sa_family, SOCK_STREAM, IPPROTO_TCP);
|
|
if (s == -1) {
|
|
return false;
|
|
}
|
|
|
|
ret = bind(s, (struct sockaddr *)addr, addrlen);
|
|
|
|
close(s);
|
|
return ret == 0;
|
|
}
|
|
|
|
/*
|
|
* See if the given IP is currently on an interface
|
|
*/
|
|
bool ctdb_sys_have_ip(const ctdb_sock_addr *addr)
|
|
{
|
|
struct ctdb_sys_local_ips_context *ips_ctx = NULL;
|
|
bool have_ip;
|
|
int ret;
|
|
|
|
ret = ctdb_sys_local_ips_init(NULL, &ips_ctx);
|
|
if (ret != 0) {
|
|
DBG_DEBUG("Failed to get local addresses, depending on bind\n");
|
|
have_ip = ctdb_sys_bind_ip_check(addr);
|
|
return have_ip;
|
|
}
|
|
|
|
have_ip = ctdb_sys_local_ip_check(ips_ctx, addr);
|
|
talloc_free(ips_ctx);
|
|
|
|
return have_ip;
|
|
}
|
|
|
|
/*
 * Simple TCP checksum for IPv4 - assumes data is a multiple of 2 bytes
 * long.
 *
 * data/n is the TCP segment.  The source address, destination address
 * and protocol that go into the sum are read from ip.  The result is
 * ready to be stored in the TCP header; a computed value of 0 is
 * returned as 0xFFFF.
 */
static uint16_t ip_checksum(uint8_t *data, size_t n, struct ip *ip)
{
	uint32_t acc = 0;
	uint16_t cksum;

	/* Addresses, protocol and segment length */
	acc += uint16_checksum((uint8_t *)&ip->ip_src, sizeof(ip->ip_src));
	acc += uint16_checksum((uint8_t *)&ip->ip_dst, sizeof(ip->ip_dst));
	acc += ip->ip_p + n;

	/* The segment itself */
	acc += uint16_checksum(data, n);

	/* Fold carries into the low 16 bits; the first fold may carry */
	acc = (acc >> 16) + (acc & 0xFFFF);
	acc = (acc >> 16) + (acc & 0xFFFF);

	cksum = htons(acc);
	cksum = ~cksum;

	return (cksum == 0) ? 0xFFFF : cksum;
}
|
|
|
|
/*
 * Checksum for an IPv6 upper-layer packet.
 *
 * data/n is the upper-layer packet.  The source address, destination
 * address and next header value that go into the sum are read from
 * ip6; n is used as the upper-layer length.  A computed value of 0 is
 * returned as 0xFFFF.
 */
static uint16_t ip6_checksum(uint8_t *data, size_t n, struct ip6_hdr *ip6)
{
	uint16_t pseudo[3];
	uint32_t be_len = htonl(n);
	uint32_t acc;
	uint16_t cksum;

	/* 32-bit length in network order, split into two 16-bit words */
	pseudo[0] = be_len & UINT16_MAX;
	pseudo[1] = (be_len >> 16) & UINT16_MAX;
	/* ip6_nxt is only 8 bits, so fits comfortably into a uint16_t */
	pseudo[2] = htons(ip6->ip6_nxt);

	acc = uint16_checksum((uint8_t *)&ip6->ip6_src, 16);
	acc += uint16_checksum((uint8_t *)&ip6->ip6_dst, 16);
	acc += uint16_checksum((uint8_t *)pseudo, sizeof(pseudo));
	acc += uint16_checksum(data, n);

	/* Fold carries into the low 16 bits; the first fold may carry */
	acc = (acc >> 16) + (acc & 0xFFFF);
	acc = (acc >> 16) + (acc & 0xFFFF);

	cksum = htons(acc);
	cksum = ~cksum;

	return (cksum == 0) ? 0xFFFF : cksum;
}
|
|
|
|
/*
|
|
* Send gratuitous ARP request/reply or IPv6 neighbor advertisement
|
|
*/
|
|
|
|
#ifdef HAVE_PACKETSOCKET
|
|
|
|
/*
|
|
* Create IPv4 ARP requests/replies or IPv6 neighbour advertisement
|
|
* packets
|
|
*/
|
|
|
|
/*
 * Sizes of the frames built by arp_build() and ip6_na_build().  The
 * expansions are parenthesized so the macros can be used safely inside
 * larger expressions.
 */
#define ARP_STRUCT_SIZE (sizeof(struct ether_header) + \
			 sizeof(struct ether_arp))

#define IP6_NA_STRUCT_SIZE (sizeof(struct ether_header) + \
			    sizeof(struct ip6_hdr) + \
			    sizeof(struct nd_neighbor_advert) + \
			    sizeof(struct nd_opt_hdr) + \
			    sizeof(struct ether_addr))

/* Buffers are never smaller than 64 bytes */
#define ARP_BUFFER_SIZE MAX(ARP_STRUCT_SIZE, 64)

#define IP6_NA_BUFFER_SIZE MAX(IP6_NA_STRUCT_SIZE, 64)
|
|
|
|
static int arp_build(uint8_t *buffer,
|
|
size_t buflen,
|
|
const struct sockaddr_in *addr,
|
|
const struct ether_addr *hwaddr,
|
|
bool reply,
|
|
struct ether_addr **ether_dhost,
|
|
size_t *len)
|
|
{
|
|
size_t l = ARP_BUFFER_SIZE;
|
|
struct ether_header *eh;
|
|
struct ether_arp *ea;
|
|
struct arphdr *ah;
|
|
|
|
if (addr->sin_family != AF_INET) {
|
|
return EINVAL;
|
|
}
|
|
|
|
if (buflen < l) {
|
|
return EMSGSIZE;
|
|
}
|
|
|
|
memset(buffer, 0 , l);
|
|
|
|
eh = (struct ether_header *)buffer;
|
|
memset(eh->ether_dhost, 0xff, ETH_ALEN);
|
|
memcpy(eh->ether_shost, hwaddr, ETH_ALEN);
|
|
eh->ether_type = htons(ETHERTYPE_ARP);
|
|
|
|
ea = (struct ether_arp *)(buffer + sizeof(struct ether_header));
|
|
ah = &ea->ea_hdr;
|
|
ah->ar_hrd = htons(ARPHRD_ETHER);
|
|
ah->ar_pro = htons(ETH_P_IP);
|
|
ah->ar_hln = ETH_ALEN;
|
|
ah->ar_pln = sizeof(ea->arp_spa);
|
|
|
|
if (! reply) {
|
|
ah->ar_op = htons(ARPOP_REQUEST);
|
|
memcpy(ea->arp_sha, hwaddr, ETH_ALEN);
|
|
memcpy(ea->arp_spa, &addr->sin_addr, sizeof(ea->arp_spa));
|
|
memset(ea->arp_tha, 0, ETH_ALEN);
|
|
memcpy(ea->arp_tpa, &addr->sin_addr, sizeof(ea->arp_tpa));
|
|
} else {
|
|
ah->ar_op = htons(ARPOP_REPLY);
|
|
memcpy(ea->arp_sha, hwaddr, ETH_ALEN);
|
|
memcpy(ea->arp_spa, &addr->sin_addr, sizeof(ea->arp_spa));
|
|
memcpy(ea->arp_tha, hwaddr, ETH_ALEN);
|
|
memcpy(ea->arp_tpa, &addr->sin_addr, sizeof(ea->arp_tpa));
|
|
}
|
|
|
|
*ether_dhost = (struct ether_addr *)eh->ether_dhost;
|
|
*len = l;
|
|
return 0;
|
|
}
|
|
|
|
static int ip6_na_build(uint8_t *buffer,
|
|
size_t buflen,
|
|
const struct sockaddr_in6 *addr,
|
|
const struct ether_addr *hwaddr,
|
|
struct ether_addr **ether_dhost,
|
|
size_t *len)
|
|
{
|
|
size_t l = IP6_NA_BUFFER_SIZE;
|
|
struct ether_header *eh;
|
|
struct ip6_hdr *ip6;
|
|
struct nd_neighbor_advert *nd_na;
|
|
struct nd_opt_hdr *nd_oh;
|
|
struct ether_addr *ea;
|
|
int ret;
|
|
|
|
if (addr->sin6_family != AF_INET6) {
|
|
return EINVAL;
|
|
}
|
|
|
|
if (buflen < l) {
|
|
return EMSGSIZE;
|
|
}
|
|
|
|
memset(buffer, 0 , l);
|
|
|
|
eh = (struct ether_header *)buffer;
|
|
/*
|
|
* Ethernet multicast: 33:33:00:00:00:01 (see RFC2464,
|
|
* section 7) - note memset 0 above!
|
|
*/
|
|
eh->ether_dhost[0] = 0x33;
|
|
eh->ether_dhost[1] = 0x33;
|
|
eh->ether_dhost[5] = 0x01;
|
|
memcpy(eh->ether_shost, hwaddr, ETH_ALEN);
|
|
eh->ether_type = htons(ETHERTYPE_IP6);
|
|
|
|
ip6 = (struct ip6_hdr *)(buffer + sizeof(struct ether_header));
|
|
ip6->ip6_vfc = 6 << 4;
|
|
ip6->ip6_plen = htons(sizeof(struct nd_neighbor_advert) +
|
|
sizeof(struct nd_opt_hdr) +
|
|
ETH_ALEN);
|
|
ip6->ip6_nxt = IPPROTO_ICMPV6;
|
|
ip6->ip6_hlim = 255;
|
|
ip6->ip6_src = addr->sin6_addr;
|
|
/* all-nodes multicast */
|
|
|
|
ret = inet_pton(AF_INET6, "ff02::1", &ip6->ip6_dst);
|
|
if (ret != 1) {
|
|
return EIO;
|
|
}
|
|
|
|
nd_na = (struct nd_neighbor_advert *)(buffer +
|
|
sizeof(struct ether_header) +
|
|
sizeof(struct ip6_hdr));
|
|
nd_na->nd_na_type = ND_NEIGHBOR_ADVERT;
|
|
nd_na->nd_na_code = 0;
|
|
nd_na->nd_na_flags_reserved = ND_NA_FLAG_OVERRIDE;
|
|
nd_na->nd_na_target = addr->sin6_addr;
|
|
|
|
/* Option: Target link-layer address */
|
|
nd_oh = (struct nd_opt_hdr *)(buffer +
|
|
sizeof(struct ether_header) +
|
|
sizeof(struct ip6_hdr) +
|
|
sizeof(struct nd_neighbor_advert));
|
|
nd_oh->nd_opt_type = ND_OPT_TARGET_LINKADDR;
|
|
nd_oh->nd_opt_len = 1; /* multiple of 8 octets */
|
|
|
|
ea = (struct ether_addr *)(buffer +
|
|
sizeof(struct ether_header) +
|
|
sizeof(struct ip6_hdr) +
|
|
sizeof(struct nd_neighbor_advert) +
|
|
sizeof(struct nd_opt_hdr));
|
|
memcpy(ea, hwaddr, ETH_ALEN);
|
|
|
|
nd_na->nd_na_cksum = ip6_checksum((uint8_t *)nd_na,
|
|
ntohs(ip6->ip6_plen),
|
|
ip6);
|
|
|
|
*ether_dhost = (struct ether_addr *)eh->ether_dhost;
|
|
*len = l;
|
|
return 0;
|
|
}
|
|
|
|
int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
|
|
{
|
|
int s;
|
|
struct sockaddr_ll sall = {0};
|
|
struct ifreq if_hwaddr = {
|
|
.ifr_ifru = {
|
|
.ifru_flags = 0
|
|
},
|
|
};
|
|
uint8_t buffer[MAX(ARP_BUFFER_SIZE, IP6_NA_BUFFER_SIZE)];
|
|
struct ifreq ifr = {
|
|
.ifr_ifru = {
|
|
.ifru_flags = 0
|
|
},
|
|
};
|
|
struct ether_addr *hwaddr = NULL;
|
|
struct ether_addr *ether_dhost = NULL;
|
|
size_t len = 0;
|
|
int ret = 0;
|
|
|
|
s = socket(AF_PACKET, SOCK_RAW, 0);
|
|
if (s == -1) {
|
|
ret = errno;
|
|
DBG_ERR("Failed to open raw socket\n");
|
|
return ret;
|
|
}
|
|
DBG_DEBUG("Created SOCKET FD:%d for sending arp\n", s);
|
|
|
|
/* Find interface */
|
|
strlcpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name));
|
|
if (ioctl(s, SIOCGIFINDEX, &ifr) < 0) {
|
|
ret = errno;
|
|
DBG_ERR("Interface '%s' not found\n", iface);
|
|
goto fail;
|
|
}
|
|
|
|
/* Get MAC address */
|
|
strlcpy(if_hwaddr.ifr_name, iface, sizeof(if_hwaddr.ifr_name));
|
|
ret = ioctl(s, SIOCGIFHWADDR, &if_hwaddr);
|
|
if ( ret < 0 ) {
|
|
ret = errno;
|
|
DBG_ERR("ioctl failed\n");
|
|
goto fail;
|
|
}
|
|
if (ARPHRD_LOOPBACK == if_hwaddr.ifr_hwaddr.sa_family) {
|
|
ret = 0;
|
|
D_DEBUG("Ignoring loopback arp request\n");
|
|
goto fail;
|
|
}
|
|
if (if_hwaddr.ifr_hwaddr.sa_family != ARPHRD_ETHER) {
|
|
ret = EINVAL;
|
|
DBG_ERR("Not an ethernet address family (0x%x)\n",
|
|
if_hwaddr.ifr_hwaddr.sa_family);
|
|
goto fail;;
|
|
}
|
|
|
|
/* Set up most of destination address structure */
|
|
sall.sll_family = AF_PACKET;
|
|
sall.sll_halen = sizeof(struct ether_addr);
|
|
sall.sll_protocol = htons(ETH_P_ALL);
|
|
sall.sll_ifindex = ifr.ifr_ifindex;
|
|
|
|
/* For clarity */
|
|
hwaddr = (struct ether_addr *)if_hwaddr.ifr_hwaddr.sa_data;
|
|
|
|
switch (addr->ip.sin_family) {
|
|
case AF_INET:
|
|
/* Send gratuitous ARP */
|
|
ret = arp_build(buffer,
|
|
sizeof(buffer),
|
|
&addr->ip,
|
|
hwaddr,
|
|
false,
|
|
ðer_dhost,
|
|
&len);
|
|
if (ret != 0) {
|
|
DBG_ERR("Failed to build ARP request\n");
|
|
goto fail;
|
|
}
|
|
|
|
memcpy(&sall.sll_addr[0], ether_dhost, sall.sll_halen);
|
|
|
|
ret = sendto(s,
|
|
buffer,
|
|
len,
|
|
0,
|
|
(struct sockaddr *)&sall,
|
|
sizeof(sall));
|
|
if (ret < 0 ) {
|
|
ret = errno;
|
|
DBG_ERR("Failed sendto\n");
|
|
goto fail;
|
|
}
|
|
|
|
/* Send unsolicited ARP reply */
|
|
ret = arp_build(buffer,
|
|
sizeof(buffer),
|
|
&addr->ip,
|
|
hwaddr,
|
|
true,
|
|
ðer_dhost,
|
|
&len);
|
|
if (ret != 0) {
|
|
DBG_ERR("Failed to build ARP reply\n");
|
|
goto fail;
|
|
}
|
|
|
|
memcpy(&sall.sll_addr[0], ether_dhost, sall.sll_halen);
|
|
|
|
ret = sendto(s,
|
|
buffer,
|
|
len,
|
|
0,
|
|
(struct sockaddr *)&sall,
|
|
sizeof(sall));
|
|
if (ret < 0 ) {
|
|
ret = errno;
|
|
DBG_ERR("Failed sendto\n");
|
|
goto fail;
|
|
}
|
|
|
|
close(s);
|
|
break;
|
|
|
|
case AF_INET6:
|
|
ret = ip6_na_build(buffer,
|
|
sizeof(buffer),
|
|
&addr->ip6,
|
|
hwaddr,
|
|
ðer_dhost,
|
|
&len);
|
|
if (ret != 0) {
|
|
DBG_ERR("Failed to build IPv6 neighbor advertisement\n");
|
|
goto fail;
|
|
}
|
|
|
|
memcpy(&sall.sll_addr[0], ether_dhost, sall.sll_halen);
|
|
|
|
ret = sendto(s,
|
|
buffer,
|
|
len,
|
|
0,
|
|
(struct sockaddr *)&sall,
|
|
sizeof(sall));
|
|
if (ret < 0 ) {
|
|
ret = errno;
|
|
DBG_ERR("Failed sendto\n");
|
|
goto fail;
|
|
}
|
|
|
|
close(s);
|
|
break;
|
|
|
|
default:
|
|
ret = EINVAL;
|
|
DBG_ERR("Not an ipv4/ipv6 address (family is %u)\n",
|
|
addr->ip.sin_family);
|
|
goto fail;
|
|
}
|
|
|
|
return 0;
|
|
|
|
fail:
|
|
close(s);
|
|
return ret;
|
|
}
|
|
|
|
#else /* HAVE_PACKETSOCKET */
|
|
|
|
/*
 * Fallback used when HAVE_PACKETSOCKET is not defined: sending ARPs
 * is not implemented, so always return ENOSYS.
 */
int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
{
	(void)addr;
	(void)iface;

	/* Not implemented */
	return ENOSYS;
}
|
|
|
|
#endif /* HAVE_PACKETSOCKET */
|
|
|
|
|
|
/*
 * Sizes of the bare IP + TCP header packets built by tcp4_build() and
 * tcp6_build().  The expansions are parenthesized so the macros can be
 * used safely inside larger expressions.
 */
#define IP4_TCP_BUFFER_SIZE (sizeof(struct ip) + \
			     sizeof(struct tcphdr))

#define IP6_TCP_BUFFER_SIZE (sizeof(struct ip6_hdr) + \
			     sizeof(struct tcphdr))
|
|
|
|
static int tcp4_build(uint8_t *buf,
|
|
size_t buflen,
|
|
const struct sockaddr_in *src,
|
|
const struct sockaddr_in *dst,
|
|
uint32_t seq,
|
|
uint32_t ack,
|
|
int rst,
|
|
size_t *len)
|
|
{
|
|
size_t l = IP4_TCP_BUFFER_SIZE;
|
|
struct {
|
|
struct ip ip;
|
|
struct tcphdr tcp;
|
|
} *ip4pkt;
|
|
|
|
if (l != sizeof(*ip4pkt)) {
|
|
return EMSGSIZE;
|
|
}
|
|
|
|
if (buflen < l) {
|
|
return EMSGSIZE;
|
|
}
|
|
|
|
ip4pkt = (void *)buf;
|
|
memset(ip4pkt, 0, l);
|
|
|
|
ip4pkt->ip.ip_v = 4;
|
|
ip4pkt->ip.ip_hl = sizeof(ip4pkt->ip)/sizeof(uint32_t);
|
|
ip4pkt->ip.ip_len = htons(sizeof(ip4pkt));
|
|
ip4pkt->ip.ip_ttl = 255;
|
|
ip4pkt->ip.ip_p = IPPROTO_TCP;
|
|
ip4pkt->ip.ip_src.s_addr = src->sin_addr.s_addr;
|
|
ip4pkt->ip.ip_dst.s_addr = dst->sin_addr.s_addr;
|
|
ip4pkt->ip.ip_sum = 0;
|
|
|
|
ip4pkt->tcp.th_sport = src->sin_port;
|
|
ip4pkt->tcp.th_dport = dst->sin_port;
|
|
ip4pkt->tcp.th_seq = seq;
|
|
ip4pkt->tcp.th_ack = ack;
|
|
ip4pkt->tcp.th_flags = 0;
|
|
ip4pkt->tcp.th_flags |= TH_ACK;
|
|
if (rst) {
|
|
ip4pkt->tcp.th_flags |= TH_RST;
|
|
}
|
|
ip4pkt->tcp.th_off = sizeof(ip4pkt->tcp)/sizeof(uint32_t);
|
|
/* this makes it easier to spot in a sniffer */
|
|
ip4pkt->tcp.th_win = htons(1234);
|
|
ip4pkt->tcp.th_sum = ip_checksum((uint8_t *)&ip4pkt->tcp,
|
|
sizeof(ip4pkt->tcp),
|
|
&ip4pkt->ip);
|
|
|
|
*len = l;
|
|
return 0;
|
|
}
|
|
|
|
static int tcp6_build(uint8_t *buf,
|
|
size_t buflen,
|
|
const struct sockaddr_in6 *src,
|
|
const struct sockaddr_in6 *dst,
|
|
uint32_t seq,
|
|
uint32_t ack,
|
|
int rst,
|
|
size_t *len)
|
|
{
|
|
size_t l = IP6_TCP_BUFFER_SIZE;
|
|
struct {
|
|
struct ip6_hdr ip6;
|
|
struct tcphdr tcp;
|
|
} *ip6pkt;
|
|
|
|
if (l != sizeof(*ip6pkt)) {
|
|
return EMSGSIZE;
|
|
}
|
|
|
|
if (buflen < l) {
|
|
return EMSGSIZE;
|
|
}
|
|
|
|
ip6pkt = (void *)buf;
|
|
memset(ip6pkt, 0, l);
|
|
|
|
ip6pkt->ip6.ip6_vfc = 6 << 4;
|
|
ip6pkt->ip6.ip6_plen = htons(sizeof(struct tcphdr));
|
|
ip6pkt->ip6.ip6_nxt = IPPROTO_TCP;
|
|
ip6pkt->ip6.ip6_hlim = 64;
|
|
ip6pkt->ip6.ip6_src = src->sin6_addr;
|
|
ip6pkt->ip6.ip6_dst = dst->sin6_addr;
|
|
|
|
ip6pkt->tcp.th_sport = src->sin6_port;
|
|
ip6pkt->tcp.th_dport = dst->sin6_port;
|
|
ip6pkt->tcp.th_seq = seq;
|
|
ip6pkt->tcp.th_ack = ack;
|
|
ip6pkt->tcp.th_flags = 0;
|
|
ip6pkt->tcp.th_flags |= TH_ACK;
|
|
if (rst) {
|
|
ip6pkt->tcp.th_flags |= TH_RST;
|
|
}
|
|
ip6pkt->tcp.th_off = sizeof(ip6pkt->tcp)/sizeof(uint32_t);
|
|
/* this makes it easier to spot in a sniffer */
|
|
ip6pkt->tcp.th_win = htons(1234);
|
|
ip6pkt->tcp.th_sum = ip6_checksum((uint8_t *)&ip6pkt->tcp,
|
|
sizeof(ip6pkt->tcp),
|
|
&ip6pkt->ip6);
|
|
|
|
*len = l;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Send tcp segment from the specified IP/port to the specified
|
|
* destination IP/port.
|
|
*
|
|
* This is used to trigger the receiving host into sending its own ACK,
|
|
* which should trigger early detection of TCP reset by the client
|
|
* after IP takeover
|
|
*
|
|
* This can also be used to send RST segments (if rst is true) and also
|
|
* if correct seq and ack numbers are provided.
|
|
*/
|
|
int ctdb_sys_send_tcp(const ctdb_sock_addr *dest,
|
|
const ctdb_sock_addr *src,
|
|
uint32_t seq,
|
|
uint32_t ack,
|
|
int rst)
|
|
{
|
|
uint8_t buf[MAX(IP4_TCP_BUFFER_SIZE, IP6_TCP_BUFFER_SIZE)];
|
|
size_t len = 0;
|
|
int ret;
|
|
int s;
|
|
uint32_t one = 1;
|
|
struct sockaddr_in6 tmpdest = { 0 };
|
|
int saved_errno;
|
|
|
|
switch (src->ip.sin_family) {
|
|
case AF_INET:
|
|
ret = tcp4_build(buf,
|
|
sizeof(buf),
|
|
&src->ip,
|
|
&dest->ip,
|
|
seq,
|
|
ack,
|
|
rst,
|
|
&len);
|
|
if (ret != 0) {
|
|
DBG_ERR("Failed to build TCP packet (%d)\n", ret);
|
|
return ret;
|
|
}
|
|
|
|
/* open a raw socket to send this segment from */
|
|
s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
|
|
if (s == -1) {
|
|
DBG_ERR("Failed to open raw socket (%s)\n",
|
|
strerror(errno));
|
|
return -1;
|
|
}
|
|
|
|
ret = setsockopt(s, IPPROTO_IP, IP_HDRINCL, &one, sizeof(one));
|
|
if (ret != 0) {
|
|
DBG_ERR("Failed to setup IP headers (%s)\n",
|
|
strerror(errno));
|
|
close(s);
|
|
return -1;
|
|
}
|
|
|
|
ret = sendto(s,
|
|
buf,
|
|
len,
|
|
0,
|
|
(const struct sockaddr *)&dest->ip,
|
|
sizeof(dest->ip));
|
|
saved_errno = errno;
|
|
close(s);
|
|
if (ret == -1) {
|
|
D_ERR("Failed sendto (%s)\n", strerror(saved_errno));
|
|
return -1;
|
|
}
|
|
if ((size_t)ret != len) {
|
|
DBG_ERR("Failed sendto - didn't send full packet\n");
|
|
return -1;
|
|
}
|
|
break;
|
|
|
|
case AF_INET6:
|
|
ret = tcp6_build(buf,
|
|
sizeof(buf),
|
|
&src->ip6,
|
|
&dest->ip6,
|
|
seq,
|
|
ack,
|
|
rst,
|
|
&len);
|
|
if (ret != 0) {
|
|
DBG_ERR("Failed to build TCP packet (%d)\n", ret);
|
|
return ret;
|
|
}
|
|
|
|
s = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW);
|
|
if (s == -1) {
|
|
DBG_ERR("Failed to open sending socket\n");
|
|
return -1;
|
|
|
|
}
|
|
/*
|
|
* sendto() on an IPv6 raw socket requires the port to
|
|
* be either 0 or a protocol value
|
|
*/
|
|
tmpdest = dest->ip6;
|
|
tmpdest.sin6_port = 0;
|
|
|
|
ret = sendto(s,
|
|
buf,
|
|
len,
|
|
0,
|
|
(const struct sockaddr *)&tmpdest,
|
|
sizeof(tmpdest));
|
|
saved_errno = errno;
|
|
close(s);
|
|
if (ret == -1) {
|
|
D_ERR("Failed sendto (%s)\n", strerror(saved_errno));
|
|
return -1;
|
|
}
|
|
if ((size_t)ret != len) {
|
|
DBG_ERR("Failed sendto - didn't send full packet\n");
|
|
return -1;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
DBG_ERR("Not an ipv4/v6 address\n");
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Pull the connection details out of an IPv4 TCP packet.
 *
 * ip_pkt/pktlen is the packet starting at the IP header.  On success
 * returns 0 with src and dst filled in and *ack_seq, *seq, *window and
 * *rst copied from the TCP header without byte-order conversion
 * (window and rst are optional and may be NULL).  *rst is non-zero if
 * the RST flag is set.
 *
 * Returns EMSGSIZE if the packet is too short and ENOMSG if it is not
 * an unfragmented IPv4 TCP packet.
 */
static int tcp4_extract(const uint8_t *ip_pkt,
			size_t pktlen,
			struct sockaddr_in *src,
			struct sockaddr_in *dst,
			uint32_t *ack_seq,
			uint32_t *seq,
			int *rst,
			uint16_t *window)
{
	const struct ip *ip = NULL;
	const struct tcphdr *tcp = NULL;
	size_t ip_hdr_len;

	if (pktlen < sizeof(struct ip)) {
		return EMSGSIZE;
	}

	ip = (const struct ip *)ip_pkt;

	/* IPv4 only, don't look at fragments, TCP only */
	if (ip->ip_v != 4 ||
	    (ntohs(ip->ip_off) & 0x1fff) != 0 ||
	    ip->ip_p != IPPROTO_TCP) {
		return ENOMSG;
	}

	/* Ensure there is enough of the packet to gather required fields */
	ip_hdr_len = ip->ip_hl * sizeof(uint32_t);
	if (pktlen < ip_hdr_len + offsetof(struct tcphdr, th_sum)) {
		return EMSGSIZE;
	}

	tcp = (const struct tcphdr *)(ip_pkt + ip_hdr_len);

	src->sin_family = AF_INET;
	src->sin_addr.s_addr = ip->ip_src.s_addr;
	src->sin_port = tcp->th_sport;

	dst->sin_family = AF_INET;
	dst->sin_addr.s_addr = ip->ip_dst.s_addr;
	dst->sin_port = tcp->th_dport;

	*ack_seq = tcp->th_ack;
	*seq = tcp->th_seq;
	if (window != NULL) {
		*window = tcp->th_win;
	}
	if (rst != NULL) {
		*rst = tcp->th_flags & TH_RST;
	}

	return 0;
}
|
|
|
|
/*
 * Pull the connection details out of an IPv6 TCP packet.
 *
 * ip_pkt/pktlen is the packet starting at the IPv6 header; the TCP
 * header is expected directly after it.  On success returns 0 with src
 * and dst filled in and *ack_seq, *seq, *window and *rst copied from
 * the TCP header without byte-order conversion (window and rst are
 * optional and may be NULL).  *rst is non-zero if the RST flag is set.
 *
 * Returns EMSGSIZE if the packet is too short and ENOMSG if it is not
 * IPv6 with TCP as the next header.
 */
static int tcp6_extract(const uint8_t *ip_pkt,
			size_t pktlen,
			struct sockaddr_in6 *src,
			struct sockaddr_in6 *dst,
			uint32_t *ack_seq,
			uint32_t *seq,
			int *rst,
			uint16_t *window)
{
	const size_t needed = sizeof(struct ip6_hdr) +
			      offsetof(struct tcphdr, th_sum);
	const struct ip6_hdr *hdr6 = NULL;
	const struct tcphdr *tcp = NULL;

	/* Ensure there is enough of the packet to gather required fields */
	if (pktlen < needed) {
		return EMSGSIZE;
	}

	hdr6 = (const struct ip6_hdr *)ip_pkt;

	/* IPv6 only, TCP only */
	if ((hdr6->ip6_vfc >> 4) != 6 || hdr6->ip6_nxt != IPPROTO_TCP) {
		return ENOMSG;
	}

	tcp = (const struct tcphdr *)(ip_pkt + sizeof(struct ip6_hdr));

	src->sin6_family = AF_INET6;
	src->sin6_port = tcp->th_sport;
	src->sin6_addr = hdr6->ip6_src;

	dst->sin6_family = AF_INET6;
	dst->sin6_port = tcp->th_dport;
	dst->sin6_addr = hdr6->ip6_dst;

	*ack_seq = tcp->th_ack;
	*seq = tcp->th_seq;
	if (window != NULL) {
		*window = tcp->th_win;
	}
	if (rst != NULL) {
		*rst = tcp->th_flags & TH_RST;
	}

	return 0;
}
|
|
|
|
/*
|
|
* Packet capture
|
|
*
|
|
* If AF_PACKET is available then use a raw socket otherwise use pcap.
|
|
* wscript has checked to make sure that pcap is available if needed.
|
|
*/
|
|
|
|
#if defined(HAVE_AF_PACKET) && !defined(ENABLE_PCAP)
|
|
|
|
/*
 * Open a non-blocking, close-on-exec AF_PACKET raw socket that
 * captures all traffic.
 *
 * Neither iface nor private_data is used by this implementation.
 * Returns the socket file descriptor, or -1 on failure.
 */
int ctdb_sys_open_capture_socket(const char *iface, void **private_data)
{
	int fd;

	/* Open a socket to capture all traffic */
	fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	if (fd == -1) {
		DBG_ERR("Failed to open raw socket\n");
		return -1;
	}

	DBG_DEBUG("Opened raw socket for TCP tickle capture (fd=%d)\n", fd);

	if (set_blocking(fd, false) != 0) {
		DBG_ERR("Failed to set socket non-blocking (%s)\n",
			strerror(errno));
		close(fd);
		return -1;
	}

	set_close_on_exec(fd);

	return fd;
}
|
|
|
|
/*
 * Do any additional cleanup required when closing a capture socket.
 * Note that the socket itself is closed automatically in the caller.
 *
 * The raw socket implementation has no extra state, so this does
 * nothing and returns 0.
 */
int ctdb_sys_close_capture_socket(void *private_data)
{
	(void)private_data;

	return 0;
}
|
|
|
|
|
|
/*
|
|
* called when the raw socket becomes readable
|
|
*/
|
|
int ctdb_sys_read_tcp_packet(int s, void *private_data,
|
|
ctdb_sock_addr *src,
|
|
ctdb_sock_addr *dst,
|
|
uint32_t *ack_seq,
|
|
uint32_t *seq,
|
|
int *rst,
|
|
uint16_t *window)
|
|
{
|
|
ssize_t nread;
|
|
uint8_t pkt[100]; /* Large enough for simple ACK/RST packets */
|
|
struct ether_header *eth;
|
|
int ret;
|
|
|
|
nread = recv(s, pkt, sizeof(pkt), MSG_TRUNC);
|
|
if (nread == -1) {
|
|
return errno;
|
|
}
|
|
if ((size_t)nread < sizeof(*eth)) {
|
|
return EMSGSIZE;
|
|
}
|
|
|
|
ZERO_STRUCTP(src);
|
|
ZERO_STRUCTP(dst);
|
|
|
|
/* Ethernet */
|
|
eth = (struct ether_header *)pkt;
|
|
|
|
/* we want either IPv4 or IPv6 */
|
|
if (ntohs(eth->ether_type) == ETHERTYPE_IP) {
|
|
ret = tcp4_extract(pkt + sizeof(struct ether_header),
|
|
(size_t)nread - sizeof(struct ether_header),
|
|
&src->ip,
|
|
&dst->ip,
|
|
ack_seq,
|
|
seq,
|
|
rst,
|
|
window);
|
|
return ret;
|
|
|
|
} else if (ntohs(eth->ether_type) == ETHERTYPE_IP6) {
|
|
ret = tcp6_extract(pkt + sizeof(struct ether_header),
|
|
(size_t)nread - sizeof(struct ether_header),
|
|
&src->ip6,
|
|
&dst->ip6,
|
|
ack_seq,
|
|
seq,
|
|
rst,
|
|
window);
|
|
return ret;
|
|
}
|
|
|
|
return ENOMSG;
|
|
}
|
|
|
|
#else /* defined(HAVE_AF_PACKET) && !defined(ENABLE_PCAP) */
|
|
|
|
#include <pcap.h>
|
|
|
|
/*
|
|
* Assume this exists if pcap.h exists - it has been around for a
|
|
* while
|
|
*/
|
|
#include <pcap/sll.h>
|
|
|
|
int ctdb_sys_open_capture_socket(const char *iface, void **private_data)
|
|
{
|
|
char errbuf[PCAP_ERRBUF_SIZE];
|
|
pcap_t *pt;
|
|
int pcap_packet_type;
|
|
const char *t = NULL;
|
|
int fd;
|
|
int ret;
|
|
|
|
pt = pcap_create(iface, errbuf);
|
|
if (pt == NULL) {
|
|
DBG_ERR("Failed to open pcap capture device %s (%s)\n",
|
|
iface,
|
|
errbuf);
|
|
return -1;
|
|
}
|
|
/*
|
|
* pcap isn't very clear about defaults...
|
|
*/
|
|
ret = pcap_set_snaplen(pt, 100);
|
|
if (ret < 0) {
|
|
DBG_ERR("Failed to set snaplen for pcap capture\n");
|
|
goto fail;
|
|
}
|
|
ret = pcap_set_promisc(pt, 0);
|
|
if (ret < 0) {
|
|
DBG_ERR("Failed to unset promiscuous mode for pcap capture\n");
|
|
goto fail;
|
|
}
|
|
ret = pcap_set_timeout(pt, 0);
|
|
if (ret < 0) {
|
|
DBG_ERR("Failed to set timeout for pcap capture\n");
|
|
goto fail;
|
|
}
|
|
#ifdef HAVE_PCAP_SET_IMMEDIATE_MODE
|
|
ret = pcap_set_immediate_mode(pt, 1);
|
|
if (ret < 0) {
|
|
DBG_ERR("Failed to set immediate mode for pcap capture\n");
|
|
goto fail;
|
|
}
|
|
#endif
|
|
ret = pcap_activate(pt);
|
|
if (ret < 0) {
|
|
DBG_ERR("Failed to activate pcap capture\n");
|
|
goto fail;
|
|
}
|
|
|
|
pcap_packet_type = pcap_datalink(pt);
|
|
switch (pcap_packet_type) {
|
|
case DLT_EN10MB:
|
|
t = "DLT_EN10MB";
|
|
break;
|
|
case DLT_LINUX_SLL:
|
|
t = "DLT_LINUX_SLL";
|
|
break;
|
|
#ifdef DLT_LINUX_SLL2
|
|
case DLT_LINUX_SLL2:
|
|
t = "DLT_LINUX_SLL2";
|
|
break;
|
|
#endif /* DLT_LINUX_SLL2 */
|
|
default:
|
|
DBG_ERR("Unknown pcap packet type %d\n", pcap_packet_type);
|
|
goto fail;
|
|
}
|
|
|
|
fd = pcap_get_selectable_fd(pt);
|
|
DBG_DEBUG("Opened pcap capture for TCP tickle (type=%s, fd=%d)\n",
|
|
t,
|
|
fd);
|
|
|
|
*((pcap_t **)private_data) = pt;
|
|
return fd;
|
|
|
|
fail:
|
|
pcap_close(pt);
|
|
return -1;
|
|
}
|
|
|
|
int ctdb_sys_close_capture_socket(void *private_data)
|
|
{
|
|
pcap_t *pt = (pcap_t *)private_data;
|
|
pcap_close(pt);
|
|
return 0;
|
|
}
|
|
|
|
int ctdb_sys_read_tcp_packet(int s,
|
|
void *private_data,
|
|
ctdb_sock_addr *src,
|
|
ctdb_sock_addr *dst,
|
|
uint32_t *ack_seq,
|
|
uint32_t *seq,
|
|
int *rst,
|
|
uint16_t *window)
|
|
{
|
|
int ret;
|
|
struct pcap_pkthdr pkthdr;
|
|
const u_char *buffer;
|
|
pcap_t *pt = (pcap_t *)private_data;
|
|
int pcap_packet_type;
|
|
uint16_t ether_type;
|
|
size_t ll_hdr_len;
|
|
|
|
buffer=pcap_next(pt, &pkthdr);
|
|
if (buffer==NULL) {
|
|
return ENOMSG;
|
|
}
|
|
|
|
ZERO_STRUCTP(src);
|
|
ZERO_STRUCTP(dst);
|
|
|
|
pcap_packet_type = pcap_datalink(pt);
|
|
switch (pcap_packet_type) {
|
|
case DLT_EN10MB: {
|
|
const struct ether_header *eth =
|
|
(const struct ether_header *)buffer;
|
|
ether_type = ntohs(eth->ether_type);
|
|
ll_hdr_len = sizeof(struct ether_header);
|
|
break;
|
|
}
|
|
case DLT_LINUX_SLL: {
|
|
const struct sll_header *sll =
|
|
(const struct sll_header *)buffer;
|
|
uint16_t arphrd_type = ntohs(sll->sll_hatype);
|
|
switch (arphrd_type) {
|
|
case ARPHRD_ETHER:
|
|
case ARPHRD_INFINIBAND:
|
|
break;
|
|
default:
|
|
DBG_DEBUG("SLL: Unknown arphrd_type %"PRIu16"\n",
|
|
arphrd_type);
|
|
return EPROTONOSUPPORT;
|
|
}
|
|
ether_type = ntohs(sll->sll_protocol);
|
|
ll_hdr_len = SLL_HDR_LEN;
|
|
break;
|
|
}
|
|
#ifdef DLT_LINUX_SLL2
|
|
case DLT_LINUX_SLL2: {
|
|
const struct sll2_header *sll2 =
|
|
(const struct sll2_header *)buffer;
|
|
uint16_t arphrd_type = ntohs(sll2->sll2_hatype);
|
|
switch (arphrd_type) {
|
|
case ARPHRD_ETHER:
|
|
case ARPHRD_INFINIBAND:
|
|
break;
|
|
default:
|
|
DBG_DEBUG("SLL2: Unknown arphrd_type %"PRIu16"\n",
|
|
arphrd_type);
|
|
return EPROTONOSUPPORT;
|
|
}
|
|
ether_type = ntohs(sll2->sll2_protocol);
|
|
ll_hdr_len = SLL2_HDR_LEN;
|
|
break;
|
|
}
|
|
#endif /* DLT_LINUX_SLL2 */
|
|
default:
|
|
DBG_DEBUG("Unknown pcap packet type %d\n", pcap_packet_type);
|
|
return EPROTONOSUPPORT;
|
|
}
|
|
|
|
switch (ether_type) {
|
|
case ETHERTYPE_IP:
|
|
ret = tcp4_extract(buffer + ll_hdr_len,
|
|
(size_t)pkthdr.caplen - ll_hdr_len,
|
|
&src->ip,
|
|
&dst->ip,
|
|
ack_seq,
|
|
seq,
|
|
rst,
|
|
window);
|
|
break;
|
|
case ETHERTYPE_IP6:
|
|
ret = tcp6_extract(buffer + ll_hdr_len,
|
|
(size_t)pkthdr.caplen - ll_hdr_len,
|
|
&src->ip6,
|
|
&dst->ip6,
|
|
ack_seq,
|
|
seq,
|
|
rst,
|
|
window);
|
|
break;
|
|
case ETHERTYPE_ARP:
|
|
/* Silently ignore ARP packets */
|
|
return EPROTO;
|
|
default:
|
|
DBG_DEBUG("Unknown ether type %"PRIu16"\n", ether_type);
|
|
return EPROTO;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
#endif /* defined(HAVE_AF_PACKET) && !defined(ENABLE_PCAP) */
|