mirror of
https://github.com/samba-team/samba.git
synced 2025-03-27 22:50:26 +03:00
1st working ib integrated ctdb
TODO: fix ctdb_bench (it tries to send via an uninitialised connection - see bench_ring/dest) (This used to be ctdb commit 61516461e9d45dc7ba87518d134894fed7d7b7cd)
This commit is contained in:
parent
1c8bcd6f57
commit
199a5befb6
@ -9,3 +9,12 @@ After then:
|
||||
|
||||
./configure --enable-infiniband
|
||||
|
||||
Example for testing
|
||||
===================
|
||||
bin/ctdb_test --transport ib --nlist ../2nodes_rm.txt --listen 10.0.0.1:9001
|
||||
bin/ctdb_test --transport ib --nlist ../2nodes_rm.txt --listen 10.0.0.2:9001
|
||||
|
||||
where 2nodes_rm.txt contains:
|
||||
10.0.0.1:9001
|
||||
10.0.0.2:9001
|
||||
|
||||
|
@ -29,6 +29,36 @@
|
||||
#include "ibwrapper.h"
|
||||
#include "ibw_ctdb.h"
|
||||
|
||||
int ctdb_ibw_node_connect(struct ibw_ctx *ictx, struct ctdb_node *node)
|
||||
{
|
||||
struct sockaddr_in sock_out;
|
||||
|
||||
memset(&sock_out, 0, sizeof(struct sockaddr_in));
|
||||
inet_pton(AF_INET, node->address.address, &sock_out.sin_addr);
|
||||
sock_out.sin_port = htons(node->address.port);
|
||||
sock_out.sin_family = PF_INET;
|
||||
|
||||
if (ibw_connect(ictx, &sock_out, node)) {
|
||||
DEBUG(0, ("ctdb_ibw_node_connect: ibw_connect failed - retrying in 1 sec...\n"));
|
||||
/* try again once a second */
|
||||
event_add_timed(node->ctdb->ev, node, timeval_current_ofs(1, 0),
|
||||
ctdb_ibw_node_connect_event, node);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* continues at ibw_ctdb.c/IBWC_CONNECTED in good case */
|
||||
return 0;
|
||||
}
|
||||
|
||||
void ctdb_ibw_node_connect_event(struct event_context *ev, struct timed_event *te,
|
||||
struct timeval t, void *private)
|
||||
{
|
||||
struct ctdb_node *node = talloc_get_type(private, struct ctdb_node);
|
||||
struct ibw_ctx *ictx = talloc_get_type(node->ctdb->private, struct ibw_ctx);
|
||||
|
||||
ctdb_ibw_node_connect(ictx, node);
|
||||
}
|
||||
|
||||
int ctdb_ibw_connstate_handler(struct ibw_ctx *ctx, struct ibw_conn *conn)
|
||||
{
|
||||
if (ctx!=NULL) {
|
||||
@ -76,11 +106,17 @@ int ctdb_ibw_connstate_handler(struct ibw_ctx *ctx, struct ibw_conn *conn)
|
||||
if (node!=NULL)
|
||||
node->ctdb->upcalls->node_dead(node);
|
||||
talloc_free(conn);
|
||||
/* normal + intended disconnect => not reconnecting in this layer */
|
||||
} break;
|
||||
case IBWC_ERROR: {
|
||||
/* struct ctdb_node *node = talloc_get_type(conn->conn_userdata, struct ctdb_node);
|
||||
struct ctdb_node *node = talloc_get_type(conn->conn_userdata, struct ctdb_node);
|
||||
if (node!=NULL)
|
||||
node->ctdb->upcalls->node_dead(node);*/
|
||||
node->private = NULL; /* not to use again */
|
||||
|
||||
DEBUG(10, ("IBWC_ERROR, reconnecting immediately...\n"));
|
||||
talloc_free(conn);
|
||||
event_add_timed(node->ctdb->ev, node, timeval_current_ofs(1, 0),
|
||||
ctdb_ibw_node_connect_event, node);
|
||||
} break;
|
||||
default:
|
||||
assert(0);
|
||||
@ -94,17 +130,20 @@ int ctdb_ibw_connstate_handler(struct ibw_ctx *ctx, struct ibw_conn *conn)
|
||||
int ctdb_ibw_receive_handler(struct ibw_conn *conn, void *buf, int n)
|
||||
{
|
||||
struct ctdb_context *ctdb = talloc_get_type(conn->ctx->ctx_userdata, struct ctdb_context);
|
||||
void *buf2; /* future TODO: a solution for removal of this */
|
||||
|
||||
assert(ctdb!=NULL);
|
||||
assert(buf!=NULL);
|
||||
assert(conn!=NULL);
|
||||
assert(conn->state==IBWC_CONNECTED);
|
||||
|
||||
/* TODO: shall I short-circuit this in ibwrapper? */
|
||||
/* maybe when everything go fine... */
|
||||
/* so far "buf" is an ib-registered memory area
|
||||
* and being reused for next receive
|
||||
* noticed that HL requires talloc-ed memory to be stolen */
|
||||
buf2 = talloc_zero_size(conn, n);
|
||||
memcpy(buf2, buf, n);
|
||||
|
||||
/* TODO2: !!! here I can provide conn->conn_userdata (with no perf. penalty) -
|
||||
* as struct ctdb_node in case the connection
|
||||
* has been built up by ibw_connect !!! */
|
||||
ctdb->upcalls->recv_pkt(ctdb, (uint8_t *)buf, (uint32_t)n);
|
||||
ctdb->upcalls->recv_pkt(ctdb, (uint8_t *)buf2, (uint32_t)n);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -23,3 +23,8 @@
|
||||
|
||||
int ctdb_ibw_connstate_handler(struct ibw_ctx *ctx, struct ibw_conn *conn);
|
||||
int ctdb_ibw_receive_handler(struct ibw_conn *conn, void *buf, int n);
|
||||
|
||||
int ctdb_ibw_node_connect(struct ibw_ctx *ictx, struct ctdb_node *node);
|
||||
void ctdb_ibw_node_connect_event(struct event_context *ev, struct timed_event *te,
|
||||
struct timeval t, void *private);
|
||||
|
||||
|
@ -53,24 +53,6 @@ static int ctdb_ibw_listen(struct ctdb_context *ctdb, int backlog)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ctdb_ibw_node_connect(struct ibw_ctx *ictx, struct ctdb_node *node)
|
||||
{
|
||||
struct sockaddr_in sock_out;
|
||||
|
||||
memset(&sock_out, 0, sizeof(struct sockaddr_in));
|
||||
inet_pton(AF_INET, node->address.address, &sock_out.sin_addr);
|
||||
sock_out.sin_port = htons(node->address.port);
|
||||
sock_out.sin_family = PF_INET;
|
||||
|
||||
if (ibw_connect(ictx, &sock_out, node)) {
|
||||
DEBUG(0, ("ctdb_ibw_node_connect: ibw_connect failed\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* continues at ibw_ctdb.c/IBWC_CONNECTED in good case */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Start infiniband
|
||||
*/
|
||||
|
@ -775,7 +775,7 @@ static int ibw_wc_recv(struct ibw_conn *conn, struct ibv_wc *wc)
|
||||
if (part->len<=sizeof(uint32_t) && part->to_read==0) {
|
||||
assert(part->len==sizeof(uint32_t));
|
||||
/* set it again now... */
|
||||
part->to_read = ntohl(*((uint32_t *)(part->buf)));
|
||||
part->to_read = *((uint32_t *)(part->buf)); /* TODO: ntohl */
|
||||
if (part->to_read<sizeof(uint32_t)) {
|
||||
sprintf(ibw_lasterr, "got msglen=%u #2\n", part->to_read);
|
||||
goto error;
|
||||
@ -791,7 +791,7 @@ static int ibw_wc_recv(struct ibw_conn *conn, struct ibv_wc *wc)
|
||||
}
|
||||
} else {
|
||||
if (remain>=sizeof(uint32_t)) {
|
||||
uint32_t msglen = ntohl(*(uint32_t *)p);
|
||||
uint32_t msglen = *(uint32_t *)p; /* TODO: ntohl */
|
||||
if (msglen<sizeof(uint32_t)) {
|
||||
sprintf(ibw_lasterr, "got msglen=%u\n", msglen);
|
||||
goto error;
|
||||
@ -1028,7 +1028,8 @@ int ibw_connect(struct ibw_ctx *ctx, struct sockaddr_in *serv_addr, void *conn_u
|
||||
if (rc) {
|
||||
rc = errno;
|
||||
sprintf(ibw_lasterr, "ibw_connect/rdma_create_id error %d\n", rc);
|
||||
return rc;
|
||||
talloc_free(conn);
|
||||
return -1;
|
||||
}
|
||||
DEBUG(10, ("ibw_connect: rdma_create_id succeeded, cm_id=%p\n", pconn->cm_id));
|
||||
|
||||
@ -1196,7 +1197,7 @@ int ibw_send(struct ibw_conn *conn, void *buf, void *key, uint32_t len)
|
||||
int rc;
|
||||
|
||||
assert(len>=sizeof(uint32_t));
|
||||
*((uint32_t *)buf) = htonl(len);
|
||||
assert((*((uint32_t *)buf)==len)); /* TODO: htonl */
|
||||
|
||||
if (len > pctx->opts.recv_bufsize) {
|
||||
struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
|
||||
|
@ -110,6 +110,7 @@ int ibwtest_send_id(struct ibw_conn *conn)
|
||||
}
|
||||
|
||||
/* first sizeof(uint32_t) size bytes are for length */
|
||||
*((uint32_t *)buf) = len;
|
||||
buf[sizeof(uint32_t)] = (char)TESTOP_SEND_ID;
|
||||
strcpy(buf+sizeof(uint32_t)+1, tcx->id);
|
||||
|
||||
@ -137,6 +138,7 @@ int ibwtest_send_test_msg(struct ibwtest_ctx *tcx, struct ibw_conn *conn, const
|
||||
return -1;
|
||||
}
|
||||
|
||||
*((uint32_t *)buf) = len;
|
||||
p = buf;
|
||||
p += sizeof(uint32_t);
|
||||
p[0] = (char)TESTOP_SEND_TEXT;
|
||||
@ -190,6 +192,7 @@ int ibwtest_do_varsize_scenario_conn_size(struct ibwtest_ctx *tcx, struct ibw_co
|
||||
DEBUG(0, ("varsize/ibw_alloc_send_buf failed\n"));
|
||||
return -1;
|
||||
}
|
||||
*((uint32_t *)buf) = len;
|
||||
buf[sizeof(uint32_t)] = TESTOP_SEND_RND;
|
||||
sum = ibwtest_fill_random(buf + sizeof(uint32_t) + 1, size);
|
||||
buf[sizeof(uint32_t) + 1 + size] = sum;
|
||||
@ -329,7 +332,7 @@ int ibwtest_receive_handler(struct ibw_conn *conn, void *buf, int n)
|
||||
DEBUG(0, ("ERROR: checksum mismatch %u!=%u\n",
|
||||
(uint32_t)sum, (uint32_t)((unsigned char *)buf)[n-1]));
|
||||
ibw_stop(tcx->ibwctx);
|
||||
return -3;
|
||||
goto error;
|
||||
}
|
||||
} else {
|
||||
char *buf2;
|
||||
@ -338,12 +341,12 @@ int ibwtest_receive_handler(struct ibw_conn *conn, void *buf, int n)
|
||||
/* bounce message regardless what it is */
|
||||
if (ibw_alloc_send_buf(conn, (void **)&buf2, &key2, n)) {
|
||||
fprintf(stderr, "ibw_alloc_send_buf error #2\n");
|
||||
return -1;
|
||||
goto error;
|
||||
}
|
||||
memcpy(buf2, buf, n);
|
||||
if (ibw_send(conn, buf2, key2, n)) {
|
||||
fprintf(stderr, "ibw_send error #2\n");
|
||||
return -2;
|
||||
goto error;
|
||||
}
|
||||
tcx->nsent++;
|
||||
}
|
||||
@ -368,6 +371,8 @@ int ibwtest_receive_handler(struct ibw_conn *conn, void *buf, int n)
|
||||
tcx->error = rc;
|
||||
|
||||
return rc;
|
||||
error:
|
||||
return -1;
|
||||
}
|
||||
|
||||
void ibwtest_timeout_handler(struct event_context *ev, struct timed_event *te,
|
||||
|
Loading…
x
Reference in New Issue
Block a user