tipc: fix failover problem
We see the following scenario: 1) Link endpoint B on node 1 discovers that its peer endpoint is gone. Since there is a second working link, failover procedure is started. 2) Link endpoint A on node 1 sends a FAILOVER message to peer endpoint A on node 2. The node item 1->2 goes to state FAILINGOVER. 3) Link endpoint A/2 receives the failover, and is supposed to take down its parallel link endpoint B/2, while producing a FAILOVER message to send back to A/1. 4) However, B/2 has already been deleted, so no FAILOVER message can be created. 5) Node 1->2 remains in state FAILINGOVER forever, refusing to receive any messages that can bring B/1 up again. We are left with a non-redundant link between node 1 and 2. We fix this by letting endpoint A/2 build a dummy FAILOVER message to send back to A/1, so that the situation can be resolved. Signed-off-by: LUU Duc Canh <canh.d.luu@dektech.com.au> Signed-off-by: Jon Maloy <jon.maloy@ericsson.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
418b9a353a
commit
c140eb166d
@ -410,6 +410,11 @@ char *tipc_link_name(struct tipc_link *l)
|
||||
return l->name;
|
||||
}
|
||||
|
||||
u32 tipc_link_state(struct tipc_link *l)
|
||||
{
|
||||
return l->state;
|
||||
}
|
||||
|
||||
/**
|
||||
* tipc_link_create - create a new link
|
||||
* @n: pointer to associated node
|
||||
@ -1385,6 +1390,36 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
|
||||
__skb_queue_tail(xmitq, skb);
|
||||
}
|
||||
|
||||
void tipc_link_create_dummy_tnl_msg(struct tipc_link *l,
|
||||
struct sk_buff_head *xmitq)
|
||||
{
|
||||
u32 onode = tipc_own_addr(l->net);
|
||||
struct tipc_msg *hdr, *ihdr;
|
||||
struct sk_buff_head tnlq;
|
||||
struct sk_buff *skb;
|
||||
u32 dnode = l->addr;
|
||||
|
||||
skb_queue_head_init(&tnlq);
|
||||
skb = tipc_msg_create(TUNNEL_PROTOCOL, FAILOVER_MSG,
|
||||
INT_H_SIZE, BASIC_H_SIZE,
|
||||
dnode, onode, 0, 0, 0);
|
||||
if (!skb) {
|
||||
pr_warn("%sunable to create tunnel packet\n", link_co_err);
|
||||
return;
|
||||
}
|
||||
|
||||
hdr = buf_msg(skb);
|
||||
msg_set_msgcnt(hdr, 1);
|
||||
msg_set_bearer_id(hdr, l->peer_bearer_id);
|
||||
|
||||
ihdr = (struct tipc_msg *)msg_data(hdr);
|
||||
tipc_msg_init(onode, ihdr, TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG,
|
||||
BASIC_H_SIZE, dnode);
|
||||
msg_set_errcode(ihdr, TIPC_ERR_NO_PORT);
|
||||
__skb_queue_tail(&tnlq, skb);
|
||||
tipc_link_xmit(l, &tnlq, xmitq);
|
||||
}
|
||||
|
||||
/* tipc_link_tnl_prepare(): prepare and return a list of tunnel packets
|
||||
* with contents of the link's transmit and backlog queues.
|
||||
*/
|
||||
|
@ -88,6 +88,8 @@ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer,
|
||||
struct tipc_link **link);
|
||||
void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl,
|
||||
int mtyp, struct sk_buff_head *xmitq);
|
||||
void tipc_link_create_dummy_tnl_msg(struct tipc_link *tnl,
|
||||
struct sk_buff_head *xmitq);
|
||||
void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq);
|
||||
int tipc_link_fsm_evt(struct tipc_link *l, int evt);
|
||||
bool tipc_link_is_up(struct tipc_link *l);
|
||||
@ -107,6 +109,7 @@ u16 tipc_link_rcv_nxt(struct tipc_link *l);
|
||||
u16 tipc_link_acked(struct tipc_link *l);
|
||||
u32 tipc_link_id(struct tipc_link *l);
|
||||
char *tipc_link_name(struct tipc_link *l);
|
||||
u32 tipc_link_state(struct tipc_link *l);
|
||||
char tipc_link_plane(struct tipc_link *l);
|
||||
int tipc_link_prio(struct tipc_link *l);
|
||||
int tipc_link_window(struct tipc_link *l);
|
||||
|
@ -111,6 +111,7 @@ struct tipc_node {
|
||||
int action_flags;
|
||||
struct list_head list;
|
||||
int state;
|
||||
bool failover_sent;
|
||||
u16 sync_point;
|
||||
int link_cnt;
|
||||
u16 working_links;
|
||||
@ -680,6 +681,7 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id,
|
||||
*slot0 = bearer_id;
|
||||
*slot1 = bearer_id;
|
||||
tipc_node_fsm_evt(n, SELF_ESTABL_CONTACT_EVT);
|
||||
n->failover_sent = false;
|
||||
n->action_flags |= TIPC_NOTIFY_NODE_UP;
|
||||
tipc_link_set_active(nl, true);
|
||||
tipc_bcast_add_peer(n->net, nl, xmitq);
|
||||
@ -1615,6 +1617,15 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
|
||||
tipc_skb_queue_splice_tail_init(tipc_link_inputq(pl),
|
||||
tipc_link_inputq(l));
|
||||
}
|
||||
/* If parallel link was already down, and this happened before
|
||||
* the tunnel link came up, FAILOVER was never sent. Ensure that
|
||||
* FAILOVER is sent to get peer out of NODE_FAILINGOVER state.
|
||||
*/
|
||||
if (n->state != NODE_FAILINGOVER && !n->failover_sent) {
|
||||
tipc_link_create_dummy_tnl_msg(l, xmitq);
|
||||
n->failover_sent = true;
|
||||
}
|
||||
|
||||
/* If pkts arrive out of order, use lowest calculated syncpt */
|
||||
if (less(syncpt, n->sync_point))
|
||||
n->sync_point = syncpt;
|
||||
|
Loading…
Reference in New Issue
Block a user