diff options
Diffstat (limited to 'net/tipc')
-rw-r--r-- | net/tipc/bcast.c | 275 | ||||
-rw-r--r-- | net/tipc/bcast.h | 12 | ||||
-rw-r--r-- | net/tipc/core.c | 2 | ||||
-rw-r--r-- | net/tipc/core.h | 3 | ||||
-rw-r--r-- | net/tipc/link.c | 274 | ||||
-rw-r--r-- | net/tipc/msg.h | 62 | ||||
-rw-r--r-- | net/tipc/netlink.c | 26 | ||||
-rw-r--r-- | net/tipc/node.c | 23 | ||||
-rw-r--r-- | net/tipc/node.h | 8 | ||||
-rw-r--r-- | net/tipc/socket.c | 6 | ||||
-rw-r--r-- | net/tipc/udp_media.c | 2 |
11 files changed, 615 insertions, 78 deletions
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index d8026543bf4c..6c997d4a6218 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -54,7 +54,9 @@ const char tipc_bclink_name[] = "broadcast-link"; * @dests: array keeping number of reachable destinations per bearer * @primary_bearer: a bearer having links to all broadcast destinations, if any * @bcast_support: indicates if primary bearer, if any, supports broadcast + * @force_bcast: forces broadcast for multicast traffic * @rcast_support: indicates if all peer nodes support replicast + * @force_rcast: forces replicast for multicast traffic * @rc_ratio: dest count as percentage of cluster size where send method changes * @bc_threshold: calculated from rc_ratio; if dests > threshold use broadcast */ @@ -64,7 +66,9 @@ struct tipc_bc_base { int dests[MAX_BEARERS]; int primary_bearer; bool bcast_support; + bool force_bcast; bool rcast_support; + bool force_rcast; int rc_ratio; int bc_threshold; }; @@ -216,9 +220,24 @@ static void tipc_bcast_select_xmit_method(struct net *net, int dests, } /* Can current method be changed ? */ method->expires = jiffies + TIPC_METHOD_EXPIRE; - if (method->mandatory || time_before(jiffies, exp)) + if (method->mandatory) return; + if (!(tipc_net(net)->capabilities & TIPC_MCAST_RBCTL) && + time_before(jiffies, exp)) + return; + + /* Configuration as force 'broadcast' method */ + if (bb->force_bcast) { + method->rcast = false; + return; + } + /* Configuration as force 'replicast' method */ + if (bb->force_rcast) { + method->rcast = true; + return; + } + /* Configuration as 'autoselect' or default method */ /* Determine method to use now */ method->rcast = dests <= bb->bc_threshold; } @@ -281,6 +300,63 @@ static int tipc_rcast_xmit(struct net *net, struct sk_buff_head *pkts, return 0; } +/* tipc_mcast_send_sync - deliver a dummy message with SYN bit + * @net: the applicable net namespace + * @skb: socket buffer to copy + * @method: send method to be used + * @dests: destination nodes for message. + * @cong_link_cnt: returns number of encountered congested destination links + * Returns 0 if success, otherwise errno + */ +static int tipc_mcast_send_sync(struct net *net, struct sk_buff *skb, + struct tipc_mc_method *method, + struct tipc_nlist *dests, + u16 *cong_link_cnt) +{ + struct tipc_msg *hdr, *_hdr; + struct sk_buff_head tmpq; + struct sk_buff *_skb; + + /* Is a cluster supporting with new capabilities ? */ + if (!(tipc_net(net)->capabilities & TIPC_MCAST_RBCTL)) + return 0; + + hdr = buf_msg(skb); + if (msg_user(hdr) == MSG_FRAGMENTER) + hdr = msg_get_wrapped(hdr); + if (msg_type(hdr) != TIPC_MCAST_MSG) + return 0; + + /* Allocate dummy message */ + _skb = tipc_buf_acquire(MCAST_H_SIZE, GFP_KERNEL); + if (!_skb) + return -ENOMEM; + + /* Preparing for 'synching' header */ + msg_set_syn(hdr, 1); + + /* Copy skb's header into a dummy header */ + skb_copy_to_linear_data(_skb, hdr, MCAST_H_SIZE); + skb_orphan(_skb); + + /* Reverse method for dummy message */ + _hdr = buf_msg(_skb); + msg_set_size(_hdr, MCAST_H_SIZE); + msg_set_is_rcast(_hdr, !msg_is_rcast(hdr)); + + skb_queue_head_init(&tmpq); + __skb_queue_tail(&tmpq, _skb); + if (method->rcast) + tipc_bcast_xmit(net, &tmpq, cong_link_cnt); + else + tipc_rcast_xmit(net, &tmpq, dests, cong_link_cnt); + + /* This queue should normally be empty by now */ + __skb_queue_purge(&tmpq); + + return 0; +} + /* tipc_mcast_xmit - deliver message to indicated destination nodes * and to identified node local sockets * @net: the applicable net namespace @@ -296,6 +372,9 @@ int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts, u16 *cong_link_cnt) { struct sk_buff_head inputq, localq; + bool rcast = method->rcast; + struct tipc_msg *hdr; + struct sk_buff *skb; int rc = 0; skb_queue_head_init(&inputq); @@ -309,6 +388,18 @@ int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts, /* Send according to determined transmit method */ if (dests->remote) { tipc_bcast_select_xmit_method(net, dests->remote, method); + + skb = skb_peek(pkts); + hdr = buf_msg(skb); + if (msg_user(hdr) == MSG_FRAGMENTER) + hdr = msg_get_wrapped(hdr); + msg_set_is_rcast(hdr, method->rcast); + + /* Switch method ? */ + if (rcast != method->rcast) + tipc_mcast_send_sync(net, skb, method, + dests, cong_link_cnt); + if (method->rcast) rc = tipc_rcast_xmit(net, pkts, dests, cong_link_cnt); else @@ -485,10 +576,63 @@ static int tipc_bc_link_set_queue_limits(struct net *net, u32 limit) return 0; } +static int tipc_bc_link_set_broadcast_mode(struct net *net, u32 bc_mode) +{ + struct tipc_bc_base *bb = tipc_bc_base(net); + + switch (bc_mode) { + case BCLINK_MODE_BCAST: + if (!bb->bcast_support) + return -ENOPROTOOPT; + + bb->force_bcast = true; + bb->force_rcast = false; + break; + case BCLINK_MODE_RCAST: + if (!bb->rcast_support) + return -ENOPROTOOPT; + + bb->force_bcast = false; + bb->force_rcast = true; + break; + case BCLINK_MODE_SEL: + if (!bb->bcast_support || !bb->rcast_support) + return -ENOPROTOOPT; + + bb->force_bcast = false; + bb->force_rcast = false; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int tipc_bc_link_set_broadcast_ratio(struct net *net, u32 bc_ratio) +{ + struct tipc_bc_base *bb = tipc_bc_base(net); + + if (!bb->bcast_support || !bb->rcast_support) + return -ENOPROTOOPT; + + if (bc_ratio > 100 || bc_ratio <= 0) + return -EINVAL; + + bb->rc_ratio = bc_ratio; + tipc_bcast_lock(net); + tipc_bcbase_calc_bc_threshold(net); + tipc_bcast_unlock(net); + + return 0; +} + int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]) { int err; u32 win; + u32 bc_mode; + u32 bc_ratio; struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; if (!attrs[TIPC_NLA_LINK_PROP]) @@ -498,12 +642,28 @@ int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]) if (err) return err; - if (!props[TIPC_NLA_PROP_WIN]) + if (!props[TIPC_NLA_PROP_WIN] && + !props[TIPC_NLA_PROP_BROADCAST] && + !props[TIPC_NLA_PROP_BROADCAST_RATIO]) { return -EOPNOTSUPP; + } + + if (props[TIPC_NLA_PROP_BROADCAST]) { + bc_mode = nla_get_u32(props[TIPC_NLA_PROP_BROADCAST]); + err = tipc_bc_link_set_broadcast_mode(net, bc_mode); + } + + if (!err && props[TIPC_NLA_PROP_BROADCAST_RATIO]) { + bc_ratio = nla_get_u32(props[TIPC_NLA_PROP_BROADCAST_RATIO]); + err = tipc_bc_link_set_broadcast_ratio(net, bc_ratio); + } - win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + if (!err && props[TIPC_NLA_PROP_WIN]) { + win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + err = tipc_bc_link_set_queue_limits(net, win); + } - return tipc_bc_link_set_queue_limits(net, win); + return err; } int tipc_bcast_init(struct net *net) @@ -529,7 +689,7 @@ int tipc_bcast_init(struct net *net) goto enomem; bb->link = l; tn->bcl = l; - bb->rc_ratio = 25; + bb->rc_ratio = 10; bb->rcast_support = true; return 0; enomem: @@ -576,3 +736,108 @@ void tipc_nlist_purge(struct tipc_nlist *nl) nl->remote = 0; nl->local = false; } + +u32 tipc_bcast_get_broadcast_mode(struct net *net) +{ + struct tipc_bc_base *bb = tipc_bc_base(net); + + if (bb->force_bcast) + return BCLINK_MODE_BCAST; + + if (bb->force_rcast) + return BCLINK_MODE_RCAST; + + if (bb->bcast_support && bb->rcast_support) + return BCLINK_MODE_SEL; + + return 0; +} + +u32 tipc_bcast_get_broadcast_ratio(struct net *net) +{ + struct tipc_bc_base *bb = tipc_bc_base(net); + + return bb->rc_ratio; +} + +void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq, + struct sk_buff_head *inputq) +{ + struct sk_buff *skb, *_skb, *tmp; + struct tipc_msg *hdr, *_hdr; + bool match = false; + u32 node, port; + + skb = skb_peek(inputq); + if (!skb) + return; + + hdr = buf_msg(skb); + + if (likely(!msg_is_syn(hdr) && skb_queue_empty(defq))) + return; + + node = msg_orignode(hdr); + if (node == tipc_own_addr(net)) + return; + + port = msg_origport(hdr); + + /* Has the twin SYN message already arrived ? */ + skb_queue_walk(defq, _skb) { + _hdr = buf_msg(_skb); + if (msg_orignode(_hdr) != node) + continue; + if (msg_origport(_hdr) != port) + continue; + match = true; + break; + } + + if (!match) { + if (!msg_is_syn(hdr)) + return; + __skb_dequeue(inputq); + __skb_queue_tail(defq, skb); + return; + } + + /* Deliver non-SYN message from other link, otherwise queue it */ + if (!msg_is_syn(hdr)) { + if (msg_is_rcast(hdr) != msg_is_rcast(_hdr)) + return; + __skb_dequeue(inputq); + __skb_queue_tail(defq, skb); + return; + } + + /* Queue non-SYN/SYN message from same link */ + if (msg_is_rcast(hdr) == msg_is_rcast(_hdr)) { + __skb_dequeue(inputq); + __skb_queue_tail(defq, skb); + return; + } + + /* Matching SYN messages => return the one with data, if any */ + __skb_unlink(_skb, defq); + if (msg_data_sz(hdr)) { + kfree_skb(_skb); + } else { + __skb_dequeue(inputq); + kfree_skb(skb); + __skb_queue_tail(inputq, _skb); + } + + /* Deliver subsequent non-SYN messages from same peer */ + skb_queue_walk_safe(defq, _skb, tmp) { + _hdr = buf_msg(_skb); + if (msg_orignode(_hdr) != node) + continue; + if (msg_origport(_hdr) != port) + continue; + if (msg_is_syn(_hdr)) + break; + __skb_unlink(_skb, defq); + __skb_queue_tail(inputq, _skb); + } +} diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 751530ab0c49..dadad953e2be 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -48,6 +48,10 @@ extern const char tipc_bclink_name[]; #define TIPC_METHOD_EXPIRE msecs_to_jiffies(5000) +#define BCLINK_MODE_BCAST 0x1 +#define BCLINK_MODE_RCAST 0x2 +#define BCLINK_MODE_SEL 0x4 + struct tipc_nlist { struct list_head list; u32 self; @@ -63,11 +67,13 @@ void tipc_nlist_del(struct tipc_nlist *nl, u32 node); /* Cookie to be used between socket and broadcast layer * @rcast: replicast (instead of broadcast) was used at previous xmit * @mandatory: broadcast/replicast indication was set by user + * @deferredq: defer queue to make message in order * @expires: re-evaluate non-mandatory transmit method if we are past this */ struct tipc_mc_method { bool rcast; bool mandatory; + struct sk_buff_head deferredq; unsigned long expires; }; @@ -92,6 +98,12 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg); int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]); int tipc_bclink_reset_stats(struct net *net); +u32 tipc_bcast_get_broadcast_mode(struct net *net); +u32 tipc_bcast_get_broadcast_ratio(struct net *net); + +void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq, + struct sk_buff_head *inputq); + static inline void tipc_bcast_lock(struct net *net) { spin_lock_bh(&tipc_net(net)->bclock); diff --git a/net/tipc/core.c b/net/tipc/core.c index 5b38f5164281..27cccd101ef6 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -43,6 +43,7 @@ #include "net.h" #include "socket.h" #include "bcast.h" +#include "node.h" #include <linux/module.h> @@ -59,6 +60,7 @@ static int __net_init tipc_init_net(struct net *net) tn->node_addr = 0; tn->trial_addr = 0; tn->addr_trial_end = 0; + tn->capabilities = TIPC_NODE_CAPABILITIES; memset(tn->node_id, 0, sizeof(tn->node_id)); memset(tn->node_id_string, 0, sizeof(tn->node_id_string)); tn->mon_threshold = TIPC_DEF_MON_THRESHOLD; diff --git a/net/tipc/core.h b/net/tipc/core.h index 8020a6c360ff..7a68e1b6a066 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -122,6 +122,9 @@ struct tipc_net { /* Topology subscription server */ struct tipc_topsrv *topsrv; atomic_t subscription_count; + + /* Cluster capabilities */ + u16 capabilities; }; static inline struct tipc_net *tipc_net(struct net *net) diff --git a/net/tipc/link.c b/net/tipc/link.c index 131aa2f0fd27..6053489c8063 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -151,6 +151,7 @@ struct tipc_link { /* Failover/synch */ u16 drop_point; struct sk_buff *failover_reasm_skb; + struct sk_buff_head failover_deferdq; /* Max packet negotiation */ u16 mtu; @@ -209,6 +210,7 @@ enum { }; #define TIPC_BC_RETR_LIM msecs_to_jiffies(10) /* [ms] */ +#define TIPC_UC_RETR_TIME (jiffies + msecs_to_jiffies(1)) /* * Interval between NACKs when packets arrive out of order @@ -246,6 +248,10 @@ static int tipc_link_build_nack_msg(struct tipc_link *l, static void tipc_link_build_bc_init_msg(struct tipc_link *l, struct sk_buff_head *xmitq); static bool tipc_link_release_pkts(struct tipc_link *l, u16 to); +static u16 tipc_build_gap_ack_blks(struct tipc_link *l, void *data); +static void tipc_link_advance_transmq(struct tipc_link *l, u16 acked, u16 gap, + struct tipc_gap_ack_blks *ga, + struct sk_buff_head *xmitq); /* * Simple non-static link routines (i.e. referenced outside this file) @@ -493,6 +499,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id, __skb_queue_head_init(&l->transmq); __skb_queue_head_init(&l->backlogq); __skb_queue_head_init(&l->deferdq); + __skb_queue_head_init(&l->failover_deferdq); skb_queue_head_init(&l->wakeupq); skb_queue_head_init(l->inputq); return true; @@ -885,6 +892,7 @@ void tipc_link_reset(struct tipc_link *l) __skb_queue_purge(&l->transmq); __skb_queue_purge(&l->deferdq); __skb_queue_purge(&l->backlogq); + __skb_queue_purge(&l->failover_deferdq); l->backlog[TIPC_LOW_IMPORTANCE].len = 0; l->backlog[TIPC_MEDIUM_IMPORTANCE].len = 0; l->backlog[TIPC_HIGH_IMPORTANCE].len = 0; @@ -1156,34 +1164,14 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb, * Consumes buffer */ static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb, - struct sk_buff_head *inputq) + struct sk_buff_head *inputq, + struct sk_buff **reasm_skb) { struct tipc_msg *hdr = buf_msg(skb); - struct sk_buff **reasm_skb = &l->reasm_buf; struct sk_buff *iskb; struct sk_buff_head tmpq; int usr = msg_user(hdr); - int rc = 0; int pos = 0; - int ipos = 0; - - if (unlikely(usr == TUNNEL_PROTOCOL)) { - if (msg_type(hdr) == SYNCH_MSG) { - __skb_queue_purge(&l->deferdq); - goto drop; - } - if (!tipc_msg_extract(skb, &iskb, &ipos)) - return rc; - kfree_skb(skb); - skb = iskb; - hdr = buf_msg(skb); - if (less(msg_seqno(hdr), l->drop_point)) - goto drop; - if (tipc_data_input(l, skb, inputq)) - return rc; - usr = msg_user(hdr); - reasm_skb = &l->failover_reasm_skb; - } if (usr == MSG_BUNDLER) { skb_queue_head_init(&tmpq); @@ -1208,11 +1196,66 @@ static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb, tipc_link_bc_init_rcv(l->bc_rcvlink, hdr); tipc_bcast_unlock(l->net); } -drop: + kfree_skb(skb); return 0; } +/* tipc_link_tnl_rcv() - receive TUNNEL_PROTOCOL message, drop or process the + * inner message along with the ones in the old link's + * deferdq + * @l: tunnel link + * @skb: TUNNEL_PROTOCOL message + * @inputq: queue to put messages ready for delivery + */ +static int tipc_link_tnl_rcv(struct tipc_link *l, struct sk_buff *skb, + struct sk_buff_head *inputq) +{ + struct sk_buff **reasm_skb = &l->failover_reasm_skb; + struct sk_buff_head *fdefq = &l->failover_deferdq; + struct tipc_msg *hdr = buf_msg(skb); + struct sk_buff *iskb; + int ipos = 0; + int rc = 0; + u16 seqno; + + /* SYNCH_MSG */ + if (msg_type(hdr) == SYNCH_MSG) + goto drop; + + /* FAILOVER_MSG */ + if (!tipc_msg_extract(skb, &iskb, &ipos)) { + pr_warn_ratelimited("Cannot extract FAILOVER_MSG, defq: %d\n", + skb_queue_len(fdefq)); + return rc; + } + + do { + seqno = buf_seqno(iskb); + + if (unlikely(less(seqno, l->drop_point))) { + kfree_skb(iskb); + continue; + } + + if (unlikely(seqno != l->drop_point)) { + __tipc_skb_queue_sorted(fdefq, seqno, iskb); + continue; + } + + l->drop_point++; + + if (!tipc_data_input(l, iskb, inputq)) + rc |= tipc_link_input(l, iskb, inputq, reasm_skb); + if (unlikely(rc)) + break; + } while ((iskb = __tipc_skb_dequeue(fdefq, l->drop_point))); + +drop: + kfree_skb(skb); + return rc; +} + static bool tipc_link_release_pkts(struct tipc_link *l, u16 acked) { bool released = false; @@ -1228,6 +1271,106 @@ static bool tipc_link_release_pkts(struct tipc_link *l, u16 acked) return released; } +/* tipc_build_gap_ack_blks - build Gap ACK blocks + * @l: tipc link that data have come with gaps in sequence if any + * @data: data buffer to store the Gap ACK blocks after built + * + * returns the actual allocated memory size + */ +static u16 tipc_build_gap_ack_blks(struct tipc_link *l, void *data) +{ + struct sk_buff *skb = skb_peek(&l->deferdq); + struct tipc_gap_ack_blks *ga = data; + u16 len, expect, seqno = 0; + u8 n = 0; + + if (!skb) + goto exit; + + expect = buf_seqno(skb); + skb_queue_walk(&l->deferdq, skb) { + seqno = buf_seqno(skb); + if (unlikely(more(seqno, expect))) { + ga->gacks[n].ack = htons(expect - 1); + ga->gacks[n].gap = htons(seqno - expect); + if (++n >= MAX_GAP_ACK_BLKS) { + pr_info_ratelimited("Too few Gap ACK blocks!\n"); + goto exit; + } + } else if (unlikely(less(seqno, expect))) { + pr_warn("Unexpected skb in deferdq!\n"); + continue; + } + expect = seqno + 1; + } + + /* last block */ + ga->gacks[n].ack = htons(seqno); + ga->gacks[n].gap = 0; + n++; + +exit: + len = tipc_gap_ack_blks_sz(n); + ga->len = htons(len); + ga->gack_cnt = n; + return len; +} + +/* tipc_link_advance_transmq - advance TIPC link transmq queue by releasing + * acked packets, also doing retransmissions if + * gaps found + * @l: tipc link with transmq queue to be advanced + * @acked: seqno of last packet acked by peer without any gaps before + * @gap: # of gap packets + * @ga: buffer pointer to Gap ACK blocks from peer + * @xmitq: queue for accumulating the retransmitted packets if any + */ +static void tipc_link_advance_transmq(struct tipc_link *l, u16 acked, u16 gap, + struct tipc_gap_ack_blks *ga, + struct sk_buff_head *xmitq) +{ + struct sk_buff *skb, *_skb, *tmp; + struct tipc_msg *hdr; + u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; + u16 ack = l->rcv_nxt - 1; + u16 seqno; + u16 n = 0; + + skb_queue_walk_safe(&l->transmq, skb, tmp) { + seqno = buf_seqno(skb); + +next_gap_ack: + if (less_eq(seqno, acked)) { + /* release skb */ + __skb_unlink(skb, &l->transmq); + kfree_skb(skb); + } else if (less_eq(seqno, acked + gap)) { + /* retransmit skb */ + if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr)) + continue; + TIPC_SKB_CB(skb)->nxt_retr = TIPC_UC_RETR_TIME; + + _skb = __pskb_copy(skb, MIN_H_SIZE, GFP_ATOMIC); + if (!_skb) + continue; + hdr = buf_msg(_skb); + msg_set_ack(hdr, ack); + msg_set_bcast_ack(hdr, bc_ack); + _skb->priority = TC_PRIO_CONTROL; + __skb_queue_tail(xmitq, _skb); + l->stats.retransmitted++; + } else { + /* retry with Gap ACK blocks if any */ + if (!ga || n >= ga->gack_cnt) + break; + acked = ntohs(ga->gacks[n].ack); + gap = ntohs(ga->gacks[n].gap); + n++; + goto next_gap_ack; + } + } +} + /* tipc_link_build_state_msg: prepare link state message for transmission * * Note that sending of broadcast ack is coordinated among nodes, to reduce @@ -1282,6 +1425,7 @@ static int tipc_link_build_nack_msg(struct tipc_link *l, struct sk_buff_head *xmitq) { u32 def_cnt = ++l->stats.deferred_recv; + u32 defq_len = skb_queue_len(&l->deferdq); int match1, match2; if (link_is_bc_rcvlink(l)) { @@ -1292,7 +1436,7 @@ static int tipc_link_build_nack_msg(struct tipc_link *l, return 0; } - if ((skb_queue_len(&l->deferdq) == 1) || !(def_cnt % TIPC_NACK_INTV)) + if (defq_len >= 3 && !((defq_len - 3) % 16)) tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, 0, xmitq); return 0; } @@ -1306,29 +1450,29 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq) { struct sk_buff_head *defq = &l->deferdq; - struct tipc_msg *hdr; + struct tipc_msg *hdr = buf_msg(skb); u16 seqno, rcv_nxt, win_lim; int rc = 0; + /* Verify and update link state */ + if (unlikely(msg_user(hdr) == LINK_PROTOCOL)) + return tipc_link_proto_rcv(l, skb, xmitq); + + /* Don't send probe at next timeout expiration */ + l->silent_intv_cnt = 0; + do { hdr = buf_msg(skb); seqno = msg_seqno(hdr); rcv_nxt = l->rcv_nxt; win_lim = rcv_nxt + TIPC_MAX_LINK_WIN; - /* Verify and update link state */ - if (unlikely(msg_user(hdr) == LINK_PROTOCOL)) - return tipc_link_proto_rcv(l, skb, xmitq); - if (unlikely(!link_is_up(l))) { if (l->state == LINK_ESTABLISHING) rc = TIPC_LINK_UP_EVT; goto drop; } - /* Don't send probe at next timeout expiration */ - l->silent_intv_cnt = 0; - /* Drop if outside receive window */ if (unlikely(less(seqno, rcv_nxt) || more(seqno, win_lim))) { l->stats.duplicates++; @@ -1353,13 +1497,16 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, /* Deliver packet */ l->rcv_nxt++; l->stats.recv_pkts++; - if (!tipc_data_input(l, skb, l->inputq)) - rc |= tipc_link_input(l, skb, l->inputq); + + if (unlikely(msg_user(hdr) == TUNNEL_PROTOCOL)) + rc |= tipc_link_tnl_rcv(l, skb, l->inputq); + else if (!tipc_data_input(l, skb, l->inputq)) + rc |= tipc_link_input(l, skb, l->inputq, &l->reasm_buf); if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN)) rc |= tipc_link_build_state_msg(l, xmitq); if (unlikely(rc & ~TIPC_LINK_SND_STATE)) break; - } while ((skb = __skb_dequeue(defq))); + } while ((skb = __tipc_skb_dequeue(defq, l->rcv_nxt))); return rc; drop: @@ -1380,6 +1527,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, struct tipc_mon_state *mstate = &l->mon_state; int dlen = 0; void *data; + u16 glen = 0; /* Don't send protocol message during reset or link failover */ if (tipc_link_is_blocked(l)) @@ -1392,8 +1540,8 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, rcvgap = buf_seqno(skb_peek(dfq)) - l->rcv_nxt; skb = tipc_msg_create(LINK_PROTOCOL, mtyp, INT_H_SIZE, - tipc_max_domain_size, l->addr, - tipc_own_addr(l->net), 0, 0, 0); + tipc_max_domain_size + MAX_GAP_ACK_BLKS_SZ, + l->addr, tipc_own_addr(l->net), 0, 0, 0); if (!skb) return; @@ -1420,9 +1568,11 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl)); msg_set_probe(hdr, probe); msg_set_is_keepalive(hdr, probe || probe_reply); - tipc_mon_prep(l->net, data, &dlen, mstate, l->bearer_id); - msg_set_size(hdr, INT_H_SIZE + dlen); - skb_trim(skb, INT_H_SIZE + dlen); + if (l->peer_caps & TIPC_GAP_ACK_BLOCK) + glen = tipc_build_gap_ack_blks(l, data); + tipc_mon_prep(l->net, data + glen, &dlen, mstate, l->bearer_id); + msg_set_size(hdr, INT_H_SIZE + glen + dlen); + skb_trim(skb, INT_H_SIZE + glen + dlen); l->stats.sent_states++; l->rcv_unacked = 0; } else { @@ -1481,6 +1631,7 @@ void tipc_link_create_dummy_tnl_msg(struct tipc_link *l, void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, int mtyp, struct sk_buff_head *xmitq) { + struct sk_buff_head *fdefq = &tnl->failover_deferdq; struct sk_buff *skb, *tnlskb; struct tipc_msg *hdr, tnlhdr; struct sk_buff_head *queue = &l->transmq; @@ -1508,7 +1659,11 @@ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, /* Initialize reusable tunnel packet header */ tipc_msg_init(tipc_own_addr(l->net), &tnlhdr, TUNNEL_PROTOCOL, mtyp, INT_H_SIZE, l->addr); - pktcnt = skb_queue_len(&l->transmq) + skb_queue_len(&l->backlogq); + if (mtyp == SYNCH_MSG) + pktcnt = l->snd_nxt - buf_seqno(skb_peek(&l->transmq)); + else + pktcnt = skb_queue_len(&l->transmq); + pktcnt += skb_queue_len(&l->backlogq); msg_set_msgcnt(&tnlhdr, pktcnt); msg_set_bearer_id(&tnlhdr, l->peer_bearer_id); tnl: @@ -1539,6 +1694,14 @@ tnl: tnl->drop_point = l->rcv_nxt; tnl->failover_reasm_skb = l->reasm_buf; l->reasm_buf = NULL; + + /* Failover the link's deferdq */ + if (unlikely(!skb_queue_empty(fdefq))) { + pr_warn("Link failover deferdq not empty: %d!\n", + skb_queue_len(fdefq)); + __skb_queue_purge(fdefq); + } + skb_queue_splice_init(&l->deferdq, fdefq); } } @@ -1592,6 +1755,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq) { struct tipc_msg *hdr = buf_msg(skb); + struct tipc_gap_ack_blks *ga = NULL; u16 rcvgap = 0; u16 ack = msg_ack(hdr); u16 gap = msg_seq_gap(hdr); @@ -1602,6 +1766,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, u16 dlen = msg_data_sz(hdr); int mtyp = msg_type(hdr); bool reply = msg_probe(hdr); + u16 glen = 0; void *data; char *if_name; int rc = 0; @@ -1699,7 +1864,17 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, rc = TIPC_LINK_UP_EVT; break; } - tipc_mon_rcv(l->net, data, dlen, l->addr, + + /* Receive Gap ACK blocks from peer if any */ + if (l->peer_caps & TIPC_GAP_ACK_BLOCK) { + ga = (struct tipc_gap_ack_blks *)data; + glen = ntohs(ga->len); + /* sanity check: if failed, ignore Gap ACK blocks */ + if (glen != tipc_gap_ack_blks_sz(ga->gack_cnt)) + ga = NULL; + } + + tipc_mon_rcv(l->net, data + glen, dlen - glen, l->addr, &l->mon_state, l->bearer_id); /* Send NACK if peer has sent pkts we haven't received yet */ @@ -1708,13 +1883,12 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, if (rcvgap || reply) tipc_link_build_proto_msg(l, STATE_MSG, 0, reply, rcvgap, 0, 0, xmitq); - tipc_link_release_pkts(l, ack); + + tipc_link_advance_transmq(l, ack, gap, ga, xmitq); /* If NACK, retransmit will now start at right position */ - if (gap) { - rc = tipc_link_retrans(l, l, ack + 1, ack + gap, xmitq); + if (gap) l->stats.recv_nacks++; - } tipc_link_advance_backlog(l, xmitq); if (unlikely(!skb_queue_empty(&l->wakeupq))) @@ -2199,6 +2373,8 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) struct nlattr *attrs; struct nlattr *prop; struct tipc_net *tn = net_generic(net, tipc_net_id); + u32 bc_mode = tipc_bcast_get_broadcast_mode(net); + u32 bc_ratio = tipc_bcast_get_broadcast_ratio(net); struct tipc_link *bcl = tn->bcl; if (!bcl) @@ -2235,6 +2411,12 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->window)) goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_BROADCAST, bc_mode)) + goto prop_msg_full; + if (bc_mode & BCLINK_MODE_SEL) + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_BROADCAST_RATIO, + bc_ratio)) + goto prop_msg_full; nla_nest_end(msg->skb, prop); err = __tipc_nl_add_bc_link_stat(msg->skb, &bcl->stats); diff --git a/net/tipc/msg.h b/net/tipc/msg.h index d7e4b8b93f9d..8de02ad6e352 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -117,6 +117,37 @@ struct tipc_msg { __be32 hdr[15]; }; +/* struct tipc_gap_ack - TIPC Gap ACK block + * @ack: seqno of the last consecutive packet in link deferdq + * @gap: number of gap packets since the last ack + * + * E.g: + * link deferdq: 1 2 3 4 10 11 13 14 15 20 + * --> Gap ACK blocks: <4, 5>, <11, 1>, <15, 4>, <20, 0> + */ +struct tipc_gap_ack { + __be16 ack; + __be16 gap; +}; + +/* struct tipc_gap_ack_blks + * @len: actual length of the record + * @gack_cnt: number of Gap ACK blocks in the record + * @gacks: array of Gap ACK blocks + */ +struct tipc_gap_ack_blks { + __be16 len; + u8 gack_cnt; + u8 reserved; + struct tipc_gap_ack gacks[]; +}; + +#define tipc_gap_ack_blks_sz(n) (sizeof(struct tipc_gap_ack_blks) + \ + sizeof(struct tipc_gap_ack) * (n)) + +#define MAX_GAP_ACK_BLKS 32 +#define MAX_GAP_ACK_BLKS_SZ tipc_gap_ack_blks_sz(MAX_GAP_ACK_BLKS) + static inline struct tipc_msg *buf_msg(struct sk_buff *skb) { return (struct tipc_msg *)skb->data; @@ -257,6 +288,16 @@ static inline void msg_set_src_droppable(struct tipc_msg *m, u32 d) msg_set_bits(m, 0, 18, 1, d); } +static inline bool msg_is_rcast(struct tipc_msg *m) +{ + return msg_bits(m, 0, 18, 0x1); +} + +static inline void msg_set_is_rcast(struct tipc_msg *m, bool d) +{ + msg_set_bits(m, 0, 18, 0x1, d); +} + static inline void msg_set_size(struct tipc_msg *m, u32 sz) { m->hdr[0] = htonl((msg_word(m, 0) & ~0x1ffff) | sz); @@ -1110,4 +1151,25 @@ static inline void tipc_skb_queue_splice_tail_init(struct sk_buff_head *list, tipc_skb_queue_splice_tail(&tmp, head); } +/* __tipc_skb_dequeue() - dequeue the head skb according to expected seqno + * @list: list to be dequeued from + * @seqno: seqno of the expected msg + * + * returns skb dequeued from the list if its seqno is less than or equal to + * the expected one, otherwise the skb is still hold + * + * Note: must be used with appropriate locks held only + */ +static inline struct sk_buff *__tipc_skb_dequeue(struct sk_buff_head *list, + u16 seqno) +{ + struct sk_buff *skb = skb_peek(list); + + if (skb && less_eq(buf_seqno(skb), seqno)) { + __skb_unlink(skb, list); + return skb; + } + return NULL; +} + #endif diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 99ee419210ba..2d178df0a89f 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -110,7 +110,9 @@ const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = { [TIPC_NLA_PROP_UNSPEC] = { .type = NLA_UNSPEC }, [TIPC_NLA_PROP_PRIO] = { .type = NLA_U32 }, [TIPC_NLA_PROP_TOL] = { .type = NLA_U32 }, - [TIPC_NLA_PROP_WIN] = { .type = NLA_U32 } + [TIPC_NLA_PROP_WIN] = { .type = NLA_U32 }, + [TIPC_NLA_PROP_BROADCAST] = { .type = NLA_U32 }, + [TIPC_NLA_PROP_BROADCAST_RATIO] = { .type = NLA_U32 } }; const struct nla_policy tipc_nl_bearer_policy[TIPC_NLA_BEARER_MAX + 1] = { @@ -142,114 +144,93 @@ static const struct genl_ops tipc_genl_v2_ops[] = { { .cmd = TIPC_NL_BEARER_DISABLE, .doit = tipc_nl_bearer_disable, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_BEARER_ENABLE, .doit = tipc_nl_bearer_enable, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_BEARER_GET, .doit = tipc_nl_bearer_get, .dumpit = tipc_nl_bearer_dump, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_BEARER_ADD, .doit = tipc_nl_bearer_add, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_BEARER_SET, .doit = tipc_nl_bearer_set, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_SOCK_GET, .start = tipc_dump_start, .dumpit = tipc_nl_sk_dump, .done = tipc_dump_done, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_PUBL_GET, .dumpit = tipc_nl_publ_dump, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_LINK_GET, .doit = tipc_nl_node_get_link, .dumpit = tipc_nl_node_dump_link, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_LINK_SET, .doit = tipc_nl_node_set_link, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_LINK_RESET_STATS, .doit = tipc_nl_node_reset_link_stats, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_MEDIA_GET, .doit = tipc_nl_media_get, .dumpit = tipc_nl_media_dump, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_MEDIA_SET, .doit = tipc_nl_media_set, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_NODE_GET, .dumpit = tipc_nl_node_dump, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_NET_GET, .dumpit = tipc_nl_net_dump, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_NET_SET, .doit = tipc_nl_net_set, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_NAME_TABLE_GET, .dumpit = tipc_nl_name_table_dump, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_MON_SET, .doit = tipc_nl_node_set_monitor, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_MON_GET, .doit = tipc_nl_node_get_monitor, .dumpit = tipc_nl_node_dump_monitor, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_MON_PEER_GET, .dumpit = tipc_nl_node_dump_monitor_peer, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_PEER_REMOVE, .doit = tipc_nl_peer_rm, - .policy = tipc_nl_policy, }, #ifdef CONFIG_TIPC_MEDIA_UDP { .cmd = TIPC_NL_UDP_GET_REMOTEIP, .dumpit = tipc_udp_nl_dump_remoteip, - .policy = tipc_nl_policy, }, #endif }; @@ -259,6 +240,7 @@ struct genl_family tipc_genl_family __ro_after_init = { .version = TIPC_GENL_V2_VERSION, .hdrsize = 0, .maxattr = TIPC_NLA_MAX, + .policy = tipc_nl_policy, .netnsok = true, .module = THIS_MODULE, .ops = tipc_genl_v2_ops, diff --git a/net/tipc/node.c b/net/tipc/node.c index dd3b6dc17662..7478e2d4ec02 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -375,14 +375,20 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr, if (n->capabilities == capabilities) goto exit; /* Same node may come back with new capabilities */ - write_lock_bh(&n->lock); + tipc_node_write_lock(n); n->capabilities = capabilities; for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) { l = n->links[bearer_id].link; if (l) tipc_link_update_caps(l, capabilities); } - write_unlock_bh(&n->lock); + tipc_node_write_unlock_fast(n); + + /* Calculate cluster capabilities */ + tn->capabilities = TIPC_NODE_CAPABILITIES; + list_for_each_entry_rcu(temp_node, &tn->node_list, list) { + tn->capabilities &= temp_node->capabilities; + } goto exit; } n = kzalloc(sizeof(*n), GFP_ATOMIC); @@ -433,6 +439,11 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr, break; } list_add_tail_rcu(&n->list, &temp_node->list); + /* Calculate cluster capabilities */ + tn->capabilities = TIPC_NODE_CAPABILITIES; + list_for_each_entry_rcu(temp_node, &tn->node_list, list) { + tn->capabilities &= temp_node->capabilities; + } trace_tipc_node_create(n, true, " "); exit: spin_unlock_bh(&tn->node_list_lock); @@ -589,6 +600,7 @@ static void tipc_node_clear_links(struct tipc_node *node) */ static bool tipc_node_cleanup(struct tipc_node *peer) { + struct tipc_node *temp_node; struct tipc_net *tn = tipc_net(peer->net); bool deleted = false; @@ -604,6 +616,13 @@ static bool tipc_node_cleanup(struct tipc_node *peer) deleted = true; } tipc_node_write_unlock(peer); + + /* Calculate cluster capabilities */ + tn->capabilities = TIPC_NODE_CAPABILITIES; + list_for_each_entry_rcu(temp_node, &tn->node_list, list) { + tn->capabilities &= temp_node->capabilities; + } + spin_unlock_bh(&tn->node_list_lock); return deleted; } diff --git a/net/tipc/node.h b/net/tipc/node.h index 4f59a30e989a..c0bf49ea3de4 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -51,7 +51,9 @@ enum { TIPC_BLOCK_FLOWCTL = (1 << 3), TIPC_BCAST_RCAST = (1 << 4), TIPC_NODE_ID128 = (1 << 5), - TIPC_LINK_PROTO_SEQNO = (1 << 6) + TIPC_LINK_PROTO_SEQNO = (1 << 6), + TIPC_MCAST_RBCTL = (1 << 7), + TIPC_GAP_ACK_BLOCK = (1 << 8) }; #define TIPC_NODE_CAPABILITIES (TIPC_SYN_BIT | \ @@ -60,7 +62,9 @@ enum { TIPC_BCAST_RCAST | \ TIPC_BLOCK_FLOWCTL | \ TIPC_NODE_ID128 | \ - TIPC_LINK_PROTO_SEQNO) + TIPC_LINK_PROTO_SEQNO | \ + TIPC_MCAST_RBCTL | \ + TIPC_GAP_ACK_BLOCK) #define INVALID_BEARER_ID -1 void tipc_node_stop(struct net *net); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index b542f14ed444..8ac8ddf1e324 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -485,6 +485,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, tsk_set_unreturnable(tsk, true); if (sock->type == SOCK_DGRAM) tsk_set_unreliable(tsk, true); + __skb_queue_head_init(&tsk->mc_method.deferredq); } trace_tipc_sk_create(sk, NULL, TIPC_DUMP_NONE, " "); @@ -582,6 +583,7 @@ static int tipc_release(struct socket *sock) sk->sk_shutdown = SHUTDOWN_MASK; tipc_sk_leave(tsk); tipc_sk_withdraw(tsk, 0, NULL); + __skb_queue_purge(&tsk->mc_method.deferredq); sk_stop_timer(sk, &sk->sk_timer); tipc_sk_remove(tsk); @@ -2149,6 +2151,7 @@ static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb, struct tipc_msg *hdr = buf_msg(skb); struct net *net = sock_net(sk); struct sk_buff_head inputq; + int mtyp = msg_type(hdr); int limit, err = TIPC_OK; trace_tipc_sk_filter_rcv(sk, skb, TIPC_DUMP_ALL, " "); @@ -2162,6 +2165,9 @@ static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb, if (unlikely(grp)) tipc_group_filter_msg(grp, &inputq, xmitq); + if (unlikely(!grp) && mtyp == TIPC_MCAST_MSG) + tipc_mcast_filter_msg(net, &tsk->mc_method.deferredq, &inputq); + /* Validate and add to receive buffer if there is space */ while ((skb = __skb_dequeue(&inputq))) { hdr = buf_msg(skb); diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 4d85d71f16e2..6f166fbbfff1 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -44,7 +44,7 @@ #include <net/sock.h> #include <net/ip.h> #include <net/udp_tunnel.h> -#include <net/addrconf.h> +#include <net/ipv6_stubs.h> #include <linux/tipc_netlink.h> #include "core.h" #include "addr.h" |