diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-12-11 14:27:06 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-12-11 14:27:06 -0800 |
commit | 70e71ca0af244f48a5dcf56dc435243792e3a495 (patch) | |
tree | f7d9c4c4d9a857a00043e9bf6aa2d6f533a34778 /net/ipv6 | |
parent | bae41e45b7400496b9bf0c70c6004419d9987819 (diff) | |
parent | 00c83b01d58068dfeb2e1351cca6fccf2a83fa8f (diff) | |
download | linux-next-70e71ca0af244f48a5dcf56dc435243792e3a495.tar.gz |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
1) New offloading infrastructure and example 'rocker' driver for
offloading of switching and routing to hardware.
This work was done by a large group of dedicated individuals, not
limited to: Scott Feldman, Jiri Pirko, Thomas Graf, John Fastabend,
Jamal Hadi Salim, Andy Gospodarek, Florian Fainelli, Roopa Prabhu.
2) Start making the networking operate on IOV iterators instead of
modifying iov objects in-situ during transfers. Thanks to Al Viro
and Herbert Xu.
3) A set of new netlink interfaces for the TIPC stack, from Richard
Alpe.
4) Remove unnecessary looping during ipv6 routing lookups, from Martin
KaFai Lau.
5) Add PAUSE frame generation support to gianfar driver, from Matei
Pavaluca.
6) Allow for larger reordering levels in TCP, which are easily
achievable in the real world right now, from Eric Dumazet.
7) Add a variant of napi_schedule that doesn't need to disable cpu
interrupts, from Eric Dumazet.
8) Use a doubly linked list to optimize neigh_parms_release(), from
Nicolas Dichtel.
9) Various enhancements to the kernel BPF verifier, and allow eBPF
programs to actually be attached to sockets. From Alexei
Starovoitov.
10) Support TSO/LSO in sunvnet driver, from David L Stevens.
11) Allow controlling ECN usage via routing metrics, from Florian
Westphal.
12) Remote checksum offload, from Tom Herbert.
13) Add split-header receive, BQL, and xmit_more support to amd-xgbe
driver, from Thomas Lendacky.
14) Add MPLS support to openvswitch, from Simon Horman.
15) Support wildcard tunnel endpoints in ipv6 tunnels, from Steffen
Klassert.
16) Do gro flushes on a per-device basis using a timer, from Eric
Dumazet. This tries to resolve the conflicting goals between the
desired handling of bulk vs. RPC-like traffic.
17) Allow userspace to ask for the CPU on which a packet was
received/steered, via SO_INCOMING_CPU. From Eric Dumazet.
18) Limit GSO packets to half the current congestion window, from Eric
Dumazet.
19) Add a generic helper so that all drivers set their RSS keys in a
consistent way, from Eric Dumazet.
20) Add xmit_more support to enic driver, from Govindarajulu
Varadarajan.
21) Add VLAN packet scheduler action, from Jiri Pirko.
22) Support configurable RSS hash functions via ethtool, from Eyal
Perry.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1820 commits)
Fix race condition between vxlan_sock_add and vxlan_sock_release
net/macb: fix compilation warning for print_hex_dump() called with skb->mac_header
net/mlx4: Add support for A0 steering
net/mlx4: Refactor QUERY_PORT
net/mlx4_core: Add explicit error message when rule doesn't meet configuration
net/mlx4: Add A0 hybrid steering
net/mlx4: Add mlx4_bitmap zone allocator
net/mlx4: Add a check if there are too many reserved QPs
net/mlx4: Change QP allocation scheme
net/mlx4_core: Use tasklet for user-space CQ completion events
net/mlx4_core: Mask out host side virtualization features for guests
net/mlx4_en: Set csum level for encapsulated packets
be2net: Export tunnel offloads only when a VxLAN tunnel is created
gianfar: Fix dma check map error when DMA_API_DEBUG is enabled
cxgb4/csiostor: Don't use MASTER_MUST for fw_hello call
net: fec: only enable mdio interrupt before phy device link up
net: fec: clear all interrupt events to support i.MX6SX
net: fec: reset fep link status in suspend function
net: sock: fix access via invalid file descriptor
net: introduce helper macro for_each_cmsghdr
...
Diffstat (limited to 'net/ipv6')
35 files changed, 635 insertions, 346 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 0169ccf5aa4f..f7c8bbeb27b7 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1170,6 +1170,9 @@ enum { IPV6_SADDR_RULE_PRIVACY, IPV6_SADDR_RULE_ORCHID, IPV6_SADDR_RULE_PREFIX, +#ifdef CONFIG_IPV6_OPTIMISTIC_DAD + IPV6_SADDR_RULE_NOT_OPTIMISTIC, +#endif IPV6_SADDR_RULE_MAX }; @@ -1197,6 +1200,15 @@ static inline int ipv6_saddr_preferred(int type) return 0; } +static inline bool ipv6_use_optimistic_addr(struct inet6_dev *idev) +{ +#ifdef CONFIG_IPV6_OPTIMISTIC_DAD + return idev && idev->cnf.optimistic_dad && idev->cnf.use_optimistic; +#else + return false; +#endif +} + static int ipv6_get_saddr_eval(struct net *net, struct ipv6_saddr_score *score, struct ipv6_saddr_dst *dst, @@ -1257,10 +1269,16 @@ static int ipv6_get_saddr_eval(struct net *net, score->scopedist = ret; break; case IPV6_SADDR_RULE_PREFERRED: + { /* Rule 3: Avoid deprecated and optimistic addresses */ + u8 avoid = IFA_F_DEPRECATED; + + if (!ipv6_use_optimistic_addr(score->ifa->idev)) + avoid |= IFA_F_OPTIMISTIC; ret = ipv6_saddr_preferred(score->addr_type) || - !(score->ifa->flags & (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)); + !(score->ifa->flags & avoid); break; + } #ifdef CONFIG_IPV6_MIP6 case IPV6_SADDR_RULE_HOA: { @@ -1306,6 +1324,14 @@ static int ipv6_get_saddr_eval(struct net *net, ret = score->ifa->prefix_len; score->matchlen = ret; break; +#ifdef CONFIG_IPV6_OPTIMISTIC_DAD + case IPV6_SADDR_RULE_NOT_OPTIMISTIC: + /* Optimistic addresses still have lower precedence than other + * preferred addresses. 
+ */ + ret = !(score->ifa->flags & IFA_F_OPTIMISTIC); + break; +#endif default: ret = 0; } @@ -1385,10 +1411,8 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev, if (unlikely(score->addr_type == IPV6_ADDR_ANY || score->addr_type & IPV6_ADDR_MULTICAST)) { - LIMIT_NETDEBUG(KERN_DEBUG - "ADDRCONF: unspecified / multicast address " - "assigned as unicast address on %s", - dev->name); + net_dbg_ratelimited("ADDRCONF: unspecified / multicast address assigned as unicast address on %s", + dev->name); continue; } @@ -2315,8 +2339,8 @@ ok: else stored_lft = 0; if (!update_lft && !create && stored_lft) { - const u32 minimum_lft = min( - stored_lft, (u32)MIN_VALID_LIFETIME); + const u32 minimum_lft = min_t(u32, + stored_lft, MIN_VALID_LIFETIME); valid_lft = max(valid_lft, minimum_lft); /* RFC4862 Section 5.5.3e: @@ -2519,7 +2543,8 @@ static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags, if (!dev) return -ENODEV; - if ((idev = __in6_dev_get(dev)) == NULL) + idev = __in6_dev_get(dev); + if (idev == NULL) return -ENXIO; read_lock_bh(&idev->lock); @@ -2666,7 +2691,8 @@ static void init_loopback(struct net_device *dev) ASSERT_RTNL(); - if ((idev = ipv6_find_idev(dev)) == NULL) { + idev = ipv6_find_idev(dev); + if (idev == NULL) { pr_debug("%s: add_dev failed\n", __func__); return; } @@ -2789,7 +2815,8 @@ static void addrconf_sit_config(struct net_device *dev) * our v4 addrs in the tunnel */ - if ((idev = ipv6_find_idev(dev)) == NULL) { + idev = ipv6_find_idev(dev); + if (idev == NULL) { pr_debug("%s: add_dev failed\n", __func__); return; } @@ -2813,7 +2840,8 @@ static void addrconf_gre_config(struct net_device *dev) ASSERT_RTNL(); - if ((idev = ipv6_find_idev(dev)) == NULL) { + idev = ipv6_find_idev(dev); + if (idev == NULL) { pr_debug("%s: add_dev failed\n", __func__); return; } @@ -3222,8 +3250,15 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) * Optimistic nodes can start receiving * Frames right away */ - if (ifp->flags & 
IFA_F_OPTIMISTIC) + if (ifp->flags & IFA_F_OPTIMISTIC) { ip6_ins_rt(ifp->rt); + if (ipv6_use_optimistic_addr(idev)) { + /* Because optimistic nodes can use this address, + * notify listeners. If DAD fails, RTM_DELADDR is sent. + */ + ipv6_ifa_notify(RTM_NEWADDR, ifp); + } + } addrconf_dad_kick(ifp); out: @@ -4330,6 +4365,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD array[DEVCONF_OPTIMISTIC_DAD] = cnf->optimistic_dad; + array[DEVCONF_USE_OPTIMISTIC] = cnf->use_optimistic; #endif #ifdef CONFIG_IPV6_MROUTE array[DEVCONF_MC_FORWARDING] = cnf->mc_forwarding; @@ -5156,6 +5192,14 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, + { + .procname = "use_optimistic", + .data = &ipv6_devconf.use_optimistic, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + + }, #endif #ifdef CONFIG_IPV6_MROUTE { @@ -5336,10 +5380,8 @@ static void __net_exit addrconf_exit_net(struct net *net) __addrconf_sysctl_unregister(net->ipv6.devconf_dflt); __addrconf_sysctl_unregister(net->ipv6.devconf_all); #endif - if (!net_eq(net, &init_net)) { - kfree(net->ipv6.devconf_dflt); - kfree(net->ipv6.devconf_all); - } + kfree(net->ipv6.devconf_dflt); + kfree(net->ipv6.devconf_all); } static struct pernet_operations addrconf_ops = { diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 6d16eb0e0c7f..a6727add2624 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -272,10 +272,9 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir) ipv6_rearrange_destopt(iph, exthdr.opth); case NEXTHDR_HOP: if (!zero_out_mutable_opts(exthdr.opth)) { - LIMIT_NETDEBUG( - KERN_WARNING "overrun %sopts\n", - nexthdr == NEXTHDR_HOP ? - "hop" : "dest"); + net_dbg_ratelimited("overrun %sopts\n", + nexthdr == NEXTHDR_HOP ? 
+ "hop" : "dest"); return -EINVAL; } break; @@ -354,7 +353,8 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) ahp = x->data; ahash = ahp->ahash; - if ((err = skb_cow_data(skb, 0, &trailer)) < 0) + err = skb_cow_data(skb, 0, &trailer); + if (err < 0) goto out; nfrags = err; @@ -560,8 +560,8 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) if (!pskb_may_pull(skb, ah_hlen)) goto out; - - if ((err = skb_cow_data(skb, 0, &trailer)) < 0) + err = skb_cow_data(skb, 0, &trailer); + if (err < 0) goto out; nfrags = err; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 2cdc38338be3..100c589a2a6c 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -325,6 +325,16 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu) kfree_skb(skb); } +static void ip6_datagram_prepare_pktinfo_errqueue(struct sk_buff *skb) +{ + int ifindex = skb->dev ? skb->dev->ifindex : -1; + + if (skb->protocol == htons(ETH_P_IPV6)) + IP6CB(skb)->iif = ifindex; + else + PKTINFO_SKB_CB(skb)->ipi_ifindex = ifindex; +} + /* * Handle MSG_ERRQUEUE */ @@ -351,7 +361,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) msg->msg_flags |= MSG_TRUNC; copied = len; } - err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + err = skb_copy_datagram_msg(skb, 0, msg, copied); if (err) goto out_free_skb; @@ -388,8 +398,12 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; sin->sin6_port = 0; - if (np->rxopt.all) + if (np->rxopt.all) { + if (serr->ee.ee_origin != SO_EE_ORIGIN_ICMP && + serr->ee.ee_origin != SO_EE_ORIGIN_ICMP6) + ip6_datagram_prepare_pktinfo_errqueue(skb); ip6_datagram_recv_common_ctl(sk, msg, skb); + } if (skb->protocol == htons(ETH_P_IPV6)) { sin->sin6_addr = ipv6_hdr(skb)->saddr; if (np->rxopt.all) @@ -445,7 +459,7 @@ int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len, msg->msg_flags |= 
MSG_TRUNC; copied = len; } - err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + err = skb_copy_datagram_msg(skb, 0, msg, copied); if (err) goto out_free_skb; @@ -491,7 +505,10 @@ void ip6_datagram_recv_common_ctl(struct sock *sk, struct msghdr *msg, ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr, &src_info.ipi6_addr); } - put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info); + + if (src_info.ipi6_ifindex >= 0) + put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, + sizeof(src_info), &src_info); } } @@ -640,7 +657,7 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, int len; int err = 0; - for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { + for_each_cmsghdr(cmsg, msg) { int addr_type; if (!CMSG_OK(msg, cmsg)) { @@ -893,8 +910,8 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, break; } default: - LIMIT_NETDEBUG(KERN_DEBUG "invalid cmsg type: %d\n", - cmsg->cmsg_type); + net_dbg_ratelimited("invalid cmsg type: %d\n", + cmsg->cmsg_type); err = -EINVAL; goto exit_f; } diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 83fc3a385a26..e48f2c7c5c59 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -286,8 +286,8 @@ static int esp_input_done2(struct sk_buff *skb, int err) err = -EINVAL; padlen = nexthdr[0]; if (padlen + 2 + alen >= elen) { - LIMIT_NETDEBUG(KERN_WARNING "ipsec esp packet is garbage " - "padlen=%d, elen=%d\n", padlen + 2, elen - alen); + net_dbg_ratelimited("ipsec esp packet is garbage padlen=%d, elen=%d\n", + padlen + 2, elen - alen); goto out; } @@ -345,7 +345,8 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) goto out; } - if ((nfrags = skb_cow_data(skb, 0, &trailer)) < 0) { + nfrags = skb_cow_data(skb, 0, &trailer); + if (nfrags < 0) { ret = -EINVAL; goto out; } @@ -544,12 +545,12 @@ static int esp_init_authenc(struct xfrm_state *x) BUG_ON(!aalg_desc); err = -EINVAL; - if (aalg_desc->uinfo.auth.icv_fullbits/8 != + if (aalg_desc->uinfo.auth.icv_fullbits / 8 != 
crypto_aead_authsize(aead)) { - NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n", - x->aalg->alg_name, - crypto_aead_authsize(aead), - aalg_desc->uinfo.auth.icv_fullbits/8); + pr_info("ESP: %s digestsize %u != %hu\n", + x->aalg->alg_name, + crypto_aead_authsize(aead), + aalg_desc->uinfo.auth.icv_fullbits / 8); goto free_key; } diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index bfde361b6134..a7bbbe45570b 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -47,7 +47,7 @@ #include <net/xfrm.h> #endif -#include <asm/uaccess.h> +#include <linux/uaccess.h> /* * Parsing tlv encoded headers. @@ -184,7 +184,7 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff) int ret; if (opt->dsthao) { - LIMIT_NETDEBUG(KERN_DEBUG "hao duplicated\n"); + net_dbg_ratelimited("hao duplicated\n"); goto discard; } opt->dsthao = opt->dst1; @@ -193,14 +193,14 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff) hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) + optoff); if (hao->length != 16) { - LIMIT_NETDEBUG( - KERN_DEBUG "hao invalid option length = %d\n", hao->length); + net_dbg_ratelimited("hao invalid option length = %d\n", + hao->length); goto discard; } if (!(ipv6_addr_type(&hao->addr) & IPV6_ADDR_UNICAST)) { - LIMIT_NETDEBUG( - KERN_DEBUG "hao is not an unicast addr: %pI6\n", &hao->addr); + net_dbg_ratelimited("hao is not an unicast addr: %pI6\n", + &hao->addr); goto discard; } @@ -551,8 +551,8 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff) memcpy(&IP6CB(skb)->ra, nh + optoff + 2, sizeof(IP6CB(skb)->ra)); return true; } - LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n", - nh[optoff + 1]); + net_dbg_ratelimited("ipv6_hop_ra: wrong RA length %d\n", + nh[optoff + 1]); kfree_skb(skb); return false; } @@ -566,8 +566,8 @@ static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff) u32 pkt_len; if (nh[optoff + 1] != 4 || (optoff & 3) != 2) { - LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment 
%d\n", - nh[optoff+1]); + net_dbg_ratelimited("ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", + nh[optoff+1]); IP6_INC_STATS_BH(net, ipv6_skb_idev(skb), IPSTATS_MIB_INHDRERRORS); goto drop; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 97ae70077a4f..d674152b6ede 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -243,7 +243,8 @@ int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct icmp6hdr *icmp6h; int err = 0; - if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) + skb = skb_peek(&sk->sk_write_queue); + if (skb == NULL) goto out; icmp6h = icmp6_hdr(skb); @@ -338,7 +339,7 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net, * anycast. */ if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) { - LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: acast source\n"); + net_dbg_ratelimited("icmp6_send: acast source\n"); dst_release(dst); return ERR_PTR(-EINVAL); } @@ -452,7 +453,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) * and anycast addresses will be checked later. */ if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) { - LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: addr_any/mcast source\n"); + net_dbg_ratelimited("icmp6_send: addr_any/mcast source\n"); return; } @@ -460,7 +461,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) * Never answer to a ICMP packet. 
*/ if (is_ineligible(skb)) { - LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: no reply to icmp error\n"); + net_dbg_ratelimited("icmp6_send: no reply to icmp error\n"); return; } @@ -509,7 +510,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) len = skb->len - msg.offset; len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr)); if (len < 0) { - LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n"); + net_dbg_ratelimited("icmp: len problem\n"); goto out_dst_release; } @@ -679,6 +680,7 @@ static int icmpv6_rcv(struct sk_buff *skb) const struct in6_addr *saddr, *daddr; struct icmp6hdr *hdr; u8 type; + bool success = false; if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { struct sec_path *sp = skb_sec_path(skb); @@ -706,9 +708,8 @@ static int icmpv6_rcv(struct sk_buff *skb) daddr = &ipv6_hdr(skb)->daddr; if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) { - LIMIT_NETDEBUG(KERN_DEBUG - "ICMPv6 checksum failed [%pI6c > %pI6c]\n", - saddr, daddr); + net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n", + saddr, daddr); goto csum_error; } @@ -727,7 +728,7 @@ static int icmpv6_rcv(struct sk_buff *skb) break; case ICMPV6_ECHO_REPLY: - ping_rcv(skb); + success = ping_rcv(skb); break; case ICMPV6_PKT_TOOBIG: @@ -781,7 +782,7 @@ static int icmpv6_rcv(struct sk_buff *skb) if (type & ICMPV6_INFOMSG_MASK) break; - LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n"); + net_dbg_ratelimited("icmpv6: msg of unknown type\n"); /* * error of unknown type. 
@@ -791,7 +792,14 @@ static int icmpv6_rcv(struct sk_buff *skb) icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu); } - kfree_skb(skb); + /* until the v6 path can be better sorted assume failure and + * preserve the status quo behaviour for the rest of the paths to here + */ + if (success) + consume_skb(skb); + else + kfree_skb(skb); + return 0; csum_error: @@ -1009,4 +1017,3 @@ struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net) return table; } #endif - diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 3dd7d4ebd7cd..2f780cba6e12 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -654,7 +654,11 @@ release: goto done; err = -ENOMEM; - if (sfl1 == NULL || (err = mem_check(sk)) != 0) + if (sfl1 == NULL) + goto done; + + err = mem_check(sk); + if (err != 0) goto done; fl1 = fl_intern(net, fl, freq.flr_label); @@ -769,10 +773,9 @@ static void ip6fl_seq_stop(struct seq_file *seq, void *v) static int ip6fl_seq_show(struct seq_file *seq, void *v) { struct ip6fl_iter_state *state = ip6fl_seq_private(seq); - if (v == SEQ_START_TOKEN) - seq_printf(seq, "%-5s %-1s %-6s %-6s %-6s %-8s %-32s %s\n", - "Label", "S", "Owner", "Users", "Linger", "Expires", "Dst", "Opt"); - else { + if (v == SEQ_START_TOKEN) { + seq_puts(seq, "Label S Owner Users Linger Expires Dst Opt\n"); + } else { struct ip6_flowlabel *fl = v; seq_printf(seq, "%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n", diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 0e32d2e1bdbf..13cda4c6313b 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -902,7 +902,7 @@ static netdev_tx_t ip6gre_tunnel_xmit(struct sk_buff *skb, struct net_device_stats *stats = &t->dev->stats; int ret; - if (!ip6_tnl_xmit_ctl(t)) + if (!ip6_tnl_xmit_ctl(t, &t->parms.laddr, &t->parms.raddr)) goto tx_err; switch (skb->protocol) { diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index a3084ab5df6c..aacdcb4dc762 100644 --- a/net/ipv6/ip6_input.c +++ 
b/net/ipv6/ip6_input.c @@ -220,7 +220,8 @@ resubmit: nexthdr = skb_network_header(skb)[nhoff]; raw = raw6_local_deliver(skb, nexthdr); - if ((ipprot = rcu_dereference(inet6_protos[nexthdr])) != NULL) { + ipprot = rcu_dereference(inet6_protos[nexthdr]); + if (ipprot != NULL) { int ret; if (ipprot->flags & INET6_PROTO_FINAL) { diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 01e12d0d8fcc..46d452a56d3e 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -79,7 +79,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, SKB_GSO_SIT | SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM | - SKB_GSO_MPLS | + SKB_GSO_TUNNEL_REMCSUM | SKB_GSO_TCPV6 | 0))) goto out; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 8e950c250ada..ce69a12ae48c 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -747,13 +747,11 @@ slow_path: if (len < left) { len &= ~7; } - /* - * Allocate buffer. - */ - if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) + - hroom + troom, GFP_ATOMIC)) == NULL) { - NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n"); + /* Allocate buffer */ + frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) + + hroom + troom, GFP_ATOMIC); + if (!frag) { IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); err = -ENOMEM; @@ -900,7 +898,8 @@ static int ip6_dst_lookup_tail(struct sock *sk, if (*dst == NULL) *dst = ip6_route_output(net, sk, fl6); - if ((err = (*dst)->error)) + err = (*dst)->error; + if (err) goto out_err_release; if (ipv6_addr_any(&fl6->saddr)) { @@ -948,7 +947,8 @@ static int ip6_dst_lookup_tail(struct sock *sk, memcpy(&fl_gw6, fl6, sizeof(struct flowi6)); memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr)); *dst = ip6_route_output(net, sk, &fl_gw6); - if ((err = (*dst)->error)) + err = (*dst)->error; + if (err) goto out_err_release; } } @@ -1056,7 +1056,8 @@ static inline int ip6_ufo_append_data(struct sock *sk, * device, so create one single skb packet 
containing complete * udp datagram */ - if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { + skb = skb_peek_tail(&sk->sk_write_queue); + if (skb == NULL) { skb = sock_alloc_send_skb(sk, hh_len + fragheaderlen + transhdrlen + 20, (flags & MSG_DONTWAIT), &err); @@ -1536,7 +1537,8 @@ int ip6_push_pending_frames(struct sock *sk) unsigned char proto = fl6->flowi6_proto; int err = 0; - if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL) + skb = __skb_dequeue(&sk->sk_write_queue); + if (skb == NULL) goto out; tail_skb = &(skb_shinfo(skb)->frag_list); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 9cb94cfa0ae7..92b3da571980 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -183,6 +183,7 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_ unsigned int hash = HASH(remote, local); struct ip6_tnl *t; struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); + struct in6_addr any; for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { if (ipv6_addr_equal(local, &t->parms.laddr) && @@ -190,6 +191,22 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_ (t->dev->flags & IFF_UP)) return t; } + + memset(&any, 0, sizeof(any)); + hash = HASH(&any, local); + for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { + if (ipv6_addr_equal(local, &t->parms.laddr) && + (t->dev->flags & IFF_UP)) + return t; + } + + hash = HASH(remote, &any); + for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { + if (ipv6_addr_equal(remote, &t->parms.raddr) && + (t->dev->flags & IFF_UP)) + return t; + } + t = rcu_dereference(ip6n->tnls_wc[0]); if (t && (t->dev->flags & IFF_UP)) return t; @@ -474,6 +491,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, int rel_msg = 0; u8 rel_type = ICMPV6_DEST_UNREACH; u8 rel_code = ICMPV6_ADDR_UNREACH; + u8 tproto; __u32 rel_info = 0; __u16 len; int err = -ENOENT; @@ -483,11 +501,12 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm 
*opt, processing of the error. */ rcu_read_lock(); - if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr, - &ipv6h->saddr)) == NULL) + t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr, &ipv6h->saddr); + if (t == NULL) goto out; - if (t->parms.proto != ipproto && t->parms.proto != 0) + tproto = ACCESS_ONCE(t->parms.proto); + if (tproto != ipproto && tproto != 0) goto out; err = 0; @@ -531,7 +550,8 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, mtu = IPV6_MIN_MTU; t->dev->mtu = mtu; - if ((len = sizeof(*ipv6h) + ntohs(ipv6h->payload_len)) > mtu) { + len = sizeof(*ipv6h) + ntohs(ipv6h->payload_len); + if (len > mtu) { rel_type = ICMPV6_PKT_TOOBIG; rel_code = 0; rel_info = mtu; @@ -788,15 +808,16 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, { struct ip6_tnl *t; const struct ipv6hdr *ipv6h = ipv6_hdr(skb); + u8 tproto; int err; rcu_read_lock(); - - if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, - &ipv6h->daddr)) != NULL) { + t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr); + if (t != NULL) { struct pcpu_sw_netstats *tstats; - if (t->parms.proto != ipproto && t->parms.proto != 0) { + tproto = ACCESS_ONCE(t->parms.proto); + if (tproto != ipproto && tproto != 0) { rcu_read_unlock(); goto discard; } @@ -902,24 +923,28 @@ ip6_tnl_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr) return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr); } -int ip6_tnl_xmit_ctl(struct ip6_tnl *t) +int ip6_tnl_xmit_ctl(struct ip6_tnl *t, + const struct in6_addr *laddr, + const struct in6_addr *raddr) { struct __ip6_tnl_parm *p = &t->parms; int ret = 0; struct net *net = t->net; - if (p->flags & IP6_TNL_F_CAP_XMIT) { + if ((p->flags & IP6_TNL_F_CAP_XMIT) || + ((p->flags & IP6_TNL_F_CAP_PER_PACKET) && + (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_XMIT))) { struct net_device *ldev = NULL; rcu_read_lock(); if (p->link) ldev = dev_get_by_index_rcu(net, p->link); - if 
(unlikely(!ipv6_chk_addr(net, &p->laddr, ldev, 0))) + if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0))) pr_warn("%s xmit: Local address not yet configured!\n", p->name); - else if (!ipv6_addr_is_multicast(&p->raddr) && - unlikely(ipv6_chk_addr(net, &p->raddr, NULL, 0))) + else if (!ipv6_addr_is_multicast(raddr) && + unlikely(ipv6_chk_addr(net, raddr, NULL, 0))) pr_warn("%s xmit: Routing loop! Remote address found on this node!\n", p->name); else @@ -968,8 +993,34 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, u8 proto; int err = -1; - if (!fl6->flowi6_mark) + /* NBMA tunnel */ + if (ipv6_addr_any(&t->parms.raddr)) { + struct in6_addr *addr6; + struct neighbour *neigh; + int addr_type; + + if (!skb_dst(skb)) + goto tx_err_link_failure; + + neigh = dst_neigh_lookup(skb_dst(skb), + &ipv6_hdr(skb)->daddr); + if (!neigh) + goto tx_err_link_failure; + + addr6 = (struct in6_addr *)&neigh->primary_key; + addr_type = ipv6_addr_type(addr6); + + if (addr_type == IPV6_ADDR_ANY) + addr6 = &ipv6_hdr(skb)->daddr; + + memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); + neigh_release(neigh); + } else if (!fl6->flowi6_mark) dst = ip6_tnl_dst_check(t); + + if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr)) + goto tx_err_link_failure; + if (!dst) { ndst = ip6_route_output(net, NULL, fl6); @@ -1018,7 +1069,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { struct sk_buff *new_skb; - if (!(new_skb = skb_realloc_headroom(skb, max_headroom))) + new_skb = skb_realloc_headroom(skb, max_headroom); + if (!new_skb) goto tx_err_dst_release; if (skb->sk) @@ -1075,10 +1127,11 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) struct flowi6 fl6; __u8 dsfield; __u32 mtu; + u8 tproto; int err; - if ((t->parms.proto != IPPROTO_IPIP && t->parms.proto != 0) || - !ip6_tnl_xmit_ctl(t)) + tproto = ACCESS_ONCE(t->parms.proto); + if (tproto != IPPROTO_IPIP && tproto != 0) return -1; if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) @@ 
-1117,10 +1170,12 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) struct flowi6 fl6; __u8 dsfield; __u32 mtu; + u8 tproto; int err; - if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) || - !ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h)) + tproto = ACCESS_ONCE(t->parms.proto); + if ((tproto != IPPROTO_IPV6 && tproto != 0) || + ip6_tnl_addr_conflict(t, ipv6h)) return -1; offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); @@ -1282,6 +1337,14 @@ static int ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p) return err; } +static int ip6_tnl0_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p) +{ + /* for default tnl0 device allow to change only the proto */ + t->parms.proto = p->proto; + netdev_state_change(t->dev); + return 0; +} + static void ip6_tnl_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm *u) { @@ -1381,7 +1444,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) break; ip6_tnl_parm_from_user(&p1, &p); t = ip6_tnl_locate(net, &p1, cmd == SIOCADDTUNNEL); - if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) { + if (cmd == SIOCCHGTUNNEL) { if (t != NULL) { if (t->dev != dev) { err = -EEXIST; @@ -1389,8 +1452,10 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } } else t = netdev_priv(dev); - - err = ip6_tnl_update(t, &p1); + if (dev == ip6n->fb_tnl_dev) + err = ip6_tnl0_update(t, &p1); + else + err = ip6_tnl_update(t, &p1); } if (t) { err = 0; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index bcda14de7f84..ace10d0b3aac 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -95,6 +95,7 @@ vti6_tnl_lookup(struct net *net, const struct in6_addr *remote, unsigned int hash = HASH(remote, local); struct ip6_tnl *t; struct vti6_net *ip6n = net_generic(net, vti6_net_id); + struct in6_addr any; for_each_vti6_tunnel_rcu(ip6n->tnls_r_l[hash]) { if (ipv6_addr_equal(local, &t->parms.laddr) && @@ -102,6 +103,22 @@ vti6_tnl_lookup(struct net *net, 
const struct in6_addr *remote, (t->dev->flags & IFF_UP)) return t; } + + memset(&any, 0, sizeof(any)); + hash = HASH(&any, local); + for_each_vti6_tunnel_rcu(ip6n->tnls_r_l[hash]) { + if (ipv6_addr_equal(local, &t->parms.laddr) && + (t->dev->flags & IFF_UP)) + return t; + } + + hash = HASH(remote, &any); + for_each_vti6_tunnel_rcu(ip6n->tnls_r_l[hash]) { + if (ipv6_addr_equal(remote, &t->parms.raddr) && + (t->dev->flags & IFF_UP)) + return t; + } + t = rcu_dereference(ip6n->tnls_wc[0]); if (t && (t->dev->flags & IFF_UP)) return t; @@ -287,8 +304,8 @@ static int vti6_rcv(struct sk_buff *skb) const struct ipv6hdr *ipv6h = ipv6_hdr(skb); rcu_read_lock(); - if ((t = vti6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, - &ipv6h->daddr)) != NULL) { + t = vti6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr); + if (t != NULL) { if (t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) { rcu_read_unlock(); goto discard; @@ -412,6 +429,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) struct net_device_stats *stats = &t->dev->stats; struct dst_entry *dst = skb_dst(skb); struct net_device *tdev; + struct xfrm_state *x; int err = -1; if (!dst) @@ -425,7 +443,12 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) goto tx_err_link_failure; } - if (!vti6_state_check(dst->xfrm, &t->parms.raddr, &t->parms.laddr)) + x = dst->xfrm; + if (!vti6_state_check(x, &t->parms.raddr, &t->parms.laddr)) + goto tx_err_link_failure; + + if (!ip6_tnl_xmit_ctl(t, (const struct in6_addr *)&x->props.saddr, + (const struct in6_addr *)&x->id.daddr)) goto tx_err_link_failure; tdev = dst->dev; @@ -480,7 +503,7 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) ipv6h = ipv6_hdr(skb); if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) || - !ip6_tnl_xmit_ctl(t) || vti6_addr_conflict(t, ipv6h)) + vti6_addr_conflict(t, ipv6h)) goto tx_err; xfrm_decode_session(skb, &fl, AF_INET6); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c 
index 1a01d79b8698..722669754bbf 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2094,7 +2094,7 @@ static void ip6_mr_forward(struct net *net, struct mr6_table *mrt, if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) { struct mfc6_cache *cache_proxy; - /* For an (*,G) entry, we only check that the incomming + /* For an (*,G) entry, we only check that the incoming * interface is part of the static tree. */ cache_proxy = ip6mr_cache_find_any_parent(mrt, vif); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index e1a9583bb419..66980d8d98d1 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -110,12 +110,8 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk, icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen; icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); } - opt = xchg(&inet6_sk(sk)->opt, opt); - } else { - spin_lock(&sk->sk_dst_lock); - opt = xchg(&inet6_sk(sk)->opt, opt); - spin_unlock(&sk->sk_dst_lock); } + opt = xchg(&inet6_sk(sk)->opt, opt); sk_dst_reset(sk); return opt; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index ed2c4e400b46..5ce107c8aab3 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2824,11 +2824,7 @@ static int igmp6_mcf_seq_show(struct seq_file *seq, void *v) struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq); if (v == SEQ_START_TOKEN) { - seq_printf(seq, - "%3s %6s " - "%32s %32s %6s %6s\n", "Idx", - "Device", "Multicast Address", - "Source Address", "INC", "EXC"); + seq_puts(seq, "Idx Device Multicast Address Source Address INC EXC\n"); } else { seq_printf(seq, "%3d %6.6s %pi6 %pi6 %6lu %6lu\n", diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index f61429d391d3..b9779d441b12 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -97,16 +97,17 @@ static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb) return -1; if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) { - LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n", - 
mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type)); + net_dbg_ratelimited("mip6: MH message too short: %d vs >=%d\n", + mh->ip6mh_hdrlen, + mip6_mh_len(mh->ip6mh_type)); mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_hdrlen) + skb_network_header_len(skb)); return -1; } if (mh->ip6mh_proto != IPPROTO_NONE) { - LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n", - mh->ip6mh_proto); + net_dbg_ratelimited("mip6: MH invalid payload proto = %d\n", + mh->ip6mh_proto); mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_proto) + skb_network_header_len(skb)); return -1; @@ -288,7 +289,7 @@ static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb, * XXX: packet if HAO exists. */ if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) { - LIMIT_NETDEBUG(KERN_WARNING "mip6: hao exists already, override\n"); + net_dbg_ratelimited("mip6: hao exists already, override\n"); return offset; } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 4cb45c1079a2..682866777d53 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -162,7 +162,8 @@ static void ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data) memcpy(opt+2, data, data_len); data_len += 2; opt += data_len; - if ((space -= data_len) > 0) + space -= data_len; + if (space > 0) memset(opt, 0, space); } @@ -656,8 +657,8 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) if (skb && ipv6_chk_addr(dev_net(dev), &ipv6_hdr(skb)->saddr, dev, 1)) saddr = &ipv6_hdr(skb)->saddr; - - if ((probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES)) < 0) { + probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES); + if (probes < 0) { if (!(neigh->nud_state & NUD_VALID)) { ND_PRINTK(1, dbg, "%s: trying to ucast probe in NUD_INVALID: %pI6\n", @@ -1763,7 +1764,7 @@ int __init ndisc_init(void) /* * Initialize the neighbour table */ - neigh_table_init(&nd_tbl); + neigh_table_init(NEIGH_ND_TABLE, &nd_tbl); #ifdef CONFIG_SYSCTL err = neigh_sysctl_register(NULL, &nd_tbl.parms, @@ -1796,6 +1797,6 
@@ void ndisc_cleanup(void) #ifdef CONFIG_SYSCTL neigh_sysctl_unregister(&nd_tbl.parms); #endif - neigh_table_clear(&nd_tbl); + neigh_table_clear(NEIGH_ND_TABLE, &nd_tbl); unregister_pernet_subsys(&ndisc_net_ops); } diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index d38e6a8d8b9f..398377a9d018 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -36,7 +36,7 @@ int ip6_route_me_harder(struct sk_buff *skb) err = dst->error; if (err) { IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); - LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n"); + net_dbg_ratelimited("ip6_route_me_harder: No more route\n"); dst_release(dst); return err; } diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 6af874fc187f..a069822936e6 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -91,6 +91,15 @@ config NFT_MASQ_IPV6 This is the expression that provides IPv4 masquerading support for nf_tables. +config NFT_REDIR_IPV6 + tristate "IPv6 redirect support for nf_tables" + depends on NF_TABLES_IPV6 + depends on NFT_REDIR + select NF_NAT_REDIRECT + help + This is the expression that provides IPv4 redirect support for + nf_tables. 
+ endif # NF_NAT_IPV6 config IP6_NF_IPTABLES diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index fbb25f01143c..c36e0a5490de 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -36,6 +36,7 @@ obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o obj-$(CONFIG_NFT_MASQ_IPV6) += nft_masq_ipv6.o +obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o # matches obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c index 7b17a0be93e7..ddf07e6f59d7 100644 --- a/net/ipv6/netfilter/nf_log_ipv6.c +++ b/net/ipv6/netfilter/nf_log_ipv6.c @@ -5,6 +5,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/spinlock.h> @@ -398,8 +399,17 @@ static int __init nf_log_ipv6_init(void) if (ret < 0) return ret; - nf_log_register(NFPROTO_IPV6, &nf_ip6_logger); + ret = nf_log_register(NFPROTO_IPV6, &nf_ip6_logger); + if (ret < 0) { + pr_err("failed to register logger\n"); + goto err1; + } + return 0; + +err1: + unregister_pernet_subsys(&nf_log_ipv6_net_ops); + return ret; } static void __exit nf_log_ipv6_exit(void) @@ -412,6 +422,6 @@ module_init(nf_log_ipv6_init); module_exit(nf_log_ipv6_exit); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); -MODULE_DESCRIPTION("Netfilter IPv4 packet logging"); +MODULE_DESCRIPTION("Netfilter IPv6 packet logging"); MODULE_LICENSE("GPL"); MODULE_ALIAS_NF_LOGGER(AF_INET6, 0); diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 015eb8a80766..d05b36440e8b 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -11,6 +11,7 @@ #include <net/ip6_route.h> #include <net/ip6_fib.h> #include 
<net/ip6_checksum.h> +#include <net/netfilter/ipv6/nf_reject.h> #include <linux/netfilter_ipv6.h> #include <net/netfilter/ipv6/nf_reject.h> diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c new file mode 100644 index 000000000000..2433a6bfb191 --- /dev/null +++ b/net/ipv6/netfilter/nft_redir_ipv6.c @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nft_redir.h> +#include <net/netfilter/nf_nat_redirect.h> + +static void nft_redir_ipv6_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_redir *priv = nft_expr_priv(expr); + struct nf_nat_range range; + unsigned int verdict; + + memset(&range, 0, sizeof(range)); + if (priv->sreg_proto_min) { + range.min_proto.all = (__force __be16) + data[priv->sreg_proto_min].data[0]; + range.max_proto.all = (__force __be16) + data[priv->sreg_proto_max].data[0]; + range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + } + + range.flags |= priv->flags; + + verdict = nf_nat_redirect_ipv6(pkt->skb, &range, pkt->ops->hooknum); + data[NFT_REG_VERDICT].verdict = verdict; +} + +static struct nft_expr_type nft_redir_ipv6_type; +static const struct nft_expr_ops nft_redir_ipv6_ops = { + .type = &nft_redir_ipv6_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), + .eval = nft_redir_ipv6_eval, + .init = nft_redir_init, + .dump = nft_redir_dump, + .validate = nft_redir_validate, +}; + +static struct nft_expr_type 
nft_redir_ipv6_type __read_mostly = { + .family = NFPROTO_IPV6, + .name = "redir", + .ops = &nft_redir_ipv6_ops, + .policy = nft_redir_policy, + .maxattr = NFTA_REDIR_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_redir_ipv6_module_init(void) +{ + return nft_register_expr(&nft_redir_ipv6_type); +} + +static void __exit nft_redir_ipv6_module_exit(void) +{ + nft_unregister_expr(&nft_redir_ipv6_type); +} + +module_init(nft_redir_ipv6_module_init); +module_exit(nft_redir_ipv6_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>"); +MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "redir"); diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c index 0bc19fa87821..f73285924144 100644 --- a/net/ipv6/netfilter/nft_reject_ipv6.c +++ b/net/ipv6/netfilter/nft_reject_ipv6.c @@ -19,9 +19,9 @@ #include <net/netfilter/nft_reject.h> #include <net/netfilter/ipv6/nf_reject.h> -void nft_reject_ipv6_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt) +static void nft_reject_ipv6_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) { struct nft_reject *priv = nft_expr_priv(expr); struct net *net = dev_net((pkt->in != NULL) ? 
pkt->in : pkt->out); @@ -38,7 +38,6 @@ void nft_reject_ipv6_eval(const struct nft_expr *expr, data[NFT_REG_VERDICT].verdict = NF_DROP; } -EXPORT_SYMBOL_GPL(nft_reject_ipv6_eval); static struct nft_expr_type nft_reject_ipv6_type; static const struct nft_expr_ops nft_reject_ipv6_ops = { diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 5b7a1ed2aba9..2d3148378a1f 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -163,7 +163,8 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, pfh.icmph.checksum = 0; pfh.icmph.un.echo.id = inet->inet_sport; pfh.icmph.un.echo.sequence = user_icmph.icmp6_sequence; - pfh.iov = msg->msg_iov; + /* XXX: stripping const */ + pfh.iov = (struct iovec *)msg->msg_iter.iov; pfh.wcheck = 0; pfh.family = AF_INET6; diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 1752cd0b4882..679253d0af84 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -136,6 +136,7 @@ static const struct snmp_mib snmp6_udp6_list[] = { SNMP_MIB_ITEM("Udp6RcvbufErrors", UDP_MIB_RCVBUFERRORS), SNMP_MIB_ITEM("Udp6SndbufErrors", UDP_MIB_SNDBUFERRORS), SNMP_MIB_ITEM("Udp6InCsumErrors", UDP_MIB_CSUMERRORS), + SNMP_MIB_ITEM("Udp6IgnoredMulti", UDP_MIB_IGNOREDMULTI), SNMP_MIB_SENTINEL }; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 896af8807979..ee25631f8c29 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -486,13 +486,13 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, } if (skb_csum_unnecessary(skb)) { - err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + err = skb_copy_datagram_msg(skb, 0, msg, copied); } else if (msg->msg_flags&MSG_TRUNC) { if (__skb_checksum_complete(skb)) goto csum_copy_err; - err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + err = skb_copy_datagram_msg(skb, 0, msg, copied); } else { - err = skb_copy_and_csum_datagram_iovec(skb, 0, msg->msg_iov); + err = skb_copy_and_csum_datagram_msg(skb, 0, msg); if (err == -EINVAL) goto csum_copy_err; } @@ -548,7 +548,8 @@ static int 
rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, if (!rp->checksum) goto send; - if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) + skb = skb_peek(&sk->sk_write_queue); + if (!skb) goto out; offset = rp->offset; @@ -671,65 +672,62 @@ error: return err; } -static int rawv6_probe_proto_opt(struct flowi6 *fl6, struct msghdr *msg) +struct raw6_frag_vec { + struct msghdr *msg; + int hlen; + char c[4]; +}; + +static int rawv6_probe_proto_opt(struct raw6_frag_vec *rfv, struct flowi6 *fl6) { - struct iovec *iov; - u8 __user *type = NULL; - u8 __user *code = NULL; - u8 len = 0; - int probed = 0; - int i; - - if (!msg->msg_iov) - return 0; + int err = 0; + switch (fl6->flowi6_proto) { + case IPPROTO_ICMPV6: + rfv->hlen = 2; + err = memcpy_from_msg(rfv->c, rfv->msg, rfv->hlen); + if (!err) { + fl6->fl6_icmp_type = rfv->c[0]; + fl6->fl6_icmp_code = rfv->c[1]; + } + break; + case IPPROTO_MH: + rfv->hlen = 4; + err = memcpy_from_msg(rfv->c, rfv->msg, rfv->hlen); + if (!err) + fl6->fl6_mh_type = rfv->c[2]; + } + return err; +} - for (i = 0; i < msg->msg_iovlen; i++) { - iov = &msg->msg_iov[i]; - if (!iov) - continue; +static int raw6_getfrag(void *from, char *to, int offset, int len, int odd, + struct sk_buff *skb) +{ + struct raw6_frag_vec *rfv = from; - switch (fl6->flowi6_proto) { - case IPPROTO_ICMPV6: - /* check if one-byte field is readable or not. */ - if (iov->iov_base && iov->iov_len < 1) - break; - - if (!type) { - type = iov->iov_base; - /* check if code field is readable or not. */ - if (iov->iov_len > 1) - code = type + 1; - } else if (!code) - code = iov->iov_base; - - if (type && code) { - if (get_user(fl6->fl6_icmp_type, type) || - get_user(fl6->fl6_icmp_code, code)) - return -EFAULT; - probed = 1; - } - break; - case IPPROTO_MH: - if (iov->iov_base && iov->iov_len < 1) - break; - /* check if type field is readable or not. 
*/ - if (iov->iov_len > 2 - len) { - u8 __user *p = iov->iov_base; - if (get_user(fl6->fl6_mh_type, &p[2 - len])) - return -EFAULT; - probed = 1; - } else - len += iov->iov_len; + if (offset < rfv->hlen) { + int copy = min(rfv->hlen - offset, len); - break; - default: - probed = 1; - break; - } - if (probed) - break; + if (skb->ip_summed == CHECKSUM_PARTIAL) + memcpy(to, rfv->c + offset, copy); + else + skb->csum = csum_block_add( + skb->csum, + csum_partial_copy_nocheck(rfv->c + offset, + to, copy, 0), + odd); + + odd = 0; + offset += copy; + to += copy; + len -= copy; + + if (!len) + return 0; } - return 0; + + offset -= rfv->hlen; + + return ip_generic_getfrag(rfv->msg, to, offset, len, odd, skb); } static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, @@ -744,6 +742,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct ipv6_txoptions *opt = NULL; struct ip6_flowlabel *flowlabel = NULL; struct dst_entry *dst = NULL; + struct raw6_frag_vec rfv; struct flowi6 fl6; int addr_len = msg->msg_namelen; int hlimit = -1; @@ -847,7 +846,9 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, opt = ipv6_fixup_options(&opt_space, opt); fl6.flowi6_proto = proto; - err = rawv6_probe_proto_opt(&fl6, msg); + rfv.msg = msg; + rfv.hlen = 0; + err = rawv6_probe_proto_opt(&rfv, &fl6); if (err) goto out; @@ -885,10 +886,11 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, back_from_confirm: if (inet->hdrincl) - err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl6, &dst, msg->msg_flags); + /* XXX: stripping const */ + err = rawv6_send_hdrinc(sk, (struct iovec *)msg->msg_iter.iov, len, &fl6, &dst, msg->msg_flags); else { lock_sock(sk); - err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, + err = ip6_append_data(sk, raw6_getfrag, &rfv, len, 0, hlimit, tclass, opt, &fl6, (struct rt6_info *)dst, msg->msg_flags, dontfrag); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 1a157ca2ebc1..d7d70e69973b 100644 --- 
a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -69,7 +69,7 @@ struct ip6frag_skb_cb { #define FRAG6_CB(skb) ((struct ip6frag_skb_cb *)((skb)->cb)) -static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) +static u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) { return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK); } @@ -178,7 +178,7 @@ static void ip6_frag_expire(unsigned long data) ip6_expire_frag_queue(net, fq, &ip6_frags); } -static __inline__ struct frag_queue * +static struct frag_queue * fq_find(struct net *net, __be32 id, const struct in6_addr *src, const struct in6_addr *dst, u8 ecn) { @@ -429,7 +429,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, struct sk_buff *clone; int i, plen = 0; - if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL) + clone = alloc_skb(0, GFP_ATOMIC); + if (clone == NULL) goto out_oom; clone->next = head->next; head->next = clone; @@ -684,21 +685,21 @@ static void ip6_frags_sysctl_unregister(void) unregister_net_sysctl_table(ip6_ctl_header); } #else -static inline int ip6_frags_ns_sysctl_register(struct net *net) +static int ip6_frags_ns_sysctl_register(struct net *net) { return 0; } -static inline void ip6_frags_ns_sysctl_unregister(struct net *net) +static void ip6_frags_ns_sysctl_unregister(struct net *net) { } -static inline int ip6_frags_sysctl_register(void) +static int ip6_frags_sysctl_register(void) { return 0; } -static inline void ip6_frags_sysctl_unregister(void) +static void ip6_frags_sysctl_unregister(void) { } #endif diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a318dd89b6d9..c91083156edb 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -772,23 +772,22 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, } #endif -#define BACKTRACK(__net, saddr) \ -do { \ - if (rt == __net->ipv6.ip6_null_entry) { \ - struct fib6_node *pn; \ - while (1) { \ - if (fn->fn_flags & RTN_TL_ROOT) \ - goto out; \ - pn = fn->parent; \ - if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \ - 
fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \ - else \ - fn = pn; \ - if (fn->fn_flags & RTN_RTINFO) \ - goto restart; \ - } \ - } \ -} while (0) +static struct fib6_node* fib6_backtrack(struct fib6_node *fn, + struct in6_addr *saddr) +{ + struct fib6_node *pn; + while (1) { + if (fn->fn_flags & RTN_TL_ROOT) + return NULL; + pn = fn->parent; + if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) + fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); + else + fn = pn; + if (fn->fn_flags & RTN_RTINFO) + return fn; + } +} static struct rt6_info *ip6_pol_route_lookup(struct net *net, struct fib6_table *table, @@ -804,8 +803,11 @@ restart: rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags); if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0) rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags); - BACKTRACK(net, &fl6->saddr); -out: + if (rt == net->ipv6.ip6_null_entry) { + fn = fib6_backtrack(fn, &fl6->saddr); + if (fn) + goto restart; + } dst_use(&rt->dst, jiffies); read_unlock_bh(&table->tb6_lock); return rt; @@ -915,33 +917,48 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, struct flowi6 *fl6, int flags) { - struct fib6_node *fn; + struct fib6_node *fn, *saved_fn; struct rt6_info *rt, *nrt; int strict = 0; int attempts = 3; int err; - int reachable = net->ipv6.devconf_all->forwarding ? 
0 : RT6_LOOKUP_F_REACHABLE; strict |= flags & RT6_LOOKUP_F_IFACE; + if (net->ipv6.devconf_all->forwarding == 0) + strict |= RT6_LOOKUP_F_REACHABLE; -relookup: +redo_fib6_lookup_lock: read_lock_bh(&table->tb6_lock); -restart_2: fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); + saved_fn = fn; -restart: - rt = rt6_select(fn, oif, strict | reachable); +redo_rt6_select: + rt = rt6_select(fn, oif, strict); if (rt->rt6i_nsiblings) - rt = rt6_multipath_select(rt, fl6, oif, strict | reachable); - BACKTRACK(net, &fl6->saddr); - if (rt == net->ipv6.ip6_null_entry || - rt->rt6i_flags & RTF_CACHE) - goto out; + rt = rt6_multipath_select(rt, fl6, oif, strict); + if (rt == net->ipv6.ip6_null_entry) { + fn = fib6_backtrack(fn, &fl6->saddr); + if (fn) + goto redo_rt6_select; + else if (strict & RT6_LOOKUP_F_REACHABLE) { + /* also consider unreachable route */ + strict &= ~RT6_LOOKUP_F_REACHABLE; + fn = saved_fn; + goto redo_rt6_select; + } else { + dst_hold(&rt->dst); + read_unlock_bh(&table->tb6_lock); + goto out2; + } + } dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); + if (rt->rt6i_flags & RTF_CACHE) + goto out2; + if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY))) nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); else if (!(rt->dst.flags & DST_HOST)) @@ -967,15 +984,8 @@ restart: * released someone could insert this route. Relookup. 
*/ ip6_rt_put(rt); - goto relookup; + goto redo_fib6_lookup_lock; -out: - if (reachable) { - reachable = 0; - goto restart_2; - } - dst_hold(&rt->dst); - read_unlock_bh(&table->tb6_lock); out2: rt->dst.lastuse = jiffies; rt->dst.__use++; @@ -1235,10 +1245,12 @@ restart: rt = net->ipv6.ip6_null_entry; else if (rt->dst.error) { rt = net->ipv6.ip6_null_entry; - goto out; + } else if (rt == net->ipv6.ip6_null_entry) { + fn = fib6_backtrack(fn, &fl6->saddr); + if (fn) + goto restart; } - BACKTRACK(net, &fl6->saddr); -out: + dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index a24557a1c1d8..213546bd6d5d 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1241,7 +1241,8 @@ ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) goto done; err = -ENOENT; - if ((t = ipip6_tunnel_locate(net, &p, 0)) == NULL) + t = ipip6_tunnel_locate(net, &p, 0); + if (t == NULL) goto done; err = -EPERM; if (t == netdev_priv(sitn->fb_tunnel_dev)) @@ -1711,7 +1712,7 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u16(skb, IFLA_IPTUN_ENCAP_DPORT, tunnel->encap.dport) || nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS, - tunnel->encap.dport)) + tunnel->encap.flags)) goto nla_put_failure; return 0; @@ -1836,8 +1837,8 @@ static int __net_init sit_init_net(struct net *net) goto err_dev_free; ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn); - - if ((err = register_netdev(sitn->fb_tunnel_dev))) + err = register_netdev(sitn->fb_tunnel_dev); + if (err) goto err_reg_dev; t = netdev_priv(sitn->fb_tunnel_dev); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 2f25cb6347ca..7337fc7947e2 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -166,13 +166,15 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) int mss; struct dst_entry *dst; __u8 rcv_wscale; - bool ecn_ok = false; if 
(!sysctl_tcp_syncookies || !th->ack || th->rst) goto out; - if (tcp_synq_no_recent_overflow(sk) || - (mss = __cookie_v6_check(ipv6_hdr(skb), th, cookie)) == 0) { + if (tcp_synq_no_recent_overflow(sk)) + goto out; + + mss = __cookie_v6_check(ipv6_hdr(skb), th, cookie); + if (mss == 0) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED); goto out; } @@ -183,7 +185,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) memset(&tcp_opt, 0, sizeof(tcp_opt)); tcp_parse_options(skb, &tcp_opt, 0, NULL); - if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok)) + if (!cookie_timestamp_decode(&tcp_opt)) goto out; ret = NULL; @@ -220,7 +222,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) req->expires = 0UL; req->num_retrans = 0; - ireq->ecn_ok = ecn_ok; ireq->snd_wscale = tcp_opt.snd_wscale; ireq->sack_ok = tcp_opt.sack_ok; ireq->wscale_ok = tcp_opt.wscale_ok; @@ -261,6 +262,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) dst_metric(dst, RTAX_INITRWND)); ireq->rcv_wscale = rcv_wscale; + ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), dst); ret = get_cookie_sock(sk, skb, req, dst); out: @@ -269,4 +271,3 @@ out_free: reqsk_free(req); return NULL; } - diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index dc495ae2ead0..5ff87805258e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -787,16 +787,16 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .queue_hash_add = inet6_csk_reqsk_queue_hash_add, }; -static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, - u32 tsval, u32 tsecr, int oif, - struct tcp_md5sig_key *key, int rst, u8 tclass, - u32 label) +static void tcp_v6_send_response(struct sock *sk, struct sk_buff *skb, u32 seq, + u32 ack, u32 win, u32 tsval, u32 tsecr, + int oif, struct tcp_md5sig_key *key, int rst, + u8 tclass, u32 label) { const struct tcphdr *th = tcp_hdr(skb); struct tcphdr *t1; struct sk_buff *buff; struct 
flowi6 fl6; - struct net *net = dev_net(skb_dst(skb)->dev); + struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); struct sock *ctl_sk = net->ipv6.tcp_sk; unsigned int tot_len = sizeof(struct tcphdr); struct dst_entry *dst; @@ -946,7 +946,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) (th->doff << 2); oif = sk ? sk->sk_bound_dev_if : 0; - tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0); + tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0); #ifdef CONFIG_TCP_MD5SIG release_sk1: @@ -957,13 +957,13 @@ release_sk1: #endif } -static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, - u32 win, u32 tsval, u32 tsecr, int oif, +static void tcp_v6_send_ack(struct sock *sk, struct sk_buff *skb, u32 seq, + u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, u8 tclass, u32 label) { - tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, oif, key, 0, tclass, - label); + tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0, + tclass, label); } static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) @@ -971,7 +971,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) struct inet_timewait_sock *tw = inet_twsk(sk); struct tcp_timewait_sock *tcptw = tcp_twsk(sk); - tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, + tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcp_time_stamp + tcptw->tw_ts_offset, tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), @@ -986,10 +986,10 @@ static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV * sk->sk_state == TCP_SYN_RECV -> for Fast Open. */ - tcp_v6_send_ack(skb, (sk->sk_state == TCP_LISTEN) ? + tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ? 
tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, - tcp_rsk(req)->rcv_nxt, - req->rcv_wnd, tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if, + tcp_rsk(req)->rcv_nxt, req->rcv_wnd, + tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if, tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0, 0); } @@ -1296,6 +1296,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) struct dst_entry *dst = sk->sk_rx_dst; sock_rps_save_rxhash(sk, skb); + sk_mark_napi_id(sk, skb); if (dst) { if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || dst->ops->check(dst, np->rx_dst_cookie) == NULL) { @@ -1325,6 +1326,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) */ if (nsk != sk) { sock_rps_save_rxhash(nsk, skb); + sk_mark_napi_id(sk, skb); if (tcp_child_process(sk, nsk, skb)) goto reset; if (opt_skb) @@ -1457,7 +1459,7 @@ process: if (sk_filter(sk, skb)) goto discard_and_relse; - sk_mark_napi_id(sk, skb); + sk_incoming_cpu_update(sk); skb->dev = NULL; bh_lock_sock_nested(sk); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index f6ba535b6feb..189dc4ae3eca 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -148,72 +148,85 @@ static inline int compute_score(struct sock *sk, struct net *net, const struct in6_addr *daddr, __be16 dport, int dif) { - int score = -1; + int score; + struct inet_sock *inet; - if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum && - sk->sk_family == PF_INET6) { - struct inet_sock *inet = inet_sk(sk); + if (!net_eq(sock_net(sk), net) || + udp_sk(sk)->udp_port_hash != hnum || + sk->sk_family != PF_INET6) + return -1; - score = 0; - if (inet->inet_dport) { - if (inet->inet_dport != sport) - return -1; - score++; - } - if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { - if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) - return -1; - score++; - } - if (!ipv6_addr_any(&sk->sk_v6_daddr)) { - if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) - return -1; - score++; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - 
return -1; - score++; - } + score = 0; + inet = inet_sk(sk); + + if (inet->inet_dport) { + if (inet->inet_dport != sport) + return -1; + score++; + } + + if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) + return -1; + score++; + } + + if (!ipv6_addr_any(&sk->sk_v6_daddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) + return -1; + score++; } + + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + return -1; + score++; + } + return score; } #define SCORE2_MAX (1 + 1 + 1) static inline int compute_score2(struct sock *sk, struct net *net, - const struct in6_addr *saddr, __be16 sport, - const struct in6_addr *daddr, unsigned short hnum, - int dif) + const struct in6_addr *saddr, __be16 sport, + const struct in6_addr *daddr, + unsigned short hnum, int dif) { - int score = -1; + int score; + struct inet_sock *inet; - if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum && - sk->sk_family == PF_INET6) { - struct inet_sock *inet = inet_sk(sk); + if (!net_eq(sock_net(sk), net) || + udp_sk(sk)->udp_port_hash != hnum || + sk->sk_family != PF_INET6) + return -1; - if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) + if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) + return -1; + + score = 0; + inet = inet_sk(sk); + + if (inet->inet_dport) { + if (inet->inet_dport != sport) return -1; - score = 0; - if (inet->inet_dport) { - if (inet->inet_dport != sport) - return -1; - score++; - } - if (!ipv6_addr_any(&sk->sk_v6_daddr)) { - if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) - return -1; - score++; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - return -1; - score++; - } + score++; } + + if (!ipv6_addr_any(&sk->sk_v6_daddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) + return -1; + score++; + } + + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + return -1; + score++; + } + return score; } - /* called with read_rcu_lock() */ static struct sock 
*udp6_lib_lookup2(struct net *net, const struct in6_addr *saddr, __be16 sport, @@ -357,7 +370,8 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, struct sock *sk; const struct ipv6hdr *iph = ipv6_hdr(skb); - if (unlikely(sk = skb_steal_sock(skb))) + sk = skb_steal_sock(skb); + if (unlikely(sk)) return sk; return __udp6_lib_lookup(dev_net(skb_dst(skb)->dev), &iph->saddr, sport, &iph->daddr, dport, inet6_iif(skb), @@ -424,10 +438,10 @@ try_again: } if (skb_csum_unnecessary(skb)) - err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), - msg->msg_iov, copied); + err = skb_copy_datagram_msg(skb, sizeof(struct udphdr), + msg, copied); else { - err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); + err = skb_copy_and_csum_datagram_msg(skb, sizeof(struct udphdr), msg); if (err == -EINVAL) goto csum_copy_err; } @@ -577,6 +591,7 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if (!ipv6_addr_any(&sk->sk_v6_daddr)) { sock_rps_save_rxhash(sk, skb); sk_mark_napi_id(sk, skb); + sk_incoming_cpu_update(sk); } rc = sock_queue_rcv_skb(sk, skb); @@ -659,15 +674,13 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { if (up->pcrlen == 0) { /* full coverage was set */ - LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: partial coverage" - " %d while full coverage %d requested\n", - UDP_SKB_CB(skb)->cscov, skb->len); + net_dbg_ratelimited("UDPLITE6: partial coverage %d while full coverage %d requested\n", + UDP_SKB_CB(skb)->cscov, skb->len); goto drop; } if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { - LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: coverage %d " - "too small, need min %d\n", - UDP_SKB_CB(skb)->cscov, up->pcrlen); + net_dbg_ratelimited("UDPLITE6: coverage %d too small, need min %d\n", + UDP_SKB_CB(skb)->cscov, up->pcrlen); goto drop; } } @@ -760,9 +773,9 @@ static void udp6_csum_zero_error(struct sk_buff *skb) /* RFC 2460 section 8.1 says 
that we SHOULD log * this error. Well, it is reasonable. */ - LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n", - &ipv6_hdr(skb)->saddr, ntohs(udp_hdr(skb)->source), - &ipv6_hdr(skb)->daddr, ntohs(udp_hdr(skb)->dest)); + net_dbg_ratelimited("IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n", + &ipv6_hdr(skb)->saddr, ntohs(udp_hdr(skb)->source), + &ipv6_hdr(skb)->daddr, ntohs(udp_hdr(skb)->dest)); } /* @@ -771,7 +784,7 @@ static void udp6_csum_zero_error(struct sk_buff *skb) */ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, const struct in6_addr *saddr, const struct in6_addr *daddr, - struct udp_table *udptable) + struct udp_table *udptable, int proto) { struct sock *sk, *stack[256 / sizeof(struct sock *)]; const struct udphdr *uh = udp_hdr(skb); @@ -781,6 +794,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, int dif = inet6_iif(skb); unsigned int count = 0, offset = offsetof(typeof(*sk), sk_nulls_node); unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10); + bool inner_flushed = false; if (use_hash2) { hash2_any = udp6_portaddr_hash(net, &in6addr_any, hnum) & @@ -803,6 +817,7 @@ start_lookup: (uh->check || udp_sk(sk)->no_check6_rx)) { if (unlikely(count == ARRAY_SIZE(stack))) { flush_stack(stack, count, skb, ~0); + inner_flushed = true; count = 0; } stack[count++] = sk; @@ -821,7 +836,10 @@ start_lookup: if (count) { flush_stack(stack, count, skb, count - 1); } else { - kfree_skb(skb); + if (!inner_flushed) + UDP_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI, + proto == IPPROTO_UDPLITE); + consume_skb(skb); } return 0; } @@ -873,7 +891,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, */ if (ipv6_addr_is_multicast(daddr)) return __udp6_lib_mcast_deliver(net, skb, - saddr, daddr, udptable); + saddr, daddr, udptable, proto); /* Unicast */ @@ -925,14 +943,11 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, return 0; 
short_packet: - LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: From [%pI6c]:%u %d/%d to [%pI6c]:%u\n", - proto == IPPROTO_UDPLITE ? "-Lite" : "", - saddr, - ntohs(uh->source), - ulen, - skb->len, - daddr, - ntohs(uh->dest)); + net_dbg_ratelimited("UDP%sv6: short packet: From [%pI6c]:%u %d/%d to [%pI6c]:%u\n", + proto == IPPROTO_UDPLITE ? "-Lite" : "", + saddr, ntohs(uh->source), + ulen, skb->len, + daddr, ntohs(uh->dest)); goto discard; csum_error: UDP6_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); @@ -1025,7 +1040,8 @@ static int udp_v6_push_pending_frames(struct sock *sk) fl6 = &inet->cork.fl.u.ip6; /* Grab the skbuff where UDP header space exists. */ - if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) + skb = skb_peek(&sk->sk_write_queue); + if (skb == NULL) goto out; /* @@ -1284,7 +1300,7 @@ back_from_confirm: /* ... which is an evident application bug. --ANK */ release_sock(sk); - LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n"); + net_dbg_ratelimited("udp cork app bug 2\n"); err = -EINVAL; goto out; } @@ -1296,7 +1312,7 @@ do_append_data: dontfrag = np->dontfrag; up->len += ulen; getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; - err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen, + err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr), hlimit, tclass, opt, &fl6, (struct rt6_info *)dst, corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 6b8f543f6ac6..b6aa8ed18257 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -42,11 +42,11 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM | + SKB_GSO_TUNNEL_REMCSUM | SKB_GSO_GRE | SKB_GSO_GRE_CSUM | SKB_GSO_IPIP | - SKB_GSO_SIT | - SKB_GSO_MPLS) || + SKB_GSO_SIT) || !(type & (SKB_GSO_UDP)))) goto out; |