summaryrefslogtreecommitdiff
path: root/net/ipv4/route.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-05-02 16:40:27 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-05-02 16:40:27 -0700
commit8d65b08debc7e62b2c6032d7fe7389d895b92cbc (patch)
tree0c3141b60c3a03cc32742b5750c5e763b9dae489 /net/ipv4/route.c
parent5a0387a8a8efb90ae7fea1e2e5c62de3efa74691 (diff)
parent5d15af6778b8e4ed1fd41b040283af278e7a9a72 (diff)
downloadlinux-next-8d65b08debc7e62b2c6032d7fe7389d895b92cbc.tar.gz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Millar: "Here are some highlights from the 2065 networking commits that happened this development cycle: 1) XDP support for IXGBE (John Fastabend) and thunderx (Sunil Kowuri) 2) Add a generic XDP driver, so that anyone can test XDP even if they lack a networking device whose driver has explicit XDP support (me). 3) Sparc64 now has an eBPF JIT too (me) 4) Add a BPF program testing framework via BPF_PROG_TEST_RUN (Alexei Starovoitov) 5) Make netfitler network namespace teardown less expensive (Florian Westphal) 6) Add symmetric hashing support to nft_hash (Laura Garcia Liebana) 7) Implement NAPI and GRO in netvsc driver (Stephen Hemminger) 8) Support TC flower offload statistics in mlxsw (Arkadi Sharshevsky) 9) Multiqueue support in stmmac driver (Joao Pinto) 10) Remove TCP timewait recycling, it never really could possibly work well in the real world and timestamp randomization really zaps any hint of usability this feature had (Soheil Hassas Yeganeh) 11) Support level3 vs level4 ECMP route hashing in ipv4 (Nikolay Aleksandrov) 12) Add socket busy poll support to epoll (Sridhar Samudrala) 13) Netlink extended ACK support (Johannes Berg, Pablo Neira Ayuso, and several others) 14) IPSEC hw offload infrastructure (Steffen Klassert)" * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (2065 commits) tipc: refactor function tipc_sk_recv_stream() tipc: refactor function tipc_sk_recvmsg() net: thunderx: Optimize page recycling for XDP net: thunderx: Support for XDP header adjustment net: thunderx: Add support for XDP_TX net: thunderx: Add support for XDP_DROP net: thunderx: Add basic XDP support net: thunderx: Cleanup receive buffer allocation net: thunderx: Optimize CQE_TX handling net: thunderx: Optimize RBDR descriptor handling net: thunderx: Support for page recycling ipx: call ipxitf_put() in ioctl error path net: sched: add helpers to handle extended actions qed*: Fix issues in the ptp filter config implementation. qede: Fix concurrency issue in PTP Tx path processing. stmmac: Add support for SIMATIC IOT2000 platform net: hns: fix ethtool_get_strings overflow in hns driver tcp: fix wraparound issue in tcp_lp bpf, arm64: fix jit branch offset related to ldimm64 bpf, arm64: implement jiting of BPF_XADD ...
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r--net/ipv4/route.c123
1 files changed, 87 insertions, 36 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d9724889ff09..655d9eebe43e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1250,15 +1250,11 @@ static void set_class_tag(struct rtable *rt, u32 tag)
static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
{
- unsigned int advmss = dst_metric_raw(dst, RTAX_ADVMSS);
+ unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr);
+ unsigned int advmss = max_t(unsigned int, dst->dev->mtu - header_size,
+ ip_rt_min_advmss);
- if (advmss == 0) {
- advmss = max_t(unsigned int, dst->dev->mtu - 40,
- ip_rt_min_advmss);
- if (advmss > 65535 - 40)
- advmss = 65535 - 40;
- }
- return advmss;
+ return min(advmss, IPV4_MAX_PMTU - header_size);
}
static unsigned int ipv4_mtu(const struct dst_entry *dst)
@@ -1734,45 +1730,97 @@ out:
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
-
/* To make ICMP packets follow the right flow, the multipath hash is
- * calculated from the inner IP addresses in reverse order.
+ * calculated from the inner IP addresses.
*/
-static int ip_multipath_icmp_hash(struct sk_buff *skb)
+static void ip_multipath_l3_keys(const struct sk_buff *skb,
+ struct flow_keys *hash_keys)
{
const struct iphdr *outer_iph = ip_hdr(skb);
- struct icmphdr _icmph;
+ const struct iphdr *inner_iph;
const struct icmphdr *icmph;
struct iphdr _inner_iph;
- const struct iphdr *inner_iph;
+ struct icmphdr _icmph;
+
+ hash_keys->addrs.v4addrs.src = outer_iph->saddr;
+ hash_keys->addrs.v4addrs.dst = outer_iph->daddr;
+ if (likely(outer_iph->protocol != IPPROTO_ICMP))
+ return;
if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
- goto standard_hash;
+ return;
icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
&_icmph);
if (!icmph)
- goto standard_hash;
+ return;
if (icmph->type != ICMP_DEST_UNREACH &&
icmph->type != ICMP_REDIRECT &&
icmph->type != ICMP_TIME_EXCEEDED &&
- icmph->type != ICMP_PARAMETERPROB) {
- goto standard_hash;
- }
+ icmph->type != ICMP_PARAMETERPROB)
+ return;
inner_iph = skb_header_pointer(skb,
outer_iph->ihl * 4 + sizeof(_icmph),
sizeof(_inner_iph), &_inner_iph);
if (!inner_iph)
- goto standard_hash;
+ return;
+ hash_keys->addrs.v4addrs.src = inner_iph->saddr;
+ hash_keys->addrs.v4addrs.dst = inner_iph->daddr;
+}
- return fib_multipath_hash(inner_iph->daddr, inner_iph->saddr);
+/* if skb is set it will be used and fl4 can be NULL */
+int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
+ const struct sk_buff *skb)
+{
+ struct net *net = fi->fib_net;
+ struct flow_keys hash_keys;
+ u32 mhash;
-standard_hash:
- return fib_multipath_hash(outer_iph->saddr, outer_iph->daddr);
-}
+ switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
+ case 0:
+ memset(&hash_keys, 0, sizeof(hash_keys));
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ if (skb) {
+ ip_multipath_l3_keys(skb, &hash_keys);
+ } else {
+ hash_keys.addrs.v4addrs.src = fl4->saddr;
+ hash_keys.addrs.v4addrs.dst = fl4->daddr;
+ }
+ break;
+ case 1:
+ /* skb is currently provided only when forwarding */
+ if (skb) {
+ unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
+ struct flow_keys keys;
+
+ /* short-circuit if we already have L4 hash present */
+ if (skb->l4_hash)
+ return skb_get_hash_raw(skb) >> 1;
+ memset(&hash_keys, 0, sizeof(hash_keys));
+ skb_flow_dissect_flow_keys(skb, &keys, flag);
+ hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
+ hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
+ hash_keys.ports.src = keys.ports.src;
+ hash_keys.ports.dst = keys.ports.dst;
+ hash_keys.basic.ip_proto = keys.basic.ip_proto;
+ } else {
+ memset(&hash_keys, 0, sizeof(hash_keys));
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ hash_keys.addrs.v4addrs.src = fl4->saddr;
+ hash_keys.addrs.v4addrs.dst = fl4->daddr;
+ hash_keys.ports.src = fl4->fl4_sport;
+ hash_keys.ports.dst = fl4->fl4_dport;
+ hash_keys.basic.ip_proto = fl4->flowi4_proto;
+ }
+ break;
+ }
+ mhash = flow_hash_from_keys(&hash_keys);
+ return mhash >> 1;
+}
+EXPORT_SYMBOL_GPL(fib_multipath_hash);
#endif /* CONFIG_IP_ROUTE_MULTIPATH */
static int ip_mkroute_input(struct sk_buff *skb,
@@ -1782,12 +1830,8 @@ static int ip_mkroute_input(struct sk_buff *skb,
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (res->fi && res->fi->fib_nhs > 1) {
- int h;
+ int h = fib_multipath_hash(res->fi, NULL, skb);
- if (unlikely(ip_hdr(skb)->protocol == IPPROTO_ICMP))
- h = ip_multipath_icmp_hash(skb);
- else
- h = fib_multipath_hash(saddr, daddr);
fib_select_multipath(res, h);
}
#endif
@@ -2203,7 +2247,7 @@ add:
*/
struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
- int mp_hash)
+ const struct sk_buff *skb)
{
struct net_device *dev_out = NULL;
__u8 tos = RT_FL_TOS(fl4);
@@ -2366,7 +2410,7 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
goto make_route;
}
- fib_select_path(net, &res, fl4, mp_hash);
+ fib_select_path(net, &res, fl4, skb);
dev_out = FIB_RES_DEV(res);
fl4->flowi4_oif = dev_out->ifindex;
@@ -2586,7 +2630,8 @@ nla_put_failure:
return -EMSGSIZE;
}
-static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
+static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(in_skb->sk);
struct rtmsg *rtm;
@@ -2602,7 +2647,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
u32 table_id = RT_TABLE_MAIN;
kuid_t uid;
- err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
+ err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy,
+ extack);
if (err < 0)
goto errout;
@@ -2620,10 +2666,6 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
skb_reset_mac_header(skb);
skb_reset_network_header(skb);
- /* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
- ip_hdr(skb)->protocol = IPPROTO_UDP;
- skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
-
src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
@@ -2633,6 +2675,15 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
else
uid = (iif ? INVALID_UID : current_uid());
+ /* Bugfix: need to give ip_route_input enough of an IP header to
+ * not gag.
+ */
+ ip_hdr(skb)->protocol = IPPROTO_UDP;
+ ip_hdr(skb)->saddr = src;
+ ip_hdr(skb)->daddr = dst;
+
+ skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
+
memset(&fl4, 0, sizeof(fl4));
fl4.daddr = dst;
fl4.saddr = src;