From e30b38c298b55e09456d3ccbc1df2f3e2e8dc6e9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 15 Apr 2010 09:13:03 +0000 Subject: ip: Fix ip_dev_loopback_xmit() Eric Paris got the following trace with a linux-next kernel [ 14.203970] BUG: using smp_processor_id() in preemptible [00000000] code: avahi-daemon/2093 [ 14.204025] caller is netif_rx+0xfa/0x110 [ 14.204035] Call Trace: [ 14.204064] [] debug_smp_processor_id+0x105/0x110 [ 14.204070] [] netif_rx+0xfa/0x110 [ 14.204090] [] ip_dev_loopback_xmit+0x71/0xa0 [ 14.204095] [] ip_mc_output+0x192/0x2c0 [ 14.204099] [] ip_local_out+0x20/0x30 [ 14.204105] [] ip_push_pending_frames+0x28d/0x3d0 [ 14.204119] [] udp_push_pending_frames+0x14c/0x400 [ 14.204125] [] udp_sendmsg+0x39c/0x790 [ 14.204137] [] inet_sendmsg+0x45/0x80 [ 14.204149] [] sock_sendmsg+0xf1/0x110 [ 14.204189] [] sys_sendmsg+0x20c/0x380 [ 14.204233] [] system_call_fastpath+0x16/0x1b While the current linux-2.6 kernel doesn't emit this warning, the bug is latent and might cause unexpected failures. ip_dev_loopback_xmit() runs in process context, preemption enabled, so it must call netif_rx_ni() instead of netif_rx(), to make sure that we process pending software interrupts. The same change is made for ip6_dev_loopback_xmit(). Reported-by: Eric Paris Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv6/ip6_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6/ip6_output.c') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 16c4391f952b..65f9c379df38 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -108,7 +108,7 @@ static int ip6_dev_loopback_xmit(struct sk_buff *newskb) newskb->ip_summed = CHECKSUM_UNNECESSARY; WARN_ON(!skb_dst(newskb)); - netif_rx(newskb); + netif_rx_ni(newskb); return 0; } -- cgit v1.2.1 From f2228f785a9d97307aa8ba709088cfda6c3df73f Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Sun, 18 Apr 2010 16:58:22 +0000 Subject: ipv6: allow to send packet after receiving ICMPv6 Too Big message with MTU field less than IPV6_MIN_MTU According to RFC2460, PMTU is set to the IPv6 Minimum Link MTU (1280) and a fragment header should always be included after a node receives a Too Big message reporting a PMTU less than the IPv6 Minimum Link MTU. After receiving an ICMPv6 Too Big message reporting a PMTU less than the IPv6 Minimum Link MTU, sctp *can't* send any data/control chunk whose total length, including the IPv6 header and IPv6 extension headers, is less than IPV6_MIN_MTU (1280 bytes). The failure occurred in ip6_fragment(); for the reason, see the following (taking a SHUTDOWN chunk as an example): sctp_packet_transmit (SHUTDOWN chunk, len=16 byte) |------sctp_v6_xmit (local_df=0) |------ip6_xmit |------ip6_output (dst_allfrag is true) |------ip6_fragment In ip6_fragment(), for local_df=0, the packet is dropped and EMSGSIZE is returned. The patch fixes it by adding a check of skb->len. In this case, IPv6 does not fragment the upper-protocol data; it only adds a fragment header before it. Signed-off-by: Shan Wei Signed-off-by: David S. 
Miller --- net/ipv6/ip6_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6/ip6_output.c') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 65f9c379df38..75d5ef830097 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -629,7 +629,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) /* We must not fragment if the socket is set to force MTU discovery * or if the skb it not generated by a local socket. */ - if (!skb->local_df) { + if (!skb->local_df && skb->len > mtu) { skb->dev = skb_dst(skb)->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), -- cgit v1.2.1 From 13b52cd44670e3359055e9918d0e766d89836425 Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Fri, 23 Apr 2010 11:26:08 +0000 Subject: IPv6: Add dontfrag argument to relevant functions Add dontfrag argument to relevant functions for IPV6_DONTFRAG support, as well as allowing the value to be passed-in via ancillary cmsg data. Signed-off-by: Brian Haley Signed-off-by: David S. 
Miller --- net/ipv6/ip6_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6/ip6_output.c') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 263d4cf5a8de..54d43dd1f085 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1105,7 +1105,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl, - struct rt6_info *rt, unsigned int flags) + struct rt6_info *rt, unsigned int flags, int dontfrag) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); -- cgit v1.2.1 From 4b340ae20d0e2366792abe70f46629e576adaf5e Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Fri, 23 Apr 2010 11:26:09 +0000 Subject: IPv6: Complete IPV6_DONTFRAG support Finally add support to detect a local IPV6_DONTFRAG event and return the relevant data to the user if they've enabled IPV6_RECVPATHMTU on the socket. The next recvmsg() will return no data, but have an IPV6_PATHMTU as ancillary data. Signed-off-by: Brian Haley Signed-off-by: David S. 
Miller --- net/ipv6/ip6_output.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) (limited to 'net/ipv6/ip6_output.c') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 54d43dd1f085..61e2bef56090 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1219,15 +1219,23 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, */ inet->cork.length += length; - if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) && - (rt->u.dst.dev->features & NETIF_F_UFO)) { + if (length > mtu) { + int proto = sk->sk_protocol; + if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){ + ipv6_local_rxpmtu(sk, fl, mtu-exthdrlen); + return -EMSGSIZE; + } - err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len, - fragheaderlen, transhdrlen, mtu, - flags); - if (err) - goto error; - return 0; + if (proto == IPPROTO_UDP && + (rt->u.dst.dev->features & NETIF_F_UFO)) { + + err = ip6_ufo_append_data(sk, getfrag, from, length, + hh_len, fragheaderlen, + transhdrlen, mtu, flags); + if (err) + goto error; + return 0; + } } if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) -- cgit v1.2.1 From 83d7eb2979cd3390c375470225dd2d8f2009bc70 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 30 Apr 2010 16:42:08 -0700 Subject: ipv6: cleanup: remove unneeded null check We dereference "sk" unconditionally elsewhere in the function. This was left over from: b30bd282 "ip6_xmit: remove unnecessary NULL ptr check". According to that commit message, "the sk argument to ip6_xmit is never NULL nowadays since the skb->priority assigment expects a valid socket." Signed-off-by: Dan Carpenter Signed-off-by: David S. 
Miller --- net/ipv6/ip6_output.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net/ipv6/ip6_output.c') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 7db09c3f5289..e7a5f17d5e95 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -218,8 +218,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, } kfree_skb(skb); skb = skb2; - if (sk) - skb_set_owner_w(skb, sk); + skb_set_owner_w(skb, sk); } if (opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); -- cgit v1.2.1