From 7c6a329e444725f24c02192ac493d8a7cd9fa638 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Fri, 12 Sep 2008 03:11:54 +0200 Subject: [Bluetooth] Fix regression from using default link policy To speed up the Simple Pairing connection setup, the support for the default link policy has been enabled. This is in contrast to settings the link policy on every connection setup. Using the default link policy is the preferred way since there is no need to dynamically change it for every connection. For backward compatibility reason and to support old userspace the HCISETLINKPOL ioctl has been switched over to using hci_request() to issue the HCI command for setting the default link policy instead of just storing it in the HCI device structure. However the hci_request() can only be issued when the device is brought up. If used on a device that is registered, but still down it will timeout and fail. This is problematic since the command is put on the TX queue and the Bluetooth core tries to submit it to hardware that is not ready yet. The timeout for these requests is 10 seconds and this causes a significant regression when setting up a new device. The userspace can perfectly handle a failure of the HCISETLINKPOL ioctl and will re-submit it later, but the 10 seconds delay causes a problem. So in case hci_request() is called on a device that is still down, just fail it with ENETDOWN to indicate what happens. Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index f5b21cb93699..278a3ace14f6 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -164,6 +164,9 @@ static inline int hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev * { int ret; + if (!test_bit(HCI_UP, &hdev->flags)) + return -ENETDOWN; + /* Serialize all requests */ hci_req_lock(hdev); ret = __hci_request(hdev, req, opt, timeout); -- cgit v1.2.1 From 93821778def10ec1e69aa3ac10adee975dad4ff3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Sep 2008 11:48:46 -0700 Subject: udp: Fix rcv socket locking The previous patch in response to the recursive locking on IPsec reception is broken as it tries to drop the BH socket lock while in user context. This patch fixes it by shrinking the section protected by the socket lock to sock_queue_rcv_skb only. The only reason we added the lock is for the accounting which happens in that function. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/udp.c | 62 +++++++++++++++++++++++++++++++--------------------------- 1 file changed, 33 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8e42fbbd5761..57e26fa66185 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -951,6 +951,27 @@ int udp_disconnect(struct sock *sk, int flags) return 0; } +static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + int is_udplite = IS_UDPLITE(sk); + int rc; + + if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) { + /* Note that an ENOMEM error is charged twice */ + if (rc == -ENOMEM) + UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, + is_udplite); + goto drop; + } + + return 0; + +drop: + UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + kfree_skb(skb); + return -1; +} + /* returns: * -1: error * 0: success @@ -989,9 +1010,7 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) up->encap_rcv != NULL) { int ret; - bh_unlock_sock(sk); ret = (*up->encap_rcv)(sk, skb); - bh_lock_sock(sk); if (ret <= 0) { UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INDATAGRAMS, @@ -1044,17 +1063,16 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) goto drop; } - if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { - /* Note that an ENOMEM error is charged twice */ - if (rc == -ENOMEM) { - UDP_INC_STATS_BH(sock_net(sk), - UDP_MIB_RCVBUFERRORS, is_udplite); - atomic_inc(&sk->sk_drops); - } - goto drop; - } + rc = 0; - return 0; + bh_lock_sock(sk); + if (!sock_owned_by_user(sk)) + rc = __udp_queue_rcv_skb(sk, skb); + else + sk_add_backlog(sk, skb); + bh_unlock_sock(sk); + + return rc; drop: UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); @@ -1092,15 +1110,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, skb1 = skb_clone(skb, GFP_ATOMIC); if (skb1) { - int ret = 0; - - bh_lock_sock(sk); - if (!sock_owned_by_user(sk)) - ret = udp_queue_rcv_skb(sk, skb1); - else - sk_add_backlog(sk, skb1); - bh_unlock_sock(sk); - + int ret = udp_queue_rcv_skb(sk, skb1); if (ret > 0) /* we should probably re-process instead * of dropping packets here. */ @@ -1195,13 +1205,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], uh->dest, inet_iif(skb), udptable); if (sk != NULL) { - int ret = 0; - bh_lock_sock(sk); - if (!sock_owned_by_user(sk)) - ret = udp_queue_rcv_skb(sk, skb); - else - sk_add_backlog(sk, skb); - bh_unlock_sock(sk); + int ret = udp_queue_rcv_skb(sk, skb); sock_put(sk); /* a return value > 0 means to resubmit the input, but @@ -1494,7 +1498,7 @@ struct proto udp_prot = { .sendmsg = udp_sendmsg, .recvmsg = udp_recvmsg, .sendpage = udp_sendpage, - .backlog_rcv = udp_queue_rcv_skb, + .backlog_rcv = __udp_queue_rcv_skb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, .get_port = udp_v4_get_port, -- cgit v1.2.1 From 45e9c0de2e86485f8b6633fd64ab19cfbff167f6 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 15 Sep 2008 16:43:18 -0700 Subject: warn: Turn the netdev timeout WARN_ON() into a WARN() this patch turns the netdev timeout WARN_ON_ONCE() into a WARN_ONCE(), so that the device and driver names are inside the warning message. This helps automated tools like kerneloops.org to collect the data and do statistics, as well as making it more likely that humans cut-n-paste the important message as part of a bugreport. Signed-off-by: Arjan van de Ven Signed-off-by: Linus Torvalds --- net/sched/sch_generic.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 9634091ee2f0..ec0a0839ce51 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -215,10 +215,9 @@ static void dev_watchdog(unsigned long arg) time_after(jiffies, (dev->trans_start + dev->watchdog_timeo))) { char drivername[64]; - printk(KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n", + WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n", dev->name, netdev_drivername(dev, drivername, 64)); dev->tx_timeout(dev); - WARN_ON_ONCE(1); } if (!mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + -- cgit v1.2.1 From a3028b8ed1e1e9930bfa70ce4555fb7f9fad3dcc Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 18 Sep 2008 02:48:25 -0700 Subject: sctp: set the skb->ip_summed correctly when sending over loopback. Loopback used to clobber the ip_summed filed which sctp then used to figure out if it needed to do checksumming or not. Now that loopback doesn't do that any more, sctp needs to set the ip_summed field correctly. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/output.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/output.c b/net/sctp/output.c index 0dc4a7dfb234..225c7123c41f 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -533,7 +533,8 @@ int sctp_packet_transmit(struct sctp_packet *packet) if (!(dst->dev->features & NETIF_F_NO_CSUM)) { crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len); crc32 = sctp_end_cksum(crc32); - } + } else + nskb->ip_summed = CHECKSUM_UNNECESSARY; /* 3) Put the resultant value into the checksum field in the * common header, and leave the rest of the bits unchanged. -- cgit v1.2.1 From 0ef46e285c062cbe35d60c0adbff96f530d31c86 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 18 Sep 2008 16:27:38 -0700 Subject: sctp: do not enable peer features if we can't do them. Do not enable peer features like addip and auth, if they are administratively disabled localy. If the peer resports that he supports something that we don't, neither end can use it so enabling it is pointless. This solves a problem when talking to a peer that has auth and addip enabled while we do not. Found by Andrei Pelinescu-Onciul . Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/sm_make_chunk.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index e8ca4e54981f..fe94f42fa068 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1886,11 +1886,13 @@ static void sctp_process_ext_param(struct sctp_association *asoc, /* if the peer reports AUTH, assume that he * supports AUTH. */ - asoc->peer.auth_capable = 1; + if (sctp_auth_enable) + asoc->peer.auth_capable = 1; break; case SCTP_CID_ASCONF: case SCTP_CID_ASCONF_ACK: - asoc->peer.asconf_capable = 1; + if (sctp_addip_enable) + asoc->peer.asconf_capable = 1; break; default: break; @@ -2460,6 +2462,9 @@ do_addr_param: break; case SCTP_PARAM_SET_PRIMARY: + if (!sctp_addip_enable) + goto fall_through; + addr_param = param.v + sizeof(sctp_addip_param_t); af = sctp_get_af_specific(param_type2af(param.p->type)); -- cgit v1.2.1 From add52379dde2e5300e2d574b172e62c6cf43b3d3 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 18 Sep 2008 16:28:27 -0700 Subject: sctp: Fix oops when INIT-ACK indicates that peer doesn't support AUTH If INIT-ACK is received with SupportedExtensions parameter which indicates that the peer does not support AUTH, the packet will be silently ignore, and sctp_process_init() do cleanup all of the transports in the association. When T1-Init timer is expires, OOPS happen while we try to choose a different init transport. The solution is to only clean up the non-active transports, i.e the ones that the peer added. However, that introduces a problem with sctp_connectx(), because we don't mark the proper state for the transports provided by the user. So, we'll simply mark user-provided transports as ACTIVE. That will allow INIT retransmissions to work properly in the sctp_connectx() context and prevent the crash. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/associola.c | 9 +++++---- net/sctp/sm_make_chunk.c | 6 ++---- 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 8472b8b349c4..abd51cef2413 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -599,11 +599,12 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, /* Check to see if this is a duplicate. */ peer = sctp_assoc_lookup_paddr(asoc, addr); if (peer) { + /* An UNKNOWN state is only set on transports added by + * user in sctp_connectx() call. Such transports should be + * considered CONFIRMED per RFC 4960, Section 5.4. + */ if (peer->state == SCTP_UNKNOWN) { - if (peer_state == SCTP_ACTIVE) - peer->state = SCTP_ACTIVE; - if (peer_state == SCTP_UNCONFIRMED) - peer->state = SCTP_UNCONFIRMED; + peer->state = SCTP_ACTIVE; } return peer; } diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index fe94f42fa068..b599cbba4fbe 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2321,12 +2321,10 @@ clean_up: /* Release the transport structures. */ list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) { transport = list_entry(pos, struct sctp_transport, transports); - list_del_init(pos); - sctp_transport_free(transport); + if (transport->state != SCTP_ACTIVE) + sctp_assoc_rm_peer(asoc, transport); } - asoc->peer.transport_count = 0; - nomem: return 0; } -- cgit v1.2.1 From ad55dcaff0e34269f86975ce2ea0da22e9eb74a1 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Sat, 20 Sep 2008 22:05:50 -0700 Subject: netdev: simple_tx_hash shouldn't hash inside fragments Currently simple_tx_hash is hashing inside of udp fragments. As a result packets are getting getting sent to all queues when they shouldn't be. This causes a serious performance regression which can be seen by sending UDP frames larger than mtu on multiqueue devices. This change will make it so that fragments are hashed only as IP datagrams w/o any protocol information. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/core/dev.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index e719ed29310f..e8eb2b478344 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -122,6 +122,7 @@ #include #include #include +#include #include #include #include @@ -1667,7 +1668,7 @@ static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb) { u32 addr1, addr2, ports; u32 hash, ihl; - u8 ip_proto; + u8 ip_proto = 0; if (unlikely(!simple_tx_hashrnd_initialized)) { get_random_bytes(&simple_tx_hashrnd, 4); @@ -1676,7 +1677,8 @@ static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb) switch (skb->protocol) { case __constant_htons(ETH_P_IP): - ip_proto = ip_hdr(skb)->protocol; + if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))) + ip_proto = ip_hdr(skb)->protocol; addr1 = ip_hdr(skb)->saddr; addr2 = ip_hdr(skb)->daddr; ihl = ip_hdr(skb)->ihl; -- cgit v1.2.1 From 43f59c89399fd76883a06c551f24794e98409432 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 21 Sep 2008 21:28:51 -0700 Subject: net: Remove __skb_insert() calls outside of skbuff internals. This minor cleanup simplifies later changes which will convert struct sk_buff and friends over to using struct list_head. Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 4 ++-- net/sctp/ulpqueue.c | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 85627f83665f..cbfe13d5f423 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4156,7 +4156,7 @@ drop: skb1 = skb1->prev; } } - __skb_insert(skb, skb1, skb1->next, &tp->out_of_order_queue); + __skb_queue_after(&tp->out_of_order_queue, skb1, skb); /* And clean segments covered by new one as whole. */ while ((skb1 = skb->next) != @@ -4254,7 +4254,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, memcpy(nskb->head, skb->head, header); memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start; - __skb_insert(nskb, skb->prev, skb, list); + __skb_queue_before(list, skb, nskb); skb_set_owner_r(nskb, sk); /* Copy data, releasing collapsed skbs. */ diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 5061a26c5028..7b23803343cc 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -317,7 +317,7 @@ static void sctp_ulpq_store_reasm(struct sctp_ulpq *ulpq, } /* Insert before pos. */ - __skb_insert(sctp_event2skb(event), pos->prev, pos, &ulpq->reasm); + __skb_queue_before(&ulpq->reasm, pos, sctp_event2skb(event)); } @@ -825,8 +825,7 @@ static void sctp_ulpq_store_ordered(struct sctp_ulpq *ulpq, /* Insert before pos. */ - __skb_insert(sctp_event2skb(event), pos->prev, pos, &ulpq->lobby); - + __skb_queue_before(&ulpq->lobby, pos, sctp_event2skb(event)); } static struct sctp_ulpevent *sctp_ulpq_order(struct sctp_ulpq *ulpq, -- cgit v1.2.1 From 2cdc55751c33829f00510e0104562d0f8d8a9b85 Mon Sep 17 00:00:00 2001 From: Kaihui Luo Date: Mon, 22 Sep 2008 19:02:36 -0700 Subject: netfilter: xt_time gives a wrong monthday in a leap year The function localtime_3 in xt_time.c gives a wrong monthday in a leap year after 28th 2. calculating monthday should use the array days_since_leapyear[] not days_since_year[] in a leap year. Signed-off-by: Kaihui Luo Acked-by: Jan Engelhardt Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/netfilter/xt_time.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index 9f328593287e..307a2c3c2df4 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -136,17 +136,19 @@ static void localtime_3(struct xtm *r, time_t time) * from w repeatedly while counting.) */ if (is_leap(year)) { + /* use days_since_leapyear[] in a leap year */ for (i = ARRAY_SIZE(days_since_leapyear) - 1; - i > 0 && days_since_year[i] > w; --i) + i > 0 && days_since_leapyear[i] > w; --i) /* just loop */; + r->monthday = w - days_since_leapyear[i] + 1; } else { for (i = ARRAY_SIZE(days_since_year) - 1; i > 0 && days_since_year[i] > w; --i) /* just loop */; + r->monthday = w - days_since_year[i] + 1; } r->month = i + 1; - r->monthday = w - days_since_year[i] + 1; return; } -- cgit v1.2.1 From d48abfecea8513cfd2fd7e341439c1b8a28e9ff4 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Mon, 22 Sep 2008 19:20:51 -0700 Subject: net: em_cmp.c use unaligned access helpers Signed-off-by: Harvey Harrison Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/sched/em_cmp.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c index cc49c932641d..bc450397487a 100644 --- a/net/sched/em_cmp.c +++ b/net/sched/em_cmp.c @@ -14,6 +14,7 @@ #include #include #include +#include #include static inline int cmp_needs_transformation(struct tcf_em_cmp *cmp) @@ -37,8 +38,7 @@ static int em_cmp_match(struct sk_buff *skb, struct tcf_ematch *em, break; case TCF_EM_ALIGN_U16: - val = *ptr << 8; - val |= *(ptr+1); + val = get_unaligned_be16(ptr); if (cmp_needs_transformation(cmp)) val = be16_to_cpu(val); @@ -47,10 +47,7 @@ static int em_cmp_match(struct sk_buff *skb, struct tcf_ematch *em, case TCF_EM_ALIGN_U32: /* Worth checking boundries? The branching seems * to get worse. Visit again. */ - val = *ptr << 24; - val |= *(ptr+1) << 16; - val |= *(ptr+2) << 8; - val |= *(ptr+3); + val = get_unaligned_be32(ptr); if (cmp_needs_transformation(cmp)) val = be32_to_cpu(val); -- cgit v1.2.1 From 5e687220a047dc4f0c2fb9ce886359a23075ddbc Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 22 Sep 2008 19:24:45 -0700 Subject: net/atm/lec.c: drop code after return The break after the return serves no purpose. Signed-off-by: Julia Lawall Reviewed-by: Richard Genoud Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/atm/lec.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/atm/lec.c b/net/atm/lec.c index 5799fb52365a..8f701cde5945 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -1931,7 +1931,6 @@ static struct atm_vcc *lec_arp_resolve(struct lec_priv *priv, switch (priv->lane_version) { case 1: return priv->mcast_vcc; - break; case 2: /* LANE2 wants arp for multicast addresses */ if (!compare_ether_addr(mac_to_find, bus_mac)) return priv->mcast_vcc; -- cgit v1.2.1 From 5c1824587f0797373c95719a196f6098f7c6d20c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 22 Sep 2008 19:48:19 -0700 Subject: ipsec: Fix xfrm_state_walk race MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As discovered by Timo Teräs, the currently xfrm_state_walk scheme is racy because if a second dump finishes before the first, we may free xfrm states that the first dump would walk over later. This patch fixes this by storing the dumps in a list in order to calculate the correct completion counter which cures this problem. I've expanded netlink_cb in order to accomodate the extra state related to this. It shouldn't be a big deal since netlink_cb is kmalloced for each dump and we're just increasing it by 4 or 8 bytes. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index abbe2702c400..053970e8765d 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -64,6 +64,9 @@ static unsigned long xfrm_state_walk_ongoing; /* Counter indicating walk completion, protected by xfrm_cfg_mutex. */ static unsigned long xfrm_state_walk_completed; +/* List of outstanding state walks used to set the completed counter. */ +static LIST_HEAD(xfrm_state_walks); + static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family); static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); @@ -1584,7 +1587,6 @@ int xfrm_state_walk(struct xfrm_state_walk *walk, if (err) { xfrm_state_hold(last); walk->state = last; - xfrm_state_walk_ongoing++; goto out; } } @@ -1599,25 +1601,44 @@ int xfrm_state_walk(struct xfrm_state_walk *walk, err = func(last, 0, data); out: spin_unlock_bh(&xfrm_state_lock); - if (old != NULL) { + if (old != NULL) xfrm_state_put(old); - xfrm_state_walk_completed++; - if (!list_empty(&xfrm_state_gc_leftovers)) - schedule_work(&xfrm_state_gc_work); - } return err; } EXPORT_SYMBOL(xfrm_state_walk); +void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto) +{ + walk->proto = proto; + walk->state = NULL; + walk->count = 0; + list_add_tail(&walk->list, &xfrm_state_walks); + walk->genid = ++xfrm_state_walk_ongoing; +} +EXPORT_SYMBOL(xfrm_state_walk_init); + void xfrm_state_walk_done(struct xfrm_state_walk *walk) { + struct list_head *prev; + if (walk->state != NULL) { xfrm_state_put(walk->state); walk->state = NULL; - xfrm_state_walk_completed++; - if (!list_empty(&xfrm_state_gc_leftovers)) - schedule_work(&xfrm_state_gc_work); } + + prev = walk->list.prev; + list_del(&walk->list); + + if (prev != &xfrm_state_walks) { + list_entry(prev, struct xfrm_state_walk, list)->genid = + walk->genid; + return; + } + + xfrm_state_walk_completed = walk->genid; + + if (!list_empty(&xfrm_state_gc_leftovers)) + schedule_work(&xfrm_state_gc_work); } EXPORT_SYMBOL(xfrm_state_walk_done); -- cgit v1.2.1 From bce7b15426cac3000bf6a9cf59d9356ef0be2dec Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Mon, 22 Sep 2008 19:51:15 -0700 Subject: Phonet: global definitions Signed-off-by: Remi Denis-Courmont Signed-off-by: David S. Miller --- net/core/sock.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 23b8b9da36b3..2d358dd8a03e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -154,7 +154,8 @@ static const char *af_family_key_strings[AF_MAX+1] = { "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" , "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" , "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , - "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_MAX" + "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , + "sk_lock-AF_MAX" }; static const char *af_family_slock_key_strings[AF_MAX+1] = { "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , @@ -168,7 +169,8 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = { "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" , "slock-27" , "slock-28" , "slock-AF_CAN" , "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , - "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_MAX" + "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , + "slock-AF_MAX" }; static const char *af_family_clock_key_strings[AF_MAX+1] = { "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" , @@ -182,7 +184,8 @@ static const char *af_family_clock_key_strings[AF_MAX+1] = { "clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" , "clock-27" , "clock-28" , "clock-AF_CAN" , "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , - "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_MAX" + "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , + "clock-AF_MAX" }; #endif -- cgit v1.2.1 From 4b07b3f69a8471cdc142c51461a331226fef248a Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Mon, 22 Sep 2008 20:02:10 -0700 Subject: Phonet: PF_PHONET protocol family support This is the basis for the Phonet protocol families, and introduces the ETH_P_PHONET packet type and the PF_PHONET socket family. Signed-off-by: Remi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/af_phonet.c | 216 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 216 insertions(+) create mode 100644 net/phonet/af_phonet.c (limited to 'net') diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c new file mode 100644 index 000000000000..0cfea9bc994a --- /dev/null +++ b/net/phonet/af_phonet.c @@ -0,0 +1,216 @@ +/* + * File: af_phonet.c + * + * Phonet protocols family + * + * Copyright (C) 2008 Nokia Corporation. + * + * Contact: Remi Denis-Courmont + * Original author: Sakari Ailus + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include +#include +#include +#include + +#include +#include +#include + +static struct net_proto_family phonet_proto_family; +static struct phonet_protocol *phonet_proto_get(int protocol); +static inline void phonet_proto_put(struct phonet_protocol *pp); + +/* protocol family functions */ + +static int pn_socket_create(struct net *net, struct socket *sock, int protocol) +{ + struct phonet_protocol *pnp; + int err; + + if (net != &init_net) + return -EAFNOSUPPORT; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (protocol == 0) { + /* Default protocol selection */ + switch (sock->type) { + case SOCK_DGRAM: + protocol = PN_PROTO_PHONET; + break; + default: + return -EPROTONOSUPPORT; + } + } + + pnp = phonet_proto_get(protocol); + if (pnp == NULL) + return -EPROTONOSUPPORT; + if (sock->type != pnp->sock_type) { + err = -EPROTONOSUPPORT; + goto out; + } + + /* TODO: create and init the struct sock */ + err = -EPROTONOSUPPORT; + +out: + phonet_proto_put(pnp); + return err; +} + +static struct net_proto_family phonet_proto_family = { + .family = AF_PHONET, + .create = pn_socket_create, + .owner = THIS_MODULE, +}; + +/* packet type functions */ + +/* + * Stuff received packets to associated sockets. + * On error, returns non-zero and releases the skb. + */ +static int phonet_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pkttype, + struct net_device *orig_dev) +{ + struct phonethdr *ph; + struct sockaddr_pn sa; + u16 len; + + if (dev_net(dev) != &init_net) + goto out; + + /* check we have at least a full Phonet header */ + if (!pskb_pull(skb, sizeof(struct phonethdr))) + goto out; + + /* check that the advertised length is correct */ + ph = pn_hdr(skb); + len = get_unaligned_be16(&ph->pn_length); + if (len < 2) + goto out; + len -= 2; + if ((len > skb->len) || pskb_trim(skb, len)) + goto out; + skb_reset_transport_header(skb); + + pn_skb_get_dst_sockaddr(skb, &sa); + if (pn_sockaddr_get_addr(&sa) == 0) + goto out; /* currently, we cannot be device 0 */ + + /* TODO: put packets to sockets backlog */ + +out: + kfree_skb(skb); + return NET_RX_DROP; +} + +static struct packet_type phonet_packet_type = { + .type = __constant_htons(ETH_P_PHONET), + .dev = NULL, + .func = phonet_rcv, +}; + +/* Transport protocol registration */ +static struct phonet_protocol *proto_tab[PHONET_NPROTO] __read_mostly; +static DEFINE_SPINLOCK(proto_tab_lock); + +int __init_or_module phonet_proto_register(int protocol, + struct phonet_protocol *pp) +{ + int err = 0; + + if (protocol >= PHONET_NPROTO) + return -EINVAL; + + err = proto_register(pp->prot, 1); + if (err) + return err; + + spin_lock(&proto_tab_lock); + if (proto_tab[protocol]) + err = -EBUSY; + else + proto_tab[protocol] = pp; + spin_unlock(&proto_tab_lock); + + return err; +} +EXPORT_SYMBOL(phonet_proto_register); + +void phonet_proto_unregister(int protocol, struct phonet_protocol *pp) +{ + spin_lock(&proto_tab_lock); + BUG_ON(proto_tab[protocol] != pp); + proto_tab[protocol] = NULL; + spin_unlock(&proto_tab_lock); + proto_unregister(pp->prot); +} +EXPORT_SYMBOL(phonet_proto_unregister); + +static struct phonet_protocol *phonet_proto_get(int protocol) +{ + struct phonet_protocol *pp; + + if (protocol >= PHONET_NPROTO) + return NULL; + + spin_lock(&proto_tab_lock); + pp = proto_tab[protocol]; + if (pp && !try_module_get(pp->prot->owner)) + pp = NULL; + spin_unlock(&proto_tab_lock); + + return pp; +} + +static inline void phonet_proto_put(struct phonet_protocol *pp) +{ + module_put(pp->prot->owner); +} + +/* Module registration */ +static int __init phonet_init(void) +{ + int err; + + err = sock_register(&phonet_proto_family); + if (err) { + printk(KERN_ALERT + "phonet protocol family initialization failed\n"); + return err; + } + + dev_add_pack(&phonet_packet_type); + return 0; +} + +static void __exit phonet_exit(void) +{ + sock_unregister(AF_PHONET); + dev_remove_pack(&phonet_packet_type); +} + +module_init(phonet_init); +module_exit(phonet_exit); +MODULE_DESCRIPTION("Phonet protocol stack for Linux"); +MODULE_LICENSE("GPL"); -- cgit v1.2.1 From 8ead536dec142f27d5b5f72c3994eb39f4741717 Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Mon, 22 Sep 2008 20:03:00 -0700 Subject: Phonet: add CONFIG_PHONET Signed-off-by: Remi Denis-Courmont Signed-off-by: David S. Miller --- net/Kconfig | 1 + net/Makefile | 1 + net/phonet/Kconfig | 16 ++++++++++++++++ net/phonet/Makefile | 4 ++++ 4 files changed, 22 insertions(+) create mode 100644 net/phonet/Kconfig create mode 100644 net/phonet/Makefile (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index d87de48ba656..9103a16a77be 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -232,6 +232,7 @@ source "net/can/Kconfig" source "net/irda/Kconfig" source "net/bluetooth/Kconfig" source "net/rxrpc/Kconfig" +source "net/phonet/Kconfig" config FIB_RULES bool diff --git a/net/Makefile b/net/Makefile index 4f43e7f874f3..acaf819f24aa 100644 --- a/net/Makefile +++ b/net/Makefile @@ -42,6 +42,7 @@ obj-$(CONFIG_AF_RXRPC) += rxrpc/ obj-$(CONFIG_ATM) += atm/ obj-$(CONFIG_DECNET) += decnet/ obj-$(CONFIG_ECONET) += econet/ +obj-$(CONFIG_PHONET) += phonet/ ifneq ($(CONFIG_VLAN_8021Q),) obj-y += 8021q/ endif diff --git a/net/phonet/Kconfig b/net/phonet/Kconfig new file mode 100644 index 000000000000..51a5669573f2 --- /dev/null +++ b/net/phonet/Kconfig @@ -0,0 +1,16 @@ +# +# Phonet protocol +# + +config PHONET + tristate "Phonet protocols family" + help + The Phone Network protocol (PhoNet) is a packet-oriented + communication protocol developped by Nokia for use with its modems. + + This is required for Maemo to use cellular data connectivity (if + supported). It can also be used to control Nokia phones + from a Linux computer, although AT commands may be easier to use. + + To compile this driver as a module, choose M here: the module + will be called phonet. If unsure, say N. diff --git a/net/phonet/Makefile b/net/phonet/Makefile new file mode 100644 index 000000000000..5dbff68a6f36 --- /dev/null +++ b/net/phonet/Makefile @@ -0,0 +1,4 @@ +obj-$(CONFIG_PHONET) += phonet.o + +phonet-objs := \ + af_phonet.o -- cgit v1.2.1 From f8ff60283de2b6775d7a14619056a08e3083bd40 Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Mon, 22 Sep 2008 20:03:44 -0700 Subject: Phonet: network device and address handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This provides support for adding Phonet addresses to and removing Phonet addresses from network devices. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/Makefile | 1 + net/phonet/af_phonet.c | 3 + net/phonet/pn_dev.c | 208 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 212 insertions(+) create mode 100644 net/phonet/pn_dev.c (limited to 'net') diff --git a/net/phonet/Makefile b/net/phonet/Makefile index 5dbff68a6f36..980a3866c9a3 100644 --- a/net/phonet/Makefile +++ b/net/phonet/Makefile @@ -1,4 +1,5 @@ obj-$(CONFIG_PHONET) += phonet.o phonet-objs := \ + pn_dev.o \ af_phonet.o diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 0cfea9bc994a..a8ba6f177b29 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -31,6 +31,7 @@ #include #include #include +#include static struct net_proto_family phonet_proto_family; static struct phonet_protocol *phonet_proto_get(int protocol); @@ -200,6 +201,7 @@ static int __init phonet_init(void) return err; } + phonet_device_init(); dev_add_pack(&phonet_packet_type); return 0; } @@ -208,6 +210,7 @@ static void __exit phonet_exit(void) { sock_unregister(AF_PHONET); dev_remove_pack(&phonet_packet_type); + phonet_device_exit(); } module_init(phonet_init); diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c new file mode 100644 index 000000000000..53be9fc82aaa --- /dev/null +++ b/net/phonet/pn_dev.c @@ -0,0 +1,208 @@ +/* + * File: pn_dev.c + * + * Phonet network device + * + * Copyright (C) 2008 Nokia Corporation. + * + * Contact: Remi Denis-Courmont + * Original author: Sakari Ailus + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include +#include +#include +#include +#include +#include + +/* when accessing, remember to lock with spin_lock(&pndevs.lock); */ +struct phonet_device_list pndevs = { + .list = LIST_HEAD_INIT(pndevs.list), + .lock = __SPIN_LOCK_UNLOCKED(pndevs.lock), +}; + +/* Allocate new Phonet device. */ +static struct phonet_device *__phonet_device_alloc(struct net_device *dev) +{ + struct phonet_device *pnd = kmalloc(sizeof(*pnd), GFP_ATOMIC); + if (pnd == NULL) + return NULL; + pnd->netdev = dev; + bitmap_zero(pnd->addrs, 64); + + list_add(&pnd->list, &pndevs.list); + return pnd; +} + +static struct phonet_device *__phonet_get(struct net_device *dev) +{ + struct phonet_device *pnd; + + list_for_each_entry(pnd, &pndevs.list, list) { + if (pnd->netdev == dev) + return pnd; + } + return NULL; +} + +static void __phonet_device_free(struct phonet_device *pnd) +{ + list_del(&pnd->list); + kfree(pnd); +} + +struct net_device *phonet_device_get(struct net *net) +{ + struct phonet_device *pnd; + struct net_device *dev; + + spin_lock_bh(&pndevs.lock); + list_for_each_entry(pnd, &pndevs.list, list) { + dev = pnd->netdev; + BUG_ON(!dev); + + if (dev_net(dev) == net && + (dev->reg_state == NETREG_REGISTERED) && + ((pnd->netdev->flags & IFF_UP)) == IFF_UP) + break; + dev = NULL; + } + if (dev) + dev_hold(dev); + spin_unlock_bh(&pndevs.lock); + return dev; +} + +int phonet_address_add(struct net_device *dev, u8 addr) +{ + struct phonet_device *pnd; + int err = 0; + + spin_lock_bh(&pndevs.lock); + /* Find or create Phonet-specific device data */ + pnd = __phonet_get(dev); + if (pnd == NULL) + pnd = __phonet_device_alloc(dev); + if (unlikely(pnd == NULL)) + err = -ENOMEM; + else if (test_and_set_bit(addr >> 2, pnd->addrs)) + err = -EEXIST; + spin_unlock_bh(&pndevs.lock); + return err; +} + +int phonet_address_del(struct net_device *dev, u8 addr) +{ + struct phonet_device *pnd; + int err = 0; + + spin_lock_bh(&pndevs.lock); + pnd = __phonet_get(dev); + if (!pnd || !test_and_clear_bit(addr >> 2, pnd->addrs)) + err = -EADDRNOTAVAIL; + if (bitmap_empty(pnd->addrs, 64)) + __phonet_device_free(pnd); + spin_unlock_bh(&pndevs.lock); + return err; +} + +/* Gets a source address toward a destination, through a interface. */ +u8 phonet_address_get(struct net_device *dev, u8 addr) +{ + struct phonet_device *pnd; + + spin_lock_bh(&pndevs.lock); + pnd = __phonet_get(dev); + if (pnd) { + BUG_ON(bitmap_empty(pnd->addrs, 64)); + + /* Use same source address as destination, if possible */ + if (!test_bit(addr >> 2, pnd->addrs)) + addr = find_first_bit(pnd->addrs, 64) << 2; + } else + addr = PN_NO_ADDR; + spin_unlock_bh(&pndevs.lock); + return addr; +} + +int phonet_address_lookup(u8 addr) +{ + struct phonet_device *pnd; + + spin_lock_bh(&pndevs.lock); + list_for_each_entry(pnd, &pndevs.list, list) { + /* Don't allow unregistering devices! */ + if ((pnd->netdev->reg_state != NETREG_REGISTERED) || + ((pnd->netdev->flags & IFF_UP)) != IFF_UP) + continue; + + if (test_bit(addr >> 2, pnd->addrs)) { + spin_unlock_bh(&pndevs.lock); + return 0; + } + } + spin_unlock_bh(&pndevs.lock); + return -EADDRNOTAVAIL; +} + +/* notify Phonet of device events */ +static int phonet_device_notify(struct notifier_block *me, unsigned long what, + void *arg) +{ + struct net_device *dev = arg; + + if (what == NETDEV_UNREGISTER) { + struct phonet_device *pnd; + + /* Destroy phonet-specific device data */ + spin_lock_bh(&pndevs.lock); + pnd = __phonet_get(dev); + if (pnd) + __phonet_device_free(pnd); + spin_unlock_bh(&pndevs.lock); + } + return 0; + +} + +static struct notifier_block phonet_device_notifier = { + .notifier_call = phonet_device_notify, + .priority = 0, +}; + +/* Initialize Phonet devices list */ +void phonet_device_init(void) +{ + register_netdevice_notifier(&phonet_device_notifier); +} + +void phonet_device_exit(void) +{ + struct phonet_device *pnd, *n; + + rtnl_unregister_all(PF_PHONET); + rtnl_lock(); + spin_lock_bh(&pndevs.lock); + + list_for_each_entry_safe(pnd, n, &pndevs.list, list) + __phonet_device_free(pnd); + + spin_unlock_bh(&pndevs.lock); + rtnl_unlock(); + unregister_netdevice_notifier(&phonet_device_notifier); +} -- cgit v1.2.1 From 8fb397406f6470f79040c41eec49af20900a9e3b Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Mon, 22 Sep 2008 20:04:30 -0700 Subject: Phonet: Netlink interface This provides support for configuring Phonet addresses, notifying Phonet configuration changes, and dumping the configuration. Signed-off-by: Remi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/Makefile | 1 + net/phonet/af_phonet.c | 1 + net/phonet/pn_netlink.c | 186 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 188 insertions(+) create mode 100644 net/phonet/pn_netlink.c (limited to 'net') diff --git a/net/phonet/Makefile b/net/phonet/Makefile index 980a3866c9a3..4143c3e1dfdc 100644 --- a/net/phonet/Makefile +++ b/net/phonet/Makefile @@ -2,4 +2,5 @@ obj-$(CONFIG_PHONET) += phonet.o phonet-objs := \ pn_dev.o \ + pn_netlink.o \ af_phonet.o diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index a8ba6f177b29..5c729ba56939 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -203,6 +203,7 @@ static int __init phonet_init(void) phonet_device_init(); dev_add_pack(&phonet_packet_type); + phonet_netlink_register(); return 0; } diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c new file mode 100644 index 000000000000..b1ea19a230dd --- /dev/null +++ b/net/phonet/pn_netlink.c @@ -0,0 +1,186 @@ +/* + * File: pn_netlink.c + * + * Phonet netlink interface + * + * Copyright (C) 2008 Nokia Corporation. + * + * Contact: Remi Denis-Courmont + * Original author: Sakari Ailus + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include +#include +#include +#include +#include + +static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr, + u32 pid, u32 seq, int event); + +static void rtmsg_notify(int event, struct net_device *dev, u8 addr) +{ + struct sk_buff *skb; + int err = -ENOBUFS; + + skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + + nla_total_size(1), GFP_KERNEL); + if (skb == NULL) + goto errout; + err = fill_addr(skb, dev, addr, 0, 0, event); + if (err < 0) { + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + err = rtnl_notify(skb, dev_net(dev), 0, + RTNLGRP_PHONET_IFADDR, NULL, GFP_KERNEL); +errout: + if (err < 0) + rtnl_set_sk_err(dev_net(dev), RTNLGRP_PHONET_IFADDR, err); +} + +static int newaddr_doit(struct sk_buff *skb, struct nlmsghdr *nlm, void *attr) +{ + struct rtattr **rta = attr; + struct ifaddrmsg *ifm = NLMSG_DATA(nlm); + struct net_device *dev; + int err; + u8 pnaddr; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + ASSERT_RTNL(); + + if (rta[IFA_LOCAL - 1] == NULL) + return -EINVAL; + + dev = __dev_get_by_index(&init_net, ifm->ifa_index); + if (dev == NULL) + return -ENODEV; + + if (ifm->ifa_prefixlen > 0) + return -EINVAL; + + memcpy(&pnaddr, RTA_DATA(rta[IFA_LOCAL - 1]), 1); + + err = phonet_address_add(dev, pnaddr); + if (!err) + rtmsg_notify(RTM_NEWADDR, dev, pnaddr); + return err; +} + +static int deladdr_doit(struct sk_buff *skb, struct nlmsghdr *nlm, void *attr) +{ + struct rtattr **rta = attr; + struct ifaddrmsg *ifm = NLMSG_DATA(nlm); + struct net_device *dev; + int err; + u8 pnaddr; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + ASSERT_RTNL(); + + if (rta[IFA_LOCAL - 1] == NULL) + return -EINVAL; + + dev = __dev_get_by_index(&init_net, ifm->ifa_index); + if (dev == NULL) + return -ENODEV; + + if (ifm->ifa_prefixlen > 0) + return -EADDRNOTAVAIL; + + memcpy(&pnaddr, RTA_DATA(rta[IFA_LOCAL - 1]), 1); + + err = phonet_address_del(dev, pnaddr); + if (!err) + rtmsg_notify(RTM_DELADDR, dev, pnaddr); + return err; +} + +static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr, + u32 pid, u32 seq, int event) +{ + struct ifaddrmsg *ifm; + struct nlmsghdr *nlh; + unsigned int orig_len = skb->len; + + nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct ifaddrmsg)); + ifm = NLMSG_DATA(nlh); + ifm->ifa_family = AF_PHONET; + ifm->ifa_prefixlen = 0; + ifm->ifa_flags = IFA_F_PERMANENT; + ifm->ifa_scope = RT_SCOPE_HOST; + ifm->ifa_index = dev->ifindex; + RTA_PUT(skb, IFA_LOCAL, 1, &addr); + nlh->nlmsg_len = skb->len - orig_len; + + return 0; + +nlmsg_failure: +rtattr_failure: + skb_trim(skb, orig_len); + + return -1; +} + +static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct phonet_device *pnd; + int dev_idx = 0, dev_start_idx = cb->args[0]; + int addr_idx = 0, addr_start_idx = cb->args[1]; + + spin_lock_bh(&pndevs.lock); + list_for_each_entry(pnd, &pndevs.list, list) { + u8 addr; + + if (dev_idx > dev_start_idx) + addr_start_idx = 0; + if (dev_idx++ < dev_start_idx) + continue; + + addr_idx = 0; + for (addr = find_first_bit(pnd->addrs, 64); addr < 64; + addr = find_next_bit(pnd->addrs, 64, 1+addr)) { + if (addr_idx++ < addr_start_idx) + continue; + + if (fill_addr(skb, pnd->netdev, addr << 2, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, RTM_NEWADDR)) + goto out; + } + } + +out: + spin_unlock_bh(&pndevs.lock); + cb->args[0] = dev_idx; + cb->args[1] = addr_idx; + + return skb->len; +} + +void __init phonet_netlink_register(void) +{ + rtnl_register(PF_PHONET, RTM_NEWADDR, newaddr_doit, NULL); + rtnl_register(PF_PHONET, RTM_DELADDR, deladdr_doit, NULL); + rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit); +} -- cgit v1.2.1 From ba113a94b7503ee23ffe819e7045134b0c1d31de Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Mon, 22 Sep 2008 20:05:19 -0700 Subject: Phonet: common socket glue This provides the socket API for the Phonet protocols family. Signed-off-by: Remi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/Makefile | 1 + net/phonet/af_phonet.c | 28 ++++- net/phonet/socket.c | 311 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 337 insertions(+), 3 deletions(-) create mode 100644 net/phonet/socket.c (limited to 'net') diff --git a/net/phonet/Makefile b/net/phonet/Makefile index 4143c3e1dfdc..c1d671de7835 100644 --- a/net/phonet/Makefile +++ b/net/phonet/Makefile @@ -3,4 +3,5 @@ obj-$(CONFIG_PHONET) += phonet.o phonet-objs := \ pn_dev.o \ pn_netlink.o \ + socket.o \ af_phonet.o diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 5c729ba56939..ba54d53020ff 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -41,6 +41,8 @@ static inline void phonet_proto_put(struct phonet_protocol *pp); static int pn_socket_create(struct net *net, struct socket *sock, int protocol) { + struct sock *sk; + struct pn_sock *pn; struct phonet_protocol *pnp; int err; @@ -69,8 +71,22 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol) goto out; } - /* TODO: create and init the struct sock */ - err = -EPROTONOSUPPORT; + sk = sk_alloc(net, PF_PHONET, GFP_KERNEL, pnp->prot); + if (sk == NULL) { + err = -ENOMEM; + goto out; + } + + sock_init_data(sock, sk); + sock->state = SS_UNCONNECTED; + sock->ops = pnp->ops; + sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; + sk->sk_protocol = protocol; + pn = pn_sk(sk); + pn->sobject = 0; + pn->resource = 0; + sk->sk_prot->init(sk); + err = 0; out: phonet_proto_put(pnp); @@ -94,6 +110,7 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev, struct net_device *orig_dev) { struct phonethdr *ph; + struct sock *sk; struct sockaddr_pn sa; u16 len; @@ -118,7 +135,12 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev, if (pn_sockaddr_get_addr(&sa) == 0) goto out; /* currently, we cannot be device 0 */ - /* TODO: put packets to sockets backlog */ + sk = pn_find_sock_by_sa(&sa); + if (sk == NULL) + goto out; + + /* Push data to the socket (or other sockets connected to it). */ + return sk_receive_skb(sk, skb, 0); out: kfree_skb(skb); diff --git a/net/phonet/socket.c b/net/phonet/socket.c new file mode 100644 index 000000000000..99a4945d565d --- /dev/null +++ b/net/phonet/socket.c @@ -0,0 +1,311 @@ +/* + * File: socket.c + * + * Phonet sockets + * + * Copyright (C) 2008 Nokia Corporation. + * + * Contact: Remi Denis-Courmont + * Original author: Sakari Ailus + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include +#include +#include +#include + +#include +#include +#include + +static int pn_socket_release(struct socket *sock) +{ + struct sock *sk = sock->sk; + + if (sk) { + sock->sk = NULL; + sk->sk_prot->close(sk, 0); + } + return 0; +} + +static struct { + struct hlist_head hlist; + spinlock_t lock; +} pnsocks = { + .hlist = HLIST_HEAD_INIT, + .lock = __SPIN_LOCK_UNLOCKED(pnsocks.lock), +}; + +/* + * Find address based on socket address, match only certain fields. + * Also grab sock if it was found. Remember to sock_put it later. + */ +struct sock *pn_find_sock_by_sa(const struct sockaddr_pn *spn) +{ + struct hlist_node *node; + struct sock *sknode; + struct sock *rval = NULL; + u16 obj = pn_sockaddr_get_object(spn); + u8 res = spn->spn_resource; + + spin_lock_bh(&pnsocks.lock); + + sk_for_each(sknode, node, &pnsocks.hlist) { + struct pn_sock *pn = pn_sk(sknode); + BUG_ON(!pn->sobject); /* unbound socket */ + + if (pn_port(obj)) { + /* Look up socket by port */ + if (pn_port(pn->sobject) != pn_port(obj)) + continue; + } else { + /* If port is zero, look up by resource */ + if (pn->resource != res) + continue; + } + if (pn_addr(pn->sobject) + && pn_addr(pn->sobject) != pn_addr(obj)) + continue; + + rval = sknode; + sock_hold(sknode); + break; + } + + spin_unlock_bh(&pnsocks.lock); + + return rval; + +} + +void pn_sock_hash(struct sock *sk) +{ + spin_lock_bh(&pnsocks.lock); + sk_add_node(sk, &pnsocks.hlist); + spin_unlock_bh(&pnsocks.lock); +} +EXPORT_SYMBOL(pn_sock_hash); + +void pn_sock_unhash(struct sock *sk) +{ + spin_lock_bh(&pnsocks.lock); + sk_del_node_init(sk); + spin_unlock_bh(&pnsocks.lock); +} +EXPORT_SYMBOL(pn_sock_unhash); + +static int pn_socket_bind(struct socket *sock, struct sockaddr *addr, int len) +{ + struct sock *sk = sock->sk; + struct pn_sock *pn = pn_sk(sk); + struct sockaddr_pn *spn = (struct sockaddr_pn *)addr; + int err; + u16 handle; + u8 saddr; + + if (sk->sk_prot->bind) + return sk->sk_prot->bind(sk, addr, len); + + if (len < sizeof(struct sockaddr_pn)) + return -EINVAL; + if (spn->spn_family != AF_PHONET) + return -EAFNOSUPPORT; + + handle = pn_sockaddr_get_object((struct sockaddr_pn *)addr); + saddr = pn_addr(handle); + if (saddr && phonet_address_lookup(saddr)) + return -EADDRNOTAVAIL; + + lock_sock(sk); + if (sk->sk_state != TCP_CLOSE || pn_port(pn->sobject)) { + err = -EINVAL; /* attempt to rebind */ + goto out; + } + err = sk->sk_prot->get_port(sk, pn_port(handle)); + if (err) + goto out; + + /* get_port() sets the port, bind() sets the address if applicable */ + pn->sobject = pn_object(saddr, pn_port(pn->sobject)); + pn->resource = spn->spn_resource; + + /* Enable RX on the socket */ + sk->sk_prot->hash(sk); +out: + release_sock(sk); + return err; +} + +static int pn_socket_autobind(struct socket *sock) +{ + struct sockaddr_pn sa; + int err; + + memset(&sa, 0, sizeof(sa)); + sa.spn_family = AF_PHONET; + err = pn_socket_bind(sock, (struct sockaddr *)&sa, + sizeof(struct sockaddr_pn)); + if (err != -EINVAL) + return err; + BUG_ON(!pn_port(pn_sk(sock->sk)->sobject)); + return 0; /* socket was already bound */ +} + +static int pn_socket_getname(struct socket *sock, struct sockaddr *addr, + int *sockaddr_len, int peer) +{ + struct sock *sk = sock->sk; + struct pn_sock *pn = pn_sk(sk); + + memset(addr, 0, sizeof(struct sockaddr_pn)); + addr->sa_family = AF_PHONET; + if (!peer) /* Race with bind() here is userland's problem. */ + pn_sockaddr_set_object((struct sockaddr_pn *)addr, + pn->sobject); + + *sockaddr_len = sizeof(struct sockaddr_pn); + return 0; +} + +static int pn_socket_ioctl(struct socket *sock, unsigned int cmd, + unsigned long arg) +{ + struct sock *sk = sock->sk; + struct pn_sock *pn = pn_sk(sk); + + if (cmd == SIOCPNGETOBJECT) { + struct net_device *dev; + u16 handle; + u8 saddr; + + if (get_user(handle, (__u16 __user *)arg)) + return -EFAULT; + + lock_sock(sk); + if (sk->sk_bound_dev_if) + dev = dev_get_by_index(sock_net(sk), + sk->sk_bound_dev_if); + else + dev = phonet_device_get(sock_net(sk)); + if (dev && (dev->flags & IFF_UP)) + saddr = phonet_address_get(dev, pn_addr(handle)); + else + saddr = PN_NO_ADDR; + release_sock(sk); + + if (dev) + dev_put(dev); + if (saddr == PN_NO_ADDR) + return -EHOSTUNREACH; + + handle = pn_object(saddr, pn_port(pn->sobject)); + return put_user(handle, (__u16 __user *)arg); + } + + return sk->sk_prot->ioctl(sk, cmd, arg); +} + +static int pn_socket_sendmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *m, size_t total_len) +{ + struct sock *sk = sock->sk; + + if (pn_socket_autobind(sock)) + return -EAGAIN; + + return sk->sk_prot->sendmsg(iocb, sk, m, total_len); +} + +const struct proto_ops phonet_dgram_ops = { + .family = AF_PHONET, + .owner = THIS_MODULE, + .release = pn_socket_release, + .bind = pn_socket_bind, + .connect = sock_no_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = pn_socket_getname, + .poll = datagram_poll, + .ioctl = pn_socket_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = sock_no_setsockopt, + .getsockopt = sock_no_getsockopt, +#ifdef CONFIG_COMPAT + .compat_setsockopt = sock_no_setsockopt, + .compat_getsockopt = sock_no_getsockopt, +#endif + .sendmsg = pn_socket_sendmsg, + .recvmsg = sock_common_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; + +static DEFINE_MUTEX(port_mutex); + +/* allocate port for a socket */ +int pn_sock_get_port(struct sock *sk, unsigned short sport) +{ + static int port_cur; + struct pn_sock *pn = pn_sk(sk); + struct sockaddr_pn try_sa; + struct sock *tmpsk; + + memset(&try_sa, 0, sizeof(struct sockaddr_pn)); + try_sa.spn_family = AF_PHONET; + + mutex_lock(&port_mutex); + + if (!sport) { + /* search free port */ + int port, pmin = 0x40, pmax = 0x7f; + + for (port = pmin; port <= pmax; port++) { + port_cur++; + if (port_cur < pmin || port_cur > pmax) + port_cur = pmin; + + pn_sockaddr_set_port(&try_sa, port_cur); + tmpsk = pn_find_sock_by_sa(&try_sa); + if (tmpsk == NULL) { + sport = port_cur; + goto found; + } else + sock_put(tmpsk); + } + } else { + /* try to find specific port */ + pn_sockaddr_set_port(&try_sa, sport); + tmpsk = pn_find_sock_by_sa(&try_sa); + if (tmpsk == NULL) + /* No sock there! We can use that port... */ + goto found; + else + sock_put(tmpsk); + } + mutex_unlock(&port_mutex); + + /* the port must be in use already */ + return -EADDRINUSE; + +found: + mutex_unlock(&port_mutex); + pn->sobject = pn_object(pn_addr(pn->sobject), sport); + return 0; +} +EXPORT_SYMBOL(pn_sock_get_port); -- cgit v1.2.1 From 107d0d9b8d9a236883db72841fb61cedd5be845e Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Mon, 22 Sep 2008 20:05:57 -0700 Subject: Phonet: Phonet datagram transport protocol This provides the basic SOCK_DGRAM transport protocol for Phonet. Signed-off-by: Remi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/Makefile | 1 + net/phonet/af_phonet.c | 106 ++++++++++++++++++++++++++ net/phonet/datagram.c | 197 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 304 insertions(+) create mode 100644 net/phonet/datagram.c (limited to 'net') diff --git a/net/phonet/Makefile b/net/phonet/Makefile index c1d671de7835..d218abc3f06a 100644 --- a/net/phonet/Makefile +++ b/net/phonet/Makefile @@ -4,4 +4,5 @@ phonet-objs := \ pn_dev.o \ pn_netlink.o \ socket.o \ + datagram.o \ af_phonet.o diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index ba54d53020ff..e6771d3961cf 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -99,6 +99,101 @@ static struct net_proto_family phonet_proto_family = { .owner = THIS_MODULE, }; +/* + * Prepends an ISI header and sends a datagram. + */ +static int pn_send(struct sk_buff *skb, struct net_device *dev, + u16 dst, u16 src, u8 res) +{ + struct phonethdr *ph; + int err; + + if (skb->len + 2 > 0xffff) { + /* Phonet length field would overflow */ + err = -EMSGSIZE; + goto drop; + } + + skb_reset_transport_header(skb); + WARN_ON(skb_headroom(skb) & 1); /* HW assumes word alignment */ + skb_push(skb, sizeof(struct phonethdr)); + skb_reset_network_header(skb); + ph = pn_hdr(skb); + ph->pn_rdev = pn_dev(dst); + ph->pn_sdev = pn_dev(src); + ph->pn_res = res; + ph->pn_length = __cpu_to_be16(skb->len + 2 - sizeof(*ph)); + ph->pn_robj = pn_obj(dst); + ph->pn_sobj = pn_obj(src); + + skb->protocol = htons(ETH_P_PHONET); + skb->priority = 0; + skb->dev = dev; + + if (pn_addr(src) == pn_addr(dst)) { + skb_reset_mac_header(skb); + skb->pkt_type = PACKET_LOOPBACK; + skb_orphan(skb); + netif_rx_ni(skb); + err = 0; + } else { + err = dev_hard_header(skb, dev, ntohs(skb->protocol), + NULL, NULL, skb->len); + if (err < 0) { + err = -EHOSTUNREACH; + goto drop; + } + err = dev_queue_xmit(skb); + } + + return err; +drop: + kfree_skb(skb); + return err; +} + +/* + * Create a Phonet header for the skb and send it out. Returns + * non-zero error code if failed. The skb is freed then. + */ +int pn_skb_send(struct sock *sk, struct sk_buff *skb, + const struct sockaddr_pn *target) +{ + struct net_device *dev; + struct pn_sock *pn = pn_sk(sk); + int err; + u16 src; + u8 daddr = pn_sockaddr_get_addr(target), saddr = PN_NO_ADDR; + + err = -EHOSTUNREACH; + if (sk->sk_bound_dev_if) + dev = dev_get_by_index(sock_net(sk), sk->sk_bound_dev_if); + else + dev = phonet_device_get(sock_net(sk)); + if (!dev || !(dev->flags & IFF_UP)) + goto drop; + + saddr = phonet_address_get(dev, daddr); + if (saddr == PN_NO_ADDR) + goto drop; + + src = pn->sobject; + if (!pn_addr(src)) + src = pn_object(saddr, pn_obj(src)); + + err = pn_send(skb, dev, pn_sockaddr_get_object(target), + src, pn_sockaddr_get_resource(target)); + dev_put(dev); + return err; + +drop: + kfree_skb(skb); + if (dev) + dev_put(dev); + return err; +} +EXPORT_SYMBOL(pn_skb_send); + /* packet type functions */ /* @@ -226,11 +321,22 @@ static int __init phonet_init(void) phonet_device_init(); dev_add_pack(&phonet_packet_type); phonet_netlink_register(); + + err = isi_register(); + if (err) + goto err; return 0; + +err: + sock_unregister(AF_PHONET); + dev_remove_pack(&phonet_packet_type); + phonet_device_exit(); + return err; } static void __exit phonet_exit(void) { + isi_unregister(); sock_unregister(AF_PHONET); dev_remove_pack(&phonet_packet_type); phonet_device_exit(); diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c new file mode 100644 index 000000000000..e087862ed7e4 --- /dev/null +++ b/net/phonet/datagram.c @@ -0,0 +1,197 @@ +/* + * File: datagram.c + * + * Datagram (ISI) Phonet sockets + * + * Copyright (C) 2008 Nokia Corporation. + * + * Contact: Remi Denis-Courmont + * Original author: Sakari Ailus + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include +#include +#include +#include + +#include +#include + +static int pn_backlog_rcv(struct sock *sk, struct sk_buff *skb); + +/* associated socket ceases to exist */ +static void pn_sock_close(struct sock *sk, long timeout) +{ + sk_common_release(sk); +} + +static int pn_ioctl(struct sock *sk, int cmd, unsigned long arg) +{ + struct sk_buff *skb; + int answ; + + switch (cmd) { + case SIOCINQ: + lock_sock(sk); + skb = skb_peek(&sk->sk_receive_queue); + answ = skb ? skb->len : 0; + release_sock(sk); + return put_user(answ, (int __user *)arg); + } + + return -ENOIOCTLCMD; +} + +/* Destroy socket. All references are gone. */ +static void pn_destruct(struct sock *sk) +{ + skb_queue_purge(&sk->sk_receive_queue); +} + +static int pn_init(struct sock *sk) +{ + sk->sk_destruct = pn_destruct; + return 0; +} + +static int pn_sendmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len) +{ + struct sockaddr_pn *target; + struct sk_buff *skb; + int err; + + if (msg->msg_flags & MSG_OOB) + return -EOPNOTSUPP; + + if (msg->msg_name == NULL) + return -EDESTADDRREQ; + + if (msg->msg_namelen < sizeof(struct sockaddr_pn)) + return -EINVAL; + + target = (struct sockaddr_pn *)msg->msg_name; + if (target->spn_family != AF_PHONET) + return -EAFNOSUPPORT; + + skb = sock_alloc_send_skb(sk, MAX_PHONET_HEADER + len, + msg->msg_flags & MSG_DONTWAIT, &err); + if (skb == NULL) + return err; + skb_reserve(skb, MAX_PHONET_HEADER); + + err = memcpy_fromiovec((void *)skb_put(skb, len), msg->msg_iov, len); + if (err < 0) { + kfree_skb(skb); + return err; + } + + /* + * Fill in the Phonet header and + * finally pass the packet forwards. + */ + err = pn_skb_send(sk, skb, target); + + /* If ok, return len. */ + return (err >= 0) ? len : err; +} + +static int pn_recvmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len) +{ + struct sk_buff *skb = NULL; + struct sockaddr_pn sa; + int rval = -EOPNOTSUPP; + int copylen; + + if (flags & MSG_OOB) + goto out_nofree; + + if (addr_len) + *addr_len = sizeof(sa); + + skb = skb_recv_datagram(sk, flags, noblock, &rval); + if (skb == NULL) + goto out_nofree; + + pn_skb_get_src_sockaddr(skb, &sa); + + copylen = skb->len; + if (len < copylen) { + msg->msg_flags |= MSG_TRUNC; + copylen = len; + } + + rval = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copylen); + if (rval) { + rval = -EFAULT; + goto out; + } + + rval = (flags & MSG_TRUNC) ? skb->len : copylen; + + if (msg->msg_name != NULL) + memcpy(msg->msg_name, &sa, sizeof(struct sockaddr_pn)); + +out: + skb_free_datagram(sk, skb); + +out_nofree: + return rval; +} + +/* Queue an skb for a sock. */ +static int pn_backlog_rcv(struct sock *sk, struct sk_buff *skb) +{ + int err = sock_queue_rcv_skb(sk, skb); + if (err < 0) + kfree_skb(skb); + return err ? NET_RX_DROP : NET_RX_SUCCESS; +} + +/* Module registration */ +static struct proto pn_proto = { + .close = pn_sock_close, + .ioctl = pn_ioctl, + .init = pn_init, + .sendmsg = pn_sendmsg, + .recvmsg = pn_recvmsg, + .backlog_rcv = pn_backlog_rcv, + .hash = pn_sock_hash, + .unhash = pn_sock_unhash, + .get_port = pn_sock_get_port, + .obj_size = sizeof(struct pn_sock), + .owner = THIS_MODULE, + .name = "PHONET", +}; + +static struct phonet_protocol pn_dgram_proto = { + .ops = &phonet_dgram_ops, + .prot = &pn_proto, + .sock_type = SOCK_DGRAM, +}; + +int __init isi_register(void) +{ + return phonet_proto_register(PN_PROTO_PHONET, &pn_dgram_proto); +} + +void __exit isi_unregister(void) +{ + phonet_proto_unregister(PN_PROTO_PHONET, &pn_dgram_proto); +} -- cgit v1.2.1 From 5f77076d75d35c9f5619e1f9d7e7428a627f65e6 Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Mon, 22 Sep 2008 20:08:04 -0700 Subject: Phonet: provide MAC header operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/af_phonet.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'net') diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index e6771d3961cf..51397ff308bd 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -99,6 +99,35 @@ static struct net_proto_family phonet_proto_family = { .owner = THIS_MODULE, }; +/* Phonet device header operations */ +static int pn_header_create(struct sk_buff *skb, struct net_device *dev, + unsigned short type, const void *daddr, + const void *saddr, unsigned len) +{ + u8 *media = skb_push(skb, 1); + + if (type != ETH_P_PHONET) + return -1; + + if (!saddr) + saddr = dev->dev_addr; + *media = *(const u8 *)saddr; + return 1; +} + +static int pn_header_parse(const struct sk_buff *skb, unsigned char *haddr) +{ + const u8 *media = skb_mac_header(skb); + *haddr = *media; + return 1; +} + +struct header_ops phonet_header_ops = { + .create = pn_header_create, + .parse = pn_header_parse, +}; +EXPORT_SYMBOL(phonet_header_ops); + /* * Prepends an ISI header and sends a datagram. */ -- cgit v1.2.1 From 87ab4e20b445c6d2d2727ab4f96fa17f7259511e Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Mon, 22 Sep 2008 20:08:39 -0700 Subject: Phonet: proc interface for port range Phonet endpoints are bound to individual ports. This provides a /proc/sys/net/phonet (or sysctl) interface for selecting the range of automatically allocated ports (much like the ip_local_port_range with IPv4). Signed-off-by: Remi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/Makefile | 1 + net/phonet/af_phonet.c | 3 ++ net/phonet/socket.c | 3 +- net/phonet/sysctl.c | 113 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 119 insertions(+), 1 deletion(-) create mode 100644 net/phonet/sysctl.c (limited to 'net') diff --git a/net/phonet/Makefile b/net/phonet/Makefile index d218abc3f06a..ae9c3ed5be83 100644 --- a/net/phonet/Makefile +++ b/net/phonet/Makefile @@ -5,4 +5,5 @@ phonet-objs := \ pn_netlink.o \ socket.o \ datagram.o \ + sysctl.o \ af_phonet.o diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 51397ff308bd..50dc258d5aa2 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -350,6 +350,7 @@ static int __init phonet_init(void) phonet_device_init(); dev_add_pack(&phonet_packet_type); phonet_netlink_register(); + phonet_sysctl_init(); err = isi_register(); if (err) @@ -357,6 +358,7 @@ static int __init phonet_init(void) return 0; err: + phonet_sysctl_exit(); sock_unregister(AF_PHONET); dev_remove_pack(&phonet_packet_type); phonet_device_exit(); @@ -366,6 +368,7 @@ err: static void __exit phonet_exit(void) { isi_unregister(); + phonet_sysctl_exit(); sock_unregister(AF_PHONET); dev_remove_pack(&phonet_packet_type); phonet_device_exit(); diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 99a4945d565d..dfd4061646db 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -273,8 +273,9 @@ int pn_sock_get_port(struct sock *sk, unsigned short sport) if (!sport) { /* search free port */ - int port, pmin = 0x40, pmax = 0x7f; + int port, pmin, pmax; + phonet_get_local_port_range(&pmin, &pmax); for (port = pmin; port <= pmax; port++) { port_cur++; if (port_cur < pmin || port_cur > pmax) diff --git a/net/phonet/sysctl.c b/net/phonet/sysctl.c new file mode 100644 index 000000000000..600a4309b8c8 --- /dev/null +++ b/net/phonet/sysctl.c @@ -0,0 +1,113 @@ +/* + * File: sysctl.c + * + * Phonet /proc/sys/net/phonet interface implementation + * + * Copyright (C) 2008 Nokia Corporation. + * + * Contact: Remi Denis-Courmont + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include +#include +#include +#include + +#define DYNAMIC_PORT_MIN 0x40 +#define DYNAMIC_PORT_MAX 0x7f + +static DEFINE_SEQLOCK(local_port_range_lock); +static int local_port_range_min[2] = {0, 0}; +static int local_port_range_max[2] = {1023, 1023}; +static int local_port_range[2] = {DYNAMIC_PORT_MIN, DYNAMIC_PORT_MAX}; +static struct ctl_table_header *phonet_table_hrd; + +static void set_local_port_range(int range[2]) +{ + write_seqlock(&local_port_range_lock); + local_port_range[0] = range[0]; + local_port_range[1] = range[1]; + write_sequnlock(&local_port_range_lock); +} + +void phonet_get_local_port_range(int *min, int *max) +{ + unsigned seq; + do { + seq = read_seqbegin(&local_port_range_lock); + if (min) + *min = local_port_range[0]; + if (max) + *max = local_port_range[1]; + } while (read_seqretry(&local_port_range_lock, seq)); +} + +static int proc_local_port_range(ctl_table *table, int write, struct file *filp, + void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + int ret; + int range[2] = {local_port_range[0], local_port_range[1]}; + ctl_table tmp = { + .data = &range, + .maxlen = sizeof(range), + .mode = table->mode, + .extra1 = &local_port_range_min, + .extra2 = &local_port_range_max, + }; + + ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos); + + if (write && ret == 0) { + if (range[1] < range[0]) + ret = -EINVAL; + else + set_local_port_range(range); + } + + return ret; +} + +static struct ctl_table phonet_table[] = { + { + .ctl_name = CTL_UNNUMBERED, + .procname = "local_port_range", + .data = &local_port_range, + .maxlen = sizeof(local_port_range), + .mode = 0644, + .proc_handler = &proc_local_port_range, + .strategy = NULL, + }, + { .ctl_name = 0 } +}; + +struct ctl_path phonet_ctl_path[] = { + { .procname = "net", .ctl_name = CTL_NET, }, + { .procname = "phonet", .ctl_name = CTL_UNNUMBERED, }, + { }, +}; + +int __init phonet_sysctl_init(void) +{ + phonet_table_hrd = register_sysctl_paths(phonet_ctl_path, phonet_table); + return phonet_table_hrd == NULL ? -ENOMEM : 0; +} + +void phonet_sysctl_exit(void) +{ + unregister_sysctl_table(phonet_table_hrd); +} -- cgit v1.2.1 From be0c52bfed7f7828494fa00060efd5d758e92580 Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Mon, 22 Sep 2008 20:09:13 -0700 Subject: Phonet: emit errors when a packet cannot be delivered locally When there is no listener socket for a received packet, send an error back to the sender. Signed-off-by: Remi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/af_phonet.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 92 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 50dc258d5aa2..1d8df6b7e3df 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -132,7 +132,7 @@ EXPORT_SYMBOL(phonet_header_ops); * Prepends an ISI header and sends a datagram. */ static int pn_send(struct sk_buff *skb, struct net_device *dev, - u16 dst, u16 src, u8 res) + u16 dst, u16 src, u8 res, u8 irq) { struct phonethdr *ph; int err; @@ -163,7 +163,10 @@ static int pn_send(struct sk_buff *skb, struct net_device *dev, skb_reset_mac_header(skb); skb->pkt_type = PACKET_LOOPBACK; skb_orphan(skb); - netif_rx_ni(skb); + if (irq) + netif_rx(skb); + else + netif_rx_ni(skb); err = 0; } else { err = dev_hard_header(skb, dev, ntohs(skb->protocol), @@ -181,6 +184,19 @@ drop: return err; } +static int pn_raw_send(const void *data, int len, struct net_device *dev, + u16 dst, u16 src, u8 res) +{ + struct sk_buff *skb = alloc_skb(MAX_PHONET_HEADER + len, GFP_ATOMIC); + if (skb == NULL) + return -ENOMEM; + + skb_reserve(skb, MAX_PHONET_HEADER); + __skb_put(skb, len); + skb_copy_to_linear_data(skb, data, len); + return pn_send(skb, dev, dst, src, res, 1); +} + /* * Create a Phonet header for the skb and send it out. Returns * non-zero error code if failed. The skb is freed then. @@ -211,7 +227,7 @@ int pn_skb_send(struct sock *sk, struct sk_buff *skb, src = pn_object(saddr, pn_obj(src)); err = pn_send(skb, dev, pn_sockaddr_get_object(target), - src, pn_sockaddr_get_resource(target)); + src, pn_sockaddr_get_resource(target), 0); dev_put(dev); return err; @@ -223,6 +239,73 @@ drop: } EXPORT_SYMBOL(pn_skb_send); +/* Do not send an error message in response to an error message */ +static inline int can_respond(struct sk_buff *skb) +{ + const struct phonethdr *ph; + const struct phonetmsg *pm; + u8 submsg_id; + + if (!pskb_may_pull(skb, 3)) + return 0; + + ph = pn_hdr(skb); + if (phonet_address_get(skb->dev, ph->pn_rdev) != ph->pn_rdev) + return 0; /* we are not the destination */ + if (ph->pn_res == PN_PREFIX && !pskb_may_pull(skb, 5)) + return 0; + + ph = pn_hdr(skb); /* re-acquires the pointer */ + pm = pn_msg(skb); + if (pm->pn_msg_id != PN_COMMON_MESSAGE) + return 1; + submsg_id = (ph->pn_res == PN_PREFIX) + ? pm->pn_e_submsg_id : pm->pn_submsg_id; + if (submsg_id != PN_COMM_ISA_ENTITY_NOT_REACHABLE_RESP && + pm->pn_e_submsg_id != PN_COMM_SERVICE_NOT_IDENTIFIED_RESP) + return 1; + return 0; +} + +static int send_obj_unreachable(struct sk_buff *rskb) +{ + const struct phonethdr *oph = pn_hdr(rskb); + const struct phonetmsg *opm = pn_msg(rskb); + struct phonetmsg resp; + + memset(&resp, 0, sizeof(resp)); + resp.pn_trans_id = opm->pn_trans_id; + resp.pn_msg_id = PN_COMMON_MESSAGE; + if (oph->pn_res == PN_PREFIX) { + resp.pn_e_res_id = opm->pn_e_res_id; + resp.pn_e_submsg_id = PN_COMM_ISA_ENTITY_NOT_REACHABLE_RESP; + resp.pn_e_orig_msg_id = opm->pn_msg_id; + resp.pn_e_status = 0; + } else { + resp.pn_submsg_id = PN_COMM_ISA_ENTITY_NOT_REACHABLE_RESP; + resp.pn_orig_msg_id = opm->pn_msg_id; + resp.pn_status = 0; + } + return pn_raw_send(&resp, sizeof(resp), rskb->dev, + pn_object(oph->pn_sdev, oph->pn_sobj), + pn_object(oph->pn_rdev, oph->pn_robj), + oph->pn_res); +} + +static int send_reset_indications(struct sk_buff *rskb) +{ + struct phonethdr *oph = pn_hdr(rskb); + static const u8 data[4] = { + 0x00 /* trans ID */, 0x10 /* subscribe msg */, + 0x00 /* subscription count */, 0x00 /* dummy */ + }; + + return pn_raw_send(data, sizeof(data), rskb->dev, + pn_object(oph->pn_sdev, 0x00), + pn_object(oph->pn_rdev, oph->pn_robj), 0x10); +} + + /* packet type functions */ /* @@ -260,8 +343,13 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev, goto out; /* currently, we cannot be device 0 */ sk = pn_find_sock_by_sa(&sa); - if (sk == NULL) + if (sk == NULL) { + if (can_respond(skb)) { + send_obj_unreachable(skb); + send_reset_indications(skb); + } goto out; + } /* Push data to the socket (or other sockets connected to it). */ return sk_receive_skb(sk, skb, 0); -- cgit v1.2.1 From 0b815a1a6d43ab498674b8430c8c35ab08487a16 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 22 Sep 2008 21:28:11 -0700 Subject: net: network device name ifalias support This patch add support for keeping an additional character alias associated with an network interface. This is useful for maintaining the SNMP ifAlias value which is a user defined value. Routers use this to hold information like which circuit or line it is connected to. It is just an arbitrary text label on the network device. There are two exposed interfaces with this patch, the value can be read/written either via netlink or sysfs. This could be maintained just by the snmp daemon, but it is more generally useful for other management tools, and the kernel is good place to act as an agreed upon interface to store it. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/core/dev.c | 23 +++++++++++++++++++++++ net/core/net-sysfs.c | 36 ++++++++++++++++++++++++++++++++++++ net/core/rtnetlink.c | 13 +++++++++++++ 3 files changed, 72 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index fdfc4b6a6448..e91390533999 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -953,6 +953,29 @@ rollback: return err; } +/** + * dev_set_alias - change ifalias of a device + * @dev: device + * @alias: name up to IFALIASZ + * + * Set ifalias for a device, + */ +int dev_set_alias(struct net_device *dev, const char *alias, size_t len) +{ + ASSERT_RTNL(); + + if (len >= IFALIASZ) + return -EINVAL; + + dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL); + if (!dev->ifalias) + return -ENOMEM; + + strlcpy(dev->ifalias, alias, len+1); + return len; +} + + /** * netdev_features_change - device changes features * @dev: device to cause notification diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index c1f4e0d428c0..92d6b9467314 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -209,9 +209,44 @@ static ssize_t store_tx_queue_len(struct device *dev, return netdev_store(dev, attr, buf, len, change_tx_queue_len); } +static ssize_t store_ifalias(struct device *dev, struct device_attribute *attr, + const char *buf, size_t len) +{ + struct net_device *netdev = to_net_dev(dev); + size_t count = len; + ssize_t ret; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + /* ignore trailing newline */ + if (len > 0 && buf[len - 1] == '\n') + --count; + + rtnl_lock(); + ret = dev_set_alias(netdev, buf, count); + rtnl_unlock(); + + return ret < 0 ? ret : len; +} + +static ssize_t show_ifalias(struct device *dev, + struct device_attribute *attr, char *buf) +{ + const struct net_device *netdev = to_net_dev(dev); + ssize_t ret = 0; + + rtnl_lock(); + if (netdev->ifalias) + ret = sprintf(buf, "%s\n", netdev->ifalias); + rtnl_unlock(); + return ret; +} + static struct device_attribute net_class_attributes[] = { __ATTR(addr_len, S_IRUGO, show_addr_len, NULL), __ATTR(dev_id, S_IRUGO, show_dev_id, NULL), + __ATTR(ifalias, S_IRUGO | S_IWUSR, show_ifalias, store_ifalias), __ATTR(iflink, S_IRUGO, show_iflink, NULL), __ATTR(ifindex, S_IRUGO, show_ifindex, NULL), __ATTR(features, S_IRUGO, show_features, NULL), @@ -418,6 +453,7 @@ static void netdev_release(struct device *d) BUG_ON(dev->reg_state != NETREG_RELEASED); + kfree(dev->ifalias); kfree((char *)dev - dev->padded); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 71edb8b36341..8862498fd4a6 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -586,6 +586,7 @@ static inline size_t if_nlmsg_size(const struct net_device *dev) { return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ + + nla_total_size(IFALIASZ) /* IFLA_IFALIAS */ + nla_total_size(IFNAMSIZ) /* IFLA_QDISC */ + nla_total_size(sizeof(struct rtnl_link_ifmap)) + nla_total_size(sizeof(struct rtnl_link_stats)) @@ -640,6 +641,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (txq->qdisc_sleeping) NLA_PUT_STRING(skb, IFLA_QDISC, txq->qdisc_sleeping->ops->id); + if (dev->ifalias) + NLA_PUT_STRING(skb, IFLA_IFALIAS, dev->ifalias); + if (1) { struct rtnl_link_ifmap map = { .mem_start = dev->mem_start, @@ -713,6 +717,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_LINKMODE] = { .type = NLA_U8 }, [IFLA_LINKINFO] = { .type = NLA_NESTED }, [IFLA_NET_NS_PID] = { .type = NLA_U32 }, + [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@ -853,6 +858,14 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, modified = 1; } + if (tb[IFLA_IFALIAS]) { + err = dev_set_alias(dev, nla_data(tb[IFLA_IFALIAS]), + nla_len(tb[IFLA_IFALIAS])); + if (err < 0) + goto errout; + modified = 1; + } + if (tb[IFLA_BROADCAST]) { nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len); send_addr_notify = 1; -- cgit v1.2.1 From 242f8bfefe4bed626df4e4727ac8f315d80b567a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 22 Sep 2008 22:15:30 -0700 Subject: pkt_sched: Make qdisc->gso_skb a list. The idea is that we can use this to get rid of ->requeue(). Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index ec0a0839ce51..5961536be60c 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -45,7 +45,7 @@ static inline int qdisc_qlen(struct Qdisc *q) static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) { if (unlikely(skb->next)) - q->gso_skb = skb; + __skb_queue_head(&q->requeue, skb); else q->ops->requeue(skb, q); @@ -57,9 +57,8 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q) { struct sk_buff *skb; - if ((skb = q->gso_skb)) - q->gso_skb = NULL; - else + skb = __skb_dequeue(&q->requeue); + if (!skb) skb = q->dequeue(q); return skb; @@ -327,6 +326,7 @@ struct Qdisc noop_qdisc = { .flags = TCQ_F_BUILTIN, .ops = &noop_qdisc_ops, .list = LIST_HEAD_INIT(noop_qdisc.list), + .requeue.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), .dev_queue = &noop_netdev_queue, }; @@ -352,6 +352,7 @@ static struct Qdisc noqueue_qdisc = { .flags = TCQ_F_BUILTIN, .ops = &noqueue_qdisc_ops, .list = LIST_HEAD_INIT(noqueue_qdisc.list), + .requeue.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock), .q.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock), .dev_queue = &noqueue_netdev_queue, }; @@ -472,6 +473,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, sch->padded = (char *) sch - (char *) p; INIT_LIST_HEAD(&sch->list); + skb_queue_head_init(&sch->requeue); skb_queue_head_init(&sch->q); sch->ops = ops; sch->enqueue = ops->enqueue; @@ -539,7 +541,7 @@ void qdisc_destroy(struct Qdisc *qdisc) module_put(ops->owner); dev_put(qdisc_dev(qdisc)); - kfree_skb(qdisc->gso_skb); + __skb_queue_purge(&qdisc->requeue); kfree((char *) qdisc - qdisc->padded); } -- cgit v1.2.1 From f0876520b0b721bedafd9cec3b1b0624ae566eee Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 22 Sep 2008 22:15:58 -0700 Subject: pkt_sched: Always use q->requeue in dev_requeue_skb(). There is no reason to call into the complicated qdiscs just to remember the last SKB where we found the device blocked. The SKB is outside of the qdiscs realm at this point. Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 5961536be60c..1b508bd1c06c 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -44,10 +44,7 @@ static inline int qdisc_qlen(struct Qdisc *q) static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) { - if (unlikely(skb->next)) - __skb_queue_head(&q->requeue, skb); - else - q->ops->requeue(skb, q); + __skb_queue_head(&q->requeue, skb); __netif_schedule(q); return 0; -- cgit v1.2.1 From ebf059821ed8a36acd706484b719d14d212ada32 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Mon, 22 Sep 2008 22:16:23 -0700 Subject: pkt_sched: Check the state of tx_queue in dequeue_skb() Check in dequeue_skb() the state of tx_queue for requeued skb to save on locking and re-requeuing, and possibly remove the current check in qdisc_run(). Based on the idea of Alexander Duyck. Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 1b508bd1c06c..5e7e0bd38fe8 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -52,11 +52,21 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) static inline struct sk_buff *dequeue_skb(struct Qdisc *q) { - struct sk_buff *skb; - - skb = __skb_dequeue(&q->requeue); - if (!skb) + struct sk_buff *skb = skb_peek(&q->requeue); + + if (unlikely(skb)) { + struct net_device *dev = qdisc_dev(q); + struct netdev_queue *txq; + + /* check the reason of requeuing without tx lock first */ + txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); + if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq)) + __skb_unlink(skb, &q->requeue); + else + skb = NULL; + } else { skb = q->dequeue(q); + } return skb; } -- cgit v1.2.1 From f72051b0674f36c960698653a0583edaec1e495e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 23 Sep 2008 01:11:18 -0700 Subject: neigh: Remove by-hand SKB queue handling. Signed-off-by: David S. Miller --- net/core/neighbour.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 9d92e41826e7..1dc728b38589 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -927,8 +927,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) if (skb_queue_len(&neigh->arp_queue) >= neigh->parms->queue_len) { struct sk_buff *buff; - buff = neigh->arp_queue.next; - __skb_unlink(buff, &neigh->arp_queue); + buff = __skb_dequeue(&neigh->arp_queue); kfree_skb(buff); NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards); } @@ -1259,24 +1258,20 @@ static void neigh_proxy_process(unsigned long arg) struct neigh_table *tbl = (struct neigh_table *)arg; long sched_next = 0; unsigned long now = jiffies; - struct sk_buff *skb; + struct sk_buff *skb, *n; spin_lock(&tbl->proxy_queue.lock); - skb = tbl->proxy_queue.next; - - while (skb != (struct sk_buff *)&tbl->proxy_queue) { - struct sk_buff *back = skb; - long tdif = NEIGH_CB(back)->sched_next - now; + skb_queue_walk_safe(&tbl->proxy_queue, skb, n) { + long tdif = NEIGH_CB(skb)->sched_next - now; - skb = skb->next; if (tdif <= 0) { - struct net_device *dev = back->dev; - __skb_unlink(back, &tbl->proxy_queue); + struct net_device *dev = skb->dev; + __skb_unlink(skb, &tbl->proxy_queue); if (tbl->proxy_redo && netif_running(dev)) - tbl->proxy_redo(back); + tbl->proxy_redo(skb); else - kfree_skb(back); + kfree_skb(skb); dev_put(dev); } else if (!sched_next || tdif < sched_next) -- cgit v1.2.1 From 77d40a0952b16e020ce07c4cf9fb22024448275b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 23 Sep 2008 01:29:23 -0700 Subject: tcp: Fix order of tests in tcp_retransmit_skb() tcp_write_queue_next() must only be made if we know that tcp_skb_is_last() evaluates to false. Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index c3d58ee3e16f..a8499ef3234a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1932,8 +1932,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) /* Collapse two adjacent packets if worthwhile and we can. */ if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) && (skb->len < (cur_mss >> 1)) && - (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) && (!tcp_skb_is_last(sk, skb)) && + (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) && (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) && (tcp_skb_pcount(skb) == 1 && -- cgit v1.2.1 From 28e3487b7dd8a9791baac924bc887140ec747bed Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 23 Sep 2008 02:51:41 -0700 Subject: tcp: Fix queue traversal in tcp_use_frto(). We must check tcp_skb_is_last() before doing a tcp_write_queue_next(). Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index cbfe13d5f423..3b76bce769dd 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1746,6 +1746,8 @@ int tcp_use_frto(struct sock *sk) return 0; skb = tcp_write_queue_head(sk); + if (tcp_skb_is_last(sk, skb)) + return 1; skb = tcp_write_queue_next(sk, skb); /* Skips head */ tcp_for_write_queue_from(skb, sk) { if (skb == tcp_send_head(sk)) -- cgit v1.2.1 From 2d4c8266774188cda7f7e612e6dfb8ad12c579d5 Mon Sep 17 00:00:00 2001 From: Michael Kerrisk Date: Mon, 22 Sep 2008 13:57:49 -0700 Subject: sys_paccept: disable paccept() until API design is resolved The reasons for disabling paccept() are as follows: * The API is more complex than needed. There is AFAICS no demonstrated use case that the sigset argument of this syscall serves that couldn't equally be served by the use of pselect/ppoll/epoll_pwait + traditional accept(). Roland seems to concur with this opinion (http://thread.gmane.org/gmane.linux.kernel/723953/focus=732255). I have (more than once) asked Ulrich to explain otherwise (http://thread.gmane.org/gmane.linux.kernel/723952/focus=731018), but he does not respond, so one is left to assume that he doesn't know of such a case. * The use of a sigset argument is not consistent with other I/O APIs that can block on a single file descriptor (e.g., read(), recv(), connect()). * The behavior of paccept() when interrupted by a signal is IMO strange: the kernel restarts the system call if SA_RESTART was set for the handler. I think that it should not do this -- that it should behave consistently with paccept()/ppoll()/epoll_pwait(), which never restart, regardless of SA_RESTART. The reasoning here is that the very purpose of paccept() is to wait for a connection or a signal, and that restarting in the latter case is probably never useful. (Note: Roland disagrees on this point, believing that rather paccept() should be consistent with accept() in its behavior wrt EINTR (http://thread.gmane.org/gmane.linux.kernel/723953/focus=732255).) I believe that instead, a simpler API, consistent with Ulrich's other recent additions, is preferable: accept4(int fd, struct sockaddr *sa, socklen_t *salen, ind flags); (This simpler API was originally proposed by Ulrich: http://thread.gmane.org/gmane.linux.network/92072) If this simpler API is added, then if we later decide that the sigset argument really is required, then a suitable bit in 'flags' could be added to indicate the presence of the sigset argument. At this point, I am hoping we either will get a counter-argument from Ulrich about why we really do need paccept()'s sigset argument, or that he will resubmit the original accept4() patch. Signed-off-by: Michael Kerrisk Cc: David Miller Cc: Davide Libenzi Cc: Alan Cox Cc: Ulrich Drepper Cc: Jakub Jelinek Cc: Roland McGrath Cc: Oleg Nesterov Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/socket.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index 8ef8ba81b9e2..3e8d4e35c08f 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1511,6 +1511,7 @@ out_fd: goto out_put; } +#if 0 #ifdef HAVE_SET_RESTORE_SIGMASK asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen, @@ -1564,6 +1565,7 @@ asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr, return do_accept(fd, upeer_sockaddr, upeer_addrlen, flags); } #endif +#endif asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen) -- cgit v1.2.1 From 96ca4a2cc1454cf633a1e0796b7ef39d937b87ec Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 23 Sep 2008 21:23:19 -0700 Subject: net: remove ifalias on empty given alias This patch removes the potentially allocated ifalias when the (new) given alias is empty. E.g. when setting echo "" > /sys/class/net/eth0/ifalias Signed-off-by: Oliver Hartkopp Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/core/dev.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index e91390533999..a90737fe2472 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -967,6 +967,14 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len) if (len >= IFALIASZ) return -EINVAL; + if (!len) { + if (dev->ifalias) { + kfree(dev->ifalias); + dev->ifalias = NULL; + } + return 0; + } + dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL); if (!dev->ifalias) return -ENOMEM; -- cgit v1.2.1 From 762af43bda3d8281a2738d3920ae5ded170aaf39 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 15 Sep 2008 10:30:34 +0200 Subject: cfg80211: fix static regdomains When Luis added the static regdomains back he used +/-20 of the centre frequencies to account for 40MHz bandwidth neglecting the fact that 40MHz bandwidth cannot be used on the channels close to the allowed band edges. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/core.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index a910cd2d0fd1..59e4d7debf02 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -41,7 +41,7 @@ const struct ieee80211_regdomain world_regdom = { .n_reg_rules = 1, .alpha2 = "00", .reg_rules = { - REG_RULE(2402, 2472, 40, 6, 20, + REG_RULE(2412-10, 2462+10, 40, 6, 20, NL80211_RRF_PASSIVE_SCAN | NL80211_RRF_NO_IBSS), } @@ -64,17 +64,17 @@ const struct ieee80211_regdomain us_regdom = { .alpha2 = "US", .reg_rules = { /* IEEE 802.11b/g, channels 1..11 */ - REG_RULE(2412-20, 2462+20, 40, 6, 27, 0), + REG_RULE(2412-10, 2462+10, 40, 6, 27, 0), /* IEEE 802.11a, channel 36 */ - REG_RULE(5180-20, 5180+20, 40, 6, 23, 0), + REG_RULE(5180-10, 5180+10, 40, 6, 23, 0), /* IEEE 802.11a, channel 40 */ - REG_RULE(5200-20, 5200+20, 40, 6, 23, 0), + REG_RULE(5200-10, 5200+10, 40, 6, 23, 0), /* IEEE 802.11a, channel 44 */ - REG_RULE(5220-20, 5220+20, 40, 6, 23, 0), + REG_RULE(5220-10, 5220+10, 40, 6, 23, 0), /* IEEE 802.11a, channels 48..64 */ - REG_RULE(5240-20, 5320+20, 40, 6, 23, 0), + REG_RULE(5240-10, 5320+10, 40, 6, 23, 0), /* IEEE 802.11a, channels 149..165, outdoor */ - REG_RULE(5745-20, 5825+20, 40, 6, 30, 0), + REG_RULE(5745-10, 5825+10, 40, 6, 30, 0), } }; @@ -83,12 +83,12 @@ const struct ieee80211_regdomain jp_regdom = { .alpha2 = "JP", .reg_rules = { /* IEEE 802.11b/g, channels 1..14 */ - REG_RULE(2412-20, 2484+20, 40, 6, 20, 0), + REG_RULE(2412-10, 2484+10, 40, 6, 20, 0), /* IEEE 802.11a, channels 34..48 */ - REG_RULE(5170-20, 5240+20, 40, 6, 20, + REG_RULE(5170-10, 5240+10, 40, 6, 20, NL80211_RRF_PASSIVE_SCAN), /* IEEE 802.11a, channels 52..64 */ - REG_RULE(5260-20, 5320+20, 40, 6, 20, + REG_RULE(5260-10, 5320+10, 40, 6, 20, NL80211_RRF_NO_IBSS | NL80211_RRF_DFS), } @@ -101,22 +101,22 @@ const struct ieee80211_regdomain eu_regdom = { .alpha2 = "EU", .reg_rules = { /* IEEE 802.11b/g, channels 1..13 */ - REG_RULE(2412-20, 2472+20, 40, 6, 20, 0), + REG_RULE(2412-10, 2472+10, 40, 6, 20, 0), /* IEEE 802.11a, channel 36 */ - REG_RULE(5180-20, 5180+20, 40, 6, 23, + REG_RULE(5180-10, 5180+10, 40, 6, 23, NL80211_RRF_PASSIVE_SCAN), /* IEEE 802.11a, channel 40 */ - REG_RULE(5200-20, 5200+20, 40, 6, 23, + REG_RULE(5200-10, 5200+10, 40, 6, 23, NL80211_RRF_PASSIVE_SCAN), /* IEEE 802.11a, channel 44 */ - REG_RULE(5220-20, 5220+20, 40, 6, 23, + REG_RULE(5220-10, 5220+10, 40, 6, 23, NL80211_RRF_PASSIVE_SCAN), /* IEEE 802.11a, channels 48..64 */ - REG_RULE(5240-20, 5320+20, 40, 6, 20, + REG_RULE(5240-10, 5320+10, 40, 6, 20, NL80211_RRF_NO_IBSS | NL80211_RRF_DFS), /* IEEE 802.11a, channels 100..140 */ - REG_RULE(5500-20, 5700+20, 40, 6, 30, + REG_RULE(5500-10, 5700+10, 40, 6, 30, NL80211_RRF_NO_IBSS | NL80211_RRF_DFS), } -- cgit v1.2.1 From 734366deaee05b1a5842d977960b4cc574d7551d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 15 Sep 2008 10:56:48 +0200 Subject: cfg80211: clean up regulatory mess The recent code from Luis is an #ifdef hell and contains lots of code that's stuffed into the wrong file making a whole bunch of things needlessly non-static, and besides, what is it doing in core.c?? Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/core.c | 144 ----------------------------- net/wireless/reg.c | 254 +++++++++++++++++++++++++++++++++++++++++----------- net/wireless/reg.h | 35 +------- 3 files changed, 203 insertions(+), 230 deletions(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index 59e4d7debf02..88cb73394864 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -29,107 +29,6 @@ MODULE_AUTHOR("Johannes Berg"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("wireless configuration support"); -struct list_head regulatory_requests; - -/* Central wireless core regulatory domains, we only need two, - * the current one and a world regulatory domain in case we have no - * information to give us an alpha2 */ -struct ieee80211_regdomain *cfg80211_regdomain; - -/* We keep a static world regulatory domain in case of the absence of CRDA */ -const struct ieee80211_regdomain world_regdom = { - .n_reg_rules = 1, - .alpha2 = "00", - .reg_rules = { - REG_RULE(2412-10, 2462+10, 40, 6, 20, - NL80211_RRF_PASSIVE_SCAN | - NL80211_RRF_NO_IBSS), - } -}; - -#ifdef CONFIG_WIRELESS_OLD_REGULATORY -/* All this fucking static junk will be removed soon, so - * don't fucking count on it !@#$ */ - -static char *ieee80211_regdom = "US"; -module_param(ieee80211_regdom, charp, 0444); -MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code"); - -/* We assume 40 MHz bandwidth for the old regulatory work. - * We make emphasis we are using the exact same frequencies - * as before */ - -const struct ieee80211_regdomain us_regdom = { - .n_reg_rules = 6, - .alpha2 = "US", - .reg_rules = { - /* IEEE 802.11b/g, channels 1..11 */ - REG_RULE(2412-10, 2462+10, 40, 6, 27, 0), - /* IEEE 802.11a, channel 36 */ - REG_RULE(5180-10, 5180+10, 40, 6, 23, 0), - /* IEEE 802.11a, channel 40 */ - REG_RULE(5200-10, 5200+10, 40, 6, 23, 0), - /* IEEE 802.11a, channel 44 */ - REG_RULE(5220-10, 5220+10, 40, 6, 23, 0), - /* IEEE 802.11a, channels 48..64 */ - REG_RULE(5240-10, 5320+10, 40, 6, 23, 0), - /* IEEE 802.11a, channels 149..165, outdoor */ - REG_RULE(5745-10, 5825+10, 40, 6, 30, 0), - } -}; - -const struct ieee80211_regdomain jp_regdom = { - .n_reg_rules = 3, - .alpha2 = "JP", - .reg_rules = { - /* IEEE 802.11b/g, channels 1..14 */ - REG_RULE(2412-10, 2484+10, 40, 6, 20, 0), - /* IEEE 802.11a, channels 34..48 */ - REG_RULE(5170-10, 5240+10, 40, 6, 20, - NL80211_RRF_PASSIVE_SCAN), - /* IEEE 802.11a, channels 52..64 */ - REG_RULE(5260-10, 5320+10, 40, 6, 20, - NL80211_RRF_NO_IBSS | - NL80211_RRF_DFS), - } -}; - -const struct ieee80211_regdomain eu_regdom = { - .n_reg_rules = 6, - /* This alpha2 is bogus, we leave it here just for stupid - * backward compatibility */ - .alpha2 = "EU", - .reg_rules = { - /* IEEE 802.11b/g, channels 1..13 */ - REG_RULE(2412-10, 2472+10, 40, 6, 20, 0), - /* IEEE 802.11a, channel 36 */ - REG_RULE(5180-10, 5180+10, 40, 6, 23, - NL80211_RRF_PASSIVE_SCAN), - /* IEEE 802.11a, channel 40 */ - REG_RULE(5200-10, 5200+10, 40, 6, 23, - NL80211_RRF_PASSIVE_SCAN), - /* IEEE 802.11a, channel 44 */ - REG_RULE(5220-10, 5220+10, 40, 6, 23, - NL80211_RRF_PASSIVE_SCAN), - /* IEEE 802.11a, channels 48..64 */ - REG_RULE(5240-10, 5320+10, 40, 6, 20, - NL80211_RRF_NO_IBSS | - NL80211_RRF_DFS), - /* IEEE 802.11a, channels 100..140 */ - REG_RULE(5500-10, 5700+10, 40, 6, 30, - NL80211_RRF_NO_IBSS | - NL80211_RRF_DFS), - } -}; - -#endif - -struct ieee80211_regdomain *cfg80211_world_regdom = - (struct ieee80211_regdomain *) &world_regdom; - -LIST_HEAD(regulatory_requests); -DEFINE_MUTEX(cfg80211_reg_mutex); - /* RCU might be appropriate here since we usually * only read the list, and that can happen quite * often because we need to do it for each command */ @@ -514,34 +413,10 @@ static struct notifier_block cfg80211_netdev_notifier = { .notifier_call = cfg80211_netdev_notifier_call, }; -#ifdef CONFIG_WIRELESS_OLD_REGULATORY -const struct ieee80211_regdomain *static_regdom(char *alpha2) -{ - if (alpha2[0] == 'U' && alpha2[1] == 'S') - return &us_regdom; - if (alpha2[0] == 'J' && alpha2[1] == 'P') - return &jp_regdom; - if (alpha2[0] == 'E' && alpha2[1] == 'U') - return &eu_regdom; - /* Default, as per the old rules */ - return &us_regdom; -} -#endif - static int cfg80211_init(void) { int err; -#ifdef CONFIG_WIRELESS_OLD_REGULATORY - cfg80211_regdomain = - (struct ieee80211_regdomain *) static_regdom(ieee80211_regdom); - /* Used during reset_regdomains_static() */ - cfg80211_world_regdom = cfg80211_regdomain; -#else - cfg80211_regdomain = - (struct ieee80211_regdomain *) cfg80211_world_regdom; -#endif - err = wiphy_sysfs_init(); if (err) goto out_fail_sysfs; @@ -560,25 +435,6 @@ static int cfg80211_init(void) if (err) goto out_fail_reg; -#ifdef CONFIG_WIRELESS_OLD_REGULATORY - printk(KERN_INFO "cfg80211: Using old static regulatory domain:\n"); - print_regdomain_info(cfg80211_regdomain); - /* The old code still requests for a new regdomain and if - * you have CRDA you get it updated, otherwise you get - * stuck with the static values. We ignore "EU" code as - * that is not a valid ISO / IEC 3166 alpha2 */ - if (ieee80211_regdom[0] != 'E' && - ieee80211_regdom[1] != 'U') - err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, - ieee80211_regdom, NULL); -#else - err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, "00", NULL); - if (err) - printk(KERN_ERR "cfg80211: calling CRDA failed - " - "unable to update world regulatory domain, " - "using static definition\n"); -#endif - return 0; out_fail_reg: diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 592b2e391d42..5fbeab50996f 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -42,6 +42,18 @@ #include "core.h" #include "reg.h" +/* wiphy is set if this request's initiator is REGDOM_SET_BY_DRIVER */ +struct regulatory_request { + struct list_head list; + struct wiphy *wiphy; + int granted; + enum reg_set_by initiator; + char alpha2[2]; +}; + +static LIST_HEAD(regulatory_requests); +DEFINE_MUTEX(cfg80211_reg_mutex); + /* To trigger userspace events */ static struct platform_device *reg_pdev; @@ -51,6 +63,161 @@ static u32 supported_bandwidths[] = { MHZ_TO_KHZ(20), }; +static struct list_head regulatory_requests; + +/* Central wireless core regulatory domains, we only need two, + * the current one and a world regulatory domain in case we have no + * information to give us an alpha2 */ +static struct ieee80211_regdomain *cfg80211_regdomain; + +/* We keep a static world regulatory domain in case of the absence of CRDA */ +static const struct ieee80211_regdomain world_regdom = { + .n_reg_rules = 1, + .alpha2 = "00", + .reg_rules = { + REG_RULE(2412-10, 2462+10, 40, 6, 20, + NL80211_RRF_PASSIVE_SCAN | + NL80211_RRF_NO_IBSS), + } +}; + +static struct ieee80211_regdomain *cfg80211_world_regdom = + (struct ieee80211_regdomain *) &world_regdom; + +#ifdef CONFIG_WIRELESS_OLD_REGULATORY +static char *ieee80211_regdom = "US"; +module_param(ieee80211_regdom, charp, 0444); +MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code"); + +/* We assume 40 MHz bandwidth for the old regulatory work. + * We make emphasis we are using the exact same frequencies + * as before */ + +static const struct ieee80211_regdomain us_regdom = { + .n_reg_rules = 6, + .alpha2 = "US", + .reg_rules = { + /* IEEE 802.11b/g, channels 1..11 */ + REG_RULE(2412-10, 2462+10, 40, 6, 27, 0), + /* IEEE 802.11a, channel 36 */ + REG_RULE(5180-10, 5180+10, 40, 6, 23, 0), + /* IEEE 802.11a, channel 40 */ + REG_RULE(5200-10, 5200+10, 40, 6, 23, 0), + /* IEEE 802.11a, channel 44 */ + REG_RULE(5220-10, 5220+10, 40, 6, 23, 0), + /* IEEE 802.11a, channels 48..64 */ + REG_RULE(5240-10, 5320+10, 40, 6, 23, 0), + /* IEEE 802.11a, channels 149..165, outdoor */ + REG_RULE(5745-10, 5825+10, 40, 6, 30, 0), + } +}; + +static const struct ieee80211_regdomain jp_regdom = { + .n_reg_rules = 3, + .alpha2 = "JP", + .reg_rules = { + /* IEEE 802.11b/g, channels 1..14 */ + REG_RULE(2412-10, 2484+10, 40, 6, 20, 0), + /* IEEE 802.11a, channels 34..48 */ + REG_RULE(5170-10, 5240+10, 40, 6, 20, + NL80211_RRF_PASSIVE_SCAN), + /* IEEE 802.11a, channels 52..64 */ + REG_RULE(5260-10, 5320+10, 40, 6, 20, + NL80211_RRF_NO_IBSS | + NL80211_RRF_DFS), + } +}; + +static const struct ieee80211_regdomain eu_regdom = { + .n_reg_rules = 6, + /* This alpha2 is bogus, we leave it here just for stupid + * backward compatibility */ + .alpha2 = "EU", + .reg_rules = { + /* IEEE 802.11b/g, channels 1..13 */ + REG_RULE(2412-10, 2472+10, 40, 6, 20, 0), + /* IEEE 802.11a, channel 36 */ + REG_RULE(5180-10, 5180+10, 40, 6, 23, + NL80211_RRF_PASSIVE_SCAN), + /* IEEE 802.11a, channel 40 */ + REG_RULE(5200-10, 5200+10, 40, 6, 23, + NL80211_RRF_PASSIVE_SCAN), + /* IEEE 802.11a, channel 44 */ + REG_RULE(5220-10, 5220+10, 40, 6, 23, + NL80211_RRF_PASSIVE_SCAN), + /* IEEE 802.11a, channels 48..64 */ + REG_RULE(5240-10, 5320+10, 40, 6, 20, + NL80211_RRF_NO_IBSS | + NL80211_RRF_DFS), + /* IEEE 802.11a, channels 100..140 */ + REG_RULE(5500-10, 5700+10, 40, 6, 30, + NL80211_RRF_NO_IBSS | + NL80211_RRF_DFS), + } +}; + +static const struct ieee80211_regdomain *static_regdom(char *alpha2) +{ + if (alpha2[0] == 'U' && alpha2[1] == 'S') + return &us_regdom; + if (alpha2[0] == 'J' && alpha2[1] == 'P') + return &jp_regdom; + if (alpha2[0] == 'E' && alpha2[1] == 'U') + return &eu_regdom; + /* Default, as per the old rules */ + return &us_regdom; +} + +static bool is_old_static_regdom(struct ieee80211_regdomain *rd) +{ + if (rd == &us_regdom || rd == &jp_regdom || rd == &eu_regdom) + return true; + return false; +} + +/* The old crap never deals with a world regulatory domain, it only + * deals with the static regulatory domain passed and if possible + * an updated "US" or "JP" regulatory domain. We do however store the + * old static regulatory domain in cfg80211_world_regdom for convenience + * of use here */ +static void reset_regdomains_static(void) +{ + if (!is_old_static_regdom(cfg80211_regdomain)) + kfree(cfg80211_regdomain); + /* This is setting the regdom to the old static regdom */ + cfg80211_regdomain = + (struct ieee80211_regdomain *) cfg80211_world_regdom; +} +#else +static void reset_regdomains(void) +{ + if (cfg80211_world_regdom && cfg80211_world_regdom != &world_regdom) { + if (cfg80211_world_regdom == cfg80211_regdomain) { + kfree(cfg80211_regdomain); + } else { + kfree(cfg80211_world_regdom); + kfree(cfg80211_regdomain); + } + } else if (cfg80211_regdomain && cfg80211_regdomain != &world_regdom) + kfree(cfg80211_regdomain); + + cfg80211_world_regdom = (struct ieee80211_regdomain *) &world_regdom; + cfg80211_regdomain = NULL; +} + +/* Dynamic world regulatory domain requested by the wireless + * core upon initialization */ +static void update_world_regdomain(struct ieee80211_regdomain *rd) +{ + BUG_ON(list_empty(®ulatory_requests)); + + reset_regdomains(); + + cfg80211_world_regdom = rd; + cfg80211_regdomain = rd; +} +#endif + bool is_world_regdom(char *alpha2) { if (!alpha2) @@ -555,58 +722,6 @@ void print_regdomain_info(struct ieee80211_regdomain *rd) print_rd_rules(rd); } -#ifdef CONFIG_WIRELESS_OLD_REGULATORY - -static bool is_old_static_regdom(struct ieee80211_regdomain *rd) -{ - if (rd == &us_regdom || rd == &jp_regdom || rd == &eu_regdom) - return true; - return false; -} - -/* The old crap never deals with a world regulatory domain, it only - * deals with the static regulatory domain passed and if possible - * an updated "US" or "JP" regulatory domain. We do however store the - * old static regulatory domain in cfg80211_world_regdom for convenience - * of use here */ -static void reset_regdomains_static(void) -{ - if (!is_old_static_regdom(cfg80211_regdomain)) - kfree(cfg80211_regdomain); - /* This is setting the regdom to the old static regdom */ - cfg80211_regdomain = - (struct ieee80211_regdomain *) cfg80211_world_regdom; -} -#else -static void reset_regdomains(void) -{ - if (cfg80211_world_regdom && cfg80211_world_regdom != &world_regdom) { - if (cfg80211_world_regdom == cfg80211_regdomain) { - kfree(cfg80211_regdomain); - } else { - kfree(cfg80211_world_regdom); - kfree(cfg80211_regdomain); - } - } else if (cfg80211_regdomain && cfg80211_regdomain != &world_regdom) - kfree(cfg80211_regdomain); - - cfg80211_world_regdom = (struct ieee80211_regdomain *) &world_regdom; - cfg80211_regdomain = NULL; -} - -/* Dynamic world regulatory domain requested by the wireless - * core upon initialization */ -static void update_world_regdomain(struct ieee80211_regdomain *rd) -{ - BUG_ON(list_empty(®ulatory_requests)); - - reset_regdomains(); - - cfg80211_world_regdom = rd; - cfg80211_regdomain = rd; -} -#endif - static int __set_regdom(struct ieee80211_regdomain *rd) { struct regulatory_request *request = NULL; @@ -615,7 +730,7 @@ static int __set_regdom(struct ieee80211_regdomain *rd) #ifdef CONFIG_WIRELESS_OLD_REGULATORY /* We ignore the world regdom with the old static regdomains setup - * as there is no point to it with satic regulatory definitions :( + * as there is no point to it with static regulatory definitions :( * Don't worry this shit will be removed soon... */ if (is_world_regdom(rd->alpha2)) return -EINVAL; @@ -735,25 +850,58 @@ int set_regdom(struct ieee80211_regdomain *rd) int regulatory_init(void) { + int err; + reg_pdev = platform_device_register_simple("regulatory", 0, NULL, 0); if (IS_ERR(reg_pdev)) return PTR_ERR(reg_pdev); + +#ifdef CONFIG_WIRELESS_OLD_REGULATORY + cfg80211_regdomain = + (struct ieee80211_regdomain *) static_regdom(ieee80211_regdom); + /* Used during reset_regdomains_static() */ + cfg80211_world_regdom = cfg80211_regdomain; + + printk(KERN_INFO "cfg80211: Using old static regulatory domain:\n"); + print_regdomain_info(cfg80211_regdomain); + /* The old code still requests for a new regdomain and if + * you have CRDA you get it updated, otherwise you get + * stuck with the static values. We ignore "EU" code as + * that is not a valid ISO / IEC 3166 alpha2 */ + if (ieee80211_regdom[0] != 'E' && ieee80211_regdom[1] != 'U') + err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, + ieee80211_regdom, NULL); +#else + cfg80211_regdomain = + (struct ieee80211_regdomain *) cfg80211_world_regdom; + + err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, "00", NULL); + if (err) + printk(KERN_ERR "cfg80211: calling CRDA failed - " + "unable to update world regulatory domain, " + "using static definition\n"); +#endif + return 0; } void regulatory_exit(void) { struct regulatory_request *req, *req_tmp; + mutex_lock(&cfg80211_drv_mutex); + #ifdef CONFIG_WIRELESS_OLD_REGULATORY reset_regdomains_static(); #else reset_regdomains(); #endif + list_for_each_entry_safe(req, req_tmp, ®ulatory_requests, list) { list_del(&req->list); kfree(req); } platform_device_unregister(reg_pdev); + mutex_unlock(&cfg80211_drv_mutex); } diff --git a/net/wireless/reg.h b/net/wireless/reg.h index d75fd0232972..b169815987f4 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -1,44 +1,13 @@ #ifndef __NET_WIRELESS_REG_H #define __NET_WIRELESS_REG_H -extern const struct ieee80211_regdomain world_regdom; -#ifdef CONFIG_WIRELESS_OLD_REGULATORY -extern const struct ieee80211_regdomain us_regdom; -extern const struct ieee80211_regdomain jp_regdom; -extern const struct ieee80211_regdomain eu_regdom; -#endif - -extern struct ieee80211_regdomain *cfg80211_regdomain; -extern struct ieee80211_regdomain *cfg80211_world_regdom; -extern struct list_head regulatory_requests; - -struct regdom_last_setby { - struct wiphy *wiphy; - u8 initiator; -}; - -/* wiphy is set if this request's initiator is REGDOM_SET_BY_DRIVER */ -struct regulatory_request { - struct list_head list; - struct wiphy *wiphy; - int granted; - enum reg_set_by initiator; - char alpha2[2]; -}; - +extern struct mutex cfg80211_reg_mutex; bool is_world_regdom(char *alpha2); bool reg_is_valid_request(char *alpha2); -int set_regdom(struct ieee80211_regdomain *rd); -int __regulatory_hint_alpha2(struct wiphy *wiphy, enum reg_set_by set_by, - const char *alpha2); - int regulatory_init(void); void regulatory_exit(void); -void print_regdomain_info(struct ieee80211_regdomain *); - -/* If a char is A-Z */ -#define IS_ALPHA(letter) (letter >= 65 && letter <= 90) +int set_regdom(struct ieee80211_regdomain *rd); #endif /* __NET_WIRELESS_REG_H */ -- cgit v1.2.1 From a3d2eaf0dcad6dfdf44f3093aef688dfca714b6c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 15 Sep 2008 11:10:52 +0200 Subject: cfg80211: fix regulatory code const A few pointers and structures in the regulatory code are const, but because it wasn't done properly a whole bunch of bogus casts were needed to compile without warning. Mark everything const properly to avoid that kind of junk code. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/reg.c | 58 ++++++++++++++++++++++++++---------------------------- net/wireless/reg.h | 6 +++--- 2 files changed, 31 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 5fbeab50996f..7aba46efc7dd 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -68,7 +68,7 @@ static struct list_head regulatory_requests; /* Central wireless core regulatory domains, we only need two, * the current one and a world regulatory domain in case we have no * information to give us an alpha2 */ -static struct ieee80211_regdomain *cfg80211_regdomain; +static const struct ieee80211_regdomain *cfg80211_regdomain; /* We keep a static world regulatory domain in case of the absence of CRDA */ static const struct ieee80211_regdomain world_regdom = { @@ -81,8 +81,8 @@ static const struct ieee80211_regdomain world_regdom = { } }; -static struct ieee80211_regdomain *cfg80211_world_regdom = - (struct ieee80211_regdomain *) &world_regdom; +static const struct ieee80211_regdomain *cfg80211_world_regdom = + &world_regdom; #ifdef CONFIG_WIRELESS_OLD_REGULATORY static char *ieee80211_regdom = "US"; @@ -168,7 +168,7 @@ static const struct ieee80211_regdomain *static_regdom(char *alpha2) return &us_regdom; } -static bool is_old_static_regdom(struct ieee80211_regdomain *rd) +static bool is_old_static_regdom(const struct ieee80211_regdomain *rd) { if (rd == &us_regdom || rd == &jp_regdom || rd == &eu_regdom) return true; @@ -201,13 +201,13 @@ static void reset_regdomains(void) } else if (cfg80211_regdomain && cfg80211_regdomain != &world_regdom) kfree(cfg80211_regdomain); - cfg80211_world_regdom = (struct ieee80211_regdomain *) &world_regdom; + cfg80211_world_regdom = &world_regdom; cfg80211_regdomain = NULL; } /* Dynamic world regulatory domain requested by the wireless * core upon initialization */ -static void update_world_regdomain(struct ieee80211_regdomain *rd) +static void update_world_regdomain(const struct ieee80211_regdomain *rd) { BUG_ON(list_empty(®ulatory_requests)); @@ -218,7 +218,7 @@ static void update_world_regdomain(struct ieee80211_regdomain *rd) } #endif -bool is_world_regdom(char *alpha2) +bool is_world_regdom(const char *alpha2) { if (!alpha2) return false; @@ -227,7 +227,7 @@ bool is_world_regdom(char *alpha2) return false; } -static bool is_alpha2_set(char *alpha2) +static bool is_alpha2_set(const char *alpha2) { if (!alpha2) return false; @@ -244,7 +244,7 @@ static bool is_alpha_upper(char letter) return false; } -static bool is_unknown_alpha2(char *alpha2) +static bool is_unknown_alpha2(const char *alpha2) { if (!alpha2) return false; @@ -255,7 +255,7 @@ static bool is_unknown_alpha2(char *alpha2) return false; } -static bool is_an_alpha2(char *alpha2) +static bool is_an_alpha2(const char *alpha2) { if (!alpha2) return false; @@ -264,7 +264,7 @@ static bool is_an_alpha2(char *alpha2) return false; } -static bool alpha2_equal(char *alpha2_x, char *alpha2_y) +static bool alpha2_equal(const char *alpha2_x, const char *alpha2_y) { if (!alpha2_x || !alpha2_y) return false; @@ -274,7 +274,7 @@ static bool alpha2_equal(char *alpha2_x, char *alpha2_y) return false; } -static bool regdom_changed(char *alpha2) +static bool regdom_changed(const char *alpha2) { if (!cfg80211_regdomain) return true; @@ -405,7 +405,7 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, } } -static bool __reg_is_valid_request(char *alpha2, +static bool __reg_is_valid_request(const char *alpha2, struct regulatory_request **request) { struct regulatory_request *req; @@ -421,16 +421,16 @@ static bool __reg_is_valid_request(char *alpha2, } /* Used by nl80211 before kmalloc'ing our regulatory domain */ -bool reg_is_valid_request(char *alpha2) +bool reg_is_valid_request(const char *alpha2) { struct regulatory_request *request = NULL; return __reg_is_valid_request(alpha2, &request); } /* Sanity check on a regulatory rule */ -static bool is_valid_reg_rule(struct ieee80211_reg_rule *rule) +static bool is_valid_reg_rule(const struct ieee80211_reg_rule *rule) { - struct ieee80211_freq_range *freq_range = &rule->freq_range; + const struct ieee80211_freq_range *freq_range = &rule->freq_range; u32 freq_diff; if (freq_range->start_freq_khz == 0 || freq_range->end_freq_khz == 0) @@ -447,9 +447,9 @@ static bool is_valid_reg_rule(struct ieee80211_reg_rule *rule) return true; } -static bool is_valid_rd(struct ieee80211_regdomain *rd) +static bool is_valid_rd(const struct ieee80211_regdomain *rd) { - struct ieee80211_reg_rule *reg_rule = NULL; + const struct ieee80211_reg_rule *reg_rule = NULL; unsigned int i; if (!rd->n_reg_rules) @@ -661,12 +661,12 @@ unlock_and_exit: EXPORT_SYMBOL(regulatory_hint); -static void print_rd_rules(struct ieee80211_regdomain *rd) +static void print_rd_rules(const struct ieee80211_regdomain *rd) { unsigned int i; - struct ieee80211_reg_rule *reg_rule = NULL; - struct ieee80211_freq_range *freq_range = NULL; - struct ieee80211_power_rule *power_rule = NULL; + const struct ieee80211_reg_rule *reg_rule = NULL; + const struct ieee80211_freq_range *freq_range = NULL; + const struct ieee80211_power_rule *power_rule = NULL; printk(KERN_INFO "\t(start_freq - end_freq @ bandwidth), " "(max_antenna_gain, max_eirp)\n"); @@ -696,7 +696,7 @@ static void print_rd_rules(struct ieee80211_regdomain *rd) } } -static void print_regdomain(struct ieee80211_regdomain *rd) +static void print_regdomain(const struct ieee80211_regdomain *rd) { if (is_world_regdom(rd->alpha2)) @@ -715,14 +715,14 @@ static void print_regdomain(struct ieee80211_regdomain *rd) print_rd_rules(rd); } -void print_regdomain_info(struct ieee80211_regdomain *rd) +void print_regdomain_info(const struct ieee80211_regdomain *rd) { printk(KERN_INFO "cfg80211: Regulatory domain: %c%c\n", rd->alpha2[0], rd->alpha2[1]); print_rd_rules(rd); } -static int __set_regdom(struct ieee80211_regdomain *rd) +static int __set_regdom(const struct ieee80211_regdomain *rd) { struct regulatory_request *request = NULL; @@ -804,7 +804,7 @@ static int __set_regdom(struct ieee80211_regdomain *rd) * multiple drivers can be ironed out later. Caller must've already * kmalloc'd the rd structure. If this calls fails you should kfree() * the passed rd. Caller must hold cfg80211_drv_mutex */ -int set_regdom(struct ieee80211_regdomain *rd) +int set_regdom(const struct ieee80211_regdomain *rd) { struct regulatory_request *this_request = NULL, *prev_request = NULL; int r; @@ -857,8 +857,7 @@ int regulatory_init(void) return PTR_ERR(reg_pdev); #ifdef CONFIG_WIRELESS_OLD_REGULATORY - cfg80211_regdomain = - (struct ieee80211_regdomain *) static_regdom(ieee80211_regdom); + cfg80211_regdomain = static_regdom(ieee80211_regdom); /* Used during reset_regdomains_static() */ cfg80211_world_regdom = cfg80211_regdomain; @@ -872,8 +871,7 @@ int regulatory_init(void) err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, ieee80211_regdom, NULL); #else - cfg80211_regdomain = - (struct ieee80211_regdomain *) cfg80211_world_regdom; + cfg80211_regdomain = cfg80211_world_regdom; err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, "00", NULL); if (err) diff --git a/net/wireless/reg.h b/net/wireless/reg.h index b169815987f4..a33362872f3c 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -2,12 +2,12 @@ #define __NET_WIRELESS_REG_H extern struct mutex cfg80211_reg_mutex; -bool is_world_regdom(char *alpha2); -bool reg_is_valid_request(char *alpha2); +bool is_world_regdom(const char *alpha2); +bool reg_is_valid_request(const char *alpha2); int regulatory_init(void); void regulatory_exit(void); -int set_regdom(struct ieee80211_regdomain *rd); +int set_regdom(const struct ieee80211_regdomain *rd); #endif /* __NET_WIRELESS_REG_H */ -- cgit v1.2.1 From 942b25cf9028e7c2f6446ee7c6618bd70dafec5f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 15 Sep 2008 11:26:47 +0200 Subject: cfg80211: clean up static regdomain mess The statically defined regdomains are used in a very convoluted way, use them instead to prime the information we have and then continue operating normally. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/reg.c | 73 +++++++++++++++--------------------------------------- 1 file changed, 20 insertions(+), 53 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 7aba46efc7dd..626dbb688499 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -174,32 +174,27 @@ static bool is_old_static_regdom(const struct ieee80211_regdomain *rd) return true; return false; } - -/* The old crap never deals with a world regulatory domain, it only - * deals with the static regulatory domain passed and if possible - * an updated "US" or "JP" regulatory domain. We do however store the - * old static regulatory domain in cfg80211_world_regdom for convenience - * of use here */ -static void reset_regdomains_static(void) +#else +static inline bool is_old_static_regdom(const struct ieee80211_regdomain *rd) { - if (!is_old_static_regdom(cfg80211_regdomain)) - kfree(cfg80211_regdomain); - /* This is setting the regdom to the old static regdom */ - cfg80211_regdomain = - (struct ieee80211_regdomain *) cfg80211_world_regdom; + return false; } -#else +#endif + static void reset_regdomains(void) { - if (cfg80211_world_regdom && cfg80211_world_regdom != &world_regdom) { - if (cfg80211_world_regdom == cfg80211_regdomain) { - kfree(cfg80211_regdomain); - } else { - kfree(cfg80211_world_regdom); - kfree(cfg80211_regdomain); - } - } else if (cfg80211_regdomain && cfg80211_regdomain != &world_regdom) - kfree(cfg80211_regdomain); + /* avoid freeing static information or freeing something twice */ + if (cfg80211_regdomain == cfg80211_world_regdom) + cfg80211_regdomain = NULL; + if (cfg80211_world_regdom == &world_regdom) + cfg80211_world_regdom = NULL; + if (cfg80211_regdomain == &world_regdom) + cfg80211_regdomain = NULL; + if (is_old_static_regdom(cfg80211_regdomain)) + cfg80211_regdomain = NULL; + + kfree(cfg80211_regdomain); + kfree(cfg80211_world_regdom); cfg80211_world_regdom = &world_regdom; cfg80211_regdomain = NULL; @@ -216,7 +211,6 @@ static void update_world_regdomain(const struct ieee80211_regdomain *rd) cfg80211_world_regdom = rd; cfg80211_regdomain = rd; } -#endif bool is_world_regdom(const char *alpha2) { @@ -297,12 +291,8 @@ static int call_crda(const char *alpha2) printk(KERN_INFO "cfg80211: Calling CRDA for country: %c%c\n", alpha2[0], alpha2[1]); else -#ifdef CONFIG_WIRELESS_OLD_REGULATORY - return -EINVAL; -#else printk(KERN_INFO "cfg80211: Calling CRDA to update world " "regulatory domain\n"); -#endif country_env[8] = alpha2[0]; country_env[9] = alpha2[1]; @@ -728,20 +718,12 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) /* Some basic sanity checks first */ -#ifdef CONFIG_WIRELESS_OLD_REGULATORY - /* We ignore the world regdom with the old static regdomains setup - * as there is no point to it with static regulatory definitions :( - * Don't worry this shit will be removed soon... */ - if (is_world_regdom(rd->alpha2)) - return -EINVAL; -#else if (is_world_regdom(rd->alpha2)) { if (WARN_ON(!__reg_is_valid_request(rd->alpha2, &request))) return -EINVAL; update_world_regdomain(rd); return 0; } -#endif if (!is_alpha2_set(rd->alpha2) && !is_an_alpha2(rd->alpha2) && !is_unknown_alpha2(rd->alpha2)) @@ -750,15 +732,10 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) if (list_empty(®ulatory_requests)) return -EINVAL; -#ifdef CONFIG_WIRELESS_OLD_REGULATORY - /* Static "US" and "JP" will be overridden, but just once */ + /* allow overriding the static definitions if CRDA is present */ if (!is_old_static_regdom(cfg80211_regdomain) && - !regdom_changed(rd->alpha2)) - return -EINVAL; -#else - if (!regdom_changed(rd->alpha2)) + !regdom_changed(rd->alpha2)) return -EINVAL; -#endif /* Now lets set the regulatory domain, update all driver channels * and finally inform them of what we have done, in case they want @@ -768,11 +745,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) if (WARN_ON(!__reg_is_valid_request(rd->alpha2, &request))) return -EINVAL; -#ifdef CONFIG_WIRELESS_OLD_REGULATORY - reset_regdomains_static(); -#else reset_regdomains(); -#endif /* Country IE parsing coming soon */ switch (request->initiator) { @@ -858,10 +831,8 @@ int regulatory_init(void) #ifdef CONFIG_WIRELESS_OLD_REGULATORY cfg80211_regdomain = static_regdom(ieee80211_regdom); - /* Used during reset_regdomains_static() */ - cfg80211_world_regdom = cfg80211_regdomain; - printk(KERN_INFO "cfg80211: Using old static regulatory domain:\n"); + printk(KERN_INFO "cfg80211: Using static regulatory domain info\n"); print_regdomain_info(cfg80211_regdomain); /* The old code still requests for a new regdomain and if * you have CRDA you get it updated, otherwise you get @@ -889,11 +860,7 @@ void regulatory_exit(void) mutex_lock(&cfg80211_drv_mutex); -#ifdef CONFIG_WIRELESS_OLD_REGULATORY - reset_regdomains_static(); -#else reset_regdomains(); -#endif list_for_each_entry_safe(req, req_tmp, ®ulatory_requests, list) { list_del(&req->list); -- cgit v1.2.1 From 133b822638ff01eb1e32e1917b197c40ed095ddd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 16 Sep 2008 14:18:59 +0200 Subject: mac80211: make master iface not wireless There's no need to register the master netdev with cfg80211, in fact, this is quite dangerous and lead to having to add checks for the master interface all over the config handlers. This patch removes the "ieee80211_ptr" from the master iface in favour of having a small netdev_priv() associated with the master interface that stores the ieee80211_local pointer. Because of this, a lot of code in the configuration handlers can go away. To make this patch easier to verify I have also removed a number of wiphy_priv() calls in favour of getting the sdata first and then the local pointer from that. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/cfg.c | 59 ---------------------------------------------- net/mac80211/debugfs_sta.c | 3 +-- net/mac80211/ieee80211_i.h | 4 ++++ net/mac80211/main.c | 19 ++++++++------- net/mac80211/mesh.c | 2 +- net/mac80211/rx.c | 25 ++++++++------------ net/mac80211/tx.c | 26 ++++++++++---------- 7 files changed, 39 insertions(+), 99 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index e2574885db4a..a8501f14b167 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -82,7 +82,6 @@ static int ieee80211_change_iface(struct wiphy *wiphy, int ifindex, enum nl80211_iftype type, u32 *flags, struct vif_params *params) { - struct ieee80211_local *local = wiphy_priv(wiphy); struct net_device *dev; struct ieee80211_sub_if_data *sdata; int ret; @@ -95,9 +94,6 @@ static int ieee80211_change_iface(struct wiphy *wiphy, int ifindex, if (!nl80211_type_check(type)) return -EINVAL; - if (dev == local->mdev) - return -EOPNOTSUPP; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); ret = ieee80211_if_change_type(sdata, type); @@ -120,16 +116,12 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, u8 key_idx, u8 *mac_addr, struct key_params *params) { - struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; struct sta_info *sta = NULL; enum ieee80211_key_alg alg; struct ieee80211_key *key; int err; - if (dev == local->mdev) - return -EOPNOTSUPP; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); switch (params->cipher) { @@ -174,14 +166,10 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, u8 key_idx, u8 *mac_addr) { - struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; struct sta_info *sta; int ret; - if (dev == local->mdev) - return -EOPNOTSUPP; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); @@ -222,7 +210,6 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, void (*callback)(void *cookie, struct key_params *params)) { - struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; struct sta_info *sta = NULL; u8 seq[6] = {0}; @@ -232,9 +219,6 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, u16 iv16; int err = -ENOENT; - if (dev == local->mdev) - return -EOPNOTSUPP; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); @@ -310,12 +294,8 @@ static int ieee80211_config_default_key(struct wiphy *wiphy, struct net_device *dev, u8 key_idx) { - struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; - if (dev == local->mdev) - return -EOPNOTSUPP; - rcu_read_lock(); sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -496,13 +476,9 @@ static int ieee80211_config_beacon(struct ieee80211_sub_if_data *sdata, static int ieee80211_add_beacon(struct wiphy *wiphy, struct net_device *dev, struct beacon_parameters *params) { - struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; struct beacon_data *old; - if (dev == local->mdev) - return -EOPNOTSUPP; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->vif.type != NL80211_IFTYPE_AP) @@ -519,13 +495,9 @@ static int ieee80211_add_beacon(struct wiphy *wiphy, struct net_device *dev, static int ieee80211_set_beacon(struct wiphy *wiphy, struct net_device *dev, struct beacon_parameters *params) { - struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; struct beacon_data *old; - if (dev == local->mdev) - return -EOPNOTSUPP; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->vif.type != NL80211_IFTYPE_AP) @@ -541,13 +513,9 @@ static int ieee80211_set_beacon(struct wiphy *wiphy, struct net_device *dev, static int ieee80211_del_beacon(struct wiphy *wiphy, struct net_device *dev) { - struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; struct beacon_data *old; - if (dev == local->mdev) - return -EOPNOTSUPP; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->vif.type != NL80211_IFTYPE_AP) @@ -695,9 +663,6 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_sub_if_data *sdata; int err; - if (dev == local->mdev || params->vlan == local->mdev) - return -EOPNOTSUPP; - /* Prevent a race with changing the rate control algorithm */ if (!netif_running(dev)) return -ENETDOWN; @@ -752,9 +717,6 @@ static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_sub_if_data *sdata; struct sta_info *sta; - if (dev == local->mdev) - return -EOPNOTSUPP; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (mac) { @@ -786,9 +748,6 @@ static int ieee80211_change_station(struct wiphy *wiphy, struct sta_info *sta; struct ieee80211_sub_if_data *vlansdata; - if (dev == local->mdev || params->vlan == local->mdev) - return -EOPNOTSUPP; - rcu_read_lock(); /* XXX: get sta belonging to dev */ @@ -828,9 +787,6 @@ static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev, struct sta_info *sta; int err; - if (dev == local->mdev) - return -EOPNOTSUPP; - if (!netif_running(dev)) return -ENETDOWN; @@ -884,9 +840,6 @@ static int ieee80211_change_mpath(struct wiphy *wiphy, struct mesh_path *mpath; struct sta_info *sta; - if (dev == local->mdev) - return -EOPNOTSUPP; - if (!netif_running(dev)) return -ENETDOWN; @@ -958,13 +911,9 @@ static int ieee80211_get_mpath(struct wiphy *wiphy, struct net_device *dev, u8 *dst, u8 *next_hop, struct mpath_info *pinfo) { - struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; - if (dev == local->mdev) - return -EOPNOTSUPP; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT) @@ -986,13 +935,9 @@ static int ieee80211_dump_mpath(struct wiphy *wiphy, struct net_device *dev, int idx, u8 *dst, u8 *next_hop, struct mpath_info *pinfo) { - struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; - if (dev == local->mdev) - return -EOPNOTSUPP; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT) @@ -1015,13 +960,9 @@ static int ieee80211_change_bss(struct wiphy *wiphy, struct net_device *dev, struct bss_parameters *params) { - struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; u32 changed = 0; - if (dev == local->mdev) - return -EOPNOTSUPP; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->vif.type != NL80211_IFTYPE_AP) diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 81f350eaf8a3..b9902e425f09 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -173,8 +173,7 @@ static ssize_t sta_agg_status_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { struct sta_info *sta = file->private_data; - struct net_device *dev = sta->sdata->dev; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_local *local = sta->sdata->local; struct ieee80211_hw *hw = &local->hw; u8 *da = sta->sta.addr; static int tid_static_tx[16] = {0, 0, 0, 0, 0, 0, 0, 0, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 3912fba6d3d0..0b25b0f46b1a 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -573,6 +573,10 @@ enum { /* maximum number of hardware queues we support. */ #define QD_MAX_QUEUES (IEEE80211_MAX_AMPDU_QUEUES + IEEE80211_MAX_QUEUES) +struct ieee80211_master_priv { + struct ieee80211_local *local; +}; + struct ieee80211_local { /* embed the driver visible part. * don't cast (use the static inlines below), but we keep diff --git a/net/mac80211/main.c b/net/mac80211/main.c index c307dba7ec03..7d2d5a041e26 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -106,7 +106,8 @@ static const struct header_ops ieee80211_header_ops = { static int ieee80211_master_open(struct net_device *dev) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_master_priv *mpriv = netdev_priv(dev); + struct ieee80211_local *local = mpriv->local; struct ieee80211_sub_if_data *sdata; int res = -EOPNOTSUPP; @@ -128,7 +129,8 @@ static int ieee80211_master_open(struct net_device *dev) static int ieee80211_master_stop(struct net_device *dev) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_master_priv *mpriv = netdev_priv(dev); + struct ieee80211_local *local = mpriv->local; struct ieee80211_sub_if_data *sdata; /* we hold the RTNL here so can safely walk the list */ @@ -141,7 +143,8 @@ static int ieee80211_master_stop(struct net_device *dev) static void ieee80211_master_set_multicast_list(struct net_device *dev) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_master_priv *mpriv = netdev_priv(dev); + struct ieee80211_local *local = mpriv->local; ieee80211_configure_filter(local); } @@ -787,7 +790,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) int result; enum ieee80211_band band; struct net_device *mdev; - struct wireless_dev *mwdev; + struct ieee80211_master_priv *mpriv; /* * generic code guarantees at least one band, @@ -829,16 +832,14 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (hw->queues < 4) hw->ampdu_queues = 0; - mdev = alloc_netdev_mq(sizeof(struct wireless_dev), + mdev = alloc_netdev_mq(sizeof(struct ieee80211_master_priv), "wmaster%d", ether_setup, ieee80211_num_queues(hw)); if (!mdev) goto fail_mdev_alloc; - mwdev = netdev_priv(mdev); - mdev->ieee80211_ptr = mwdev; - mwdev->wiphy = local->hw.wiphy; - + mpriv = netdev_priv(mdev); + mpriv->local = local; local->mdev = mdev; ieee80211_rx_bss_list_init(local); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 30cf891fd3a8..8013277924f2 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -351,7 +351,7 @@ static void ieee80211_mesh_path_timer(unsigned long data) struct ieee80211_sub_if_data *sdata = (struct ieee80211_sub_if_data *) data; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - struct ieee80211_local *local = wdev_priv(&sdata->wdev); + struct ieee80211_local *local = sdata->local; queue_work(local->hw.workqueue, &ifmsh->work); } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 92d898b901e9..3ab9670f1809 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -650,32 +650,28 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) return result; } -static void ap_sta_ps_start(struct net_device *dev, struct sta_info *sta) +static void ap_sta_ps_start(struct sta_info *sta) { - struct ieee80211_sub_if_data *sdata; + struct ieee80211_sub_if_data *sdata = sta->sdata; DECLARE_MAC_BUF(mac); - sdata = sta->sdata; - atomic_inc(&sdata->bss->num_sta_ps); set_and_clear_sta_flags(sta, WLAN_STA_PS, WLAN_STA_PSPOLL); #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "%s: STA %s aid %d enters power save mode\n", - dev->name, print_mac(mac, sta->sta.addr), sta->sta.aid); + sdata->dev->name, print_mac(mac, sta->sta.addr), sta->sta.aid); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ } -static int ap_sta_ps_end(struct net_device *dev, struct sta_info *sta) +static int ap_sta_ps_end(struct sta_info *sta) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; struct sk_buff *skb; int sent = 0; - struct ieee80211_sub_if_data *sdata; struct ieee80211_tx_info *info; DECLARE_MAC_BUF(mac); - sdata = sta->sdata; - atomic_dec(&sdata->bss->num_sta_ps); clear_sta_flags(sta, WLAN_STA_PS | WLAN_STA_PSPOLL); @@ -685,7 +681,7 @@ static int ap_sta_ps_end(struct net_device *dev, struct sta_info *sta) #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "%s: STA %s aid %d exits power save mode\n", - dev->name, print_mac(mac, sta->sta.addr), sta->sta.aid); + sdata->dev->name, print_mac(mac, sta->sta.addr), sta->sta.aid); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ /* Send all buffered frames to the station */ @@ -701,7 +697,7 @@ static int ap_sta_ps_end(struct net_device *dev, struct sta_info *sta) sent++; #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "%s: STA %s aid %d send PS frame " - "since STA not sleeping anymore\n", dev->name, + "since STA not sleeping anymore\n", sdata->dev->name, print_mac(mac, sta->sta.addr), sta->sta.aid); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ info->flags |= IEEE80211_TX_CTL_REQUEUE; @@ -715,7 +711,6 @@ static ieee80211_rx_result debug_noinline ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) { struct sta_info *sta = rx->sta; - struct net_device *dev = rx->dev; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; if (!sta) @@ -757,10 +752,10 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) * exchange sequence */ if (test_sta_flags(sta, WLAN_STA_PS) && !ieee80211_has_pm(hdr->frame_control)) - rx->sent_ps_buffered += ap_sta_ps_end(dev, sta); + rx->sent_ps_buffered += ap_sta_ps_end(sta); else if (!test_sta_flags(sta, WLAN_STA_PS) && ieee80211_has_pm(hdr->frame_control)) - ap_sta_ps_start(dev, sta); + ap_sta_ps_start(sta); } /* Drop data::nullfunc frames silently, since they are used only to diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 20d683641b42..00d798cc9e04 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -165,11 +165,10 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr, return cpu_to_le16(dur); } -static int inline is_ieee80211_device(struct net_device *dev, - struct net_device *master) +static int inline is_ieee80211_device(struct ieee80211_local *local, + struct net_device *dev) { - return (wdev_priv(dev->ieee80211_ptr) == - wdev_priv(master->ieee80211_ptr)); + return local == wdev_priv(dev->ieee80211_ptr); } /* tx handlers */ @@ -1001,14 +1000,14 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx, /* * NB: @tx is uninitialised when passed in here */ -static int ieee80211_tx_prepare(struct ieee80211_tx_data *tx, - struct sk_buff *skb, - struct net_device *mdev) +static int ieee80211_tx_prepare(struct ieee80211_local *local, + struct ieee80211_tx_data *tx, + struct sk_buff *skb) { struct net_device *dev; dev = dev_get_by_index(&init_net, skb->iif); - if (unlikely(dev && !is_ieee80211_device(dev, mdev))) { + if (unlikely(dev && !is_ieee80211_device(local, dev))) { dev_put(dev); dev = NULL; } @@ -1258,6 +1257,8 @@ static int ieee80211_skb_resize(struct ieee80211_local *local, int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev) { + struct ieee80211_master_priv *mpriv = netdev_priv(dev); + struct ieee80211_local *local = mpriv->local; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct net_device *odev = NULL; @@ -1273,7 +1274,7 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, if (skb->iif) odev = dev_get_by_index(&init_net, skb->iif); - if (unlikely(odev && !is_ieee80211_device(odev, dev))) { + if (unlikely(odev && !is_ieee80211_device(local, odev))) { dev_put(odev); odev = NULL; } @@ -1449,8 +1450,8 @@ fail: int ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata; + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; int ret = 1, head_need; u16 ethertype, hdrlen, meshhdrlen = 0; __le16 fc; @@ -1462,7 +1463,6 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, struct sta_info *sta; u32 sta_flags = 0; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (unlikely(skb->len < ETH_HLEN)) { ret = 0; goto fail; @@ -2032,7 +2032,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, cpu_to_le16(IEEE80211_FCTL_MOREDATA); } - if (!ieee80211_tx_prepare(&tx, skb, local->mdev)) + if (!ieee80211_tx_prepare(local, &tx, skb)) break; dev_kfree_skb_any(skb); } -- cgit v1.2.1 From 60719ffd721f6764b7d07ca188c0d944a4330b69 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 16 Sep 2008 14:55:09 +0200 Subject: cfg80211: show interface type This patch makes cfg80211 show the interface in the nl80211 information about a specific interface. API users are required to keep the type updated (everything else is fairly complicated) but you will get a warning if you fail to keep it updated. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/iface.c | 1 + net/wireless/core.c | 2 ++ net/wireless/nl80211.c | 6 +++++- 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index a72fbebb8ea2..b5cd91e89712 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -625,6 +625,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, /* and set some type-dependent values */ sdata->vif.type = type; sdata->dev->hard_start_xmit = ieee80211_subif_start_xmit; + sdata->wdev.iftype = type; /* only monitor differs */ sdata->dev->type = ARPHRD_ETHER; diff --git a/net/wireless/core.c b/net/wireless/core.c index 88cb73394864..d6940085d59c 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -384,6 +384,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, rdev = wiphy_to_dev(dev->ieee80211_ptr->wiphy); + WARN_ON(dev->ieee80211_ptr->iftype == NL80211_IFTYPE_UNSPECIFIED); + switch (state) { case NETDEV_REGISTER: mutex_lock(&rdev->devlist_mtx); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 1221d726ed50..44771a690d53 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -299,7 +299,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags, NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex); NLA_PUT_STRING(msg, NL80211_ATTR_IFNAME, dev->name); - /* TODO: interface type */ + NLA_PUT_U32(msg, NL80211_ATTR_IFTYPE, dev->ieee80211_ptr->iftype); return genlmsg_end(msg, hdr); nla_put_failure: @@ -453,6 +453,10 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) &flags); err = drv->ops->change_virtual_intf(&drv->wiphy, ifindex, type, err ? NULL : &flags, ¶ms); + + dev = __dev_get_by_index(&init_net, ifindex); + WARN_ON(!dev || (!err && dev->ieee80211_ptr->iftype != type)); + rtnl_unlock(); unlock: -- cgit v1.2.1 From 723b038def23ce0606754c4f598cbb96bae9a102 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 16 Sep 2008 20:22:09 +0200 Subject: cfg80211: allow set_interface without type Which then causes no type change. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/nl80211.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 44771a690d53..a745932d1370 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -422,19 +422,20 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) memset(¶ms, 0, sizeof(params)); - if (info->attrs[NL80211_ATTR_IFTYPE]) { - type = nla_get_u32(info->attrs[NL80211_ATTR_IFTYPE]); - if (type > NL80211_IFTYPE_MAX) - return -EINVAL; - } else - return -EINVAL; - err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) return err; ifindex = dev->ifindex; + type = dev->ieee80211_ptr->iftype; dev_put(dev); + err = -EINVAL; + if (info->attrs[NL80211_ATTR_IFTYPE]) { + type = nla_get_u32(info->attrs[NL80211_ATTR_IFTYPE]); + if (type > NL80211_IFTYPE_MAX) + goto unlock; + } + if (!drv->ops->change_virtual_intf || !(drv->wiphy.interface_modes & (1 << type))) { err = -EOPNOTSUPP; -- cgit v1.2.1 From f8b25cdad719cddceb9cf0d350065b3e59e74219 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 16 Sep 2008 20:22:21 +0200 Subject: mac80211: allow interface settings changes only when down We currently allow monitor flags changes and mesh ID changes when the interface is up, which can lead to trouble. Change it to only allow when down. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/cfg.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index a8501f14b167..89a183c2327a 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -100,6 +100,9 @@ static int ieee80211_change_iface(struct wiphy *wiphy, int ifindex, if (ret) return ret; + if (netif_running(sdata->dev)) + return -EBUSY; + if (ieee80211_vif_is_mesh(&sdata->vif) && params->mesh_id_len) ieee80211_sdata_set_mesh_id(sdata, params->mesh_id_len, -- cgit v1.2.1 From 92ffe055c3ea45856183bebed62f8880f75fef3b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 16 Sep 2008 20:39:36 +0200 Subject: cfg80211: reject invalid configuration items Reject configuring mesh-id for non-mesh, monitor flags for non-monitor. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/nl80211.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a745932d1370..572793c8c7ab 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -418,7 +418,7 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) int err, ifindex; enum nl80211_iftype type; struct net_device *dev; - u32 flags; + u32 _flags, *flags = NULL; memset(¶ms, 0, sizeof(params)); @@ -442,18 +442,28 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) goto unlock; } - if (type == NL80211_IFTYPE_MESH_POINT && - info->attrs[NL80211_ATTR_MESH_ID]) { + if (info->attrs[NL80211_ATTR_MESH_ID]) { + if (type != NL80211_IFTYPE_MESH_POINT) { + err = -EINVAL; + goto unlock; + } params.mesh_id = nla_data(info->attrs[NL80211_ATTR_MESH_ID]); params.mesh_id_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]); } + if (info->attrs[NL80211_ATTR_MNTR_FLAGS]) { + if (type != NL80211_IFTYPE_MONITOR) { + err = -EINVAL; + goto unlock; + } + err = parse_monitor_flags(info->attrs[NL80211_ATTR_MNTR_FLAGS], + &_flags); + if (!err) + flags = &_flags; + } rtnl_lock(); - err = parse_monitor_flags(type == NL80211_IFTYPE_MONITOR ? - info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL, - &flags); err = drv->ops->change_virtual_intf(&drv->wiphy, ifindex, - type, err ? NULL : &flags, ¶ms); + type, flags, ¶ms); dev = __dev_get_by_index(&init_net, ifindex); WARN_ON(!dev || (!err && dev->ieee80211_ptr->iftype != type)); -- cgit v1.2.1 From 79617deeebb9cf089e2bc2aad19743b1209043f6 Mon Sep 17 00:00:00 2001 From: YanBo Date: Mon, 22 Sep 2008 13:30:32 +0800 Subject: mac80211: mesh portal functionality support Currently the mesh code doesn't support bridging mesh point interfaces with wired ethernet or AP to construct an MPP or MAP. This patch adds code to support the "6 address frame format packet" functionality to mesh point interfaces. Now the mesh network can be used as backhaul for end to end communication. Signed-off-by: Li YanBo Signed-off-by: John W. Linville --- net/mac80211/mesh.h | 4 ++ net/mac80211/mesh_pathtbl.c | 127 +++++++++++++++++++++++++++++++++++++++++++- net/mac80211/rx.c | 32 +++++++++-- net/mac80211/tx.c | 44 ++++++++++++--- 4 files changed, 196 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 8ee414a0447c..e10471c6ba42 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -71,6 +71,7 @@ enum mesh_path_flags { */ struct mesh_path { u8 dst[ETH_ALEN]; + u8 mpp[ETH_ALEN]; /* used for MPP or MAP */ struct ieee80211_sub_if_data *sdata; struct sta_info *next_hop; struct timer_list timer; @@ -226,6 +227,9 @@ int mesh_nexthop_lookup(struct sk_buff *skb, void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata); struct mesh_path *mesh_path_lookup(u8 *dst, struct ieee80211_sub_if_data *sdata); +struct mesh_path *mpp_path_lookup(u8 *dst, + struct ieee80211_sub_if_data *sdata); +int mpp_path_add(u8 *dst, u8 *mpp, struct ieee80211_sub_if_data *sdata); struct mesh_path *mesh_path_lookup_by_idx(int idx, struct ieee80211_sub_if_data *sdata); void mesh_path_fix_nexthop(struct mesh_path *mpath, struct sta_info *next_hop); diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index e4fa2905fadc..3c72557df45a 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -36,6 +36,7 @@ struct mpath_node { }; static struct mesh_table *mesh_paths; +static struct mesh_table *mpp_paths; /* Store paths for MPP&MAP */ /* This lock will have the grow table function as writer and add / delete nodes * as readers. When reading the table (i.e. doing lookups) we are well protected @@ -94,6 +95,34 @@ struct mesh_path *mesh_path_lookup(u8 *dst, struct ieee80211_sub_if_data *sdata) return NULL; } +struct mesh_path *mpp_path_lookup(u8 *dst, struct ieee80211_sub_if_data *sdata) +{ + struct mesh_path *mpath; + struct hlist_node *n; + struct hlist_head *bucket; + struct mesh_table *tbl; + struct mpath_node *node; + + tbl = rcu_dereference(mpp_paths); + + bucket = &tbl->hash_buckets[mesh_table_hash(dst, sdata, tbl)]; + hlist_for_each_entry_rcu(node, n, bucket, list) { + mpath = node->mpath; + if (mpath->sdata == sdata && + memcmp(dst, mpath->dst, ETH_ALEN) == 0) { + if (MPATH_EXPIRED(mpath)) { + spin_lock_bh(&mpath->state_lock); + if (MPATH_EXPIRED(mpath)) + mpath->flags &= ~MESH_PATH_ACTIVE; + spin_unlock_bh(&mpath->state_lock); + } + return mpath; + } + } + return NULL; +} + + /** * mesh_path_lookup_by_idx - look up a path in the mesh path table by its index * @idx: index @@ -226,6 +255,91 @@ err_path_alloc: } +int mpp_path_add(u8 *dst, u8 *mpp, struct ieee80211_sub_if_data *sdata) +{ + struct mesh_path *mpath, *new_mpath; + struct mpath_node *node, *new_node; + struct hlist_head *bucket; + struct hlist_node *n; + int grow = 0; + int err = 0; + u32 hash_idx; + + + if (memcmp(dst, sdata->dev->dev_addr, ETH_ALEN) == 0) + /* never add ourselves as neighbours */ + return -ENOTSUPP; + + if (is_multicast_ether_addr(dst)) + return -ENOTSUPP; + + err = -ENOMEM; + new_mpath = kzalloc(sizeof(struct mesh_path), GFP_KERNEL); + if (!new_mpath) + goto err_path_alloc; + + new_node = kmalloc(sizeof(struct mpath_node), GFP_KERNEL); + if (!new_node) + goto err_node_alloc; + + read_lock(&pathtbl_resize_lock); + memcpy(new_mpath->dst, dst, ETH_ALEN); + memcpy(new_mpath->mpp, mpp, ETH_ALEN); + new_mpath->sdata = sdata; + new_mpath->flags = 0; + skb_queue_head_init(&new_mpath->frame_queue); + new_node->mpath = new_mpath; + new_mpath->exp_time = jiffies; + spin_lock_init(&new_mpath->state_lock); + + hash_idx = mesh_table_hash(dst, sdata, mpp_paths); + bucket = &mpp_paths->hash_buckets[hash_idx]; + + spin_lock(&mpp_paths->hashwlock[hash_idx]); + + err = -EEXIST; + hlist_for_each_entry(node, n, bucket, list) { + mpath = node->mpath; + if (mpath->sdata == sdata && memcmp(dst, mpath->dst, ETH_ALEN) == 0) + goto err_exists; + } + + hlist_add_head_rcu(&new_node->list, bucket); + if (atomic_inc_return(&mpp_paths->entries) >= + mpp_paths->mean_chain_len * (mpp_paths->hash_mask + 1)) + grow = 1; + + spin_unlock(&mpp_paths->hashwlock[hash_idx]); + read_unlock(&pathtbl_resize_lock); + if (grow) { + struct mesh_table *oldtbl, *newtbl; + + write_lock(&pathtbl_resize_lock); + oldtbl = mpp_paths; + newtbl = mesh_table_grow(mpp_paths); + if (!newtbl) { + write_unlock(&pathtbl_resize_lock); + return 0; + } + rcu_assign_pointer(mpp_paths, newtbl); + write_unlock(&pathtbl_resize_lock); + + synchronize_rcu(); + mesh_table_free(oldtbl, false); + } + return 0; + +err_exists: + spin_unlock(&mpp_paths->hashwlock[hash_idx]); + read_unlock(&pathtbl_resize_lock); + kfree(new_node); +err_node_alloc: + kfree(new_mpath); +err_path_alloc: + return err; +} + + /** * mesh_plink_broken - deactivates paths and sends perr when a link breaks * @@ -475,11 +589,21 @@ static int mesh_path_node_copy(struct hlist_node *p, struct mesh_table *newtbl) int mesh_pathtbl_init(void) { mesh_paths = mesh_table_alloc(INIT_PATHS_SIZE_ORDER); + if (!mesh_paths) + return -ENOMEM; mesh_paths->free_node = &mesh_path_node_free; mesh_paths->copy_node = &mesh_path_node_copy; mesh_paths->mean_chain_len = MEAN_CHAIN_LEN; - if (!mesh_paths) + + mpp_paths = mesh_table_alloc(INIT_PATHS_SIZE_ORDER); + if (!mpp_paths) { + mesh_table_free(mesh_paths, true); return -ENOMEM; + } + mpp_paths->free_node = &mesh_path_node_free; + mpp_paths->copy_node = &mesh_path_node_copy; + mpp_paths->mean_chain_len = MEAN_CHAIN_LEN; + return 0; } @@ -511,4 +635,5 @@ void mesh_path_expire(struct ieee80211_sub_if_data *sdata) void mesh_pathtbl_unregister(void) { mesh_table_free(mesh_paths, true); + mesh_table_free(mpp_paths, true); } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 3ab9670f1809..2efa4dd47b5d 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1107,10 +1107,6 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx) hdrlen = ieee80211_hdrlen(hdr->frame_control); - if (ieee80211_vif_is_mesh(&sdata->vif)) - hdrlen += ieee80211_get_mesh_hdrlen( - (struct ieee80211s_hdr *) (skb->data + hdrlen)); - /* convert IEEE 802.11 header + possible LLC headers into Ethernet * header * IEEE 802.11 address fields: @@ -1134,6 +1130,15 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx) if (unlikely(sdata->vif.type != NL80211_IFTYPE_WDS && sdata->vif.type != NL80211_IFTYPE_MESH_POINT)) return -1; + if (ieee80211_vif_is_mesh(&sdata->vif)) { + struct ieee80211s_hdr *meshdr = (struct ieee80211s_hdr *) + (skb->data + hdrlen); + hdrlen += ieee80211_get_mesh_hdrlen(meshdr); + if (meshdr->flags & MESH_FLAGS_AE_A5_A6) { + memcpy(dst, meshdr->eaddr1, ETH_ALEN); + memcpy(src, meshdr->eaddr2, ETH_ALEN); + } + } break; case __constant_cpu_to_le16(IEEE80211_FCTL_FROMDS): if (sdata->vif.type != NL80211_IFTYPE_STATION || @@ -1393,6 +1398,25 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) /* illegal frame */ return RX_DROP_MONITOR; + if (mesh_hdr->flags & MESH_FLAGS_AE_A5_A6){ + struct ieee80211_sub_if_data *sdata; + struct mesh_path *mppath; + + sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev); + rcu_read_lock(); + mppath = mpp_path_lookup(mesh_hdr->eaddr2, sdata); + if (!mppath) { + mpp_path_add(mesh_hdr->eaddr2, hdr->addr4, sdata); + } else { + spin_lock_bh(&mppath->state_lock); + mppath->exp_time = jiffies; + if (compare_ether_addr(mppath->mpp, hdr->addr4) != 0) + memcpy(mppath->mpp, hdr->addr4, ETH_ALEN); + spin_unlock_bh(&mppath->state_lock); + } + rcu_read_unlock(); + } + if (compare_ether_addr(rx->dev->dev_addr, hdr->addr3) == 0) return RX_CONTINUE; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 00d798cc9e04..00d96e63dce9 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1498,18 +1498,50 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, #ifdef CONFIG_MAC80211_MESH case NL80211_IFTYPE_MESH_POINT: fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS); - /* RA TA DA SA */ - memset(hdr.addr1, 0, ETH_ALEN); - memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN); - memcpy(hdr.addr3, skb->data, ETH_ALEN); - memcpy(hdr.addr4, skb->data + ETH_ALEN, ETH_ALEN); if (!sdata->u.mesh.mshcfg.dot11MeshTTL) { /* Do not send frames with mesh_ttl == 0 */ sdata->u.mesh.mshstats.dropped_frames_ttl++; ret = 0; goto fail; } - meshhdrlen = ieee80211_new_mesh_header(&mesh_hdr, sdata); + memset(&mesh_hdr, 0, sizeof(mesh_hdr)); + + if (compare_ether_addr(dev->dev_addr, + skb->data + ETH_ALEN) == 0) { + /* RA TA DA SA */ + memset(hdr.addr1, 0, ETH_ALEN); + memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN); + memcpy(hdr.addr3, skb->data, ETH_ALEN); + memcpy(hdr.addr4, skb->data + ETH_ALEN, ETH_ALEN); + meshhdrlen = ieee80211_new_mesh_header(&mesh_hdr, sdata); + } else { + /* packet from other interface */ + struct mesh_path *mppath; + + memset(hdr.addr1, 0, ETH_ALEN); + memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN); + memcpy(hdr.addr4, dev->dev_addr, ETH_ALEN); + + if (is_multicast_ether_addr(skb->data)) + memcpy(hdr.addr3, skb->data, ETH_ALEN); + else { + rcu_read_lock(); + mppath = mpp_path_lookup(skb->data, sdata); + if (mppath) + memcpy(hdr.addr3, mppath->mpp, ETH_ALEN); + else + memset(hdr.addr3, 0xff, ETH_ALEN); + rcu_read_unlock(); + } + + mesh_hdr.flags |= MESH_FLAGS_AE_A5_A6; + mesh_hdr.ttl = sdata->u.mesh.mshcfg.dot11MeshTTL; + put_unaligned(cpu_to_le32(sdata->u.mesh.mesh_seqnum), &mesh_hdr.seqnum); + memcpy(mesh_hdr.eaddr1, skb->data, ETH_ALEN); + memcpy(mesh_hdr.eaddr2, skb->data + ETH_ALEN, ETH_ALEN); + sdata->u.mesh.mesh_seqnum++; + meshhdrlen = 18; + } hdrlen = 30; break; #endif -- cgit v1.2.1 From 2ff6a6d4e92270283432690adf53a7e5ab186d19 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 18 Sep 2008 12:24:20 +0200 Subject: mac80211: fix mesh action frame handling When I split off the action frame handling I made the code drop all action frames we don't want to handle. This is wrong since some action frames are actually handled via rx_h_mgmt through being queued to the sta/mesh implementations. Thanks to Li YanBo for noticing the problem. Signed-off-by: Johannes Berg Cc: Li YanBo Signed-off-by: John W. Linville --- net/mac80211/rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 2efa4dd47b5d..c489865761bc 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1557,7 +1557,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) */ if (sdata->vif.type != NL80211_IFTYPE_STATION && sdata->vif.type != NL80211_IFTYPE_ADHOC) - return RX_DROP_MONITOR; + return RX_CONTINUE; switch (mgmt->u.action.category) { case WLAN_CATEGORY_BACK: -- cgit v1.2.1 From 4b7679a561e552eeda1e3567119bef2bca99b66e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 18 Sep 2008 18:14:18 +0200 Subject: mac80211: clean up rate control API Long awaited, hard work. This patch totally cleans up the rate control API to remove the requirement to include internal headers outside of net/mac80211/. There's one internal use in the PID algorithm left for mesh networking, we'll have to figure out a way to clean that one up and decide how to do the peer link evaluation, possibly independent of the rate control algorithm or via new API. Additionally, ath9k is left using the cross-inclusion hack for now, we will add new API where necessary to make this work properly, but right now I'm not expert enough to do it. It's still off better than before. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/cfg.c | 2 +- net/mac80211/ieee80211_i.h | 2 + net/mac80211/main.c | 4 +- net/mac80211/mlme.c | 4 +- net/mac80211/rate.c | 71 ++++++++++++++---- net/mac80211/rate.h | 102 +++++++------------------- net/mac80211/rc80211_pid.h | 2 - net/mac80211/rc80211_pid_algo.c | 158 +++++++++++++++------------------------- net/mac80211/sta_info.c | 17 ++--- net/mac80211/sta_info.h | 2 +- net/mac80211/tx.c | 5 +- 11 files changed, 159 insertions(+), 210 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 89a183c2327a..855126a3039d 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -693,7 +693,7 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, sta_apply_parameters(local, sta, params); - rate_control_rate_init(sta, local); + rate_control_rate_init(sta); rcu_read_lock(); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 0b25b0f46b1a..8025b294588b 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -724,6 +724,8 @@ struct ieee80211_local { #ifdef CONFIG_MAC80211_DEBUGFS struct local_debugfsdentries { + struct dentry *rcdir; + struct dentry *rcname; struct dentry *frequency; struct dentry *antenna_sel_tx; struct dentry *antenna_sel_rx; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 7d2d5a041e26..d608c44047c0 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -542,6 +542,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); u16 frag, type; __le16 fc; + struct ieee80211_supported_band *sband; struct ieee80211_tx_status_rtap_hdr *rthdr; struct ieee80211_sub_if_data *sdata; struct net_device *prev_dev = NULL; @@ -588,7 +589,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) sta->tx_retry_count += info->status.retry_count; } - rate_control_tx_status(local->mdev, skb); + sband = local->hw.wiphy->bands[info->band]; + rate_control_tx_status(local, sband, sta, skb); } rcu_read_unlock(); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 8611a8318c9c..109c3a7e63ad 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1323,7 +1323,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, ieee80211_handle_ht(local, 1, &sta->sta.ht_info, &bss_info); } - rate_control_rate_init(sta, local); + rate_control_rate_init(sta); if (elems.wmm_param) { set_sta_flags(sta, WLAN_STA_WME); @@ -2342,7 +2342,7 @@ struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, sta->sta.supp_rates[band] = supp_rates | ieee80211_mandatory_rates(local, band); - rate_control_rate_init(sta, local); + rate_control_rate_init(sta); if (sta_info_insert(sta)) return NULL; diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 0388c090dfe9..5d786720d935 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -12,6 +12,7 @@ #include #include "rate.h" #include "ieee80211_i.h" +#include "debugfs.h" struct rate_control_alg { struct list_head list; @@ -127,19 +128,46 @@ static void ieee80211_rate_control_ops_put(struct rate_control_ops *ops) module_put(ops->module); } +#ifdef CONFIG_MAC80211_DEBUGFS +static ssize_t rcname_read(struct file *file, char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct rate_control_ref *ref = file->private_data; + int len = strlen(ref->ops->name); + + return simple_read_from_buffer(userbuf, count, ppos, + ref->ops->name, len); +} + +static const struct file_operations rcname_ops = { + .read = rcname_read, + .open = mac80211_open_file_generic, +}; +#endif + struct rate_control_ref *rate_control_alloc(const char *name, struct ieee80211_local *local) { + struct dentry *debugfsdir = NULL; struct rate_control_ref *ref; ref = kmalloc(sizeof(struct rate_control_ref), GFP_KERNEL); if (!ref) goto fail_ref; kref_init(&ref->kref); + ref->local = local; ref->ops = ieee80211_rate_control_ops_get(name); if (!ref->ops) goto fail_ops; - ref->priv = ref->ops->alloc(local); + +#ifdef CONFIG_MAC80211_DEBUGFS + debugfsdir = debugfs_create_dir("rc", local->hw.wiphy->debugfsdir); + local->debugfs.rcdir = debugfsdir; + local->debugfs.rcname = debugfs_create_file("name", 0400, debugfsdir, + ref, &rcname_ops); +#endif + + ref->priv = ref->ops->alloc(&local->hw, debugfsdir); if (!ref->priv) goto fail_priv; return ref; @@ -158,29 +186,46 @@ static void rate_control_release(struct kref *kref) ctrl_ref = container_of(kref, struct rate_control_ref, kref); ctrl_ref->ops->free(ctrl_ref->priv); + +#ifdef CONFIG_MAC80211_DEBUGFS + debugfs_remove(ctrl_ref->local->debugfs.rcname); + ctrl_ref->local->debugfs.rcname = NULL; + debugfs_remove(ctrl_ref->local->debugfs.rcdir); + ctrl_ref->local->debugfs.rcdir = NULL; +#endif + ieee80211_rate_control_ops_put(ctrl_ref->ops); kfree(ctrl_ref); } -void rate_control_get_rate(struct net_device *dev, +void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, - struct sk_buff *skb, + struct sta_info *sta, struct sk_buff *skb, struct rate_selection *sel) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct rate_control_ref *ref = local->rate_ctrl; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - struct sta_info *sta; + struct rate_control_ref *ref = sdata->local->rate_ctrl; + void *priv_sta = NULL; + struct ieee80211_sta *ista = NULL; int i; - rcu_read_lock(); - sta = sta_info_get(local, hdr->addr1); - sel->rate_idx = -1; sel->nonerp_idx = -1; sel->probe_idx = -1; + sel->max_rate_idx = sdata->max_ratectrl_rateidx; + + if (sta) { + ista = &sta->sta; + priv_sta = sta->rate_ctrl_priv; + } + + if (sta && sdata->force_unicast_rateidx > -1) + sel->rate_idx = sdata->force_unicast_rateidx; + else + ref->ops->get_rate(ref->priv, sband, ista, priv_sta, skb, sel); - ref->ops->get_rate(ref->priv, dev, sband, skb, sel); + if (sdata->max_ratectrl_rateidx > -1 && + sel->rate_idx > sdata->max_ratectrl_rateidx) + sel->rate_idx = sdata->max_ratectrl_rateidx; BUG_ON(sel->rate_idx < 0); @@ -191,13 +236,11 @@ void rate_control_get_rate(struct net_device *dev, if (sband->bitrates[sel->rate_idx].bitrate < rate->bitrate) break; - if (rate_supported(sta, sband->band, i) && + if (rate_supported(ista, sband->band, i) && !(rate->flags & IEEE80211_RATE_ERP_G)) sel->nonerp_idx = i; } } - - rcu_read_unlock(); } struct rate_control_ref *rate_control_get(struct rate_control_ref *ref) diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index 5f18c27eb900..eb94e584d24e 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -19,77 +19,48 @@ #include "ieee80211_i.h" #include "sta_info.h" -/** - * struct rate_selection - rate selection for rate control algos - * @rate: selected transmission rate index - * @nonerp: Non-ERP rate to use instead if ERP cannot be used - * @probe: rate for probing (or -1) - * - */ -struct rate_selection { - s8 rate_idx, nonerp_idx, probe_idx; -}; - -struct rate_control_ops { - struct module *module; - const char *name; - void (*tx_status)(void *priv, struct net_device *dev, - struct sk_buff *skb); - void (*get_rate)(void *priv, struct net_device *dev, - struct ieee80211_supported_band *band, - struct sk_buff *skb, - struct rate_selection *sel); - void (*rate_init)(void *priv, void *priv_sta, - struct ieee80211_local *local, struct sta_info *sta); - void (*clear)(void *priv); - - void *(*alloc)(struct ieee80211_local *local); - void (*free)(void *priv); - void *(*alloc_sta)(void *priv, gfp_t gfp); - void (*free_sta)(void *priv, void *priv_sta); - - int (*add_attrs)(void *priv, struct kobject *kobj); - void (*remove_attrs)(void *priv, struct kobject *kobj); - void (*add_sta_debugfs)(void *priv, void *priv_sta, - struct dentry *dir); - void (*remove_sta_debugfs)(void *priv, void *priv_sta); -}; - struct rate_control_ref { + struct ieee80211_local *local; struct rate_control_ops *ops; void *priv; struct kref kref; }; -int ieee80211_rate_control_register(struct rate_control_ops *ops); -void ieee80211_rate_control_unregister(struct rate_control_ops *ops); - /* Get a reference to the rate control algorithm. If `name' is NULL, get the * first available algorithm. */ struct rate_control_ref *rate_control_alloc(const char *name, struct ieee80211_local *local); -void rate_control_get_rate(struct net_device *dev, +void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, - struct sk_buff *skb, + struct sta_info *sta, struct sk_buff *skb, struct rate_selection *sel); struct rate_control_ref *rate_control_get(struct rate_control_ref *ref); void rate_control_put(struct rate_control_ref *ref); -static inline void rate_control_tx_status(struct net_device *dev, +static inline void rate_control_tx_status(struct ieee80211_local *local, + struct ieee80211_supported_band *sband, + struct sta_info *sta, struct sk_buff *skb) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct rate_control_ref *ref = local->rate_ctrl; + struct ieee80211_sta *ista = &sta->sta; + void *priv_sta = sta->rate_ctrl_priv; - ref->ops->tx_status(ref->priv, dev, skb); + ref->ops->tx_status(ref->priv, sband, ista, priv_sta, skb); } -static inline void rate_control_rate_init(struct sta_info *sta, - struct ieee80211_local *local) +static inline void rate_control_rate_init(struct sta_info *sta) { + struct ieee80211_local *local = sta->sdata->local; struct rate_control_ref *ref = sta->rate_ctrl; - ref->ops->rate_init(ref->priv, sta->rate_ctrl_priv, local, sta); + struct ieee80211_sta *ista = &sta->sta; + void *priv_sta = sta->rate_ctrl_priv; + struct ieee80211_supported_band *sband; + + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + + ref->ops->rate_init(ref->priv, sband, ista, priv_sta); } @@ -100,15 +71,19 @@ static inline void rate_control_clear(struct ieee80211_local *local) } static inline void *rate_control_alloc_sta(struct rate_control_ref *ref, + struct ieee80211_sta *sta, gfp_t gfp) { - return ref->ops->alloc_sta(ref->priv, gfp); + return ref->ops->alloc_sta(ref->priv, sta, gfp); } -static inline void rate_control_free_sta(struct rate_control_ref *ref, - void *priv) +static inline void rate_control_free_sta(struct sta_info *sta) { - ref->ops->free_sta(ref->priv, priv); + struct rate_control_ref *ref = sta->rate_ctrl; + struct ieee80211_sta *ista = &sta->sta; + void *priv_sta = sta->rate_ctrl_priv; + + ref->ops->free_sta(ref->priv, ista, priv_sta); } static inline void rate_control_add_sta_debugfs(struct sta_info *sta) @@ -130,31 +105,6 @@ static inline void rate_control_remove_sta_debugfs(struct sta_info *sta) #endif } -static inline int rate_supported(struct sta_info *sta, - enum ieee80211_band band, - int index) -{ - return (sta == NULL || sta->sta.supp_rates[band] & BIT(index)); -} - -static inline s8 -rate_lowest_index(struct ieee80211_local *local, - struct ieee80211_supported_band *sband, - struct sta_info *sta) -{ - int i; - - for (i = 0; i < sband->n_bitrates; i++) - if (rate_supported(sta, sband->band, i)) - return i; - - /* warn when we cannot find a rate. */ - WARN_ON(1); - - return 0; -} - - /* functions for rate control related to a device */ int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local, const char *name); diff --git a/net/mac80211/rc80211_pid.h b/net/mac80211/rc80211_pid.h index ffafc5da572e..01d64d53f3b9 100644 --- a/net/mac80211/rc80211_pid.h +++ b/net/mac80211/rc80211_pid.h @@ -124,7 +124,6 @@ struct rc_pid_events_file_info { * struct rc_pid_debugfs_entries - tunable parameters * * Algorithm parameters, tunable via debugfs. - * @dir: the debugfs directory for a specific phy * @target: target percentage for failed frames * @sampling_period: error sampling interval in milliseconds * @coeff_p: absolute value of the proportional coefficient @@ -143,7 +142,6 @@ struct rc_pid_events_file_info { * ordering of rates) */ struct rc_pid_debugfs_entries { - struct dentry *dir; struct dentry *target; struct dentry *sampling_period; struct dentry *coeff_p; diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c index bc1c4569caa1..86eb374e3b87 100644 --- a/net/mac80211/rc80211_pid_algo.c +++ b/net/mac80211/rc80211_pid_algo.c @@ -68,18 +68,14 @@ * exhibited a worse failed frames behaviour and we'll choose the highest rate * whose failed frames behaviour is not worse than the one of the original rate * target. While at it, check that the new rate is valid. */ -static void rate_control_pid_adjust_rate(struct ieee80211_local *local, - struct sta_info *sta, int adj, +static void rate_control_pid_adjust_rate(struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, + struct rc_pid_sta_info *spinfo, int adj, struct rc_pid_rateinfo *rinfo) { - struct ieee80211_sub_if_data *sdata; - struct ieee80211_supported_band *sband; int cur_sorted, new_sorted, probe, tmp, n_bitrates, band; - struct rc_pid_sta_info *spinfo = (void *)sta->rate_ctrl_priv; int cur = spinfo->txrate_idx; - sdata = sta->sdata; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; band = sband->band; n_bitrates = sband->n_bitrates; @@ -146,13 +142,11 @@ static void rate_control_pid_normalize(struct rc_pid_info *pinfo, int l) } static void rate_control_pid_sample(struct rc_pid_info *pinfo, - struct ieee80211_local *local, - struct sta_info *sta) + struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, + struct rc_pid_sta_info *spinfo) { - struct ieee80211_sub_if_data *sdata = sta->sdata; - struct rc_pid_sta_info *spinfo = sta->rate_ctrl_priv; struct rc_pid_rateinfo *rinfo = pinfo->rinfo; - struct ieee80211_supported_band *sband; u32 pf; s32 err_avg; u32 err_prop; @@ -161,9 +155,6 @@ static void rate_control_pid_sample(struct rc_pid_info *pinfo, int adj, i, j, tmp; unsigned long period; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - spinfo = sta->rate_ctrl_priv; - /* In case nothing happened during the previous control interval, turn * the sharpening factor on. */ period = (HZ * pinfo->sampling_period + 500) / 1000; @@ -179,11 +170,15 @@ static void rate_control_pid_sample(struct rc_pid_info *pinfo, if (unlikely(spinfo->tx_num_xmit == 0)) pf = spinfo->last_pf; else { + /* XXX: BAD HACK!!! */ + struct sta_info *si = container_of(sta, struct sta_info, sta); + pf = spinfo->tx_num_failed * 100 / spinfo->tx_num_xmit; - if (ieee80211_vif_is_mesh(&sdata->vif) && pf == 100) - mesh_plink_broken(sta); + + if (ieee80211_vif_is_mesh(&si->sdata->vif) && pf == 100) + mesh_plink_broken(si); pf <<= RC_PID_ARITH_SHIFT; - sta->fail_avg = ((pf + (spinfo->last_pf << 3)) / 9) + si->fail_avg = ((pf + (spinfo->last_pf << 3)) / 9) >> RC_PID_ARITH_SHIFT; } @@ -229,43 +224,25 @@ static void rate_control_pid_sample(struct rc_pid_info *pinfo, /* Change rate. */ if (adj) - rate_control_pid_adjust_rate(local, sta, adj, rinfo); + rate_control_pid_adjust_rate(sband, sta, spinfo, adj, rinfo); } -static void rate_control_pid_tx_status(void *priv, struct net_device *dev, +static void rate_control_pid_tx_status(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta, struct sk_buff *skb) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - struct ieee80211_sub_if_data *sdata; struct rc_pid_info *pinfo = priv; - struct sta_info *sta; - struct rc_pid_sta_info *spinfo; + struct rc_pid_sta_info *spinfo = priv_sta; unsigned long period; - struct ieee80211_supported_band *sband; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - rcu_read_lock(); - - sta = sta_info_get(local, hdr->addr1); - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - - if (!sta) - goto unlock; - - spinfo = sta->rate_ctrl_priv; - - /* Don't update the state if we're not controlling the rate. */ - sdata = sta->sdata; - if (sdata->force_unicast_rateidx > -1) { - spinfo->txrate_idx = sdata->max_ratectrl_rateidx; - goto unlock; - } + if (!spinfo) + return; /* Ignore all frames that were sent with a different rate than the rate * we currently advise mac80211 to use. */ if (info->tx_rate_idx != spinfo->txrate_idx) - goto unlock; + return; spinfo->tx_num_xmit++; @@ -289,78 +266,63 @@ static void rate_control_pid_tx_status(void *priv, struct net_device *dev, if (!period) period = 1; if (time_after(jiffies, spinfo->last_sample + period)) - rate_control_pid_sample(pinfo, local, sta); - - unlock: - rcu_read_unlock(); + rate_control_pid_sample(pinfo, sband, sta, spinfo); } -static void rate_control_pid_get_rate(void *priv, struct net_device *dev, - struct ieee80211_supported_band *sband, - struct sk_buff *skb, - struct rate_selection *sel) +static void +rate_control_pid_get_rate(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta, + struct sk_buff *skb, + struct rate_selection *sel) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - struct ieee80211_sub_if_data *sdata; - struct rc_pid_sta_info *spinfo; - struct sta_info *sta; + struct rc_pid_sta_info *spinfo = priv_sta; int rateidx; u16 fc; - rcu_read_lock(); - - sta = sta_info_get(local, hdr->addr1); - /* Send management frames and broadcast/multicast data using lowest * rate. */ fc = le16_to_cpu(hdr->frame_control); - if ((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA || - is_multicast_ether_addr(hdr->addr1) || !sta) { - sel->rate_idx = rate_lowest_index(local, sband, sta); - rcu_read_unlock(); + if (!sta || !spinfo || + (fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA || + is_multicast_ether_addr(hdr->addr1)) { + sel->rate_idx = rate_lowest_index(sband, sta); return; } - /* If a forced rate is in effect, select it. */ - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - spinfo = (struct rc_pid_sta_info *)sta->rate_ctrl_priv; - if (sdata->force_unicast_rateidx > -1) - spinfo->txrate_idx = sdata->force_unicast_rateidx; - rateidx = spinfo->txrate_idx; if (rateidx >= sband->n_bitrates) rateidx = sband->n_bitrates - 1; - rcu_read_unlock(); - sel->rate_idx = rateidx; #ifdef CONFIG_MAC80211_DEBUGFS - rate_control_pid_event_tx_rate( - &((struct rc_pid_sta_info *) sta->rate_ctrl_priv)->events, + rate_control_pid_event_tx_rate(&spinfo->events, rateidx, sband->bitrates[rateidx].bitrate); #endif } -static void rate_control_pid_rate_init(void *priv, void *priv_sta, - struct ieee80211_local *local, - struct sta_info *sta) +static void +rate_control_pid_rate_init(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta) { + struct rc_pid_sta_info *spinfo = priv_sta; + struct sta_info *si; + /* TODO: This routine should consider using RSSI from previous packets * as we need to have IEEE 802.1X auth succeed immediately after assoc.. * Until that method is implemented, we will use the lowest supported * rate as a workaround. */ - struct ieee80211_supported_band *sband; - struct rc_pid_sta_info *spinfo = (void *)sta->rate_ctrl_priv; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - spinfo->txrate_idx = rate_lowest_index(local, sband, sta); - sta->fail_avg = 0; + spinfo->txrate_idx = rate_lowest_index(sband, sta); + /* HACK */ + si = container_of(sta, struct sta_info, sta); + si->fail_avg = 0; } -static void *rate_control_pid_alloc(struct ieee80211_local *local) +static void *rate_control_pid_alloc(struct ieee80211_hw *hw, + struct dentry *debugfsdir) { struct rc_pid_info *pinfo; struct rc_pid_rateinfo *rinfo; @@ -371,7 +333,7 @@ static void *rate_control_pid_alloc(struct ieee80211_local *local) struct rc_pid_debugfs_entries *de; #endif - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + sband = hw->wiphy->bands[hw->conf.channel->band]; pinfo = kmalloc(sizeof(*pinfo), GFP_ATOMIC); if (!pinfo) @@ -426,30 +388,28 @@ static void *rate_control_pid_alloc(struct ieee80211_local *local) #ifdef CONFIG_MAC80211_DEBUGFS de = &pinfo->dentries; - de->dir = debugfs_create_dir("rc80211_pid", - local->hw.wiphy->debugfsdir); de->target = debugfs_create_u32("target_pf", S_IRUSR | S_IWUSR, - de->dir, &pinfo->target); + debugfsdir, &pinfo->target); de->sampling_period = debugfs_create_u32("sampling_period", - S_IRUSR | S_IWUSR, de->dir, + S_IRUSR | S_IWUSR, debugfsdir, &pinfo->sampling_period); de->coeff_p = debugfs_create_u32("coeff_p", S_IRUSR | S_IWUSR, - de->dir, &pinfo->coeff_p); + debugfsdir, &pinfo->coeff_p); de->coeff_i = debugfs_create_u32("coeff_i", S_IRUSR | S_IWUSR, - de->dir, &pinfo->coeff_i); + debugfsdir, &pinfo->coeff_i); de->coeff_d = debugfs_create_u32("coeff_d", S_IRUSR | S_IWUSR, - de->dir, &pinfo->coeff_d); + debugfsdir, &pinfo->coeff_d); de->smoothing_shift = debugfs_create_u32("smoothing_shift", - S_IRUSR | S_IWUSR, de->dir, + S_IRUSR | S_IWUSR, debugfsdir, &pinfo->smoothing_shift); de->sharpen_factor = debugfs_create_u32("sharpen_factor", - S_IRUSR | S_IWUSR, de->dir, + S_IRUSR | S_IWUSR, debugfsdir, &pinfo->sharpen_factor); de->sharpen_duration = debugfs_create_u32("sharpen_duration", - S_IRUSR | S_IWUSR, de->dir, + S_IRUSR | S_IWUSR, debugfsdir, &pinfo->sharpen_duration); de->norm_offset = debugfs_create_u32("norm_offset", - S_IRUSR | S_IWUSR, de->dir, + S_IRUSR | S_IWUSR, debugfsdir, &pinfo->norm_offset); #endif @@ -471,7 +431,6 @@ static void rate_control_pid_free(void *priv) debugfs_remove(de->coeff_p); debugfs_remove(de->sampling_period); debugfs_remove(de->target); - debugfs_remove(de->dir); #endif kfree(pinfo->rinfo); @@ -482,7 +441,8 @@ static void rate_control_pid_clear(void *priv) { } -static void *rate_control_pid_alloc_sta(void *priv, gfp_t gfp) +static void *rate_control_pid_alloc_sta(void *priv, struct ieee80211_sta *sta, + gfp_t gfp) { struct rc_pid_sta_info *spinfo; @@ -500,10 +460,10 @@ static void *rate_control_pid_alloc_sta(void *priv, gfp_t gfp) return spinfo; } -static void rate_control_pid_free_sta(void *priv, void *priv_sta) +static void rate_control_pid_free_sta(void *priv, struct ieee80211_sta *sta, + void *priv_sta) { - struct rc_pid_sta_info *spinfo = priv_sta; - kfree(spinfo); + kfree(priv_sta); } static struct rate_control_ops mac80211_rcpid = { diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index d9774ac2e0f7..9b72d15bc8dc 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -93,8 +93,7 @@ static int sta_info_hash_del(struct ieee80211_local *local, } /* protected by RCU */ -static struct sta_info *__sta_info_find(struct ieee80211_local *local, - const u8 *addr) +struct sta_info *sta_info_get(struct ieee80211_local *local, const u8 *addr) { struct sta_info *sta; @@ -107,12 +106,6 @@ static struct sta_info *__sta_info_find(struct ieee80211_local *local, return sta; } -struct sta_info *sta_info_get(struct ieee80211_local *local, u8 *addr) -{ - return __sta_info_find(local, addr); -} -EXPORT_SYMBOL(sta_info_get); - struct sta_info *sta_info_get_by_idx(struct ieee80211_local *local, int idx, struct net_device *dev) { @@ -146,7 +139,7 @@ static void __sta_info_free(struct ieee80211_local *local, { DECLARE_MAC_BUF(mbuf); - rate_control_free_sta(sta->rate_ctrl, sta->rate_ctrl_priv); + rate_control_free_sta(sta); rate_control_put(sta->rate_ctrl); #ifdef CONFIG_MAC80211_VERBOSE_DEBUG @@ -244,7 +237,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, sta->rate_ctrl = rate_control_get(local->rate_ctrl); sta->rate_ctrl_priv = rate_control_alloc_sta(sta->rate_ctrl, - gfp); + &sta->sta, gfp); if (!sta->rate_ctrl_priv) { rate_control_put(sta->rate_ctrl); kfree(sta); @@ -308,7 +301,7 @@ int sta_info_insert(struct sta_info *sta) spin_lock_irqsave(&local->sta_lock, flags); /* check if STA exists already */ - if (__sta_info_find(local, sta->sta.addr)) { + if (sta_info_get(local, sta->sta.addr)) { spin_unlock_irqrestore(&local->sta_lock, flags); err = -EEXIST; goto out_free; @@ -834,7 +827,7 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_hw *hw, const u8 *addr) { - struct sta_info *sta = __sta_info_find(hw_to_local(hw), addr); + struct sta_info *sta = sta_info_get(hw_to_local(hw), addr); if (!sta) return NULL; diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index daedfa9e1c63..c3f436964621 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -416,7 +416,7 @@ static inline u32 get_sta_flags(struct sta_info *sta) /* * Get a STA info, must have be under RCU read lock. */ -struct sta_info *sta_info_get(struct ieee80211_local *local, u8 *addr); +struct sta_info *sta_info_get(struct ieee80211_local *local, const u8 *addr); /* * Get STA info by index, BROKEN! */ diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 00d96e63dce9..0cc2e23f082c 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -446,7 +446,8 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) sband = tx->local->hw.wiphy->bands[tx->channel->band]; if (likely(tx->rate_idx < 0)) { - rate_control_get_rate(tx->dev, sband, tx->skb, &rsel); + rate_control_get_rate(tx->sdata, sband, tx->sta, + tx->skb, &rsel); if (tx->sta) tx->sta->last_txrate_idx = rsel.rate_idx; tx->rate_idx = rsel.rate_idx; @@ -1955,7 +1956,7 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, skb->do_not_encrypt = 1; info->band = band; - rate_control_get_rate(local->mdev, sband, skb, &rsel); + rate_control_get_rate(sdata, sband, NULL, skb, &rsel); if (unlikely(rsel.rate_idx < 0)) { if (net_ratelimit()) { -- cgit v1.2.1 From 4dfe51e10047a60e82734860cec0d9f660b102fc Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 19 Sep 2008 05:10:34 +0200 Subject: mac80211: probe with correct SSID While associated, we should probe with the SSID we're associated to, not the scan SSID. Signed-off-by: Johannes Berg Acked-by: Tomas Winkler Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 109c3a7e63ad..52a648133018 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -942,8 +942,8 @@ static void ieee80211_associated(struct ieee80211_sub_if_data *sdata, disassoc = 1; } else ieee80211_send_probe_req(sdata, ifsta->bssid, - local->scan_ssid, - local->scan_ssid_len); + ifsta->ssid, + ifsta->ssid_len); ifsta->flags ^= IEEE80211_STA_PROBEREQ_POLL; } else { ifsta->flags &= ~IEEE80211_STA_PROBEREQ_POLL; -- cgit v1.2.1 From 4492bea656b70dad6a9ae7b59b1430fa38ba3345 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 22 Sep 2008 17:10:10 +0300 Subject: mac80211: fix led behavior in IBSS This patch fixes the led behavior in IBSS. After we joined an IBSS cell we need to inform the led that we got associated. Although there is no 802.11 association in IBSS mode, the semantic of "There is a link" is relevant. This allows the led to blink in IBSS mode (at least this solves a bug for iwlwifi). Signed-off-by: Emmanuel Grumbach Reviewed-by: Tomas Winkler Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 52a648133018..e859a0ab6162 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1452,6 +1452,8 @@ static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, ifsta->state = IEEE80211_STA_MLME_IBSS_JOINED; mod_timer(&ifsta->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL); + ieee80211_led_assoc(local, true); + memset(&wrqu, 0, sizeof(wrqu)); memcpy(wrqu.ap_addr.sa_data, bss->bssid, ETH_ALEN); wireless_send_event(sdata->dev, SIOCGIWAP, &wrqu, NULL); -- cgit v1.2.1 From 638af07386972861272ed9d0cff01cad528fdceb Mon Sep 17 00:00:00 2001 From: Denis ChengRq Date: Tue, 23 Sep 2008 02:35:37 +0800 Subject: wireless: a global static to local static improvement There are two improvements in this simple patch: 1. wiphy_counter is a static var only used in one function, so can use local static instead of global static; 2. wiphy_counter wrap handling killed one comparision; Signed-off-by: Denis ChengRq Signed-off-by: John W. Linville --- net/wireless/core.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index d6940085d59c..5cadbeb76a14 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -34,7 +34,6 @@ MODULE_DESCRIPTION("wireless configuration support"); * often because we need to do it for each command */ LIST_HEAD(cfg80211_drv_list); DEFINE_MUTEX(cfg80211_drv_mutex); -static int wiphy_counter; /* for debugfs */ static struct dentry *ieee80211_debugfs_dir; @@ -206,6 +205,8 @@ out_unlock: struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv) { + static int wiphy_counter; + struct cfg80211_registered_device *drv; int alloc_size; @@ -222,21 +223,18 @@ struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv) mutex_lock(&cfg80211_drv_mutex); - drv->idx = wiphy_counter; - - /* now increase counter for the next device unless - * it has wrapped previously */ - if (wiphy_counter >= 0) - wiphy_counter++; - - mutex_unlock(&cfg80211_drv_mutex); + drv->idx = wiphy_counter++; if (unlikely(drv->idx < 0)) { + wiphy_counter--; + mutex_unlock(&cfg80211_drv_mutex); /* ugh, wrapped! */ kfree(drv); return NULL; } + mutex_unlock(&cfg80211_drv_mutex); + /* give it a proper name */ snprintf(drv->wiphy.dev.bus_id, BUS_ID_SIZE, PHY_NAME "%d", drv->idx); -- cgit v1.2.1 From 72029fe85d8d060b3f966f2dbc36b3c75b5a6532 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Sep 2008 16:22:23 -0500 Subject: 9p: implement proper trans module refcounting and unregistration 9p trans modules aren't refcounted nor were they unregistered properly. Fix it. * Add 9p_trans_module->owner and reference the module on each trans instance creation and put it on destruction. * Protect v9fs_trans_list with a spinlock. This isn't strictly necessary as the list is manipulated only during module loading / unloading but it's a good idea to make the API safe. * Unregister trans modules when the corresponding module is being unloaded. * While at it, kill unnecessary EXPORT_SYMBOL on p9_trans_fd_init(). Signed-off-by: Tejun Heo Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 10 ++++-- net/9p/mod.c | 92 +++++++++++++++++++++++++++++++++++++-------------- net/9p/trans_fd.c | 11 +++++- net/9p/trans_virtio.c | 2 ++ 4 files changed, 87 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index 2ffe40cf2f01..10e320307ec0 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -75,7 +75,6 @@ static int parse_opts(char *opts, struct p9_client *clnt) int option; int ret = 0; - clnt->trans_mod = v9fs_default_trans(); clnt->dotu = 1; clnt->msize = 8192; @@ -108,7 +107,7 @@ static int parse_opts(char *opts, struct p9_client *clnt) clnt->msize = option; break; case Opt_trans: - clnt->trans_mod = v9fs_match_trans(&args[0]); + clnt->trans_mod = v9fs_get_trans_by_name(&args[0]); break; case Opt_legacy: clnt->dotu = 0; @@ -117,6 +116,10 @@ static int parse_opts(char *opts, struct p9_client *clnt) continue; } } + + if (!clnt->trans_mod) + clnt->trans_mod = v9fs_get_default_trans(); + kfree(options); return ret; } @@ -150,6 +153,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) if (!clnt) return ERR_PTR(-ENOMEM); + clnt->trans_mod = NULL; clnt->trans = NULL; spin_lock_init(&clnt->lock); INIT_LIST_HEAD(&clnt->fidlist); @@ -235,6 +239,8 @@ void p9_client_destroy(struct p9_client *clnt) clnt->trans = NULL; } + v9fs_put_trans(clnt->trans_mod); + list_for_each_entry_safe(fid, fidptr, &clnt->fidlist, flist) p9_fid_destroy(fid); diff --git a/net/9p/mod.c b/net/9p/mod.c index bdee1fb7cc62..1084feb24cb0 100644 --- a/net/9p/mod.c +++ b/net/9p/mod.c @@ -31,6 +31,7 @@ #include #include #include +#include #ifdef CONFIG_NET_9P_DEBUG unsigned int p9_debug_level = 0; /* feature-rific global debug level */ @@ -44,8 +45,8 @@ MODULE_PARM_DESC(debug, "9P debugging level"); * */ +static DEFINE_SPINLOCK(v9fs_trans_lock); static LIST_HEAD(v9fs_trans_list); -static struct p9_trans_module *v9fs_default_transport; /** * v9fs_register_trans - register a new transport with 9p @@ -54,48 +55,87 @@ static struct p9_trans_module *v9fs_default_transport; */ void v9fs_register_trans(struct p9_trans_module *m) { + spin_lock(&v9fs_trans_lock); list_add_tail(&m->list, &v9fs_trans_list); - if (m->def) - v9fs_default_transport = m; + spin_unlock(&v9fs_trans_lock); } EXPORT_SYMBOL(v9fs_register_trans); /** - * v9fs_match_trans - match transport versus registered transports + * v9fs_unregister_trans - unregister a 9p transport + * @m: the transport to remove + * + */ +void v9fs_unregister_trans(struct p9_trans_module *m) +{ + spin_lock(&v9fs_trans_lock); + list_del_init(&m->list); + spin_unlock(&v9fs_trans_lock); +} +EXPORT_SYMBOL(v9fs_unregister_trans); + +/** + * v9fs_get_trans_by_name - get transport with the matching name * @name: string identifying transport * */ -struct p9_trans_module *v9fs_match_trans(const substring_t *name) +struct p9_trans_module *v9fs_get_trans_by_name(const substring_t *name) { - struct list_head *p; - struct p9_trans_module *t = NULL; - - list_for_each(p, &v9fs_trans_list) { - t = list_entry(p, struct p9_trans_module, list); - if (strncmp(t->name, name->from, name->to-name->from) == 0) - return t; - } - return NULL; + struct p9_trans_module *t, *found = NULL; + + spin_lock(&v9fs_trans_lock); + + list_for_each_entry(t, &v9fs_trans_list, list) + if (strncmp(t->name, name->from, name->to-name->from) == 0 && + try_module_get(t->owner)) { + found = t; + break; + } + + spin_unlock(&v9fs_trans_lock); + return found; } -EXPORT_SYMBOL(v9fs_match_trans); +EXPORT_SYMBOL(v9fs_get_trans_by_name); /** - * v9fs_default_trans - returns pointer to default transport + * v9fs_get_default_trans - get the default transport * */ -struct p9_trans_module *v9fs_default_trans(void) +struct p9_trans_module *v9fs_get_default_trans(void) { - if (v9fs_default_transport) - return v9fs_default_transport; - else if (!list_empty(&v9fs_trans_list)) - return list_first_entry(&v9fs_trans_list, - struct p9_trans_module, list); - else - return NULL; + struct p9_trans_module *t, *found = NULL; + + spin_lock(&v9fs_trans_lock); + + list_for_each_entry(t, &v9fs_trans_list, list) + if (t->def && try_module_get(t->owner)) { + found = t; + break; + } + + if (!found) + list_for_each_entry(t, &v9fs_trans_list, list) + if (try_module_get(t->owner)) { + found = t; + break; + } + + spin_unlock(&v9fs_trans_lock); + return found; } -EXPORT_SYMBOL(v9fs_default_trans); +EXPORT_SYMBOL(v9fs_get_default_trans); +/** + * v9fs_put_trans - put trans + * @m: transport to put + * + */ +void v9fs_put_trans(struct p9_trans_module *m) +{ + if (m) + module_put(m->owner); +} /** * v9fs_init - Initialize module @@ -120,6 +160,8 @@ static int __init init_p9(void) static void __exit exit_p9(void) { printk(KERN_INFO "Unloading 9P2000 support\n"); + + p9_trans_fd_exit(); } module_init(init_p9) diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index cdf137af7adc..6a32ffdb9429 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -1629,6 +1629,7 @@ static struct p9_trans_module p9_tcp_trans = { .maxsize = MAX_SOCK_BUF, .def = 1, .create = p9_trans_create_tcp, + .owner = THIS_MODULE, }; static struct p9_trans_module p9_unix_trans = { @@ -1636,6 +1637,7 @@ static struct p9_trans_module p9_unix_trans = { .maxsize = MAX_SOCK_BUF, .def = 0, .create = p9_trans_create_unix, + .owner = THIS_MODULE, }; static struct p9_trans_module p9_fd_trans = { @@ -1643,6 +1645,7 @@ static struct p9_trans_module p9_fd_trans = { .maxsize = MAX_SOCK_BUF, .def = 0, .create = p9_trans_create_fd, + .owner = THIS_MODULE, }; int p9_trans_fd_init(void) @@ -1659,4 +1662,10 @@ int p9_trans_fd_init(void) return 0; } -EXPORT_SYMBOL(p9_trans_fd_init); + +void p9_trans_fd_exit(void) +{ + v9fs_unregister_trans(&p9_tcp_trans); + v9fs_unregister_trans(&p9_unix_trans); + v9fs_unregister_trans(&p9_fd_trans); +} diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 42adc052b149..94912e077a55 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -528,6 +528,7 @@ static struct p9_trans_module p9_virtio_trans = { .create = p9_virtio_create, .maxsize = PAGE_SIZE*16, .def = 0, + .owner = THIS_MODULE, }; /* The standard init function */ @@ -545,6 +546,7 @@ static int __init p9_virtio_init(void) static void __exit p9_virtio_cleanup(void) { unregister_virtio_driver(&p9_virtio_drv); + v9fs_unregister_trans(&p9_virtio_trans); } module_init(p9_virtio_init); -- cgit v1.2.1 From 7dc5d24be06a5ed874af035d52a083a7b61ef1bd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Sep 2008 16:22:23 -0500 Subject: 9p-trans_fd: fix trans_fd::p9_conn_destroy() p9_conn_destroy() first kills all current requests by calling p9_conn_cancel(), then waits for the request list to be cleared by waiting on p9_conn->equeue. After that, polling is stopped and the trans is destroyed. This sequence has a few problems. * Read and write works were never cancelled and the p9_conn can be destroyed while the works are running as r/w works remove requests from the list and dereference the p9_conn from them. * The list emptiness wait using p9_conn->equeue wouldn't trigger because p9_conn_cancel() always clears all the lists and the only way the wait can be triggered is to have another task to issue a request between the slim window between p9_conn_cancel() and the wait, which isn't safe under the current implementation with or without the wait. This patch fixes the problem by first stopping poll, which can schedule r/w works, first and cancle r/w works which guarantees that r/w works are not and will not run from that point and then calling p9_conn_cancel() and do the rest of destruction. Signed-off-by: Tejun Heo Signed-off-by: Eric Van Hensbergen --- net/9p/trans_fd.c | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 6a32ffdb9429..ee0d151da31a 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -151,7 +151,6 @@ struct p9_mux_poll_task { * @trans: reference to transport instance for this connection * @tagpool: id accounting for transactions * @err: error state - * @equeue: event wait_q (?) * @req_list: accounting for requests which have been sent * @unsent_req_list: accounting for requests that haven't been sent * @rcall: current response &p9_fcall structure @@ -178,7 +177,6 @@ struct p9_conn { struct p9_trans *trans; struct p9_idpool *tagpool; int err; - wait_queue_head_t equeue; struct list_head req_list; struct list_head unsent_req_list; struct p9_fcall *rcall; @@ -430,7 +428,6 @@ static struct p9_conn *p9_conn_create(struct p9_trans *trans) } m->err = 0; - init_waitqueue_head(&m->equeue); INIT_LIST_HEAD(&m->req_list); INIT_LIST_HEAD(&m->unsent_req_list); m->rcall = NULL; @@ -483,18 +480,13 @@ static void p9_conn_destroy(struct p9_conn *m) { P9_DPRINTK(P9_DEBUG_MUX, "mux %p prev %p next %p\n", m, m->mux_list.prev, m->mux_list.next); - p9_conn_cancel(m, -ECONNRESET); - - if (!list_empty(&m->req_list)) { - /* wait until all processes waiting on this session exit */ - P9_DPRINTK(P9_DEBUG_MUX, - "mux %p waiting for empty request queue\n", m); - wait_event_timeout(m->equeue, (list_empty(&m->req_list)), 5000); - P9_DPRINTK(P9_DEBUG_MUX, "mux %p request queue empty: %d\n", m, - list_empty(&m->req_list)); - } p9_mux_poll_stop(m); + cancel_work_sync(&m->rq); + cancel_work_sync(&m->wq); + + p9_conn_cancel(m, -ECONNRESET); + m->trans = NULL; p9_idpool_destroy(m->tagpool); kfree(m); @@ -840,8 +832,6 @@ static void p9_read_work(struct work_struct *work) (*req->cb) (req, req->cba); else kfree(req->rcall); - - wake_up(&m->equeue); } } else { if (err >= 0 && rcall->id != P9_RFLUSH) @@ -984,8 +974,6 @@ static void p9_mux_flush_cb(struct p9_req *freq, void *a) (*req->cb) (req, req->cba); else kfree(req->rcall); - - wake_up(&m->equeue); } kfree(freq->tcall); @@ -1191,8 +1179,6 @@ void p9_conn_cancel(struct p9_conn *m, int err) else kfree(req->rcall); } - - wake_up(&m->equeue); } /** -- cgit v1.2.1 From 571ffeafffbfdd0b8f2f9d3b991028797ec87e42 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Sep 2008 16:22:23 -0500 Subject: 9p-trans_fd: clean up p9_conn_create() * Use kzalloc() to allocate p9_conn and remove 0/NULL initializations. * Clean up error return paths. Signed-off-by: Tejun Heo Signed-off-by: Eric Van Hensbergen --- net/9p/trans_fd.c | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index ee0d151da31a..6c88e8983750 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -407,11 +407,11 @@ static void p9_mux_poll_stop(struct p9_conn *m) static struct p9_conn *p9_conn_create(struct p9_trans *trans) { int i, n; - struct p9_conn *m, *mtmp; + struct p9_conn *m; P9_DPRINTK(P9_DEBUG_MUX, "transport %p msize %d\n", trans, trans->msize); - m = kmalloc(sizeof(struct p9_conn), GFP_KERNEL); + m = kzalloc(sizeof(struct p9_conn), GFP_KERNEL); if (!m) return ERR_PTR(-ENOMEM); @@ -422,24 +422,14 @@ static struct p9_conn *p9_conn_create(struct p9_trans *trans) m->trans = trans; m->tagpool = p9_idpool_create(); if (IS_ERR(m->tagpool)) { - mtmp = ERR_PTR(-ENOMEM); kfree(m); - return mtmp; + return ERR_PTR(-ENOMEM); } - m->err = 0; INIT_LIST_HEAD(&m->req_list); INIT_LIST_HEAD(&m->unsent_req_list); - m->rcall = NULL; - m->rpos = 0; - m->rbuf = NULL; - m->wpos = m->wsize = 0; - m->wbuf = NULL; INIT_WORK(&m->rq, p9_read_work); INIT_WORK(&m->wq, p9_write_work); - m->wsched = 0; - memset(&m->poll_waddr, 0, sizeof(m->poll_waddr)); - m->poll_task = NULL; n = p9_mux_poll_start(m); if (n) { kfree(m); @@ -460,10 +450,8 @@ static struct p9_conn *p9_conn_create(struct p9_trans *trans) for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) { if (IS_ERR(m->poll_waddr[i])) { p9_mux_poll_stop(m); - mtmp = (void *)m->poll_waddr; /* the error code */ kfree(m); - m = mtmp; - break; + return (void *)m->poll_waddr; /* the error code */ } } -- cgit v1.2.1 From ec3c68f232f6d98b4596c05c1c7551b44c617c5f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Sep 2008 16:22:23 -0500 Subject: 9p-trans_fd: don't do fs segment mangling in p9_fd_poll() p9_fd_poll() is never called with user pointers and f_op->poll() doesn't expect its arguments to be from userland. There's no need to set kernel ds before calling f_op->poll() from p9_fd_poll(). Remove it. Signed-off-by: Tejun Heo Signed-off-by: Eric Van Hensbergen --- net/9p/trans_fd.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 6c88e8983750..f6d4af16cb19 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -1344,7 +1344,6 @@ p9_fd_poll(struct p9_trans *trans, struct poll_table_struct *pt) { int ret, n; struct p9_trans_fd *ts = NULL; - mm_segment_t oldfs; if (trans && trans->status == Connected) ts = trans->priv; @@ -1358,24 +1357,17 @@ p9_fd_poll(struct p9_trans *trans, struct poll_table_struct *pt) if (!ts->wr->f_op || !ts->wr->f_op->poll) return -EIO; - oldfs = get_fs(); - set_fs(get_ds()); - ret = ts->rd->f_op->poll(ts->rd, pt); if (ret < 0) - goto end; + return ret; if (ts->rd != ts->wr) { n = ts->wr->f_op->poll(ts->wr, pt); - if (n < 0) { - ret = n; - goto end; - } + if (n < 0) + return n; ret = (ret & ~POLLOUT) | (n & ~POLLIN); } -end: - set_fs(oldfs); return ret; } -- cgit v1.2.1 From 206ca50de77033c6cc17d0e14fbb12d119a67b01 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Sep 2008 16:22:23 -0500 Subject: 9p-trans_fd: fix and clean up module init/exit paths trans_fd leaked p9_mux_wq on module unload. Fix it. While at it, collapse p9_mux_global_init() into p9_trans_fd_init(). It's easier to follow this way and the global poll_tasks array is about to removed anyway. Signed-off-by: Tejun Heo Signed-off-by: Eric Van Hensbergen --- net/9p/trans_fd.c | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index f6d4af16cb19..0b4eb5f78356 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -238,22 +238,6 @@ static int p9_conn_rpcnb(struct p9_conn *m, struct p9_fcall *tc, static void p9_conn_cancel(struct p9_conn *m, int err); -static int p9_mux_global_init(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++) - p9_mux_poll_tasks[i].task = NULL; - - p9_mux_wq = create_workqueue("v9fs"); - if (!p9_mux_wq) { - printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n"); - return -ENOMEM; - } - - return 0; -} - static u16 p9_mux_get_tag(struct p9_conn *m) { int tag; @@ -1616,10 +1600,15 @@ static struct p9_trans_module p9_fd_trans = { int p9_trans_fd_init(void) { - int ret = p9_mux_global_init(); - if (ret) { - printk(KERN_WARNING "9p: starting mux failed\n"); - return ret; + int i; + + for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++) + p9_mux_poll_tasks[i].task = NULL; + + p9_mux_wq = create_workqueue("v9fs"); + if (!p9_mux_wq) { + printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n"); + return -ENOMEM; } v9fs_register_trans(&p9_tcp_trans); @@ -1634,4 +1623,6 @@ void p9_trans_fd_exit(void) v9fs_unregister_trans(&p9_tcp_trans); v9fs_unregister_trans(&p9_unix_trans); v9fs_unregister_trans(&p9_fd_trans); + + destroy_workqueue(p9_mux_wq); } -- cgit v1.2.1 From 620678244bc7b83287e2e283ed4fe6b959e94b7d Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 24 Sep 2008 16:22:22 -0500 Subject: 9p: introduce missing kfree Error handling code following a kmalloc should free the allocated data. The semantic match that finds the problem is as follows: (http://www.emn.fr/x-info/coccinelle/) // @r exists@ local idexpression x; statement S; expression E; identifier f,l; position p1,p2; expression *ptr != NULL; @@ ( if ((x@p1 = \(kmalloc\|kzalloc\|kcalloc\)(...)) == NULL) S | x@p1 = \(kmalloc\|kzalloc\|kcalloc\)(...); ... if (x == NULL) S ) <... when != x when != if (...) { <+...x...+> } x->f = E ...> ( return \(0\|<+...x...+>\|ptr\); | return@p2 ...; ) @script:python@ p1 << r.p1; p2 << r.p2; @@ print "* file: %s kmalloc %s return %s" % (p1[0].file,p1[0].line,p2[0].line) // Signed-off-by: Julia Lawall Signed-off-by: Eric Van Hensbergen Signed-off-by: Andrew Morton --- net/9p/trans_fd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 0b4eb5f78356..d652baf5ff91 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -870,8 +870,10 @@ static struct p9_req *p9_send_request(struct p9_conn *m, else n = p9_mux_get_tag(m); - if (n < 0) + if (n < 0) { + kfree(req); return ERR_PTR(-ENOMEM); + } p9_set_tag(tc, n); -- cgit v1.2.1 From 16ec4700127d479143c77fd9128dfa17ab572963 Mon Sep 17 00:00:00 2001 From: Eric Van Hensbergen Date: Wed, 24 Sep 2008 16:22:22 -0500 Subject: 9p: fix put_data error handling Abhishek Kulkarni pointed out an inconsistency in the way errors are returned from p9_put_data. On deeper exploration it seems the error handling for this path was completely wrong. This patch adds checks for allocation problems and propagates errors correctly. Signed-off-by: Eric Van Hensbergen --- net/9p/conv.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/9p/conv.c b/net/9p/conv.c index 44547201f5bc..5ad3a3bd73b2 100644 --- a/net/9p/conv.c +++ b/net/9p/conv.c @@ -451,8 +451,10 @@ p9_put_data(struct cbuf *bufp, const char *data, int count, unsigned char **pdata) { *pdata = buf_alloc(bufp, count); + if (*pdata == NULL) + return -ENOMEM; memmove(*pdata, data, count); - return count; + return 0; } static int @@ -460,6 +462,8 @@ p9_put_user_data(struct cbuf *bufp, const char __user *data, int count, unsigned char **pdata) { *pdata = buf_alloc(bufp, count); + if (*pdata == NULL) + return -ENOMEM; return copy_from_user(*pdata, data, count); } -- cgit v1.2.1 From 8ca31ce52a5cfd03b960fd81a49197ae85d25347 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Wed, 24 Sep 2008 15:53:39 -0700 Subject: netfilter: ip6t_{hbh,dst}: Rejects not-strict mode on rule insertion The current code ignores rules for internal options in HBH/DST options header in packet processing if 'Not strict' mode is specified (which is not implemented). Clearly it is not expected by user. Kernel should reject HBH/DST rule insertion with 'Not strict' mode in the first place. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/netfilter/ip6t_hbh.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index 62e39ace0588..26654b26d7fa 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -97,8 +97,6 @@ hbh_mt6(const struct sk_buff *skb, const struct net_device *in, hdrlen -= 2; if (!(optinfo->flags & IP6T_OPTS_OPTS)) { return ret; - } else if (optinfo->flags & IP6T_OPTS_NSTRICT) { - pr_debug("Not strict - not implemented"); } else { pr_debug("Strict "); pr_debug("#%d ", optinfo->optsnr); @@ -177,6 +175,12 @@ hbh_mt6_check(const char *tablename, const void *entry, pr_debug("ip6t_opts: unknown flags %X\n", optsinfo->invflags); return false; } + + if (optsinfo->flags & IP6T_OPTS_NSTRICT) { + pr_debug("ip6t_opts: Not strict - not implemented"); + return false; + } + return true; } -- cgit v1.2.1 From d01dbeb6af7a0848063033f73c3d146fec7451f3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 30 Sep 2008 02:03:19 -0700 Subject: ipsec: Fix pskb_expand_head corruption in xfrm_state_check_space We're never supposed to shrink the headroom or tailroom. In fact, shrinking the headroom is a fatal action. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/xfrm/xfrm_output.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index ac25b4c0e982..dc50f1e71f76 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -27,10 +27,14 @@ static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb) - skb_headroom(skb); int ntail = dst->dev->needed_tailroom - skb_tailroom(skb); - if (nhead > 0 || ntail > 0) - return pskb_expand_head(skb, nhead, ntail, GFP_ATOMIC); - - return 0; + if (nhead <= 0) { + if (ntail <= 0) + return 0; + nhead = 0; + } else if (ntail < 0) + ntail = 0; + + return pskb_expand_head(skb, nhead, ntail, GFP_ATOMIC); } static int xfrm_output_one(struct sk_buff *skb, int err) -- cgit v1.2.1 From cf04a4c764cd3e651a64b3e667bb6a673ead99e1 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 30 Sep 2008 02:22:14 -0700 Subject: netdev: use const for some name functions dev_change_name and netdev_drivername should use const char on parameters that are read-only input values. The strcpy to newname is not needed since newname is not used later in function. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/core/dev.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index a90737fe2472..64f0d5b7cdfc 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -890,7 +890,7 @@ int dev_alloc_name(struct net_device *dev, const char *name) * Change name of a device, can pass format strings "eth%d". * for wildcarding. */ -int dev_change_name(struct net_device *dev, char *newname) +int dev_change_name(struct net_device *dev, const char *newname) { char oldname[IFNAMSIZ]; int err = 0; @@ -916,7 +916,6 @@ int dev_change_name(struct net_device *dev, char *newname) err = dev_alloc_name(dev, newname); if (err < 0) return err; - strcpy(newname, dev->name); } else if (__dev_get_by_name(net, newname)) return -EEXIST; @@ -4754,10 +4753,10 @@ err_name: return -ENOMEM; } -char *netdev_drivername(struct net_device *dev, char *buffer, int len) +char *netdev_drivername(const struct net_device *dev, char *buffer, int len) { - struct device_driver *driver; - struct device *parent; + const struct device_driver *driver; + const struct device *parent; if (len <= 0 || !buffer) return buffer; -- cgit v1.2.1 From f0db275a81ef184293ca7ef3646fe065b336efb7 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 30 Sep 2008 02:23:58 -0700 Subject: netdev: docbook comment update (revised) Add more docbook comments to network device functions and cleanup the comments. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/core/dev.c | 46 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 64f0d5b7cdfc..2cc258b7acce 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -956,6 +956,7 @@ rollback: * dev_set_alias - change ifalias of a device * @dev: device * @alias: name up to IFALIASZ + * @len: limit of bytes to copy from info * * Set ifalias for a device, */ @@ -3330,6 +3331,12 @@ static void dev_addr_discard(struct net_device *dev) netif_addr_unlock_bh(dev); } +/** + * dev_get_flags - get flags reported to userspace + * @dev: device + * + * Get the combination of flag bits exported through APIs to userspace. + */ unsigned dev_get_flags(const struct net_device *dev) { unsigned flags; @@ -3354,6 +3361,14 @@ unsigned dev_get_flags(const struct net_device *dev) return flags; } +/** + * dev_change_flags - change device settings + * @dev: device + * @flags: device state flags + * + * Change settings on device based state flags. The flags are + * in the userspace exported format. + */ int dev_change_flags(struct net_device *dev, unsigned flags) { int ret, changes; @@ -3423,6 +3438,13 @@ int dev_change_flags(struct net_device *dev, unsigned flags) return ret; } +/** + * dev_set_mtu - Change maximum transfer unit + * @dev: device + * @new_mtu: new transfer unit + * + * Change the maximum transfer size of the network device. + */ int dev_set_mtu(struct net_device *dev, int new_mtu) { int err; @@ -3447,6 +3469,13 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) return err; } +/** + * dev_set_mac_address - Change Media Access Control Address + * @dev: device + * @sa: new address + * + * Change the hardware (MAC) address of the device + */ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) { int err; @@ -4350,7 +4379,12 @@ void free_netdev(struct net_device *dev) put_device(&dev->dev); } -/* Synchronize with packet receive processing. */ +/** + * synchronize_net - Synchronize with packet receive processing + * + * Wait for packets currently being received to be done. + * Does not block later packets from starting. + */ void synchronize_net(void) { might_sleep(); @@ -4652,7 +4686,7 @@ netdev_dma_event(struct dma_client *client, struct dma_chan *chan, } /** - * netdev_dma_regiser - register the networking subsystem as a DMA client + * netdev_dma_register - register the networking subsystem as a DMA client */ static int __init netdev_dma_register(void) { @@ -4753,6 +4787,14 @@ err_name: return -ENOMEM; } +/** + * netdev_drivername - network driver for the device + * @dev: network device + * @buffer: buffer for resulting name + * @len: size of buffer + * + * Determine network driver for device. + */ char *netdev_drivername(const struct net_device *dev, char *buffer, int len) { const struct device_driver *driver; -- cgit v1.2.1 From 8980713b97393b21a50d11408a22d2caa87d016a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Tue, 30 Sep 2008 02:51:18 -0700 Subject: Phonet: Netlink factorization and cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/pn_netlink.c | 91 +++++++++++++++++++------------------------------ 1 file changed, 35 insertions(+), 56 deletions(-) (limited to 'net') diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index b1ea19a230dd..b1770d66bc8d 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -54,11 +54,16 @@ errout: rtnl_set_sk_err(dev_net(dev), RTNLGRP_PHONET_IFADDR, err); } -static int newaddr_doit(struct sk_buff *skb, struct nlmsghdr *nlm, void *attr) +static const struct nla_policy ifa_phonet_policy[IFA_MAX+1] = { + [IFA_LOCAL] = { .type = NLA_U8 }, +}; + +static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr) { - struct rtattr **rta = attr; - struct ifaddrmsg *ifm = NLMSG_DATA(nlm); + struct net *net = sock_net(skb->sk); + struct nlattr *tb[IFA_MAX+1]; struct net_device *dev; + struct ifaddrmsg *ifm; int err; u8 pnaddr; @@ -67,52 +72,28 @@ static int newaddr_doit(struct sk_buff *skb, struct nlmsghdr *nlm, void *attr) ASSERT_RTNL(); - if (rta[IFA_LOCAL - 1] == NULL) - return -EINVAL; - - dev = __dev_get_by_index(&init_net, ifm->ifa_index); - if (dev == NULL) - return -ENODEV; + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_phonet_policy); + if (err < 0) + return err; - if (ifm->ifa_prefixlen > 0) + ifm = nlmsg_data(nlh); + if (tb[IFA_LOCAL] == NULL) return -EINVAL; - - memcpy(&pnaddr, RTA_DATA(rta[IFA_LOCAL - 1]), 1); - - err = phonet_address_add(dev, pnaddr); - if (!err) - rtmsg_notify(RTM_NEWADDR, dev, pnaddr); - return err; -} - -static int deladdr_doit(struct sk_buff *skb, struct nlmsghdr *nlm, void *attr) -{ - struct rtattr **rta = attr; - struct ifaddrmsg *ifm = NLMSG_DATA(nlm); - struct net_device *dev; - int err; - u8 pnaddr; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - ASSERT_RTNL(); - - if (rta[IFA_LOCAL - 1] == NULL) + pnaddr = nla_get_u8(tb[IFA_LOCAL]); + if (pnaddr & 3) + /* Phonet addresses only have 6 high-order bits */ return -EINVAL; - dev = __dev_get_by_index(&init_net, ifm->ifa_index); + dev = __dev_get_by_index(net, ifm->ifa_index); if (dev == NULL) return -ENODEV; - if (ifm->ifa_prefixlen > 0) - return -EADDRNOTAVAIL; - - memcpy(&pnaddr, RTA_DATA(rta[IFA_LOCAL - 1]), 1); - - err = phonet_address_del(dev, pnaddr); + if (nlh->nlmsg_type == RTM_NEWADDR) + err = phonet_address_add(dev, pnaddr); + else + err = phonet_address_del(dev, pnaddr); if (!err) - rtmsg_notify(RTM_DELADDR, dev, pnaddr); + rtmsg_notify(nlh->nlmsg_type, dev, pnaddr); return err; } @@ -121,25 +102,23 @@ static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr, { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; - unsigned int orig_len = skb->len; - nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct ifaddrmsg)); - ifm = NLMSG_DATA(nlh); + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), 0); + if (nlh == NULL) + return -EMSGSIZE; + + ifm = nlmsg_data(nlh); ifm->ifa_family = AF_PHONET; ifm->ifa_prefixlen = 0; ifm->ifa_flags = IFA_F_PERMANENT; - ifm->ifa_scope = RT_SCOPE_HOST; + ifm->ifa_scope = RT_SCOPE_LINK; ifm->ifa_index = dev->ifindex; - RTA_PUT(skb, IFA_LOCAL, 1, &addr); - nlh->nlmsg_len = skb->len - orig_len; - - return 0; - -nlmsg_failure: -rtattr_failure: - skb_trim(skb, orig_len); + NLA_PUT_U8(skb, IFA_LOCAL, addr); + return nlmsg_end(skb, nlh); - return -1; +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb) @@ -180,7 +159,7 @@ out: void __init phonet_netlink_register(void) { - rtnl_register(PF_PHONET, RTM_NEWADDR, newaddr_doit, NULL); - rtnl_register(PF_PHONET, RTM_DELADDR, deladdr_doit, NULL); + rtnl_register(PF_PHONET, RTM_NEWADDR, addr_doit, NULL); + rtnl_register(PF_PHONET, RTM_DELADDR, addr_doit, NULL); rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit); } -- cgit v1.2.1 From 8b122efd13a227d35d5ca242561770db1b5e3658 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 30 Sep 2008 03:03:35 -0700 Subject: iucv: Fix mismerge again. fb65a7c091529bfffb1262515252c0d0f6241c5c ("iucv: Fix bad merging.") fixed a merge error, but in a wrong way. We now end up with the bug below. This patch corrects the mismerge like it was intended. BUG: scheduling while atomic: swapper/1/0x00000000 Modules linked in: CPU: 1 Not tainted 2.6.27-rc7-00094-gc0f4d6d #9 Process swapper (pid: 1, task: 000000003fe7d988, ksp: 000000003fe838c0) 0000000000000000 000000003fe839b8 0000000000000002 0000000000000000 000000003fe83a58 000000003fe839d0 000000003fe839d0 0000000000390de6 000000000058acd8 00000000000000d0 000000003fe7dcd8 0000000000000000 000000000000000c 000000000000000d 0000000000000000 000000003fe83a28 000000000039c5b8 0000000000015e5e 000000003fe839b8 000000003fe83a00 Call Trace: ([<0000000000015d6a>] show_trace+0xe6/0x134) [<0000000000039656>] __schedule_bug+0xa2/0xa8 [<0000000000391744>] schedule+0x49c/0x910 [<0000000000391f64>] schedule_timeout+0xc4/0x114 [<00000000003910d4>] wait_for_common+0xe8/0x1b4 [<00000000000549ae>] call_usermodehelper_exec+0xa6/0xec [<00000000001af7b8>] kobject_uevent_env+0x418/0x438 [<00000000001d08fc>] bus_add_driver+0x1e4/0x298 [<00000000001d1ee4>] driver_register+0x90/0x18c [<0000000000566848>] netiucv_init+0x168/0x2c8 [<00000000000120be>] do_one_initcall+0x3e/0x17c [<000000000054a31a>] kernel_init+0x1ce/0x248 [<000000000001a97a>] kernel_thread_starter+0x6/0xc [<000000000001a974>] kernel_thread_starter+0x0/0xc iucv: NETIUCV driver initialized initcall netiucv_init+0x0/0x2c8 returned with preemption imbalance Signed-off-by: Heiko Carstens Signed-off-by: David S. Miller --- net/iucv/iucv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 705959b31e24..d7b54b5bfa69 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -524,7 +524,6 @@ static int iucv_enable(void) get_online_cpus(); for_each_online_cpu(cpu) smp_call_function_single(cpu, iucv_declare_cpu, NULL, 1); - preempt_enable(); if (cpus_empty(iucv_buffer_cpumask)) /* No cpu could declare an iucv buffer. */ goto out_path; @@ -547,7 +546,9 @@ out: */ static void iucv_disable(void) { + get_online_cpus(); on_each_cpu(iucv_retrieve_cpu, NULL, 1); + put_online_cpus(); kfree(iucv_path_table); } -- cgit v1.2.1 From ba0166708ef4da7eeb61dd92bbba4d5a749d6561 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 30 Sep 2008 05:32:24 -0700 Subject: sctp: Fix kernel panic while process protocol violation parameter Since call to function sctp_sf_abort_violation() need paramter 'arg' with 'struct sctp_chunk' type, it will read the chunk type and chunk length from the chunk_hdr member of chunk. But call to sctp_sf_violation_paramlen() always with 'struct sctp_paramhdr' type's parameter, it will be passed to sctp_sf_abort_violation(). This may cause kernel panic. sctp_sf_violation_paramlen() |-- sctp_sf_abort_violation() |-- sctp_make_abort_violation() This patch fixed this problem. This patch also fix two place which called sctp_sf_violation_paramlen() with wrong paramter type. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/sm_make_chunk.c | 37 ++++++++++++++++++++++++------------- net/sctp/sm_statefuns.c | 48 +++++++++++++++++++++++++++++++++++++----------- 2 files changed, 61 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index b599cbba4fbe..d68869f966c3 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1012,6 +1012,29 @@ end: return retval; } +struct sctp_chunk *sctp_make_violation_paramlen( + const struct sctp_association *asoc, + const struct sctp_chunk *chunk, + struct sctp_paramhdr *param) +{ + struct sctp_chunk *retval; + static const char error[] = "The following parameter had invalid length:"; + size_t payload_len = sizeof(error) + sizeof(sctp_errhdr_t) + + sizeof(sctp_paramhdr_t); + + retval = sctp_make_abort(asoc, chunk, payload_len); + if (!retval) + goto nodata; + + sctp_init_cause(retval, SCTP_ERROR_PROTO_VIOLATION, + sizeof(error) + sizeof(sctp_paramhdr_t)); + sctp_addto_chunk(retval, sizeof(error), error); + sctp_addto_param(retval, sizeof(sctp_paramhdr_t), param); + +nodata: + return retval; +} + /* Make a HEARTBEAT chunk. */ struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc, const struct sctp_transport *transport, @@ -1782,11 +1805,6 @@ static int sctp_process_inv_paramlength(const struct sctp_association *asoc, const struct sctp_chunk *chunk, struct sctp_chunk **errp) { - static const char error[] = "The following parameter had invalid length:"; - size_t payload_len = WORD_ROUND(sizeof(error)) + - sizeof(sctp_paramhdr_t); - - /* This is a fatal error. Any accumulated non-fatal errors are * not reported. */ @@ -1794,14 +1812,7 @@ static int sctp_process_inv_paramlength(const struct sctp_association *asoc, sctp_chunk_free(*errp); /* Create an error chunk and fill it in with our payload. */ - *errp = sctp_make_op_error_space(asoc, chunk, payload_len); - - if (*errp) { - sctp_init_cause(*errp, SCTP_ERROR_PROTO_VIOLATION, - sizeof(error) + sizeof(sctp_paramhdr_t)); - sctp_addto_chunk(*errp, sizeof(error), error); - sctp_addto_param(*errp, sizeof(sctp_paramhdr_t), param); - } + *errp = sctp_make_violation_paramlen(asoc, chunk, param); return 0; } diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 8848d329aa2c..7c622af2ce55 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -119,7 +119,7 @@ static sctp_disposition_t sctp_sf_violation_paramlen( const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, - void *arg, + void *arg, void *ext, sctp_cmd_seq_t *commands); static sctp_disposition_t sctp_sf_violation_ctsn( @@ -3425,7 +3425,7 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, addr_param = (union sctp_addr_param *)hdr->params; length = ntohs(addr_param->p.length); if (length < sizeof(sctp_paramhdr_t)) - return sctp_sf_violation_paramlen(ep, asoc, type, + return sctp_sf_violation_paramlen(ep, asoc, type, arg, (void *)addr_param, commands); /* Verify the ASCONF chunk before processing it. */ @@ -3433,8 +3433,8 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, (sctp_paramhdr_t *)((void *)addr_param + length), (void *)chunk->chunk_end, &err_param)) - return sctp_sf_violation_paramlen(ep, asoc, type, - (void *)&err_param, commands); + return sctp_sf_violation_paramlen(ep, asoc, type, arg, + (void *)err_param, commands); /* ADDIP 5.2 E1) Compare the value of the serial number to the value * the endpoint stored in a new association variable @@ -3542,8 +3542,8 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, (sctp_paramhdr_t *)addip_hdr->params, (void *)asconf_ack->chunk_end, &err_param)) - return sctp_sf_violation_paramlen(ep, asoc, type, - (void *)&err_param, commands); + return sctp_sf_violation_paramlen(ep, asoc, type, arg, + (void *)err_param, commands); if (last_asconf) { addip_hdr = (sctp_addiphdr_t *)last_asconf->subh.addip_hdr; @@ -4240,12 +4240,38 @@ static sctp_disposition_t sctp_sf_violation_paramlen( const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) { - static const char err_str[] = "The following parameter had invalid length:"; + void *arg, void *ext, + sctp_cmd_seq_t *commands) +{ + struct sctp_chunk *chunk = arg; + struct sctp_paramhdr *param = ext; + struct sctp_chunk *abort = NULL; - return sctp_sf_abort_violation(ep, asoc, arg, commands, err_str, - sizeof(err_str)); + if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc)) + goto discard; + + /* Make the abort chunk. */ + abort = sctp_make_violation_paramlen(asoc, chunk, param); + if (!abort) + goto nomem; + + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); + SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + + sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, + SCTP_ERROR(ECONNABORTED)); + sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, + SCTP_PERR(SCTP_ERROR_PROTO_VIOLATION)); + SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + +discard: + sctp_sf_pdiscard(ep, asoc, SCTP_ST_CHUNK(0), arg, commands); + + SCTP_INC_STATS(SCTP_MIB_ABORTEDS); + + return SCTP_DISPOSITION_ABORT; +nomem: + return SCTP_DISPOSITION_NOMEM; } /* Handle a protocol violation when the peer trying to advance the -- cgit v1.2.1 From b4a4bf5d77c7d32098a7080f34a8857dd7fa466d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 26 Sep 2008 13:34:54 +0200 Subject: mac80211: fixups for "make master iface not wireless" In "mac80211: make master iface not wireless" I accidentally forgot to include these changes ... leading to the expected BUG_ON errors. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/iface.c | 9 ++++----- net/mac80211/wme.c | 8 ++++---- 2 files changed, 8 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index b5cd91e89712..8336fee68d3e 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -58,8 +58,9 @@ static inline int identical_mac_addr_allowed(int type1, int type2) static int ieee80211_open(struct net_device *dev) { - struct ieee80211_sub_if_data *sdata, *nsdata; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *nsdata; + struct ieee80211_local *local = sdata->local; struct sta_info *sta; struct ieee80211_if_init_conf conf; u32 changed = 0; @@ -67,8 +68,6 @@ static int ieee80211_open(struct net_device *dev) bool need_hw_reconfig = 0; u8 null_addr[ETH_ALEN] = {0}; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - /* fail early if user set an invalid address */ if (compare_ether_addr(dev->dev_addr, null_addr) && !is_valid_ether_addr(dev->dev_addr)) @@ -512,8 +511,8 @@ static int ieee80211_stop(struct net_device *dev) static void ieee80211_set_multicast_list(struct net_device *dev) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; int allmulti, promisc, sdata_allmulti, sdata_promisc; allmulti = !!(dev->flags & IFF_ALLMULTI); diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index c703f8b44e92..139b5f267b34 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -73,9 +73,8 @@ static int wme_downgrade_ac(struct sk_buff *skb) /* Indicate which queue to use. */ -static u16 classify80211(struct sk_buff *skb, struct net_device *dev) +static u16 classify80211(struct ieee80211_local *local, struct sk_buff *skb) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; if (!ieee80211_is_data(hdr->frame_control)) { @@ -113,14 +112,15 @@ static u16 classify80211(struct sk_buff *skb, struct net_device *dev) u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb) { + struct ieee80211_master_priv *mpriv = netdev_priv(dev); + struct ieee80211_local *local = mpriv->local; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct sta_info *sta; u16 queue; u8 tid; - queue = classify80211(skb, dev); + queue = classify80211(local, skb); if (unlikely(queue >= local->hw.queues)) queue = local->hw.queues - 1; -- cgit v1.2.1 From d88410a0b657c5ccebd1c120af1f14c5ca6a3d95 Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Thu, 25 Sep 2008 20:45:01 +0300 Subject: mac80211: remove wme_tx_queue and wme_rx_queue from net/mac80211/sta_info.h This patch removes wme_tx_queue and wme_rx_queue from struct sta_info and from the debugfs sub-structure of struct sta_info in net/mac80211/sta_info.h, as they are useless and not used. Signed-off-by: Rami Rosen Signed-off-by: John W. Linville --- net/mac80211/sta_info.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'net') diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index c3f436964621..a6b51862a89d 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -189,7 +189,6 @@ struct sta_ampdu_mlme { * @last_qual: qual of last received frame from this STA * @last_noise: noise of last received frame from this STA * @last_seq_ctrl: last received seq/frag number from this STA (per RX queue) - * @wme_rx_queue: TBD * @tx_filtered_count: TBD * @tx_retry_failed: TBD * @tx_retry_count: TBD @@ -199,7 +198,6 @@ struct sta_ampdu_mlme { * @tx_fragments: number of transmitted MPDUs * @last_txrate_idx: Index of the last used transmit rate * @tid_seq: TBD - * @wme_tx_queue: TBD * @ampdu_mlme: TBD * @timer_to_tid: identity mapping to ID timers * @tid_to_tx_q: map tid to tx queue @@ -258,9 +256,6 @@ struct sta_info { int last_qual; int last_noise; __le16 last_seq_ctrl[NUM_RX_DATA_QUEUES]; -#ifdef CONFIG_MAC80211_DEBUG_COUNTERS - unsigned int wme_rx_queue[NUM_RX_DATA_QUEUES]; -#endif /* Updated from TX status path only, no locking requirements */ unsigned long tx_filtered_count; @@ -274,9 +269,6 @@ struct sta_info { unsigned long tx_fragments; unsigned int last_txrate_idx; u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1]; -#ifdef CONFIG_MAC80211_DEBUG_COUNTERS - unsigned int wme_tx_queue[NUM_RX_DATA_QUEUES]; -#endif /* * Aggregation information, locked with lock. @@ -307,10 +299,6 @@ struct sta_info { struct dentry *num_ps_buf_frames; struct dentry *inactive_ms; struct dentry *last_seq_ctrl; -#ifdef CONFIG_MAC80211_DEBUG_COUNTERS - struct dentry *wme_rx_queue; - struct dentry *wme_tx_queue; -#endif struct dentry *agg_status; } debugfs; #endif -- cgit v1.2.1 From b0dee5784dff3e2984510a7fe7a7e48109001f94 Mon Sep 17 00:00:00 2001 From: Davide Pesavento Date: Sat, 27 Sep 2008 17:29:12 +0200 Subject: Fix modpost failure when rx handlers are not inlined. When CONFIG_MAC80211_MESH=n and CONFIG_MAC80211_NOINLINE=y, gcc doesn't optimize out a call to ieee80211_rx_h_mesh_fwding, even if the previous comparison is always false in this case. This leads to the following errors during modpost: ERROR: "mpp_path_lookup" [net/mac80211/mac80211.ko] undefined! ERROR: "mpp_path_add" [net/mac80211/mac80211.ko] undefined! Fix by removing the possibility of uninlining ieee80211_rx_h_mesh_fwding rx handler. Signed-off-by: Davide Pesavento Signed-off-by: John W. Linville --- net/mac80211/rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index c489865761bc..77e7b014872b 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1379,7 +1379,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) return RX_QUEUED; } -static ieee80211_rx_result debug_noinline +static ieee80211_rx_result ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) { struct ieee80211_hdr *hdr; -- cgit v1.2.1 From 4dd7972d1204c3851a4092cecd2207e05eb29b09 Mon Sep 17 00:00:00 2001 From: Vitaliy Gusev Date: Wed, 1 Oct 2008 01:51:39 -0700 Subject: tcp: Fix NULL dereference in tcp_4_send_ack() Fix NULL dereference in tcp_4_send_ack(). As skb->dev is reset to NULL in tcp_v4_rcv() thus OOPS occurs: BUG: unable to handle kernel NULL pointer dereference at 00000000000004d0 IP: [] tcp_v4_send_ack+0x203/0x250 Stack: ffff810005dbb000 ffff810015c8acc0 e77b2c6e5f861600 a01610802e90cb6d 0a08010100000000 88afffff88afffff 0000000080762be8 0000000115c872e8 0004122000000000 0000000000000001 ffffffff80762b88 0000000000000020 Call Trace: [] tcp_v4_reqsk_send_ack+0x20/0x22 [] tcp_check_req+0x108/0x14c [] ? rt_intern_hash+0x322/0x33c [] tcp_v4_do_rcv+0x399/0x4ec [] ? skb_checksum+0x4f/0x272 [] ? __inet_lookup_listener+0x14a/0x15c [] tcp_v4_rcv+0x6a1/0x701 [] ip_local_deliver_finish+0x157/0x24a [] ip_local_deliver+0x72/0x7c [] ip_rcv_finish+0x38d/0x3b2 [] ? scsi_io_completion+0x19d/0x39e [] ip_rcv+0x2a2/0x2e5 [] netif_receive_skb+0x293/0x303 [] process_backlog+0x80/0xd0 [] ? __rcu_process_callbacks+0x125/0x1b4 [] net_rx_action+0xb9/0x17f [] __do_softirq+0xa3/0x164 [] call_softirq+0x1c/0x28 [] do_softirq+0x34/0x72 [] local_bh_enable_ip+0x3f/0x50 [] _spin_unlock_bh+0x12/0x14 [] release_sock+0xb8/0xc1 [] inet_stream_connect+0x146/0x25c [] ? autoremove_wake_function+0x0/0x38 [] sys_connect+0x68/0x8e [] ? fd_install+0x5f/0x68 [] ? sock_map_fd+0x55/0x62 [] system_call_after_swapgs+0x7b/0x80 Code: 41 10 11 d0 83 d0 00 4d 85 ed 89 45 c0 c7 45 c4 08 00 00 00 74 07 41 8b 45 04 89 45 c8 48 8b 43 20 8b 4d b8 48 8d 55 b0 48 89 de <48> 8b 80 d0 04 00 00 48 8b b8 60 01 00 00 e8 20 ae fe ff 65 48 RIP [] tcp_v4_send_ack+0x203/0x250 RSP CR2: 00000000000004d0 Signed-off-by: Vitaliy Gusev Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1b4fee20fc93..011478e46c40 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -618,7 +618,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, ]; } rep; struct ip_reply_arg arg; - struct net *net = dev_net(skb->dev); + struct net *net = dev_net(skb->dst->dev); memset(&rep.th, 0, sizeof(struct tcphdr)); memset(&arg, 0, sizeof(arg)); -- cgit v1.2.1 From 2a5b82751f73a0bf6f604ce56d34adba6da1b246 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 1 Oct 2008 02:13:16 -0700 Subject: ipv6: NULL pointer dereferrence in tcp_v6_send_ack The following actions are possible: tcp_v6_rcv skb->dev = NULL; tcp_v6_do_rcv tcp_v6_hnd_req tcp_check_req req->rsk_ops->send_ack == tcp_v6_send_ack So, skb->dev can be NULL in tcp_v6_send_ack. We must obtain namespace from dst entry. Thanks to Vitaliy Gusev for initial problem finding in IPv4 code. Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b585c850a89a..10e22fd48222 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1050,7 +1050,7 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 struct tcphdr *th = tcp_hdr(skb), *t1; struct sk_buff *buff; struct flowi fl; - struct net *net = dev_net(skb->dev); + struct net *net = dev_net(skb->dst->dev); struct sock *ctl_sk = net->ipv6.tcp_sk; unsigned int tot_len = sizeof(struct tcphdr); __be32 *topt; -- cgit v1.2.1 From 04a4bb55bcf35b63d40fd2725e58599ff8310dd7 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Wed, 1 Oct 2008 02:33:12 -0700 Subject: net: add skb_recycle_check() to enable netdriver skb recycling This patch adds skb_recycle_check(), which can be used by a network driver after transmitting an skb to check whether this skb can be recycled as a receive buffer. skb_recycle_check() checks that the skb is not shared or cloned, and that it is linear and its head portion large enough (as determined by the driver) to be recycled as a receive buffer. If these conditions are met, it does any necessary reference count dropping and cleans up the skbuff as if it just came from __alloc_skb(). Signed-off-by: Lennert Buytenhek Signed-off-by: David S. Miller --- net/core/skbuff.c | 41 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ca1ccdf1ef76..2c218a0808b4 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -363,8 +363,7 @@ static void kfree_skbmem(struct sk_buff *skb) } } -/* Free everything but the sk_buff shell. */ -static void skb_release_all(struct sk_buff *skb) +static void skb_release_head_state(struct sk_buff *skb) { dst_release(skb->dst); #ifdef CONFIG_XFRM @@ -388,6 +387,12 @@ static void skb_release_all(struct sk_buff *skb) skb->tc_verd = 0; #endif #endif +} + +/* Free everything but the sk_buff shell. */ +static void skb_release_all(struct sk_buff *skb) +{ + skb_release_head_state(skb); skb_release_data(skb); } @@ -424,6 +429,38 @@ void kfree_skb(struct sk_buff *skb) __kfree_skb(skb); } +int skb_recycle_check(struct sk_buff *skb, int skb_size) +{ + struct skb_shared_info *shinfo; + + if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE) + return 0; + + skb_size = SKB_DATA_ALIGN(skb_size + NET_SKB_PAD); + if (skb_end_pointer(skb) - skb->head < skb_size) + return 0; + + if (skb_shared(skb) || skb_cloned(skb)) + return 0; + + skb_release_head_state(skb); + shinfo = skb_shinfo(skb); + atomic_set(&shinfo->dataref, 1); + shinfo->nr_frags = 0; + shinfo->gso_size = 0; + shinfo->gso_segs = 0; + shinfo->gso_type = 0; + shinfo->ip6_frag_id = 0; + shinfo->frag_list = NULL; + + memset(skb, 0, offsetof(struct sk_buff, tail)); + skb_reset_tail_pointer(skb); + skb->data = skb->head + NET_SKB_PAD; + + return 1; +} +EXPORT_SYMBOL(skb_recycle_check); + static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) { new->tstamp = old->tstamp; -- cgit v1.2.1 From 5dc121e9a7a8a3721cefeb07f3559f50fbedc67e Mon Sep 17 00:00:00 2001 From: Arnaud Ebalard Date: Wed, 1 Oct 2008 02:37:56 -0700 Subject: XFRM,IPv6: initialize ip6_dst_blackhole_ops.kmem_cachep ip6_dst_blackhole_ops.kmem_cachep is not expected to be NULL (i.e. to be initialized) when dst_alloc() is called from ip6_dst_blackhole(). Otherwise, it results in the following (xfrm_larval_drop is now set to 1 by default): [ 78.697642] Unable to handle kernel paging request for data at address 0x0000004c [ 78.703449] Faulting instruction address: 0xc0097f54 [ 78.786896] Oops: Kernel access of bad area, sig: 11 [#1] [ 78.792791] PowerMac [ 78.798383] Modules linked in: btusb usbhid bluetooth b43 mac80211 cfg80211 ehci_hcd ohci_hcd sungem sungem_phy usbcore ssb [ 78.804263] NIP: c0097f54 LR: c0334a28 CTR: c002d430 [ 78.809997] REGS: eef19ad0 TRAP: 0300 Not tainted (2.6.27-rc5) [ 78.815743] MSR: 00001032 CR: 22242482 XER: 20000000 [ 78.821550] DAR: 0000004c, DSISR: 40000000 [ 78.827278] TASK = eef0df40[3035] 'mip6d' THREAD: eef18000 [ 78.827408] GPR00: 00001032 eef19b80 eef0df40 00000000 00008020 eef19c30 00000001 00000000 [ 78.833249] GPR08: eee5101c c05a5c10 ef9ad500 00000000 24242422 1005787c 00000000 1004f960 [ 78.839151] GPR16: 00000000 10024e90 10050040 48030018 0fe44150 00000000 00000000 eef19c30 [ 78.845046] GPR24: eef19e44 00000000 eef19bf8 efb37c14 eef19bf8 00008020 00009032 c0596064 [ 78.856671] NIP [c0097f54] kmem_cache_alloc+0x20/0x94 [ 78.862581] LR [c0334a28] dst_alloc+0x40/0xc4 [ 78.868451] Call Trace: [ 78.874252] [eef19b80] [c03c1810] ip6_dst_lookup_tail+0x1c8/0x1dc (unreliable) [ 78.880222] [eef19ba0] [c0334a28] dst_alloc+0x40/0xc4 [ 78.886164] [eef19bb0] [c03cd698] ip6_dst_blackhole+0x28/0x1cc [ 78.892090] [eef19be0] [c03d9be8] rawv6_sendmsg+0x75c/0xc88 [ 78.897999] [eef19cb0] [c038bca4] inet_sendmsg+0x4c/0x78 [ 78.903907] [eef19cd0] [c03207c8] sock_sendmsg+0xac/0xe4 [ 78.909734] [eef19db0] [c03209e4] sys_sendmsg+0x1e4/0x2a0 [ 78.915540] [eef19f00] [c03220a8] sys_socketcall+0xfc/0x210 [ 78.921406] [eef19f40] [c0014b3c] ret_from_syscall+0x0/0x38 [ 78.927295] --- Exception: c01 at 0xfe2d730 [ 78.927297] LR = 0xfe2d71c [ 78.939019] Instruction dump: [ 78.944835] 91640018 9144001c 900a0000 4bffff44 9421ffe0 7c0802a6 bf810010 7c9d2378 [ 78.950694] 90010024 7fc000a6 57c0045e 7c000124 <83e3004c> 8383005c 2f9f0000 419e0050 [ 78.956464] ---[ end trace 05fa1ed7972487a1 ]--- As commented by Benjamin Thery, the bug was introduced by f2fc6a54585a1be6669613a31fbaba2ecbadcd36, while adding network namespaces support to ipv6 routes. Signed-off-by: Arnaud Ebalard Acked-by: Benjamin Thery Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9af6115f0f50..63442a1e741c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2688,6 +2688,8 @@ int __init ip6_route_init(void) if (ret) goto out_kmem_cache; + ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; + /* Registering of the loopback is done before this portion of code, * the loopback reference in rt6_info will not be taken, do it * manually for init_net */ -- cgit v1.2.1 From 93c8b90f01f0dc73891da4e84b26524b61d29d66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 1 Oct 2008 02:48:31 -0700 Subject: ipv6: almost identical frag hashing funcs combined MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit $ diff-funcs ip6qhashfn reassembly.c netfilter/nf_conntrack_reasm.c --- reassembly.c:ip6qhashfn() +++ netfilter/nf_conntrack_reasm.c:ip6qhashfn() @@ -1,5 +1,5 @@ -static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, - struct in6_addr *daddr) +static unsigned int ip6qhashfn(__be32 id, const struct in6_addr *saddr, + const struct in6_addr *daddr) { u32 a, b, c; @@ -9,7 +9,7 @@ a += JHASH_GOLDEN_RATIO; b += JHASH_GOLDEN_RATIO; - c += ip6_frags.rnd; + c += nf_frags.rnd; __jhash_mix(a, b, c); a += (__force u32)saddr->s6_addr32[3]; And codiff xx.o.old xx.o.new: net/ipv6/netfilter/nf_conntrack_reasm.c: ip6qhashfn | -512 nf_hashfn | +6 nf_ct_frag6_gather | +36 3 functions changed, 42 bytes added, 512 bytes removed, diff: -470 net/ipv6/reassembly.c: ip6qhashfn | -512 ip6_hashfn | +7 ipv6_frag_rcv | +89 3 functions changed, 96 bytes added, 512 bytes removed, diff: -416 net/ipv6/reassembly.c: inet6_hash_frag | +510 1 function changed, 510 bytes added, diff: +510 Total: -376 Compile tested. Signed-off-by: Ilpo Järvinen Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/ipv6/netfilter/nf_conntrack_reasm.c | 32 ++------------------------------ net/ipv6/reassembly.c | 11 ++++++----- 2 files changed, 8 insertions(+), 35 deletions(-) (limited to 'net') diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 52d06dd4b817..9967ac7a01a8 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include @@ -103,39 +102,12 @@ struct ctl_table nf_ct_ipv6_sysctl_table[] = { }; #endif -static unsigned int ip6qhashfn(__be32 id, const struct in6_addr *saddr, - const struct in6_addr *daddr) -{ - u32 a, b, c; - - a = (__force u32)saddr->s6_addr32[0]; - b = (__force u32)saddr->s6_addr32[1]; - c = (__force u32)saddr->s6_addr32[2]; - - a += JHASH_GOLDEN_RATIO; - b += JHASH_GOLDEN_RATIO; - c += nf_frags.rnd; - __jhash_mix(a, b, c); - - a += (__force u32)saddr->s6_addr32[3]; - b += (__force u32)daddr->s6_addr32[0]; - c += (__force u32)daddr->s6_addr32[1]; - __jhash_mix(a, b, c); - - a += (__force u32)daddr->s6_addr32[2]; - b += (__force u32)daddr->s6_addr32[3]; - c += (__force u32)id; - __jhash_mix(a, b, c); - - return c & (INETFRAGS_HASHSZ - 1); -} - static unsigned int nf_hashfn(struct inet_frag_queue *q) { const struct nf_ct_frag6_queue *nq; nq = container_of(q, struct nf_ct_frag6_queue, q); - return ip6qhashfn(nq->id, &nq->saddr, &nq->daddr); + return inet6_hash_frag(nq->id, &nq->saddr, &nq->daddr, nf_frags.rnd); } static void nf_skb_free(struct sk_buff *skb) @@ -209,7 +181,7 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst) arg.dst = dst; read_lock_bh(&nf_frags.lock); - hash = ip6qhashfn(id, src, dst); + hash = inet6_hash_frag(id, src, dst, nf_frags.rnd); q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash); local_bh_enable(); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 89184b576e23..2eeadfa039cb 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -99,8 +99,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, * callers should be careful not to use the hash value outside the ipfrag_lock * as doing so could race with ipfrag_hash_rnd being recalculated. */ -static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, - struct in6_addr *daddr) +unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr, + const struct in6_addr *daddr, u32 rnd) { u32 a, b, c; @@ -110,7 +110,7 @@ static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, a += JHASH_GOLDEN_RATIO; b += JHASH_GOLDEN_RATIO; - c += ip6_frags.rnd; + c += rnd; __jhash_mix(a, b, c); a += (__force u32)saddr->s6_addr32[3]; @@ -125,13 +125,14 @@ static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, return c & (INETFRAGS_HASHSZ - 1); } +EXPORT_SYMBOL_GPL(inet6_hash_frag); static unsigned int ip6_hashfn(struct inet_frag_queue *q) { struct frag_queue *fq; fq = container_of(q, struct frag_queue, q); - return ip6qhashfn(fq->id, &fq->saddr, &fq->daddr); + return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr, ip6_frags.rnd); } int ip6_frag_match(struct inet_frag_queue *q, void *a) @@ -247,7 +248,7 @@ fq_find(struct net *net, __be32 id, struct in6_addr *src, struct in6_addr *dst, arg.dst = dst; read_lock(&ip6_frags.lock); - hash = ip6qhashfn(id, src, dst); + hash = inet6_hash_frag(id, src, dst, ip6_frags.rnd); q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash); if (q == NULL) -- cgit v1.2.1 From 0523820482dcb42784572ffd2296c2f08c275a2b Mon Sep 17 00:00:00 2001 From: Timo Teras Date: Wed, 1 Oct 2008 05:17:54 -0700 Subject: af_key: Free dumping state on socket close Fix a xfrm_{state,policy}_walk leak if pfkey socket is closed while dumping is on-going. Signed-off-by: Timo Teras Signed-off-by: David S. Miller --- net/key/af_key.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index d628df97e02e..b7f5a1c353ee 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -73,22 +73,18 @@ static int pfkey_can_dump(struct sock *sk) return 0; } -static int pfkey_do_dump(struct pfkey_sock *pfk) +static void pfkey_terminate_dump(struct pfkey_sock *pfk) { - int rc; - - rc = pfk->dump.dump(pfk); - if (rc == -ENOBUFS) - return 0; - - pfk->dump.done(pfk); - pfk->dump.dump = NULL; - pfk->dump.done = NULL; - return rc; + if (pfk->dump.dump) { + pfk->dump.done(pfk); + pfk->dump.dump = NULL; + pfk->dump.done = NULL; + } } static void pfkey_sock_destruct(struct sock *sk) { + pfkey_terminate_dump(pfkey_sk(sk)); skb_queue_purge(&sk->sk_receive_queue); if (!sock_flag(sk, SOCK_DEAD)) { @@ -310,6 +306,18 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, return err; } +static int pfkey_do_dump(struct pfkey_sock *pfk) +{ + int rc; + + rc = pfk->dump.dump(pfk); + if (rc == -ENOBUFS) + return 0; + + pfkey_terminate_dump(pfk); + return rc; +} + static inline void pfkey_hdr_dup(struct sadb_msg *new, struct sadb_msg *orig) { *new = *orig; -- cgit v1.2.1 From 12a169e7d8f4b1c95252d8b04ed0f1033ed7cfe2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 1 Oct 2008 07:03:24 -0700 Subject: ipsec: Put dumpers on the dump list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Herbert Xu came up with the idea and the original patch to make xfrm_state dump list contain also dumpers: As it is we go to extraordinary lengths to ensure that states don't go away while dumpers go to sleep. It's much easier if we just put the dumpers themselves on the list since they can't go away while they're going. I've also changed the order of addition on new states to prevent a never-ending dump. Timo Teräs improved the patch to apply cleanly to latest tree, modified iteration code to be more readable by using a common struct for entries in the list, implemented the same idea for xfrm_policy dumping and moved the af_key specific "last" entry caching to af_key. Signed-off-by: Herbert Xu Signed-off-by: Timo Teras Signed-off-by: David S. Miller --- net/key/af_key.c | 38 ++++++++++++++--- net/xfrm/xfrm_policy.c | 111 ++++++++++++++++++++++++++----------------------- net/xfrm/xfrm_state.c | 109 ++++++++++++++++-------------------------------- net/xfrm/xfrm_user.c | 4 +- 4 files changed, 130 insertions(+), 132 deletions(-) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index b7f5a1c353ee..7ae641df70bd 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -58,6 +58,7 @@ struct pfkey_sock { struct xfrm_policy_walk policy; struct xfrm_state_walk state; } u; + struct sk_buff *skb; } dump; }; @@ -76,6 +77,10 @@ static int pfkey_can_dump(struct sock *sk) static void pfkey_terminate_dump(struct pfkey_sock *pfk) { if (pfk->dump.dump) { + if (pfk->dump.skb) { + kfree_skb(pfk->dump.skb); + pfk->dump.skb = NULL; + } pfk->dump.done(pfk); pfk->dump.dump = NULL; pfk->dump.done = NULL; @@ -308,12 +313,25 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, static int pfkey_do_dump(struct pfkey_sock *pfk) { + struct sadb_msg *hdr; int rc; rc = pfk->dump.dump(pfk); if (rc == -ENOBUFS) return 0; + if (pfk->dump.skb) { + if (!pfkey_can_dump(&pfk->sk)) + return 0; + + hdr = (struct sadb_msg *) pfk->dump.skb->data; + hdr->sadb_msg_seq = 0; + hdr->sadb_msg_errno = rc; + pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, + &pfk->sk); + pfk->dump.skb = NULL; + } + pfkey_terminate_dump(pfk); return rc; } @@ -1744,9 +1762,14 @@ static int dump_sa(struct xfrm_state *x, int count, void *ptr) out_hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto); out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_reserved = 0; - out_hdr->sadb_msg_seq = count; + out_hdr->sadb_msg_seq = count + 1; out_hdr->sadb_msg_pid = pfk->dump.msg_pid; - pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, &pfk->sk); + + if (pfk->dump.skb) + pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, + &pfk->sk); + pfk->dump.skb = out_skb; + return 0; } @@ -2245,7 +2268,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h return 0; out: - xp->dead = 1; + xp->walk.dead = 1; xfrm_policy_destroy(xp); return err; } @@ -2583,9 +2606,14 @@ static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr) out_hdr->sadb_msg_type = SADB_X_SPDDUMP; out_hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC; out_hdr->sadb_msg_errno = 0; - out_hdr->sadb_msg_seq = count; + out_hdr->sadb_msg_seq = count + 1; out_hdr->sadb_msg_pid = pfk->dump.msg_pid; - pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, &pfk->sk); + + if (pfk->dump.skb) + pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, + &pfk->sk); + pfk->dump.skb = out_skb; + return 0; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ef9ccbc38752..b7ec08025ffb 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -46,7 +46,7 @@ EXPORT_SYMBOL(xfrm_cfg_mutex); static DEFINE_RWLOCK(xfrm_policy_lock); -static struct list_head xfrm_policy_bytype[XFRM_POLICY_TYPE_MAX]; +static struct list_head xfrm_policy_all; unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2]; EXPORT_SYMBOL(xfrm_policy_count); @@ -164,7 +164,7 @@ static void xfrm_policy_timer(unsigned long data) read_lock(&xp->lock); - if (xp->dead) + if (xp->walk.dead) goto out; dir = xfrm_policy_id2dir(xp->index); @@ -236,7 +236,7 @@ struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp) policy = kzalloc(sizeof(struct xfrm_policy), gfp); if (policy) { - INIT_LIST_HEAD(&policy->bytype); + INIT_LIST_HEAD(&policy->walk.all); INIT_HLIST_NODE(&policy->bydst); INIT_HLIST_NODE(&policy->byidx); rwlock_init(&policy->lock); @@ -252,17 +252,13 @@ EXPORT_SYMBOL(xfrm_policy_alloc); void xfrm_policy_destroy(struct xfrm_policy *policy) { - BUG_ON(!policy->dead); + BUG_ON(!policy->walk.dead); BUG_ON(policy->bundles); if (del_timer(&policy->timer)) BUG(); - write_lock_bh(&xfrm_policy_lock); - list_del(&policy->bytype); - write_unlock_bh(&xfrm_policy_lock); - security_xfrm_policy_free(policy->security); kfree(policy); } @@ -310,8 +306,8 @@ static void xfrm_policy_kill(struct xfrm_policy *policy) int dead; write_lock_bh(&policy->lock); - dead = policy->dead; - policy->dead = 1; + dead = policy->walk.dead; + policy->walk.dead = 1; write_unlock_bh(&policy->lock); if (unlikely(dead)) { @@ -609,6 +605,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) if (delpol) { hlist_del(&delpol->bydst); hlist_del(&delpol->byidx); + list_del(&delpol->walk.all); xfrm_policy_count[dir]--; } policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir); @@ -617,7 +614,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) policy->curlft.use_time = 0; if (!mod_timer(&policy->timer, jiffies + HZ)) xfrm_pol_hold(policy); - list_add_tail(&policy->bytype, &xfrm_policy_bytype[policy->type]); + list_add(&policy->walk.all, &xfrm_policy_all); write_unlock_bh(&xfrm_policy_lock); if (delpol) @@ -684,6 +681,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, } hlist_del(&pol->bydst); hlist_del(&pol->byidx); + list_del(&pol->walk.all); xfrm_policy_count[dir]--; } ret = pol; @@ -727,6 +725,7 @@ struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete, } hlist_del(&pol->bydst); hlist_del(&pol->byidx); + list_del(&pol->walk.all); xfrm_policy_count[dir]--; } ret = pol; @@ -840,6 +839,7 @@ int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) continue; hlist_del(&pol->bydst); hlist_del(&pol->byidx); + list_del(&pol->walk.all); write_unlock_bh(&xfrm_policy_lock); xfrm_audit_policy_delete(pol, 1, @@ -867,60 +867,68 @@ int xfrm_policy_walk(struct xfrm_policy_walk *walk, int (*func)(struct xfrm_policy *, int, int, void*), void *data) { - struct xfrm_policy *old, *pol, *last = NULL; + struct xfrm_policy *pol; + struct xfrm_policy_walk_entry *x; int error = 0; if (walk->type >= XFRM_POLICY_TYPE_MAX && walk->type != XFRM_POLICY_TYPE_ANY) return -EINVAL; - if (walk->policy == NULL && walk->count != 0) + if (list_empty(&walk->walk.all) && walk->seq != 0) return 0; - old = pol = walk->policy; - walk->policy = NULL; - read_lock_bh(&xfrm_policy_lock); - - for (; walk->cur_type < XFRM_POLICY_TYPE_MAX; walk->cur_type++) { - if (walk->type != walk->cur_type && - walk->type != XFRM_POLICY_TYPE_ANY) + write_lock_bh(&xfrm_policy_lock); + if (list_empty(&walk->walk.all)) + x = list_first_entry(&xfrm_policy_all, struct xfrm_policy_walk_entry, all); + else + x = list_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all); + list_for_each_entry_from(x, &xfrm_policy_all, all) { + if (x->dead) continue; - - if (pol == NULL) { - pol = list_first_entry(&xfrm_policy_bytype[walk->cur_type], - struct xfrm_policy, bytype); - } - list_for_each_entry_from(pol, &xfrm_policy_bytype[walk->cur_type], bytype) { - if (pol->dead) - continue; - if (last) { - error = func(last, xfrm_policy_id2dir(last->index), - walk->count, data); - if (error) { - xfrm_pol_hold(last); - walk->policy = last; - goto out; - } - } - last = pol; - walk->count++; + pol = container_of(x, struct xfrm_policy, walk); + if (walk->type != XFRM_POLICY_TYPE_ANY && + walk->type != pol->type) + continue; + error = func(pol, xfrm_policy_id2dir(pol->index), + walk->seq, data); + if (error) { + list_move_tail(&walk->walk.all, &x->all); + goto out; } - pol = NULL; + walk->seq++; } - if (walk->count == 0) { + if (walk->seq == 0) { error = -ENOENT; goto out; } - if (last) - error = func(last, xfrm_policy_id2dir(last->index), 0, data); + list_del_init(&walk->walk.all); out: - read_unlock_bh(&xfrm_policy_lock); - if (old != NULL) - xfrm_pol_put(old); + write_unlock_bh(&xfrm_policy_lock); return error; } EXPORT_SYMBOL(xfrm_policy_walk); +void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type) +{ + INIT_LIST_HEAD(&walk->walk.all); + walk->walk.dead = 1; + walk->type = type; + walk->seq = 0; +} +EXPORT_SYMBOL(xfrm_policy_walk_init); + +void xfrm_policy_walk_done(struct xfrm_policy_walk *walk) +{ + if (list_empty(&walk->walk.all)) + return; + + write_lock_bh(&xfrm_policy_lock); + list_del(&walk->walk.all); + write_unlock_bh(&xfrm_policy_lock); +} +EXPORT_SYMBOL(xfrm_policy_walk_done); + /* * Find policy to apply to this flow. * @@ -1077,7 +1085,7 @@ static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) struct hlist_head *chain = policy_hash_bysel(&pol->selector, pol->family, dir); - list_add_tail(&pol->bytype, &xfrm_policy_bytype[pol->type]); + list_add(&pol->walk.all, &xfrm_policy_all); hlist_add_head(&pol->bydst, chain); hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index)); xfrm_policy_count[dir]++; @@ -1095,6 +1103,7 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, hlist_del(&pol->bydst); hlist_del(&pol->byidx); + list_del(&pol->walk.all); xfrm_policy_count[dir]--; return pol; @@ -1720,7 +1729,7 @@ restart: for (pi = 0; pi < npols; pi++) { read_lock_bh(&pols[pi]->lock); - pol_dead |= pols[pi]->dead; + pol_dead |= pols[pi]->walk.dead; read_unlock_bh(&pols[pi]->lock); } @@ -2415,9 +2424,7 @@ static void __init xfrm_policy_init(void) panic("XFRM: failed to allocate bydst hash\n"); } - for (dir = 0; dir < XFRM_POLICY_TYPE_MAX; dir++) - INIT_LIST_HEAD(&xfrm_policy_bytype[dir]); - + INIT_LIST_HEAD(&xfrm_policy_all); INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task); register_netdevice_notifier(&xfrm_dev_notifier); } @@ -2601,7 +2608,7 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol, int i, j, n = 0; write_lock_bh(&pol->lock); - if (unlikely(pol->dead)) { + if (unlikely(pol->walk.dead)) { /* target policy has been deleted */ write_unlock_bh(&pol->lock); return -ENOENT; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 053970e8765d..747fd8c291a7 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -59,14 +59,6 @@ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; static unsigned int xfrm_state_num; static unsigned int xfrm_state_genid; -/* Counter indicating ongoing walk, protected by xfrm_state_lock. */ -static unsigned long xfrm_state_walk_ongoing; -/* Counter indicating walk completion, protected by xfrm_cfg_mutex. */ -static unsigned long xfrm_state_walk_completed; - -/* List of outstanding state walks used to set the completed counter. */ -static LIST_HEAD(xfrm_state_walks); - static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family); static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); @@ -199,8 +191,7 @@ static DEFINE_RWLOCK(xfrm_state_afinfo_lock); static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO]; static struct work_struct xfrm_state_gc_work; -static LIST_HEAD(xfrm_state_gc_leftovers); -static LIST_HEAD(xfrm_state_gc_list); +static HLIST_HEAD(xfrm_state_gc_list); static DEFINE_SPINLOCK(xfrm_state_gc_lock); int __xfrm_state_delete(struct xfrm_state *x); @@ -412,23 +403,16 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) static void xfrm_state_gc_task(struct work_struct *data) { - struct xfrm_state *x, *tmp; - unsigned long completed; + struct xfrm_state *x; + struct hlist_node *entry, *tmp; + struct hlist_head gc_list; - mutex_lock(&xfrm_cfg_mutex); spin_lock_bh(&xfrm_state_gc_lock); - list_splice_tail_init(&xfrm_state_gc_list, &xfrm_state_gc_leftovers); + hlist_move_list(&xfrm_state_gc_list, &gc_list); spin_unlock_bh(&xfrm_state_gc_lock); - completed = xfrm_state_walk_completed; - mutex_unlock(&xfrm_cfg_mutex); - - list_for_each_entry_safe(x, tmp, &xfrm_state_gc_leftovers, gclist) { - if ((long)(x->lastused - completed) > 0) - break; - list_del(&x->gclist); + hlist_for_each_entry_safe(x, entry, tmp, &gc_list, gclist) xfrm_state_gc_destroy(x); - } wake_up(&km_waitq); } @@ -529,7 +513,7 @@ struct xfrm_state *xfrm_state_alloc(void) if (x) { atomic_set(&x->refcnt, 1); atomic_set(&x->tunnel_users, 0); - INIT_LIST_HEAD(&x->all); + INIT_LIST_HEAD(&x->km.all); INIT_HLIST_NODE(&x->bydst); INIT_HLIST_NODE(&x->bysrc); INIT_HLIST_NODE(&x->byspi); @@ -556,7 +540,7 @@ void __xfrm_state_destroy(struct xfrm_state *x) WARN_ON(x->km.state != XFRM_STATE_DEAD); spin_lock_bh(&xfrm_state_gc_lock); - list_add_tail(&x->gclist, &xfrm_state_gc_list); + hlist_add_head(&x->gclist, &xfrm_state_gc_list); spin_unlock_bh(&xfrm_state_gc_lock); schedule_work(&xfrm_state_gc_work); } @@ -569,8 +553,7 @@ int __xfrm_state_delete(struct xfrm_state *x) if (x->km.state != XFRM_STATE_DEAD) { x->km.state = XFRM_STATE_DEAD; spin_lock(&xfrm_state_lock); - x->lastused = xfrm_state_walk_ongoing; - list_del_rcu(&x->all); + list_del(&x->km.all); hlist_del(&x->bydst); hlist_del(&x->bysrc); if (x->id.spi) @@ -871,7 +854,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, if (km_query(x, tmpl, pol) == 0) { x->km.state = XFRM_STATE_ACQ; - list_add_tail(&x->all, &xfrm_state_all); + list_add(&x->km.all, &xfrm_state_all); hlist_add_head(&x->bydst, xfrm_state_bydst+h); h = xfrm_src_hash(daddr, saddr, family); hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); @@ -940,7 +923,7 @@ static void __xfrm_state_insert(struct xfrm_state *x) x->genid = ++xfrm_state_genid; - list_add_tail(&x->all, &xfrm_state_all); + list_add(&x->km.all, &xfrm_state_all); h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr, x->props.reqid, x->props.family); @@ -1069,7 +1052,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re xfrm_state_hold(x); x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ; add_timer(&x->timer); - list_add_tail(&x->all, &xfrm_state_all); + list_add(&x->km.all, &xfrm_state_all); hlist_add_head(&x->bydst, xfrm_state_bydst+h); h = xfrm_src_hash(daddr, saddr, family); hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); @@ -1566,79 +1549,59 @@ int xfrm_state_walk(struct xfrm_state_walk *walk, int (*func)(struct xfrm_state *, int, void*), void *data) { - struct xfrm_state *old, *x, *last = NULL; + struct xfrm_state *state; + struct xfrm_state_walk *x; int err = 0; - if (walk->state == NULL && walk->count != 0) + if (walk->seq != 0 && list_empty(&walk->all)) return 0; - old = x = walk->state; - walk->state = NULL; spin_lock_bh(&xfrm_state_lock); - if (x == NULL) - x = list_first_entry(&xfrm_state_all, struct xfrm_state, all); + if (list_empty(&walk->all)) + x = list_first_entry(&xfrm_state_all, struct xfrm_state_walk, all); + else + x = list_entry(&walk->all, struct xfrm_state_walk, all); list_for_each_entry_from(x, &xfrm_state_all, all) { - if (x->km.state == XFRM_STATE_DEAD) + if (x->state == XFRM_STATE_DEAD) continue; - if (!xfrm_id_proto_match(x->id.proto, walk->proto)) + state = container_of(x, struct xfrm_state, km); + if (!xfrm_id_proto_match(state->id.proto, walk->proto)) continue; - if (last) { - err = func(last, walk->count, data); - if (err) { - xfrm_state_hold(last); - walk->state = last; - goto out; - } + err = func(state, walk->seq, data); + if (err) { + list_move_tail(&walk->all, &x->all); + goto out; } - last = x; - walk->count++; + walk->seq++; } - if (walk->count == 0) { + if (walk->seq == 0) { err = -ENOENT; goto out; } - if (last) - err = func(last, 0, data); + list_del_init(&walk->all); out: spin_unlock_bh(&xfrm_state_lock); - if (old != NULL) - xfrm_state_put(old); return err; } EXPORT_SYMBOL(xfrm_state_walk); void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto) { + INIT_LIST_HEAD(&walk->all); walk->proto = proto; - walk->state = NULL; - walk->count = 0; - list_add_tail(&walk->list, &xfrm_state_walks); - walk->genid = ++xfrm_state_walk_ongoing; + walk->state = XFRM_STATE_DEAD; + walk->seq = 0; } EXPORT_SYMBOL(xfrm_state_walk_init); void xfrm_state_walk_done(struct xfrm_state_walk *walk) { - struct list_head *prev; - - if (walk->state != NULL) { - xfrm_state_put(walk->state); - walk->state = NULL; - } - - prev = walk->list.prev; - list_del(&walk->list); - - if (prev != &xfrm_state_walks) { - list_entry(prev, struct xfrm_state_walk, list)->genid = - walk->genid; + if (list_empty(&walk->all)) return; - } - - xfrm_state_walk_completed = walk->genid; - if (!list_empty(&xfrm_state_gc_leftovers)) - schedule_work(&xfrm_state_gc_work); + spin_lock_bh(&xfrm_state_lock); + list_del(&walk->all); + spin_lock_bh(&xfrm_state_lock); } EXPORT_SYMBOL(xfrm_state_walk_done); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 04c41504f84c..76f75df21e15 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1102,7 +1102,7 @@ static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, return xp; error: *errp = err; - xp->dead = 1; + xp->walk.dead = 1; xfrm_policy_destroy(xp); return NULL; } @@ -1595,7 +1595,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, return -ENOENT; read_lock(&xp->lock); - if (xp->dead) { + if (xp->walk.dead) { read_unlock(&xp->lock); goto out; } -- cgit v1.2.1 From 4edd87ad5cad8e159e0db3ce3131b3d97219c9cd Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 1 Oct 2008 07:09:38 -0700 Subject: net: BUG instead of corrupting memory in pskb_expand_head If the caller of pskb_expand_head specifies a negative nhead we'll silently overwrite other people's memory. This patch makes it BUG instead. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2c218a0808b4..8bd248a64879 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -738,6 +738,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, #endif long off; + BUG_ON(nhead < 0); + if (skb_shared(skb)) BUG(); -- cgit v1.2.1 From a210d01ae3ee006b59e54e772a7f212486e0f021 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Wed, 1 Oct 2008 07:28:28 -0700 Subject: ipv4: Loosen source address check on IPv4 output ip_route_output() contains a check to make sure that no flows with non-local source IP addresses are routed. This obviously makes using such addresses impossible. This patch introduces a flowi flag which makes omitting this check possible. The new flag provides a way of handling transparent and non-transparent connections differently. Signed-off-by: Julian Anastasov Signed-off-by: KOVACS Krisztian Signed-off-by: David S. Miller --- net/ipv4/route.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f62187bb6d08..a6d7c584f53b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2361,11 +2361,6 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, ipv4_is_zeronet(oldflp->fl4_src)) goto out; - /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ - dev_out = ip_dev_find(net, oldflp->fl4_src); - if (dev_out == NULL) - goto out; - /* I removed check for oif == dev_out->oif here. It was wrong for two reasons: 1. ip_dev_find(net, saddr) can return wrong iface, if saddr @@ -2377,6 +2372,11 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, if (oldflp->oif == 0 && (ipv4_is_multicast(oldflp->fl4_dst) || oldflp->fl4_dst == htonl(0xFFFFFFFF))) { + /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ + dev_out = ip_dev_find(net, oldflp->fl4_src); + if (dev_out == NULL) + goto out; + /* Special hack: user can direct multicasts and limited broadcast via necessary interface without fiddling with IP_MULTICAST_IF or IP_PKTINFO. @@ -2395,9 +2395,15 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, fl.oif = dev_out->ifindex; goto make_route; } - if (dev_out) + + if (!(oldflp->flags & FLOWI_FLAG_ANYSRC)) { + /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ + dev_out = ip_dev_find(net, oldflp->fl4_src); + if (dev_out == NULL) + goto out; dev_put(dev_out); - dev_out = NULL; + dev_out = NULL; + } } -- cgit v1.2.1 From f5715aea4564f233767ea1d944b2637a5fd7cd2e Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Wed, 1 Oct 2008 07:30:02 -0700 Subject: ipv4: Implement IP_TRANSPARENT socket option This patch introduces the IP_TRANSPARENT socket option: enabling that will make the IPv4 routing omit the non-local source address check on output. Setting IP_TRANSPARENT requires NET_ADMIN capability. Signed-off-by: KOVACS Krisztian Signed-off-by: David S. Miller --- net/ipv4/inet_timewait_sock.c | 1 + net/ipv4/ip_sockglue.c | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 743f011b9a84..1c5fd38f8824 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -126,6 +126,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat tw->tw_reuse = sk->sk_reuse; tw->tw_hash = sk->sk_hash; tw->tw_ipv6only = 0; + tw->tw_transparent = inet->transparent; tw->tw_prot = sk->sk_prot_creator; twsk_net_set(tw, hold_net(sock_net(sk))); atomic_set(&tw->tw_refcnt, 1); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 105d92a039b9..465abf0a9869 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -419,7 +419,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, (1<= sizeof(int)) { @@ -878,6 +878,16 @@ static int do_ip_setsockopt(struct sock *sk, int level, err = xfrm_user_policy(sk, optname, optval, optlen); break; + case IP_TRANSPARENT: + if (!capable(CAP_NET_ADMIN)) { + err = -EPERM; + break; + } + if (optlen < 1) + goto e_inval; + inet->transparent = !!val; + break; + default: err = -ENOPROTOOPT; break; @@ -1130,6 +1140,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_FREEBIND: val = inet->freebind; break; + case IP_TRANSPARENT: + val = inet->transparent; + break; default: release_sock(sk); return -ENOPROTOOPT; -- cgit v1.2.1 From b9fb15067ce93497bef852c05e406d7a96212a9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?T=C3=B3th=20L=C3=A1szl=C3=B3=20Attila?= Date: Wed, 1 Oct 2008 07:31:24 -0700 Subject: ipv4: Allow binding to non-local addresses if IP_TRANSPARENT is set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Setting IP_TRANSPARENT is not really useful without allowing non-local binds for the socket. To make user-space code simpler we allow these binds even if IP_TRANSPARENT is set but IP_FREEBIND is not. Signed-off-by: Tóth László Attila Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8a3ac1fa71a9..1fbff5fa4241 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -469,7 +469,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) */ err = -EADDRNOTAVAIL; if (!sysctl_ip_nonlocal_bind && - !inet->freebind && + !(inet->freebind || inet->transparent) && addr->sin_addr.s_addr != htonl(INADDR_ANY) && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && -- cgit v1.2.1 From 1668e010cbe1a7567c81d4c02d31dde9859e9da1 Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Wed, 1 Oct 2008 07:33:10 -0700 Subject: ipv4: Make inet_sock.h independent of route.h inet_iif() in inet_sock.h requires route.h. Since users of inet_iif() usually require other route.h functionality anyway this patch moves inet_iif() to route.h. Signed-off-by: KOVACS Krisztian Signed-off-by: David S. Miller --- net/ipv4/netfilter/nf_nat_helper.c | 1 + net/ipv6/af_inet6.c | 1 + 2 files changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 11976ea29884..112dcfa12900 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 95055f8c3f35..f018704ecb86 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.1 From 88ef4a5a78e63420dd1dd770f1bd1dc198926b04 Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Wed, 1 Oct 2008 07:41:00 -0700 Subject: tcp: Handle TCP SYN+ACK/ACK/RST transparency The TCP stack sends out SYN+ACK/ACK/RST reply packets in response to incoming packets. The non-local source address check on output bites us again, as replies for transparently redirected traffic won't have a chance to leave the node. This patch selectively sets the FLOWI_FLAG_ANYSRC flag when doing the route lookup for those replies. Transparent replies are enabled if the listening socket has the transparent socket flag set. Signed-off-by: KOVACS Krisztian Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d13688e3558d..8b24bd833cb4 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -591,6 +591,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) ip_hdr(skb)->saddr, /* XXX */ sizeof(struct tcphdr), IPPROTO_TCP, 0); arg.csumoffset = offsetof(struct tcphdr, check) / 2; + arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; net = dev_net(skb->dst->dev); ip_send_reply(net->ipv4.tcp_sock, skb, @@ -606,7 +607,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts, int oif, - struct tcp_md5sig_key *key) + struct tcp_md5sig_key *key, + int reply_flags) { struct tcphdr *th = tcp_hdr(skb); struct { @@ -659,6 +661,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, ip_hdr(skb)->daddr, &rep.th); } #endif + arg.flags = reply_flags; arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr, /* XXX */ arg.iov[0].iov_len, IPPROTO_TCP, 0); @@ -681,7 +684,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcptw->tw_ts_recent, tw->tw_bound_dev_if, - tcp_twsk_md5_key(tcptw) + tcp_twsk_md5_key(tcptw), + tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0 ); inet_twsk_put(tw); @@ -694,7 +698,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent, 0, - tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr)); + tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr), + inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0); } /* @@ -1244,6 +1249,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); ireq->loc_addr = daddr; ireq->rmt_addr = saddr; + ireq->no_srccheck = inet_sk(sk)->transparent; ireq->opt = tcp_v4_save_options(sk, skb); if (!want_cookie) TCP_ECN_create_request(req, tcp_hdr(skb)); -- cgit v1.2.1 From 86b08d867d7de001ab224180ed7865fab93fd56e Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Wed, 1 Oct 2008 07:44:42 -0700 Subject: ipv4: Make Netfilter's ip_route_me_harder() non-local address compatible Netfilter's ip_route_me_harder() tries to re-route packets either generated or re-routed by Netfilter. This patch changes ip_route_me_harder() to handle packets from non-locally-bound sockets with IP_TRANSPARENT set as local and to set the appropriate flowi flags when re-doing the routing lookup. Signed-off-by: KOVACS Krisztian Signed-off-by: David S. Miller --- net/ipv4/inet_connection_sock.c | 1 + net/ipv4/ip_output.c | 4 +++- net/ipv4/netfilter.c | 3 +++ net/ipv4/syncookies.c | 2 ++ 4 files changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 0c1ae68ee84b..432c570c9f5f 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -335,6 +335,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk, .saddr = ireq->loc_addr, .tos = RT_CONN_FLAGS(sk) } }, .proto = sk->sk_protocol, + .flags = inet_sk_flowi_flags(sk), .uli_u = { .ports = { .sport = inet_sk(sk)->sport, .dport = ireq->rmt_port } } }; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index d533a89e08de..d2a8f8bb78a6 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -340,6 +340,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) .saddr = inet->saddr, .tos = RT_CONN_FLAGS(sk) } }, .proto = sk->sk_protocol, + .flags = inet_sk_flowi_flags(sk), .uli_u = { .ports = { .sport = inet->sport, .dport = inet->dport } } }; @@ -1371,7 +1372,8 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar .uli_u = { .ports = { .sport = tcp_hdr(skb)->dest, .dport = tcp_hdr(skb)->source } }, - .proto = sk->sk_protocol }; + .proto = sk->sk_protocol, + .flags = ip_reply_arg_flowi_flags(arg) }; security_skb_classify_flow(skb, &fl); if (ip_route_output_key(sock_net(sk), &rt, &fl)) return; diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index f8edacdf991d..01671ad51ed3 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -20,6 +20,8 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) unsigned int type; type = inet_addr_type(&init_net, iph->saddr); + if (skb->sk && inet_sk(skb->sk)->transparent) + type = RTN_LOCAL; if (addr_type == RTN_UNSPEC) addr_type = type; @@ -33,6 +35,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; fl.mark = skb->mark; + fl.flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; if (ip_route_output_key(&init_net, &rt, &fl) != 0) return -1; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 9d38005abbac..929302b2ba94 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -16,6 +16,7 @@ #include #include #include +#include /* Timestamps: lowest 9 bits store TCP options */ #define TSBITS 9 @@ -337,6 +338,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, .saddr = ireq->loc_addr, .tos = RT_CONN_FLAGS(sk) } }, .proto = IPPROTO_TCP, + .flags = inet_sk_flowi_flags(sk), .uli_u = { .ports = { .sport = th->dest, .dport = th->source } } }; -- cgit v1.2.1 From a3116ac5c216fc3c145906a46df9ce542ff7dcf2 Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Wed, 1 Oct 2008 07:46:49 -0700 Subject: tcp: Port redirection support for TCP Current TCP code relies on the local port of the listening socket being the same as the destination address of the incoming connection. Port redirection used by many transparent proxying techniques obviously breaks this, so we have to store the original destination port address. This patch extends struct inet_request_sock and stores the incoming destination port value there. It also modifies the handshake code to use that value as the source port when sending reply packets. Signed-off-by: KOVACS Krisztian Signed-off-by: David S. Miller --- net/ipv4/inet_connection_sock.c | 2 ++ net/ipv4/syncookies.c | 1 + net/ipv4/tcp_output.c | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 432c570c9f5f..21fcc5a9045f 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -516,6 +516,8 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, newicsk->icsk_bind_hash = NULL; inet_sk(newsk)->dport = inet_rsk(req)->rmt_port; + inet_sk(newsk)->num = ntohs(inet_rsk(req)->loc_port); + inet_sk(newsk)->sport = inet_rsk(req)->loc_port; newsk->sk_write_space = sk_stream_write_space; newicsk->icsk_retransmits = 0; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 929302b2ba94..d346c22aa6ae 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -297,6 +297,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = cookie; req->mss = mss; + ireq->loc_port = th->dest; ireq->rmt_port = th->source; ireq->loc_addr = ip_hdr(skb)->daddr; ireq->rmt_addr = ip_hdr(skb)->saddr; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a8499ef3234a..493553c71d32 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2275,7 +2275,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, th->syn = 1; th->ack = 1; TCP_ECN_make_synack(req, th); - th->source = inet_sk(sk)->sport; + th->source = ireq->loc_port; th->dest = ireq->rmt_port; /* Setting of flags are superfluous here for callers (and ECE is * not even correctly set) -- cgit v1.2.1 From bcd41303f422015ab662c9276d108414aa75b796 Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Wed, 1 Oct 2008 07:48:10 -0700 Subject: udp: Export UDP socket lookup function The iptables tproxy code has to be able to do UDP socket hash lookups, so we have to provide an exported lookup function for this purpose. Signed-off-by: KOVACS Krisztian Signed-off-by: David S. Miller --- net/ipv4/udp.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 57e26fa66185..c83d0ef469c9 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -302,6 +302,13 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, return result; } +struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, + __be32 daddr, __be16 dport, int dif) +{ + return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, udp_hash); +} +EXPORT_SYMBOL_GPL(udp4_lib_lookup); + static inline struct sock *udp_v4_mcast_next(struct sock *sk, __be16 loc_port, __be32 loc_addr, __be16 rmt_port, __be32 rmt_addr, -- cgit v1.2.1 From 2cd9b822bfa79fc1335d3e71a0449f3cd0b5078e Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 19 Jun 2008 17:59:13 -0400 Subject: sctp: Only mark chunks as missing when there are gaps Frist small step in optimizing SACK processing. Do not call sctp_mark_missing() when there are no gaps reported and thus not missing chunks. Signed-off-by: Vlad Yasevich --- net/sctp/outqueue.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 4328ad5439c9..ef5ea7423cc8 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1129,12 +1129,13 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) unsigned outstanding; struct sctp_transport *primary = asoc->peer.primary_path; int count_of_newacks = 0; + int gap_ack_blocks; /* Grab the association's destination address list. */ transport_list = &asoc->peer.transport_addr_list; sack_ctsn = ntohl(sack->cum_tsn_ack); - + gap_ack_blocks = ntohs(sack->num_gap_ack_blocks); /* * SFR-CACC algorithm: * On receipt of a SACK the sender SHOULD execute the @@ -1161,7 +1162,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) * A) Initialize the cacc_saw_newack to 0 for all destination * addresses. */ - if (sack->num_gap_ack_blocks && + if (gap_ack_blocks && primary->cacc.changeover_active) { list_for_each_entry(transport, transport_list, transports) { transport->cacc.cacc_saw_newack = 0; @@ -1170,9 +1171,8 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) /* Get the highest TSN in the sack. */ highest_tsn = sack_ctsn; - if (sack->num_gap_ack_blocks) - highest_tsn += - ntohs(frags[ntohs(sack->num_gap_ack_blocks) - 1].gab.end); + if (gap_ack_blocks) + highest_tsn += ntohs(frags[gap_ack_blocks - 1].gab.end); if (TSN_lt(asoc->highest_sacked, highest_tsn)) { highest_new_tsn = highest_tsn; @@ -1181,11 +1181,11 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) highest_new_tsn = sctp_highest_new_tsn(sack, asoc); } + /* Run through the retransmit queue. Credit bytes received * and free those chunks that we can. */ sctp_check_transmitted(q, &q->retransmit, NULL, sack, highest_new_tsn); - sctp_mark_missing(q, &q->retransmit, NULL, highest_new_tsn, 0); /* Run through the transmitted queue. * Credit bytes received and free those chunks which we can. @@ -1204,9 +1204,12 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) count_of_newacks ++; } - list_for_each_entry(transport, transport_list, transports) { - sctp_mark_missing(q, &transport->transmitted, transport, - highest_new_tsn, count_of_newacks); + if (gap_ack_blocks) { + sctp_mark_missing(q, &q->retransmit, NULL, highest_new_tsn, 0); + + list_for_each_entry(transport, transport_list, transports) + sctp_mark_missing(q, &transport->transmitted, transport, + highest_new_tsn, count_of_newacks); } /* Move the Cumulative TSN Ack Point if appropriate. */ -- cgit v1.2.1 From ab5216a5bd453752f04bb79c29e8f01b11d69006 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 19 Jun 2008 18:17:24 -0400 Subject: sctp: Optimize SFR-CACC transport list walking during SACK processing There is a possibility of walking the transport list twice during SACK processing when doing SFR-CACC algorithm. We can restructure the code to only do this once. Signed-off-by: Vlad Yasevich --- net/sctp/outqueue.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index ef5ea7423cc8..c8de4da57f36 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1145,27 +1145,31 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) * on the current primary, the CHANGEOVER_ACTIVE flag SHOULD be * cleared. The CYCLING_CHANGEOVER flag SHOULD also be cleared for * all destinations. - */ - if (TSN_lte(primary->cacc.next_tsn_at_change, sack_ctsn)) { - primary->cacc.changeover_active = 0; - list_for_each_entry(transport, transport_list, - transports) { - transport->cacc.cycling_changeover = 0; - } - } - - /* - * SFR-CACC algorithm: * 2) If the SACK contains gap acks and the flag CHANGEOVER_ACTIVE * is set the receiver of the SACK MUST take the following actions: * * A) Initialize the cacc_saw_newack to 0 for all destination * addresses. + * + * Only bother if changeover_active is set. Otherwise, this is + * totally suboptimal to do on every SACK. */ - if (gap_ack_blocks && - primary->cacc.changeover_active) { - list_for_each_entry(transport, transport_list, transports) { - transport->cacc.cacc_saw_newack = 0; + if (primary->cacc.changeover_active) { + u8 clear_cycling = 0; + + if (TSN_lte(primary->cacc.next_tsn_at_change, sack_ctsn)) { + primary->cacc.changeover_active = 0; + clear_cycling = 1; + } + + if (clear_cycling || gap_ack_blocks) { + list_for_each_entry(transport, transport_list, + transports) { + if (clear_cycling) + transport->cacc.cycling_changeover = 0; + if (gap_ack_blocks) + transport->cacc.cacc_saw_newack = 0; + } } } -- cgit v1.2.1 From 845b8eda4d783a7ce2670d482a716840a650389e Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 23 Jun 2008 15:26:20 -0400 Subject: sctp: Retransmit list is ineligable for missing indications Chunks placed on the retransmit list are marked as inelegible for fast retrasnmission. Since missing indications determine when fast reransmission is done, there is not point in calling sctp_mark_missing() on the retransmit list since those chunks will not be marked. Signed-off-by: Vlad Yasevich --- net/sctp/outqueue.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index c8de4da57f36..da8d846301c1 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1209,8 +1209,6 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) } if (gap_ack_blocks) { - sctp_mark_missing(q, &q->retransmit, NULL, highest_new_tsn, 0); - list_for_each_entry(transport, transport_list, transports) sctp_mark_missing(q, &transport->transmitted, transport, highest_new_tsn, count_of_newacks); -- cgit v1.2.1 From c226ef9b83694311327f3ab0036c6de9c22e9daf Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Fri, 25 Jul 2008 12:44:09 -0400 Subject: sctp: reduce memory footprint of sctp_chunk structure sctp_chunks should be put on a diet. This is some of the low hanging fruit that we can strip out. Changes all the __s8/__u8 flags to bitfields. Saves 12 bytes per chunk. Signed-off-by: Neil Horman Signed-off-by: Vlad Yasevich --- net/sctp/output.c | 2 +- net/sctp/outqueue.c | 14 +++++++------- net/sctp/sm_make_chunk.c | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/sctp/output.c b/net/sctp/output.c index 225c7123c41f..c3f417f7ec6e 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -699,7 +699,7 @@ static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet, * When a Fast Retransmit is being performed the sender SHOULD * ignore the value of cwnd and SHOULD NOT delay retransmission. */ - if (chunk->fast_retransmit <= 0) + if (chunk->fast_retransmit != SCTP_NEED_FRTX) if (transport->flight_size >= transport->cwnd) { retval = SCTP_XMIT_RWND_FULL; goto finish; diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index da8d846301c1..247ebc95c1e5 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -420,7 +420,7 @@ void sctp_retransmit_mark(struct sctp_outq *q, * be added to the retransmit queue. */ if ((reason == SCTP_RTXR_FAST_RTX && - (chunk->fast_retransmit > 0)) || + (chunk->fast_retransmit == SCTP_NEED_FRTX)) || (reason != SCTP_RTXR_FAST_RTX && !chunk->tsn_gap_acked)) { /* If this chunk was sent less then 1 rto ago, do not * retransmit this chunk, but give the peer time @@ -650,8 +650,8 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, /* Mark the chunk as ineligible for fast retransmit * after it is retransmitted. */ - if (chunk->fast_retransmit > 0) - chunk->fast_retransmit = -1; + if (chunk->fast_retransmit == SCTP_NEED_FRTX) + chunk->fast_retransmit = SCTP_DONT_FRTX; /* Force start T3-rtx timer when fast retransmitting * the earliest outstanding TSN @@ -680,8 +680,8 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, */ if (rtx_timeout || fast_rtx) { list_for_each_entry(chunk1, lqueue, transmitted_list) { - if (chunk1->fast_retransmit > 0) - chunk1->fast_retransmit = -1; + if (chunk1->fast_retransmit == SCTP_NEED_FRTX) + chunk1->fast_retransmit = SCTP_DONT_FRTX; } } @@ -1656,7 +1656,7 @@ static void sctp_mark_missing(struct sctp_outq *q, * chunk if it has NOT been fast retransmitted or marked for * fast retransmit already. */ - if (!chunk->fast_retransmit && + if (chunk->fast_retransmit == SCTP_CAN_FRTX && !chunk->tsn_gap_acked && TSN_lt(tsn, highest_new_tsn_in_sack)) { @@ -1681,7 +1681,7 @@ static void sctp_mark_missing(struct sctp_outq *q, */ if (chunk->tsn_missing_report >= 3) { - chunk->fast_retransmit = 1; + chunk->fast_retransmit = SCTP_NEED_FRTX; do_fast_retransmit = 1; } } diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index d68869f966c3..99fe0747cc96 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1211,7 +1211,7 @@ struct sctp_chunk *sctp_chunkify(struct sk_buff *skb, */ retval->tsn_missing_report = 0; retval->tsn_gap_acked = 0; - retval->fast_retransmit = 0; + retval->fast_retransmit = SCTP_CAN_FRTX; /* If this is a fragmented message, track all fragments * of the message (for SEND_FAILED). -- cgit v1.2.1 From 52cae8f06babf9eed327479c1aa024ce3732f912 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 18 Aug 2008 10:34:34 -0400 Subject: sctp: try harder to figure out address family when checking wildcards sctp_is_any() function that is used to check for wildcard addresses only looks at the address itself to determine the address family. This function is used in the API to check the address passed in from the user. If the user simply zerroes out the sockaddr_storage and pass that in, we'll end up failing. So, let's try harder to determine the address family by also checking the socket if it's possible. Signed-off-by: Vlad Yasevich --- net/sctp/bind_addr.c | 16 +++++++++++++--- net/sctp/ipv6.c | 5 +++-- net/sctp/socket.c | 10 +++++----- 3 files changed, 21 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index f62bc2468935..6d5944a745d4 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -457,7 +457,7 @@ static int sctp_copy_one_addr(struct sctp_bind_addr *dest, { int error = 0; - if (sctp_is_any(addr)) { + if (sctp_is_any(NULL, addr)) { error = sctp_copy_local_addr_list(dest, scope, gfp, flags); } else if (sctp_in_scope(addr, scope)) { /* Now that the address is in scope, check to see if @@ -477,11 +477,21 @@ static int sctp_copy_one_addr(struct sctp_bind_addr *dest, } /* Is this a wildcard address? */ -int sctp_is_any(const union sctp_addr *addr) +int sctp_is_any(struct sock *sk, const union sctp_addr *addr) { - struct sctp_af *af = sctp_get_af_specific(addr->sa.sa_family); + unsigned short fam = 0; + struct sctp_af *af; + + /* Try to get the right address family */ + if (addr->sa.sa_family != AF_UNSPEC) + fam = addr->sa.sa_family; + else if (sk) + fam = sk->sk_family; + + af = sctp_get_af_specific(fam); if (!af) return 0; + return af->is_any(addr); } diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 47f91afa0211..c78da3c9dd34 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -837,6 +837,7 @@ static int sctp_inet6_cmp_addr(const union sctp_addr *addr1, struct sctp_sock *opt) { struct sctp_af *af1, *af2; + struct sock *sk = sctp_opt2sk(opt); af1 = sctp_get_af_specific(addr1->sa.sa_family); af2 = sctp_get_af_specific(addr2->sa.sa_family); @@ -845,11 +846,11 @@ static int sctp_inet6_cmp_addr(const union sctp_addr *addr1, return 0; /* If the socket is IPv6 only, v4 addrs will not match */ - if (__ipv6_only_sock(sctp_opt2sk(opt)) && af1 != af2) + if (__ipv6_only_sock(sk) && af1 != af2) return 0; /* Today, wildcard AF_INET/AF_INET6. */ - if (sctp_is_any(addr1) || sctp_is_any(addr2)) + if (sctp_is_any(sk, addr1) || sctp_is_any(sk, addr2)) return 1; if (addr1->sa.sa_family != addr2->sa.sa_family) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 5ffb9dec1c3f..a1b904529d5e 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2309,7 +2309,7 @@ static int sctp_setsockopt_peer_addr_params(struct sock *sk, /* If an address other than INADDR_ANY is specified, and * no transport is found, then the request is invalid. */ - if (!sctp_is_any(( union sctp_addr *)¶ms.spp_address)) { + if (!sctp_is_any(sk, ( union sctp_addr *)¶ms.spp_address)) { trans = sctp_addr_id2transport(sk, ¶ms.spp_address, params.spp_assoc_id); if (!trans) @@ -4062,7 +4062,7 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len, /* If an address other than INADDR_ANY is specified, and * no transport is found, then the request is invalid. */ - if (!sctp_is_any(( union sctp_addr *)¶ms.spp_address)) { + if (!sctp_is_any(sk, ( union sctp_addr *)¶ms.spp_address)) { trans = sctp_addr_id2transport(sk, ¶ms.spp_address, params.spp_assoc_id); if (!trans) { @@ -4414,7 +4414,7 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len, if (sctp_list_single_entry(&bp->address_list)) { addr = list_entry(bp->address_list.next, struct sctp_sockaddr_entry, list); - if (sctp_is_any(&addr->a)) { + if (sctp_is_any(sk, &addr->a)) { rcu_read_lock(); list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) { @@ -4602,7 +4602,7 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len, if (sctp_list_single_entry(&bp->address_list)) { addr = list_entry(bp->address_list.next, struct sctp_sockaddr_entry, list); - if (sctp_is_any(&addr->a)) { + if (sctp_is_any(sk, &addr->a)) { cnt = sctp_copy_laddrs_old(sk, bp->port, getaddrs.addr_num, addrs, &bytes_copied); @@ -4695,7 +4695,7 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, if (sctp_list_single_entry(&bp->address_list)) { addr = list_entry(bp->address_list.next, struct sctp_sockaddr_entry, list); - if (sctp_is_any(&addr->a)) { + if (sctp_is_any(sk, &addr->a)) { cnt = sctp_copy_laddrs(sk, bp->port, addrs, space_left, &bytes_copied); if (cnt < 0) { -- cgit v1.2.1 From 536428a9b9a98495f7ea54ad95cad83e22f1d47d Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 5 Sep 2008 08:55:26 +0800 Subject: sctp: Fix to start T5-shutdown-guard timer while enter SHUTDOWN-SENT state RFC 4960: Section 9.2 The sender of the SHUTDOWN MAY also start an overall guard timer 'T5-shutdown-guard' to bound the overall time for the shutdown sequence. At the expiration of this timer, the sender SHOULD abort the association by sending an ABORT chunk. If the 'T5-shutdown- guard' timer is used, it SHOULD be set to the recommended value of 5 times 'RTO.Max'. The timer 'T5-shutdown-guard' is used to counter the overall time for shutdown sequence, and it's start by the sender of the SHUTDOWN. So timer 'T5-shutdown-guard' should be start when we send the first SHUTDOWN chunk and enter the SHUTDOWN-SENT state, not start when we receipt of the SHUTDOWN primitive and enter SHUTDOWN-PENDING state. If 'T5-shutdown-guard' timer is start at SHUTDOWN-PENDING state, the association may be ABORT while data is still transmitting. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich --- net/sctp/sm_statefuns.c | 24 +++++++----------------- net/sctp/sm_statetable.c | 2 +- 2 files changed, 8 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 7c622af2ce55..e57d1d3ad0a6 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2076,10 +2076,6 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort( sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) return sctp_sf_discard_chunk(ep, asoc, type, arg, commands); - /* Stop the T5-shutdown guard timer. */ - sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, - SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); - return __sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands); } @@ -4543,13 +4539,6 @@ sctp_disposition_t sctp_sf_do_9_2_prm_shutdown( sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_SHUTDOWN_PENDING)); - /* sctpimpguide-05 Section 2.12.2 - * The sender of the SHUTDOWN MAY also start an overall guard timer - * 'T5-shutdown-guard' to bound the overall time for shutdown sequence. - */ - sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, - SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); - disposition = SCTP_DISPOSITION_CONSUME; if (sctp_outq_is_empty(&asoc->outqueue)) { disposition = sctp_sf_do_9_2_start_shutdown(ep, asoc, type, @@ -4994,6 +4983,13 @@ sctp_disposition_t sctp_sf_do_9_2_start_shutdown( sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN)); + /* RFC 4960 Section 9.2 + * The sender of the SHUTDOWN MAY also start an overall guard timer + * 'T5-shutdown-guard' to bound the overall time for shutdown sequence. + */ + sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, + SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); + if (asoc->autoclose) sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); @@ -5520,12 +5516,6 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire( sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_SHUTDOWN_PENDING)); - /* sctpimpguide-05 Section 2.12.2 - * The sender of the SHUTDOWN MAY also start an overall guard timer - * 'T5-shutdown-guard' to bound the overall time for shutdown sequence. - */ - sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, - SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); disposition = SCTP_DISPOSITION_CONSUME; if (sctp_outq_is_empty(&asoc->outqueue)) { disposition = sctp_sf_do_9_2_start_shutdown(ep, asoc, type, diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c index d991237fb400..dd4ddc40c0ad 100644 --- a/net/sctp/sm_statetable.c +++ b/net/sctp/sm_statetable.c @@ -897,7 +897,7 @@ static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_ /* SCTP_STATE_ESTABLISHED */ \ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - TYPE_SCTP_FUNC(sctp_sf_t5_timer_expire), \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ TYPE_SCTP_FUNC(sctp_sf_t5_timer_expire), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ -- cgit v1.2.1 From 8190f89dfd09dae0c117fb0745f5a820bd19a5a4 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 8 Sep 2008 12:13:55 +0800 Subject: sctp: Fix the SNMP counter of SCTP_MIB_OUTOFBLUES RFC3873 defined SCTP_MIB_OUTOFBLUES: sctpOutOfBlues OBJECT-TYPE SYNTAX Counter32 MAX-ACCESS read-only STATUS current DESCRIPTION "The number of out of the blue packets received by the host. An out of the blue packet is an SCTP packet correctly formed, including the proper checksum, but for which the receiver was unable to identify an appropriate association." REFERENCE "Section 8.4 in RFC2960 deals with the Out-Of-The-Blue (OOTB) packet definition and procedures." But OOTB packet INIT, INIT-ACK and SHUTDOWN-ACK(COOKIE-WAIT or COOKIE-ECHOED state) are not counted by SCTP_MIB_OUTOFBLUES. Case 1(INIT): Endpoint A Endpoint B (CLOSED) (CLOSED) INIT ----------> <---------- ABORT Case 2(INIT-ACK): Endpoint A Endpoint B (CLOSED) (CLOSED) INIT-ACK ----------> <---------- ABORT Case 3(SHUTDOWN-ACK): Endpoint A Endpoint B (CLOSED) (CLOSED) <---------- INIT SHUTDOWN-ACK ----------> <---------- SHUTDOWN-COMPLETE Case 4(SHUTDOWN-ACK): Endpoint A Endpoint B (CLOSED) (COOKIE-ECHOED) SHUTDOWN-ACK ----------> <---------- SHUTDOWN-COMPLETE This patch fixed the problem. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich --- net/sctp/sm_statefuns.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index e57d1d3ad0a6..81dfaee49b71 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -315,8 +315,10 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, /* If the packet is an OOTB packet which is temporarily on the * control endpoint, respond with an ABORT. */ - if (ep == sctp_sk((sctp_get_ctl_sock()))->ep) + if (ep == sctp_sk((sctp_get_ctl_sock()))->ep) { + SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES); return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); + } /* 3.1 A packet containing an INIT chunk MUST have a zero Verification * Tag. @@ -635,8 +637,10 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, /* If the packet is an OOTB packet which is temporarily on the * control endpoint, respond with an ABORT. */ - if (ep == sctp_sk((sctp_get_ctl_sock()))->ep) + if (ep == sctp_sk((sctp_get_ctl_sock()))->ep) { + SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES); return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); + } /* Make sure that the COOKIE_ECHO chunk has a valid length. * In this case, we check that we have enough for at least a @@ -3378,6 +3382,8 @@ sctp_disposition_t sctp_sf_do_8_5_1_E_sa(const struct sctp_endpoint *ep, * packet and the state function that handles OOTB SHUTDOWN_ACK is * called with a NULL association. */ + SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES); + return sctp_sf_shut_8_4_5(ep, NULL, type, arg, commands); } -- cgit v1.2.1 From 96cd0d3d710e64c55e034b77052d7ac46f094759 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 8 Sep 2008 14:00:26 -0400 Subject: sctp: enable cookie-echo retransmission transport switch This patch enables cookie-echo retransmission transport switch feature. If COOKIE-ECHO retransmission happens, it will be sent to the address other than the one last sent to. Signed-off-by: Gui Jianfeng Signed-off-by: Vlad Yasevich --- net/sctp/sm_sideeffect.c | 78 +++++++++++++++++++++++++++--------------------- net/sctp/sm_statefuns.c | 2 ++ 2 files changed, 46 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 9732c797e8ed..13d9eea5cf1a 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -889,6 +889,35 @@ static void sctp_cmd_adaptation_ind(sctp_cmd_seq_t *commands, sctp_ulpq_tail_event(&asoc->ulpq, ev); } + +static void sctp_cmd_t1_timer_update(struct sctp_association *asoc, + sctp_event_timeout_t timer, + char *name) +{ + struct sctp_transport *t; + + t = asoc->init_last_sent_to; + asoc->init_err_counter++; + + if (t->init_sent_count > (asoc->init_cycle + 1)) { + asoc->timeouts[timer] *= 2; + if (asoc->timeouts[timer] > asoc->max_init_timeo) { + asoc->timeouts[timer] = asoc->max_init_timeo; + } + asoc->init_cycle++; + SCTP_DEBUG_PRINTK( + "T1 %s Timeout adjustment" + " init_err_counter: %d" + " cycle: %d" + " timeout: %ld\n", + name, + asoc->init_err_counter, + asoc->init_cycle, + asoc->timeouts[timer]); + } + +} + /* These three macros allow us to pull the debugging code out of the * main flow of sctp_do_sm() to keep attention focused on the real * functionality there. @@ -1196,6 +1225,11 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(cmd->obj.ptr)); + if (new_obj->transport) { + new_obj->transport->init_sent_count++; + asoc->init_last_sent_to = new_obj->transport; + } + /* FIXME - Eventually come up with a cleaner way to * enabling COOKIE-ECHO + DATA bundling during * multihoming stale cookie scenarios, the following @@ -1345,26 +1379,9 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, * all transports have been tried at the current * timeout. */ - t = asoc->init_last_sent_to; - asoc->init_err_counter++; - - if (t->init_sent_count > (asoc->init_cycle + 1)) { - asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT] *= 2; - if (asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT] > - asoc->max_init_timeo) { - asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT] = - asoc->max_init_timeo; - } - asoc->init_cycle++; - SCTP_DEBUG_PRINTK( - "T1 INIT Timeout adjustment" - " init_err_counter: %d" - " cycle: %d" - " timeout: %ld\n", - asoc->init_err_counter, - asoc->init_cycle, - asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT]); - } + sctp_cmd_t1_timer_update(asoc, + SCTP_EVENT_TIMEOUT_T1_INIT, + "INIT"); sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT)); @@ -1377,20 +1394,9 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, * all transports have been tried at the current * timeout. */ - asoc->init_err_counter++; - - asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE] *= 2; - if (asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE] > - asoc->max_init_timeo) { - asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE] = - asoc->max_init_timeo; - } - SCTP_DEBUG_PRINTK( - "T1 COOKIE Timeout adjustment" - " init_err_counter: %d" - " timeout: %ld\n", - asoc->init_err_counter, - asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE]); + sctp_cmd_t1_timer_update(asoc, + SCTP_EVENT_TIMEOUT_T1_COOKIE, + "COOKIE"); /* If we've sent any data bundled with * COOKIE-ECHO we need to resend. @@ -1422,6 +1428,10 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_INIT_COUNTER_RESET: asoc->init_err_counter = 0; asoc->init_cycle = 0; + list_for_each_entry(t, &asoc->peer.transport_addr_list, + transports) { + t->init_sent_count = 0; + } break; case SCTP_CMD_REPORT_DUP: diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 81dfaee49b71..ea3a34cbe470 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -5307,6 +5307,8 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep if (!repl) return SCTP_DISPOSITION_NOMEM; + sctp_add_cmd_sf(commands, SCTP_CMD_INIT_CHOOSE_TRANSPORT, + SCTP_CHUNK(repl)); /* Issue a sideeffect to do the needed accounting. */ sctp_add_cmd_sf(commands, SCTP_CMD_COOKIEECHO_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE)); -- cgit v1.2.1 From e69c4e0f1210450841e40716894ba6a877b31d52 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 15 Sep 2008 16:29:49 -0400 Subject: sctp: correctly save sctp_adaptation from parameter. The INIT perameter carries the adapatation value in network-byte order. We need to store it in host byte order as expected by data types and the user API. Signed-off-by: Vlad Yasevich --- net/sctp/sm_make_chunk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 99fe0747cc96..76726bcff3e9 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2467,7 +2467,7 @@ do_addr_param: break; case SCTP_PARAM_ADAPTATION_LAYER_IND: - asoc->peer.adaptation_ind = param.aind->adaptation_ind; + asoc->peer.adaptation_ind = ntohl(param.aind->adaptation_ind); break; case SCTP_PARAM_SET_PRIMARY: -- cgit v1.2.1 From 25532824fb727744a302edb25c6a6ac10b82cb63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Sun, 5 Oct 2008 11:14:27 -0700 Subject: Phonet: modules auto-loading support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/af_phonet.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 1d8df6b7e3df..0a74aeaf5adf 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -64,6 +64,11 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol) } pnp = phonet_proto_get(protocol); +#ifdef CONFIG_KMOD + if (pnp == NULL && + request_module("net-pf-%d-proto-%d", PF_PHONET, protocol) == 0) + pnp = phonet_proto_get(protocol); +#endif if (pnp == NULL) return -EPROTONOSUPPORT; if (sock->type != pnp->sock_type) { @@ -94,7 +99,7 @@ out: } static struct net_proto_family phonet_proto_family = { - .family = AF_PHONET, + .family = PF_PHONET, .create = pn_socket_create, .owner = THIS_MODULE, }; @@ -447,7 +452,7 @@ static int __init phonet_init(void) err: phonet_sysctl_exit(); - sock_unregister(AF_PHONET); + sock_unregister(PF_PHONET); dev_remove_pack(&phonet_packet_type); phonet_device_exit(); return err; @@ -457,7 +462,7 @@ static void __exit phonet_exit(void) { isi_unregister(); phonet_sysctl_exit(); - sock_unregister(AF_PHONET); + sock_unregister(PF_PHONET); dev_remove_pack(&phonet_packet_type); phonet_device_exit(); } @@ -466,3 +471,4 @@ module_init(phonet_init); module_exit(phonet_exit); MODULE_DESCRIPTION("Phonet protocol stack for Linux"); MODULE_LICENSE("GPL"); +MODULE_ALIAS_NETPROTO(PF_PHONET); -- cgit v1.2.1 From 9995a32b4d14dcda2f8df58030526bee91114c16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Sun, 5 Oct 2008 11:14:48 -0700 Subject: Phonet: connected sockets glue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/phonet/socket.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) (limited to 'net') diff --git a/net/phonet/socket.c b/net/phonet/socket.c index dfd4061646db..cea1136cbe46 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -25,11 +25,13 @@ #include #include +#include #include #include #include #include +#include #include static int pn_socket_release(struct socket *sock) @@ -166,6 +168,24 @@ static int pn_socket_autobind(struct socket *sock) return 0; /* socket was already bound */ } +static int pn_socket_accept(struct socket *sock, struct socket *newsock, + int flags) +{ + struct sock *sk = sock->sk; + struct sock *newsk; + int err; + + newsk = sk->sk_prot->accept(sk, flags, &err); + if (!newsk) + return err; + + lock_sock(newsk); + sock_graft(newsk, newsock); + newsock->state = SS_CONNECTED; + release_sock(newsk); + return 0; +} + static int pn_socket_getname(struct socket *sock, struct sockaddr *addr, int *sockaddr_len, int peer) { @@ -182,6 +202,33 @@ static int pn_socket_getname(struct socket *sock, struct sockaddr *addr, return 0; } +static unsigned int pn_socket_poll(struct file *file, struct socket *sock, + poll_table *wait) +{ + struct sock *sk = sock->sk; + struct pep_sock *pn = pep_sk(sk); + unsigned int mask = 0; + + poll_wait(file, &sock->wait, wait); + + switch (sk->sk_state) { + case TCP_LISTEN: + return hlist_empty(&pn->ackq) ? 0 : POLLIN; + case TCP_CLOSE: + return POLLERR; + } + + if (!skb_queue_empty(&sk->sk_receive_queue)) + mask |= POLLIN | POLLRDNORM; + else if (sk->sk_state == TCP_CLOSE_WAIT) + return POLLHUP; + + if (sk->sk_state == TCP_ESTABLISHED && pn->tx_credits) + mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + + return mask; +} + static int pn_socket_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { @@ -220,6 +267,30 @@ static int pn_socket_ioctl(struct socket *sock, unsigned int cmd, return sk->sk_prot->ioctl(sk, cmd, arg); } +static int pn_socket_listen(struct socket *sock, int backlog) +{ + struct sock *sk = sock->sk; + int err = 0; + + if (sock->state != SS_UNCONNECTED) + return -EINVAL; + if (pn_socket_autobind(sock)) + return -ENOBUFS; + + lock_sock(sk); + if (sk->sk_state != TCP_CLOSE) { + err = -EINVAL; + goto out; + } + + sk->sk_state = TCP_LISTEN; + sk->sk_ack_backlog = 0; + sk->sk_max_ack_backlog = backlog; +out: + release_sock(sk); + return err; +} + static int pn_socket_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t total_len) { @@ -256,6 +327,32 @@ const struct proto_ops phonet_dgram_ops = { .sendpage = sock_no_sendpage, }; +const struct proto_ops phonet_stream_ops = { + .family = AF_PHONET, + .owner = THIS_MODULE, + .release = pn_socket_release, + .bind = pn_socket_bind, + .connect = sock_no_connect, + .socketpair = sock_no_socketpair, + .accept = pn_socket_accept, + .getname = pn_socket_getname, + .poll = pn_socket_poll, + .ioctl = pn_socket_ioctl, + .listen = pn_socket_listen, + .shutdown = sock_no_shutdown, + .setsockopt = sock_no_setsockopt, + .getsockopt = sock_no_getsockopt, +#ifdef CONFIG_COMPAT + .compat_setsockopt = sock_no_setsockopt, + .compat_getsockopt = compat_sock_no_getsockopt, +#endif + .sendmsg = pn_socket_sendmsg, + .recvmsg = sock_common_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; +EXPORT_SYMBOL(phonet_stream_ops); + static DEFINE_MUTEX(port_mutex); /* allocate port for a socket */ -- cgit v1.2.1 From 9641458d3ec42def729fde64669abf07f3220cd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Sun, 5 Oct 2008 11:15:13 -0700 Subject: Phonet: Pipe End Point for Phonet Pipes protocol MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This protocol provides some connection handling and negotiated congestion control. Nokia cellular modems use it for bulk transfers. It provides packet boundaries (hence SOCK_SEQPACKET). Congestion control is per packet rather per byte, so we do not re-use the generic socket memory accounting. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/Makefile | 4 +- net/phonet/af_phonet.c | 3 + net/phonet/pep.c | 908 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 914 insertions(+), 1 deletion(-) create mode 100644 net/phonet/pep.c (limited to 'net') diff --git a/net/phonet/Makefile b/net/phonet/Makefile index ae9c3ed5be83..505df2a04a85 100644 --- a/net/phonet/Makefile +++ b/net/phonet/Makefile @@ -1,4 +1,4 @@ -obj-$(CONFIG_PHONET) += phonet.o +obj-$(CONFIG_PHONET) += phonet.o pn_pep.o phonet-objs := \ pn_dev.o \ @@ -7,3 +7,5 @@ phonet-objs := \ datagram.o \ sysctl.o \ af_phonet.o + +pn_pep-objs := pep.o diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 0a74aeaf5adf..9e9c6fce11aa 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -58,6 +58,9 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol) case SOCK_DGRAM: protocol = PN_PROTO_PHONET; break; + case SOCK_SEQPACKET: + protocol = PN_PROTO_PIPE; + break; default: return -EPROTONOSUPPORT; } diff --git a/net/phonet/pep.c b/net/phonet/pep.c new file mode 100644 index 000000000000..c5dfecb207d2 --- /dev/null +++ b/net/phonet/pep.c @@ -0,0 +1,908 @@ +/* + * File: pep.c + * + * Phonet pipe protocol end point socket + * + * Copyright (C) 2008 Nokia Corporation. + * + * Author: Rémi Denis-Courmont + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +/* sk_state values: + * TCP_CLOSE sock not in use yet + * TCP_CLOSE_WAIT disconnected pipe + * TCP_LISTEN listening pipe endpoint + * TCP_SYN_RECV connected pipe in disabled state + * TCP_ESTABLISHED connected pipe in enabled state + * + * pep_sock locking: + * - sk_state, ackq, hlist: sock lock needed + * - listener: read only + * - pipe_handle: read only + */ + +#define CREDITS_MAX 10 +#define CREDITS_THR 7 + +static const struct sockaddr_pn pipe_srv = { + .spn_family = AF_PHONET, + .spn_resource = 0xD9, /* pipe service */ +}; + +#define pep_sb_size(s) (((s) + 5) & ~3) /* 2-bytes head, 32-bits aligned */ + +/* Get the next TLV sub-block. */ +static unsigned char *pep_get_sb(struct sk_buff *skb, u8 *ptype, u8 *plen, + void *buf) +{ + void *data = NULL; + struct { + u8 sb_type; + u8 sb_len; + } *ph, h; + int buflen = *plen; + + ph = skb_header_pointer(skb, 0, 2, &h); + if (ph == NULL || ph->sb_len < 2 || !pskb_may_pull(skb, ph->sb_len)) + return NULL; + ph->sb_len -= 2; + *ptype = ph->sb_type; + *plen = ph->sb_len; + + if (buflen > ph->sb_len) + buflen = ph->sb_len; + data = skb_header_pointer(skb, 2, buflen, buf); + __skb_pull(skb, 2 + ph->sb_len); + return data; +} + +static int pep_reply(struct sock *sk, struct sk_buff *oskb, + u8 code, const void *data, int len, gfp_t priority) +{ + const struct pnpipehdr *oph = pnp_hdr(oskb); + struct pnpipehdr *ph; + struct sk_buff *skb; + + skb = alloc_skb(MAX_PNPIPE_HEADER + len, priority); + if (!skb) + return -ENOMEM; + skb_set_owner_w(skb, sk); + + skb_reserve(skb, MAX_PNPIPE_HEADER); + __skb_put(skb, len); + skb_copy_to_linear_data(skb, data, len); + __skb_push(skb, sizeof(*ph)); + skb_reset_transport_header(skb); + ph = pnp_hdr(skb); + ph->utid = oph->utid; + ph->message_id = oph->message_id + 1; /* REQ -> RESP */ + ph->pipe_handle = oph->pipe_handle; + ph->error_code = code; + + return pn_skb_send(sk, skb, &pipe_srv); +} + +#define PAD 0x00 +static int pep_accept_conn(struct sock *sk, struct sk_buff *skb) +{ + static const u8 data[20] = { + PAD, PAD, PAD, 2 /* sub-blocks */, + PN_PIPE_SB_REQUIRED_FC_TX, pep_sb_size(5), 3, PAD, + PN_MULTI_CREDIT_FLOW_CONTROL, + PN_ONE_CREDIT_FLOW_CONTROL, + PN_LEGACY_FLOW_CONTROL, + PAD, + PN_PIPE_SB_PREFERRED_FC_RX, pep_sb_size(5), 3, PAD, + PN_MULTI_CREDIT_FLOW_CONTROL, + PN_ONE_CREDIT_FLOW_CONTROL, + PN_LEGACY_FLOW_CONTROL, + PAD, + }; + + might_sleep(); + return pep_reply(sk, skb, PN_PIPE_NO_ERROR, data, sizeof(data), + GFP_KERNEL); +} + +static int pep_reject_conn(struct sock *sk, struct sk_buff *skb, u8 code) +{ + static const u8 data[4] = { PAD, PAD, PAD, 0 /* sub-blocks */ }; + WARN_ON(code == PN_PIPE_NO_ERROR); + return pep_reply(sk, skb, code, data, sizeof(data), GFP_ATOMIC); +} + +/* Control requests are not sent by the pipe service and have a specific + * message format. */ +static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code) +{ + const struct pnpipehdr *oph = pnp_hdr(oskb); + struct sk_buff *skb; + struct pnpipehdr *ph; + struct sockaddr_pn dst; + + skb = alloc_skb(MAX_PNPIPE_HEADER + 4, GFP_ATOMIC); + if (!skb) + return -ENOMEM; + skb_set_owner_w(skb, sk); + + skb_reserve(skb, MAX_PHONET_HEADER); + ph = (struct pnpipehdr *)skb_put(skb, sizeof(*ph) + 4); + + ph->utid = oph->utid; + ph->message_id = PNS_PEP_CTRL_RESP; + ph->pipe_handle = oph->pipe_handle; + ph->data[0] = oph->data[1]; /* CTRL id */ + ph->data[1] = oph->data[0]; /* PEP type */ + ph->data[2] = code; /* error code, at an usual offset */ + ph->data[3] = PAD; + ph->data[4] = PAD; + + pn_skb_get_src_sockaddr(oskb, &dst); + return pn_skb_send(sk, skb, &dst); +} + +static int pipe_snd_status(struct sock *sk, u8 type, u8 status, gfp_t priority) +{ + struct pep_sock *pn = pep_sk(sk); + struct pnpipehdr *ph; + struct sk_buff *skb; + + skb = alloc_skb(MAX_PNPIPE_HEADER + 4, priority); + if (!skb) + return -ENOMEM; + skb_set_owner_w(skb, sk); + + skb_reserve(skb, MAX_PNPIPE_HEADER + 4); + __skb_push(skb, sizeof(*ph) + 4); + skb_reset_transport_header(skb); + ph = pnp_hdr(skb); + ph->utid = 0; + ph->message_id = PNS_PEP_STATUS_IND; + ph->pipe_handle = pn->pipe_handle; + ph->pep_type = PN_PEP_TYPE_COMMON; + ph->data[1] = type; + ph->data[2] = PAD; + ph->data[3] = PAD; + ph->data[4] = status; + + return pn_skb_send(sk, skb, &pipe_srv); +} + +/* Send our RX flow control information to the sender. + * Socket must be locked. */ +static void pipe_grant_credits(struct sock *sk) +{ + struct pep_sock *pn = pep_sk(sk); + + BUG_ON(sk->sk_state != TCP_ESTABLISHED); + + switch (pn->rx_fc) { + case PN_LEGACY_FLOW_CONTROL: /* TODO */ + break; + case PN_ONE_CREDIT_FLOW_CONTROL: + pipe_snd_status(sk, PN_PEP_IND_FLOW_CONTROL, + PEP_IND_READY, GFP_ATOMIC); + pn->rx_credits = 1; + break; + case PN_MULTI_CREDIT_FLOW_CONTROL: + if ((pn->rx_credits + CREDITS_THR) > CREDITS_MAX) + break; + if (pipe_snd_status(sk, PN_PEP_IND_ID_MCFC_GRANT_CREDITS, + CREDITS_MAX - pn->rx_credits, + GFP_ATOMIC) == 0) + pn->rx_credits = CREDITS_MAX; + break; + } +} + +static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb) +{ + struct pep_sock *pn = pep_sk(sk); + struct pnpipehdr *hdr = pnp_hdr(skb); + + if (!pskb_may_pull(skb, sizeof(*hdr) + 4)) + return -EINVAL; + + if (hdr->data[0] != PN_PEP_TYPE_COMMON) { + LIMIT_NETDEBUG(KERN_DEBUG"Phonet unknown PEP type: %u\n", + (unsigned)hdr->data[0]); + return -EOPNOTSUPP; + } + + switch (hdr->data[1]) { + case PN_PEP_IND_FLOW_CONTROL: + switch (pn->tx_fc) { + case PN_LEGACY_FLOW_CONTROL: + switch (hdr->data[4]) { + case PEP_IND_BUSY: + pn->tx_credits = 0; + break; + case PEP_IND_READY: + pn->tx_credits = 1; + break; + } + break; + case PN_ONE_CREDIT_FLOW_CONTROL: + if (hdr->data[4] == PEP_IND_READY) + pn->tx_credits = 1; + break; + } + break; + + case PN_PEP_IND_ID_MCFC_GRANT_CREDITS: + if (pn->tx_fc != PN_MULTI_CREDIT_FLOW_CONTROL) + break; + if (pn->tx_credits + hdr->data[4] > 0xff) + pn->tx_credits = 0xff; + else + pn->tx_credits += hdr->data[4]; + break; + + default: + LIMIT_NETDEBUG(KERN_DEBUG"Phonet unknown PEP indication: %u\n", + (unsigned)hdr->data[1]); + return -EOPNOTSUPP; + } + if (pn->tx_credits) + sk->sk_write_space(sk); + return 0; +} + +static int pipe_rcv_created(struct sock *sk, struct sk_buff *skb) +{ + struct pep_sock *pn = pep_sk(sk); + struct pnpipehdr *hdr = pnp_hdr(skb); + u8 n_sb = hdr->data[0]; + + pn->rx_fc = pn->tx_fc = PN_LEGACY_FLOW_CONTROL; + __skb_pull(skb, sizeof(*hdr)); + while (n_sb > 0) { + u8 type, buf[2], len = sizeof(buf); + u8 *data = pep_get_sb(skb, &type, &len, buf); + + if (data == NULL) + return -EINVAL; + switch (type) { + case PN_PIPE_SB_NEGOTIATED_FC: + if (len < 2 || (data[0] | data[1]) > 3) + break; + pn->tx_fc = data[0] & 3; + pn->rx_fc = data[1] & 3; + break; + } + n_sb--; + } + return 0; +} + +/* Queue an skb to a connected sock. + * Socket lock must be held. */ +static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) +{ + struct pep_sock *pn = pep_sk(sk); + struct pnpipehdr *hdr = pnp_hdr(skb); + int err = 0; + + BUG_ON(sk->sk_state == TCP_CLOSE_WAIT); + + switch (hdr->message_id) { + case PNS_PEP_CONNECT_REQ: + pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE); + break; + + case PNS_PEP_DISCONNECT_REQ: + pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); + sk->sk_state = TCP_CLOSE_WAIT; + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_state_change(sk); + break; + + case PNS_PEP_ENABLE_REQ: + /* Wait for PNS_PIPE_(ENABLED|REDIRECTED)_IND */ + pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); + break; + + case PNS_PEP_RESET_REQ: + switch (hdr->state_after_reset) { + case PN_PIPE_DISABLE: + pn->init_enable = 0; + break; + case PN_PIPE_ENABLE: + pn->init_enable = 1; + break; + default: /* not allowed to send an error here!? */ + err = -EINVAL; + goto out; + } + /* fall through */ + case PNS_PEP_DISABLE_REQ: + pn->tx_credits = 0; + pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); + break; + + case PNS_PEP_CTRL_REQ: + /* TODO */ + pep_ctrlreq_error(sk, skb, PN_PIPE_NO_ERROR); + break; + + case PNS_PIPE_DATA: + __skb_pull(skb, 3); /* Pipe data header */ + if (!pn_flow_safe(pn->rx_fc)) { + err = sock_queue_rcv_skb(sk, skb); + if (!err) + return 0; + break; + } + + if (pn->rx_credits == 0) { + err = -ENOBUFS; + break; + } + pn->rx_credits--; + skb->dev = NULL; + skb_set_owner_r(skb, sk); + err = skb->len; + skb_queue_tail(&sk->sk_receive_queue, skb); + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_data_ready(sk, err); + return 0; + + case PNS_PEP_STATUS_IND: + pipe_rcv_status(sk, skb); + break; + + case PNS_PIPE_REDIRECTED_IND: + err = pipe_rcv_created(sk, skb); + break; + + case PNS_PIPE_CREATED_IND: + err = pipe_rcv_created(sk, skb); + if (err) + break; + /* fall through */ + case PNS_PIPE_RESET_IND: + if (!pn->init_enable) + break; + /* fall through */ + case PNS_PIPE_ENABLED_IND: + if (!pn_flow_safe(pn->tx_fc)) { + pn->tx_credits = 1; + sk->sk_write_space(sk); + } + if (sk->sk_state == TCP_ESTABLISHED) + break; /* Nothing to do */ + sk->sk_state = TCP_ESTABLISHED; + pipe_grant_credits(sk); + break; + + case PNS_PIPE_DISABLED_IND: + sk->sk_state = TCP_SYN_RECV; + pn->rx_credits = 0; + break; + + default: + LIMIT_NETDEBUG(KERN_DEBUG"Phonet unknown PEP message: %u\n", + hdr->message_id); + err = -EINVAL; + } +out: + kfree_skb(skb); + return err; +} + +/* Destroy connected sock. */ +static void pipe_destruct(struct sock *sk) +{ + skb_queue_purge(&sk->sk_receive_queue); +} + +static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb) +{ + struct sock *newsk; + struct pep_sock *newpn, *pn = pep_sk(sk); + struct pnpipehdr *hdr; + struct sockaddr_pn dst; + u16 peer_type; + u8 pipe_handle, enabled, n_sb; + + if (!pskb_pull(skb, sizeof(*hdr) + 4)) + return -EINVAL; + + hdr = pnp_hdr(skb); + pipe_handle = hdr->pipe_handle; + switch (hdr->state_after_connect) { + case PN_PIPE_DISABLE: + enabled = 0; + break; + case PN_PIPE_ENABLE: + enabled = 1; + break; + default: + pep_reject_conn(sk, skb, PN_PIPE_ERR_INVALID_PARAM); + return -EINVAL; + } + peer_type = hdr->other_pep_type << 8; + + if (unlikely(sk->sk_state != TCP_LISTEN) || sk_acceptq_is_full(sk)) { + pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE); + return -ENOBUFS; + } + + /* Parse sub-blocks (options) */ + n_sb = hdr->data[4]; + while (n_sb > 0) { + u8 type, buf[1], len = sizeof(buf); + const u8 *data = pep_get_sb(skb, &type, &len, buf); + + if (data == NULL) + return -EINVAL; + switch (type) { + case PN_PIPE_SB_CONNECT_REQ_PEP_SUB_TYPE: + if (len < 1) + return -EINVAL; + peer_type = (peer_type & 0xff00) | data[0]; + break; + } + n_sb--; + } + + skb = skb_clone(skb, GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + /* Create a new to-be-accepted sock */ + newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_ATOMIC, sk->sk_prot); + if (!newsk) { + kfree_skb(skb); + return -ENOMEM; + } + sock_init_data(NULL, newsk); + newsk->sk_state = TCP_SYN_RECV; + newsk->sk_backlog_rcv = pipe_do_rcv; + newsk->sk_protocol = sk->sk_protocol; + newsk->sk_destruct = pipe_destruct; + + newpn = pep_sk(newsk); + pn_skb_get_dst_sockaddr(skb, &dst); + newpn->pn_sk.sobject = pn_sockaddr_get_object(&dst); + newpn->pn_sk.resource = pn->pn_sk.resource; + newpn->pipe_handle = pipe_handle; + newpn->peer_type = peer_type; + newpn->rx_credits = newpn->tx_credits = 0; + newpn->rx_fc = newpn->tx_fc = PN_LEGACY_FLOW_CONTROL; + newpn->init_enable = enabled; + + BUG_ON(!skb_queue_empty(&newsk->sk_receive_queue)); + skb_queue_head(&newsk->sk_receive_queue, skb); + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_data_ready(sk, 0); + + sk_acceptq_added(sk); + sk_add_node(newsk, &pn->ackq); + return 0; +} + +/* Listening sock must be locked */ +static struct sock *pep_find_pipe(const struct hlist_head *hlist, + const struct sockaddr_pn *dst, + u8 pipe_handle) +{ + struct hlist_node *node; + struct sock *sknode; + u16 dobj = pn_sockaddr_get_object(dst); + + sk_for_each(sknode, node, hlist) { + struct pep_sock *pnnode = pep_sk(sknode); + + /* Ports match, but addresses might not: */ + if (pnnode->pn_sk.sobject != dobj) + continue; + if (pnnode->pipe_handle != pipe_handle) + continue; + if (sknode->sk_state == TCP_CLOSE_WAIT) + continue; + + sock_hold(sknode); + return sknode; + } + return NULL; +} + +/* + * Deliver an skb to a listening sock. + * Socket lock must be held. + * We then queue the skb to the right connected sock (if any). + */ +static int pep_do_rcv(struct sock *sk, struct sk_buff *skb) +{ + struct pep_sock *pn = pep_sk(sk); + struct sock *sknode; + struct pnpipehdr *hdr = pnp_hdr(skb); + struct sockaddr_pn dst; + int err = NET_RX_SUCCESS; + u8 pipe_handle; + + if (!pskb_may_pull(skb, sizeof(*hdr))) + goto drop; + + hdr = pnp_hdr(skb); + pipe_handle = hdr->pipe_handle; + if (pipe_handle == PN_PIPE_INVALID_HANDLE) + goto drop; + + pn_skb_get_dst_sockaddr(skb, &dst); + + /* Look for an existing pipe handle */ + sknode = pep_find_pipe(&pn->hlist, &dst, pipe_handle); + if (sknode) + return sk_receive_skb(sknode, skb, 1); + + /* Look for a pipe handle pending accept */ + sknode = pep_find_pipe(&pn->ackq, &dst, pipe_handle); + if (sknode) { + sock_put(sknode); + if (net_ratelimit()) + printk(KERN_WARNING"Phonet unconnected PEP ignored"); + err = NET_RX_DROP; + goto drop; + } + + switch (hdr->message_id) { + case PNS_PEP_CONNECT_REQ: + err = pep_connreq_rcv(sk, skb); + break; + + case PNS_PEP_DISCONNECT_REQ: + pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); + break; + + case PNS_PEP_CTRL_REQ: + pep_ctrlreq_error(sk, skb, PN_PIPE_INVALID_HANDLE); + break; + + case PNS_PEP_RESET_REQ: + case PNS_PEP_ENABLE_REQ: + case PNS_PEP_DISABLE_REQ: + /* invalid handle is not even allowed here! */ + default: + err = NET_RX_DROP; + } +drop: + kfree_skb(skb); + return err; +} + +/* associated socket ceases to exist */ +static void pep_sock_close(struct sock *sk, long timeout) +{ + struct pep_sock *pn = pep_sk(sk); + + sk_common_release(sk); + + lock_sock(sk); + if (sk->sk_state == TCP_LISTEN) { + /* Destroy the listen queue */ + struct sock *sknode; + struct hlist_node *p, *n; + + sk_for_each_safe(sknode, p, n, &pn->ackq) + sk_del_node_init(sknode); + sk->sk_state = TCP_CLOSE; + } + release_sock(sk); +} + +static int pep_wait_connreq(struct sock *sk, int noblock) +{ + struct task_struct *tsk = current; + struct pep_sock *pn = pep_sk(sk); + long timeo = sock_rcvtimeo(sk, noblock); + + for (;;) { + DEFINE_WAIT(wait); + + if (sk->sk_state != TCP_LISTEN) + return -EINVAL; + if (!hlist_empty(&pn->ackq)) + break; + if (!timeo) + return -EWOULDBLOCK; + if (signal_pending(tsk)) + return sock_intr_errno(timeo); + + prepare_to_wait_exclusive(&sk->sk_socket->wait, &wait, + TASK_INTERRUPTIBLE); + release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock(sk); + finish_wait(&sk->sk_socket->wait, &wait); + } + + return 0; +} + +static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp) +{ + struct pep_sock *pn = pep_sk(sk); + struct sock *newsk = NULL; + struct sk_buff *oskb; + int err; + + lock_sock(sk); + err = pep_wait_connreq(sk, flags & O_NONBLOCK); + if (err) + goto out; + + newsk = __sk_head(&pn->ackq); + + oskb = skb_dequeue(&newsk->sk_receive_queue); + err = pep_accept_conn(newsk, oskb); + if (err) { + skb_queue_head(&newsk->sk_receive_queue, oskb); + newsk = NULL; + goto out; + } + + sock_hold(sk); + pep_sk(newsk)->listener = sk; + + sock_hold(newsk); + sk_del_node_init(newsk); + sk_acceptq_removed(sk); + sk_add_node(newsk, &pn->hlist); + __sock_put(newsk); + +out: + release_sock(sk); + *errp = err; + return newsk; +} + +static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg) +{ + int answ; + + switch (cmd) { + case SIOCINQ: + if (sk->sk_state == TCP_LISTEN) + return -EINVAL; + + lock_sock(sk); + if (!skb_queue_empty(&sk->sk_receive_queue)) + answ = skb_peek(&sk->sk_receive_queue)->len; + else + answ = 0; + release_sock(sk); + return put_user(answ, (int __user *)arg); + } + + return -ENOIOCTLCMD; +} + +static int pep_init(struct sock *sk) +{ + struct pep_sock *pn = pep_sk(sk); + + INIT_HLIST_HEAD(&pn->ackq); + INIT_HLIST_HEAD(&pn->hlist); + pn->pipe_handle = PN_PIPE_INVALID_HANDLE; + return 0; +} + +static int pep_sendmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len) +{ + struct pep_sock *pn = pep_sk(sk); + struct sk_buff *skb = NULL; + struct pnpipehdr *ph; + long timeo; + int flags = msg->msg_flags; + int err, done; + + if (msg->msg_flags & MSG_OOB || !(msg->msg_flags & MSG_EOR)) + return -EOPNOTSUPP; + + lock_sock(sk); + timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); + if ((1 << sk->sk_state) & (TCPF_LISTEN|TCPF_CLOSE)) { + err = -ENOTCONN; + goto out; + } + if (sk->sk_state != TCP_ESTABLISHED) { + /* Wait until the pipe gets to enabled state */ +disabled: + err = sk_stream_wait_connect(sk, &timeo); + if (err) + goto out; + + if (sk->sk_state == TCP_CLOSE_WAIT) { + err = -ECONNRESET; + goto out; + } + } + BUG_ON(sk->sk_state != TCP_ESTABLISHED); + + /* Wait until flow control allows TX */ + done = pn->tx_credits > 0; + while (!done) { + DEFINE_WAIT(wait); + + if (!timeo) { + err = -EAGAIN; + goto out; + } + if (signal_pending(current)) { + err = sock_intr_errno(timeo); + goto out; + } + + prepare_to_wait(&sk->sk_socket->wait, &wait, + TASK_INTERRUPTIBLE); + done = sk_wait_event(sk, &timeo, pn->tx_credits > 0); + finish_wait(&sk->sk_socket->wait, &wait); + + if (sk->sk_state != TCP_ESTABLISHED) + goto disabled; + } + + if (!skb) { + skb = sock_alloc_send_skb(sk, MAX_PNPIPE_HEADER + len, + flags & MSG_DONTWAIT, &err); + if (skb == NULL) + goto out; + skb_reserve(skb, MAX_PHONET_HEADER + 3); + + if (sk->sk_state != TCP_ESTABLISHED || !pn->tx_credits) + goto disabled; /* sock_alloc_send_skb might sleep */ + } + + err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); + if (err < 0) + goto out; + + __skb_push(skb, 3); + skb_reset_transport_header(skb); + ph = pnp_hdr(skb); + ph->utid = 0; + ph->message_id = PNS_PIPE_DATA; + ph->pipe_handle = pn->pipe_handle; + if (pn_flow_safe(pn->tx_fc)) /* credit-based flow control */ + pn->tx_credits--; + + err = pn_skb_send(sk, skb, &pipe_srv); + if (err >= 0) + err = len; /* success! */ + skb = NULL; +out: + release_sock(sk); + kfree_skb(skb); + return err; +} + +static int pep_recvmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len) +{ + struct sk_buff *skb; + int err; + + if (unlikely(flags & MSG_OOB)) + return -EOPNOTSUPP; + if (unlikely(1 << sk->sk_state & (TCPF_LISTEN | TCPF_CLOSE))) + return -ENOTCONN; + + skb = skb_recv_datagram(sk, flags, noblock, &err); + lock_sock(sk); + if (skb == NULL) { + if (err == -ENOTCONN && sk->sk_state == TCP_CLOSE_WAIT) + err = -ECONNRESET; + release_sock(sk); + return err; + } + + if (sk->sk_state == TCP_ESTABLISHED) + pipe_grant_credits(sk); + release_sock(sk); + + msg->msg_flags |= MSG_EOR; + + if (skb->len > len) + msg->msg_flags |= MSG_TRUNC; + else + len = skb->len; + + err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len); + if (!err) + err = (flags & MSG_TRUNC) ? skb->len : len; + + skb_free_datagram(sk, skb); + return err; +} + +static void pep_sock_unhash(struct sock *sk) +{ + struct pep_sock *pn = pep_sk(sk); + struct sock *skparent = NULL; + + lock_sock(sk); + if ((1 << sk->sk_state) & ~(TCPF_CLOSE|TCPF_LISTEN)) { + skparent = pn->listener; + sk_del_node_init(sk); + release_sock(sk); + + sk = skparent; + pn = pep_sk(skparent); + lock_sock(sk); + } + /* Unhash a listening sock only when it is closed + * and all of its active connected pipes are closed. */ + if (hlist_empty(&pn->hlist)) + pn_sock_unhash(&pn->pn_sk.sk); + release_sock(sk); + + if (skparent) + sock_put(skparent); +} + +static struct proto pep_proto = { + .close = pep_sock_close, + .accept = pep_sock_accept, + .ioctl = pep_ioctl, + .init = pep_init, + .sendmsg = pep_sendmsg, + .recvmsg = pep_recvmsg, + .backlog_rcv = pep_do_rcv, + .hash = pn_sock_hash, + .unhash = pep_sock_unhash, + .get_port = pn_sock_get_port, + .obj_size = sizeof(struct pep_sock), + .owner = THIS_MODULE, + .name = "PNPIPE", +}; + +static struct phonet_protocol pep_pn_proto = { + .ops = &phonet_stream_ops, + .prot = &pep_proto, + .sock_type = SOCK_SEQPACKET, +}; + +static int __init pep_register(void) +{ + return phonet_proto_register(PN_PROTO_PIPE, &pep_pn_proto); +} + +static void __exit pep_unregister(void) +{ + phonet_proto_unregister(PN_PROTO_PIPE, &pep_pn_proto); +} + +module_init(pep_register); +module_exit(pep_unregister); +MODULE_AUTHOR("Remi Denis-Courmont, Nokia"); +MODULE_DESCRIPTION("Phonet pipe protocol"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO(PF_PHONET, PN_PROTO_PIPE); -- cgit v1.2.1 From c41bd97f815720f9404f97da0c4f4400b52c243d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Sun, 5 Oct 2008 11:15:43 -0700 Subject: Phonet: receive pipe control requests as out-of-band data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/pep.c | 65 ++++++++++++++++++++++++++++++++++++++--------------- net/phonet/socket.c | 4 +++- 2 files changed, 50 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/phonet/pep.c b/net/phonet/pep.c index c5dfecb207d2..d564d07cb797 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -137,14 +137,15 @@ static int pep_reject_conn(struct sock *sk, struct sk_buff *skb, u8 code) /* Control requests are not sent by the pipe service and have a specific * message format. */ -static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code) +static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code, + gfp_t priority) { const struct pnpipehdr *oph = pnp_hdr(oskb); struct sk_buff *skb; struct pnpipehdr *ph; struct sockaddr_pn dst; - skb = alloc_skb(MAX_PNPIPE_HEADER + 4, GFP_ATOMIC); + skb = alloc_skb(MAX_PNPIPE_HEADER + 4, priority); if (!skb) return -ENOMEM; skb_set_owner_w(skb, sk); @@ -305,6 +306,7 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) { struct pep_sock *pn = pep_sk(sk); struct pnpipehdr *hdr = pnp_hdr(skb); + struct sk_buff_head *queue; int err = 0; BUG_ON(sk->sk_state == TCP_CLOSE_WAIT); @@ -345,9 +347,11 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) break; case PNS_PEP_CTRL_REQ: - /* TODO */ - pep_ctrlreq_error(sk, skb, PN_PIPE_NO_ERROR); - break; + if (skb_queue_len(&pn->ctrlreq_queue) >= PNPIPE_CTRLREQ_MAX) + break; + __skb_pull(skb, 4); + queue = &pn->ctrlreq_queue; + goto queue; case PNS_PIPE_DATA: __skb_pull(skb, 3); /* Pipe data header */ @@ -363,13 +367,8 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) break; } pn->rx_credits--; - skb->dev = NULL; - skb_set_owner_r(skb, sk); - err = skb->len; - skb_queue_tail(&sk->sk_receive_queue, skb); - if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk, err); - return 0; + queue = &sk->sk_receive_queue; + goto queue; case PNS_PEP_STATUS_IND: pipe_rcv_status(sk, skb); @@ -412,12 +411,24 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) out: kfree_skb(skb); return err; + +queue: + skb->dev = NULL; + skb_set_owner_r(skb, sk); + err = skb->len; + skb_queue_tail(queue, skb); + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_data_ready(sk, err); + return 0; } /* Destroy connected sock. */ static void pipe_destruct(struct sock *sk) { + struct pep_sock *pn = pep_sk(sk); + skb_queue_purge(&sk->sk_receive_queue); + skb_queue_purge(&pn->ctrlreq_queue); } static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb) @@ -490,6 +501,7 @@ static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb) pn_skb_get_dst_sockaddr(skb, &dst); newpn->pn_sk.sobject = pn_sockaddr_get_object(&dst); newpn->pn_sk.resource = pn->pn_sk.resource; + skb_queue_head_init(&newpn->ctrlreq_queue); newpn->pipe_handle = pipe_handle; newpn->peer_type = peer_type; newpn->rx_credits = newpn->tx_credits = 0; @@ -581,7 +593,7 @@ static int pep_do_rcv(struct sock *sk, struct sk_buff *skb) break; case PNS_PEP_CTRL_REQ: - pep_ctrlreq_error(sk, skb, PN_PIPE_INVALID_HANDLE); + pep_ctrlreq_error(sk, skb, PN_PIPE_INVALID_HANDLE, GFP_ATOMIC); break; case PNS_PEP_RESET_REQ: @@ -684,6 +696,7 @@ out: static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg) { + struct pep_sock *pn = pep_sk(sk); int answ; switch (cmd) { @@ -692,7 +705,10 @@ static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg) return -EINVAL; lock_sock(sk); - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (sock_flag(sk, SOCK_URGINLINE) + && !skb_queue_empty(&pn->ctrlreq_queue)) + answ = skb_peek(&pn->ctrlreq_queue)->len; + else if (!skb_queue_empty(&sk->sk_receive_queue)) answ = skb_peek(&sk->sk_receive_queue)->len; else answ = 0; @@ -709,6 +725,7 @@ static int pep_init(struct sock *sk) INIT_HLIST_HEAD(&pn->ackq); INIT_HLIST_HEAD(&pn->hlist); + skb_queue_head_init(&pn->ctrlreq_queue); pn->pipe_handle = PN_PIPE_INVALID_HANDLE; return 0; } @@ -810,11 +827,24 @@ static int pep_recvmsg(struct kiocb *iocb, struct sock *sk, struct sk_buff *skb; int err; - if (unlikely(flags & MSG_OOB)) - return -EOPNOTSUPP; if (unlikely(1 << sk->sk_state & (TCPF_LISTEN | TCPF_CLOSE))) return -ENOTCONN; + if ((flags & MSG_OOB) || sock_flag(sk, SOCK_URGINLINE)) { + /* Dequeue and acknowledge control request */ + struct pep_sock *pn = pep_sk(sk); + + skb = skb_dequeue(&pn->ctrlreq_queue); + if (skb) { + pep_ctrlreq_error(sk, skb, PN_PIPE_NO_ERROR, + GFP_KERNEL); + msg->msg_flags |= MSG_OOB; + goto copy; + } + if (flags & MSG_OOB) + return -EINVAL; + } + skb = skb_recv_datagram(sk, flags, noblock, &err); lock_sock(sk); if (skb == NULL) { @@ -827,9 +857,8 @@ static int pep_recvmsg(struct kiocb *iocb, struct sock *sk, if (sk->sk_state == TCP_ESTABLISHED) pipe_grant_credits(sk); release_sock(sk); - +copy: msg->msg_flags |= MSG_EOR; - if (skb->len > len) msg->msg_flags |= MSG_TRUNC; else diff --git a/net/phonet/socket.c b/net/phonet/socket.c index cea1136cbe46..a9c3d1f2e9db 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -220,7 +220,9 @@ static unsigned int pn_socket_poll(struct file *file, struct socket *sock, if (!skb_queue_empty(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; - else if (sk->sk_state == TCP_CLOSE_WAIT) + if (!skb_queue_empty(&pn->ctrlreq_queue)) + mask |= POLLPRI; + if (!mask && sk->sk_state == TCP_CLOSE_WAIT) return POLLHUP; if (sk->sk_state == TCP_ESTABLISHED && pn->tx_credits) -- cgit v1.2.1 From 02a47617cdce440f60c71a51f3a93f9f5fcc5a7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Sun, 5 Oct 2008 11:16:16 -0700 Subject: Phonet: implement GPRS virtual interface over PEP socket MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/Makefile | 2 +- net/phonet/pep-gprs.c | 347 ++++++++++++++++++++++++++++++++++++++++++++++++++ net/phonet/pep.c | 161 +++++++++++++++++++++-- net/phonet/socket.c | 8 +- 4 files changed, 502 insertions(+), 16 deletions(-) create mode 100644 net/phonet/pep-gprs.c (limited to 'net') diff --git a/net/phonet/Makefile b/net/phonet/Makefile index 505df2a04a85..d62bbba649b3 100644 --- a/net/phonet/Makefile +++ b/net/phonet/Makefile @@ -8,4 +8,4 @@ phonet-objs := \ sysctl.o \ af_phonet.o -pn_pep-objs := pep.o +pn_pep-objs := pep.o pep-gprs.o diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c new file mode 100644 index 000000000000..9978afbd9f2a --- /dev/null +++ b/net/phonet/pep-gprs.c @@ -0,0 +1,347 @@ +/* + * File: pep-gprs.c + * + * GPRS over Phonet pipe end point socket + * + * Copyright (C) 2008 Nokia Corporation. + * + * Author: Rémi Denis-Courmont + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#define GPRS_DEFAULT_MTU 1400 + +struct gprs_dev { + struct sock *sk; + void (*old_state_change)(struct sock *); + void (*old_data_ready)(struct sock *, int); + void (*old_write_space)(struct sock *); + + struct net_device *net; + struct net_device_stats stats; + + struct sk_buff_head tx_queue; + struct work_struct tx_work; + spinlock_t tx_lock; + unsigned tx_max; +}; + +static int gprs_type_trans(struct sk_buff *skb) +{ + const u8 *pvfc; + u8 buf; + + pvfc = skb_header_pointer(skb, 0, 1, &buf); + if (!pvfc) + return 0; + /* Look at IP version field */ + switch (*pvfc >> 4) { + case 4: + return htons(ETH_P_IP); + case 6: + return htons(ETH_P_IPV6); + } + return 0; +} + +/* + * Socket callbacks + */ + +static void gprs_state_change(struct sock *sk) +{ + struct gprs_dev *dev = sk->sk_user_data; + + if (sk->sk_state == TCP_CLOSE_WAIT) { + netif_stop_queue(dev->net); + netif_carrier_off(dev->net); + } +} + +static int gprs_recv(struct gprs_dev *dev, struct sk_buff *skb) +{ + int err = 0; + u16 protocol = gprs_type_trans(skb); + + if (!protocol) { + err = -EINVAL; + goto drop; + } + + if (likely(skb_headroom(skb) & 3)) { + struct sk_buff *rskb, *fs; + int flen = 0; + + /* Phonet Pipe data header is misaligned (3 bytes), + * so wrap the IP packet as a single fragment of an head-less + * socket buffer. The network stack will pull what it needs, + * but at least, the whole IP payload is not memcpy'd. */ + rskb = netdev_alloc_skb(dev->net, 0); + if (!rskb) { + err = -ENOBUFS; + goto drop; + } + skb_shinfo(rskb)->frag_list = skb; + rskb->len += skb->len; + rskb->data_len += rskb->len; + rskb->truesize += rskb->len; + + /* Avoid nested fragments */ + for (fs = skb_shinfo(skb)->frag_list; fs; fs = fs->next) + flen += fs->len; + skb->next = skb_shinfo(skb)->frag_list; + skb_shinfo(skb)->frag_list = NULL; + skb->len -= flen; + skb->data_len -= flen; + skb->truesize -= flen; + + skb = rskb; + } + + skb->protocol = protocol; + skb_reset_mac_header(skb); + skb->dev = dev->net; + + if (likely(dev->net->flags & IFF_UP)) { + dev->stats.rx_packets++; + dev->stats.rx_bytes += skb->len; + netif_rx(skb); + skb = NULL; + } else + err = -ENODEV; + +drop: + if (skb) { + dev_kfree_skb(skb); + dev->stats.rx_dropped++; + } + return err; +} + +static void gprs_data_ready(struct sock *sk, int len) +{ + struct gprs_dev *dev = sk->sk_user_data; + struct sk_buff *skb; + + while ((skb = pep_read(sk)) != NULL) { + skb_orphan(skb); + gprs_recv(dev, skb); + } +} + +static void gprs_write_space(struct sock *sk) +{ + struct gprs_dev *dev = sk->sk_user_data; + unsigned credits = pep_writeable(sk); + + spin_lock_bh(&dev->tx_lock); + dev->tx_max = credits; + if (credits > skb_queue_len(&dev->tx_queue)) + netif_wake_queue(dev->net); + spin_unlock_bh(&dev->tx_lock); +} + +/* + * Network device callbacks + */ + +static int gprs_xmit(struct sk_buff *skb, struct net_device *net) +{ + struct gprs_dev *dev = netdev_priv(net); + + switch (skb->protocol) { + case htons(ETH_P_IP): + case htons(ETH_P_IPV6): + break; + default: + dev_kfree_skb(skb); + return 0; + } + + spin_lock(&dev->tx_lock); + if (likely(skb_queue_len(&dev->tx_queue) < dev->tx_max)) { + skb_queue_tail(&dev->tx_queue, skb); + skb = NULL; + } + if (skb_queue_len(&dev->tx_queue) >= dev->tx_max) + netif_stop_queue(net); + spin_unlock(&dev->tx_lock); + + schedule_work(&dev->tx_work); + if (unlikely(skb)) + dev_kfree_skb(skb); + return 0; +} + +static void gprs_tx(struct work_struct *work) +{ + struct gprs_dev *dev = container_of(work, struct gprs_dev, tx_work); + struct sock *sk = dev->sk; + struct sk_buff *skb; + + while ((skb = skb_dequeue(&dev->tx_queue)) != NULL) { + int err; + + dev->stats.tx_bytes += skb->len; + dev->stats.tx_packets++; + + skb_orphan(skb); + skb_set_owner_w(skb, sk); + + lock_sock(sk); + err = pep_write(sk, skb); + if (err) { + LIMIT_NETDEBUG(KERN_WARNING"%s: TX error (%d)\n", + dev->net->name, err); + dev->stats.tx_aborted_errors++; + dev->stats.tx_errors++; + } + release_sock(sk); + } + + lock_sock(sk); + gprs_write_space(sk); + release_sock(sk); +} + +static int gprs_set_mtu(struct net_device *net, int new_mtu) +{ + if ((new_mtu < 576) || (new_mtu > (PHONET_MAX_MTU - 11))) + return -EINVAL; + + net->mtu = new_mtu; + return 0; +} + +static struct net_device_stats *gprs_get_stats(struct net_device *net) +{ + struct gprs_dev *dev = netdev_priv(net); + + return &dev->stats; +} + +static void gprs_setup(struct net_device *net) +{ + net->features = NETIF_F_FRAGLIST; + net->type = ARPHRD_NONE; + net->flags = IFF_POINTOPOINT | IFF_NOARP; + net->mtu = GPRS_DEFAULT_MTU; + net->hard_header_len = 0; + net->addr_len = 0; + net->tx_queue_len = 10; + + net->destructor = free_netdev; + net->hard_start_xmit = gprs_xmit; /* mandatory */ + net->change_mtu = gprs_set_mtu; + net->get_stats = gprs_get_stats; +} + +/* + * External interface + */ + +/* + * Attach a GPRS interface to a datagram socket. + * Returns the interface index on success, negative error code on error. + */ +int gprs_attach(struct sock *sk) +{ + static const char ifname[] = "gprs%d"; + struct gprs_dev *dev; + struct net_device *net; + int err; + + if (unlikely(sk->sk_type == SOCK_STREAM)) + return -EINVAL; /* need packet boundaries */ + + /* Create net device */ + net = alloc_netdev(sizeof(*dev), ifname, gprs_setup); + if (!net) + return -ENOMEM; + dev = netdev_priv(net); + dev->net = net; + dev->tx_max = 0; + spin_lock_init(&dev->tx_lock); + skb_queue_head_init(&dev->tx_queue); + INIT_WORK(&dev->tx_work, gprs_tx); + + netif_stop_queue(net); + err = register_netdev(net); + if (err) { + free_netdev(net); + return err; + } + + lock_sock(sk); + if (unlikely(sk->sk_user_data)) { + err = -EBUSY; + goto out_rel; + } + if (unlikely((1 << sk->sk_state & (TCPF_CLOSE|TCPF_LISTEN)) || + sock_flag(sk, SOCK_DEAD))) { + err = -EINVAL; + goto out_rel; + } + sk->sk_user_data = dev; + dev->old_state_change = sk->sk_state_change; + dev->old_data_ready = sk->sk_data_ready; + dev->old_write_space = sk->sk_write_space; + sk->sk_state_change = gprs_state_change; + sk->sk_data_ready = gprs_data_ready; + sk->sk_write_space = gprs_write_space; + release_sock(sk); + + sock_hold(sk); + dev->sk = sk; + + printk(KERN_DEBUG"%s: attached\n", net->name); + gprs_write_space(sk); /* kick off TX */ + return net->ifindex; + +out_rel: + release_sock(sk); + unregister_netdev(net); + return err; +} + +void gprs_detach(struct sock *sk) +{ + struct gprs_dev *dev = sk->sk_user_data; + struct net_device *net = dev->net; + + lock_sock(sk); + sk->sk_user_data = NULL; + sk->sk_state_change = dev->old_state_change; + sk->sk_data_ready = dev->old_data_ready; + sk->sk_write_space = dev->old_write_space; + release_sock(sk); + + printk(KERN_DEBUG"%s: detached\n", net->name); + unregister_netdev(net); + flush_scheduled_work(); + sock_put(sk); + skb_queue_purge(&dev->tx_queue); +} diff --git a/net/phonet/pep.c b/net/phonet/pep.c index d564d07cb797..bc6d50f83249 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -31,6 +31,7 @@ #include #include #include +#include /* sk_state values: * TCP_CLOSE sock not in use yet @@ -612,6 +613,7 @@ drop: static void pep_sock_close(struct sock *sk, long timeout) { struct pep_sock *pn = pep_sk(sk); + int ifindex = 0; sk_common_release(sk); @@ -625,7 +627,12 @@ static void pep_sock_close(struct sock *sk, long timeout) sk_del_node_init(sknode); sk->sk_state = TCP_CLOSE; } + ifindex = pn->ifindex; + pn->ifindex = 0; release_sock(sk); + + if (ifindex) + gprs_detach(sk); } static int pep_wait_connreq(struct sock *sk, int noblock) @@ -730,12 +737,107 @@ static int pep_init(struct sock *sk) return 0; } +static int pep_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int optlen) +{ + struct pep_sock *pn = pep_sk(sk); + int val = 0, err = 0; + + if (level != SOL_PNPIPE) + return -ENOPROTOOPT; + if (optlen >= sizeof(int)) { + if (get_user(val, (int __user *) optval)) + return -EFAULT; + } + + lock_sock(sk); + switch (optname) { + case PNPIPE_ENCAP: + if (val && val != PNPIPE_ENCAP_IP) { + err = -EINVAL; + break; + } + if (!pn->ifindex == !val) + break; /* Nothing to do! */ + if (!capable(CAP_NET_ADMIN)) { + err = -EPERM; + break; + } + if (val) { + release_sock(sk); + err = gprs_attach(sk); + if (err > 0) { + pn->ifindex = err; + err = 0; + } + } else { + pn->ifindex = 0; + release_sock(sk); + gprs_detach(sk); + err = 0; + } + goto out_norel; + default: + err = -ENOPROTOOPT; + } + release_sock(sk); + +out_norel: + return err; +} + +static int pep_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) +{ + struct pep_sock *pn = pep_sk(sk); + int len, val; + + if (level != SOL_PNPIPE) + return -ENOPROTOOPT; + if (get_user(len, optlen)) + return -EFAULT; + + switch (optname) { + case PNPIPE_ENCAP: + val = pn->ifindex ? PNPIPE_ENCAP_IP : PNPIPE_ENCAP_NONE; + break; + case PNPIPE_IFINDEX: + val = pn->ifindex; + break; + default: + return -ENOPROTOOPT; + } + + len = min_t(unsigned int, sizeof(int), len); + if (put_user(len, optlen)) + return -EFAULT; + if (put_user(val, (int __user *) optval)) + return -EFAULT; + return 0; +} + +static int pipe_skb_send(struct sock *sk, struct sk_buff *skb) +{ + struct pep_sock *pn = pep_sk(sk); + struct pnpipehdr *ph; + + skb_push(skb, 3); + skb_reset_transport_header(skb); + ph = pnp_hdr(skb); + ph->utid = 0; + ph->message_id = PNS_PIPE_DATA; + ph->pipe_handle = pn->pipe_handle; + if (pn_flow_safe(pn->tx_fc) && pn->tx_credits) + pn->tx_credits--; + + return pn_skb_send(sk, skb, &pipe_srv); +} + static int pep_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) { struct pep_sock *pn = pep_sk(sk); struct sk_buff *skb = NULL; - struct pnpipehdr *ph; long timeo; int flags = msg->msg_flags; int err, done; @@ -801,16 +903,7 @@ disabled: if (err < 0) goto out; - __skb_push(skb, 3); - skb_reset_transport_header(skb); - ph = pnp_hdr(skb); - ph->utid = 0; - ph->message_id = PNS_PIPE_DATA; - ph->pipe_handle = pn->pipe_handle; - if (pn_flow_safe(pn->tx_fc)) /* credit-based flow control */ - pn->tx_credits--; - - err = pn_skb_send(sk, skb, &pipe_srv); + err = pipe_skb_send(sk, skb); if (err >= 0) err = len; /* success! */ skb = NULL; @@ -820,6 +913,50 @@ out: return err; } +int pep_writeable(struct sock *sk) +{ + struct pep_sock *pn = pep_sk(sk); + + return (sk->sk_state == TCP_ESTABLISHED) ? pn->tx_credits : 0; +} + +int pep_write(struct sock *sk, struct sk_buff *skb) +{ + struct sk_buff *rskb, *fs; + int flen = 0; + + rskb = alloc_skb(MAX_PNPIPE_HEADER, GFP_ATOMIC); + if (!rskb) { + kfree_skb(skb); + return -ENOMEM; + } + skb_shinfo(rskb)->frag_list = skb; + rskb->len += skb->len; + rskb->data_len += rskb->len; + rskb->truesize += rskb->len; + + /* Avoid nested fragments */ + for (fs = skb_shinfo(skb)->frag_list; fs; fs = fs->next) + flen += fs->len; + skb->next = skb_shinfo(skb)->frag_list; + skb_shinfo(skb)->frag_list = NULL; + skb->len -= flen; + skb->data_len -= flen; + skb->truesize -= flen; + + skb_reserve(rskb, MAX_PHONET_HEADER + 3); + return pipe_skb_send(sk, rskb); +} + +struct sk_buff *pep_read(struct sock *sk) +{ + struct sk_buff *skb = skb_dequeue(&sk->sk_receive_queue); + + if (sk->sk_state == TCP_ESTABLISHED) + pipe_grant_credits(sk); + return skb; +} + static int pep_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len) @@ -902,6 +1039,8 @@ static struct proto pep_proto = { .accept = pep_sock_accept, .ioctl = pep_ioctl, .init = pep_init, + .setsockopt = pep_setsockopt, + .getsockopt = pep_getsockopt, .sendmsg = pep_sendmsg, .recvmsg = pep_recvmsg, .backlog_rcv = pep_do_rcv, diff --git a/net/phonet/socket.c b/net/phonet/socket.c index a9c3d1f2e9db..d81740187fb4 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -342,11 +342,11 @@ const struct proto_ops phonet_stream_ops = { .ioctl = pn_socket_ioctl, .listen = pn_socket_listen, .shutdown = sock_no_shutdown, - .setsockopt = sock_no_setsockopt, - .getsockopt = sock_no_getsockopt, + .setsockopt = sock_common_setsockopt, + .getsockopt = sock_common_getsockopt, #ifdef CONFIG_COMPAT - .compat_setsockopt = sock_no_setsockopt, - .compat_getsockopt = compat_sock_no_getsockopt, + .compat_setsockopt = compat_sock_common_setsockopt, + .compat_getsockopt = compat_sock_common_getsockopt, #endif .sendmsg = pn_socket_sendmsg, .recvmsg = sock_common_recvmsg, -- cgit v1.2.1 From 13c1d18931ebb5cf407cb348ef2cd6284d68902d Mon Sep 17 00:00:00 2001 From: Arnaud Ebalard Date: Sun, 5 Oct 2008 13:33:42 -0700 Subject: xfrm: MIGRATE enhancements (draft-ebalard-mext-pfkey-enhanced-migrate) Provides implementation of the enhancements of XFRM/PF_KEY MIGRATE mechanism specified in draft-ebalard-mext-pfkey-enhanced-migrate-00. Defines associated PF_KEY SADB_X_EXT_KMADDRESS extension and XFRM/netlink XFRMA_KMADDRESS attribute. Signed-off-by: Arnaud Ebalard Signed-off-by: David S. Miller --- net/key/af_key.c | 86 ++++++++++++++++++++++++++++++++++++++++---------- net/xfrm/xfrm_policy.c | 5 +-- net/xfrm/xfrm_state.c | 5 +-- net/xfrm/xfrm_user.c | 57 ++++++++++++++++++++++++++------- 4 files changed, 120 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index 7ae641df70bd..362fe317e1f3 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -398,6 +398,7 @@ static u8 sadb_ext_min_len[] = { [SADB_X_EXT_NAT_T_DPORT] = (u8) sizeof(struct sadb_x_nat_t_port), [SADB_X_EXT_NAT_T_OA] = (u8) sizeof(struct sadb_address), [SADB_X_EXT_SEC_CTX] = (u8) sizeof(struct sadb_x_sec_ctx), + [SADB_X_EXT_KMADDRESS] = (u8) sizeof(struct sadb_x_kmaddress), }; /* Verify sadb_address_{len,prefixlen} against sa_family. */ @@ -2384,24 +2385,21 @@ static int pfkey_sockaddr_pair_size(sa_family_t family) return PFKEY_ALIGN8(pfkey_sockaddr_len(family) * 2); } -static int parse_sockaddr_pair(struct sadb_x_ipsecrequest *rq, +static int parse_sockaddr_pair(struct sockaddr *sa, int ext_len, xfrm_address_t *saddr, xfrm_address_t *daddr, u16 *family) { - u8 *sa = (u8 *) (rq + 1); int af, socklen; - if (rq->sadb_x_ipsecrequest_len < - pfkey_sockaddr_pair_size(((struct sockaddr *)sa)->sa_family)) + if (ext_len < pfkey_sockaddr_pair_size(sa->sa_family)) return -EINVAL; - af = pfkey_sockaddr_extract((struct sockaddr *) sa, - saddr); + af = pfkey_sockaddr_extract(sa, saddr); if (!af) return -EINVAL; socklen = pfkey_sockaddr_len(af); - if (pfkey_sockaddr_extract((struct sockaddr *) (sa + socklen), + if (pfkey_sockaddr_extract((struct sockaddr *) (((u8 *)sa) + socklen), daddr) != af) return -EINVAL; @@ -2421,7 +2419,9 @@ static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len, return -EINVAL; /* old endoints */ - err = parse_sockaddr_pair(rq1, &m->old_saddr, &m->old_daddr, + err = parse_sockaddr_pair((struct sockaddr *)(rq1 + 1), + rq1->sadb_x_ipsecrequest_len, + &m->old_saddr, &m->old_daddr, &m->old_family); if (err) return err; @@ -2434,7 +2434,9 @@ static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len, return -EINVAL; /* new endpoints */ - err = parse_sockaddr_pair(rq2, &m->new_saddr, &m->new_daddr, + err = parse_sockaddr_pair((struct sockaddr *)(rq2 + 1), + rq2->sadb_x_ipsecrequest_len, + &m->new_saddr, &m->new_daddr, &m->new_family); if (err) return err; @@ -2460,29 +2462,40 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, int i, len, ret, err = -EINVAL; u8 dir; struct sadb_address *sa; + struct sadb_x_kmaddress *kma; struct sadb_x_policy *pol; struct sadb_x_ipsecrequest *rq; struct xfrm_selector sel; struct xfrm_migrate m[XFRM_MAX_DEPTH]; + struct xfrm_kmaddress k; if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC - 1], - ext_hdrs[SADB_EXT_ADDRESS_DST - 1]) || + ext_hdrs[SADB_EXT_ADDRESS_DST - 1]) || !ext_hdrs[SADB_X_EXT_POLICY - 1]) { err = -EINVAL; goto out; } + kma = ext_hdrs[SADB_X_EXT_KMADDRESS - 1]; pol = ext_hdrs[SADB_X_EXT_POLICY - 1]; - if (!pol) { - err = -EINVAL; - goto out; - } if (pol->sadb_x_policy_dir >= IPSEC_DIR_MAX) { err = -EINVAL; goto out; } + if (kma) { + /* convert sadb_x_kmaddress to xfrm_kmaddress */ + k.reserved = kma->sadb_x_kmaddress_reserved; + ret = parse_sockaddr_pair((struct sockaddr *)(kma + 1), + 8*(kma->sadb_x_kmaddress_len) - sizeof(*kma), + &k.local, &k.remote, &k.family); + if (ret < 0) { + err = ret; + goto out; + } + } + dir = pol->sadb_x_policy_dir - 1; memset(&sel, 0, sizeof(sel)); @@ -2527,7 +2540,8 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, goto out; } - return xfrm_migrate(&sel, dir, XFRM_POLICY_TYPE_MAIN, m, i); + return xfrm_migrate(&sel, dir, XFRM_POLICY_TYPE_MAIN, m, i, + kma ? &k : NULL); out: return err; @@ -3319,6 +3333,32 @@ static int set_sadb_address(struct sk_buff *skb, int sasize, int type, return 0; } + +static int set_sadb_kmaddress(struct sk_buff *skb, struct xfrm_kmaddress *k) +{ + struct sadb_x_kmaddress *kma; + u8 *sa; + int family = k->family; + int socklen = pfkey_sockaddr_len(family); + int size_req; + + size_req = (sizeof(struct sadb_x_kmaddress) + + pfkey_sockaddr_pair_size(family)); + + kma = (struct sadb_x_kmaddress *)skb_put(skb, size_req); + memset(kma, 0, size_req); + kma->sadb_x_kmaddress_len = size_req / 8; + kma->sadb_x_kmaddress_exttype = SADB_X_EXT_KMADDRESS; + kma->sadb_x_kmaddress_reserved = k->reserved; + + sa = (u8 *)(kma + 1); + if (!pfkey_sockaddr_fill(&k->local, 0, (struct sockaddr *)sa, family) || + !pfkey_sockaddr_fill(&k->remote, 0, (struct sockaddr *)(sa+socklen), family)) + return -EINVAL; + + return 0; +} + static int set_ipsecrequest(struct sk_buff *skb, uint8_t proto, uint8_t mode, int level, uint32_t reqid, uint8_t family, @@ -3351,7 +3391,8 @@ static int set_ipsecrequest(struct sk_buff *skb, #ifdef CONFIG_NET_KEY_MIGRATE static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_bundles) + struct xfrm_migrate *m, int num_bundles, + struct xfrm_kmaddress *k) { int i; int sasize_sel; @@ -3368,6 +3409,12 @@ static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, if (num_bundles <= 0 || num_bundles > XFRM_MAX_DEPTH) return -EINVAL; + if (k != NULL) { + /* addresses for KM */ + size += PFKEY_ALIGN8(sizeof(struct sadb_x_kmaddress) + + pfkey_sockaddr_pair_size(k->family)); + } + /* selector */ sasize_sel = pfkey_sockaddr_size(sel->family); if (!sasize_sel) @@ -3404,6 +3451,10 @@ static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, hdr->sadb_msg_seq = 0; hdr->sadb_msg_pid = 0; + /* Addresses to be used by KM for negotiation, if ext is available */ + if (k != NULL && (set_sadb_kmaddress(skb, k) < 0)) + return -EINVAL; + /* selector src */ set_sadb_address(skb, sasize_sel, SADB_EXT_ADDRESS_SRC, sel); @@ -3449,7 +3500,8 @@ err: } #else static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_bundles) + struct xfrm_migrate *m, int num_bundles, + struct xfrm_kmaddress *k) { return -ENOPROTOOPT; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index b7ec08025ffb..832b47c1de80 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2679,7 +2679,8 @@ static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate) } int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_migrate) + struct xfrm_migrate *m, int num_migrate, + struct xfrm_kmaddress *k) { int i, err, nx_cur = 0, nx_new = 0; struct xfrm_policy *pol = NULL; @@ -2723,7 +2724,7 @@ int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type, } /* Stage 5 - announce */ - km_migrate(sel, dir, type, m, num_migrate); + km_migrate(sel, dir, type, m, num_migrate, k); xfrm_pol_put(pol); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 747fd8c291a7..508337f97249 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1814,7 +1814,8 @@ EXPORT_SYMBOL(km_policy_expired); #ifdef CONFIG_XFRM_MIGRATE int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_migrate) + struct xfrm_migrate *m, int num_migrate, + struct xfrm_kmaddress *k) { int err = -EINVAL; int ret; @@ -1823,7 +1824,7 @@ int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type, read_lock(&xfrm_km_lock); list_for_each_entry(km, &xfrm_km_list, list) { if (km->migrate) { - ret = km->migrate(sel, dir, type, m, num_migrate); + ret = km->migrate(sel, dir, type, m, num_migrate, k); if (!ret) err = ret; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 76f75df21e15..4a8a1abb59ee 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1710,12 +1710,23 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, #ifdef CONFIG_XFRM_MIGRATE static int copy_from_user_migrate(struct xfrm_migrate *ma, + struct xfrm_kmaddress *k, struct nlattr **attrs, int *num) { struct nlattr *rt = attrs[XFRMA_MIGRATE]; struct xfrm_user_migrate *um; int i, num_migrate; + if (k != NULL) { + struct xfrm_user_kmaddress *uk; + + uk = nla_data(attrs[XFRMA_KMADDRESS]); + memcpy(&k->local, &uk->local, sizeof(k->local)); + memcpy(&k->remote, &uk->remote, sizeof(k->remote)); + k->family = uk->family; + k->reserved = uk->reserved; + } + um = nla_data(rt); num_migrate = nla_len(rt) / sizeof(*um); @@ -1745,6 +1756,7 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, { struct xfrm_userpolicy_id *pi = nlmsg_data(nlh); struct xfrm_migrate m[XFRM_MAX_DEPTH]; + struct xfrm_kmaddress km, *kmp; u8 type; int err; int n = 0; @@ -1752,19 +1764,20 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, if (attrs[XFRMA_MIGRATE] == NULL) return -EINVAL; + kmp = attrs[XFRMA_KMADDRESS] ? &km : NULL; + err = copy_from_user_policy_type(&type, attrs); if (err) return err; - err = copy_from_user_migrate((struct xfrm_migrate *)m, - attrs, &n); + err = copy_from_user_migrate((struct xfrm_migrate *)m, kmp, attrs, &n); if (err) return err; if (!n) return 0; - xfrm_migrate(&pi->sel, pi->dir, type, m, n); + xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp); return 0; } @@ -1795,16 +1808,30 @@ static int copy_to_user_migrate(struct xfrm_migrate *m, struct sk_buff *skb) return nla_put(skb, XFRMA_MIGRATE, sizeof(um), &um); } -static inline size_t xfrm_migrate_msgsize(int num_migrate) +static int copy_to_user_kmaddress(struct xfrm_kmaddress *k, struct sk_buff *skb) +{ + struct xfrm_user_kmaddress uk; + + memset(&uk, 0, sizeof(uk)); + uk.family = k->family; + uk.reserved = k->reserved; + memcpy(&uk.local, &k->local, sizeof(uk.local)); + memcpy(&uk.remote, &k->local, sizeof(uk.remote)); + + return nla_put(skb, XFRMA_KMADDRESS, sizeof(uk), &uk); +} + +static inline size_t xfrm_migrate_msgsize(int num_migrate, int with_kma) { return NLMSG_ALIGN(sizeof(struct xfrm_userpolicy_id)) - + nla_total_size(sizeof(struct xfrm_user_migrate) * num_migrate) - + userpolicy_type_attrsize(); + + (with_kma ? nla_total_size(sizeof(struct xfrm_kmaddress)) : 0) + + nla_total_size(sizeof(struct xfrm_user_migrate) * num_migrate) + + userpolicy_type_attrsize(); } static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m, - int num_migrate, struct xfrm_selector *sel, - u8 dir, u8 type) + int num_migrate, struct xfrm_kmaddress *k, + struct xfrm_selector *sel, u8 dir, u8 type) { struct xfrm_migrate *mp; struct xfrm_userpolicy_id *pol_id; @@ -1821,6 +1848,9 @@ static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m, memcpy(&pol_id->sel, sel, sizeof(pol_id->sel)); pol_id->dir = dir; + if (k != NULL && (copy_to_user_kmaddress(k, skb) < 0)) + goto nlmsg_failure; + if (copy_to_user_policy_type(type, skb) < 0) goto nlmsg_failure; @@ -1836,23 +1866,25 @@ nlmsg_failure: } static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_migrate) + struct xfrm_migrate *m, int num_migrate, + struct xfrm_kmaddress *k) { struct sk_buff *skb; - skb = nlmsg_new(xfrm_migrate_msgsize(num_migrate), GFP_ATOMIC); + skb = nlmsg_new(xfrm_migrate_msgsize(num_migrate, !!k), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; /* build migrate */ - if (build_migrate(skb, m, num_migrate, sel, dir, type) < 0) + if (build_migrate(skb, m, num_migrate, k, sel, dir, type) < 0) BUG(); return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_MIGRATE, GFP_ATOMIC); } #else static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_migrate) + struct xfrm_migrate *m, int num_migrate, + struct xfrm_kmaddress *k) { return -ENOPROTOOPT; } @@ -1901,6 +1933,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_COADDR] = { .len = sizeof(xfrm_address_t) }, [XFRMA_POLICY_TYPE] = { .len = sizeof(struct xfrm_userpolicy_type)}, [XFRMA_MIGRATE] = { .len = sizeof(struct xfrm_user_migrate) }, + [XFRMA_KMADDRESS] = { .len = sizeof(struct xfrm_user_kmaddress) }, }; static struct xfrm_link { -- cgit v1.2.1 From 554794de7949d1a6279336404c066f974d4c2bde Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Mon, 6 Oct 2008 09:54:39 -0700 Subject: pkt_sched: Fix handling of gso skbs on requeuing Jay Cliburn noticed and diagnosed a bug triggered in dev_gso_skb_destructor() after last change from qdisc->gso_skb to qdisc->requeue list. Since gso_segmented skbs can't be queued to another list this patch brings back qdisc->gso_skb for them. Reported-by: Jay Cliburn Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 5e7e0bd38fe8..3db4cf1bd263 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -44,7 +44,10 @@ static inline int qdisc_qlen(struct Qdisc *q) static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) { - __skb_queue_head(&q->requeue, skb); + if (unlikely(skb->next)) + q->gso_skb = skb; + else + __skb_queue_head(&q->requeue, skb); __netif_schedule(q); return 0; @@ -52,7 +55,10 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) static inline struct sk_buff *dequeue_skb(struct Qdisc *q) { - struct sk_buff *skb = skb_peek(&q->requeue); + struct sk_buff *skb = q->gso_skb; + + if (!skb) + skb = skb_peek(&q->requeue); if (unlikely(skb)) { struct net_device *dev = qdisc_dev(q); @@ -60,10 +66,15 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q) /* check the reason of requeuing without tx lock first */ txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); - if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq)) - __skb_unlink(skb, &q->requeue); - else + if (!netif_tx_queue_stopped(txq) && + !netif_tx_queue_frozen(txq)) { + if (q->gso_skb) + q->gso_skb = NULL; + else + __skb_unlink(skb, &q->requeue); + } else { skb = NULL; + } } else { skb = q->dequeue(q); } @@ -548,6 +559,7 @@ void qdisc_destroy(struct Qdisc *qdisc) module_put(ops->owner); dev_put(qdisc_dev(qdisc)); + kfree_skb(qdisc->gso_skb); __skb_queue_purge(&qdisc->requeue); kfree((char *) qdisc - qdisc->padded); -- cgit v1.2.1 From 6252352d16f7b45a0fd42224f7e70e0288dc4480 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Mon, 6 Oct 2008 10:41:50 -0700 Subject: pkt_sched: Simplify dev_requeue_skb and dequeue_skb qdisc->requeue was planned to universally replace all requeuing code, but at the top level we never requeue more than one skb, so qdisc-> gso_skb is enough for this. qdisc->requeue would be used on the lower levels only for one level deep requeuing (like in sch_hfsc) after finishing all the changes. Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 3db4cf1bd263..31f6b614b59b 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -44,12 +44,9 @@ static inline int qdisc_qlen(struct Qdisc *q) static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) { - if (unlikely(skb->next)) - q->gso_skb = skb; - else - __skb_queue_head(&q->requeue, skb); - + q->gso_skb = skb; __netif_schedule(q); + return 0; } @@ -57,24 +54,16 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q) { struct sk_buff *skb = q->gso_skb; - if (!skb) - skb = skb_peek(&q->requeue); - if (unlikely(skb)) { struct net_device *dev = qdisc_dev(q); struct netdev_queue *txq; /* check the reason of requeuing without tx lock first */ txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); - if (!netif_tx_queue_stopped(txq) && - !netif_tx_queue_frozen(txq)) { - if (q->gso_skb) - q->gso_skb = NULL; - else - __skb_unlink(skb, &q->requeue); - } else { + if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq)) + q->gso_skb = NULL; + else skb = NULL; - } } else { skb = q->dequeue(q); } -- cgit v1.2.1 From c7004482e8dcb7c3c72666395cfa98a216a4fb70 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 6 Oct 2008 10:43:54 -0700 Subject: tcp: Respect SO_RCVLOWAT in tcp_poll(). Based upon a report by Vito Caputo. Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1ab341e5d3e0..7d81a1ee5507 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -384,13 +384,17 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) /* Connected? */ if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) { + int target = sock_rcvlowat(sk, 0, INT_MAX); + + if (tp->urg_seq == tp->copied_seq && + !sock_flag(sk, SOCK_URGINLINE) && + tp->urg_data) + target--; + /* Potential race condition. If read of tp below will * escape above sk->sk_state, we can be illegally awaken * in SYN_* states. */ - if ((tp->rcv_nxt != tp->copied_seq) && - (tp->urg_seq != tp->copied_seq || - tp->rcv_nxt != tp->copied_seq + 1 || - sock_flag(sk, SOCK_URGINLINE) || !tp->urg_data)) + if (tp->rcv_nxt - tp->copied_seq >= target) mask |= POLLIN | POLLRDNORM; if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { -- cgit v1.2.1