diff options
Diffstat (limited to 'include/linux/skbuff.h')
-rw-r--r-- | include/linux/skbuff.h | 544 |
1 files changed, 381 insertions, 163 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index abde271c18ae..85ab7d72b54c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -20,6 +20,8 @@ #include <linux/time.h> #include <linux/bug.h> #include <linux/cache.h> +#include <linux/rbtree.h> +#include <linux/socket.h> #include <linux/atomic.h> #include <asm/types.h> @@ -28,7 +30,6 @@ #include <linux/textsearch.h> #include <net/checksum.h> #include <linux/rcupdate.h> -#include <linux/dmaengine.h> #include <linux/hrtimer.h> #include <linux/dma-mapping.h> #include <linux/netdev_features.h> @@ -47,11 +48,29 @@ * * The hardware you're dealing with doesn't calculate the full checksum * (as in CHECKSUM_COMPLETE), but it does parse headers and verify checksums - * for specific protocols e.g. TCP/UDP/SCTP, then, for such packets it will - * set CHECKSUM_UNNECESSARY if their checksums are okay. skb->csum is still - * undefined in this case though. It is a bad option, but, unfortunately, - * nowadays most vendors do this. Apparently with the secret goal to sell - * you new devices, when you will add new protocol to your host, f.e. IPv6 8) + * for specific protocols. For such packets it will set CHECKSUM_UNNECESSARY + * if their checksums are okay. skb->csum is still undefined in this case + * though. It is a bad option, but, unfortunately, nowadays most vendors do + * this. Apparently with the secret goal to sell you new devices, when you + * will add new protocol to your host, f.e. IPv6 8) + * + * CHECKSUM_UNNECESSARY is applicable to following protocols: + * TCP: IPv6 and IPv4. + * UDP: IPv4 and IPv6. A device may apply CHECKSUM_UNNECESSARY to a + * zero UDP checksum for either IPv4 or IPv6, the networking stack + * may perform further validation in this case. + * GRE: only if the checksum is present in the header. + * SCTP: indicates the CRC in SCTP header has been validated. + * + * skb->csum_level indicates the number of consecutive checksums found in + * the packet minus one that have been verified as CHECKSUM_UNNECESSARY. + * For instance if a device receives an IPv6->UDP->GRE->IPv4->TCP packet + * and a device is able to verify the checksums for UDP (possibly zero), + * GRE (checksum flag is set), and TCP-- skb->csum_level would be set to + * two. If the device were only able to verify the UDP checksum and not + * GRE, either because it doesn't support GRE checksum of because GRE + * checksum is bad, skb->csum_level would be set to zero (TCP checksum is + * not considered in this case). * * CHECKSUM_COMPLETE: * @@ -112,6 +131,9 @@ #define CHECKSUM_COMPLETE 2 #define CHECKSUM_PARTIAL 3 +/* Maximum value in skb->csum_level */ +#define SKB_MAX_CSUM_LEVEL 3 + #define SKB_DATA_ALIGN(X) ALIGN(X, SMP_CACHE_BYTES) #define SKB_WITH_OVERHEAD(X) \ ((X) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) @@ -128,6 +150,8 @@ struct net_device; struct scatterlist; struct pipe_inode_info; +struct iov_iter; +struct napi_struct; #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct nf_conntrack { @@ -135,7 +159,7 @@ struct nf_conntrack { }; #endif -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) struct nf_bridge_info { atomic_t use; unsigned int mask; @@ -318,9 +342,9 @@ struct skb_shared_info { enum { - SKB_FCLONE_UNAVAILABLE, - SKB_FCLONE_ORIG, - SKB_FCLONE_CLONE, + SKB_FCLONE_UNAVAILABLE, /* skb has no fclone (from head_cache) */ + SKB_FCLONE_ORIG, /* orig skb (from fclone_cache) */ + SKB_FCLONE_CLONE, /* companion fclone skb (from fclone_cache) */ }; enum { @@ -349,8 +373,7 @@ enum { SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11, - SKB_GSO_MPLS = 1 << 12, - + SKB_GSO_TUNNEL_REMCSUM = 1 << 12, }; #if BITS_PER_LONG > 32 @@ -419,6 +442,7 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1, * @next: Next buffer in list * @prev: Previous buffer in list * @tstamp: Time we arrived/left + * @rbnode: RB tree node, alternative to next/prev for netem/tcp * @sk: Socket we are owned by * @dev: Device we arrived on/are leaving by * @cb: Control buffer. Free for use by every layer. Put private vars here @@ -452,6 +476,7 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1, * @tc_verd: traffic control verdict * @hash: the packet hash * @queue_mapping: Queue mapping for multiqueue devices + * @xmit_more: More SKBs are pending for this queue * @ndisc_nodetype: router type (from link layer) * @ooo_okay: allow the mapping of a socket to a queue to be changed * @l4_hash: indicate hash is a canonical 4-tuple hash over transport @@ -460,8 +485,6 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1, * @wifi_acked_valid: wifi_acked was set * @wifi_acked: whether frame was acked on wifi or not * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS - * @dma_cookie: a cookie to one of several possible DMA operations - * done by skb DMA functions * @napi_id: id of the NAPI struct this skb came from * @secmark: security marking * @mark: Generic packet mark @@ -484,15 +507,19 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1, */ struct sk_buff { - /* These two members must be first. */ - struct sk_buff *next; - struct sk_buff *prev; - union { - ktime_t tstamp; - struct skb_mstamp skb_mstamp; + struct { + /* These two members must be first. */ + struct sk_buff *next; + struct sk_buff *prev; + + union { + ktime_t tstamp; + struct skb_mstamp skb_mstamp; + }; + }; + struct rb_node rbnode; /* used in netem & tcp stack */ }; - struct sock *sk; struct net_device *dev; @@ -505,87 +532,102 @@ struct sk_buff { char cb[48] __aligned(8); unsigned long _skb_refdst; + void (*destructor)(struct sk_buff *skb); #ifdef CONFIG_XFRM struct sec_path *sp; #endif +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + struct nf_conntrack *nfct; +#endif +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + struct nf_bridge_info *nf_bridge; +#endif unsigned int len, data_len; __u16 mac_len, hdr_len; - union { - __wsum csum; - struct { - __u16 csum_start; - __u16 csum_offset; - }; - }; - __u32 priority; + + /* Following fields are _not_ copied in __copy_skb_header() + * Note that queue_mapping is here mostly to fill a hole. + */ kmemcheck_bitfield_begin(flags1); - __u8 ignore_df:1, - cloned:1, - ip_summed:2, + __u16 queue_mapping; + __u8 cloned:1, nohdr:1, - nfctinfo:3; - __u8 pkt_type:3, fclone:2, - ipvs_property:1, peeked:1, - nf_trace:1; + head_frag:1, + xmit_more:1; + /* one bit hole */ kmemcheck_bitfield_end(flags1); - __be16 protocol; - void (*destructor)(struct sk_buff *skb); -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) - struct nf_conntrack *nfct; -#endif -#ifdef CONFIG_BRIDGE_NETFILTER - struct nf_bridge_info *nf_bridge; -#endif - - int skb_iif; - - __u32 hash; - - __be16 vlan_proto; - __u16 vlan_tci; + /* fields enclosed in headers_start/headers_end are copied + * using a single memcpy() in __copy_skb_header() + */ + /* private: */ + __u32 headers_start[0]; + /* public: */ -#ifdef CONFIG_NET_SCHED - __u16 tc_index; /* traffic control index */ -#ifdef CONFIG_NET_CLS_ACT - __u16 tc_verd; /* traffic control verdict */ -#endif +/* if you move pkt_type around you also must adapt those constants */ +#ifdef __BIG_ENDIAN_BITFIELD +#define PKT_TYPE_MAX (7 << 5) +#else +#define PKT_TYPE_MAX 7 #endif +#define PKT_TYPE_OFFSET() offsetof(struct sk_buff, __pkt_type_offset) - __u16 queue_mapping; - kmemcheck_bitfield_begin(flags2); -#ifdef CONFIG_IPV6_NDISC_NODETYPE - __u8 ndisc_nodetype:2; -#endif + __u8 __pkt_type_offset[0]; + __u8 pkt_type:3; __u8 pfmemalloc:1; + __u8 ignore_df:1; + __u8 nfctinfo:3; + + __u8 nf_trace:1; + __u8 ip_summed:2; __u8 ooo_okay:1; __u8 l4_hash:1; __u8 sw_hash:1; __u8 wifi_acked_valid:1; __u8 wifi_acked:1; + __u8 no_fcs:1; - __u8 head_frag:1; - /* Encapsulation protocol and NIC drivers should use - * this flag to indicate to each other if the skb contains - * encapsulated packet or not and maybe use the inner packet - * headers if needed - */ + /* Indicates the inner headers are valid in the skbuff. */ __u8 encapsulation:1; __u8 encap_hdr_csum:1; __u8 csum_valid:1; __u8 csum_complete_sw:1; - /* 2/4 bit hole (depending on ndisc_nodetype presence) */ - kmemcheck_bitfield_end(flags2); + __u8 csum_level:2; + __u8 csum_bad:1; + +#ifdef CONFIG_IPV6_NDISC_NODETYPE + __u8 ndisc_nodetype:2; +#endif + __u8 ipvs_property:1; + __u8 inner_protocol_type:1; + __u8 remcsum_offload:1; + /* 3 or 5 bit hole */ + +#ifdef CONFIG_NET_SCHED + __u16 tc_index; /* traffic control index */ +#ifdef CONFIG_NET_CLS_ACT + __u16 tc_verd; /* traffic control verdict */ +#endif +#endif -#if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL union { - unsigned int napi_id; - dma_cookie_t dma_cookie; + __wsum csum; + struct { + __u16 csum_start; + __u16 csum_offset; + }; }; + __u32 priority; + int skb_iif; + __u32 hash; + __be16 vlan_proto; + __u16 vlan_tci; +#ifdef CONFIG_NET_RX_BUSY_POLL + unsigned int napi_id; #endif #ifdef CONFIG_NETWORK_SECMARK __u32 secmark; @@ -596,13 +638,24 @@ struct sk_buff { __u32 reserved_tailroom; }; - __be16 inner_protocol; + union { + __be16 inner_protocol; + __u8 inner_ipproto; + }; + __u16 inner_transport_header; __u16 inner_network_header; __u16 inner_mac_header; + + __be16 protocol; __u16 transport_header; __u16 network_header; __u16 mac_header; + + /* private: */ + __u32 headers_end[0]; + /* public: */ + /* These elements must be at the end, see alloc_skb() for details. */ sk_buff_data_t tail; sk_buff_data_t end; @@ -621,6 +674,7 @@ struct sk_buff { #define SKB_ALLOC_FCLONE 0x01 #define SKB_ALLOC_RX 0x02 +#define SKB_ALLOC_NAPI 0x04 /* Returns true if the skb was allocated from PFMEMALLOC reserves */ static inline bool skb_pfmemalloc(const struct sk_buff *skb) @@ -665,9 +719,6 @@ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) skb->_skb_refdst = (unsigned long)dst; } -void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst, - bool force); - /** * skb_dst_set_noref - sets skb dst, hopefully, without taking reference * @skb: buffer @@ -680,24 +731,8 @@ void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst, */ static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) { - __skb_dst_set_noref(skb, dst, false); -} - -/** - * skb_dst_set_noref_force - sets skb dst, without taking reference - * @skb: buffer - * @dst: dst entry - * - * Sets skb dst, assuming a reference was not taken on dst. - * No reference is taken and no dst_release will be called. While for - * cached dsts deferred reclaim is a basic feature, for entries that are - * not cached it is caller's job to guarantee that last dst_release for - * provided dst happens when nobody uses it, eg. after a RCU grace period. - */ -static inline void skb_dst_set_noref_force(struct sk_buff *skb, - struct dst_entry *dst) -{ - __skb_dst_set_noref(skb, dst, true); + WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); + skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF; } /** @@ -734,6 +769,41 @@ static inline struct sk_buff *alloc_skb(unsigned int size, return __alloc_skb(size, priority, 0, NUMA_NO_NODE); } +struct sk_buff *alloc_skb_with_frags(unsigned long header_len, + unsigned long data_len, + int max_page_order, + int *errcode, + gfp_t gfp_mask); + +/* Layout of fast clones : [skb1][skb2][fclone_ref] */ +struct sk_buff_fclones { + struct sk_buff skb1; + + struct sk_buff skb2; + + atomic_t fclone_ref; +}; + +/** + * skb_fclone_busy - check if fclone is busy + * @skb: buffer + * + * Returns true is skb is a fast clone, and its clone is not freed. + * Some drivers call skb_orphan() in their ndo_start_xmit(), + * so we also check that this didnt happen. + */ +static inline bool skb_fclone_busy(const struct sock *sk, + const struct sk_buff *skb) +{ + const struct sk_buff_fclones *fclones; + + fclones = container_of(skb, struct sk_buff_fclones, skb1); + + return skb->fclone == SKB_FCLONE_ORIG && + atomic_read(&fclones->fclone_ref) > 1 && + fclones->skb2.sk == sk; +} + static inline struct sk_buff *alloc_skb_fclone(unsigned int size, gfp_t priority) { @@ -1042,6 +1112,7 @@ static inline int skb_header_cloned(const struct sk_buff *skb) * Drop a reference to the header part of the buffer. This is done * by acquiring a payload reference. You must not read from the header * part of skb->data after this. + * Note : Check if you can use __skb_header_release() instead. */ static inline void skb_header_release(struct sk_buff *skb) { @@ -1051,6 +1122,20 @@ static inline void skb_header_release(struct sk_buff *skb) } /** + * __skb_header_release - release reference to header + * @skb: buffer to operate on + * + * Variant of skb_header_release() assuming skb is private to caller. + * We can avoid one atomic operation. + */ +static inline void __skb_header_release(struct sk_buff *skb) +{ + skb->nohdr = 1; + atomic_set(&skb_shinfo(skb)->dataref, 1 + (1 << SKB_DATAREF_SHIFT)); +} + + +/** * skb_shared - is the buffer shared * @skb: buffer to check * @@ -1116,7 +1201,12 @@ static inline struct sk_buff *skb_unshare(struct sk_buff *skb, might_sleep_if(pri & __GFP_WAIT); if (skb_cloned(skb)) { struct sk_buff *nskb = skb_copy(skb, pri); - kfree_skb(skb); /* Free our shared copy */ + + /* Free our shared copy */ + if (likely(nskb)) + consume_skb(skb); + else + kfree_skb(skb); skb = nskb; } return skb; @@ -1675,6 +1765,23 @@ static inline void skb_reserve(struct sk_buff *skb, int len) skb->tail += len; } +#define ENCAP_TYPE_ETHER 0 +#define ENCAP_TYPE_IPPROTO 1 + +static inline void skb_set_inner_protocol(struct sk_buff *skb, + __be16 protocol) +{ + skb->inner_protocol = protocol; + skb->inner_protocol_type = ENCAP_TYPE_ETHER; +} + +static inline void skb_set_inner_ipproto(struct sk_buff *skb, + __u8 ipproto) +{ + skb->inner_ipproto = ipproto; + skb->inner_protocol_type = ENCAP_TYPE_IPPROTO; +} + static inline void skb_reset_inner_headers(struct sk_buff *skb) { skb->inner_mac_header = skb->mac_header; @@ -1860,18 +1967,6 @@ static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len) return pskb_may_pull(skb, skb_network_offset(skb) + len); } -static inline void skb_pop_rcv_encapsulation(struct sk_buff *skb) -{ - /* Only continue with checksum unnecessary if device indicated - * it is valid across encapsulation (skb->encapsulation was set). - */ - if (skb->ip_summed == CHECKSUM_UNNECESSARY && !skb->encapsulation) - skb->ip_summed = CHECKSUM_NONE; - - skb->encapsulation = 0; - skb->csum_valid = 0; -} - /* * CPUs often take a performance hit when accessing unaligned memory * locations. The actual performance hit varies, it can be small if the @@ -2071,47 +2166,61 @@ static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev, return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC); } +void *napi_alloc_frag(unsigned int fragsz); +struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, + unsigned int length, gfp_t gfp_mask); +static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi, + unsigned int length) +{ + return __napi_alloc_skb(napi, length, GFP_ATOMIC); +} + /** - * __skb_alloc_pages - allocate pages for ps-rx on a skb and preserve pfmemalloc data - * @gfp_mask: alloc_pages_node mask. Set __GFP_NOMEMALLOC if not for network packet RX - * @skb: skb to set pfmemalloc on if __GFP_MEMALLOC is used - * @order: size of the allocation + * __dev_alloc_pages - allocate page for network Rx + * @gfp_mask: allocation priority. Set __GFP_NOMEMALLOC if not for network Rx + * @order: size of the allocation * - * Allocate a new page. + * Allocate a new page. * - * %NULL is returned if there is no free memory. + * %NULL is returned if there is no free memory. */ -static inline struct page *__skb_alloc_pages(gfp_t gfp_mask, - struct sk_buff *skb, - unsigned int order) -{ - struct page *page; - - gfp_mask |= __GFP_COLD; - - if (!(gfp_mask & __GFP_NOMEMALLOC)) - gfp_mask |= __GFP_MEMALLOC; +static inline struct page *__dev_alloc_pages(gfp_t gfp_mask, + unsigned int order) +{ + /* This piece of code contains several assumptions. + * 1. This is for device Rx, therefor a cold page is preferred. + * 2. The expectation is the user wants a compound page. + * 3. If requesting a order 0 page it will not be compound + * due to the check to see if order has a value in prep_new_page + * 4. __GFP_MEMALLOC is ignored if __GFP_NOMEMALLOC is set due to + * code in gfp_to_alloc_flags that should be enforcing this. + */ + gfp_mask |= __GFP_COLD | __GFP_COMP | __GFP_MEMALLOC; - page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order); - if (skb && page && page->pfmemalloc) - skb->pfmemalloc = true; + return alloc_pages_node(NUMA_NO_NODE, gfp_mask, order); +} - return page; +static inline struct page *dev_alloc_pages(unsigned int order) +{ + return __dev_alloc_pages(GFP_ATOMIC, order); } /** - * __skb_alloc_page - allocate a page for ps-rx for a given skb and preserve pfmemalloc data - * @gfp_mask: alloc_pages_node mask. Set __GFP_NOMEMALLOC if not for network packet RX - * @skb: skb to set pfmemalloc on if __GFP_MEMALLOC is used + * __dev_alloc_page - allocate a page for network Rx + * @gfp_mask: allocation priority. Set __GFP_NOMEMALLOC if not for network Rx * - * Allocate a new page. + * Allocate a new page. * - * %NULL is returned if there is no free memory. + * %NULL is returned if there is no free memory. */ -static inline struct page *__skb_alloc_page(gfp_t gfp_mask, - struct sk_buff *skb) +static inline struct page *__dev_alloc_page(gfp_t gfp_mask) +{ + return __dev_alloc_pages(gfp_mask, 0); +} + +static inline struct page *dev_alloc_page(void) { - return __skb_alloc_pages(gfp_mask, skb, 0); + return __dev_alloc_page(GFP_ATOMIC); } /** @@ -2343,7 +2452,6 @@ static inline int skb_cow_head(struct sk_buff *skb, unsigned int headroom) * is untouched. Otherwise it is extended. Returns zero on * success. The skb is freed on error. */ - static inline int skb_padto(struct sk_buff *skb, unsigned int len) { unsigned int size = skb->len; @@ -2352,6 +2460,29 @@ static inline int skb_padto(struct sk_buff *skb, unsigned int len) return skb_pad(skb, len - size); } +/** + * skb_put_padto - increase size and pad an skbuff up to a minimal size + * @skb: buffer to pad + * @len: minimal length + * + * Pads up a buffer to ensure the trailing bytes exist and are + * blanked. If the buffer already contains sufficient data it + * is untouched. Otherwise it is extended. Returns zero on + * success. The skb is freed on error. + */ +static inline int skb_put_padto(struct sk_buff *skb, unsigned int len) +{ + unsigned int size = skb->len; + + if (unlikely(size < len)) { + len -= size; + if (skb_pad(skb, len)) + return -ENOMEM; + __skb_put(skb, len); + } + return 0; +} + static inline int skb_add_data(struct sk_buff *skb, char __user *from, int copy) { @@ -2524,18 +2655,18 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, int *err); unsigned int datagram_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); -int skb_copy_datagram_iovec(const struct sk_buff *from, int offset, - struct iovec *to, int size); -int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, int hlen, - struct iovec *iov); -int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, - const struct iovec *from, int from_offset, - int len); -int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *frm, - int offset, size_t count); -int skb_copy_datagram_const_iovec(const struct sk_buff *from, int offset, - const struct iovec *to, int to_offset, - int size); +int skb_copy_datagram_iter(const struct sk_buff *from, int offset, + struct iov_iter *to, int size); +static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset, + struct msghdr *msg, int size) +{ + return skb_copy_datagram_iter(from, offset, &msg->msg_iter, size); +} +int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, int hlen, + struct msghdr *msg); +int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, + struct iov_iter *from, int len); +int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm); void skb_free_datagram(struct sock *sk, struct sk_buff *skb); void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb); int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags); @@ -2556,6 +2687,20 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet); unsigned int skb_gso_transport_seglen(const struct sk_buff *skb); struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); struct sk_buff *skb_vlan_untag(struct sk_buff *skb); +int skb_ensure_writable(struct sk_buff *skb, int write_len); +int skb_vlan_pop(struct sk_buff *skb); +int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci); + +static inline int memcpy_from_msg(void *data, struct msghdr *msg, int len) +{ + /* XXX: stripping const */ + return memcpy_fromiovec(data, (struct iovec *)msg->msg_iter.iov, len); +} + +static inline int memcpy_to_msg(struct msghdr *msg, void *data, int len) +{ + return copy_to_iter(data, len, &msg->msg_iter) == len ? 0 : -EFAULT; +} struct skb_checksum_ops { __wsum (*update)(const void *mem, int len, __wsum wsum); @@ -2567,20 +2712,26 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum csum); -static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, - int len, void *buffer) +static inline void *__skb_header_pointer(const struct sk_buff *skb, int offset, + int len, void *data, int hlen, void *buffer) { - int hlen = skb_headlen(skb); - if (hlen - offset >= len) - return skb->data + offset; + return data + offset; - if (skb_copy_bits(skb, offset, buffer, len) < 0) + if (!skb || + skb_copy_bits(skb, offset, buffer, len) < 0) return NULL; return buffer; } +static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, + int len, void *buffer) +{ + return __skb_header_pointer(skb, offset, len, skb->data, + skb_headlen(skb), buffer); +} + /** * skb_needs_linearize - check if we need to linearize a given skb * depending on the given device features. @@ -2671,6 +2822,8 @@ static inline ktime_t net_invalid_timestamp(void) return ktime_set(0, 0); } +struct sk_buff *skb_clone_sk(struct sk_buff *skb); + #ifdef CONFIG_NETWORK_PHY_TIMESTAMPING void skb_clone_tx_timestamp(struct sk_buff *skb); @@ -2786,6 +2939,42 @@ static inline __sum16 skb_checksum_complete(struct sk_buff *skb) 0 : __skb_checksum_complete(skb); } +static inline void __skb_decr_checksum_unnecessary(struct sk_buff *skb) +{ + if (skb->ip_summed == CHECKSUM_UNNECESSARY) { + if (skb->csum_level == 0) + skb->ip_summed = CHECKSUM_NONE; + else + skb->csum_level--; + } +} + +static inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb) +{ + if (skb->ip_summed == CHECKSUM_UNNECESSARY) { + if (skb->csum_level < SKB_MAX_CSUM_LEVEL) + skb->csum_level++; + } else if (skb->ip_summed == CHECKSUM_NONE) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->csum_level = 0; + } +} + +static inline void __skb_mark_checksum_bad(struct sk_buff *skb) +{ + /* Mark current checksum as bad (typically called from GRO + * path). In the case that ip_summed is CHECKSUM_NONE + * this must be the first checksum encountered in the packet. + * When ip_summed is CHECKSUM_UNNECESSARY, this is the first + * checksum after the last one validated. For UDP, a zero + * checksum can not be marked as bad. + */ + + if (skb->ip_summed == CHECKSUM_NONE || + skb->ip_summed == CHECKSUM_UNNECESSARY) + skb->csum_bad = 1; +} + /* Check if we need to perform checksum complete validation. * * Returns true if checksum complete is needed, false otherwise @@ -2797,6 +2986,7 @@ static inline bool __skb_checksum_validate_needed(struct sk_buff *skb, { if (skb_csum_unnecessary(skb) || (zero_okay && !check)) { skb->csum_valid = 1; + __skb_decr_checksum_unnecessary(skb); return false; } @@ -2826,6 +3016,9 @@ static inline __sum16 __skb_checksum_validate_complete(struct sk_buff *skb, skb->csum_valid = 1; return 0; } + } else if (skb->csum_bad) { + /* ip_summed == CHECKSUM_NONE in this case */ + return 1; } skb->csum = psum; @@ -2883,6 +3076,26 @@ static inline __wsum null_compute_pseudo(struct sk_buff *skb, int proto) #define skb_checksum_simple_validate(skb) \ __skb_checksum_validate(skb, 0, true, false, 0, null_compute_pseudo) +static inline bool __skb_checksum_convert_check(struct sk_buff *skb) +{ + return (skb->ip_summed == CHECKSUM_NONE && + skb->csum_valid && !skb->csum_bad); +} + +static inline void __skb_checksum_convert(struct sk_buff *skb, + __sum16 check, __wsum pseudo) +{ + skb->csum = ~pseudo; + skb->ip_summed = CHECKSUM_COMPLETE; +} + +#define skb_checksum_try_convert(skb, proto, check, compute_pseudo) \ +do { \ + if (__skb_checksum_convert_check(skb)) \ + __skb_checksum_convert(skb, check, \ + compute_pseudo(skb, proto)); \ +} while (0) + #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) void nf_conntrack_destroy(struct nf_conntrack *nfct); static inline void nf_conntrack_put(struct nf_conntrack *nfct) @@ -2896,7 +3109,7 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct) atomic_inc(&nfct->use); } #endif -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge) { if (nf_bridge && atomic_dec_and_test(&nf_bridge->use)) @@ -2914,7 +3127,7 @@ static inline void nf_reset(struct sk_buff *skb) nf_conntrack_put(skb->nfct); skb->nfct = NULL; #endif -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) nf_bridge_put(skb->nf_bridge); skb->nf_bridge = NULL; #endif @@ -2928,19 +3141,22 @@ static inline void nf_reset_trace(struct sk_buff *skb) } /* Note: This doesn't put any conntrack and bridge info in dst. */ -static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src) +static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src, + bool copy) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) dst->nfct = src->nfct; nf_conntrack_get(src->nfct); - dst->nfctinfo = src->nfctinfo; + if (copy) + dst->nfctinfo = src->nfctinfo; #endif -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) dst->nf_bridge = src->nf_bridge; nf_bridge_get(src->nf_bridge); #endif #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || defined(CONFIG_NF_TABLES) - dst->nf_trace = src->nf_trace; + if (copy) + dst->nf_trace = src->nf_trace; #endif } @@ -2949,10 +3165,10 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src) #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_conntrack_put(dst->nfct); #endif -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) nf_bridge_put(dst->nf_bridge); #endif - __nf_copy(dst, src); + __nf_copy(dst, src, true); } #ifdef CONFIG_NETWORK_SECMARK @@ -3137,7 +3353,9 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off); int skb_checksum_setup(struct sk_buff *skb, bool recalculate); -u32 __skb_get_poff(const struct sk_buff *skb); +u32 skb_get_poff(const struct sk_buff *skb); +u32 __skb_get_poff(const struct sk_buff *skb, void *data, + const struct flow_keys *keys, int hlen); /** * skb_head_is_locked - Determine if the skb->head is locked down |