summaryrefslogtreecommitdiff
path: root/include/linux/skbuff.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/skbuff.h')
-rw-r--r--include/linux/skbuff.h544
1 files changed, 381 insertions, 163 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index abde271c18ae..85ab7d72b54c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -20,6 +20,8 @@
#include <linux/time.h>
#include <linux/bug.h>
#include <linux/cache.h>
+#include <linux/rbtree.h>
+#include <linux/socket.h>
#include <linux/atomic.h>
#include <asm/types.h>
@@ -28,7 +30,6 @@
#include <linux/textsearch.h>
#include <net/checksum.h>
#include <linux/rcupdate.h>
-#include <linux/dmaengine.h>
#include <linux/hrtimer.h>
#include <linux/dma-mapping.h>
#include <linux/netdev_features.h>
@@ -47,11 +48,29 @@
*
* The hardware you're dealing with doesn't calculate the full checksum
* (as in CHECKSUM_COMPLETE), but it does parse headers and verify checksums
- * for specific protocols e.g. TCP/UDP/SCTP, then, for such packets it will
- * set CHECKSUM_UNNECESSARY if their checksums are okay. skb->csum is still
- * undefined in this case though. It is a bad option, but, unfortunately,
- * nowadays most vendors do this. Apparently with the secret goal to sell
- * you new devices, when you will add new protocol to your host, f.e. IPv6 8)
+ * for specific protocols. For such packets it will set CHECKSUM_UNNECESSARY
+ * if their checksums are okay. skb->csum is still undefined in this case
+ * though. It is a bad option, but, unfortunately, nowadays most vendors do
+ * this. Apparently with the secret goal to sell you new devices, when you
+ * will add new protocol to your host, f.e. IPv6 8)
+ *
+ * CHECKSUM_UNNECESSARY is applicable to following protocols:
+ * TCP: IPv6 and IPv4.
+ * UDP: IPv4 and IPv6. A device may apply CHECKSUM_UNNECESSARY to a
+ * zero UDP checksum for either IPv4 or IPv6, the networking stack
+ * may perform further validation in this case.
+ * GRE: only if the checksum is present in the header.
+ * SCTP: indicates the CRC in SCTP header has been validated.
+ *
+ * skb->csum_level indicates the number of consecutive checksums found in
+ * the packet minus one that have been verified as CHECKSUM_UNNECESSARY.
+ * For instance if a device receives an IPv6->UDP->GRE->IPv4->TCP packet
+ * and a device is able to verify the checksums for UDP (possibly zero),
+ * GRE (checksum flag is set), and TCP-- skb->csum_level would be set to
+ * two. If the device were only able to verify the UDP checksum and not
+ * GRE, either because it doesn't support GRE checksum of because GRE
+ * checksum is bad, skb->csum_level would be set to zero (TCP checksum is
+ * not considered in this case).
*
* CHECKSUM_COMPLETE:
*
@@ -112,6 +131,9 @@
#define CHECKSUM_COMPLETE 2
#define CHECKSUM_PARTIAL 3
+/* Maximum value in skb->csum_level */
+#define SKB_MAX_CSUM_LEVEL 3
+
#define SKB_DATA_ALIGN(X) ALIGN(X, SMP_CACHE_BYTES)
#define SKB_WITH_OVERHEAD(X) \
((X) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
@@ -128,6 +150,8 @@
struct net_device;
struct scatterlist;
struct pipe_inode_info;
+struct iov_iter;
+struct napi_struct;
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
struct nf_conntrack {
@@ -135,7 +159,7 @@ struct nf_conntrack {
};
#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
struct nf_bridge_info {
atomic_t use;
unsigned int mask;
@@ -318,9 +342,9 @@ struct skb_shared_info {
enum {
- SKB_FCLONE_UNAVAILABLE,
- SKB_FCLONE_ORIG,
- SKB_FCLONE_CLONE,
+ SKB_FCLONE_UNAVAILABLE, /* skb has no fclone (from head_cache) */
+ SKB_FCLONE_ORIG, /* orig skb (from fclone_cache) */
+ SKB_FCLONE_CLONE, /* companion fclone skb (from fclone_cache) */
};
enum {
@@ -349,8 +373,7 @@ enum {
SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11,
- SKB_GSO_MPLS = 1 << 12,
-
+ SKB_GSO_TUNNEL_REMCSUM = 1 << 12,
};
#if BITS_PER_LONG > 32
@@ -419,6 +442,7 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1,
* @next: Next buffer in list
* @prev: Previous buffer in list
* @tstamp: Time we arrived/left
+ * @rbnode: RB tree node, alternative to next/prev for netem/tcp
* @sk: Socket we are owned by
* @dev: Device we arrived on/are leaving by
* @cb: Control buffer. Free for use by every layer. Put private vars here
@@ -452,6 +476,7 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1,
* @tc_verd: traffic control verdict
* @hash: the packet hash
* @queue_mapping: Queue mapping for multiqueue devices
+ * @xmit_more: More SKBs are pending for this queue
* @ndisc_nodetype: router type (from link layer)
* @ooo_okay: allow the mapping of a socket to a queue to be changed
* @l4_hash: indicate hash is a canonical 4-tuple hash over transport
@@ -460,8 +485,6 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1,
* @wifi_acked_valid: wifi_acked was set
* @wifi_acked: whether frame was acked on wifi or not
* @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS
- * @dma_cookie: a cookie to one of several possible DMA operations
- * done by skb DMA functions
* @napi_id: id of the NAPI struct this skb came from
* @secmark: security marking
* @mark: Generic packet mark
@@ -484,15 +507,19 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1,
*/
struct sk_buff {
- /* These two members must be first. */
- struct sk_buff *next;
- struct sk_buff *prev;
-
union {
- ktime_t tstamp;
- struct skb_mstamp skb_mstamp;
+ struct {
+ /* These two members must be first. */
+ struct sk_buff *next;
+ struct sk_buff *prev;
+
+ union {
+ ktime_t tstamp;
+ struct skb_mstamp skb_mstamp;
+ };
+ };
+ struct rb_node rbnode; /* used in netem & tcp stack */
};
-
struct sock *sk;
struct net_device *dev;
@@ -505,87 +532,102 @@ struct sk_buff {
char cb[48] __aligned(8);
unsigned long _skb_refdst;
+ void (*destructor)(struct sk_buff *skb);
#ifdef CONFIG_XFRM
struct sec_path *sp;
#endif
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+ struct nf_conntrack *nfct;
+#endif
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ struct nf_bridge_info *nf_bridge;
+#endif
unsigned int len,
data_len;
__u16 mac_len,
hdr_len;
- union {
- __wsum csum;
- struct {
- __u16 csum_start;
- __u16 csum_offset;
- };
- };
- __u32 priority;
+
+ /* Following fields are _not_ copied in __copy_skb_header()
+ * Note that queue_mapping is here mostly to fill a hole.
+ */
kmemcheck_bitfield_begin(flags1);
- __u8 ignore_df:1,
- cloned:1,
- ip_summed:2,
+ __u16 queue_mapping;
+ __u8 cloned:1,
nohdr:1,
- nfctinfo:3;
- __u8 pkt_type:3,
fclone:2,
- ipvs_property:1,
peeked:1,
- nf_trace:1;
+ head_frag:1,
+ xmit_more:1;
+ /* one bit hole */
kmemcheck_bitfield_end(flags1);
- __be16 protocol;
- void (*destructor)(struct sk_buff *skb);
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
- struct nf_conntrack *nfct;
-#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
- struct nf_bridge_info *nf_bridge;
-#endif
-
- int skb_iif;
-
- __u32 hash;
-
- __be16 vlan_proto;
- __u16 vlan_tci;
+ /* fields enclosed in headers_start/headers_end are copied
+ * using a single memcpy() in __copy_skb_header()
+ */
+ /* private: */
+ __u32 headers_start[0];
+ /* public: */
-#ifdef CONFIG_NET_SCHED
- __u16 tc_index; /* traffic control index */
-#ifdef CONFIG_NET_CLS_ACT
- __u16 tc_verd; /* traffic control verdict */
-#endif
+/* if you move pkt_type around you also must adapt those constants */
+#ifdef __BIG_ENDIAN_BITFIELD
+#define PKT_TYPE_MAX (7 << 5)
+#else
+#define PKT_TYPE_MAX 7
#endif
+#define PKT_TYPE_OFFSET() offsetof(struct sk_buff, __pkt_type_offset)
- __u16 queue_mapping;
- kmemcheck_bitfield_begin(flags2);
-#ifdef CONFIG_IPV6_NDISC_NODETYPE
- __u8 ndisc_nodetype:2;
-#endif
+ __u8 __pkt_type_offset[0];
+ __u8 pkt_type:3;
__u8 pfmemalloc:1;
+ __u8 ignore_df:1;
+ __u8 nfctinfo:3;
+
+ __u8 nf_trace:1;
+ __u8 ip_summed:2;
__u8 ooo_okay:1;
__u8 l4_hash:1;
__u8 sw_hash:1;
__u8 wifi_acked_valid:1;
__u8 wifi_acked:1;
+
__u8 no_fcs:1;
- __u8 head_frag:1;
- /* Encapsulation protocol and NIC drivers should use
- * this flag to indicate to each other if the skb contains
- * encapsulated packet or not and maybe use the inner packet
- * headers if needed
- */
+ /* Indicates the inner headers are valid in the skbuff. */
__u8 encapsulation:1;
__u8 encap_hdr_csum:1;
__u8 csum_valid:1;
__u8 csum_complete_sw:1;
- /* 2/4 bit hole (depending on ndisc_nodetype presence) */
- kmemcheck_bitfield_end(flags2);
+ __u8 csum_level:2;
+ __u8 csum_bad:1;
+
+#ifdef CONFIG_IPV6_NDISC_NODETYPE
+ __u8 ndisc_nodetype:2;
+#endif
+ __u8 ipvs_property:1;
+ __u8 inner_protocol_type:1;
+ __u8 remcsum_offload:1;
+ /* 3 or 5 bit hole */
+
+#ifdef CONFIG_NET_SCHED
+ __u16 tc_index; /* traffic control index */
+#ifdef CONFIG_NET_CLS_ACT
+ __u16 tc_verd; /* traffic control verdict */
+#endif
+#endif
-#if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL
union {
- unsigned int napi_id;
- dma_cookie_t dma_cookie;
+ __wsum csum;
+ struct {
+ __u16 csum_start;
+ __u16 csum_offset;
+ };
};
+ __u32 priority;
+ int skb_iif;
+ __u32 hash;
+ __be16 vlan_proto;
+ __u16 vlan_tci;
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ unsigned int napi_id;
#endif
#ifdef CONFIG_NETWORK_SECMARK
__u32 secmark;
@@ -596,13 +638,24 @@ struct sk_buff {
__u32 reserved_tailroom;
};
- __be16 inner_protocol;
+ union {
+ __be16 inner_protocol;
+ __u8 inner_ipproto;
+ };
+
__u16 inner_transport_header;
__u16 inner_network_header;
__u16 inner_mac_header;
+
+ __be16 protocol;
__u16 transport_header;
__u16 network_header;
__u16 mac_header;
+
+ /* private: */
+ __u32 headers_end[0];
+ /* public: */
+
/* These elements must be at the end, see alloc_skb() for details. */
sk_buff_data_t tail;
sk_buff_data_t end;
@@ -621,6 +674,7 @@ struct sk_buff {
#define SKB_ALLOC_FCLONE 0x01
#define SKB_ALLOC_RX 0x02
+#define SKB_ALLOC_NAPI 0x04
/* Returns true if the skb was allocated from PFMEMALLOC reserves */
static inline bool skb_pfmemalloc(const struct sk_buff *skb)
@@ -665,9 +719,6 @@ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
skb->_skb_refdst = (unsigned long)dst;
}
-void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst,
- bool force);
-
/**
* skb_dst_set_noref - sets skb dst, hopefully, without taking reference
* @skb: buffer
@@ -680,24 +731,8 @@ void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst,
*/
static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
{
- __skb_dst_set_noref(skb, dst, false);
-}
-
-/**
- * skb_dst_set_noref_force - sets skb dst, without taking reference
- * @skb: buffer
- * @dst: dst entry
- *
- * Sets skb dst, assuming a reference was not taken on dst.
- * No reference is taken and no dst_release will be called. While for
- * cached dsts deferred reclaim is a basic feature, for entries that are
- * not cached it is caller's job to guarantee that last dst_release for
- * provided dst happens when nobody uses it, eg. after a RCU grace period.
- */
-static inline void skb_dst_set_noref_force(struct sk_buff *skb,
- struct dst_entry *dst)
-{
- __skb_dst_set_noref(skb, dst, true);
+ WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
+ skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF;
}
/**
@@ -734,6 +769,41 @@ static inline struct sk_buff *alloc_skb(unsigned int size,
return __alloc_skb(size, priority, 0, NUMA_NO_NODE);
}
+struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
+ unsigned long data_len,
+ int max_page_order,
+ int *errcode,
+ gfp_t gfp_mask);
+
+/* Layout of fast clones : [skb1][skb2][fclone_ref] */
+struct sk_buff_fclones {
+ struct sk_buff skb1;
+
+ struct sk_buff skb2;
+
+ atomic_t fclone_ref;
+};
+
+/**
+ * skb_fclone_busy - check if fclone is busy
+ * @skb: buffer
+ *
+ * Returns true is skb is a fast clone, and its clone is not freed.
+ * Some drivers call skb_orphan() in their ndo_start_xmit(),
+ * so we also check that this didnt happen.
+ */
+static inline bool skb_fclone_busy(const struct sock *sk,
+ const struct sk_buff *skb)
+{
+ const struct sk_buff_fclones *fclones;
+
+ fclones = container_of(skb, struct sk_buff_fclones, skb1);
+
+ return skb->fclone == SKB_FCLONE_ORIG &&
+ atomic_read(&fclones->fclone_ref) > 1 &&
+ fclones->skb2.sk == sk;
+}
+
static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
gfp_t priority)
{
@@ -1042,6 +1112,7 @@ static inline int skb_header_cloned(const struct sk_buff *skb)
* Drop a reference to the header part of the buffer. This is done
* by acquiring a payload reference. You must not read from the header
* part of skb->data after this.
+ * Note : Check if you can use __skb_header_release() instead.
*/
static inline void skb_header_release(struct sk_buff *skb)
{
@@ -1051,6 +1122,20 @@ static inline void skb_header_release(struct sk_buff *skb)
}
/**
+ * __skb_header_release - release reference to header
+ * @skb: buffer to operate on
+ *
+ * Variant of skb_header_release() assuming skb is private to caller.
+ * We can avoid one atomic operation.
+ */
+static inline void __skb_header_release(struct sk_buff *skb)
+{
+ skb->nohdr = 1;
+ atomic_set(&skb_shinfo(skb)->dataref, 1 + (1 << SKB_DATAREF_SHIFT));
+}
+
+
+/**
* skb_shared - is the buffer shared
* @skb: buffer to check
*
@@ -1116,7 +1201,12 @@ static inline struct sk_buff *skb_unshare(struct sk_buff *skb,
might_sleep_if(pri & __GFP_WAIT);
if (skb_cloned(skb)) {
struct sk_buff *nskb = skb_copy(skb, pri);
- kfree_skb(skb); /* Free our shared copy */
+
+ /* Free our shared copy */
+ if (likely(nskb))
+ consume_skb(skb);
+ else
+ kfree_skb(skb);
skb = nskb;
}
return skb;
@@ -1675,6 +1765,23 @@ static inline void skb_reserve(struct sk_buff *skb, int len)
skb->tail += len;
}
+#define ENCAP_TYPE_ETHER 0
+#define ENCAP_TYPE_IPPROTO 1
+
+static inline void skb_set_inner_protocol(struct sk_buff *skb,
+ __be16 protocol)
+{
+ skb->inner_protocol = protocol;
+ skb->inner_protocol_type = ENCAP_TYPE_ETHER;
+}
+
+static inline void skb_set_inner_ipproto(struct sk_buff *skb,
+ __u8 ipproto)
+{
+ skb->inner_ipproto = ipproto;
+ skb->inner_protocol_type = ENCAP_TYPE_IPPROTO;
+}
+
static inline void skb_reset_inner_headers(struct sk_buff *skb)
{
skb->inner_mac_header = skb->mac_header;
@@ -1860,18 +1967,6 @@ static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len)
return pskb_may_pull(skb, skb_network_offset(skb) + len);
}
-static inline void skb_pop_rcv_encapsulation(struct sk_buff *skb)
-{
- /* Only continue with checksum unnecessary if device indicated
- * it is valid across encapsulation (skb->encapsulation was set).
- */
- if (skb->ip_summed == CHECKSUM_UNNECESSARY && !skb->encapsulation)
- skb->ip_summed = CHECKSUM_NONE;
-
- skb->encapsulation = 0;
- skb->csum_valid = 0;
-}
-
/*
* CPUs often take a performance hit when accessing unaligned memory
* locations. The actual performance hit varies, it can be small if the
@@ -2071,47 +2166,61 @@ static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev,
return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC);
}
+void *napi_alloc_frag(unsigned int fragsz);
+struct sk_buff *__napi_alloc_skb(struct napi_struct *napi,
+ unsigned int length, gfp_t gfp_mask);
+static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi,
+ unsigned int length)
+{
+ return __napi_alloc_skb(napi, length, GFP_ATOMIC);
+}
+
/**
- * __skb_alloc_pages - allocate pages for ps-rx on a skb and preserve pfmemalloc data
- * @gfp_mask: alloc_pages_node mask. Set __GFP_NOMEMALLOC if not for network packet RX
- * @skb: skb to set pfmemalloc on if __GFP_MEMALLOC is used
- * @order: size of the allocation
+ * __dev_alloc_pages - allocate page for network Rx
+ * @gfp_mask: allocation priority. Set __GFP_NOMEMALLOC if not for network Rx
+ * @order: size of the allocation
*
- * Allocate a new page.
+ * Allocate a new page.
*
- * %NULL is returned if there is no free memory.
+ * %NULL is returned if there is no free memory.
*/
-static inline struct page *__skb_alloc_pages(gfp_t gfp_mask,
- struct sk_buff *skb,
- unsigned int order)
-{
- struct page *page;
-
- gfp_mask |= __GFP_COLD;
-
- if (!(gfp_mask & __GFP_NOMEMALLOC))
- gfp_mask |= __GFP_MEMALLOC;
+static inline struct page *__dev_alloc_pages(gfp_t gfp_mask,
+ unsigned int order)
+{
+ /* This piece of code contains several assumptions.
+ * 1. This is for device Rx, therefor a cold page is preferred.
+ * 2. The expectation is the user wants a compound page.
+ * 3. If requesting a order 0 page it will not be compound
+ * due to the check to see if order has a value in prep_new_page
+ * 4. __GFP_MEMALLOC is ignored if __GFP_NOMEMALLOC is set due to
+ * code in gfp_to_alloc_flags that should be enforcing this.
+ */
+ gfp_mask |= __GFP_COLD | __GFP_COMP | __GFP_MEMALLOC;
- page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order);
- if (skb && page && page->pfmemalloc)
- skb->pfmemalloc = true;
+ return alloc_pages_node(NUMA_NO_NODE, gfp_mask, order);
+}
- return page;
+static inline struct page *dev_alloc_pages(unsigned int order)
+{
+ return __dev_alloc_pages(GFP_ATOMIC, order);
}
/**
- * __skb_alloc_page - allocate a page for ps-rx for a given skb and preserve pfmemalloc data
- * @gfp_mask: alloc_pages_node mask. Set __GFP_NOMEMALLOC if not for network packet RX
- * @skb: skb to set pfmemalloc on if __GFP_MEMALLOC is used
+ * __dev_alloc_page - allocate a page for network Rx
+ * @gfp_mask: allocation priority. Set __GFP_NOMEMALLOC if not for network Rx
*
- * Allocate a new page.
+ * Allocate a new page.
*
- * %NULL is returned if there is no free memory.
+ * %NULL is returned if there is no free memory.
*/
-static inline struct page *__skb_alloc_page(gfp_t gfp_mask,
- struct sk_buff *skb)
+static inline struct page *__dev_alloc_page(gfp_t gfp_mask)
+{
+ return __dev_alloc_pages(gfp_mask, 0);
+}
+
+static inline struct page *dev_alloc_page(void)
{
- return __skb_alloc_pages(gfp_mask, skb, 0);
+ return __dev_alloc_page(GFP_ATOMIC);
}
/**
@@ -2343,7 +2452,6 @@ static inline int skb_cow_head(struct sk_buff *skb, unsigned int headroom)
* is untouched. Otherwise it is extended. Returns zero on
* success. The skb is freed on error.
*/
-
static inline int skb_padto(struct sk_buff *skb, unsigned int len)
{
unsigned int size = skb->len;
@@ -2352,6 +2460,29 @@ static inline int skb_padto(struct sk_buff *skb, unsigned int len)
return skb_pad(skb, len - size);
}
+/**
+ * skb_put_padto - increase size and pad an skbuff up to a minimal size
+ * @skb: buffer to pad
+ * @len: minimal length
+ *
+ * Pads up a buffer to ensure the trailing bytes exist and are
+ * blanked. If the buffer already contains sufficient data it
+ * is untouched. Otherwise it is extended. Returns zero on
+ * success. The skb is freed on error.
+ */
+static inline int skb_put_padto(struct sk_buff *skb, unsigned int len)
+{
+ unsigned int size = skb->len;
+
+ if (unlikely(size < len)) {
+ len -= size;
+ if (skb_pad(skb, len))
+ return -ENOMEM;
+ __skb_put(skb, len);
+ }
+ return 0;
+}
+
static inline int skb_add_data(struct sk_buff *skb,
char __user *from, int copy)
{
@@ -2524,18 +2655,18 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock,
int *err);
unsigned int datagram_poll(struct file *file, struct socket *sock,
struct poll_table_struct *wait);
-int skb_copy_datagram_iovec(const struct sk_buff *from, int offset,
- struct iovec *to, int size);
-int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, int hlen,
- struct iovec *iov);
-int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
- const struct iovec *from, int from_offset,
- int len);
-int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *frm,
- int offset, size_t count);
-int skb_copy_datagram_const_iovec(const struct sk_buff *from, int offset,
- const struct iovec *to, int to_offset,
- int size);
+int skb_copy_datagram_iter(const struct sk_buff *from, int offset,
+ struct iov_iter *to, int size);
+static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset,
+ struct msghdr *msg, int size)
+{
+ return skb_copy_datagram_iter(from, offset, &msg->msg_iter, size);
+}
+int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, int hlen,
+ struct msghdr *msg);
+int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
+ struct iov_iter *from, int len);
+int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm);
void skb_free_datagram(struct sock *sk, struct sk_buff *skb);
void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb);
int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags);
@@ -2556,6 +2687,20 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet);
unsigned int skb_gso_transport_seglen(const struct sk_buff *skb);
struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features);
struct sk_buff *skb_vlan_untag(struct sk_buff *skb);
+int skb_ensure_writable(struct sk_buff *skb, int write_len);
+int skb_vlan_pop(struct sk_buff *skb);
+int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci);
+
+static inline int memcpy_from_msg(void *data, struct msghdr *msg, int len)
+{
+ /* XXX: stripping const */
+ return memcpy_fromiovec(data, (struct iovec *)msg->msg_iter.iov, len);
+}
+
+static inline int memcpy_to_msg(struct msghdr *msg, void *data, int len)
+{
+ return copy_to_iter(data, len, &msg->msg_iter) == len ? 0 : -EFAULT;
+}
struct skb_checksum_ops {
__wsum (*update)(const void *mem, int len, __wsum wsum);
@@ -2567,20 +2712,26 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
__wsum skb_checksum(const struct sk_buff *skb, int offset, int len,
__wsum csum);
-static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
- int len, void *buffer)
+static inline void *__skb_header_pointer(const struct sk_buff *skb, int offset,
+ int len, void *data, int hlen, void *buffer)
{
- int hlen = skb_headlen(skb);
-
if (hlen - offset >= len)
- return skb->data + offset;
+ return data + offset;
- if (skb_copy_bits(skb, offset, buffer, len) < 0)
+ if (!skb ||
+ skb_copy_bits(skb, offset, buffer, len) < 0)
return NULL;
return buffer;
}
+static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
+ int len, void *buffer)
+{
+ return __skb_header_pointer(skb, offset, len, skb->data,
+ skb_headlen(skb), buffer);
+}
+
/**
* skb_needs_linearize - check if we need to linearize a given skb
* depending on the given device features.
@@ -2671,6 +2822,8 @@ static inline ktime_t net_invalid_timestamp(void)
return ktime_set(0, 0);
}
+struct sk_buff *skb_clone_sk(struct sk_buff *skb);
+
#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
void skb_clone_tx_timestamp(struct sk_buff *skb);
@@ -2786,6 +2939,42 @@ static inline __sum16 skb_checksum_complete(struct sk_buff *skb)
0 : __skb_checksum_complete(skb);
}
+static inline void __skb_decr_checksum_unnecessary(struct sk_buff *skb)
+{
+ if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
+ if (skb->csum_level == 0)
+ skb->ip_summed = CHECKSUM_NONE;
+ else
+ skb->csum_level--;
+ }
+}
+
+static inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb)
+{
+ if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
+ if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
+ skb->csum_level++;
+ } else if (skb->ip_summed == CHECKSUM_NONE) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ skb->csum_level = 0;
+ }
+}
+
+static inline void __skb_mark_checksum_bad(struct sk_buff *skb)
+{
+ /* Mark current checksum as bad (typically called from GRO
+ * path). In the case that ip_summed is CHECKSUM_NONE
+ * this must be the first checksum encountered in the packet.
+ * When ip_summed is CHECKSUM_UNNECESSARY, this is the first
+ * checksum after the last one validated. For UDP, a zero
+ * checksum can not be marked as bad.
+ */
+
+ if (skb->ip_summed == CHECKSUM_NONE ||
+ skb->ip_summed == CHECKSUM_UNNECESSARY)
+ skb->csum_bad = 1;
+}
+
/* Check if we need to perform checksum complete validation.
*
* Returns true if checksum complete is needed, false otherwise
@@ -2797,6 +2986,7 @@ static inline bool __skb_checksum_validate_needed(struct sk_buff *skb,
{
if (skb_csum_unnecessary(skb) || (zero_okay && !check)) {
skb->csum_valid = 1;
+ __skb_decr_checksum_unnecessary(skb);
return false;
}
@@ -2826,6 +3016,9 @@ static inline __sum16 __skb_checksum_validate_complete(struct sk_buff *skb,
skb->csum_valid = 1;
return 0;
}
+ } else if (skb->csum_bad) {
+ /* ip_summed == CHECKSUM_NONE in this case */
+ return 1;
}
skb->csum = psum;
@@ -2883,6 +3076,26 @@ static inline __wsum null_compute_pseudo(struct sk_buff *skb, int proto)
#define skb_checksum_simple_validate(skb) \
__skb_checksum_validate(skb, 0, true, false, 0, null_compute_pseudo)
+static inline bool __skb_checksum_convert_check(struct sk_buff *skb)
+{
+ return (skb->ip_summed == CHECKSUM_NONE &&
+ skb->csum_valid && !skb->csum_bad);
+}
+
+static inline void __skb_checksum_convert(struct sk_buff *skb,
+ __sum16 check, __wsum pseudo)
+{
+ skb->csum = ~pseudo;
+ skb->ip_summed = CHECKSUM_COMPLETE;
+}
+
+#define skb_checksum_try_convert(skb, proto, check, compute_pseudo) \
+do { \
+ if (__skb_checksum_convert_check(skb)) \
+ __skb_checksum_convert(skb, check, \
+ compute_pseudo(skb, proto)); \
+} while (0)
+
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
void nf_conntrack_destroy(struct nf_conntrack *nfct);
static inline void nf_conntrack_put(struct nf_conntrack *nfct)
@@ -2896,7 +3109,7 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct)
atomic_inc(&nfct->use);
}
#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge)
{
if (nf_bridge && atomic_dec_and_test(&nf_bridge->use))
@@ -2914,7 +3127,7 @@ static inline void nf_reset(struct sk_buff *skb)
nf_conntrack_put(skb->nfct);
skb->nfct = NULL;
#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
nf_bridge_put(skb->nf_bridge);
skb->nf_bridge = NULL;
#endif
@@ -2928,19 +3141,22 @@ static inline void nf_reset_trace(struct sk_buff *skb)
}
/* Note: This doesn't put any conntrack and bridge info in dst. */
-static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src)
+static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src,
+ bool copy)
{
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
dst->nfct = src->nfct;
nf_conntrack_get(src->nfct);
- dst->nfctinfo = src->nfctinfo;
+ if (copy)
+ dst->nfctinfo = src->nfctinfo;
#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
dst->nf_bridge = src->nf_bridge;
nf_bridge_get(src->nf_bridge);
#endif
#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || defined(CONFIG_NF_TABLES)
- dst->nf_trace = src->nf_trace;
+ if (copy)
+ dst->nf_trace = src->nf_trace;
#endif
}
@@ -2949,10 +3165,10 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
nf_conntrack_put(dst->nfct);
#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
nf_bridge_put(dst->nf_bridge);
#endif
- __nf_copy(dst, src);
+ __nf_copy(dst, src, true);
}
#ifdef CONFIG_NETWORK_SECMARK
@@ -3137,7 +3353,9 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off);
int skb_checksum_setup(struct sk_buff *skb, bool recalculate);
-u32 __skb_get_poff(const struct sk_buff *skb);
+u32 skb_get_poff(const struct sk_buff *skb);
+u32 __skb_get_poff(const struct sk_buff *skb, void *data,
+ const struct flow_keys *keys, int hlen);
/**
* skb_head_is_locked - Determine if the skb->head is locked down