summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
-rw-r--r--Makefile.am23
-rw-r--r--NEWS3
-rw-r--r--build-aux/initial-tab-allowed-files1
-rw-r--r--configure.ac6
-rw-r--r--datapath-windows/include/automake.mk2
-rw-r--r--datapath/.gitignore7
-rw-r--r--datapath/Makefile.am60
-rw-r--r--datapath/Modules.mk58
-rw-r--r--datapath/actions.c1587
-rw-r--r--datapath/compat.h92
-rw-r--r--datapath/conntrack.c2413
-rw-r--r--datapath/conntrack.h113
-rw-r--r--datapath/datapath.c2707
-rw-r--r--datapath/datapath.h283
-rw-r--r--datapath/dp_notify.c102
-rw-r--r--datapath/flow.c972
-rw-r--r--datapath/flow.h297
-rw-r--r--datapath/flow_netlink.c3519
-rw-r--r--datapath/flow_netlink.h85
-rw-r--r--datapath/flow_table.c988
-rw-r--r--datapath/flow_table.h102
-rw-r--r--datapath/linux/.gitignore8
-rw-r--r--datapath/linux/Kbuild.in27
-rw-r--r--datapath/linux/Makefile.in9
-rw-r--r--datapath/linux/Makefile.main.in107
-rw-r--r--datapath/linux/Modules.mk123
-rw-r--r--datapath/linux/compat/build-aux/export-check-allow-list1
-rw-r--r--datapath/linux/compat/dev-openvswitch.c83
-rw-r--r--datapath/linux/compat/dst_cache.c173
-rw-r--r--datapath/linux/compat/exthdrs_core.c129
-rw-r--r--datapath/linux/compat/genetlink-openvswitch.c55
-rw-r--r--datapath/linux/compat/geneve.c1854
-rw-r--r--datapath/linux/compat/gre.c239
-rw-r--r--datapath/linux/compat/gso.c317
-rw-r--r--datapath/linux/compat/gso.h214
-rw-r--r--datapath/linux/compat/include/linux/bug.h13
-rw-r--r--datapath/linux/compat/include/linux/cache.h23
-rw-r--r--datapath/linux/compat/include/linux/compiler-gcc.h20
-rw-r--r--datapath/linux/compat/include/linux/compiler.h26
-rw-r--r--datapath/linux/compat/include/linux/cpumask.h11
-rw-r--r--datapath/linux/compat/include/linux/err.h37
-rw-r--r--datapath/linux/compat/include/linux/etherdevice.h62
-rw-r--r--datapath/linux/compat/include/linux/genetlink.h16
-rw-r--r--datapath/linux/compat/include/linux/if.h6
-rw-r--r--datapath/linux/compat/include/linux/if_ether.h39
-rw-r--r--datapath/linux/compat/include/linux/if_link.h171
-rw-r--r--datapath/linux/compat/include/linux/if_vlan.h306
-rw-r--r--datapath/linux/compat/include/linux/in.h56
-rw-r--r--datapath/linux/compat/include/linux/jiffies.h34
-rw-r--r--datapath/linux/compat/include/linux/kconfig.h49
-rw-r--r--datapath/linux/compat/include/linux/kernel.h39
-rw-r--r--datapath/linux/compat/include/linux/list.h31
-rw-r--r--datapath/linux/compat/include/linux/mm.h44
-rw-r--r--datapath/linux/compat/include/linux/mpls.h40
-rw-r--r--datapath/linux/compat/include/linux/net.h62
-rw-r--r--datapath/linux/compat/include/linux/netdev_features.h77
-rw-r--r--datapath/linux/compat/include/linux/netdevice.h336
-rw-r--r--datapath/linux/compat/include/linux/netfilter.h19
-rw-r--r--datapath/linux/compat/include/linux/netfilter_ipv6.h32
-rw-r--r--datapath/linux/compat/include/linux/netlink.h19
-rw-r--r--datapath/linux/compat/include/linux/overflow.h313
-rw-r--r--datapath/linux/compat/include/linux/percpu.h33
-rw-r--r--datapath/linux/compat/include/linux/random.h17
-rw-r--r--datapath/linux/compat/include/linux/rbtree.h19
-rw-r--r--datapath/linux/compat/include/linux/rculist.h39
-rw-r--r--datapath/linux/compat/include/linux/rcupdate.h41
-rw-r--r--datapath/linux/compat/include/linux/reciprocal_div.h37
-rw-r--r--datapath/linux/compat/include/linux/rtnetlink.h41
-rw-r--r--datapath/linux/compat/include/linux/skbuff.h491
-rw-r--r--datapath/linux/compat/include/linux/static_key.h86
-rw-r--r--datapath/linux/compat/include/linux/stddef.h15
-rw-r--r--datapath/linux/compat/include/linux/timekeeping.h11
-rw-r--r--datapath/linux/compat/include/linux/types.h11
-rw-r--r--datapath/linux/compat/include/linux/u64_stats_sync.h155
-rw-r--r--datapath/linux/compat/include/linux/udp.h33
-rw-r--r--datapath/linux/compat/include/linux/workqueue.h6
-rw-r--r--datapath/linux/compat/include/net/checksum.h39
-rw-r--r--datapath/linux/compat/include/net/dst.h77
-rw-r--r--datapath/linux/compat/include/net/dst_cache.h114
-rw-r--r--datapath/linux/compat/include/net/dst_metadata.h269
-rw-r--r--datapath/linux/compat/include/net/erspan.h342
-rw-r--r--datapath/linux/compat/include/net/genetlink.h136
-rw-r--r--datapath/linux/compat/include/net/geneve.h107
-rw-r--r--datapath/linux/compat/include/net/gre.h191
-rw-r--r--datapath/linux/compat/include/net/inet_ecn.h59
-rw-r--r--datapath/linux/compat/include/net/inet_frag.h83
-rw-r--r--datapath/linux/compat/include/net/inetpeer.h16
-rw-r--r--datapath/linux/compat/include/net/ip.h143
-rw-r--r--datapath/linux/compat/include/net/ip6_fib.h43
-rw-r--r--datapath/linux/compat/include/net/ip6_route.h16
-rw-r--r--datapath/linux/compat/include/net/ip6_tunnel.h208
-rw-r--r--datapath/linux/compat/include/net/ip_tunnels.h513
-rw-r--r--datapath/linux/compat/include/net/ipv6.h88
-rw-r--r--datapath/linux/compat/include/net/ipv6_frag.h8
-rw-r--r--datapath/linux/compat/include/net/lisp.h27
-rw-r--r--datapath/linux/compat/include/net/mpls.h62
-rw-r--r--datapath/linux/compat/include/net/net_namespace.h33
-rw-r--r--datapath/linux/compat/include/net/netfilter/ipv6/nf_defrag_ipv6.h42
-rw-r--r--datapath/linux/compat/include/net/netfilter/nf_conntrack.h33
-rw-r--r--datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h137
-rw-r--r--datapath/linux/compat/include/net/netfilter/nf_conntrack_count.h54
-rw-r--r--datapath/linux/compat/include/net/netfilter/nf_conntrack_expect.h21
-rw-r--r--datapath/linux/compat/include/net/netfilter/nf_conntrack_helper.h39
-rw-r--r--datapath/linux/compat/include/net/netfilter/nf_conntrack_labels.h107
-rw-r--r--datapath/linux/compat/include/net/netfilter/nf_conntrack_seqadj.h30
-rw-r--r--datapath/linux/compat/include/net/netfilter/nf_conntrack_timeout.h34
-rw-r--r--datapath/linux/compat/include/net/netfilter/nf_conntrack_zones.h101
-rw-r--r--datapath/linux/compat/include/net/netfilter/nf_nat.h44
-rw-r--r--datapath/linux/compat/include/net/netlink.h185
-rw-r--r--datapath/linux/compat/include/net/nsh.h313
-rw-r--r--datapath/linux/compat/include/net/protocol.h19
-rw-r--r--datapath/linux/compat/include/net/route.h6
-rw-r--r--datapath/linux/compat/include/net/rtnetlink.h44
-rw-r--r--datapath/linux/compat/include/net/sctp/checksum.h25
-rw-r--r--datapath/linux/compat/include/net/sock.h13
-rw-r--r--datapath/linux/compat/include/net/stt.h70
-rw-r--r--datapath/linux/compat/include/net/tun_proto.h49
-rw-r--r--datapath/linux/compat/include/net/udp.h62
-rw-r--r--datapath/linux/compat/include/net/udp_tunnel.h208
-rw-r--r--datapath/linux/compat/include/net/vrf.h26
-rw-r--r--datapath/linux/compat/include/net/vxlan.h444
-rw-r--r--datapath/linux/compat/include/uapi/linux/netfilter.h14
-rw-r--r--datapath/linux/compat/inet_fragment.c31
-rw-r--r--datapath/linux/compat/ip6_gre.c2746
-rw-r--r--datapath/linux/compat/ip6_output.c470
-rw-r--r--datapath/linux/compat/ip6_tunnel.c2213
-rw-r--r--datapath/linux/compat/ip_fragment.c831
-rw-r--r--datapath/linux/compat/ip_gre.c1450
-rw-r--r--datapath/linux/compat/ip_output.c418
-rw-r--r--datapath/linux/compat/ip_tunnel.c776
-rw-r--r--datapath/linux/compat/ip_tunnels_core.c330
-rw-r--r--datapath/linux/compat/lisp.c816
-rw-r--r--datapath/linux/compat/netdevice.c167
-rw-r--r--datapath/linux/compat/nf_conncount.c621
-rw-r--r--datapath/linux/compat/nf_conntrack_core.c13
-rw-r--r--datapath/linux/compat/nf_conntrack_proto.c114
-rw-r--r--datapath/linux/compat/nf_conntrack_reasm.c740
-rw-r--r--datapath/linux/compat/nf_conntrack_timeout.c102
-rw-r--r--datapath/linux/compat/reciprocal_div.c27
-rw-r--r--datapath/linux/compat/skbuff-openvswitch.c310
-rw-r--r--datapath/linux/compat/socket.c32
-rw-r--r--datapath/linux/compat/stt.c2129
-rw-r--r--datapath/linux/compat/udp.c46
-rw-r--r--datapath/linux/compat/udp_tunnel.c292
-rw-r--r--datapath/linux/compat/utils.c112
-rw-r--r--datapath/linux/compat/vxlan.c2382
-rw-r--r--datapath/meter.c639
-rw-r--r--datapath/meter.h54
-rw-r--r--datapath/nsh.c142
-rw-r--r--datapath/vport-geneve.c147
-rw-r--r--datapath/vport-gre.c119
-rw-r--r--datapath/vport-internal_dev.c340
-rw-r--r--datapath/vport-internal_dev.h30
-rw-r--r--datapath/vport-lisp.c146
-rw-r--r--datapath/vport-netdev.c230
-rw-r--r--datapath/vport-netdev.h39
-rw-r--r--datapath/vport-stt.c149
-rw-r--r--datapath/vport-vxlan.c216
-rw-r--r--datapath/vport.c614
-rw-r--r--datapath/vport.h205
-rw-r--r--debian/copyright.in2
-rw-r--r--include/automake.mk2
-rw-r--r--include/linux/automake.mk1
-rw-r--r--include/linux/openvswitch.h (renamed from datapath/linux/compat/include/linux/openvswitch.h)0
-rwxr-xr-xutilities/docker/debian/build-kernel-modules.sh13
-rwxr-xr-xutilities/ovs-dev.py19
167 files changed, 23 insertions, 44622 deletions
diff --git a/.gitignore b/.gitignore
index b0098f46a..26ed8d3d0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -34,7 +34,6 @@
/Makefile
/Makefile.in
/aclocal.m4
-/all-distfiles
/all-gitfiles
/autom4te.cache
/build-arch-stamp
diff --git a/Makefile.am b/Makefile.am
index cb8076433..fff98564a 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -7,7 +7,6 @@
AUTOMAKE_OPTIONS = foreign subdir-objects
ACLOCAL_AMFLAGS = -I m4
-SUBDIRS = datapath
AM_CPPFLAGS = $(SSL_CFLAGS)
AM_LDFLAGS = $(SSL_LDFLAGS)
@@ -198,25 +197,22 @@ CLEAN_LOCAL += clean-pycov
ALL_LOCAL += dist-hook-git
dist-hook-git: distfiles
@if test -e $(srcdir)/.git && (git --version) >/dev/null 2>&1; then \
- (cd datapath && $(MAKE) distfiles); \
- (cat distfiles; sed 's|^|datapath/|' datapath/distfiles) | \
- LC_ALL=C sort -u > all-distfiles; \
(cd $(srcdir) && git ls-files) | grep -v '\.gitignore$$' | \
grep -v '\.gitattributes$$' | \
LC_ALL=C sort -u > all-gitfiles; \
- LC_ALL=C comm -1 -3 all-distfiles all-gitfiles > missing-distfiles; \
+ LC_ALL=C comm -1 -3 distfiles all-gitfiles > missing-distfiles; \
if test -s missing-distfiles; then \
echo "The following files are in git but not the distribution:"; \
cat missing-distfiles; \
exit 1; \
fi; \
- if LC_ALL=C grep '\.gitignore$$' all-distfiles; then \
+ if LC_ALL=C grep '\.gitignore$$' distfiles; then \
echo "See above for list of files that are distributed but"; \
echo "should not be."; \
exit 1; \
fi \
fi
-CLEANFILES += all-distfiles all-gitfiles missing-distfiles
+CLEANFILES += all-gitfiles missing-distfiles
# The following is based on commands for the Automake "distdir" target.
distfiles: Makefile
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
@@ -235,7 +231,7 @@ config-h-check:
@cd $(srcdir); \
if test -e .git && (git --version) >/dev/null 2>&1 && \
git --no-pager grep -L '#include <config\.h>' `git ls-files | grep '\.c$$' | \
- grep -vE '^datapath|^lib/sflow|^third-party|^datapath-windows|^python'`; \
+ grep -vE '^datapath-windows|^lib/sflow|^python|^third-party'`; \
then \
echo "See above for list of violations of the rule that"; \
echo "every C source file must #include <config.h>."; \
@@ -256,7 +252,7 @@ printf-check:
@cd $(srcdir); \
if test -e .git && (git --version) >/dev/null 2>&1 && \
git --no-pager grep -n -E -e '%[-+ #0-9.*]*([ztj]|hh)' --and --not -e 'ovs_scan' `git ls-files | grep '\.[ch]$$' | \
- grep -vE '^datapath|^lib/sflow|^third-party'`; \
+ grep -vE '^datapath-windows|^lib/sflow|^third-party'`; \
then \
echo "See above for list of violations of the rule that"; \
echo "'z', 't', 'j', 'hh' printf() type modifiers are"; \
@@ -299,7 +295,7 @@ check-endian:
@if test -e $(srcdir)/.git && (git --version) >/dev/null 2>&1 && \
(cd $(srcdir) && git --no-pager grep -l -E \
-e 'BIG_ENDIAN|LITTLE_ENDIAN' --and --not -e 'BYTE_ORDER' | \
- $(EGREP) -v '^datapath/|^include/sparse/rte_'); \
+ $(EGREP) -v '^include/sparse/rte_'); \
then \
echo "See above for list of files that misuse LITTLE""_ENDIAN"; \
echo "or BIG""_ENDIAN. Please use WORDS_BIGENDIAN instead."; \
@@ -339,7 +335,7 @@ thread-safety-check:
if test -e .git && (git --version) >/dev/null 2>&1 && \
grep -n -f build-aux/thread-safety-forbidden \
`git ls-files | grep '\.[ch]$$' \
- | $(EGREP) -v '^datapath|^lib/sflow|^third-party'` /dev/null \
+ | $(EGREP) -v '^datapath-windows|^lib/sflow|^third-party'` /dev/null \
| $(EGREP) -v ':[ ]*/?\*'; \
then \
echo "See above for list of calls to functions that are"; \
@@ -468,11 +464,6 @@ install-data-local: $(INSTALL_DATA_LOCAL)
uninstall-local: $(UNINSTALL_LOCAL)
.PHONY: $(DIST_HOOKS) $(CLEAN_LOCAL) $(INSTALL_DATA_LOCAL) $(UNINSTALL_LOCAL)
-modules_install:
-if LINUX_ENABLED
- cd datapath/linux && $(MAKE) modules_install
-endif
-
dist-docs:
VERSION=$(VERSION) MAKE='$(MAKE)' $(srcdir)/build-aux/dist-docs $(srcdir) $(docs)
.PHONY: dist-docs
diff --git a/NEWS b/NEWS
index 6a12dbee4..478a3d67e 100644
--- a/NEWS
+++ b/NEWS
@@ -74,6 +74,9 @@ Post-v2.17.0
- Linux datapath:
* Add offloading meter tc police.
* Add support for offloading the check_pkt_len action.
+ - Previously deprecated Linux kernel module is now fully removed from
+ the OVS source tree. The version provided with the Linux kernel
+ should be used instead.
v2.17.0 - 17 Feb 2022
diff --git a/build-aux/initial-tab-allowed-files b/build-aux/initial-tab-allowed-files
index 6a9968e32..ff597d23c 100644
--- a/build-aux/initial-tab-allowed-files
+++ b/build-aux/initial-tab-allowed-files
@@ -3,7 +3,6 @@
\.mk$
\.png$
\.sln$
-^datapath/
^include/linux/
^include/sparse/rte_
^include/windows/
diff --git a/configure.ac b/configure.ac
index 6c51e48ce..63359fe29 100644
--- a/configure.ac
+++ b/configure.ac
@@ -14,7 +14,7 @@
AC_PREREQ(2.63)
AC_INIT(openvswitch, 2.17.90, bugs@openvswitch.org)
-AC_CONFIG_SRCDIR([datapath/datapath.c])
+AC_CONFIG_SRCDIR([vswitchd/ovs-vswitchd.c])
AC_CONFIG_MACRO_DIR([m4])
AC_CONFIG_AUX_DIR([build-aux])
AC_CONFIG_HEADERS([config.h])
@@ -204,10 +204,6 @@ AC_SUBST([OVS_CFLAGS])
AC_SUBST([OVS_LDFLAGS])
AC_CONFIG_FILES(Makefile)
-AC_CONFIG_FILES(datapath/Makefile)
-AC_CONFIG_FILES(datapath/linux/Kbuild)
-AC_CONFIG_FILES(datapath/linux/Makefile)
-AC_CONFIG_FILES(datapath/linux/Makefile.main)
AC_CONFIG_FILES(tests/atlocal)
AC_CONFIG_FILES(lib/libopenvswitch.pc)
AC_CONFIG_FILES(lib/libsflow.pc)
diff --git a/datapath-windows/include/automake.mk b/datapath-windows/include/automake.mk
index b8dcf83b9..a354f007f 100644
--- a/datapath-windows/include/automake.mk
+++ b/datapath-windows/include/automake.mk
@@ -3,7 +3,7 @@ BUILT_SOURCES += $(srcdir)/datapath-windows/include/OvsDpInterface.h
endif
$(srcdir)/datapath-windows/include/OvsDpInterface.h: \
- datapath/linux/compat/include/linux/openvswitch.h \
+ include/linux/openvswitch.h \
build-aux/extract-odp-netlink-windows-dp-h
$(AM_V_GEN)sed -f $(srcdir)/build-aux/extract-odp-netlink-windows-dp-h < $< > $@
diff --git a/datapath/.gitignore b/datapath/.gitignore
deleted file mode 100644
index fb8cf7d3b..000000000
--- a/datapath/.gitignore
+++ /dev/null
@@ -1,7 +0,0 @@
-/Makefile
-/Makefile.in
-*.cmd
-*.ko
-*.mod.c
-Module.symvers
-/distfiles
diff --git a/datapath/Makefile.am b/datapath/Makefile.am
deleted file mode 100644
index e4dd0c704..000000000
--- a/datapath/Makefile.am
+++ /dev/null
@@ -1,60 +0,0 @@
-SUBDIRS =
-if LINUX_ENABLED
-SUBDIRS += linux
-endif
-
-EXTRA_DIST = $(dist_headers) $(dist_sources) $(dist_extras)
-
-# Suppress warnings about GNU extensions in Modules.mk files.
-AUTOMAKE_OPTIONS = -Wno-portability
-
-include Modules.mk
-include linux/Modules.mk
-
-# The following is based on commands for the Automake "distdir" target.
-distfiles: Makefile
- @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- list='$(DISTFILES)'; \
- for file in $$list; do echo $$file; done | \
- sed -e "s|^$$srcdirstrip/||;t" \
- -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t" | sort -u > $@
-CLEANFILES = distfiles
-
-# Print name of all modules.
-print-build-modules:
- @if test -z "$(build_modules)"; \
- then \
- echo "Could not find any kernel module."; \
- exit 1; \
- fi
- @echo "$(build_modules)" | tr '_' '-';
-
-if !WIN32
-COMPAT_GET_FUNCTIONS := find $(top_srcdir)/datapath/linux/compat -name "*.h" \
- -exec sed -n '/^[a-z][a-z]* \*\?[A-Za-z0-9_][A-Za-z0-9_]*([a-z]/p; /^struct [a-z0-9_][a-z0-9_]* \*\?[A-Za-z0-9_][A-Za-z0-9_]*([a-z]/p' {} \; | tr -d '*' | cut -d '(' -f1 | rev | cut -d ' ' -f1 | rev
-COMPAT_GET_EXPORTS := find $(top_srcdir)/datapath/linux/compat -name "*.c" \
- -exec sed -n 's/^EXPORT_SYMBOL[A-Z_]*(\([a-z_][a-z_]*\));$$/\1/p' {} \;
-COMPAT_FUNCTIONS := $(shell $(COMPAT_GET_FUNCTIONS))
-COMPAT_EXPORTS := $(shell $(COMPAT_GET_EXPORTS))
-
-# Checks that all public functions are 'rpl_' or 'ovs_' prefixed.
-# Checks that all EXPORT_SYMBOL_GPL() export 'rpl_' or 'ovs_' prefixed functions.
-check-export-symbol:
- @for fun_ in $(COMPAT_FUNCTIONS); do \
- if ! grep -- $${fun_} $(top_srcdir)/datapath/linux/compat/build-aux/export-check-allow-list > /dev/null; then \
- if ! echo $${fun_} | grep -q -E '^(rpl|ovs)_'; then \
- echo "error: $${fun_}() needs to be prefixed with 'rpl_' or 'ovs_'."; \
- exit 1; \
- fi; \
- fi; \
- done
- @for fun_ in $(COMPAT_EXPORTS); do \
- if ! echo $${fun_} | grep -q -E '^(rpl|ovs)_'; then \
- echo "error: $${fun_}() needs to be prefixed with 'rpl_' or 'ovs_'."; \
- exit 1; \
- fi; \
- done
-
-all-local: check-export-symbol
-endif
diff --git a/datapath/Modules.mk b/datapath/Modules.mk
deleted file mode 100644
index 3c4ae366c..000000000
--- a/datapath/Modules.mk
+++ /dev/null
@@ -1,58 +0,0 @@
-# Some modules should be built and distributed, e.g. openvswitch.
-#
-# Some modules should be built but not distributed, e.g. third-party
-# hwtable modules.
-build_multi_modules = \
- openvswitch
-both_modules = \
- $(build_multi_modules) \
- vport_geneve \
- vport_gre \
- vport_lisp \
- vport_stt \
- vport_vxlan
-# When changing the name of 'build_modules', please also update the
-# print-build-modules in Makefile.am.
-build_modules = $(both_modules) # Modules to build
-dist_modules = $(both_modules) # Modules to distribute
-
-openvswitch_sources = \
- actions.c \
- conntrack.c \
- datapath.c \
- dp_notify.c \
- flow.c \
- flow_netlink.c \
- flow_table.c \
- vport.c \
- vport-internal_dev.c \
- vport-netdev.c \
- nsh.c \
- meter.c
-
-vport_geneve_sources = vport-geneve.c
-vport_vxlan_sources = vport-vxlan.c
-vport_gre_sources = vport-gre.c
-vport_lisp_sources = vport-lisp.c
-vport_stt_sources = vport-stt.c
-nsh_sources = nsh.c
-
-openvswitch_headers = \
- compat.h \
- conntrack.h \
- datapath.h \
- flow.h \
- flow_netlink.h \
- flow_table.h \
- vport.h \
- vport-internal_dev.h \
- vport-netdev.h \
- meter.h
-
-dist_sources = $(foreach module,$(dist_modules),$($(module)_sources))
-dist_headers = $(foreach module,$(dist_modules),$($(module)_headers))
-dist_extras = $(foreach module,$(dist_modules),$($(module)_extras))
-build_sources = $(foreach module,$(build_modules),$($(module)_sources))
-build_headers = $(foreach module,$(build_modules),$($(module)_headers))
-build_links = $(notdir $(build_sources))
-build_objects = $(notdir $(patsubst %.c,%.o,$(build_sources)))
diff --git a/datapath/actions.c b/datapath/actions.c
deleted file mode 100644
index fbf445703..000000000
--- a/datapath/actions.c
+++ /dev/null
@@ -1,1587 +0,0 @@
-/*
- * Copyright (c) 2007-2017 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/skbuff.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/openvswitch.h>
-#include <linux/netfilter_ipv6.h>
-#include <linux/sctp.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include <linux/in6.h>
-#include <linux/if_arp.h>
-#include <linux/if_vlan.h>
-
-#include <net/dst.h>
-#include <net/ip.h>
-#include <net/ipv6.h>
-#include <net/checksum.h>
-#include <net/dsfield.h>
-#include <net/mpls.h>
-#include <net/sctp/checksum.h>
-
-#include "datapath.h"
-#include "conntrack.h"
-#include "gso.h"
-#include "vport.h"
-#include "flow_netlink.h"
-
-static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
- struct sw_flow_key *key,
- const struct nlattr *attr, int len);
-
-struct deferred_action {
- struct sk_buff *skb;
- const struct nlattr *actions;
- int actions_len;
-
- /* Store pkt_key clone when creating deferred action. */
- struct sw_flow_key pkt_key;
-};
-
-#define MAX_L2_LEN (VLAN_ETH_HLEN + 3 * MPLS_HLEN)
-struct ovs_frag_data {
- unsigned long dst;
- struct vport *vport;
- struct ovs_gso_cb cb;
- __be16 inner_protocol;
- u16 network_offset; /* valid only for MPLS */
- u16 vlan_tci;
- __be16 vlan_proto;
- unsigned int l2_len;
- u8 mac_proto;
- u8 l2_data[MAX_L2_LEN];
-};
-
-static DEFINE_PER_CPU(struct ovs_frag_data, ovs_frag_data_storage);
-
-#define DEFERRED_ACTION_FIFO_SIZE 10
-#define OVS_RECURSION_LIMIT 4
-#define OVS_DEFERRED_ACTION_THRESHOLD (OVS_RECURSION_LIMIT - 2)
-struct action_fifo {
- int head;
- int tail;
- /* Deferred action fifo queue storage. */
- struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE];
-};
-
-struct action_flow_keys {
- struct sw_flow_key key[OVS_DEFERRED_ACTION_THRESHOLD];
-};
-
-static struct action_fifo __percpu *action_fifos;
-static struct action_flow_keys __percpu *flow_keys;
-static DEFINE_PER_CPU(int, exec_actions_level);
-
-/* Make a clone of the 'key', using the pre-allocated percpu 'flow_keys'
- * space. Return NULL if out of key spaces.
- */
-static struct sw_flow_key *clone_key(const struct sw_flow_key *key_)
-{
- struct action_flow_keys *keys = this_cpu_ptr(flow_keys);
- int level = this_cpu_read(exec_actions_level);
- struct sw_flow_key *key = NULL;
-
- if (level <= OVS_DEFERRED_ACTION_THRESHOLD) {
- key = &keys->key[level - 1];
- *key = *key_;
- }
-
- return key;
-}
-
-static void action_fifo_init(struct action_fifo *fifo)
-{
- fifo->head = 0;
- fifo->tail = 0;
-}
-
-static bool action_fifo_is_empty(const struct action_fifo *fifo)
-{
- return (fifo->head == fifo->tail);
-}
-
-static struct deferred_action *action_fifo_get(struct action_fifo *fifo)
-{
- if (action_fifo_is_empty(fifo))
- return NULL;
-
- return &fifo->fifo[fifo->tail++];
-}
-
-static struct deferred_action *action_fifo_put(struct action_fifo *fifo)
-{
- if (fifo->head >= DEFERRED_ACTION_FIFO_SIZE - 1)
- return NULL;
-
- return &fifo->fifo[fifo->head++];
-}
-
-/* Return queue entry if fifo is not full */
-static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
- const struct sw_flow_key *key,
- const struct nlattr *actions,
- const int actions_len)
-{
- struct action_fifo *fifo;
- struct deferred_action *da;
-
- fifo = this_cpu_ptr(action_fifos);
- da = action_fifo_put(fifo);
- if (da) {
- da->skb = skb;
- da->actions = actions;
- da->actions_len = actions_len;
- da->pkt_key = *key;
- }
-
- return da;
-}
-
-static void invalidate_flow_key(struct sw_flow_key *key)
-{
- key->mac_proto |= SW_FLOW_KEY_INVALID;
-}
-
-static bool is_flow_key_valid(const struct sw_flow_key *key)
-{
- return !(key->mac_proto & SW_FLOW_KEY_INVALID);
-}
-
-static int clone_execute(struct datapath *dp, struct sk_buff *skb,
- struct sw_flow_key *key,
- u32 recirc_id,
- const struct nlattr *actions, int len,
- bool last, bool clone_flow_key);
-
-static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr,
- __be16 ethertype)
-{
- if (skb->ip_summed == CHECKSUM_COMPLETE) {
- __be16 diff[] = { ~(hdr->h_proto), ethertype };
-
- skb->csum = csum_partial((char *)diff, sizeof(diff), skb->csum);
- }
-
- hdr->h_proto = ethertype;
-}
-
-static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
- const struct ovs_action_push_mpls *mpls)
-{
- struct mpls_shim_hdr *new_mpls_lse;
-
- /* Networking stack do not allow simultaneous Tunnel and MPLS GSO. */
- if (skb->encapsulation)
- return -ENOTSUPP;
-
- if (skb_cow_head(skb, MPLS_HLEN) < 0)
- return -ENOMEM;
-
- if (!ovs_skb_get_inner_protocol(skb)) {
- skb_set_inner_network_header(skb, skb->mac_len);
- ovs_skb_set_inner_protocol(skb, skb->protocol);
- }
-
- skb_push(skb, MPLS_HLEN);
- memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
- skb->mac_len);
- skb_reset_mac_header(skb);
-#ifdef MPLS_HEADER_IS_L3
- skb_set_network_header(skb, skb->mac_len);
-#endif
-
- new_mpls_lse = mpls_hdr(skb);
- new_mpls_lse->label_stack_entry = mpls->mpls_lse;
-
- skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN);
-
- if (ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET)
- update_ethertype(skb, eth_hdr(skb), mpls->mpls_ethertype);
- skb->protocol = mpls->mpls_ethertype;
-
- invalidate_flow_key(key);
- return 0;
-}
-
-static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
- const __be16 ethertype)
-{
- int err;
-
- err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
- if (unlikely(err))
- return err;
-
- skb_postpull_rcsum(skb, mpls_hdr(skb), MPLS_HLEN);
-
- memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
- skb->mac_len);
-
- __skb_pull(skb, MPLS_HLEN);
- skb_reset_mac_header(skb);
- skb_set_network_header(skb, skb->mac_len);
-
- if (ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET) {
- struct ethhdr *hdr;
-
- /* mpls_hdr() is used to locate the ethertype
- * field correctly in the presence of VLAN tags.
- */
- hdr = (struct ethhdr *)((void*)mpls_hdr(skb) - ETH_HLEN);
- update_ethertype(skb, hdr, ethertype);
- }
- if (eth_p_mpls(skb->protocol))
- skb->protocol = ethertype;
-
- invalidate_flow_key(key);
- return 0;
-}
-
-static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,
- const __be32 *mpls_lse, const __be32 *mask)
-{
- struct mpls_shim_hdr *stack;
- __be32 lse;
- int err;
-
- err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
- if (unlikely(err))
- return err;
-
- stack = mpls_hdr(skb);
- lse = OVS_MASKED(stack->label_stack_entry, *mpls_lse, *mask);
- if (skb->ip_summed == CHECKSUM_COMPLETE) {
- __be32 diff[] = { ~(stack->label_stack_entry), lse };
-
- skb->csum = csum_partial((char *)diff, sizeof(diff), skb->csum);
- }
-
- stack->label_stack_entry = lse;
- flow_key->mpls.lse[0] = lse;
- return 0;
-}
-
-static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key)
-{
- int err;
-
- err = skb_vlan_pop(skb);
- if (skb_vlan_tag_present(skb)) {
- invalidate_flow_key(key);
- } else {
- key->eth.vlan.tci = 0;
- key->eth.vlan.tpid = 0;
- }
- return err;
-}
-
-static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key,
- const struct ovs_action_push_vlan *vlan)
-{
- if (skb_vlan_tag_present(skb)) {
- invalidate_flow_key(key);
- } else {
- key->eth.vlan.tci = vlan->vlan_tci;
- key->eth.vlan.tpid = vlan->vlan_tpid;
- }
- return skb_vlan_push(skb, vlan->vlan_tpid,
- ntohs(vlan->vlan_tci) & ~VLAN_CFI_MASK);
-}
-
-/* 'src' is already properly masked. */
-static void ether_addr_copy_masked(u8 *dst_, const u8 *src_, const u8 *mask_)
-{
- u16 *dst = (u16 *)dst_;
- const u16 *src = (const u16 *)src_;
- const u16 *mask = (const u16 *)mask_;
-
- OVS_SET_MASKED(dst[0], src[0], mask[0]);
- OVS_SET_MASKED(dst[1], src[1], mask[1]);
- OVS_SET_MASKED(dst[2], src[2], mask[2]);
-}
-
-static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key,
- const struct ovs_key_ethernet *key,
- const struct ovs_key_ethernet *mask)
-{
- int err;
-
- err = skb_ensure_writable(skb, ETH_HLEN);
- if (unlikely(err))
- return err;
-
- skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
-
- ether_addr_copy_masked(eth_hdr(skb)->h_source, key->eth_src,
- mask->eth_src);
- ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst,
- mask->eth_dst);
-
- skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
-
- ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source);
- ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest);
- return 0;
-}
-
-/* pop_eth does not support VLAN packets as this action is never called
- * for them.
- */
-static int pop_eth(struct sk_buff *skb, struct sw_flow_key *key)
-{
- skb_pull_rcsum(skb, ETH_HLEN);
- skb_reset_mac_header(skb);
- skb_reset_mac_len(skb);
-
- /* safe right before invalidate_flow_key */
- key->mac_proto = MAC_PROTO_NONE;
- invalidate_flow_key(key);
- return 0;
-}
-
-static int push_eth(struct sk_buff *skb, struct sw_flow_key *key,
- const struct ovs_action_push_eth *ethh)
-{
- struct ethhdr *hdr;
-
- /* Add the new Ethernet header */
- if (skb_cow_head(skb, ETH_HLEN) < 0)
- return -ENOMEM;
-
- skb_push(skb, ETH_HLEN);
- skb_reset_mac_header(skb);
- skb_reset_mac_len(skb);
-
- hdr = eth_hdr(skb);
- ether_addr_copy(hdr->h_source, ethh->addresses.eth_src);
- ether_addr_copy(hdr->h_dest, ethh->addresses.eth_dst);
- hdr->h_proto = skb->protocol;
-
- skb_postpush_rcsum(skb, hdr, ETH_HLEN);
-
- /* safe right before invalidate_flow_key */
- key->mac_proto = MAC_PROTO_ETHERNET;
- invalidate_flow_key(key);
- return 0;
-}
-
-static int push_nsh(struct sk_buff *skb, struct sw_flow_key *key,
- const struct nshhdr *nh)
-{
- int err;
-
- err = ovs_nsh_push(skb, nh);
- if (err)
- return err;
-
- /* safe right before invalidate_flow_key */
- key->mac_proto = MAC_PROTO_NONE;
- invalidate_flow_key(key);
- return 0;
-}
-
-static int pop_nsh(struct sk_buff *skb, struct sw_flow_key *key)
-{
- int err;
-
- err = ovs_nsh_pop(skb);
- if (err)
- return err;
-
- /* safe right before invalidate_flow_key */
- if (skb->protocol == htons(ETH_P_TEB))
- key->mac_proto = MAC_PROTO_ETHERNET;
- else
- key->mac_proto = MAC_PROTO_NONE;
- invalidate_flow_key(key);
- return 0;
-}
-
-static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh,
- __be32 addr, __be32 new_addr)
-{
- int transport_len = skb->len - skb_transport_offset(skb);
-
- if (nh->frag_off & htons(IP_OFFSET))
- return;
-
- if (nh->protocol == IPPROTO_TCP) {
- if (likely(transport_len >= sizeof(struct tcphdr)))
- inet_proto_csum_replace4(&tcp_hdr(skb)->check, skb,
- addr, new_addr, true);
- } else if (nh->protocol == IPPROTO_UDP) {
- if (likely(transport_len >= sizeof(struct udphdr))) {
- struct udphdr *uh = udp_hdr(skb);
-
- if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
- inet_proto_csum_replace4(&uh->check, skb,
- addr, new_addr, true);
- if (!uh->check)
- uh->check = CSUM_MANGLED_0;
- }
- }
- }
-
-}
-
-static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
- __be32 *addr, __be32 new_addr)
-{
- update_ip_l4_checksum(skb, nh, *addr, new_addr);
- csum_replace4(&nh->check, *addr, new_addr);
- skb_clear_hash(skb);
- *addr = new_addr;
-}
-
-static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto,
- __be32 addr[4], const __be32 new_addr[4])
-{
- int transport_len = skb->len - skb_transport_offset(skb);
-
- if (l4_proto == NEXTHDR_TCP) {
- if (likely(transport_len >= sizeof(struct tcphdr)))
- inet_proto_csum_replace16(&tcp_hdr(skb)->check, skb,
- addr, new_addr, true);
- } else if (l4_proto == NEXTHDR_UDP) {
- if (likely(transport_len >= sizeof(struct udphdr))) {
- struct udphdr *uh = udp_hdr(skb);
-
- if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
- inet_proto_csum_replace16(&uh->check, skb,
- addr, new_addr, true);
- if (!uh->check)
- uh->check = CSUM_MANGLED_0;
- }
- }
- } else if (l4_proto == NEXTHDR_ICMP) {
- if (likely(transport_len >= sizeof(struct icmp6hdr)))
- inet_proto_csum_replace16(&icmp6_hdr(skb)->icmp6_cksum,
- skb, addr, new_addr, true);
- }
-}
-
-static void mask_ipv6_addr(const __be32 old[4], const __be32 addr[4],
- const __be32 mask[4], __be32 masked[4])
-{
- masked[0] = OVS_MASKED(old[0], addr[0], mask[0]);
- masked[1] = OVS_MASKED(old[1], addr[1], mask[1]);
- masked[2] = OVS_MASKED(old[2], addr[2], mask[2]);
- masked[3] = OVS_MASKED(old[3], addr[3], mask[3]);
-}
-
-static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
- __be32 addr[4], const __be32 new_addr[4],
- bool recalculate_csum)
-{
- if (likely(recalculate_csum))
- update_ipv6_checksum(skb, l4_proto, addr, new_addr);
-
- skb_clear_hash(skb);
- memcpy(addr, new_addr, sizeof(__be32[4]));
-}
-
-static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask)
-{
- /* Bits 21-24 are always unmasked, so this retains their values. */
- OVS_SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16));
- OVS_SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8));
- OVS_SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask);
-}
-
-static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl,
- u8 mask)
-{
- new_ttl = OVS_MASKED(nh->ttl, new_ttl, mask);
-
- csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8));
- nh->ttl = new_ttl;
-}
-
-static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *flow_key,
- const struct ovs_key_ipv4 *key,
- const struct ovs_key_ipv4 *mask)
-{
- struct iphdr *nh;
- __be32 new_addr;
- int err;
-
- err = skb_ensure_writable(skb, skb_network_offset(skb) +
- sizeof(struct iphdr));
- if (unlikely(err))
- return err;
-
- nh = ip_hdr(skb);
-
- /* Setting an IP addresses is typically only a side effect of
- * matching on them in the current userspace implementation, so it
- * makes sense to check if the value actually changed.
- */
- if (mask->ipv4_src) {
- new_addr = OVS_MASKED(nh->saddr, key->ipv4_src, mask->ipv4_src);
-
- if (unlikely(new_addr != nh->saddr)) {
- set_ip_addr(skb, nh, &nh->saddr, new_addr);
- flow_key->ipv4.addr.src = new_addr;
- }
- }
- if (mask->ipv4_dst) {
- new_addr = OVS_MASKED(nh->daddr, key->ipv4_dst, mask->ipv4_dst);
-
- if (unlikely(new_addr != nh->daddr)) {
- set_ip_addr(skb, nh, &nh->daddr, new_addr);
- flow_key->ipv4.addr.dst = new_addr;
- }
- }
- if (mask->ipv4_tos) {
- ipv4_change_dsfield(nh, ~mask->ipv4_tos, key->ipv4_tos);
- flow_key->ip.tos = nh->tos;
- }
- if (mask->ipv4_ttl) {
- set_ip_ttl(skb, nh, key->ipv4_ttl, mask->ipv4_ttl);
- flow_key->ip.ttl = nh->ttl;
- }
-
- return 0;
-}
-
-static bool is_ipv6_mask_nonzero(const __be32 addr[4])
-{
- return !!(addr[0] | addr[1] | addr[2] | addr[3]);
-}
-
-static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
- const struct ovs_key_ipv6 *key,
- const struct ovs_key_ipv6 *mask)
-{
- struct ipv6hdr *nh;
- int err;
-
- err = skb_ensure_writable(skb, skb_network_offset(skb) +
- sizeof(struct ipv6hdr));
- if (unlikely(err))
- return err;
-
- nh = ipv6_hdr(skb);
-
- /* Setting an IP addresses is typically only a side effect of
- * matching on them in the current userspace implementation, so it
- * makes sense to check if the value actually changed.
- */
- if (is_ipv6_mask_nonzero(mask->ipv6_src)) {
- __be32 *saddr = (__be32 *)&nh->saddr;
- __be32 masked[4];
-
- mask_ipv6_addr(saddr, key->ipv6_src, mask->ipv6_src, masked);
-
- if (unlikely(memcmp(saddr, masked, sizeof(masked)))) {
- set_ipv6_addr(skb, flow_key->ip.proto, saddr, masked,
- true);
- memcpy(&flow_key->ipv6.addr.src, masked,
- sizeof(flow_key->ipv6.addr.src));
- }
- }
- if (is_ipv6_mask_nonzero(mask->ipv6_dst)) {
- unsigned int offset = 0;
- int flags = IP6_FH_F_SKIP_RH;
- bool recalc_csum = true;
- __be32 *daddr = (__be32 *)&nh->daddr;
- __be32 masked[4];
-
- mask_ipv6_addr(daddr, key->ipv6_dst, mask->ipv6_dst, masked);
-
- if (unlikely(memcmp(daddr, masked, sizeof(masked)))) {
- if (ipv6_ext_hdr(nh->nexthdr))
- recalc_csum = (ipv6_find_hdr(skb, &offset,
- NEXTHDR_ROUTING,
- NULL, &flags)
- != NEXTHDR_ROUTING);
-
- set_ipv6_addr(skb, flow_key->ip.proto, daddr, masked,
- recalc_csum);
- memcpy(&flow_key->ipv6.addr.dst, masked,
- sizeof(flow_key->ipv6.addr.dst));
- }
- }
- if (mask->ipv6_tclass) {
- ipv6_change_dsfield(nh, ~mask->ipv6_tclass, key->ipv6_tclass);
- flow_key->ip.tos = ipv6_get_dsfield(nh);
- }
- if (mask->ipv6_label) {
- set_ipv6_fl(nh, ntohl(key->ipv6_label),
- ntohl(mask->ipv6_label));
- flow_key->ipv6.label =
- *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
- }
- if (mask->ipv6_hlimit) {
- OVS_SET_MASKED(nh->hop_limit, key->ipv6_hlimit,
- mask->ipv6_hlimit);
- flow_key->ip.ttl = nh->hop_limit;
- }
- return 0;
-}
-
-static int set_nsh(struct sk_buff *skb, struct sw_flow_key *flow_key,
- const struct nlattr *a)
-{
- struct nshhdr *nh;
- size_t length;
- int err;
- u8 flags;
- u8 ttl;
- int i;
-
- struct ovs_key_nsh key;
- struct ovs_key_nsh mask;
-
- err = nsh_key_from_nlattr(a, &key, &mask);
- if (err)
- return err;
-
- /* Make sure the NSH base header is there */
- if (!pskb_may_pull(skb, skb_network_offset(skb) + NSH_BASE_HDR_LEN))
- return -ENOMEM;
-
- nh = nsh_hdr(skb);
- length = nsh_hdr_len(nh);
-
- /* Make sure the whole NSH header is there */
- err = skb_ensure_writable(skb, skb_network_offset(skb) +
- length);
- if (unlikely(err))
- return err;
-
- nh = nsh_hdr(skb);
- skb_postpull_rcsum(skb, nh, length);
- flags = nsh_get_flags(nh);
- flags = OVS_MASKED(flags, key.base.flags, mask.base.flags);
- flow_key->nsh.base.flags = flags;
- ttl = nsh_get_ttl(nh);
- ttl = OVS_MASKED(ttl, key.base.ttl, mask.base.ttl);
- flow_key->nsh.base.ttl = ttl;
- nsh_set_flags_and_ttl(nh, flags, ttl);
- nh->path_hdr = OVS_MASKED(nh->path_hdr, key.base.path_hdr,
- mask.base.path_hdr);
- flow_key->nsh.base.path_hdr = nh->path_hdr;
- switch (nh->mdtype) {
- case NSH_M_TYPE1:
- for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++) {
- nh->md1.context[i] =
- OVS_MASKED(nh->md1.context[i], key.context[i],
- mask.context[i]);
- }
- memcpy(flow_key->nsh.context, nh->md1.context,
- sizeof(nh->md1.context));
- break;
- case NSH_M_TYPE2:
- memset(flow_key->nsh.context, 0,
- sizeof(flow_key->nsh.context));
- break;
- default:
- return -EINVAL;
- }
- skb_postpush_rcsum(skb, nh, length);
- return 0;
-}
-
-/* Must follow skb_ensure_writable() since that can move the skb data. */
-static void set_tp_port(struct sk_buff *skb, __be16 *port,
- __be16 new_port, __sum16 *check)
-{
- inet_proto_csum_replace2(check, skb, *port, new_port, false);
- *port = new_port;
-}
-
-static int set_udp(struct sk_buff *skb, struct sw_flow_key *flow_key,
- const struct ovs_key_udp *key,
- const struct ovs_key_udp *mask)
-{
- struct udphdr *uh;
- __be16 src, dst;
- int err;
-
- err = skb_ensure_writable(skb, skb_transport_offset(skb) +
- sizeof(struct udphdr));
- if (unlikely(err))
- return err;
-
- uh = udp_hdr(skb);
- /* Either of the masks is non-zero, so do not bother checking them. */
- src = OVS_MASKED(uh->source, key->udp_src, mask->udp_src);
- dst = OVS_MASKED(uh->dest, key->udp_dst, mask->udp_dst);
-
- if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) {
- if (likely(src != uh->source)) {
- set_tp_port(skb, &uh->source, src, &uh->check);
- flow_key->tp.src = src;
- }
- if (likely(dst != uh->dest)) {
- set_tp_port(skb, &uh->dest, dst, &uh->check);
- flow_key->tp.dst = dst;
- }
-
- if (unlikely(!uh->check))
- uh->check = CSUM_MANGLED_0;
- } else {
- uh->source = src;
- uh->dest = dst;
- flow_key->tp.src = src;
- flow_key->tp.dst = dst;
- }
-
- skb_clear_hash(skb);
-
- return 0;
-}
-
-static int set_tcp(struct sk_buff *skb, struct sw_flow_key *flow_key,
- const struct ovs_key_tcp *key,
- const struct ovs_key_tcp *mask)
-{
- struct tcphdr *th;
- __be16 src, dst;
- int err;
-
- err = skb_ensure_writable(skb, skb_transport_offset(skb) +
- sizeof(struct tcphdr));
- if (unlikely(err))
- return err;
-
- th = tcp_hdr(skb);
- src = OVS_MASKED(th->source, key->tcp_src, mask->tcp_src);
- if (likely(src != th->source)) {
- set_tp_port(skb, &th->source, src, &th->check);
- flow_key->tp.src = src;
- }
- dst = OVS_MASKED(th->dest, key->tcp_dst, mask->tcp_dst);
- if (likely(dst != th->dest)) {
- set_tp_port(skb, &th->dest, dst, &th->check);
- flow_key->tp.dst = dst;
- }
- skb_clear_hash(skb);
-
- return 0;
-}
-
-static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
- const struct ovs_key_sctp *key,
- const struct ovs_key_sctp *mask)
-{
- unsigned int sctphoff = skb_transport_offset(skb);
- struct sctphdr *sh;
- __le32 old_correct_csum, new_csum, old_csum;
- int err;
-
- err = skb_ensure_writable(skb, sctphoff + sizeof(struct sctphdr));
- if (unlikely(err))
- return err;
-
- sh = sctp_hdr(skb);
- old_csum = sh->checksum;
- old_correct_csum = sctp_compute_cksum(skb, sctphoff);
-
- sh->source = OVS_MASKED(sh->source, key->sctp_src, mask->sctp_src);
- sh->dest = OVS_MASKED(sh->dest, key->sctp_dst, mask->sctp_dst);
-
- new_csum = sctp_compute_cksum(skb, sctphoff);
-
- /* Carry any checksum errors through. */
- sh->checksum = old_csum ^ old_correct_csum ^ new_csum;
-
- skb_clear_hash(skb);
- flow_key->tp.src = sh->source;
- flow_key->tp.dst = sh->dest;
-
- return 0;
-}
-
-static int ovs_vport_output(OVS_VPORT_OUTPUT_PARAMS)
-{
- struct ovs_frag_data *data = this_cpu_ptr(&ovs_frag_data_storage);
- struct vport *vport = data->vport;
-
- if (skb_cow_head(skb, data->l2_len) < 0) {
- kfree_skb(skb);
- return -ENOMEM;
- }
-
- __skb_dst_copy(skb, data->dst);
- *OVS_GSO_CB(skb) = data->cb;
- ovs_skb_set_inner_protocol(skb, data->inner_protocol);
- if (data->vlan_tci & VLAN_CFI_MASK)
- __vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci & ~VLAN_CFI_MASK);
- else
- __vlan_hwaccel_clear_tag(skb);
-
- /* Reconstruct the MAC header. */
- skb_push(skb, data->l2_len);
- memcpy(skb->data, &data->l2_data, data->l2_len);
- skb_postpush_rcsum(skb, skb->data, data->l2_len);
- skb_reset_mac_header(skb);
-
- if (eth_p_mpls(skb->protocol)) {
- skb->inner_network_header = skb->network_header;
- skb_set_network_header(skb, data->network_offset);
- skb_reset_mac_len(skb);
- }
-
- ovs_vport_send(vport, skb, data->mac_proto);
- return 0;
-}
-
-static unsigned int
-ovs_dst_get_mtu(const struct dst_entry *dst)
-{
- return dst->dev->mtu;
-}
-
-static struct dst_ops ovs_dst_ops = {
- .family = AF_UNSPEC,
- .mtu = ovs_dst_get_mtu,
-};
-
-/* prepare_frag() is called once per (larger-than-MTU) frame; its inverse is
- * ovs_vport_output(), which is called once per fragmented packet.
- */
-static void prepare_frag(struct vport *vport, struct sk_buff *skb,
- u16 orig_network_offset, u8 mac_proto)
-{
- unsigned int hlen = skb_network_offset(skb);
- struct ovs_frag_data *data;
-
- data = this_cpu_ptr(&ovs_frag_data_storage);
- data->dst = (unsigned long) skb_dst(skb);
- data->vport = vport;
- data->cb = *OVS_GSO_CB(skb);
- data->inner_protocol = ovs_skb_get_inner_protocol(skb);
- data->network_offset = orig_network_offset;
- if (skb_vlan_tag_present(skb))
- data->vlan_tci = skb_vlan_tag_get(skb) | VLAN_CFI_MASK;
- else
- data->vlan_tci = 0;
- data->vlan_proto = skb->vlan_proto;
- data->mac_proto = mac_proto;
- data->l2_len = hlen;
- memcpy(&data->l2_data, skb->data, hlen);
-
- memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
- skb_pull(skb, hlen);
-}
-
-static void ovs_fragment(struct net *net, struct vport *vport,
- struct sk_buff *skb, u16 mru,
- struct sw_flow_key *key)
-{
- u16 orig_network_offset = 0;
-
- if (eth_p_mpls(skb->protocol)) {
- orig_network_offset = skb_network_offset(skb);
- skb->network_header = skb->inner_network_header;
- }
-
- if (skb_network_offset(skb) > MAX_L2_LEN) {
- OVS_NLERR(1, "L2 header too long to fragment");
- goto err;
- }
-
- if (key->eth.type == htons(ETH_P_IP)) {
- struct dst_entry ovs_dst;
- unsigned long orig_dst;
-
- prepare_frag(vport, skb, orig_network_offset,
- ovs_key_mac_proto(key));
- dst_init(&ovs_dst, &ovs_dst_ops, NULL, 1,
- DST_OBSOLETE_NONE, DST_NOCOUNT);
- ovs_dst.dev = vport->dev;
-
- orig_dst = (unsigned long) skb_dst(skb);
- skb_dst_set_noref(skb, &ovs_dst);
- IPCB(skb)->frag_max_size = mru;
-
- ip_do_fragment(net, skb->sk, skb, ovs_vport_output);
- refdst_drop(orig_dst);
- } else if (key->eth.type == htons(ETH_P_IPV6)) {
- const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
- unsigned long orig_dst;
- struct rt6_info ovs_rt;
-
- if (!v6ops)
- goto err;
-
- prepare_frag(vport, skb, orig_network_offset,
- ovs_key_mac_proto(key));
- memset(&ovs_rt, 0, sizeof(ovs_rt));
- dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1,
- DST_OBSOLETE_NONE, DST_NOCOUNT);
- ovs_rt.dst.dev = vport->dev;
-
- orig_dst = (unsigned long) skb_dst(skb);
- skb_dst_set_noref(skb, &ovs_rt.dst);
- IP6CB(skb)->frag_max_size = mru;
-#ifdef HAVE_IP_LOCAL_OUT_TAKES_NET
- v6ops->fragment(net, skb->sk, skb, ovs_vport_output);
-#else
- v6ops->fragment(skb->sk, skb, ovs_vport_output);
-#endif
- refdst_drop(orig_dst);
- } else {
- WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.",
- ovs_vport_name(vport), ntohs(key->eth.type), mru,
- vport->dev->mtu);
- goto err;
- }
-
- return;
-err:
- kfree_skb(skb);
-}
-
-static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
- struct sw_flow_key *key)
-{
- struct vport *vport = ovs_vport_rcu(dp, out_port);
-
- if (likely(vport)) {
- u16 mru = OVS_CB(skb)->mru;
- u32 cutlen = OVS_CB(skb)->cutlen;
-
- if (unlikely(cutlen > 0)) {
- if (skb->len - cutlen > ovs_mac_header_len(key))
- pskb_trim(skb, skb->len - cutlen);
- else
- pskb_trim(skb, ovs_mac_header_len(key));
- }
-
- if (likely(!mru ||
- (skb->len <= mru + vport->dev->hard_header_len))) {
- ovs_vport_send(vport, skb, ovs_key_mac_proto(key));
- } else if (mru <= vport->dev->mtu) {
- struct net *net = ovs_dp_get_net(dp);
-
- ovs_fragment(net, vport, skb, mru, key);
- } else {
- OVS_NLERR(true, "Cannot fragment IP frames");
- kfree_skb(skb);
- }
- } else {
- kfree_skb(skb);
- }
-}
-
-static int output_userspace(struct datapath *dp, struct sk_buff *skb,
- struct sw_flow_key *key, const struct nlattr *attr,
- const struct nlattr *actions, int actions_len,
- uint32_t cutlen)
-{
- struct dp_upcall_info upcall;
- const struct nlattr *a;
- int rem, err;
-
- memset(&upcall, 0, sizeof(upcall));
- upcall.cmd = OVS_PACKET_CMD_ACTION;
- upcall.mru = OVS_CB(skb)->mru;
-
- SKB_INIT_FILL_METADATA_DST(skb);
- for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
- a = nla_next(a, &rem)) {
- switch (nla_type(a)) {
- case OVS_USERSPACE_ATTR_USERDATA:
- upcall.userdata = a;
- break;
-
- case OVS_USERSPACE_ATTR_PID:
- upcall.portid = nla_get_u32(a);
- break;
-
- case OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: {
- /* Get out tunnel info. */
- struct vport *vport;
-
- vport = ovs_vport_rcu(dp, nla_get_u32(a));
- if (vport) {
- err = dev_fill_metadata_dst(vport->dev, skb);
- if (!err)
- upcall.egress_tun_info = skb_tunnel_info(skb);
- }
-
- break;
- }
-
- case OVS_USERSPACE_ATTR_ACTIONS: {
- /* Include actions. */
- upcall.actions = actions;
- upcall.actions_len = actions_len;
- break;
- }
-
- } /* End of switch. */
- }
-
- err = ovs_dp_upcall(dp, skb, key, &upcall, cutlen);
- SKB_RESTORE_FILL_METADATA_DST(skb);
- return err;
-}
-
-/* When 'last' is true, sample() should always consume the 'skb'.
- * Otherwise, sample() should keep 'skb' intact regardless what
- * actions are executed within sample().
- */
-static int sample(struct datapath *dp, struct sk_buff *skb,
- struct sw_flow_key *key, const struct nlattr *attr,
- bool last)
-{
- struct nlattr *actions;
- struct nlattr *sample_arg;
- int rem = nla_len(attr);
- const struct sample_arg *arg;
- bool clone_flow_key;
-
- /* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */
- sample_arg = nla_data(attr);
- arg = nla_data(sample_arg);
- actions = nla_next(sample_arg, &rem);
-
- if ((arg->probability != U32_MAX) &&
- (!arg->probability || prandom_u32() > arg->probability)) {
- if (last)
- consume_skb(skb);
- return 0;
- }
-
- clone_flow_key = !arg->exec;
- return clone_execute(dp, skb, key, 0, actions, rem, last,
- clone_flow_key);
-}
-
-/* When 'last' is true, clone() should always consume the 'skb'.
- * Otherwise, clone() should keep 'skb' intact regardless what
- * actions are executed within clone().
- */
-static int clone(struct datapath *dp, struct sk_buff *skb,
- struct sw_flow_key *key, const struct nlattr *attr,
- bool last)
-{
- struct nlattr *actions;
- struct nlattr *clone_arg;
- int rem = nla_len(attr);
- bool dont_clone_flow_key;
-
- /* The first action is always 'OVS_CLONE_ATTR_ARG'. */
- clone_arg = nla_data(attr);
- dont_clone_flow_key = nla_get_u32(clone_arg);
- actions = nla_next(clone_arg, &rem);
-
- return clone_execute(dp, skb, key, 0, actions, rem, last,
- !dont_clone_flow_key);
-}
-
-static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
- const struct nlattr *attr)
-{
- struct ovs_action_hash *hash_act = nla_data(attr);
- u32 hash = 0;
-
- /* OVS_HASH_ALG_L4 is the only possible hash algorithm. */
- hash = skb_get_hash(skb);
- hash = jhash_1word(hash, hash_act->hash_basis);
- if (!hash)
- hash = 0x1;
-
- key->ovs_flow_hash = hash;
-}
-
-static int execute_set_action(struct sk_buff *skb,
- struct sw_flow_key *flow_key,
- const struct nlattr *a)
-{
- /* Only tunnel set execution is supported without a mask. */
- if (nla_type(a) == OVS_KEY_ATTR_TUNNEL_INFO) {
- struct ovs_tunnel_info *tun = nla_data(a);
-
- ovs_skb_dst_drop(skb);
- ovs_dst_hold((struct dst_entry *)tun->tun_dst);
- ovs_skb_dst_set(skb, (struct dst_entry *)tun->tun_dst);
- return 0;
- }
-
- return -EINVAL;
-}
-
-/* Mask is at the midpoint of the data. */
-#define get_mask(a, type) ((const type)nla_data(a) + 1)
-
-static int execute_masked_set_action(struct sk_buff *skb,
- struct sw_flow_key *flow_key,
- const struct nlattr *a)
-{
- int err = 0;
-
- switch (nla_type(a)) {
- case OVS_KEY_ATTR_PRIORITY:
- OVS_SET_MASKED(skb->priority, nla_get_u32(a),
- *get_mask(a, u32 *));
- flow_key->phy.priority = skb->priority;
- break;
-
- case OVS_KEY_ATTR_SKB_MARK:
- OVS_SET_MASKED(skb->mark, nla_get_u32(a), *get_mask(a, u32 *));
- flow_key->phy.skb_mark = skb->mark;
- break;
-
- case OVS_KEY_ATTR_TUNNEL_INFO:
- /* Masked data not supported for tunnel. */
- err = -EINVAL;
- break;
-
- case OVS_KEY_ATTR_ETHERNET:
- err = set_eth_addr(skb, flow_key, nla_data(a),
- get_mask(a, struct ovs_key_ethernet *));
- break;
-
- case OVS_KEY_ATTR_NSH:
- err = set_nsh(skb, flow_key, a);
- break;
-
- case OVS_KEY_ATTR_IPV4:
- err = set_ipv4(skb, flow_key, nla_data(a),
- get_mask(a, struct ovs_key_ipv4 *));
- break;
-
- case OVS_KEY_ATTR_IPV6:
- err = set_ipv6(skb, flow_key, nla_data(a),
- get_mask(a, struct ovs_key_ipv6 *));
- break;
-
- case OVS_KEY_ATTR_TCP:
- err = set_tcp(skb, flow_key, nla_data(a),
- get_mask(a, struct ovs_key_tcp *));
- break;
-
- case OVS_KEY_ATTR_UDP:
- err = set_udp(skb, flow_key, nla_data(a),
- get_mask(a, struct ovs_key_udp *));
- break;
-
- case OVS_KEY_ATTR_SCTP:
- err = set_sctp(skb, flow_key, nla_data(a),
- get_mask(a, struct ovs_key_sctp *));
- break;
-
- case OVS_KEY_ATTR_MPLS:
- err = set_mpls(skb, flow_key, nla_data(a), get_mask(a,
- __be32 *));
- break;
-
- case OVS_KEY_ATTR_CT_STATE:
- case OVS_KEY_ATTR_CT_ZONE:
- case OVS_KEY_ATTR_CT_MARK:
- case OVS_KEY_ATTR_CT_LABELS:
- case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4:
- case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6:
- err = -EINVAL;
- break;
- }
-
- return err;
-}
-
-static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
- struct sw_flow_key *key,
- const struct nlattr *a, bool last)
-{
- u32 recirc_id;
-
- if (!is_flow_key_valid(key)) {
- int err;
-
- err = ovs_flow_key_update(skb, key);
- if (err)
- return err;
- }
- BUG_ON(!is_flow_key_valid(key));
-
- recirc_id = nla_get_u32(a);
- return clone_execute(dp, skb, key, recirc_id, NULL, 0, last, true);
-}
-
-static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb,
- struct sw_flow_key *key,
- const struct nlattr *attr, bool last)
-{
- const struct nlattr *actions, *cpl_arg;
- const struct check_pkt_len_arg *arg;
- int rem = nla_len(attr);
- bool clone_flow_key;
-
- /* The first netlink attribute in 'attr' is always
- * 'OVS_CHECK_PKT_LEN_ATTR_ARG'.
- */
- cpl_arg = nla_data(attr);
- arg = nla_data(cpl_arg);
-
- if (skb->len <= arg->pkt_len) {
- /* Second netlink attribute in 'attr' is always
- * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL'.
- */
- actions = nla_next(cpl_arg, &rem);
- clone_flow_key = !arg->exec_for_lesser_equal;
- } else {
- /* Third netlink attribute in 'attr' is always
- * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER'.
- */
- actions = nla_next(cpl_arg, &rem);
- actions = nla_next(actions, &rem);
- clone_flow_key = !arg->exec_for_greater;
- }
-
- return clone_execute(dp, skb, key, 0, nla_data(actions),
- nla_len(actions), last, clone_flow_key);
-}
-
-/* Execute a list of actions against 'skb'. */
-static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
- struct sw_flow_key *key,
- const struct nlattr *attr, int len)
-{
- const struct nlattr *a;
- int rem;
-
- for (a = attr, rem = len; rem > 0;
- a = nla_next(a, &rem)) {
- int err = 0;
-
- switch (nla_type(a)) {
- case OVS_ACTION_ATTR_OUTPUT: {
- int port = nla_get_u32(a);
- struct sk_buff *clone;
-
- /* Every output action needs a separate clone
- * of 'skb', In case the output action is the
- * last action, cloning can be avoided.
- */
- if (nla_is_last(a, rem)) {
- do_output(dp, skb, port, key);
- /* 'skb' has been used for output.
- */
- return 0;
- }
-
- clone = skb_clone(skb, GFP_ATOMIC);
- if (clone)
- do_output(dp, clone, port, key);
- OVS_CB(skb)->cutlen = 0;
- break;
- }
-
- case OVS_ACTION_ATTR_TRUNC: {
- struct ovs_action_trunc *trunc = nla_data(a);
-
- if (skb->len > trunc->max_len)
- OVS_CB(skb)->cutlen = skb->len - trunc->max_len;
- break;
- }
-
- case OVS_ACTION_ATTR_USERSPACE:
- output_userspace(dp, skb, key, a, attr,
- len, OVS_CB(skb)->cutlen);
- OVS_CB(skb)->cutlen = 0;
- break;
-
- case OVS_ACTION_ATTR_HASH:
- execute_hash(skb, key, a);
- break;
-
- case OVS_ACTION_ATTR_PUSH_MPLS:
- err = push_mpls(skb, key, nla_data(a));
- break;
-
- case OVS_ACTION_ATTR_POP_MPLS:
- err = pop_mpls(skb, key, nla_get_be16(a));
- break;
-
- case OVS_ACTION_ATTR_PUSH_VLAN:
- err = push_vlan(skb, key, nla_data(a));
- break;
-
- case OVS_ACTION_ATTR_POP_VLAN:
- err = pop_vlan(skb, key);
- break;
-
- case OVS_ACTION_ATTR_RECIRC: {
- bool last = nla_is_last(a, rem);
-
- err = execute_recirc(dp, skb, key, a, last);
- if (last) {
- /* If this is the last action, the skb has
- * been consumed or freed.
- * Return immediately.
- */
- return err;
- }
- break;
- }
-
- case OVS_ACTION_ATTR_SET:
- err = execute_set_action(skb, key, nla_data(a));
- break;
-
- case OVS_ACTION_ATTR_SET_MASKED:
- case OVS_ACTION_ATTR_SET_TO_MASKED:
- err = execute_masked_set_action(skb, key, nla_data(a));
- break;
-
- case OVS_ACTION_ATTR_SAMPLE: {
- bool last = nla_is_last(a, rem);
-
- err = sample(dp, skb, key, a, last);
- if (last)
- return err;
-
- break;
- }
-
- case OVS_ACTION_ATTR_CT:
- if (!is_flow_key_valid(key)) {
- err = ovs_flow_key_update(skb, key);
- if (err)
- return err;
- }
-
- err = ovs_ct_execute(ovs_dp_get_net(dp), skb, key,
- nla_data(a));
-
- /* Hide stolen IP fragments from user space. */
- if (err)
- return err == -EINPROGRESS ? 0 : err;
- break;
-
- case OVS_ACTION_ATTR_CT_CLEAR:
- err = ovs_ct_clear(skb, key);
- break;
-
- case OVS_ACTION_ATTR_PUSH_ETH:
- err = push_eth(skb, key, nla_data(a));
- break;
-
- case OVS_ACTION_ATTR_POP_ETH:
- err = pop_eth(skb, key);
- break;
-
- case OVS_ACTION_ATTR_PUSH_NSH: {
- u8 buffer[NSH_HDR_MAX_LEN];
- struct nshhdr *nh = (struct nshhdr *)buffer;
-
- err = nsh_hdr_from_nlattr(nla_data(a), nh,
- NSH_HDR_MAX_LEN);
- if (unlikely(err))
- break;
- err = push_nsh(skb, key, nh);
- break;
- }
-
- case OVS_ACTION_ATTR_POP_NSH:
- err = pop_nsh(skb, key);
- break;
-
- case OVS_ACTION_ATTR_METER:
- if (ovs_meter_execute(dp, skb, key, nla_get_u32(a))) {
- consume_skb(skb);
- return 0;
- }
- break;
-
- case OVS_ACTION_ATTR_CLONE: {
- bool last = nla_is_last(a, rem);
-
- err = clone(dp, skb, key, a, last);
- if (last)
- return err;
- break;
- }
-
- case OVS_ACTION_ATTR_CHECK_PKT_LEN: {
- bool last = nla_is_last(a, rem);
-
- err = execute_check_pkt_len(dp, skb, key, a, last);
- if (last)
- return err;
-
- break;
- }
- }
-
- if (unlikely(err)) {
- kfree_skb(skb);
- return err;
- }
- }
-
- consume_skb(skb);
- return 0;
-}
-
-/* Execute the actions on the clone of the packet. The effect of the
- * execution does not affect the original 'skb' nor the original 'key'.
- *
- * The execution may be deferred in case the actions can not be executed
- * immediately.
- */
-static int clone_execute(struct datapath *dp, struct sk_buff *skb,
- struct sw_flow_key *key, u32 recirc_id,
- const struct nlattr *actions, int len,
- bool last, bool clone_flow_key)
-{
- struct deferred_action *da;
- struct sw_flow_key *clone;
-
- skb = last ? skb : skb_clone(skb, GFP_ATOMIC);
- if (!skb) {
- /* Out of memory, skip this action.
- */
- return 0;
- }
-
- /* When clone_flow_key is false, the 'key' will not be change
- * by the actions, then the 'key' can be used directly.
- * Otherwise, try to clone key from the next recursion level of
- * 'flow_keys'. If clone is successful, execute the actions
- * without deferring.
- */
- clone = clone_flow_key ? clone_key(key) : key;
- if (clone) {
- int err = 0;
-
- if (actions) { /* Sample action */
- if (clone_flow_key)
- __this_cpu_inc(exec_actions_level);
-
- err = do_execute_actions(dp, skb, clone,
- actions, len);
-
- if (clone_flow_key)
- __this_cpu_dec(exec_actions_level);
- } else { /* Recirc action */
- clone->recirc_id = recirc_id;
- ovs_dp_process_packet(skb, clone);
- }
- return err;
- }
-
- /* Out of 'flow_keys' space. Defer actions */
- da = add_deferred_actions(skb, key, actions, len);
- if (da) {
- if (!actions) { /* Recirc action */
- key = &da->pkt_key;
- key->recirc_id = recirc_id;
- }
- } else {
- /* Out of per CPU action FIFO space. Drop the 'skb' and
- * log an error.
- */
- kfree_skb(skb);
-
- if (net_ratelimit()) {
- if (actions) { /* Sample action */
- pr_warn("%s: deferred action limit reached, drop sample action\n",
- ovs_dp_name(dp));
- } else { /* Recirc action */
- pr_warn("%s: deferred action limit reached, drop recirc action\n",
- ovs_dp_name(dp));
- }
- }
- }
- return 0;
-}
-
-static void process_deferred_actions(struct datapath *dp)
-{
- struct action_fifo *fifo = this_cpu_ptr(action_fifos);
-
- /* Do not touch the FIFO in case there is no deferred actions. */
- if (action_fifo_is_empty(fifo))
- return;
-
- /* Finishing executing all deferred actions. */
- do {
- struct deferred_action *da = action_fifo_get(fifo);
- struct sk_buff *skb = da->skb;
- struct sw_flow_key *key = &da->pkt_key;
- const struct nlattr *actions = da->actions;
- int actions_len = da->actions_len;
-
- if (actions)
- do_execute_actions(dp, skb, key, actions, actions_len);
- else
- ovs_dp_process_packet(skb, key);
- } while (!action_fifo_is_empty(fifo));
-
- /* Reset FIFO for the next packet. */
- action_fifo_init(fifo);
-}
-
-/* Execute a list of actions against 'skb'. */
-int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
- const struct sw_flow_actions *acts,
- struct sw_flow_key *key)
-{
- int err, level;
-
- level = __this_cpu_inc_return(exec_actions_level);
- if (unlikely(level > OVS_RECURSION_LIMIT)) {
- net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n",
- ovs_dp_name(dp));
- kfree_skb(skb);
- err = -ENETDOWN;
- goto out;
- }
-
- OVS_CB(skb)->acts_origlen = acts->orig_len;
- err = do_execute_actions(dp, skb, key,
- acts->actions, acts->actions_len);
-
- if (level == 1)
- process_deferred_actions(dp);
-
-out:
- __this_cpu_dec(exec_actions_level);
- return err;
-}
-
-int action_fifos_init(void)
-{
- action_fifos = alloc_percpu(struct action_fifo);
- if (!action_fifos)
- return -ENOMEM;
-
- flow_keys = alloc_percpu(struct action_flow_keys);
- if (!flow_keys) {
- free_percpu(action_fifos);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-void action_fifos_exit(void)
-{
- free_percpu(action_fifos);
- free_percpu(flow_keys);
-}
diff --git a/datapath/compat.h b/datapath/compat.h
deleted file mode 100644
index b820251a4..000000000
--- a/datapath/compat.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2007-2015 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
- */
-
-#ifndef COMPAT_H
-#define COMPAT_H 1
-
-#include <linux/in.h>
-#include <linux/in_route.h>
-#include <linux/netlink.h>
-#include <net/ip.h>
-#include <net/route.h>
-#include <net/xfrm.h>
-#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
-#include <net/netfilter/nf_conntrack_count.h>
-
-/* Fix grsecurity patch compilation issue. */
-#ifdef CONSTIFY_PLUGIN
-#include <linux/cache.h>
-#undef __read_mostly
-#define __read_mostly
-#endif
-
-/* Even though vanilla 3.10 kernel has grp->id, RHEL 7 kernel is missing
- * this field. */
-#ifdef HAVE_GENL_MULTICAST_GROUP_WITH_ID
-#define GROUP_ID(grp) ((grp)->id)
-#else
-#define GROUP_ID(grp) 0
-#endif
-
-#ifdef HAVE_NF_IPV6_OPS_FRAGMENT
-static inline int __init ip6_output_init(void) { return 0; }
-static inline void ip6_output_exit(void) { }
-#else
-int __init ip6_output_init(void);
-void ip6_output_exit(void);
-#endif
-
-static inline int __init compat_init(void)
-{
- int err;
-
- err = ipfrag_init();
- if (err)
- return err;
-
- err = nf_ct_frag6_init();
- if (err)
- goto error_ipfrag_exit;
-
- err = ip6_output_init();
- if (err)
- goto error_frag6_exit;
-
- err = rpl_nf_conncount_modinit();
- if (err)
- goto error_nf_conncount_exit;
-
- return 0;
-
-error_nf_conncount_exit:
- rpl_nf_conncount_modexit();
-error_frag6_exit:
- nf_ct_frag6_cleanup();
-error_ipfrag_exit:
- rpl_ipfrag_fini();
- return err;
-}
-static inline void compat_exit(void)
-{
- rpl_nf_conncount_modexit();
- ip6_output_exit();
- nf_ct_frag6_cleanup();
- rpl_ipfrag_fini();
-}
-
-#endif /* compat.h */
diff --git a/datapath/conntrack.c b/datapath/conntrack.c
deleted file mode 100644
index fc268aeae..000000000
--- a/datapath/conntrack.c
+++ /dev/null
@@ -1,2413 +0,0 @@
-/*
- * Copyright (c) 2015 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- */
-
-#include <linux/kconfig.h>
-#include <linux/version.h>
-
-#if IS_ENABLED(CONFIG_NF_CONNTRACK)
-
-#include <linux/module.h>
-#include <linux/openvswitch.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include <linux/sctp.h>
-#include <linux/static_key.h>
-#include <net/ip.h>
-#include <net/genetlink.h>
-#include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack_count.h>
-#include <net/netfilter/nf_conntrack_helper.h>
-#include <net/netfilter/nf_conntrack_labels.h>
-#include <net/netfilter/nf_conntrack_seqadj.h>
-#include <net/netfilter/nf_conntrack_timeout.h>
-#include <net/netfilter/nf_conntrack_zones.h>
-#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
-#include <net/ipv6_frag.h>
-
-/* Upstream commit 4806e975729f ("netfilter: replace NF_NAT_NEEDED with
- * IS_ENABLED(CONFIG_NF_NAT)") replaces the config checking on NF_NAT_NEEDED
- * with CONFIG_NF_NAT. We will replace the checking on NF_NAT_NEEDED for the
- * newer kernel with the marco in order to keep backward compatiblity.
- */
-#ifndef HAVE_CONFIG_NF_NAT_NEEDED
-#define CONFIG_NF_NAT_NEEDED CONFIG_NF_NAT
-#endif
-
-#if IS_ENABLED(CONFIG_NF_NAT_NEEDED)
-/* Starting from upstream commit 3bf195ae6037 ("netfilter: nat: merge
- * nf_nat_ipv4,6 into nat core") in kernel 5.1. nf_nat_ipv4,6 are merged
- * into nf_nat. In order to keep backward compatibility, we keep the config
- * checking as is for the old kernel, and replace them with marco for the
- * new kernel. */
-#ifdef HAVE_UPSTREAM_NF_NAT
-#include <net/netfilter/nf_nat.h>
-#define CONFIG_NF_NAT_IPV4 CONFIG_NF_NAT
-#define CONFIG_NF_NAT_IPV6 CONFIG_IPV6
-#else
-#include <linux/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_l3proto.h>
-#endif /* HAVE_UPSTREAM_NF_NAT */
-#endif /* CONFIG_NF_NAT_NEEDED */
-
-#include "datapath.h"
-#include "conntrack.h"
-#include "flow.h"
-#include "flow_netlink.h"
-#include "gso.h"
-
-#ifndef HAVE_NF_NAT_RANGE2
-#define nf_nat_range2 nf_nat_range
-#endif
-
-struct ovs_ct_len_tbl {
- int maxlen;
- int minlen;
-};
-
-/* Metadata mark for masked write to conntrack mark */
-struct md_mark {
- u32 value;
- u32 mask;
-};
-
-/* Metadata label for masked write to conntrack label. */
-struct md_labels {
- struct ovs_key_ct_labels value;
- struct ovs_key_ct_labels mask;
-};
-
-enum ovs_ct_nat {
- OVS_CT_NAT = 1 << 0, /* NAT for committed connections only. */
- OVS_CT_SRC_NAT = 1 << 1, /* Source NAT for NEW connections. */
- OVS_CT_DST_NAT = 1 << 2, /* Destination NAT for NEW connections. */
-};
-
-/* Conntrack action context for execution. */
-struct ovs_conntrack_info {
- struct nf_conntrack_helper *helper;
- struct nf_conntrack_zone zone;
- struct nf_conn *ct;
- u8 commit : 1;
- u8 nat : 3; /* enum ovs_ct_nat */
- u8 random_fully_compat : 1; /* bool */
- u8 force : 1;
- u8 have_eventmask : 1;
- u16 family;
- u32 eventmask; /* Mask of 1 << IPCT_*. */
- struct md_mark mark;
- struct md_labels labels;
- char timeout[CTNL_TIMEOUT_NAME_MAX];
- struct nf_ct_timeout *nf_ct_timeout;
-#if IS_ENABLED(CONFIG_NF_NAT_NEEDED)
- struct nf_nat_range2 range; /* Only present for SRC NAT and DST NAT. */
-#endif
-};
-
-#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
-#define OVS_CT_LIMIT_UNLIMITED 0
-#define OVS_CT_LIMIT_DEFAULT OVS_CT_LIMIT_UNLIMITED
-#define CT_LIMIT_HASH_BUCKETS 512
-static DEFINE_STATIC_KEY_FALSE(ovs_ct_limit_enabled);
-
-struct ovs_ct_limit {
- /* Elements in ovs_ct_limit_info->limits hash table */
- struct hlist_node hlist_node;
- struct rcu_head rcu;
- u16 zone;
- u32 limit;
-};
-
-struct ovs_ct_limit_info {
- u32 default_limit;
- struct hlist_head *limits;
- struct nf_conncount_data *data;
-};
-
-static const struct nla_policy ct_limit_policy[OVS_CT_LIMIT_ATTR_MAX + 1] = {
- [OVS_CT_LIMIT_ATTR_ZONE_LIMIT] = { .type = NLA_NESTED, },
-};
-#endif
-
-static bool labels_nonzero(const struct ovs_key_ct_labels *labels);
-
-static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info);
-
-static u16 key_to_nfproto(const struct sw_flow_key *key)
-{
- switch (ntohs(key->eth.type)) {
- case ETH_P_IP:
- return NFPROTO_IPV4;
- case ETH_P_IPV6:
- return NFPROTO_IPV6;
- default:
- return NFPROTO_UNSPEC;
- }
-}
-
-/* Map SKB connection state into the values used by flow definition. */
-static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo)
-{
- u8 ct_state = OVS_CS_F_TRACKED;
-
- switch (ctinfo) {
- case IP_CT_ESTABLISHED_REPLY:
- case IP_CT_RELATED_REPLY:
- ct_state |= OVS_CS_F_REPLY_DIR;
- break;
- default:
- break;
- }
-
- switch (ctinfo) {
- case IP_CT_ESTABLISHED:
- case IP_CT_ESTABLISHED_REPLY:
- ct_state |= OVS_CS_F_ESTABLISHED;
- break;
- case IP_CT_RELATED:
- case IP_CT_RELATED_REPLY:
- ct_state |= OVS_CS_F_RELATED;
- break;
- case IP_CT_NEW:
- ct_state |= OVS_CS_F_NEW;
- break;
- default:
- break;
- }
-
- return ct_state;
-}
-
-static u32 ovs_ct_get_mark(const struct nf_conn *ct)
-{
-#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
- return ct ? ct->mark : 0;
-#else
- return 0;
-#endif
-}
-
-/* Guard against conntrack labels max size shrinking below 128 bits. */
-#if NF_CT_LABELS_MAX_SIZE < 16
-#error NF_CT_LABELS_MAX_SIZE must be at least 16 bytes
-#endif
-
-static void ovs_ct_get_labels(const struct nf_conn *ct,
- struct ovs_key_ct_labels *labels)
-{
- struct nf_conn_labels *cl = ct ? nf_ct_labels_find(ct) : NULL;
-
- if (cl)
- memcpy(labels, cl->bits, OVS_CT_LABELS_LEN);
- else
- memset(labels, 0, OVS_CT_LABELS_LEN);
-}
-
-static void __ovs_ct_update_key_orig_tp(struct sw_flow_key *key,
- const struct nf_conntrack_tuple *orig,
- u8 icmp_proto)
-{
- key->ct_orig_proto = orig->dst.protonum;
- if (orig->dst.protonum == icmp_proto) {
- key->ct.orig_tp.src = htons(orig->dst.u.icmp.type);
- key->ct.orig_tp.dst = htons(orig->dst.u.icmp.code);
- } else {
- key->ct.orig_tp.src = orig->src.u.all;
- key->ct.orig_tp.dst = orig->dst.u.all;
- }
-}
-
-static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state,
- const struct nf_conntrack_zone *zone,
- const struct nf_conn *ct)
-{
- key->ct_state = state;
- key->ct_zone = zone->id;
- key->ct.mark = ovs_ct_get_mark(ct);
- ovs_ct_get_labels(ct, &key->ct.labels);
-
- if (ct) {
- const struct nf_conntrack_tuple *orig;
-
- /* Use the master if we have one. */
- if (ct->master)
- ct = ct->master;
- orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
-
- /* IP version must match with the master connection. */
- if (key->eth.type == htons(ETH_P_IP) &&
- nf_ct_l3num(ct) == NFPROTO_IPV4) {
- key->ipv4.ct_orig.src = orig->src.u3.ip;
- key->ipv4.ct_orig.dst = orig->dst.u3.ip;
- __ovs_ct_update_key_orig_tp(key, orig, IPPROTO_ICMP);
- return;
- } else if (key->eth.type == htons(ETH_P_IPV6) &&
- !sw_flow_key_is_nd(key) &&
- nf_ct_l3num(ct) == NFPROTO_IPV6) {
- key->ipv6.ct_orig.src = orig->src.u3.in6;
- key->ipv6.ct_orig.dst = orig->dst.u3.in6;
- __ovs_ct_update_key_orig_tp(key, orig, NEXTHDR_ICMP);
- return;
- }
- }
- /* Clear 'ct_orig_proto' to mark the non-existence of conntrack
- * original direction key fields.
- */
- key->ct_orig_proto = 0;
-}
-
-/* Update 'key' based on skb->_nfct. If 'post_ct' is true, then OVS has
- * previously sent the packet to conntrack via the ct action. If
- * 'keep_nat_flags' is true, the existing NAT flags retained, else they are
- * initialized from the connection status.
- */
-static void ovs_ct_update_key(const struct sk_buff *skb,
- const struct ovs_conntrack_info *info,
- struct sw_flow_key *key, bool post_ct,
- bool keep_nat_flags)
-{
- const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
- enum ip_conntrack_info ctinfo;
- struct nf_conn *ct;
- u8 state = 0;
-
- ct = nf_ct_get(skb, &ctinfo);
- if (ct) {
- state = ovs_ct_get_state(ctinfo);
- /* All unconfirmed entries are NEW connections. */
- if (!nf_ct_is_confirmed(ct))
- state |= OVS_CS_F_NEW;
- /* OVS persists the related flag for the duration of the
- * connection.
- */
- if (ct->master)
- state |= OVS_CS_F_RELATED;
- if (keep_nat_flags) {
- state |= key->ct_state & OVS_CS_F_NAT_MASK;
- } else {
- if (ct->status & IPS_SRC_NAT)
- state |= OVS_CS_F_SRC_NAT;
- if (ct->status & IPS_DST_NAT)
- state |= OVS_CS_F_DST_NAT;
- }
- zone = nf_ct_zone(ct);
- } else if (post_ct) {
- state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID;
- if (info)
- zone = &info->zone;
- }
- __ovs_ct_update_key(key, state, zone, ct);
-}
-
-/* This is called to initialize CT key fields possibly coming in from the local
- * stack.
- */
-void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key)
-{
- ovs_ct_update_key(skb, NULL, key, false, false);
-}
-
-#define IN6_ADDR_INITIALIZER(ADDR) \
- { (ADDR).s6_addr32[0], (ADDR).s6_addr32[1], \
- (ADDR).s6_addr32[2], (ADDR).s6_addr32[3] }
-
-int ovs_ct_put_key(const struct sw_flow_key *swkey,
- const struct sw_flow_key *output, struct sk_buff *skb)
-{
- if (nla_put_u32(skb, OVS_KEY_ATTR_CT_STATE, output->ct_state))
- return -EMSGSIZE;
-
- if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
- nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, output->ct_zone))
- return -EMSGSIZE;
-
- if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
- nla_put_u32(skb, OVS_KEY_ATTR_CT_MARK, output->ct.mark))
- return -EMSGSIZE;
-
- if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
- nla_put(skb, OVS_KEY_ATTR_CT_LABELS, sizeof(output->ct.labels),
- &output->ct.labels))
- return -EMSGSIZE;
-
- if (swkey->ct_orig_proto) {
- if (swkey->eth.type == htons(ETH_P_IP)) {
- struct ovs_key_ct_tuple_ipv4 orig = {
- output->ipv4.ct_orig.src,
- output->ipv4.ct_orig.dst,
- output->ct.orig_tp.src,
- output->ct.orig_tp.dst,
- output->ct_orig_proto,
- };
- if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4,
- sizeof(orig), &orig))
- return -EMSGSIZE;
- } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
- struct ovs_key_ct_tuple_ipv6 orig = {
- IN6_ADDR_INITIALIZER(output->ipv6.ct_orig.src),
- IN6_ADDR_INITIALIZER(output->ipv6.ct_orig.dst),
- output->ct.orig_tp.src,
- output->ct.orig_tp.dst,
- output->ct_orig_proto,
- };
- if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6,
- sizeof(orig), &orig))
- return -EMSGSIZE;
- }
- }
-
- return 0;
-}
-
-static int ovs_ct_set_mark(struct nf_conn *ct, struct sw_flow_key *key,
- u32 ct_mark, u32 mask)
-{
-#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
- u32 new_mark;
-
- new_mark = ct_mark | (ct->mark & ~(mask));
- if (ct->mark != new_mark) {
- ct->mark = new_mark;
- if (nf_ct_is_confirmed(ct))
- nf_conntrack_event_cache(IPCT_MARK, ct);
- key->ct.mark = new_mark;
- }
-
- return 0;
-#else
- return -ENOTSUPP;
-#endif
-}
-
-static struct nf_conn_labels *ovs_ct_get_conn_labels(struct nf_conn *ct)
-{
- struct nf_conn_labels *cl;
-
- cl = nf_ct_labels_find(ct);
- if (!cl) {
- nf_ct_labels_ext_add(ct);
- cl = nf_ct_labels_find(ct);
- }
-
- return cl;
-}
-
-/* Initialize labels for a new, yet to be committed conntrack entry. Note that
- * since the new connection is not yet confirmed, and thus no-one else has
- * access to it's labels, we simply write them over.
- */
-static int ovs_ct_init_labels(struct nf_conn *ct, struct sw_flow_key *key,
- const struct ovs_key_ct_labels *labels,
- const struct ovs_key_ct_labels *mask)
-{
- struct nf_conn_labels *cl, *master_cl;
- bool have_mask = labels_nonzero(mask);
-
- /* Inherit master's labels to the related connection? */
- master_cl = ct->master ? nf_ct_labels_find(ct->master) : NULL;
-
- if (!master_cl && !have_mask)
- return 0; /* Nothing to do. */
-
- cl = ovs_ct_get_conn_labels(ct);
- if (!cl)
- return -ENOSPC;
-
- /* Inherit the master's labels, if any. Must use memcpy for backport
- * as struct assignment only copies the length field in older
- * kernels.
- */
- if (master_cl)
- memcpy(cl->bits, master_cl->bits, OVS_CT_LABELS_LEN);
-
- if (have_mask) {
- u32 *dst = (u32 *)cl->bits;
- int i;
-
- for (i = 0; i < OVS_CT_LABELS_LEN_32; i++)
- dst[i] = (dst[i] & ~mask->ct_labels_32[i]) |
- (labels->ct_labels_32[i]
- & mask->ct_labels_32[i]);
- }
-
- /* Labels are included in the IPCTNL_MSG_CT_NEW event only if the
- * IPCT_LABEL bit is set in the event cache.
- */
- nf_conntrack_event_cache(IPCT_LABEL, ct);
-
- memcpy(&key->ct.labels, cl->bits, OVS_CT_LABELS_LEN);
-
- return 0;
-}
-
-static int ovs_ct_set_labels(struct nf_conn *ct, struct sw_flow_key *key,
- const struct ovs_key_ct_labels *labels,
- const struct ovs_key_ct_labels *mask)
-{
- struct nf_conn_labels *cl;
- int err;
-
- cl = ovs_ct_get_conn_labels(ct);
- if (!cl)
- return -ENOSPC;
-
- err = nf_connlabels_replace(ct, labels->ct_labels_32,
- mask->ct_labels_32,
- OVS_CT_LABELS_LEN_32);
- if (err)
- return err;
-
- memcpy(&key->ct.labels, cl->bits, OVS_CT_LABELS_LEN);
-
- return 0;
-}
-
-/* 'skb' should already be pulled to nh_ofs. */
-static int ovs_ct_helper(struct sk_buff *skb, u16 proto)
-{
- const struct nf_conntrack_helper *helper;
- const struct nf_conn_help *help;
- enum ip_conntrack_info ctinfo;
- unsigned int protoff;
- struct nf_conn *ct;
- u8 nexthdr;
- int err;
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0)
- bool dst_set = false;
- struct rtable rt = { .rt_flags = 0 };
-#endif
-
- ct = nf_ct_get(skb, &ctinfo);
- if (!ct || ctinfo == IP_CT_RELATED_REPLY)
- return NF_ACCEPT;
-
- help = nfct_help(ct);
- if (!help)
- return NF_ACCEPT;
-
- helper = rcu_dereference(help->helper);
- if (!helper)
- return NF_ACCEPT;
-
- switch (proto) {
- case NFPROTO_IPV4:
- protoff = ip_hdrlen(skb);
- break;
- case NFPROTO_IPV6: {
- __be16 frag_off;
- int ofs;
-
- nexthdr = ipv6_hdr(skb)->nexthdr;
- ofs = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
- &frag_off);
- if (ofs < 0 || (frag_off & htons(~0x7)) != 0) {
- pr_debug("proto header not found\n");
- return NF_ACCEPT;
- }
- protoff = ofs;
- break;
- }
- default:
- WARN_ONCE(1, "helper invoked on non-IP family!");
- return NF_DROP;
- }
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0)
- /* Linux 4.5 and older depend on skb_dst being set when recalculating
- * checksums after NAT helper has mangled TCP or UDP packet payload.
- * skb_dst is cast to a rtable struct and the flags examined.
- * Forcing these flags to have RTCF_LOCAL not set ensures checksum mod
- * is carried out in the same way as kernel versions > 4.5
- */
- if (ct->status & IPS_NAT_MASK && skb->ip_summed != CHECKSUM_PARTIAL
- && !skb_dst(skb)) {
- dst_set = true;
- skb_dst_set(skb, &rt.dst);
- }
-#endif
- err = helper->help(skb, protoff, ct, ctinfo);
- if (err != NF_ACCEPT)
- return err;
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0)
- if (dst_set)
- skb_dst_set(skb, NULL);
-#endif
-
- /* Adjust seqs after helper. This is needed due to some helpers (e.g.,
- * FTP with NAT) adusting the TCP payload size when mangling IP
- * addresses and/or port numbers in the text-based control connection.
- */
- if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
- !nf_ct_seq_adjust(skb, ct, ctinfo, protoff))
- return NF_DROP;
- return NF_ACCEPT;
-}
-
-/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
- * value if 'skb' is freed.
- */
-static int handle_fragments(struct net *net, struct sw_flow_key *key,
- u16 zone, struct sk_buff *skb)
-{
- struct ovs_gso_cb ovs_cb = *OVS_GSO_CB(skb);
- int err;
-
- if (key->eth.type == htons(ETH_P_IP)) {
- enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
-
- memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
- err = ip_defrag(net, skb, user);
- if (err)
- return err;
-
- ovs_cb.dp_cb.mru = IPCB(skb)->frag_max_size;
-#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
- } else if (key->eth.type == htons(ETH_P_IPV6)) {
- enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
-
- memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
- err = nf_ct_frag6_gather(net, skb, user);
- if (err) {
- if (err != -EINPROGRESS)
- kfree_skb(skb);
- return err;
- }
-
- key->ip.proto = ipv6_hdr(skb)->nexthdr;
- ovs_cb.dp_cb.mru = IP6CB(skb)->frag_max_size;
-#endif /* IP frag support */
- } else {
- kfree_skb(skb);
- return -EPFNOSUPPORT;
- }
-
- /* The key extracted from the fragment that completed this datagram
- * likely didn't have an L4 header, so regenerate it.
- */
- ovs_flow_key_update_l3l4(skb, key);
-
- key->ip.frag = OVS_FRAG_TYPE_NONE;
- skb_clear_hash(skb);
- skb->ignore_df = 1;
- *OVS_GSO_CB(skb) = ovs_cb;
-
- return 0;
-}
-
-static struct nf_conntrack_expect *
-ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone,
- u16 proto, const struct sk_buff *skb)
-{
- struct nf_conntrack_tuple tuple;
- struct nf_conntrack_expect *exp;
-
- if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, net, &tuple))
- return NULL;
-
- exp = __nf_ct_expect_find(net, zone, &tuple);
- if (exp) {
- struct nf_conntrack_tuple_hash *h;
-
- /* Delete existing conntrack entry, if it clashes with the
- * expectation. This can happen since conntrack ALGs do not
- * check for clashes between (new) expectations and existing
- * conntrack entries. nf_conntrack_in() will check the
- * expectations only if a conntrack entry can not be found,
- * which can lead to OVS finding the expectation (here) in the
- * init direction, but which will not be removed by the
- * nf_conntrack_in() call, if a matching conntrack entry is
- * found instead. In this case all init direction packets
- * would be reported as new related packets, while reply
- * direction packets would be reported as un-related
- * established packets.
- */
- h = nf_conntrack_find_get(net, zone, &tuple);
- if (h) {
- struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
-
- nf_ct_delete(ct, 0, 0);
- nf_conntrack_put(&ct->ct_general);
- }
- }
-
- return exp;
-}
-
-/* This replicates logic from nf_conntrack_core.c that is not exported. */
-static enum ip_conntrack_info
-ovs_ct_get_info(const struct nf_conntrack_tuple_hash *h)
-{
- const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
-
- if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
- return IP_CT_ESTABLISHED_REPLY;
- /* Once we've had two way comms, always ESTABLISHED. */
- if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status))
- return IP_CT_ESTABLISHED;
- if (test_bit(IPS_EXPECTED_BIT, &ct->status))
- return IP_CT_RELATED;
- return IP_CT_NEW;
-}
-
-/* Find an existing connection which this packet belongs to without
- * re-attributing statistics or modifying the connection state. This allows an
- * skb->_nfct lost due to an upcall to be recovered during actions execution.
- *
- * Must be called with rcu_read_lock.
- *
- * On success, populates skb->_nfct and returns the connection. Returns NULL
- * if there is no existing entry.
- */
-static struct nf_conn *
-ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
- u8 l3num, struct sk_buff *skb, bool natted)
-{
- struct nf_conntrack_tuple tuple;
- struct nf_conntrack_tuple_hash *h;
- struct nf_conn *ct;
-
- if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), l3num,
- net, &tuple)) {
- pr_debug("ovs_ct_find_existing: Can't get tuple\n");
- return NULL;
- }
-
- /* Must invert the tuple if skb has been transformed by NAT. */
- if (natted) {
- struct nf_conntrack_tuple inverse;
-
- if (!rpl_nf_ct_invert_tuple(&inverse, &tuple)) {
- pr_debug("ovs_ct_find_existing: Inversion failed!\n");
- return NULL;
- }
- tuple = inverse;
- }
-
- /* look for tuple match */
- h = nf_conntrack_find_get(net, zone, &tuple);
- if (!h)
- return NULL; /* Not found. */
-
- ct = nf_ct_tuplehash_to_ctrack(h);
-
- /* Inverted packet tuple matches the reverse direction conntrack tuple,
- * select the other tuplehash to get the right 'ctinfo' bits for this
- * packet.
- */
- if (natted)
- h = &ct->tuplehash[!h->tuple.dst.dir];
-
- nf_ct_set(skb, ct, ovs_ct_get_info(h));
- return ct;
-}
-
-static
-struct nf_conn *ovs_ct_executed(struct net *net,
- const struct sw_flow_key *key,
- const struct ovs_conntrack_info *info,
- struct sk_buff *skb,
- bool *ct_executed)
-{
- struct nf_conn *ct = NULL;
-
- /* If no ct, check if we have evidence that an existing conntrack entry
- * might be found for this skb. This happens when we lose a skb->_nfct
- * due to an upcall, or if the direction is being forced. If the
- * connection was not confirmed, it is not cached and needs to be run
- * through conntrack again.
- */
- *ct_executed = (key->ct_state & OVS_CS_F_TRACKED) &&
- !(key->ct_state & OVS_CS_F_INVALID) &&
- (key->ct_zone == info->zone.id);
-
- if (*ct_executed || (!key->ct_state && info->force)) {
- ct = ovs_ct_find_existing(net, &info->zone, info->family, skb,
- !!(key->ct_state &
- OVS_CS_F_NAT_MASK));
- }
-
- return ct;
-}
-
-/* Determine whether skb->_nfct is equal to the result of conntrack lookup. */
-static bool skb_nfct_cached(struct net *net,
- const struct sw_flow_key *key,
- const struct ovs_conntrack_info *info,
- struct sk_buff *skb)
-{
- enum ip_conntrack_info ctinfo;
- struct nf_conn *ct;
- bool ct_executed = true;
-
- ct = nf_ct_get(skb, &ctinfo);
- if (!ct)
- ct = ovs_ct_executed(net, key, info, skb, &ct_executed);
-
- if (ct)
- nf_ct_get(skb, &ctinfo);
- else
- return false;
-
- if (!net_eq(net, read_pnet(&ct->ct_net)))
- return false;
- if (!nf_ct_zone_equal_any(info->ct, nf_ct_zone(ct)))
- return false;
- if (info->helper) {
- struct nf_conn_help *help;
-
- help = nf_ct_ext_find(ct, NF_CT_EXT_HELPER);
- if (help && rcu_access_pointer(help->helper) != info->helper)
- return false;
- }
- if (info->nf_ct_timeout) {
- struct nf_conn_timeout *timeout_ext;
-
- timeout_ext = nf_ct_timeout_find(ct);
- if (!timeout_ext || info->nf_ct_timeout !=
- rcu_dereference(timeout_ext->timeout))
- return false;
- }
- /* Force conntrack entry direction to the current packet? */
- if (info->force && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) {
- /* Delete the conntrack entry if confirmed, else just release
- * the reference.
- */
- if (nf_ct_is_confirmed(ct))
- nf_ct_delete(ct, 0, 0);
-
- nf_conntrack_put(&ct->ct_general);
- nf_ct_set(skb, NULL, 0);
- return false;
- }
-
- return ct_executed;
-}
-
-#if IS_ENABLED(CONFIG_NF_NAT_NEEDED)
-/* Modelled after nf_nat_ipv[46]_fn().
- * range is only used for new, uninitialized NAT state.
- * Returns either NF_ACCEPT or NF_DROP.
- */
-static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- const struct nf_nat_range2 *range,
- enum nf_nat_manip_type maniptype)
-{
- int hooknum, nh_off, err = NF_ACCEPT;
-
- nh_off = skb_network_offset(skb);
- skb_pull_rcsum(skb, nh_off);
-
- /* See HOOK2MANIP(). */
- if (maniptype == NF_NAT_MANIP_SRC)
- hooknum = NF_INET_LOCAL_IN; /* Source NAT */
- else
- hooknum = NF_INET_LOCAL_OUT; /* Destination NAT */
-
- switch (ctinfo) {
- case IP_CT_RELATED:
- case IP_CT_RELATED_REPLY:
- if (IS_ENABLED(CONFIG_NF_NAT_IPV4) &&
- skb->protocol == htons(ETH_P_IP) &&
- ip_hdr(skb)->protocol == IPPROTO_ICMP) {
- if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
- hooknum))
- err = NF_DROP;
- goto push;
- } else if (IS_ENABLED(CONFIG_NF_NAT_IPV6) &&
- skb->protocol == htons(ETH_P_IPV6)) {
- __be16 frag_off;
- u8 nexthdr = ipv6_hdr(skb)->nexthdr;
- int hdrlen = ipv6_skip_exthdr(skb,
- sizeof(struct ipv6hdr),
- &nexthdr, &frag_off);
-
- if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
- if (!nf_nat_icmpv6_reply_translation(skb, ct,
- ctinfo,
- hooknum,
- hdrlen))
- err = NF_DROP;
- goto push;
- }
- }
- /* Non-ICMP, fall thru to initialize if needed. */
- /* fall through */
- case IP_CT_NEW:
- /* Seen it before? This can happen for loopback, retrans,
- * or local packets.
- */
- if (!nf_nat_initialized(ct, maniptype)) {
- /* Initialize according to the NAT action. */
- err = (range && range->flags & NF_NAT_RANGE_MAP_IPS)
- /* Action is set up to establish a new
- * mapping.
- */
- ? nf_nat_setup_info(ct, range, maniptype)
- : nf_nat_alloc_null_binding(ct, hooknum);
- if (err != NF_ACCEPT)
- goto push;
- }
- break;
-
- case IP_CT_ESTABLISHED:
- case IP_CT_ESTABLISHED_REPLY:
- break;
-
- default:
- err = NF_DROP;
- goto push;
- }
-
- err = nf_nat_packet(ct, ctinfo, hooknum, skb);
-push:
- skb_push(skb, nh_off);
- skb_postpush_rcsum(skb, skb->data, nh_off);
-
- return err;
-}
-
-static void ovs_nat_update_key(struct sw_flow_key *key,
- const struct sk_buff *skb,
- enum nf_nat_manip_type maniptype)
-{
- if (maniptype == NF_NAT_MANIP_SRC) {
- __be16 src;
-
- key->ct_state |= OVS_CS_F_SRC_NAT;
- if (key->eth.type == htons(ETH_P_IP))
- key->ipv4.addr.src = ip_hdr(skb)->saddr;
- else if (key->eth.type == htons(ETH_P_IPV6))
- memcpy(&key->ipv6.addr.src, &ipv6_hdr(skb)->saddr,
- sizeof(key->ipv6.addr.src));
- else
- return;
-
- if (key->ip.proto == IPPROTO_UDP)
- src = udp_hdr(skb)->source;
- else if (key->ip.proto == IPPROTO_TCP)
- src = tcp_hdr(skb)->source;
- else if (key->ip.proto == IPPROTO_SCTP)
- src = sctp_hdr(skb)->source;
- else
- return;
-
- key->tp.src = src;
- } else {
- __be16 dst;
-
- key->ct_state |= OVS_CS_F_DST_NAT;
- if (key->eth.type == htons(ETH_P_IP))
- key->ipv4.addr.dst = ip_hdr(skb)->daddr;
- else if (key->eth.type == htons(ETH_P_IPV6))
- memcpy(&key->ipv6.addr.dst, &ipv6_hdr(skb)->daddr,
- sizeof(key->ipv6.addr.dst));
- else
- return;
-
- if (key->ip.proto == IPPROTO_UDP)
- dst = udp_hdr(skb)->dest;
- else if (key->ip.proto == IPPROTO_TCP)
- dst = tcp_hdr(skb)->dest;
- else if (key->ip.proto == IPPROTO_SCTP)
- dst = sctp_hdr(skb)->dest;
- else
- return;
-
- key->tp.dst = dst;
- }
-}
-
-/* Returns NF_DROP if the packet should be dropped, NF_ACCEPT otherwise. */
-static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
- const struct ovs_conntrack_info *info,
- struct sk_buff *skb, struct nf_conn *ct,
- enum ip_conntrack_info ctinfo)
-{
- enum nf_nat_manip_type maniptype;
- int err;
-
-#ifdef HAVE_NF_CT_IS_UNTRACKED
- if (nf_ct_is_untracked(ct)) {
- /* A NAT action may only be performed on tracked packets. */
- return NF_ACCEPT;
- }
-#endif /* HAVE_NF_CT_IS_UNTRACKED */
-
- /* Add NAT extension if not confirmed yet. */
- if (!nf_ct_is_confirmed(ct) && !nf_ct_nat_ext_add(ct))
- return NF_ACCEPT; /* Can't NAT. */
-
- /* Determine NAT type.
- * Check if the NAT type can be deduced from the tracked connection.
- * Make sure new expected connections (IP_CT_RELATED) are NATted only
- * when committing.
- */
- if (info->nat & OVS_CT_NAT && ctinfo != IP_CT_NEW &&
- ct->status & IPS_NAT_MASK &&
- (ctinfo != IP_CT_RELATED || info->commit)) {
- /* NAT an established or related connection like before. */
- if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY)
- /* This is the REPLY direction for a connection
- * for which NAT was applied in the forward
- * direction. Do the reverse NAT.
- */
- maniptype = ct->status & IPS_SRC_NAT
- ? NF_NAT_MANIP_DST : NF_NAT_MANIP_SRC;
- else
- maniptype = ct->status & IPS_SRC_NAT
- ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST;
- } else if (info->nat & OVS_CT_SRC_NAT) {
- maniptype = NF_NAT_MANIP_SRC;
- } else if (info->nat & OVS_CT_DST_NAT) {
- maniptype = NF_NAT_MANIP_DST;
- } else {
- return NF_ACCEPT; /* Connection is not NATed. */
- }
- err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype);
-
- if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) {
- if (ct->status & IPS_SRC_NAT) {
- if (maniptype == NF_NAT_MANIP_SRC)
- maniptype = NF_NAT_MANIP_DST;
- else
- maniptype = NF_NAT_MANIP_SRC;
-
- err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range,
- maniptype);
- } else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
- err = ovs_ct_nat_execute(skb, ct, ctinfo, NULL,
- NF_NAT_MANIP_SRC);
- }
- }
-
- /* Mark NAT done if successful and update the flow key. */
- if (err == NF_ACCEPT)
- ovs_nat_update_key(key, skb, maniptype);
-
- return err;
-}
-#else /* !CONFIG_NF_NAT_NEEDED */
-static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
- const struct ovs_conntrack_info *info,
- struct sk_buff *skb, struct nf_conn *ct,
- enum ip_conntrack_info ctinfo)
-{
- return NF_ACCEPT;
-}
-#endif
-
-/* Pass 'skb' through conntrack in 'net', using zone configured in 'info', if
- * not done already. Update key with new CT state after passing the packet
- * through conntrack.
- * Note that if the packet is deemed invalid by conntrack, skb->_nfct will be
- * set to NULL and 0 will be returned.
- */
-static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
- const struct ovs_conntrack_info *info,
- struct sk_buff *skb)
-{
- /* If we are recirculating packets to match on conntrack fields and
- * committing with a separate conntrack action, then we don't need to
- * actually run the packet through conntrack twice unless it's for a
- * different zone.
- */
- bool cached = skb_nfct_cached(net, key, info, skb);
- enum ip_conntrack_info ctinfo;
- struct nf_conn *ct;
-
- if (!cached) {
- struct nf_hook_state state = {
- .hook = NF_INET_PRE_ROUTING,
- .pf = info->family,
- .net = net,
- };
- struct nf_conn *tmpl = info->ct;
- int err;
-
- /* Associate skb with specified zone. */
- if (tmpl) {
- if (skb_nfct(skb))
- nf_conntrack_put(skb_nfct(skb));
- nf_conntrack_get(&tmpl->ct_general);
- nf_ct_set(skb, tmpl, IP_CT_NEW);
- }
-
- err = nf_conntrack_in(skb, &state);
- if (err != NF_ACCEPT)
- return -ENOENT;
-
- /* Clear CT state NAT flags to mark that we have not yet done
- * NAT after the nf_conntrack_in() call. We can actually clear
- * the whole state, as it will be re-initialized below.
- */
- key->ct_state = 0;
-
- /* Update the key, but keep the NAT flags. */
- ovs_ct_update_key(skb, info, key, true, true);
- }
-
- ct = nf_ct_get(skb, &ctinfo);
- if (ct) {
- bool add_helper = false;
-
- /* Packets starting a new connection must be NATted before the
- * helper, so that the helper knows about the NAT. We enforce
- * this by delaying both NAT and helper calls for unconfirmed
- * connections until the committing CT action. For later
- * packets NAT and Helper may be called in either order.
- *
- * NAT will be done only if the CT action has NAT, and only
- * once per packet (per zone), as guarded by the NAT bits in
- * the key->ct_state.
- */
- if (info->nat && !(key->ct_state & OVS_CS_F_NAT_MASK) &&
- (nf_ct_is_confirmed(ct) || info->commit) &&
- ovs_ct_nat(net, key, info, skb, ct, ctinfo) != NF_ACCEPT) {
- return -EINVAL;
- }
-
- /* Userspace may decide to perform a ct lookup without a helper
- * specified followed by a (recirculate and) commit with one,
- * or attach a helper in a later commit. Therefore, for
- * connections which we will commit, we may need to attach
- * the helper here.
- */
- if (info->commit && info->helper && !nfct_help(ct)) {
- int err = __nf_ct_try_assign_helper(ct, info->ct,
- GFP_ATOMIC);
- if (err)
- return err;
- add_helper = true;
-
- /* helper installed, add seqadj if NAT is required */
- if (info->nat && !nfct_seqadj(ct)) {
- if (!nfct_seqadj_ext_add(ct))
- return -EINVAL;
- }
- }
-
- /* Call the helper only if:
- * - nf_conntrack_in() was executed above ("!cached") or a
- * helper was just attached ("add_helper") for a confirmed
- * connection, or
- * - When committing an unconfirmed connection.
- */
- if ((nf_ct_is_confirmed(ct) ? !cached || add_helper :
- info->commit) &&
- ovs_ct_helper(skb, info->family) != NF_ACCEPT) {
- return -EINVAL;
- }
- }
-
- return 0;
-}
-
-/* Lookup connection and read fields into key. */
-static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
- const struct ovs_conntrack_info *info,
- struct sk_buff *skb)
-{
- struct nf_conntrack_expect *exp;
-
- /* If we pass an expected packet through nf_conntrack_in() the
- * expectation is typically removed, but the packet could still be
- * lost in upcall processing. To prevent this from happening we
- * perform an explicit expectation lookup. Expected connections are
- * always new, and will be passed through conntrack only when they are
- * committed, as it is OK to remove the expectation at that time.
- */
- exp = ovs_ct_expect_find(net, &info->zone, info->family, skb);
- if (exp) {
- u8 state;
-
- /* NOTE: New connections are NATted and Helped only when
- * committed, so we are not calling into NAT here.
- */
- state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED;
- __ovs_ct_update_key(key, state, &info->zone, exp->master);
- } else {
- struct nf_conn *ct;
- int err;
-
- err = __ovs_ct_lookup(net, key, info, skb);
- if (err)
- return err;
-
- ct = (struct nf_conn *)skb_nfct(skb);
- if (ct)
- nf_ct_deliver_cached_events(ct);
- }
-
- return 0;
-}
-
-static bool labels_nonzero(const struct ovs_key_ct_labels *labels)
-{
- size_t i;
-
- for (i = 0; i < OVS_CT_LABELS_LEN_32; i++)
- if (labels->ct_labels_32[i])
- return true;
-
- return false;
-}
-
-#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
-static struct hlist_head *ct_limit_hash_bucket(
- const struct ovs_ct_limit_info *info, u16 zone)
-{
- return &info->limits[zone & (CT_LIMIT_HASH_BUCKETS - 1)];
-}
-
-/* Call with ovs_mutex */
-static void ct_limit_set(const struct ovs_ct_limit_info *info,
- struct ovs_ct_limit *new_ct_limit)
-{
- struct ovs_ct_limit *ct_limit;
- struct hlist_head *head;
-
- head = ct_limit_hash_bucket(info, new_ct_limit->zone);
- hlist_for_each_entry_rcu(ct_limit, head, hlist_node) {
- if (ct_limit->zone == new_ct_limit->zone) {
- hlist_replace_rcu(&ct_limit->hlist_node,
- &new_ct_limit->hlist_node);
- kfree_rcu(ct_limit, rcu);
- return;
- }
- }
-
- hlist_add_head_rcu(&new_ct_limit->hlist_node, head);
-}
-
-/* Call with ovs_mutex */
-static void ct_limit_del(const struct ovs_ct_limit_info *info, u16 zone)
-{
- struct ovs_ct_limit *ct_limit;
- struct hlist_head *head;
- struct hlist_node *n;
-
- head = ct_limit_hash_bucket(info, zone);
- hlist_for_each_entry_safe(ct_limit, n, head, hlist_node) {
- if (ct_limit->zone == zone) {
- hlist_del_rcu(&ct_limit->hlist_node);
- kfree_rcu(ct_limit, rcu);
- return;
- }
- }
-}
-
-/* Call with RCU read lock */
-static u32 ct_limit_get(const struct ovs_ct_limit_info *info, u16 zone)
-{
- struct ovs_ct_limit *ct_limit;
- struct hlist_head *head;
-
- head = ct_limit_hash_bucket(info, zone);
- hlist_for_each_entry_rcu(ct_limit, head, hlist_node) {
- if (ct_limit->zone == zone)
- return ct_limit->limit;
- }
-
- return info->default_limit;
-}
-
-static int ovs_ct_check_limit(struct net *net,
- const struct ovs_conntrack_info *info,
- const struct nf_conntrack_tuple *tuple)
-{
- struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
- const struct ovs_ct_limit_info *ct_limit_info = ovs_net->ct_limit_info;
- u32 per_zone_limit, connections;
- u32 conncount_key;
-
- conncount_key = info->zone.id;
-
- per_zone_limit = ct_limit_get(ct_limit_info, info->zone.id);
- if (per_zone_limit == OVS_CT_LIMIT_UNLIMITED)
- return 0;
-
- connections = nf_conncount_count(net, ct_limit_info->data,
- &conncount_key, tuple, &info->zone);
- if (connections > per_zone_limit)
- return -ENOMEM;
-
- return 0;
-}
-#endif
-
-/* Lookup connection and confirm if unconfirmed. */
-static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
- const struct ovs_conntrack_info *info,
- struct sk_buff *skb)
-{
- enum ip_conntrack_info ctinfo;
- struct nf_conn *ct;
- int err;
-
- err = __ovs_ct_lookup(net, key, info, skb);
- if (err)
- return err;
-
- /* The connection could be invalid, in which case this is a no-op.*/
- ct = nf_ct_get(skb, &ctinfo);
- if (!ct)
- return 0;
-
-#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
- if (static_branch_unlikely(&ovs_ct_limit_enabled)) {
- if (!nf_ct_is_confirmed(ct)) {
- err = ovs_ct_check_limit(net, info,
- &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
- if (err) {
- net_warn_ratelimited("openvswitch: zone: %u "
- "exceeds conntrack limit\n",
- info->zone.id);
- return err;
- }
- }
- }
-#endif
-
- /* Set the conntrack event mask if given. NEW and DELETE events have
- * their own groups, but the NFNLGRP_CONNTRACK_UPDATE group listener
- * typically would receive many kinds of updates. Setting the event
- * mask allows those events to be filtered. The set event mask will
- * remain in effect for the lifetime of the connection unless changed
- * by a further CT action with both the commit flag and the eventmask
- * option. */
- if (info->have_eventmask) {
- struct nf_conntrack_ecache *cache = nf_ct_ecache_find(ct);
-
- if (cache)
- cache->ctmask = info->eventmask;
- }
-
- /* Apply changes before confirming the connection so that the initial
- * conntrack NEW netlink event carries the values given in the CT
- * action.
- */
- if (info->mark.mask) {
- err = ovs_ct_set_mark(ct, key, info->mark.value,
- info->mark.mask);
- if (err)
- return err;
- }
- if (!nf_ct_is_confirmed(ct)) {
- err = ovs_ct_init_labels(ct, key, &info->labels.value,
- &info->labels.mask);
- if (err)
- return err;
- } else if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
- labels_nonzero(&info->labels.mask)) {
- err = ovs_ct_set_labels(ct, key, &info->labels.value,
- &info->labels.mask);
- if (err)
- return err;
- }
- /* This will take care of sending queued events even if the connection
- * is already confirmed.
- */
- if (nf_conntrack_confirm(skb) != NF_ACCEPT)
- return -EINVAL;
-
- return 0;
-}
-
-/* Trim the skb to the length specified by the IP/IPv6 header,
- * removing any trailing lower-layer padding. This prepares the skb
- * for higher-layer processing that assumes skb->len excludes padding
- * (such as nf_ip_checksum). The caller needs to pull the skb to the
- * network header, and ensure ip_hdr/ipv6_hdr points to valid data.
- */
-static int ovs_skb_network_trim(struct sk_buff *skb)
-{
- unsigned int len;
- int err;
-
- switch (skb->protocol) {
- case htons(ETH_P_IP):
- len = ntohs(ip_hdr(skb)->tot_len);
- break;
- case htons(ETH_P_IPV6):
- len = sizeof(struct ipv6hdr)
- + ntohs(ipv6_hdr(skb)->payload_len);
- break;
- default:
- len = skb->len;
- }
-
- err = pskb_trim_rcsum(skb, len);
- if (err)
- kfree_skb(skb);
-
- return err;
-}
-
-/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
- * value if 'skb' is freed.
- */
-int ovs_ct_execute(struct net *net, struct sk_buff *skb,
- struct sw_flow_key *key,
- const struct ovs_conntrack_info *info)
-{
- int nh_ofs;
- int err;
-
- /* The conntrack module expects to be working at L3. */
- nh_ofs = skb_network_offset(skb);
- skb_pull_rcsum(skb, nh_ofs);
-
- err = ovs_skb_network_trim(skb);
- if (err)
- return err;
-
- if (key->ip.frag != OVS_FRAG_TYPE_NONE) {
- err = handle_fragments(net, key, info->zone.id, skb);
- if (err)
- return err;
- }
-
- if (info->commit)
- err = ovs_ct_commit(net, key, info, skb);
- else
- err = ovs_ct_lookup(net, key, info, skb);
-
- skb_push(skb, nh_ofs);
- skb_postpush_rcsum(skb, skb->data, nh_ofs);
- if (err)
- kfree_skb(skb);
- return err;
-}
-
-int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key)
-{
- if (skb_nfct(skb)) {
- nf_conntrack_put(skb_nfct(skb));
-#ifdef HAVE_IP_CT_UNTRACKED
- nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
-#else
- nf_ct_set(skb, NULL, 0);
-#endif
- ovs_ct_fill_key(skb, key);
- }
-
- return 0;
-}
-
-static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name,
- const struct sw_flow_key *key, bool log)
-{
- struct nf_conntrack_helper *helper;
- struct nf_conn_help *help;
- int ret = 0;
-
- helper = nf_conntrack_helper_try_module_get(name, info->family,
- key->ip.proto);
- if (!helper) {
- OVS_NLERR(log, "Unknown helper \"%s\"", name);
- return -EINVAL;
- }
-
- help = nf_ct_helper_ext_add(info->ct, helper, GFP_KERNEL);
- if (!help) {
- nf_conntrack_helper_put(helper);
- return -ENOMEM;
- }
-
-#if IS_ENABLED(CONFIG_NF_NAT_NEEDED)
- if (info->nat) {
- ret = nf_nat_helper_try_module_get(name, info->family,
- key->ip.proto);
- if (ret) {
- nf_conntrack_helper_put(helper);
- OVS_NLERR(log, "Failed to load \"%s\" NAT helper, error: %d",
- name, ret);
- return ret;
- }
- }
-#endif
-
- rcu_assign_pointer(help->helper, helper);
- info->helper = helper;
- return ret;
-}
-
-#if IS_ENABLED(CONFIG_NF_NAT_NEEDED)
-static int parse_nat(const struct nlattr *attr,
- struct ovs_conntrack_info *info, bool log)
-{
- struct nlattr *a;
- int rem;
- bool have_ip_max = false;
- bool have_proto_max = false;
- bool ip_vers = (info->family == NFPROTO_IPV6);
-
- nla_for_each_nested(a, attr, rem) {
- static const int ovs_nat_attr_lens[OVS_NAT_ATTR_MAX + 1][2] = {
- [OVS_NAT_ATTR_SRC] = {0, 0},
- [OVS_NAT_ATTR_DST] = {0, 0},
- [OVS_NAT_ATTR_IP_MIN] = {sizeof(struct in_addr),
- sizeof(struct in6_addr)},
- [OVS_NAT_ATTR_IP_MAX] = {sizeof(struct in_addr),
- sizeof(struct in6_addr)},
- [OVS_NAT_ATTR_PROTO_MIN] = {sizeof(u16), sizeof(u16)},
- [OVS_NAT_ATTR_PROTO_MAX] = {sizeof(u16), sizeof(u16)},
- [OVS_NAT_ATTR_PERSISTENT] = {0, 0},
- [OVS_NAT_ATTR_PROTO_HASH] = {0, 0},
- [OVS_NAT_ATTR_PROTO_RANDOM] = {0, 0},
- };
- int type = nla_type(a);
-
- if (type > OVS_NAT_ATTR_MAX) {
- OVS_NLERR(log, "Unknown NAT attribute (type=%d, max=%d)",
- type, OVS_NAT_ATTR_MAX);
- return -EINVAL;
- }
-
- if (nla_len(a) != ovs_nat_attr_lens[type][ip_vers]) {
- OVS_NLERR(log, "NAT attribute type %d has unexpected length (%d != %d)",
- type, nla_len(a),
- ovs_nat_attr_lens[type][ip_vers]);
- return -EINVAL;
- }
-
- switch (type) {
- case OVS_NAT_ATTR_SRC:
- case OVS_NAT_ATTR_DST:
- if (info->nat) {
- OVS_NLERR(log, "Only one type of NAT may be specified");
- return -ERANGE;
- }
- info->nat |= OVS_CT_NAT;
- info->nat |= ((type == OVS_NAT_ATTR_SRC)
- ? OVS_CT_SRC_NAT : OVS_CT_DST_NAT);
- break;
-
- case OVS_NAT_ATTR_IP_MIN:
- nla_memcpy(&info->range.min_addr, a,
- sizeof(info->range.min_addr));
- info->range.flags |= NF_NAT_RANGE_MAP_IPS;
- break;
-
- case OVS_NAT_ATTR_IP_MAX:
- have_ip_max = true;
- nla_memcpy(&info->range.max_addr, a,
- sizeof(info->range.max_addr));
- info->range.flags |= NF_NAT_RANGE_MAP_IPS;
- break;
-
- case OVS_NAT_ATTR_PROTO_MIN:
- info->range.min_proto.all = htons(nla_get_u16(a));
- info->range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
- break;
-
- case OVS_NAT_ATTR_PROTO_MAX:
- have_proto_max = true;
- info->range.max_proto.all = htons(nla_get_u16(a));
- info->range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
- break;
-
- case OVS_NAT_ATTR_PERSISTENT:
- info->range.flags |= NF_NAT_RANGE_PERSISTENT;
- break;
-
- case OVS_NAT_ATTR_PROTO_HASH:
- info->range.flags |= NF_NAT_RANGE_PROTO_RANDOM;
- break;
-
- case OVS_NAT_ATTR_PROTO_RANDOM:
-#ifdef NF_NAT_RANGE_PROTO_RANDOM_FULLY
- info->range.flags |= NF_NAT_RANGE_PROTO_RANDOM_FULLY;
-#else
- info->range.flags |= NF_NAT_RANGE_PROTO_RANDOM;
- info->random_fully_compat = true;
-#endif
- break;
-
- default:
- OVS_NLERR(log, "Unknown nat attribute (%d)", type);
- return -EINVAL;
- }
- }
-
- if (rem > 0) {
- OVS_NLERR(log, "NAT attribute has %d unknown bytes", rem);
- return -EINVAL;
- }
- if (!info->nat) {
- /* Do not allow flags if no type is given. */
- if (info->range.flags) {
- OVS_NLERR(log,
- "NAT flags may be given only when NAT range (SRC or DST) is also specified."
- );
- return -EINVAL;
- }
- info->nat = OVS_CT_NAT; /* NAT existing connections. */
- } else if (!info->commit) {
- OVS_NLERR(log,
- "NAT attributes may be specified only when CT COMMIT flag is also specified."
- );
- return -EINVAL;
- }
- /* Allow missing IP_MAX. */
- if (info->range.flags & NF_NAT_RANGE_MAP_IPS && !have_ip_max) {
- memcpy(&info->range.max_addr, &info->range.min_addr,
- sizeof(info->range.max_addr));
- }
- /* Allow missing PROTO_MAX. */
- if (info->range.flags & NF_NAT_RANGE_PROTO_SPECIFIED &&
- !have_proto_max) {
- info->range.max_proto.all = info->range.min_proto.all;
- }
- return 0;
-}
-#endif
-
-static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = {
- [OVS_CT_ATTR_COMMIT] = { .minlen = 0, .maxlen = 0 },
- [OVS_CT_ATTR_FORCE_COMMIT] = { .minlen = 0, .maxlen = 0 },
- [OVS_CT_ATTR_ZONE] = { .minlen = sizeof(u16),
- .maxlen = sizeof(u16) },
- [OVS_CT_ATTR_MARK] = { .minlen = sizeof(struct md_mark),
- .maxlen = sizeof(struct md_mark) },
- [OVS_CT_ATTR_LABELS] = { .minlen = sizeof(struct md_labels),
- .maxlen = sizeof(struct md_labels) },
- [OVS_CT_ATTR_HELPER] = { .minlen = 1,
- .maxlen = NF_CT_HELPER_NAME_LEN },
-#if IS_ENABLED(CONFIG_NF_NAT_NEEDED)
- /* NAT length is checked when parsing the nested attributes. */
- [OVS_CT_ATTR_NAT] = { .minlen = 0, .maxlen = INT_MAX },
-#endif
- [OVS_CT_ATTR_EVENTMASK] = { .minlen = sizeof(u32),
- .maxlen = sizeof(u32) },
- [OVS_CT_ATTR_TIMEOUT] = { .minlen = 1,
- .maxlen = CTNL_TIMEOUT_NAME_MAX },
-};
-
-static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
- const char **helper, bool log)
-{
- struct nlattr *a;
- int rem;
-
- nla_for_each_nested(a, attr, rem) {
- int type = nla_type(a);
- int maxlen;
- int minlen;
-
- if (type > OVS_CT_ATTR_MAX) {
- OVS_NLERR(log,
- "Unknown conntrack attr (type=%d, max=%d)",
- type, OVS_CT_ATTR_MAX);
- return -EINVAL;
- }
-
- maxlen = ovs_ct_attr_lens[type].maxlen;
- minlen = ovs_ct_attr_lens[type].minlen;
- if (nla_len(a) < minlen || nla_len(a) > maxlen) {
- OVS_NLERR(log,
- "Conntrack attr type has unexpected length (type=%d, length=%d, expected=%d)",
- type, nla_len(a), maxlen);
- return -EINVAL;
- }
-
- switch (type) {
- case OVS_CT_ATTR_FORCE_COMMIT:
- info->force = true;
- /* fall through. */
- case OVS_CT_ATTR_COMMIT:
- info->commit = true;
- break;
-#ifdef CONFIG_NF_CONNTRACK_ZONES
- case OVS_CT_ATTR_ZONE:
- info->zone.id = nla_get_u16(a);
- break;
-#endif
-#ifdef CONFIG_NF_CONNTRACK_MARK
- case OVS_CT_ATTR_MARK: {
- struct md_mark *mark = nla_data(a);
-
- if (!mark->mask) {
- OVS_NLERR(log, "ct_mark mask cannot be 0");
- return -EINVAL;
- }
- info->mark = *mark;
- break;
- }
-#endif
-#ifdef CONFIG_NF_CONNTRACK_LABELS
- case OVS_CT_ATTR_LABELS: {
- struct md_labels *labels = nla_data(a);
-
- if (!labels_nonzero(&labels->mask)) {
- OVS_NLERR(log, "ct_labels mask cannot be 0");
- return -EINVAL;
- }
- info->labels = *labels;
- break;
- }
-#endif
- case OVS_CT_ATTR_HELPER:
- *helper = nla_data(a);
- if (!memchr(*helper, '\0', nla_len(a))) {
- OVS_NLERR(log, "Invalid conntrack helper");
- return -EINVAL;
- }
- break;
-#if IS_ENABLED(CONFIG_NF_NAT_NEEDED)
- case OVS_CT_ATTR_NAT: {
- int err = parse_nat(a, info, log);
-
- if (err)
- return err;
- break;
- }
-#endif
- case OVS_CT_ATTR_EVENTMASK:
- info->have_eventmask = true;
- info->eventmask = nla_get_u32(a);
- break;
-#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
- case OVS_CT_ATTR_TIMEOUT:
- memcpy(info->timeout, nla_data(a), nla_len(a));
- if (!memchr(info->timeout, '\0', nla_len(a))) {
- OVS_NLERR(log, "Invalid conntrack timeout");
- return -EINVAL;
- }
- break;
-#endif
-
- default:
- OVS_NLERR(log, "Unknown conntrack attr (%d)",
- type);
- return -EINVAL;
- }
- }
-
-#ifdef CONFIG_NF_CONNTRACK_MARK
- if (!info->commit && info->mark.mask) {
- OVS_NLERR(log,
- "Setting conntrack mark requires 'commit' flag.");
- return -EINVAL;
- }
-#endif
-#ifdef CONFIG_NF_CONNTRACK_LABELS
- if (!info->commit && labels_nonzero(&info->labels.mask)) {
- OVS_NLERR(log,
- "Setting conntrack labels requires 'commit' flag.");
- return -EINVAL;
- }
-#endif
- if (rem > 0) {
- OVS_NLERR(log, "Conntrack attr has %d unknown bytes", rem);
- return -EINVAL;
- }
-
- return 0;
-}
-
-bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr)
-{
- if (attr == OVS_KEY_ATTR_CT_STATE)
- return true;
- if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
- attr == OVS_KEY_ATTR_CT_ZONE)
- return true;
- if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
- attr == OVS_KEY_ATTR_CT_MARK)
- return true;
- if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
- attr == OVS_KEY_ATTR_CT_LABELS) {
- struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
-
- return ovs_net->xt_label;
- }
-
- return false;
-}
-
-int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
- const struct sw_flow_key *key,
- struct sw_flow_actions **sfa, bool log)
-{
- struct ovs_conntrack_info ct_info;
- const char *helper = NULL;
- u16 family;
- int err;
-
- family = key_to_nfproto(key);
- if (family == NFPROTO_UNSPEC) {
- OVS_NLERR(log, "ct family unspecified");
- return -EINVAL;
- }
-
- memset(&ct_info, 0, sizeof(ct_info));
- ct_info.family = family;
-
- nf_ct_zone_init(&ct_info.zone, NF_CT_DEFAULT_ZONE_ID,
- NF_CT_DEFAULT_ZONE_DIR, 0);
-
- err = parse_ct(attr, &ct_info, &helper, log);
- if (err)
- return err;
-
- /* Set up template for tracking connections in specific zones. */
- ct_info.ct = nf_ct_tmpl_alloc(net, &ct_info.zone, GFP_KERNEL);
- if (!ct_info.ct) {
- OVS_NLERR(log, "Failed to allocate conntrack template");
- return -ENOMEM;
- }
-
- if (ct_info.timeout[0]) {
- if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto,
- ct_info.timeout))
- pr_info_ratelimited("Failed to associated timeout "
- "policy `%s'\n", ct_info.timeout);
- else
- ct_info.nf_ct_timeout = rcu_dereference(
- nf_ct_timeout_find(ct_info.ct)->timeout);
-
- }
-
- if (helper) {
- err = ovs_ct_add_helper(&ct_info, helper, key, log);
- if (err)
- goto err_free_ct;
- }
-
- err = ovs_nla_add_action(sfa, OVS_ACTION_ATTR_CT, &ct_info,
- sizeof(ct_info), log);
- if (err)
- goto err_free_ct;
-
- __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status);
- nf_conntrack_get(&ct_info.ct->ct_general);
- return 0;
-err_free_ct:
- __ovs_ct_free_action(&ct_info);
- return err;
-}
-
-#if IS_ENABLED(CONFIG_NF_NAT_NEEDED)
-static bool ovs_ct_nat_to_attr(const struct ovs_conntrack_info *info,
- struct sk_buff *skb)
-{
- struct nlattr *start;
-
- start = nla_nest_start_noflag(skb, OVS_CT_ATTR_NAT);
- if (!start)
- return false;
-
- if (info->nat & OVS_CT_SRC_NAT) {
- if (nla_put_flag(skb, OVS_NAT_ATTR_SRC))
- return false;
- } else if (info->nat & OVS_CT_DST_NAT) {
- if (nla_put_flag(skb, OVS_NAT_ATTR_DST))
- return false;
- } else {
- goto out;
- }
-
- if (info->range.flags & NF_NAT_RANGE_MAP_IPS) {
- if (IS_ENABLED(CONFIG_NF_NAT_IPV4) &&
- info->family == NFPROTO_IPV4) {
- if (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MIN,
- info->range.min_addr.ip) ||
- (info->range.max_addr.ip
- != info->range.min_addr.ip &&
- (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MAX,
- info->range.max_addr.ip))))
- return false;
- } else if (IS_ENABLED(CONFIG_NF_NAT_IPV6) &&
- info->family == NFPROTO_IPV6) {
- if (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MIN,
- &info->range.min_addr.in6) ||
- (memcmp(&info->range.max_addr.in6,
- &info->range.min_addr.in6,
- sizeof(info->range.max_addr.in6)) &&
- (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MAX,
- &info->range.max_addr.in6))))
- return false;
- } else {
- return false;
- }
- }
- if (info->range.flags & NF_NAT_RANGE_PROTO_SPECIFIED &&
- (nla_put_u16(skb, OVS_NAT_ATTR_PROTO_MIN,
- ntohs(info->range.min_proto.all)) ||
- (info->range.max_proto.all != info->range.min_proto.all &&
- nla_put_u16(skb, OVS_NAT_ATTR_PROTO_MAX,
- ntohs(info->range.max_proto.all)))))
- return false;
-
- if (info->range.flags & NF_NAT_RANGE_PERSISTENT &&
- nla_put_flag(skb, OVS_NAT_ATTR_PERSISTENT))
- return false;
- if (info->range.flags & NF_NAT_RANGE_PROTO_RANDOM &&
- nla_put_flag(skb, info->random_fully_compat
- ? OVS_NAT_ATTR_PROTO_RANDOM
- : OVS_NAT_ATTR_PROTO_HASH))
- return false;
-#ifdef NF_NAT_RANGE_PROTO_RANDOM_FULLY
- if (info->range.flags & NF_NAT_RANGE_PROTO_RANDOM_FULLY &&
- nla_put_flag(skb, OVS_NAT_ATTR_PROTO_RANDOM))
- return false;
-#endif
-out:
- nla_nest_end(skb, start);
-
- return true;
-}
-#endif
-
-int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info,
- struct sk_buff *skb)
-{
- struct nlattr *start;
-
- start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_CT);
- if (!start)
- return -EMSGSIZE;
-
- if (ct_info->commit && nla_put_flag(skb, ct_info->force
- ? OVS_CT_ATTR_FORCE_COMMIT
- : OVS_CT_ATTR_COMMIT))
- return -EMSGSIZE;
- if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
- nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id))
- return -EMSGSIZE;
- if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && ct_info->mark.mask &&
- nla_put(skb, OVS_CT_ATTR_MARK, sizeof(ct_info->mark),
- &ct_info->mark))
- return -EMSGSIZE;
- if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
- labels_nonzero(&ct_info->labels.mask) &&
- nla_put(skb, OVS_CT_ATTR_LABELS, sizeof(ct_info->labels),
- &ct_info->labels))
- return -EMSGSIZE;
- if (ct_info->helper) {
- if (nla_put_string(skb, OVS_CT_ATTR_HELPER,
- ct_info->helper->name))
- return -EMSGSIZE;
- }
- if (ct_info->have_eventmask &&
- nla_put_u32(skb, OVS_CT_ATTR_EVENTMASK, ct_info->eventmask))
- return -EMSGSIZE;
- if (ct_info->timeout[0]) {
- if (nla_put_string(skb, OVS_CT_ATTR_TIMEOUT, ct_info->timeout))
- return -EMSGSIZE;
- }
-
-#if IS_ENABLED(CONFIG_NF_NAT_NEEDED)
- if (ct_info->nat && !ovs_ct_nat_to_attr(ct_info, skb))
- return -EMSGSIZE;
-#endif
- nla_nest_end(skb, start);
-
- return 0;
-}
-
-void ovs_ct_free_action(const struct nlattr *a)
-{
- struct ovs_conntrack_info *ct_info = nla_data(a);
-
- __ovs_ct_free_action(ct_info);
-}
-
-static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)
-{
- if (ct_info->helper) {
-#if IS_ENABLED(CONFIG_NF_NAT_NEEDED)
- if (ct_info->nat)
- nf_nat_helper_put(ct_info->helper);
-#endif
- nf_conntrack_helper_put(ct_info->helper);
- }
- if (ct_info->ct) {
- if (ct_info->timeout[0])
- nf_ct_destroy_timeout(ct_info->ct);
- nf_ct_tmpl_free(ct_info->ct);
- }
-}
-
-#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
-static int ovs_ct_limit_init(struct net *net, struct ovs_net *ovs_net)
-{
- int i, err;
-
- ovs_net->ct_limit_info = kmalloc(sizeof(*ovs_net->ct_limit_info),
- GFP_KERNEL);
- if (!ovs_net->ct_limit_info)
- return -ENOMEM;
-
- ovs_net->ct_limit_info->default_limit = OVS_CT_LIMIT_DEFAULT;
- ovs_net->ct_limit_info->limits =
- kmalloc_array(CT_LIMIT_HASH_BUCKETS, sizeof(struct hlist_head),
- GFP_KERNEL);
- if (!ovs_net->ct_limit_info->limits) {
- kfree(ovs_net->ct_limit_info);
- return -ENOMEM;
- }
-
- for (i = 0; i < CT_LIMIT_HASH_BUCKETS; i++)
- INIT_HLIST_HEAD(&ovs_net->ct_limit_info->limits[i]);
-
- ovs_net->ct_limit_info->data =
- nf_conncount_init(net, NFPROTO_INET, sizeof(u32));
-
- if (IS_ERR(ovs_net->ct_limit_info->data)) {
- err = PTR_ERR(ovs_net->ct_limit_info->data);
- kfree(ovs_net->ct_limit_info->limits);
- kfree(ovs_net->ct_limit_info);
- pr_err("openvswitch: failed to init nf_conncount %d\n", err);
- return err;
- }
- return 0;
-}
-
-static void ovs_ct_limit_exit(struct net *net, struct ovs_net *ovs_net)
-{
- const struct ovs_ct_limit_info *info = ovs_net->ct_limit_info;
- int i;
-
- nf_conncount_destroy(net, NFPROTO_INET, info->data);
- for (i = 0; i < CT_LIMIT_HASH_BUCKETS; ++i) {
- struct hlist_head *head = &info->limits[i];
- struct ovs_ct_limit *ct_limit;
-
- hlist_for_each_entry_rcu(ct_limit, head, hlist_node,
- lockdep_ovsl_is_held())
- kfree_rcu(ct_limit, rcu);
- }
- kfree(ovs_net->ct_limit_info->limits);
- kfree(ovs_net->ct_limit_info);
-}
-
-static struct sk_buff *
-ovs_ct_limit_cmd_reply_start(struct genl_info *info, u8 cmd,
- struct ovs_header **ovs_reply_header)
-{
- struct ovs_header *ovs_header = info->userhdr;
- struct sk_buff *skb;
-
- skb = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!skb)
- return ERR_PTR(-ENOMEM);
-
- *ovs_reply_header = genlmsg_put(skb, info->snd_portid,
- info->snd_seq,
- &dp_ct_limit_genl_family, 0, cmd);
-
- if (!*ovs_reply_header) {
- nlmsg_free(skb);
- return ERR_PTR(-EMSGSIZE);
- }
- (*ovs_reply_header)->dp_ifindex = ovs_header->dp_ifindex;
-
- return skb;
-}
-
-static bool check_zone_id(int zone_id, u16 *pzone)
-{
- if (zone_id >= 0 && zone_id <= 65535) {
- *pzone = (u16)zone_id;
- return true;
- }
- return false;
-}
-
-static int ovs_ct_limit_set_zone_limit(struct nlattr *nla_zone_limit,
- struct ovs_ct_limit_info *info)
-{
- struct ovs_zone_limit *zone_limit;
- int rem;
- u16 zone;
-
- rem = NLA_ALIGN(nla_len(nla_zone_limit));
- zone_limit = (struct ovs_zone_limit *)nla_data(nla_zone_limit);
-
- while (rem >= sizeof(*zone_limit)) {
- if (unlikely(zone_limit->zone_id ==
- OVS_ZONE_LIMIT_DEFAULT_ZONE)) {
- ovs_lock();
- info->default_limit = zone_limit->limit;
- ovs_unlock();
- } else if (unlikely(!check_zone_id(
- zone_limit->zone_id, &zone))) {
- OVS_NLERR(true, "zone id is out of range");
- } else {
- struct ovs_ct_limit *ct_limit;
-
- ct_limit = kmalloc(sizeof(*ct_limit), GFP_KERNEL);
- if (!ct_limit)
- return -ENOMEM;
-
- ct_limit->zone = zone;
- ct_limit->limit = zone_limit->limit;
-
- ovs_lock();
- ct_limit_set(info, ct_limit);
- ovs_unlock();
- }
- rem -= NLA_ALIGN(sizeof(*zone_limit));
- zone_limit = (struct ovs_zone_limit *)((u8 *)zone_limit +
- NLA_ALIGN(sizeof(*zone_limit)));
- }
-
- if (rem)
- OVS_NLERR(true, "set zone limit has %d unknown bytes", rem);
-
- return 0;
-}
-
-static int ovs_ct_limit_del_zone_limit(struct nlattr *nla_zone_limit,
- struct ovs_ct_limit_info *info)
-{
- struct ovs_zone_limit *zone_limit;
- int rem;
- u16 zone;
-
- rem = NLA_ALIGN(nla_len(nla_zone_limit));
- zone_limit = (struct ovs_zone_limit *)nla_data(nla_zone_limit);
-
- while (rem >= sizeof(*zone_limit)) {
- if (unlikely(zone_limit->zone_id ==
- OVS_ZONE_LIMIT_DEFAULT_ZONE)) {
- ovs_lock();
- info->default_limit = OVS_CT_LIMIT_DEFAULT;
- ovs_unlock();
- } else if (unlikely(!check_zone_id(
- zone_limit->zone_id, &zone))) {
- OVS_NLERR(true, "zone id is out of range");
- } else {
- ovs_lock();
- ct_limit_del(info, zone);
- ovs_unlock();
- }
- rem -= NLA_ALIGN(sizeof(*zone_limit));
- zone_limit = (struct ovs_zone_limit *)((u8 *)zone_limit +
- NLA_ALIGN(sizeof(*zone_limit)));
- }
-
- if (rem)
- OVS_NLERR(true, "del zone limit has %d unknown bytes", rem);
-
- return 0;
-}
-
-static int ovs_ct_limit_get_default_limit(struct ovs_ct_limit_info *info,
- struct sk_buff *reply)
-{
- struct ovs_zone_limit zone_limit;
- int err;
-
- zone_limit.zone_id = OVS_ZONE_LIMIT_DEFAULT_ZONE;
- zone_limit.limit = info->default_limit;
- err = nla_put_nohdr(reply, sizeof(zone_limit), &zone_limit);
- if (err)
- return err;
-
- return 0;
-}
-
-static int __ovs_ct_limit_get_zone_limit(struct net *net,
- struct nf_conncount_data *data,
- u16 zone_id, u32 limit,
- struct sk_buff *reply)
-{
- struct nf_conntrack_zone ct_zone;
- struct ovs_zone_limit zone_limit;
- u32 conncount_key = zone_id;
-
- zone_limit.zone_id = zone_id;
- zone_limit.limit = limit;
- nf_ct_zone_init(&ct_zone, zone_id, NF_CT_DEFAULT_ZONE_DIR, 0);
-
- zone_limit.count = nf_conncount_count(net, data, &conncount_key, NULL,
- &ct_zone);
- return nla_put_nohdr(reply, sizeof(zone_limit), &zone_limit);
-}
-
-static int ovs_ct_limit_get_zone_limit(struct net *net,
- struct nlattr *nla_zone_limit,
- struct ovs_ct_limit_info *info,
- struct sk_buff *reply)
-{
- struct ovs_zone_limit *zone_limit;
- int rem, err;
- u32 limit;
- u16 zone;
-
- rem = NLA_ALIGN(nla_len(nla_zone_limit));
- zone_limit = (struct ovs_zone_limit *)nla_data(nla_zone_limit);
-
- while (rem >= sizeof(*zone_limit)) {
- if (unlikely(zone_limit->zone_id ==
- OVS_ZONE_LIMIT_DEFAULT_ZONE)) {
- err = ovs_ct_limit_get_default_limit(info, reply);
- if (err)
- return err;
- } else if (unlikely(!check_zone_id(zone_limit->zone_id,
- &zone))) {
- OVS_NLERR(true, "zone id is out of range");
- } else {
- rcu_read_lock();
- limit = ct_limit_get(info, zone);
- rcu_read_unlock();
-
- err = __ovs_ct_limit_get_zone_limit(
- net, info->data, zone, limit, reply);
- if (err)
- return err;
- }
- rem -= NLA_ALIGN(sizeof(*zone_limit));
- zone_limit = (struct ovs_zone_limit *)((u8 *)zone_limit +
- NLA_ALIGN(sizeof(*zone_limit)));
- }
-
- if (rem)
- OVS_NLERR(true, "get zone limit has %d unknown bytes", rem);
-
- return 0;
-}
-
-static int ovs_ct_limit_get_all_zone_limit(struct net *net,
- struct ovs_ct_limit_info *info,
- struct sk_buff *reply)
-{
- struct ovs_ct_limit *ct_limit;
- struct hlist_head *head;
- int i, err = 0;
-
- err = ovs_ct_limit_get_default_limit(info, reply);
- if (err)
- return err;
-
- rcu_read_lock();
- for (i = 0; i < CT_LIMIT_HASH_BUCKETS; ++i) {
- head = &info->limits[i];
- hlist_for_each_entry_rcu(ct_limit, head, hlist_node) {
- err = __ovs_ct_limit_get_zone_limit(net, info->data,
- ct_limit->zone, ct_limit->limit, reply);
- if (err)
- goto exit_err;
- }
- }
-
-exit_err:
- rcu_read_unlock();
- return err;
-}
-
-static int ovs_ct_limit_cmd_set(struct sk_buff *skb, struct genl_info *info)
-{
- struct nlattr **a = info->attrs;
- struct sk_buff *reply;
- struct ovs_header *ovs_reply_header;
- struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
- struct ovs_ct_limit_info *ct_limit_info = ovs_net->ct_limit_info;
- int err;
-
- reply = ovs_ct_limit_cmd_reply_start(info, OVS_CT_LIMIT_CMD_SET,
- &ovs_reply_header);
- if (IS_ERR(reply))
- return PTR_ERR(reply);
-
- if (!a[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]) {
- err = -EINVAL;
- goto exit_err;
- }
-
- err = ovs_ct_limit_set_zone_limit(a[OVS_CT_LIMIT_ATTR_ZONE_LIMIT],
- ct_limit_info);
- if (err)
- goto exit_err;
-
- static_branch_enable(&ovs_ct_limit_enabled);
-
- genlmsg_end(reply, ovs_reply_header);
- return genlmsg_reply(reply, info);
-
-exit_err:
- nlmsg_free(reply);
- return err;
-}
-
-static int ovs_ct_limit_cmd_del(struct sk_buff *skb, struct genl_info *info)
-{
- struct nlattr **a = info->attrs;
- struct sk_buff *reply;
- struct ovs_header *ovs_reply_header;
- struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
- struct ovs_ct_limit_info *ct_limit_info = ovs_net->ct_limit_info;
- int err;
-
- reply = ovs_ct_limit_cmd_reply_start(info, OVS_CT_LIMIT_CMD_DEL,
- &ovs_reply_header);
- if (IS_ERR(reply))
- return PTR_ERR(reply);
-
- if (!a[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]) {
- err = -EINVAL;
- goto exit_err;
- }
-
- err = ovs_ct_limit_del_zone_limit(a[OVS_CT_LIMIT_ATTR_ZONE_LIMIT],
- ct_limit_info);
- if (err)
- goto exit_err;
-
- genlmsg_end(reply, ovs_reply_header);
- return genlmsg_reply(reply, info);
-
-exit_err:
- nlmsg_free(reply);
- return err;
-}
-
-static int ovs_ct_limit_cmd_get(struct sk_buff *skb, struct genl_info *info)
-{
- struct nlattr **a = info->attrs;
- struct nlattr *nla_reply;
- struct sk_buff *reply;
- struct ovs_header *ovs_reply_header;
- struct net *net = sock_net(skb->sk);
- struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
- struct ovs_ct_limit_info *ct_limit_info = ovs_net->ct_limit_info;
- int err;
-
- reply = ovs_ct_limit_cmd_reply_start(info, OVS_CT_LIMIT_CMD_GET,
- &ovs_reply_header);
- if (IS_ERR(reply))
- return PTR_ERR(reply);
-
- nla_reply = nla_nest_start_noflag(reply, OVS_CT_LIMIT_ATTR_ZONE_LIMIT);
- if (!nla_reply) {
- err = -EMSGSIZE;
- goto exit_err;
- }
-
- if (a[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]) {
- err = ovs_ct_limit_get_zone_limit(
- net, a[OVS_CT_LIMIT_ATTR_ZONE_LIMIT], ct_limit_info,
- reply);
- if (err)
- goto exit_err;
- } else {
- err = ovs_ct_limit_get_all_zone_limit(net, ct_limit_info,
- reply);
- if (err)
- goto exit_err;
- }
-
- nla_nest_end(reply, nla_reply);
- genlmsg_end(reply, ovs_reply_header);
- return genlmsg_reply(reply, info);
-
-exit_err:
- nlmsg_free(reply);
- return err;
-}
-
-static struct genl_ops ct_limit_genl_ops[] = {
- { .cmd = OVS_CT_LIMIT_CMD_SET,
-#ifdef HAVE_GENL_VALIDATE_FLAGS
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-#endif
- .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
- * privilege. */
-#ifdef HAVE_GENL_OPS_POLICY
- .policy = ct_limit_policy,
-#endif
- .doit = ovs_ct_limit_cmd_set,
- },
- { .cmd = OVS_CT_LIMIT_CMD_DEL,
-#ifdef HAVE_GENL_VALIDATE_FLAGS
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-#endif
- .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
- * privilege. */
-#ifdef HAVE_GENL_OPS_POLICY
- .policy = ct_limit_policy,
-#endif
- .doit = ovs_ct_limit_cmd_del,
- },
- { .cmd = OVS_CT_LIMIT_CMD_GET,
-#ifdef HAVE_GENL_VALIDATE_FLAGS
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-#endif
- .flags = 0, /* OK for unprivileged users. */
-#ifdef HAVE_GENL_OPS_POLICY
- .policy = ct_limit_policy,
-#endif
- .doit = ovs_ct_limit_cmd_get,
- },
-};
-
-static const struct genl_multicast_group ovs_ct_limit_multicast_group = {
- .name = OVS_CT_LIMIT_MCGROUP,
-};
-
-struct genl_family dp_ct_limit_genl_family __ro_after_init = {
- .hdrsize = sizeof(struct ovs_header),
- .name = OVS_CT_LIMIT_FAMILY,
- .version = OVS_CT_LIMIT_VERSION,
- .maxattr = OVS_CT_LIMIT_ATTR_MAX,
-#ifndef HAVE_GENL_OPS_POLICY
- .policy = ct_limit_policy,
-#endif
- .netnsok = true,
- .parallel_ops = true,
- .ops = ct_limit_genl_ops,
- .n_ops = ARRAY_SIZE(ct_limit_genl_ops),
- .mcgrps = &ovs_ct_limit_multicast_group,
- .n_mcgrps = 1,
- .module = THIS_MODULE,
-};
-#endif
-
-int ovs_ct_init(struct net *net)
-{
- unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
- struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
-
- if (nf_connlabels_get(net, n_bits - 1)) {
- ovs_net->xt_label = false;
- OVS_NLERR(true, "Failed to set connlabel length");
- } else {
- ovs_net->xt_label = true;
- }
-
-#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
- return ovs_ct_limit_init(net, ovs_net);
-#else
- return 0;
-#endif
-}
-
-void ovs_ct_exit(struct net *net)
-{
- struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
-
-#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
- ovs_ct_limit_exit(net, ovs_net);
-#endif
-
- if (ovs_net->xt_label)
- nf_connlabels_put(net);
-}
-
-#endif /* CONFIG_NF_CONNTRACK */
diff --git a/datapath/conntrack.h b/datapath/conntrack.h
deleted file mode 100644
index 5b4b34c19..000000000
--- a/datapath/conntrack.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Copyright (c) 2015 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- */
-
-#ifndef OVS_CONNTRACK_H
-#define OVS_CONNTRACK_H 1
-
-#include <linux/version.h>
-#include "flow.h"
-
-struct ovs_conntrack_info;
-struct ovs_ct_limit_info;
-enum ovs_key_attr;
-
-#if IS_ENABLED(CONFIG_NF_CONNTRACK)
-int ovs_ct_init(struct net *);
-void ovs_ct_exit(struct net *);
-bool ovs_ct_verify(struct net *, enum ovs_key_attr attr);
-int ovs_ct_copy_action(struct net *, const struct nlattr *,
- const struct sw_flow_key *, struct sw_flow_actions **,
- bool log);
-int ovs_ct_action_to_attr(const struct ovs_conntrack_info *, struct sk_buff *);
-
-int ovs_ct_execute(struct net *, struct sk_buff *, struct sw_flow_key *,
- const struct ovs_conntrack_info *);
-int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key);
-
-void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key);
-int ovs_ct_put_key(const struct sw_flow_key *swkey,
- const struct sw_flow_key *output, struct sk_buff *skb);
-void ovs_ct_free_action(const struct nlattr *a);
-
-#define CT_SUPPORTED_MASK (OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED | \
- OVS_CS_F_RELATED | OVS_CS_F_REPLY_DIR | \
- OVS_CS_F_INVALID | OVS_CS_F_TRACKED | \
- OVS_CS_F_SRC_NAT | OVS_CS_F_DST_NAT)
-#else
-#include <linux/errno.h>
-
-static inline int ovs_ct_init(struct net *net) { return 0; }
-
-static inline void ovs_ct_exit(struct net *net) { }
-
-static inline bool ovs_ct_verify(struct net *net, int attr)
-{
- return false;
-}
-
-static inline int ovs_ct_copy_action(struct net *net, const struct nlattr *nla,
- const struct sw_flow_key *key,
- struct sw_flow_actions **acts, bool log)
-{
- return -ENOTSUPP;
-}
-
-static inline int ovs_ct_action_to_attr(const struct ovs_conntrack_info *info,
- struct sk_buff *skb)
-{
- return -ENOTSUPP;
-}
-
-static inline int ovs_ct_execute(struct net *net, struct sk_buff *skb,
- struct sw_flow_key *key,
- const struct ovs_conntrack_info *info)
-{
- kfree_skb(skb);
- return -ENOTSUPP;
-}
-
-static inline int ovs_ct_clear(struct sk_buff *skb,
- struct sw_flow_key *key)
-{
- return -ENOTSUPP;
-}
-
-static inline void ovs_ct_fill_key(const struct sk_buff *skb,
- struct sw_flow_key *key)
-{
- key->ct_state = 0;
- key->ct_zone = 0;
- key->ct.mark = 0;
- memset(&key->ct.labels, 0, sizeof(key->ct.labels));
- /* Clear 'ct_orig_proto' to mark the non-existence of original
- * direction key fields.
- */
- key->ct_orig_proto = 0;
-}
-
-static inline int ovs_ct_put_key(const struct sw_flow_key *swkey,
- const struct sw_flow_key *output,
- struct sk_buff *skb)
-{
- return 0;
-}
-
-static inline void ovs_ct_free_action(const struct nlattr *a) { }
-
-#define CT_SUPPORTED_MASK 0
-#endif
-
-#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
-extern struct genl_family dp_ct_limit_genl_family;
-#endif
-#endif /* ovs_conntrack.h */
diff --git a/datapath/datapath.c b/datapath/datapath.c
deleted file mode 100644
index b88d16107..000000000
--- a/datapath/datapath.c
+++ /dev/null
@@ -1,2707 +0,0 @@
-/*
- * Copyright (c) 2007-2015 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/if_arp.h>
-#include <linux/if_vlan.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/jhash.h>
-#include <linux/delay.h>
-#include <linux/time.h>
-#include <linux/etherdevice.h>
-#include <linux/genetlink.h>
-#include <linux/kernel.h>
-#include <linux/kthread.h>
-#include <linux/mutex.h>
-#include <linux/percpu.h>
-#include <linux/rcupdate.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include <linux/version.h>
-#include <linux/ethtool.h>
-#include <linux/wait.h>
-#include <asm/div64.h>
-#include <linux/highmem.h>
-#include <linux/netfilter_bridge.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/inetdevice.h>
-#include <linux/list.h>
-#include <linux/openvswitch.h>
-#include <linux/rculist.h>
-#include <linux/dmi.h>
-#include <net/genetlink.h>
-#include <net/net_namespace.h>
-#include <net/netns/generic.h>
-#include <net/nsh.h>
-
-#include "datapath.h"
-#include "conntrack.h"
-#include "flow.h"
-#include "flow_table.h"
-#include "flow_netlink.h"
-#include "meter.h"
-#include "gso.h"
-#include "vport-internal_dev.h"
-#include "vport-netdev.h"
-
-unsigned int ovs_net_id __read_mostly;
-
-static struct genl_family dp_packet_genl_family;
-static struct genl_family dp_flow_genl_family;
-static struct genl_family dp_datapath_genl_family;
-
-static const struct nla_policy flow_policy[];
-
-static const struct genl_multicast_group ovs_dp_flow_multicast_group = {
- .name = OVS_FLOW_MCGROUP,
-};
-
-static const struct genl_multicast_group ovs_dp_datapath_multicast_group = {
- .name = OVS_DATAPATH_MCGROUP,
-};
-
-const struct genl_multicast_group ovs_dp_vport_multicast_group = {
- .name = OVS_VPORT_MCGROUP,
-};
-
-/* Check if need to build a reply message.
- * OVS userspace sets the NLM_F_ECHO flag if it needs the reply.
- */
-static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
- unsigned int group)
-{
- return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
- genl_has_listeners(family, genl_info_net(info), group);
-}
-
-static void ovs_notify(struct genl_family *family,
- const struct genl_multicast_group *grp,
- struct sk_buff *skb, struct genl_info *info)
-{
- genl_notify(family, skb, info, GROUP_ID(grp), GFP_KERNEL);
-}
-
-/**
- * DOC: Locking:
- *
- * All writes e.g. Writes to device state (add/remove datapath, port, set
- * operations on vports, etc.), Writes to other state (flow table
- * modifications, set miscellaneous datapath parameters, etc.) are protected
- * by ovs_lock.
- *
- * Reads are protected by RCU.
- *
- * There are a few special cases (mostly stats) that have their own
- * synchronization but they nest under all of above and don't interact with
- * each other.
- *
- * The RTNL lock nests inside ovs_mutex.
- */
-
-static DEFINE_MUTEX(ovs_mutex);
-
-void ovs_lock(void)
-{
- mutex_lock(&ovs_mutex);
-}
-
-void ovs_unlock(void)
-{
- mutex_unlock(&ovs_mutex);
-}
-
-#ifdef CONFIG_LOCKDEP
-int lockdep_ovsl_is_held(void)
-{
- if (debug_locks)
- return lockdep_is_held(&ovs_mutex);
- else
- return 1;
-}
-#endif
-
-static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
- const struct sw_flow_key *,
- const struct dp_upcall_info *,
- uint32_t cutlen);
-static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
- const struct sw_flow_key *,
- const struct dp_upcall_info *,
- uint32_t cutlen);
-
-/* Must be called with rcu_read_lock or ovs_mutex. */
-const char *ovs_dp_name(const struct datapath *dp)
-{
- struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
- return ovs_vport_name(vport);
-}
-
-static int get_dpifindex(const struct datapath *dp)
-{
- struct vport *local;
- int ifindex;
-
- rcu_read_lock();
-
- local = ovs_vport_rcu(dp, OVSP_LOCAL);
- if (local)
- ifindex = local->dev->ifindex;
- else
- ifindex = 0;
-
- rcu_read_unlock();
-
- return ifindex;
-}
-
-static void destroy_dp_rcu(struct rcu_head *rcu)
-{
- struct datapath *dp = container_of(rcu, struct datapath, rcu);
-
- ovs_flow_tbl_destroy(&dp->table);
- free_percpu(dp->stats_percpu);
- kfree(dp->ports);
- ovs_meters_exit(dp);
- kfree(dp);
-}
-
-static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
- u16 port_no)
-{
- return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
-}
-
-/* Called with ovs_mutex or RCU read lock. */
-struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
-{
- struct vport *vport;
- struct hlist_head *head;
-
- head = vport_hash_bucket(dp, port_no);
- hlist_for_each_entry_rcu(vport, head, dp_hash_node) {
- if (vport->port_no == port_no)
- return vport;
- }
- return NULL;
-}
-
-/* Called with ovs_mutex. */
-static struct vport *new_vport(const struct vport_parms *parms)
-{
- struct vport *vport;
-
- vport = ovs_vport_add(parms);
- if (!IS_ERR(vport)) {
- struct datapath *dp = parms->dp;
- struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);
-
- hlist_add_head_rcu(&vport->dp_hash_node, head);
- }
- return vport;
-}
-
-void ovs_dp_detach_port(struct vport *p)
-{
- ASSERT_OVSL();
-
- /* First drop references to device. */
- hlist_del_rcu(&p->dp_hash_node);
-
- /* Then destroy it. */
- ovs_vport_del(p);
-}
-
-/* Must be called with rcu_read_lock. */
-void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
-{
- const struct vport *p = OVS_CB(skb)->input_vport;
- struct datapath *dp = p->dp;
- struct sw_flow *flow;
- struct sw_flow_actions *sf_acts;
- struct dp_stats_percpu *stats;
- u64 *stats_counter;
- u32 n_mask_hit;
- int error;
-
- stats = this_cpu_ptr(dp->stats_percpu);
-
- /* Look up flow. */
- flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
- &n_mask_hit);
- if (unlikely(!flow)) {
- struct dp_upcall_info upcall;
-
- memset(&upcall, 0, sizeof(upcall));
- upcall.cmd = OVS_PACKET_CMD_MISS;
- upcall.portid = ovs_vport_find_upcall_portid(p, skb);
- upcall.mru = OVS_CB(skb)->mru;
- error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
- if (unlikely(error))
- kfree_skb(skb);
- else
- consume_skb(skb);
- stats_counter = &stats->n_missed;
- goto out;
- }
-
- ovs_flow_stats_update(flow, key->tp.flags, skb);
- sf_acts = rcu_dereference(flow->sf_acts);
- error = ovs_execute_actions(dp, skb, sf_acts, key);
- if (unlikely(error))
- net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n",
- ovs_dp_name(dp), error);
-
- stats_counter = &stats->n_hit;
-
-out:
- /* Update datapath statistics. */
- u64_stats_update_begin(&stats->syncp);
- (*stats_counter)++;
- stats->n_mask_hit += n_mask_hit;
- u64_stats_update_end(&stats->syncp);
-}
-
-int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
- const struct sw_flow_key *key,
- const struct dp_upcall_info *upcall_info,
- uint32_t cutlen)
-{
- struct dp_stats_percpu *stats;
- int err;
-
- if (upcall_info->portid == 0) {
- err = -ENOTCONN;
- goto err;
- }
-
- if (!skb_is_gso(skb))
- err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
- else
- err = queue_gso_packets(dp, skb, key, upcall_info, cutlen);
- if (err)
- goto err;
-
- return 0;
-
-err:
- stats = this_cpu_ptr(dp->stats_percpu);
-
- u64_stats_update_begin(&stats->syncp);
- stats->n_lost++;
- u64_stats_update_end(&stats->syncp);
-
- return err;
-}
-
-static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
- const struct sw_flow_key *key,
- const struct dp_upcall_info *upcall_info,
- uint32_t cutlen)
-{
-#ifdef HAVE_SKB_GSO_UDP
- unsigned int gso_type = skb_shinfo(skb)->gso_type;
- struct sw_flow_key later_key;
-#endif
- struct sk_buff *segs, *nskb;
- struct ovs_skb_cb ovs_cb;
- int err;
-
- ovs_cb = *OVS_CB(skb);
- segs = __skb_gso_segment(skb, NETIF_F_SG, false);
- *OVS_CB(skb) = ovs_cb;
- if (IS_ERR(segs))
- return PTR_ERR(segs);
- if (segs == NULL)
- return -EINVAL;
-#ifdef HAVE_SKB_GSO_UDP
- if (gso_type & SKB_GSO_UDP) {
- /* The initial flow key extracted by ovs_flow_key_extract()
- * in this case is for a first fragment, so we need to
- * properly mark later fragments.
- */
- later_key = *key;
- later_key.ip.frag = OVS_FRAG_TYPE_LATER;
- }
-#endif
- /* Queue all of the segments. */
- skb_list_walk_safe(segs, skb, nskb) {
- *OVS_CB(skb) = ovs_cb;
-#ifdef HAVE_SKB_GSO_UDP
- if (gso_type & SKB_GSO_UDP && skb != segs)
- key = &later_key;
-#endif
- err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
- if (err)
- break;
-
- }
-
- /* Free all of the segments. */
- skb_list_walk_safe(segs, skb, nskb) {
- if (err)
- kfree_skb(skb);
- else
- consume_skb(skb);
- }
- return err;
-}
-
-static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
- unsigned int hdrlen, int actions_attrlen)
-{
- size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
- + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
- + nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */
- + nla_total_size(sizeof(unsigned int)) /* OVS_PACKET_ATTR_LEN */
- + nla_total_size(sizeof(u64)); /* OVS_PACKET_ATTR_HASH */
-
- /* OVS_PACKET_ATTR_USERDATA */
- if (upcall_info->userdata)
- size += NLA_ALIGN(upcall_info->userdata->nla_len);
-
- /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
- if (upcall_info->egress_tun_info)
- size += nla_total_size(ovs_tun_key_attr_size());
-
- /* OVS_PACKET_ATTR_ACTIONS */
- if (upcall_info->actions_len)
- size += nla_total_size(actions_attrlen);
-
- /* OVS_PACKET_ATTR_MRU */
- if (upcall_info->mru)
- size += nla_total_size(sizeof(upcall_info->mru));
-
- return size;
-}
-
-static void pad_packet(struct datapath *dp, struct sk_buff *skb)
-{
- if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
- size_t plen = NLA_ALIGN(skb->len) - skb->len;
-
- if (plen > 0)
- skb_put_zero(skb, plen);
- }
-}
-
-static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
- const struct sw_flow_key *key,
- const struct dp_upcall_info *upcall_info,
- uint32_t cutlen)
-{
- struct ovs_header *upcall;
- struct sk_buff *nskb = NULL;
- struct sk_buff *user_skb = NULL; /* to be queued to userspace */
- struct nlattr *nla;
- size_t len;
- unsigned int hlen;
- int err, dp_ifindex;
- u64 hash;
-
- dp_ifindex = get_dpifindex(dp);
- if (!dp_ifindex)
- return -ENODEV;
-
- if (skb_vlan_tag_present(skb)) {
- nskb = skb_clone(skb, GFP_ATOMIC);
- if (!nskb)
- return -ENOMEM;
-
- nskb = __vlan_hwaccel_push_inside(nskb);
- if (!nskb)
- return -ENOMEM;
-
- skb = nskb;
- }
-
- if (nla_attr_size(skb->len) > USHRT_MAX) {
- err = -EFBIG;
- goto out;
- }
-
- /* Complete checksum if needed */
- if (skb->ip_summed == CHECKSUM_PARTIAL &&
- (err = skb_csum_hwoffload_help(skb, 0)))
- goto out;
-
- /* Older versions of OVS user space enforce alignment of the last
- * Netlink attribute to NLA_ALIGNTO which would require extensive
- * padding logic. Only perform zerocopy if padding is not required.
- */
- if (dp->user_features & OVS_DP_F_UNALIGNED)
- hlen = skb_zerocopy_headlen(skb);
- else
- hlen = skb->len;
-
- len = upcall_msg_size(upcall_info, hlen - cutlen,
- OVS_CB(skb)->acts_origlen);
- user_skb = genlmsg_new(len, GFP_ATOMIC);
- if (!user_skb) {
- err = -ENOMEM;
- goto out;
- }
-
- upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
- 0, upcall_info->cmd);
- if (!upcall) {
- err = -EINVAL;
- goto out;
- }
- upcall->dp_ifindex = dp_ifindex;
-
- err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
- if (err)
- goto out;
-
- if (upcall_info->userdata)
- __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
- nla_len(upcall_info->userdata),
- nla_data(upcall_info->userdata));
-
-
- if (upcall_info->egress_tun_info) {
- nla = nla_nest_start_noflag(user_skb,
- OVS_PACKET_ATTR_EGRESS_TUN_KEY);
- if (!nla) {
- err = -EMSGSIZE;
- goto out;
- }
- err = ovs_nla_put_tunnel_info(user_skb,
- upcall_info->egress_tun_info);
- if (err)
- goto out;
-
- nla_nest_end(user_skb, nla);
- }
-
- if (upcall_info->actions_len) {
- nla = nla_nest_start_noflag(user_skb, OVS_PACKET_ATTR_ACTIONS);
- if (!nla) {
- err = -EMSGSIZE;
- goto out;
- }
- err = ovs_nla_put_actions(upcall_info->actions,
- upcall_info->actions_len,
- user_skb);
- if (!err)
- nla_nest_end(user_skb, nla);
- else
- nla_nest_cancel(user_skb, nla);
- }
-
- /* Add OVS_PACKET_ATTR_MRU */
- if (upcall_info->mru &&
- nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU, upcall_info->mru)) {
- err = -ENOBUFS;
- goto out;
- }
-
- /* Add OVS_PACKET_ATTR_LEN when packet is truncated */
- if (cutlen > 0 &&
- nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN, skb->len)) {
- err = -ENOBUFS;
- goto out;
- }
-
- /* Add OVS_PACKET_ATTR_HASH */
- hash = skb_get_hash_raw(skb);
-#ifdef HAVE_SW_HASH
- if (skb->sw_hash)
- hash |= OVS_PACKET_HASH_SW_BIT;
-#endif
-
- if (skb->l4_hash)
- hash |= OVS_PACKET_HASH_L4_BIT;
-
- if (nla_put(user_skb, OVS_PACKET_ATTR_HASH, sizeof (u64), &hash)) {
- err = -ENOBUFS;
- goto out;
- }
-
- /* Only reserve room for attribute header, packet data is added
- * in skb_zerocopy()
- */
- if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
- err = -ENOBUFS;
- goto out;
- }
- nla->nla_len = nla_attr_size(skb->len - cutlen);
-
- err = skb_zerocopy(user_skb, skb, skb->len - cutlen, hlen);
- if (err)
- goto out;
-
- /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
- pad_packet(dp, user_skb);
-
- ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;
-
- err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
- user_skb = NULL;
-out:
- if (err)
- skb_tx_error(skb);
- kfree_skb(user_skb);
- kfree_skb(nskb);
- return err;
-}
-
-static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
-{
- struct ovs_header *ovs_header = info->userhdr;
- struct net *net = sock_net(skb->sk);
- struct nlattr **a = info->attrs;
- struct sw_flow_actions *acts;
- struct sk_buff *packet;
- struct sw_flow *flow;
- struct sw_flow_actions *sf_acts;
- struct datapath *dp;
- struct vport *input_vport;
- u16 mru = 0;
- u64 hash;
- int len;
- int err;
- bool log = !a[OVS_PACKET_ATTR_PROBE];
-
- err = -EINVAL;
- if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
- !a[OVS_PACKET_ATTR_ACTIONS])
- goto err;
-
- len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
- packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
- err = -ENOMEM;
- if (!packet)
- goto err;
- skb_reserve(packet, NET_IP_ALIGN);
-
- nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);
-
- /* Set packet's mru */
- if (a[OVS_PACKET_ATTR_MRU]) {
- mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
- packet->ignore_df = 1;
- }
- OVS_CB(packet)->mru = mru;
-
- if (a[OVS_PACKET_ATTR_HASH]) {
- hash = nla_get_u64(a[OVS_PACKET_ATTR_HASH]);
-
- __skb_set_hash(packet, hash & 0xFFFFFFFFULL,
- !!(hash & OVS_PACKET_HASH_SW_BIT),
- !!(hash & OVS_PACKET_HASH_L4_BIT));
- }
-
- /* Build an sw_flow for sending this packet. */
- flow = ovs_flow_alloc();
- err = PTR_ERR(flow);
- if (IS_ERR(flow))
- goto err_kfree_skb;
-
- err = ovs_flow_key_extract_userspace(net, a[OVS_PACKET_ATTR_KEY],
- packet, &flow->key, log);
- if (err)
- goto err_flow_free;
-
- err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS],
- &flow->key, &acts, log);
- if (err)
- goto err_flow_free;
-
- rcu_assign_pointer(flow->sf_acts, acts);
- packet->priority = flow->key.phy.priority;
- packet->mark = flow->key.phy.skb_mark;
-
- rcu_read_lock();
- dp = get_dp_rcu(net, ovs_header->dp_ifindex);
- err = -ENODEV;
- if (!dp)
- goto err_unlock;
-
- input_vport = ovs_vport_rcu(dp, flow->key.phy.in_port);
- if (!input_vport)
- input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);
-
- if (!input_vport)
- goto err_unlock;
-
- packet->dev = input_vport->dev;
- OVS_CB(packet)->input_vport = input_vport;
- sf_acts = rcu_dereference(flow->sf_acts);
-
- local_bh_disable();
- err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
- local_bh_enable();
- rcu_read_unlock();
-
- ovs_flow_free(flow, false);
- return err;
-
-err_unlock:
- rcu_read_unlock();
-err_flow_free:
- ovs_flow_free(flow, false);
-err_kfree_skb:
- kfree_skb(packet);
-err:
- return err;
-}
-
-static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
- [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
- [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
- [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
- [OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
- [OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
- [OVS_PACKET_ATTR_HASH] = { .type = NLA_U64 },
-};
-
-static struct genl_ops dp_packet_genl_ops[] = {
- { .cmd = OVS_PACKET_CMD_EXECUTE,
-#ifdef HAVE_GENL_VALIDATE_FLAGS
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-#endif
- .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-#ifdef HAVE_GENL_OPS_POLICY
- .policy = packet_policy,
-#endif
- .doit = ovs_packet_cmd_execute
- }
-};
-
-static struct genl_family dp_packet_genl_family __ro_after_init = {
- .hdrsize = sizeof(struct ovs_header),
- .name = OVS_PACKET_FAMILY,
- .version = OVS_PACKET_VERSION,
- .maxattr = OVS_PACKET_ATTR_MAX,
-#ifndef HAVE_GENL_OPS_POLICY
- .policy = packet_policy,
-#endif
- .netnsok = true,
- .parallel_ops = true,
- .ops = dp_packet_genl_ops,
- .n_ops = ARRAY_SIZE(dp_packet_genl_ops),
- .module = THIS_MODULE,
-};
-
-static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
- struct ovs_dp_megaflow_stats *mega_stats)
-{
- int i;
-
- memset(mega_stats, 0, sizeof(*mega_stats));
-
- stats->n_flows = ovs_flow_tbl_count(&dp->table);
- mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);
-
- stats->n_hit = stats->n_missed = stats->n_lost = 0;
-
- for_each_possible_cpu(i) {
- const struct dp_stats_percpu *percpu_stats;
- struct dp_stats_percpu local_stats;
- unsigned int start;
-
- percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
-
- do {
- start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
- local_stats = *percpu_stats;
- } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));
-
- stats->n_hit += local_stats.n_hit;
- stats->n_missed += local_stats.n_missed;
- stats->n_lost += local_stats.n_lost;
- mega_stats->n_mask_hit += local_stats.n_mask_hit;
- }
-}
-
-static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags)
-{
- return ovs_identifier_is_ufid(sfid) &&
- !(ufid_flags & OVS_UFID_F_OMIT_KEY);
-}
-
-static bool should_fill_mask(uint32_t ufid_flags)
-{
- return !(ufid_flags & OVS_UFID_F_OMIT_MASK);
-}
-
-static bool should_fill_actions(uint32_t ufid_flags)
-{
- return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS);
-}
-
-static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
- const struct sw_flow_id *sfid,
- uint32_t ufid_flags)
-{
- size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));
-
- /* OVS_FLOW_ATTR_UFID, or unmasked flow key as fallback
- * see ovs_nla_put_identifier()
- */
- if (sfid && ovs_identifier_is_ufid(sfid))
- len += nla_total_size(sfid->ufid_len);
- else
- len += nla_total_size(ovs_key_attr_size());
-
- /* OVS_FLOW_ATTR_KEY */
- if (!sfid || should_fill_key(sfid, ufid_flags))
- len += nla_total_size(ovs_key_attr_size());
-
- /* OVS_FLOW_ATTR_MASK */
- if (should_fill_mask(ufid_flags))
- len += nla_total_size(ovs_key_attr_size());
-
- /* OVS_FLOW_ATTR_ACTIONS */
- if (should_fill_actions(ufid_flags))
- len += nla_total_size(acts->orig_len);
-
- return len
- + nla_total_size_64bit(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
- + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
- + nla_total_size_64bit(8); /* OVS_FLOW_ATTR_USED */
-}
-
-/* Called with ovs_mutex or RCU read lock. */
-static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
- struct sk_buff *skb)
-{
- struct ovs_flow_stats stats;
- __be16 tcp_flags;
- unsigned long used;
-
- ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);
-
- if (used &&
- nla_put_u64_64bit(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used),
- OVS_FLOW_ATTR_PAD))
- return -EMSGSIZE;
-
- if (stats.n_packets &&
- nla_put_64bit(skb, OVS_FLOW_ATTR_STATS,
- sizeof(struct ovs_flow_stats), &stats,
- OVS_FLOW_ATTR_PAD))
- return -EMSGSIZE;
-
- if ((u8)ntohs(tcp_flags) &&
- nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
- return -EMSGSIZE;
-
- return 0;
-}
-
-/* Called with ovs_mutex or RCU read lock. */
-static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
- struct sk_buff *skb, int skb_orig_len)
-{
- struct nlattr *start;
- int err;
-
- /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
- * this is the first flow to be dumped into 'skb'. This is unusual for
- * Netlink but individual action lists can be longer than
- * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
- * The userspace caller can always fetch the actions separately if it
- * really wants them. (Most userspace callers in fact don't care.)
- *
- * This can only fail for dump operations because the skb is always
- * properly sized for single flows.
- */
- start = nla_nest_start_noflag(skb, OVS_FLOW_ATTR_ACTIONS);
- if (start) {
- const struct sw_flow_actions *sf_acts;
-
- sf_acts = rcu_dereference_ovsl(flow->sf_acts);
- err = ovs_nla_put_actions(sf_acts->actions,
- sf_acts->actions_len, skb);
-
- if (!err)
- nla_nest_end(skb, start);
- else {
- if (skb_orig_len)
- return err;
-
- nla_nest_cancel(skb, start);
- }
- } else if (skb_orig_len) {
- return -EMSGSIZE;
- }
-
- return 0;
-}
-
-/* Called with ovs_mutex or RCU read lock. */
-static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
- struct sk_buff *skb, u32 portid,
- u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
-{
- const int skb_orig_len = skb->len;
- struct ovs_header *ovs_header;
- int err;
-
- ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
- flags, cmd);
- if (!ovs_header)
- return -EMSGSIZE;
-
- ovs_header->dp_ifindex = dp_ifindex;
-
- err = ovs_nla_put_identifier(flow, skb);
- if (err)
- goto error;
-
- if (should_fill_key(&flow->id, ufid_flags)) {
- err = ovs_nla_put_masked_key(flow, skb);
- if (err)
- goto error;
- }
-
- if (should_fill_mask(ufid_flags)) {
- err = ovs_nla_put_mask(flow, skb);
- if (err)
- goto error;
- }
-
- err = ovs_flow_cmd_fill_stats(flow, skb);
- if (err)
- goto error;
-
- if (should_fill_actions(ufid_flags)) {
- err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
- if (err)
- goto error;
- }
-
- genlmsg_end(skb, ovs_header);
- return 0;
-
-error:
- genlmsg_cancel(skb, ovs_header);
- return err;
-}
-
-/* May not be called with RCU read lock. */
-static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
- const struct sw_flow_id *sfid,
- struct genl_info *info,
- bool always,
- uint32_t ufid_flags)
-{
- struct sk_buff *skb;
- size_t len;
-
- if (!always && !ovs_must_notify(&dp_flow_genl_family, info,
- GROUP_ID(&ovs_dp_flow_multicast_group)))
- return NULL;
-
- len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
- skb = genlmsg_new(len, GFP_KERNEL);
- if (!skb)
- return ERR_PTR(-ENOMEM);
-
- return skb;
-}
-
-/* Called with ovs_mutex. */
-static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
- int dp_ifindex,
- struct genl_info *info, u8 cmd,
- bool always, u32 ufid_flags)
-{
- struct sk_buff *skb;
- int retval;
-
- skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
- &flow->id, info, always, ufid_flags);
- if (IS_ERR_OR_NULL(skb))
- return skb;
-
- retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
- info->snd_portid, info->snd_seq, 0,
- cmd, ufid_flags);
- if (WARN_ON_ONCE(retval < 0)) {
- kfree_skb(skb);
- skb = ERR_PTR(retval);
- }
- return skb;
-}
-
-static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
-{
- struct net *net = sock_net(skb->sk);
- struct nlattr **a = info->attrs;
- struct ovs_header *ovs_header = info->userhdr;
- struct sw_flow *flow = NULL, *new_flow;
- struct sw_flow_mask mask;
- struct sk_buff *reply;
- struct datapath *dp;
- struct sw_flow_actions *acts;
- struct sw_flow_match match;
- u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
- int error;
- bool log = !a[OVS_FLOW_ATTR_PROBE];
-
- /* Must have key and actions. */
- error = -EINVAL;
- if (!a[OVS_FLOW_ATTR_KEY]) {
- OVS_NLERR(log, "Flow key attr not present in new flow.");
- goto error;
- }
- if (!a[OVS_FLOW_ATTR_ACTIONS]) {
- OVS_NLERR(log, "Flow actions attr not present in new flow.");
- goto error;
- }
-
- /* Most of the time we need to allocate a new flow, do it before
- * locking.
- */
- new_flow = ovs_flow_alloc();
- if (IS_ERR(new_flow)) {
- error = PTR_ERR(new_flow);
- goto error;
- }
-
- /* Extract key. */
- ovs_match_init(&match, &new_flow->key, false, &mask);
- error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
- a[OVS_FLOW_ATTR_MASK], log);
- if (error)
- goto err_kfree_flow;
-
- /* Extract flow identifier. */
- error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
- &new_flow->key, log);
- if (error)
- goto err_kfree_flow;
-
- /* unmasked key is needed to match when ufid is not used. */
- if (ovs_identifier_is_key(&new_flow->id))
- match.key = new_flow->id.unmasked_key;
-
- ovs_flow_mask_key(&new_flow->key, &new_flow->key, true, &mask);
-
- /* Validate actions. */
- error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
- &new_flow->key, &acts, log);
- if (error) {
- OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
- goto err_kfree_flow;
- }
-
- reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
- ufid_flags);
- if (IS_ERR(reply)) {
- error = PTR_ERR(reply);
- goto err_kfree_acts;
- }
-
- ovs_lock();
- dp = get_dp(net, ovs_header->dp_ifindex);
- if (unlikely(!dp)) {
- error = -ENODEV;
- goto err_unlock_ovs;
- }
-
- /* Check if this is a duplicate flow */
- if (ovs_identifier_is_ufid(&new_flow->id))
- flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
- if (!flow)
- flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->key);
- if (likely(!flow)) {
- rcu_assign_pointer(new_flow->sf_acts, acts);
-
- /* Put flow in bucket. */
- error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
- if (unlikely(error)) {
- acts = NULL;
- goto err_unlock_ovs;
- }
-
- if (unlikely(reply)) {
- error = ovs_flow_cmd_fill_info(new_flow,
- ovs_header->dp_ifindex,
- reply, info->snd_portid,
- info->snd_seq, 0,
- OVS_FLOW_CMD_NEW,
- ufid_flags);
- BUG_ON(error < 0);
- }
- ovs_unlock();
- } else {
- struct sw_flow_actions *old_acts;
-
- /* Bail out if we're not allowed to modify an existing flow.
- * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
- * because Generic Netlink treats the latter as a dump
- * request. We also accept NLM_F_EXCL in case that bug ever
- * gets fixed.
- */
- if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
- | NLM_F_EXCL))) {
- error = -EEXIST;
- goto err_unlock_ovs;
- }
- /* The flow identifier has to be the same for flow updates.
- * Look for any overlapping flow.
- */
- if (unlikely(!ovs_flow_cmp(flow, &match))) {
- if (ovs_identifier_is_key(&flow->id))
- flow = ovs_flow_tbl_lookup_exact(&dp->table,
- &match);
- else /* UFID matches but key is different */
- flow = NULL;
- if (!flow) {
- error = -ENOENT;
- goto err_unlock_ovs;
- }
- }
- /* Update actions. */
- old_acts = ovsl_dereference(flow->sf_acts);
- rcu_assign_pointer(flow->sf_acts, acts);
-
- if (unlikely(reply)) {
- error = ovs_flow_cmd_fill_info(flow,
- ovs_header->dp_ifindex,
- reply, info->snd_portid,
- info->snd_seq, 0,
- OVS_FLOW_CMD_NEW,
- ufid_flags);
- BUG_ON(error < 0);
- }
- ovs_unlock();
-
- ovs_nla_free_flow_actions_rcu(old_acts);
- ovs_flow_free(new_flow, false);
- }
-
- if (reply)
- ovs_notify(&dp_flow_genl_family, &ovs_dp_flow_multicast_group, reply, info);
- return 0;
-
-err_unlock_ovs:
- ovs_unlock();
- kfree_skb(reply);
-err_kfree_acts:
- ovs_nla_free_flow_actions(acts);
-err_kfree_flow:
- ovs_flow_free(new_flow, false);
-error:
- return error;
-}
-
-/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
-static noinline_for_stack struct sw_flow_actions *get_flow_actions(struct net *net,
- const struct nlattr *a,
- const struct sw_flow_key *key,
- const struct sw_flow_mask *mask,
- bool log)
-{
- struct sw_flow_actions *acts;
- struct sw_flow_key masked_key;
- int error;
-
- ovs_flow_mask_key(&masked_key, key, true, mask);
- error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log);
- if (error) {
- OVS_NLERR(log,
- "Actions may not be safe on all matching packets");
- return ERR_PTR(error);
- }
-
- return acts;
-}
-
-/* Factor out match-init and action-copy to avoid
- * "Wframe-larger-than=1024" warning. Because mask is only
- * used to get actions, we new a function to save some
- * stack space.
- *
- * If there are not key and action attrs, we return 0
- * directly. In the case, the caller will also not use the
- * match as before. If there is action attr, we try to get
- * actions and save them to *acts. Before returning from
- * the function, we reset the match->mask pointer. Because
- * we should not to return match object with dangling reference
- * to mask.
- * */
-static noinline_for_stack int
-ovs_nla_init_match_and_action(struct net *net,
- struct sw_flow_match *match,
- struct sw_flow_key *key,
- struct nlattr **a,
- struct sw_flow_actions **acts,
- bool log)
-{
- struct sw_flow_mask mask;
- int error = 0;
-
- if (a[OVS_FLOW_ATTR_KEY]) {
- ovs_match_init(match, key, true, &mask);
- error = ovs_nla_get_match(net, match, a[OVS_FLOW_ATTR_KEY],
- a[OVS_FLOW_ATTR_MASK], log);
- if (error)
- goto error;
- }
-
- if (a[OVS_FLOW_ATTR_ACTIONS]) {
- if (!a[OVS_FLOW_ATTR_KEY]) {
- OVS_NLERR(log,
- "Flow key attribute not present in set flow.");
- error = -EINVAL;
- goto error;
- }
-
- *acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], key,
- &mask, log);
- if (IS_ERR(*acts)) {
- error = PTR_ERR(*acts);
- goto error;
- }
- }
-
- /* On success, error is 0. */
-error:
- match->mask = NULL;
- return error;
-}
-
-static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
-{
- struct net *net = sock_net(skb->sk);
- struct nlattr **a = info->attrs;
- struct ovs_header *ovs_header = info->userhdr;
- struct sw_flow_key key;
- struct sw_flow *flow;
- struct sk_buff *reply = NULL;
- struct datapath *dp;
- struct sw_flow_actions *old_acts = NULL, *acts = NULL;
- struct sw_flow_match match;
- struct sw_flow_id sfid;
- u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
- int error = 0;
- bool log = !a[OVS_FLOW_ATTR_PROBE];
- bool ufid_present;
-
- ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
- if (!a[OVS_FLOW_ATTR_KEY] && !ufid_present) {
- OVS_NLERR(log,
- "Flow set message rejected, Key attribute missing.");
- return -EINVAL;
- }
-
- error = ovs_nla_init_match_and_action(net, &match, &key, a,
- &acts, log);
- if (error)
- goto error;
-
- if (acts) {
- /* Can allocate before locking if have acts. */
- reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false,
- ufid_flags);
- if (IS_ERR(reply)) {
- error = PTR_ERR(reply);
- goto err_kfree_acts;
- }
- }
-
- ovs_lock();
- dp = get_dp(net, ovs_header->dp_ifindex);
- if (unlikely(!dp)) {
- error = -ENODEV;
- goto err_unlock_ovs;
- }
- /* Check that the flow exists. */
- if (ufid_present)
- flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid);
- else
- flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
- if (unlikely(!flow)) {
- error = -ENOENT;
- goto err_unlock_ovs;
- }
-
- /* Update actions, if present. */
- if (likely(acts)) {
- old_acts = ovsl_dereference(flow->sf_acts);
- rcu_assign_pointer(flow->sf_acts, acts);
-
- if (unlikely(reply)) {
- error = ovs_flow_cmd_fill_info(flow,
- ovs_header->dp_ifindex,
- reply, info->snd_portid,
- info->snd_seq, 0,
- OVS_FLOW_CMD_SET,
- ufid_flags);
- BUG_ON(error < 0);
- }
- } else {
- /* Could not alloc without acts before locking. */
- reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
- info, OVS_FLOW_CMD_SET, false,
- ufid_flags);
-
- if (unlikely(IS_ERR(reply))) {
- error = PTR_ERR(reply);
- goto err_unlock_ovs;
- }
- }
-
- /* Clear stats. */
- if (a[OVS_FLOW_ATTR_CLEAR])
- ovs_flow_stats_clear(flow);
- ovs_unlock();
-
- if (reply)
- ovs_notify(&dp_flow_genl_family, &ovs_dp_flow_multicast_group, reply, info);
- if (old_acts)
- ovs_nla_free_flow_actions_rcu(old_acts);
-
- return 0;
-
-err_unlock_ovs:
- ovs_unlock();
- kfree_skb(reply);
-err_kfree_acts:
- ovs_nla_free_flow_actions(acts);
-error:
- return error;
-}
-
-static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
-{
- struct nlattr **a = info->attrs;
- struct ovs_header *ovs_header = info->userhdr;
- struct net *net = sock_net(skb->sk);
- struct sw_flow_key key;
- struct sk_buff *reply;
- struct sw_flow *flow;
- struct datapath *dp;
- struct sw_flow_match match;
- struct sw_flow_id ufid;
- u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
- int err = 0;
- bool log = !a[OVS_FLOW_ATTR_PROBE];
- bool ufid_present;
-
- ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
- if (a[OVS_FLOW_ATTR_KEY]) {
- ovs_match_init(&match, &key, true, NULL);
- err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL,
- log);
- } else if (!ufid_present) {
- OVS_NLERR(log,
- "Flow get message rejected, Key attribute missing.");
- err = -EINVAL;
- }
- if (err)
- return err;
-
- ovs_lock();
- dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
- if (!dp) {
- err = -ENODEV;
- goto unlock;
- }
-
- if (ufid_present)
- flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
- else
- flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
- if (!flow) {
- err = -ENOENT;
- goto unlock;
- }
-
- reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
- OVS_FLOW_CMD_GET, true, ufid_flags);
- if (IS_ERR(reply)) {
- err = PTR_ERR(reply);
- goto unlock;
- }
-
- ovs_unlock();
- return genlmsg_reply(reply, info);
-unlock:
- ovs_unlock();
- return err;
-}
-
-static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
-{
- struct nlattr **a = info->attrs;
- struct ovs_header *ovs_header = info->userhdr;
- struct net *net = sock_net(skb->sk);
- struct sw_flow_key key;
- struct sk_buff *reply;
- struct sw_flow *flow = NULL;
- struct datapath *dp;
- struct sw_flow_match match;
- struct sw_flow_id ufid;
- u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
- int err;
- bool log = !a[OVS_FLOW_ATTR_PROBE];
- bool ufid_present;
-
- ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
- if (a[OVS_FLOW_ATTR_KEY]) {
- ovs_match_init(&match, &key, true, NULL);
- err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
- NULL, log);
- if (unlikely(err))
- return err;
- }
-
- ovs_lock();
- dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
- if (unlikely(!dp)) {
- err = -ENODEV;
- goto unlock;
- }
-
- if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
- err = ovs_flow_tbl_flush(&dp->table);
- goto unlock;
- }
-
- if (ufid_present)
- flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
- else
- flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
- if (unlikely(!flow)) {
- err = -ENOENT;
- goto unlock;
- }
-
- ovs_flow_tbl_remove(&dp->table, flow);
- ovs_unlock();
-
- reply = ovs_flow_cmd_alloc_info(rcu_dereference_raw(flow->sf_acts),
- &flow->id, info, false, ufid_flags);
-
- if (likely(reply)) {
- if (!IS_ERR(reply)) {
- rcu_read_lock(); /*To keep RCU checker happy. */
- err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
- reply, info->snd_portid,
- info->snd_seq, 0,
- OVS_FLOW_CMD_DEL,
- ufid_flags);
- rcu_read_unlock();
- if (WARN_ON_ONCE(err < 0)) {
- kfree_skb(reply);
- goto out_free;
- }
- ovs_notify(&dp_flow_genl_family, &ovs_dp_flow_multicast_group, reply, info);
- } else {
- genl_set_err(&dp_flow_genl_family, sock_net(skb->sk), 0,
- GROUP_ID(&ovs_dp_flow_multicast_group), PTR_ERR(reply));
-
- }
- }
-
-out_free:
- ovs_flow_free(flow, true);
- return 0;
-unlock:
- ovs_unlock();
- return err;
-}
-
-static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
-{
- struct nlattr *a[__OVS_FLOW_ATTR_MAX];
- struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
- struct table_instance *ti;
- struct datapath *dp;
- u32 ufid_flags;
- int err;
-
- err = genlmsg_parse_deprecated(cb->nlh, &dp_flow_genl_family, a,
- OVS_FLOW_ATTR_MAX, flow_policy, NULL);
- if (err)
- return err;
- ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
-
- rcu_read_lock();
- dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
- if (!dp) {
- rcu_read_unlock();
- return -ENODEV;
- }
-
- ti = rcu_dereference(dp->table.ti);
- for (;;) {
- struct sw_flow *flow;
- u32 bucket, obj;
-
- bucket = cb->args[0];
- obj = cb->args[1];
- flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
- if (!flow)
- break;
-
- if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI,
- OVS_FLOW_CMD_GET, ufid_flags) < 0)
- break;
-
- cb->args[0] = bucket;
- cb->args[1] = obj;
- }
- rcu_read_unlock();
- return skb->len;
-}
-
-static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
- [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
- [OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED },
- [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
- [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
- [OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
- [OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 },
- [OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 },
-};
-
-static const struct genl_ops dp_flow_genl_ops[] = {
- { .cmd = OVS_FLOW_CMD_NEW,
-#ifdef HAVE_GENL_VALIDATE_FLAGS
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-#endif
- .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-#ifdef HAVE_GENL_OPS_POLICY
- .policy = flow_policy,
-#endif
- .doit = ovs_flow_cmd_new
- },
- { .cmd = OVS_FLOW_CMD_DEL,
-#ifdef HAVE_GENL_VALIDATE_FLAGS
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-#endif
- .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-#ifdef HAVE_GENL_OPS_POLICY
- .policy = flow_policy,
-#endif
- .doit = ovs_flow_cmd_del
- },
- { .cmd = OVS_FLOW_CMD_GET,
-#ifdef HAVE_GENL_VALIDATE_FLAGS
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-#endif
- .flags = 0, /* OK for unprivileged users. */
-#ifdef HAVE_GENL_OPS_POLICY
- .policy = flow_policy,
-#endif
- .doit = ovs_flow_cmd_get,
- .dumpit = ovs_flow_cmd_dump
- },
- { .cmd = OVS_FLOW_CMD_SET,
-#ifdef HAVE_GENL_VALIDATE_FLAGS
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-#endif
- .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-#ifdef HAVE_GENL_OPS_POLICY
- .policy = flow_policy,
-#endif
- .doit = ovs_flow_cmd_set,
- },
-};
-
-static struct genl_family dp_flow_genl_family __ro_after_init = {
- .hdrsize = sizeof(struct ovs_header),
- .name = OVS_FLOW_FAMILY,
- .version = OVS_FLOW_VERSION,
- .maxattr = OVS_FLOW_ATTR_MAX,
-#ifndef HAVE_GENL_OPS_POLICY
- .policy = flow_policy,
-#endif
- .netnsok = true,
- .parallel_ops = true,
- .ops = dp_flow_genl_ops,
- .n_ops = ARRAY_SIZE(dp_flow_genl_ops),
- .mcgrps = &ovs_dp_flow_multicast_group,
- .n_mcgrps = 1,
- .module = THIS_MODULE,
-};
-
-static size_t ovs_dp_cmd_msg_size(void)
-{
- size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
-
- msgsize += nla_total_size(IFNAMSIZ);
- msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
- msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats));
- msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
-
- return msgsize;
-}
-
-/* Called with ovs_mutex. */
-static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
- u32 portid, u32 seq, u32 flags, u8 cmd)
-{
- struct ovs_header *ovs_header;
- struct ovs_dp_stats dp_stats;
- struct ovs_dp_megaflow_stats dp_megaflow_stats;
- int err;
-
- ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
- flags, cmd);
- if (!ovs_header)
- goto error;
-
- ovs_header->dp_ifindex = get_dpifindex(dp);
-
- err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
- if (err)
- goto nla_put_failure;
-
- get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
- if (nla_put_64bit(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
- &dp_stats, OVS_DP_ATTR_PAD))
- goto nla_put_failure;
-
- if (nla_put_64bit(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
- sizeof(struct ovs_dp_megaflow_stats),
- &dp_megaflow_stats, OVS_DP_ATTR_PAD))
- goto nla_put_failure;
-
- if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
- goto nla_put_failure;
-
- genlmsg_end(skb, ovs_header);
- return 0;
-
-nla_put_failure:
- genlmsg_cancel(skb, ovs_header);
-error:
- return -EMSGSIZE;
-}
-
-static struct sk_buff *ovs_dp_cmd_alloc_info(void)
-{
- return genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
-}
-
-/* Called with rcu_read_lock or ovs_mutex. */
-static struct datapath *lookup_datapath(struct net *net,
- const struct ovs_header *ovs_header,
- struct nlattr *a[OVS_DP_ATTR_MAX + 1])
-{
- struct datapath *dp;
-
- if (!a[OVS_DP_ATTR_NAME])
- dp = get_dp(net, ovs_header->dp_ifindex);
- else {
- struct vport *vport;
-
- vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
- dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
- }
- return dp ? dp : ERR_PTR(-ENODEV);
-}
-
-static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *info)
-{
- struct datapath *dp;
-
- dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
- if (IS_ERR(dp))
- return;
-
- WARN(dp->user_features, "Dropping previously announced user features\n");
- dp->user_features = 0;
-}
-
-DEFINE_STATIC_KEY_FALSE(tc_recirc_sharing_support);
-
-static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
-{
- u32 user_features = 0;
-
- if (a[OVS_DP_ATTR_USER_FEATURES]) {
- user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
-
- if (user_features & ~(OVS_DP_F_VPORT_PIDS |
- OVS_DP_F_UNALIGNED |
- OVS_DP_F_TC_RECIRC_SHARING))
- return -EOPNOTSUPP;
-
-#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
- if (user_features & OVS_DP_F_TC_RECIRC_SHARING)
- return -EOPNOTSUPP;
-#endif
- }
-
- dp->user_features = user_features;
-
- if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
- static_branch_enable(&tc_recirc_sharing_support);
- else
- static_branch_disable(&tc_recirc_sharing_support);
-
- return 0;
-}
-
-static int ovs_dp_stats_init(struct datapath *dp)
-{
- dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
- if (!dp->stats_percpu)
- return -ENOMEM;
-
- return 0;
-}
-
-static int ovs_dp_vport_init(struct datapath *dp)
-{
- int i;
-
- dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
- sizeof(struct hlist_head),
- GFP_KERNEL);
- if (!dp->ports)
- return -ENOMEM;
-
- for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
- INIT_HLIST_HEAD(&dp->ports[i]);
-
- return 0;
-}
-
-static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
-{
- struct nlattr **a = info->attrs;
- struct vport_parms parms;
- struct sk_buff *reply;
- struct datapath *dp;
- struct vport *vport;
- struct ovs_net *ovs_net;
- int err;
-
- err = -EINVAL;
- if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
- goto err;
-
- reply = ovs_dp_cmd_alloc_info();
- if (!reply)
- return -ENOMEM;
-
- err = -ENOMEM;
- dp = kzalloc(sizeof(*dp), GFP_KERNEL);
- if (dp == NULL)
- goto err_destroy_reply;
-
- ovs_dp_set_net(dp, sock_net(skb->sk));
-
- /* Allocate table. */
- err = ovs_flow_tbl_init(&dp->table);
- if (err)
- goto err_destroy_dp;
-
- err = ovs_dp_stats_init(dp);
- if (err)
- goto err_destroy_table;
-
- err = ovs_dp_vport_init(dp);
- if (err)
- goto err_destroy_stats;
-
- err = ovs_meters_init(dp);
- if (err)
- goto err_destroy_ports;
-
- /* Set up our datapath device. */
- parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
- parms.type = OVS_VPORT_TYPE_INTERNAL;
- parms.options = NULL;
- parms.dp = dp;
- parms.port_no = OVSP_LOCAL;
- parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
-
- err = ovs_dp_change(dp, a);
- if (err)
- goto err_destroy_meters;
-
- /* So far only local changes have been made, now need the lock. */
- ovs_lock();
-
- vport = new_vport(&parms);
- if (IS_ERR(vport)) {
- err = PTR_ERR(vport);
- if (err == -EBUSY)
- err = -EEXIST;
-
- if (err == -EEXIST) {
- /* An outdated user space instance that does not understand
- * the concept of user_features has attempted to create a new
- * datapath and is likely to reuse it. Drop all user features.
- */
- if (info->genlhdr->version < OVS_DP_VER_FEATURES)
- ovs_dp_reset_user_features(skb, info);
- }
-
- ovs_unlock();
- goto err_destroy_meters;
- }
-
- err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
- info->snd_seq, 0, OVS_DP_CMD_NEW);
- BUG_ON(err < 0);
-
- ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
- list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
-
- ovs_unlock();
-
- ovs_notify(&dp_datapath_genl_family, &ovs_dp_datapath_multicast_group, reply, info);
- return 0;
-
-err_destroy_meters:
- ovs_meters_exit(dp);
-err_destroy_ports:
- kfree(dp->ports);
-err_destroy_stats:
- free_percpu(dp->stats_percpu);
-err_destroy_table:
- ovs_flow_tbl_destroy(&dp->table);
-err_destroy_dp:
- kfree(dp);
-err_destroy_reply:
- kfree_skb(reply);
-err:
- return err;
-}
-
-/* Called with ovs_mutex. */
-static void __dp_destroy(struct datapath *dp)
-{
- int i;
-
- for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
- struct vport *vport;
- struct hlist_node *n;
-
- hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
- if (vport->port_no != OVSP_LOCAL)
- ovs_dp_detach_port(vport);
- }
-
- list_del_rcu(&dp->list_node);
-
- /* OVSP_LOCAL is datapath internal port. We need to make sure that
- * all ports in datapath are destroyed first before freeing datapath.
- */
- ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
-
- /* RCU destroy the flow table */
- call_rcu(&dp->rcu, destroy_dp_rcu);
-}
-
-static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
-{
- struct sk_buff *reply;
- struct datapath *dp;
- int err;
-
- reply = ovs_dp_cmd_alloc_info();
- if (!reply)
- return -ENOMEM;
-
- ovs_lock();
- dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
- err = PTR_ERR(dp);
- if (IS_ERR(dp))
- goto err_unlock_free;
-
- err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
- info->snd_seq, 0, OVS_DP_CMD_DEL);
- BUG_ON(err < 0);
-
- __dp_destroy(dp);
- ovs_unlock();
-
- ovs_notify(&dp_datapath_genl_family, &ovs_dp_datapath_multicast_group, reply, info);
- return 0;
-
-err_unlock_free:
- ovs_unlock();
- kfree_skb(reply);
- return err;
-}
-
-static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
-{
- struct sk_buff *reply;
- struct datapath *dp;
- int err;
-
- reply = ovs_dp_cmd_alloc_info();
- if (!reply)
- return -ENOMEM;
-
- ovs_lock();
- dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
- err = PTR_ERR(dp);
- if (IS_ERR(dp))
- goto err_unlock_free;
-
- err = ovs_dp_change(dp, info->attrs);
- if (err)
- goto err_unlock_free;
-
- err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
- info->snd_seq, 0, OVS_DP_CMD_GET);
- BUG_ON(err < 0);
-
- ovs_unlock();
-
- ovs_notify(&dp_datapath_genl_family, &ovs_dp_datapath_multicast_group, reply, info);
- return 0;
-
-err_unlock_free:
- ovs_unlock();
- kfree_skb(reply);
- return err;
-}
-
-static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
-{
- struct sk_buff *reply;
- struct datapath *dp;
- int err;
-
- reply = ovs_dp_cmd_alloc_info();
- if (!reply)
- return -ENOMEM;
-
- ovs_lock();
- dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
- if (IS_ERR(dp)) {
- err = PTR_ERR(dp);
- goto err_unlock_free;
- }
- err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
- info->snd_seq, 0, OVS_DP_CMD_GET);
- BUG_ON(err < 0);
- ovs_unlock();
-
- return genlmsg_reply(reply, info);
-
-err_unlock_free:
- ovs_unlock();
- kfree_skb(reply);
- return err;
-}
-
-static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
-{
- struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
- struct datapath *dp;
- int skip = cb->args[0];
- int i = 0;
-
- ovs_lock();
- list_for_each_entry(dp, &ovs_net->dps, list_node) {
- if (i >= skip &&
- ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI,
- OVS_DP_CMD_GET) < 0)
- break;
- i++;
- }
- ovs_unlock();
-
- cb->args[0] = i;
-
- return skb->len;
-}
-
-static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
- [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
- [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
- [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
-};
-
-static const struct genl_ops dp_datapath_genl_ops[] = {
- { .cmd = OVS_DP_CMD_NEW,
-#ifdef HAVE_GENL_VALIDATE_FLAGS
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-#endif
- .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-#ifdef HAVE_GENL_OPS_POLICY
- .policy = datapath_policy,
-#endif
- .doit = ovs_dp_cmd_new
- },
- { .cmd = OVS_DP_CMD_DEL,
-#ifdef HAVE_GENL_VALIDATE_FLAGS
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-#endif
- .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-#ifdef HAVE_GENL_OPS_POLICY
- .policy = datapath_policy,
-#endif
- .doit = ovs_dp_cmd_del
- },
- { .cmd = OVS_DP_CMD_GET,
-#ifdef HAVE_GENL_VALIDATE_FLAGS
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-#endif
- .flags = 0, /* OK for unprivileged users. */
-#ifdef HAVE_GENL_OPS_POLICY
- .policy = datapath_policy,
-#endif
- .doit = ovs_dp_cmd_get,
- .dumpit = ovs_dp_cmd_dump
- },
- { .cmd = OVS_DP_CMD_SET,
-#ifdef HAVE_GENL_VALIDATE_FLAGS
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-#endif
- .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-#ifdef HAVE_GENL_OPS_POLICY
- .policy = datapath_policy,
-#endif
- .doit = ovs_dp_cmd_set,
- },
-};
-
-static struct genl_family dp_datapath_genl_family __ro_after_init = {
- .hdrsize = sizeof(struct ovs_header),
- .name = OVS_DATAPATH_FAMILY,
- .version = OVS_DATAPATH_VERSION,
- .maxattr = OVS_DP_ATTR_MAX,
-#ifndef HAVE_GENL_OPS_POLICY
- .policy = datapath_policy,
-#endif
- .netnsok = true,
- .parallel_ops = true,
- .ops = dp_datapath_genl_ops,
- .n_ops = ARRAY_SIZE(dp_datapath_genl_ops),
- .mcgrps = &ovs_dp_datapath_multicast_group,
- .n_mcgrps = 1,
- .module = THIS_MODULE,
-};
-
-/* Called with ovs_mutex or RCU read lock. */
-static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
- struct net *net, u32 portid, u32 seq,
- u32 flags, u8 cmd, gfp_t gfp)
-{
- struct ovs_header *ovs_header;
- struct ovs_vport_stats vport_stats;
- int err;
-
- ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
- flags, cmd);
- if (!ovs_header)
- return -EMSGSIZE;
-
- ovs_header->dp_ifindex = get_dpifindex(vport->dp);
-
- if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
- nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
- nla_put_string(skb, OVS_VPORT_ATTR_NAME,
- ovs_vport_name(vport)) ||
- nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex))
- goto nla_put_failure;
-
-#ifdef HAVE_PEERNET2ID_ALLOC
- if (!net_eq(net, dev_net(vport->dev))) {
- int id = peernet2id_alloc(net, dev_net(vport->dev), gfp);
-
- if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
- goto nla_put_failure;
- }
-
-#endif
- ovs_vport_get_stats(vport, &vport_stats);
- if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS,
- sizeof(struct ovs_vport_stats), &vport_stats,
- OVS_VPORT_ATTR_PAD))
- goto nla_put_failure;
-
- if (ovs_vport_get_upcall_portids(vport, skb))
- goto nla_put_failure;
-
- err = ovs_vport_get_options(vport, skb);
- if (err == -EMSGSIZE)
- goto error;
-
- genlmsg_end(skb, ovs_header);
- return 0;
-
-nla_put_failure:
- err = -EMSGSIZE;
-error:
- genlmsg_cancel(skb, ovs_header);
- return err;
-}
-
-static struct sk_buff *ovs_vport_cmd_alloc_info(void)
-{
- return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-}
-
-/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
-struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
- u32 portid, u32 seq, u8 cmd)
-{
- struct sk_buff *skb;
- int retval;
-
- skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!skb)
- return ERR_PTR(-ENOMEM);
-
- retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd,
- GFP_KERNEL);
- BUG_ON(retval < 0);
-
- return skb;
-}
-
-/* Called with ovs_mutex or RCU read lock. */
-static struct vport *lookup_vport(struct net *net,
- const struct ovs_header *ovs_header,
- struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
-{
- struct datapath *dp;
- struct vport *vport;
-
- if (a[OVS_VPORT_ATTR_IFINDEX])
- return ERR_PTR(-EOPNOTSUPP);
- if (a[OVS_VPORT_ATTR_NAME]) {
- vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
- if (!vport)
- return ERR_PTR(-ENODEV);
- if (ovs_header->dp_ifindex &&
- ovs_header->dp_ifindex != get_dpifindex(vport->dp))
- return ERR_PTR(-ENODEV);
- return vport;
- } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
- u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
-
- if (port_no >= DP_MAX_PORTS)
- return ERR_PTR(-EFBIG);
-
- dp = get_dp(net, ovs_header->dp_ifindex);
- if (!dp)
- return ERR_PTR(-ENODEV);
-
- vport = ovs_vport_ovsl_rcu(dp, port_no);
- if (!vport)
- return ERR_PTR(-ENODEV);
- return vport;
- } else
- return ERR_PTR(-EINVAL);
-
-}
-
-static unsigned int ovs_get_max_headroom(struct datapath *dp)
-{
- unsigned int dev_headroom, max_headroom = 0;
- struct net_device *dev;
- struct vport *vport;
- int i;
-
- for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
- hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
- dev = vport->dev;
- dev_headroom = netdev_get_fwd_headroom(dev);
- if (dev_headroom > max_headroom)
- max_headroom = dev_headroom;
- }
- }
-
- return max_headroom;
-}
-
-/* Called with ovs_mutex */
-static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom)
-{
- struct vport *vport;
- int i;
-
- dp->max_headroom = new_headroom;
- for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
- hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node)
- netdev_set_rx_headroom(vport->dev, new_headroom);
-}
-
-static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
-{
- struct nlattr **a = info->attrs;
- struct ovs_header *ovs_header = info->userhdr;
- struct vport_parms parms;
- struct sk_buff *reply;
- struct vport *vport;
- struct datapath *dp;
- unsigned int new_headroom;
- u32 port_no;
- int err;
-
- if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
- !a[OVS_VPORT_ATTR_UPCALL_PID])
- return -EINVAL;
- if (a[OVS_VPORT_ATTR_IFINDEX])
- return -EOPNOTSUPP;
-
- port_no = a[OVS_VPORT_ATTR_PORT_NO]
- ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
- if (port_no >= DP_MAX_PORTS)
- return -EFBIG;
-
- reply = ovs_vport_cmd_alloc_info();
- if (!reply)
- return -ENOMEM;
-
- ovs_lock();
-restart:
- dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
- err = -ENODEV;
- if (!dp)
- goto exit_unlock_free;
-
- if (port_no) {
- vport = ovs_vport_ovsl(dp, port_no);
- err = -EBUSY;
- if (vport)
- goto exit_unlock_free;
- } else {
- for (port_no = 1; ; port_no++) {
- if (port_no >= DP_MAX_PORTS) {
- err = -EFBIG;
- goto exit_unlock_free;
- }
- vport = ovs_vport_ovsl(dp, port_no);
- if (!vport)
- break;
- }
- }
-
- parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
- parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
- parms.options = a[OVS_VPORT_ATTR_OPTIONS];
- parms.dp = dp;
- parms.port_no = port_no;
- parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
-
- vport = new_vport(&parms);
- err = PTR_ERR(vport);
- if (IS_ERR(vport)) {
- if (err == -EAGAIN)
- goto restart;
- goto exit_unlock_free;
- }
-
- err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
- info->snd_portid, info->snd_seq, 0,
- OVS_VPORT_CMD_NEW, GFP_KERNEL);
- BUG_ON(err < 0);
-
- new_headroom = netdev_get_fwd_headroom(vport->dev);
-
- if (new_headroom > dp->max_headroom)
- ovs_update_headroom(dp, new_headroom);
- else
- netdev_set_rx_headroom(vport->dev, dp->max_headroom);
-
- ovs_unlock();
-
- ovs_notify(&dp_vport_genl_family, &ovs_dp_vport_multicast_group, reply, info);
- return 0;
-
-exit_unlock_free:
- ovs_unlock();
- kfree_skb(reply);
- return err;
-}
-
-static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
-{
- struct nlattr **a = info->attrs;
- struct sk_buff *reply;
- struct vport *vport;
- int err;
-
- reply = ovs_vport_cmd_alloc_info();
- if (!reply)
- return -ENOMEM;
-
- ovs_lock();
- vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
- err = PTR_ERR(vport);
- if (IS_ERR(vport))
- goto exit_unlock_free;
-
- if (a[OVS_VPORT_ATTR_TYPE] &&
- nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
- err = -EINVAL;
- goto exit_unlock_free;
- }
-
- if (a[OVS_VPORT_ATTR_OPTIONS]) {
- err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
- if (err)
- goto exit_unlock_free;
- }
-
- if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
- struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];
-
- err = ovs_vport_set_upcall_portids(vport, ids);
- if (err)
- goto exit_unlock_free;
- }
-
- err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
- info->snd_portid, info->snd_seq, 0,
- OVS_VPORT_CMD_SET, GFP_KERNEL);
- BUG_ON(err < 0);
- ovs_unlock();
-
- ovs_notify(&dp_vport_genl_family, &ovs_dp_vport_multicast_group, reply, info);
- return 0;
-
-exit_unlock_free:
- ovs_unlock();
- kfree_skb(reply);
- return err;
-}
-
-static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
-{
- bool update_headroom = false;
- struct nlattr **a = info->attrs;
- struct sk_buff *reply;
- struct datapath *dp;
- struct vport *vport;
- unsigned int new_headroom;
- int err;
-
- reply = ovs_vport_cmd_alloc_info();
- if (!reply)
- return -ENOMEM;
-
- ovs_lock();
- vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
- err = PTR_ERR(vport);
- if (IS_ERR(vport))
- goto exit_unlock_free;
-
- if (vport->port_no == OVSP_LOCAL) {
- err = -EINVAL;
- goto exit_unlock_free;
- }
-
- err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
- info->snd_portid, info->snd_seq, 0,
- OVS_VPORT_CMD_DEL, GFP_KERNEL);
- BUG_ON(err < 0);
-
- /* the vport deletion may trigger dp headroom update */
- dp = vport->dp;
- if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom)
- update_headroom = true;
-
- netdev_reset_rx_headroom(vport->dev);
- ovs_dp_detach_port(vport);
-
- if (update_headroom) {
- new_headroom = ovs_get_max_headroom(dp);
-
- if (new_headroom < dp->max_headroom)
- ovs_update_headroom(dp, new_headroom);
- }
- ovs_unlock();
-
- ovs_notify(&dp_vport_genl_family, &ovs_dp_vport_multicast_group, reply, info);
- return 0;
-
-exit_unlock_free:
- ovs_unlock();
- kfree_skb(reply);
- return err;
-}
-
-static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
-{
- struct nlattr **a = info->attrs;
- struct ovs_header *ovs_header = info->userhdr;
- struct sk_buff *reply;
- struct vport *vport;
- int err;
-
- reply = ovs_vport_cmd_alloc_info();
- if (!reply)
- return -ENOMEM;
-
- rcu_read_lock();
- vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
- err = PTR_ERR(vport);
- if (IS_ERR(vport))
- goto exit_unlock_free;
- err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
- info->snd_portid, info->snd_seq, 0,
- OVS_VPORT_CMD_GET, GFP_ATOMIC);
- BUG_ON(err < 0);
- rcu_read_unlock();
-
- return genlmsg_reply(reply, info);
-
-exit_unlock_free:
- rcu_read_unlock();
- kfree_skb(reply);
- return err;
-}
-
-static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
-{
- struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
- struct datapath *dp;
- int bucket = cb->args[0], skip = cb->args[1];
- int i, j = 0;
-
- rcu_read_lock();
- dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
- if (!dp) {
- rcu_read_unlock();
- return -ENODEV;
- }
- for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
- struct vport *vport;
-
- j = 0;
- hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
- if (j >= skip &&
- ovs_vport_cmd_fill_info(vport, skb,
- sock_net(skb->sk),
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI,
- OVS_VPORT_CMD_GET,
- GFP_ATOMIC) < 0)
- goto out;
-
- j++;
- }
- skip = 0;
- }
-out:
- rcu_read_unlock();
-
- cb->args[0] = i;
- cb->args[1] = j;
-
- return skb->len;
-}
-
-static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
- [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
- [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
- [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
- [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
- [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_UNSPEC },
- [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
- [OVS_VPORT_ATTR_IFINDEX] = { .type = NLA_U32 },
- [OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
-};
-
-static const struct genl_ops dp_vport_genl_ops[] = {
- { .cmd = OVS_VPORT_CMD_NEW,
-#ifdef HAVE_GENL_VALIDATE_FLAGS
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-#endif
- .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-#ifdef HAVE_GENL_OPS_POLICY
- .policy = vport_policy,
-#endif
- .doit = ovs_vport_cmd_new
- },
- { .cmd = OVS_VPORT_CMD_DEL,
-#ifdef HAVE_GENL_VALIDATE_FLAGS
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-#endif
- .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-#ifdef HAVE_GENL_OPS_POLICY
- .policy = vport_policy,
-#endif
- .doit = ovs_vport_cmd_del
- },
- { .cmd = OVS_VPORT_CMD_GET,
-#ifdef HAVE_GENL_VALIDATE_FLAGS
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-#endif
- .flags = 0, /* OK for unprivileged users. */
-#ifdef HAVE_GENL_OPS_POLICY
- .policy = vport_policy,
-#endif
- .doit = ovs_vport_cmd_get,
- .dumpit = ovs_vport_cmd_dump
- },
- { .cmd = OVS_VPORT_CMD_SET,
-#ifdef HAVE_GENL_VALIDATE_FLAGS
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-#endif
- .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
-#ifdef HAVE_GENL_OPS_POLICY
- .policy = vport_policy,
-#endif
- .doit = ovs_vport_cmd_set,
- },
-};
-
-struct genl_family dp_vport_genl_family __ro_after_init = {
- .hdrsize = sizeof(struct ovs_header),
- .name = OVS_VPORT_FAMILY,
- .version = OVS_VPORT_VERSION,
- .maxattr = OVS_VPORT_ATTR_MAX,
-#ifndef HAVE_GENL_OPS_POLICY
- .policy = vport_policy,
-#endif
- .netnsok = true,
- .parallel_ops = true,
- .ops = dp_vport_genl_ops,
- .n_ops = ARRAY_SIZE(dp_vport_genl_ops),
- .mcgrps = &ovs_dp_vport_multicast_group,
- .n_mcgrps = 1,
- .module = THIS_MODULE,
-};
-
-static struct genl_family *dp_genl_families[] = {
- &dp_datapath_genl_family,
- &dp_vport_genl_family,
- &dp_flow_genl_family,
- &dp_packet_genl_family,
- &dp_meter_genl_family,
-#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
- &dp_ct_limit_genl_family,
-#endif
-};
-
-static void dp_unregister_genl(int n_families)
-{
- int i;
-
- for (i = 0; i < n_families; i++)
- genl_unregister_family(dp_genl_families[i]);
-}
-
-static int __init dp_register_genl(void)
-{
- int err;
- int i;
-
- for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
-
- err = genl_register_family(dp_genl_families[i]);
- if (err)
- goto error;
- }
-
- return 0;
-
-error:
- dp_unregister_genl(i);
- return err;
-}
-
-static int __net_init ovs_init_net(struct net *net)
-{
- struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
-
- INIT_LIST_HEAD(&ovs_net->dps);
- INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
- ovs_netns_frags_init(net);
- ovs_netns_frags6_init(net);
- return ovs_ct_init(net);
-}
-
-static void __net_exit list_vports_from_net(struct net *net, struct net *dnet,
- struct list_head *head)
-{
- struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
- struct datapath *dp;
-
- list_for_each_entry(dp, &ovs_net->dps, list_node) {
- int i;
-
- for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
- struct vport *vport;
-
- hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) {
-
- if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL)
- continue;
-
- if (dev_net(vport->dev) == dnet)
- list_add(&vport->detach_list, head);
- }
- }
- }
-}
-
-static void __net_exit ovs_exit_net(struct net *dnet)
-{
- struct datapath *dp, *dp_next;
- struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id);
- struct vport *vport, *vport_next;
- struct net *net;
- LIST_HEAD(head);
-
- ovs_netns_frags6_exit(dnet);
- ovs_netns_frags_exit(dnet);
- ovs_lock();
-
- ovs_ct_exit(dnet);
-
- list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
- __dp_destroy(dp);
-
-#ifdef HAVE_NET_RWSEM
- down_read(&net_rwsem);
-#else
- rtnl_lock();
-#endif
- for_each_net(net)
- list_vports_from_net(net, dnet, &head);
-#ifdef HAVE_NET_RWSEM
- up_read(&net_rwsem);
-#else
- rtnl_unlock();
-#endif
-
- /* Detach all vports from given namespace. */
- list_for_each_entry_safe(vport, vport_next, &head, detach_list) {
- list_del(&vport->detach_list);
- ovs_dp_detach_port(vport);
- }
-
- ovs_unlock();
-
- cancel_work_sync(&ovs_net->dp_notify_work);
-}
-
-static struct pernet_operations ovs_net_ops = {
- .init = ovs_init_net,
- .exit = ovs_exit_net,
- .id = &ovs_net_id,
- .size = sizeof(struct ovs_net),
-};
-
-static int __init dp_init(void)
-{
- int err;
-
- BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof_field(struct sk_buff, cb));
-
- pr_info("Open vSwitch switching datapath %s\n", VERSION);
-
- ovs_nsh_init();
- err = action_fifos_init();
- if (err)
- goto error;
-
- err = ovs_internal_dev_rtnl_link_register();
- if (err)
- goto error_action_fifos_exit;
-
- err = ovs_flow_init();
- if (err)
- goto error_unreg_rtnl_link;
-
- err = ovs_vport_init();
- if (err)
- goto error_flow_exit;
-
- err = register_pernet_device(&ovs_net_ops);
- if (err)
- goto error_vport_exit;
-
- err = compat_init();
- if (err)
- goto error_netns_exit;
-
- err = register_netdevice_notifier(&ovs_dp_device_notifier);
- if (err)
- goto error_compat_exit;
-
- err = ovs_netdev_init();
- if (err)
- goto error_unreg_notifier;
-
- err = dp_register_genl();
- if (err < 0)
- goto error_unreg_netdev;
-
- return 0;
-
-error_unreg_netdev:
- ovs_netdev_exit();
-error_unreg_notifier:
- unregister_netdevice_notifier(&ovs_dp_device_notifier);
-error_compat_exit:
- compat_exit();
-error_netns_exit:
- unregister_pernet_device(&ovs_net_ops);
-error_vport_exit:
- ovs_vport_exit();
-error_flow_exit:
- ovs_flow_exit();
-error_unreg_rtnl_link:
- ovs_internal_dev_rtnl_link_unregister();
-error_action_fifos_exit:
- action_fifos_exit();
-error:
- ovs_nsh_cleanup();
- return err;
-}
-
-static void dp_cleanup(void)
-{
- dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
- ovs_netdev_exit();
- unregister_netdevice_notifier(&ovs_dp_device_notifier);
- compat_exit();
- unregister_pernet_device(&ovs_net_ops);
- rcu_barrier();
- ovs_vport_exit();
- ovs_flow_exit();
- ovs_internal_dev_rtnl_link_unregister();
- action_fifos_exit();
- ovs_nsh_cleanup();
-}
-
-module_init(dp_init);
-module_exit(dp_cleanup);
-
-MODULE_DESCRIPTION("Open vSwitch switching datapath");
-MODULE_LICENSE("GPL");
-MODULE_VERSION(VERSION);
-MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY);
-MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY);
-MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY);
-MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY);
-MODULE_ALIAS_GENL_FAMILY(OVS_METER_FAMILY);
-MODULE_ALIAS_GENL_FAMILY(OVS_CT_LIMIT_FAMILY);
diff --git a/datapath/datapath.h b/datapath/datapath.h
deleted file mode 100644
index c377e9b24..000000000
--- a/datapath/datapath.h
+++ /dev/null
@@ -1,283 +0,0 @@
-/*
- * Copyright (c) 2007-2015 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
- */
-
-#ifndef DATAPATH_H
-#define DATAPATH_H 1
-
-#include <asm/page.h>
-#include <linux/kernel.h>
-#include <linux/mutex.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <linux/u64_stats_sync.h>
-#include <net/net_namespace.h>
-#include <net/ip_tunnels.h>
-
-#include "compat.h"
-#include "flow.h"
-#include "flow_table.h"
-#include "meter.h"
-#include "vport-internal_dev.h"
-
-#define DP_MAX_PORTS USHRT_MAX
-#define DP_VPORT_HASH_BUCKETS 1024
-
-/**
- * struct dp_stats_percpu - per-cpu packet processing statistics for a given
- * datapath.
- * @n_hit: Number of received packets for which a matching flow was found in
- * the flow table.
- * @n_miss: Number of received packets that had no matching flow in the flow
- * table. The sum of @n_hit and @n_miss is the number of packets that have
- * been received by the datapath.
- * @n_lost: Number of received packets that had no matching flow in the flow
- * table that could not be sent to userspace (normally due to an overflow in
- * one of the datapath's queues).
- * @n_mask_hit: Number of masks looked up for flow match.
- * @n_mask_hit / (@n_hit + @n_missed) will be the average masks looked
- * up per packet.
- */
-struct dp_stats_percpu {
- u64 n_hit;
- u64 n_missed;
- u64 n_lost;
- u64 n_mask_hit;
- struct u64_stats_sync syncp;
-};
-
-/**
- * struct datapath - datapath for flow-based packet switching
- * @rcu: RCU callback head for deferred destruction.
- * @list_node: Element in global 'dps' list.
- * @table: flow table.
- * @ports: Hash table for ports. %OVSP_LOCAL port always exists. Protected by
- * ovs_mutex and RCU.
- * @stats_percpu: Per-CPU datapath statistics.
- * @net: Reference to net namespace.
- * @max_headroom: the maximum headroom of all vports in this datapath; it will
- * be used by all the internal vports in this dp.
- *
- * Context: See the comment on locking at the top of datapath.c for additional
- * locking information.
- */
-struct datapath {
- struct rcu_head rcu;
- struct list_head list_node;
-
- /* Flow table. */
- struct flow_table table;
-
- /* Switch ports. */
- struct hlist_head *ports;
-
- /* Stats. */
- struct dp_stats_percpu __percpu *stats_percpu;
-
- /* Network namespace ref. */
- possible_net_t net;
-
- u32 user_features;
-
- u32 max_headroom;
-
- /* Switch meters. */
- struct hlist_head *meters;
-};
-
-/**
- * struct ovs_skb_cb - OVS data in skb CB
- * @input_vport: The original vport packet came in on. This value is cached
- * when a packet is received by OVS.
- * @mru: The maximum received fragement size; 0 if the packet is not
- * fragmented.
- * @acts_origlen: The netlink size of the flow actions applied to this skb.
- * @cutlen: The number of bytes from the packet end to be removed.
- */
-struct ovs_skb_cb {
- struct vport *input_vport;
- u16 mru;
- u16 acts_origlen;
- u32 cutlen;
-};
-#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
-
-/**
- * struct dp_upcall - metadata to include with a packet to send to userspace
- * @cmd: One of %OVS_PACKET_CMD_*.
- * @userdata: If nonnull, its variable-length value is passed to userspace as
- * %OVS_PACKET_ATTR_USERDATA.
- * @portid: Netlink portid to which packet should be sent. If @portid is 0
- * then no packet is sent and the packet is accounted in the datapath's @n_lost
- * counter.
- * @egress_tun_info: If nonnull, becomes %OVS_PACKET_ATTR_EGRESS_TUN_KEY.
- * @mru: If not zero, Maximum received IP fragment size.
- */
-struct dp_upcall_info {
- struct ip_tunnel_info *egress_tun_info;
- const struct nlattr *userdata;
- const struct nlattr *actions;
- int actions_len;
- u32 portid;
- u8 cmd;
- u16 mru;
-};
-
-/**
- * struct ovs_net - Per net-namespace data for ovs.
- * @dps: List of datapaths to enable dumping them all out.
- * Protected by genl_mutex.
- */
-struct ovs_net {
- struct list_head dps;
- struct work_struct dp_notify_work;
-#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
- struct ovs_ct_limit_info *ct_limit_info;
-#endif
-
- /* Module reference for configuring conntrack. */
- bool xt_label;
-
-#ifdef HAVE_INET_FRAG_LRU_MOVE
- struct net *net;
- struct netns_frags ipv4_frags;
- struct netns_frags nf_frags;
-#endif
-};
-
-/**
- * enum ovs_pkt_hash_types - hash info to include with a packet
- * to send to userspace.
- * @OVS_PACKET_HASH_SW_BIT: indicates hash was computed in software stack.
- * @OVS_PACKET_HASH_L4_BIT: indicates hash is a canonical 4-tuple hash
- * over transport ports.
- */
-enum ovs_pkt_hash_types {
- OVS_PACKET_HASH_SW_BIT = (1ULL << 32),
- OVS_PACKET_HASH_L4_BIT = (1ULL << 33),
-};
-
-extern unsigned int ovs_net_id;
-void ovs_lock(void);
-void ovs_unlock(void);
-
-#ifdef CONFIG_LOCKDEP
-int lockdep_ovsl_is_held(void);
-#else
-#define lockdep_ovsl_is_held() 1
-#endif
-
-#define ASSERT_OVSL() WARN_ON(!lockdep_ovsl_is_held())
-#define ovsl_dereference(p) \
- rcu_dereference_protected(p, lockdep_ovsl_is_held())
-#define rcu_dereference_ovsl(p) \
- rcu_dereference_check(p, lockdep_ovsl_is_held())
-
-static inline struct net *ovs_dp_get_net(const struct datapath *dp)
-{
- return rpl_read_pnet(&dp->net);
-}
-
-static inline void ovs_dp_set_net(struct datapath *dp, struct net *net)
-{
- rpl_write_pnet(&dp->net, net);
-}
-
-struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no);
-
-static inline struct vport *ovs_vport_rcu(const struct datapath *dp, int port_no)
-{
- WARN_ON_ONCE(!rcu_read_lock_held());
- return ovs_lookup_vport(dp, port_no);
-}
-
-static inline struct vport *ovs_vport_ovsl_rcu(const struct datapath *dp, int port_no)
-{
- WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
- return ovs_lookup_vport(dp, port_no);
-}
-
-static inline struct vport *ovs_vport_ovsl(const struct datapath *dp, int port_no)
-{
- ASSERT_OVSL();
- return ovs_lookup_vport(dp, port_no);
-}
-
-/* Must be called with rcu_read_lock. */
-static inline struct datapath *get_dp_rcu(struct net *net, int dp_ifindex)
-{
- struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex);
-
- if (dev) {
- struct vport *vport = ovs_internal_dev_get_vport(dev);
-
- if (vport)
- return vport->dp;
- }
-
- return NULL;
-}
-
-/* The caller must hold either ovs_mutex or rcu_read_lock to keep the
- * returned dp pointer valid.
- */
-static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
-{
- struct datapath *dp;
-
- WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
- rcu_read_lock();
- dp = get_dp_rcu(net, dp_ifindex);
- rcu_read_unlock();
-
- return dp;
-}
-
-extern struct notifier_block ovs_dp_device_notifier;
-extern struct genl_family dp_vport_genl_family;
-extern const struct genl_multicast_group ovs_dp_vport_multicast_group;
-
-DECLARE_STATIC_KEY_FALSE(tc_recirc_sharing_support);
-
-void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key);
-void ovs_dp_detach_port(struct vport *);
-int ovs_dp_upcall(struct datapath *, struct sk_buff *,
- const struct sw_flow_key *, const struct dp_upcall_info *,
- uint32_t cutlen);
-
-const char *ovs_dp_name(const struct datapath *dp);
-struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
- u32 portid, u32 seq, u8 cmd);
-
-int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
- const struct sw_flow_actions *, struct sw_flow_key *);
-
-void ovs_dp_notify_wq(struct work_struct *work);
-
-int action_fifos_init(void);
-void action_fifos_exit(void);
-
-/* 'KEY' must not have any bits set outside of the 'MASK' */
-#define OVS_MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK)))
-#define OVS_SET_MASKED(OLD, KEY, MASK) ((OLD) = OVS_MASKED(OLD, KEY, MASK))
-
-#define OVS_NLERR(logging_allowed, fmt, ...) \
-do { \
- if (logging_allowed && net_ratelimit()) \
- pr_info("netlink: " fmt "\n", ##__VA_ARGS__); \
-} while (0)
-#endif /* datapath.h */
diff --git a/datapath/dp_notify.c b/datapath/dp_notify.c
deleted file mode 100644
index 932a37ed2..000000000
--- a/datapath/dp_notify.c
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (c) 2007-2012 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
- */
-
-#include <linux/netdevice.h>
-#include <net/genetlink.h>
-#include <net/net_namespace.h>
-#include <net/netns/generic.h>
-
-#include "datapath.h"
-#include "vport-internal_dev.h"
-#include "vport-netdev.h"
-
-static void dp_detach_port_notify(struct vport *vport)
-{
- struct sk_buff *notify;
- struct datapath *dp;
-
- dp = vport->dp;
- notify = ovs_vport_cmd_build_info(vport, ovs_dp_get_net(dp),
- 0, 0, OVS_VPORT_CMD_DEL);
- ovs_dp_detach_port(vport);
- if (IS_ERR(notify)) {
- genl_set_err(&dp_vport_genl_family, ovs_dp_get_net(dp), 0,
- GROUP_ID(&ovs_dp_vport_multicast_group),
- PTR_ERR(notify));
- return;
- }
-
- genlmsg_multicast_netns(&dp_vport_genl_family,
- ovs_dp_get_net(dp), notify, 0,
- GROUP_ID(&ovs_dp_vport_multicast_group),
- GFP_KERNEL);
-}
-
-void ovs_dp_notify_wq(struct work_struct *work)
-{
- struct ovs_net *ovs_net = container_of(work, struct ovs_net, dp_notify_work);
- struct datapath *dp;
-
- ovs_lock();
- list_for_each_entry(dp, &ovs_net->dps, list_node) {
- int i;
-
- for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
- struct vport *vport;
- struct hlist_node *n;
-
- hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node) {
- if (vport->ops->type == OVS_VPORT_TYPE_INTERNAL)
- continue;
-
- if (!(vport->dev->priv_flags & IFF_OVS_DATAPATH))
- dp_detach_port_notify(vport);
- }
- }
- }
- ovs_unlock();
-}
-
-static int dp_device_event(struct notifier_block *unused, unsigned long event,
- void *ptr)
-{
- struct ovs_net *ovs_net;
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct vport *vport = NULL;
-
- if (!ovs_is_internal_dev(dev))
- vport = ovs_netdev_get_vport(dev);
-
- if (!vport)
- return NOTIFY_DONE;
-
- if (event == NETDEV_UNREGISTER) {
- /* upper_dev_unlink and decrement promisc immediately */
- ovs_netdev_detach_dev(vport);
-
- /* schedule vport destroy, dev_put and genl notification */
- ovs_net = net_generic(dev_net(dev), ovs_net_id);
- queue_work(system_wq, &ovs_net->dp_notify_work);
- }
-
- return NOTIFY_DONE;
-}
-
-struct notifier_block ovs_dp_device_notifier = {
- .notifier_call = dp_device_event
-};
diff --git a/datapath/flow.c b/datapath/flow.c
deleted file mode 100644
index 5a00c238c..000000000
--- a/datapath/flow.c
+++ /dev/null
@@ -1,972 +0,0 @@
-/*
- * Copyright (c) 2007-2015 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
- */
-
-#include <linux/uaccess.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/if_ether.h>
-#include <linux/if_vlan.h>
-#include <net/llc_pdu.h>
-#include <linux/kernel.h>
-#include <linux/jhash.h>
-#include <linux/jiffies.h>
-#include <linux/llc.h>
-#include <linux/module.h>
-#include <linux/in.h>
-#include <linux/rcupdate.h>
-#include <linux/cpumask.h>
-#include <linux/if_arp.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/mpls.h>
-#include <linux/sctp.h>
-#include <linux/smp.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include <linux/icmp.h>
-#include <linux/icmpv6.h>
-#include <linux/rculist.h>
-#include <linux/timekeeping.h>
-#include <net/ip.h>
-#include <net/ipv6.h>
-#include <net/mpls.h>
-#include <net/ndisc.h>
-#include <net/nsh.h>
-
-#include "datapath.h"
-#include "conntrack.h"
-#include "flow.h"
-#include "flow_netlink.h"
-#include "vport.h"
-
-u64 ovs_flow_used_time(unsigned long flow_jiffies)
-{
- struct timespec64 cur_ts;
- u64 cur_ms, idle_ms;
-
- ktime_get_ts64(&cur_ts);
- idle_ms = jiffies_to_msecs(jiffies - flow_jiffies);
- cur_ms = (u64)(u32)cur_ts.tv_sec * MSEC_PER_SEC +
- cur_ts.tv_nsec / NSEC_PER_MSEC;
-
- return cur_ms - idle_ms;
-}
-
-#define TCP_FLAGS_BE16(tp) (*(__be16 *)&tcp_flag_word(tp) & htons(0x0FFF))
-
-void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
- const struct sk_buff *skb)
-{
- struct sw_flow_stats *stats;
- unsigned int cpu = smp_processor_id();
- int len = skb->len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
-
- stats = rcu_dereference(flow->stats[cpu]);
-
- /* Check if already have CPU-specific stats. */
- if (likely(stats)) {
- spin_lock(&stats->lock);
- /* Mark if we write on the pre-allocated stats. */
- if (cpu == 0 && unlikely(flow->stats_last_writer != cpu))
- flow->stats_last_writer = cpu;
- } else {
- stats = rcu_dereference(flow->stats[0]); /* Pre-allocated. */
- spin_lock(&stats->lock);
-
- /* If the current CPU is the only writer on the
- * pre-allocated stats keep using them.
- */
- if (unlikely(flow->stats_last_writer != cpu)) {
- /* A previous locker may have already allocated the
- * stats, so we need to check again. If CPU-specific
- * stats were already allocated, we update the pre-
- * allocated stats as we have already locked them.
- */
- if (likely(flow->stats_last_writer != -1) &&
- likely(!rcu_access_pointer(flow->stats[cpu]))) {
- /* Try to allocate CPU-specific stats. */
- struct sw_flow_stats *new_stats;
-
- new_stats =
- kmem_cache_alloc_node(flow_stats_cache,
- GFP_NOWAIT |
- __GFP_THISNODE |
- __GFP_NOWARN |
- __GFP_NOMEMALLOC,
- numa_node_id());
- if (likely(new_stats)) {
- new_stats->used = jiffies;
- new_stats->packet_count = 1;
- new_stats->byte_count = len;
- new_stats->tcp_flags = tcp_flags;
- spin_lock_init(&new_stats->lock);
-
- rcu_assign_pointer(flow->stats[cpu],
- new_stats);
- cpumask_set_cpu(cpu, &flow->cpu_used_mask);
- goto unlock;
- }
- }
- flow->stats_last_writer = cpu;
- }
- }
-
- stats->used = jiffies;
- stats->packet_count++;
- stats->byte_count += len;
- stats->tcp_flags |= tcp_flags;
-unlock:
- spin_unlock(&stats->lock);
-}
-
-/* Must be called with rcu_read_lock or ovs_mutex. */
-void ovs_flow_stats_get(const struct sw_flow *flow,
- struct ovs_flow_stats *ovs_stats,
- unsigned long *used, __be16 *tcp_flags)
-{
- int cpu;
-
- *used = 0;
- *tcp_flags = 0;
- memset(ovs_stats, 0, sizeof(*ovs_stats));
-
- /* We open code this to make sure cpu 0 is always considered */
- for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
- struct sw_flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);
-
- if (stats) {
- /* Local CPU may write on non-local stats, so we must
- * block bottom-halves here.
- */
- spin_lock_bh(&stats->lock);
- if (!*used || time_after(stats->used, *used))
- *used = stats->used;
- *tcp_flags |= stats->tcp_flags;
- ovs_stats->n_packets += stats->packet_count;
- ovs_stats->n_bytes += stats->byte_count;
- spin_unlock_bh(&stats->lock);
- }
- }
-}
-
-/* Called with ovs_mutex. */
-void ovs_flow_stats_clear(struct sw_flow *flow)
-{
- int cpu;
-
- /* We open code this to make sure cpu 0 is always considered */
- for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
- struct sw_flow_stats *stats = ovsl_dereference(flow->stats[cpu]);
-
- if (stats) {
- spin_lock_bh(&stats->lock);
- stats->used = 0;
- stats->packet_count = 0;
- stats->byte_count = 0;
- stats->tcp_flags = 0;
- spin_unlock_bh(&stats->lock);
- }
- }
-}
-
-static int check_header(struct sk_buff *skb, int len)
-{
- if (unlikely(skb->len < len))
- return -EINVAL;
- if (unlikely(!pskb_may_pull(skb, len)))
- return -ENOMEM;
- return 0;
-}
-
-static bool arphdr_ok(struct sk_buff *skb)
-{
- return pskb_may_pull(skb, skb_network_offset(skb) +
- sizeof(struct arp_eth_header));
-}
-
-static int check_iphdr(struct sk_buff *skb)
-{
- unsigned int nh_ofs = skb_network_offset(skb);
- unsigned int ip_len;
- int err;
-
- err = check_header(skb, nh_ofs + sizeof(struct iphdr));
- if (unlikely(err))
- return err;
-
- ip_len = ip_hdrlen(skb);
- if (unlikely(ip_len < sizeof(struct iphdr) ||
- skb->len < nh_ofs + ip_len))
- return -EINVAL;
-
- skb_set_transport_header(skb, nh_ofs + ip_len);
- return 0;
-}
-
-static bool tcphdr_ok(struct sk_buff *skb)
-{
- int th_ofs = skb_transport_offset(skb);
- int tcp_len;
-
- if (unlikely(!pskb_may_pull(skb, th_ofs + sizeof(struct tcphdr))))
- return false;
-
- tcp_len = tcp_hdrlen(skb);
- if (unlikely(tcp_len < sizeof(struct tcphdr) ||
- skb->len < th_ofs + tcp_len))
- return false;
-
- return true;
-}
-
-static bool udphdr_ok(struct sk_buff *skb)
-{
- return pskb_may_pull(skb, skb_transport_offset(skb) +
- sizeof(struct udphdr));
-}
-
-static bool sctphdr_ok(struct sk_buff *skb)
-{
- return pskb_may_pull(skb, skb_transport_offset(skb) +
- sizeof(struct sctphdr));
-}
-
-static bool icmphdr_ok(struct sk_buff *skb)
-{
- return pskb_may_pull(skb, skb_transport_offset(skb) +
- sizeof(struct icmphdr));
-}
-
-static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
-{
- unsigned short frag_off;
- unsigned int payload_ofs = 0;
- unsigned int nh_ofs = skb_network_offset(skb);
- unsigned int nh_len;
- struct ipv6hdr *nh;
- int err, nexthdr, flags = 0;
-
- err = check_header(skb, nh_ofs + sizeof(*nh));
- if (unlikely(err))
- return err;
-
- nh = ipv6_hdr(skb);
-
- key->ip.proto = NEXTHDR_NONE;
- key->ip.tos = ipv6_get_dsfield(nh);
- key->ip.ttl = nh->hop_limit;
- key->ipv6.label = *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
- key->ipv6.addr.src = nh->saddr;
- key->ipv6.addr.dst = nh->daddr;
-
- nexthdr = ipv6_find_hdr(skb, &payload_ofs, -1, &frag_off, &flags);
- if (flags & IP6_FH_F_FRAG) {
- if (frag_off) {
- key->ip.frag = OVS_FRAG_TYPE_LATER;
- key->ip.proto = nexthdr;
- return 0;
- }
- key->ip.frag = OVS_FRAG_TYPE_FIRST;
- } else {
- key->ip.frag = OVS_FRAG_TYPE_NONE;
- }
-
- /* Delayed handling of error in ipv6_find_hdr() as it
- * always sets flags and frag_off to a valid value which may be
- * used to set key->ip.frag above.
- */
- if (unlikely(nexthdr < 0))
- return -EPROTO;
-
- nh_len = payload_ofs - nh_ofs;
- skb_set_transport_header(skb, nh_ofs + nh_len);
- key->ip.proto = nexthdr;
- return nh_len;
-}
-
-static bool icmp6hdr_ok(struct sk_buff *skb)
-{
- return pskb_may_pull(skb, skb_transport_offset(skb) +
- sizeof(struct icmp6hdr));
-}
-
-/**
- * Parse vlan tag from vlan header.
- * Returns ERROR on memory error.
- * Returns 0 if it encounters a non-vlan or incomplete packet.
- * Returns 1 after successfully parsing vlan tag.
- */
-static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh,
- bool untag_vlan)
-{
- struct vlan_head *vh = (struct vlan_head *)skb->data;
-
- if (likely(!eth_type_vlan(vh->tpid)))
- return 0;
-
- if (unlikely(skb->len < sizeof(struct vlan_head) + sizeof(__be16)))
- return 0;
-
- if (unlikely(!pskb_may_pull(skb, sizeof(struct vlan_head) +
- sizeof(__be16))))
- return -ENOMEM;
-
- vh = (struct vlan_head *)skb->data;
- key_vh->tci = vh->tci | htons(VLAN_CFI_MASK);
- key_vh->tpid = vh->tpid;
-
- if (unlikely(untag_vlan)) {
- int offset = skb->data - skb_mac_header(skb);
- u16 tci;
- int err;
-
- __skb_push(skb, offset);
- err = __skb_vlan_pop(skb, &tci);
- __skb_pull(skb, offset);
- if (err)
- return err;
- __vlan_hwaccel_put_tag(skb, key_vh->tpid, tci);
- } else {
- __skb_pull(skb, sizeof(struct vlan_head));
- }
- return 1;
-}
-
-static void clear_vlan(struct sw_flow_key *key)
-{
- key->eth.vlan.tci = 0;
- key->eth.vlan.tpid = 0;
- key->eth.cvlan.tci = 0;
- key->eth.cvlan.tpid = 0;
-}
-
-static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
-{
- int res;
-
- key->eth.vlan.tci = 0;
- key->eth.vlan.tpid = 0;
- key->eth.cvlan.tci = 0;
- key->eth.cvlan.tpid = 0;
-
- if (skb_vlan_tag_present(skb)) {
- key->eth.vlan.tci = htons(skb->vlan_tci) | htons(VLAN_CFI_MASK);
- key->eth.vlan.tpid = skb->vlan_proto;
- } else {
- /* Parse outer vlan tag in the non-accelerated case. */
- res = parse_vlan_tag(skb, &key->eth.vlan, true);
- if (res <= 0)
- return res;
- }
-
- /* Parse inner vlan tag. */
- res = parse_vlan_tag(skb, &key->eth.cvlan, false);
- if (res <= 0)
- return res;
-
- return 0;
-}
-
-static __be16 parse_ethertype(struct sk_buff *skb)
-{
- struct llc_snap_hdr {
- u8 dsap; /* Always 0xAA */
- u8 ssap; /* Always 0xAA */
- u8 ctrl;
- u8 oui[3];
- __be16 ethertype;
- };
- struct llc_snap_hdr *llc;
- __be16 proto;
-
- proto = *(__be16 *) skb->data;
- __skb_pull(skb, sizeof(__be16));
-
- if (eth_proto_is_802_3(proto))
- return proto;
-
- if (skb->len < sizeof(struct llc_snap_hdr))
- return htons(ETH_P_802_2);
-
- if (unlikely(!pskb_may_pull(skb, sizeof(struct llc_snap_hdr))))
- return htons(0);
-
- llc = (struct llc_snap_hdr *) skb->data;
- if (llc->dsap != LLC_SAP_SNAP ||
- llc->ssap != LLC_SAP_SNAP ||
- (llc->oui[0] | llc->oui[1] | llc->oui[2]) != 0)
- return htons(ETH_P_802_2);
-
- __skb_pull(skb, sizeof(struct llc_snap_hdr));
-
- if (eth_proto_is_802_3(llc->ethertype))
- return llc->ethertype;
-
- return htons(ETH_P_802_2);
-}
-
-static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
- int nh_len)
-{
- struct icmp6hdr *icmp = icmp6_hdr(skb);
-
- /* The ICMPv6 type and code fields use the 16-bit transport port
- * fields, so we need to store them in 16-bit network byte order.
- */
- key->tp.src = htons(icmp->icmp6_type);
- key->tp.dst = htons(icmp->icmp6_code);
- memset(&key->ipv6.nd, 0, sizeof(key->ipv6.nd));
-
- if (icmp->icmp6_code == 0 &&
- (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION ||
- icmp->icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT)) {
- int icmp_len = skb->len - skb_transport_offset(skb);
- struct nd_msg *nd;
- int offset;
-
- /* In order to process neighbor discovery options, we need the
- * entire packet.
- */
- if (unlikely(icmp_len < sizeof(*nd)))
- return 0;
-
- if (unlikely(skb_linearize(skb)))
- return -ENOMEM;
-
- nd = (struct nd_msg *)skb_transport_header(skb);
- key->ipv6.nd.target = nd->target;
-
- icmp_len -= sizeof(*nd);
- offset = 0;
- while (icmp_len >= 8) {
- struct nd_opt_hdr *nd_opt =
- (struct nd_opt_hdr *)(nd->opt + offset);
- int opt_len = nd_opt->nd_opt_len * 8;
-
- if (unlikely(!opt_len || opt_len > icmp_len))
- return 0;
-
- /* Store the link layer address if the appropriate
- * option is provided. It is considered an error if
- * the same link layer option is specified twice.
- */
- if (nd_opt->nd_opt_type == ND_OPT_SOURCE_LL_ADDR
- && opt_len == 8) {
- if (unlikely(!is_zero_ether_addr(key->ipv6.nd.sll)))
- goto invalid;
- ether_addr_copy(key->ipv6.nd.sll,
- &nd->opt[offset+sizeof(*nd_opt)]);
- } else if (nd_opt->nd_opt_type == ND_OPT_TARGET_LL_ADDR
- && opt_len == 8) {
- if (unlikely(!is_zero_ether_addr(key->ipv6.nd.tll)))
- goto invalid;
- ether_addr_copy(key->ipv6.nd.tll,
- &nd->opt[offset+sizeof(*nd_opt)]);
- }
-
- icmp_len -= opt_len;
- offset += opt_len;
- }
- }
-
- return 0;
-
-invalid:
- memset(&key->ipv6.nd.target, 0, sizeof(key->ipv6.nd.target));
- memset(key->ipv6.nd.sll, 0, sizeof(key->ipv6.nd.sll));
- memset(key->ipv6.nd.tll, 0, sizeof(key->ipv6.nd.tll));
-
- return 0;
-}
-
-static int parse_nsh(struct sk_buff *skb, struct sw_flow_key *key)
-{
- struct nshhdr *nh;
- unsigned int nh_ofs = skb_network_offset(skb);
- u8 version, length;
- int err;
-
- err = check_header(skb, nh_ofs + NSH_BASE_HDR_LEN);
- if (unlikely(err))
- return err;
-
- nh = nsh_hdr(skb);
- version = nsh_get_ver(nh);
- length = nsh_hdr_len(nh);
-
- if (version != 0)
- return -EINVAL;
-
- err = check_header(skb, nh_ofs + length);
- if (unlikely(err))
- return err;
-
- nh = nsh_hdr(skb);
- key->nsh.base.flags = nsh_get_flags(nh);
- key->nsh.base.ttl = nsh_get_ttl(nh);
- key->nsh.base.mdtype = nh->mdtype;
- key->nsh.base.np = nh->np;
- key->nsh.base.path_hdr = nh->path_hdr;
- switch (key->nsh.base.mdtype) {
- case NSH_M_TYPE1:
- if (length != NSH_M_TYPE1_LEN)
- return -EINVAL;
- memcpy(key->nsh.context, nh->md1.context,
- sizeof(nh->md1));
- break;
- case NSH_M_TYPE2:
- memset(key->nsh.context, 0,
- sizeof(nh->md1));
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-/**
- * key_extract_l3l4 - extracts L3/L4 header information.
- * @skb: sk_buff that contains the frame, with skb->data pointing to the
- * L3 header
- * @key: output flow key
- */
-static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key)
-{
- int error;
-
- /* Network layer. */
- if (key->eth.type == htons(ETH_P_IP)) {
- struct iphdr *nh;
- __be16 offset;
-
- error = check_iphdr(skb);
- if (unlikely(error)) {
- memset(&key->ip, 0, sizeof(key->ip));
- memset(&key->ipv4, 0, sizeof(key->ipv4));
- if (error == -EINVAL) {
- skb->transport_header = skb->network_header;
- error = 0;
- }
- return error;
- }
-
- nh = ip_hdr(skb);
- key->ipv4.addr.src = nh->saddr;
- key->ipv4.addr.dst = nh->daddr;
-
- key->ip.proto = nh->protocol;
- key->ip.tos = nh->tos;
- key->ip.ttl = nh->ttl;
-
- offset = nh->frag_off & htons(IP_OFFSET);
- if (offset) {
- key->ip.frag = OVS_FRAG_TYPE_LATER;
- memset(&key->tp, 0, sizeof(key->tp));
- return 0;
- }
-#ifdef HAVE_SKB_GSO_UDP
- if (nh->frag_off & htons(IP_MF) ||
- skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
-#else
- if (nh->frag_off & htons(IP_MF))
-#endif
- key->ip.frag = OVS_FRAG_TYPE_FIRST;
- else
- key->ip.frag = OVS_FRAG_TYPE_NONE;
-
- /* Transport layer. */
- if (key->ip.proto == IPPROTO_TCP) {
- if (tcphdr_ok(skb)) {
- struct tcphdr *tcp = tcp_hdr(skb);
- key->tp.src = tcp->source;
- key->tp.dst = tcp->dest;
- key->tp.flags = TCP_FLAGS_BE16(tcp);
- } else {
- memset(&key->tp, 0, sizeof(key->tp));
- }
-
- } else if (key->ip.proto == IPPROTO_UDP) {
- if (udphdr_ok(skb)) {
- struct udphdr *udp = udp_hdr(skb);
- key->tp.src = udp->source;
- key->tp.dst = udp->dest;
- } else {
- memset(&key->tp, 0, sizeof(key->tp));
- }
- } else if (key->ip.proto == IPPROTO_SCTP) {
- if (sctphdr_ok(skb)) {
- struct sctphdr *sctp = sctp_hdr(skb);
- key->tp.src = sctp->source;
- key->tp.dst = sctp->dest;
- } else {
- memset(&key->tp, 0, sizeof(key->tp));
- }
- } else if (key->ip.proto == IPPROTO_ICMP) {
- if (icmphdr_ok(skb)) {
- struct icmphdr *icmp = icmp_hdr(skb);
- /* The ICMP type and code fields use the 16-bit
- * transport port fields, so we need to store
- * them in 16-bit network byte order.
- */
- key->tp.src = htons(icmp->type);
- key->tp.dst = htons(icmp->code);
- } else {
- memset(&key->tp, 0, sizeof(key->tp));
- }
- }
-
- } else if (key->eth.type == htons(ETH_P_ARP) ||
- key->eth.type == htons(ETH_P_RARP)) {
- struct arp_eth_header *arp;
- bool arp_available = arphdr_ok(skb);
-
- arp = (struct arp_eth_header *)skb_network_header(skb);
-
- if (arp_available &&
- arp->ar_hrd == htons(ARPHRD_ETHER) &&
- arp->ar_pro == htons(ETH_P_IP) &&
- arp->ar_hln == ETH_ALEN &&
- arp->ar_pln == 4) {
-
- /* We only match on the lower 8 bits of the opcode. */
- if (ntohs(arp->ar_op) <= 0xff)
- key->ip.proto = ntohs(arp->ar_op);
- else
- key->ip.proto = 0;
-
- memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src));
- memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst));
- ether_addr_copy(key->ipv4.arp.sha, arp->ar_sha);
- ether_addr_copy(key->ipv4.arp.tha, arp->ar_tha);
- } else {
- memset(&key->ip, 0, sizeof(key->ip));
- memset(&key->ipv4, 0, sizeof(key->ipv4));
- }
- } else if (eth_p_mpls(key->eth.type)) {
- u8 label_count = 1;
-
- memset(&key->mpls, 0, sizeof(key->mpls));
- skb_set_inner_network_header(skb, skb->mac_len);
- while (1) {
- __be32 lse;
-
- error = check_header(skb, skb->mac_len +
- label_count * MPLS_HLEN);
- if (unlikely(error))
- return 0;
-
- memcpy(&lse, skb_inner_network_header(skb), MPLS_HLEN);
-
- if (label_count <= MPLS_LABEL_DEPTH)
- memcpy(&key->mpls.lse[label_count - 1], &lse,
- MPLS_HLEN);
-
- skb_set_inner_network_header(skb, skb->mac_len +
- label_count * MPLS_HLEN);
- if (lse & htonl(MPLS_LS_S_MASK))
- break;
-
- label_count++;
- }
- if (label_count > MPLS_LABEL_DEPTH)
- label_count = MPLS_LABEL_DEPTH;
-
- key->mpls.num_labels_mask = GENMASK(label_count - 1, 0);
- } else if (key->eth.type == htons(ETH_P_IPV6)) {
- int nh_len; /* IPv6 Header + Extensions */
-
- nh_len = parse_ipv6hdr(skb, key);
- if (unlikely(nh_len < 0)) {
- switch (nh_len) {
- case -EINVAL:
- memset(&key->ip, 0, sizeof(key->ip));
- memset(&key->ipv6.addr, 0, sizeof(key->ipv6.addr));
- /* fall-through */
- case -EPROTO:
- skb->transport_header = skb->network_header;
- error = 0;
- break;
- default:
- error = nh_len;
- }
- return error;
- }
-
- if (key->ip.frag == OVS_FRAG_TYPE_LATER) {
- memset(&key->tp, 0, sizeof(key->tp));
- return 0;
- }
-#ifdef HAVE_SKB_GSO_UDP
- if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
- key->ip.frag = OVS_FRAG_TYPE_FIRST;
-
-#endif
- /* Transport layer. */
- if (key->ip.proto == NEXTHDR_TCP) {
- if (tcphdr_ok(skb)) {
- struct tcphdr *tcp = tcp_hdr(skb);
- key->tp.src = tcp->source;
- key->tp.dst = tcp->dest;
- key->tp.flags = TCP_FLAGS_BE16(tcp);
- } else {
- memset(&key->tp, 0, sizeof(key->tp));
- }
- } else if (key->ip.proto == NEXTHDR_UDP) {
- if (udphdr_ok(skb)) {
- struct udphdr *udp = udp_hdr(skb);
- key->tp.src = udp->source;
- key->tp.dst = udp->dest;
- } else {
- memset(&key->tp, 0, sizeof(key->tp));
- }
- } else if (key->ip.proto == NEXTHDR_SCTP) {
- if (sctphdr_ok(skb)) {
- struct sctphdr *sctp = sctp_hdr(skb);
- key->tp.src = sctp->source;
- key->tp.dst = sctp->dest;
- } else {
- memset(&key->tp, 0, sizeof(key->tp));
- }
- } else if (key->ip.proto == NEXTHDR_ICMP) {
- if (icmp6hdr_ok(skb)) {
- error = parse_icmpv6(skb, key, nh_len);
- if (error)
- return error;
- } else {
- memset(&key->tp, 0, sizeof(key->tp));
- }
- }
- } else if (key->eth.type == htons(ETH_P_NSH)) {
- error = parse_nsh(skb, key);
- if (error)
- return error;
- }
- return 0;
-}
-
-/**
- * key_extract - extracts a flow key from an Ethernet frame.
- * @skb: sk_buff that contains the frame, with skb->data pointing to the
- * Ethernet header
- * @key: output flow key
- *
- * The caller must ensure that skb->len >= ETH_HLEN.
- *
- * Returns 0 if successful, otherwise a negative errno value.
- *
- * Initializes @skb header fields as follows:
- *
- * - skb->mac_header: the L2 header.
- *
- * - skb->network_header: just past the L2 header, or just past the
- * VLAN header, to the first byte of the L2 payload.
- *
- * - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6
- * on output, then just past the IP header, if one is present and
- * of a correct length, otherwise the same as skb->network_header.
- * For other key->eth.type values it is left untouched.
- *
- * - skb->protocol: the type of the data starting at skb->network_header.
- * Equals to key->eth.type.
- */
-static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
-{
- struct ethhdr *eth;
-
- /* Flags are always used as part of stats */
- key->tp.flags = 0;
-
- skb_reset_mac_header(skb);
-
- /* Link layer. */
- clear_vlan(key);
- if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) {
- if (unlikely(eth_type_vlan(skb->protocol)))
- return -EINVAL;
-
- skb_reset_network_header(skb);
- key->eth.type = skb->protocol;
- } else {
- eth = eth_hdr(skb);
- ether_addr_copy(key->eth.src, eth->h_source);
- ether_addr_copy(key->eth.dst, eth->h_dest);
-
- __skb_pull(skb, 2 * ETH_ALEN);
- /* We are going to push all headers that we pull, so no need to
- * update skb->csum here.
- */
-
- if (unlikely(parse_vlan(skb, key)))
- return -ENOMEM;
-
- key->eth.type = parse_ethertype(skb);
- if (unlikely(key->eth.type == htons(0)))
- return -ENOMEM;
-
- /* Multiple tagged packets need to retain TPID to satisfy
- * skb_vlan_pop(), which will later shift the ethertype into
- * skb->protocol.
- */
- if (key->eth.cvlan.tci & htons(VLAN_CFI_MASK))
- skb->protocol = key->eth.cvlan.tpid;
- else
- skb->protocol = key->eth.type;
-
- skb_reset_network_header(skb);
- __skb_push(skb, skb->data - skb_mac_header(skb));
- }
-
- skb_reset_mac_len(skb);
-
- /* Fill out L3/L4 key info, if any */
- return key_extract_l3l4(skb, key);
-}
-
-/* In the case of conntrack fragment handling it expects L3 headers,
- * add a helper.
- */
-int ovs_flow_key_update_l3l4(struct sk_buff *skb, struct sw_flow_key *key)
-{
- return key_extract_l3l4(skb, key);
-}
-
-int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key)
-{
- int res;
-
- res = key_extract(skb, key);
- if (!res)
- key->mac_proto &= ~SW_FLOW_KEY_INVALID;
-
- return res;
-}
-
-static int key_extract_mac_proto(struct sk_buff *skb)
-{
- switch (skb->dev->type) {
- case ARPHRD_ETHER:
- return MAC_PROTO_ETHERNET;
- case ARPHRD_NONE:
- if (skb->protocol == htons(ETH_P_TEB))
- return MAC_PROTO_ETHERNET;
- return MAC_PROTO_NONE;
- }
- WARN_ON_ONCE(1);
- return -EINVAL;
-}
-
-int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
- struct sk_buff *skb, struct sw_flow_key *key)
-{
-#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
- struct tc_skb_ext *tc_ext;
-#endif
- int res, err;
-
- /* Extract metadata from packet. */
- if (tun_info) {
- key->tun_proto = ip_tunnel_info_af(tun_info);
- memcpy(&key->tun_key, &tun_info->key, sizeof(key->tun_key));
- BUILD_BUG_ON(((1 << (sizeof(tun_info->options_len) * 8)) - 1) >
- sizeof(key->tun_opts));
-
- if (tun_info->options_len) {
- ip_tunnel_info_opts_get(TUN_METADATA_OPTS(key, tun_info->options_len),
- tun_info);
- key->tun_opts_len = tun_info->options_len;
- } else {
- key->tun_opts_len = 0;
- }
- } else {
- key->tun_proto = 0;
- key->tun_opts_len = 0;
- memset(&key->tun_key, 0, sizeof(key->tun_key));
- }
-
- key->phy.priority = skb->priority;
- key->phy.in_port = OVS_CB(skb)->input_vport->port_no;
- key->phy.skb_mark = skb->mark;
- key->ovs_flow_hash = 0;
- res = key_extract_mac_proto(skb);
- if (res < 0)
- return res;
- key->mac_proto = res;
-
-#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
- if (static_branch_unlikely(&tc_recirc_sharing_support)) {
- tc_ext = skb_ext_find(skb, TC_SKB_EXT);
- key->recirc_id = tc_ext ? tc_ext->chain : 0;
- } else {
- key->recirc_id = 0;
- }
-#else
- key->recirc_id = 0;
-#endif
-
- err = key_extract(skb, key);
- if (!err)
- ovs_ct_fill_key(skb, key); /* Must be after key_extract(). */
- return err;
-}
-
-int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr,
- struct sk_buff *skb,
- struct sw_flow_key *key, bool log)
-{
- const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
- u64 attrs = 0;
- int err;
-
- err = parse_flow_nlattrs(attr, a, &attrs, log);
- if (err)
- return -EINVAL;
-
- /* Extract metadata from netlink attributes. */
- err = ovs_nla_get_flow_metadata(net, a, attrs, key, log);
- if (err)
- return err;
-
- /* key_extract assumes that skb->protocol is set-up for
- * layer 3 packets which is the case for other callers,
- * in particular packets received from the network stack.
- * Here the correct value can be set from the metadata
- * extracted above.
- * For L2 packet key eth type would be zero. skb protocol
- * would be set to correct value later during key-extact.
- */
-
- skb->protocol = key->eth.type;
- err = key_extract(skb, key);
- if (err)
- return err;
-
- /* Check that we have conntrack original direction tuple metadata only
- * for packets for which it makes sense. Otherwise the key may be
- * corrupted due to overlapping key fields.
- */
- if (attrs & (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4) &&
- key->eth.type != htons(ETH_P_IP))
- return -EINVAL;
- if (attrs & (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6) &&
- (key->eth.type != htons(ETH_P_IPV6) ||
- sw_flow_key_is_nd(key)))
- return -EINVAL;
-
- return 0;
-}
diff --git a/datapath/flow.h b/datapath/flow.h
deleted file mode 100644
index 584d9f565..000000000
--- a/datapath/flow.h
+++ /dev/null
@@ -1,297 +0,0 @@
-/*
- * Copyright (c) 2007-2017 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
- */
-
-#ifndef FLOW_H
-#define FLOW_H 1
-
-#include <linux/cache.h>
-#include <linux/kernel.h>
-#include <linux/netlink.h>
-#include <linux/openvswitch.h>
-#include <linux/spinlock.h>
-#include <linux/types.h>
-#include <linux/rcupdate.h>
-#include <linux/if_ether.h>
-#include <linux/in6.h>
-#include <linux/jiffies.h>
-#include <linux/time.h>
-#include <linux/cpumask.h>
-#include <net/inet_ecn.h>
-#include <net/ip_tunnels.h>
-#include <net/dst_metadata.h>
-#include <net/nsh.h>
-
-struct sk_buff;
-
-enum sw_flow_mac_proto {
- MAC_PROTO_NONE = 0,
- MAC_PROTO_ETHERNET,
-};
-#define SW_FLOW_KEY_INVALID 0x80
-#define MPLS_LABEL_DEPTH 3
-
-/* Store options at the end of the array if they are less than the
- * maximum size. This allows us to get the benefits of variable length
- * matching for small options.
- */
-#define TUN_METADATA_OFFSET(opt_len) \
- (sizeof_field(struct sw_flow_key, tun_opts) - opt_len)
-#define TUN_METADATA_OPTS(flow_key, opt_len) \
- ((void *)((flow_key)->tun_opts + TUN_METADATA_OFFSET(opt_len)))
-
-struct ovs_tunnel_info {
- struct metadata_dst *tun_dst;
-};
-
-struct vlan_head {
- __be16 tpid; /* Vlan type. Generally 802.1q or 802.1ad.*/
- __be16 tci; /* 0 if no VLAN, VLAN_CFI_MASK set otherwise. */
-};
-
-#define OVS_SW_FLOW_KEY_METADATA_SIZE \
- (offsetof(struct sw_flow_key, recirc_id) + \
- sizeof_field(struct sw_flow_key, recirc_id))
-
-struct ovs_key_nsh {
- struct ovs_nsh_key_base base;
- __be32 context[NSH_MD1_CONTEXT_SIZE];
-};
-
-struct sw_flow_key {
- u8 tun_opts[255];
- u8 tun_opts_len;
- struct ip_tunnel_key tun_key; /* Encapsulating tunnel key. */
- struct {
- u32 priority; /* Packet QoS priority. */
- u32 skb_mark; /* SKB mark. */
- u16 in_port; /* Input switch port (or DP_MAX_PORTS). */
- } __packed phy; /* Safe when right after 'tun_key'. */
- u8 mac_proto; /* MAC layer protocol (e.g. Ethernet). */
- u8 tun_proto; /* Protocol of encapsulating tunnel. */
- u32 ovs_flow_hash; /* Datapath computed hash value. */
- u32 recirc_id; /* Recirculation ID. */
- struct {
- u8 src[ETH_ALEN]; /* Ethernet source address. */
- u8 dst[ETH_ALEN]; /* Ethernet destination address. */
- struct vlan_head vlan;
- struct vlan_head cvlan;
- __be16 type; /* Ethernet frame type. */
- } eth;
- /* Filling a hole of two bytes. */
- u8 ct_state;
- u8 ct_orig_proto; /* CT original direction tuple IP
- * protocol.
- */
- union {
- struct {
- u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */
- u8 tos; /* IP ToS. */
- u8 ttl; /* IP TTL/hop limit. */
- u8 frag; /* One of OVS_FRAG_TYPE_*. */
- } ip;
- };
- u16 ct_zone; /* Conntrack zone. */
- struct {
- __be16 src; /* TCP/UDP/SCTP source port. */
- __be16 dst; /* TCP/UDP/SCTP destination port. */
- __be16 flags; /* TCP flags. */
- } tp;
- union {
- struct {
- struct {
- __be32 src; /* IP source address. */
- __be32 dst; /* IP destination address. */
- } addr;
- union {
- struct {
- __be32 src;
- __be32 dst;
- } ct_orig; /* Conntrack original direction fields. */
- struct {
- u8 sha[ETH_ALEN]; /* ARP source hardware address. */
- u8 tha[ETH_ALEN]; /* ARP target hardware address. */
- } arp;
- };
- } ipv4;
- struct {
- struct {
- struct in6_addr src; /* IPv6 source address. */
- struct in6_addr dst; /* IPv6 destination address. */
- } addr;
- __be32 label; /* IPv6 flow label. */
- union {
- struct {
- struct in6_addr src;
- struct in6_addr dst;
- } ct_orig; /* Conntrack original direction fields. */
- struct {
- struct in6_addr target; /* ND target address. */
- u8 sll[ETH_ALEN]; /* ND source link layer address. */
- u8 tll[ETH_ALEN]; /* ND target link layer address. */
- } nd;
- };
- } ipv6;
- struct {
- u32 num_labels_mask; /* labels present bitmap of effective length MPLS_LABEL_DEPTH */
- __be32 lse[MPLS_LABEL_DEPTH]; /* label stack entry */
- } mpls;
- struct ovs_key_nsh nsh; /* network service header */
- };
- struct {
- /* Connection tracking fields not packed above. */
- struct {
- __be16 src; /* CT orig tuple tp src port. */
- __be16 dst; /* CT orig tuple tp dst port. */
- } orig_tp;
- u32 mark;
- struct ovs_key_ct_labels labels;
- } ct;
-
-} __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */
-
-static inline bool sw_flow_key_is_nd(const struct sw_flow_key *key)
-{
- return key->eth.type == htons(ETH_P_IPV6) &&
- key->ip.proto == NEXTHDR_ICMP &&
- key->tp.dst == 0 &&
- (key->tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) ||
- key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT));
-}
-
-struct sw_flow_key_range {
- unsigned short int start;
- unsigned short int end;
-};
-
-struct sw_flow_mask {
- int ref_count;
- struct rcu_head rcu;
- struct sw_flow_key_range range;
- struct sw_flow_key key;
-};
-
-struct sw_flow_match {
- struct sw_flow_key *key;
- struct sw_flow_key_range range;
- struct sw_flow_mask *mask;
-};
-
-#define MAX_UFID_LENGTH 16 /* 128 bits */
-
-struct sw_flow_id {
- u32 ufid_len;
- union {
- u32 ufid[MAX_UFID_LENGTH / 4];
- struct sw_flow_key *unmasked_key;
- };
-};
-
-struct sw_flow_actions {
- struct rcu_head rcu;
- size_t orig_len; /* From flow_cmd_new netlink actions size */
- u32 actions_len;
- struct nlattr actions[];
-};
-
-struct sw_flow_stats {
- u64 packet_count; /* Number of packets matched. */
- u64 byte_count; /* Number of bytes matched. */
- unsigned long used; /* Last used time (in jiffies). */
- spinlock_t lock; /* Lock for atomic stats update. */
- __be16 tcp_flags; /* Union of seen TCP flags. */
-};
-
-struct sw_flow {
- struct rcu_head rcu;
- struct {
- struct hlist_node node[2];
- u32 hash;
- } flow_table, ufid_table;
- int stats_last_writer; /* CPU id of the last writer on
- * 'stats[0]'.
- */
- struct sw_flow_key key;
- struct sw_flow_id id;
- struct cpumask cpu_used_mask;
- struct sw_flow_mask *mask;
- struct sw_flow_actions __rcu *sf_acts;
- struct sw_flow_stats __rcu *stats[]; /* One for each CPU. First one
- * is allocated at flow creation time,
- * the rest are allocated on demand
- * while holding the 'stats[0].lock'.
- */
-};
-
-struct arp_eth_header {
- __be16 ar_hrd; /* format of hardware address */
- __be16 ar_pro; /* format of protocol address */
- unsigned char ar_hln; /* length of hardware address */
- unsigned char ar_pln; /* length of protocol address */
- __be16 ar_op; /* ARP opcode (command) */
-
- /* Ethernet+IPv4 specific members. */
- unsigned char ar_sha[ETH_ALEN]; /* sender hardware address */
- unsigned char ar_sip[4]; /* sender IP address */
- unsigned char ar_tha[ETH_ALEN]; /* target hardware address */
- unsigned char ar_tip[4]; /* target IP address */
-} __packed;
-
-static inline u8 ovs_key_mac_proto(const struct sw_flow_key *key)
-{
- return key->mac_proto & ~SW_FLOW_KEY_INVALID;
-}
-
-static inline u16 __ovs_mac_header_len(u8 mac_proto)
-{
- return mac_proto == MAC_PROTO_ETHERNET ? ETH_HLEN : 0;
-}
-
-static inline u16 ovs_mac_header_len(const struct sw_flow_key *key)
-{
- return __ovs_mac_header_len(ovs_key_mac_proto(key));
-}
-
-static inline bool ovs_identifier_is_ufid(const struct sw_flow_id *sfid)
-{
- return sfid->ufid_len;
-}
-
-static inline bool ovs_identifier_is_key(const struct sw_flow_id *sfid)
-{
- return !ovs_identifier_is_ufid(sfid);
-}
-
-void ovs_flow_stats_update(struct sw_flow *, __be16 tcp_flags,
- const struct sk_buff *);
-void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *,
- unsigned long *used, __be16 *tcp_flags);
-void ovs_flow_stats_clear(struct sw_flow *);
-u64 ovs_flow_used_time(unsigned long flow_jiffies);
-
-/* Update the non-metadata part of the flow key using skb. */
-int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key);
-int ovs_flow_key_update_l3l4(struct sk_buff *skb, struct sw_flow_key *key);
-int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
- struct sk_buff *skb,
- struct sw_flow_key *key);
-/* Extract key from packet coming from userspace. */
-int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr,
- struct sk_buff *skb,
- struct sw_flow_key *key, bool log);
-
-#endif /* flow.h */
diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
deleted file mode 100644
index caed44386..000000000
--- a/datapath/flow_netlink.c
+++ /dev/null
@@ -1,3519 +0,0 @@
-/*
- * Copyright (c) 2007-2017 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/uaccess.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/if_ether.h>
-#include <linux/if_vlan.h>
-#include <net/llc_pdu.h>
-#include <linux/kernel.h>
-#include <linux/jhash.h>
-#include <linux/jiffies.h>
-#include <linux/llc.h>
-#include <linux/module.h>
-#include <linux/in.h>
-#include <linux/rcupdate.h>
-#include <linux/if_arp.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/sctp.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include <linux/icmp.h>
-#include <linux/icmpv6.h>
-#include <linux/rculist.h>
-#include <net/geneve.h>
-#include <net/ip.h>
-#include <net/ipv6.h>
-#include <net/ndisc.h>
-#include <net/mpls.h>
-#include <net/vxlan.h>
-#include <net/tun_proto.h>
-#include <net/erspan.h>
-
-#include "datapath.h"
-#include "conntrack.h"
-#include "flow.h"
-#include "flow_netlink.h"
-#include "gso.h"
-
-struct ovs_len_tbl {
- int len;
- const struct ovs_len_tbl *next;
-};
-
-#define OVS_ATTR_NESTED -1
-#define OVS_ATTR_VARIABLE -2
-
-static bool actions_may_change_flow(const struct nlattr *actions)
-{
- struct nlattr *nla;
- int rem;
-
- nla_for_each_nested(nla, actions, rem) {
- u16 action = nla_type(nla);
-
- switch (action) {
- case OVS_ACTION_ATTR_OUTPUT:
- case OVS_ACTION_ATTR_RECIRC:
- case OVS_ACTION_ATTR_TRUNC:
- case OVS_ACTION_ATTR_USERSPACE:
- break;
-
- case OVS_ACTION_ATTR_CT:
- case OVS_ACTION_ATTR_CT_CLEAR:
- case OVS_ACTION_ATTR_HASH:
- case OVS_ACTION_ATTR_POP_ETH:
- case OVS_ACTION_ATTR_POP_MPLS:
- case OVS_ACTION_ATTR_POP_NSH:
- case OVS_ACTION_ATTR_POP_VLAN:
- case OVS_ACTION_ATTR_PUSH_ETH:
- case OVS_ACTION_ATTR_PUSH_MPLS:
- case OVS_ACTION_ATTR_PUSH_NSH:
- case OVS_ACTION_ATTR_PUSH_VLAN:
- case OVS_ACTION_ATTR_SAMPLE:
- case OVS_ACTION_ATTR_SET:
- case OVS_ACTION_ATTR_SET_MASKED:
- case OVS_ACTION_ATTR_METER:
- case OVS_ACTION_ATTR_CHECK_PKT_LEN:
- default:
- return true;
- }
- }
- return false;
-}
-
-static void update_range(struct sw_flow_match *match,
- size_t offset, size_t size, bool is_mask)
-{
- struct sw_flow_key_range *range;
- size_t start = rounddown(offset, sizeof(long));
- size_t end = roundup(offset + size, sizeof(long));
-
- if (!is_mask)
- range = &match->range;
- else
- range = &match->mask->range;
-
- if (range->start == range->end) {
- range->start = start;
- range->end = end;
- return;
- }
-
- if (range->start > start)
- range->start = start;
-
- if (range->end < end)
- range->end = end;
-}
-
-#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
- do { \
- update_range(match, offsetof(struct sw_flow_key, field), \
- sizeof((match)->key->field), is_mask); \
- if (is_mask) \
- (match)->mask->key.field = value; \
- else \
- (match)->key->field = value; \
- } while (0)
-
-#define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask) \
- do { \
- update_range(match, offset, len, is_mask); \
- if (is_mask) \
- memcpy((u8 *)&(match)->mask->key + offset, value_p, len);\
- else \
- memcpy((u8 *)(match)->key + offset, value_p, len); \
- } while (0)
-
-#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \
- SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \
- value_p, len, is_mask)
-
-#define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask) \
- do { \
- update_range(match, offsetof(struct sw_flow_key, field), \
- sizeof((match)->key->field), is_mask); \
- if (is_mask) \
- memset((u8 *)&(match)->mask->key.field, value, \
- sizeof((match)->mask->key.field)); \
- else \
- memset((u8 *)&(match)->key->field, value, \
- sizeof((match)->key->field)); \
- } while (0)
-
-static bool match_validate(const struct sw_flow_match *match,
- u64 key_attrs, u64 mask_attrs, bool log)
-{
- u64 key_expected = 0;
- u64 mask_allowed = key_attrs; /* At most allow all key attributes */
-
- /* The following mask attributes allowed only if they
- * pass the validation tests.
- */
- mask_allowed &= ~((1ULL << OVS_KEY_ATTR_IPV4)
- | (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4)
- | (1ULL << OVS_KEY_ATTR_IPV6)
- | (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6)
- | (1ULL << OVS_KEY_ATTR_TCP)
- | (1ULL << OVS_KEY_ATTR_TCP_FLAGS)
- | (1ULL << OVS_KEY_ATTR_UDP)
- | (1ULL << OVS_KEY_ATTR_SCTP)
- | (1ULL << OVS_KEY_ATTR_ICMP)
- | (1ULL << OVS_KEY_ATTR_ICMPV6)
- | (1ULL << OVS_KEY_ATTR_ARP)
- | (1ULL << OVS_KEY_ATTR_ND)
- | (1ULL << OVS_KEY_ATTR_MPLS)
- | (1ULL << OVS_KEY_ATTR_NSH));
-
- /* Always allowed mask fields. */
- mask_allowed |= ((1ULL << OVS_KEY_ATTR_TUNNEL)
- | (1ULL << OVS_KEY_ATTR_IN_PORT)
- | (1ULL << OVS_KEY_ATTR_ETHERTYPE));
-
- /* Check key attributes. */
- if (match->key->eth.type == htons(ETH_P_ARP)
- || match->key->eth.type == htons(ETH_P_RARP)) {
- key_expected |= 1ULL << OVS_KEY_ATTR_ARP;
- if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
- mask_allowed |= 1ULL << OVS_KEY_ATTR_ARP;
- }
-
- if (eth_p_mpls(match->key->eth.type)) {
- key_expected |= 1ULL << OVS_KEY_ATTR_MPLS;
- if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
- mask_allowed |= 1ULL << OVS_KEY_ATTR_MPLS;
- }
-
- if (match->key->eth.type == htons(ETH_P_IP)) {
- key_expected |= 1ULL << OVS_KEY_ATTR_IPV4;
- if (match->mask && match->mask->key.eth.type == htons(0xffff)) {
- mask_allowed |= 1ULL << OVS_KEY_ATTR_IPV4;
- mask_allowed |= 1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4;
- }
-
- if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
- if (match->key->ip.proto == IPPROTO_UDP) {
- key_expected |= 1ULL << OVS_KEY_ATTR_UDP;
- if (match->mask && (match->mask->key.ip.proto == 0xff))
- mask_allowed |= 1ULL << OVS_KEY_ATTR_UDP;
- }
-
- if (match->key->ip.proto == IPPROTO_SCTP) {
- key_expected |= 1ULL << OVS_KEY_ATTR_SCTP;
- if (match->mask && (match->mask->key.ip.proto == 0xff))
- mask_allowed |= 1ULL << OVS_KEY_ATTR_SCTP;
- }
-
- if (match->key->ip.proto == IPPROTO_TCP) {
- key_expected |= 1ULL << OVS_KEY_ATTR_TCP;
- key_expected |= 1ULL << OVS_KEY_ATTR_TCP_FLAGS;
- if (match->mask && (match->mask->key.ip.proto == 0xff)) {
- mask_allowed |= 1ULL << OVS_KEY_ATTR_TCP;
- mask_allowed |= 1ULL << OVS_KEY_ATTR_TCP_FLAGS;
- }
- }
-
- if (match->key->ip.proto == IPPROTO_ICMP) {
- key_expected |= 1ULL << OVS_KEY_ATTR_ICMP;
- if (match->mask && (match->mask->key.ip.proto == 0xff))
- mask_allowed |= 1ULL << OVS_KEY_ATTR_ICMP;
- }
- }
- }
-
- if (match->key->eth.type == htons(ETH_P_IPV6)) {
- key_expected |= 1ULL << OVS_KEY_ATTR_IPV6;
- if (match->mask && match->mask->key.eth.type == htons(0xffff)) {
- mask_allowed |= 1ULL << OVS_KEY_ATTR_IPV6;
- mask_allowed |= 1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6;
- }
-
- if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
- if (match->key->ip.proto == IPPROTO_UDP) {
- key_expected |= 1ULL << OVS_KEY_ATTR_UDP;
- if (match->mask && (match->mask->key.ip.proto == 0xff))
- mask_allowed |= 1ULL << OVS_KEY_ATTR_UDP;
- }
-
- if (match->key->ip.proto == IPPROTO_SCTP) {
- key_expected |= 1ULL << OVS_KEY_ATTR_SCTP;
- if (match->mask && (match->mask->key.ip.proto == 0xff))
- mask_allowed |= 1ULL << OVS_KEY_ATTR_SCTP;
- }
-
- if (match->key->ip.proto == IPPROTO_TCP) {
- key_expected |= 1ULL << OVS_KEY_ATTR_TCP;
- key_expected |= 1ULL << OVS_KEY_ATTR_TCP_FLAGS;
- if (match->mask && (match->mask->key.ip.proto == 0xff)) {
- mask_allowed |= 1ULL << OVS_KEY_ATTR_TCP;
- mask_allowed |= 1ULL << OVS_KEY_ATTR_TCP_FLAGS;
- }
- }
-
- if (match->key->ip.proto == IPPROTO_ICMPV6) {
- key_expected |= 1ULL << OVS_KEY_ATTR_ICMPV6;
- if (match->mask && (match->mask->key.ip.proto == 0xff))
- mask_allowed |= 1ULL << OVS_KEY_ATTR_ICMPV6;
-
- if (match->key->tp.src ==
- htons(NDISC_NEIGHBOUR_SOLICITATION) ||
- match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
- key_expected |= 1ULL << OVS_KEY_ATTR_ND;
- /* Original direction conntrack tuple
- * uses the same space as the ND fields
- * in the key, so both are not allowed
- * at the same time.
- */
- mask_allowed &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6);
- if (match->mask && (match->mask->key.tp.src == htons(0xff)))
- mask_allowed |= 1ULL << OVS_KEY_ATTR_ND;
- }
- }
- }
- }
-
- if (match->key->eth.type == htons(ETH_P_NSH)) {
- key_expected |= 1 << OVS_KEY_ATTR_NSH;
- if (match->mask &&
- match->mask->key.eth.type == htons(0xffff)) {
- mask_allowed |= 1 << OVS_KEY_ATTR_NSH;
- }
- }
-
- if ((key_attrs & key_expected) != key_expected) {
- /* Key attributes check failed. */
- OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)",
- (unsigned long long)key_attrs,
- (unsigned long long)key_expected);
- return false;
- }
-
- if ((mask_attrs & mask_allowed) != mask_attrs) {
- /* Mask attributes check failed. */
- OVS_NLERR(log, "Unexpected mask (mask=%llx, allowed=%llx)",
- (unsigned long long)mask_attrs,
- (unsigned long long)mask_allowed);
- return false;
- }
-
- return true;
-}
-
-size_t ovs_tun_key_attr_size(void)
-{
- /* Whenever adding new OVS_TUNNEL_KEY_ FIELDS, we should consider
- * updating this function.
- */
- return nla_total_size_64bit(8) /* OVS_TUNNEL_KEY_ATTR_ID */
- + nla_total_size(16) /* OVS_TUNNEL_KEY_ATTR_IPV[46]_SRC */
- + nla_total_size(16) /* OVS_TUNNEL_KEY_ATTR_IPV[46]_DST */
- + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */
- + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */
- + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
- + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */
- + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */
- + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
- /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS and
- * OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS is mutually exclusive with
- * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it.
- */
- + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
- + nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */
-}
-
-static size_t ovs_nsh_key_attr_size(void)
-{
- /* Whenever adding new OVS_NSH_KEY_ FIELDS, we should consider
- * updating this function.
- */
- return nla_total_size(NSH_BASE_HDR_LEN) /* OVS_NSH_KEY_ATTR_BASE */
- /* OVS_NSH_KEY_ATTR_MD1 and OVS_NSH_KEY_ATTR_MD2 are
- * mutually exclusive, so the bigger one can cover
- * the small one.
- */
- + nla_total_size(NSH_CTX_HDRS_MAX_LEN);
-}
-
-size_t ovs_key_attr_size(void)
-{
- /* Whenever adding new OVS_KEY_ FIELDS, we should consider
- * updating this function.
- */
- BUILD_BUG_ON(OVS_KEY_ATTR_MAX != 31);
-
- return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
- + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */
- + ovs_tun_key_attr_size()
- + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */
- + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */
- + nla_total_size(4) /* OVS_KEY_ATTR_DP_HASH */
- + nla_total_size(4) /* OVS_KEY_ATTR_RECIRC_ID */
- + nla_total_size(4) /* OVS_KEY_ATTR_CT_STATE */
- + nla_total_size(2) /* OVS_KEY_ATTR_CT_ZONE */
- + nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */
- + nla_total_size(16) /* OVS_KEY_ATTR_CT_LABELS */
- + nla_total_size(40) /* OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6 */
- + nla_total_size(0) /* OVS_KEY_ATTR_NSH */
- + ovs_nsh_key_attr_size()
- + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
- + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
- + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */
- + nla_total_size(0) /* OVS_KEY_ATTR_ENCAP */
- + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
- + nla_total_size(40) /* OVS_KEY_ATTR_IPV6 */
- + nla_total_size(2) /* OVS_KEY_ATTR_ICMPV6 */
- + nla_total_size(28); /* OVS_KEY_ATTR_ND */
-}
-
-static const struct ovs_len_tbl ovs_vxlan_ext_key_lens[OVS_VXLAN_EXT_MAX + 1] = {
- [OVS_VXLAN_EXT_GBP] = { .len = sizeof(u32) },
-};
-
-static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
- [OVS_TUNNEL_KEY_ATTR_ID] = { .len = sizeof(u64) },
- [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = { .len = sizeof(u32) },
- [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = { .len = sizeof(u32) },
- [OVS_TUNNEL_KEY_ATTR_TOS] = { .len = 1 },
- [OVS_TUNNEL_KEY_ATTR_TTL] = { .len = 1 },
- [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = { .len = 0 },
- [OVS_TUNNEL_KEY_ATTR_CSUM] = { .len = 0 },
- [OVS_TUNNEL_KEY_ATTR_TP_SRC] = { .len = sizeof(u16) },
- [OVS_TUNNEL_KEY_ATTR_TP_DST] = { .len = sizeof(u16) },
- [OVS_TUNNEL_KEY_ATTR_OAM] = { .len = 0 },
- [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = { .len = OVS_ATTR_VARIABLE },
- [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS] = { .len = OVS_ATTR_NESTED,
- .next = ovs_vxlan_ext_key_lens },
- [OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = sizeof(struct in6_addr) },
- [OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) },
- [OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS] = { .len = OVS_ATTR_VARIABLE },
-};
-
-static const struct ovs_len_tbl
-ovs_nsh_key_attr_lens[OVS_NSH_KEY_ATTR_MAX + 1] = {
- [OVS_NSH_KEY_ATTR_BASE] = { .len = sizeof(struct ovs_nsh_key_base) },
- [OVS_NSH_KEY_ATTR_MD1] = { .len = sizeof(struct ovs_nsh_key_md1) },
- [OVS_NSH_KEY_ATTR_MD2] = { .len = OVS_ATTR_VARIABLE },
-};
-
-/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */
-static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
- [OVS_KEY_ATTR_ENCAP] = { .len = OVS_ATTR_NESTED },
- [OVS_KEY_ATTR_PRIORITY] = { .len = sizeof(u32) },
- [OVS_KEY_ATTR_IN_PORT] = { .len = sizeof(u32) },
- [OVS_KEY_ATTR_SKB_MARK] = { .len = sizeof(u32) },
- [OVS_KEY_ATTR_ETHERNET] = { .len = sizeof(struct ovs_key_ethernet) },
- [OVS_KEY_ATTR_VLAN] = { .len = sizeof(__be16) },
- [OVS_KEY_ATTR_ETHERTYPE] = { .len = sizeof(__be16) },
- [OVS_KEY_ATTR_IPV4] = { .len = sizeof(struct ovs_key_ipv4) },
- [OVS_KEY_ATTR_IPV6] = { .len = sizeof(struct ovs_key_ipv6) },
- [OVS_KEY_ATTR_TCP] = { .len = sizeof(struct ovs_key_tcp) },
- [OVS_KEY_ATTR_TCP_FLAGS] = { .len = sizeof(__be16) },
- [OVS_KEY_ATTR_UDP] = { .len = sizeof(struct ovs_key_udp) },
- [OVS_KEY_ATTR_SCTP] = { .len = sizeof(struct ovs_key_sctp) },
- [OVS_KEY_ATTR_ICMP] = { .len = sizeof(struct ovs_key_icmp) },
- [OVS_KEY_ATTR_ICMPV6] = { .len = sizeof(struct ovs_key_icmpv6) },
- [OVS_KEY_ATTR_ARP] = { .len = sizeof(struct ovs_key_arp) },
- [OVS_KEY_ATTR_ND] = { .len = sizeof(struct ovs_key_nd) },
- [OVS_KEY_ATTR_RECIRC_ID] = { .len = sizeof(u32) },
- [OVS_KEY_ATTR_DP_HASH] = { .len = sizeof(u32) },
- [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED,
- .next = ovs_tunnel_key_lens, },
- [OVS_KEY_ATTR_MPLS] = { .len = OVS_ATTR_VARIABLE },
- [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u32) },
- [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) },
- [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) },
- [OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) },
- [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4] = {
- .len = sizeof(struct ovs_key_ct_tuple_ipv4) },
- [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = {
- .len = sizeof(struct ovs_key_ct_tuple_ipv6) },
- [OVS_KEY_ATTR_NSH] = { .len = OVS_ATTR_NESTED,
- .next = ovs_nsh_key_attr_lens, },
-};
-
-static bool check_attr_len(unsigned int attr_len, unsigned int expected_len)
-{
- return expected_len == attr_len ||
- expected_len == OVS_ATTR_NESTED ||
- expected_len == OVS_ATTR_VARIABLE;
-}
-
-static bool is_all_zero(const u8 *fp, size_t size)
-{
- int i;
-
- if (!fp)
- return false;
-
- for (i = 0; i < size; i++)
- if (fp[i])
- return false;
-
- return true;
-}
-
-static int __parse_flow_nlattrs(const struct nlattr *attr,
- const struct nlattr *a[],
- u64 *attrsp, bool log, bool nz)
-{
- const struct nlattr *nla;
- u64 attrs;
- int rem;
-
- attrs = *attrsp;
- nla_for_each_nested(nla, attr, rem) {
- u16 type = nla_type(nla);
- int expected_len;
-
- if (type > OVS_KEY_ATTR_MAX) {
- OVS_NLERR(log, "Key type %d is out of range max %d",
- type, OVS_KEY_ATTR_MAX);
- return -EINVAL;
- }
-
- if (type == OVS_KEY_ATTR_PACKET_TYPE ||
- type == OVS_KEY_ATTR_ND_EXTENSIONS ||
- type == OVS_KEY_ATTR_TUNNEL_INFO) {
- OVS_NLERR(log, "Key type %d is not supported", type);
- return -EINVAL;
- }
-
- if (attrs & (1ULL << type)) {
- OVS_NLERR(log, "Duplicate key (type %d).", type);
- return -EINVAL;
- }
-
- expected_len = ovs_key_lens[type].len;
- if (!check_attr_len(nla_len(nla), expected_len)) {
- OVS_NLERR(log, "Key %d has unexpected len %d expected %d",
- type, nla_len(nla), expected_len);
- return -EINVAL;
- }
-
- if (!nz || !is_all_zero(nla_data(nla), nla_len(nla))) {
- attrs |= 1ULL << type;
- a[type] = nla;
- }
- }
- if (rem) {
- OVS_NLERR(log, "Message has %d unknown bytes.", rem);
- return -EINVAL;
- }
-
- *attrsp = attrs;
- return 0;
-}
-
-static int parse_flow_mask_nlattrs(const struct nlattr *attr,
- const struct nlattr *a[], u64 *attrsp,
- bool log)
-{
- return __parse_flow_nlattrs(attr, a, attrsp, log, true);
-}
-
-int parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[],
- u64 *attrsp, bool log)
-{
- return __parse_flow_nlattrs(attr, a, attrsp, log, false);
-}
-
-static int genev_tun_opt_from_nlattr(const struct nlattr *a,
- struct sw_flow_match *match, bool is_mask,
- bool log)
-{
- unsigned long opt_key_offset;
-
- if (nla_len(a) > sizeof(match->key->tun_opts)) {
- OVS_NLERR(log, "Geneve option length err (len %d, max %zu).",
- nla_len(a), sizeof(match->key->tun_opts));
- return -EINVAL;
- }
-
- if (nla_len(a) % 4 != 0) {
- OVS_NLERR(log, "Geneve opt len %d is not a multiple of 4.",
- nla_len(a));
- return -EINVAL;
- }
-
- /* We need to record the length of the options passed
- * down, otherwise packets with the same format but
- * additional options will be silently matched.
- */
- if (!is_mask) {
- SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a),
- false);
- } else {
- /* This is somewhat unusual because it looks at
- * both the key and mask while parsing the
- * attributes (and by extension assumes the key
- * is parsed first). Normally, we would verify
- * that each is the correct length and that the
- * attributes line up in the validate function.
- * However, that is difficult because this is
- * variable length and we won't have the
- * information later.
- */
- if (match->key->tun_opts_len != nla_len(a)) {
- OVS_NLERR(log, "Geneve option len %d != mask len %d",
- match->key->tun_opts_len, nla_len(a));
- return -EINVAL;
- }
-
- SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
- }
-
- opt_key_offset = TUN_METADATA_OFFSET(nla_len(a));
- SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a),
- nla_len(a), is_mask);
- return 0;
-}
-
-static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr,
- struct sw_flow_match *match, bool is_mask,
- bool log)
-{
- struct nlattr *a;
- int rem;
- unsigned long opt_key_offset;
- struct vxlan_metadata opts;
-
- BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));
-
- memset(&opts, 0, sizeof(opts));
- nla_for_each_nested(a, attr, rem) {
- int type = nla_type(a);
-
- if (type > OVS_VXLAN_EXT_MAX) {
- OVS_NLERR(log, "VXLAN extension %d out of range max %d",
- type, OVS_VXLAN_EXT_MAX);
- return -EINVAL;
- }
-
- if (!check_attr_len(nla_len(a),
- ovs_vxlan_ext_key_lens[type].len)) {
- OVS_NLERR(log, "VXLAN extension %d has unexpected len %d expected %d",
- type, nla_len(a),
- ovs_vxlan_ext_key_lens[type].len);
- return -EINVAL;
- }
-
- switch (type) {
- case OVS_VXLAN_EXT_GBP:
- opts.gbp = nla_get_u32(a);
- break;
- default:
- OVS_NLERR(log, "Unknown VXLAN extension attribute %d",
- type);
- return -EINVAL;
- }
- }
- if (rem) {
- OVS_NLERR(log, "VXLAN extension message has %d unknown bytes.",
- rem);
- return -EINVAL;
- }
-
- if (!is_mask)
- SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false);
- else
- SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
-
- opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts));
- SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts),
- is_mask);
- return 0;
-}
-
-static int erspan_tun_opt_from_nlattr(const struct nlattr *a,
- struct sw_flow_match *match, bool is_mask,
- bool log)
-{
- unsigned long opt_key_offset;
-
- BUILD_BUG_ON(sizeof(struct erspan_metadata) >
- sizeof(match->key->tun_opts));
-
- if (nla_len(a) > sizeof(match->key->tun_opts)) {
- OVS_NLERR(log, "ERSPAN option length err (len %d, max %zu).",
- nla_len(a), sizeof(match->key->tun_opts));
- return -EINVAL;
- }
-
- if (!is_mask)
- SW_FLOW_KEY_PUT(match, tun_opts_len,
- sizeof(struct erspan_metadata), false);
- else
- SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
-
- opt_key_offset = TUN_METADATA_OFFSET(nla_len(a));
- SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a),
- nla_len(a), is_mask);
- return 0;
-}
-
-static int ip_tun_from_nlattr(const struct nlattr *attr,
- struct sw_flow_match *match, bool is_mask,
- bool log)
-{
- bool ttl = false, ipv4 = false, ipv6 = false;
- __be16 tun_flags = 0;
- int opts_type = 0;
- struct nlattr *a;
- int rem;
-
- nla_for_each_nested(a, attr, rem) {
- int type = nla_type(a);
- int err;
-
- if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
- OVS_NLERR(log, "Tunnel attr %d out of range max %d",
- type, OVS_TUNNEL_KEY_ATTR_MAX);
- return -EINVAL;
- }
-
- if (!check_attr_len(nla_len(a),
- ovs_tunnel_key_lens[type].len)) {
- OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d",
- type, nla_len(a), ovs_tunnel_key_lens[type].len);
- return -EINVAL;
- }
-
- switch (type) {
- case OVS_TUNNEL_KEY_ATTR_ID:
- SW_FLOW_KEY_PUT(match, tun_key.tun_id,
- nla_get_be64(a), is_mask);
- tun_flags |= TUNNEL_KEY;
- break;
- case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
- SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.src,
- nla_get_in_addr(a), is_mask);
- ipv4 = true;
- break;
- case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
- SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.dst,
- nla_get_in_addr(a), is_mask);
- ipv4 = true;
- break;
- case OVS_TUNNEL_KEY_ATTR_IPV6_SRC:
- SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.src,
- nla_get_in6_addr(a), is_mask);
- ipv6 = true;
- break;
- case OVS_TUNNEL_KEY_ATTR_IPV6_DST:
- SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst,
- nla_get_in6_addr(a), is_mask);
- ipv6 = true;
- break;
- case OVS_TUNNEL_KEY_ATTR_TOS:
- SW_FLOW_KEY_PUT(match, tun_key.tos,
- nla_get_u8(a), is_mask);
- break;
- case OVS_TUNNEL_KEY_ATTR_TTL:
- SW_FLOW_KEY_PUT(match, tun_key.ttl,
- nla_get_u8(a), is_mask);
- ttl = true;
- break;
- case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
- tun_flags |= TUNNEL_DONT_FRAGMENT;
- break;
- case OVS_TUNNEL_KEY_ATTR_CSUM:
- tun_flags |= TUNNEL_CSUM;
- break;
- case OVS_TUNNEL_KEY_ATTR_TP_SRC:
- SW_FLOW_KEY_PUT(match, tun_key.tp_src,
- nla_get_be16(a), is_mask);
- break;
- case OVS_TUNNEL_KEY_ATTR_TP_DST:
- SW_FLOW_KEY_PUT(match, tun_key.tp_dst,
- nla_get_be16(a), is_mask);
- break;
- case OVS_TUNNEL_KEY_ATTR_OAM:
- tun_flags |= TUNNEL_OAM;
- break;
- case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
- if (opts_type) {
- OVS_NLERR(log, "Multiple metadata blocks provided");
- return -EINVAL;
- }
-
- err = genev_tun_opt_from_nlattr(a, match, is_mask, log);
- if (err)
- return err;
-
- tun_flags |= TUNNEL_GENEVE_OPT;
- opts_type = type;
- break;
- case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
- if (opts_type) {
- OVS_NLERR(log, "Multiple metadata blocks provided");
- return -EINVAL;
- }
-
- err = vxlan_tun_opt_from_nlattr(a, match, is_mask, log);
- if (err)
- return err;
-
- tun_flags |= TUNNEL_VXLAN_OPT;
- opts_type = type;
- break;
- case OVS_TUNNEL_KEY_ATTR_PAD:
- break;
- case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
- if (opts_type) {
- OVS_NLERR(log, "Multiple metadata blocks provided");
- return -EINVAL;
- }
-
- err = erspan_tun_opt_from_nlattr(a, match, is_mask,
- log);
- if (err)
- return err;
-
- tun_flags |= TUNNEL_ERSPAN_OPT;
- opts_type = type;
- break;
- default:
- OVS_NLERR(log, "Unknown IP tunnel attribute %d",
- type);
- return -EINVAL;
- }
- }
-
- SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
- if (is_mask)
- SW_FLOW_KEY_MEMSET_FIELD(match, tun_proto, 0xff, true);
- else
- SW_FLOW_KEY_PUT(match, tun_proto, ipv6 ? AF_INET6 : AF_INET,
- false);
-
- if (rem > 0) {
- OVS_NLERR(log, "IP tunnel attribute has %d unknown bytes.",
- rem);
- return -EINVAL;
- }
-
- if (ipv4 && ipv6) {
- OVS_NLERR(log, "Mixed IPv4 and IPv6 tunnel attributes");
- return -EINVAL;
- }
-
- if (!is_mask) {
- if (!ipv4 && !ipv6) {
- OVS_NLERR(log, "IP tunnel dst address not specified");
- return -EINVAL;
- }
- if (ipv4 && !match->key->tun_key.u.ipv4.dst) {
- OVS_NLERR(log, "IPv4 tunnel dst address is zero");
- return -EINVAL;
- }
- if (ipv6 && ipv6_addr_any(&match->key->tun_key.u.ipv6.dst)) {
- OVS_NLERR(log, "IPv6 tunnel dst address is zero");
- return -EINVAL;
- }
-
- if (!ttl) {
- OVS_NLERR(log, "IP tunnel TTL not specified.");
- return -EINVAL;
- }
- }
-
- return opts_type;
-}
-
-static int vxlan_opt_to_nlattr(struct sk_buff *skb,
- const void *tun_opts, int swkey_tun_opts_len)
-{
- const struct vxlan_metadata *opts = tun_opts;
- struct nlattr *nla;
-
- nla = nla_nest_start_noflag(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS);
- if (!nla)
- return -EMSGSIZE;
-
- if (nla_put_u32(skb, OVS_VXLAN_EXT_GBP, opts->gbp) < 0)
- return -EMSGSIZE;
-
- nla_nest_end(skb, nla);
- return 0;
-}
-
-static int __ip_tun_to_nlattr(struct sk_buff *skb,
- const struct ip_tunnel_key *output,
- const void *tun_opts, int swkey_tun_opts_len,
- unsigned short tun_proto)
-{
- if (output->tun_flags & TUNNEL_KEY &&
- nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id,
- OVS_TUNNEL_KEY_ATTR_PAD))
- return -EMSGSIZE;
- switch (tun_proto) {
- case AF_INET:
- if (output->u.ipv4.src &&
- nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC,
- output->u.ipv4.src))
- return -EMSGSIZE;
- if (output->u.ipv4.dst &&
- nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST,
- output->u.ipv4.dst))
- return -EMSGSIZE;
- break;
- case AF_INET6:
- if (!ipv6_addr_any(&output->u.ipv6.src) &&
- nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_SRC,
- &output->u.ipv6.src))
- return -EMSGSIZE;
- if (!ipv6_addr_any(&output->u.ipv6.dst) &&
- nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_DST,
- &output->u.ipv6.dst))
- return -EMSGSIZE;
- break;
- }
- if (output->tos &&
- nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->tos))
- return -EMSGSIZE;
- if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ttl))
- return -EMSGSIZE;
- if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
- nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
- return -EMSGSIZE;
- if ((output->tun_flags & TUNNEL_CSUM) &&
- nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
- return -EMSGSIZE;
- if (output->tp_src &&
- nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_SRC, output->tp_src))
- return -EMSGSIZE;
- if (output->tp_dst &&
- nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_DST, output->tp_dst))
- return -EMSGSIZE;
- if ((output->tun_flags & TUNNEL_OAM) &&
- nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
- return -EMSGSIZE;
- if (swkey_tun_opts_len) {
- if (output->tun_flags & TUNNEL_GENEVE_OPT &&
- nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
- swkey_tun_opts_len, tun_opts))
- return -EMSGSIZE;
- else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
- vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
- return -EMSGSIZE;
- else if (output->tun_flags & TUNNEL_ERSPAN_OPT &&
- nla_put(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
- swkey_tun_opts_len, tun_opts))
- return -EMSGSIZE;
- }
-
- return 0;
-}
-
-static int ip_tun_to_nlattr(struct sk_buff *skb,
- const struct ip_tunnel_key *output,
- const void *tun_opts, int swkey_tun_opts_len,
- unsigned short tun_proto)
-{
- struct nlattr *nla;
- int err;
-
- nla = nla_nest_start_noflag(skb, OVS_KEY_ATTR_TUNNEL);
- if (!nla)
- return -EMSGSIZE;
-
- err = __ip_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len,
- tun_proto);
- if (err)
- return err;
-
- nla_nest_end(skb, nla);
- return 0;
-}
-
-int ovs_nla_put_tunnel_info(struct sk_buff *skb,
- struct ip_tunnel_info *tun_info)
-{
- return __ip_tun_to_nlattr(skb, &tun_info->key,
- ip_tunnel_info_opts(tun_info),
- tun_info->options_len,
- ip_tunnel_info_af(tun_info));
-}
-
-static int encode_vlan_from_nlattrs(struct sw_flow_match *match,
- const struct nlattr *a[],
- bool is_mask, bool inner)
-{
- __be16 tci = 0;
- __be16 tpid = 0;
-
- if (a[OVS_KEY_ATTR_VLAN])
- tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
-
- if (a[OVS_KEY_ATTR_ETHERTYPE])
- tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
-
- if (likely(!inner)) {
- SW_FLOW_KEY_PUT(match, eth.vlan.tpid, tpid, is_mask);
- SW_FLOW_KEY_PUT(match, eth.vlan.tci, tci, is_mask);
- } else {
- SW_FLOW_KEY_PUT(match, eth.cvlan.tpid, tpid, is_mask);
- SW_FLOW_KEY_PUT(match, eth.cvlan.tci, tci, is_mask);
- }
- return 0;
-}
-
-static int validate_vlan_from_nlattrs(const struct sw_flow_match *match,
- u64 key_attrs, bool inner,
- const struct nlattr **a, bool log)
-{
- __be16 tci = 0;
-
- if (!((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) &&
- (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
- eth_type_vlan(nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE])))) {
- /* Not a VLAN. */
- return 0;
- }
-
- if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
- (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
- OVS_NLERR(log, "Invalid %s frame", (inner) ? "C-VLAN" : "VLAN");
- return -EINVAL;
- }
-
- if (a[OVS_KEY_ATTR_VLAN])
- tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
-
- if (!(tci & htons(VLAN_CFI_MASK))) {
- if (tci) {
- OVS_NLERR(log, "%s TCI does not have VLAN_CFI_MASK bit set.",
- (inner) ? "C-VLAN" : "VLAN");
- return -EINVAL;
- } else if (nla_len(a[OVS_KEY_ATTR_ENCAP])) {
- /* Corner case for truncated VLAN header. */
- OVS_NLERR(log, "Truncated %s header has non-zero encap attribute.",
- (inner) ? "C-VLAN" : "VLAN");
- return -EINVAL;
- }
- }
-
- return 1;
-}
-
-static int validate_vlan_mask_from_nlattrs(const struct sw_flow_match *match,
- u64 key_attrs, bool inner,
- const struct nlattr **a, bool log)
-{
- __be16 tci = 0;
- __be16 tpid = 0;
- bool encap_valid = !!(match->key->eth.vlan.tci &
- htons(VLAN_CFI_MASK));
- bool i_encap_valid = !!(match->key->eth.cvlan.tci &
- htons(VLAN_CFI_MASK));
-
- if (!(key_attrs & (1 << OVS_KEY_ATTR_ENCAP))) {
- /* Not a VLAN. */
- return 0;
- }
-
- if ((!inner && !encap_valid) || (inner && !i_encap_valid)) {
- OVS_NLERR(log, "Encap mask attribute is set for non-%s frame.",
- (inner) ? "C-VLAN" : "VLAN");
- return -EINVAL;
- }
-
- if (a[OVS_KEY_ATTR_VLAN])
- tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
-
- if (a[OVS_KEY_ATTR_ETHERTYPE])
- tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
-
- if (tpid != htons(0xffff)) {
- OVS_NLERR(log, "Must have an exact match on %s TPID (mask=%x).",
- (inner) ? "C-VLAN" : "VLAN", ntohs(tpid));
- return -EINVAL;
- }
- if (!(tci & htons(VLAN_CFI_MASK))) {
- OVS_NLERR(log, "%s TCI mask does not have exact match for VLAN_CFI_MASK bit.",
- (inner) ? "C-VLAN" : "VLAN");
- return -EINVAL;
- }
-
- return 1;
-}
-
-static int __parse_vlan_from_nlattrs(struct sw_flow_match *match,
- u64 *key_attrs, bool inner,
- const struct nlattr **a, bool is_mask,
- bool log)
-{
- int err;
- const struct nlattr *encap;
-
- if (!is_mask)
- err = validate_vlan_from_nlattrs(match, *key_attrs, inner,
- a, log);
- else
- err = validate_vlan_mask_from_nlattrs(match, *key_attrs, inner,
- a, log);
- if (err <= 0)
- return err;
-
- err = encode_vlan_from_nlattrs(match, a, is_mask, inner);
- if (err)
- return err;
-
- *key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
- *key_attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
- *key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
-
- encap = a[OVS_KEY_ATTR_ENCAP];
-
- if (!is_mask)
- err = parse_flow_nlattrs(encap, a, key_attrs, log);
- else
- err = parse_flow_mask_nlattrs(encap, a, key_attrs, log);
-
- return err;
-}
-
-static int parse_vlan_from_nlattrs(struct sw_flow_match *match,
- u64 *key_attrs, const struct nlattr **a,
- bool is_mask, bool log)
-{
- int err;
- bool encap_valid = false;
-
- err = __parse_vlan_from_nlattrs(match, key_attrs, false, a,
- is_mask, log);
- if (err)
- return err;
-
- encap_valid = !!(match->key->eth.vlan.tci & htons(VLAN_CFI_MASK));
- if (encap_valid) {
- err = __parse_vlan_from_nlattrs(match, key_attrs, true, a,
- is_mask, log);
- if (err)
- return err;
- }
-
- return 0;
-}
-
-static int parse_eth_type_from_nlattrs(struct sw_flow_match *match,
- u64 *attrs, const struct nlattr **a,
- bool is_mask, bool log)
-{
- __be16 eth_type;
-
- eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
- if (is_mask) {
- /* Always exact match EtherType. */
- eth_type = htons(0xffff);
- } else if (!eth_proto_is_802_3(eth_type)) {
- OVS_NLERR(log, "EtherType %x is less than min %x",
- ntohs(eth_type), ETH_P_802_3_MIN);
- return -EINVAL;
- }
-
- SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
- *attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
- return 0;
-}
-
-static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
- u64 *attrs, const struct nlattr **a,
- bool is_mask, bool log)
-{
- u8 mac_proto = MAC_PROTO_ETHERNET;
-
- if (*attrs & (1ULL << OVS_KEY_ATTR_DP_HASH)) {
- u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]);
-
- SW_FLOW_KEY_PUT(match, ovs_flow_hash, hash_val, is_mask);
- *attrs &= ~(1ULL << OVS_KEY_ATTR_DP_HASH);
- }
-
- if (*attrs & (1ULL << OVS_KEY_ATTR_RECIRC_ID)) {
- u32 recirc_id = nla_get_u32(a[OVS_KEY_ATTR_RECIRC_ID]);
-
- SW_FLOW_KEY_PUT(match, recirc_id, recirc_id, is_mask);
- *attrs &= ~(1ULL << OVS_KEY_ATTR_RECIRC_ID);
- }
-
- if (*attrs & (1ULL << OVS_KEY_ATTR_PRIORITY)) {
- SW_FLOW_KEY_PUT(match, phy.priority,
- nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
- *attrs &= ~(1ULL << OVS_KEY_ATTR_PRIORITY);
- }
-
- if (*attrs & (1ULL << OVS_KEY_ATTR_IN_PORT)) {
- u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
-
- if (is_mask) {
- in_port = 0xffffffff; /* Always exact match in_port. */
- } else if (in_port >= DP_MAX_PORTS) {
- OVS_NLERR(log, "Port %d exceeds max allowable %d",
- in_port, DP_MAX_PORTS);
- return -EINVAL;
- }
-
- SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
- *attrs &= ~(1ULL << OVS_KEY_ATTR_IN_PORT);
- } else if (!is_mask) {
- SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask);
- }
-
- if (*attrs & (1ULL << OVS_KEY_ATTR_SKB_MARK)) {
- uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);
-
- SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask);
- *attrs &= ~(1ULL << OVS_KEY_ATTR_SKB_MARK);
- }
- if (*attrs & (1ULL << OVS_KEY_ATTR_TUNNEL)) {
- if (ip_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
- is_mask, log) < 0)
- return -EINVAL;
- *attrs &= ~(1ULL << OVS_KEY_ATTR_TUNNEL);
- }
-
- if (*attrs & (1 << OVS_KEY_ATTR_CT_STATE) &&
- ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) {
- u32 ct_state = nla_get_u32(a[OVS_KEY_ATTR_CT_STATE]);
-
- if (ct_state & ~CT_SUPPORTED_MASK) {
- OVS_NLERR(log, "ct_state flags %08x unsupported",
- ct_state);
- return -EINVAL;
- }
-
- SW_FLOW_KEY_PUT(match, ct_state, ct_state, is_mask);
- *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_STATE);
- }
- if (*attrs & (1 << OVS_KEY_ATTR_CT_ZONE) &&
- ovs_ct_verify(net, OVS_KEY_ATTR_CT_ZONE)) {
- u16 ct_zone = nla_get_u16(a[OVS_KEY_ATTR_CT_ZONE]);
-
- SW_FLOW_KEY_PUT(match, ct_zone, ct_zone, is_mask);
- *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ZONE);
- }
- if (*attrs & (1 << OVS_KEY_ATTR_CT_MARK) &&
- ovs_ct_verify(net, OVS_KEY_ATTR_CT_MARK)) {
- u32 mark = nla_get_u32(a[OVS_KEY_ATTR_CT_MARK]);
-
- SW_FLOW_KEY_PUT(match, ct.mark, mark, is_mask);
- *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_MARK);
- }
- if (*attrs & (1 << OVS_KEY_ATTR_CT_LABELS) &&
- ovs_ct_verify(net, OVS_KEY_ATTR_CT_LABELS)) {
- const struct ovs_key_ct_labels *cl;
-
- cl = nla_data(a[OVS_KEY_ATTR_CT_LABELS]);
- SW_FLOW_KEY_MEMCPY(match, ct.labels, cl->ct_labels,
- sizeof(*cl), is_mask);
- *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS);
- }
- if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4)) {
- const struct ovs_key_ct_tuple_ipv4 *ct;
-
- ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4]);
-
- SW_FLOW_KEY_PUT(match, ipv4.ct_orig.src, ct->ipv4_src, is_mask);
- SW_FLOW_KEY_PUT(match, ipv4.ct_orig.dst, ct->ipv4_dst, is_mask);
- SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask);
- SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask);
- SW_FLOW_KEY_PUT(match, ct_orig_proto, ct->ipv4_proto, is_mask);
- *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4);
- }
- if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6)) {
- const struct ovs_key_ct_tuple_ipv6 *ct;
-
- ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6]);
-
- SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.src, &ct->ipv6_src,
- sizeof(match->key->ipv6.ct_orig.src),
- is_mask);
- SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.dst, &ct->ipv6_dst,
- sizeof(match->key->ipv6.ct_orig.dst),
- is_mask);
- SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask);
- SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask);
- SW_FLOW_KEY_PUT(match, ct_orig_proto, ct->ipv6_proto, is_mask);
- *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6);
- }
-
- /* For layer 3 packets the Ethernet type is provided
- * and treated as metadata but no MAC addresses are provided.
- */
- if (!(*attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) &&
- (*attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)))
- mac_proto = MAC_PROTO_NONE;
-
- /* Always exact match mac_proto */
- SW_FLOW_KEY_PUT(match, mac_proto, is_mask ? 0xff : mac_proto, is_mask);
-
- if (mac_proto == MAC_PROTO_NONE)
- return parse_eth_type_from_nlattrs(match, attrs, a, is_mask,
- log);
-
- return 0;
-}
-
-int nsh_hdr_from_nlattr(const struct nlattr *attr,
- struct nshhdr *nh, size_t size)
-{
- struct nlattr *a;
- int rem;
- u8 flags = 0;
- u8 ttl = 0;
- int mdlen = 0;
-
- /* validate_nsh has check this, so we needn't do duplicate check here
- */
- if (size < NSH_BASE_HDR_LEN)
- return -ENOBUFS;
-
- nla_for_each_nested(a, attr, rem) {
- int type = nla_type(a);
-
- switch (type) {
- case OVS_NSH_KEY_ATTR_BASE: {
- const struct ovs_nsh_key_base *base = nla_data(a);
-
- flags = base->flags;
- ttl = base->ttl;
- nh->np = base->np;
- nh->mdtype = base->mdtype;
- nh->path_hdr = base->path_hdr;
- break;
- }
- case OVS_NSH_KEY_ATTR_MD1:
- mdlen = nla_len(a);
- if (mdlen > size - NSH_BASE_HDR_LEN)
- return -ENOBUFS;
- memcpy(&nh->md1, nla_data(a), mdlen);
- break;
-
- case OVS_NSH_KEY_ATTR_MD2:
- mdlen = nla_len(a);
- if (mdlen > size - NSH_BASE_HDR_LEN)
- return -ENOBUFS;
- memcpy(&nh->md2, nla_data(a), mdlen);
- break;
-
- default:
- return -EINVAL;
- }
- }
-
- /* nsh header length = NSH_BASE_HDR_LEN + mdlen */
- nh->ver_flags_ttl_len = 0;
- nsh_set_flags_ttl_len(nh, flags, ttl, NSH_BASE_HDR_LEN + mdlen);
-
- return 0;
-}
-
-int nsh_key_from_nlattr(const struct nlattr *attr,
- struct ovs_key_nsh *nsh, struct ovs_key_nsh *nsh_mask)
-{
- struct nlattr *a;
- int rem;
-
- /* validate_nsh has check this, so we needn't do duplicate check here
- */
- nla_for_each_nested(a, attr, rem) {
- int type = nla_type(a);
-
- switch (type) {
- case OVS_NSH_KEY_ATTR_BASE: {
- const struct ovs_nsh_key_base *base = nla_data(a);
- const struct ovs_nsh_key_base *base_mask = base + 1;
-
- nsh->base = *base;
- nsh_mask->base = *base_mask;
- break;
- }
- case OVS_NSH_KEY_ATTR_MD1: {
- const struct ovs_nsh_key_md1 *md1 = nla_data(a);
- const struct ovs_nsh_key_md1 *md1_mask = md1 + 1;
-
- memcpy(nsh->context, md1->context, sizeof(*md1));
- memcpy(nsh_mask->context, md1_mask->context,
- sizeof(*md1_mask));
- break;
- }
- case OVS_NSH_KEY_ATTR_MD2:
- /* Not supported yet */
- return -ENOTSUPP;
- default:
- return -EINVAL;
- }
- }
-
- return 0;
-}
-
-static int nsh_key_put_from_nlattr(const struct nlattr *attr,
- struct sw_flow_match *match, bool is_mask,
- bool is_push_nsh, bool log)
-{
- struct nlattr *a;
- int rem;
- bool has_base = false;
- bool has_md1 = false;
- bool has_md2 = false;
- u8 mdtype = 0;
- int mdlen = 0;
-
- if (WARN_ON(is_push_nsh && is_mask))
- return -EINVAL;
-
- nla_for_each_nested(a, attr, rem) {
- int type = nla_type(a);
- int i;
-
- if (type > OVS_NSH_KEY_ATTR_MAX) {
- OVS_NLERR(log, "nsh attr %d is out of range max %d",
- type, OVS_NSH_KEY_ATTR_MAX);
- return -EINVAL;
- }
-
- if (!check_attr_len(nla_len(a),
- ovs_nsh_key_attr_lens[type].len)) {
- OVS_NLERR(
- log,
- "nsh attr %d has unexpected len %d expected %d",
- type,
- nla_len(a),
- ovs_nsh_key_attr_lens[type].len
- );
- return -EINVAL;
- }
-
- switch (type) {
- case OVS_NSH_KEY_ATTR_BASE: {
- const struct ovs_nsh_key_base *base = nla_data(a);
-
- has_base = true;
- mdtype = base->mdtype;
- SW_FLOW_KEY_PUT(match, nsh.base.flags,
- base->flags, is_mask);
- SW_FLOW_KEY_PUT(match, nsh.base.ttl,
- base->ttl, is_mask);
- SW_FLOW_KEY_PUT(match, nsh.base.mdtype,
- base->mdtype, is_mask);
- SW_FLOW_KEY_PUT(match, nsh.base.np,
- base->np, is_mask);
- SW_FLOW_KEY_PUT(match, nsh.base.path_hdr,
- base->path_hdr, is_mask);
- break;
- }
- case OVS_NSH_KEY_ATTR_MD1: {
- const struct ovs_nsh_key_md1 *md1 = nla_data(a);
-
- has_md1 = true;
- for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++)
- SW_FLOW_KEY_PUT(match, nsh.context[i],
- md1->context[i], is_mask);
- break;
- }
- case OVS_NSH_KEY_ATTR_MD2:
- if (!is_push_nsh) /* Not supported MD type 2 yet */
- return -ENOTSUPP;
-
- has_md2 = true;
- mdlen = nla_len(a);
- if (mdlen > NSH_CTX_HDRS_MAX_LEN || mdlen <= 0) {
- OVS_NLERR(
- log,
- "Invalid MD length %d for MD type %d",
- mdlen,
- mdtype
- );
- return -EINVAL;
- }
- break;
- default:
- OVS_NLERR(log, "Unknown nsh attribute %d",
- type);
- return -EINVAL;
- }
- }
-
- if (rem > 0) {
- OVS_NLERR(log, "nsh attribute has %d unknown bytes.", rem);
- return -EINVAL;
- }
-
- if (has_md1 && has_md2) {
- OVS_NLERR(
- 1,
- "invalid nsh attribute: md1 and md2 are exclusive."
- );
- return -EINVAL;
- }
-
- if (!is_mask) {
- if ((has_md1 && mdtype != NSH_M_TYPE1) ||
- (has_md2 && mdtype != NSH_M_TYPE2)) {
- OVS_NLERR(1, "nsh attribute has unmatched MD type %d.",
- mdtype);
- return -EINVAL;
- }
-
- if (is_push_nsh &&
- (!has_base || (!has_md1 && !has_md2))) {
- OVS_NLERR(
- 1,
- "push_nsh: missing base or metadata attributes"
- );
- return -EINVAL;
- }
- }
-
- return 0;
-}
-
-static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
- u64 attrs, const struct nlattr **a,
- bool is_mask, bool log)
-{
- int err;
-
- err = metadata_from_nlattrs(net, match, &attrs, a, is_mask, log);
- if (err)
- return err;
-
- if (attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) {
- const struct ovs_key_ethernet *eth_key;
-
- eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
- SW_FLOW_KEY_MEMCPY(match, eth.src,
- eth_key->eth_src, ETH_ALEN, is_mask);
- SW_FLOW_KEY_MEMCPY(match, eth.dst,
- eth_key->eth_dst, ETH_ALEN, is_mask);
- attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERNET);
-
- if (attrs & (1ULL << OVS_KEY_ATTR_VLAN)) {
- /* VLAN attribute is always parsed before getting here since it
- * may occur multiple times.
- */
- OVS_NLERR(log, "VLAN attribute unexpected.");
- return -EINVAL;
- }
-
- if (attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)) {
- err = parse_eth_type_from_nlattrs(match, &attrs, a, is_mask,
- log);
- if (err)
- return err;
- } else if (!is_mask) {
- SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
- }
- } else if (!match->key->eth.type) {
- OVS_NLERR(log, "Either Ethernet header or EtherType is required.");
- return -EINVAL;
- }
-
- if (attrs & (1 << OVS_KEY_ATTR_IPV4)) {
- const struct ovs_key_ipv4 *ipv4_key;
-
- ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
- if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
- OVS_NLERR(log, "IPv4 frag type %d is out of range max %d",
- ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
- return -EINVAL;
- }
- SW_FLOW_KEY_PUT(match, ip.proto,
- ipv4_key->ipv4_proto, is_mask);
- SW_FLOW_KEY_PUT(match, ip.tos,
- ipv4_key->ipv4_tos, is_mask);
- SW_FLOW_KEY_PUT(match, ip.ttl,
- ipv4_key->ipv4_ttl, is_mask);
- SW_FLOW_KEY_PUT(match, ip.frag,
- ipv4_key->ipv4_frag, is_mask);
- SW_FLOW_KEY_PUT(match, ipv4.addr.src,
- ipv4_key->ipv4_src, is_mask);
- SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
- ipv4_key->ipv4_dst, is_mask);
- attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
- }
-
- if (attrs & (1ULL << OVS_KEY_ATTR_IPV6)) {
- const struct ovs_key_ipv6 *ipv6_key;
-
- ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
- if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
- OVS_NLERR(log, "IPv6 frag type %d is out of range max %d",
- ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
- return -EINVAL;
- }
-
- if (!is_mask && ipv6_key->ipv6_label & htonl(0xFFF00000)) {
- OVS_NLERR(log, "IPv6 flow label %x is out of range (max=%x)",
- ntohl(ipv6_key->ipv6_label), (1 << 20) - 1);
- return -EINVAL;
- }
-
- SW_FLOW_KEY_PUT(match, ipv6.label,
- ipv6_key->ipv6_label, is_mask);
- SW_FLOW_KEY_PUT(match, ip.proto,
- ipv6_key->ipv6_proto, is_mask);
- SW_FLOW_KEY_PUT(match, ip.tos,
- ipv6_key->ipv6_tclass, is_mask);
- SW_FLOW_KEY_PUT(match, ip.ttl,
- ipv6_key->ipv6_hlimit, is_mask);
- SW_FLOW_KEY_PUT(match, ip.frag,
- ipv6_key->ipv6_frag, is_mask);
- SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src,
- ipv6_key->ipv6_src,
- sizeof(match->key->ipv6.addr.src),
- is_mask);
- SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst,
- ipv6_key->ipv6_dst,
- sizeof(match->key->ipv6.addr.dst),
- is_mask);
-
- attrs &= ~(1ULL << OVS_KEY_ATTR_IPV6);
- }
-
- if (attrs & (1ULL << OVS_KEY_ATTR_ARP)) {
- const struct ovs_key_arp *arp_key;
-
- arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
- if (!is_mask && (arp_key->arp_op & htons(0xff00))) {
- OVS_NLERR(log, "Unknown ARP opcode (opcode=%d).",
- arp_key->arp_op);
- return -EINVAL;
- }
-
- SW_FLOW_KEY_PUT(match, ipv4.addr.src,
- arp_key->arp_sip, is_mask);
- SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
- arp_key->arp_tip, is_mask);
- SW_FLOW_KEY_PUT(match, ip.proto,
- ntohs(arp_key->arp_op), is_mask);
- SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha,
- arp_key->arp_sha, ETH_ALEN, is_mask);
- SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha,
- arp_key->arp_tha, ETH_ALEN, is_mask);
-
- attrs &= ~(1ULL << OVS_KEY_ATTR_ARP);
- }
-
- if (attrs & (1 << OVS_KEY_ATTR_NSH)) {
- if (nsh_key_put_from_nlattr(a[OVS_KEY_ATTR_NSH], match,
- is_mask, false, log) < 0)
- return -EINVAL;
- attrs &= ~(1 << OVS_KEY_ATTR_NSH);
- }
-
- if (attrs & (1ULL << OVS_KEY_ATTR_MPLS)) {
- const struct ovs_key_mpls *mpls_key;
- u32 hdr_len;
- u32 label_count, label_count_mask, i;
-
-
- mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
- hdr_len = nla_len(a[OVS_KEY_ATTR_MPLS]);
- label_count = hdr_len / sizeof(struct ovs_key_mpls);
-
- if (label_count == 0 || label_count > MPLS_LABEL_DEPTH ||
- hdr_len % sizeof(struct ovs_key_mpls))
- return -EINVAL;
-
- label_count_mask = GENMASK(label_count - 1, 0);
-
- for (i = 0 ; i < label_count; i++)
- SW_FLOW_KEY_PUT(match, mpls.lse[i],
- mpls_key[i].mpls_lse, is_mask);
-
- SW_FLOW_KEY_PUT(match, mpls.num_labels_mask,
- label_count_mask, is_mask);
-
-
- attrs &= ~(1ULL << OVS_KEY_ATTR_MPLS);
- }
-
- if (attrs & (1ULL << OVS_KEY_ATTR_TCP)) {
- const struct ovs_key_tcp *tcp_key;
-
- tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
- SW_FLOW_KEY_PUT(match, tp.src, tcp_key->tcp_src, is_mask);
- SW_FLOW_KEY_PUT(match, tp.dst, tcp_key->tcp_dst, is_mask);
- attrs &= ~(1ULL << OVS_KEY_ATTR_TCP);
- }
-
- if (attrs & (1ULL << OVS_KEY_ATTR_TCP_FLAGS)) {
- SW_FLOW_KEY_PUT(match, tp.flags,
- nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
- is_mask);
- attrs &= ~(1ULL << OVS_KEY_ATTR_TCP_FLAGS);
- }
-
- if (attrs & (1ULL << OVS_KEY_ATTR_UDP)) {
- const struct ovs_key_udp *udp_key;
-
- udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
- SW_FLOW_KEY_PUT(match, tp.src, udp_key->udp_src, is_mask);
- SW_FLOW_KEY_PUT(match, tp.dst, udp_key->udp_dst, is_mask);
- attrs &= ~(1ULL << OVS_KEY_ATTR_UDP);
- }
-
- if (attrs & (1ULL << OVS_KEY_ATTR_SCTP)) {
- const struct ovs_key_sctp *sctp_key;
-
- sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
- SW_FLOW_KEY_PUT(match, tp.src, sctp_key->sctp_src, is_mask);
- SW_FLOW_KEY_PUT(match, tp.dst, sctp_key->sctp_dst, is_mask);
- attrs &= ~(1ULL << OVS_KEY_ATTR_SCTP);
- }
-
- if (attrs & (1ULL << OVS_KEY_ATTR_ICMP)) {
- const struct ovs_key_icmp *icmp_key;
-
- icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
- SW_FLOW_KEY_PUT(match, tp.src,
- htons(icmp_key->icmp_type), is_mask);
- SW_FLOW_KEY_PUT(match, tp.dst,
- htons(icmp_key->icmp_code), is_mask);
- attrs &= ~(1ULL << OVS_KEY_ATTR_ICMP);
- }
-
- if (attrs & (1ULL << OVS_KEY_ATTR_ICMPV6)) {
- const struct ovs_key_icmpv6 *icmpv6_key;
-
- icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
- SW_FLOW_KEY_PUT(match, tp.src,
- htons(icmpv6_key->icmpv6_type), is_mask);
- SW_FLOW_KEY_PUT(match, tp.dst,
- htons(icmpv6_key->icmpv6_code), is_mask);
- attrs &= ~(1ULL << OVS_KEY_ATTR_ICMPV6);
- }
-
- if (attrs & (1ULL << OVS_KEY_ATTR_ND)) {
- const struct ovs_key_nd *nd_key;
-
- nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
- SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target,
- nd_key->nd_target,
- sizeof(match->key->ipv6.nd.target),
- is_mask);
- SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll,
- nd_key->nd_sll, ETH_ALEN, is_mask);
- SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll,
- nd_key->nd_tll, ETH_ALEN, is_mask);
- attrs &= ~(1ULL << OVS_KEY_ATTR_ND);
- }
-
- if (attrs != 0) {
- OVS_NLERR(log, "Unknown key attributes %llx",
- (unsigned long long)attrs);
- return -EINVAL;
- }
-
- return 0;
-}
-
/* Recursively overwrite the payload of every attribute in the nested
 * stream 'attr' with the byte 'val' (used to turn a copy of the flow
 * key into an exact-match, all-0xff mask).
 *
 * 'tbl' describes the layout: an entry of length OVS_ATTR_NESTED
 * recurses, using the entry's own sub-table when one is given and the
 * current table otherwise.  CT_STATE payloads are additionally clamped
 * to CT_SUPPORTED_MASK so the generated mask never claims bits this
 * datapath cannot match on.
 */
static void nlattr_set(struct nlattr *attr, u8 val,
		       const struct ovs_len_tbl *tbl)
{
	struct nlattr *nla;
	int rem;

	/* The nlattr stream should already have been validated */
	nla_for_each_nested(nla, attr, rem) {
		if (tbl[nla_type(nla)].len == OVS_ATTR_NESTED)
			nlattr_set(nla, val, tbl[nla_type(nla)].next ? : tbl);
		else
			memset(nla_data(nla), val, nla_len(nla));

		/* Keep the mask within the supported conntrack state bits. */
		if (nla_type(nla) == OVS_KEY_ATTR_CT_STATE)
			*(u32 *)nla_data(nla) &= CT_SUPPORTED_MASK;
	}
}
-
/* Set every attribute payload byte in 'attr' to 'val', walking nested
 * attributes according to the top-level flow key layout table. */
static void mask_set_nlattr(struct nlattr *attr, u8 val)
{
	nlattr_set(attr, val, ovs_key_lens);
}
-
/**
 * ovs_nla_get_match - parses Netlink attributes into a flow key and
 * mask. In case the 'mask' is NULL, the flow is treated as exact match
 * flow. Otherwise, it is treated as a wildcarded flow, except the mask
 * does not include any don't care bit.
 * @net: Used to determine per-namespace field support.
 * @match: receives the extracted flow match information.
 * @nla_key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink
 * attribute sequence describing the fields of the packet that triggered
 * the creation of this flow.
 * @nla_mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_*
 * Netlink attributes specifying the mask field of the wildcarded flow.
 * @log: Boolean to allow kernel error logging. Normally true, but when
 * probing for feature compatibility this should be passed in as false to
 * suppress unnecessary error logging.
 *
 * Return: 0 on success, a negative errno otherwise.
 */
int ovs_nla_get_match(struct net *net, struct sw_flow_match *match,
		      const struct nlattr *nla_key,
		      const struct nlattr *nla_mask,
		      bool log)
{
	const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
	struct nlattr *newmask = NULL;
	u64 key_attrs = 0;
	u64 mask_attrs = 0;
	int err;

	/* Parse and apply the key attributes first. */
	err = parse_flow_nlattrs(nla_key, a, &key_attrs, log);
	if (err)
		return err;

	err = parse_vlan_from_nlattrs(match, &key_attrs, a, false, log);
	if (err)
		return err;

	err = ovs_key_from_nlattrs(net, match, key_attrs, a, false, log);
	if (err)
		return err;

	if (match->mask) {
		if (!nla_mask) {
			/* Create an exact match mask. We need to set to 0xff
			 * all the 'match->mask' fields that have been touched
			 * in 'match->key'. We cannot simply memset
			 * 'match->mask', because padding bytes and fields not
			 * specified in 'match->key' should be left to 0.
			 * Instead, we use a stream of netlink attributes,
			 * copied from 'key' and set to 0xff.
			 * ovs_key_from_nlattrs() will take care of filling
			 * 'match->mask' appropriately.
			 */
			newmask = kmemdup(nla_key,
					  nla_total_size(nla_len(nla_key)),
					  GFP_KERNEL);
			if (!newmask)
				return -ENOMEM;

			mask_set_nlattr(newmask, 0xff);

			/* The userspace does not send tunnel attributes that
			 * are 0, but we should not wildcard them nonetheless.
			 */
			if (match->key->tun_proto)
				SW_FLOW_KEY_MEMSET_FIELD(match, tun_key,
							 0xff, true);

			nla_mask = newmask;
		}

		err = parse_flow_mask_nlattrs(nla_mask, a, &mask_attrs, log);
		if (err)
			goto free_newmask;

		/* Always match exactly on the (c)VLAN TCI. */
		SW_FLOW_KEY_PUT(match, eth.vlan.tci, htons(0xffff), true);
		SW_FLOW_KEY_PUT(match, eth.cvlan.tci, htons(0xffff), true);

		err = parse_vlan_from_nlattrs(match, &mask_attrs, a, true, log);
		if (err)
			goto free_newmask;

		err = ovs_key_from_nlattrs(net, match, mask_attrs, a, true,
					   log);
		if (err)
			goto free_newmask;
	}

	/* Reject combinations the datapath cannot honor. */
	if (!match_validate(match, key_attrs, mask_attrs, log))
		err = -EINVAL;

free_newmask:
	kfree(newmask);
	return err;
}
-
-static size_t get_ufid_len(const struct nlattr *attr, bool log)
-{
- size_t len;
-
- if (!attr)
- return 0;
-
- len = nla_len(attr);
- if (len < 1 || len > MAX_UFID_LENGTH) {
- OVS_NLERR(log, "ufid size %u bytes exceeds the range (1, %d)",
- nla_len(attr), MAX_UFID_LENGTH);
- return 0;
- }
-
- return len;
-}
-
-/* Initializes 'flow->ufid', returning true if 'attr' contains a valid UFID,
- * or false otherwise.
- */
-bool ovs_nla_get_ufid(struct sw_flow_id *sfid, const struct nlattr *attr,
- bool log)
-{
- sfid->ufid_len = get_ufid_len(attr, log);
- if (sfid->ufid_len)
- memcpy(sfid->ufid, nla_data(attr), sfid->ufid_len);
-
- return sfid->ufid_len;
-}
-
-int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
- const struct sw_flow_key *key, bool log)
-{
- struct sw_flow_key *new_key;
-
- if (ovs_nla_get_ufid(sfid, ufid, log))
- return 0;
-
- /* If UFID was not provided, use unmasked key. */
- new_key = kmalloc(sizeof(*new_key), GFP_KERNEL);
- if (!new_key)
- return -ENOMEM;
- memcpy(new_key, key, sizeof(*key));
- sfid->unmasked_key = new_key;
-
- return 0;
-}
-
-u32 ovs_nla_get_ufid_flags(const struct nlattr *attr)
-{
- return attr ? nla_get_u32(attr) : 0;
-}
-
-/**
- * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
- * @net: Network namespace.
- * @key: Receives extracted in_port, priority, tun_key, skb_mark and conntrack
- * metadata.
- * @a: Array of netlink attributes holding parsed %OVS_KEY_ATTR_* Netlink
- * attributes.
- * @attrs: Bit mask for the netlink attributes included in @a.
- * @log: Boolean to allow kernel error logging. Normally true, but when
- * probing for feature compatibility this should be passed in as false to
- * suppress unnecessary error logging.
- *
- * This parses a series of Netlink attributes that form a flow key, which must
- * take the same form accepted by flow_from_nlattrs(), but only enough of it to
- * get the metadata, that is, the parts of the flow key that cannot be
- * extracted from the packet itself.
- *
- * This must be called before the packet key fields are filled in 'key'.
- */
-
-int ovs_nla_get_flow_metadata(struct net *net,
- const struct nlattr *a[OVS_KEY_ATTR_MAX + 1],
- u64 attrs, struct sw_flow_key *key, bool log)
-{
- struct sw_flow_match match;
-
- memset(&match, 0, sizeof(match));
- match.key = key;
-
- key->ct_state = 0;
- key->ct_zone = 0;
- key->ct_orig_proto = 0;
- memset(&key->ct, 0, sizeof(key->ct));
- memset(&key->ipv4.ct_orig, 0, sizeof(key->ipv4.ct_orig));
- memset(&key->ipv6.ct_orig, 0, sizeof(key->ipv6.ct_orig));
-
- key->phy.in_port = DP_MAX_PORTS;
-
- return metadata_from_nlattrs(net, &match, &attrs, a, false, log);
-}
-
-static int ovs_nla_put_vlan(struct sk_buff *skb, const struct vlan_head *vh,
- bool is_mask)
-{
- __be16 eth_type = !is_mask ? vh->tpid : htons(0xffff);
-
- if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
- nla_put_be16(skb, OVS_KEY_ATTR_VLAN, vh->tci))
- return -EMSGSIZE;
- return 0;
-}
-
-static int nsh_key_to_nlattr(const struct ovs_key_nsh *nsh, bool is_mask,
- struct sk_buff *skb)
-{
- struct nlattr *start;
-
- start = nla_nest_start_noflag(skb, OVS_KEY_ATTR_NSH);
- if (!start)
- return -EMSGSIZE;
-
- if (nla_put(skb, OVS_NSH_KEY_ATTR_BASE, sizeof(nsh->base), &nsh->base))
- goto nla_put_failure;
-
- if (is_mask || nsh->base.mdtype == NSH_M_TYPE1) {
- if (nla_put(skb, OVS_NSH_KEY_ATTR_MD1,
- sizeof(nsh->context), nsh->context))
- goto nla_put_failure;
- }
-
- /* Don't support MD type 2 yet */
-
- nla_nest_end(skb, start);
-
- return 0;
-
-nla_put_failure:
- return -EMSGSIZE;
-}
-
/* __ovs_nla_put_key() - serialize a flow key into netlink attributes.
 * @swkey: the flow key that decides *which* attributes are emitted.
 * @output: the key whose *values* are emitted; equal to @swkey when
 *	dumping a flow key, or the mask key when @is_mask is true.
 * @is_mask: true when emitting a mask rather than a key.
 * @skb: destination netlink message.
 *
 * Returns 0 on success or -EMSGSIZE when the skb runs out of room.
 * On -EMSGSIZE any opened ENCAP nests are left un-ended; callers are
 * expected to discard the message.
 */
static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
			     const struct sw_flow_key *output, bool is_mask,
			     struct sk_buff *skb)
{
	struct ovs_key_ethernet *eth_key;
	struct nlattr *nla;
	struct nlattr *encap = NULL;
	struct nlattr *in_encap = NULL;

	if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id))
		goto nla_put_failure;

	if (nla_put_u32(skb, OVS_KEY_ATTR_DP_HASH, output->ovs_flow_hash))
		goto nla_put_failure;

	if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
		goto nla_put_failure;

	/* Tunnel attributes: emitted when a tunnel is present, and always
	 * for masks. */
	if ((swkey->tun_proto || is_mask)) {
		const void *opts = NULL;

		if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
			opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len);

		if (ip_tun_to_nlattr(skb, &output->tun_key, opts,
				     swkey->tun_opts_len, swkey->tun_proto))
			goto nla_put_failure;
	}

	if (swkey->phy.in_port == DP_MAX_PORTS) {
		/* DP_MAX_PORTS means "no input port"; an exact-match mask on
		 * that state is encoded as all ones. */
		if (is_mask && (output->phy.in_port == 0xffff))
			if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff))
				goto nla_put_failure;
	} else {
		u16 upper_u16;
		upper_u16 = !is_mask ? 0 : 0xffff;

		if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT,
				(upper_u16 << 16) | output->phy.in_port))
			goto nla_put_failure;
	}

	if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
		goto nla_put_failure;

	if (ovs_ct_put_key(swkey, output, skb))
		goto nla_put_failure;

	/* L2 headers only exist for Ethernet packets. */
	if (ovs_key_mac_proto(swkey) == MAC_PROTO_ETHERNET) {
		nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
		if (!nla)
			goto nla_put_failure;

		eth_key = nla_data(nla);
		ether_addr_copy(eth_key->eth_src, output->eth.src);
		ether_addr_copy(eth_key->eth_dst, output->eth.dst);

		/* Up to two VLAN tags, each wrapped in an ENCAP nest. */
		if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) {
			if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask))
				goto nla_put_failure;
			encap = nla_nest_start_noflag(skb, OVS_KEY_ATTR_ENCAP);
			if (!swkey->eth.vlan.tci)
				goto unencap;

			if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) {
				if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask))
					goto nla_put_failure;
				in_encap = nla_nest_start_noflag(skb,
								 OVS_KEY_ATTR_ENCAP);
				if (!swkey->eth.cvlan.tci)
					goto unencap;
			}
		}

		if (swkey->eth.type == htons(ETH_P_802_2)) {
			/*
			 * Ethertype 802.2 is represented in the netlink with omitted
			 * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and
			 * 0xffff in the mask attribute. Ethertype can also
			 * be wildcarded.
			 */
			if (is_mask && output->eth.type)
				if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
						 output->eth.type))
					goto nla_put_failure;
			goto unencap;
		}
	}

	if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
		goto nla_put_failure;

	if (eth_type_vlan(swkey->eth.type)) {
		/* There are 3 VLAN tags, we don't know anything about the rest
		 * of the packet, so truncate here.
		 */
		WARN_ON_ONCE(!(encap && in_encap));
		goto unencap;
	}

	/* L3 headers, keyed by EtherType. */
	if (swkey->eth.type == htons(ETH_P_IP)) {
		struct ovs_key_ipv4 *ipv4_key;

		nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key));
		if (!nla)
			goto nla_put_failure;
		ipv4_key = nla_data(nla);
		ipv4_key->ipv4_src = output->ipv4.addr.src;
		ipv4_key->ipv4_dst = output->ipv4.addr.dst;
		ipv4_key->ipv4_proto = output->ip.proto;
		ipv4_key->ipv4_tos = output->ip.tos;
		ipv4_key->ipv4_ttl = output->ip.ttl;
		ipv4_key->ipv4_frag = output->ip.frag;
	} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
		struct ovs_key_ipv6 *ipv6_key;

		nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key));
		if (!nla)
			goto nla_put_failure;
		ipv6_key = nla_data(nla);
		memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src,
		       sizeof(ipv6_key->ipv6_src));
		memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst,
		       sizeof(ipv6_key->ipv6_dst));
		ipv6_key->ipv6_label = output->ipv6.label;
		ipv6_key->ipv6_proto = output->ip.proto;
		ipv6_key->ipv6_tclass = output->ip.tos;
		ipv6_key->ipv6_hlimit = output->ip.ttl;
		ipv6_key->ipv6_frag = output->ip.frag;
	} else if (swkey->eth.type == htons(ETH_P_NSH)) {
		if (nsh_key_to_nlattr(&output->nsh, is_mask, skb))
			goto nla_put_failure;
	} else if (swkey->eth.type == htons(ETH_P_ARP) ||
		   swkey->eth.type == htons(ETH_P_RARP)) {
		struct ovs_key_arp *arp_key;

		nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key));
		if (!nla)
			goto nla_put_failure;
		arp_key = nla_data(nla);
		memset(arp_key, 0, sizeof(struct ovs_key_arp));
		arp_key->arp_sip = output->ipv4.addr.src;
		arp_key->arp_tip = output->ipv4.addr.dst;
		arp_key->arp_op = htons(output->ip.proto);
		ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
		ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
	} else if (eth_p_mpls(swkey->eth.type)) {
		u8 num_labels, i;
		struct ovs_key_mpls *mpls_key;

		/* One LSE per bit set in the label-count mask. */
		num_labels = hweight_long(output->mpls.num_labels_mask);
		nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS,
				  num_labels * sizeof(*mpls_key));
		if (!nla)
			goto nla_put_failure;

		mpls_key = nla_data(nla);
		for (i = 0; i < num_labels; i++)
			mpls_key[i].mpls_lse = output->mpls.lse[i];
	}

	/* L4 headers: only for unfragmented or first-fragment IP packets. */
	if ((swkey->eth.type == htons(ETH_P_IP) ||
	     swkey->eth.type == htons(ETH_P_IPV6)) &&
	     swkey->ip.frag != OVS_FRAG_TYPE_LATER) {

		if (swkey->ip.proto == IPPROTO_TCP) {
			struct ovs_key_tcp *tcp_key;

			nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key));
			if (!nla)
				goto nla_put_failure;
			tcp_key = nla_data(nla);
			tcp_key->tcp_src = output->tp.src;
			tcp_key->tcp_dst = output->tp.dst;
			if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
					 output->tp.flags))
				goto nla_put_failure;
		} else if (swkey->ip.proto == IPPROTO_UDP) {
			struct ovs_key_udp *udp_key;

			nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key));
			if (!nla)
				goto nla_put_failure;
			udp_key = nla_data(nla);
			udp_key->udp_src = output->tp.src;
			udp_key->udp_dst = output->tp.dst;
		} else if (swkey->ip.proto == IPPROTO_SCTP) {
			struct ovs_key_sctp *sctp_key;

			nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key));
			if (!nla)
				goto nla_put_failure;
			sctp_key = nla_data(nla);
			sctp_key->sctp_src = output->tp.src;
			sctp_key->sctp_dst = output->tp.dst;
		} else if (swkey->eth.type == htons(ETH_P_IP) &&
			   swkey->ip.proto == IPPROTO_ICMP) {
			struct ovs_key_icmp *icmp_key;

			/* ICMP type/code live in the transport port fields. */
			nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key));
			if (!nla)
				goto nla_put_failure;
			icmp_key = nla_data(nla);
			icmp_key->icmp_type = ntohs(output->tp.src);
			icmp_key->icmp_code = ntohs(output->tp.dst);
		} else if (swkey->eth.type == htons(ETH_P_IPV6) &&
			   swkey->ip.proto == IPPROTO_ICMPV6) {
			struct ovs_key_icmpv6 *icmpv6_key;

			nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6,
					  sizeof(*icmpv6_key));
			if (!nla)
				goto nla_put_failure;
			icmpv6_key = nla_data(nla);
			icmpv6_key->icmpv6_type = ntohs(output->tp.src);
			icmpv6_key->icmpv6_code = ntohs(output->tp.dst);

			/* Neighbour discovery carries extra target/link-layer
			 * address fields. */
			if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
			    icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
				struct ovs_key_nd *nd_key;

				nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key));
				if (!nla)
					goto nla_put_failure;
				nd_key = nla_data(nla);
				memcpy(nd_key->nd_target, &output->ipv6.nd.target,
				       sizeof(nd_key->nd_target));
				ether_addr_copy(nd_key->nd_sll, output->ipv6.nd.sll);
				ether_addr_copy(nd_key->nd_tll, output->ipv6.nd.tll);
			}
		}
	}

unencap:
	if (in_encap)
		nla_nest_end(skb, in_encap);
	if (encap)
		nla_nest_end(skb, encap);

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}
-
-int ovs_nla_put_key(const struct sw_flow_key *swkey,
- const struct sw_flow_key *output, int attr, bool is_mask,
- struct sk_buff *skb)
-{
- int err;
- struct nlattr *nla;
-
- nla = nla_nest_start_noflag(skb, attr);
- if (!nla)
- return -EMSGSIZE;
- err = __ovs_nla_put_key(swkey, output, is_mask, skb);
- if (err)
- return err;
- nla_nest_end(skb, nla);
-
- return 0;
-}
-
-/* Called with ovs_mutex or RCU read lock. */
-int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb)
-{
- if (ovs_identifier_is_ufid(&flow->id))
- return nla_put(skb, OVS_FLOW_ATTR_UFID, flow->id.ufid_len,
- flow->id.ufid);
-
- return ovs_nla_put_key(flow->id.unmasked_key, flow->id.unmasked_key,
- OVS_FLOW_ATTR_KEY, false, skb);
-}
-
-/* Called with ovs_mutex or RCU read lock. */
-int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb)
-{
- return ovs_nla_put_key(&flow->key, &flow->key,
- OVS_FLOW_ATTR_KEY, false, skb);
-}
-
-/* Called with ovs_mutex or RCU read lock. */
-int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb)
-{
- return ovs_nla_put_key(&flow->key, &flow->mask->key,
- OVS_FLOW_ATTR_MASK, true, skb);
-}
-
/* Hard cap on the serialized actions buffer; smaller on pre-4.9 kernels. */
#if LINUX_VERSION_CODE < KERNEL_VERSION(4,9,0)
#define MAX_ACTIONS_BUFSIZE (16 * 1024)
#else
#define MAX_ACTIONS_BUFSIZE (32 * 1024)
#endif

/* Allocate a sw_flow_actions buffer with room for 'size' bytes of
 * actions.  Returns ERR_PTR(-ENOMEM) on allocation failure.  The caller
 * owns the result and frees it via ovs_nla_free_flow_actions(). */
static struct sw_flow_actions *nla_alloc_flow_actions(int size)
{
	struct sw_flow_actions *sfa;

	/* Callers are expected to have clamped 'size' already. */
	WARN_ON_ONCE(size > MAX_ACTIONS_BUFSIZE);

	sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL);
	if (!sfa)
		return ERR_PTR(-ENOMEM);

	sfa->actions_len = 0;
	return sfa;
}
-
-static void ovs_nla_free_set_action(const struct nlattr *a)
-{
- const struct nlattr *ovs_key = nla_data(a);
- struct ovs_tunnel_info *ovs_tun;
-
- switch (nla_type(ovs_key)) {
- case OVS_KEY_ATTR_TUNNEL_INFO:
- ovs_tun = nla_data(ovs_key);
- ovs_dst_release((struct dst_entry *)ovs_tun->tun_dst);
- break;
- }
-}
-
-void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
-{
- const struct nlattr *a;
- int rem;
-
- if (!sf_acts)
- return;
-
- nla_for_each_attr(a, sf_acts->actions, sf_acts->actions_len, rem) {
- switch (nla_type(a)) {
- case OVS_ACTION_ATTR_SET:
- ovs_nla_free_set_action(a);
- break;
- case OVS_ACTION_ATTR_CT:
- ovs_ct_free_action(a);
- break;
- }
- }
-
- kfree(sf_acts);
-}
-
/* RCU callback: free the actions once the grace period has elapsed. */
static void __ovs_nla_free_flow_actions(struct rcu_head *head)
{
	ovs_nla_free_flow_actions(container_of(head, struct sw_flow_actions, rcu));
}
-
/* Schedules 'sf_acts' to be freed after the next RCU grace period, so
 * concurrent readers still traversing the actions stay safe.
 * The caller must hold rcu_read_lock for this to be sensible. */
void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *sf_acts)
{
	call_rcu(&sf_acts->rcu, __ovs_nla_free_flow_actions);
}
-
/* reserve_sfa_size() - make room for 'attr_len' more bytes of actions.
 * @sfa: in/out pointer to the actions buffer; may be reallocated, in
 *	which case the old buffer is freed and *sfa updated.
 * @attr_len: unaligned number of bytes to reserve.
 * @log: enable error logging.
 *
 * Returns a pointer to the reserved region inside (*sfa)->actions, or
 * ERR_PTR(-EMSGSIZE) when growing would exceed MAX_ACTIONS_BUFSIZE, or
 * ERR_PTR(-ENOMEM) when the reallocation fails.
 */
static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
				       int attr_len, bool log)
{

	struct sw_flow_actions *acts;
	int new_acts_size;
	size_t req_size = NLA_ALIGN(attr_len);
	int next_offset = offsetof(struct sw_flow_actions, actions) +
			  (*sfa)->actions_len;

	/* kmalloc may have returned more than we asked for; reuse that
	 * slack (ksize) before reallocating. */
	if (req_size <= (ksize(*sfa) - next_offset))
		goto out;

	/* Grow geometrically to amortize the cost of repeated appends. */
	new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2);

	if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
		if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) {
			OVS_NLERR(log, "Flow action size exceeds max %u",
				  MAX_ACTIONS_BUFSIZE);
			return ERR_PTR(-EMSGSIZE);
		}
		new_acts_size = MAX_ACTIONS_BUFSIZE;
	}

	acts = nla_alloc_flow_actions(new_acts_size);
	if (IS_ERR(acts))
		return (void *)acts;

	/* Migrate the existing actions into the larger buffer. */
	memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
	acts->actions_len = (*sfa)->actions_len;
	acts->orig_len = (*sfa)->orig_len;
	kfree(*sfa);
	*sfa = acts;

out:
	(*sfa)->actions_len += req_size;
	return  (struct nlattr *) ((unsigned char *)(*sfa) + next_offset);
}
-
-static struct nlattr *__add_action(struct sw_flow_actions **sfa,
- int attrtype, void *data, int len, bool log)
-{
- struct nlattr *a;
-
- a = reserve_sfa_size(sfa, nla_attr_size(len), log);
- if (IS_ERR(a))
- return a;
-
- a->nla_type = attrtype;
- a->nla_len = nla_attr_size(len);
-
- if (data)
- memcpy(nla_data(a), data, len);
- memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len));
-
- return a;
-}
-
-int ovs_nla_add_action(struct sw_flow_actions **sfa, int attrtype, void *data,
- int len, bool log)
-{
- struct nlattr *a;
-
- a = __add_action(sfa, attrtype, data, len, log);
-
- return PTR_ERR_OR_ZERO(a);
-}
-
-static inline int add_nested_action_start(struct sw_flow_actions **sfa,
- int attrtype, bool log)
-{
- int used = (*sfa)->actions_len;
- int err;
-
- err = ovs_nla_add_action(sfa, attrtype, NULL, 0, log);
- if (err)
- return err;
-
- return used;
-}
-
-static inline void add_nested_action_end(struct sw_flow_actions *sfa,
- int st_offset)
-{
- struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions +
- st_offset);
-
- a->nla_len = sfa->actions_len - st_offset;
-}
-
-static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
- const struct sw_flow_key *key,
- struct sw_flow_actions **sfa,
- __be16 eth_type, __be16 vlan_tci,
- u32 mpls_label_count, bool log);
-
/* validate_and_copy_sample() - validate an OVS_ACTION_ATTR_SAMPLE
 * attribute and copy it into 'sfa', inserting a precomputed
 * struct sample_arg (probability + in-place-execution flag) ahead of
 * the nested actions.
 *
 * Requires exactly one PROBABILITY attribute and one (possibly empty)
 * ACTIONS attribute.  'last' is true when this sample is the final
 * action of the enclosing list.  Returns 0 or a negative errno.
 */
static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
				    const struct sw_flow_key *key,
				    struct sw_flow_actions **sfa,
				    __be16 eth_type, __be16 vlan_tci,
				    u32 mpls_label_count, bool log, bool last)
{
	const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
	const struct nlattr *probability, *actions;
	const struct nlattr *a;
	int rem, start, err;
	struct sample_arg arg;

	/* Collect sub-attributes, rejecting unknown types and duplicates. */
	memset(attrs, 0, sizeof(attrs));
	nla_for_each_nested(a, attr, rem) {
		int type = nla_type(a);
		if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
			return -EINVAL;
		attrs[type] = a;
	}
	if (rem)
		return -EINVAL;

	probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
	if (!probability || nla_len(probability) != sizeof(u32))
		return -EINVAL;

	actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
	if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
		return -EINVAL;

	/* validation done, copy sample action. */
	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log);
	if (start < 0)
		return start;

	/* When both skb and flow may be changed, put the sample
	 * into a deferred fifo. On the other hand, if only skb
	 * may be modified, the actions can be executed in place.
	 *
	 * Do this analysis at the flow installation time.
	 * Set 'clone_action->exec' to true if the actions can be
	 * executed without being deferred.
	 *
	 * If the sample is the last action, it can always be excuted
	 * rather than deferred.
	 */
	arg.exec = last || !actions_may_change_flow(actions);
	arg.probability = nla_get_u32(probability);

	err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_ARG, &arg, sizeof(arg),
				 log);
	if (err)
		return err;

	/* Recursively validate and copy the nested action list. */
	err = __ovs_nla_copy_actions(net, actions, key, sfa,
				     eth_type, vlan_tci, mpls_label_count, log);

	if (err)
		return err;

	add_nested_action_end(*sfa, start);

	return 0;
}
-
/* validate_and_copy_clone() - validate an OVS_ACTION_ATTR_CLONE action
 * and copy it into 'sfa', prefixing the nested actions with an
 * OVS_CLONE_ATTR_EXEC flag recording whether they may be executed in
 * place (when this is the last action, or when the nested actions
 * cannot change the flow key) instead of being deferred.
 *
 * Returns 0 or a negative errno.
 */
static int validate_and_copy_clone(struct net *net,
				   const struct nlattr *attr,
				   const struct sw_flow_key *key,
				   struct sw_flow_actions **sfa,
				   __be16 eth_type, __be16 vlan_tci,
				   u32 mpls_label_count, bool log, bool last)
{
	int start, err;
	u32 exec;

	/* An empty clone is allowed; a truncated attribute header is not. */
	if (nla_len(attr) && nla_len(attr) < NLA_HDRLEN)
		return -EINVAL;

	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_CLONE, log);
	if (start < 0)
		return start;

	exec = last || !actions_may_change_flow(attr);

	err = ovs_nla_add_action(sfa, OVS_CLONE_ATTR_EXEC, &exec,
				 sizeof(exec), log);
	if (err)
		return err;

	/* Recursively validate and copy the nested action list. */
	err = __ovs_nla_copy_actions(net, attr, key, sfa,
				     eth_type, vlan_tci, mpls_label_count, log);
	if (err)
		return err;

	add_nested_action_end(*sfa, start);

	return 0;
}
-
-void ovs_match_init(struct sw_flow_match *match,
- struct sw_flow_key *key,
- bool reset_key,
- struct sw_flow_mask *mask)
-{
- memset(match, 0, sizeof(*match));
- match->key = key;
- match->mask = mask;
-
- if (reset_key)
- memset(key, 0, sizeof(*key));
-
- if (mask) {
- memset(&mask->key, 0, sizeof(mask->key));
- mask->range.start = mask->range.end = 0;
- }
-}
-
/* Walk the Geneve option TLVs stored in 'key', checking that every
 * option header and its 4-byte-unit payload fit within tun_opts_len.
 * Sets TUNNEL_CRIT_OPT in the tunnel flags when any option carries the
 * critical bit.  Returns 0 on success or -EINVAL on a malformed TLV.
 */
static int validate_geneve_opts(struct sw_flow_key *key)
{
	struct geneve_opt *option;
	int opts_len = key->tun_opts_len;
	bool crit_opt = false;

	option = (struct geneve_opt *)TUN_METADATA_OPTS(key, key->tun_opts_len);
	while (opts_len > 0) {
		int len;

		/* Must have room for at least the fixed option header. */
		if (opts_len < sizeof(*option))
			return -EINVAL;

		/* Option length is expressed in 4-byte multiples. */
		len = sizeof(*option) + option->length * 4;
		if (len > opts_len)
			return -EINVAL;

		crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE);

		option = (struct geneve_opt *)((u8 *)option + len);
		opts_len -= len;
	}

	key->tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;

	return 0;
}
-
/* validate_and_copy_set_tun() - convert a SET(TUNNEL(...)) action into a
 * SET(TUNNEL_INFO) action carrying a preconstructed metadata dst.
 *
 * The tunnel key netlink attributes are parsed into a temporary flow
 * key, validated (including Geneve option TLVs), and then baked into a
 * metadata_dst whose reference is owned by the copied action; it is
 * released later by ovs_nla_free_set_action().  Tunnel options are
 * appended to the tun_info so they survive after flow setup.
 *
 * Returns 0 or a negative errno.
 */
static int validate_and_copy_set_tun(const struct nlattr *attr,
				     struct sw_flow_actions **sfa, bool log)
{
	struct sw_flow_match match;
	struct sw_flow_key key;
	struct metadata_dst *tun_dst;
	struct ip_tunnel_info *tun_info;
	struct ovs_tunnel_info *ovs_tun;
	struct nlattr *a;
	int err = 0, start, opts_type;
	__be16 dst_opt_type;

	dst_opt_type = 0;
	ovs_match_init(&match, &key, true, NULL);
	opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log);
	if (opts_type < 0)
		return opts_type;

	if (key.tun_opts_len) {
		/* Map the attribute kind to the tunnel option flag used when
		 * storing the options below. */
		switch (opts_type) {
		case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
			err = validate_geneve_opts(&key);
			if (err < 0)
				return err;
			dst_opt_type = TUNNEL_GENEVE_OPT;
			break;
		case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
			dst_opt_type = TUNNEL_VXLAN_OPT;
			break;
		case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
			dst_opt_type = TUNNEL_ERSPAN_OPT;
			break;
		}
	}

	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log);
	if (start < 0)
		return start;

	tun_dst = metadata_dst_alloc(key.tun_opts_len, METADATA_IP_TUNNEL,
				     GFP_KERNEL);

	if (!tun_dst)
		return -ENOMEM;

	err = dst_cache_init(&tun_dst->u.tun_info.dst_cache, GFP_KERNEL);
	if (err) {
		dst_release((struct dst_entry *)tun_dst);
		return err;
	}
	a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
			 sizeof(*ovs_tun), log);
	if (IS_ERR(a)) {
		/* Drop the reference we took above; the action was never
		 * written, so nothing else will release it. */
		ovs_dst_release((struct dst_entry *)tun_dst);
		return PTR_ERR(a);
	}

	ovs_tun = nla_data(a);
	ovs_tun->tun_dst = tun_dst;

	tun_info = &tun_dst->u.tun_info;
	tun_info->mode = IP_TUNNEL_INFO_TX;
	if (key.tun_proto == AF_INET6)
		tun_info->mode |= IP_TUNNEL_INFO_IPV6;
	tun_info->key = key.tun_key;

	/* We need to store the options in the action itself since
	 * everything else will go away after flow setup. We can append
	 * it to tun_info and then point there.
	 */
	ip_tunnel_info_opts_set(tun_info,
				TUN_METADATA_OPTS(&key, key.tun_opts_len),
				key.tun_opts_len, dst_opt_type);
	add_nested_action_end(*sfa, start);

	return err;
}
-
-static bool validate_nsh(const struct nlattr *attr, bool is_mask,
- bool is_push_nsh, bool log)
-{
- struct sw_flow_match match;
- struct sw_flow_key key;
- int ret = 0;
-
- ovs_match_init(&match, &key, true, NULL);
- ret = nsh_key_put_from_nlattr(attr, &match, is_mask,
- is_push_nsh, log);
- return !ret;
-}
-
-/* Return false if there are any non-masked bits set.
- * Mask follows data immediately, before any netlink padding.
- */
-static bool validate_masked(u8 *data, int len)
-{
- u8 *mask = data + len;
-
- while (len--)
- if (*data++ & ~*mask++)
- return false;
-
- return true;
-}
-
-static int validate_set(const struct nlattr *a,
- const struct sw_flow_key *flow_key,
- struct sw_flow_actions **sfa, bool *skip_copy,
- u8 mac_proto, __be16 eth_type, bool masked, bool log)
-{
- const struct nlattr *ovs_key = nla_data(a);
- int key_type = nla_type(ovs_key);
- size_t key_len;
-
- /* There can be only one key in a action */
- if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
- return -EINVAL;
-
- key_len = nla_len(ovs_key);
- if (masked)
- key_len /= 2;
-
- if (key_type > OVS_KEY_ATTR_MAX ||
- !check_attr_len(key_len, ovs_key_lens[key_type].len))
- return -EINVAL;
-
- if (masked && !validate_masked(nla_data(ovs_key), key_len))
- return -EINVAL;
-
- switch (key_type) {
- case OVS_KEY_ATTR_PRIORITY:
- case OVS_KEY_ATTR_SKB_MARK:
- case OVS_KEY_ATTR_CT_MARK:
- case OVS_KEY_ATTR_CT_LABELS:
- break;
-
- case OVS_KEY_ATTR_ETHERNET:
- if (mac_proto != MAC_PROTO_ETHERNET)
- return -EINVAL;
- break;
-
- case OVS_KEY_ATTR_TUNNEL: {
- int err;
-
-#ifndef USE_UPSTREAM_TUNNEL
- if (eth_p_mpls(eth_type))
- return -EINVAL;
-#endif
- if (masked)
- return -EINVAL; /* Masked tunnel set not supported. */
-
- *skip_copy = true;
- err = validate_and_copy_set_tun(a, sfa, log);
- if (err)
- return err;
- break;
- }
- case OVS_KEY_ATTR_IPV4: {
- const struct ovs_key_ipv4 *ipv4_key;
-
- if (eth_type != htons(ETH_P_IP))
- return -EINVAL;
-
- ipv4_key = nla_data(ovs_key);
-
- if (masked) {
- const struct ovs_key_ipv4 *mask = ipv4_key + 1;
-
- /* Non-writeable fields. */
- if (mask->ipv4_proto || mask->ipv4_frag)
- return -EINVAL;
- } else {
- if (ipv4_key->ipv4_proto != flow_key->ip.proto)
- return -EINVAL;
-
- if (ipv4_key->ipv4_frag != flow_key->ip.frag)
- return -EINVAL;
- }
- break;
- }
- case OVS_KEY_ATTR_IPV6: {
- const struct ovs_key_ipv6 *ipv6_key;
-
- if (eth_type != htons(ETH_P_IPV6))
- return -EINVAL;
-
- ipv6_key = nla_data(ovs_key);
-
- if (masked) {
- const struct ovs_key_ipv6 *mask = ipv6_key + 1;
-
- /* Non-writeable fields. */
- if (mask->ipv6_proto || mask->ipv6_frag)
- return -EINVAL;
-
- /* Invalid bits in the flow label mask? */
- if (ntohl(mask->ipv6_label) & 0xFFF00000)
- return -EINVAL;
- } else {
- if (ipv6_key->ipv6_proto != flow_key->ip.proto)
- return -EINVAL;
-
- if (ipv6_key->ipv6_frag != flow_key->ip.frag)
- return -EINVAL;
- }
- if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
- return -EINVAL;
-
- break;
- }
- case OVS_KEY_ATTR_TCP:
- if ((eth_type != htons(ETH_P_IP) &&
- eth_type != htons(ETH_P_IPV6)) ||
- flow_key->ip.proto != IPPROTO_TCP)
- return -EINVAL;
-
- break;
-
- case OVS_KEY_ATTR_UDP:
- if ((eth_type != htons(ETH_P_IP) &&
- eth_type != htons(ETH_P_IPV6)) ||
- flow_key->ip.proto != IPPROTO_UDP)
- return -EINVAL;
-
- break;
-
- case OVS_KEY_ATTR_MPLS:
- if (!eth_p_mpls(eth_type))
- return -EINVAL;
- break;
-
- case OVS_KEY_ATTR_SCTP:
- if ((eth_type != htons(ETH_P_IP) &&
- eth_type != htons(ETH_P_IPV6)) ||
- flow_key->ip.proto != IPPROTO_SCTP)
- return -EINVAL;
-
- break;
-
- case OVS_KEY_ATTR_NSH:
- if (eth_type != htons(ETH_P_NSH))
- return -EINVAL;
- if (!validate_nsh(nla_data(a), masked, false, log))
- return -EINVAL;
- break;
-
- default:
- return -EINVAL;
- }
-
- /* Convert non-masked non-tunnel set actions to masked set actions. */
- if (!masked && key_type != OVS_KEY_ATTR_TUNNEL) {
- int start, len = key_len * 2;
- struct nlattr *at;
-
- *skip_copy = true;
-
- start = add_nested_action_start(sfa,
- OVS_ACTION_ATTR_SET_TO_MASKED,
- log);
- if (start < 0)
- return start;
-
- at = __add_action(sfa, key_type, NULL, len, log);
- if (IS_ERR(at))
- return PTR_ERR(at);
-
- memcpy(nla_data(at), nla_data(ovs_key), key_len); /* Key. */
- memset(nla_data(at) + key_len, 0xff, key_len); /* Mask. */
- /* Clear non-writeable bits from otherwise writeable fields. */
- if (key_type == OVS_KEY_ATTR_IPV6) {
- struct ovs_key_ipv6 *mask = nla_data(at) + key_len;
-
- mask->ipv6_label &= htonl(0x000FFFFF);
- }
- add_nested_action_end(*sfa, start);
- }
-
- return 0;
-}
-
-static int validate_userspace(const struct nlattr *attr)
-{
- static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
- [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
- [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
- [OVS_USERSPACE_ATTR_EGRESS_TUN_PORT] = {.type = NLA_U32 },
- };
- struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
- int error;
-
- error = nla_parse_nested_deprecated(a, OVS_USERSPACE_ATTR_MAX, attr,
- userspace_policy, NULL);
- if (error)
- return error;
-
- if (!a[OVS_USERSPACE_ATTR_PID] ||
- !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
- return -EINVAL;
-
- return 0;
-}
-
-static const struct nla_policy cpl_policy[OVS_CHECK_PKT_LEN_ATTR_MAX + 1] = {
- [OVS_CHECK_PKT_LEN_ATTR_PKT_LEN] = {.type = NLA_U16 },
- [OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER] = {.type = NLA_NESTED },
- [OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL] = {.type = NLA_NESTED },
-};
-
-static int validate_and_copy_check_pkt_len(struct net *net,
- const struct nlattr *attr,
- const struct sw_flow_key *key,
- struct sw_flow_actions **sfa,
- __be16 eth_type, __be16 vlan_tci,
- u32 mpls_label_count,
- bool log, bool last)
-{
- const struct nlattr *acts_if_greater, *acts_if_lesser_eq;
- struct nlattr *a[OVS_CHECK_PKT_LEN_ATTR_MAX + 1];
- struct check_pkt_len_arg arg;
- int nested_acts_start;
- int start, err;
-
- err = nla_parse_deprecated_strict(a, OVS_CHECK_PKT_LEN_ATTR_MAX,
- nla_data(attr), nla_len(attr),
- cpl_policy, NULL);
- if (err)
- return err;
-
- if (!a[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN] ||
- !nla_get_u16(a[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN]))
- return -EINVAL;
-
- acts_if_lesser_eq = a[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL];
- acts_if_greater = a[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER];
-
- /* Both the nested action should be present. */
- if (!acts_if_greater || !acts_if_lesser_eq)
- return -EINVAL;
-
- /* validation done, copy the nested actions. */
- start = add_nested_action_start(sfa, OVS_ACTION_ATTR_CHECK_PKT_LEN,
- log);
- if (start < 0)
- return start;
-
- arg.pkt_len = nla_get_u16(a[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN]);
- arg.exec_for_lesser_equal =
- last || !actions_may_change_flow(acts_if_lesser_eq);
- arg.exec_for_greater =
- last || !actions_may_change_flow(acts_if_greater);
-
- err = ovs_nla_add_action(sfa, OVS_CHECK_PKT_LEN_ATTR_ARG, &arg,
- sizeof(arg), log);
- if (err)
- return err;
-
- nested_acts_start = add_nested_action_start(sfa,
- OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL, log);
- if (nested_acts_start < 0)
- return nested_acts_start;
-
- err = __ovs_nla_copy_actions(net, acts_if_lesser_eq, key, sfa,
- eth_type, vlan_tci, mpls_label_count, log);
-
- if (err)
- return err;
-
- add_nested_action_end(*sfa, nested_acts_start);
-
- nested_acts_start = add_nested_action_start(sfa,
- OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER, log);
- if (nested_acts_start < 0)
- return nested_acts_start;
-
- err = __ovs_nla_copy_actions(net, acts_if_greater, key, sfa,
- eth_type, vlan_tci, mpls_label_count, log);
-
- if (err)
- return err;
-
- add_nested_action_end(*sfa, nested_acts_start);
- add_nested_action_end(*sfa, start);
- return 0;
-}
-
-static int copy_action(const struct nlattr *from,
- struct sw_flow_actions **sfa, bool log)
-{
- int totlen = NLA_ALIGN(from->nla_len);
- struct nlattr *to;
-
- to = reserve_sfa_size(sfa, from->nla_len, log);
- if (IS_ERR(to))
- return PTR_ERR(to);
-
- memcpy(to, from, totlen);
- return 0;
-}
-
-static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
- const struct sw_flow_key *key,
- struct sw_flow_actions **sfa,
- __be16 eth_type, __be16 vlan_tci,
- u32 mpls_label_count, bool log)
-{
- u8 mac_proto = ovs_key_mac_proto(key);
- const struct nlattr *a;
- int rem, err;
-
- nla_for_each_nested(a, attr, rem) {
- /* Expected argument lengths, (u32)-1 for variable length. */
- static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
- [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
- [OVS_ACTION_ATTR_RECIRC] = sizeof(u32),
- [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
- [OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls),
- [OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16),
- [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
- [OVS_ACTION_ATTR_POP_VLAN] = 0,
- [OVS_ACTION_ATTR_SET] = (u32)-1,
- [OVS_ACTION_ATTR_SET_MASKED] = (u32)-1,
- [OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
- [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash),
- [OVS_ACTION_ATTR_CT] = (u32)-1,
- [OVS_ACTION_ATTR_CT_CLEAR] = 0,
- [OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc),
- [OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth),
- [OVS_ACTION_ATTR_POP_ETH] = 0,
- [OVS_ACTION_ATTR_PUSH_NSH] = (u32)-1,
- [OVS_ACTION_ATTR_POP_NSH] = 0,
- [OVS_ACTION_ATTR_METER] = sizeof(u32),
- [OVS_ACTION_ATTR_CLONE] = (u32)-1,
- [OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1,
- };
- const struct ovs_action_push_vlan *vlan;
- int type = nla_type(a);
- bool skip_copy;
-
- if (type > OVS_ACTION_ATTR_MAX ||
- (action_lens[type] != nla_len(a) &&
- action_lens[type] != (u32)-1))
- return -EINVAL;
-
- skip_copy = false;
- switch (type) {
- case OVS_ACTION_ATTR_UNSPEC:
- return -EINVAL;
-
- case OVS_ACTION_ATTR_USERSPACE:
- err = validate_userspace(a);
- if (err)
- return err;
- break;
-
- case OVS_ACTION_ATTR_OUTPUT:
- if (nla_get_u32(a) >= DP_MAX_PORTS)
- return -EINVAL;
- break;
-
- case OVS_ACTION_ATTR_TRUNC: {
- const struct ovs_action_trunc *trunc = nla_data(a);
-
- if (trunc->max_len < ETH_HLEN)
- return -EINVAL;
- break;
- }
-
- case OVS_ACTION_ATTR_HASH: {
- const struct ovs_action_hash *act_hash = nla_data(a);
-
- switch (act_hash->hash_alg) {
- case OVS_HASH_ALG_L4:
- break;
- default:
- return -EINVAL;
- }
-
- break;
- }
-
- case OVS_ACTION_ATTR_POP_VLAN:
- if (mac_proto != MAC_PROTO_ETHERNET)
- return -EINVAL;
- vlan_tci = htons(0);
- break;
-
- case OVS_ACTION_ATTR_PUSH_VLAN:
- if (mac_proto != MAC_PROTO_ETHERNET)
- return -EINVAL;
- vlan = nla_data(a);
- if (!eth_type_vlan(vlan->vlan_tpid))
- return -EINVAL;
- if (!(vlan->vlan_tci & htons(VLAN_CFI_MASK)))
- return -EINVAL;
- vlan_tci = vlan->vlan_tci;
- break;
-
- case OVS_ACTION_ATTR_RECIRC:
- break;
-
- case OVS_ACTION_ATTR_PUSH_MPLS: {
- const struct ovs_action_push_mpls *mpls = nla_data(a);
-
- if (!eth_p_mpls(mpls->mpls_ethertype))
- return -EINVAL;
- /* Prohibit push MPLS other than to a white list
- * for packets that have a known tag order.
- */
- if (vlan_tci & htons(VLAN_CFI_MASK) ||
- (eth_type != htons(ETH_P_IP) &&
- eth_type != htons(ETH_P_IPV6) &&
- eth_type != htons(ETH_P_ARP) &&
- eth_type != htons(ETH_P_RARP) &&
- !eth_p_mpls(eth_type)))
- return -EINVAL;
- eth_type = mpls->mpls_ethertype;
- mpls_label_count++;
- break;
- }
-
- case OVS_ACTION_ATTR_POP_MPLS: {
- __be16 proto;
- if (vlan_tci & htons(VLAN_CFI_MASK) ||
- !eth_p_mpls(eth_type))
- return -EINVAL;
-
- /* Disallow subsequent L2.5+ set actions and mpls_pop
- * actions once the last MPLS label in the packet is
- * popped as there is no check here to ensure that
- * the new eth type is valid and thus set actions could
- * write off the end of the packet or otherwise corrupt
- * it.
- *
- * Support for these actions is planned using packet
- * recirculation.
- */
- proto = nla_get_be16(a);
- mpls_label_count--;
-
- if (!eth_p_mpls(proto) || !mpls_label_count)
- eth_type = htons(0);
- else
- eth_type = proto;
- break;
- }
- case OVS_ACTION_ATTR_SET:
- err = validate_set(a, key, sfa,
- &skip_copy, mac_proto, eth_type,
- false, log);
- if (err)
- return err;
- break;
-
- case OVS_ACTION_ATTR_SET_MASKED:
- err = validate_set(a, key, sfa,
- &skip_copy, mac_proto, eth_type,
- true, log);
- if (err)
- return err;
- break;
-
- case OVS_ACTION_ATTR_SAMPLE: {
- bool last = nla_is_last(a, rem);
-
- err = validate_and_copy_sample(net, a, key, sfa,
- eth_type, vlan_tci,
- mpls_label_count,
- log, last);
- if (err)
- return err;
- skip_copy = true;
- break;
- }
-
- case OVS_ACTION_ATTR_CT:
- err = ovs_ct_copy_action(net, a, key, sfa, log);
- if (err)
- return err;
- skip_copy = true;
- break;
-
- case OVS_ACTION_ATTR_CT_CLEAR:
- break;
-
- case OVS_ACTION_ATTR_PUSH_ETH:
- /* Disallow pushing an Ethernet header if one
- * is already present */
- if (mac_proto != MAC_PROTO_NONE)
- return -EINVAL;
- mac_proto = MAC_PROTO_ETHERNET;
- break;
-
- case OVS_ACTION_ATTR_POP_ETH:
- if (mac_proto != MAC_PROTO_ETHERNET)
- return -EINVAL;
- if (vlan_tci & htons(VLAN_CFI_MASK))
- return -EINVAL;
- mac_proto = MAC_PROTO_NONE;
- break;
-
- case OVS_ACTION_ATTR_PUSH_NSH:
- if (mac_proto != MAC_PROTO_ETHERNET) {
- u8 next_proto;
-
- next_proto = tun_p_from_eth_p(eth_type);
- if (!next_proto)
- return -EINVAL;
- }
- mac_proto = MAC_PROTO_NONE;
- if (!validate_nsh(nla_data(a), false, true, true))
- return -EINVAL;
- break;
-
- case OVS_ACTION_ATTR_POP_NSH: {
- __be16 inner_proto;
-
- if (eth_type != htons(ETH_P_NSH))
- return -EINVAL;
- inner_proto = tun_p_to_eth_p(key->nsh.base.np);
- if (!inner_proto)
- return -EINVAL;
- if (key->nsh.base.np == TUN_P_ETHERNET)
- mac_proto = MAC_PROTO_ETHERNET;
- else
- mac_proto = MAC_PROTO_NONE;
- break;
- }
-
- case OVS_ACTION_ATTR_METER:
- /* Non-existent meters are simply ignored. */
- break;
-
- case OVS_ACTION_ATTR_CLONE: {
- bool last = nla_is_last(a, rem);
-
- err = validate_and_copy_clone(net, a, key, sfa,
- eth_type, vlan_tci,
- mpls_label_count,
- log, last);
- if (err)
- return err;
- skip_copy = true;
- break;
- }
-
- case OVS_ACTION_ATTR_CHECK_PKT_LEN: {
- bool last = nla_is_last(a, rem);
-
- err = validate_and_copy_check_pkt_len(net, a, key, sfa,
- eth_type,
- vlan_tci, log,
- mpls_label_count,
- last);
- if (err)
- return err;
- skip_copy = true;
- break;
- }
-
- default:
- OVS_NLERR(log, "Unknown Action type %d", type);
- return -EINVAL;
- }
- if (!skip_copy) {
- err = copy_action(a, sfa, log);
- if (err)
- return err;
- }
- }
-
- if (rem > 0)
- return -EINVAL;
-
- return 0;
-}
-
-/* 'key' must be the masked key. */
-int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
- const struct sw_flow_key *key,
- struct sw_flow_actions **sfa, bool log)
-{
- int err;
- u32 mpls_label_count = 0;
-
- *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE));
- if (IS_ERR(*sfa))
- return PTR_ERR(*sfa);
-
- if (eth_p_mpls(key->eth.type))
- mpls_label_count = hweight_long(key->mpls.num_labels_mask);
-
- (*sfa)->orig_len = nla_len(attr);
- err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type,
- key->eth.vlan.tci, mpls_label_count, log);
- if (err)
- ovs_nla_free_flow_actions(*sfa);
-
- return err;
-}
-
-static int sample_action_to_attr(const struct nlattr *attr,
- struct sk_buff *skb)
-{
- struct nlattr *start, *ac_start = NULL, *sample_arg;
- int err = 0, rem = nla_len(attr);
- const struct sample_arg *arg;
- struct nlattr *actions;
-
- start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_SAMPLE);
- if (!start)
- return -EMSGSIZE;
-
- sample_arg = nla_data(attr);
- arg = nla_data(sample_arg);
- actions = nla_next(sample_arg, &rem);
-
- if (nla_put_u32(skb, OVS_SAMPLE_ATTR_PROBABILITY, arg->probability)) {
- err = -EMSGSIZE;
- goto out;
- }
-
- ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS);
- if (!ac_start) {
- err = -EMSGSIZE;
- goto out;
- }
-
- err = ovs_nla_put_actions(actions, rem, skb);
-
-out:
- if (err) {
- nla_nest_cancel(skb, ac_start);
- nla_nest_cancel(skb, start);
- } else {
- nla_nest_end(skb, ac_start);
- nla_nest_end(skb, start);
- }
-
- return err;
-}
-
-static int clone_action_to_attr(const struct nlattr *attr,
- struct sk_buff *skb)
-{
- struct nlattr *start;
- int err = 0, rem = nla_len(attr);
-
- start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_CLONE);
- if (!start)
- return -EMSGSIZE;
-
- err = ovs_nla_put_actions(nla_data(attr), rem, skb);
-
- if (err)
- nla_nest_cancel(skb, start);
- else
- nla_nest_end(skb, start);
-
- return err;
-}
-
-static int check_pkt_len_action_to_attr(const struct nlattr *attr,
- struct sk_buff *skb)
-{
- struct nlattr *start, *ac_start = NULL;
- const struct check_pkt_len_arg *arg;
- const struct nlattr *a, *cpl_arg;
- int err = 0, rem = nla_len(attr);
-
- start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_CHECK_PKT_LEN);
- if (!start)
- return -EMSGSIZE;
-
- /* The first nested attribute in 'attr' is always
- * 'OVS_CHECK_PKT_LEN_ATTR_ARG'.
- */
- cpl_arg = nla_data(attr);
- arg = nla_data(cpl_arg);
-
- if (nla_put_u16(skb, OVS_CHECK_PKT_LEN_ATTR_PKT_LEN, arg->pkt_len)) {
- err = -EMSGSIZE;
- goto out;
- }
-
- /* Second nested attribute in 'attr' is always
- * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL'.
- */
- a = nla_next(cpl_arg, &rem);
- ac_start = nla_nest_start_noflag(skb,
- OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL);
- if (!ac_start) {
- err = -EMSGSIZE;
- goto out;
- }
-
- err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb);
- if (err) {
- nla_nest_cancel(skb, ac_start);
- goto out;
- } else {
- nla_nest_end(skb, ac_start);
- }
-
- /* Third nested attribute in 'attr' is always
- * OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER.
- */
- a = nla_next(a, &rem);
- ac_start = nla_nest_start_noflag(skb,
- OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER);
- if (!ac_start) {
- err = -EMSGSIZE;
- goto out;
- }
-
- err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb);
- if (err) {
- nla_nest_cancel(skb, ac_start);
- goto out;
- } else {
- nla_nest_end(skb, ac_start);
- }
-
- nla_nest_end(skb, start);
- return 0;
-
-out:
- nla_nest_cancel(skb, start);
- return err;
-}
-
-static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
-{
- const struct nlattr *ovs_key = nla_data(a);
- int key_type = nla_type(ovs_key);
- struct nlattr *start;
- int err;
-
- switch (key_type) {
- case OVS_KEY_ATTR_TUNNEL_INFO: {
- struct ovs_tunnel_info *ovs_tun = nla_data(ovs_key);
- struct ip_tunnel_info *tun_info = &ovs_tun->tun_dst->u.tun_info;
-
- start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_SET);
- if (!start)
- return -EMSGSIZE;
-
- err = ip_tun_to_nlattr(skb, &tun_info->key,
- ip_tunnel_info_opts(tun_info),
- tun_info->options_len,
- ip_tunnel_info_af(tun_info));
- if (err)
- return err;
- nla_nest_end(skb, start);
- break;
- }
- default:
- if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key))
- return -EMSGSIZE;
- break;
- }
-
- return 0;
-}
-
-static int masked_set_action_to_set_action_attr(const struct nlattr *a,
- struct sk_buff *skb)
-{
- const struct nlattr *ovs_key = nla_data(a);
- struct nlattr *nla;
- size_t key_len = nla_len(ovs_key) / 2;
-
- /* Revert the conversion we did from a non-masked set action to
- * masked set action.
- */
- nla = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_SET);
- if (!nla)
- return -EMSGSIZE;
-
- if (nla_put(skb, nla_type(ovs_key), key_len, nla_data(ovs_key)))
- return -EMSGSIZE;
-
- nla_nest_end(skb, nla);
- return 0;
-}
-
-int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
-{
- const struct nlattr *a;
- int rem, err;
-
- nla_for_each_attr(a, attr, len, rem) {
- int type = nla_type(a);
-
- switch (type) {
- case OVS_ACTION_ATTR_SET:
- err = set_action_to_attr(a, skb);
- if (err)
- return err;
- break;
-
- case OVS_ACTION_ATTR_SET_TO_MASKED:
- err = masked_set_action_to_set_action_attr(a, skb);
- if (err)
- return err;
- break;
-
- case OVS_ACTION_ATTR_SAMPLE:
- err = sample_action_to_attr(a, skb);
- if (err)
- return err;
- break;
-
- case OVS_ACTION_ATTR_CT:
- err = ovs_ct_action_to_attr(nla_data(a), skb);
- if (err)
- return err;
- break;
-
- case OVS_ACTION_ATTR_CLONE:
- err = clone_action_to_attr(a, skb);
- if (err)
- return err;
- break;
-
- case OVS_ACTION_ATTR_CHECK_PKT_LEN:
- err = check_pkt_len_action_to_attr(a, skb);
- if (err)
- return err;
- break;
-
- default:
- if (nla_put(skb, type, nla_len(a), nla_data(a)))
- return -EMSGSIZE;
- break;
- }
- }
-
- return 0;
-}
diff --git a/datapath/flow_netlink.h b/datapath/flow_netlink.h
deleted file mode 100644
index e10df2b5c..000000000
--- a/datapath/flow_netlink.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2007-2015 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
- */
-
-
-#ifndef FLOW_NETLINK_H
-#define FLOW_NETLINK_H 1
-
-#include <linux/kernel.h>
-#include <linux/netlink.h>
-#include <linux/openvswitch.h>
-#include <linux/spinlock.h>
-#include <linux/types.h>
-#include <linux/rcupdate.h>
-#include <linux/if_ether.h>
-#include <linux/in6.h>
-#include <linux/jiffies.h>
-#include <linux/time.h>
-
-#include <net/inet_ecn.h>
-#include <net/ip_tunnels.h>
-
-#include "flow.h"
-
-size_t ovs_tun_key_attr_size(void);
-size_t ovs_key_attr_size(void);
-
-void ovs_match_init(struct sw_flow_match *match,
- struct sw_flow_key *key, bool reset_key,
- struct sw_flow_mask *mask);
-
-int ovs_nla_put_key(const struct sw_flow_key *, const struct sw_flow_key *,
- int attr, bool is_mask, struct sk_buff *);
-int parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[],
- u64 *attrsp, bool log);
-int ovs_nla_get_flow_metadata(struct net *net,
- const struct nlattr *a[OVS_KEY_ATTR_MAX + 1],
- u64 attrs, struct sw_flow_key *key, bool log);
-
-int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb);
-int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb);
-int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb);
-
-int ovs_nla_get_match(struct net *, struct sw_flow_match *,
- const struct nlattr *key, const struct nlattr *mask,
- bool log);
-int ovs_nla_put_tunnel_info(struct sk_buff *skb,
- struct ip_tunnel_info *tun_info);
-
-bool ovs_nla_get_ufid(struct sw_flow_id *, const struct nlattr *, bool log);
-int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
- const struct sw_flow_key *key, bool log);
-u32 ovs_nla_get_ufid_flags(const struct nlattr *attr);
-
-int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
- const struct sw_flow_key *key,
- struct sw_flow_actions **sfa, bool log);
-int ovs_nla_add_action(struct sw_flow_actions **sfa, int attrtype,
- void *data, int len, bool log);
-int ovs_nla_put_actions(const struct nlattr *attr,
- int len, struct sk_buff *skb);
-
-void ovs_nla_free_flow_actions(struct sw_flow_actions *);
-void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *);
-
-int nsh_key_from_nlattr(const struct nlattr *attr, struct ovs_key_nsh *nsh,
- struct ovs_key_nsh *nsh_mask);
-int nsh_hdr_from_nlattr(const struct nlattr *attr, struct nshhdr *nh,
- size_t size);
-
-#endif /* flow_netlink.h */
diff --git a/datapath/flow_table.c b/datapath/flow_table.c
deleted file mode 100644
index 650338fb0..000000000
--- a/datapath/flow_table.c
+++ /dev/null
@@ -1,988 +0,0 @@
-/*
- * Copyright (c) 2007-2013 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
- */
-
-#include "flow.h"
-#include "datapath.h"
-#include <linux/uaccess.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/if_ether.h>
-#include <linux/if_vlan.h>
-#include <net/llc_pdu.h>
-#include <linux/kernel.h>
-#include <linux/jhash.h>
-#include <linux/jiffies.h>
-#include <linux/llc.h>
-#include <linux/module.h>
-#include <linux/in.h>
-#include <linux/rcupdate.h>
-#include <linux/cpumask.h>
-#include <linux/if_arp.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/sctp.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include <linux/icmp.h>
-#include <linux/icmpv6.h>
-#include <linux/rculist.h>
-#include <net/ip.h>
-#include <net/ipv6.h>
-#include <net/ndisc.h>
-
-#include "flow_netlink.h"
-
-#define TBL_MIN_BUCKETS 1024
-#define MASK_ARRAY_SIZE_MIN 16
-#define REHASH_INTERVAL (10 * 60 * HZ)
-
-#define MC_HASH_SHIFT 8
-#define MC_HASH_ENTRIES (1u << MC_HASH_SHIFT)
-#define MC_HASH_SEGS ((sizeof(uint32_t) * 8) / MC_HASH_SHIFT)
-
-static struct kmem_cache *flow_cache;
-struct kmem_cache *flow_stats_cache __read_mostly;
-
-static u16 range_n_bytes(const struct sw_flow_key_range *range)
-{
- return range->end - range->start;
-}
-
-void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
- bool full, const struct sw_flow_mask *mask)
-{
- int start = full ? 0 : mask->range.start;
- int len = full ? sizeof *dst : range_n_bytes(&mask->range);
- const long *m = (const long *)((const u8 *)&mask->key + start);
- const long *s = (const long *)((const u8 *)src + start);
- long *d = (long *)((u8 *)dst + start);
- int i;
-
- /* If 'full' is true then all of 'dst' is fully initialized. Otherwise,
- * if 'full' is false the memory outside of the 'mask->range' is left
- * uninitialized. This can be used as an optimization when further
- * operations on 'dst' only use contents within 'mask->range'.
- */
- for (i = 0; i < len; i += sizeof(long))
- *d++ = *s++ & *m++;
-}
-
-struct sw_flow *ovs_flow_alloc(void)
-{
- struct sw_flow *flow;
- struct sw_flow_stats *stats;
-
- flow = kmem_cache_zalloc(flow_cache, GFP_KERNEL);
- if (!flow)
- return ERR_PTR(-ENOMEM);
-
- flow->stats_last_writer = -1;
-
- /* Initialize the default stat node. */
- stats = kmem_cache_alloc_node(flow_stats_cache,
- GFP_KERNEL | __GFP_ZERO,
- node_online(0) ? 0 : NUMA_NO_NODE);
- if (!stats)
- goto err;
-
- spin_lock_init(&stats->lock);
-
- RCU_INIT_POINTER(flow->stats[0], stats);
-
- cpumask_set_cpu(0, &flow->cpu_used_mask);
-
- return flow;
-err:
- kmem_cache_free(flow_cache, flow);
- return ERR_PTR(-ENOMEM);
-}
-
-int ovs_flow_tbl_count(const struct flow_table *table)
-{
- return table->count;
-}
-
-static void flow_free(struct sw_flow *flow)
-{
- int cpu;
-
- if (ovs_identifier_is_key(&flow->id))
- kfree(flow->id.unmasked_key);
- if (flow->sf_acts)
- ovs_nla_free_flow_actions((struct sw_flow_actions __force *)flow->sf_acts);
- /* We open code this to make sure cpu 0 is always considered */
- for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask))
- if (flow->stats[cpu])
- kmem_cache_free(flow_stats_cache,
- rcu_dereference_raw(flow->stats[cpu]));
- kmem_cache_free(flow_cache, flow);
-}
-
-static void rcu_free_flow_callback(struct rcu_head *rcu)
-{
- struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu);
-
- flow_free(flow);
-}
-
-void ovs_flow_free(struct sw_flow *flow, bool deferred)
-{
- if (!flow)
- return;
-
- if (deferred)
- call_rcu(&flow->rcu, rcu_free_flow_callback);
- else
- flow_free(flow);
-}
-
-static void __table_instance_destroy(struct table_instance *ti)
-{
- kvfree(ti->buckets);
- kfree(ti);
-}
-
-static struct table_instance *table_instance_alloc(int new_size)
-{
- struct table_instance *ti = kmalloc(sizeof(*ti), GFP_KERNEL);
- int i;
-
- if (!ti)
- return NULL;
-
- ti->buckets = kvmalloc_array(new_size, sizeof(struct hlist_head),
- GFP_KERNEL);
- if (!ti->buckets) {
- kfree(ti);
- return NULL;
- }
-
- for (i = 0; i < new_size; i++)
- INIT_HLIST_HEAD(&ti->buckets[i]);
-
- ti->n_buckets = new_size;
- ti->node_ver = 0;
- ti->keep_flows = false;
- get_random_bytes(&ti->hash_seed, sizeof(u32));
-
- return ti;
-}
-
-static void mask_array_rcu_cb(struct rcu_head *rcu)
-{
- struct mask_array *ma = container_of(rcu, struct mask_array, rcu);
-
- kfree(ma);
-}
-
-static struct mask_array *tbl_mask_array_alloc(int size)
-{
- struct mask_array *new;
-
- size = max(MASK_ARRAY_SIZE_MIN, size);
- new = kzalloc(sizeof(struct mask_array) +
- sizeof(struct sw_flow_mask *) * size, GFP_KERNEL);
- if (!new)
- return NULL;
-
- new->count = 0;
- new->max = size;
-
- return new;
-}
-
-static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
-{
- struct mask_array *old;
- struct mask_array *new;
-
- new = tbl_mask_array_alloc(size);
- if (!new)
- return -ENOMEM;
-
- old = ovsl_dereference(tbl->mask_array);
- if (old) {
- int i, count = 0;
-
- for (i = 0; i < old->max; i++) {
- if (ovsl_dereference(old->masks[i]))
- new->masks[count++] = old->masks[i];
- }
-
- new->count = count;
- }
- rcu_assign_pointer(tbl->mask_array, new);
-
- if (old)
- call_rcu(&old->rcu, mask_array_rcu_cb);
-
- return 0;
-}
-
-static int tbl_mask_array_add_mask(struct flow_table *tbl,
- struct sw_flow_mask *new)
-{
- struct mask_array *ma = ovsl_dereference(tbl->mask_array);
- int err, ma_count = READ_ONCE(ma->count);
-
- if (ma_count >= ma->max) {
- err = tbl_mask_array_realloc(tbl, ma->max +
- MASK_ARRAY_SIZE_MIN);
- if (err)
- return err;
-
- ma = ovsl_dereference(tbl->mask_array);
- }
-
- BUG_ON(ovsl_dereference(ma->masks[ma_count]));
-
- rcu_assign_pointer(ma->masks[ma_count], new);
- WRITE_ONCE(ma->count, ma_count +1);
-
- return 0;
-}
-
-static void tbl_mask_array_del_mask(struct flow_table *tbl,
- struct sw_flow_mask *mask)
-{
- struct mask_array *ma = ovsl_dereference(tbl->mask_array);
- int i, ma_count = READ_ONCE(ma->count);
-
- /* Remove the deleted mask pointers from the array */
- for (i = 0; i < ma_count; i++) {
- if (mask == ovsl_dereference(ma->masks[i]))
- goto found;
- }
-
- BUG();
- return;
-
-found:
- WRITE_ONCE(ma->count, ma_count -1);
-
- rcu_assign_pointer(ma->masks[i], ma->masks[ma_count -1]);
- RCU_INIT_POINTER(ma->masks[ma_count -1], NULL);
-
- kfree_rcu(mask, rcu);
-
- /* Shrink the mask array if necessary. */
- if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) &&
- ma_count <= (ma->max / 3))
- tbl_mask_array_realloc(tbl, ma->max / 2);
-}
-
-/* Remove 'mask' from the mask list, if it is not needed any more. */
-static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
-{
- if (mask) {
- /* ovs-lock is required to protect mask-refcount and
- * mask list.
- */
- ASSERT_OVSL();
- BUG_ON(!mask->ref_count);
- mask->ref_count--;
-
- if (!mask->ref_count)
- tbl_mask_array_del_mask(tbl, mask);
- }
-}
-
-int ovs_flow_tbl_init(struct flow_table *table)
-{
- struct table_instance *ti, *ufid_ti;
- struct mask_array *ma;
-
- table->mask_cache = __alloc_percpu(sizeof(struct mask_cache_entry) *
- MC_HASH_ENTRIES, __alignof__(struct mask_cache_entry));
- if (!table->mask_cache)
- return -ENOMEM;
-
- ma = tbl_mask_array_alloc(MASK_ARRAY_SIZE_MIN);
- if (!ma)
- goto free_mask_cache;
-
- ti = table_instance_alloc(TBL_MIN_BUCKETS);
- if (!ti)
- goto free_mask_array;
-
- ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS);
- if (!ufid_ti)
- goto free_ti;
-
- rcu_assign_pointer(table->ti, ti);
- rcu_assign_pointer(table->ufid_ti, ufid_ti);
- rcu_assign_pointer(table->mask_array, ma);
- table->last_rehash = jiffies;
- table->count = 0;
- table->ufid_count = 0;
- return 0;
-
-free_ti:
- __table_instance_destroy(ti);
-free_mask_array:
- kfree(ma);
-free_mask_cache:
- free_percpu(table->mask_cache);
- return -ENOMEM;
-}
-
-static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
-{
- struct table_instance *ti = container_of(rcu, struct table_instance, rcu);
-
- __table_instance_destroy(ti);
-}
-
-static void table_instance_flow_free(struct flow_table *table,
- struct table_instance *ti,
- struct table_instance *ufid_ti,
- struct sw_flow *flow,
- bool count)
-{
- hlist_del_rcu(&flow->flow_table.node[ti->node_ver]);
- if (count)
- table->count--;
-
- if (ovs_identifier_is_ufid(&flow->id)) {
- hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]);
-
- if (count)
- table->ufid_count--;
- }
-
- flow_mask_remove(table, flow->mask);
-}
-
-static void table_instance_destroy(struct flow_table *table,
- struct table_instance *ti,
- struct table_instance *ufid_ti,
- bool deferred)
-{
- int i;
-
- if (!ti)
- return;
-
- BUG_ON(!ufid_ti);
- if (ti->keep_flows)
- goto skip_flows;
-
- for (i = 0; i < ti->n_buckets; i++) {
- struct sw_flow *flow;
- struct hlist_head *head = &ti->buckets[i];
- struct hlist_node *n;
-
- hlist_for_each_entry_safe(flow, n, head,
- flow_table.node[ti->node_ver]) {
-
- table_instance_flow_free(table, ti, ufid_ti,
- flow, false);
- ovs_flow_free(flow, deferred);
- }
- }
-
-skip_flows:
- if (deferred) {
- call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
- call_rcu(&ufid_ti->rcu, flow_tbl_destroy_rcu_cb);
- } else {
- __table_instance_destroy(ti);
- __table_instance_destroy(ufid_ti);
- }
-}
-
-/* No need for locking this function is called from RCU callback or
- * error path.
- */
-void ovs_flow_tbl_destroy(struct flow_table *table)
-{
- struct table_instance *ti = rcu_dereference_raw(table->ti);
- struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti);
-
- free_percpu(table->mask_cache);
- kfree(rcu_dereference_raw(table->mask_array));
- table_instance_destroy(table, ti, ufid_ti, false);
-}
-
-struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
- u32 *bucket, u32 *last)
-{
- struct sw_flow *flow;
- struct hlist_head *head;
- int ver;
- int i;
-
- ver = ti->node_ver;
- while (*bucket < ti->n_buckets) {
- i = 0;
- head = &ti->buckets[*bucket];
- hlist_for_each_entry_rcu(flow, head, flow_table.node[ver]) {
- if (i < *last) {
- i++;
- continue;
- }
- *last = i + 1;
- return flow;
- }
- (*bucket)++;
- *last = 0;
- }
-
- return NULL;
-}
-
-static struct hlist_head *find_bucket(struct table_instance *ti, u32 hash)
-{
- hash = jhash_1word(hash, ti->hash_seed);
- return &ti->buckets[hash & (ti->n_buckets - 1)];
-}
-
-static void table_instance_insert(struct table_instance *ti,
- struct sw_flow *flow)
-{
- struct hlist_head *head;
-
- head = find_bucket(ti, flow->flow_table.hash);
- hlist_add_head_rcu(&flow->flow_table.node[ti->node_ver], head);
-}
-
-static void ufid_table_instance_insert(struct table_instance *ti,
- struct sw_flow *flow)
-{
- struct hlist_head *head;
-
- head = find_bucket(ti, flow->ufid_table.hash);
- hlist_add_head_rcu(&flow->ufid_table.node[ti->node_ver], head);
-}
-
-static void flow_table_copy_flows(struct table_instance *old,
- struct table_instance *new, bool ufid)
-{
- int old_ver;
- int i;
-
- old_ver = old->node_ver;
- new->node_ver = !old_ver;
-
- /* Insert in new table. */
- for (i = 0; i < old->n_buckets; i++) {
- struct sw_flow *flow;
- struct hlist_head *head = &old->buckets[i];
-
- if (ufid)
- hlist_for_each_entry_rcu(flow, head,
- ufid_table.node[old_ver])
- ufid_table_instance_insert(new, flow);
- else
- hlist_for_each_entry_rcu(flow, head,
- flow_table.node[old_ver])
- table_instance_insert(new, flow);
- }
-
- old->keep_flows = true;
-}
-
-static struct table_instance *table_instance_rehash(struct table_instance *ti,
- int n_buckets, bool ufid)
-{
- struct table_instance *new_ti;
-
- new_ti = table_instance_alloc(n_buckets);
- if (!new_ti)
- return NULL;
-
- flow_table_copy_flows(ti, new_ti, ufid);
-
- return new_ti;
-}
-
-int ovs_flow_tbl_flush(struct flow_table *flow_table)
-{
- struct table_instance *old_ti, *new_ti;
- struct table_instance *old_ufid_ti, *new_ufid_ti;
-
- new_ti = table_instance_alloc(TBL_MIN_BUCKETS);
- if (!new_ti)
- return -ENOMEM;
- new_ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS);
- if (!new_ufid_ti)
- goto err_free_ti;
-
- old_ti = ovsl_dereference(flow_table->ti);
- old_ufid_ti = ovsl_dereference(flow_table->ufid_ti);
-
- rcu_assign_pointer(flow_table->ti, new_ti);
- rcu_assign_pointer(flow_table->ufid_ti, new_ufid_ti);
- flow_table->last_rehash = jiffies;
- flow_table->count = 0;
- flow_table->ufid_count = 0;
-
- table_instance_destroy(flow_table, old_ti, old_ufid_ti, true);
- return 0;
-
-err_free_ti:
- __table_instance_destroy(new_ti);
- return -ENOMEM;
-}
-
-static u32 flow_hash(const struct sw_flow_key *key,
- const struct sw_flow_key_range *range)
-{
- const u32 *hash_key = (const u32 *)((const u8 *)key + range->start);
-
- /* Make sure number of hash bytes are multiple of u32. */
- int hash_u32s = range_n_bytes(range) >> 2;
-
- return jhash2(hash_key, hash_u32s, 0);
-}
-
-static int flow_key_start(const struct sw_flow_key *key)
-{
- if (key->tun_proto)
- return 0;
- else
- return rounddown(offsetof(struct sw_flow_key, phy),
- sizeof(long));
-}
-
-static bool cmp_key(const struct sw_flow_key *key1,
- const struct sw_flow_key *key2,
- int key_start, int key_end)
-{
- const long *cp1 = (const long *)((const u8 *)key1 + key_start);
- const long *cp2 = (const long *)((const u8 *)key2 + key_start);
- long diffs = 0;
- int i;
-
- for (i = key_start; i < key_end; i += sizeof(long))
- diffs |= *cp1++ ^ *cp2++;
-
- return diffs == 0;
-}
-
-static bool flow_cmp_masked_key(const struct sw_flow *flow,
- const struct sw_flow_key *key,
- const struct sw_flow_key_range *range)
-{
- return cmp_key(&flow->key, key, range->start, range->end);
-}
-
-static bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
- const struct sw_flow_match *match)
-{
- struct sw_flow_key *key = match->key;
- int key_start = flow_key_start(key);
- int key_end = match->range.end;
-
- BUG_ON(ovs_identifier_is_ufid(&flow->id));
- return cmp_key(flow->id.unmasked_key, key, key_start, key_end);
-}
-
-static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
- const struct sw_flow_key *unmasked,
- const struct sw_flow_mask *mask,
- u32 *n_mask_hit)
-{
- struct sw_flow *flow;
- struct hlist_head *head;
- u32 hash;
- struct sw_flow_key masked_key;
-
- ovs_flow_mask_key(&masked_key, unmasked, false, mask);
- hash = flow_hash(&masked_key, &mask->range);
- head = find_bucket(ti, hash);
- (*n_mask_hit)++;
- hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) {
- if (flow->mask == mask && flow->flow_table.hash == hash &&
- flow_cmp_masked_key(flow, &masked_key, &mask->range))
- return flow;
- }
- return NULL;
-}
-
-/* Flow lookup does full lookup on flow table. It starts with
- * mask from index passed in *index.
- */
-static struct sw_flow *flow_lookup(struct flow_table *tbl,
- struct table_instance *ti,
- const struct mask_array *ma,
- const struct sw_flow_key *key,
- u32 *n_mask_hit,
- u32 *index)
-{
- struct sw_flow *flow;
- struct sw_flow_mask *mask;
- int i;
-
- if (likely(*index < ma->max)) {
- mask = rcu_dereference_ovsl(ma->masks[*index]);
- if (mask) {
- flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
- if (flow)
- return flow;
- }
- }
-
- for (i = 0; i < ma->max; i++) {
-
- if (i == *index)
- continue;
-
- mask = rcu_dereference_ovsl(ma->masks[i]);
- if (unlikely(!mask))
- break;
-
- flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
- if (flow) { /* Found */
- *index = i;
- return flow;
- }
- }
-
- return NULL;
-}
-
-/*
- * mask_cache maps flow to probable mask. This cache is not tightly
- * coupled cache, It means updates to mask list can result in inconsistent
- * cache entry in mask cache.
- * This is per cpu cache and is divided in MC_HASH_SEGS segments.
- * In case of a hash collision the entry is hashed in next segment.
- */
-struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
- const struct sw_flow_key *key,
- u32 skb_hash,
- u32 *n_mask_hit)
-{
- struct mask_array *ma = rcu_dereference(tbl->mask_array);
- struct table_instance *ti = rcu_dereference(tbl->ti);
- struct mask_cache_entry *entries, *ce;
- struct sw_flow *flow;
- u32 hash;
- int seg;
-
- *n_mask_hit = 0;
- if (unlikely(!skb_hash)) {
- u32 mask_index = 0;
-
- return flow_lookup(tbl, ti, ma, key, n_mask_hit, &mask_index);
- }
-
- /* Pre and post recirulation flows usually have the same skb_hash
- * value. To avoid hash collisions, rehash the 'skb_hash' with
- * 'recirc_id'. */
- if (key->recirc_id)
- skb_hash = jhash_1word(skb_hash, key->recirc_id);
-
- ce = NULL;
- hash = skb_hash;
- entries = this_cpu_ptr(tbl->mask_cache);
-
- /* Find the cache entry 'ce' to operate on. */
- for (seg = 0; seg < MC_HASH_SEGS; seg++) {
- int index = hash & (MC_HASH_ENTRIES - 1);
- struct mask_cache_entry *e;
-
- e = &entries[index];
- if (e->skb_hash == skb_hash) {
- flow = flow_lookup(tbl, ti, ma, key, n_mask_hit,
- &e->mask_index);
- if (!flow)
- e->skb_hash = 0;
- return flow;
- }
-
- if (!ce || e->skb_hash < ce->skb_hash)
- ce = e; /* A better replacement cache candidate. */
-
- hash >>= MC_HASH_SHIFT;
- }
-
- /* Cache miss, do full lookup. */
- flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, &ce->mask_index);
- if (flow)
- ce->skb_hash = skb_hash;
-
- return flow;
-}
-
-struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
- const struct sw_flow_key *key)
-{
- struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
- struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array);
- u32 __always_unused n_mask_hit;
- u32 index = 0;
-
- return flow_lookup(tbl, ti, ma, key, &n_mask_hit, &index);
-}
-
-struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
- const struct sw_flow_match *match)
-{
- struct mask_array *ma = ovsl_dereference(tbl->mask_array);
- int i;
-
- /* Always called under ovs-mutex. */
- for (i = 0; i < ma->max; i++) {
- struct table_instance *ti = ovsl_dereference(tbl->ti);
- u32 __always_unused n_mask_hit;
- struct sw_flow_mask *mask;
- struct sw_flow *flow;
-
- mask = ovsl_dereference(ma->masks[i]);
- if (!mask)
- continue;
- flow = masked_flow_lookup(ti, match->key, mask, &n_mask_hit);
- if (flow && ovs_identifier_is_key(&flow->id) &&
- ovs_flow_cmp_unmasked_key(flow, match))
- return flow;
- }
- return NULL;
-}
-
-static u32 ufid_hash(const struct sw_flow_id *sfid)
-{
- return jhash(sfid->ufid, sfid->ufid_len, 0);
-}
-
-static bool ovs_flow_cmp_ufid(const struct sw_flow *flow,
- const struct sw_flow_id *sfid)
-{
- if (flow->id.ufid_len != sfid->ufid_len)
- return false;
-
- return !memcmp(flow->id.ufid, sfid->ufid, sfid->ufid_len);
-}
-
-bool ovs_flow_cmp(const struct sw_flow *flow, const struct sw_flow_match *match)
-{
- if (ovs_identifier_is_ufid(&flow->id))
- return flow_cmp_masked_key(flow, match->key, &match->range);
-
- return ovs_flow_cmp_unmasked_key(flow, match);
-}
-
-struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl,
- const struct sw_flow_id *ufid)
-{
- struct table_instance *ti = rcu_dereference_ovsl(tbl->ufid_ti);
- struct sw_flow *flow;
- struct hlist_head *head;
- u32 hash;
-
- hash = ufid_hash(ufid);
- head = find_bucket(ti, hash);
- hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver]) {
- if (flow->ufid_table.hash == hash &&
- ovs_flow_cmp_ufid(flow, ufid))
- return flow;
- }
- return NULL;
-}
-
-int ovs_flow_tbl_num_masks(const struct flow_table *table)
-{
- struct mask_array *ma;
-
- ma = rcu_dereference_ovsl(table->mask_array);
- return READ_ONCE(ma->count);
-}
-
-static struct table_instance *table_instance_expand(struct table_instance *ti,
- bool ufid)
-{
- return table_instance_rehash(ti, ti->n_buckets * 2, ufid);
-}
-
-/* Must be called with OVS mutex held. */
-void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
-{
- struct table_instance *ti = ovsl_dereference(table->ti);
- struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti);
-
- BUG_ON(table->count == 0);
- table_instance_flow_free(table, ti, ufid_ti, flow, true);
-}
-
-static struct sw_flow_mask *mask_alloc(void)
-{
- struct sw_flow_mask *mask;
-
- mask = kmalloc(sizeof(*mask), GFP_KERNEL);
- if (mask)
- mask->ref_count = 1;
-
- return mask;
-}
-
-static bool mask_equal(const struct sw_flow_mask *a,
- const struct sw_flow_mask *b)
-{
- const u8 *a_ = (const u8 *)&a->key + a->range.start;
- const u8 *b_ = (const u8 *)&b->key + b->range.start;
-
- return (a->range.end == b->range.end)
- && (a->range.start == b->range.start)
- && (memcmp(a_, b_, range_n_bytes(&a->range)) == 0);
-}
-
-static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl,
- const struct sw_flow_mask *mask)
-{
- struct mask_array *ma;
- int i;
-
- ma = ovsl_dereference(tbl->mask_array);
- for (i = 0; i < ma->max; i++) {
- struct sw_flow_mask *t;
-
- t = ovsl_dereference(ma->masks[i]);
- if (t && mask_equal(mask, t))
- return t;
- }
-
- return NULL;
-}
-
-/* Add 'mask' into the mask list, if it is not already there. */
-static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
- const struct sw_flow_mask *new)
-{
- struct sw_flow_mask *mask;
-
- mask = flow_mask_find(tbl, new);
- if (!mask) {
- /* Allocate a new mask if none exsits. */
- mask = mask_alloc();
- if (!mask)
- return -ENOMEM;
-
- mask->key = new->key;
- mask->range = new->range;
-
- /* Add mask to mask-list. */
- if (tbl_mask_array_add_mask(tbl, mask)) {
- kfree(mask);
- return -ENOMEM;
- }
-
- } else {
- BUG_ON(!mask->ref_count);
- mask->ref_count++;
- }
-
- flow->mask = mask;
- return 0;
-}
-
-/* Must be called with OVS mutex held. */
-static void flow_key_insert(struct flow_table *table, struct sw_flow *flow)
-{
- struct table_instance *new_ti = NULL;
- struct table_instance *ti;
-
- flow->flow_table.hash = flow_hash(&flow->key, &flow->mask->range);
- ti = ovsl_dereference(table->ti);
- table_instance_insert(ti, flow);
- table->count++;
-
- /* Expand table, if necessary, to make room. */
- if (table->count > ti->n_buckets)
- new_ti = table_instance_expand(ti, false);
- else if (time_after(jiffies, table->last_rehash + REHASH_INTERVAL))
- new_ti = table_instance_rehash(ti, ti->n_buckets, false);
-
- if (new_ti) {
- rcu_assign_pointer(table->ti, new_ti);
- call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
- table->last_rehash = jiffies;
- }
-}
-
-/* Must be called with OVS mutex held. */
-static void flow_ufid_insert(struct flow_table *table, struct sw_flow *flow)
-{
- struct table_instance *ti;
-
- flow->ufid_table.hash = ufid_hash(&flow->id);
- ti = ovsl_dereference(table->ufid_ti);
- ufid_table_instance_insert(ti, flow);
- table->ufid_count++;
-
- /* Expand table, if necessary, to make room. */
- if (table->ufid_count > ti->n_buckets) {
- struct table_instance *new_ti;
-
- new_ti = table_instance_expand(ti, true);
- if (new_ti) {
- rcu_assign_pointer(table->ufid_ti, new_ti);
- call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
- }
- }
-}
-
-/* Must be called with OVS mutex held. */
-int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
- const struct sw_flow_mask *mask)
-{
- int err;
-
- err = flow_mask_insert(table, flow, mask);
- if (err)
- return err;
- flow_key_insert(table, flow);
- if (ovs_identifier_is_ufid(&flow->id))
- flow_ufid_insert(table, flow);
-
- return 0;
-}
-
-/* Initializes the flow module.
- * Returns zero if successful or a negative error code.
- */
-int ovs_flow_init(void)
-{
- BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long));
- BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long));
-
- flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow)
- + (nr_cpu_ids
- * sizeof(struct sw_flow_stats *)),
- 0, 0, NULL);
- if (flow_cache == NULL)
- return -ENOMEM;
-
- flow_stats_cache
- = kmem_cache_create("sw_flow_stats", sizeof(struct sw_flow_stats),
- 0, SLAB_HWCACHE_ALIGN, NULL);
- if (flow_stats_cache == NULL) {
- kmem_cache_destroy(flow_cache);
- flow_cache = NULL;
- return -ENOMEM;
- }
-
- return 0;
-}
-
-/* Uninitializes the flow module. */
-void ovs_flow_exit(void)
-{
- kmem_cache_destroy(flow_stats_cache);
- kmem_cache_destroy(flow_cache);
-}
diff --git a/datapath/flow_table.h b/datapath/flow_table.h
deleted file mode 100644
index 1a76886b5..000000000
--- a/datapath/flow_table.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (c) 2007-2013 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
- */
-
-#ifndef FLOW_TABLE_H
-#define FLOW_TABLE_H 1
-
-#include <linux/kernel.h>
-#include <linux/netlink.h>
-#include <linux/openvswitch.h>
-#include <linux/spinlock.h>
-#include <linux/types.h>
-#include <linux/rcupdate.h>
-#include <linux/if_ether.h>
-#include <linux/in6.h>
-#include <linux/jiffies.h>
-#include <linux/time.h>
-
-#include <net/inet_ecn.h>
-#include <net/ip_tunnels.h>
-
-#include "flow.h"
-
-struct mask_cache_entry {
- u32 skb_hash;
- u32 mask_index;
-};
-
-struct mask_array {
- struct rcu_head rcu;
- int count, max;
- struct sw_flow_mask __rcu *masks[];
-};
-
-struct table_instance {
- struct hlist_head *buckets;
- unsigned int n_buckets;
- struct rcu_head rcu;
- int node_ver;
- u32 hash_seed;
- bool keep_flows;
-};
-
-struct flow_table {
- struct table_instance __rcu *ti;
- struct table_instance __rcu *ufid_ti;
- struct mask_cache_entry __percpu *mask_cache;
- struct mask_array __rcu *mask_array;
- unsigned long last_rehash;
- unsigned int count;
- unsigned int ufid_count;
-};
-
-extern struct kmem_cache *flow_stats_cache;
-
-int ovs_flow_init(void);
-void ovs_flow_exit(void);
-
-struct sw_flow *ovs_flow_alloc(void);
-void ovs_flow_free(struct sw_flow *, bool deferred);
-
-int ovs_flow_tbl_init(struct flow_table *);
-int ovs_flow_tbl_count(const struct flow_table *table);
-void ovs_flow_tbl_destroy(struct flow_table *table);
-int ovs_flow_tbl_flush(struct flow_table *flow_table);
-
-int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
- const struct sw_flow_mask *mask);
-void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow);
-int ovs_flow_tbl_num_masks(const struct flow_table *table);
-struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table,
- u32 *bucket, u32 *idx);
-struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *,
- const struct sw_flow_key *,
- u32 skb_hash,
- u32 *n_mask_hit);
-struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *,
- const struct sw_flow_key *);
-struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
- const struct sw_flow_match *match);
-struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *,
- const struct sw_flow_id *);
-
-bool ovs_flow_cmp(const struct sw_flow *, const struct sw_flow_match *);
-
-void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
- bool full, const struct sw_flow_mask *mask);
-#endif /* flow_table.h */
diff --git a/datapath/linux/.gitignore b/datapath/linux/.gitignore
deleted file mode 100644
index 8e9d781b1..000000000
--- a/datapath/linux/.gitignore
+++ /dev/null
@@ -1,8 +0,0 @@
-/Kbuild
-/Makefile
-/Makefile.main
-/Module.markers
-/kcompat.h
-/modules.order
-/tmp
-/*.c
diff --git a/datapath/linux/Kbuild.in b/datapath/linux/Kbuild.in
deleted file mode 100644
index 395b0cbc0..000000000
--- a/datapath/linux/Kbuild.in
+++ /dev/null
@@ -1,27 +0,0 @@
-# -*- makefile -*-
-export builddir = @abs_builddir@
-export srcdir = @abs_srcdir@
-export top_srcdir = @abs_top_srcdir@
-export VERSION = @VERSION@
-
-include $(srcdir)/../Modules.mk
-include $(srcdir)/Modules.mk
-
-ccflags-y := -DVERSION=\"$(VERSION)\"
-ccflags-y += -I$(srcdir)/..
-ccflags-y += -I$(builddir)/..
-ccflags-y += -g
-ccflags-y += -include $(builddir)/kcompat.h
-
-# These include directories have to go before -I$(KSRC)/include.
-# NOSTDINC_FLAGS just happens to be a variable that goes in the
-# right place, even though it's conceptually incorrect.
-NOSTDINC_FLAGS += -include $(builddir)/kcompat.h -I$(top_srcdir)/include -I$(srcdir)/compat -I$(srcdir)/compat/include
-
-obj-m := $(subst _,-,$(patsubst %,%.o,$(build_modules)))
-
-define module_template
-$(1)-y = $$(notdir $$(patsubst %.c,%.o,$($(1)_sources)))
-endef
-
-$(foreach module,$(build_multi_modules),$(eval $(call module_template,$(module))))
diff --git a/datapath/linux/Makefile.in b/datapath/linux/Makefile.in
deleted file mode 100644
index efc1663e4..000000000
--- a/datapath/linux/Makefile.in
+++ /dev/null
@@ -1,9 +0,0 @@
-ifeq ($(KERNELRELEASE),)
-# We're being called directly by running make in this directory.
-include Makefile.main
-else
-# We're being included by the Linux kernel build system
-include Kbuild
-endif
-
-
diff --git a/datapath/linux/Makefile.main.in b/datapath/linux/Makefile.main.in
deleted file mode 100644
index 6db4aa3ab..000000000
--- a/datapath/linux/Makefile.main.in
+++ /dev/null
@@ -1,107 +0,0 @@
-# -*- makefile -*-
-export builddir = @abs_builddir@
-export srcdir = @abs_srcdir@
-export top_srcdir = @abs_top_srcdir@
-export KSRC = @KBUILD@
-export VERSION = @VERSION@
-
-include $(srcdir)/../Modules.mk
-include $(srcdir)/Modules.mk
-
-default: $(build_links)
-
-$(foreach s,$(sort $(foreach m,$(build_modules),$($(m)_sources))), \
- $(eval $(notdir $(s)): ; ln -s $(srcdir)/../$(s) $@))
-
-all: default
-distdir: clean
-install:
-install-data:
-install-exec:
-uninstall:
-install-dvi:
-install-html:
-install-info:
-install-ps:
-install-pdf:
-installdirs:
-check: all
-installcheck:
-mostlyclean:
-clean:
- rm -f *.o *.ko *.mod.* .*.gcno .*.d .*.cmd kcompat.h.new \
- .cache.mk Module.symvers modules.order .tmp_versions/*.mod
- for d in $(build_links); do if test -h $$d; then rm $$d; fi; done
-distclean: clean
- rm -f kcompat.h
-maintainer-clean: distclean
-dvi:
-pdf:
-ps:
-info:
-html:
-tags:
-TAGS:
-
-ifneq ($(KSRC),)
-
-ifeq (/lib/modules/$(shell uname -r)/source, $(KSRC))
- KOBJ := /lib/modules/$(shell uname -r)/build
-else
- KOBJ := $(KSRC)
-endif
-
-VERSION_FILE := $(KOBJ)/include/linux/version.h
-ifeq (,$(wildcard $(VERSION_FILE)))
- VERSION_FILE := $(KOBJ)/include/generated/uapi/linux/version.h
- ifeq (,$(wildcard $(VERSION_FILE)))
- $(error Linux kernel source not configured - missing version.h)
- endif
-endif
-
-CONFIG_FILE := $(KSRC)/include/generated/autoconf.h
-ifeq (,$(wildcard $(CONFIG_FILE)))
- CONFIG_FILE := $(KSRC)/include/linux/autoconf.h
- ifeq (,$(wildcard $(CONFIG_FILE)))
- $(error Linux kernel source not configured - missing autoconf.h)
- endif
-endif
-
-default:
- $(MAKE) -C $(KSRC) $(if @KARCH@,ARCH=@KARCH@) M=$(builddir) modules
-
-modules_install:
- $(MAKE) -C $(KSRC) $(if @KARCH@,ARCH=@KARCH@) M=$(builddir) modules_install
- /sbin/depmod `sed -n 's/#define UTS_RELEASE "\([^"]*\)"/\1/p' $(KSRC)/include/generated/utsrelease.h`
-endif
-
-# Much of the kernel build system in this file is derived from Intel's
-# e1000 distribution, with the following license:
-
-################################################################################
-#
-# Intel PRO/1000 Linux driver
-# Copyright(c) 1999 - 2007, 2009 Intel Corporation.
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms and conditions of the GNU General Public License,
-# version 2, as published by the Free Software Foundation.
-#
-# This program is distributed in the hope it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-# more details.
-#
-# You should have received a copy of the GNU General Public License along with
-# this program; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# The full GNU General Public License is included in this distribution in
-# the file called "COPYING".
-#
-# Contact Information:
-# Linux NICS <linux.nics@intel.com>
-# e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-# Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-#
-################################################################################
diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk
deleted file mode 100644
index 372243988..000000000
--- a/datapath/linux/Modules.mk
+++ /dev/null
@@ -1,123 +0,0 @@
-openvswitch_sources += \
- linux/compat/dev-openvswitch.c \
- linux/compat/dst_cache.c \
- linux/compat/exthdrs_core.c \
- linux/compat/geneve.c \
- linux/compat/gre.c \
- linux/compat/gso.c \
- linux/compat/genetlink-openvswitch.c \
- linux/compat/inet_fragment.c \
- linux/compat/ip_gre.c \
- linux/compat/ip_fragment.c \
- linux/compat/ip_output.c \
- linux/compat/ip_tunnel.c \
- linux/compat/ip_tunnels_core.c \
- linux/compat/ip6_output.c \
- linux/compat/ip6_gre.c \
- linux/compat/ip6_tunnel.c \
- linux/compat/lisp.c \
- linux/compat/netdevice.c \
- linux/compat/nf_conncount.c \
- linux/compat/nf_conntrack_core.c \
- linux/compat/nf_conntrack_proto.c \
- linux/compat/nf_conntrack_reasm.c \
- linux/compat/nf_conntrack_timeout.c \
- linux/compat/reciprocal_div.c \
- linux/compat/skbuff-openvswitch.c \
- linux/compat/socket.c \
- linux/compat/stt.c \
- linux/compat/udp.c \
- linux/compat/udp_tunnel.c \
- linux/compat/vxlan.c \
- linux/compat/utils.c
-openvswitch_headers += \
- linux/compat/gso.h \
- linux/compat/include/linux/percpu.h \
- linux/compat/include/linux/bug.h \
- linux/compat/include/linux/cache.h \
- linux/compat/include/linux/compiler.h \
- linux/compat/include/linux/compiler-gcc.h \
- linux/compat/include/linux/cpumask.h \
- linux/compat/include/linux/err.h \
- linux/compat/include/linux/etherdevice.h \
- linux/compat/include/linux/genetlink.h \
- linux/compat/include/linux/if.h \
- linux/compat/include/linux/if_ether.h \
- linux/compat/include/linux/if_link.h \
- linux/compat/include/linux/if_vlan.h \
- linux/compat/include/linux/in.h \
- linux/compat/include/linux/jiffies.h \
- linux/compat/include/linux/kconfig.h \
- linux/compat/include/linux/kernel.h \
- linux/compat/include/net/lisp.h \
- linux/compat/include/linux/list.h \
- linux/compat/include/linux/mpls.h \
- linux/compat/include/linux/net.h \
- linux/compat/include/linux/random.h \
- linux/compat/include/linux/netdevice.h \
- linux/compat/include/linux/netdev_features.h \
- linux/compat/include/linux/netfilter_ipv6.h \
- linux/compat/include/linux/netlink.h \
- linux/compat/include/linux/openvswitch.h \
- linux/compat/include/linux/rculist.h \
- linux/compat/include/linux/rcupdate.h \
- linux/compat/include/linux/reciprocal_div.h \
- linux/compat/include/linux/rtnetlink.h \
- linux/compat/include/linux/skbuff.h \
- linux/compat/include/linux/static_key.h \
- linux/compat/include/linux/stddef.h \
- linux/compat/include/linux/types.h \
- linux/compat/include/linux/u64_stats_sync.h \
- linux/compat/include/linux/udp.h \
- linux/compat/include/linux/workqueue.h \
- linux/compat/include/linux/timekeeping.h \
- linux/compat/include/net/checksum.h \
- linux/compat/include/net/dst.h \
- linux/compat/include/net/dst_cache.h \
- linux/compat/include/net/dst_metadata.h \
- linux/compat/include/net/genetlink.h \
- linux/compat/include/net/geneve.h \
- linux/compat/include/net/gre.h \
- linux/compat/include/net/inet_ecn.h \
- linux/compat/include/net/inet_frag.h \
- linux/compat/include/net/inetpeer.h \
- linux/compat/include/net/ip.h \
- linux/compat/include/net/ip_tunnels.h \
- linux/compat/include/net/ip6_fib.h \
- linux/compat/include/net/ip6_route.h \
- linux/compat/include/net/ip6_tunnel.h \
- linux/compat/include/net/ipv6.h \
- linux/compat/include/net/ipv6_frag.h \
- linux/compat/include/net/mpls.h \
- linux/compat/include/net/net_namespace.h \
- linux/compat/include/net/netlink.h \
- linux/compat/include/net/protocol.h \
- linux/compat/include/net/route.h \
- linux/compat/include/net/rtnetlink.h \
- linux/compat/include/net/udp.h \
- linux/compat/include/net/udp_tunnel.h \
- linux/compat/include/net/sock.h \
- linux/compat/include/net/stt.h \
- linux/compat/include/net/vrf.h \
- linux/compat/include/net/tun_proto.h \
- linux/compat/include/net/nsh.h \
- linux/compat/include/net/vxlan.h \
- linux/compat/include/net/netfilter/nf_conntrack.h \
- linux/compat/include/net/netfilter/nf_conntrack_core.h \
- linux/compat/include/net/netfilter/nf_conntrack_count.h \
- linux/compat/include/net/netfilter/nf_conntrack_expect.h \
- linux/compat/include/net/netfilter/nf_conntrack_helper.h \
- linux/compat/include/net/netfilter/nf_conntrack_labels.h \
- linux/compat/include/net/netfilter/nf_conntrack_seqadj.h \
- linux/compat/include/net/netfilter/nf_conntrack_timeout.h \
- linux/compat/include/net/netfilter/nf_conntrack_zones.h \
- linux/compat/include/net/netfilter/nf_nat.h \
- linux/compat/include/net/netfilter/ipv6/nf_defrag_ipv6.h \
- linux/compat/include/net/sctp/checksum.h \
- linux/compat/include/net/erspan.h \
- linux/compat/include/uapi/linux/netfilter.h \
- linux/compat/include/linux/mm.h \
- linux/compat/include/linux/netfilter.h \
- linux/compat/include/linux/overflow.h \
- linux/compat/include/linux/rbtree.h
-EXTRA_DIST += linux/compat/build-aux/export-check-allow-list
diff --git a/datapath/linux/compat/build-aux/export-check-allow-list b/datapath/linux/compat/build-aux/export-check-allow-list
deleted file mode 100644
index 1178f46ee..000000000
--- a/datapath/linux/compat/build-aux/export-check-allow-list
+++ /dev/null
@@ -1 +0,0 @@
-pskb_expand_head \ No newline at end of file
diff --git a/datapath/linux/compat/dev-openvswitch.c b/datapath/linux/compat/dev-openvswitch.c
deleted file mode 100644
index 56e1a5b68..000000000
--- a/datapath/linux/compat/dev-openvswitch.c
+++ /dev/null
@@ -1,83 +0,0 @@
-#include <linux/if_bridge.h>
-#include <linux/netdevice.h>
-#include <linux/version.h>
-#include <net/rtnetlink.h>
-
-#include "gso.h"
-#include "vport.h"
-#include "vport-internal_dev.h"
-#include "vport-netdev.h"
-
-#ifndef HAVE_DEV_DISABLE_LRO
-
-#ifdef NETIF_F_LRO
-#include <linux/ethtool.h>
-
-/**
- * dev_disable_lro - disable Large Receive Offload on a device
- * @dev: device
- *
- * Disable Large Receive Offload (LRO) on a net device. Must be
- * called under RTNL. This is needed if received packets may be
- * forwarded to another interface.
- */
-void dev_disable_lro(struct net_device *dev)
-{
- if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
- dev->ethtool_ops->set_flags) {
- u32 flags = dev->ethtool_ops->get_flags(dev);
- if (flags & ETH_FLAG_LRO) {
- flags &= ~ETH_FLAG_LRO;
- dev->ethtool_ops->set_flags(dev, flags);
- }
- }
- WARN_ON(dev->features & NETIF_F_LRO);
-}
-#else
-void dev_disable_lro(struct net_device *dev) { }
-#endif /* NETIF_F_LRO */
-
-#endif /* HAVE_DEV_DISABLE_LRO */
-
-int rpl_rtnl_delete_link(struct net_device *dev)
-{
- const struct rtnl_link_ops *ops;
- LIST_HEAD(list_kill);
-
- ops = dev->rtnl_link_ops;
- if (!ops || !ops->dellink)
- return -EOPNOTSUPP;
-
- ops->dellink(dev, &list_kill);
- unregister_netdevice_many(&list_kill);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(rpl_rtnl_delete_link);
-
-#ifndef USE_UPSTREAM_TUNNEL
-int ovs_dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
-{
- struct ip_tunnel_info *info;
- struct vport *vport;
-
- if (!SKB_SETUP_FILL_METADATA_DST(skb))
- return -ENOMEM;
-
- vport = ovs_netdev_get_vport(dev);
- if (!vport)
- return -EINVAL;
-
- if (!vport->ops->fill_metadata_dst)
- return -EINVAL;
-
- info = skb_tunnel_info(skb);
- if (!info)
- return -ENOMEM;
- if (unlikely(!(info->mode & IP_TUNNEL_INFO_TX)))
- return -EINVAL;
-
- return vport->ops->fill_metadata_dst(dev, skb);
-}
-EXPORT_SYMBOL_GPL(ovs_dev_fill_metadata_dst);
-#endif
diff --git a/datapath/linux/compat/dst_cache.c b/datapath/linux/compat/dst_cache.c
deleted file mode 100644
index 45990cba7..000000000
--- a/datapath/linux/compat/dst_cache.c
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * net/core/dst_cache.c - dst entry cache
- *
- * Copyright (c) 2016 Paolo Abeni <pabeni@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#ifndef USE_BUILTIN_DST_CACHE
-#include <linux/kernel.h>
-#include <linux/percpu.h>
-#include <net/dst_cache.h>
-#include <net/route.h>
-#if IS_ENABLED(CONFIG_IPV6)
-#include <net/ip6_fib.h>
-#endif
-#include <uapi/linux/in.h>
-
-#ifndef USE_UPSTREAM_TUNNEL
-struct dst_cache_pcpu {
- unsigned long refresh_ts;
- struct dst_entry *dst;
- u32 cookie;
- union {
- struct in_addr in_saddr;
- struct in6_addr in6_saddr;
- };
-};
-
-static void dst_cache_per_cpu_dst_set(struct dst_cache_pcpu *dst_cache,
- struct dst_entry *dst, u32 cookie)
-{
- dst_release(dst_cache->dst);
- if (dst)
- dst_hold(dst);
-
- dst_cache->cookie = cookie;
- dst_cache->dst = dst;
-}
-
-static struct dst_entry *dst_cache_per_cpu_get(struct dst_cache *dst_cache,
- struct dst_cache_pcpu *idst)
-{
- struct dst_entry *dst;
-
- dst = idst->dst;
- if (!dst)
- goto fail;
-
- /* the cache already hold a dst reference; it can't go away */
- dst_hold(dst);
-
- if (unlikely(!time_after(idst->refresh_ts, dst_cache->reset_ts) ||
- (dst->obsolete && !dst->ops->check(dst, idst->cookie)))) {
- dst_cache_per_cpu_dst_set(idst, NULL, 0);
- dst_release(dst);
- goto fail;
- }
- return dst;
-
-fail:
- idst->refresh_ts = jiffies;
- return NULL;
-}
-
-struct dst_entry *rpl_dst_cache_get(struct dst_cache *dst_cache)
-{
- if (!dst_cache->cache)
- return NULL;
-
- return dst_cache_per_cpu_get(dst_cache, this_cpu_ptr(dst_cache->cache));
-}
-EXPORT_SYMBOL_GPL(rpl_dst_cache_get);
-
-struct rtable *rpl_dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr)
-{
- struct dst_cache_pcpu *idst;
- struct dst_entry *dst;
-
- if (!dst_cache->cache)
- return NULL;
-
- idst = this_cpu_ptr(dst_cache->cache);
- dst = dst_cache_per_cpu_get(dst_cache, idst);
- if (!dst)
- return NULL;
-
- *saddr = idst->in_saddr.s_addr;
- return container_of(dst, struct rtable, dst);
-}
-EXPORT_SYMBOL_GPL(rpl_dst_cache_get_ip4);
-
-void rpl_dst_cache_set_ip4(struct dst_cache *dst_cache, struct dst_entry *dst,
- __be32 saddr)
-{
- struct dst_cache_pcpu *idst;
-
- if (!dst_cache->cache)
- return;
-
- idst = this_cpu_ptr(dst_cache->cache);
- dst_cache_per_cpu_dst_set(idst, dst, 0);
- idst->in_saddr.s_addr = saddr;
-}
-EXPORT_SYMBOL_GPL(rpl_dst_cache_set_ip4);
-
-#if IS_ENABLED(CONFIG_IPV6)
-void rpl_dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
- const struct in6_addr *addr)
-{
- struct dst_cache_pcpu *idst;
-
- if (!dst_cache->cache)
- return;
-
- idst = this_cpu_ptr(dst_cache->cache);
- dst_cache_per_cpu_dst_set(this_cpu_ptr(dst_cache->cache), dst,
- rt6_get_cookie((struct rt6_info *)dst));
- idst->in6_saddr = *addr;
-}
-EXPORT_SYMBOL_GPL(rpl_dst_cache_set_ip6);
-
-struct dst_entry *rpl_dst_cache_get_ip6(struct dst_cache *dst_cache,
- struct in6_addr *saddr)
-{
- struct dst_cache_pcpu *idst;
- struct dst_entry *dst;
-
- if (!dst_cache->cache)
- return NULL;
-
- idst = this_cpu_ptr(dst_cache->cache);
- dst = dst_cache_per_cpu_get(dst_cache, idst);
- if (!dst)
- return NULL;
-
- *saddr = idst->in6_saddr;
- return dst;
-}
-EXPORT_SYMBOL_GPL(rpl_dst_cache_get_ip6);
-
-#endif
-
-int rpl_dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp)
-{
- dst_cache->cache = alloc_percpu_gfp(struct dst_cache_pcpu,
- gfp | __GFP_ZERO);
- if (!dst_cache->cache)
- return -ENOMEM;
-
- dst_cache_reset(dst_cache);
- return 0;
-}
-EXPORT_SYMBOL_GPL(rpl_dst_cache_init);
-
-void rpl_dst_cache_destroy(struct dst_cache *dst_cache)
-{
- int i;
-
- if (!dst_cache->cache)
- return;
-
- for_each_possible_cpu(i)
- dst_release(per_cpu_ptr(dst_cache->cache, i)->dst);
-
- free_percpu(dst_cache->cache);
-}
-EXPORT_SYMBOL_GPL(rpl_dst_cache_destroy);
-#endif /*USE_UPSTREAM_TUNNEL */
-#endif /* USE_BUILTIN_DST_CACHE */
diff --git a/datapath/linux/compat/exthdrs_core.c b/datapath/linux/compat/exthdrs_core.c
deleted file mode 100644
index 697f9d082..000000000
--- a/datapath/linux/compat/exthdrs_core.c
+++ /dev/null
@@ -1,129 +0,0 @@
-#include <linux/ipv6.h>
-#include <linux/version.h>
-#include <net/ipv6.h>
-
-#ifndef HAVE_IP6_FH_F_SKIP_RH
-/*
- * find the offset to specified header or the protocol number of last header
- * if target < 0. "last header" is transport protocol header, ESP, or
- * "No next header".
- *
- * Note that *offset is used as input/output parameter. an if it is not zero,
- * then it must be a valid offset to an inner IPv6 header. This can be used
- * to explore inner IPv6 header, eg. ICMPv6 error messages.
- *
- * If target header is found, its offset is set in *offset and return protocol
- * number. Otherwise, return -1.
- *
- * If the first fragment doesn't contain the final protocol header or
- * NEXTHDR_NONE it is considered invalid.
- *
- * Note that non-1st fragment is special case that "the protocol number
- * of last header" is "next header" field in Fragment header. In this case,
- * *offset is meaningless and fragment offset is stored in *fragoff if fragoff
- * isn't NULL.
- *
- * if flags is not NULL and it's a fragment, then the frag flag
- * IP6_FH_F_FRAG will be set. If it's an AH header, the
- * IP6_FH_F_AUTH flag is set and target < 0, then this function will
- * stop at the AH header. If IP6_FH_F_SKIP_RH flag was passed, then this
- * function will skip all those routing headers, where segements_left was 0.
- */
-int rpl_ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
- int target, unsigned short *fragoff, int *flags)
-{
- unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr);
- u8 nexthdr = ipv6_hdr(skb)->nexthdr;
- unsigned int len;
- bool found;
-
- if (fragoff)
- *fragoff = 0;
-
- if (*offset) {
- struct ipv6hdr _ip6, *ip6;
-
- ip6 = skb_header_pointer(skb, *offset, sizeof(_ip6), &_ip6);
- if (!ip6 || (ip6->version != 6)) {
- printk(KERN_ERR "IPv6 header not found\n");
- return -EBADMSG;
- }
- start = *offset + sizeof(struct ipv6hdr);
- nexthdr = ip6->nexthdr;
- }
- len = skb->len - start;
-
- do {
- struct ipv6_opt_hdr _hdr, *hp;
- unsigned int hdrlen;
- found = (nexthdr == target);
-
- if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) {
- if (target < 0 || found)
- break;
- return -ENOENT;
- }
-
- hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
- if (hp == NULL)
- return -EBADMSG;
-
- if (nexthdr == NEXTHDR_ROUTING) {
- struct ipv6_rt_hdr _rh, *rh;
-
- rh = skb_header_pointer(skb, start, sizeof(_rh),
- &_rh);
- if (rh == NULL)
- return -EBADMSG;
-
- if (flags && (*flags & IP6_FH_F_SKIP_RH) &&
- rh->segments_left == 0)
- found = false;
- }
-
- if (nexthdr == NEXTHDR_FRAGMENT) {
- unsigned short _frag_off;
- __be16 *fp;
-
- if (flags) /* Indicate that this is a fragment */
- *flags |= IP6_FH_F_FRAG;
- fp = skb_header_pointer(skb,
- start+offsetof(struct frag_hdr,
- frag_off),
- sizeof(_frag_off),
- &_frag_off);
- if (fp == NULL)
- return -EBADMSG;
-
- _frag_off = ntohs(*fp) & ~0x7;
- if (_frag_off) {
- if (target < 0 &&
- ((!ipv6_ext_hdr(hp->nexthdr)) ||
- hp->nexthdr == NEXTHDR_NONE)) {
- if (fragoff)
- *fragoff = _frag_off;
- return hp->nexthdr;
- }
- return -ENOENT;
- }
- hdrlen = 8;
- } else if (nexthdr == NEXTHDR_AUTH) {
- if (flags && (*flags & IP6_FH_F_AUTH) && (target < 0))
- break;
- hdrlen = (hp->hdrlen + 2) << 2;
- } else
- hdrlen = ipv6_optlen(hp);
-
- if (!found) {
- nexthdr = hp->nexthdr;
- len -= hdrlen;
- start += hdrlen;
- }
- } while (!found);
-
- *offset = start;
- return nexthdr;
-}
-EXPORT_SYMBOL_GPL(rpl_ipv6_find_hdr);
-
-#endif
diff --git a/datapath/linux/compat/genetlink-openvswitch.c b/datapath/linux/compat/genetlink-openvswitch.c
deleted file mode 100644
index 5b0ecfa8d..000000000
--- a/datapath/linux/compat/genetlink-openvswitch.c
+++ /dev/null
@@ -1,55 +0,0 @@
-#include <net/genetlink.h>
-#include <linux/version.h>
-
-#ifndef HAVE_GENL_NOTIFY_TAKES_FAMILY
-int rpl___genl_register_family(struct rpl_genl_family *f)
-{
- int err;
-
- f->compat_family.id = f->id;
- f->compat_family.hdrsize = f->hdrsize;
- strncpy(f->compat_family.name, f->name, GENL_NAMSIZ);
- f->compat_family.version = f->version;
- f->compat_family.maxattr = f->maxattr;
- f->compat_family.netnsok = f->netnsok;
-#ifdef HAVE_PARALLEL_OPS
- f->compat_family.parallel_ops = f->parallel_ops;
-#endif
- err = genl_register_family_with_ops(&f->compat_family,
- (struct genl_ops *) f->ops, f->n_ops);
- if (err)
- goto error;
-
- if (f->mcgrps) {
- /* Need to Fix GROUP_ID() for more than one group. */
- BUG_ON(f->n_mcgrps > 1);
- err = genl_register_mc_group(&f->compat_family,
- (struct genl_multicast_group *) f->mcgrps);
- if (err)
- goto error;
- }
-error:
- return err;
-
-}
-EXPORT_SYMBOL_GPL(rpl___genl_register_family);
-#endif /* HAVE_GENL_NOTIFY_TAKES_FAMILY */
-
-#ifdef HAVE_GENL_NOTIFY_TAKES_NET
-
-#undef genl_notify
-
-void rpl_genl_notify(struct genl_family *family, struct sk_buff *skb,
- struct genl_info *info, u32 group, gfp_t flags)
-{
- struct net *net = genl_info_net(info);
- u32 portid = info->snd_portid;
- struct nlmsghdr *nlh = info->nlhdr;
-
-#ifdef HAVE_GENL_NOTIFY_TAKES_FAMILY
- genl_notify(family, skb, net, portid, group, nlh, flags);
-#else
- genl_notify(skb, net, portid, group, nlh, flags);
-#endif
-}
-#endif /* HAVE_GENL_NOTIFY_TAKES_NET */
diff --git a/datapath/linux/compat/geneve.c b/datapath/linux/compat/geneve.c
deleted file mode 100644
index 02c6403e6..000000000
--- a/datapath/linux/compat/geneve.c
+++ /dev/null
@@ -1,1854 +0,0 @@
-/*
- * GENEVE: Generic Network Virtualization Encapsulation
- *
- * Copyright (c) 2015 Red Hat, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/hash.h>
-#include <linux/if_link.h>
-#include <linux/if_vlan.h>
-
-#include <net/addrconf.h>
-#include <net/dst_cache.h>
-#include <net/dst_metadata.h>
-#include <net/net_namespace.h>
-#include <net/netns/generic.h>
-#include <net/rtnetlink.h>
-#include <net/geneve.h>
-#include <net/protocol.h>
-#include <net/udp_tunnel.h>
-#include <net/ip6_route.h>
-#if IS_ENABLED(CONFIG_IPV6)
-#include <net/ipv6.h>
-#include <net/addrconf.h>
-#include <net/ip6_tunnel.h>
-#include <net/ip6_checksum.h>
-#endif
-
-
-#include "gso.h"
-#include "vport-netdev.h"
-#include "compat.h"
-
-#ifndef USE_UPSTREAM_TUNNEL
-
-#define GENEVE_NETDEV_VER "0.6"
-
-#define GENEVE_UDP_PORT 6081
-
-#define GENEVE_N_VID (1u << 24)
-#define GENEVE_VID_MASK (GENEVE_N_VID - 1)
-
-#define VNI_HASH_BITS 10
-#define VNI_HASH_SIZE (1<<VNI_HASH_BITS)
-
-#define GENEVE_VER 0
-#define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr))
-
-/* per-network namespace private data for this module */
-struct geneve_net {
- struct list_head geneve_list;
- struct list_head sock_list;
-};
-
-static int geneve_net_id;
-
-union geneve_addr {
- struct sockaddr_in sin;
- struct sockaddr_in6 sin6;
- struct sockaddr sa;
-};
-
-static union geneve_addr geneve_remote_unspec = { .sa.sa_family = AF_UNSPEC, };
-
-/* Pseudo network device */
-struct geneve_dev {
- struct hlist_node hlist; /* vni hash table */
- struct net *net; /* netns for packet i/o */
- struct net_device *dev; /* netdev for geneve tunnel */
- struct geneve_sock __rcu *sock4; /* IPv4 socket used for geneve tunnel */
-#if IS_ENABLED(CONFIG_IPV6)
- struct geneve_sock __rcu *sock6; /* IPv6 socket used for geneve tunnel */
-#endif
- u8 vni[3]; /* virtual network ID for tunnel */
- u8 ttl; /* TTL override */
- u8 tos; /* TOS override */
- union geneve_addr remote; /* IP address for link partner */
- struct list_head next; /* geneve's per namespace list */
- __be32 label; /* IPv6 flowlabel override */
- __be16 dst_port;
- bool collect_md;
- u32 flags;
- struct dst_cache dst_cache;
-};
-
-/* Geneve device flags */
-#define GENEVE_F_UDP_ZERO_CSUM_TX BIT(0)
-#define GENEVE_F_UDP_ZERO_CSUM6_TX BIT(1)
-#define GENEVE_F_UDP_ZERO_CSUM6_RX BIT(2)
-
-struct geneve_sock {
- bool collect_md;
- struct list_head list;
- struct socket *sock;
- struct rcu_head rcu;
- int refcnt;
- struct hlist_head vni_list[VNI_HASH_SIZE];
- u32 flags;
-#ifdef HAVE_UDP_OFFLOAD
- struct udp_offload udp_offloads;
-#endif
-};
-
-static inline __u32 geneve_net_vni_hash(u8 vni[3])
-{
- __u32 vnid;
-
- vnid = (vni[0] << 16) | (vni[1] << 8) | vni[2];
- return hash_32(vnid, VNI_HASH_BITS);
-}
-
-static __be64 vni_to_tunnel_id(const __u8 *vni)
-{
-#ifdef __BIG_ENDIAN
- return (vni[0] << 16) | (vni[1] << 8) | vni[2];
-#else
- return (__force __be64)(((__force u64)vni[0] << 40) |
- ((__force u64)vni[1] << 48) |
- ((__force u64)vni[2] << 56));
-#endif
-}
-
-static sa_family_t geneve_get_sk_family(struct geneve_sock *gs)
-{
- return gs->sock->sk->sk_family;
-}
-
-static struct geneve_dev *geneve_lookup(struct geneve_sock *gs,
- __be32 addr, u8 vni[])
-{
- struct hlist_head *vni_list_head;
- struct geneve_dev *geneve;
- __u32 hash;
-
- /* Find the device for this VNI */
- hash = geneve_net_vni_hash(vni);
- vni_list_head = &gs->vni_list[hash];
- hlist_for_each_entry_rcu(geneve, vni_list_head, hlist) {
- if (!memcmp(vni, geneve->vni, sizeof(geneve->vni)) &&
- addr == geneve->remote.sin.sin_addr.s_addr)
- return geneve;
- }
- return NULL;
-}
-
-#if IS_ENABLED(CONFIG_IPV6)
-static struct geneve_dev *geneve6_lookup(struct geneve_sock *gs,
- struct in6_addr addr6, u8 vni[])
-{
- struct hlist_head *vni_list_head;
- struct geneve_dev *geneve;
- __u32 hash;
-
- /* Find the device for this VNI */
- hash = geneve_net_vni_hash(vni);
- vni_list_head = &gs->vni_list[hash];
- hlist_for_each_entry_rcu(geneve, vni_list_head, hlist) {
- if (!memcmp(vni, geneve->vni, sizeof(geneve->vni)) &&
- ipv6_addr_equal(&addr6, &geneve->remote.sin6.sin6_addr))
- return geneve;
- }
- return NULL;
-}
-#endif
-
-static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
-{
- return (struct genevehdr *)(udp_hdr(skb) + 1);
-}
-
-static struct geneve_dev *geneve_lookup_skb(struct geneve_sock *gs,
- struct sk_buff *skb)
-{
- u8 *vni;
- __be32 addr;
- static u8 zero_vni[3];
-#if IS_ENABLED(CONFIG_IPV6)
- static struct in6_addr zero_addr6;
-#endif
-
- if (geneve_get_sk_family(gs) == AF_INET) {
- struct iphdr *iph;
-
- iph = ip_hdr(skb); /* outer IP header... */
-
- if (gs->collect_md) {
- vni = zero_vni;
- addr = 0;
- } else {
- vni = geneve_hdr(skb)->vni;
- addr = iph->saddr;
- }
-
- return geneve_lookup(gs, addr, vni);
-#if IS_ENABLED(CONFIG_IPV6)
- } else if (geneve_get_sk_family(gs) == AF_INET6) {
- struct ipv6hdr *ip6h;
- struct in6_addr addr6;
-
- ip6h = ipv6_hdr(skb); /* outer IPv6 header... */
-
- if (gs->collect_md) {
- vni = zero_vni;
- addr6 = zero_addr6;
- } else {
- vni = geneve_hdr(skb)->vni;
- addr6 = ip6h->saddr;
- }
-
- return geneve6_lookup(gs, addr6, vni);
-#endif
- }
- return NULL;
-}
-
-/* geneve receive/decap routine */
-static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs,
- struct sk_buff *skb)
-{
- struct genevehdr *gnvh = geneve_hdr(skb);
- struct metadata_dst *tun_dst = NULL;
- struct pcpu_sw_netstats *stats;
- int err = 0;
- void *oiph;
- union {
- struct metadata_dst dst;
- char buf[sizeof(struct metadata_dst) + 256];
- } buf;
-
- if (ip_tunnel_collect_metadata() || gs->collect_md) {
- __be16 flags;
-
- flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT |
- (gnvh->oam ? TUNNEL_OAM : 0) |
- (gnvh->critical ? TUNNEL_CRIT_OPT : 0);
-
- tun_dst = &buf.dst;
- ovs_udp_tun_rx_dst(tun_dst,
- skb, geneve_get_sk_family(gs), flags,
- vni_to_tunnel_id(gnvh->vni),
- gnvh->opt_len * 4);
- if (!tun_dst)
- goto drop;
- /* Update tunnel dst according to Geneve options. */
- ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
- gnvh->options, gnvh->opt_len * 4,
- TUNNEL_GENEVE_OPT);
- } else {
- /* Drop packets w/ critical options,
- * since we don't support any...
- */
- if (gnvh->critical)
- goto drop;
- }
-
- skb_reset_mac_header(skb);
- skb->protocol = eth_type_trans(skb, geneve->dev);
- skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
-
- if (tun_dst)
- ovs_skb_dst_set(skb, &tun_dst->dst);
-
- /* Ignore packet loops (and multicast echo) */
- if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr))
- goto drop;
-
- oiph = skb_network_header(skb);
- skb_reset_network_header(skb);
-
- if (geneve_get_sk_family(gs) == AF_INET)
- err = IP_ECN_decapsulate(oiph, skb);
-#if IS_ENABLED(CONFIG_IPV6)
- else
- err = IP6_ECN_decapsulate(oiph, skb);
-#endif
- if (unlikely(err > 1)) {
- ++geneve->dev->stats.rx_frame_errors;
- ++geneve->dev->stats.rx_errors;
- goto drop;
- }
-
- stats = this_cpu_ptr(geneve->dev->tstats);
- u64_stats_update_begin(&stats->syncp);
- stats->rx_packets++;
- stats->rx_bytes += skb->len;
- u64_stats_update_end(&stats->syncp);
-
- netdev_port_receive(skb, skb_tunnel_info(skb));
- return;
-drop:
- /* Consume bad packet */
- kfree_skb(skb);
-}
-
-/* Setup stats when device is created */
-static int geneve_init(struct net_device *dev)
-{
- struct geneve_dev *geneve = netdev_priv(dev);
- int err;
-
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
-
- err = dst_cache_init(&geneve->dst_cache, GFP_KERNEL);
- if (err) {
- free_percpu(dev->tstats);
- return err;
- }
-
- return 0;
-}
-
-static void geneve_uninit(struct net_device *dev)
-{
- struct geneve_dev *geneve = netdev_priv(dev);
-
- dst_cache_destroy(&geneve->dst_cache);
- free_percpu(dev->tstats);
-}
-
-/* Callback from net/ipv4/udp.c to receive packets */
-static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
-{
- struct genevehdr *geneveh;
- struct geneve_dev *geneve;
- struct geneve_sock *gs;
- int opts_len;
-
- /* Need Geneve and inner Ethernet header to be present */
- if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN)))
- goto drop;
-
- /* Return packets with reserved bits set */
- geneveh = geneve_hdr(skb);
- if (unlikely(geneveh->ver != GENEVE_VER))
- goto drop;
-
- if (unlikely(geneveh->proto_type != htons(ETH_P_TEB)))
- goto drop;
-
- gs = rcu_dereference_sk_user_data(sk);
- if (!gs)
- goto drop;
-
-#if IS_ENABLED(CONFIG_IPV6)
-#ifdef OVS_CHECK_UDP_TUNNEL_ZERO_CSUM
- if (geneve_get_sk_family(gs) == AF_INET6 &&
- !udp_hdr(skb)->check &&
- !(gs->flags & GENEVE_F_UDP_ZERO_CSUM6_RX)) {
- udp6_csum_zero_error(skb);
- goto drop;
- }
-#endif
-#endif
- geneve = geneve_lookup_skb(gs, skb);
- if (!geneve)
- goto drop;
-
- opts_len = geneveh->opt_len * 4;
- if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len,
- htons(ETH_P_TEB),
- !net_eq(geneve->net, dev_net(geneve->dev))))
- goto drop;
-
- geneve_rx(geneve, gs, skb);
- return 0;
-
-drop:
- /* Consume bad packet */
- kfree_skb(skb);
- return 0;
-}
-
-static struct socket *geneve_create_sock(struct net *net, bool ipv6,
- __be16 port, u32 flags)
-{
- struct socket *sock;
- struct udp_port_cfg udp_conf;
- int err;
-
- memset(&udp_conf, 0, sizeof(udp_conf));
-
- if (ipv6) {
- udp_conf.family = AF_INET6;
- udp_conf.ipv6_v6only = 1;
- udp_conf.use_udp6_rx_checksums =
- !(flags & GENEVE_F_UDP_ZERO_CSUM6_RX);
- } else {
- udp_conf.family = AF_INET;
- udp_conf.local_ip.s_addr = htonl(INADDR_ANY);
- }
-
- udp_conf.local_udp_port = port;
-
- /* Open UDP socket */
- err = udp_sock_create(net, &udp_conf, &sock);
- if (err < 0)
- return ERR_PTR(err);
-
- return sock;
-}
-
-static void geneve_notify_add_rx_port(struct geneve_sock *gs)
-{
- struct net_device *dev;
- struct sock *sk = gs->sock->sk;
- struct net *net = sock_net(sk);
- sa_family_t sa_family = geneve_get_sk_family(gs);
- int err;
-
- if (sa_family == AF_INET) {
- err = udp_add_offload(sock_net(sk), &gs->udp_offloads);
- if (err)
- pr_warn("geneve: udp_add_offload failed with status %d\n",
- err);
- }
-
- rcu_read_lock();
- for_each_netdev_rcu(net, dev) {
-#ifdef HAVE_NDO_ADD_GENEVE_PORT
- __be16 port = inet_sk(sk)->inet_sport;
-
- if (dev->netdev_ops->ndo_add_geneve_port)
- dev->netdev_ops->ndo_add_geneve_port(dev, sa_family,
- port);
-#elif defined(HAVE_NDO_UDP_TUNNEL_ADD)
- struct udp_tunnel_info ti;
- ti.type = UDP_TUNNEL_TYPE_GENEVE;
- ti.sa_family = sa_family;
- ti.port = inet_sk(sk)->inet_sport;
-
- if (dev->netdev_ops->ndo_udp_tunnel_add)
- dev->netdev_ops->ndo_udp_tunnel_add(dev, &ti);
-#endif
- }
- rcu_read_unlock();
-}
-
-static void geneve_notify_del_rx_port(struct geneve_sock *gs)
-{
- struct net_device *dev;
- struct sock *sk = gs->sock->sk;
- struct net *net = sock_net(sk);
- sa_family_t sa_family = geneve_get_sk_family(gs);
-
- rcu_read_lock();
- for_each_netdev_rcu(net, dev) {
-#ifdef HAVE_NDO_ADD_GENEVE_PORT
- __be16 port = inet_sk(sk)->inet_sport;
-
- if (dev->netdev_ops->ndo_del_geneve_port)
- dev->netdev_ops->ndo_del_geneve_port(dev, sa_family,
- port);
-#elif defined(HAVE_NDO_UDP_TUNNEL_ADD)
- struct udp_tunnel_info ti;
- ti.type = UDP_TUNNEL_TYPE_GENEVE;
- ti.port = inet_sk(sk)->inet_sport;
- ti.sa_family = sa_family;
-
- if (dev->netdev_ops->ndo_udp_tunnel_del)
- dev->netdev_ops->ndo_udp_tunnel_del(dev, &ti);
-#endif
- }
-
- rcu_read_unlock();
-
- if (sa_family == AF_INET)
- udp_del_offload(&gs->udp_offloads);
-}
-
-#if defined(HAVE_UDP_OFFLOAD) || \
- defined(HAVE_UDP_TUNNEL_SOCK_CFG_GRO_RECEIVE)
-
-static int geneve_hlen(struct genevehdr *gh)
-{
- return sizeof(*gh) + gh->opt_len * 4;
-}
-
-#ifndef HAVE_UDP_OFFLOAD_ARG_UOFF
-static struct sk_buff **geneve_gro_receive(struct sk_buff **head,
- struct sk_buff *skb)
-#else
-static struct sk_buff **geneve_gro_receive(struct sk_buff **head,
- struct sk_buff *skb,
- struct udp_offload *uoff)
-#endif
-{
- struct sk_buff *p, **pp = NULL;
- struct genevehdr *gh, *gh2;
- unsigned int hlen, gh_len, off_gnv;
- const struct packet_offload *ptype;
- __be16 type;
- int flush = 1;
-
- off_gnv = skb_gro_offset(skb);
- hlen = off_gnv + sizeof(*gh);
- gh = skb_gro_header_fast(skb, off_gnv);
- if (skb_gro_header_hard(skb, hlen)) {
- gh = skb_gro_header_slow(skb, hlen, off_gnv);
- if (unlikely(!gh))
- goto out;
- }
-
- if (gh->ver != GENEVE_VER || gh->oam)
- goto out;
- gh_len = geneve_hlen(gh);
-
- hlen = off_gnv + gh_len;
- if (skb_gro_header_hard(skb, hlen)) {
- gh = skb_gro_header_slow(skb, hlen, off_gnv);
- if (unlikely(!gh))
- goto out;
- }
-
- for (p = *head; p; p = p->next) {
- if (!NAPI_GRO_CB(p)->same_flow)
- continue;
-
- gh2 = (struct genevehdr *)(p->data + off_gnv);
- if (gh->opt_len != gh2->opt_len ||
- memcmp(gh, gh2, gh_len)) {
- NAPI_GRO_CB(p)->same_flow = 0;
- continue;
- }
- }
-
- type = gh->proto_type;
-
- rcu_read_lock();
- ptype = gro_find_receive_by_type(type);
- if (!ptype)
- goto out_unlock;
-
- skb_gro_pull(skb, gh_len);
- skb_gro_postpull_rcsum(skb, gh, gh_len);
- pp = ptype->callbacks.gro_receive(head, skb);
- flush = 0;
-
-out_unlock:
- rcu_read_unlock();
-out:
- NAPI_GRO_CB(skb)->flush |= flush;
-
- return pp;
-}
-
-#ifndef HAVE_UDP_OFFLOAD_ARG_UOFF
-static int geneve_gro_complete(struct sk_buff *skb, int nhoff)
-#else
-static int geneve_gro_complete(struct sk_buff *skb, int nhoff,
- struct udp_offload *uoff)
-#endif
-{
- struct genevehdr *gh;
- struct packet_offload *ptype;
- __be16 type;
- int gh_len;
- int err = -ENOSYS;
-
- udp_tunnel_gro_complete(skb, nhoff);
-
- gh = (struct genevehdr *)(skb->data + nhoff);
- gh_len = geneve_hlen(gh);
- type = gh->proto_type;
-
- rcu_read_lock();
- ptype = gro_find_complete_by_type(type);
- if (ptype)
- err = ptype->callbacks.gro_complete(skb, nhoff + gh_len);
-
- rcu_read_unlock();
-
- skb_set_inner_mac_header(skb, nhoff + gh_len);
- return err;
-}
-#endif
-
-/* Create new listen socket if needed */
-static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port,
- bool ipv6, u32 flags)
-{
- struct geneve_net *gn = net_generic(net, geneve_net_id);
- struct geneve_sock *gs;
- struct socket *sock;
- struct udp_tunnel_sock_cfg tunnel_cfg;
- int h;
-
- gs = kzalloc(sizeof(*gs), GFP_KERNEL);
- if (!gs)
- return ERR_PTR(-ENOMEM);
-
- sock = geneve_create_sock(net, ipv6, port, flags);
- if (IS_ERR(sock)) {
- kfree(gs);
- return ERR_CAST(sock);
- }
-
- gs->sock = sock;
- gs->refcnt = 1;
- for (h = 0; h < VNI_HASH_SIZE; ++h)
- INIT_HLIST_HEAD(&gs->vni_list[h]);
-
- /* Initialize the geneve udp offloads structure */
-#ifdef HAVE_UDP_OFFLOAD
- gs->udp_offloads.port = port;
- gs->udp_offloads.callbacks.gro_receive = geneve_gro_receive;
- gs->udp_offloads.callbacks.gro_complete = geneve_gro_complete;
-#endif
-
- geneve_notify_add_rx_port(gs);
- /* Mark socket as an encapsulation socket */
- memset(&tunnel_cfg, 0, sizeof(tunnel_cfg));
- tunnel_cfg.sk_user_data = gs;
- tunnel_cfg.encap_type = 1;
-#ifdef HAVE_UDP_TUNNEL_SOCK_CFG_GRO_RECEIVE
- tunnel_cfg.gro_receive = geneve_gro_receive;
- tunnel_cfg.gro_complete = geneve_gro_complete;
-#endif
- tunnel_cfg.encap_rcv = geneve_udp_encap_recv;
- tunnel_cfg.encap_destroy = NULL;
- setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
- list_add(&gs->list, &gn->sock_list);
- return gs;
-}
-
-static void __geneve_sock_release(struct geneve_sock *gs)
-{
- if (!gs || --gs->refcnt)
- return;
-
- list_del(&gs->list);
- geneve_notify_del_rx_port(gs);
- udp_tunnel_sock_release(gs->sock);
- kfree_rcu(gs, rcu);
-}
-
-static void geneve_sock_release(struct geneve_dev *geneve)
-{
- struct geneve_sock *gs4 = rtnl_dereference(geneve->sock4);
-#if IS_ENABLED(CONFIG_IPV6)
- struct geneve_sock *gs6 = rtnl_dereference(geneve->sock6);
-
- rcu_assign_pointer(geneve->sock6, NULL);
-#endif
-
- rcu_assign_pointer(geneve->sock4, NULL);
- synchronize_net();
-
- __geneve_sock_release(gs4);
-#if IS_ENABLED(CONFIG_IPV6)
- __geneve_sock_release(gs6);
-#endif
-}
-
-static struct geneve_sock *geneve_find_sock(struct geneve_net *gn,
- sa_family_t family,
- __be16 dst_port)
-{
- struct geneve_sock *gs;
-
- list_for_each_entry(gs, &gn->sock_list, list) {
- if (inet_sk(gs->sock->sk)->inet_sport == dst_port &&
- geneve_get_sk_family(gs) == family) {
- return gs;
- }
- }
- return NULL;
-}
-
-static int geneve_sock_add(struct geneve_dev *geneve, bool ipv6)
-{
- struct net *net = geneve->net;
- struct geneve_net *gn = net_generic(net, geneve_net_id);
- struct geneve_sock *gs;
- __u32 hash;
-
- gs = geneve_find_sock(gn, ipv6 ? AF_INET6 : AF_INET, geneve->dst_port);
- if (gs) {
- gs->refcnt++;
- goto out;
- }
-
- gs = geneve_socket_create(net, geneve->dst_port, ipv6, geneve->flags);
- if (IS_ERR(gs))
- return PTR_ERR(gs);
-
-out:
- gs->collect_md = geneve->collect_md;
- gs->flags = geneve->flags;
-#if IS_ENABLED(CONFIG_IPV6)
- if (ipv6)
- rcu_assign_pointer(geneve->sock6, gs);
- else
-#endif
- rcu_assign_pointer(geneve->sock4, gs);
-
- hash = geneve_net_vni_hash(geneve->vni);
- hlist_add_head_rcu(&geneve->hlist, &gs->vni_list[hash]);
- return 0;
-}
-
-static int geneve_open(struct net_device *dev)
-{
- struct geneve_dev *geneve = netdev_priv(dev);
- bool ipv6 = geneve->remote.sa.sa_family == AF_INET6;
- bool metadata = geneve->collect_md;
- int ret = 0;
-
-#if IS_ENABLED(CONFIG_IPV6)
- if (ipv6 || metadata)
- ret = geneve_sock_add(geneve, true);
-#endif
-
- if (!ret && (!ipv6 || metadata))
- ret = geneve_sock_add(geneve, false);
- if (ret < 0)
- geneve_sock_release(geneve);
-
- return ret;
-}
-
-static int geneve_stop(struct net_device *dev)
-{
- struct geneve_dev *geneve = netdev_priv(dev);
-
- if (!hlist_unhashed(&geneve->hlist))
- hlist_del_rcu(&geneve->hlist);
- geneve_sock_release(geneve);
- return 0;
-}
-
-static void geneve_build_header(struct genevehdr *geneveh,
- __be16 tun_flags, u8 vni[3],
- u8 options_len, u8 *options)
-{
- geneveh->ver = GENEVE_VER;
- geneveh->opt_len = options_len / 4;
- geneveh->oam = !!(tun_flags & TUNNEL_OAM);
- geneveh->critical = !!(tun_flags & TUNNEL_CRIT_OPT);
- geneveh->rsvd1 = 0;
- memcpy(geneveh->vni, vni, 3);
- geneveh->proto_type = htons(ETH_P_TEB);
- geneveh->rsvd2 = 0;
-
- memcpy(geneveh->options, options, options_len);
-}
-
-static int push_vlan_tag(struct sk_buff *skb)
-{
- if (skb_vlan_tag_present(skb)) {
- __be16 vlan_proto = skb->vlan_proto;
- int err;
-
- err = __vlan_insert_tag(skb, skb->vlan_proto,
- skb_vlan_tag_get(skb));
-
- if (unlikely(err))
- return err;
- skb->vlan_tci = 0;
- skb->protocol = vlan_proto;
- }
- return 0;
-}
-
-static int geneve_build_skb(struct rtable *rt, struct sk_buff *skb,
- __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt,
- u32 flags, bool xnet)
-{
- struct genevehdr *gnvh;
- int min_headroom;
- int err;
- bool udp_sum = !(flags & GENEVE_F_UDP_ZERO_CSUM_TX);
-
- skb_scrub_packet(skb, xnet);
-
- min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
- + GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr)
- + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
-
- err = skb_cow_head(skb, min_headroom);
- if (unlikely(err))
- goto free_rt;
-
- err = push_vlan_tag(skb);
- if (unlikely(err))
- goto free_rt;
-
- err = udp_tunnel_handle_offloads(skb, udp_sum);
- if (err)
- goto free_rt;
-
- gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len);
- geneve_build_header(gnvh, tun_flags, vni, opt_len, opt);
-
- ovs_skb_set_inner_protocol(skb, htons(ETH_P_TEB));
- return 0;
-
-free_rt:
- ip_rt_put(rt);
- return err;
-}
-
-#if IS_ENABLED(CONFIG_IPV6)
-static int geneve6_build_skb(struct dst_entry *dst, struct sk_buff *skb,
- __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt,
- u32 flags, bool xnet)
-{
- struct genevehdr *gnvh;
- int min_headroom;
- int err;
- bool udp_sum = !(flags & GENEVE_F_UDP_ZERO_CSUM6_TX);
-
- skb_scrub_packet(skb, xnet);
-
- min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len
- + GENEVE_BASE_HLEN + opt_len + sizeof(struct ipv6hdr)
- + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
-
- err = skb_cow_head(skb, min_headroom);
- if (unlikely(err))
- goto free_dst;
-
- err = push_vlan_tag(skb);
- if (unlikely(err))
- goto free_dst;
-
- err = udp_tunnel_handle_offloads(skb, udp_sum);
- if (err)
- goto free_dst;
-
- gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len);
- geneve_build_header(gnvh, tun_flags, vni, opt_len, opt);
-
- ovs_skb_set_inner_protocol(skb, htons(ETH_P_TEB));
- return 0;
-
-free_dst:
- dst_release(dst);
- return err;
-}
-#endif
-
-static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
- struct net_device *dev,
- struct flowi4 *fl4,
- struct ip_tunnel_info *info,
- __be16 dport, __be16 sport)
-{
- bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
- struct geneve_dev *geneve = netdev_priv(dev);
- struct dst_cache *dst_cache;
- struct rtable *rt = NULL;
- __u8 tos;
-
- if (!rcu_dereference(geneve->sock4))
- return ERR_PTR(-EIO);
-
- memset(fl4, 0, sizeof(*fl4));
- fl4->flowi4_mark = skb->mark;
- fl4->flowi4_proto = IPPROTO_UDP;
- fl4->fl4_dport = dport;
- fl4->fl4_sport = sport;
-
- if (info) {
- fl4->daddr = info->key.u.ipv4.dst;
- fl4->saddr = info->key.u.ipv4.src;
- fl4->flowi4_tos = RT_TOS(info->key.tos);
- dst_cache = &info->dst_cache;
- } else {
- tos = geneve->tos;
- if (tos == 1) {
- const struct iphdr *iip = ip_hdr(skb);
-
- tos = ip_tunnel_get_dsfield(iip, skb);
- use_cache = false;
- }
-
- fl4->flowi4_tos = RT_TOS(tos);
- fl4->daddr = geneve->remote.sin.sin_addr.s_addr;
- dst_cache = &geneve->dst_cache;
- }
-
- if (use_cache) {
- rt = dst_cache_get_ip4(dst_cache, &fl4->saddr);
- if (rt)
- return rt;
- }
-
- rt = ip_route_output_key(geneve->net, fl4);
- if (IS_ERR(rt)) {
- netdev_dbg(dev, "no route to %pI4\n", &fl4->daddr);
- return ERR_PTR(-ENETUNREACH);
- }
- if (rt->dst.dev == dev) { /* is this necessary? */
- netdev_dbg(dev, "circular route to %pI4\n", &fl4->daddr);
- ip_rt_put(rt);
- return ERR_PTR(-ELOOP);
- }
- if (use_cache)
- dst_cache_set_ip4(dst_cache, &rt->dst, fl4->saddr);
- return rt;
-}
-
-#if IS_ENABLED(CONFIG_IPV6)
-static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
- struct net_device *dev,
- struct flowi6 *fl6,
- struct ip_tunnel_info *info,
- __be16 dport, __be16 sport)
-{
- bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
- struct geneve_dev *geneve = netdev_priv(dev);
- struct dst_entry *dst = NULL;
- struct dst_cache *dst_cache;
- struct geneve_sock *gs6;
- __u8 prio;
-
- gs6 = rcu_dereference(geneve->sock6);
- if (!gs6)
- return ERR_PTR(-EIO);
-
- memset(fl6, 0, sizeof(*fl6));
- fl6->flowi6_mark = skb->mark;
- fl6->flowi6_proto = IPPROTO_UDP;
- fl6->fl6_dport = dport;
- fl6->fl6_sport = sport;
-
- if (info) {
- fl6->daddr = info->key.u.ipv6.dst;
- fl6->saddr = info->key.u.ipv6.src;
- fl6->flowlabel = ip6_make_flowinfo(RT_TOS(info->key.tos),
- info->key.label);
- dst_cache = &info->dst_cache;
- } else {
- prio = geneve->tos;
- if (prio == 1) {
- const struct iphdr *iip = ip_hdr(skb);
-
- prio = ip_tunnel_get_dsfield(iip, skb);
- use_cache = false;
- }
-
- fl6->flowlabel = ip6_make_flowinfo(RT_TOS(prio),
- geneve->label);
- fl6->daddr = geneve->remote.sin6.sin6_addr;
- dst_cache = &geneve->dst_cache;
- }
-
- if (use_cache) {
- dst = dst_cache_get_ip6(dst_cache, &fl6->saddr);
- if (dst)
- return dst;
- }
-
-#if defined(HAVE_IPV6_STUB_WITH_DST_ENTRY) && defined(HAVE_IPV6_DST_LOOKUP_FLOW)
-#ifdef HAVE_IPV6_DST_LOOKUP_FLOW_NET
- dst = ipv6_stub->ipv6_dst_lookup_flow(geneve->net, gs6->sock->sk, fl6,
- NULL);
-#else
- dst = ipv6_stub->ipv6_dst_lookup_flow(gs6->sock->sk, fl6,
- NULL);
-#endif
- if (IS_ERR(dst)) {
-#elif defined(HAVE_IPV6_DST_LOOKUP_FLOW_NET)
- if (ipv6_stub->ipv6_dst_lookup_flow(geneve->net, gs6->sock->sk, &dst,
- fl6)) {
-#elif defined(HAVE_IPV6_DST_LOOKUP_FLOW)
- if (ipv6_stub->ipv6_dst_lookup_flow(gs6->sock->sk, &dst, fl6)) {
-#elif defined(HAVE_IPV6_DST_LOOKUP_NET)
- if (ipv6_stub->ipv6_dst_lookup(geneve->net, gs6->sock->sk, &dst, fl6)) {
-#elif defined(HAVE_IPV6_STUB)
- if (ipv6_stub->ipv6_dst_lookup(gs6->sock->sk, &dst, fl6)) {
-#else
- if (ip6_dst_lookup(gs6->sock->sk, &dst, fl6)) {
-#endif
- netdev_dbg(dev, "no route to %pI6\n", &fl6->daddr);
- return ERR_PTR(-ENETUNREACH);
- }
- if (dst->dev == dev) { /* is this necessary? */
- netdev_dbg(dev, "circular route to %pI6\n", &fl6->daddr);
- dst_release(dst);
- return ERR_PTR(-ELOOP);
- }
-
- if (use_cache)
- dst_cache_set_ip6(dst_cache, dst, &fl6->saddr);
- return dst;
-}
-#endif
-
-/* Convert 64 bit tunnel ID to 24 bit VNI. */
-static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni)
-{
-#ifdef __BIG_ENDIAN
- vni[0] = (__force __u8)(tun_id >> 16);
- vni[1] = (__force __u8)(tun_id >> 8);
- vni[2] = (__force __u8)tun_id;
-#else
- vni[0] = (__force __u8)((__force u64)tun_id >> 40);
- vni[1] = (__force __u8)((__force u64)tun_id >> 48);
- vni[2] = (__force __u8)((__force u64)tun_id >> 56);
-#endif
-}
-
-static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
- struct ip_tunnel_info *info)
-{
- struct geneve_dev *geneve = netdev_priv(dev);
- struct geneve_sock *gs4;
- struct rtable *rt = NULL;
- const struct iphdr *iip; /* interior IP header */
- int err = -EINVAL;
- struct flowi4 fl4;
- __u8 tos, ttl;
- __be16 sport;
- __be16 df;
- bool xnet = !net_eq(geneve->net, dev_net(geneve->dev));
- u32 flags = geneve->flags;
-
- gs4 = rcu_dereference(geneve->sock4);
- if (!gs4)
- goto tx_error;
-
- if (geneve->collect_md) {
- if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) {
- netdev_dbg(dev, "no tunnel metadata\n");
- goto tx_error;
- }
- if (info && ip_tunnel_info_af(info) != AF_INET)
- goto tx_error;
- }
-
- sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
- rt = geneve_get_v4_rt(skb, dev, &fl4, info, geneve->dst_port, sport);
- if (IS_ERR(rt)) {
- err = PTR_ERR(rt);
- goto tx_error;
- }
-
- skb_reset_mac_header(skb);
-
- iip = ip_hdr(skb);
-
- if (info) {
- const struct ip_tunnel_key *key = &info->key;
- u8 *opts = NULL;
- u8 vni[3];
-
- tunnel_id_to_vni(key->tun_id, vni);
- if (info->options_len)
- opts = ip_tunnel_info_opts(info);
-
- if (key->tun_flags & TUNNEL_CSUM)
- flags &= ~GENEVE_F_UDP_ZERO_CSUM_TX;
- else
- flags |= GENEVE_F_UDP_ZERO_CSUM_TX;
-
- err = geneve_build_skb(rt, skb, key->tun_flags, vni,
- info->options_len, opts, flags, xnet);
- if (unlikely(err))
- goto tx_error;
-
- tos = ip_tunnel_ecn_encap(key->tos, iip, skb);
- ttl = key->ttl;
- df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
- } else {
- err = geneve_build_skb(rt, skb, 0, geneve->vni,
- 0, NULL, flags, xnet);
- if (unlikely(err))
- goto tx_error;
-
- tos = ip_tunnel_ecn_encap(fl4.flowi4_tos, iip, skb);
- ttl = geneve->ttl;
- if (!ttl && IN_MULTICAST(ntohl(fl4.daddr)))
- ttl = 1;
- ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
- df = 0;
- }
- udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, fl4.saddr, fl4.daddr,
- tos, ttl, df, sport, geneve->dst_port,
- !net_eq(geneve->net, dev_net(geneve->dev)),
- !!(flags & GENEVE_F_UDP_ZERO_CSUM_TX));
-
- return NETDEV_TX_OK;
-
-tx_error:
- dev_kfree_skb(skb);
-
- if (err == -ELOOP)
- dev->stats.collisions++;
- else if (err == -ENETUNREACH)
- dev->stats.tx_carrier_errors++;
-
- dev->stats.tx_errors++;
- return NETDEV_TX_OK;
-}
-
-#if IS_ENABLED(CONFIG_IPV6)
-static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
- struct ip_tunnel_info *info)
-{
- struct geneve_dev *geneve = netdev_priv(dev);
- struct dst_entry *dst = NULL;
- const struct iphdr *iip; /* interior IP header */
- struct geneve_sock *gs6;
- int err = -EINVAL;
- struct flowi6 fl6;
- __u8 prio, ttl;
- __be16 sport;
- __be32 label;
- bool xnet = !net_eq(geneve->net, dev_net(geneve->dev));
- u32 flags = geneve->flags;
-
- gs6 = rcu_dereference(geneve->sock6);
- if (!gs6)
- goto tx_error;
-
- if (geneve->collect_md) {
- if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) {
- netdev_dbg(dev, "no tunnel metadata\n");
- goto tx_error;
- }
- }
-
- sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
- dst = geneve_get_v6_dst(skb, dev, &fl6, info, geneve->dst_port, sport);
- if (IS_ERR(dst)) {
- err = PTR_ERR(dst);
- goto tx_error;
- }
-
- skb_reset_mac_header(skb);
-
- iip = ip_hdr(skb);
-
- if (info) {
- const struct ip_tunnel_key *key = &info->key;
- u8 *opts = NULL;
- u8 vni[3];
-
- tunnel_id_to_vni(key->tun_id, vni);
- if (info->options_len)
- opts = ip_tunnel_info_opts(info);
-
- if (key->tun_flags & TUNNEL_CSUM)
- flags &= ~GENEVE_F_UDP_ZERO_CSUM6_TX;
- else
- flags |= GENEVE_F_UDP_ZERO_CSUM6_TX;
-
- err = geneve6_build_skb(dst, skb, key->tun_flags, vni,
- info->options_len, opts,
- flags, xnet);
- if (unlikely(err))
- goto tx_error;
-
- prio = ip_tunnel_ecn_encap(key->tos, iip, skb);
- ttl = key->ttl;
- label = info->key.label;
- } else {
- err = geneve6_build_skb(dst, skb, 0, geneve->vni,
- 0, NULL, flags, xnet);
- if (unlikely(err))
- goto tx_error;
-
- prio = ip_tunnel_ecn_encap(ip6_tclass(fl6.flowlabel),
- iip, skb);
- ttl = geneve->ttl;
- if (!ttl && ipv6_addr_is_multicast(&fl6.daddr))
- ttl = 1;
- ttl = ttl ? : ip6_dst_hoplimit(dst);
- label = geneve->label;
- }
- udp_tunnel6_xmit_skb(dst, gs6->sock->sk, skb, dev,
- &fl6.saddr, &fl6.daddr, prio, ttl, label,
- sport, geneve->dst_port,
- !!(flags & GENEVE_F_UDP_ZERO_CSUM6_TX));
- return NETDEV_TX_OK;
-
-tx_error:
- dev_kfree_skb(skb);
-
- if (err == -ELOOP)
- dev->stats.collisions++;
- else if (err == -ENETUNREACH)
- dev->stats.tx_carrier_errors++;
-
- dev->stats.tx_errors++;
- return NETDEV_TX_OK;
-}
-#endif
-
-netdev_tx_t rpl_geneve_xmit(struct sk_buff *skb)
-{
- struct net_device *dev = skb->dev;
- struct geneve_dev *geneve = netdev_priv(dev);
- struct ip_tunnel_info *info = NULL;
-
- if (geneve->collect_md)
- info = skb_tunnel_info(skb);
-
-#if IS_ENABLED(CONFIG_IPV6)
- if ((info && ip_tunnel_info_af(info) == AF_INET6) ||
- (!info && geneve->remote.sa.sa_family == AF_INET6))
- return geneve6_xmit_skb(skb, dev, info);
-#endif
- return geneve_xmit_skb(skb, dev, info);
-}
-EXPORT_SYMBOL_GPL(rpl_geneve_xmit);
-
-static netdev_tx_t geneve_dev_xmit(struct sk_buff *skb, struct net_device *dev)
-{
- /* Drop All packets coming from networking stack. OVS-CB is
- * not initialized for these packets.
- */
-
- dev_kfree_skb(skb);
- dev->stats.tx_dropped++;
- return NETDEV_TX_OK;
-}
-
-static int __geneve_change_mtu(struct net_device *dev, int new_mtu, bool strict)
-{
- struct geneve_dev *geneve = netdev_priv(dev);
- /* The max_mtu calculation does not take account of GENEVE
- * options, to avoid excluding potentially valid
- * configurations.
- */
- int max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - dev->hard_header_len;
-
- if (geneve->remote.sa.sa_family == AF_INET6)
- max_mtu -= sizeof(struct ipv6hdr);
- else
- max_mtu -= sizeof(struct iphdr);
-
- if (new_mtu < 68)
- return -EINVAL;
-
- if (new_mtu > max_mtu) {
- if (strict)
- return -EINVAL;
-
- new_mtu = max_mtu;
- }
-
- dev->mtu = new_mtu;
- return 0;
-}
-
-static int geneve_change_mtu(struct net_device *dev, int new_mtu)
-{
- return __geneve_change_mtu(dev, new_mtu, true);
-}
-
-int ovs_geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
-{
- struct ip_tunnel_info *info = skb_tunnel_info(skb);
- struct geneve_dev *geneve = netdev_priv(dev);
- struct rtable *rt;
- struct flowi4 fl4;
- __be16 sport;
-#if IS_ENABLED(CONFIG_IPV6)
- struct dst_entry *dst;
- struct flowi6 fl6;
-#endif
-
- sport = udp_flow_src_port(geneve->net, skb,
- 1, USHRT_MAX, true);
-
- if (ip_tunnel_info_af(info) == AF_INET) {
- rt = geneve_get_v4_rt(skb, dev, &fl4, info, geneve->dst_port, sport);
- if (IS_ERR(rt))
- return PTR_ERR(rt);
-
- ip_rt_put(rt);
- info->key.u.ipv4.src = fl4.saddr;
-#if IS_ENABLED(CONFIG_IPV6)
- } else if (ip_tunnel_info_af(info) == AF_INET6) {
- dst = geneve_get_v6_dst(skb, dev, &fl6, info, geneve->dst_port, sport);
- if (IS_ERR(dst))
- return PTR_ERR(dst);
-
- dst_release(dst);
- info->key.u.ipv6.src = fl6.saddr;
-#endif
- } else {
- return -EINVAL;
- }
-
- info->key.tp_src = sport;
- info->key.tp_dst = geneve->dst_port;
- return 0;
-}
-EXPORT_SYMBOL_GPL(ovs_geneve_fill_metadata_dst);
-
-static const struct net_device_ops geneve_netdev_ops = {
- .ndo_init = geneve_init,
- .ndo_uninit = geneve_uninit,
- .ndo_open = geneve_open,
- .ndo_stop = geneve_stop,
- .ndo_start_xmit = geneve_dev_xmit,
- .ndo_get_stats64 = ip_tunnel_get_stats64,
-#ifdef HAVE_RHEL7_MAX_MTU
- .ndo_size = sizeof(struct net_device_ops),
- .extended.ndo_change_mtu = geneve_change_mtu,
-#else
- .ndo_change_mtu = geneve_change_mtu,
-#endif
- .ndo_validate_addr = eth_validate_addr,
- .ndo_set_mac_address = eth_mac_addr,
-#ifdef HAVE_NDO_FILL_METADATA_DST
- .ndo_fill_metadata_dst = geneve_fill_metadata_dst,
-#endif
-};
-
-static void geneve_get_drvinfo(struct net_device *dev,
- struct ethtool_drvinfo *drvinfo)
-{
- strlcpy(drvinfo->version, GENEVE_NETDEV_VER, sizeof(drvinfo->version));
- strlcpy(drvinfo->driver, "geneve", sizeof(drvinfo->driver));
-}
-
-static const struct ethtool_ops geneve_ethtool_ops = {
- .get_drvinfo = geneve_get_drvinfo,
- .get_link = ethtool_op_get_link,
-};
-
-/* Info for udev, that this is a virtual tunnel endpoint */
-static struct device_type geneve_type = {
- .name = "geneve",
-};
-
-/* Calls the ndo_add_geneve_port or ndo_udp_tunnel_add of the caller
- * in order to supply the listening GENEVE udp ports. Callers are
- * expected to implement the ndo_add_geneve_port.
- */
-static void geneve_push_rx_ports(struct net_device *dev)
-{
-#ifdef HAVE_NDO_ADD_GENEVE_PORT
- struct net *net = dev_net(dev);
- struct geneve_net *gn = net_generic(net, geneve_net_id);
- struct geneve_sock *gs;
- sa_family_t sa_family;
- struct sock *sk;
- __be16 port;
-
- if (!dev->netdev_ops->ndo_add_geneve_port)
- return;
-
- rcu_read_lock();
- list_for_each_entry_rcu(gs, &gn->sock_list, list) {
- sk = gs->sock->sk;
- sa_family = sk->sk_family;
- port = inet_sk(sk)->inet_sport;
- dev->netdev_ops->ndo_add_geneve_port(dev, sa_family, port);
- }
- rcu_read_unlock();
-#elif defined(HAVE_NDO_UDP_TUNNEL_ADD)
- struct net *net = dev_net(dev);
- struct geneve_net *gn = net_generic(net, geneve_net_id);
- struct geneve_sock *gs;
- struct sock *sk;
-
- if (!dev->netdev_ops->ndo_udp_tunnel_add)
- return;
-
- rcu_read_lock();
- list_for_each_entry_rcu(gs, &gn->sock_list, list) {
- struct udp_tunnel_info ti;
- ti.type = UDP_TUNNEL_TYPE_GENEVE;
- sk = gs->sock->sk;
- ti.port = inet_sk(sk)->inet_sport;
- ti.sa_family = sk->sk_family;
- dev->netdev_ops->ndo_udp_tunnel_add(dev, &ti);
- }
- rcu_read_unlock();
-#endif
-}
-
-/* Initialize the device structure. */
-static void geneve_setup(struct net_device *dev)
-{
- ether_setup(dev);
-
- dev->netdev_ops = &geneve_netdev_ops;
- dev->ethtool_ops = &geneve_ethtool_ops;
-#ifndef HAVE_NEEDS_FREE_NETDEV
- dev->destructor = free_netdev;
-#else
- dev->needs_free_netdev = true;
-#endif
-
- SET_NETDEV_DEVTYPE(dev, &geneve_type);
-
- dev->features |= NETIF_F_LLTX;
- dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM;
- dev->features |= NETIF_F_RXCSUM;
- dev->features |= NETIF_F_GSO_SOFTWARE;
-
- dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
- dev->hw_features |= NETIF_F_GSO_SOFTWARE;
-
-#if 0
- netif_keep_dst(dev);
-#endif
- dev->priv_flags &= ~IFF_TX_SKB_SHARING;
- dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
- eth_hw_addr_random(dev);
-}
-
-static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = {
- [IFLA_GENEVE_ID] = { .type = NLA_U32 },
- [IFLA_GENEVE_REMOTE] = { .len = sizeof_field(struct iphdr, daddr) },
- [IFLA_GENEVE_REMOTE6] = { .len = sizeof(struct in6_addr) },
- [IFLA_GENEVE_TTL] = { .type = NLA_U8 },
- [IFLA_GENEVE_TOS] = { .type = NLA_U8 },
- [IFLA_GENEVE_LABEL] = { .type = NLA_U32 },
- [IFLA_GENEVE_PORT] = { .type = NLA_U16 },
- [IFLA_GENEVE_COLLECT_METADATA] = { .type = NLA_FLAG },
- [IFLA_GENEVE_UDP_CSUM] = { .type = NLA_U8 },
- [IFLA_GENEVE_UDP_ZERO_CSUM6_TX] = { .type = NLA_U8 },
- [IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 },
-};
-
-#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK
-static int geneve_validate(struct nlattr *tb[], struct nlattr *data[],
- struct netlink_ext_ack *extack)
-#else
-static int geneve_validate(struct nlattr *tb[], struct nlattr *data[])
-#endif
-{
- if (tb[IFLA_ADDRESS]) {
- if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
- return -EINVAL;
-
- if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
- return -EADDRNOTAVAIL;
- }
-
- if (!data)
- return -EINVAL;
-
- if (data[IFLA_GENEVE_ID]) {
- __u32 vni = nla_get_u32(data[IFLA_GENEVE_ID]);
-
- if (vni >= GENEVE_VID_MASK)
- return -ERANGE;
- }
-
- return 0;
-}
-
-static struct geneve_dev *geneve_find_dev(struct geneve_net *gn,
- __be16 dst_port,
- union geneve_addr *remote,
- u8 vni[],
- bool *tun_on_same_port,
- bool *tun_collect_md)
-{
- struct geneve_dev *geneve, *t;
-
- *tun_on_same_port = false;
- *tun_collect_md = false;
- t = NULL;
- list_for_each_entry(geneve, &gn->geneve_list, next) {
- if (geneve->dst_port == dst_port) {
- *tun_collect_md = geneve->collect_md;
- *tun_on_same_port = true;
- }
- if (!memcmp(vni, geneve->vni, sizeof(geneve->vni)) &&
- !memcmp(remote, &geneve->remote, sizeof(geneve->remote)) &&
- dst_port == geneve->dst_port)
- t = geneve;
- }
- return t;
-}
-
-static int geneve_configure(struct net *net, struct net_device *dev,
- union geneve_addr *remote,
- __u32 vni, __u8 ttl, __u8 tos, __be32 label,
- __be16 dst_port, bool metadata, u32 flags)
-{
- struct geneve_net *gn = net_generic(net, geneve_net_id);
- struct geneve_dev *t, *geneve = netdev_priv(dev);
- bool tun_collect_md, tun_on_same_port;
- int err, encap_len;
-
- if (!remote)
- return -EINVAL;
- if (metadata &&
- (remote->sa.sa_family != AF_UNSPEC || vni || tos || ttl || label))
- return -EINVAL;
-
- geneve->net = net;
- geneve->dev = dev;
-
- geneve->vni[0] = (vni & 0x00ff0000) >> 16;
- geneve->vni[1] = (vni & 0x0000ff00) >> 8;
- geneve->vni[2] = vni & 0x000000ff;
-
- if ((remote->sa.sa_family == AF_INET &&
- IN_MULTICAST(ntohl(remote->sin.sin_addr.s_addr))) ||
- (remote->sa.sa_family == AF_INET6 &&
- ipv6_addr_is_multicast(&remote->sin6.sin6_addr)))
- return -EINVAL;
- if (label && remote->sa.sa_family != AF_INET6)
- return -EINVAL;
-
- geneve->remote = *remote;
-
- geneve->ttl = ttl;
- geneve->tos = tos;
- geneve->label = label;
- geneve->dst_port = dst_port;
- geneve->collect_md = metadata;
- geneve->flags = flags;
-
- t = geneve_find_dev(gn, dst_port, remote, geneve->vni,
- &tun_on_same_port, &tun_collect_md);
- if (t)
- return -EBUSY;
-
- /* make enough headroom for basic scenario */
- encap_len = GENEVE_BASE_HLEN + ETH_HLEN;
- if (remote->sa.sa_family == AF_INET)
- encap_len += sizeof(struct iphdr);
- else
- encap_len += sizeof(struct ipv6hdr);
- dev->needed_headroom = encap_len + ETH_HLEN;
-
- if (metadata) {
- if (tun_on_same_port)
- return -EPERM;
- } else {
- if (tun_collect_md)
- return -EPERM;
- }
-
- dst_cache_reset(&geneve->dst_cache);
-
- err = register_netdevice(dev);
- if (err)
- return err;
-
- list_add(&geneve->next, &gn->geneve_list);
- return 0;
-}
-
-#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
-static int geneve_newlink(struct net *net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[],
- struct netlink_ext_ack *extack)
-#else
-static int geneve_newlink(struct net *net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[])
-#endif
-{
- __be16 dst_port = htons(GENEVE_UDP_PORT);
- __u8 ttl = 0, tos = 0;
- bool metadata = false;
- union geneve_addr remote = geneve_remote_unspec;
- __be32 label = 0;
- __u32 vni = 0;
- u32 flags = 0;
-
- if (data[IFLA_GENEVE_REMOTE] && data[IFLA_GENEVE_REMOTE6])
- return -EINVAL;
-
- if (data[IFLA_GENEVE_REMOTE]) {
- remote.sa.sa_family = AF_INET;
- remote.sin.sin_addr.s_addr =
- nla_get_in_addr(data[IFLA_GENEVE_REMOTE]);
- }
-
- if (data[IFLA_GENEVE_REMOTE6]) {
- if (!IS_ENABLED(CONFIG_IPV6))
- return -EPFNOSUPPORT;
-
- remote.sa.sa_family = AF_INET6;
- remote.sin6.sin6_addr =
- nla_get_in6_addr(data[IFLA_GENEVE_REMOTE6]);
-
- if (ipv6_addr_type(&remote.sin6.sin6_addr) &
- IPV6_ADDR_LINKLOCAL) {
- netdev_dbg(dev, "link-local remote is unsupported\n");
- return -EINVAL;
- }
- }
-
- if (data[IFLA_GENEVE_ID])
- vni = nla_get_u32(data[IFLA_GENEVE_ID]);
-
- if (data[IFLA_GENEVE_TTL])
- ttl = nla_get_u8(data[IFLA_GENEVE_TTL]);
-
- if (data[IFLA_GENEVE_TOS])
- tos = nla_get_u8(data[IFLA_GENEVE_TOS]);
-
- if (data[IFLA_GENEVE_LABEL])
- label = nla_get_be32(data[IFLA_GENEVE_LABEL]) &
- IPV6_FLOWLABEL_MASK;
-
- if (data[IFLA_GENEVE_PORT])
- dst_port = nla_get_be16(data[IFLA_GENEVE_PORT]);
-
- if (data[IFLA_GENEVE_COLLECT_METADATA])
- metadata = true;
-
- if (data[IFLA_GENEVE_UDP_CSUM] &&
- !nla_get_u8(data[IFLA_GENEVE_UDP_CSUM]))
- flags |= GENEVE_F_UDP_ZERO_CSUM_TX;
-
- if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX] &&
- nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]))
- flags |= GENEVE_F_UDP_ZERO_CSUM6_TX;
-
- if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX] &&
- nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]))
- flags |= GENEVE_F_UDP_ZERO_CSUM6_RX;
-
- return geneve_configure(net, dev, &remote, vni, ttl, tos, label,
- dst_port, metadata, flags);
-}
-
-static void geneve_dellink(struct net_device *dev, struct list_head *head)
-{
- struct geneve_dev *geneve = netdev_priv(dev);
-
- list_del(&geneve->next);
- unregister_netdevice_queue(dev, head);
-}
-
-static size_t geneve_get_size(const struct net_device *dev)
-{
- return nla_total_size(sizeof(__u32)) + /* IFLA_GENEVE_ID */
- nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GENEVE_REMOTE{6} */
- nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL */
- nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TOS */
- nla_total_size(sizeof(__be32)) + /* IFLA_GENEVE_LABEL */
- nla_total_size(sizeof(__be16)) + /* IFLA_GENEVE_PORT */
- nla_total_size(0) + /* IFLA_GENEVE_COLLECT_METADATA */
- nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_CSUM */
- nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_TX */
- nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_RX */
- 0;
-}
-
-static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
-{
- struct geneve_dev *geneve = netdev_priv(dev);
- __u32 vni;
-
- vni = (geneve->vni[0] << 16) | (geneve->vni[1] << 8) | geneve->vni[2];
- if (nla_put_u32(skb, IFLA_GENEVE_ID, vni))
- goto nla_put_failure;
-
- if (geneve->remote.sa.sa_family == AF_INET) {
- if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE,
- geneve->remote.sin.sin_addr.s_addr))
- goto nla_put_failure;
-#if IS_ENABLED(CONFIG_IPV6)
- } else {
- if (nla_put_in6_addr(skb, IFLA_GENEVE_REMOTE6,
- &geneve->remote.sin6.sin6_addr))
- goto nla_put_failure;
-#endif
- }
-
- if (nla_put_u8(skb, IFLA_GENEVE_TTL, geneve->ttl) ||
- nla_put_u8(skb, IFLA_GENEVE_TOS, geneve->tos) ||
- nla_put_be32(skb, IFLA_GENEVE_LABEL, geneve->label))
- goto nla_put_failure;
-
- if (nla_put_be16(skb, IFLA_GENEVE_PORT, geneve->dst_port))
- goto nla_put_failure;
-
- if (geneve->collect_md) {
- if (nla_put_flag(skb, IFLA_GENEVE_COLLECT_METADATA))
- goto nla_put_failure;
- }
-
- if (nla_put_u8(skb, IFLA_GENEVE_UDP_CSUM,
- !(geneve->flags & GENEVE_F_UDP_ZERO_CSUM_TX)) ||
- nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX,
- !!(geneve->flags & GENEVE_F_UDP_ZERO_CSUM6_TX)) ||
- nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
- !!(geneve->flags & GENEVE_F_UDP_ZERO_CSUM6_RX)))
- goto nla_put_failure;
-
- return 0;
-
-nla_put_failure:
- return -EMSGSIZE;
-}
-
-static struct rtnl_link_ops geneve_link_ops __read_mostly = {
- .kind = "ovs_geneve",
- .maxtype = IFLA_GENEVE_MAX,
- .policy = geneve_policy,
- .priv_size = sizeof(struct geneve_dev),
- .setup = geneve_setup,
- .validate = geneve_validate,
- .newlink = geneve_newlink,
- .dellink = geneve_dellink,
- .get_size = geneve_get_size,
- .fill_info = geneve_fill_info,
-};
-
-struct net_device *rpl_geneve_dev_create_fb(struct net *net, const char *name,
- u8 name_assign_type, u16 dst_port)
-{
- struct nlattr *tb[IFLA_MAX + 1];
- struct net_device *dev;
- LIST_HEAD(list_kill);
- int err;
-
- memset(tb, 0, sizeof(tb));
- dev = rtnl_create_link(net, name, name_assign_type,
- &geneve_link_ops, tb);
- if (IS_ERR(dev))
- return dev;
-
- err = geneve_configure(net, dev, &geneve_remote_unspec,
- 0, 0, 0, 0, htons(dst_port), true,
- GENEVE_F_UDP_ZERO_CSUM6_RX);
- if (err) {
- free_netdev(dev);
- return ERR_PTR(err);
- }
-
- /* openvswitch users expect packet sizes to be unrestricted,
- * so set the largest MTU we can.
- */
- err = __geneve_change_mtu(dev, IP_MAX_MTU, false);
- if (err)
- goto err;
-
- err = rtnl_configure_link(dev, NULL);
- if (err < 0)
- goto err;
-
- return dev;
-
- err:
- geneve_dellink(dev, &list_kill);
- unregister_netdevice_many(&list_kill);
- return ERR_PTR(err);
-}
-EXPORT_SYMBOL_GPL(rpl_geneve_dev_create_fb);
-
-static int geneve_netdevice_event(struct notifier_block *unused,
- unsigned long event, void *ptr)
-{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-
- if (event == NETDEV_OFFLOAD_PUSH_GENEVE)
- geneve_push_rx_ports(dev);
-
- return NOTIFY_DONE;
-}
-
-static struct notifier_block geneve_notifier_block __read_mostly = {
- .notifier_call = geneve_netdevice_event,
-};
-
-static __net_init int geneve_init_net(struct net *net)
-{
- struct geneve_net *gn = net_generic(net, geneve_net_id);
-
- INIT_LIST_HEAD(&gn->geneve_list);
- INIT_LIST_HEAD(&gn->sock_list);
- return 0;
-}
-
-static void __net_exit geneve_exit_net(struct net *net)
-{
- struct geneve_net *gn = net_generic(net, geneve_net_id);
- struct geneve_dev *geneve, *next;
- struct net_device *dev, *aux;
- LIST_HEAD(list);
-
- rtnl_lock();
-
- /* gather any geneve devices that were moved into this ns */
- for_each_netdev_safe(net, dev, aux)
- if (dev->rtnl_link_ops == &geneve_link_ops)
- unregister_netdevice_queue(dev, &list);
-
- /* now gather any other geneve devices that were created in this ns */
- list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) {
- /* If geneve->dev is in the same netns, it was already added
- * to the list by the previous loop.
- */
- if (!net_eq(dev_net(geneve->dev), net))
- unregister_netdevice_queue(geneve->dev, &list);
- }
-
- /* unregister the devices gathered above */
- unregister_netdevice_many(&list);
- rtnl_unlock();
-}
-
-static struct pernet_operations geneve_net_ops = {
- .init = geneve_init_net,
- .exit = geneve_exit_net,
- .id = &geneve_net_id,
- .size = sizeof(struct geneve_net),
-};
-
-int rpl_geneve_init_module(void)
-{
- int rc;
-
- rc = register_pernet_subsys(&geneve_net_ops);
- if (rc)
- goto out1;
-
- rc = register_netdevice_notifier(&geneve_notifier_block);
- if (rc)
- goto out2;
-
- rc = rtnl_link_register(&geneve_link_ops);
- if (rc)
- goto out3;
-
- pr_info("Geneve tunneling driver\n");
- return 0;
-
-out3:
- unregister_netdevice_notifier(&geneve_notifier_block);
-out2:
- unregister_pernet_subsys(&geneve_net_ops);
-out1:
- pr_err("Error while initializing GENEVE %d\n", rc);
- return rc;
-}
-
-void rpl_geneve_cleanup_module(void)
-{
- rtnl_link_unregister(&geneve_link_ops);
- unregister_netdevice_notifier(&geneve_notifier_block);
- unregister_pernet_subsys(&geneve_net_ops);
-}
-
-#endif
diff --git a/datapath/linux/compat/gre.c b/datapath/linux/compat/gre.c
deleted file mode 100644
index e57528f80..000000000
--- a/datapath/linux/compat/gre.c
+++ /dev/null
@@ -1,239 +0,0 @@
-/*
- * Copyright (c) 2007-2013 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
- */
-
-#include <linux/version.h>
-#include <linux/kconfig.h>
-#include <linux/module.h>
-#include <linux/if.h>
-#include <linux/if_tunnel.h>
-#include <linux/icmp.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/kernel.h>
-#include <linux/kmod.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <linux/spinlock.h>
-
-#include <net/gre.h>
-#include <net/icmp.h>
-#include <net/protocol.h>
-#include <net/route.h>
-#include <net/xfrm.h>
-
-#include "gso.h"
-
-#ifndef USE_UPSTREAM_TUNNEL
-#if IS_ENABLED(CONFIG_NET_IPGRE_DEMUX)
-
-static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly;
-
-int rpl_gre_add_protocol(const struct gre_protocol *proto, u8 version)
-{
- if (version >= GREPROTO_MAX)
- return -EINVAL;
-
- return (cmpxchg((const struct gre_protocol **)&gre_proto[version], NULL, proto) == NULL) ?
- 0 : -EBUSY;
-}
-EXPORT_SYMBOL_GPL(rpl_gre_add_protocol);
-
-int rpl_gre_del_protocol(const struct gre_protocol *proto, u8 version)
-{
- int ret;
-
- if (version >= GREPROTO_MAX)
- return -EINVAL;
-
- ret = (cmpxchg((const struct gre_protocol **)&gre_proto[version], proto, NULL) == proto) ?
- 0 : -EBUSY;
-
- if (ret)
- return ret;
-
- synchronize_rcu();
- return 0;
-}
-EXPORT_SYMBOL_GPL(rpl_gre_del_protocol);
-
-static int gre_rcv(struct sk_buff *skb)
-{
- const struct gre_protocol *proto;
- u8 ver;
- int ret;
-
- if (!pskb_may_pull(skb, 12))
- goto drop;
-
- ver = skb->data[1]&0x7f;
- if (ver >= GREPROTO_MAX)
- goto drop;
-
- rcu_read_lock();
- proto = rcu_dereference(gre_proto[ver]);
- if (!proto || !proto->handler)
- goto drop_unlock;
- ret = proto->handler(skb);
- rcu_read_unlock();
- return ret;
-
-drop_unlock:
- rcu_read_unlock();
-drop:
- kfree_skb(skb);
- return NET_RX_DROP;
-}
-
-static void gre_err(struct sk_buff *skb, u32 info)
-{
- const struct gre_protocol *proto;
- const struct iphdr *iph = (const struct iphdr *)skb->data;
- u8 ver = skb->data[(iph->ihl<<2) + 1]&0x7f;
-
- if (ver >= GREPROTO_MAX)
- return;
-
- rcu_read_lock();
- proto = rcu_dereference(gre_proto[ver]);
- if (proto && proto->err_handler)
- proto->err_handler(skb, info);
- rcu_read_unlock();
-}
-
-static const struct net_protocol net_gre_protocol = {
- .handler = gre_rcv,
- .err_handler = gre_err,
- .netns_ok = 1,
-};
-
-int rpl_gre_init(void)
-{
- pr_info("GRE over IPv4 demultiplexor driver\n");
-
- if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) {
- pr_err("can't add protocol\n");
- return -EEXIST;
- }
- return 0;
-}
-EXPORT_SYMBOL_GPL(rpl_gre_init);
-
-void rpl_gre_exit(void)
-{
- inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
-}
-EXPORT_SYMBOL_GPL(rpl_gre_exit);
-
-void rpl_gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
- int hdr_len)
-{
- struct gre_base_hdr *greh;
-
- skb_push(skb, hdr_len);
-
- skb_reset_transport_header(skb);
- greh = (struct gre_base_hdr *)skb->data;
- greh->flags = tnl_flags_to_gre_flags(tpi->flags);
- greh->protocol = tpi->proto;
-
- if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) {
- __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
-
- if (tpi->flags&TUNNEL_SEQ) {
- *ptr = tpi->seq;
- ptr--;
- }
- if (tpi->flags&TUNNEL_KEY) {
- *ptr = tpi->key;
- ptr--;
- }
- if (tpi->flags&TUNNEL_CSUM &&
- !(skb_shinfo(skb)->gso_type &
- (SKB_GSO_GRE|SKB_GSO_GRE_CSUM))) {
- *ptr = 0;
- *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
- skb->len, 0));
- }
- }
-}
-EXPORT_SYMBOL_GPL(rpl_gre_build_header);
-
-/* Fills in tpi and returns header length to be pulled. */
-int rpl_gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
- bool *csum_err, __be16 proto, int nhs)
-{
- const struct gre_base_hdr *greh;
- __be32 *options;
- int hdr_len;
-
- if (unlikely(!pskb_may_pull(skb, nhs + sizeof(struct gre_base_hdr))))
- return -EINVAL;
-
- greh = (struct gre_base_hdr *)(skb->data + nhs);
- if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
- return -EINVAL;
-
- tpi->flags = gre_flags_to_tnl_flags(greh->flags);
- hdr_len = gre_calc_hlen(tpi->flags);
-
- if (!pskb_may_pull(skb, nhs + hdr_len))
- return -EINVAL;
-
- greh = (struct gre_base_hdr *)(skb->data + nhs);
- tpi->proto = greh->protocol;
-
- options = (__be32 *)(greh + 1);
- if (greh->flags & GRE_CSUM) {
- if (skb_checksum_simple_validate(skb)) {
- *csum_err = true;
- return -EINVAL;
- }
-
- skb_checksum_try_convert(skb, IPPROTO_GRE, 0,
- null_compute_pseudo);
- options++;
- }
-
- if (greh->flags & GRE_KEY) {
- tpi->key = *options;
- options++;
- } else {
- tpi->key = 0;
- }
- if (unlikely(greh->flags & GRE_SEQ)) {
- tpi->seq = *options;
- options++;
- } else {
- tpi->seq = 0;
- }
- /* WCCP version 1 and 2 protocol decoding.
- * - Change protocol to IPv4/IPv6
- * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
- */
- if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
- tpi->proto = proto;
- if ((*(u8 *)options & 0xF0) != 0x40)
- hdr_len += 4;
- }
- tpi->hdr_len = hdr_len;
- return hdr_len;
-}
-EXPORT_SYMBOL(rpl_gre_parse_header);
-
-#endif /* CONFIG_NET_IPGRE_DEMUX */
-#endif /* USE_UPSTREAM_TUNNEL */
diff --git a/datapath/linux/compat/gso.c b/datapath/linux/compat/gso.c
deleted file mode 100644
index 65da5d876..000000000
--- a/datapath/linux/compat/gso.c
+++ /dev/null
@@ -1,317 +0,0 @@
-/*
- * Copyright (c) 2007-2013 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
- */
-
-#include <linux/version.h>
-
-#include <linux/module.h>
-#include <linux/if.h>
-#include <linux/if_tunnel.h>
-#include <linux/if_vlan.h>
-#include <linux/icmp.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/kernel.h>
-#include <linux/kmod.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <linux/spinlock.h>
-
-#include <net/gre.h>
-#include <net/icmp.h>
-#include <net/mpls.h>
-#include <net/protocol.h>
-#include <net/route.h>
-#include <net/xfrm.h>
-
-#include "gso.h"
-
-#ifdef OVS_USE_COMPAT_GSO_SEGMENTATION
-/* Strictly this is not needed and will be optimised out
- * as this code is guarded by if LINUX_VERSION_CODE < KERNEL_VERSION(3,19,0).
- * It is here to make things explicit should the compatibility
- * code be extended in some way prior extending its life-span
- * beyond v3.19.
- */
-static bool supports_mpls_gso(void)
-{
-/* MPLS GSO was introduced in v3.11, however it was not correctly
- * activated using mpls_features until v3.19. */
-#ifdef OVS_USE_COMPAT_GSO_SEGMENTATION
- return true;
-#else
- return false;
-#endif
-}
-
-int rpl_dev_queue_xmit(struct sk_buff *skb)
-{
-#undef dev_queue_xmit
- int err = -ENOMEM;
- bool mpls;
-
- mpls = false;
-
- /* Avoid traversing any VLAN tags that are present to determine if
- * the ethtype is MPLS. Instead compare the mac_len (end of L2) and
- * skb_network_offset() (beginning of L3) whose inequality will
- * indicate the presence of an MPLS label stack. */
- if (skb->mac_len != skb_network_offset(skb) && !supports_mpls_gso())
- mpls = true;
-
- if (mpls) {
- int features;
-
- features = netif_skb_features(skb);
-
- /* As of v3.11 the kernel provides an mpls_features field in
- * struct net_device which allows devices to advertise which
- * features its supports for MPLS. This value defaults to
- * NETIF_F_SG and as of v3.19.
- *
- * This compatibility code is intended for kernels older
- * than v3.19 that do not support MPLS GSO and do not
- * use mpls_features. Thus this code uses NETIF_F_SG
- * directly in place of mpls_features.
- */
- if (mpls)
- features &= NETIF_F_SG;
-
- if (netif_needs_gso(skb, features)) {
- struct sk_buff *nskb;
-
- nskb = skb_gso_segment(skb, features);
- if (!nskb) {
- if (unlikely(skb_cloned(skb) &&
- pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
- goto drop;
-
- skb_shinfo(skb)->gso_type &= ~SKB_GSO_DODGY;
- goto xmit;
- }
-
- if (IS_ERR(nskb)) {
- err = PTR_ERR(nskb);
- goto drop;
- }
- consume_skb(skb);
- skb = nskb;
-
- do {
- nskb = skb->next;
- skb->next = NULL;
- err = dev_queue_xmit(skb);
- skb = nskb;
- } while (skb);
-
- return err;
- }
- }
-xmit:
- return dev_queue_xmit(skb);
-
-drop:
- kfree_skb(skb);
- return err;
-}
-EXPORT_SYMBOL_GPL(rpl_dev_queue_xmit);
-#endif /* OVS_USE_COMPAT_GSO_SEGMENTATION */
-
-#ifndef USE_UPSTREAM_TUNNEL_GSO
-static __be16 __skb_network_protocol(struct sk_buff *skb)
-{
- __be16 type = skb->protocol;
- int vlan_depth = ETH_HLEN;
-
- while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
- struct vlan_hdr *vh;
-
- if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
- return 0;
-
- vh = (struct vlan_hdr *)(skb->data + vlan_depth);
- type = vh->h_vlan_encapsulated_proto;
- vlan_depth += VLAN_HLEN;
- }
-
- if (eth_p_mpls(type))
- type = ovs_skb_get_inner_protocol(skb);
-
- return type;
-}
-
-static struct sk_buff *tnl_skb_gso_segment(struct sk_buff *skb,
- netdev_features_t features,
- bool tx_path,
- sa_family_t sa_family)
-{
- void *iph = skb_network_header(skb);
- int pkt_hlen = skb_inner_network_offset(skb); /* inner l2 + tunnel hdr. */
- int mac_offset = skb_inner_mac_offset(skb);
- int outer_l3_offset = skb_network_offset(skb);
- int outer_l4_offset = skb_transport_offset(skb);
- struct sk_buff *skb1 = skb;
- struct dst_entry *dst = skb_dst(skb);
- struct sk_buff *segs;
- __be16 proto = skb->protocol;
- char cb[sizeof(skb->cb)];
-
- BUILD_BUG_ON(sizeof(struct ovs_gso_cb) > sizeof_field(struct sk_buff, cb));
- OVS_GSO_CB(skb)->ipv6 = (sa_family == AF_INET6);
- /* setup whole inner packet to get protocol. */
- __skb_pull(skb, mac_offset);
- skb->protocol = __skb_network_protocol(skb);
-
- /* setup l3 packet to gso, to get around segmentation bug on older kernel.*/
- __skb_pull(skb, (pkt_hlen - mac_offset));
- skb_reset_mac_header(skb);
- skb_reset_network_header(skb);
- skb_reset_transport_header(skb);
-
- /* From 3.9 kernel skb->cb is used by skb gso. Therefore
- * make copy of it to restore it back. */
- memcpy(cb, skb->cb, sizeof(cb));
-
- /* We are handling offloads by segmenting l3 packet, so
- * no need to call OVS compat segmentation function. */
-
-#ifdef HAVE___SKB_GSO_SEGMENT
-#undef __skb_gso_segment
- segs = __skb_gso_segment(skb, 0, tx_path);
-#else
-#undef skb_gso_segment
- segs = skb_gso_segment(skb, 0);
-#endif
-
- if (!segs || IS_ERR(segs))
- goto free;
-
- skb = segs;
- while (skb) {
- __skb_push(skb, pkt_hlen);
- skb_reset_mac_header(skb);
- skb_set_network_header(skb, outer_l3_offset);
- skb_set_transport_header(skb, outer_l4_offset);
- skb->mac_len = 0;
-
- memcpy(skb_network_header(skb), iph, pkt_hlen);
- memcpy(skb->cb, cb, sizeof(cb));
-
- skb->protocol = proto;
- if (skb->next)
- dst = dst_clone(dst);
-
- skb_dst_set(skb, dst);
- OVS_GSO_CB(skb)->fix_segment(skb);
-
- skb = skb->next;
- }
-free:
- consume_skb(skb1);
- return segs;
-}
-
-static int output_ip(struct sk_buff *skb)
-{
- memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
-
-#undef ip_local_out
- return ip_local_out(skb);
-}
-
-int rpl_ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
-{
- if (!OVS_GSO_CB(skb)->fix_segment)
- return output_ip(skb);
-
- /* This bit set can confuse some drivers on old kernel. */
- skb->encapsulation = 0;
-
- if (skb_is_gso(skb)) {
- int ret;
- int id;
-
- skb = tnl_skb_gso_segment(skb, 0, false, AF_INET);
- if (!skb || IS_ERR(skb))
- return NET_XMIT_DROP;
-
- id = ntohs(ip_hdr(skb)->id);
- do {
- struct sk_buff *next_skb = skb->next;
-
- skb->next = NULL;
- ip_hdr(skb)->id = htons(id++);
-
- ret = output_ip(skb);
- skb = next_skb;
- } while (skb);
- return ret;
- } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
- int err;
-
- err = skb_checksum_help(skb);
- if (unlikely(err))
- return NET_XMIT_DROP;
- }
-
- return output_ip(skb);
-}
-EXPORT_SYMBOL_GPL(rpl_ip_local_out);
-
-static int output_ipv6(struct sk_buff *skb)
-{
- memset(IP6CB(skb), 0, sizeof (*IP6CB(skb)));
-#undef ip6_local_out
- return ip6_local_out(skb);
-}
-
-int rpl_ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
-{
- if (!OVS_GSO_CB(skb)->fix_segment)
- return output_ipv6(skb);
-
- /* This bit set can confuse some drivers on old kernel. */
- skb->encapsulation = 0;
-
- if (skb_is_gso(skb)) {
- int ret;
-
- skb = tnl_skb_gso_segment(skb, 0, false, AF_INET6);
- if (!skb || IS_ERR(skb))
- return NET_XMIT_DROP;
-
- do {
- struct sk_buff *next_skb = skb->next;
-
- skb->next = NULL;
- ret = output_ipv6(skb);
- skb = next_skb;
- } while (skb);
- return ret;
- } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
- int err;
-
- err = skb_checksum_help(skb);
- if (unlikely(err))
- return NET_XMIT_DROP;
- }
-
- return output_ipv6(skb);
-}
-EXPORT_SYMBOL_GPL(rpl_ip6_local_out);
-#endif /* USE_UPSTREAM_TUNNEL_GSO */
diff --git a/datapath/linux/compat/gso.h b/datapath/linux/compat/gso.h
deleted file mode 100644
index 20109406a..000000000
--- a/datapath/linux/compat/gso.h
+++ /dev/null
@@ -1,214 +0,0 @@
-#ifndef __LINUX_GSO_WRAPPER_H
-#define __LINUX_GSO_WRAPPER_H
-
-#include <linux/version.h>
-#include "datapath.h"
-
-typedef void (*gso_fix_segment_t)(struct sk_buff *);
-
-struct ovs_gso_cb {
- struct ovs_skb_cb dp_cb;
-#ifndef USE_UPSTREAM_TUNNEL
- struct metadata_dst *tun_dst;
-#endif
-#ifndef USE_UPSTREAM_TUNNEL_GSO
- gso_fix_segment_t fix_segment;
- bool ipv6;
-#endif
-#ifndef HAVE_INNER_PROTOCOL
- __be16 inner_protocol;
-#endif
-#ifndef USE_UPSTREAM_TUNNEL
- /* Keep original tunnel info during userspace action execution. */
- struct metadata_dst *fill_md_dst;
-#endif
-};
-#define OVS_GSO_CB(skb) ((struct ovs_gso_cb *)(skb)->cb)
-
-
-#ifndef USE_UPSTREAM_TUNNEL_GSO
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <net/protocol.h>
-
-static inline void skb_clear_ovs_gso_cb(struct sk_buff *skb)
-{
- OVS_GSO_CB(skb)->fix_segment = NULL;
-#ifndef USE_UPSTREAM_TUNNEL
- OVS_GSO_CB(skb)->tun_dst = NULL;
-#endif
-}
-#else
-static inline void skb_clear_ovs_gso_cb(struct sk_buff *skb)
-{
-#ifndef USE_UPSTREAM_TUNNEL
- OVS_GSO_CB(skb)->tun_dst = NULL;
-#endif
-}
-#endif
-
-#ifndef HAVE_INNER_PROTOCOL
-static inline void ovs_skb_init_inner_protocol(struct sk_buff *skb)
-{
- OVS_GSO_CB(skb)->inner_protocol = htons(0);
-}
-
-static inline void ovs_skb_set_inner_protocol(struct sk_buff *skb,
- __be16 ethertype)
-{
- OVS_GSO_CB(skb)->inner_protocol = ethertype;
-}
-
-static inline __be16 ovs_skb_get_inner_protocol(struct sk_buff *skb)
-{
- return OVS_GSO_CB(skb)->inner_protocol;
-}
-
-#else
-
-static inline void ovs_skb_init_inner_protocol(struct sk_buff *skb)
-{
- /* Nothing to do. The inner_protocol is either zero or
- * has been set to a value by another user.
- * Either way it may be considered initialised.
- */
-}
-
-static inline __be16 ovs_skb_get_inner_protocol(struct sk_buff *skb)
-{
- return skb->inner_protocol;
-}
-
-#ifdef ENCAP_TYPE_ETHER
-#define ovs_skb_set_inner_protocol skb_set_inner_protocol
-#else
-static inline void ovs_skb_set_inner_protocol(struct sk_buff *skb,
- __be16 ethertype)
-{
- skb->inner_protocol = ethertype;
-}
-#endif /* ENCAP_TYPE_ETHER */
-#endif /* HAVE_INNER_PROTOCOL */
-
-#define skb_inner_mac_offset rpl_skb_inner_mac_offset
-static inline int skb_inner_mac_offset(const struct sk_buff *skb)
-{
- return skb_inner_mac_header(skb) - skb->data;
-}
-
-#ifndef USE_UPSTREAM_TUNNEL_GSO
-#define ip_local_out rpl_ip_local_out
-int rpl_ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb);
-
-#define ip6_local_out rpl_ip6_local_out
-int rpl_ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb);
-#else
-
-static inline int rpl_ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
-{
- memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
-#ifdef HAVE_IP_LOCAL_OUT_TAKES_NET
- /* net and sk parameters are added at same time. */
- return ip_local_out(net, sk, skb);
-#else
- return ip_local_out(skb);
-#endif
-}
-#define ip_local_out rpl_ip_local_out
-
-static inline int rpl_ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
-{
- memset(IP6CB(skb), 0, sizeof (*IP6CB(skb)));
-#ifdef HAVE_IP_LOCAL_OUT_TAKES_NET
- return ip6_local_out(net, sk, skb);
-#else
- return ip6_local_out(skb);
-#endif
-}
-#define ip6_local_out rpl_ip6_local_out
-
-#endif /* USE_UPSTREAM_TUNNEL_GSO */
-
-#ifndef USE_UPSTREAM_TUNNEL
-/* We need two separate functions to manage different dst in this case.
- * First is dst_entry and second is tunnel-dst.
- * So define ovs_* separate functions for tun_dst.
- */
-static inline void ovs_skb_dst_set(struct sk_buff *skb, void *dst)
-{
- OVS_GSO_CB(skb)->tun_dst = (void *)dst;
-}
-
-static inline struct ip_tunnel_info *ovs_skb_tunnel_info(struct sk_buff *skb)
-{
- if (likely(OVS_GSO_CB(skb)->tun_dst))
- return &OVS_GSO_CB(skb)->tun_dst->u.tun_info;
- else
- return NULL;
-}
-
-static inline void ovs_skb_dst_drop(struct sk_buff *skb)
-{
- OVS_GSO_CB(skb)->tun_dst = NULL;
-}
-
-static inline void ovs_dst_hold(void *dst)
-{
-}
-
-static inline void ovs_dst_release(struct dst_entry *dst)
-{
- struct metadata_dst *tun_dst = (struct metadata_dst *) dst;
-
- dst_cache_destroy(&tun_dst->u.tun_info.dst_cache);
- kfree(dst);
-}
-
-#else
-#define ovs_skb_dst_set skb_dst_set
-#define ovs_skb_dst_drop skb_dst_drop
-#define ovs_dst_hold dst_hold
-#define ovs_dst_release dst_release
-#endif
-
-#ifndef USE_UPSTREAM_TUNNEL
-#define SKB_INIT_FILL_METADATA_DST(skb) OVS_GSO_CB(skb)->fill_md_dst = NULL;
-
-#define SKB_RESTORE_FILL_METADATA_DST(skb) do { \
- if (OVS_GSO_CB(skb)->fill_md_dst) { \
- kfree(OVS_GSO_CB(skb)->tun_dst); \
- OVS_GSO_CB(skb)->tun_dst = OVS_GSO_CB(skb)->fill_md_dst; \
- } \
-} while (0)
-
-
-#define SKB_SETUP_FILL_METADATA_DST(skb) ({ \
- struct metadata_dst *new_md_dst; \
- struct metadata_dst *md_dst; \
- int md_size; \
- int ret = 1; \
- \
- SKB_RESTORE_FILL_METADATA_DST(skb); \
- new_md_dst = kmalloc(sizeof(struct metadata_dst) + 256, GFP_ATOMIC); \
- if (new_md_dst) { \
- md_dst = OVS_GSO_CB(skb)->tun_dst; \
- md_size = new_md_dst->u.tun_info.options_len; \
- memcpy(&new_md_dst->u.tun_info, &md_dst->u.tun_info, \
- sizeof(struct ip_tunnel_info) + md_size); \
- \
- OVS_GSO_CB(skb)->fill_md_dst = md_dst; \
- OVS_GSO_CB(skb)->tun_dst = new_md_dst; \
- ret = 1; \
- } else { \
- ret = 0; \
- } \
- ret; \
-})
-
-#else
-#define SKB_INIT_FILL_METADATA_DST(skb) do {} while(0)
-#define SKB_SETUP_FILL_METADATA_DST(skb) (true)
-#define SKB_RESTORE_FILL_METADATA_DST(skb) do {} while(0)
-#endif
-
-#endif
diff --git a/datapath/linux/compat/include/linux/bug.h b/datapath/linux/compat/include/linux/bug.h
deleted file mode 100644
index 6538a22fc..000000000
--- a/datapath/linux/compat/include/linux/bug.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef __LINUX_BUG_WRAPPER_H
-#define __LINUX_BUG_WRAPPER_H 1
-
-#include_next <linux/bug.h>
-
-#ifdef __CHECKER__
-#ifndef BUILD_BUG_ON_INVALID
-#define BUILD_BUG_ON_INVALID(e) (0)
-#endif
-
-#endif /* __CHECKER__ */
-
-#endif
diff --git a/datapath/linux/compat/include/linux/cache.h b/datapath/linux/compat/include/linux/cache.h
deleted file mode 100644
index c8a6710b3..000000000
--- a/datapath/linux/compat/include/linux/cache.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef __LINUX_CACHE_WRAPPER_H
-#define __LINUX_CACHE_WRAPPER_H 1
-
-#include_next <linux/cache.h>
-
-/* Upstream commit c74ba8b3480d ("arch: Introduce post-init read-only memory")
- * introduced the __ro_after_init attribute, however it wasn't applied to
- * generic netlink sockets until commit 34158151d2aa ("netfilter: cttimeout:
- * use nf_ct_iterate_cleanup_net to unlink timeout objs"). Using it on
- * genetlink before the latter commit leads to crash on module unload.
- * For kernels < 4.10, define it as empty. */
-#ifdef HAVE_GENL_FAMILY_LIST
-#ifdef __ro_after_init
-#undef __ro_after_init
-#endif /* #ifdef __ro_after_init */
-#define __ro_after_init
-#else
-#ifndef __ro_after_init
-#define __ro_after_init
-#endif /* #ifndef __ro_after_init */
-#endif /* #ifdef HAVE_GENL_FAMILY_LIST */
-
-#endif
diff --git a/datapath/linux/compat/include/linux/compiler-gcc.h b/datapath/linux/compat/include/linux/compiler-gcc.h
deleted file mode 100644
index 39d2e0198..000000000
--- a/datapath/linux/compat/include/linux/compiler-gcc.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef __LINUX_COMPILER_H
-#if 0
-/* Disable this check - it no longer makes sense with so many backports
- * due to spectre mitigation
- */
-#ifndef HAVE_LINUX_COMPILER_TYPES_H
-#error "Please don't include <linux/compiler-gcc.h> directly, include <linux/compiler.h> instead."
-#endif
-#endif
-#endif
-
-#include_next <linux/compiler-gcc.h>
-
-#ifndef __packed
-#define __packed __attribute__((packed))
-#endif
-
-#ifndef __always_unused
-#define __always_unused __attribute__((unused))
-#endif
diff --git a/datapath/linux/compat/include/linux/compiler.h b/datapath/linux/compat/include/linux/compiler.h
deleted file mode 100644
index 59b506fd4..000000000
--- a/datapath/linux/compat/include/linux/compiler.h
+++ /dev/null
@@ -1,26 +0,0 @@
-#ifndef __LINUX_COMPILER_WRAPPER_H
-#define __LINUX_COMPILER_WRAPPER_H 1
-
-#include_next <linux/compiler.h>
-
-#ifndef __percpu
-#define __percpu
-#endif
-
-#ifndef __rcu
-#define __rcu
-#endif
-
-#ifndef READ_ONCE
-#define READ_ONCE(x) (x)
-#endif
-
-#ifndef WRITE_ONCE
-#define WRITE_ONCE(x, val) \
-do { \
- *(volatile typeof(x) *)&(x) = (val); \
-} while (0)
-#endif
-
-
-#endif
diff --git a/datapath/linux/compat/include/linux/cpumask.h b/datapath/linux/compat/include/linux/cpumask.h
deleted file mode 100644
index 48c73aa8f..000000000
--- a/datapath/linux/compat/include/linux/cpumask.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef __LINUX_CPUMASK_WRAPPER_H
-#define __LINUX_CPUMASK_WRAPPER_H
-
-#include_next <linux/cpumask.h>
-
-/* for_each_cpu was renamed for_each_possible_cpu in 2.6.18. */
-#ifndef for_each_possible_cpu
-#define for_each_possible_cpu for_each_cpu
-#endif
-
-#endif /* linux/cpumask.h wrapper */
diff --git a/datapath/linux/compat/include/linux/err.h b/datapath/linux/compat/include/linux/err.h
deleted file mode 100644
index 321386c21..000000000
--- a/datapath/linux/compat/include/linux/err.h
+++ /dev/null
@@ -1,37 +0,0 @@
-#ifndef __LINUX_ERR_WRAPPER_H
-#define __LINUX_ERR_WRAPPER_H 1
-
-#include_next <linux/err.h>
-
-#ifndef HAVE_ERR_CAST
-/**
- * ERR_CAST - Explicitly cast an error-valued pointer to another pointer type
- * @ptr: The pointer to cast.
- *
- * Explicitly cast an error-valued pointer to another pointer type in such a
- * way as to make it clear that's what's going on.
- */
-static inline void *ERR_CAST(const void *ptr)
-{
- /* cast away the const */
- return (void *) ptr;
-}
-#endif /* HAVE_ERR_CAST */
-
-#ifndef HAVE_IS_ERR_OR_NULL
-static inline bool __must_check IS_ERR_OR_NULL(__force const void *ptr)
-{
- return !ptr || IS_ERR_VALUE((unsigned long)ptr);
-}
-#endif
-
-#ifndef HAVE_PTR_ERR_OR_ZERO
-static inline int __must_check PTR_ERR_OR_ZERO(__force const void *ptr)
-{
- if (IS_ERR(ptr))
- return PTR_ERR(ptr);
- else
- return 0;
-}
-#endif
-#endif
diff --git a/datapath/linux/compat/include/linux/etherdevice.h b/datapath/linux/compat/include/linux/etherdevice.h
deleted file mode 100644
index 4b2707455..000000000
--- a/datapath/linux/compat/include/linux/etherdevice.h
+++ /dev/null
@@ -1,62 +0,0 @@
-#ifndef __LINUX_ETHERDEVICE_WRAPPER_H
-#define __LINUX_ETHERDEVICE_WRAPPER_H 1
-
-#include <linux/version.h>
-#include_next <linux/etherdevice.h>
-
-#ifndef HAVE_ETHER_ADDR_COPY
-static inline void ether_addr_copy(u8 *dst, const u8 *src)
-{
-#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
- *(u32 *)dst = *(const u32 *)src;
- *(u16 *)(dst + 4) = *(const u16 *)(src + 4);
-#else
- u16 *a = (u16 *)dst;
- const u16 *b = (const u16 *)src;
-
- a[0] = b[0];
- a[1] = b[1];
- a[2] = b[2];
-#endif
-}
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4,2,0)
-#define eth_proto_is_802_3 rpl_eth_proto_is_802_3
-static inline bool eth_proto_is_802_3(__be16 proto)
-{
-#ifndef __BIG_ENDIAN
- /* if CPU is little endian mask off bits representing LSB */
- proto &= htons(0xFF00);
-#endif
- /* cast both to u16 and compare since LSB can be ignored */
- return (__force u16)proto >= (__force u16)htons(ETH_P_802_3_MIN);
-}
-#endif
-
-#define ether_addr_equal rpl_ether_addr_equal
-static inline bool ether_addr_equal(const u8 *addr1, const u8 *addr2)
-{
-#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
- u32 fold = ((*(const u32 *)addr1) ^ (*(const u32 *)addr2)) |
- ((*(const u16 *)(addr1 + 4)) ^ (*(const u16 *)(addr2 + 4)));
-
- return fold == 0;
-#else
- const u16 *a = (const u16 *)addr1;
- const u16 *b = (const u16 *)addr2;
-
- return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) == 0;
-#endif
-}
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4,0,0)
-#define eth_gro_receive rpl_eth_gro_receive
-struct sk_buff **rpl_eth_gro_receive(struct sk_buff **head,
- struct sk_buff *skb);
-
-#define eth_gro_complete rpl_eth_gro_complete
-int rpl_eth_gro_complete(struct sk_buff *skb, int nhoff);
-#endif
-
-#endif
diff --git a/datapath/linux/compat/include/linux/genetlink.h b/datapath/linux/compat/include/linux/genetlink.h
deleted file mode 100644
index 3b85f3865..000000000
--- a/datapath/linux/compat/include/linux/genetlink.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef _UAPI__LINUX_GENERIC_NETLINK_WRAPPER_H
-#define _UAPI__LINUX_GENERIC_NETLINK_WRAPPER_H
-
-#include_next <linux/genetlink.h>
-
-#ifndef GENL_UNS_ADMIN_PERM
-#define GENL_UNS_ADMIN_PERM GENL_ADMIN_PERM
-#endif
-
-#ifdef GENL_ID_GENERATE
-#if GENL_ID_GENERATE != 0
-#error "GENL_ID_GENERATE is assumed to be zero"
-#endif
-#endif
-
-#endif
diff --git a/datapath/linux/compat/include/linux/if.h b/datapath/linux/compat/include/linux/if.h
deleted file mode 100644
index 3beb61df1..000000000
--- a/datapath/linux/compat/include/linux/if.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_IF_WRAPPER_H
-#define __LINUX_IF_WRAPPER_H 1
-
-#include_next <linux/if.h>
-
-#endif
diff --git a/datapath/linux/compat/include/linux/if_ether.h b/datapath/linux/compat/include/linux/if_ether.h
deleted file mode 100644
index 8dff938b7..000000000
--- a/datapath/linux/compat/include/linux/if_ether.h
+++ /dev/null
@@ -1,39 +0,0 @@
-#ifndef __LINUX_IF_ETHER_WRAPPER_H
-#define __LINUX_IF_ETHER_WRAPPER_H 1
-
-#include_next <linux/if_ether.h>
-
-#ifndef ETH_MIN_MTU
-#define ETH_MIN_MTU 68 /* Min IPv4 MTU per RFC791 */
-#endif
-
-#ifndef ETH_MAX_MTU
-#define ETH_MAX_MTU 0xFFFFU /* 65535, same as IP_MAX_MTU */
-#endif
-
-#ifndef ETH_P_802_3_MIN
-#define ETH_P_802_3_MIN 0x0600
-#endif
-
-#ifndef ETH_P_8021AD
-#define ETH_P_8021AD 0x88A8 /* 802.1ad Service VLAN */
-#endif
-
-#ifndef ETH_P_NSH
-#define ETH_P_NSH 0x894F /* Network Service Header */
-#endif
-
-#ifndef ETH_P_ERSPAN
-#define ETH_P_ERSPAN 0x88BE /* ERSPAN TYPE II */
-#endif
-
-#ifndef ETH_P_ERSPAN2
-#define ETH_P_ERSPAN2 0x22EB /* ERSPAN version 2 (type III) */
-#endif
-
-#define inner_eth_hdr rpl_inner_eth_hdr
-static inline struct ethhdr *inner_eth_hdr(const struct sk_buff *skb)
-{
- return (struct ethhdr *)skb_inner_mac_header(skb);
-}
-#endif
diff --git a/datapath/linux/compat/include/linux/if_link.h b/datapath/linux/compat/include/linux/if_link.h
deleted file mode 100644
index bd77e33d3..000000000
--- a/datapath/linux/compat/include/linux/if_link.h
+++ /dev/null
@@ -1,171 +0,0 @@
-#ifndef _LINUX_IF_LINK_WRAPPER_H
-#define _LINUX_IF_LINK_WRAPPER_H
-
-#include_next<linux/if_link.h>
-
-/* GENEVE section */
-enum {
-#define IFLA_GENEVE_UNSPEC rpl_IFLA_GENEVE_UNSPEC
- IFLA_GENEVE_UNSPEC,
-
-#define IFLA_GENEVE_ID rpl_IFLA_GENEVE_ID
- IFLA_GENEVE_ID,
-
-#define IFLA_GENEVE_REMOTE rpl_IFLA_GENEVE_REMOTE
- IFLA_GENEVE_REMOTE,
-
-#define IFLA_GENEVE_TTL rpl_IFLA_GENEVE_TTL
- IFLA_GENEVE_TTL,
-
-#define IFLA_GENEVE_TOS rpl_IFLA_GENEVE_TOS
- IFLA_GENEVE_TOS,
-
-#define IFLA_GENEVE_PORT rpl_IFLA_GENEVE_PORT
- IFLA_GENEVE_PORT, /* destination port */
-
-#define IFLA_GENEVE_COLLECT_METADATA rpl_IFLA_GENEVE_COLLECT_METADATA
- IFLA_GENEVE_COLLECT_METADATA,
-
-#define IFLA_GENEVE_REMOTE6 rpl_IFLA_GENEVE_REMOTE6
- IFLA_GENEVE_REMOTE6,
-
-#define IFLA_GENEVE_UDP_CSUM rpl_IFLA_GENEVE_UDP_CSUM
- IFLA_GENEVE_UDP_CSUM,
-
-#define IFLA_GENEVE_UDP_ZERO_CSUM6_TX rpl_IFLA_GENEVE_UDP_ZERO_CSUM6_TX
- IFLA_GENEVE_UDP_ZERO_CSUM6_TX,
-
-#define IFLA_GENEVE_UDP_ZERO_CSUM6_RX rpl_IFLA_GENEVE_UDP_ZERO_CSUM6_RX
- IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
-
-#define IFLA_GENEVE_LABEL rpl_IFLA_GENEVE_LABEL
- IFLA_GENEVE_LABEL,
-
-#define __IFLA_GENEVE_MAX rpl__IFLA_GENEVE_MAX
- __IFLA_GENEVE_MAX
-};
-#undef IFLA_GENEVE_MAX
-#define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1)
-
-/* STT section */
-enum {
- IFLA_STT_PORT, /* destination port */
- __IFLA_STT_MAX
-};
-#define IFLA_STT_MAX (__IFLA_STT_MAX - 1)
-
-/* LISP section */
-enum {
- IFLA_LISP_PORT, /* destination port */
- __IFLA_LISP_MAX
-};
-#define IFLA_LISP_MAX (__IFLA_LISP_MAX - 1)
-
-/* VXLAN section */
-enum {
-#define IFLA_VXLAN_UNSPEC rpl_IFLA_VXLAN_UNSPEC
- IFLA_VXLAN_UNSPEC,
-#define IFLA_VXLAN_ID rpl_IFLA_VXLAN_ID
- IFLA_VXLAN_ID,
-#define IFLA_VXLAN_GROUP rpl_IFLA_VXLAN_GROUP
- IFLA_VXLAN_GROUP, /* group or remote address */
-#define IFLA_VXLAN_LINK rpl_IFLA_VXLAN_LINK
- IFLA_VXLAN_LINK,
-#define IFLA_VXLAN_LOCAL rpl_IFLA_VXLAN_LOCAL
- IFLA_VXLAN_LOCAL,
-#define IFLA_VXLAN_TTL rpl_IFLA_VXLAN_TTL
- IFLA_VXLAN_TTL,
-#define IFLA_VXLAN_TOS rpl_IFLA_VXLAN_TOS
- IFLA_VXLAN_TOS,
-#define IFLA_VXLAN_LEARNING rpl_IFLA_VXLAN_LEARNING
- IFLA_VXLAN_LEARNING,
-#define IFLA_VXLAN_AGEING rpl_IFLA_VXLAN_AGEING
- IFLA_VXLAN_AGEING,
-#define IFLA_VXLAN_LIMIT rpl_IFLA_VXLAN_LIMIT
- IFLA_VXLAN_LIMIT,
-#define IFLA_VXLAN_PORT_RANGE rpl_IFLA_VXLAN_PORT_RANGE
- IFLA_VXLAN_PORT_RANGE, /* source port */
-#define IFLA_VXLAN_PROXY rpl_IFLA_VXLAN_PROXY
- IFLA_VXLAN_PROXY,
-#define IFLA_VXLAN_RSC rpl_IFLA_VXLAN_RSC
- IFLA_VXLAN_RSC,
-#define IFLA_VXLAN_L2MISS rpl_IFLA_VXLAN_L2MISS
- IFLA_VXLAN_L2MISS,
-#define IFLA_VXLAN_L3MISS rpl_IFLA_VXLAN_L3MISS
- IFLA_VXLAN_L3MISS,
-#define IFLA_VXLAN_PORT rpl_IFLA_VXLAN_PORT
- IFLA_VXLAN_PORT, /* destination port */
-#define IFLA_VXLAN_GROUP6 rpl_IFLA_VXLAN_GROUP6
- IFLA_VXLAN_GROUP6,
-#define IFLA_VXLAN_LOCAL6 rpl_IFLA_VXLAN_LOCAL6
- IFLA_VXLAN_LOCAL6,
-#define IFLA_VXLAN_UDP_CSUM rpl_IFLA_VXLAN_UDP_CSUM
- IFLA_VXLAN_UDP_CSUM,
-#define IFLA_VXLAN_UDP_ZERO_CSUM6_TX rpl_IFLA_VXLAN_UDP_ZERO_CSUM6_TX
- IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
-#define IFLA_VXLAN_UDP_ZERO_CSUM6_RX rpl_IFLA_VXLAN_UDP_ZERO_CSUM6_RX
- IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
-#define IFLA_VXLAN_REMCSUM_TX rpl_IFLA_VXLAN_REMCSUM_TX
- IFLA_VXLAN_REMCSUM_TX,
-#define IFLA_VXLAN_REMCSUM_RX rpl_IFLA_VXLAN_REMCSUM_RX
- IFLA_VXLAN_REMCSUM_RX,
-#define IFLA_VXLAN_GBP rpl_IFLA_VXLAN_GBP
- IFLA_VXLAN_GBP,
-#define IFLA_VXLAN_REMCSUM_NOPARTIAL rpl_IFLA_VXLAN_REMCSUM_NOPARTIAL
- IFLA_VXLAN_REMCSUM_NOPARTIAL,
-#define IFLA_VXLAN_COLLECT_METADATA rpl_IFLA_VXLAN_COLLECT_METADATA
- IFLA_VXLAN_COLLECT_METADATA,
-#define IFLA_VXLAN_LABEL rpl_IFLA_VXLAN_LABEL
- IFLA_VXLAN_LABEL,
-#define IFLA_VXLAN_GPE rpl_IFLA_VXLAN_GPE
- IFLA_VXLAN_GPE,
-
-#define __IFLA_VXLAN_MAX rpl___IFLA_VXLAN_MAX
- __IFLA_VXLAN_MAX
-};
-
-#undef IFLA_VXLAN_MAX
-#define IFLA_VXLAN_MAX (rpl___IFLA_VXLAN_MAX - 1)
-
-#define ifla_vxlan_port_range rpl_ifla_vxlan_port_range
-struct ifla_vxlan_port_range {
- __be16 low;
- __be16 high;
-};
-
-#ifndef HAVE_RTNL_LINK_STATS64
-/* The main device statistics structure */
-struct rtnl_link_stats64 {
- __u64 rx_packets; /* total packets received */
- __u64 tx_packets; /* total packets transmitted */
- __u64 rx_bytes; /* total bytes received */
- __u64 tx_bytes; /* total bytes transmitted */
- __u64 rx_errors; /* bad packets received */
- __u64 tx_errors; /* packet transmit problems */
- __u64 rx_dropped; /* no space in linux buffers */
- __u64 tx_dropped; /* no space available in linux */
- __u64 multicast; /* multicast packets received */
- __u64 collisions;
-
- /* detailed rx_errors: */
- __u64 rx_length_errors;
- __u64 rx_over_errors; /* receiver ring buff overflow */
- __u64 rx_crc_errors; /* recved pkt with crc error */
- __u64 rx_frame_errors; /* recv'd frame alignment error */
- __u64 rx_fifo_errors; /* recv'r fifo overrun */
- __u64 rx_missed_errors; /* receiver missed packet */
-
- /* detailed tx_errors */
- __u64 tx_aborted_errors;
- __u64 tx_carrier_errors;
- __u64 tx_fifo_errors;
- __u64 tx_heartbeat_errors;
- __u64 tx_window_errors;
-
- /* for cslip etc */
- __u64 rx_compressed;
- __u64 tx_compressed;
-};
-#endif
-
-#endif
diff --git a/datapath/linux/compat/include/linux/if_vlan.h b/datapath/linux/compat/include/linux/if_vlan.h
deleted file mode 100644
index 3ed7522c7..000000000
--- a/datapath/linux/compat/include/linux/if_vlan.h
+++ /dev/null
@@ -1,306 +0,0 @@
-#ifndef __LINUX_IF_VLAN_WRAPPER_H
-#define __LINUX_IF_VLAN_WRAPPER_H 1
-
-#include <linux/skbuff.h>
-#include <linux/version.h>
-#include_next <linux/if_vlan.h>
-
-#ifndef HAVE_VLAN_INSERT_TAG_SET_PROTO
-/*
- * The behavior of __vlan_put_tag()/vlan_insert_tag_set_proto() has changed
- * over time:
- *
- * - In 2.6.26 and earlier, it adjusted both MAC and network header
- * pointers. (The latter didn't make any sense.)
- *
- * - In 2.6.27 and 2.6.28, it did not adjust any header pointers at all.
- *
- * - In 2.6.29 and later, it adjusts the MAC header pointer only.
- *
- * - In 3.19 and later, it was renamed to vlan_insert_tag_set_proto()
- *
- * This is the version from 2.6.33. We unconditionally substitute this version
- * to avoid the need to guess whether the version in the kernel tree is
- * acceptable.
- */
-#define vlan_insert_tag_set_proto(skb, proto, vlan_tci) \
- rpl_vlan_insert_tag_set_proto(skb, proto, vlan_tci)
-static inline struct sk_buff *rpl_vlan_insert_tag_set_proto(struct sk_buff *skb,
- __be16 vlan_proto,
- u16 vlan_tci)
-{
- struct vlan_ethhdr *veth;
-
- if (skb_cow_head(skb, VLAN_HLEN) < 0) {
- kfree_skb(skb);
- return NULL;
- }
- veth = (struct vlan_ethhdr *)skb_push(skb, VLAN_HLEN);
-
- /* Move the mac addresses to the beginning of the new header. */
- memmove(skb->data, skb->data + VLAN_HLEN, 2 * ETH_ALEN);
- skb->mac_header -= VLAN_HLEN;
-
- /* first, the ethernet type */
- veth->h_vlan_proto = vlan_proto;
-
- /* now, the TCI */
- veth->h_vlan_TCI = htons(vlan_tci);
-
- skb->protocol = vlan_proto;
-
- return skb;
-}
-#endif
-
-#ifndef HAVE_VLAN_HWACCEL_CLEAR_TAG
-/**
- * __vlan_hwaccel_clear_tag - clear hardware accelerated VLAN info
- * @skb: skbuff to clear
- *
- * Clears the VLAN information from @skb
- */
-#define __vlan_hwaccel_clear_tag rpl_vlan_hwaccel_clear_tag
-static inline void rpl_vlan_hwaccel_clear_tag(struct sk_buff *skb)
-{
-#ifdef HAVE_SKBUFF_VLAN_PRESENT
- skb->vlan_present = 0;
-#else
- skb->vlan_tci = 0;
- skb->vlan_proto = 0;
-#endif
-}
-#endif
-
-#ifndef HAVE_VLAN_HWACCEL_PUSH_INSIDE
-
-/*
- * __vlan_hwaccel_push_inside - pushes vlan tag to the payload
- * @skb: skbuff to tag
- *
- * Pushes the VLAN tag from @skb->vlan_tci inside to the payload.
- *
- * Following the skb_unshare() example, in case of error, the calling function
- * doesn't have to worry about freeing the original skb.
- */
-static inline struct sk_buff *__vlan_hwaccel_push_inside(struct sk_buff *skb)
-{
- skb = vlan_insert_tag_set_proto(skb, skb->vlan_proto,
- vlan_tx_tag_get(skb));
- if (likely(skb))
- skb->vlan_tci = 0;
- return skb;
-}
-/*
- * vlan_hwaccel_push_inside - pushes vlan tag to the payload
- * @skb: skbuff to tag
- *
- * Checks is tag is present in @skb->vlan_tci and if it is, it pushes the
- * VLAN tag from @skb->vlan_tci inside to the payload.
- *
- * Following the skb_unshare() example, in case of error, the calling function
- * doesn't have to worry about freeing the original skb.
- */
-static inline struct sk_buff *vlan_hwaccel_push_inside(struct sk_buff *skb)
-{
- if (vlan_tx_tag_present(skb))
- skb = __vlan_hwaccel_push_inside(skb);
- return skb;
-}
-#endif
-
-#ifndef HAVE_ETH_TYPE_VLAN
-/**
- * eth_type_vlan - check for valid vlan ether type.
- * @ethertype: ether type to check
- *
- * Returns true if the ether type is a vlan ether type.
- */
-static inline bool eth_type_vlan(__be16 ethertype)
-{
- switch (ethertype) {
- case htons(ETH_P_8021Q):
- case htons(ETH_P_8021AD):
- return true;
- default:
- return false;
- }
-}
-#endif
-
-/* All of these were introduced in a single commit preceding 2.6.33, so
- * presumably all of them or none of them are present. */
-#ifndef VLAN_PRIO_MASK
-#define VLAN_PRIO_MASK 0xe000 /* Priority Code Point */
-#define VLAN_PRIO_SHIFT 13
-#define VLAN_CFI_MASK 0x1000 /* Canonical Format Indicator */
-#define VLAN_TAG_PRESENT VLAN_CFI_MASK
-#endif
-
-#ifndef HAVE_VLAN_SET_ENCAP_PROTO
-static inline void vlan_set_encap_proto(struct sk_buff *skb, struct vlan_hdr *vhdr)
-{
- __be16 proto;
- unsigned char *rawp;
-
- /*
- * Was a VLAN packet, grab the encapsulated protocol, which the layer
- * three protocols care about.
- */
-
- proto = vhdr->h_vlan_encapsulated_proto;
- if (ntohs(proto) >= 1536) {
- skb->protocol = proto;
- return;
- }
-
- rawp = skb->data;
- if (*(unsigned short *) rawp == 0xFFFF)
- /*
- * This is a magic hack to spot IPX packets. Older Novell
- * breaks the protocol design and runs IPX over 802.3 without
- * an 802.2 LLC layer. We look for FFFF which isn't a used
- * 802.2 SSAP/DSAP. This won't work for fault tolerant netware
- * but does for the rest.
- */
- skb->protocol = htons(ETH_P_802_3);
- else
- /*
- * Real 802.2 LLC
- */
- skb->protocol = htons(ETH_P_802_2);
-}
-#endif
-
-#ifndef HAVE___VLAN_INSERT_TAG
-/* Kernels which don't have __vlan_insert_tag() also don't have skb->vlan_proto
- * so ignore the proto paramter.
- */
-#define __vlan_insert_tag(skb, proto, tci) rpl_vlan_insert_tag(skb, tci)
-static inline int rpl_vlan_insert_tag(struct sk_buff *skb, u16 vlan_tci)
-{
- struct vlan_ethhdr *veth;
-
- if (skb_cow_head(skb, VLAN_HLEN) < 0)
- return -ENOMEM;
-
- veth = (struct vlan_ethhdr *)skb_push(skb, VLAN_HLEN);
-
- /* Move the mac addresses to the beginning of the new header. */
- memmove(skb->data, skb->data + VLAN_HLEN, 2 * ETH_ALEN);
- skb->mac_header -= VLAN_HLEN;
-
- /* first, the ethernet type */
- veth->h_vlan_proto = htons(ETH_P_8021Q);
-
- /* now, the TCI */
- veth->h_vlan_TCI = htons(vlan_tci);
-
- return 0;
-}
-#endif
-
-#ifndef skb_vlan_tag_present
-#define skb_vlan_tag_present(skb) vlan_tx_tag_present(skb)
-#define skb_vlan_tag_get(skb) vlan_tx_tag_get(skb)
-#endif
-
-#ifndef HAVE_VLAN_GET_PROTOCOL
-
-static inline __be16 __vlan_get_protocol(struct sk_buff *skb, __be16 type,
- int *depth)
-{
- unsigned int vlan_depth = skb->mac_len;
-
- /* if type is 802.1Q/AD then the header should already be
- * present at mac_len - VLAN_HLEN (if mac_len > 0), or at
- * ETH_HLEN otherwise
- */
- if (eth_type_vlan(type)) {
- if (vlan_depth) {
- if (WARN_ON(vlan_depth < VLAN_HLEN))
- return 0;
- vlan_depth -= VLAN_HLEN;
- } else {
- vlan_depth = ETH_HLEN;
- }
- do {
- struct vlan_hdr *vh;
-
- if (unlikely(!pskb_may_pull(skb,
- vlan_depth + VLAN_HLEN)))
- return 0;
-
- vh = (struct vlan_hdr *)(skb->data + vlan_depth);
- type = vh->h_vlan_encapsulated_proto;
- vlan_depth += VLAN_HLEN;
- } while (eth_type_vlan(type));
- }
-
- if (depth)
- *depth = vlan_depth;
-
- return type;
-}
-
-/**
- * vlan_get_protocol - get protocol EtherType.
- * @skb: skbuff to query
- *
- * Returns the EtherType of the packet, regardless of whether it is
- * vlan encapsulated (normal or hardware accelerated) or not.
- */
-static inline __be16 vlan_get_protocol(struct sk_buff *skb)
-{
- return __vlan_get_protocol(skb, skb->protocol, NULL);
-}
-
-#endif
-
-#ifndef HAVE_SKB_VLAN_TAGGED
-/**
- * skb_vlan_tagged - check if skb is vlan tagged.
- * @skb: skbuff to query
- *
- * Returns true if the skb is tagged, regardless of whether it is hardware
- * accelerated or not.
- */
-static inline bool skb_vlan_tagged(const struct sk_buff *skb)
-{
- if (!skb_vlan_tag_present(skb) &&
- likely(!eth_type_vlan(skb->protocol)))
- return false;
-
- return true;
-}
-
-/**
- * skb_vlan_tagged_multi - check if skb is vlan tagged with multiple headers.
- * @skb: skbuff to query
- *
- * Returns true if the skb is tagged with multiple vlan headers, regardless
- * of whether it is hardware accelerated or not.
- */
-static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb)
-{
- __be16 protocol = skb->protocol;
-
- if (!skb_vlan_tag_present(skb)) {
- struct vlan_ethhdr *veh;
-
- if (likely(!eth_type_vlan(protocol)))
- return false;
-
- veh = (struct vlan_ethhdr *)skb->data;
- protocol = veh->h_vlan_encapsulated_proto;
- }
-
- if (!eth_type_vlan(protocol))
- return false;
-
- return true;
-}
-
-#endif /* HAVE_SKB_VLAN_TAGGED */
-
-#endif /* linux/if_vlan.h wrapper */
diff --git a/datapath/linux/compat/include/linux/in.h b/datapath/linux/compat/include/linux/in.h
deleted file mode 100644
index 78f8d7731..000000000
--- a/datapath/linux/compat/include/linux/in.h
+++ /dev/null
@@ -1,56 +0,0 @@
-#ifndef __LINUX_IN_WRAPPER_H
-#define __LINUX_IN_WRAPPER_H 1
-
-#include_next <linux/in.h>
-
-#include <linux/module.h>
-#ifndef HAVE_PROTO_PORTS_OFFSET
-static inline int proto_ports_offset(int proto)
-{
- switch (proto) {
- case IPPROTO_TCP:
- case IPPROTO_UDP:
- case IPPROTO_DCCP:
- case IPPROTO_ESP: /* SPI */
- case IPPROTO_SCTP:
- case IPPROTO_UDPLITE:
- return 0;
- case IPPROTO_AH: /* SPI */
- return 4;
- default:
- return -EINVAL;
- }
-}
-#endif
-
-#ifndef HAVE_IPV4_IS_MULTICAST
-
-static inline bool ipv4_is_loopback(__be32 addr)
-{
- return (addr & htonl(0xff000000)) == htonl(0x7f000000);
-}
-
-static inline bool ipv4_is_multicast(__be32 addr)
-{
- return (addr & htonl(0xf0000000)) == htonl(0xe0000000);
-}
-
-static inline bool ipv4_is_local_multicast(__be32 addr)
-{
- return (addr & htonl(0xffffff00)) == htonl(0xe0000000);
-}
-
-static inline bool ipv4_is_lbcast(__be32 addr)
-{
- /* limited broadcast */
- return addr == htonl(INADDR_BROADCAST);
-}
-
-static inline bool ipv4_is_zeronet(__be32 addr)
-{
- return (addr & htonl(0xff000000)) == htonl(0x00000000);
-}
-
-#endif /* !HAVE_IPV4_IS_MULTICAST */
-
-#endif
diff --git a/datapath/linux/compat/include/linux/jiffies.h b/datapath/linux/compat/include/linux/jiffies.h
deleted file mode 100644
index 642eacec7..000000000
--- a/datapath/linux/compat/include/linux/jiffies.h
+++ /dev/null
@@ -1,34 +0,0 @@
-#ifndef __LINUX_JIFFIES_WRAPPER_H
-#define __LINUX_JIFFIES_WRAPPER_H 1
-
-#include_next <linux/jiffies.h>
-
-#include <linux/version.h>
-
-/* Same as above, but does so with platform independent 64bit types.
- * These must be used when utilizing jiffies_64 (i.e. return value of
- * get_jiffies_64() */
-
-#ifndef time_after64
-#define time_after64(a, b) \
- (typecheck(__u64, a) && \
- typecheck(__u64, b) && \
- ((__s64)(b) - (__s64)(a) < 0))
-#endif
-
-#ifndef time_before64
-#define time_before64(a, b) time_after64(b, a)
-#endif
-
-#ifndef time_after_eq64
-#define time_after_eq64(a, b) \
- (typecheck(__u64, a) && \
- typecheck(__u64, b) && \
- ((__s64)(a) - (__s64)(b) >= 0))
-#endif
-
-#ifndef time_before_eq64
-#define time_before_eq64(a, b) time_after_eq64(b, a)
-#endif
-
-#endif
diff --git a/datapath/linux/compat/include/linux/kconfig.h b/datapath/linux/compat/include/linux/kconfig.h
deleted file mode 100644
index d3fa57a6b..000000000
--- a/datapath/linux/compat/include/linux/kconfig.h
+++ /dev/null
@@ -1,49 +0,0 @@
-#ifndef __LINUX_KCONFIG_WRAPPER_H
-#define __LINUX_KCONFIG_WRAPPER_H
-
-#include <linux/version.h>
-
-#ifndef IS_ENABLED
-
-/*
- * Helper macros to use CONFIG_ options in C/CPP expressions. Note that
- * these only work with boolean and tristate options.
- */
-
-/*
- * Getting something that works in C and CPP for an arg that may or may
- * not be defined is tricky. Here, if we have "#define CONFIG_BOOGER 1"
- * we match on the placeholder define, insert the "0," for arg1 and generate
- * the triplet (0, 1, 0). Then the last step cherry picks the 2nd arg (a one).
- * When CONFIG_BOOGER is not defined, we generate a (... 1, 0) pair, and when
- * the last step cherry picks the 2nd arg, we get a zero.
- */
-#define __ARG_PLACEHOLDER_1 0,
-#define config_enabled(cfg) _config_enabled(cfg)
-#define _config_enabled(value) __config_enabled(__ARG_PLACEHOLDER_##value)
-#define __config_enabled(arg1_or_junk) ___config_enabled(arg1_or_junk 1, 0)
-#define ___config_enabled(__ignored, val, ...) val
-
-/*
- * IS_ENABLED(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'y' or 'm',
- * 0 otherwise.
- *
- */
-#define IS_ENABLED(option) \
- (config_enabled(option) || config_enabled(option##_MODULE))
-
-/*
- * IS_BUILTIN(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'y', 0
- * otherwise. For boolean options, this is equivalent to
- * IS_ENABLED(CONFIG_FOO).
- */
-#define IS_BUILTIN(option) config_enabled(option)
-
-/*
- * IS_MODULE(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'm', 0
- * otherwise.
- */
-#define IS_MODULE(option) config_enabled(option##_MODULE)
-
-#endif /* IS_ENABLED */
-#endif /* __LINUX_KCONFIG_WRAPER_H */
diff --git a/datapath/linux/compat/include/linux/kernel.h b/datapath/linux/compat/include/linux/kernel.h
deleted file mode 100644
index 106b5940a..000000000
--- a/datapath/linux/compat/include/linux/kernel.h
+++ /dev/null
@@ -1,39 +0,0 @@
-#ifndef __KERNEL_H_WRAPPER
-#define __KERNEL_H_WRAPPER 1
-
-#include_next <linux/kernel.h>
-#ifndef HAVE_LOG2_H
-#include <linux/log2.h>
-#endif
-
-#include <linux/version.h>
-
-#ifndef USHRT_MAX
-#define USHRT_MAX ((u16)(~0U))
-#define SHRT_MAX ((s16)(USHRT_MAX>>1))
-#define SHRT_MIN ((s16)(-SHRT_MAX - 1))
-#endif
-
-#ifndef DIV_ROUND_UP
-#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
-#endif
-
-#ifndef rounddown
-#define rounddown(x, y) ( \
-{ \
- typeof(x) __x = (x); \
- __x - (__x % (y)); \
-} \
-)
-#endif
-
-/* U32_MAX was introduced in include/linux/kernel.h after version 3.14. */
-#ifndef U32_MAX
-#define U32_MAX ((u32)~0U)
-#endif
-
-#ifndef sizeof_field
-#define sizeof_field(t, f) (sizeof(((t*)0)->f))
-#endif
-
-#endif /* linux/kernel.h */
diff --git a/datapath/linux/compat/include/linux/list.h b/datapath/linux/compat/include/linux/list.h
deleted file mode 100644
index 4234c17ce..000000000
--- a/datapath/linux/compat/include/linux/list.h
+++ /dev/null
@@ -1,31 +0,0 @@
-#ifndef __LINUX_LIST_WRAPPER_H
-#define __LINUX_LIST_WRAPPER_H 1
-
-#include_next <linux/list.h>
-
-#ifndef hlist_entry_safe
-#define hlist_entry_safe(ptr, type, member) \
- ({ typeof(ptr) ____ptr = (ptr); \
- ____ptr ? hlist_entry(____ptr, type, member) : NULL; \
- })
-
-#undef hlist_for_each_entry
-#define hlist_for_each_entry(pos, head, member) \
- for (pos = hlist_entry_safe((head)->first, typeof(*(pos)), member);\
- pos; \
- pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member))
-
-#undef hlist_for_each_entry_safe
-#define hlist_for_each_entry_safe(pos, n, head, member) \
- for (pos = hlist_entry_safe((head)->first, typeof(*pos), member);\
- pos && ({ n = pos->member.next; 1; }); \
- pos = hlist_entry_safe(n, typeof(*pos), member))
-
-#endif
-
-#ifndef list_first_entry_or_null
-#define list_first_entry_or_null(ptr, type, member) \
- (!list_empty(ptr) ? list_first_entry(ptr, type, member) : NULL)
-#endif
-
-#endif
diff --git a/datapath/linux/compat/include/linux/mm.h b/datapath/linux/compat/include/linux/mm.h
deleted file mode 100644
index 681f3db89..000000000
--- a/datapath/linux/compat/include/linux/mm.h
+++ /dev/null
@@ -1,44 +0,0 @@
-#ifndef OVS_MM_H
-#define OVS_MM_H
-
-#include <linux/overflow.h>
-
-#ifndef HAVE_KVMALLOC_ARRAY
-#ifndef HAVE_KVMALLOC_NODE
-extern void *vmalloc_node(unsigned long size, int node);
-#define kvmalloc_node(a, b, c) vmalloc_node(a, c)
-#else
-extern void *kvmalloc_node(size_t size, gfp_t flags, int node);
-#endif /* HAVE_KVMALLOC_NODE */
-static inline void *kvmalloc(size_t size, gfp_t flags)
-{
- return kvmalloc_node(size, flags, NUMA_NO_NODE);
-}
-static inline void *kvzalloc_node(size_t size, gfp_t flags, int node)
-{
- return kvmalloc_node(size, flags | __GFP_ZERO, node);
-}
-static inline void *kvzalloc(size_t size, gfp_t flags)
-{
- return kvmalloc(size, flags | __GFP_ZERO);
-}
-
-static inline void *kvmalloc_array(size_t n, size_t size, gfp_t flags)
-{
- size_t bytes;
-
- if (unlikely(check_mul_overflow(n, size, &bytes)))
- return NULL;
-
- return kvmalloc(bytes, flags);
-}
-
-static inline void *kvcalloc(size_t n, size_t size, gfp_t flags)
-{
- return kvmalloc_array(n, size, flags | __GFP_ZERO);
-}
-
-#endif
-#include_next <linux/mm.h>
-#endif /* OVS_MM_H */
-
diff --git a/datapath/linux/compat/include/linux/mpls.h b/datapath/linux/compat/include/linux/mpls.h
deleted file mode 100644
index ab99ebc30..000000000
--- a/datapath/linux/compat/include/linux/mpls.h
+++ /dev/null
@@ -1,40 +0,0 @@
-#ifndef _UAPI_MPLS_WRAPPER_H
-#define _UAPI_MPLS_WRAPPER_H
-
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,15,0)
-#include_next <linux/mpls.h>
-#else
-
-#include <linux/types.h>
-#include <asm/byteorder.h>
-
-/* Reference: RFC 5462, RFC 3032
- *
- * 0 1 2 3
- * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * | Label | TC |S| TTL |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- *
- * Label: Label Value, 20 bits
- * TC: Traffic Class field, 3 bits
- * S: Bottom of Stack, 1 bit
- * TTL: Time to Live, 8 bits
- */
-
-struct mpls_label {
- __be32 entry;
-};
-
-#define MPLS_LS_LABEL_MASK 0xFFFFF000
-#define MPLS_LS_LABEL_SHIFT 12
-#define MPLS_LS_TC_MASK 0x00000E00
-#define MPLS_LS_TC_SHIFT 9
-#define MPLS_LS_S_MASK 0x00000100
-#define MPLS_LS_S_SHIFT 8
-#define MPLS_LS_TTL_MASK 0x000000FF
-#define MPLS_LS_TTL_SHIFT 0
-#endif
-
-#endif /* _UAPI_MPLS_WRAPPER_H */
diff --git a/datapath/linux/compat/include/linux/net.h b/datapath/linux/compat/include/linux/net.h
deleted file mode 100644
index 2a6903d0a..000000000
--- a/datapath/linux/compat/include/linux/net.h
+++ /dev/null
@@ -1,62 +0,0 @@
-#ifndef __LINUX_NET_WRAPPER_H
-#define __LINUX_NET_WRAPPER_H 1
-
-#include_next <linux/net.h>
-#include <linux/types.h>
-
-#ifndef net_ratelimited_function
-#define net_ratelimited_function(function, ...) \
-do { \
- if (net_ratelimit()) \
- function(__VA_ARGS__); \
-} while (0)
-
-#define net_emerg_ratelimited(fmt, ...) \
- net_ratelimited_function(pr_emerg, fmt, ##__VA_ARGS__)
-#define net_alert_ratelimited(fmt, ...) \
- net_ratelimited_function(pr_alert, fmt, ##__VA_ARGS__)
-#define net_crit_ratelimited(fmt, ...) \
- net_ratelimited_function(pr_crit, fmt, ##__VA_ARGS__)
-#define net_err_ratelimited(fmt, ...) \
- net_ratelimited_function(pr_err, fmt, ##__VA_ARGS__)
-#define net_notice_ratelimited(fmt, ...) \
- net_ratelimited_function(pr_notice, fmt, ##__VA_ARGS__)
-#define net_warn_ratelimited(fmt, ...) \
- net_ratelimited_function(pr_warn, fmt, ##__VA_ARGS__)
-#define net_info_ratelimited(fmt, ...) \
- net_ratelimited_function(pr_info, fmt, ##__VA_ARGS__)
-#define net_dbg_ratelimited(fmt, ...) \
- net_ratelimited_function(pr_debug, fmt, ##__VA_ARGS__)
-#endif
-
-#ifndef net_get_random_once
-#define __net_get_random_once rpl___net_get_random_once
-bool rpl___net_get_random_once(void *buf, int nbytes, bool *done,
- atomic_t *done_key);
-
-#define ___NET_RANDOM_STATIC_KEY_INIT ATOMIC_INIT(0)
-
-
-#define net_get_random_once(buf, nbytes) \
-({ \
- bool ___ret = false; \
- static bool ___done = false; \
- static atomic_t ___done_key = \
- ___NET_RANDOM_STATIC_KEY_INIT; \
- if (!atomic_read(&___done_key)) \
- ___ret = __net_get_random_once(buf, \
- nbytes, \
- &___done, \
- &___done_key); \
- ___ret; \
-})
-#endif
-
-#ifndef HAVE_SOCK_CREATE_KERN_NET
-int ovs_sock_create_kern(struct net *net, int family, int type, int protocol, struct socket **res);
-void ovs_sock_release(struct socket *sock);
-#define sock_create_kern ovs_sock_create_kern
-#define sock_release ovs_sock_release
-#endif
-
-#endif
diff --git a/datapath/linux/compat/include/linux/netdev_features.h b/datapath/linux/compat/include/linux/netdev_features.h
deleted file mode 100644
index 411f2949b..000000000
--- a/datapath/linux/compat/include/linux/netdev_features.h
+++ /dev/null
@@ -1,77 +0,0 @@
-#ifndef __LINUX_NETDEV_FEATURES_WRAPPER_H
-#define __LINUX_NETDEV_FEATURES_WRAPPER_H
-
-#include_next <linux/netdev_features.h>
-
-#ifndef NETIF_F_GSO_GRE
-#define NETIF_F_GSO_GRE 0
-#endif
-
-#ifndef NETIF_F_GSO_GRE_CSUM
-#define NETIF_F_GSO_GRE_CSUM 0
-#else
-#define HAVE_NETIF_F_GSO_GRE_CSUM
-#endif
-
-#ifndef NETIF_F_GSO_IPIP
-#define NETIF_F_GSO_IPIP 0
-#endif
-
-#ifndef NETIF_F_GSO_SIT
-#define NETIF_F_GSO_SIT 0
-#endif
-
-#ifndef NETIF_F_CSUM_MASK
-#define NETIF_F_CSUM_MASK 0
-#endif
-
-#ifndef NETIF_F_GSO_UDP_TUNNEL
-#define NETIF_F_GSO_UDP_TUNNEL 0
-#else
-#define HAVE_NETIF_F_GSO_UDP_TUNNEL 0
-#endif
-
-#ifndef NETIF_F_GSO_UDP_TUNNEL_CSUM
-#define NETIF_F_GSO_UDP_TUNNEL_CSUM 0
-#define SKB_GSO_UDP_TUNNEL_CSUM 0
-#endif
-
-#ifndef NETIF_F_GSO_MPLS
-#define NETIF_F_GSO_MPLS 0
-#endif
-
-#ifndef NETIF_F_HW_VLAN_STAG_TX
-#define NETIF_F_HW_VLAN_STAG_TX 0
-#endif
-
-#ifndef NETIF_F_GSO_TUNNEL_REMCSUM
-#define NETIF_F_GSO_TUNNEL_REMCSUM 0
-#define SKB_GSO_TUNNEL_REMCSUM 0
-#else
-/* support for REM_CSUM is added in 3.19 but API are not defined
- * till 4.0, so turn on REMSUM support on kernel 4.0 onwards.
- */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,0,0)
-#define HAVE_NETIF_F_GSO_TUNNEL_REMCSUM
-#endif
-#endif
-
-#ifndef NETIF_F_RXCSUM
-#define NETIF_F_RXCSUM 0
-#endif
-
-#ifndef NETIF_F_GSO_ENCAP_ALL
-#define NETIF_F_GSO_ENCAP_ALL (NETIF_F_GSO_GRE | \
- NETIF_F_GSO_GRE_CSUM | \
- NETIF_F_GSO_IPIP | \
- NETIF_F_GSO_SIT | \
- NETIF_F_GSO_UDP_TUNNEL | \
- NETIF_F_GSO_UDP_TUNNEL_CSUM | \
- NETIF_F_GSO_MPLS)
-#endif
-
-#ifndef HAVE_NETIF_F_GSO_GRE_CSUM
-#define SKB_GSO_GRE_CSUM 0
-#endif
-
-#endif
diff --git a/datapath/linux/compat/include/linux/netdevice.h b/datapath/linux/compat/include/linux/netdevice.h
deleted file mode 100644
index 126ff23cf..000000000
--- a/datapath/linux/compat/include/linux/netdevice.h
+++ /dev/null
@@ -1,336 +0,0 @@
-#ifndef __LINUX_NETDEVICE_WRAPPER_H
-#define __LINUX_NETDEVICE_WRAPPER_H 1
-
-#include_next <linux/netdevice.h>
-#include <linux/if_bridge.h>
-
-struct net;
-
-#include <linux/version.h>
-
-#ifndef IFF_TX_SKB_SHARING
-#define IFF_TX_SKB_SHARING 0
-#endif
-
-#ifndef IFF_OVS_DATAPATH
-#define IFF_OVS_DATAPATH 0
-#else
-#define HAVE_OVS_DATAPATH
-#endif
-
-#ifndef IFF_LIVE_ADDR_CHANGE
-#define IFF_LIVE_ADDR_CHANGE 0
-#endif
-
-#ifndef IFF_OPENVSWITCH
-#define IFF_OPENVSWITCH 0
-#endif
-
-#ifndef to_net_dev
-#define to_net_dev(class) container_of(class, struct net_device, NETDEV_DEV_MEMBER)
-#endif
-
-#ifndef HAVE_NET_NAME_UNKNOWN
-#undef alloc_netdev
-#define NET_NAME_UNKNOWN 0
-#define alloc_netdev(sizeof_priv, name, name_assign_type, setup) \
- alloc_netdev_mq(sizeof_priv, name, setup, 1)
-#endif
-
-#ifndef HAVE_DEV_DISABLE_LRO
-extern void dev_disable_lro(struct net_device *dev);
-#endif
-
-#ifndef HAVE_DEV_GET_BY_INDEX_RCU
-static inline struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
-{
- struct net_device *dev;
-
- read_lock(&dev_base_lock);
- dev = __dev_get_by_index(net, ifindex);
- read_unlock(&dev_base_lock);
-
- return dev;
-}
-#endif
-
-#ifndef NETIF_F_FSO
-#define NETIF_F_FSO 0
-#endif
-
-#ifndef HAVE_NETDEV_FEATURES_T
-typedef u32 netdev_features_t;
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3,19,0)
-#define OVS_USE_COMPAT_GSO_SEGMENTATION
-#endif
-
-#ifdef OVS_USE_COMPAT_GSO_SEGMENTATION
-/* define compat version to handle MPLS segmentation offload. */
-#define __skb_gso_segment rpl__skb_gso_segment
-struct sk_buff *rpl__skb_gso_segment(struct sk_buff *skb,
- netdev_features_t features,
- bool tx_path);
-
-#define skb_gso_segment rpl_skb_gso_segment
-static inline
-struct sk_buff *rpl_skb_gso_segment(struct sk_buff *skb, netdev_features_t features)
-{
- return rpl__skb_gso_segment(skb, features, true);
-}
-#endif
-
-#ifdef HAVE_NETIF_NEEDS_GSO_NETDEV
-#define netif_needs_gso rpl_netif_needs_gso
-static inline bool netif_needs_gso(struct sk_buff *skb,
- netdev_features_t features)
-{
- return skb_is_gso(skb) && (!skb_gso_ok(skb, features) ||
- unlikely((skb->ip_summed != CHECKSUM_PARTIAL) &&
- (skb->ip_summed != CHECKSUM_UNNECESSARY)));
-}
-#endif
-
-#ifndef HAVE_NETDEV_MASTER_UPPER_DEV_LINK_PRIV
-#ifndef HAVE_NETDEV_MASTER_UPPER_DEV_LINK_RH
-static inline int rpl_netdev_master_upper_dev_link(struct net_device *dev,
- struct net_device *upper_dev,
- void *upper_priv,
- void *upper_info, void *extack)
-{
- return netdev_master_upper_dev_link(dev, upper_dev);
-}
-#define netdev_master_upper_dev_link rpl_netdev_master_upper_dev_link
-#else /* #ifndef HAVE_NETDEV_MASTER_UPPER_DEV_LINK_RH */
-static inline int rpl_netdev_master_upper_dev_link(struct net_device *dev,
- struct net_device *upper_dev,
- void *upper_priv,
- void *upper_info, void *extack)
-{
- return netdev_master_upper_dev_link(dev, upper_dev,
- upper_priv, upper_info);
-}
-#undef netdev_master_upper_dev_link
-#define netdev_master_upper_dev_link rpl_netdev_master_upper_dev_link
-#endif /* #else HAVE_NETDEV_MASTER_UPPER_DEV_LINK_RH */
-#else /* #ifndef HAVE_NETDEV_MASTER_UPPER_DEV_LINK_PRIV */
-#ifndef HAVE_UPPER_DEV_LINK_EXTACK
-static inline int rpl_netdev_master_upper_dev_link(struct net_device *dev,
- struct net_device *upper_dev,
- void *upper_priv,
- void *upper_info, void *extack)
-{
- return netdev_master_upper_dev_link(dev, upper_dev, upper_priv,
- upper_info);
-}
-#define netdev_master_upper_dev_link rpl_netdev_master_upper_dev_link
-#endif /* #ifndef HAVE_UPPER_DEV_LINK_EXTACK */
-#endif /* #else HAVE_NETDEV_MASTER_UPPER_DEV_LINK_PRIV */
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0)
-#define dev_queue_xmit rpl_dev_queue_xmit
-int rpl_dev_queue_xmit(struct sk_buff *skb);
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3,11,0)
-static inline struct net_device *rpl_netdev_notifier_info_to_dev(void *info)
-{
- return info;
-}
-#define netdev_notifier_info_to_dev rpl_netdev_notifier_info_to_dev
-#endif
-
-#ifndef HAVE_PCPU_SW_NETSTATS
-#define pcpu_sw_netstats pcpu_tstats
-#endif
-
-#if RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(8,0)
-/* Use compat version for all redhas releases */
-#undef netdev_alloc_pcpu_stats
-#endif
-
-#ifndef netdev_alloc_pcpu_stats
-#define netdev_alloc_pcpu_stats(type) \
-({ \
- typeof(type) __percpu *pcpu_stats = alloc_percpu(type); \
- if (pcpu_stats) { \
- int ____i; \
- for_each_possible_cpu(____i) { \
- typeof(type) *stat; \
- stat = per_cpu_ptr(pcpu_stats, ____i); \
- u64_stats_init(&stat->syncp); \
- } \
- } \
- pcpu_stats; \
-})
-#endif
-
-#ifndef HAVE_DEV_RECURSION_LEVEL
-static inline bool dev_recursion_level(void) { return false; }
-#endif
-
-#ifndef NET_NAME_USER
-#define NET_NAME_USER 3
-#endif
-
-#ifndef HAVE_GRO_REMCSUM
-struct gro_remcsum {
-};
-
-#define skb_gro_remcsum_init(grc)
-#define skb_gro_remcsum_cleanup(a1, a2)
-#else
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4,3,0)
-
-#define skb_gro_remcsum_process rpl_skb_gro_remcsum_process
-static inline void *skb_gro_remcsum_process(struct sk_buff *skb, void *ptr,
- unsigned int off, size_t hdrlen,
- int start, int offset,
- struct gro_remcsum *grc,
- bool nopartial)
-{
- __wsum delta;
- size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start);
-
- BUG_ON(!NAPI_GRO_CB(skb)->csum_valid);
-
- if (!nopartial) {
- NAPI_GRO_CB(skb)->gro_remcsum_start = off + hdrlen + start;
- return ptr;
- }
-
- ptr = skb_gro_header_fast(skb, off);
- if (skb_gro_header_hard(skb, off + plen)) {
- ptr = skb_gro_header_slow(skb, off + plen, off);
- if (!ptr)
- return NULL;
- }
-
- delta = remcsum_adjust(ptr + hdrlen, NAPI_GRO_CB(skb)->csum,
- start, offset);
-
- /* Adjust skb->csum since we changed the packet */
- NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta);
-
- grc->offset = off + hdrlen + offset;
- grc->delta = delta;
-
- return ptr;
-}
-#endif
-#endif
-
-#ifndef HAVE_RTNL_LINK_STATS64
-#define dev_get_stats rpl_dev_get_stats
-struct rtnl_link_stats64 *rpl_dev_get_stats(struct net_device *dev,
- struct rtnl_link_stats64 *storage);
-#endif
-
-#if RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(7,0)
-/* Only required on RHEL 6. */
-#define dev_get_stats dev_get_stats64
-#endif
-
-#ifndef netdev_dbg
-#define netdev_dbg(__dev, format, args...) \
-do { \
- printk(KERN_DEBUG "%s ", __dev->name); \
- printk(KERN_DEBUG format, ##args); \
-} while (0)
-#endif
-
-#ifndef netdev_info
-#define netdev_info(__dev, format, args...) \
-do { \
- printk(KERN_INFO "%s ", __dev->name); \
- printk(KERN_INFO format, ##args); \
-} while (0)
-
-#endif
-
-#ifndef USE_UPSTREAM_TUNNEL
-#define dev_fill_metadata_dst ovs_dev_fill_metadata_dst
-int ovs_dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb);
-#endif
-
-#ifndef NETDEV_OFFLOAD_PUSH_VXLAN
-#define NETDEV_OFFLOAD_PUSH_VXLAN 0x001C
-#endif
-
-#ifndef NETDEV_OFFLOAD_PUSH_GENEVE
-#define NETDEV_OFFLOAD_PUSH_GENEVE 0x001D
-#endif
-
-#ifndef HAVE_IFF_PHONY_HEADROOM
-
-#define IFF_PHONY_HEADROOM 0
-static inline unsigned netdev_get_fwd_headroom(struct net_device *dev)
-{
- return 0;
-}
-
-static inline void netdev_set_rx_headroom(struct net_device *dev, int new_hr)
-{
-}
-
-/* set the device rx headroom to the dev's default */
-static inline void netdev_reset_rx_headroom(struct net_device *dev)
-{
-}
-
-#endif
-
-#ifdef IFF_NO_QUEUE
-#define HAVE_IFF_NO_QUEUE
-#else
-#define IFF_NO_QUEUE 0
-#endif
-
-#ifndef HAVE_SKB_CSUM_HWOFFLOAD_HELP
-static inline int skb_csum_hwoffload_help(struct sk_buff *skb,
- const netdev_features_t features)
-{
- /* It's less accurate to approximate to this for older kernels, but
- * it was sufficient for a long time. If you care about ensuring that
- * upstream commit 7529390d08f0 has the same effect on older kernels,
- * consider backporting the following commits:
- * b72b5bf6a8fc ("net: introduce skb_crc32c_csum_help")
- * 43c26a1a4593 ("net: more accurate checksumming in validate_xmit_skb()")
- */
- return skb_checksum_help(skb);
-}
-#endif
-
-#ifndef HAVE_SKB_GSO_ERROR_UNWIND
-static inline void skb_gso_error_unwind(struct sk_buff *skb, __be16 protocol,
- int pulled_hlen, u16 mac_offset,
- int mac_len)
-{
- skb->protocol = protocol;
- skb->encapsulation = 1;
- skb_push(skb, pulled_hlen);
- skb_reset_transport_header(skb);
- skb->mac_header = mac_offset;
- skb->network_header = skb->mac_header + mac_len;
- skb->mac_len = mac_len;
-}
-#endif
-
-#ifndef HAVE_NETIF_KEEP_DST
-static inline void netif_keep_dst(struct net_device *dev)
-{
-}
-#endif
-
-#ifndef HAVE_DEV_CHANGE_FLAGS_TAKES_EXTACK
-static inline int rpl_dev_change_flags(struct net_device *dev,
- unsigned int flags,
- struct netlink_ext_ack *extack)
-{
- return dev_change_flags(dev, flags);
-}
-#define dev_change_flags rpl_dev_change_flags
-#endif
-
-#endif /* __LINUX_NETDEVICE_WRAPPER_H */
diff --git a/datapath/linux/compat/include/linux/netfilter.h b/datapath/linux/compat/include/linux/netfilter.h
deleted file mode 100644
index a6ed6172d..000000000
--- a/datapath/linux/compat/include/linux/netfilter.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#ifndef __NETFILTER_WRAPPER_H
-#define __NETFILTER_WRAPPER_H
-
-#include_next <linux/netfilter.h>
-
-#if !defined(HAVE_NF_HOOK_STATE) || !defined(HAVE_NF_HOOK_STATE_NET)
-struct rpl_nf_hook_state {
- unsigned int hook;
- u_int8_t pf;
- struct net_device *in;
- struct net_device *out;
- struct sock *sk;
- struct net *net;
- int (*okfn)(struct net *, struct sock *, struct sk_buff *);
-};
-#define nf_hook_state rpl_nf_hook_state
-#endif
-
-#endif /* __NETFILTER_WRAPPER_H */
diff --git a/datapath/linux/compat/include/linux/netfilter_ipv6.h b/datapath/linux/compat/include/linux/netfilter_ipv6.h
deleted file mode 100644
index 8d896fbc5..000000000
--- a/datapath/linux/compat/include/linux/netfilter_ipv6.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#ifndef __NETFILTER_IPV6_WRAPPER_H
-#define __NETFILTER_IPV6_WRAPPER_H 1
-
-#include_next <linux/netfilter_ipv6.h>
-
-#include <linux/version.h>
-#include <net/ip.h> /* For OVS_VPORT_OUTPUT_PARAMS */
-#include <net/ip6_route.h>
-
-#ifndef HAVE_NF_IPV6_OPS_FRAGMENT
-/* Try to minimise changes required to the actions.c code for calling IPv6
- * fragmentation. We can keep the fragment() API mostly the same, except that
- * the callback parameter needs to be in the form that older kernels accept.
- * We don't backport the other ipv6_ops as they're currently unused by OVS. */
-struct ovs_nf_ipv6_ops {
- int (*fragment)(struct sock *sk, struct sk_buff *skb,
- int (*output)(OVS_VPORT_OUTPUT_PARAMS));
-};
-#define nf_ipv6_ops ovs_nf_ipv6_ops
-
-static struct ovs_nf_ipv6_ops ovs_ipv6_ops = {
- .fragment = ip6_fragment,
-};
-
-static inline struct ovs_nf_ipv6_ops *ovs_nf_get_ipv6_ops(void)
-{
- return &ovs_ipv6_ops;
-}
-#define nf_get_ipv6_ops ovs_nf_get_ipv6_ops
-
-#endif /* HAVE_NF_IPV6_OPS_FRAGMENT */
-#endif /* __NETFILTER_IPV6_WRAPPER_H */
diff --git a/datapath/linux/compat/include/linux/netlink.h b/datapath/linux/compat/include/linux/netlink.h
deleted file mode 100644
index a64de4ff8..000000000
--- a/datapath/linux/compat/include/linux/netlink.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#ifndef __LINUX_NETLINK_WRAPPER_H
-#define __LINUX_NETLINK_WRAPPER_H 1
-
-#include <linux/skbuff.h>
-#include_next <linux/netlink.h>
-
-#ifndef NLA_TYPE_MASK
-#define NLA_F_NESTED (1 << 15)
-#define NLA_F_NET_BYTEORDER (1 << 14)
-#define NLA_TYPE_MASK (~(NLA_F_NESTED | NLA_F_NET_BYTEORDER))
-#endif
-
-#include <net/netlink.h>
-
-#ifndef NLMSG_DEFAULT_SIZE
-#define NLMSG_DEFAULT_SIZE (NLMSG_GOODSIZE - NLMSG_HDRLEN)
-#endif
-
-#endif
diff --git a/datapath/linux/compat/include/linux/overflow.h b/datapath/linux/compat/include/linux/overflow.h
deleted file mode 100644
index 13ae6cf6a..000000000
--- a/datapath/linux/compat/include/linux/overflow.h
+++ /dev/null
@@ -1,313 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 OR MIT */
-#if defined(HAVE_OVERFLOW_H) && defined(HAVE_STRUCT_SIZE)
-#include_next <linux/overflow.h>
-#else
-#ifndef __LINUX_OVERFLOW_H
-#define __LINUX_OVERFLOW_H
-
-#include <linux/compiler.h>
-
-/*
- * In the fallback code below, we need to compute the minimum and
- * maximum values representable in a given type. These macros may also
- * be useful elsewhere, so we provide them outside the
- * COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW block.
- *
- * It would seem more obvious to do something like
- *
- * #define type_min(T) (T)(is_signed_type(T) ? (T)1 << (8*sizeof(T)-1) : 0)
- * #define type_max(T) (T)(is_signed_type(T) ? ((T)1 << (8*sizeof(T)-1)) - 1 : ~(T)0)
- *
- * Unfortunately, the middle expressions, strictly speaking, have
- * undefined behaviour, and at least some versions of gcc warn about
- * the type_max expression (but not if -fsanitize=undefined is in
- * effect; in that case, the warning is deferred to runtime...).
- *
- * The slightly excessive casting in type_min is to make sure the
- * macros also produce sensible values for the exotic type _Bool. [The
- * overflow checkers only almost work for _Bool, but that's
- * a-feature-not-a-bug, since people shouldn't be doing arithmetic on
- * _Bools. Besides, the gcc builtins don't allow _Bool* as third
- * argument.]
- *
- * Idea stolen from
- * https://mail-index.netbsd.org/tech-misc/2007/02/05/0000.html -
- * credit to Christian Biere.
- */
-#define is_signed_type(type) (((type)(-1)) < (type)1)
-#define __type_half_max(type) ((type)1 << (8*sizeof(type) - 1 - is_signed_type(type)))
-#define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T)))
-#define type_min(T) ((T)((T)-type_max(T)-(T)1))
-
-
-#ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW
-/*
- * For simplicity and code hygiene, the fallback code below insists on
- * a, b and *d having the same type (similar to the min() and max()
- * macros), whereas gcc's type-generic overflow checkers accept
- * different types. Hence we don't just make check_add_overflow an
- * alias for __builtin_add_overflow, but add type checks similar to
- * below.
- */
-#define check_add_overflow(a, b, d) ({ \
- typeof(a) __a = (a); \
- typeof(b) __b = (b); \
- typeof(d) __d = (d); \
- (void) (&__a == &__b); \
- (void) (&__a == __d); \
- __builtin_add_overflow(__a, __b, __d); \
-})
-
-#define check_sub_overflow(a, b, d) ({ \
- typeof(a) __a = (a); \
- typeof(b) __b = (b); \
- typeof(d) __d = (d); \
- (void) (&__a == &__b); \
- (void) (&__a == __d); \
- __builtin_sub_overflow(__a, __b, __d); \
-})
-
-#define check_mul_overflow(a, b, d) ({ \
- typeof(a) __a = (a); \
- typeof(b) __b = (b); \
- typeof(d) __d = (d); \
- (void) (&__a == &__b); \
- (void) (&__a == __d); \
- __builtin_mul_overflow(__a, __b, __d); \
-})
-
-#else
-
-
-/* Checking for unsigned overflow is relatively easy without causing UB. */
-#define __unsigned_add_overflow(a, b, d) ({ \
- typeof(a) __a = (a); \
- typeof(b) __b = (b); \
- typeof(d) __d = (d); \
- (void) (&__a == &__b); \
- (void) (&__a == __d); \
- *__d = __a + __b; \
- *__d < __a; \
-})
-#define __unsigned_sub_overflow(a, b, d) ({ \
- typeof(a) __a = (a); \
- typeof(b) __b = (b); \
- typeof(d) __d = (d); \
- (void) (&__a == &__b); \
- (void) (&__a == __d); \
- *__d = __a - __b; \
- __a < __b; \
-})
-/*
- * If one of a or b is a compile-time constant, this avoids a division.
- */
-#define __unsigned_mul_overflow(a, b, d) ({ \
- typeof(a) __a = (a); \
- typeof(b) __b = (b); \
- typeof(d) __d = (d); \
- (void) (&__a == &__b); \
- (void) (&__a == __d); \
- *__d = __a * __b; \
- __builtin_constant_p(__b) ? \
- __b > 0 && __a > type_max(typeof(__a)) / __b : \
- __a > 0 && __b > type_max(typeof(__b)) / __a; \
-})
-
-/*
- * For signed types, detecting overflow is much harder, especially if
- * we want to avoid UB. But the interface of these macros is such that
- * we must provide a result in *d, and in fact we must produce the
- * result promised by gcc's builtins, which is simply the possibly
- * wrapped-around value. Fortunately, we can just formally do the
- * operations in the widest relevant unsigned type (u64) and then
- * truncate the result - gcc is smart enough to generate the same code
- * with and without the (u64) casts.
- */
-
-/*
- * Adding two signed integers can overflow only if they have the same
- * sign, and overflow has happened iff the result has the opposite
- * sign.
- */
-#define __signed_add_overflow(a, b, d) ({ \
- typeof(a) __a = (a); \
- typeof(b) __b = (b); \
- typeof(d) __d = (d); \
- (void) (&__a == &__b); \
- (void) (&__a == __d); \
- *__d = (u64)__a + (u64)__b; \
- (((~(__a ^ __b)) & (*__d ^ __a)) \
- & type_min(typeof(__a))) != 0; \
-})
-
-/*
- * Subtraction is similar, except that overflow can now happen only
- * when the signs are opposite. In this case, overflow has happened if
- * the result has the opposite sign of a.
- */
-#define __signed_sub_overflow(a, b, d) ({ \
- typeof(a) __a = (a); \
- typeof(b) __b = (b); \
- typeof(d) __d = (d); \
- (void) (&__a == &__b); \
- (void) (&__a == __d); \
- *__d = (u64)__a - (u64)__b; \
- ((((__a ^ __b)) & (*__d ^ __a)) \
- & type_min(typeof(__a))) != 0; \
-})
-
-/*
- * Signed multiplication is rather hard. gcc always follows C99, so
- * division is truncated towards 0. This means that we can write the
- * overflow check like this:
- *
- * (a > 0 && (b > MAX/a || b < MIN/a)) ||
- * (a < -1 && (b > MIN/a || b < MAX/a) ||
- * (a == -1 && b == MIN)
- *
- * The redundant casts of -1 are to silence an annoying -Wtype-limits
- * (included in -Wextra) warning: When the type is u8 or u16, the
- * __b_c_e in check_mul_overflow obviously selects
- * __unsigned_mul_overflow, but unfortunately gcc still parses this
- * code and warns about the limited range of __b.
- */
-
-#define __signed_mul_overflow(a, b, d) ({ \
- typeof(a) __a = (a); \
- typeof(b) __b = (b); \
- typeof(d) __d = (d); \
- typeof(a) __tmax = type_max(typeof(a)); \
- typeof(a) __tmin = type_min(typeof(a)); \
- (void) (&__a == &__b); \
- (void) (&__a == __d); \
- *__d = (u64)__a * (u64)__b; \
- (__b > 0 && (__a > __tmax/__b || __a < __tmin/__b)) || \
- (__b < (typeof(__b))-1 && (__a > __tmin/__b || __a < __tmax/__b)) || \
- (__b == (typeof(__b))-1 && __a == __tmin); \
-})
-
-
-#define check_add_overflow(a, b, d) \
- __builtin_choose_expr(is_signed_type(typeof(a)), \
- __signed_add_overflow(a, b, d), \
- __unsigned_add_overflow(a, b, d))
-
-#define check_sub_overflow(a, b, d) \
- __builtin_choose_expr(is_signed_type(typeof(a)), \
- __signed_sub_overflow(a, b, d), \
- __unsigned_sub_overflow(a, b, d))
-
-#define check_mul_overflow(a, b, d) \
- __builtin_choose_expr(is_signed_type(typeof(a)), \
- __signed_mul_overflow(a, b, d), \
- __unsigned_mul_overflow(a, b, d))
-
-
-#endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */
-
-/** check_shl_overflow() - Calculate a left-shifted value and check overflow
- *
- * @a: Value to be shifted
- * @s: How many bits left to shift
- * @d: Pointer to where to store the result
- *
- * Computes *@d = (@a << @s)
- *
- * Returns true if '*d' cannot hold the result or when 'a << s' doesn't
- * make sense. Example conditions:
- * - 'a << s' causes bits to be lost when stored in *d.
- * - 's' is garbage (e.g. negative) or so large that the result of
- * 'a << s' is guaranteed to be 0.
- * - 'a' is negative.
- * - 'a << s' sets the sign bit, if any, in '*d'.
- *
- * '*d' will hold the results of the attempted shift, but is not
- * considered "safe for use" if false is returned.
- */
-#define check_shl_overflow(a, s, d) ({ \
- typeof(a) _a = a; \
- typeof(s) _s = s; \
- typeof(d) _d = d; \
- u64 _a_full = _a; \
- unsigned int _to_shift = \
- _s >= 0 && _s < 8 * sizeof(*d) ? _s : 0; \
- *_d = (_a_full << _to_shift); \
- (_to_shift != _s || *_d < 0 || _a < 0 || \
- (*_d >> _to_shift) != _a); \
-})
-
-/**
- * array_size() - Calculate size of 2-dimensional array.
- *
- * @a: dimension one
- * @b: dimension two
- *
- * Calculates size of 2-dimensional array: @a * @b.
- *
- * Returns: number of bytes needed to represent the array or SIZE_MAX on
- * overflow.
- */
-static inline __must_check size_t array_size(size_t a, size_t b)
-{
- size_t bytes;
-
- if (check_mul_overflow(a, b, &bytes))
- return SIZE_MAX;
-
- return bytes;
-}
-
-/**
- * array3_size() - Calculate size of 3-dimensional array.
- *
- * @a: dimension one
- * @b: dimension two
- * @c: dimension three
- *
- * Calculates size of 3-dimensional array: @a * @b * @c.
- *
- * Returns: number of bytes needed to represent the array or SIZE_MAX on
- * overflow.
- */
-static inline __must_check size_t array3_size(size_t a, size_t b, size_t c)
-{
- size_t bytes;
-
- if (check_mul_overflow(a, b, &bytes))
- return SIZE_MAX;
- if (check_mul_overflow(bytes, c, &bytes))
- return SIZE_MAX;
-
- return bytes;
-}
-
-static inline __must_check size_t __ab_c_size(size_t n, size_t size, size_t c)
-{
- size_t bytes;
-
- if (check_mul_overflow(n, size, &bytes))
- return SIZE_MAX;
- if (check_add_overflow(bytes, c, &bytes))
- return SIZE_MAX;
-
- return bytes;
-}
-
-/**
- * struct_size() - Calculate size of structure with trailing array.
- * @p: Pointer to the structure.
- * @member: Name of the array member.
- * @n: Number of elements in the array.
- *
- * Calculates size of memory needed for structure @p followed by an
- * array of @n @member elements.
- *
- * Return: number of bytes needed or SIZE_MAX on overflow.
- */
-#define struct_size(p, member, n) \
- __ab_c_size(n, \
- sizeof(*(p)->member) + __must_be_array((p)->member),\
- sizeof(*(p)))
-
-#endif /* __LINUX_OVERFLOW_H */
-#endif /* defined(HAVE_OVERFLOW_H) && defined(HAVE_STRUCT_SIZE) */
diff --git a/datapath/linux/compat/include/linux/percpu.h b/datapath/linux/compat/include/linux/percpu.h
deleted file mode 100644
index a039142e2..000000000
--- a/datapath/linux/compat/include/linux/percpu.h
+++ /dev/null
@@ -1,33 +0,0 @@
-#ifndef __LINUX_PERCPU_WRAPPER_H
-#define __LINUX_PERCPU_WRAPPER_H 1
-
-#include_next <linux/percpu.h>
-
-#if !defined this_cpu_ptr
-#define this_cpu_ptr(ptr) per_cpu_ptr(ptr, smp_processor_id())
-#endif
-
-#if !defined this_cpu_read
-#define this_cpu_read(ptr) percpu_read(ptr)
-#endif
-
-#if !defined this_cpu_inc
-#define this_cpu_inc(ptr) percpu_add(ptr, 1)
-#endif
-
-#if !defined this_cpu_dec
-#define this_cpu_dec(ptr) percpu_sub(ptr, 1)
-#endif
-
-#ifndef alloc_percpu_gfp
-#define NEED_ALLOC_PERCPU_GFP
-
-void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp);
-
-#define alloc_percpu_gfp(type, gfp) \
- (typeof(type) __percpu *)__alloc_percpu_gfp(sizeof(type), \
- __alignof__(type), gfp)
-#endif
-
-
-#endif
diff --git a/datapath/linux/compat/include/linux/random.h b/datapath/linux/compat/include/linux/random.h
deleted file mode 100644
index 5c088a2d8..000000000
--- a/datapath/linux/compat/include/linux/random.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef __LINUX_RANDOM_WRAPPER_H
-#define __LINUX_RANDOM_WRAPPER_H 1
-
-#include_next <linux/random.h>
-
-#ifndef HAVE_PRANDOM_U32
-#define prandom_u32() random32()
-#endif
-
-#ifndef HAVE_PRANDOM_U32_MAX
-static inline u32 prandom_u32_max(u32 ep_ro)
-{
- return (u32)(((u64) prandom_u32() * ep_ro) >> 32);
-}
-#endif
-
-#endif
diff --git a/datapath/linux/compat/include/linux/rbtree.h b/datapath/linux/compat/include/linux/rbtree.h
deleted file mode 100644
index dbf20ff0e..000000000
--- a/datapath/linux/compat/include/linux/rbtree.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#ifndef __LINUX_RBTREE_WRAPPER_H
-#define __LINUX_RBTREE_WRAPPER_H 1
-
-#include_next <linux/rbtree.h>
-
-#ifndef HAVE_RBTREE_RB_LINK_NODE_RCU
-#include <linux/rcupdate.h>
-
-static inline void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent,
- struct rb_node **rb_link)
-{
- node->__rb_parent_color = (unsigned long)parent;
- node->rb_left = node->rb_right = NULL;
-
- rcu_assign_pointer(*rb_link, node);
-}
-#endif
-
-#endif /* __LINUX_RBTREE_WRAPPER_H */
diff --git a/datapath/linux/compat/include/linux/rculist.h b/datapath/linux/compat/include/linux/rculist.h
deleted file mode 100644
index 40fd5e171..000000000
--- a/datapath/linux/compat/include/linux/rculist.h
+++ /dev/null
@@ -1,39 +0,0 @@
-#ifndef __LINUX_RCULIST_WRAPPER_H
-#define __LINUX_RCULIST_WRAPPER_H
-
-#include_next <linux/rculist.h>
-
-#ifndef hlist_first_rcu
-#define hlist_first_rcu(head) (*((struct hlist_node __rcu **)(&(head)->first)))
-#define hlist_next_rcu(node) (*((struct hlist_node __rcu **)(&(node)->next)))
-#define hlist_pprev_rcu(node) (*((struct hlist_node __rcu **)((node)->pprev)))
-#endif
-
-/*
- * Check during list traversal that we are within an RCU reader
- */
-
-#define check_arg_count_one(dummy)
-
-#ifdef CONFIG_PROVE_RCU_LIST
-#define __list_check_rcu(dummy, cond, extra...) \
- ({ \
- check_arg_count_one(extra); \
- RCU_LOCKDEP_WARN(!cond && !rcu_read_lock_any_held(), \
- "RCU-list traversed in non-reader section!"); \
- })
-#else
-#define __list_check_rcu(dummy, cond, extra...) \
- ({ check_arg_count_one(extra); })
-#endif
-
-#undef hlist_for_each_entry_rcu
-#define hlist_for_each_entry_rcu(pos, head, member, cond...) \
- for (__list_check_rcu(dummy, ## cond, 0), \
- pos = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),\
- typeof(*(pos)), member); \
- pos; \
- pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(\
- &(pos)->member)), typeof(*(pos)), member))
-
-#endif
diff --git a/datapath/linux/compat/include/linux/rcupdate.h b/datapath/linux/compat/include/linux/rcupdate.h
deleted file mode 100644
index 85e3c3b76..000000000
--- a/datapath/linux/compat/include/linux/rcupdate.h
+++ /dev/null
@@ -1,41 +0,0 @@
-#ifndef __RCUPDATE_WRAPPER_H
-#define __RCUPDATE_WRAPPER_H 1
-
-#include_next <linux/rcupdate.h>
-
-#ifndef rcu_dereference_check
-#define rcu_dereference_check(p, c) rcu_dereference(p)
-#endif
-
-#ifndef rcu_dereference_protected
-#define rcu_dereference_protected(p, c) (p)
-#endif
-
-#ifndef rcu_dereference_raw
-#define rcu_dereference_raw(p) rcu_dereference_check(p, 1)
-#endif
-
-#ifndef rcu_access_pointer
-#define rcu_access_pointer(p) rcu_dereference(p)
-#endif
-
-#ifndef HAVE_RCU_READ_LOCK_HELD
-static inline int rcu_read_lock_held(void)
-{
- return 1;
-}
-#endif
-
-#ifndef RCU_INITIALIZER
-#define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v)
-#endif
-
-#ifndef RCU_INIT_POINTER
-#define RCU_INIT_POINTER(p, v) \
- do { \
- p = RCU_INITIALIZER(v); \
- } while (0)
-
-#endif
-
-#endif /* linux/rcupdate.h wrapper */
diff --git a/datapath/linux/compat/include/linux/reciprocal_div.h b/datapath/linux/compat/include/linux/reciprocal_div.h
deleted file mode 100644
index f50d8e4ee..000000000
--- a/datapath/linux/compat/include/linux/reciprocal_div.h
+++ /dev/null
@@ -1,37 +0,0 @@
-#ifndef _LINUX_RECIPROCAL_DIV_WRAPPER_H
-#define _LINUX_RECIPROCAL_DIV_WRAPPER_H 1
-
-#include <linux/types.h>
-
-/*
- * This algorithm is based on the paper "Division by Invariant
- * Integers Using Multiplication" by Torbjörn Granlund and Peter
- * L. Montgomery.
- *
- * The assembler implementation from Agner Fog, which this code is
- * based on, can be found here:
- * http://www.agner.org/optimize/asmlib.zip
- *
- * This optimization for A/B is helpful if the divisor B is mostly
- * runtime invariant. The reciprocal of B is calculated in the
- * slow-path with reciprocal_value(). The fast-path can then just use
- * a much faster multiplication operation with a variable dividend A
- * to calculate the division A/B.
- */
-
-#define reciprocal_value rpl_reciprocal_value
-struct reciprocal_value {
- u32 m;
- u8 sh1, sh2;
-};
-
-struct reciprocal_value rpl_reciprocal_value(u32 d);
-
-#define reciprocal_divide rpl_reciprocal_divide
-static inline u32 rpl_reciprocal_divide(u32 a, struct reciprocal_value R)
-{
- u32 t = (u32)(((u64)a * R.m) >> 32);
- return (t + ((a - t) >> R.sh1)) >> R.sh2;
-}
-
-#endif /* _LINUX_RECIPROCAL_DIV_WRAPPER_H */
diff --git a/datapath/linux/compat/include/linux/rtnetlink.h b/datapath/linux/compat/include/linux/rtnetlink.h
deleted file mode 100644
index cd1e1a0c0..000000000
--- a/datapath/linux/compat/include/linux/rtnetlink.h
+++ /dev/null
@@ -1,41 +0,0 @@
-#ifndef __RTNETLINK_WRAPPER_H
-#define __RTNETLINK_WRAPPER_H 1
-
-#include_next <linux/rtnetlink.h>
-
-#ifndef HAVE_LOCKDEP_RTNL_IS_HELD
-#ifdef CONFIG_PROVE_LOCKING
-static inline int lockdep_rtnl_is_held(void)
-{
- return 1;
-}
-#endif
-#endif
-
-#ifndef rcu_dereference_rtnl
-/**
- * rcu_dereference_rtnl - rcu_dereference with debug checking
- * @p: The pointer to read, prior to dereferencing
- *
- * Do an rcu_dereference(p), but check caller either holds rcu_read_lock()
- * or RTNL. Note : Please prefer rtnl_dereference() or rcu_dereference()
- */
-#define rcu_dereference_rtnl(p) \
- rcu_dereference_check(p, rcu_read_lock_held() || \
- lockdep_rtnl_is_held())
-#endif
-
-#ifndef rtnl_dereference
-/**
- * rtnl_dereference - fetch RCU pointer when updates are prevented by RTNL
- * @p: The pointer to read, prior to dereferencing
- *
- * Return the value of the specified RCU-protected pointer, but omit
- * both the smp_read_barrier_depends() and the ACCESS_ONCE(), because
- * caller holds RTNL.
- */
-#define rtnl_dereference(p) \
- rcu_dereference_protected(p, lockdep_rtnl_is_held())
-#endif
-
-#endif /* linux/rtnetlink.h wrapper */
diff --git a/datapath/linux/compat/include/linux/skbuff.h b/datapath/linux/compat/include/linux/skbuff.h
deleted file mode 100644
index 396a5e406..000000000
--- a/datapath/linux/compat/include/linux/skbuff.h
+++ /dev/null
@@ -1,491 +0,0 @@
-#ifndef __LINUX_SKBUFF_WRAPPER_H
-#define __LINUX_SKBUFF_WRAPPER_H 1
-
-#include <linux/version.h>
-#include <linux/types.h>
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
-/* This should be before skbuff.h to make sure that we rewrite
- * the calls there. */
-struct sk_buff;
-
-int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
- gfp_t gfp_mask);
-#define pskb_expand_head rpl_pskb_expand_head
-#endif
-
-#include_next <linux/skbuff.h>
-#include <linux/jhash.h>
-
-#ifndef HAVE_IGNORE_DF_RENAME
-#define ignore_df local_df
-#endif
-
-
-#ifndef HAVE_NULL_COMPUTE_PSEUDO
-static inline __wsum null_compute_pseudo(struct sk_buff *skb, int proto)
-{
- return 0;
-}
-#endif
-
-#ifndef HAVE_SKB_CHECKSUM_CONVERT
-static inline bool __skb_checksum_convert_check(struct sk_buff *skb)
-{
-#ifdef HAVE_SKBUFF_CSUM_VALID
- return (skb->ip_summed == CHECKSUM_NONE && skb->csum_valid);
-#else
- return skb->ip_summed == CHECKSUM_NONE;
-#endif
-}
-
-static inline void __skb_checksum_convert(struct sk_buff *skb,
- __sum16 check, __wsum pseudo)
-{
- skb->csum = ~pseudo;
- skb->ip_summed = CHECKSUM_COMPLETE;
-}
-
-#define skb_checksum_try_convert(skb, proto, check, compute_pseudo) \
-do { \
- if (__skb_checksum_convert_check(skb)) \
- __skb_checksum_convert(skb, check, \
- compute_pseudo(skb, proto)); \
-} while (0)
-
-#endif
-
-#ifndef SKB_CHECKSUM_SIMPLE_VALIDATE
-
-#ifndef __skb_checksum_validate
-#define __skb_checksum_validate(skb, proto, complete, \
- zero_okay, check, compute_pseudo) \
-({ \
- __sum16 __ret = 0; \
- __ret; \
-})
-#endif
-
-#define skb_checksum_simple_validate(skb) \
- __skb_checksum_validate(skb, 0, true, false, 0, null_compute_pseudo)
-#endif
-
-#ifndef HAVE_SKB_COPY_FROM_LINEAR_DATA_OFFSET
-static inline void skb_copy_from_linear_data_offset(const struct sk_buff *skb,
- const int offset, void *to,
- const unsigned int len)
-{
- memcpy(to, skb->data + offset, len);
-}
-
-static inline void skb_copy_to_linear_data_offset(struct sk_buff *skb,
- const int offset,
- const void *from,
- const unsigned int len)
-{
- memcpy(skb->data + offset, from, len);
-}
-
-#endif /* !HAVE_SKB_COPY_FROM_LINEAR_DATA_OFFSET */
-
-#ifndef HAVE_SKB_INNER_TRANSPORT_OFFSET
-static inline int skb_inner_transport_offset(const struct sk_buff *skb)
-{
- return skb_inner_transport_header(skb) - skb->data;
-}
-#endif
-
-#ifndef HAVE_SKB_RESET_TAIL_POINTER
-static inline void skb_reset_tail_pointer(struct sk_buff *skb)
-{
- skb->tail = skb->data;
-}
-#endif
-/*
- * The networking layer reserves some headroom in skb data (via
- * dev_alloc_skb). This is used to avoid having to reallocate skb data when
- * the header has to grow. In the default case, if the header has to grow
- * 16 bytes or less we avoid the reallocation.
- *
- * Unfortunately this headroom changes the DMA alignment of the resulting
- * network packet. As for NET_IP_ALIGN, this unaligned DMA is expensive
- * on some architectures. An architecture can override this value,
- * perhaps setting it to a cacheline in size (since that will maintain
- * cacheline alignment of the DMA). It must be a power of 2.
- *
- * Various parts of the networking layer expect at least 16 bytes of
- * headroom, you should not reduce this.
- */
-#ifndef NET_SKB_PAD
-#define NET_SKB_PAD 16
-#endif
-
-#ifndef HAVE_SKB_COW_HEAD
-static inline int __skb_cow(struct sk_buff *skb, unsigned int headroom,
- int cloned)
-{
- int delta = 0;
-
- if (headroom < NET_SKB_PAD)
- headroom = NET_SKB_PAD;
- if (headroom > skb_headroom(skb))
- delta = headroom - skb_headroom(skb);
-
- if (delta || cloned)
- return pskb_expand_head(skb, ALIGN(delta, NET_SKB_PAD), 0,
- GFP_ATOMIC);
- return 0;
-}
-
-static inline int skb_cow_head(struct sk_buff *skb, unsigned int headroom)
-{
- return __skb_cow(skb, headroom, skb_header_cloned(skb));
-}
-#endif /* !HAVE_SKB_COW_HEAD */
-
-#ifndef HAVE_SKB_DST_ACCESSOR_FUNCS
-static inline struct dst_entry *skb_dst(const struct sk_buff *skb)
-{
- return (struct dst_entry *)skb->dst;
-}
-
-static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
-{
- skb->dst = dst;
-}
-
-static inline struct rtable *skb_rtable(const struct sk_buff *skb)
-{
- return (struct rtable *)skb->dst;
-}
-#endif
-
-#ifndef CHECKSUM_PARTIAL
-#define CHECKSUM_PARTIAL CHECKSUM_HW
-#endif
-#ifndef CHECKSUM_COMPLETE
-#define CHECKSUM_COMPLETE CHECKSUM_HW
-#endif
-
-#ifndef HAVE_SKB_WARN_LRO
-#ifndef NETIF_F_LRO
-static inline bool skb_warn_if_lro(const struct sk_buff *skb)
-{
- return false;
-}
-#else
-extern void __skb_warn_lro_forwarding(const struct sk_buff *skb);
-
-static inline bool skb_warn_if_lro(const struct sk_buff *skb)
-{
- /* LRO sets gso_size but not gso_type, whereas if GSO is really
- * wanted then gso_type will be set. */
- struct skb_shared_info *shinfo = skb_shinfo(skb);
- if (shinfo->gso_size != 0 && unlikely(shinfo->gso_type == 0)) {
- __skb_warn_lro_forwarding(skb);
- return true;
- }
- return false;
-}
-#endif /* NETIF_F_LRO */
-#endif /* HAVE_SKB_WARN_LRO */
-
-#ifndef HAVE_CONSUME_SKB
-#define consume_skb kfree_skb
-#endif
-
-#ifndef HAVE_SKB_FRAG_PAGE
-#include <linux/mm.h>
-
-static inline struct page *skb_frag_page(const skb_frag_t *frag)
-{
- return frag->page;
-}
-
-static inline void __skb_frag_set_page(skb_frag_t *frag, struct page *page)
-{
- frag->page = page;
-}
-static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size)
-{
- frag->size = size;
-}
-static inline void __skb_frag_ref(skb_frag_t *frag)
-{
- get_page(skb_frag_page(frag));
-}
-static inline void __skb_frag_unref(skb_frag_t *frag)
-{
- put_page(skb_frag_page(frag));
-}
-
-static inline void skb_frag_ref(struct sk_buff *skb, int f)
-{
- __skb_frag_ref(&skb_shinfo(skb)->frags[f]);
-}
-
-static inline void skb_frag_unref(struct sk_buff *skb, int f)
-{
- __skb_frag_unref(&skb_shinfo(skb)->frags[f]);
-}
-
-#endif
-
-#ifndef HAVE_SKB_RESET_MAC_LEN
-static inline void skb_reset_mac_len(struct sk_buff *skb)
-{
- skb->mac_len = skb->network_header - skb->mac_header;
-}
-#endif
-
-#ifndef HAVE_SKB_UNCLONE
-static inline int skb_unclone(struct sk_buff *skb, gfp_t pri)
-{
- might_sleep_if(pri & __GFP_WAIT);
-
- if (skb_cloned(skb))
- return pskb_expand_head(skb, 0, 0, pri);
-
- return 0;
-}
-#endif
-
-#ifndef HAVE_SKB_ORPHAN_FRAGS
-static inline int skb_orphan_frags(struct sk_buff *skb, gfp_t gfp_mask)
-{
- return 0;
-}
-#endif
-
-#ifndef HAVE_SKB_GET_HASH
-#define skb_get_hash skb_get_rxhash
-#endif /* HAVE_SKB_GET_HASH */
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3,14,0)
-#define skb_zerocopy_headlen rpl_skb_zerocopy_headlen
-unsigned int rpl_skb_zerocopy_headlen(const struct sk_buff *from);
-#endif
-
-#ifndef HAVE_SKB_ZEROCOPY
-#define skb_zerocopy rpl_skb_zerocopy
-int rpl_skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len,
- int hlen);
-#endif
-
-#ifndef HAVE_SKB_CLEAR_HASH
-static inline void skb_clear_hash(struct sk_buff *skb)
-{
-#ifdef HAVE_RXHASH
- skb->rxhash = 0;
-#endif
- skb->l4_hash = 0;
-}
-#endif
-
-#ifndef HAVE_SKB_HAS_FRAG_LIST
-#define skb_has_frag_list skb_has_frags
-#endif
-
-#ifndef HAVE___SKB_FILL_PAGE_DESC
-static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
- struct page *page, int off, int size)
-{
- skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-
- __skb_frag_set_page(frag, page);
- frag->page_offset = off;
- skb_frag_size_set(frag, size);
-}
-#endif
-
-#ifndef HAVE_SKB_ENSURE_WRITABLE
-#define skb_ensure_writable rpl_skb_ensure_writable
-int rpl_skb_ensure_writable(struct sk_buff *skb, int write_len);
-#endif
-
-#ifndef HAVE___SKB_VLAN_POP
-#define __skb_vlan_pop rpl___skb_vlan_pop
-int rpl___skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci);
-#endif
-
-#ifndef HAVE_SKB_VLAN_POP
-#define skb_vlan_pop rpl_skb_vlan_pop
-int rpl_skb_vlan_pop(struct sk_buff *skb);
-#endif
-
-#ifndef HAVE_SKB_VLAN_PUSH
-#define skb_vlan_push rpl_skb_vlan_push
-int rpl_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci);
-#endif
-
-#ifndef HAVE_KFREE_SKB_LIST
-void rpl_kfree_skb_list(struct sk_buff *segs);
-#define kfree_skb_list rpl_kfree_skb_list
-#endif
-
-#ifndef HAVE_SKB_CHECKSUM_START_OFFSET
-static inline int skb_checksum_start_offset(const struct sk_buff *skb)
-{
- return skb->csum_start - skb_headroom(skb);
-}
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4,3,0)
-#define skb_postpull_rcsum rpl_skb_postpull_rcsum
-static inline void skb_postpull_rcsum(struct sk_buff *skb,
- const void *start, unsigned int len)
-{
- if (skb->ip_summed == CHECKSUM_COMPLETE)
- skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0));
- else if (skb->ip_summed == CHECKSUM_PARTIAL &&
- skb_checksum_start_offset(skb) < 0)
- skb->ip_summed = CHECKSUM_NONE;
-}
-
-#define skb_pull_rcsum rpl_skb_pull_rcsum
-static inline unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
-{
- unsigned char *data = skb->data;
-
- BUG_ON(len > skb->len);
- __skb_pull(skb, len);
- skb_postpull_rcsum(skb, data, len);
- return skb->data;
-}
-
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4,1,0)
-#define skb_scrub_packet rpl_skb_scrub_packet
-void rpl_skb_scrub_packet(struct sk_buff *skb, bool xnet);
-#endif
-
-#define skb_pop_mac_header rpl_skb_pop_mac_header
-static inline void skb_pop_mac_header(struct sk_buff *skb)
-{
- skb->mac_header = skb->network_header;
-}
-
-#ifndef HAVE_SKB_CLEAR_HASH_IF_NOT_L4
-static inline void skb_clear_hash_if_not_l4(struct sk_buff *skb)
-{
- if (!skb->l4_hash)
- skb_clear_hash(skb);
-}
-#endif
-
-#ifndef HAVE_SKB_POSTPUSH_RCSUM
-static inline void skb_postpush_rcsum(struct sk_buff *skb,
- const void *start, unsigned int len)
-{
- /* For performing the reverse operation to skb_postpull_rcsum(),
- * we can instead of ...
- *
- * skb->csum = csum_add(skb->csum, csum_partial(start, len, 0));
- *
- * ... just use this equivalent version here to save a few
- * instructions. Feeding csum of 0 in csum_partial() and later
- * on adding skb->csum is equivalent to feed skb->csum in the
- * first place.
- */
- if (skb->ip_summed == CHECKSUM_COMPLETE)
- skb->csum = csum_partial(start, len, skb->csum);
-}
-#endif
-
-#define skb_checksum_start rpl_skb_checksum_start
-static inline unsigned char *skb_checksum_start(const struct sk_buff *skb)
-{
- return skb->head + skb->csum_start;
-}
-
-#ifndef HAVE_LCO_CSUM
-static inline __wsum lco_csum(struct sk_buff *skb)
-{
- unsigned char *csum_start = skb_checksum_start(skb);
- unsigned char *l4_hdr = skb_transport_header(skb);
- __wsum partial;
-
- /* Start with complement of inner checksum adjustment */
- partial = ~csum_unfold(*(__force __sum16 *)(csum_start +
- skb->csum_offset));
-
- /* Add in checksum of our headers (incl. outer checksum
- * adjustment filled in by caller) and return result.
- */
- return csum_partial(l4_hdr, csum_start - l4_hdr, partial);
-}
-#endif
-
-#ifndef HAVE_SKB_NFCT
-static inline struct nf_conntrack *skb_nfct(const struct sk_buff *skb)
-{
-#if IS_ENABLED(CONFIG_NF_CONNTRACK)
- return skb->nfct;
-#else
- return NULL;
-#endif
-}
-#endif
-
-#ifndef HAVE_SKB_PUT_ZERO
-static inline void *skb_put_zero(struct sk_buff *skb, unsigned int len)
-{
- void *tmp = skb_put(skb, len);
-
- memset(tmp, 0, len);
-
- return tmp;
-}
-#endif
-
-#ifndef HAVE_SKB_GSO_IPXIP6
-#define SKB_GSO_IPXIP6 (1 << 10)
-#endif
-
-#ifndef HAVE_SKB_SET_INNER_IPPROTO
-static inline void skb_set_inner_ipproto(struct sk_buff *skb,
- __u8 ipproto)
-{
-}
-#endif
-
-#ifndef HAVE_NF_RESET_CT
-#define nf_reset_ct nf_reset
-#endif
-
-#ifndef HAVE___SKB_SET_HASH
-static inline void
-__skb_set_hash(struct sk_buff *skb, __u32 hash, bool is_sw, bool is_l4)
-{
-#ifdef HAVE_RXHASH
- skb->rxhash = hash;
-#else
- skb->hash = hash;
-#endif
- skb->l4_hash = is_l4;
-#ifdef HAVE_SW_HASH
- skb->sw_hash = is_sw;
-#endif
-}
-#endif
-
-#ifndef HAVE_SKB_GET_HASH_RAW
-static inline __u32 skb_get_hash_raw(const struct sk_buff *skb)
-{
-#ifdef HAVE_RXHASH
- return skb->rxhash;
-#else
- return skb->hash;
-#endif
-}
-#endif
-
-#ifndef skb_list_walk_safe
-/* Iterate through singly-linked GSO fragments of an skb. */
-#define skb_list_walk_safe(first, skb, next_skb) \
- for ((skb) = (first), (next_skb) = (skb) ? (skb)->next : NULL; (skb); \
- (skb) = (next_skb), (next_skb) = (skb) ? (skb)->next : NULL)
-#endif
-
-#endif
diff --git a/datapath/linux/compat/include/linux/static_key.h b/datapath/linux/compat/include/linux/static_key.h
deleted file mode 100644
index 432feccb9..000000000
--- a/datapath/linux/compat/include/linux/static_key.h
+++ /dev/null
@@ -1,86 +0,0 @@
-#ifndef _STATIC_KEY_WRAPPER_H
-#define _STATIC_KEY_WRAPPER_H
-
-#include <linux/atomic.h>
-#include_next <linux/static_key.h>
-#ifndef HAVE_UPSTREAM_STATIC_KEY
-/*
- * This backport is based on upstream net-next commit 11276d5306b8
- * ("locking/static_keys: Add a new static_key interface").
- *
- * For kernel that does not support the new static key interface,
- * we do not backport the jump label support but the fall back version
- * of static key that is simply a conditional branch.
- */
-
-struct static_key_true {
- struct static_key key;
-};
-
-struct static_key_false {
- struct static_key key;
-};
-
-#define rpl_STATIC_KEY_INIT_TRUE { .enabled = ATOMIC_INIT(1) }
-#define rpl_STATIC_KEY_INIT_FALSE { .enabled = ATOMIC_INIT(0) }
-
-#define rpl_STATIC_KEY_TRUE_INIT \
- (struct static_key_true) { .key = rpl_STATIC_KEY_INIT_TRUE, }
-#define rpl_STATIC_KEY_FALSE_INIT \
- (struct static_key_false){ .key = rpl_STATIC_KEY_INIT_FALSE, }
-
-#define rpl_DEFINE_STATIC_KEY_TRUE(name) \
- struct static_key_true name = rpl_STATIC_KEY_TRUE_INIT
-
-#define rpl_DEFINE_STATIC_KEY_FALSE(name) \
- struct static_key_false name = rpl_STATIC_KEY_FALSE_INIT
-
-static inline int rpl_static_key_count(struct static_key *key)
-{
- return atomic_read(&key->enabled);
-}
-
-static inline void rpl_static_key_enable(struct static_key *key)
-{
- int count = rpl_static_key_count(key);
-
- WARN_ON_ONCE(count < 0 || count > 1);
-
- if (!count)
- static_key_slow_inc(key);
-}
-
-static inline void rpl_static_key_disable(struct static_key *key)
-{
- int count = rpl_static_key_count(key);
-
- WARN_ON_ONCE(count < 0 || count > 1);
-
- if (count)
- static_key_slow_dec(key);
-}
-
-#ifdef HAVE_DEFINE_STATIC_KEY
-#undef DEFINE_STATIC_KEY_TRUE
-#undef DEFINE_STATIC_KEY_FALSE
-#endif
-
-#define DEFINE_STATIC_KEY_TRUE rpl_DEFINE_STATIC_KEY_TRUE
-#define DEFINE_STATIC_KEY_FALSE rpl_DEFINE_STATIC_KEY_FALSE
-
-#define static_branch_likely(x) likely(static_key_enabled(&(x)->key))
-#define static_branch_unlikely(x) unlikely(static_key_enabled(&(x)->key))
-
-#define static_branch_enable(x) rpl_static_key_enable(&(x)->key)
-#define static_branch_disable(x) rpl_static_key_disable(&(x)->key)
-
-#ifndef HAVE_DECLARE_STATIC_KEY
-#define DECLARE_STATIC_KEY_TRUE(name) \
- extern struct static_key_true name
-#define DECLARE_STATIC_KEY_FALSE(name) \
- extern struct static_key_false name
-#endif
-
-#endif /* HAVE_UPSTREAM_STATIC_KEY */
-
-#endif /* _STATIC_KEY_WRAPPER_H */
diff --git a/datapath/linux/compat/include/linux/stddef.h b/datapath/linux/compat/include/linux/stddef.h
deleted file mode 100644
index 5b44c0dee..000000000
--- a/datapath/linux/compat/include/linux/stddef.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef __LINUX_STDDEF_WRAPPER_H
-#define __LINUX_STDDEF_WRAPPER_H 1
-
-#include_next <linux/stddef.h>
-
-#ifdef __KERNEL__
-
-#ifndef offsetofend
-#define offsetofend(TYPE, MEMBER) \
- (offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER))
-#endif
-
-#endif /* __KERNEL__ */
-
-#endif
diff --git a/datapath/linux/compat/include/linux/timekeeping.h b/datapath/linux/compat/include/linux/timekeeping.h
deleted file mode 100644
index 3a3b18331..000000000
--- a/datapath/linux/compat/include/linux/timekeeping.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef _LINUX_TIMEKEEPING_WRAPPER_H
-#define _LINUX_TIMEKEEPING_WRAPPER_H
-
-#ifndef HAVE_KTIME_GET_TS64
-#define ktime_get_ts64 ktime_get_ts
-#define timespec64 timespec
-#else
-#include_next <linux/timekeeping.h>
-#endif
-
-#endif
diff --git a/datapath/linux/compat/include/linux/types.h b/datapath/linux/compat/include/linux/types.h
deleted file mode 100644
index a58623e70..000000000
--- a/datapath/linux/compat/include/linux/types.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef __LINUX_TYPES_WRAPPER_H
-#define __LINUX_TYPES_WRAPPER_H 1
-
-#include_next <linux/types.h>
-
-#ifndef HAVE_CSUM_TYPES
-typedef __u16 __bitwise __sum16;
-typedef __u32 __bitwise __wsum;
-#endif
-
-#endif
diff --git a/datapath/linux/compat/include/linux/u64_stats_sync.h b/datapath/linux/compat/include/linux/u64_stats_sync.h
deleted file mode 100644
index 9342f73d0..000000000
--- a/datapath/linux/compat/include/linux/u64_stats_sync.h
+++ /dev/null
@@ -1,155 +0,0 @@
-#ifndef _LINUX_U64_STATS_SYNC_WRAPPER_H
-#define _LINUX_U64_STATS_SYNC_WRAPPER_H
-
-#include <linux/version.h>
-
-#if defined(HAVE_U64_STATS_FETCH_BEGIN_IRQ) && \
- LINUX_VERSION_CODE >= KERNEL_VERSION(3,13,0)
-#include_next <linux/u64_stats_sync.h>
-#else
-
-/*
- * To properly implement 64bits network statistics on 32bit and 64bit hosts,
- * we provide a synchronization point, that is a noop on 64bit or UP kernels.
- *
- * Key points :
- * 1) Use a seqcount on SMP 32bits, with low overhead.
- * 2) Whole thing is a noop on 64bit arches or UP kernels.
- * 3) Write side must ensure mutual exclusion or one seqcount update could
- * be lost, thus blocking readers forever.
- * If this synchronization point is not a mutex, but a spinlock or
- * spinlock_bh() or disable_bh() :
- * 3.1) Write side should not sleep.
- * 3.2) Write side should not allow preemption.
- * 3.3) If applicable, interrupts should be disabled.
- *
- * 4) If reader fetches several counters, there is no guarantee the whole values
- * are consistent (remember point 1) : this is a noop on 64bit arches anyway)
- *
- * 5) readers are allowed to sleep or be preempted/interrupted : They perform
- * pure reads. But if they have to fetch many values, it's better to not allow
- * preemptions/interruptions to avoid many retries.
- *
- * 6) If counter might be written by an interrupt, readers should block interrupts.
- * (On UP, there is no seqcount_t protection, a reader allowing interrupts could
- * read partial values)
- *
- * 7) For irq or softirq uses, readers can use u64_stats_fetch_begin_irq() and
- * u64_stats_fetch_retry_irq() helpers
- *
- * Usage :
- *
- * Stats producer (writer) should use following template granted it already got
- * an exclusive access to counters (a lock is already taken, or per cpu
- * data is used [in a non preemptable context])
- *
- * spin_lock_bh(...) or other synchronization to get exclusive access
- * ...
- * u64_stats_update_begin(&stats->syncp);
- * stats->bytes64 += len; // non atomic operation
- * stats->packets64++; // non atomic operation
- * u64_stats_update_end(&stats->syncp);
- *
- * While a consumer (reader) should use following template to get consistent
- * snapshot for each variable (but no guarantee on several ones)
- *
- * u64 tbytes, tpackets;
- * unsigned int start;
- *
- * do {
- * start = u64_stats_fetch_begin(&stats->syncp);
- * tbytes = stats->bytes64; // non atomic operation
- * tpackets = stats->packets64; // non atomic operation
- * } while (u64_stats_fetch_retry(&stats->syncp, start));
- *
- *
- * Example of use in drivers/net/loopback.c, using per_cpu containers,
- * in BH disabled context.
- */
-#include <linux/seqlock.h>
-
-struct u64_stats_sync {
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
- seqcount_t seq;
-#endif
-};
-
-#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
-# define u64_stats_init(syncp) seqcount_init(syncp.seq)
-#else
-# define u64_stats_init(syncp) do { } while (0)
-#endif
-
-static inline void u64_stats_update_begin(struct u64_stats_sync *syncp)
-{
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
- write_seqcount_begin(&syncp->seq);
-#endif
-}
-
-static inline void u64_stats_update_end(struct u64_stats_sync *syncp)
-{
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
- write_seqcount_end(&syncp->seq);
-#endif
-}
-
-static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
-{
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
- return read_seqcount_begin(&syncp->seq);
-#else
-#if BITS_PER_LONG==32
- preempt_disable();
-#endif
- return 0;
-#endif
-}
-
-static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
- unsigned int start)
-{
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
- return read_seqcount_retry(&syncp->seq, start);
-#else
-#if BITS_PER_LONG==32
- preempt_enable();
-#endif
- return false;
-#endif
-}
-
-/*
- * In case irq handlers can update u64 counters, readers can use following helpers
- * - SMP 32bit arches use seqcount protection, irq safe.
- * - UP 32bit must disable irqs.
- * - 64bit have no problem atomically reading u64 values, irq safe.
- */
-static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync *syncp)
-{
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
- return read_seqcount_begin(&syncp->seq);
-#else
-#if BITS_PER_LONG==32
- local_irq_disable();
-#endif
- return 0;
-#endif
-}
-
-static inline bool u64_stats_fetch_retry_irq(const struct u64_stats_sync *syncp,
- unsigned int start)
-{
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
- return read_seqcount_retry(&syncp->seq, start);
-#else
-#if BITS_PER_LONG==32
- local_irq_enable();
-#endif
- return false;
-#endif
-}
-
-#endif /* !HAVE_U64_STATS_FETCH_BEGIN_IRQ || kernel < 3.13 */
-
-#endif /* _LINUX_U64_STATS_SYNC_WRAPPER_H */
diff --git a/datapath/linux/compat/include/linux/udp.h b/datapath/linux/compat/include/linux/udp.h
deleted file mode 100644
index 22e57d4c0..000000000
--- a/datapath/linux/compat/include/linux/udp.h
+++ /dev/null
@@ -1,33 +0,0 @@
-#ifndef __LINUX_UDP_WRAPPER_H
-#define __LINUX_UDP_WRAPPER_H 1
-
-#include_next <linux/udp.h>
-#include <linux/ipv6.h>
-
-#ifndef HAVE_NO_CHECK6_TX
-static inline void udp_set_no_check6_tx(struct sock *sk, bool val)
-{
-#ifdef HAVE_SK_NO_CHECK_TX
- sk->sk_no_check_tx = val;
-#endif
-}
-
-static inline void udp_set_no_check6_rx(struct sock *sk, bool val)
-{
-#ifdef HAVE_SK_NO_CHECK_TX
- sk->sk_no_check_rx = val;
-#else
- /* since netwroking stack is not checking for zero UDP checksum
- * check it in OVS module. */
- #define OVS_CHECK_UDP_TUNNEL_ZERO_CSUM
-#endif
-}
-#endif
-
-#ifdef OVS_CHECK_UDP_TUNNEL_ZERO_CSUM
-#define udp6_csum_zero_error rpl_udp6_csum_zero_error
-
-void rpl_udp6_csum_zero_error(struct sk_buff *skb);
-#endif
-
-#endif
diff --git a/datapath/linux/compat/include/linux/workqueue.h b/datapath/linux/compat/include/linux/workqueue.h
deleted file mode 100644
index ed573c226..000000000
--- a/datapath/linux/compat/include/linux/workqueue.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_WORKQUEUE_WRAPPER_H
-#define __LINUX_WORKQUEUE_WRAPPER_H 1
-
-#include_next <linux/workqueue.h>
-
-#endif
diff --git a/datapath/linux/compat/include/net/checksum.h b/datapath/linux/compat/include/net/checksum.h
deleted file mode 100644
index d1f1125d1..000000000
--- a/datapath/linux/compat/include/net/checksum.h
+++ /dev/null
@@ -1,39 +0,0 @@
-#ifndef __NET_CHECKSUM_WRAPPER_H
-#define __NET_CHECKSUM_WRAPPER_H 1
-
-#include_next <net/checksum.h>
-
-#ifndef HAVE_CSUM_UNFOLD
-static inline __wsum csum_unfold(__sum16 n)
-{
- return (__force __wsum)n;
-}
-#endif /* !HAVE_CSUM_UNFOLD */
-
-/* Workaround for debugging included in certain versions of XenServer. It only
- * applies to 32-bit x86.
- */
-#if defined(HAVE_CSUM_COPY_DBG) && defined(CONFIG_X86_32)
-#define csum_and_copy_to_user(src, dst, len, sum, err_ptr) \
- csum_and_copy_to_user(src, dst, len, sum, NULL, err_ptr)
-#endif
-
-#ifndef HAVE_CSUM_REPLACE4
-static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to)
-{
- __be32 diff[] = { ~from, to };
-
- *sum = csum_fold(csum_partial((char *)diff, sizeof(diff), ~csum_unfold(*sum)));
-}
-
-static inline void csum_replace2(__sum16 *sum, __be16 from, __be16 to)
-{
- csum_replace4(sum, (__force __be32)from, (__force __be32)to);
-}
-#endif
-
-#ifndef CSUM_MANGLED_0
-#define CSUM_MANGLED_0 ((__force __sum16)0xffff)
-#endif
-
-#endif /* checksum.h */
diff --git a/datapath/linux/compat/include/net/dst.h b/datapath/linux/compat/include/net/dst.h
deleted file mode 100644
index af78a6ca6..000000000
--- a/datapath/linux/compat/include/net/dst.h
+++ /dev/null
@@ -1,77 +0,0 @@
-#ifndef __NET_DST_WRAPPER_H
-#define __NET_DST_WRAPPER_H 1
-
-#include <linux/version.h>
-#include_next <net/dst.h>
-
-#ifndef HAVE_SKB_DST_ACCESSOR_FUNCS
-
-static inline void skb_dst_drop(struct sk_buff *skb)
-{
- if (skb->dst)
- dst_release(skb_dst(skb));
- skb->dst = NULL;
-}
-
-#endif
-
-#ifndef DST_OBSOLETE_NONE
-#define DST_OBSOLETE_NONE 0
-#endif
-
-#ifndef DST_NOCOUNT
-#define DST_NOCOUNT 0
-#endif
-
-#if !defined(HAVE___SKB_DST_COPY)
-static inline void __skb_dst_copy(struct sk_buff *nskb, unsigned long refdst)
-{
- nskb->_skb_refdst = refdst;
- if (!(nskb->_skb_refdst & SKB_DST_NOREF))
- dst_clone(skb_dst(nskb));
-}
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4,3,0)
-static const u32 rpl_dst_default_metrics[RTAX_MAX + 1] = {
- /* This initializer is needed to force linker to place this variable
- * into const section. Otherwise it might end into bss section.
- * We really want to avoid false sharing on this variable, and catch
- * any writes on it.
- */
- [RTAX_MAX] = 0xdeadbeef,
-};
-#define dst_default_metrics rpl_dst_default_metrics
-
-static inline void rpl_dst_init(struct dst_entry *dst, struct dst_ops *ops,
- struct net_device *dev, int initial_ref,
- int initial_obsolete, unsigned short flags)
-{
- /* XXX: It's easier to handle compatibility by zeroing, as we can
- * refer to fewer fields. Do that here.
- */
- memset(dst, 0, sizeof *dst);
-
- dst->dev = dev;
- if (dev)
- dev_hold(dev);
- dst->ops = ops;
- dst_init_metrics(dst, dst_default_metrics, true);
- dst->path = dst;
- dst->input = dst_discard;
-#ifndef HAVE_DST_DISCARD_SK
- dst->output = dst_discard;
-#else
- dst->output = dst_discard_sk;
-#endif
- dst->obsolete = initial_obsolete;
- atomic_set(&dst->__refcnt, initial_ref);
- dst->lastuse = jiffies;
- dst->flags = flags;
- if (!(flags & DST_NOCOUNT))
- dst_entries_add(ops, 1);
-}
-#define dst_init rpl_dst_init
-#endif
-
-#endif
diff --git a/datapath/linux/compat/include/net/dst_cache.h b/datapath/linux/compat/include/net/dst_cache.h
deleted file mode 100644
index 6084d4eea..000000000
--- a/datapath/linux/compat/include/net/dst_cache.h
+++ /dev/null
@@ -1,114 +0,0 @@
-#ifndef _NET_DST_CACHE_WRAPPER_H
-#define _NET_DST_CACHE_WRAPPER_H
-
-#ifdef USE_BUILTIN_DST_CACHE
-#include_next <net/dst_cache.h>
-#else
-
-#include <linux/jiffies.h>
-#include <net/dst.h>
-#if IS_ENABLED(CONFIG_IPV6)
-#include <net/ip6_fib.h>
-#endif
-
-#ifdef USE_UPSTREAM_TUNNEL
-#include_next <net/dst_cache.h>
-
-#else
-struct dst_cache {
- struct dst_cache_pcpu __percpu *cache;
- unsigned long reset_ts;
-};
-
-/**
- * dst_cache_get - perform cache lookup
- * @dst_cache: the cache
- *
- * The caller should use dst_cache_get_ip4() if it need to retrieve the
- * source address to be used when xmitting to the cached dst.
- * local BH must be disabled.
- */
-#define rpl_dst_cache_get dst_cache_get
-struct dst_entry *rpl_dst_cache_get(struct dst_cache *dst_cache);
-
-/**
- * dst_cache_get_ip4 - perform cache lookup and fetch ipv4 source address
- * @dst_cache: the cache
- * @saddr: return value for the retrieved source address
- *
- * local BH must be disabled.
- */
-#define rpl_dst_cache_get_ip4 dst_cache_get_ip4
-struct rtable *rpl_dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr);
-
-/**
- * dst_cache_set_ip4 - store the ipv4 dst into the cache
- * @dst_cache: the cache
- * @dst: the entry to be cached
- * @saddr: the source address to be stored inside the cache
- *
- * local BH must be disabled.
- */
-#define rpl_dst_cache_set_ip4 dst_cache_set_ip4
-void rpl_dst_cache_set_ip4(struct dst_cache *dst_cache, struct dst_entry *dst,
- __be32 saddr);
-
-#if IS_ENABLED(CONFIG_IPV6)
-
-/**
- * dst_cache_set_ip6 - store the ipv6 dst into the cache
- * @dst_cache: the cache
- * @dst: the entry to be cached
- * @saddr: the source address to be stored inside the cache
- *
- * local BH must be disabled.
- */
-#define rpl_dst_cache_set_ip6 dst_cache_set_ip6
-void rpl_dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
- const struct in6_addr *addr);
-
-/**
- * dst_cache_get_ip6 - perform cache lookup and fetch ipv6 source address
- * @dst_cache: the cache
- * @saddr: return value for the retrieved source address
- *
- * local BH must be disabled.
- */
-#define rpl_dst_cache_get_ip6 dst_cache_get_ip6
-struct dst_entry *rpl_dst_cache_get_ip6(struct dst_cache *dst_cache,
- struct in6_addr *saddr);
-#endif
-
-/**
- * dst_cache_reset - invalidate the cache contents
- * @dst_cache: the cache
- *
- * This do not free the cached dst to avoid races and contentions.
- * the dst will be freed on later cache lookup.
- */
-static inline void dst_cache_reset(struct dst_cache *dst_cache)
-{
- dst_cache->reset_ts = jiffies;
-}
-
-/**
- * dst_cache_init - initialize the cache, allocating the required storage
- * @dst_cache: the cache
- * @gfp: allocation flags
- */
-#define rpl_dst_cache_init dst_cache_init
-int rpl_dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp);
-
-/**
- * dst_cache_destroy - empty the cache and free the allocated storage
- * @dst_cache: the cache
- *
- * No synchronization is enforced: it must be called only when the cache
- * is unsed.
- */
-#define rpl_dst_cache_destroy dst_cache_destroy
-void rpl_dst_cache_destroy(struct dst_cache *dst_cache);
-
-#endif /* USE_UPSTREAM_TUNNEL */
-#endif /* USE_BUILTIN_DST_CACHE */
-#endif
diff --git a/datapath/linux/compat/include/net/dst_metadata.h b/datapath/linux/compat/include/net/dst_metadata.h
deleted file mode 100644
index 4ffafccce..000000000
--- a/datapath/linux/compat/include/net/dst_metadata.h
+++ /dev/null
@@ -1,269 +0,0 @@
-#ifndef __NET_DST_METADATA_WRAPPER_H
-#define __NET_DST_METADATA_WRAPPER_H 1
-
-#ifdef USE_UPSTREAM_TUNNEL
-#include_next <net/dst_metadata.h>
-#else
-#include <linux/skbuff.h>
-
-#include <net/dsfield.h>
-#include <net/dst.h>
-#include <net/ipv6.h>
-#include <net/ip_tunnels.h>
-
-enum metadata_type {
- METADATA_IP_TUNNEL,
- METADATA_HW_PORT_MUX,
-};
-
-struct hw_port_info {
- struct net_device *lower_dev;
- u32 port_id;
-};
-
-struct metadata_dst {
- struct dst_entry dst;
- enum metadata_type type;
- union {
- struct ip_tunnel_info tun_info;
- struct hw_port_info port_info;
- } u;
-};
-
-#ifndef DST_METADATA
-#define DST_METADATA 0x0080
-#endif
-
-extern struct dst_ops md_dst_ops;
-
-static void rpl__metadata_dst_init(struct metadata_dst *md_dst,
- enum metadata_type type, u8 optslen)
-
-{
- struct dst_entry *dst;
-
- dst = &md_dst->dst;
- dst_init(dst, &md_dst_ops, NULL, 1, DST_OBSOLETE_NONE,
- DST_METADATA | DST_NOCOUNT);
-
-#if 0
- /* unused in OVS */
- dst->input = dst_md_discard;
- dst->output = dst_md_discard_out;
-#endif
- memset(dst + 1, 0, sizeof(*md_dst) + optslen - sizeof(*dst));
- md_dst->type = type;
-}
-
-static struct
-metadata_dst *__rpl_metadata_dst_alloc(u8 optslen,
- enum metadata_type type,
- gfp_t flags)
-{
- struct metadata_dst *md_dst;
-
- md_dst = kmalloc(sizeof(*md_dst) + optslen, flags);
- if (!md_dst)
- return NULL;
-
- rpl__metadata_dst_init(md_dst, type, optslen);
-
- return md_dst;
-}
-static inline struct metadata_dst *rpl_tun_rx_dst(int md_size)
-{
- struct metadata_dst *tun_dst;
-
- tun_dst = __rpl_metadata_dst_alloc(md_size, METADATA_IP_TUNNEL,
- GFP_ATOMIC);
- if (!tun_dst)
- return NULL;
-
- tun_dst->u.tun_info.options_len = 0;
- tun_dst->u.tun_info.mode = 0;
- return tun_dst;
-}
-static inline struct metadata_dst *rpl__ip_tun_set_dst(__be32 saddr,
- __be32 daddr,
- __u8 tos, __u8 ttl,
- __be16 tp_dst,
- __be16 flags,
- __be64 tunnel_id,
- int md_size)
-{
- struct metadata_dst *tun_dst;
-
- tun_dst = rpl_tun_rx_dst(md_size);
- if (!tun_dst)
- return NULL;
-
- ip_tunnel_key_init(&tun_dst->u.tun_info.key,
- saddr, daddr, tos, ttl,
- 0, 0, tp_dst, tunnel_id, flags);
- return tun_dst;
-}
-
-static inline struct metadata_dst *rpl_ip_tun_rx_dst(struct sk_buff *skb,
- __be16 flags,
- __be64 tunnel_id,
- int md_size)
-{
- const struct iphdr *iph = ip_hdr(skb);
-
- return rpl__ip_tun_set_dst(iph->saddr, iph->daddr, iph->tos, iph->ttl,
- 0, flags, tunnel_id, md_size);
-}
-
-static inline
-struct metadata_dst *rpl__ipv6_tun_set_dst(const struct in6_addr *saddr,
- const struct in6_addr *daddr,
- __u8 tos, __u8 ttl,
- __be16 tp_dst,
- __be32 label,
- __be16 flags,
- __be64 tunnel_id,
- int md_size)
-{
- struct metadata_dst *tun_dst;
- struct ip_tunnel_info *info;
-
- tun_dst = rpl_tun_rx_dst(md_size);
- if (!tun_dst)
- return NULL;
-
- info = &tun_dst->u.tun_info;
- info->mode = IP_TUNNEL_INFO_IPV6;
- info->key.tun_flags = flags;
- info->key.tun_id = tunnel_id;
- info->key.tp_src = 0;
- info->key.tp_dst = tp_dst;
-
- info->key.u.ipv6.src = *saddr;
- info->key.u.ipv6.dst = *daddr;
-
- info->key.tos = tos;
- info->key.ttl = ttl;
- info->key.label = label;
-
- return tun_dst;
-}
-
-static inline struct metadata_dst *rpl_ipv6_tun_rx_dst(struct sk_buff *skb,
- __be16 flags,
- __be64 tunnel_id,
- int md_size)
-{
- const struct ipv6hdr *ip6h = ipv6_hdr(skb);
-
- return rpl__ipv6_tun_set_dst(&ip6h->saddr, &ip6h->daddr,
- ipv6_get_dsfield(ip6h), ip6h->hop_limit,
- 0, ip6_flowlabel(ip6h), flags, tunnel_id,
- md_size);
-}
-
-static void __metadata_dst_init(struct metadata_dst *md_dst, u8 optslen)
-{
- struct dst_entry *dst;
-
- dst = &md_dst->dst;
-
-#if 0
- dst_init(dst, &md_dst_ops, NULL, 1, DST_OBSOLETE_NONE,
- DST_METADATA | DST_NOCACHE | DST_NOCOUNT);
-
- dst->input = dst_md_discard;
- dst->output = dst_md_discard_out;
-#endif
-
- memset(dst + 1, 0, sizeof(*md_dst) + optslen - sizeof(*dst));
-}
-
-static inline struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags)
-{
- struct metadata_dst *md_dst;
-
- md_dst = kmalloc(sizeof(*md_dst) + optslen, flags);
- if (!md_dst)
- return NULL;
-
- __metadata_dst_init(md_dst, optslen);
- return md_dst;
-}
-
-#define skb_tunnel_info ovs_skb_tunnel_info
-
-static inline void ovs_tun_rx_dst(struct metadata_dst *md_dst, int optslen)
-{
- /* No need to allocate for OVS backport case. */
-#if 0
- struct metadata_dst *tun_dst;
- struct ip_tunnel_info *info;
-
- tun_dst = metadata_dst_alloc(md_size, GFP_ATOMIC);
- if (!tun_dst)
- return NULL;
-#endif
- __metadata_dst_init(md_dst, optslen);
-}
-
-static inline void ovs_ip_tun_rx_dst(struct metadata_dst *md_dst,
- struct sk_buff *skb, __be16 flags,
- __be64 tunnel_id, int md_size)
-{
- const struct iphdr *iph = ip_hdr(skb);
-
- ovs_tun_rx_dst(md_dst, md_size);
- ip_tunnel_key_init(&md_dst->u.tun_info.key,
- iph->saddr, iph->daddr, iph->tos, iph->ttl, 0,
- 0, 0, tunnel_id, flags);
-}
-
-static inline void ovs_ipv6_tun_rx_dst(struct metadata_dst *md_dst,
- struct sk_buff *skb,
- __be16 flags,
- __be64 tunnel_id,
- int md_size)
-{
- struct ip_tunnel_info *info = &md_dst->u.tun_info;
- const struct ipv6hdr *ip6h = ipv6_hdr(skb);
-
- ovs_tun_rx_dst(md_dst, md_size);
- info->mode = IP_TUNNEL_INFO_IPV6;
- info->key.tun_flags = flags;
- info->key.tun_id = tunnel_id;
- info->key.tp_src = 0;
- info->key.tp_dst = 0;
-
- info->key.u.ipv6.src = ip6h->saddr;
- info->key.u.ipv6.dst = ip6h->daddr;
-
- info->key.tos = ipv6_get_dsfield(ip6h);
- info->key.ttl = ip6h->hop_limit;
- info->key.label = ip6_flowlabel(ip6h);
-}
-
-#endif /* USE_UPSTREAM_TUNNEL */
-
-void ovs_ip_tunnel_rcv(struct net_device *dev, struct sk_buff *skb,
- struct metadata_dst *tun_dst);
-
-static inline struct metadata_dst *
-rpl_metadata_dst_alloc(u8 optslen, enum metadata_type type, gfp_t flags)
-{
-#if defined(HAVE_METADATA_DST_ALLOC_WITH_METADATA_TYPE) && defined(USE_UPSTREAM_TUNNEL)
- return metadata_dst_alloc(optslen, type, flags);
-#else
- return metadata_dst_alloc(optslen, flags);
-#endif
-}
-#define metadata_dst_alloc rpl_metadata_dst_alloc
-
-static inline bool rpl_skb_valid_dst(const struct sk_buff *skb)
-{
- struct dst_entry *dst = skb_dst(skb);
-
- return dst && !(dst->flags & DST_METADATA);
-}
-#define skb_valid_dst rpl_skb_valid_dst
-
-#endif /* __NET_DST_METADATA_WRAPPER_H */
diff --git a/datapath/linux/compat/include/net/erspan.h b/datapath/linux/compat/include/net/erspan.h
deleted file mode 100644
index 4a6a8f240..000000000
--- a/datapath/linux/compat/include/net/erspan.h
+++ /dev/null
@@ -1,342 +0,0 @@
-#ifndef USE_UPSTREAM_TUNNEL
-#ifndef __LINUX_ERSPAN_H
-#define __LINUX_ERSPAN_H
-
-/*
- * GRE header for ERSPAN encapsulation (8 octets [34:41]) -- 8 bytes
- * 0 1 2 3
- * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * |0|0|0|1|0|00000|000000000|00000| Protocol Type for ERSPAN |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * | Sequence Number (increments per packet per session) |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- *
- * Note that in the above GRE header [RFC1701] out of the C, R, K, S,
- * s, Recur, Flags, Version fields only S (bit 03) is set to 1. The
- * other fields are set to zero, so only a sequence number follows.
- *
- * ERSPAN Version 1 (Type II) header (8 octets [42:49])
- * 0 1 2 3
- * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * | Ver | VLAN | COS | En|T| Session ID |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * | Reserved | Index |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- *
- *
- * ERSPAN Version 2 (Type III) header (12 octets [42:49])
- * 0 1 2 3
- * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * | Ver | VLAN | COS |BSO|T| Session ID |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * | Timestamp |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * | SGT |P| FT | Hw ID |D|Gra|O|
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- *
- * Platform Specific SubHeader (8 octets, optional)
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * | Platf ID | Platform Specific Info |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * | Platform Specific Info |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- *
- * GRE proto ERSPAN type II = 0x88BE, type III = 0x22EB
- */
-
-/* #include <uapi/linux/erspan.h> */
-/* Just insert uapi/linux/erspan.h here since
- * we don't pull in uapi to compat
- */
-/* ERSPAN version 2 metadata header */
-struct erspan_md2 {
- __be32 timestamp;
- __be16 sgt; /* security group tag */
-#if defined(__LITTLE_ENDIAN_BITFIELD)
- __u8 hwid_upper:2,
- ft:5,
- p:1;
- __u8 o:1,
- gra:2,
- dir:1,
- hwid:4;
-#elif defined(__BIG_ENDIAN_BITFIELD)
- __u8 p:1,
- ft:5,
- hwid_upper:2;
- __u8 hwid:4,
- dir:1,
- gra:2,
- o:1;
-#else
-#error "Please fix <asm/byteorder.h>"
-#endif
-};
-
-struct erspan_metadata {
- int version;
- union {
- __be32 index; /* Version 1 (type II)*/
- struct erspan_md2 md2; /* Version 2 (type III) */
- } u;
-};
-
-#define ERSPAN_VERSION 0x1 /* ERSPAN type II */
-#define VER_MASK 0xf000
-#define VLAN_MASK 0x0fff
-#define COS_MASK 0xe000
-#define EN_MASK 0x1800
-#define T_MASK 0x0400
-#define ID_MASK 0x03ff
-#define INDEX_MASK 0xfffff
-
-#define ERSPAN_VERSION2 0x2 /* ERSPAN type III*/
-#define BSO_MASK EN_MASK
-#define SGT_MASK 0xffff0000
-#define P_MASK 0x8000
-#define FT_MASK 0x7c00
-#define HWID_MASK 0x03f0
-#define DIR_MASK 0x0008
-#define GRA_MASK 0x0006
-#define O_MASK 0x0001
-
-#define HWID_OFFSET 4
-#define DIR_OFFSET 3
-
-enum erspan_encap_type {
- ERSPAN_ENCAP_NOVLAN = 0x0, /* originally without VLAN tag */
- ERSPAN_ENCAP_ISL = 0x1, /* originally ISL encapsulated */
- ERSPAN_ENCAP_8021Q = 0x2, /* originally 802.1Q encapsulated */
- ERSPAN_ENCAP_INFRAME = 0x3, /* VLAN tag perserved in frame */
-};
-
-#define ERSPAN_V1_MDSIZE 4
-#define ERSPAN_V2_MDSIZE 8
-
-struct erspan_base_hdr {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
- __u8 vlan_upper:4,
- ver:4;
- __u8 vlan:8;
- __u8 session_id_upper:2,
- t:1,
- en:2,
- cos:3;
- __u8 session_id:8;
-#elif defined(__BIG_ENDIAN_BITFIELD)
- __u8 ver: 4,
- vlan_upper:4;
- __u8 vlan:8;
- __u8 cos:3,
- en:2,
- t:1,
- session_id_upper:2;
- __u8 session_id:8;
-#else
-#error "Please fix <asm/byteorder.h>"
-#endif
-};
-
-static inline void set_session_id(struct erspan_base_hdr *ershdr, u16 id)
-{
- ershdr->session_id = id & 0xff;
- ershdr->session_id_upper = (id >> 8) & 0x3;
-}
-
-static inline u16 get_session_id(const struct erspan_base_hdr *ershdr)
-{
- return (ershdr->session_id_upper << 8) + ershdr->session_id;
-}
-
-static inline void set_vlan(struct erspan_base_hdr *ershdr, u16 vlan)
-{
- ershdr->vlan = vlan & 0xff;
- ershdr->vlan_upper = (vlan >> 8) & 0xf;
-}
-
-static inline u16 get_vlan(const struct erspan_base_hdr *ershdr)
-{
- return (ershdr->vlan_upper << 8) + ershdr->vlan;
-}
-
-static inline void set_hwid(struct erspan_md2 *md2, u8 hwid)
-{
- md2->hwid = hwid & 0xf;
- md2->hwid_upper = (hwid >> 4) & 0x3;
-}
-
-static inline u8 get_hwid(const struct erspan_md2 *md2)
-{
- return (md2->hwid_upper << 4) + md2->hwid;
-}
-
-static inline int erspan_hdr_len(int version)
-{
- return sizeof(struct erspan_base_hdr) +
- (version == 1 ? ERSPAN_V1_MDSIZE : ERSPAN_V2_MDSIZE);
-}
-
-static inline u8 tos_to_cos(u8 tos)
-{
- u8 dscp, cos;
-
- dscp = tos >> 2;
- cos = dscp >> 3;
- return cos;
-}
-
-static inline void erspan_build_header(struct sk_buff *skb,
- u32 id, u32 index,
- bool truncate, bool is_ipv4)
-{
- struct ethhdr *eth = (struct ethhdr *)skb->data;
- enum erspan_encap_type enc_type;
- struct erspan_base_hdr *ershdr;
- struct qtag_prefix {
- __be16 eth_type;
- __be16 tci;
- } *qp;
- u16 vlan_tci = 0;
- u8 tos;
- __be32 *idx;
-
- tos = is_ipv4 ? ip_hdr(skb)->tos :
- (ipv6_hdr(skb)->priority << 4) +
- (ipv6_hdr(skb)->flow_lbl[0] >> 4);
-
- enc_type = ERSPAN_ENCAP_NOVLAN;
-
- /* If mirrored packet has vlan tag, extract tci and
- * perserve vlan header in the mirrored frame.
- */
- if (eth->h_proto == htons(ETH_P_8021Q)) {
- qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN);
- vlan_tci = ntohs(qp->tci);
- enc_type = ERSPAN_ENCAP_INFRAME;
- }
-
- skb_push(skb, sizeof(*ershdr) + ERSPAN_V1_MDSIZE);
- ershdr = (struct erspan_base_hdr *)skb->data;
- memset(ershdr, 0, sizeof(*ershdr) + ERSPAN_V1_MDSIZE);
-
- /* Build base header */
- ershdr->ver = ERSPAN_VERSION;
- ershdr->cos = tos_to_cos(tos);
- ershdr->en = enc_type;
- ershdr->t = truncate;
- set_vlan(ershdr, vlan_tci);
- set_session_id(ershdr, id);
-
- /* Build metadata */
- idx = (__be32 *)(ershdr + 1);
- *idx = htonl(index & INDEX_MASK);
-}
-
-/* ERSPAN GRA: timestamp granularity
- * 00b --> granularity = 100 microseconds
- * 01b --> granularity = 100 nanoseconds
- * 10b --> granularity = IEEE 1588
- * Here we only support 100 microseconds.
- */
-static inline __be32 erspan_get_timestamp(void)
-{
- u64 h_usecs;
- ktime_t kt;
-
- kt = ktime_get_real();
- h_usecs = ktime_divns(kt, 100 * NSEC_PER_USEC);
-
- /* ERSPAN base header only has 32-bit,
- * so it wraps around 4 days.
- */
- return htonl((u32)h_usecs);
-}
-
-/* ERSPAN BSO (Bad/Short/Oversized), see RFC1757
- * 00b --> Good frame with no error, or unknown integrity
- * 01b --> Payload is a Short Frame
- * 10b --> Payload is an Oversized Frame
- * 11b --> Payload is a Bad Frame with CRC or Alignment Error
- */
-enum erspan_bso {
- BSO_NOERROR = 0x0,
- BSO_SHORT = 0x1,
- BSO_OVERSIZED = 0x2,
- BSO_BAD = 0x3,
-};
-
-static inline u8 erspan_detect_bso(struct sk_buff *skb)
-{
- /* BSO_BAD is not handled because the frame CRC
- * or alignment error information is in FCS.
- */
- if (skb->len < ETH_ZLEN)
- return BSO_SHORT;
-
- if (skb->len > ETH_FRAME_LEN)
- return BSO_OVERSIZED;
-
- return BSO_NOERROR;
-}
-
-static inline void erspan_build_header_v2(struct sk_buff *skb,
- u32 id, u8 direction, u16 hwid,
- bool truncate, bool is_ipv4)
-{
- struct ethhdr *eth = (struct ethhdr *)skb->data;
- struct erspan_base_hdr *ershdr;
- struct erspan_md2 *md2;
- struct qtag_prefix {
- __be16 eth_type;
- __be16 tci;
- } *qp;
- u16 vlan_tci = 0;
- u8 gra = 0; /* 100 usec */
- u8 bso = truncate; /* Bad/Short/Oversized */
- u8 sgt = 0;
- u8 tos;
-
- tos = is_ipv4 ? ip_hdr(skb)->tos :
- (ipv6_hdr(skb)->priority << 4) +
- (ipv6_hdr(skb)->flow_lbl[0] >> 4);
-
- /* Unlike v1, v2 does not have En field,
- * so only extract vlan tci field.
- */
- if (eth->h_proto == htons(ETH_P_8021Q)) {
- qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN);
- vlan_tci = ntohs(qp->tci);
- }
-
- bso = erspan_detect_bso(skb);
- skb_push(skb, sizeof(*ershdr) + ERSPAN_V2_MDSIZE);
- ershdr = (struct erspan_base_hdr *)skb->data;
- memset(ershdr, 0, sizeof(*ershdr) + ERSPAN_V2_MDSIZE);
-
- /* Build base header */
- ershdr->ver = ERSPAN_VERSION2;
- ershdr->cos = tos_to_cos(tos);
- ershdr->en = bso;
- ershdr->t = truncate;
- set_vlan(ershdr, vlan_tci);
- set_session_id(ershdr, id);
-
- /* Build metadata */
- md2 = (struct erspan_md2 *)(ershdr + 1);
- md2->timestamp = erspan_get_timestamp();
- md2->sgt = htons(sgt);
- md2->p = 1;
- md2->ft = 0;
- md2->dir = direction;
- md2->gra = gra;
- md2->o = 0;
- set_hwid(md2, hwid);
-}
-
-#endif
-#else
-#include_next <net/erspan.h>
-#endif
diff --git a/datapath/linux/compat/include/net/genetlink.h b/datapath/linux/compat/include/net/genetlink.h
deleted file mode 100644
index 602ce38d3..000000000
--- a/datapath/linux/compat/include/net/genetlink.h
+++ /dev/null
@@ -1,136 +0,0 @@
-#ifndef __NET_GENERIC_NETLINK_WRAPPER_H
-#define __NET_GENERIC_NETLINK_WRAPPER_H 1
-
-#include <linux/version.h>
-#include <linux/netlink.h>
-#include <net/net_namespace.h>
-#include_next <net/genetlink.h>
-
-#ifndef HAVE_GENL_NOTIFY_TAKES_FAMILY
-struct rpl_genl_family {
- struct genl_family compat_family;
- unsigned int id;
- unsigned int hdrsize;
- char name[GENL_NAMSIZ];
- unsigned int version;
- unsigned int maxattr;
- bool netnsok;
- bool parallel_ops;
- int (*pre_doit)(const struct genl_ops *ops,
- struct sk_buff *skb,
- struct genl_info *info);
- void (*post_doit)(const struct genl_ops *ops,
- struct sk_buff *skb,
- struct genl_info *info);
- struct nlattr ** attrbuf; /* private */
- const struct genl_ops * ops; /* private */
- const struct genl_multicast_group *mcgrps; /* private */
- unsigned int n_ops; /* private */
- unsigned int n_mcgrps; /* private */
- unsigned int mcgrp_offset; /* private */
- struct list_head family_list; /* private */
- struct module *module;
-};
-
-#define genl_family rpl_genl_family
-static inline void *rpl_genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq,
- struct genl_family *family, int flags, u8 cmd)
-{
- return genlmsg_put(skb, portid, seq, &family->compat_family, flags, cmd);
-}
-
-#define genlmsg_put rpl_genlmsg_put
-
-static inline int rpl_genl_unregister_family(struct genl_family *family)
-{
- return genl_unregister_family(&family->compat_family);
-}
-#define genl_unregister_family rpl_genl_unregister_family
-
-#define genl_set_err rpl_genl_set_err
-static inline int genl_set_err(struct genl_family *family, struct net *net,
- u32 portid, u32 group, int code)
-{
-#ifdef HAVE_VOID_NETLINK_SET_ERR
- netlink_set_err(net->genl_sock, portid, group, code);
- return 0;
-#else
- return netlink_set_err(net->genl_sock, portid, group, code);
-#endif
-}
-
-#define genlmsg_multicast_netns rpl_genlmsg_multicast_netns
-static inline int genlmsg_multicast_netns(struct genl_family *family,
- struct net *net, struct sk_buff *skb,
- u32 portid, unsigned int group, gfp_t flags)
-{
- return nlmsg_multicast(net->genl_sock, skb, portid, group, flags);
-}
-
-
-#define __genl_register_family rpl___genl_register_family
-int rpl___genl_register_family(struct genl_family *family);
-
-#define genl_register_family rpl_genl_register_family
-static inline int rpl_genl_register_family(struct genl_family *family)
-{
- family->module = THIS_MODULE;
- return rpl___genl_register_family(family);
-}
-#endif
-
-#ifdef HAVE_GENL_NOTIFY_TAKES_NET
-#define genl_notify rpl_genl_notify
-void rpl_genl_notify(struct genl_family *family, struct sk_buff *skb,
- struct genl_info *info , u32 group, gfp_t flags);
-#endif
-
-#ifndef HAVE_GENL_HAS_LISTENERS
-static inline int genl_has_listeners(struct genl_family *family,
- struct net *net, unsigned int group)
-{
-#ifdef HAVE_MCGRP_OFFSET
- if (WARN_ON_ONCE(group >= family->n_mcgrps))
- return -EINVAL;
- group = family->mcgrp_offset + group;
-#endif
- return netlink_has_listeners(net->genl_sock, group);
-}
-#else
-
-#ifndef HAVE_GENL_HAS_LISTENERS_TAKES_NET
-static inline int rpl_genl_has_listeners(struct genl_family *family,
- struct net *net, unsigned int group)
-{
-#ifdef HAVE_GENL_NOTIFY_TAKES_FAMILY
- return genl_has_listeners(family, net->genl_sock, group);
-#else
- return genl_has_listeners(&family->compat_family, net->genl_sock, group);
-#endif
-}
-
-#define genl_has_listeners rpl_genl_has_listeners
-#endif
-
-#endif /* HAVE_GENL_HAS_LISTENERS */
-
-#ifndef HAVE_NETLINK_EXT_ACK
-struct netlink_ext_ack;
-
-static inline int rpl_genlmsg_parse(const struct nlmsghdr *nlh,
- const struct genl_family *family,
- struct nlattr *tb[], int maxtype,
- const struct nla_policy *policy,
- struct netlink_ext_ack *extack)
-{
-#ifdef HAVE_GENLMSG_PARSE
- return genlmsg_parse(nlh, family, tb, maxtype, policy);
-#else
- return nlmsg_parse(nlh, family->hdrsize + GENL_HDRLEN, tb, maxtype,
- policy);
-#endif
-}
-#define genlmsg_parse rpl_genlmsg_parse
-#endif
-
-#endif /* genetlink.h */
diff --git a/datapath/linux/compat/include/net/geneve.h b/datapath/linux/compat/include/net/geneve.h
deleted file mode 100644
index d9c9f0bf7..000000000
--- a/datapath/linux/compat/include/net/geneve.h
+++ /dev/null
@@ -1,107 +0,0 @@
-#ifndef __NET_GENEVE_WRAPPER_H
-#define __NET_GENEVE_WRAPPER_H 1
-
-#ifdef CONFIG_INET
-#include <net/udp_tunnel.h>
-#endif
-
-
-#ifdef USE_UPSTREAM_TUNNEL
-#include_next <net/geneve.h>
-
-static inline int rpl_geneve_init_module(void)
-{
- return 0;
-}
-static inline void rpl_geneve_cleanup_module(void)
-{}
-
-#define geneve_xmit dev_queue_xmit
-
-#ifdef CONFIG_INET
-#ifndef HAVE_NAME_ASSIGN_TYPE
-static inline struct net_device *rpl_geneve_dev_create_fb(
- struct net *net, const char *name, u8 name_assign_type, u16 dst_port) {
- return geneve_dev_create_fb(net, name, dst_port);
-}
-#define geneve_dev_create_fb rpl_geneve_dev_create_fb
-#endif
-#endif
-
-#else
-/* Geneve Header:
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * |Ver| Opt Len |O|C| Rsvd. | Protocol Type |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * | Virtual Network Identifier (VNI) | Reserved |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * | Variable Length Options |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- *
- * Option Header:
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * | Option Class | Type |R|R|R| Length |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * | Variable Option Data |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- */
-
-struct geneve_opt {
- __be16 opt_class;
- u8 type;
-#ifdef __LITTLE_ENDIAN_BITFIELD
- u8 length:5;
- u8 r3:1;
- u8 r2:1;
- u8 r1:1;
-#else
- u8 r1:1;
- u8 r2:1;
- u8 r3:1;
- u8 length:5;
-#endif
- u8 opt_data[];
-};
-
-#define GENEVE_CRIT_OPT_TYPE (1 << 7)
-
-struct genevehdr {
-#ifdef __LITTLE_ENDIAN_BITFIELD
- u8 opt_len:6;
- u8 ver:2;
- u8 rsvd1:6;
- u8 critical:1;
- u8 oam:1;
-#else
- u8 ver:2;
- u8 opt_len:6;
- u8 oam:1;
- u8 critical:1;
- u8 rsvd1:6;
-#endif
- __be16 proto_type;
- u8 vni[3];
- u8 rsvd2;
- struct geneve_opt options[];
-};
-
-#ifdef CONFIG_INET
-#define geneve_dev_create_fb rpl_geneve_dev_create_fb
-struct net_device *rpl_geneve_dev_create_fb(struct net *net, const char *name,
- u8 name_assign_type, u16 dst_port);
-#endif /*ifdef CONFIG_INET */
-
-int rpl_geneve_init_module(void);
-void rpl_geneve_cleanup_module(void);
-
-#define geneve_xmit rpl_geneve_xmit
-netdev_tx_t rpl_geneve_xmit(struct sk_buff *skb);
-
-#endif
-#define geneve_init_module rpl_geneve_init_module
-#define geneve_cleanup_module rpl_geneve_cleanup_module
-
-#define geneve_fill_metadata_dst ovs_geneve_fill_metadata_dst
-int ovs_geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb);
-
-#endif /*ifdef__NET_GENEVE_H */
diff --git a/datapath/linux/compat/include/net/gre.h b/datapath/linux/compat/include/net/gre.h
deleted file mode 100644
index 57293b6c2..000000000
--- a/datapath/linux/compat/include/net/gre.h
+++ /dev/null
@@ -1,191 +0,0 @@
-#ifndef __LINUX_GRE_WRAPPER_H
-#define __LINUX_GRE_WRAPPER_H
-
-#include <linux/version.h>
-#include <linux/skbuff.h>
-#include <net/ip_tunnels.h>
-
-#ifdef USE_UPSTREAM_TUNNEL
-#include_next <net/gre.h>
-
-static inline int rpl_ipgre_init(void)
-{
- return 0;
-}
-static inline void rpl_ipgre_fini(void)
-{}
-
-static inline int rpl_ip6gre_init(void)
-{
- return 0;
-}
-
-static inline void rpl_ip6gre_fini(void)
-{}
-
-static inline int rpl_ip6_tunnel_init(void)
-{
- return 0;
-}
-
-static inline void rpl_ip6_tunnel_cleanup(void)
-{
-}
-
-static inline int rpl_gre_init(void)
-{
- return 0;
-}
-
-static inline void rpl_gre_exit(void)
-{
-}
-
-#define gre_fb_xmit dev_queue_xmit
-
-#ifdef CONFIG_INET
-#ifndef HAVE_NAME_ASSIGN_TYPE
-static inline struct net_device *rpl_gretap_fb_dev_create(
- struct net *net, const char *name, u8 name_assign_type) {
- return gretap_fb_dev_create(net, name);
-}
-#define gretap_fb_dev_create rpl_gretap_fb_dev_create
-#endif
-#endif
-
-#else
-#include_next <net/gre.h>
-
-#ifndef HAVE_GRE_CALC_HLEN
-static inline int gre_calc_hlen(__be16 o_flags)
-{
- int addend = 4;
-
- if (o_flags & TUNNEL_CSUM)
- addend += 4;
- if (o_flags & TUNNEL_KEY)
- addend += 4;
- if (o_flags & TUNNEL_SEQ)
- addend += 4;
- return addend;
-}
-
-#define ip_gre_calc_hlen gre_calc_hlen
-#else
-#ifdef HAVE_IP_GRE_CALC_HLEN
-#define gre_calc_hlen ip_gre_calc_hlen
-#endif
-#endif
-
-#define tnl_flags_to_gre_flags rpl_tnl_flags_to_gre_flags
-static inline __be16 rpl_tnl_flags_to_gre_flags(__be16 tflags)
-{
- __be16 flags = 0;
-
- if (tflags & TUNNEL_CSUM)
- flags |= GRE_CSUM;
- if (tflags & TUNNEL_ROUTING)
- flags |= GRE_ROUTING;
- if (tflags & TUNNEL_KEY)
- flags |= GRE_KEY;
- if (tflags & TUNNEL_SEQ)
- flags |= GRE_SEQ;
- if (tflags & TUNNEL_STRICT)
- flags |= GRE_STRICT;
- if (tflags & TUNNEL_REC)
- flags |= GRE_REC;
- if (tflags & TUNNEL_VERSION)
- flags |= GRE_VERSION;
-
- return flags;
-}
-
-#define gre_flags_to_tnl_flags rpl_gre_flags_to_tnl_flags
-static inline __be16 rpl_gre_flags_to_tnl_flags(__be16 flags)
-{
- __be16 tflags = 0;
-
- if (flags & GRE_CSUM)
- tflags |= TUNNEL_CSUM;
- if (flags & GRE_ROUTING)
- tflags |= TUNNEL_ROUTING;
- if (flags & GRE_KEY)
- tflags |= TUNNEL_KEY;
- if (flags & GRE_SEQ)
- tflags |= TUNNEL_SEQ;
- if (flags & GRE_STRICT)
- tflags |= TUNNEL_STRICT;
- if (flags & GRE_REC)
- tflags |= TUNNEL_REC;
- if (flags & GRE_VERSION)
- tflags |= TUNNEL_VERSION;
-
- return tflags;
-}
-#define gre_tnl_flags_to_gre_flags rpl_gre_tnl_flags_to_gre_flags
-static inline __be16 rpl_gre_tnl_flags_to_gre_flags(__be16 tflags)
-{
- __be16 flags = 0;
-
- if (tflags & TUNNEL_CSUM)
- flags |= GRE_CSUM;
- if (tflags & TUNNEL_ROUTING)
- flags |= GRE_ROUTING;
- if (tflags & TUNNEL_KEY)
- flags |= GRE_KEY;
- if (tflags & TUNNEL_SEQ)
- flags |= GRE_SEQ;
- if (tflags & TUNNEL_STRICT)
- flags |= GRE_STRICT;
- if (tflags & TUNNEL_REC)
- flags |= GRE_REC;
- if (tflags & TUNNEL_VERSION)
- flags |= GRE_VERSION;
-
- return flags;
-}
-
-#define gre_build_header rpl_gre_build_header
-void rpl_gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
- int hdr_len);
-
-int rpl_ipgre_init(void);
-void rpl_ipgre_fini(void);
-int rpl_ip6gre_init(void);
-void rpl_ip6gre_fini(void);
-int rpl_ip6_tunnel_init(void);
-void rpl_ip6_tunnel_cleanup(void);
-int rpl_gre_init(void);
-void rpl_gre_exit(void);
-
-#define gretap_fb_dev_create rpl_gretap_fb_dev_create
-struct net_device *rpl_gretap_fb_dev_create(struct net *net, const char *name,
- u8 name_assign_type);
-
-#define gre_parse_header rpl_gre_parse_header
-int rpl_gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
- bool *csum_err, __be16 proto, int nhs);
-
-#define gre_fb_xmit rpl_gre_fb_xmit
-netdev_tx_t rpl_gre_fb_xmit(struct sk_buff *skb);
-
-#define gre_add_protocol rpl_gre_add_protocol
-int rpl_gre_add_protocol(const struct gre_protocol *proto, u8 version);
-#define gre_del_protocol rpl_gre_del_protocol
-int rpl_gre_del_protocol(const struct gre_protocol *proto, u8 version);
-#endif /* USE_UPSTREAM_TUNNEL */
-
-#define ipgre_init rpl_ipgre_init
-#define ipgre_fini rpl_ipgre_fini
-#define ip6gre_init rpl_ip6gre_init
-#define ip6gre_fini rpl_ip6gre_fini
-#define ip6_tunnel_init rpl_ip6_tunnel_init
-#define ip6_tunnel_cleanup rpl_ip6_tunnel_cleanup
-#define gre_init rpl_gre_init
-#define gre_exit rpl_gre_exit
-
-#define gre_fill_metadata_dst ovs_gre_fill_metadata_dst
-int ovs_gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb);
-
-
-#endif
diff --git a/datapath/linux/compat/include/net/inet_ecn.h b/datapath/linux/compat/include/net/inet_ecn.h
deleted file mode 100644
index f0591b322..000000000
--- a/datapath/linux/compat/include/net/inet_ecn.h
+++ /dev/null
@@ -1,59 +0,0 @@
-#ifndef _INET_ECN_WRAPPER_H_
-#define _INET_ECN_WRAPPER_H_
-
-#include_next <net/inet_ecn.h>
-
-#define INET_ECN_decapsulate rpl_INET_ECN_decapsulate
-static inline int INET_ECN_decapsulate(struct sk_buff *skb,
- __u8 outer, __u8 inner)
-{
- if (INET_ECN_is_not_ect(inner)) {
- switch (outer & INET_ECN_MASK) {
- case INET_ECN_NOT_ECT:
- return 0;
- case INET_ECN_ECT_0:
- case INET_ECN_ECT_1:
- return 1;
- case INET_ECN_CE:
- return 2;
- }
- }
-
- if (INET_ECN_is_ce(outer))
- INET_ECN_set_ce(skb);
-
- return 0;
-}
-
-#define IP_ECN_decapsulate rpl_IP_ECN_decapsulate
-static inline int IP_ECN_decapsulate(const struct iphdr *oiph,
- struct sk_buff *skb)
-{
- __u8 inner;
-
- if (skb->protocol == htons(ETH_P_IP))
- inner = ip_hdr(skb)->tos;
- else if (skb->protocol == htons(ETH_P_IPV6))
- inner = ipv6_get_dsfield(ipv6_hdr(skb));
- else
- return 0;
-
- return INET_ECN_decapsulate(skb, oiph->tos, inner);
-}
-
-#define IP6_ECN_decapsulate rpl_IP6_ECN_decapsulate
-static inline int IP6_ECN_decapsulate(const struct ipv6hdr *oipv6h,
- struct sk_buff *skb)
-{
- __u8 inner;
-
- if (skb->protocol == htons(ETH_P_IP))
- inner = ip_hdr(skb)->tos;
- else if (skb->protocol == htons(ETH_P_IPV6))
- inner = ipv6_get_dsfield(ipv6_hdr(skb));
- else
- return 0;
-
- return INET_ECN_decapsulate(skb, ipv6_get_dsfield(oipv6h), inner);
-}
-#endif
diff --git a/datapath/linux/compat/include/net/inet_frag.h b/datapath/linux/compat/include/net/inet_frag.h
deleted file mode 100644
index 00784da2b..000000000
--- a/datapath/linux/compat/include/net/inet_frag.h
+++ /dev/null
@@ -1,83 +0,0 @@
-#ifndef __NET_INET_FRAG_WRAPPER_H
-#define __NET_INET_FRAG_WRAPPER_H 1
-
-#include <linux/version.h>
-#include_next <net/inet_frag.h>
-
-#ifdef HAVE_INET_FRAGS_LAST_IN
-#define q_flags(q) (q->last_in)
-#define qp_flags(qp) (qp->q.last_in)
-#else
-#define q_flags(q) (q->flags)
-#define qp_flags(qp) (qp->q.flags)
-#endif
-
-#ifndef HAVE_CORRECT_MRU_HANDLING
-#ifndef HAVE_INET_FRAG_EVICTING
-static inline bool inet_frag_evicting(struct inet_frag_queue *q)
-{
-#ifdef HAVE_INET_FRAG_QUEUE_WITH_LIST_EVICTOR
- return !hlist_unhashed(&q->list_evictor);
-#else
- return (q_flags(q) & INET_FRAG_FIRST_IN) && q->fragments != NULL;
-#endif /* HAVE_INET_FRAG_QUEUE_WITH_LIST_EVICTOR */
-}
-#endif /* HAVE_INET_FRAG_EVICTING */
-#endif /* HAVE_CORRECT_MRU_HANDLING */
-
-/* Upstream commit 3fd588eb90bf ("inet: frag: remove lru list") dropped this
- * function, but we call it from our compat code. Provide a noop version. */
-#ifndef HAVE_INET_FRAG_LRU_MOVE
-#define inet_frag_lru_move(q)
-#endif
-
-#ifdef HAVE_INET_FRAG_FQDIR
-#define netns_frags fqdir
-#endif
-
-#ifndef HAVE_SUB_FRAG_MEM_LIMIT_ARG_STRUCT_NETNS_FRAGS
-#ifdef HAVE_FRAG_PERCPU_COUNTER_BATCH
-static inline void rpl_sub_frag_mem_limit(struct netns_frags *nf, int i)
-{
- __percpu_counter_add(&nf->mem, -i, frag_percpu_counter_batch);
-}
-#define sub_frag_mem_limit rpl_sub_frag_mem_limit
-
-static inline void rpl_add_frag_mem_limit(struct netns_frags *nf, int i)
-{
- __percpu_counter_add(&nf->mem, i, frag_percpu_counter_batch);
-}
-#define add_frag_mem_limit rpl_add_frag_mem_limit
-#else /* !frag_percpu_counter_batch */
-static inline void rpl_sub_frag_mem_limit(struct netns_frags *nf, int i)
-{
-#ifdef HAVE_INET_FRAG_FQDIR
- atomic_long_sub(i, &nf->mem);
-#else
- atomic_sub(i, &nf->mem);
-#endif
-}
-#define sub_frag_mem_limit rpl_sub_frag_mem_limit
-
-static inline void rpl_add_frag_mem_limit(struct netns_frags *nf, int i)
-{
-#ifdef HAVE_INET_FRAG_FQDIR
- atomic_long_add(i, &nf->mem);
-#else
- atomic_add(i, &nf->mem);
-#endif
-}
-#define add_frag_mem_limit rpl_add_frag_mem_limit
-#endif /* frag_percpu_counter_batch */
-#endif
-
-#ifdef HAVE_VOID_INET_FRAGS_INIT
-static inline int rpl_inet_frags_init(struct inet_frags *frags)
-{
- inet_frags_init(frags);
- return 0;
-}
-#define inet_frags_init rpl_inet_frags_init
-#endif
-
-#endif /* inet_frag.h */
diff --git a/datapath/linux/compat/include/net/inetpeer.h b/datapath/linux/compat/include/net/inetpeer.h
deleted file mode 100644
index c5f5eb12b..000000000
--- a/datapath/linux/compat/include/net/inetpeer.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef _NET_INETPEER_WRAPPER_H
-#define _NET_INETPEER_WRAPPER_H
-
-#include_next <net/inetpeer.h>
-
-#ifndef HAVE_INETPEER_VIF_SUPPORT
-static inline struct inet_peer *rpl_inet_getpeer_v4(struct inet_peer_base *base,
- __be32 v4daddr, int vif,
- int create)
-{
- return inet_getpeer_v4(base, v4daddr, create);
-}
-#define inet_getpeer_v4 rpl_inet_getpeer_v4
-#endif /* HAVE_INETPEER_VIF_SUPPORT */
-
-#endif /* _NET_INETPEER_WRAPPER_H */
diff --git a/datapath/linux/compat/include/net/ip.h b/datapath/linux/compat/include/net/ip.h
deleted file mode 100644
index ad5ac33ee..000000000
--- a/datapath/linux/compat/include/net/ip.h
+++ /dev/null
@@ -1,143 +0,0 @@
-#ifndef __NET_IP_WRAPPER_H
-#define __NET_IP_WRAPPER_H 1
-
-#include_next <net/ip.h>
-
-#include <net/route.h>
-#include <linux/version.h>
-
-#ifndef HAVE_INET_GET_LOCAL_PORT_RANGE_USING_NET
-static inline void rpl_inet_get_local_port_range(struct net *net, int *low,
- int *high)
-{
- inet_get_local_port_range(low, high);
-}
-#define inet_get_local_port_range rpl_inet_get_local_port_range
-
-#endif
-
-#ifndef IPSKB_FRAG_PMTU
-#define IPSKB_FRAG_PMTU BIT(6)
-#endif
-
-/* IPv4 datagram length is stored into 16bit field (tot_len) */
-#ifndef IP_MAX_MTU
-#define IP_MAX_MTU 0xFFFFU
-#endif
-
-#ifndef HAVE_IP_SKB_DST_MTU
-static inline bool rpl_ip_sk_use_pmtu(const struct sock *sk)
-{
- return inet_sk(sk)->pmtudisc < IP_PMTUDISC_PROBE;
-}
-#define ip_sk_use_pmtu rpl_ip_sk_use_pmtu
-
-static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
- bool forwarding)
-{
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,14,0)
- struct net *net = dev_net(dst->dev);
-
- if (net->ipv4.sysctl_ip_fwd_use_pmtu ||
- dst_metric_locked(dst, RTAX_MTU) ||
- !forwarding)
- return dst_mtu(dst);
-#endif
-
- return min(dst->dev->mtu, IP_MAX_MTU);
-}
-
-static inline unsigned int rpl_ip_skb_dst_mtu(const struct sk_buff *skb)
-{
- if (!skb->sk || ip_sk_use_pmtu(skb->sk)) {
- bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED;
- return ip_dst_mtu_maybe_forward(skb_dst(skb), forwarding);
- } else {
- return min(skb_dst(skb)->dev->mtu, IP_MAX_MTU);
- }
-}
-#define ip_skb_dst_mtu rpl_ip_skb_dst_mtu
-#endif /* HAVE_IP_SKB_DST_MTU */
-
-#ifdef HAVE_IP_FRAGMENT_TAKES_SOCK
-#ifdef HAVE_IP_LOCAL_OUT_TAKES_NET
-#define OVS_VPORT_OUTPUT_PARAMS struct net *net, struct sock *sock, struct sk_buff *skb
-#else
-#define OVS_VPORT_OUTPUT_PARAMS struct sock *sock, struct sk_buff *skb
-#endif
-#else
-#define OVS_VPORT_OUTPUT_PARAMS struct sk_buff *skb
-#endif
-
-/* Prior to upstream commit d6b915e29f4a ("ip_fragment: don't forward
- * defragmented DF packet"), IPCB(skb)->frag_max_size was not always populated
- * correctly, which would lead to reassembled packets not being refragmented.
- * So, we backport all of ip_defrag() in these cases.
- */
-#ifndef HAVE_CORRECT_MRU_HANDLING
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4,2,0)
-static inline bool ip_defrag_user_in_between(u32 user,
- enum ip_defrag_users lower_bond,
- enum ip_defrag_users upper_bond)
-{
- return user >= lower_bond && user <= upper_bond;
-}
-#endif /* < v4.2 */
-
-int rpl_ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
- int (*output)(OVS_VPORT_OUTPUT_PARAMS));
-#define ip_do_fragment rpl_ip_do_fragment
-
-/* If backporting IP defrag, then init/exit functions need to be called from
- * compat_{in,ex}it() to prepare the backported fragmentation cache. In this
- * case we declare the functions which are defined in
- * datapath/linux/compat/ip_fragment.c. */
-int rpl_ip_defrag(struct net *net, struct sk_buff *skb, u32 user);
-#define ip_defrag rpl_ip_defrag
-int __init rpl_ipfrag_init(void);
-void rpl_ipfrag_fini(void);
-void ovs_netns_frags_init(struct net *net);
-void ovs_netns_frags_exit(struct net *net);
-
-#else /* HAVE_CORRECT_MRU_HANDLING */
-
-#ifndef HAVE_IP_DO_FRAGMENT_TAKES_NET
-static inline int rpl_ip_do_fragment(struct net *net, struct sock *sk,
- struct sk_buff *skb,
- int (*output)(OVS_VPORT_OUTPUT_PARAMS))
-{
- return ip_do_fragment(sk, skb, output);
-}
-#define ip_do_fragment rpl_ip_do_fragment
-#endif /* IP_DO_FRAGMENT_TAKES_NET */
-
-/* We have no good way to detect the presence of upstream commit 8282f27449bf
- * ("inet: frag: Always orphan skbs inside ip_defrag()"), but it should be
- * always included in kernels 4.5+. */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4,5,0)
-static inline int rpl_ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
-{
- skb_orphan(skb);
-#ifndef HAVE_IP_DEFRAG_TAKES_NET
- return ip_defrag(skb, user);
-#else
- return ip_defrag(net, skb, user);
-#endif
-}
-#define ip_defrag rpl_ip_defrag
-#endif
-
-/* If we can use upstream defrag then we can rely on the upstream
- * defrag module to init/exit correctly. In this case the calls in
- * compat_{in,ex}it() can be no-ops. */
-static inline int rpl_ipfrag_init(void) { return 0; }
-static inline void rpl_ipfrag_fini(void) { }
-static inline void ovs_netns_frags_init(struct net *net) { }
-static inline void ovs_netns_frags_exit(struct net *net) { }
-#endif /* HAVE_CORRECT_MRU_HANDLING */
-
-#define ipfrag_init rpl_ipfrag_init
-#define ipfrag_fini rpl_ipfrag_fini
-
-#endif
diff --git a/datapath/linux/compat/include/net/ip6_fib.h b/datapath/linux/compat/include/net/ip6_fib.h
deleted file mode 100644
index 0cc435813..000000000
--- a/datapath/linux/compat/include/net/ip6_fib.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Linux INET6 implementation
- *
- * Authors:
- * Pedro Roque <roque@di.fc.ul.pt>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef _IP6_FIB_WRAPPER_H
-#define _IP6_FIB_WRAPPER_H
-
-#include_next <net/ip6_fib.h>
-
-#ifndef HAVE_RT6_GET_COOKIE
-
-#ifndef RTF_PCPU
-#define RTF_PCPU 0x40000000
-#endif
-
-#ifndef RTF_LOCAL
-#define RTF_LOCAL 0x80000000
-#endif
-
-#define rt6_get_cookie rpl_rt6_get_cookie
-static inline u32 rt6_get_cookie(const struct rt6_info *rt)
-{
- if (rt->rt6i_flags & RTF_PCPU ||
-#ifdef HAVE_DST_NOCACHE
- (unlikely(rt->dst.flags & DST_NOCACHE) && rt->dst.from))
-#else
- (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from))
-#endif
- rt = (struct rt6_info *)(rt->dst.from);
-
- return rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
-}
-#endif
-
-#endif
diff --git a/datapath/linux/compat/include/net/ip6_route.h b/datapath/linux/compat/include/net/ip6_route.h
deleted file mode 100644
index 7c78fd5c6..000000000
--- a/datapath/linux/compat/include/net/ip6_route.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef __NET_IP6_ROUTE_WRAPPER
-#define __NET_IP6_ROUTE_WRAPPER
-
-#include <net/route.h>
-#include <net/ip.h> /* For OVS_VPORT_OUTPUT_PARAMS */
-#include <net/ipv6.h>
-
-#include_next<net/ip6_route.h>
-
-#ifndef HAVE_NF_IPV6_OPS_FRAGMENT
-int rpl_ip6_fragment(struct sock *sk, struct sk_buff *skb,
- int (*output)(OVS_VPORT_OUTPUT_PARAMS));
-#define ip6_fragment rpl_ip6_fragment
-#endif /* HAVE_NF_IPV6_OPS_FRAGMENT */
-
-#endif /* _NET_IP6_ROUTE_WRAPPER */
diff --git a/datapath/linux/compat/include/net/ip6_tunnel.h b/datapath/linux/compat/include/net/ip6_tunnel.h
deleted file mode 100644
index e0a33a646..000000000
--- a/datapath/linux/compat/include/net/ip6_tunnel.h
+++ /dev/null
@@ -1,208 +0,0 @@
-#ifndef NET_IP6_TUNNEL_WRAPPER_H
-#define NET_IP6_TUNNEL_WRAPPER_H 1
-
-#ifdef HAVE_IP6_TNL_PARM_ERSPAN_VER
-#include_next <net/ip6_tunnel.h>
-#else
-
-#include <linux/ipv6.h>
-#include <linux/netdevice.h>
-#include <linux/if_tunnel.h>
-#include <linux/ip6_tunnel.h>
-#include <net/ip_tunnels.h>
-#include <net/dst_cache.h>
-#include <net/dst_metadata.h>
-#include "gso.h"
-
-#define IP6TUNNEL_ERR_TIMEO (30*HZ)
-
-/* capable of sending packets */
-#define IP6_TNL_F_CAP_XMIT 0x10000
-/* capable of receiving packets */
-#define IP6_TNL_F_CAP_RCV 0x20000
-/* determine capability on a per-packet basis */
-#define IP6_TNL_F_CAP_PER_PACKET 0x40000
-
-#ifndef IP6_TNL_F_ALLOW_LOCAL_REMOTE
-#define IP6_TNL_F_ALLOW_LOCAL_REMOTE 0
-#endif
-
-struct rpl__ip6_tnl_parm {
- char name[IFNAMSIZ]; /* name of tunnel device */
- int link; /* ifindex of underlying L2 interface */
- __u8 proto; /* tunnel protocol */
- __u8 encap_limit; /* encapsulation limit for tunnel */
- __u8 hop_limit; /* hop limit for tunnel */
- bool collect_md;
- __be32 flowinfo; /* traffic class and flowlabel for tunnel */
- __u32 flags; /* tunnel flags */
- struct in6_addr laddr; /* local tunnel end-point address */
- struct in6_addr raddr; /* remote tunnel end-point address */
-
- __be16 i_flags;
- __be16 o_flags;
- __be32 i_key;
- __be32 o_key;
-
- __u32 fwmark;
- __u32 index; /* ERSPAN type II index */
- __u8 erspan_ver; /* ERSPAN version */
- __u8 dir; /* direction */
- __u16 hwid; /* hwid */
-};
-
-#define __ip6_tnl_parm rpl__ip6_tnl_parm
-
-/* IPv6 tunnel */
-struct rpl_ip6_tnl {
- struct rpl_ip6_tnl __rcu *next; /* next tunnel in list */
- struct net_device *dev; /* virtual device associated with tunnel */
- struct net *net; /* netns for packet i/o */
- struct __ip6_tnl_parm parms; /* tunnel configuration parameters */
- struct flowi fl; /* flowi template for xmit */
- struct dst_cache dst_cache; /* cached dst */
- struct gro_cells gro_cells;
-
- int err_count;
- unsigned long err_time;
-
- /* These fields used only by GRE */
- __u32 i_seqno; /* The last seen seqno */
- __u32 o_seqno; /* The last output seqno */
- int hlen; /* tun_hlen + encap_hlen */
- int tun_hlen; /* Precalculated header length */
- int encap_hlen; /* Encap header length (FOU,GUE) */
- struct ip_tunnel_encap encap;
- int mlink;
-};
-
-#define ip6_tnl rpl_ip6_tnl
-
-struct rpl_ip6_tnl_encap_ops {
- size_t (*encap_hlen)(struct ip_tunnel_encap *e);
- int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e,
- u8 *protocol, struct flowi6 *fl6);
-};
-
-#define ip6_tnl_encap_ops rpl_ip6_tnl_encap_ops
-
-#ifdef CONFIG_INET
-
-#ifndef MAX_IPTUN_ENCAP_OPS
-#define MAX_IPTUN_ENCAP_OPS 8
-#endif
-
-extern const struct ip6_tnl_encap_ops __rcu *
- rpl_ip6tun_encaps[MAX_IPTUN_ENCAP_OPS];
-
-int rpl_ip6_tnl_encap_add_ops(const struct ip6_tnl_encap_ops *ops,
- unsigned int num);
-#define ip6_tnl_encap_add_ops rpl_ip6_tnl_encap_add_ops
-int rpl_ip6_tnl_encap_del_ops(const struct ip6_tnl_encap_ops *ops,
- unsigned int num);
-#define ip6_tnl_encap_del_ops rpl_ip6_tnl_encap_del_ops
-int rpl_ip6_tnl_encap_setup(struct ip6_tnl *t,
- struct ip_tunnel_encap *ipencap);
-#define ip6_tnl_encap_setup rpl_ip6_tnl_encap_setup
-
-#ifndef HAVE_TUNNEL_ENCAP_TYPES
-enum tunnel_encap_types {
- TUNNEL_ENCAP_NONE,
- TUNNEL_ENCAP_FOU,
- TUNNEL_ENCAP_GUE,
-};
-
-#endif
-static inline int ip6_encap_hlen(struct ip_tunnel_encap *e)
-{
- const struct ip6_tnl_encap_ops *ops;
- int hlen = -EINVAL;
-
- if (e->type == TUNNEL_ENCAP_NONE)
- return 0;
-
- if (e->type >= MAX_IPTUN_ENCAP_OPS)
- return -EINVAL;
-
- rcu_read_lock();
- ops = rcu_dereference(rpl_ip6tun_encaps[e->type]);
- if (likely(ops && ops->encap_hlen))
- hlen = ops->encap_hlen(e);
- rcu_read_unlock();
-
- return hlen;
-}
-
-static inline int ip6_tnl_encap(struct sk_buff *skb, struct ip6_tnl *t,
- u8 *protocol, struct flowi6 *fl6)
-{
- const struct ip6_tnl_encap_ops *ops;
- int ret = -EINVAL;
-
- if (t->encap.type == TUNNEL_ENCAP_NONE)
- return 0;
-
- if (t->encap.type >= MAX_IPTUN_ENCAP_OPS)
- return -EINVAL;
-
- rcu_read_lock();
- ops = rcu_dereference(rpl_ip6tun_encaps[t->encap.type]);
- if (likely(ops && ops->build_header))
- ret = ops->build_header(skb, &t->encap, protocol, fl6);
- rcu_read_unlock();
-
- return ret;
-}
-
-/* Tunnel encapsulation limit destination sub-option */
-
-struct ipv6_tlv_tnl_enc_lim {
- __u8 type; /* type-code for option */
- __u8 length; /* option length */
- __u8 encap_limit; /* tunnel encapsulation limit */
-} __packed;
-
-int rpl_ip6_tnl_rcv_ctl(struct ip6_tnl *t, const struct in6_addr *laddr,
- const struct in6_addr *raddr);
-#define ip6_tnl_rcv_ctl rpl_ip6_tnl_rcv_ctl
-int rpl_ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
- const struct tnl_ptk_info *tpi,
- struct metadata_dst *tun_dst,
- bool log_ecn_error);
-#define ip6_tnl_rcv rpl_ip6_tnl_rcv
-int rpl_ip6_tnl_xmit_ctl(struct ip6_tnl *t, const struct in6_addr *laddr,
- const struct in6_addr *raddr);
-#define ip6_tnl_xmit_ctl rpl_ip6_tnl_xmit_ctl
-int rpl_ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
- struct flowi6 *fl6, int encap_limit, __u32 *pmtu,
- __u8 proto);
-#define ip6_tnl_xmit rpl_ip6_tnl_xmit
-__u16 rpl_ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw);
-#define ip6_tnl_parse_tlv_enc_lim rpl_ip6_tnl_parse_tlv_enc_lim
-__u32 rpl_ip6_tnl_get_cap(struct ip6_tnl *t, const struct in6_addr *laddr,
- const struct in6_addr *raddr);
-#define ip6_tnl_get_cap rpl_ip6_tnl_get_cap
-struct net *rpl_ip6_tnl_get_link_net(const struct net_device *dev);
-#define ip6_tnl_get_link_net rpl_ip6_tnl_get_link_net
-int rpl_ip6_tnl_get_iflink(const struct net_device *dev);
-#define ip6_tnl_get_iflink rpl_ip6_tnl_get_iflink
-int rpl_ip6_tnl_change_mtu(struct net_device *dev, int new_mtu);
-#define ip6_tnl_change_mtu rpl_ip6_tnl_change_mtu
-
-static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb,
- struct net_device *dev)
-{
- int pkt_len, err;
-
- memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
- pkt_len = skb->len - skb_inner_network_offset(skb);
- err = ip6_local_out(dev_net(skb_dst(skb)->dev), sk, skb);
- if (unlikely(net_xmit_eval(err)))
- pkt_len = -1;
- iptunnel_xmit_stats(dev, pkt_len);
-}
-#endif
-
-#endif /* HAVE_IP6_TNL_PARM_ERSPAN_VER */
-
-#endif
diff --git a/datapath/linux/compat/include/net/ip_tunnels.h b/datapath/linux/compat/include/net/ip_tunnels.h
deleted file mode 100644
index 617a753c7..000000000
--- a/datapath/linux/compat/include/net/ip_tunnels.h
+++ /dev/null
@@ -1,513 +0,0 @@
-#ifndef __NET_IP_TUNNELS_WRAPPER_H
-#define __NET_IP_TUNNELS_WRAPPER_H 1
-
-#include <linux/version.h>
-
-#ifdef USE_UPSTREAM_TUNNEL
-/* Block all ip_tunnel functions.
- * Only function that do not depend on ip_tunnel structure can
- * be used. Those needs to be explicitly defined in this header file. */
-#include_next <net/ip_tunnels.h>
-
-#ifndef TUNNEL_ERSPAN_OPT
-#define TUNNEL_ERSPAN_OPT __cpu_to_be16(0x4000)
-#endif
-#define ovs_ip_tunnel_encap ip_tunnel_encap
-
-#ifndef HAVE_IP_TUNNEL_INFO_OPTS_SET_FLAGS
-static inline void rpl_ip_tunnel_info_opts_set(struct ip_tunnel_info *info,
- const void *from, int len,
- __be16 flags)
-{
- memcpy(ip_tunnel_info_opts(info), from, len);
- info->options_len = len;
- info->key.tun_flags |= flags;
-}
-
-#define ip_tunnel_info_opts_set rpl_ip_tunnel_info_opts_set
-#endif
-
-#else /* USE_UPSTREAM_TUNNEL */
-
-#include <linux/if_tunnel.h>
-#include <linux/types.h>
-#include <net/dsfield.h>
-#include <net/dst_cache.h>
-#include <net/flow.h>
-#include <net/inet_ecn.h>
-#include <net/ip.h>
-#include <net/rtnetlink.h>
-#include <net/gro_cells.h>
-
-#ifndef MAX_IPTUN_ENCAP_OPS
-#define MAX_IPTUN_ENCAP_OPS 8
-#endif
-
-#ifndef HAVE_TUNNEL_ENCAP_TYPES
-enum tunnel_encap_types {
- TUNNEL_ENCAP_NONE,
- TUNNEL_ENCAP_FOU,
- TUNNEL_ENCAP_GUE,
-};
-
-#define HAVE_TUNNEL_ENCAP_TYPES 1
-#endif
-
-#define __iptunnel_pull_header rpl___iptunnel_pull_header
-int rpl___iptunnel_pull_header(struct sk_buff *skb, int hdr_len,
- __be16 inner_proto, bool raw_proto, bool xnet);
-
-#define iptunnel_pull_header rpl_iptunnel_pull_header
-static inline int rpl_iptunnel_pull_header(struct sk_buff *skb, int hdr_len,
- __be16 inner_proto, bool xnet)
-{
- return rpl___iptunnel_pull_header(skb, hdr_len, inner_proto, false, xnet);
-}
-
-int ovs_iptunnel_handle_offloads(struct sk_buff *skb,
- int gso_type_mask,
- void (*fix_segment)(struct sk_buff *));
-
-/* This is required to compile upstream gre.h. gre_handle_offloads()
- * is defined in gre.h and needs iptunnel_handle_offloads(). This provides
- * default signature for this function.
- * rpl prefix is to make OVS build happy.
- */
-#define iptunnel_handle_offloads rpl_iptunnel_handle_offloads
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4,7,0)
-struct sk_buff *rpl_iptunnel_handle_offloads(struct sk_buff *skb,
- bool csum_help,
- int gso_type_mask);
-#else
-int rpl_iptunnel_handle_offloads(struct sk_buff *skb,
- bool csum_help,
- int gso_type_mask);
-#endif
-
-#define iptunnel_xmit rpl_iptunnel_xmit
-void rpl_iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
- __be32 src, __be32 dst, __u8 proto, __u8 tos, __u8 ttl,
- __be16 df, bool xnet);
-#define ip_tunnel_xmit rpl_ip_tunnel_xmit
-void rpl_ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
- const struct iphdr *tnl_params, const u8 protocol);
-
-
-#ifndef TUNNEL_CSUM
-#define TUNNEL_CSUM __cpu_to_be16(0x01)
-#define TUNNEL_ROUTING __cpu_to_be16(0x02)
-#define TUNNEL_KEY __cpu_to_be16(0x04)
-#define TUNNEL_SEQ __cpu_to_be16(0x08)
-#define TUNNEL_STRICT __cpu_to_be16(0x10)
-#define TUNNEL_REC __cpu_to_be16(0x20)
-#define TUNNEL_VERSION __cpu_to_be16(0x40)
-#define TUNNEL_NO_KEY __cpu_to_be16(0x80)
-#define TUNNEL_DONT_FRAGMENT __cpu_to_be16(0x0100)
-#define TUNNEL_OAM __cpu_to_be16(0x0200)
-#define TUNNEL_CRIT_OPT __cpu_to_be16(0x0400)
-#define TUNNEL_GENEVE_OPT __cpu_to_be16(0x0800)
-#define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000)
-#define TUNNEL_NOCACHE __cpu_to_be16(0x2000)
-#define TUNNEL_ERSPAN_OPT __cpu_to_be16(0x4000)
-
-#undef TUNNEL_OPTIONS_PRESENT
-#define TUNNEL_OPTIONS_PRESENT \
- (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT)
-
-struct tnl_ptk_info {
- __be16 flags;
- __be16 proto;
- __be32 key;
- __be32 seq;
- int hdr_len;
-};
-
-#define PACKET_RCVD 0
-#define PACKET_REJECT 1
-#define PACKET_NEXT 2
-#endif
-
-#define IP_TNL_HASH_BITS 7
-#define IP_TNL_HASH_SIZE (1 << IP_TNL_HASH_BITS)
-
-/* Keep error state on tunnel for 30 sec */
-#define IPTUNNEL_ERR_TIMEO (30*HZ)
-
-/* Used to memset ip_tunnel padding. */
-#define IP_TUNNEL_KEY_SIZE offsetofend(struct ip_tunnel_key, tp_dst)
-
-/* Used to memset ipv4 address padding. */
-#define IP_TUNNEL_KEY_IPV4_PAD offsetofend(struct ip_tunnel_key, u.ipv4.dst)
-#define IP_TUNNEL_KEY_IPV4_PAD_LEN \
- (sizeof_field(struct ip_tunnel_key, u) - \
- sizeof_field(struct ip_tunnel_key, u.ipv4))
-
-struct ip_tunnel_key {
- __be64 tun_id;
- union {
- struct {
- __be32 src;
- __be32 dst;
- } ipv4;
- struct {
- struct in6_addr src;
- struct in6_addr dst;
- } ipv6;
- } u;
- __be16 tun_flags;
- u8 tos; /* TOS for IPv4, TC for IPv6 */
- u8 ttl; /* TTL for IPv4, HL for IPv6 */
- __be32 label; /* Flow Label for IPv6 */
- __be16 tp_src;
- __be16 tp_dst;
-};
-
-/* Flags for ip_tunnel_info mode. */
-#define IP_TUNNEL_INFO_TX 0x01 /* represents tx tunnel parameters */
-#define IP_TUNNEL_INFO_IPV6 0x02 /* key contains IPv6 addresses */
-
-struct ip_tunnel_info {
- struct ip_tunnel_key key;
- struct dst_cache dst_cache;
- u8 options_len;
- u8 mode;
-};
-
-/* 6rd prefix/relay information */
-#ifdef CONFIG_IPV6_SIT_6RD
-struct ip_tunnel_6rd_parm {
- struct in6_addr prefix;
- __be32 relay_prefix;
- u16 prefixlen;
- u16 relay_prefixlen;
-};
-#endif
-
-struct ip_tunnel_encap {
- u16 type;
- u16 flags;
- __be16 sport;
- __be16 dport;
-};
-
-struct ip_tunnel_prl_entry {
- struct ip_tunnel_prl_entry __rcu *next;
- __be32 addr;
- u16 flags;
- struct rcu_head rcu_head;
-};
-
-static inline unsigned short ip_tunnel_info_af(const struct ip_tunnel_info *tun_info)
-{
- return tun_info->mode & IP_TUNNEL_INFO_IPV6 ? AF_INET6 : AF_INET;
-}
-
-static inline void *ip_tunnel_info_opts(struct ip_tunnel_info *info)
-{
- return info + 1;
-}
-
-static inline void ip_tunnel_info_opts_get(void *to,
- const struct ip_tunnel_info *info)
-{
- memcpy(to, info + 1, info->options_len);
-}
-
-static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info,
- const void *from, int len,
- __be16 flags)
-{
- memcpy(ip_tunnel_info_opts(info), from, len);
- info->options_len = len;
- info->key.tun_flags |= flags;
-}
-
-static inline void ip_tunnel_key_init(struct ip_tunnel_key *key,
- __be32 saddr, __be32 daddr,
- u8 tos, u8 ttl, __be32 label,
- __be16 tp_src, __be16 tp_dst,
- __be64 tun_id, __be16 tun_flags)
-{
- key->tun_id = tun_id;
- key->u.ipv4.src = saddr;
- key->u.ipv4.dst = daddr;
- memset((unsigned char *)key + IP_TUNNEL_KEY_IPV4_PAD,
- 0, IP_TUNNEL_KEY_IPV4_PAD_LEN);
- key->tos = tos;
- key->ttl = ttl;
- key->label = label;
- key->tun_flags = tun_flags;
-
- /* For the tunnel types on the top of IPsec, the tp_src and tp_dst of
- * the upper tunnel are used.
- * E.g: GRE over IPSEC, the tp_src and tp_port are zero.
- */
- key->tp_src = tp_src;
- key->tp_dst = tp_dst;
-
- /* Clear struct padding. */
- if (sizeof(*key) != IP_TUNNEL_KEY_SIZE)
- memset((unsigned char *)key + IP_TUNNEL_KEY_SIZE,
- 0, sizeof(*key) - IP_TUNNEL_KEY_SIZE);
-}
-
-#define ip_tunnel_collect_metadata() true
-
-#undef TUNNEL_NOCACHE
-#define TUNNEL_NOCACHE 0
-
-static inline bool
-ip_tunnel_dst_cache_usable(const struct sk_buff *skb,
- const struct ip_tunnel_info *info)
-{
- if (skb->mark)
- return false;
- if (!info)
- return true;
- if (info->key.tun_flags & TUNNEL_NOCACHE)
- return false;
-
- return true;
-}
-
-#define ip_tunnel_dst rpl_ip_tunnel_dst
-struct rpl_ip_tunnel_dst {
- struct dst_entry __rcu *dst;
- __be32 saddr;
-};
-
-#define ip_tunnel rpl_ip_tunnel
-struct rpl_ip_tunnel {
- struct ip_tunnel __rcu *next;
- struct hlist_node hash_node;
- struct net_device *dev;
- struct net *net; /* netns for packet i/o */
-
- unsigned long err_time; /* Time when the last ICMP error
- * arrived */
- int err_count; /* Number of arrived ICMP errors */
-
- /* These four fields used only by GRE */
- u32 i_seqno; /* The last seen seqno */
- u32 o_seqno; /* The last output seqno */
- int tun_hlen; /* Precalculated header length */
-
- /* These four fields used only by ERSPAN */
- u32 index; /* ERSPAN type II index */
- u8 erspan_ver; /* ERSPAN version */
- u8 dir; /* ERSPAN direction */
- u16 hwid; /* ERSPAN hardware ID */
-
- struct dst_cache dst_cache;
-
- struct ip_tunnel_parm parms;
-
- int mlink;
- int encap_hlen; /* Encap header length (FOU,GUE) */
- int hlen; /* tun_hlen + encap_hlen */
- struct ip_tunnel_encap encap;
-
- /* for SIT */
-#ifdef CONFIG_IPV6_SIT_6RD
- struct ip_tunnel_6rd_parm ip6rd;
-#endif
- struct ip_tunnel_prl_entry __rcu *prl; /* potential router list */
- unsigned int prl_count; /* # of entries in PRL */
- unsigned int ip_tnl_net_id;
- struct gro_cells gro_cells;
- __u32 fwmark;
- bool collect_md;
- bool ignore_df;
-};
-
-#define ip_tunnel_net rpl_ip_tunnel_net
-struct rpl_ip_tunnel_net {
- struct net_device *fb_tunnel_dev;
- struct hlist_head tunnels[IP_TNL_HASH_SIZE];
- struct ip_tunnel __rcu *collect_md_tun;
-};
-
-
-struct ip_tunnel_encap_ops {
- size_t (*encap_hlen)(struct ip_tunnel_encap *e);
- int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e,
- const u8 *protocol, struct flowi4 *fl4);
-};
-
-extern const struct ip_tunnel_encap_ops __rcu *
- rpl_iptun_encaps[MAX_IPTUN_ENCAP_OPS];
-
-#define ip_encap_hlen rpl_ip_encap_hlen
-static inline int rpl_ip_encap_hlen(struct ip_tunnel_encap *e)
-{
- const struct ip_tunnel_encap_ops *ops;
- int hlen = -EINVAL;
-
- if (e->type == TUNNEL_ENCAP_NONE)
- return 0;
-
- if (e->type >= MAX_IPTUN_ENCAP_OPS)
- return -EINVAL;
-
- rcu_read_lock();
- ops = rcu_dereference(rpl_iptun_encaps[e->type]);
- if (likely(ops && ops->encap_hlen))
- hlen = ops->encap_hlen(e);
- rcu_read_unlock();
-
- return hlen;
-}
-
-static inline int ovs_ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
- const u8 *protocol, struct flowi4 *fl4)
-{
- const struct ip_tunnel_encap_ops *ops;
- int ret = -EINVAL;
-
- if (t->encap.type == TUNNEL_ENCAP_NONE)
- return 0;
-
- if (t->encap.type >= MAX_IPTUN_ENCAP_OPS)
- return -EINVAL;
-
- rcu_read_lock();
- ops = rcu_dereference(rpl_iptun_encaps[t->encap.type]);
- if (likely(ops && ops->build_header))
- ret = ops->build_header(skb, &t->encap, protocol, fl4);
- rcu_read_unlock();
-
- return ret;
-}
-
-#define ip_tunnel_get_stats64 rpl_ip_tunnel_get_stats64
-#if !defined(HAVE_VOID_NDO_GET_STATS64) && !defined(HAVE_RHEL7_MAX_MTU)
-struct rtnl_link_stats64 *rpl_ip_tunnel_get_stats64(struct net_device *dev,
- struct rtnl_link_stats64 *tot);
-#else
-void rpl_ip_tunnel_get_stats64(struct net_device *dev,
- struct rtnl_link_stats64 *tot);
-#endif
-#define ip_tunnel_get_dsfield rpl_ip_tunnel_get_dsfield
-static inline u8 rpl_ip_tunnel_get_dsfield(const struct iphdr *iph,
- const struct sk_buff *skb)
-{
- if (skb->protocol == htons(ETH_P_IP))
- return iph->tos;
- else if (skb->protocol == htons(ETH_P_IPV6))
- return ipv6_get_dsfield((const struct ipv6hdr *)iph);
- else
- return 0;
-}
-
-#define ip_tunnel_ecn_encap rpl_ip_tunnel_ecn_encap
-static inline u8 rpl_ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph,
- const struct sk_buff *skb)
-{
- u8 inner = ip_tunnel_get_dsfield(iph, skb);
-
- return INET_ECN_encapsulate(tos, inner);
-}
-
-static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len)
-{
- if (pkt_len > 0) {
- struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats);
-
- u64_stats_update_begin(&tstats->syncp);
- tstats->tx_bytes += pkt_len;
- tstats->tx_packets++;
- u64_stats_update_end(&tstats->syncp);
- put_cpu_ptr(tstats);
- } else {
- struct net_device_stats *err_stats = &dev->stats;
-
- if (pkt_len < 0) {
- err_stats->tx_errors++;
- err_stats->tx_aborted_errors++;
- } else {
- err_stats->tx_dropped++;
- }
- }
-}
-
-static inline __be64 key32_to_tunnel_id(__be32 key)
-{
-#ifdef __BIG_ENDIAN
- return (__force __be64)key;
-#else
- return (__force __be64)((__force u64)key << 32);
-#endif
-}
-
-/* Returns the least-significant 32 bits of a __be64. */
-static inline __be32 tunnel_id_to_key32(__be64 tun_id)
-{
-#ifdef __BIG_ENDIAN
- return (__force __be32)tun_id;
-#else
- return (__force __be32)((__force u64)tun_id >> 32);
-#endif
-}
-
-#define ip_tunnel_init rpl_ip_tunnel_init
-int rpl_ip_tunnel_init(struct net_device *dev);
-
-#define ip_tunnel_uninit rpl_ip_tunnel_uninit
-void rpl_ip_tunnel_uninit(struct net_device *dev);
-
-#define ip_tunnel_change_mtu rpl_ip_tunnel_change_mtu
-int rpl_ip_tunnel_change_mtu(struct net_device *dev, int new_mtu);
-
-#define ip_tunnel_newlink rpl_ip_tunnel_newlink
-int rpl_ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
- struct ip_tunnel_parm *p);
-
-#define ip_tunnel_dellink rpl_ip_tunnel_dellink
-void rpl_ip_tunnel_dellink(struct net_device *dev, struct list_head *head);
-
-#define ip_tunnel_init_net rpl_ip_tunnel_init_net
-int rpl_ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
- struct rtnl_link_ops *ops, char *devname);
-
-#define ip_tunnel_delete_net rpl_ip_tunnel_delete_net
-void rpl_ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops);
-
-#define ip_tunnel_setup rpl_ip_tunnel_setup
-void rpl_ip_tunnel_setup(struct net_device *dev, int net_id);
-
-#define ip_tunnel_get_iflink rpl_ip_tunnel_get_iflink
-int rpl_ip_tunnel_get_iflink(const struct net_device *dev);
-
-#define ip_tunnel_get_link_net rpl_ip_tunnel_get_link_net
-struct net *rpl_ip_tunnel_get_link_net(const struct net_device *dev);
-
-#define __ip_tunnel_change_mtu rpl___ip_tunnel_change_mtu
-int rpl___ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict);
-
-#define ip_tunnel_lookup rpl_ip_tunnel_lookup
-struct ip_tunnel *rpl_ip_tunnel_lookup(struct ip_tunnel_net *itn,
- int link, __be16 flags,
- __be32 remote, __be32 local,
- __be32 key);
-
-static inline int iptunnel_pull_offloads(struct sk_buff *skb)
-{
- if (skb_is_gso(skb)) {
- int err;
-
- err = skb_unclone(skb, GFP_ATOMIC);
- if (unlikely(err))
- return err;
- skb_shinfo(skb)->gso_type &= ~(NETIF_F_GSO_ENCAP_ALL >>
- NETIF_F_GSO_SHIFT);
- }
-
- skb->encapsulation = 0;
- return 0;
-}
-#endif /* USE_UPSTREAM_TUNNEL */
-
-#define skb_is_encapsulated ovs_skb_is_encapsulated
-bool ovs_skb_is_encapsulated(struct sk_buff *skb);
-
-#endif /* __NET_IP_TUNNELS_H */
diff --git a/datapath/linux/compat/include/net/ipv6.h b/datapath/linux/compat/include/net/ipv6.h
deleted file mode 100644
index 6379457e8..000000000
--- a/datapath/linux/compat/include/net/ipv6.h
+++ /dev/null
@@ -1,88 +0,0 @@
-#ifndef __NET_IPV6_WRAPPER_H
-#define __NET_IPV6_WRAPPER_H 1
-
-#include <linux/version.h>
-
-#include_next <net/ipv6.h>
-
-#ifndef NEXTHDR_SCTP
-#define NEXTHDR_SCTP 132 /* Stream Control Transport Protocol */
-#endif
-
-#ifndef HAVE_IP6_FH_F_SKIP_RH
-
-enum {
- IP6_FH_F_FRAG = (1 << 0),
- IP6_FH_F_AUTH = (1 << 1),
- IP6_FH_F_SKIP_RH = (1 << 2),
-};
-
-/* This function is upstream, but not the version which skips routing
- * headers with 0 segments_left. We fixed it when we introduced
- * IP6_FH_F_SKIP_RH.
- */
-#define ipv6_find_hdr rpl_ipv6_find_hdr
-extern int rpl_ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
- int target, unsigned short *fragoff, int *fragflg);
-#endif
-
-#ifndef HAVE___IPV6_ADDR_JHASH
-static inline u32 __ipv6_addr_jhash(const struct in6_addr *a, const u32 unused)
-{
- return ipv6_addr_jhash(a);
-}
-#endif
-
-#define ip6_flowlabel rpl_ip6_flowlabel
-static inline __be32 ip6_flowlabel(const struct ipv6hdr *hdr)
-{
- return *(__be32 *)hdr & IPV6_FLOWLABEL_MASK;
-}
-
-#ifndef HAVE_IP6_MAKE_FLOWLABEL_FL6
-#define ip6_make_flowlabel rpl_ip6_make_flowlabel
-static inline __be32 rpl_ip6_make_flowlabel(struct net *net,
- struct sk_buff *skb,
- __be32 flowlabel, bool autolabel,
- struct flowi6 *fl6)
-{
-#ifndef HAVE_NETNS_SYSCTL_IPV6_AUTO_FLOWLABELS
- if (!flowlabel && autolabel) {
-#else
- if (!flowlabel && (autolabel || net->ipv6.sysctl.auto_flowlabels)) {
-#endif
- u32 hash;
-
- hash = skb_get_hash(skb);
-
- /* Since this is being sent on the wire obfuscate hash a bit
- * to minimize possbility that any useful information to an
- * attacker is leaked. Only lower 20 bits are relevant.
- */
- hash ^= hash >> 12;
-
- flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK;
- }
-
- return flowlabel;
-}
-#endif
-
-#ifndef IPV6_TCLASS_SHIFT
-#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK)
-#define IPV6_TCLASS_SHIFT 20
-#endif
-
-#define ip6_tclass rpl_ip6_tclass
-static inline u8 ip6_tclass(__be32 flowinfo)
-{
- return ntohl(flowinfo & IPV6_TCLASS_MASK) >> IPV6_TCLASS_SHIFT;
-}
-
-#define ip6_make_flowinfo rpl_ip6_make_flowinfo
-static inline __be32 ip6_make_flowinfo(unsigned int tclass, __be32 flowlabel)
-{
- return htonl(tclass << IPV6_TCLASS_SHIFT) | flowlabel;
-}
-
-#endif
diff --git a/datapath/linux/compat/include/net/ipv6_frag.h b/datapath/linux/compat/include/net/ipv6_frag.h
deleted file mode 100644
index 5d1cc901b..000000000
--- a/datapath/linux/compat/include/net/ipv6_frag.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef __NET_IPV6_FRAG_WRAPPER_H
-#define __NET_IPV6_FRAG_WRAPPER_H
-
-#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) && defined(HAVE_IPV6_FRAG_H)
-#include_next <net/ipv6_frag.h>
-#endif
-
-#endif /* __NET_IPV6_FRAG_WRAPPER_H */
diff --git a/datapath/linux/compat/include/net/lisp.h b/datapath/linux/compat/include/net/lisp.h
deleted file mode 100644
index 6b43c77e2..000000000
--- a/datapath/linux/compat/include/net/lisp.h
+++ /dev/null
@@ -1,27 +0,0 @@
-#ifndef __NET_LISP_WRAPPER_H
-#define __NET_LISP_WRAPPER_H 1
-
-#ifdef CONFIG_INET
-#include <net/udp_tunnel.h>
-#endif
-
-
-#ifdef CONFIG_INET
-#define lisp_dev_create_fb rpl_lisp_dev_create_fb
-struct net_device *rpl_lisp_dev_create_fb(struct net *net, const char *name,
- u8 name_assign_type, u16 dst_port);
-#endif /*ifdef CONFIG_INET */
-
-#define lisp_init_module rpl_lisp_init_module
-int rpl_lisp_init_module(void);
-
-#define lisp_cleanup_module rpl_lisp_cleanup_module
-void rpl_lisp_cleanup_module(void);
-
-#define lisp_xmit rpl_lisp_xmit
-netdev_tx_t rpl_lisp_xmit(struct sk_buff *skb);
-
-#define lisp_fill_metadata_dst ovs_lisp_fill_metadata_dst
-int ovs_lisp_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb);
-
-#endif /*ifdef__NET_LISP_H */
diff --git a/datapath/linux/compat/include/net/mpls.h b/datapath/linux/compat/include/net/mpls.h
deleted file mode 100644
index 9359a2369..000000000
--- a/datapath/linux/compat/include/net/mpls.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2014 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- */
-
-#ifndef _NET_MPLS_WRAPPER_H
-#define _NET_MPLS_WRAPPER_H 1
-
-#include <linux/if_ether.h>
-#include <linux/netdevice.h>
-
-#define MPLS_HLEN 4
-
-struct mpls_shim_hdr {
- __be32 label_stack_entry;
-};
-
-static inline bool eth_p_mpls(__be16 eth_type)
-{
- return eth_type == htons(ETH_P_MPLS_UC) ||
- eth_type == htons(ETH_P_MPLS_MC);
-}
-
-/* Starting from kernel 4.9, commit 48d2ab609b6b ("net: mpls: Fixups for GSO")
- * and commit 85de4a2101ac ("openvswitch: use mpls_hdr") introduced
- * behavioural changes to mpls_gso kernel module. It now assumes that
- * skb_network_header() points to the mpls header and
- * skb_inner_network_header() points to the L3 header. However, the old
- * mpls_gso kernel module assumes that the skb_network_header() points
- * to the L3 header. We shall backport the following function to ensure
- * MPLS GSO works properly for kernels older than the one which contains
- * these commits.
- */
-#ifdef MPLS_HEADER_IS_L3
-static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb)
-{
- return (struct mpls_shim_hdr *)skb_network_header(skb);
-}
-#else
-#define mpls_hdr rpl_mpls_hdr
-/*
- * For non-MPLS skbs this will correspond to the network header.
- * For MPLS skbs it will be before the network_header as the MPLS
- * label stack lies between the end of the mac header and the network
- * header. That is, for MPLS skbs the end of the mac header
- * is the top of the MPLS label stack.
- */
-static inline struct mpls_shim_hdr *rpl_mpls_hdr(const struct sk_buff *skb)
-{
- return (struct mpls_shim_hdr *) (skb_mac_header(skb) + skb->mac_len);
-}
-#endif
-
-#endif /* _NET_MPLS_WRAPPER_H */
diff --git a/datapath/linux/compat/include/net/net_namespace.h b/datapath/linux/compat/include/net/net_namespace.h
deleted file mode 100644
index 427072249..000000000
--- a/datapath/linux/compat/include/net/net_namespace.h
+++ /dev/null
@@ -1,33 +0,0 @@
-#ifndef __NET_NET_NAMESPACE_WRAPPER_H
-#define __NET_NET_NAMESPACE_WRAPPER_H 1
-
-#include_next <net/net_namespace.h>
-
-#ifndef HAVE_POSSIBLE_NET_T
-typedef struct {
-#ifdef CONFIG_NET_NS
- struct net *net;
-#endif
-} possible_net_t;
-
-static inline void rpl_write_pnet(possible_net_t *pnet, struct net *net)
-{
-#ifdef CONFIG_NET_NS
- pnet->net = net;
-#endif
-}
-
-static inline struct net *rpl_read_pnet(const possible_net_t *pnet)
-{
-#ifdef CONFIG_NET_NS
- return pnet->net;
-#else
- return &init_net;
-#endif
-}
-#else /* Linux >= 4.1 */
-#define rpl_read_pnet read_pnet
-#define rpl_write_pnet write_pnet
-#endif /* Linux >= 4.1 */
-
-#endif /* net/net_namespace.h wrapper */
diff --git a/datapath/linux/compat/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/datapath/linux/compat/include/net/netfilter/ipv6/nf_defrag_ipv6.h
deleted file mode 100644
index c4c0f79ab..000000000
--- a/datapath/linux/compat/include/net/netfilter/ipv6/nf_defrag_ipv6.h
+++ /dev/null
@@ -1,42 +0,0 @@
-#ifndef _NF_DEFRAG_IPV6_WRAPPER_H
-#define _NF_DEFRAG_IPV6_WRAPPER_H
-
-#include <linux/kconfig.h>
-#include_next <net/netfilter/ipv6/nf_defrag_ipv6.h>
-
-/* Upstream commit 029f7f3b8701 ("netfilter: ipv6: nf_defrag: avoid/free clone
- * operations") changed the semantics of nf_ct_frag6_gather(), so we need
- * to backport for all prior kernels, i.e. kernel < 4.5.0.
- *
- * Upstream commit 48cac18ecf1d ("ipv6: orphan skbs in reassembly unit") fixes
- * a bug that requires all kernels prior to this fix, i.e. kernel < 4.11.0
- * to be backported.
- */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4,11,0)
-#define OVS_NF_DEFRAG6_BACKPORT 1
-int rpl_nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user);
-#define nf_ct_frag6_gather rpl_nf_ct_frag6_gather
-
-/* If backporting IPv6 defrag, then init/exit functions need to be called from
- * compat_{in,ex}it() to prepare the backported fragmentation cache. In this
- * case we declare the functions which are defined in
- * datapath/linux/compat/nf_conntrack_reasm.c.
- *
- * Otherwise, if we can use upstream defrag then we can rely on the upstream
- * nf_defrag_ipv6 module to init/exit correctly. In this case the calls in
- * compat_{in,ex}it() can be no-ops.
- */
-int __init rpl_nf_ct_frag6_init(void);
-void rpl_nf_ct_frag6_cleanup(void);
-void ovs_netns_frags6_init(struct net *net);
-void ovs_netns_frags6_exit(struct net *net);
-#else /* !OVS_NF_DEFRAG6_BACKPORT */
-static inline int __init rpl_nf_ct_frag6_init(void) { return 0; }
-static inline void rpl_nf_ct_frag6_cleanup(void) { }
-static inline void ovs_netns_frags6_init(struct net *net) { }
-static inline void ovs_netns_frags6_exit(struct net *net) { }
-#endif /* OVS_NF_DEFRAG6_BACKPORT */
-#define nf_ct_frag6_init rpl_nf_ct_frag6_init
-#define nf_ct_frag6_cleanup rpl_nf_ct_frag6_cleanup
-
-#endif /* __NF_DEFRAG_IPV6_WRAPPER_H */
diff --git a/datapath/linux/compat/include/net/netfilter/nf_conntrack.h b/datapath/linux/compat/include/net/netfilter/nf_conntrack.h
deleted file mode 100644
index 50db914a3..000000000
--- a/datapath/linux/compat/include/net/netfilter/nf_conntrack.h
+++ /dev/null
@@ -1,33 +0,0 @@
-#ifndef _NF_CONNTRACK_WRAPPER_H
-#define _NF_CONNTRACK_WRAPPER_H
-
-#include_next <net/netfilter/nf_conntrack.h>
-
-#ifndef HAVE_NF_CT_GET_TUPLEPR_TAKES_STRUCT_NET
-static inline bool rpl_nf_ct_get_tuplepr(const struct sk_buff *skb,
- unsigned int nhoff,
- u_int16_t l3num, struct net *net,
- struct nf_conntrack_tuple *tuple)
-{
- return nf_ct_get_tuplepr(skb, nhoff, l3num, tuple);
-}
-#define nf_ct_get_tuplepr rpl_nf_ct_get_tuplepr
-#endif
-
-#ifndef HAVE_NF_CT_SET
-static inline void
-nf_ct_set(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info info)
-{
- skb->nfct = &ct->ct_general;
- skb->nfctinfo = info;
-}
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4,15,0)
-int rpl_nf_ct_netns_get(struct net *net, u8 nfproto);
-void rpl_nf_ct_netns_put(struct net *net, u8 nfproto);
-#define nf_ct_netns_get rpl_nf_ct_netns_get
-#define nf_ct_netns_put rpl_nf_ct_netns_put
-#endif
-
-#endif /* _NF_CONNTRACK_WRAPPER_H */
diff --git a/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h b/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h
deleted file mode 100644
index bc18c56b8..000000000
--- a/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h
+++ /dev/null
@@ -1,137 +0,0 @@
-#ifndef _NF_CONNTRACK_CORE_WRAPPER_H
-#define _NF_CONNTRACK_CORE_WRAPPER_H
-
-#include_next <net/netfilter/nf_conntrack_core.h>
-
-#ifndef HAVE_NF_CT_TMPL_ALLOC_TAKES_STRUCT_ZONE
-
-#include <net/netfilter/nf_conntrack_zones.h>
-
-/* Released via destroy_conntrack() */
-static inline struct nf_conn *
-rpl_nf_ct_tmpl_alloc(struct net *net, const struct nf_conntrack_zone *zone,
- gfp_t flags)
-{
- struct nf_conn *tmpl;
-
- tmpl = kzalloc(sizeof(*tmpl), flags);
- if (tmpl == NULL)
- return NULL;
-
- tmpl->status = IPS_TEMPLATE;
- write_pnet(&tmpl->ct_net, net);
-
- if (nf_ct_zone_add(tmpl, flags, zone) < 0)
- goto out_free;
-
- atomic_set(&tmpl->ct_general.use, 0);
-
- return tmpl;
-out_free:
- kfree(tmpl);
- return NULL;
-}
-#define nf_ct_tmpl_alloc rpl_nf_ct_tmpl_alloc
-
-static inline void rpl_nf_ct_tmpl_free(struct nf_conn *tmpl)
-{
- nf_ct_ext_destroy(tmpl);
- nf_ct_ext_free(tmpl);
- kfree(tmpl);
-}
-#define nf_ct_tmpl_free rpl_nf_ct_tmpl_free
-
-static inline struct nf_conntrack_tuple_hash *
-rpl_nf_conntrack_find_get(struct net *net,
- const struct nf_conntrack_zone *zone,
- const struct nf_conntrack_tuple *tuple)
-{
- return nf_conntrack_find_get(net, zone->id, tuple);
-}
-#define nf_conntrack_find_get rpl_nf_conntrack_find_get
-#endif /* HAVE_NF_CT_TMPL_ALLOC_TAKES_STRUCT_ZONE */
-
-#ifndef HAVE_NF_CT_GET_TUPLEPR_TAKES_STRUCT_NET
-static inline bool rpl_nf_ct_get_tuple(const struct sk_buff *skb,
- unsigned int nhoff,
- unsigned int dataoff, u_int16_t l3num,
- u_int8_t protonum,
- struct net *net,
- struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_l3proto *l3proto,
- const struct nf_conntrack_l4proto *l4proto)
-{
- return nf_ct_get_tuple(skb, nhoff, dataoff, l3num, protonum, tuple,
- l3proto, l4proto);
-}
-#define nf_ct_get_tuple rpl_nf_ct_get_tuple
-#endif /* HAVE_NF_CT_GET_TUPLEPR_TAKES_STRUCT_NET */
-
-#ifdef HAVE_NF_CONN_TIMER
-
-#ifndef HAVE_NF_CT_DELETE
-#include <net/netfilter/nf_conntrack_timestamp.h>
-#endif
-
-static inline bool rpl_nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
-{
- if (del_timer(&ct->timeout))
-#ifdef HAVE_NF_CT_DELETE
- return nf_ct_delete(ct, portid, report);
-#else
- {
- struct nf_conn_tstamp *tstamp;
-
- tstamp = nf_conn_tstamp_find(ct);
- if (tstamp && tstamp->stop == 0)
- tstamp->stop = ktime_to_ns(ktime_get_real());
-
- if (!test_bit(IPS_DYING_BIT, &ct->status) &&
- unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) {
- /* destroy event was not delivered */
- nf_ct_delete_from_lists(ct);
- nf_ct_dying_timeout(ct);
- return false;
- }
- set_bit(IPS_DYING_BIT, &ct->status);
- nf_ct_delete_from_lists(ct);
- nf_ct_put(ct);
- return true;
- }
-#endif
- return false;
-}
-#define nf_ct_delete rpl_nf_ct_delete
-#endif /* HAVE_NF_CONN_TIMER */
-
-#ifndef HAVE_NF_CONNTRACK_IN_TAKES_NF_HOOK_STATE
-static inline unsigned int
-rpl_nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state)
-{
- int err;
-
- /* Repeat if requested, see nf_iterate(). */
- do {
- err = nf_conntrack_in(state->net, state->pf, state->hook, skb);
- } while (err == NF_REPEAT);
-
- return err;
-}
-#define nf_conntrack_in rpl_nf_conntrack_in
-#endif /* HAVE_NF_CONNTRACK_IN_TAKES_NF_HOOK_STATE */
-
-#ifdef HAVE_NF_CT_INVERT_TUPLEPR
-static inline bool rpl_nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
- const struct nf_conntrack_tuple *orig)
-{
- return nf_ct_invert_tuplepr(inverse, orig);
-}
-#else
-static inline bool rpl_nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
- const struct nf_conntrack_tuple *orig)
-{
- return nf_ct_invert_tuple(inverse, orig);
-}
-#endif /* HAVE_NF_CT_INVERT_TUPLEPR */
-
-#endif /* _NF_CONNTRACK_CORE_WRAPPER_H */
diff --git a/datapath/linux/compat/include/net/netfilter/nf_conntrack_count.h b/datapath/linux/compat/include/net/netfilter/nf_conntrack_count.h
deleted file mode 100644
index 2143136aa..000000000
--- a/datapath/linux/compat/include/net/netfilter/nf_conntrack_count.h
+++ /dev/null
@@ -1,54 +0,0 @@
-#ifndef _NF_CONNTRACK_COUNT_WRAPPER_H
-#define _NF_CONNTRACK_COUNT_WRAPPER_H
-
-#include <linux/list.h>
-#include <net/netfilter/nf_conntrack_tuple.h>
-#include <net/netfilter/nf_conntrack_zones.h>
-
-#ifdef HAVE_UPSTREAM_NF_CONNCOUNT
-#include_next <net/netfilter/nf_conntrack_count.h>
-
-static inline int rpl_nf_conncount_modinit(void)
-{
- return 0;
-}
-
-static inline void rpl_nf_conncount_modexit(void)
-{
-}
-
-#else
-#define CONFIG_NETFILTER_CONNCOUNT 1
-struct nf_conncount_data;
-
-struct nf_conncount_list {
- spinlock_t list_lock;
- struct list_head head; /* connections with the same filtering key */
- unsigned int count; /* length of list */
-};
-
-struct nf_conncount_data
-*rpl_nf_conncount_init(struct net *net, unsigned int family,
- unsigned int keylen);
-
-void rpl_nf_conncount_destroy(struct net *net, unsigned int family,
- struct nf_conncount_data *data);
-
-unsigned int rpl_nf_conncount_count(struct net *net,
- struct nf_conncount_data *data,
- const u32 *key,
- const struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_zone *zone);
-
-#define nf_conncount_init rpl_nf_conncount_init
-#define nf_conncount_destroy rpl_nf_conncount_destroy
-#define nf_conncount_count rpl_nf_conncount_count
-
-int rpl_nf_conncount_modinit(void);
-void rpl_nf_conncount_modexit(void);
-#endif /* HAVE_UPSTREAM_NF_CONNCOUNT */
-
-#define nf_conncount_mod_init rpl_nf_conncount_modinit
-#define nf_conncount_modexit rpl_nf_conncount_modexit
-
-#endif /* _NF_CONNTRACK_COUNT_WRAPPER_H */
diff --git a/datapath/linux/compat/include/net/netfilter/nf_conntrack_expect.h b/datapath/linux/compat/include/net/netfilter/nf_conntrack_expect.h
deleted file mode 100644
index a13f0ce60..000000000
--- a/datapath/linux/compat/include/net/netfilter/nf_conntrack_expect.h
+++ /dev/null
@@ -1,21 +0,0 @@
-#ifndef _NF_CONNTRACK_EXPECT_WRAPPER_H
-#define _NF_CONNTRACK_EXPECT_WRAPPER_H
-
-#include_next <net/netfilter/nf_conntrack_expect.h>
-
-#ifndef HAVE_NF_CT_ZONE_INIT
-
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_zones.h>
-
-static inline struct nf_conntrack_expect *
-rpl___nf_ct_expect_find(struct net *net,
- const struct nf_conntrack_zone *zone,
- const struct nf_conntrack_tuple *tuple)
-{
- return __nf_ct_expect_find(net, zone->id, tuple);
-}
-#define __nf_ct_expect_find rpl___nf_ct_expect_find
-
-#endif /* HAVE_NF_CT_ZONE_INIT */
-#endif /* _NF_CONNTRACK_EXPECT_WRAPPER_H */
diff --git a/datapath/linux/compat/include/net/netfilter/nf_conntrack_helper.h b/datapath/linux/compat/include/net/netfilter/nf_conntrack_helper.h
deleted file mode 100644
index 78f97375b..000000000
--- a/datapath/linux/compat/include/net/netfilter/nf_conntrack_helper.h
+++ /dev/null
@@ -1,39 +0,0 @@
-#ifndef _NF_CONNTRACK_HELPER_WRAPPER_H
-#define _NF_CONNTRACK_HELPER_WRAPPER_H
-
-#include_next <net/netfilter/nf_conntrack_helper.h>
-
-#ifndef HAVE_NF_CONNTRACK_HELPER_PUT
-static inline void nf_conntrack_helper_put(struct nf_conntrack_helper *helper) {
- module_put(helper->me);
-}
-#endif
-
-#ifndef HAVE_NF_CT_HELPER_EXT_ADD_TAKES_HELPER
-static inline struct nf_conn_help *
-rpl_nf_ct_helper_ext_add(struct nf_conn *ct,
- struct nf_conntrack_helper *helper, gfp_t gfp)
-{
- return nf_ct_helper_ext_add(ct, gfp);
-}
-#define nf_ct_helper_ext_add rpl_nf_ct_helper_ext_add
-#endif /* HAVE_NF_CT_HELPER_EXT_ADD_TAKES_HELPER */
-
-#ifndef HAVE_NF_NAT_HELPER_TRY_MODULE_GET
-static inline int rpl_nf_nat_helper_try_module_get(const char *name, u16 l3num,
- u8 protonum)
-{
- request_module("ip_nat_%s", name);
- return 0;
-}
-#define nf_nat_helper_try_module_get rpl_nf_nat_helper_try_module_get
-#endif /* HAVE_NF_NAT_HELPER_TRY_MODULE_GET */
-
-#ifndef HAVE_NF_NAT_HELPER_PUT
-void rpl_nf_nat_helper_put(struct nf_conntrack_helper *helper)
-{
-}
-#define nf_nat_helper_put rpl_nf_nat_helper_put
-#endif /* HAVE_NF_NAT_HELPER_PUT */
-
-#endif /* _NF_CONNTRACK_HELPER_WRAPPER_H */
diff --git a/datapath/linux/compat/include/net/netfilter/nf_conntrack_labels.h b/datapath/linux/compat/include/net/netfilter/nf_conntrack_labels.h
deleted file mode 100644
index 14cb35716..000000000
--- a/datapath/linux/compat/include/net/netfilter/nf_conntrack_labels.h
+++ /dev/null
@@ -1,107 +0,0 @@
-#ifndef _NF_CONNTRACK_LABELS_WRAPPER_H
-#define _NF_CONNTRACK_LABELS_WRAPPER_H
-
-#include <linux/kconfig.h>
-#include <linux/version.h>
-#include_next <net/netfilter/nf_conntrack_labels.h>
-
-#ifndef NF_CT_LABELS_MAX_SIZE
-#define NF_CT_LABELS_MAX_SIZE ((XT_CONNLABEL_MAXBIT + 1) / BITS_PER_BYTE)
-#endif
-
-#ifndef HAVE_NF_CONNLABELS_GET_TAKES_BIT
-#if IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)
-
-/* XXX: This doesn't lock others out from doing the same configuration
- * simultaneously. */
-static inline int rpl_nf_connlabels_get(struct net *net, unsigned int bits)
-{
-#ifndef HAVE_NF_CONNLABELS_GET
- size_t words;
-
- words = BIT_WORD(bits) + 1;
- if (words > NF_CT_LABELS_MAX_SIZE / sizeof(long))
- return -ERANGE;
-
- net->ct.labels_used++;
- if (words > net->ct.label_words)
- net->ct.label_words = words;
-
- return 0;
-#else
- return nf_connlabels_get(net, bits + 1);
-#endif /* HAVE_NF_CONNLABELS_GET */
-}
-#define nf_connlabels_get rpl_nf_connlabels_get
-
-static inline void rpl_nf_connlabels_put(struct net *net)
-{
-#ifndef HAVE_NF_CONNLABELS_GET
- net->ct.labels_used--;
- if (net->ct.labels_used == 0)
- net->ct.label_words = 0;
-#else
- nf_connlabels_put(net);
-#endif /* HAVE_NF_CONNLABELS_GET */
-}
-#define nf_connlabels_put rpl_nf_connlabels_put
-
-#else /* CONFIG_NF_CONNTRACK_LABELS */
-#define nf_connlabels_get rpl_nf_connlabels_get
-static inline int nf_connlabels_get(struct net *net, unsigned int bits)
-{
- return -ERANGE;
-}
-
-#define nf_connlabels_put rpl_nf_connlabels_put
-static inline void nf_connlabels_put(struct net *net) { }
-#endif /* CONFIG_NF_CONNTRACK_LABELS */
-#endif /* HAVE_NF_CONNLABELS_GET_TAKES_BIT */
-
-/* Upstream commit 5a8145f7b222 ("netfilter: labels: don't emit ct event if
- * labels were not changed"), released in Linux 4.7, introduced a functional
- * change to trigger conntrack event for a label change only when the labels
- * actually changed. There is no way we can detect this from the headers, so
- * provide replacements that work the same for OVS (where labels size is 128
- * bits == 16 bytes == 4 4-byte words). */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4,7,0)
-static int replace_u32(u32 *address, u32 mask, u32 new)
-{
- u32 old, tmp;
-
- do {
- old = *address;
- tmp = (old & mask) ^ new;
- if (old == tmp)
- return 0;
- } while (cmpxchg(address, old, tmp) != old);
-
- return 1;
-}
-
-static int rpl_nf_connlabels_replace(struct nf_conn *ct,
- const u32 *data,
- const u32 *mask, unsigned int words32)
-{
- struct nf_conn_labels *labels;
- unsigned int i;
- int changed = 0;
- u32 *dst;
-
- labels = nf_ct_labels_find(ct);
- if (!labels)
- return -ENOSPC;
-
- dst = (u32 *) labels->bits;
- for (i = 0; i < words32; i++)
- changed |= replace_u32(&dst[i], mask ? ~mask[i] : 0, data[i]);
-
- if (changed)
- nf_conntrack_event_cache(IPCT_LABEL, ct);
-
- return 0;
-}
-#define nf_connlabels_replace rpl_nf_connlabels_replace
-#endif
-
-#endif /* _NF_CONNTRACK_LABELS_WRAPPER_H */
diff --git a/datapath/linux/compat/include/net/netfilter/nf_conntrack_seqadj.h b/datapath/linux/compat/include/net/netfilter/nf_conntrack_seqadj.h
deleted file mode 100644
index b11d1a578..000000000
--- a/datapath/linux/compat/include/net/netfilter/nf_conntrack_seqadj.h
+++ /dev/null
@@ -1,30 +0,0 @@
-#ifndef _NF_CONNTRACK_SEQADJ_WRAPPER_H
-#define _NF_CONNTRACK_SEQADJ_WRAPPER_H
-
-#ifdef HAVE_NF_CT_SEQ_ADJUST
-#include_next <net/netfilter/nf_conntrack_seqadj.h>
-#else
-
-#include <net/netfilter/nf_nat_helper.h>
-
-/* TCP sequence number adjustment. Returns 1 on success, 0 on failure */
-static inline int
-nf_ct_seq_adjust(struct sk_buff *skb,
- struct nf_conn *ct, enum ip_conntrack_info ctinfo,
- unsigned int protoff)
-{
- typeof(nf_nat_seq_adjust_hook) seq_adjust;
-
- seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook);
- if (!seq_adjust ||
- !seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) {
- NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
- return 0;
- }
-
- return 1;
-}
-
-#endif /* HAVE_NF_CT_SEQ_ADJUST */
-
-#endif /* _NF_CONNTRACK_SEQADJ_WRAPPER_H */
diff --git a/datapath/linux/compat/include/net/netfilter/nf_conntrack_timeout.h b/datapath/linux/compat/include/net/netfilter/nf_conntrack_timeout.h
deleted file mode 100644
index 134e72b83..000000000
--- a/datapath/linux/compat/include/net/netfilter/nf_conntrack_timeout.h
+++ /dev/null
@@ -1,34 +0,0 @@
-#ifndef _NF_CONNTRACK_TIMEOUT_WRAPPER_H
-#define _NF_CONNTRACK_TIMEOUT_WRAPPER_H
-
-#include_next <net/netfilter/nf_conntrack_timeout.h>
-
-#ifndef HAVE_NF_CT_SET_TIMEOUT
-
-#ifndef HAVE_NF_CT_TIMEOUT
-#define nf_ct_timeout ctnl_timeout
-#endif
-
-#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
-int rpl_nf_ct_set_timeout(struct net *net, struct nf_conn *ct, u8 l3num, u8 l4num,
- const char *timeout_name);
-void rpl_nf_ct_destroy_timeout(struct nf_conn *ct);
-#else
-static inline int rpl_nf_ct_set_timeout(struct net *net, struct nf_conn *ct,
- u8 l3num, u8 l4num,
- const char *timeout_name)
-{
- return -EOPNOTSUPP;
-}
-
-static inline void rpl_nf_ct_destroy_timeout(struct nf_conn *ct)
-{
- return;
-}
-#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-
-#define nf_ct_set_timeout rpl_nf_ct_set_timeout
-#define nf_ct_destroy_timeout rpl_nf_ct_destroy_timeout
-
-#endif /* HAVE_NF_CT_SET_TIMEOUT */
-#endif /* _NF_CONNTRACK_TIMEOUT_WRAPPER_H */
diff --git a/datapath/linux/compat/include/net/netfilter/nf_conntrack_zones.h b/datapath/linux/compat/include/net/netfilter/nf_conntrack_zones.h
deleted file mode 100644
index d46c098c7..000000000
--- a/datapath/linux/compat/include/net/netfilter/nf_conntrack_zones.h
+++ /dev/null
@@ -1,101 +0,0 @@
-#ifndef _NF_CONNTRACK_ZONES_WRAPPER_H
-#define _NF_CONNTRACK_ZONES_WRAPPER_H
-
-#include <linux/version.h>
-
-#include_next <net/netfilter/nf_conntrack_zones.h>
-
-#ifndef HAVE_NF_CT_ZONE_INIT
-
-#include <linux/kconfig.h>
-#include <linux/types.h>
-#include <linux/netfilter/nf_conntrack_tuple_common.h>
-
-#define NF_CT_DEFAULT_ZONE_ID 0
-
-#define NF_CT_ZONE_DIR_ORIG (1 << IP_CT_DIR_ORIGINAL)
-#define NF_CT_ZONE_DIR_REPL (1 << IP_CT_DIR_REPLY)
-
-#define NF_CT_DEFAULT_ZONE_DIR (NF_CT_ZONE_DIR_ORIG | NF_CT_ZONE_DIR_REPL)
-
-#define NF_CT_FLAG_MARK 1
-
-struct rpl_nf_conntrack_zone {
- u16 id;
- u8 flags;
- u8 dir;
-};
-#define nf_conntrack_zone rpl_nf_conntrack_zone
-
-extern const struct nf_conntrack_zone nf_ct_zone_dflt;
-
-#if IS_ENABLED(CONFIG_NF_CONNTRACK)
-#include <net/netfilter/nf_conntrack_extend.h>
-
-static inline const struct nf_conntrack_zone *
-rpl_nf_ct_zone(const struct nf_conn *ct)
-{
- const struct nf_conntrack_zone *nf_ct_zone = NULL;
-
-#ifdef CONFIG_NF_CONNTRACK_ZONES
- nf_ct_zone = nf_ct_ext_find(ct, NF_CT_EXT_ZONE);
-#endif
- return nf_ct_zone ? nf_ct_zone : &nf_ct_zone_dflt;
-}
-#define nf_ct_zone rpl_nf_ct_zone
-
-static inline const struct nf_conntrack_zone *
-nf_ct_zone_init(struct nf_conntrack_zone *zone, u16 id, u8 dir, u8 flags)
-{
- zone->id = id;
- zone->flags = flags;
- zone->dir = dir;
-
- return zone;
-}
-
-static inline int nf_ct_zone_add(struct nf_conn *ct, gfp_t flags,
- const struct nf_conntrack_zone *info)
-{
-#ifdef CONFIG_NF_CONNTRACK_ZONES
- struct nf_conntrack_zone *nf_ct_zone;
-
- nf_ct_zone = nf_ct_ext_add(ct, NF_CT_EXT_ZONE, flags);
- if (!nf_ct_zone)
- return -ENOMEM;
-
- nf_ct_zone_init(nf_ct_zone, info->id, info->dir,
- info->flags);
-#endif
- return 0;
-}
-
-static inline bool nf_ct_zone_matches_dir(const struct nf_conntrack_zone *zone,
- enum ip_conntrack_dir dir)
-{
- return zone->dir & (1 << dir);
-}
-
-static inline u16 nf_ct_zone_id(const struct nf_conntrack_zone *zone,
- enum ip_conntrack_dir dir)
-{
- return nf_ct_zone_matches_dir(zone, dir) ?
- zone->id : NF_CT_DEFAULT_ZONE_ID;
-}
-
-static inline bool nf_ct_zone_equal(const struct nf_conn *a,
- const struct nf_conntrack_zone *b,
- enum ip_conntrack_dir dir)
-{
- return nf_ct_zone_id(nf_ct_zone(a), dir) ==
- nf_ct_zone_id(b, dir);
-}
-
-static inline bool nf_ct_zone_equal_any(const struct nf_conn *a,
- const struct nf_conntrack_zone *b)
-{
- return nf_ct_zone(a)->id == b->id;
-}
-#endif /* IS_ENABLED(CONFIG_NF_CONNTRACK) */
-#endif /* HAVE_NF_CT_ZONE_INIT */
-#endif /* _NF_CONNTRACK_ZONES_WRAPPER_H */
diff --git a/datapath/linux/compat/include/net/netfilter/nf_nat.h b/datapath/linux/compat/include/net/netfilter/nf_nat.h
deleted file mode 100644
index 773e569cb..000000000
--- a/datapath/linux/compat/include/net/netfilter/nf_nat.h
+++ /dev/null
@@ -1,44 +0,0 @@
-#ifndef _NF_NAT_WRAPPER_H
-#define _NF_NAT_WRAPPER_H
-
-#include_next <net/netfilter/nf_nat.h>
-
-#ifndef HAVE_NF_CT_NAT_EXT_ADD
-
-static inline struct nf_conn_nat *
-nf_ct_nat_ext_add(struct nf_conn *ct)
-{
- struct nf_conn_nat *nat = nfct_nat(ct);
- if (nat)
- return nat;
-
- if (!nf_ct_is_confirmed(ct))
- nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
-
- return nat;
-}
-#endif /* HAVE_NF_CT_NAT_EXT_ADD */
-
-#ifndef HAVE_NF_NAT_ALLOC_NULL_BINDING
-static inline unsigned int
-nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
-{
- /* Force range to this IP; let proto decide mapping for
- * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
- * Use reply in case it's already been mangled (eg local packet).
- */
- union nf_inet_addr ip =
- (HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ?
- ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3 :
- ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3);
- struct nf_nat_range range = {
- .flags = NF_NAT_RANGE_MAP_IPS,
- .min_addr = ip,
- .max_addr = ip,
- };
- return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
-}
-
-#endif /* HAVE_NF_NAT_ALLOC_NULL_BINDING */
-
-#endif /* _NF_NAT_WRAPPER_H */
diff --git a/datapath/linux/compat/include/net/netlink.h b/datapath/linux/compat/include/net/netlink.h
deleted file mode 100644
index 84e073974..000000000
--- a/datapath/linux/compat/include/net/netlink.h
+++ /dev/null
@@ -1,185 +0,0 @@
-#ifndef __NET_NETLINK_WRAPPER_H
-#define __NET_NETLINK_WRAPPER_H 1
-
-#include <linux/version.h>
-#include_next <net/netlink.h>
-#include_next <linux/in6.h>
-
-#ifndef HAVE_NLA_GET_BE16
-/**
- * nla_get_be16 - return payload of __be16 attribute
- * @nla: __be16 netlink attribute
- */
-static inline __be16 nla_get_be16(const struct nlattr *nla)
-{
- return *(__be16 *) nla_data(nla);
-}
-#endif /* !HAVE_NLA_GET_BE16 */
-
-#ifndef HAVE_NLA_PUT_BE16
-static inline int nla_put_be16(struct sk_buff *skb, int attrtype, __be16 value)
-{
- return nla_put(skb, attrtype, sizeof(__be16), &value);
-}
-#endif
-
-#ifndef HAVE_NLA_PUT_BE32
-static inline int nla_put_be32(struct sk_buff *skb, int attrtype, __be32 value)
-{
- return nla_put(skb, attrtype, sizeof(__be32), &value);
-}
-#endif
-
-#ifndef HAVE_NLA_PUT_BE64
-static inline int nla_put_be64(struct sk_buff *skb, int attrtype, __be64 value)
-{
- return nla_put(skb, attrtype, sizeof(__be64), &value);
-}
-#endif
-
-#ifndef nla_for_each_nested
-#define nla_for_each_nested(pos, nla, rem) \
- nla_for_each_attr(pos, nla_data(nla), nla_len(nla), rem)
-#endif
-
-#ifndef HAVE_NLA_FIND_NESTED
-static inline struct nlattr *nla_find_nested(struct nlattr *nla, int attrtype)
-{
- return nla_find(nla_data(nla), nla_len(nla), attrtype);
-}
-#endif
-
-#ifndef HAVE_NLA_IS_LAST
-static inline bool nla_is_last(const struct nlattr *nla, int rem)
-{
- return nla->nla_len == rem;
-}
-#endif
-
-#ifndef HAVE_NLA_PUT_IN_ADDR
-static inline int nla_put_in_addr(struct sk_buff *skb, int attrtype,
- __be32 addr)
-{
- return nla_put_be32(skb, attrtype, addr);
-}
-
-static inline int nla_put_in6_addr(struct sk_buff *skb, int attrtype,
- const struct in6_addr *addr)
-{
- return nla_put(skb, attrtype, sizeof(*addr), addr);
-}
-
-static inline __be32 nla_get_in_addr(const struct nlattr *nla)
-{
- return *(__be32 *) nla_data(nla);
-}
-
-static inline struct in6_addr nla_get_in6_addr(const struct nlattr *nla)
-{
- struct in6_addr tmp;
-
- nla_memcpy(&tmp, nla, sizeof(tmp));
- return tmp;
-}
-#endif
-
-#ifndef HAVE_NLA_PUT_64BIT
-static inline bool nla_need_padding_for_64bit(struct sk_buff *skb)
-{
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
- /* The nlattr header is 4 bytes in size, that's why we test
- * if the skb->data _is_ aligned. A NOP attribute, plus
- * nlattr header for next attribute, will make nla_data()
- * 8-byte aligned.
- */
- if (IS_ALIGNED((unsigned long)skb_tail_pointer(skb), 8))
- return true;
-#endif
- return false;
-}
-
-static inline int nla_align_64bit(struct sk_buff *skb, int padattr)
-{
- if (nla_need_padding_for_64bit(skb) &&
- !nla_reserve(skb, padattr, 0))
- return -EMSGSIZE;
-
- return 0;
-}
-
-static inline int nla_total_size_64bit(int payload)
-{
- return NLA_ALIGN(nla_attr_size(payload))
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
- + NLA_ALIGN(nla_attr_size(0))
-#endif
- ;
-}
-
-#define nla_put_64bit rpl_nla_put_64bit
-int rpl_nla_put_64bit(struct sk_buff *skb, int attrtype, int attrlen,
- const void *data, int padattr);
-
-#define __nla_put_64bit rpl___nla_put_64bit
-void rpl___nla_put_64bit(struct sk_buff *skb, int attrtype, int attrlen,
- const void *data, int padattr);
-
-#define __nla_reserve_64bit rpl___nla_reserve_64bit
-struct nlattr *rpl___nla_reserve_64bit(struct sk_buff *skb, int attrtype,
- int attrlen, int padattr);
-
-static inline int nla_put_u64_64bit(struct sk_buff *skb, int attrtype,
- u64 value, int padattr)
-{
- return nla_put_64bit(skb, attrtype, sizeof(u64), &value, padattr);
-}
-
-#define nla_put_be64 rpl_nla_put_be64
-static inline int nla_put_be64(struct sk_buff *skb, int attrtype, __be64 value,
- int padattr)
-{
- return nla_put_64bit(skb, attrtype, sizeof(__be64), &value, padattr);
-}
-
-#endif
-
-#ifndef HAVE_NLA_PARSE_DEPRECATED_STRICT
-#define nla_parse_nested_deprecated nla_parse_nested
-#define nla_parse_deprecated_strict nla_parse
-#define genlmsg_parse_deprecated genlmsg_parse
-
-#ifndef HAVE_NETLINK_EXT_ACK
-struct netlink_ext_ack;
-
-static inline int rpl_nla_parse_nested(struct nlattr *tb[], int maxtype,
- const struct nlattr *nla,
- const struct nla_policy *policy,
- struct netlink_ext_ack *extack)
-{
- return nla_parse_nested(tb, maxtype, nla, policy);
-}
-#undef nla_parse_nested_deprecated
-#define nla_parse_nested_deprecated rpl_nla_parse_nested
-
-static inline int rpl_nla_parse(struct nlattr **tb, int maxtype,
- const struct nlattr *head, int len,
- const struct nla_policy *policy,
- struct netlink_ext_ack *extack)
-{
- return nla_parse(tb, maxtype, head, len, policy);
-}
-#undef nla_parse_deprecated_strict
-#define nla_parse_deprecated_strict rpl_nla_parse
-#endif
-#endif /* HAVE_NLA_PARSE_DEPRECATED_STRICT */
-
-#ifndef HAVE_NLA_NEST_START_NOFLAG
-static inline struct nlattr *rpl_nla_nest_start_noflag(struct sk_buff *skb,
- int attrtype)
-{
- return nla_nest_start(skb, attrtype);
-}
-#define nla_nest_start_noflag rpl_nla_nest_start_noflag
-#endif
-
-#endif /* net/netlink.h */
diff --git a/datapath/linux/compat/include/net/nsh.h b/datapath/linux/compat/include/net/nsh.h
deleted file mode 100644
index 76894910c..000000000
--- a/datapath/linux/compat/include/net/nsh.h
+++ /dev/null
@@ -1,313 +0,0 @@
-#ifndef __NET_NSH_H
-#define __NET_NSH_H 1
-
-#include <linux/skbuff.h>
-
-/*
- * Network Service Header:
- * 0 1 2 3
- * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * |Ver|O|U| TTL | Length |U|U|U|U|MD Type| Next Protocol |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * | Service Path Identifier (SPI) | Service Index |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * | |
- * ~ Mandatory/Optional Context Headers ~
- * | |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- *
- * Version: The version field is used to ensure backward compatibility
- * going forward with future NSH specification updates. It MUST be set
- * to 0x0 by the sender, in this first revision of NSH. Given the
- * widespread implementation of existing hardware that uses the first
- * nibble after an MPLS label stack for ECMP decision processing, this
- * document reserves version 01b and this value MUST NOT be used in
- * future versions of the protocol. Please see [RFC7325] for further
- * discussion of MPLS-related forwarding requirements.
- *
- * O bit: Setting this bit indicates an Operations, Administration, and
- * Maintenance (OAM) packet. The actual format and processing of SFC
- * OAM packets is outside the scope of this specification (see for
- * example [I-D.ietf-sfc-oam-framework] for one approach).
- *
- * The O bit MUST be set for OAM packets and MUST NOT be set for non-OAM
- * packets. The O bit MUST NOT be modified along the SFP.
- *
- * SF/SFF/SFC Proxy/Classifier implementations that do not support SFC
- * OAM procedures SHOULD discard packets with O bit set, but MAY support
- * a configurable parameter to enable forwarding received SFC OAM
- * packets unmodified to the next element in the chain. Forwarding OAM
- * packets unmodified by SFC elements that do not support SFC OAM
- * procedures may be acceptable for a subset of OAM functions, but can
- * result in unexpected outcomes for others, thus it is recommended to
- * analyze the impact of forwarding an OAM packet for all OAM functions
- * prior to enabling this behavior. The configurable parameter MUST be
- * disabled by default.
- *
- * TTL: Indicates the maximum SFF hops for an SFP. This field is used
- * for service plane loop detection. The initial TTL value SHOULD be
- * configurable via the control plane; the configured initial value can
- * be specific to one or more SFPs. If no initial value is explicitly
- * provided, the default initial TTL value of 63 MUST be used. Each SFF
- * involved in forwarding an NSH packet MUST decrement the TTL value by
- * 1 prior to NSH forwarding lookup. Decrementing by 1 from an incoming
- * value of 0 shall result in a TTL value of 63. The packet MUST NOT be
- * forwarded if TTL is, after decrement, 0.
- *
- * All other flag fields, marked U, are unassigned and available for
- * future use, see Section 11.2.1. Unassigned bits MUST be set to zero
- * upon origination, and MUST be ignored and preserved unmodified by
- * other NSH supporting elements. Elements which do not understand the
- * meaning of any of these bits MUST NOT modify their actions based on
- * those unknown bits.
- *
- * Length: The total length, in 4-byte words, of NSH including the Base
- * Header, the Service Path Header, the Fixed Length Context Header or
- * Variable Length Context Header(s). The length MUST be 0x6 for MD
- * Type equal to 0x1, and MUST be 0x2 or greater for MD Type equal to
- * 0x2. The length of the NSH header MUST be an integer multiple of 4
- * bytes, thus variable length metadata is always padded out to a
- * multiple of 4 bytes.
- *
- * MD Type: Indicates the format of NSH beyond the mandatory Base Header
- * and the Service Path Header. MD Type defines the format of the
- * metadata being carried.
- *
- * 0x0 - This is a reserved value. Implementations SHOULD silently
- * discard packets with MD Type 0x0.
- *
- * 0x1 - This indicates that the format of the header includes a fixed
- * length Context Header (see Figure 4 below).
- *
- * 0x2 - This does not mandate any headers beyond the Base Header and
- * Service Path Header, but may contain optional variable length Context
- * Header(s). The semantics of the variable length Context Header(s)
- * are not defined in this document. The format of the optional
- * variable length Context Headers is provided in Section 2.5.1.
- *
- * 0xF - This value is reserved for experimentation and testing, as per
- * [RFC3692]. Implementations not explicitly configured to be part of
- * an experiment SHOULD silently discard packets with MD Type 0xF.
- *
- * Next Protocol: indicates the protocol type of the encapsulated data.
- * NSH does not alter the inner payload, and the semantics on the inner
- * protocol remain unchanged due to NSH service function chaining.
- * Please see the IANA Considerations section below, Section 11.2.5.
- *
- * This document defines the following Next Protocol values:
- *
- * 0x1: IPv4
- * 0x2: IPv6
- * 0x3: Ethernet
- * 0x4: NSH
- * 0x5: MPLS
- * 0xFE: Experiment 1
- * 0xFF: Experiment 2
- *
- * Packets with Next Protocol values not supported SHOULD be silently
- * dropped by default, although an implementation MAY provide a
- * configuration parameter to forward them. Additionally, an
- * implementation not explicitly configured for a specific experiment
- * [RFC3692] SHOULD silently drop packets with Next Protocol values 0xFE
- * and 0xFF.
- *
- * Service Path Identifier (SPI): Identifies a service path.
- * Participating nodes MUST use this identifier for Service Function
- * Path selection. The initial classifier MUST set the appropriate SPI
- * for a given classification result.
- *
- * Service Index (SI): Provides location within the SFP. The initial
- * classifier for a given SFP SHOULD set the SI to 255, however the
- * control plane MAY configure the initial value of SI as appropriate
- * (i.e., taking into account the length of the service function path).
- * The Service Index MUST be decremented by a value of 1 by Service
- * Functions or by SFC Proxy nodes after performing required services
- * and the new decremented SI value MUST be used in the egress packet's
- * NSH. The initial Classifier MUST send the packet to the first SFF in
- * the identified SFP for forwarding along an SFP. If re-classification
- * occurs, and that re-classification results in a new SPI, the
- * (re)classifier is, in effect, the initial classifier for the
- * resultant SPI.
- *
- * The SI is used in conjunction the with Service Path Identifier for
- * Service Function Path Selection and for determining the next SFF/SF
- * in the path. The SI is also valuable when troubleshooting or
- * reporting service paths. Additionally, while the TTL field is the
- * main mechanism for service plane loop detection, the SI can also be
- * used for detecting service plane loops.
- *
- * When the Base Header specifies MD Type = 0x1, a Fixed Length Context
- * Header (16-bytes) MUST be present immediately following the Service
- * Path Header. The value of a Fixed Length Context
- * Header that carries no metadata MUST be set to zero.
- *
- * When the base header specifies MD Type = 0x2, zero or more Variable
- * Length Context Headers MAY be added, immediately following the
- * Service Path Header (see Figure 5). Therefore, Length = 0x2,
- * indicates that only the Base Header followed by the Service Path
- * Header are present. The optional Variable Length Context Headers
- * MUST be of an integer number of 4-bytes. The base header Length
- * field MUST be used to determine the offset to locate the original
- * packet or frame for SFC nodes that require access to that
- * information.
- *
- * The format of the optional variable length Context Headers
- *
- * 0 1 2 3
- * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * | Metadata Class | Type |U| Length |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * | Variable Metadata |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- *
- * Metadata Class (MD Class): Defines the scope of the 'Type' field to
- * provide a hierarchical namespace. The IANA Considerations
- * Section 11.2.4 defines how the MD Class values can be allocated to
- * standards bodies, vendors, and others.
- *
- * Type: Indicates the explicit type of metadata being carried. The
- * definition of the Type is the responsibility of the MD Class owner.
- *
- * Unassigned bit: One unassigned bit is available for future use. This
- * bit MUST NOT be set, and MUST be ignored on receipt.
- *
- * Length: Indicates the length of the variable metadata, in bytes. In
- * case the metadata length is not an integer number of 4-byte words,
- * the sender MUST add pad bytes immediately following the last metadata
- * byte to extend the metadata to an integer number of 4-byte words.
- * The receiver MUST round up the length field to the nearest 4-byte
- * word boundary, to locate and process the next field in the packet.
- * The receiver MUST access only those bytes in the metadata indicated
- * by the length field (i.e., actual number of bytes) and MUST ignore
- * the remaining bytes up to the nearest 4-byte word boundary. The
- * Length may be 0 or greater.
- *
- * A value of 0 denotes a Context Header without a Variable Metadata
- * field.
- *
- * [0] https://datatracker.ietf.org/doc/draft-ietf-sfc-nsh/
- */
-
-/**
- * struct nsh_md1_ctx - Keeps track of NSH context data
- * @nshc<1-4>: NSH Contexts.
- */
-struct nsh_md1_ctx {
- __be32 context[4];
-};
-
-struct nsh_md2_tlv {
- __be16 md_class;
- u8 type;
- u8 length;
- u8 md_value[];
-};
-
-struct nshhdr {
- __be16 ver_flags_ttl_len;
- u8 mdtype;
- u8 np;
- __be32 path_hdr;
- union {
- struct nsh_md1_ctx md1;
- struct nsh_md2_tlv md2;
- };
-};
-
-/* Masking NSH header fields. */
-#define NSH_VER_MASK 0xc000
-#define NSH_VER_SHIFT 14
-#define NSH_FLAGS_MASK 0x3000
-#define NSH_FLAGS_SHIFT 12
-#define NSH_TTL_MASK 0x0fc0
-#define NSH_TTL_SHIFT 6
-#define NSH_LEN_MASK 0x003f
-#define NSH_LEN_SHIFT 0
-
-#define NSH_MDTYPE_MASK 0x0f
-#define NSH_MDTYPE_SHIFT 0
-
-#define NSH_SPI_MASK 0xffffff00
-#define NSH_SPI_SHIFT 8
-#define NSH_SI_MASK 0x000000ff
-#define NSH_SI_SHIFT 0
-
-/* MD Type Registry. */
-#define NSH_M_TYPE1 0x01
-#define NSH_M_TYPE2 0x02
-#define NSH_M_EXP1 0xFE
-#define NSH_M_EXP2 0xFF
-
-/* NSH Base Header Length */
-#define NSH_BASE_HDR_LEN 8
-
-/* NSH MD Type 1 header Length. */
-#define NSH_M_TYPE1_LEN 24
-
-/* NSH header maximum Length. */
-#define NSH_HDR_MAX_LEN 252
-
-/* NSH context headers maximum Length. */
-#define NSH_CTX_HDRS_MAX_LEN 244
-
-static inline struct nshhdr *nsh_hdr(struct sk_buff *skb)
-{
- return (struct nshhdr *)skb_network_header(skb);
-}
-
-static inline u16 nsh_hdr_len(const struct nshhdr *nsh)
-{
- return ((ntohs(nsh->ver_flags_ttl_len) & NSH_LEN_MASK)
- >> NSH_LEN_SHIFT) << 2;
-}
-
-static inline u8 nsh_get_ver(const struct nshhdr *nsh)
-{
- return (ntohs(nsh->ver_flags_ttl_len) & NSH_VER_MASK)
- >> NSH_VER_SHIFT;
-}
-
-static inline u8 nsh_get_flags(const struct nshhdr *nsh)
-{
- return (ntohs(nsh->ver_flags_ttl_len) & NSH_FLAGS_MASK)
- >> NSH_FLAGS_SHIFT;
-}
-
-static inline u8 nsh_get_ttl(const struct nshhdr *nsh)
-{
- return (ntohs(nsh->ver_flags_ttl_len) & NSH_TTL_MASK)
- >> NSH_TTL_SHIFT;
-}
-
-static inline void __nsh_set_xflag(struct nshhdr *nsh, u16 xflag, u16 xmask)
-{
- nsh->ver_flags_ttl_len
- = (nsh->ver_flags_ttl_len & ~htons(xmask)) | htons(xflag);
-}
-
-static inline void nsh_set_flags_and_ttl(struct nshhdr *nsh, u8 flags, u8 ttl)
-{
- __nsh_set_xflag(nsh, ((flags << NSH_FLAGS_SHIFT) & NSH_FLAGS_MASK) |
- ((ttl << NSH_TTL_SHIFT) & NSH_TTL_MASK),
- NSH_FLAGS_MASK | NSH_TTL_MASK);
-}
-
-static inline void nsh_set_flags_ttl_len(struct nshhdr *nsh, u8 flags,
- u8 ttl, u8 len)
-{
- len = len >> 2;
- __nsh_set_xflag(nsh, ((flags << NSH_FLAGS_SHIFT) & NSH_FLAGS_MASK) |
- ((ttl << NSH_TTL_SHIFT) & NSH_TTL_MASK) |
- ((len << NSH_LEN_SHIFT) & NSH_LEN_MASK),
- NSH_FLAGS_MASK | NSH_TTL_MASK | NSH_LEN_MASK);
-}
-
-int ovs_nsh_init(void);
-void ovs_nsh_cleanup(void);
-
-int ovs_nsh_push(struct sk_buff *skb, const struct nshhdr *pushed_nh);
-int ovs_nsh_pop(struct sk_buff *skb);
-
-#endif /* __NET_NSH_H */
diff --git a/datapath/linux/compat/include/net/protocol.h b/datapath/linux/compat/include/net/protocol.h
deleted file mode 100644
index 0247a26c7..000000000
--- a/datapath/linux/compat/include/net/protocol.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#ifndef _NET_PROTOCOL_WRAPPER_H
-#define _NET_PROTOCOL_WRAPPER_H
-
-#include_next <net/protocol.h>
-
-#ifdef HAVE_UDP_OFFLOAD
-
-#ifndef HAVE_UDP_ADD_OFFLOAD_TAKES_NET
-#define udp_add_offload(net, prot) udp_add_offload(prot)
-#endif
-
-#else
-
-#define udp_add_offload(net, prot) 0
-#define udp_del_offload(prot) do {} while(0)
-
-#endif /* HAVE_UDP_OFFLOAD */
-
-#endif /* _NET_PROTOCOL_WRAPPER_H */
diff --git a/datapath/linux/compat/include/net/route.h b/datapath/linux/compat/include/net/route.h
deleted file mode 100644
index 9e4a1f18a..000000000
--- a/datapath/linux/compat/include/net/route.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __NET_ROUTE_H_WRAPPER
-#define __NET_ROUTE_H_WRAPPER
-
-#include_next <net/route.h>
-
-#endif
diff --git a/datapath/linux/compat/include/net/rtnetlink.h b/datapath/linux/compat/include/net/rtnetlink.h
deleted file mode 100644
index e026cab95..000000000
--- a/datapath/linux/compat/include/net/rtnetlink.h
+++ /dev/null
@@ -1,44 +0,0 @@
-#ifndef __NET_RTNETLINK_WRAPPER_H
-#define __NET_RTNETLINK_WRAPPER_H
-#include_next <net/rtnetlink.h>
-
-#define rtnl_delete_link rpl_rtnl_delete_link
-int rpl_rtnl_delete_link(struct net_device *dev);
-
-#ifndef HAVE_NAME_ASSIGN_TYPE
-#ifdef HAVE_RTNL_CREATE_LINK_SRC_NET
-static inline struct net_device *rpl_rtnl_create_link(struct net *net, const char *ifname,
- unsigned char name_assign_type,
- const struct rtnl_link_ops *ops,
- struct nlattr *tb[])
-{
- return rtnl_create_link(net, net, (char *)ifname, ops, tb);
-}
-
-#else
-static inline struct net_device *rpl_rtnl_create_link(struct net *net, const char *ifname,
- unsigned char name_assign_type,
- const struct rtnl_link_ops *ops,
- struct nlattr *tb[])
-{
- return rtnl_create_link(net, (char *)ifname, ops, tb);
-}
-#endif
-#else
-/* This function is only defined to avoid warning related to ifname. Some backported
- * function did not changed the name to const type. */
-static inline struct net_device *rpl_rtnl_create_link(struct net *net, const char *ifname,
- unsigned char name_assign_type,
- const struct rtnl_link_ops *ops,
- struct nlattr *tb[])
-{
-#ifdef HAVE_RTNL_CREATE_LINK_TAKES_EXTACK
- return rtnl_create_link(net, (char *) ifname, name_assign_type, ops, tb, NULL);
-#else
- return rtnl_create_link(net, (char *) ifname, name_assign_type, ops, tb);
-#endif
-}
-#endif
-
-#define rtnl_create_link rpl_rtnl_create_link
-#endif
diff --git a/datapath/linux/compat/include/net/sctp/checksum.h b/datapath/linux/compat/include/net/sctp/checksum.h
deleted file mode 100644
index 7832abce0..000000000
--- a/datapath/linux/compat/include/net/sctp/checksum.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef __SCTP_CHECKSUM_WRAPPER_H
-#define __SCTP_CHECKSUM_WRAPPER_H 1
-
-#include_next <net/sctp/checksum.h>
-
-#ifndef HAVE_SCTP_COMPUTE_CKSUM
-static inline __le32 sctp_compute_cksum(const struct sk_buff *skb,
- unsigned int offset)
-{
- const struct sk_buff *iter;
-
- __u32 crc32 = sctp_start_cksum(skb->data + offset,
- skb_headlen(skb) - offset);
- skb_walk_frags(skb, iter)
- crc32 = sctp_update_cksum((__u8 *) iter->data,
- skb_headlen(iter), crc32);
-
- /* Open-code sctp_end_cksum() to avoid a sparse warning due to a bug in
- * sparse annotations in Linux fixed in 3.10 in commit eee1d5a14 (sctp:
- * Correct type and usage of sctp_end_cksum()). */
- return cpu_to_le32(~crc32);
-}
-#endif
-
-#endif
diff --git a/datapath/linux/compat/include/net/sock.h b/datapath/linux/compat/include/net/sock.h
deleted file mode 100644
index 2900704ec..000000000
--- a/datapath/linux/compat/include/net/sock.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef __NET_SOCK_WRAPPER_H
-#define __NET_SOCK_WRAPPER_H 1
-
-#include_next <net/sock.h>
-
-#ifndef __sk_user_data
-#define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data)))
-
-#define rcu_dereference_sk_user_data(sk) rcu_dereference(__sk_user_data((sk)))
-#define rcu_assign_sk_user_data(sk, ptr) rcu_assign_pointer(__sk_user_data((sk)), ptr)
-#endif
-
-#endif
diff --git a/datapath/linux/compat/include/net/stt.h b/datapath/linux/compat/include/net/stt.h
deleted file mode 100644
index d2e63d163..000000000
--- a/datapath/linux/compat/include/net/stt.h
+++ /dev/null
@@ -1,70 +0,0 @@
-#ifndef __NET_STT_H
-#define __NET_STT_H 1
-
-#include <linux/kconfig.h>
-#include <linux/errno.h>
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0) && IS_ENABLED(CONFIG_NETFILTER)
-#include <net/ip_tunnels.h>
-#define OVS_STT
-
-struct stthdr {
- __u8 version;
- __u8 flags;
- __u8 l4_offset;
- __u8 reserved;
- __be16 mss;
- __be16 vlan_tci;
- __be64 key;
-};
-
-/* Padding after the end of the tunnel headers to provide alignment
- * for inner packet IP header after 14 byte Ethernet header.
- */
-#define STT_ETH_PAD 2
-
-#define STT_BASE_HLEN (sizeof(struct stthdr) + STT_ETH_PAD)
-#define STT_HEADER_LEN (sizeof(struct tcphdr) + STT_BASE_HLEN)
-
-static inline struct stthdr *stt_hdr(const struct sk_buff *skb)
-{
- return (struct stthdr *)(skb_transport_header(skb) +
- sizeof(struct tcphdr));
-}
-
-struct net_device *ovs_stt_dev_create_fb(struct net *net, const char *name,
- u8 name_assign_type, u16 dst_port);
-
-netdev_tx_t ovs_stt_xmit(struct sk_buff *skb);
-
-int ovs_stt_init_module(void);
-
-void ovs_stt_cleanup_module(void);
-#else
-static inline int ovs_stt_init_module(void)
-{
- return 0;
-}
-
-static inline void ovs_stt_cleanup_module(void)
-{}
-
-static inline struct net_device *ovs_stt_dev_create_fb(struct net *net, const char *name,
- u8 name_assign_type, u16 dst_port)
-{
- return ERR_PTR(-EOPNOTSUPP);
-}
-static inline netdev_tx_t ovs_stt_xmit(struct sk_buff *skb)
-{
- BUG();
- return NETDEV_TX_OK;
-}
-#endif
-
-#define stt_dev_create_fb ovs_stt_dev_create_fb
-#define stt_init_module ovs_stt_init_module
-#define stt_cleanup_module ovs_stt_cleanup_module
-
-#define stt_fill_metadata_dst ovs_stt_fill_metadata_dst
-int ovs_stt_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb);
-
-#endif /*ifdef__NET_STT_H */
diff --git a/datapath/linux/compat/include/net/tun_proto.h b/datapath/linux/compat/include/net/tun_proto.h
deleted file mode 100644
index 2ea3deba4..000000000
--- a/datapath/linux/compat/include/net/tun_proto.h
+++ /dev/null
@@ -1,49 +0,0 @@
-#ifndef __NET_TUN_PROTO_H
-#define __NET_TUN_PROTO_H
-
-#include <linux/kernel.h>
-
-/* One byte protocol values as defined by VXLAN-GPE and NSH. These will
- * hopefully get a shared IANA registry.
- */
-#define TUN_P_IPV4 0x01
-#define TUN_P_IPV6 0x02
-#define TUN_P_ETHERNET 0x03
-#define TUN_P_NSH 0x04
-#define TUN_P_MPLS_UC 0x05
-
-static inline __be16 tun_p_to_eth_p(u8 proto)
-{
- switch (proto) {
- case TUN_P_IPV4:
- return htons(ETH_P_IP);
- case TUN_P_IPV6:
- return htons(ETH_P_IPV6);
- case TUN_P_ETHERNET:
- return htons(ETH_P_TEB);
- case TUN_P_NSH:
- return htons(ETH_P_NSH);
- case TUN_P_MPLS_UC:
- return htons(ETH_P_MPLS_UC);
- }
- return 0;
-}
-
-static inline u8 tun_p_from_eth_p(__be16 proto)
-{
- switch (proto) {
- case htons(ETH_P_IP):
- return TUN_P_IPV4;
- case htons(ETH_P_IPV6):
- return TUN_P_IPV6;
- case htons(ETH_P_TEB):
- return TUN_P_ETHERNET;
- case htons(ETH_P_NSH):
- return TUN_P_NSH;
- case htons(ETH_P_MPLS_UC):
- return TUN_P_MPLS_UC;
- }
- return 0;
-}
-
-#endif
diff --git a/datapath/linux/compat/include/net/udp.h b/datapath/linux/compat/include/net/udp.h
deleted file mode 100644
index 447999218..000000000
--- a/datapath/linux/compat/include/net/udp.h
+++ /dev/null
@@ -1,62 +0,0 @@
-#ifndef __NET_UDP_WRAPPER_H
-#define __NET_UDP_WRAPPER_H 1
-
-#include <net/ip.h>
-
-#ifdef inet_get_local_port_range
-/* Earlier RHEL7 kernels backport udp_flow_src_port() using an older version of
- * inet_get_local_port_range(). */
-#undef inet_get_local_port_range
-#include_next <net/udp.h>
-#define inet_get_local_port_range rpl_inet_get_local_port_range
-#else
-#include_next <net/udp.h>
-#endif
-
-#ifndef HAVE_UDP_FLOW_SRC_PORT
-static inline __be16 rpl_udp_flow_src_port(struct net *net, struct sk_buff *skb,
- int min, int max, bool use_eth)
-{
- u32 hash;
-
- if (min >= max) {
- /* Use default range */
- inet_get_local_port_range(net, &min, &max);
- }
-
- hash = skb_get_hash(skb);
- if (unlikely(!hash) && use_eth) {
- /* Can't find a normal hash, caller has indicated an Ethernet
- * packet so use that to compute a hash.
- */
- hash = jhash(skb->data, 2 * ETH_ALEN,
- (__force u32) skb->protocol);
- }
-
- /* Since this is being sent on the wire obfuscate hash a bit
- * to minimize possbility that any useful information to an
- * attacker is leaked. Only upper 16 bits are relevant in the
- * computation for 16 bit port value.
- */
- hash ^= hash << 16;
-
- return htons((((u64) hash * (max - min)) >> 32) + min);
-}
-
-#define udp_flow_src_port rpl_udp_flow_src_port
-#endif
-
-#ifndef HAVE_UDP_V4_CHECK
-static inline __sum16 udp_v4_check(int len, __be32 saddr,
- __be32 daddr, __wsum base)
-{
- return csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base);
-}
-#endif
-
-#ifndef USE_UPSTREAM_TUNNEL
-#define udp_set_csum rpl_udp_set_csum
-void rpl_udp_set_csum(bool nocheck, struct sk_buff *skb,
- __be32 saddr, __be32 daddr, int len);
-#endif
-#endif
diff --git a/datapath/linux/compat/include/net/udp_tunnel.h b/datapath/linux/compat/include/net/udp_tunnel.h
deleted file mode 100644
index 6e4063359..000000000
--- a/datapath/linux/compat/include/net/udp_tunnel.h
+++ /dev/null
@@ -1,208 +0,0 @@
-#ifndef __NET_UDP_TUNNEL_WRAPPER_H
-#define __NET_UDP_TUNNEL_WRAPPER_H
-
-#include <linux/version.h>
-#include <linux/kconfig.h>
-
-#include <net/addrconf.h>
-#include <net/dst_metadata.h>
-#include <linux/netdev_features.h>
-
-#ifdef USE_UPSTREAM_TUNNEL
-#include_next <net/udp_tunnel.h>
-
-#else
-
-#include <net/addrconf.h>
-#include <net/ip_tunnels.h>
-#include <net/udp.h>
-
-struct udp_port_cfg {
- u8 family;
-
- /* Used only for kernel-created sockets */
- union {
- struct in_addr local_ip;
-#if IS_ENABLED(CONFIG_IPV6)
- struct in6_addr local_ip6;
-#endif
- };
-
- union {
- struct in_addr peer_ip;
-#if IS_ENABLED(CONFIG_IPV6)
- struct in6_addr peer_ip6;
-#endif
- };
-
- __be16 local_udp_port;
- __be16 peer_udp_port;
- unsigned int use_udp_checksums:1,
- use_udp6_tx_checksums:1,
- use_udp6_rx_checksums:1,
- ipv6_v6only:1;
-};
-
-#ifdef HAVE_NDO_UDP_TUNNEL_ADD
-enum udp_parsable_tunnel_type {
- UDP_TUNNEL_TYPE_VXLAN, /* RFC 7348 */
- UDP_TUNNEL_TYPE_GENEVE, /* draft-ietf-nvo3-geneve */
- UDP_TUNNEL_TYPE_VXLAN_GPE, /* draft-ietf-nvo3-vxlan-gpe */
-};
-
-struct udp_tunnel_info {
- unsigned short type;
- sa_family_t sa_family;
- __be16 port;
-};
-#endif
-
-#define udp_sock_create4 rpl_udp_sock_create4
-int rpl_udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
- struct socket **sockp);
-
-#define udp_sock_create6 rpl_udp_sock_create6
-#if IS_ENABLED(CONFIG_IPV6)
-int rpl_udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
- struct socket **sockp);
-#else
-static inline int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
- struct socket **sockp)
-{
- return -EPFNOSUPPORT;
-}
-#endif
-
-#define udp_sock_create rpl_udp_sock_create
-static inline int udp_sock_create(struct net *net,
- struct udp_port_cfg *cfg,
- struct socket **sockp)
-{
- if (cfg->family == AF_INET)
- return udp_sock_create4(net, cfg, sockp);
-
- if (cfg->family == AF_INET6)
- return udp_sock_create6(net, cfg, sockp);
-
- return -EPFNOSUPPORT;
-}
-
-typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb);
-typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk);
-typedef struct sk_buff **(*udp_tunnel_gro_receive_t)(struct sock *sk,
- struct sk_buff **head,
- struct sk_buff *skb);
-typedef int (*udp_tunnel_gro_complete_t)(struct sock *sk, struct sk_buff *skb,
- int nhoff);
-
-struct udp_tunnel_sock_cfg {
- void *sk_user_data; /* user data used by encap_rcv call back */
- /* Used for setting up udp_sock fields, see udp.h for details */
- __u8 encap_type;
- udp_tunnel_encap_rcv_t encap_rcv;
- udp_tunnel_encap_destroy_t encap_destroy;
-#ifdef HAVE_UDP_TUNNEL_SOCK_CFG_GRO_RECEIVE
- udp_tunnel_gro_receive_t gro_receive;
- udp_tunnel_gro_complete_t gro_complete;
-#endif
-};
-
-/* Setup the given (UDP) sock to receive UDP encapsulated packets */
-#define setup_udp_tunnel_sock rpl_setup_udp_tunnel_sock
-void rpl_setup_udp_tunnel_sock(struct net *net, struct socket *sock,
- struct udp_tunnel_sock_cfg *sock_cfg);
-
-/* Transmit the skb using UDP encapsulation. */
-#define udp_tunnel_xmit_skb rpl_udp_tunnel_xmit_skb
-void rpl_udp_tunnel_xmit_skb(struct rtable *rt,
- struct sock *sk, struct sk_buff *skb,
- __be32 src, __be32 dst, __u8 tos, __u8 ttl,
- __be16 df, __be16 src_port, __be16 dst_port,
- bool xnet, bool nocheck);
-
-
-#define udp_tunnel_sock_release rpl_udp_tunnel_sock_release
-void rpl_udp_tunnel_sock_release(struct socket *sock);
-
-#define udp_tunnel_encap_enable rpl_udp_tunnel_encap_enable
-static inline void udp_tunnel_encap_enable(struct socket *sock)
-{
-#if IS_ENABLED(CONFIG_IPV6)
- if (sock->sk->sk_family == PF_INET6)
-#ifdef HAVE_IPV6_STUB
- ipv6_stub->udpv6_encap_enable();
-#else
- udpv6_encap_enable();
-#endif
- else
-#endif
- udp_encap_enable();
-}
-
-#if IS_ENABLED(CONFIG_IPV6)
-#define udp_tunnel6_xmit_skb rpl_udp_tunnel6_xmit_skb
-int rpl_udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb,
- struct net_device *dev, struct in6_addr *saddr,
- struct in6_addr *daddr,
- __u8 prio, __u8 ttl, __be32 label, __be16 src_port,
- __be16 dst_port, bool nocheck);
-#endif
-
-static inline void udp_tunnel_gro_complete(struct sk_buff *skb, int nhoff)
-{
- struct udphdr *uh;
-
- uh = (struct udphdr *)(skb->data + nhoff - sizeof(struct udphdr));
- skb_shinfo(skb)->gso_type |= uh->check ?
- SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
-}
-
-void ovs_udp_gso(struct sk_buff *skb);
-void ovs_udp_csum_gso(struct sk_buff *skb);
-
-static inline int rpl_udp_tunnel_handle_offloads(struct sk_buff *skb,
- bool udp_csum)
-{
- void (*fix_segment)(struct sk_buff *);
- int type = 0;
-
- type |= udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
-#ifndef USE_UPSTREAM_TUNNEL_GSO
- if (!udp_csum)
- fix_segment = ovs_udp_gso;
- else
- fix_segment = ovs_udp_csum_gso;
- /* This functuin is not used by vxlan lan tunnel. On older
- * udp offload only supports vxlan, therefore fallback to software
- * segmentation.
- */
- type = 0;
-#else
- fix_segment = NULL;
-#endif
-
- return ovs_iptunnel_handle_offloads(skb, type, fix_segment);
-}
-
-#define udp_tunnel_handle_offloads rpl_udp_tunnel_handle_offloads
-static inline void ovs_udp_tun_rx_dst(struct metadata_dst *md_dst,
- struct sk_buff *skb,
- unsigned short family,
- __be16 flags, __be64 tunnel_id, int md_size)
-{
- struct ip_tunnel_info *info = &md_dst->u.tun_info;
-
- if (family == AF_INET)
- ovs_ip_tun_rx_dst(md_dst, skb, flags, tunnel_id, md_size);
- else
- ovs_ipv6_tun_rx_dst(md_dst, skb, flags, tunnel_id, md_size);
-
- info->key.tp_src = udp_hdr(skb)->source;
- info->key.tp_dst = udp_hdr(skb)->dest;
- if (udp_hdr(skb)->check)
- info->key.tun_flags |= TUNNEL_CSUM;
-}
-#endif /* USE_UPSTREAM_TUNNEL */
-
-#endif
diff --git a/datapath/linux/compat/include/net/vrf.h b/datapath/linux/compat/include/net/vrf.h
deleted file mode 100644
index f5b6e8900..000000000
--- a/datapath/linux/compat/include/net/vrf.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * include/net/net_vrf.h - adds vrf dev structure definitions
- * Copyright (c) 2015 Cumulus Networks
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#ifndef __LINUX_NET_VRF_WRAPPER_H
-#define __LINUX_NET_VRF_WRAPPER_H
-
-#include <linux/version.h>
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,3,0)
-#include_next <net/vrf.h>
-#else
-
-static inline int vrf_master_ifindex_rcu(const struct net_device *dev)
-{
- return 0;
-}
-#endif
-
-#endif /* __LINUX_NET_VRF_WRAPPER_H */
diff --git a/datapath/linux/compat/include/net/vxlan.h b/datapath/linux/compat/include/net/vxlan.h
deleted file mode 100644
index 18f5474d9..000000000
--- a/datapath/linux/compat/include/net/vxlan.h
+++ /dev/null
@@ -1,444 +0,0 @@
-#ifndef __NET_VXLAN_WRAPPER_H
-#define __NET_VXLAN_WRAPPER_H 1
-
-#ifdef CONFIG_INET
-#include <net/udp_tunnel.h>
-#endif
-
-#ifdef USE_UPSTREAM_TUNNEL
-#include_next <net/vxlan.h>
-
-static inline int rpl_vxlan_init_module(void)
-{
- return 0;
-}
-static inline void rpl_vxlan_cleanup_module(void)
-{}
-
-#define vxlan_xmit dev_queue_xmit
-
-#ifdef CONFIG_INET
-#ifndef HAVE_NAME_ASSIGN_TYPE
-static inline struct net_device *rpl_vxlan_dev_create(
- struct net *net, const char *name, u8 name_assign_type,
- struct vxlan_config *conf) {
- return vxlan_dev_create(net, name, conf);
-}
-#define vxlan_dev_create rpl_vxlan_dev_create
-#endif
-#endif
-
-#else /* USE_UPSTREAM_TUNNEL */
-
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/if_vlan.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <linux/udp.h>
-#include <net/dst_cache.h>
-#include <net/dst_metadata.h>
-
-#include "compat.h"
-#include "gso.h"
-
-/* VXLAN protocol (RFC 7348) header:
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * |R|R|R|R|I|R|R|R| Reserved |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * | VXLAN Network Identifier (VNI) | Reserved |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- *
- * I = VXLAN Network Identifier (VNI) present.
- */
-struct vxlanhdr {
- __be32 vx_flags;
- __be32 vx_vni;
-};
-
-/* VXLAN header flags. */
-#define VXLAN_HF_VNI cpu_to_be32(BIT(27))
-
-#define VXLAN_N_VID (1u << 24)
-#define VXLAN_VID_MASK (VXLAN_N_VID - 1)
-#define VXLAN_VNI_MASK cpu_to_be32(VXLAN_VID_MASK << 8)
-#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr))
-
-#define VNI_HASH_BITS 10
-#define VNI_HASH_SIZE (1<<VNI_HASH_BITS)
-#define FDB_HASH_BITS 8
-#define FDB_HASH_SIZE (1<<FDB_HASH_BITS)
-
-/* Remote checksum offload for VXLAN (VXLAN_F_REMCSUM_[RT]X):
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * |R|R|R|R|I|R|R|R|R|R|C| Reserved |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * | VXLAN Network Identifier (VNI) |O| Csum start |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- *
- * C = Remote checksum offload bit. When set indicates that the
- * remote checksum offload data is present.
- *
- * O = Offset bit. Indicates the checksum offset relative to
- * checksum start.
- *
- * Csum start = Checksum start divided by two.
- *
- * http://tools.ietf.org/html/draft-herbert-vxlan-rco
- */
-
-/* VXLAN-RCO header flags. */
-#define VXLAN_HF_RCO cpu_to_be32(BIT(21))
-
-/* Remote checksum offload header option */
-#define VXLAN_RCO_MASK cpu_to_be32(0x7f) /* Last byte of vni field */
-#define VXLAN_RCO_UDP cpu_to_be32(0x80) /* Indicate UDP RCO (TCP when not set *) */
-#define VXLAN_RCO_SHIFT 1 /* Left shift of start */
-#define VXLAN_RCO_SHIFT_MASK ((1 << VXLAN_RCO_SHIFT) - 1)
-#define VXLAN_MAX_REMCSUM_START (0x7f << VXLAN_RCO_SHIFT)
-
-/*
- * VXLAN Group Based Policy Extension (VXLAN_F_GBP):
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * |G|R|R|R|I|R|R|R|R|D|R|R|A|R|R|R| Group Policy ID |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * | VXLAN Network Identifier (VNI) | Reserved |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- *
- * G = Group Policy ID present.
- *
- * D = Don't Learn bit. When set, this bit indicates that the egress
- * VTEP MUST NOT learn the source address of the encapsulated frame.
- *
- * A = Indicates that the group policy has already been applied to
- * this packet. Policies MUST NOT be applied by devices when the
- * A bit is set.
- *
- * https://tools.ietf.org/html/draft-smith-vxlan-group-policy
- */
-struct vxlanhdr_gbp {
- u8 vx_flags;
-#ifdef __LITTLE_ENDIAN_BITFIELD
- u8 reserved_flags1:3,
- policy_applied:1,
- reserved_flags2:2,
- dont_learn:1,
- reserved_flags3:1;
-#elif defined(__BIG_ENDIAN_BITFIELD)
- u8 reserved_flags1:1,
- dont_learn:1,
- reserved_flags2:2,
- policy_applied:1,
- reserved_flags3:3;
-#else
-#error "Please fix <asm/byteorder.h>"
-#endif
- __be16 policy_id;
- __be32 vx_vni;
-};
-
-/* VXLAN-GBP header flags. */
-#define VXLAN_HF_GBP cpu_to_be32(BIT(31))
-
-#define VXLAN_GBP_USED_BITS (VXLAN_HF_GBP | cpu_to_be32(0xFFFFFF))
-
-/* skb->mark mapping
- *
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * |R|R|R|R|R|R|R|R|R|D|R|R|A|R|R|R| Group Policy ID |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- */
-#define VXLAN_GBP_DONT_LEARN (BIT(6) << 16)
-#define VXLAN_GBP_POLICY_APPLIED (BIT(3) << 16)
-#define VXLAN_GBP_ID_MASK (0xFFFF)
-
-/*
- * VXLAN Generic Protocol Extension (VXLAN_F_GPE):
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * |R|R|Ver|I|P|R|O| Reserved |Next Protocol |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * | VXLAN Network Identifier (VNI) | Reserved |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- *
- * Ver = Version. Indicates VXLAN GPE protocol version.
- *
- * P = Next Protocol Bit. The P bit is set to indicate that the
- * Next Protocol field is present.
- *
- * O = OAM Flag Bit. The O bit is set to indicate that the packet
- * is an OAM packet.
- *
- * Next Protocol = This 8 bit field indicates the protocol header
- * immediately following the VXLAN GPE header.
- *
- * https://tools.ietf.org/html/draft-ietf-nvo3-vxlan-gpe-01
- */
-
-struct vxlanhdr_gpe {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
- u8 oam_flag:1,
- reserved_flags1:1,
- np_applied:1,
- instance_applied:1,
- version:2,
-reserved_flags2:2;
-#elif defined(__BIG_ENDIAN_BITFIELD)
- u8 reserved_flags2:2,
- version:2,
- instance_applied:1,
- np_applied:1,
- reserved_flags1:1,
- oam_flag:1;
-#endif
- u8 reserved_flags3;
- u8 reserved_flags4;
- u8 next_protocol;
- __be32 vx_vni;
-};
-
-/* VXLAN-GPE header flags. */
-#define VXLAN_HF_VER cpu_to_be32(BIT(29) | BIT(28))
-#define VXLAN_HF_NP cpu_to_be32(BIT(26))
-#define VXLAN_HF_OAM cpu_to_be32(BIT(24))
-
-#define VXLAN_GPE_USED_BITS (VXLAN_HF_VER | VXLAN_HF_NP | VXLAN_HF_OAM | \
- cpu_to_be32(0xff))
-
-struct vxlan_metadata {
- u32 gbp;
-};
-
-/* per UDP socket information */
-struct vxlan_sock {
- struct hlist_node hlist;
- struct socket *sock;
- struct hlist_head vni_list[VNI_HASH_SIZE];
- atomic_t refcnt;
- u32 flags;
-#ifdef HAVE_UDP_OFFLOAD
- struct udp_offload udp_offloads;
-#endif
-};
-
-union vxlan_addr {
- struct sockaddr_in sin;
- struct sockaddr_in6 sin6;
- struct sockaddr sa;
-};
-
-struct vxlan_rdst {
- union vxlan_addr remote_ip;
- __be16 remote_port;
- __be32 remote_vni;
- u32 remote_ifindex;
- struct list_head list;
- struct rcu_head rcu;
- struct dst_cache dst_cache;
-};
-
-struct vxlan_config {
- union vxlan_addr remote_ip;
- union vxlan_addr saddr;
- __be32 vni;
- int remote_ifindex;
- int mtu;
- __be16 dst_port;
- u16 port_min;
- u16 port_max;
- u8 tos;
- u8 ttl;
- __be32 label;
- u32 flags;
- unsigned long age_interval;
- unsigned int addrmax;
- bool no_share;
-};
-
-/* Pseudo network device */
-struct vxlan_dev {
- struct hlist_node hlist; /* vni hash table */
- struct list_head next; /* vxlan's per namespace list */
- struct vxlan_sock __rcu *vn4_sock; /* listening socket for IPv4 */
-#if IS_ENABLED(CONFIG_IPV6)
- struct vxlan_sock __rcu *vn6_sock; /* listening socket for IPv6 */
-#endif
- struct net_device *dev;
- struct net *net; /* netns for packet i/o */
- struct vxlan_rdst default_dst; /* default destination */
- u32 flags; /* VXLAN_F_* in vxlan.h */
-
- struct timer_list age_timer;
- spinlock_t hash_lock;
- unsigned int addrcnt;
-
- struct vxlan_config cfg;
-
- struct hlist_head fdb_head[FDB_HASH_SIZE];
-};
-
-#define VXLAN_F_LEARN 0x01
-#define VXLAN_F_PROXY 0x02
-#define VXLAN_F_RSC 0x04
-#define VXLAN_F_L2MISS 0x08
-#define VXLAN_F_L3MISS 0x10
-#define VXLAN_F_IPV6 0x20
-#define VXLAN_F_UDP_ZERO_CSUM_TX 0x40
-#define VXLAN_F_UDP_ZERO_CSUM6_TX 0x80
-#define VXLAN_F_UDP_ZERO_CSUM6_RX 0x100
-#define VXLAN_F_REMCSUM_TX 0x200
-#define VXLAN_F_REMCSUM_RX 0x400
-#define VXLAN_F_GBP 0x800
-#define VXLAN_F_REMCSUM_NOPARTIAL 0x1000
-#define VXLAN_F_COLLECT_METADATA 0x2000
-#define VXLAN_F_GPE 0x4000
-
-/* Flags that are used in the receive path. These flags must match in
- * order for a socket to be shareable
- */
-#define VXLAN_F_RCV_FLAGS (VXLAN_F_GBP | \
- VXLAN_F_GPE | \
- VXLAN_F_UDP_ZERO_CSUM6_RX | \
- VXLAN_F_REMCSUM_RX | \
- VXLAN_F_REMCSUM_NOPARTIAL | \
- VXLAN_F_COLLECT_METADATA)
-
-/* Flags that can be set together with VXLAN_F_GPE. */
-#define VXLAN_F_ALLOWED_GPE (VXLAN_F_GPE | \
- VXLAN_F_IPV6 | \
- VXLAN_F_UDP_ZERO_CSUM_TX | \
- VXLAN_F_UDP_ZERO_CSUM6_TX | \
- VXLAN_F_UDP_ZERO_CSUM6_RX | \
- VXLAN_F_COLLECT_METADATA)
-
-#define vxlan_dev_create rpl_vxlan_dev_create
-struct net_device *rpl_vxlan_dev_create(struct net *net, const char *name,
- u8 name_assign_type, struct vxlan_config *conf);
-
-static inline netdev_features_t vxlan_features_check(struct sk_buff *skb,
- netdev_features_t features)
-{
- u8 l4_hdr = 0;
-
- if (!skb->encapsulation)
- return features;
-
- switch (vlan_get_protocol(skb)) {
- case htons(ETH_P_IP):
- l4_hdr = ip_hdr(skb)->protocol;
- break;
- case htons(ETH_P_IPV6):
- l4_hdr = ipv6_hdr(skb)->nexthdr;
- break;
- default:
- return features;;
- }
-
- if ((l4_hdr == IPPROTO_UDP) && (
-#ifdef HAVE_INNER_PROTOCOL_TYPE
- skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
-#endif
-#ifdef HAVE_INNER_PROTOCOL
- skb->inner_protocol != htons(ETH_P_TEB) ||
-#endif
- (skb_inner_mac_header(skb) - skb_transport_header(skb) !=
- sizeof(struct udphdr) + sizeof(struct vxlanhdr)) ||
- (skb->ip_summed != CHECKSUM_NONE &&
- !can_checksum_protocol(features, inner_eth_hdr(skb)->h_proto))))
- return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
-
- return features;
-}
-
-/* IP header + UDP + VXLAN + Ethernet header */
-#define VXLAN_HEADROOM (20 + 8 + 8 + 14)
-/* IPv6 header + UDP + VXLAN + Ethernet header */
-#define VXLAN6_HEADROOM (40 + 8 + 8 + 14)
-
-static inline struct vxlanhdr *vxlan_hdr(struct sk_buff *skb)
-{
- return (struct vxlanhdr *)(udp_hdr(skb) + 1);
-}
-
-static inline __be32 vxlan_vni(__be32 vni_field)
-{
-#if defined(__BIG_ENDIAN)
- return (__force __be32)((__force u32)vni_field >> 8);
-#else
- return (__force __be32)((__force u32)(vni_field & VXLAN_VNI_MASK) << 8);
-#endif
-}
-
-static inline __be32 vxlan_vni_field(__be32 vni)
-{
-#if defined(__BIG_ENDIAN)
- return (__force __be32)((__force u32)vni << 8);
-#else
- return (__force __be32)((__force u32)vni >> 8);
-#endif
-}
-
-static inline __be32 vxlan_tun_id_to_vni(__be64 tun_id)
-{
-#if defined(__BIG_ENDIAN)
- return (__force __be32)tun_id;
-#else
- return (__force __be32)((__force u64)tun_id >> 32);
-#endif
-}
-
-static inline __be64 vxlan_vni_to_tun_id(__be32 vni)
-{
-#if defined(__BIG_ENDIAN)
- return (__force __be64)vni;
-#else
- return (__force __be64)((u64)(__force u32)vni << 32);
-#endif
-}
-
-static inline size_t vxlan_rco_start(__be32 vni_field)
-{
- return be32_to_cpu(vni_field & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT;
-}
-
-static inline size_t vxlan_rco_offset(__be32 vni_field)
-{
- return (vni_field & VXLAN_RCO_UDP) ?
- offsetof(struct udphdr, check) :
- offsetof(struct tcphdr, check);
-}
-
-static inline __be32 vxlan_compute_rco(unsigned int start, unsigned int offset)
-{
- __be32 vni_field = cpu_to_be32(start >> VXLAN_RCO_SHIFT);
-
- if (offset == offsetof(struct udphdr, check))
- vni_field |= VXLAN_RCO_UDP;
- return vni_field;
-}
-
-static inline void vxlan_get_rx_port(struct net_device *netdev)
-{
- ASSERT_RTNL();
- call_netdevice_notifiers(NETDEV_OFFLOAD_PUSH_VXLAN, netdev);
-}
-
-static inline unsigned short vxlan_get_sk_family(struct vxlan_sock *vs)
-{
- return vs->sock->sk->sk_family;
-}
-
-int rpl_vxlan_init_module(void);
-void rpl_vxlan_cleanup_module(void);
-
-#define vxlan_fill_metadata_dst ovs_vxlan_fill_metadata_dst
-int ovs_vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb);
-
-#define vxlan_xmit rpl_vxlan_xmit
-netdev_tx_t rpl_vxlan_xmit(struct sk_buff *skb);
-
-#endif /* USE_UPSTREAM_TUNNEL */
-
-#define vxlan_init_module rpl_vxlan_init_module
-#define vxlan_cleanup_module rpl_vxlan_cleanup_module
-
-#endif
diff --git a/datapath/linux/compat/include/uapi/linux/netfilter.h b/datapath/linux/compat/include/uapi/linux/netfilter.h
deleted file mode 100644
index 56895b17b..000000000
--- a/datapath/linux/compat/include/uapi/linux/netfilter.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef _NETFILTER_WRAPPER_H
-#define _NETFILTER_WRAPPER_H
-
-#include_next <uapi/linux/netfilter.h>
-
-/*
- * NFPROTO_INET was introduced in net-next commit 1d49144c0aaa
- * ("netfilter: nf_tables: add "inet" table for IPv4/IPv6") in v3.14.
- * Define this symbol to support back to v3.10 kernel. */
-#ifndef HAVE_NFPROTO_INET
-#define NFPROTO_INET 1
-#endif
-
-#endif /* _NETFILTER_WRAPPER_H */
diff --git a/datapath/linux/compat/inet_fragment.c b/datapath/linux/compat/inet_fragment.c
deleted file mode 100644
index 21736e61a..000000000
--- a/datapath/linux/compat/inet_fragment.c
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * inet fragments management
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * Authors: Pavel Emelyanov <xemul@openvz.org>
- * Started as consolidation of ipv4/ip_fragment.c,
- * ipv6/reassembly. and ipv6 nf conntrack reassembly
- */
-
-#ifndef HAVE_CORRECT_MRU_HANDLING
-
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <linux/module.h>
-#include <linux/timer.h>
-#include <linux/mm.h>
-#include <linux/random.h>
-#include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
-#include <linux/slab.h>
-
-#include <net/sock.h>
-#include <net/inet_frag.h>
-#include <net/inet_ecn.h>
-
-
-#endif /* !HAVE_CORRECT_MRU_HANDLING */
diff --git a/datapath/linux/compat/ip6_gre.c b/datapath/linux/compat/ip6_gre.c
deleted file mode 100644
index 3aa9844b3..000000000
--- a/datapath/linux/compat/ip6_gre.c
+++ /dev/null
@@ -1,2746 +0,0 @@
-/*
- * GRE over IPv6 protocol decoder.
- *
- * Authors: Dmitry Kozlov (xeb@mail.ru)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#ifndef USE_UPSTREAM_TUNNEL
-#include <linux/capability.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/uaccess.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <linux/in.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include <linux/if_arp.h>
-#include <linux/init.h>
-#include <linux/in6.h>
-#include <linux/inetdevice.h>
-#include <linux/igmp.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/etherdevice.h>
-#include <linux/if_ether.h>
-#include <linux/hash.h>
-#include <linux/if_tunnel.h>
-#include <linux/ip6_tunnel.h>
-
-#include <net/sock.h>
-#include <net/ip.h>
-#include <net/ip_tunnels.h>
-#include <net/icmp.h>
-#include <net/protocol.h>
-#include <net/addrconf.h>
-#include <net/arp.h>
-#include <net/checksum.h>
-#include <net/dsfield.h>
-#include <net/inet_ecn.h>
-#include <net/xfrm.h>
-#include <net/net_namespace.h>
-#include <net/netns/generic.h>
-#include <net/rtnetlink.h>
-
-#include <net/ipv6.h>
-#include <net/ip6_fib.h>
-#include <net/ip6_route.h>
-#include <net/ip6_tunnel.h>
-#include <net/gre.h>
-#include <net/erspan.h>
-#include <net/dst_metadata.h>
-
-#include "vport-netdev.h"
-
-#define IP6_GRE_HASH_SIZE_SHIFT 5
-#define IP6_GRE_HASH_SIZE (1 << IP6_GRE_HASH_SIZE_SHIFT)
-
-static unsigned int ip6gre_net_id __read_mostly;
-static bool ip6_gre_loaded = false;
-struct ip6gre_net {
- struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE];
-
- struct ip6_tnl __rcu *collect_md_tun;
- struct ip6_tnl __rcu *collect_md_tun_erspan;
- struct net_device *fb_tunnel_dev;
-};
-
-static struct rtnl_link_ops ip6gre_link_ops __read_mostly;
-static struct rtnl_link_ops ip6gre_tap_ops __read_mostly;
-static struct rtnl_link_ops ip6erspan_tap_ops __read_mostly;
-static int ip6gre_tunnel_init(struct net_device *dev);
-static void ip6gre_tunnel_setup(struct net_device *dev);
-static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t);
-static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu);
-static void ip6erspan_tnl_link_config(struct ip6_tnl *t, int set_mtu);
-
-/* Tunnel hash table */
-
-/*
- 4 hash tables:
-
- 3: (remote,local)
- 2: (remote,*)
- 1: (*,local)
- 0: (*,*)
-
- We require exact key match i.e. if a key is present in packet
- it will match only tunnel with the same key; if it is not present,
- it will match only keyless tunnel.
-
- All keysless packets, if not matched configured keyless tunnels
- will match fallback tunnel.
- */
-
-#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(IP6_GRE_HASH_SIZE - 1))
-static u32 HASH_ADDR(const struct in6_addr *addr)
-{
- u32 hash = ipv6_addr_hash(addr);
-
- return hash_32(hash, IP6_GRE_HASH_SIZE_SHIFT);
-}
-
-#define tunnels_r_l tunnels[3]
-#define tunnels_r tunnels[2]
-#define tunnels_l tunnels[1]
-#define tunnels_wc tunnels[0]
-
-/* Given src, dst and key, find appropriate for input tunnel. */
-
-static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
- const struct in6_addr *remote, const struct in6_addr *local,
- __be32 key, __be16 gre_proto)
-{
- struct net *net = dev_net(dev);
- int link = dev->ifindex;
- unsigned int h0 = HASH_ADDR(remote);
- unsigned int h1 = HASH_KEY(key);
- struct ip6_tnl *t, *cand = NULL;
- struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
- int dev_type = (gre_proto == htons(ETH_P_TEB) ||
- gre_proto == htons(ETH_P_ERSPAN) ||
- gre_proto == htons(ETH_P_ERSPAN2)) ?
- ARPHRD_ETHER : ARPHRD_IP6GRE;
- int score, cand_score = 4;
-
- for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) {
- if (!ipv6_addr_equal(local, &t->parms.laddr) ||
- !ipv6_addr_equal(remote, &t->parms.raddr) ||
- key != t->parms.i_key ||
- !(t->dev->flags & IFF_UP))
- continue;
-
- if (t->dev->type != ARPHRD_IP6GRE &&
- t->dev->type != dev_type)
- continue;
-
- score = 0;
- if (t->parms.link != link)
- score |= 1;
- if (t->dev->type != dev_type)
- score |= 2;
- if (score == 0)
- return t;
-
- if (score < cand_score) {
- cand = t;
- cand_score = score;
- }
- }
-
- for_each_ip_tunnel_rcu(t, ign->tunnels_r[h0 ^ h1]) {
- if (!ipv6_addr_equal(remote, &t->parms.raddr) ||
- key != t->parms.i_key ||
- !(t->dev->flags & IFF_UP))
- continue;
-
- if (t->dev->type != ARPHRD_IP6GRE &&
- t->dev->type != dev_type)
- continue;
-
- score = 0;
- if (t->parms.link != link)
- score |= 1;
- if (t->dev->type != dev_type)
- score |= 2;
- if (score == 0)
- return t;
-
- if (score < cand_score) {
- cand = t;
- cand_score = score;
- }
- }
-
- for_each_ip_tunnel_rcu(t, ign->tunnels_l[h1]) {
- if ((!ipv6_addr_equal(local, &t->parms.laddr) &&
- (!ipv6_addr_equal(local, &t->parms.raddr) ||
- !ipv6_addr_is_multicast(local))) ||
- key != t->parms.i_key ||
- !(t->dev->flags & IFF_UP))
- continue;
-
- if (t->dev->type != ARPHRD_IP6GRE &&
- t->dev->type != dev_type)
- continue;
-
- score = 0;
- if (t->parms.link != link)
- score |= 1;
- if (t->dev->type != dev_type)
- score |= 2;
- if (score == 0)
- return t;
-
- if (score < cand_score) {
- cand = t;
- cand_score = score;
- }
- }
-
- for_each_ip_tunnel_rcu(t, ign->tunnels_wc[h1]) {
- if (t->parms.i_key != key ||
- !(t->dev->flags & IFF_UP))
- continue;
-
- if (t->dev->type != ARPHRD_IP6GRE &&
- t->dev->type != dev_type)
- continue;
-
- score = 0;
- if (t->parms.link != link)
- score |= 1;
- if (t->dev->type != dev_type)
- score |= 2;
- if (score == 0)
- return t;
-
- if (score < cand_score) {
- cand = t;
- cand_score = score;
- }
- }
-
- if (cand)
- return cand;
-
- if (gre_proto == htons(ETH_P_ERSPAN) ||
- gre_proto == htons(ETH_P_ERSPAN2))
- t = rcu_dereference(ign->collect_md_tun_erspan);
- else
- t = rcu_dereference(ign->collect_md_tun);
-
- if (t && t->dev->flags & IFF_UP)
- return t;
-
- dev = ign->fb_tunnel_dev;
- if (dev->flags & IFF_UP)
- return netdev_priv(dev);
-
- return NULL;
-}
-
-static struct ip6_tnl __rcu **__ip6gre_bucket(struct ip6gre_net *ign,
- const struct __ip6_tnl_parm *p)
-{
- const struct in6_addr *remote = &p->raddr;
- const struct in6_addr *local = &p->laddr;
- unsigned int h = HASH_KEY(p->i_key);
- int prio = 0;
-
- if (!ipv6_addr_any(local))
- prio |= 1;
- if (!ipv6_addr_any(remote) && !ipv6_addr_is_multicast(remote)) {
- prio |= 2;
- h ^= HASH_ADDR(remote);
- }
-
- return &ign->tunnels[prio][h];
-}
-
-static void ip6gre_tunnel_link_md(struct ip6gre_net *ign, struct ip6_tnl *t)
-{
- if (t->parms.collect_md)
- rcu_assign_pointer(ign->collect_md_tun, t);
-}
-
-static void ip6erspan_tunnel_link_md(struct ip6gre_net *ign, struct ip6_tnl *t)
-{
- if (t->parms.collect_md)
- rcu_assign_pointer(ign->collect_md_tun_erspan, t);
-}
-
-static void ip6gre_tunnel_unlink_md(struct ip6gre_net *ign, struct ip6_tnl *t)
-{
- if (t->parms.collect_md)
- rcu_assign_pointer(ign->collect_md_tun, NULL);
-}
-
-static void ip6erspan_tunnel_unlink_md(struct ip6gre_net *ign,
- struct ip6_tnl *t)
-{
- if (t->parms.collect_md)
- rcu_assign_pointer(ign->collect_md_tun_erspan, NULL);
-}
-
-static inline struct ip6_tnl __rcu **ip6gre_bucket(struct ip6gre_net *ign,
- const struct ip6_tnl *t)
-{
- return __ip6gre_bucket(ign, &t->parms);
-}
-
-static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t)
-{
- struct ip6_tnl __rcu **tp = ip6gre_bucket(ign, t);
-
- rcu_assign_pointer(t->next, rtnl_dereference(*tp));
- rcu_assign_pointer(*tp, t);
-}
-
-static void ip6gre_tunnel_unlink(struct ip6gre_net *ign, struct ip6_tnl *t)
-{
- struct ip6_tnl __rcu **tp;
- struct ip6_tnl *iter;
-
- for (tp = ip6gre_bucket(ign, t);
- (iter = rtnl_dereference(*tp)) != NULL;
- tp = &iter->next) {
- if (t == iter) {
- rcu_assign_pointer(*tp, t->next);
- break;
- }
- }
-}
-
-static struct ip6_tnl *ip6gre_tunnel_find(struct net *net,
- const struct __ip6_tnl_parm *parms,
- int type)
-{
- const struct in6_addr *remote = &parms->raddr;
- const struct in6_addr *local = &parms->laddr;
- __be32 key = parms->i_key;
- int link = parms->link;
- struct ip6_tnl *t;
- struct ip6_tnl __rcu **tp;
- struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
-
- for (tp = __ip6gre_bucket(ign, parms);
- (t = rtnl_dereference(*tp)) != NULL;
- tp = &t->next)
- if (ipv6_addr_equal(local, &t->parms.laddr) &&
- ipv6_addr_equal(remote, &t->parms.raddr) &&
- key == t->parms.i_key &&
- link == t->parms.link &&
- type == t->dev->type)
- break;
-
- return t;
-}
-
-static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
- const struct __ip6_tnl_parm *parms, int create)
-{
- struct ip6_tnl *t, *nt;
- struct net_device *dev;
- char name[IFNAMSIZ];
- struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
-
- t = ip6gre_tunnel_find(net, parms, ARPHRD_IP6GRE);
- if (t && create)
- return NULL;
- if (t || !create)
- return t;
-
- if (parms->name[0])
- strlcpy(name, parms->name, IFNAMSIZ);
- else
- strlcpy(name, "ovs-ip6gre%d", IFNAMSIZ);
-
- dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN,
- ip6gre_tunnel_setup);
- if (!dev)
- return NULL;
-
- dev_net_set(dev, net);
-
- nt = netdev_priv(dev);
- nt->parms = *parms;
- dev->rtnl_link_ops = &ip6gre_link_ops;
-
- nt->dev = dev;
- nt->net = dev_net(dev);
-
- if (register_netdevice(dev) < 0)
- goto failed_free;
-
- ip6gre_tnl_link_config(nt, 1);
-
- /* Can use a lockless transmit, unless we generate output sequences */
- if (!(nt->parms.o_flags & TUNNEL_SEQ))
- dev->features |= NETIF_F_LLTX;
-
- dev_hold(dev);
- ip6gre_tunnel_link(ign, nt);
- return nt;
-
-failed_free:
- free_netdev(dev);
- return NULL;
-}
-
-static void ip6erspan_tunnel_uninit(struct net_device *dev)
-{
- struct ip6_tnl *t = netdev_priv(dev);
- struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id);
-
- ip6erspan_tunnel_unlink_md(ign, t);
- ip6gre_tunnel_unlink(ign, t);
- dst_cache_reset(&t->dst_cache);
- dev_put(dev);
-}
-
-static void ip6gre_tunnel_uninit(struct net_device *dev)
-{
- struct ip6_tnl *t = netdev_priv(dev);
- struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id);
-
- ip6gre_tunnel_unlink_md(ign, t);
- ip6gre_tunnel_unlink(ign, t);
- dst_cache_reset(&t->dst_cache);
- dev_put(dev);
-}
-
-
-static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
- u8 type, u8 code, int offset, __be32 info)
-{
-#if 0
- struct net *net = dev_net(skb->dev);
- const struct gre_base_hdr *greh;
- const struct ipv6hdr *ipv6h;
- int grehlen = sizeof(*greh);
- struct ip6_tnl *t;
- int key_off = 0;
- __be16 flags;
- __be32 key;
-
- if (!pskb_may_pull(skb, offset + grehlen))
- return;
- greh = (const struct gre_base_hdr *)(skb->data + offset);
- flags = greh->flags;
- if (flags & (GRE_VERSION | GRE_ROUTING))
- return;
- if (flags & GRE_CSUM)
- grehlen += 4;
- if (flags & GRE_KEY) {
- key_off = grehlen + offset;
- grehlen += 4;
- }
-
- if (!pskb_may_pull(skb, offset + grehlen))
- return;
- ipv6h = (const struct ipv6hdr *)skb->data;
- greh = (const struct gre_base_hdr *)(skb->data + offset);
- key = key_off ? *(__be32 *)(skb->data + key_off) : 0;
-
- t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr,
- key, greh->protocol);
- if (!t)
- return;
-
- switch (type) {
- struct ipv6_tlv_tnl_enc_lim *tel;
- __u32 teli;
- case ICMPV6_DEST_UNREACH:
- net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
- t->parms.name);
- if (code != ICMPV6_PORT_UNREACH)
- break;
- return;
- case ICMPV6_TIME_EXCEED:
- if (code == ICMPV6_EXC_HOPLIMIT) {
- net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
- t->parms.name);
- break;
- }
- return;
- case ICMPV6_PARAMPROB:
- teli = 0;
- if (code == ICMPV6_HDR_FIELD)
- teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);
-
- if (teli && teli == be32_to_cpu(info) - 2) {
- tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
- if (tel->encap_limit == 0) {
- net_dbg_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n",
- t->parms.name);
- }
- } else {
- net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
- t->parms.name);
- }
- return;
- case ICMPV6_PKT_TOOBIG:
- ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
- return;
- case NDISC_REDIRECT:
- ip6_redirect(skb, net, skb->dev->ifindex, 0,
- sock_net_uid(net, NULL));
- return;
- }
-
- if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO))
- t->err_count++;
- else
- t->err_count = 1;
- t->err_time = jiffies;
-#endif
-}
-
-static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
-{
- const struct ipv6hdr *ipv6h;
- struct ip6_tnl *tunnel;
-
- ipv6h = ipv6_hdr(skb);
- tunnel = ip6gre_tunnel_lookup(skb->dev,
- &ipv6h->saddr, &ipv6h->daddr, tpi->key,
- tpi->proto);
- if (tunnel) {
- struct metadata_dst *tun_dst = NULL;
- if (tunnel->parms.collect_md) {
- __be64 tun_id;
- __be16 flags;
-
- flags = tpi->flags;
- tun_id = key32_to_tunnel_id(tpi->key);
-
- tun_dst = rpl_ipv6_tun_rx_dst(skb, flags, tun_id, 0);
- if (!tun_dst)
- return PACKET_REJECT;
-
- }
-
- ip6_tnl_rcv(tunnel, skb, tpi, tun_dst, false);
- kfree(tun_dst);
- return PACKET_RCVD;
- }
-
- return PACKET_RCVD;
-}
-
-static int ip6erspan_rcv(struct sk_buff *skb,
- struct tnl_ptk_info *tpi,
- int gre_hdr_len)
-{
- struct erspan_base_hdr *ershdr;
- const struct ipv6hdr *ipv6h;
- struct erspan_md2 *md2;
- struct ip6_tnl *tunnel;
- u8 ver;
-
- if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr))))
- return PACKET_REJECT;
-
- ipv6h = ipv6_hdr(skb);
- ershdr = (struct erspan_base_hdr *)skb->data;
- ver = ershdr->ver;
- tpi->key = cpu_to_be32(get_session_id(ershdr));
-
- tunnel = ip6gre_tunnel_lookup(skb->dev,
- &ipv6h->saddr, &ipv6h->daddr, 0,
- tpi->proto);
- if (tunnel) {
- struct metadata_dst *tun_dst = NULL;
- int len = erspan_hdr_len(ver);
-
- if (unlikely(!pskb_may_pull(skb, len)))
- return PACKET_REJECT;
-
- if (__iptunnel_pull_header(skb, len,
- htons(ETH_P_TEB),
- false, false) < 0)
- return PACKET_REJECT;
-
- if (tunnel->parms.collect_md) {
- struct erspan_metadata *pkt_md, *md;
- struct ip_tunnel_info *info;
- unsigned char *gh;
- __be64 tun_id;
- __be16 flags;
-
- tpi->flags |= TUNNEL_KEY;
- flags = tpi->flags;
- tun_id = key32_to_tunnel_id(tpi->key);
-
- tun_dst = rpl_ipv6_tun_rx_dst(skb, flags, tun_id,
- sizeof(*md));
- if (!tun_dst)
- return PACKET_REJECT;
-
- /* skb can be uncloned in __iptunnel_pull_header, so
- * old pkt_md is no longer valid and we need to reset
- * it
- */
- gh = skb_network_header(skb) +
- skb_network_header_len(skb);
- pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
- sizeof(*ershdr));
- info = &tun_dst->u.tun_info;
- md = ip_tunnel_info_opts(info);
- md->version = ver;
- md2 = &md->u.md2;
- memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
- ERSPAN_V2_MDSIZE);
- info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
- info->options_len = sizeof(*md);
- }
-
- ip6_tnl_rcv(tunnel, skb, tpi, tun_dst, false);
- kfree(tun_dst);
- return PACKET_RCVD;
- }
-
- kfree(skb);
- return PACKET_RCVD;
-}
-
-static int gre_rcv(struct sk_buff *skb)
-{
- struct tnl_ptk_info tpi;
- bool csum_err = false;
- int hdr_len;
-
- hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IPV6), 0);
- if (hdr_len < 0)
- goto drop;
-
- if (iptunnel_pull_header(skb, hdr_len, tpi.proto, false))
- goto drop;
-
- if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
- tpi.proto == htons(ETH_P_ERSPAN2))) {
- if (ip6erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
- return 0;
- goto out;
- }
-
- if (ip6gre_rcv(skb, &tpi) == PACKET_RCVD)
- return 0;
-
-out:
- icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
-drop:
- kfree_skb(skb);
- return 0;
-}
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4,7,0)
-#include "gso.h"
-/* gre_handle_offloads() has different return type on older kernsl. */
-static void gre_nop_fix(struct sk_buff *skb) { }
-
-static void gre_csum_fix(struct sk_buff *skb)
-{
- struct gre_base_hdr *greh;
- __be32 *options;
- int gre_offset = skb_transport_offset(skb);
-
- greh = (struct gre_base_hdr *)skb_transport_header(skb);
- options = ((__be32 *)greh + 1);
-
- *options = 0;
- *(__sum16 *)options = csum_fold(skb_checksum(skb, gre_offset,
- skb->len - gre_offset, 0));
-}
-
-#define gre_handle_offloads rpl_gre_handle_offloads
-static int rpl_gre_handle_offloads(struct sk_buff *skb, bool gre_csum)
-{
- int type = gre_csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE;
- gso_fix_segment_t fix_segment;
-
- if (gre_csum)
- fix_segment = gre_csum_fix;
- else
- fix_segment = gre_nop_fix;
-
- return ovs_iptunnel_handle_offloads(skb, type, fix_segment);
-}
-#else
-static int gre_handle_offloads(struct sk_buff *skb, bool csum)
-{
- return iptunnel_handle_offloads(skb, csum,
- csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
-}
-#endif
-
-static void prepare_ip6gre_xmit_ipv4(struct sk_buff *skb,
- struct net_device *dev,
- struct flowi6 *fl6, __u8 *dsfield,
- int *encap_limit)
-{
- const struct iphdr *iph = ip_hdr(skb);
- struct ip6_tnl *t = netdev_priv(dev);
-
- if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
- *encap_limit = t->parms.encap_limit;
-
- memcpy(fl6, &t->fl.u.ip6, sizeof(*fl6));
-
- if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
- *dsfield = ipv4_get_dsfield(iph);
- else
- *dsfield = ip6_tclass(t->parms.flowinfo);
-
-#ifndef IP6_TNL_F_USE_ORIG_FWMARK
- if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
- fl6->flowi6_mark = skb->mark;
- else
- fl6->flowi6_mark = t->parms.fwmark;
-
- fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
-#endif
-}
-
-static int prepare_ip6gre_xmit_ipv6(struct sk_buff *skb,
- struct net_device *dev,
- struct flowi6 *fl6, __u8 *dsfield,
- int *encap_limit)
-{
- struct ipv6hdr *ipv6h = ipv6_hdr(skb);
- struct ip6_tnl *t = netdev_priv(dev);
- __u16 offset;
-
- offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
- /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */
-
- if (offset > 0) {
- struct ipv6_tlv_tnl_enc_lim *tel;
-
- tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
- if (tel->encap_limit == 0) {
- icmpv6_send(skb, ICMPV6_PARAMPROB,
- ICMPV6_HDR_FIELD, offset + 2);
- return -1;
- }
- *encap_limit = tel->encap_limit - 1;
- } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) {
- *encap_limit = t->parms.encap_limit;
- }
-
- memcpy(fl6, &t->fl.u.ip6, sizeof(*fl6));
-
- if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
- *dsfield = ipv6_get_dsfield(ipv6h);
- else
- *dsfield = ip6_tclass(t->parms.flowinfo);
-
- if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
- fl6->flowlabel |= ip6_flowlabel(ipv6h);
-
-#ifndef IP6_TNL_F_USE_ORIG_FWMARK
- if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
- fl6->flowi6_mark = skb->mark;
- else
- fl6->flowi6_mark = t->parms.fwmark;
-
- fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
-#endif
-
- return 0;
-}
-
-static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
- struct net_device *dev, __u8 dsfield,
- struct flowi6 *fl6, int encap_limit,
- __u32 *pmtu, __be16 proto)
-{
- struct ip6_tnl *tunnel = netdev_priv(dev);
- struct tnl_ptk_info tpi;
- __be16 protocol;
-
- if (dev->header_ops && dev->type == ARPHRD_IP6GRE)
- fl6->daddr = ((struct ipv6hdr *)skb->data)->daddr;
- else
- fl6->daddr = tunnel->parms.raddr;
-
- if (tunnel->parms.o_flags & TUNNEL_SEQ)
- tunnel->o_seqno++;
-
- if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen))
- return -ENOMEM;
-
- /* Push GRE header. */
- protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
-
- if (tunnel->parms.collect_md) {
- struct ip_tunnel_info *tun_info;
- const struct ip_tunnel_key *key;
- __be16 flags;
-
- tun_info = skb_tunnel_info(skb);
- if (unlikely(!tun_info ||
- !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
- ip_tunnel_info_af(tun_info) != AF_INET6))
- return -EINVAL;
-
- key = &tun_info->key;
- memset(fl6, 0, sizeof(*fl6));
- fl6->flowi6_proto = IPPROTO_GRE;
- fl6->daddr = key->u.ipv6.dst;
- fl6->flowlabel = key->label;
-// FIX ME!
-// fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
-
- dsfield = key->tos;
- flags = key->tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
- tunnel->tun_hlen = gre_calc_hlen(flags);
-
- tpi.flags = flags;
- tpi.proto = protocol;
- tpi.key = tunnel_id_to_key32(key->tun_id);
- tpi.seq = htonl(tunnel->o_seqno++);
- tpi.hdr_len = tunnel->tun_hlen;
-
- gre_build_header(skb, &tpi, 8);
- } else {
- tpi.flags = tunnel->parms.o_flags;
- tpi.proto = protocol;
- tpi.key = tunnel->parms.o_key;
- tpi.seq = htonl(tunnel->o_seqno++);
- tpi.hdr_len = tunnel->tun_hlen;
-
- gre_build_header(skb, &tpi, 8);
- }
-
- return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
- NEXTHDR_GRE);
-}
-
-static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
-{
- struct ip6_tnl *t = netdev_priv(dev);
- int encap_limit = -1;
- struct flowi6 fl6;
- __u8 dsfield = 0;
- __u32 mtu;
- int err;
-
- memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
-
- if (!t->parms.collect_md)
- prepare_ip6gre_xmit_ipv4(skb, dev, &fl6,
- &dsfield, &encap_limit);
-
- err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
- if (err)
- return -1;
-
- err = __gre6_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
- skb->protocol);
- if (err != 0) {
- /* XXX: send ICMP error even if DF is not set. */
- if (err == -EMSGSIZE)
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
- htonl(mtu));
- return -1;
- }
-
- return 0;
-}
-
-static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
-{
- struct ip6_tnl *t = netdev_priv(dev);
- struct ipv6hdr *ipv6h = ipv6_hdr(skb);
- int encap_limit = -1;
- struct flowi6 fl6;
- __u8 dsfield = 0;
- __u32 mtu;
- int err;
-
- if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr))
- return -1;
-
- if (!t->parms.collect_md &&
- prepare_ip6gre_xmit_ipv6(skb, dev, &fl6, &dsfield, &encap_limit))
- return -1;
-
- if (gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)))
- return -1;
-
- err = __gre6_xmit(skb, dev, dsfield, &fl6, encap_limit,
- &mtu, skb->protocol);
- if (err != 0) {
- if (err == -EMSGSIZE)
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
- return -1;
- }
-
- return 0;
-}
-
-/**
- * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
- * @t: the outgoing tunnel device
- * @hdr: IPv6 header from the incoming packet
- *
- * Description:
- * Avoid trivial tunneling loop by checking that tunnel exit-point
- * doesn't match source of incoming packet.
- *
- * Return:
- * 1 if conflict,
- * 0 else
- **/
-
-static inline bool ip6gre_tnl_addr_conflict(const struct ip6_tnl *t,
- const struct ipv6hdr *hdr)
-{
- return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
-}
-
-static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev)
-{
- struct ip6_tnl *t = netdev_priv(dev);
- int encap_limit = -1;
- struct flowi6 fl6;
- __u32 mtu;
- int err;
-
- if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
- encap_limit = t->parms.encap_limit;
-
- if (!t->parms.collect_md)
- memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
-
- err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
- if (err)
- return err;
-
- err = __gre6_xmit(skb, dev, 0, &fl6, encap_limit, &mtu, skb->protocol);
-
- return err;
-}
-
-static netdev_tx_t ip6gre_tunnel_xmit(struct sk_buff *skb,
- struct net_device *dev)
-{
- struct ip6_tnl *t = netdev_priv(dev);
- struct net_device_stats *stats = &t->dev->stats;
- int ret;
-
- if (!ip6_tnl_xmit_ctl(t, &t->parms.laddr, &t->parms.raddr))
- goto tx_err;
-
- switch (skb->protocol) {
- case htons(ETH_P_IP):
- ret = ip6gre_xmit_ipv4(skb, dev);
- break;
- case htons(ETH_P_IPV6):
- ret = ip6gre_xmit_ipv6(skb, dev);
- break;
- default:
- ret = ip6gre_xmit_other(skb, dev);
- break;
- }
-
- if (ret < 0)
- goto tx_err;
-
- return NETDEV_TX_OK;
-
-tx_err:
- stats->tx_errors++;
- stats->tx_dropped++;
- kfree_skb(skb);
- return NETDEV_TX_OK;
-}
-
-static netdev_tx_t __ip6gre_tunnel_xmit(struct sk_buff *skb)
-{
- return ip6gre_tunnel_xmit(skb, skb->dev);
-}
-
-static bool erspan_skb_would_panic(struct sk_buff *skb, int erspan_md_size)
-{
- /* check if there is enough headroom in packet, if not
- * drop it. Checking for 8 bytes of gre header space +
- * erspan base hdr and erspan type specific header.
- */
- if (skb_headroom(skb) < (8 + sizeof(struct erspan_base_hdr) +
- erspan_md_size))
- return true;
-
- return false;
-}
-
-static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
- struct net_device *dev)
-{
- struct ip6_tnl *t = netdev_priv(dev);
- struct dst_entry *dst = skb_dst(skb);
- struct ip_tunnel_info *tun_info;
- const struct ip_tunnel_key *key;
- struct net_device_stats *stats;
- struct erspan_metadata *md;
- struct tnl_ptk_info tpi;
- bool truncate = false;
- int encap_limit = -1;
- __u8 dsfield = false;
- struct flowi6 fl6;
- int err = -EINVAL;
- __be32 tun_id;
- __u32 mtu;
- int nhoff;
- int thoff;
-
-
- /* OVS doesn't support native mode ip6 tunnel traffic so
- * take an early exit in that case. */
- if (!t->parms.collect_md)
- goto tx_err;
-
- if (!ip6_tnl_xmit_ctl(t, &t->parms.laddr, &t->parms.raddr))
- goto tx_err;
-
- if (gre_handle_offloads(skb, false))
- goto tx_err;
-
- if (skb->len > dev->mtu + dev->hard_header_len) {
- pskb_trim(skb, dev->mtu + dev->hard_header_len);
- truncate = true;
- }
-
- nhoff = skb_network_header(skb) - skb_mac_header(skb);
- if (skb->protocol == htons(ETH_P_IP) &&
- (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
- truncate = true;
-
- thoff = skb_transport_header(skb) - skb_mac_header(skb);
- if (skb->protocol == htons(ETH_P_IPV6) &&
- (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff))
- truncate = true;
-
- if (skb_cow_head(skb, dev->needed_headroom ? : t->hlen))
- goto tx_err;
-
- t->parms.o_flags &= ~TUNNEL_KEY;
-
- tun_info = ovs_skb_tunnel_info(skb);
- if (unlikely(!tun_info ||
- !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
- ip_tunnel_info_af(tun_info) != AF_INET6))
- return -EINVAL;
-
- key = &tun_info->key;
- memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_proto = IPPROTO_GRE;
- fl6.daddr = key->u.ipv6.dst;
- fl6.flowlabel = key->label;
- // fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
-
- dsfield = key->tos;
- md = ip_tunnel_info_opts(tun_info);
- if (!md)
- goto tx_err;
-
- if (erspan_skb_would_panic(skb,
- md->version == 1 ?
- ERSPAN_V1_MDSIZE : ERSPAN_V2_MDSIZE))
- goto tx_err;
-
- tun_id = tunnel_id_to_key32(key->tun_id);
- if (md->version == 1) {
- erspan_build_header(skb,
- ntohl(tun_id),
- ntohl(md->u.index), truncate,
- false);
- tpi.hdr_len = ERSPAN_V1_MDSIZE;
- tpi.proto = htons(ETH_P_ERSPAN);
- } else if (md->version == 2) {
- erspan_build_header_v2(skb,
- ntohl(tun_id),
- md->u.md2.dir,
- get_hwid(&md->u.md2),
- truncate, false);
- tpi.hdr_len = ERSPAN_V2_MDSIZE;
- tpi.proto = htons(ETH_P_ERSPAN2);
- } else {
- goto tx_err;
- }
-
- tpi.flags = TUNNEL_SEQ;
- tpi.key = 0;
- tpi.seq = htonl(t->o_seqno++);
-
- /* Push GRE header. */
- gre_build_header(skb, &tpi, 8);
-
- /* TooBig packet may have updated dst->dev's mtu */
- if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)
-#ifndef HAVE_DST_OPS_CONFIRM_NEIGH
- dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu);
-#else
- dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu, false);
-#endif
-
- err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
- NEXTHDR_GRE);
- if (err != 0)
- goto tx_err;
-
- return NETDEV_TX_OK;
-
-tx_err:
- stats = &t->dev->stats;
- stats->tx_errors++;
- stats->tx_dropped++;
- kfree_skb(skb);
- return NETDEV_TX_OK;
-}
-
-static netdev_tx_t __ip6erspan_tunnel_xmit(struct sk_buff *skb)
-{
- return ip6erspan_tunnel_xmit(skb, skb->dev);
-}
-
-static void ip6gre_tnl_link_config_common(struct ip6_tnl *t)
-{
- struct net_device *dev = t->dev;
- struct __ip6_tnl_parm *p = &t->parms;
- struct flowi6 *fl6 = &t->fl.u.ip6;
-
- if (dev->type != ARPHRD_ETHER) {
- memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
- memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
- }
-
- /* Set up flowi template */
- fl6->saddr = p->laddr;
- fl6->daddr = p->raddr;
- fl6->flowi6_oif = p->link;
- fl6->flowlabel = 0;
- fl6->flowi6_proto = IPPROTO_GRE;
-
- if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
- fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
- if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL))
- fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;
-
- p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET);
- p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr);
-
- if (p->flags&IP6_TNL_F_CAP_XMIT &&
- p->flags&IP6_TNL_F_CAP_RCV && dev->type != ARPHRD_ETHER)
- dev->flags |= IFF_POINTOPOINT;
- else
- dev->flags &= ~IFF_POINTOPOINT;
-}
-
-static void ip6gre_tnl_link_config_route(struct ip6_tnl *t, int set_mtu,
- int t_hlen)
-{
- const struct __ip6_tnl_parm *p = &t->parms;
- struct net_device *dev = t->dev;
-
- if (p->flags & IP6_TNL_F_CAP_XMIT) {
- int strict = (ipv6_addr_type(&p->raddr) &
- (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));
-
- struct rt6_info *rt = rt6_lookup(t->net,
- &p->raddr, &p->laddr,
- p->link, strict);
-
- if (!rt)
- return;
-
- if (rt->dst.dev) {
- dev->hard_header_len = rt->dst.dev->hard_header_len +
- t_hlen;
-
- if (set_mtu) {
- dev->mtu = rt->dst.dev->mtu - t_hlen;
- if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
- dev->mtu -= 8;
- if (dev->type == ARPHRD_ETHER)
- dev->mtu -= ETH_HLEN;
-
- if (dev->mtu < IPV6_MIN_MTU)
- dev->mtu = IPV6_MIN_MTU;
- }
- }
- ip6_rt_put(rt);
- }
-}
-
-static int ip6gre_calc_hlen(struct ip6_tnl *tunnel)
-{
- int t_hlen;
-
- tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
- tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
-
- t_hlen = tunnel->hlen + sizeof(struct ipv6hdr);
- tunnel->dev->hard_header_len = LL_MAX_HEADER + t_hlen;
- return t_hlen;
-}
-
-static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
-{
- ip6gre_tnl_link_config_common(t);
- ip6gre_tnl_link_config_route(t, set_mtu, ip6gre_calc_hlen(t));
-}
-
-static void ip6gre_tnl_copy_tnl_parm(struct ip6_tnl *t,
- const struct __ip6_tnl_parm *p)
-{
- t->parms.laddr = p->laddr;
- t->parms.raddr = p->raddr;
- t->parms.flags = p->flags;
- t->parms.hop_limit = p->hop_limit;
- t->parms.encap_limit = p->encap_limit;
- t->parms.flowinfo = p->flowinfo;
- t->parms.link = p->link;
- t->parms.proto = p->proto;
- t->parms.i_key = p->i_key;
- t->parms.o_key = p->o_key;
- t->parms.i_flags = p->i_flags;
- t->parms.o_flags = p->o_flags;
- t->parms.fwmark = p->fwmark;
- dst_cache_reset(&t->dst_cache);
-}
-
-static int ip6gre_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p,
- int set_mtu)
-{
- ip6gre_tnl_copy_tnl_parm(t, p);
- ip6gre_tnl_link_config(t, set_mtu);
- return 0;
-}
-
-static void ip6gre_tnl_parm_from_user(struct __ip6_tnl_parm *p,
- const struct ip6_tnl_parm2 *u)
-{
- p->laddr = u->laddr;
- p->raddr = u->raddr;
- p->flags = u->flags;
- p->hop_limit = u->hop_limit;
- p->encap_limit = u->encap_limit;
- p->flowinfo = u->flowinfo;
- p->link = u->link;
- p->i_key = u->i_key;
- p->o_key = u->o_key;
- p->i_flags = gre_flags_to_tnl_flags(u->i_flags);
- p->o_flags = gre_flags_to_tnl_flags(u->o_flags);
- memcpy(p->name, u->name, sizeof(u->name));
-}
-
-static void ip6gre_tnl_parm_to_user(struct ip6_tnl_parm2 *u,
- const struct __ip6_tnl_parm *p)
-{
- u->proto = IPPROTO_GRE;
- u->laddr = p->laddr;
- u->raddr = p->raddr;
- u->flags = p->flags;
- u->hop_limit = p->hop_limit;
- u->encap_limit = p->encap_limit;
- u->flowinfo = p->flowinfo;
- u->link = p->link;
- u->i_key = p->i_key;
- u->o_key = p->o_key;
- u->i_flags = gre_tnl_flags_to_gre_flags(p->i_flags);
- u->o_flags = gre_tnl_flags_to_gre_flags(p->o_flags);
- memcpy(u->name, p->name, sizeof(u->name));
-}
-
-static int ip6gre_tunnel_ioctl(struct net_device *dev,
- struct ifreq *ifr, int cmd)
-{
- int err = 0;
- struct ip6_tnl_parm2 p;
- struct __ip6_tnl_parm p1;
- struct ip6_tnl *t = netdev_priv(dev);
- struct net *net = t->net;
- struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
-
- memset(&p1, 0, sizeof(p1));
-
- switch (cmd) {
- case SIOCGETTUNNEL:
- if (dev == ign->fb_tunnel_dev) {
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
- err = -EFAULT;
- break;
- }
- ip6gre_tnl_parm_from_user(&p1, &p);
- t = ip6gre_tunnel_locate(net, &p1, 0);
- if (!t)
- t = netdev_priv(dev);
- }
- memset(&p, 0, sizeof(p));
- ip6gre_tnl_parm_to_user(&p, &t->parms);
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
- err = -EFAULT;
- break;
-
- case SIOCADDTUNNEL:
- case SIOCCHGTUNNEL:
- err = -EPERM;
- if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
- goto done;
-
- err = -EFAULT;
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
- goto done;
-
- err = -EINVAL;
- if ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))
- goto done;
-
- if (!(p.i_flags&GRE_KEY))
- p.i_key = 0;
- if (!(p.o_flags&GRE_KEY))
- p.o_key = 0;
-
- ip6gre_tnl_parm_from_user(&p1, &p);
- t = ip6gre_tunnel_locate(net, &p1, cmd == SIOCADDTUNNEL);
-
- if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
- if (t) {
- if (t->dev != dev) {
- err = -EEXIST;
- break;
- }
- } else {
- t = netdev_priv(dev);
-
- ip6gre_tunnel_unlink(ign, t);
- synchronize_net();
- ip6gre_tnl_change(t, &p1, 1);
- ip6gre_tunnel_link(ign, t);
- netdev_state_change(dev);
- }
- }
-
- if (t) {
- err = 0;
-
- memset(&p, 0, sizeof(p));
- ip6gre_tnl_parm_to_user(&p, &t->parms);
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
- err = -EFAULT;
- } else
- err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
- break;
-
- case SIOCDELTUNNEL:
- err = -EPERM;
- if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
- goto done;
-
- if (dev == ign->fb_tunnel_dev) {
- err = -EFAULT;
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
- goto done;
- err = -ENOENT;
- ip6gre_tnl_parm_from_user(&p1, &p);
- t = ip6gre_tunnel_locate(net, &p1, 0);
- if (!t)
- goto done;
- err = -EPERM;
- if (t == netdev_priv(ign->fb_tunnel_dev))
- goto done;
- dev = t->dev;
- }
- unregister_netdevice(dev);
- err = 0;
- break;
-
- default:
- err = -EINVAL;
- }
-
-done:
- return err;
-}
-
-static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
- unsigned short type, const void *daddr,
- const void *saddr, unsigned int len)
-{
- struct ip6_tnl *t = netdev_priv(dev);
- struct ipv6hdr *ipv6h;
- __be16 *p;
-
- ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen + sizeof(*ipv6h));
- ip6_flow_hdr(ipv6h, 0, ip6_make_flowlabel(dev_net(dev), skb,
- t->fl.u.ip6.flowlabel,
- true, &t->fl.u.ip6));
- ipv6h->hop_limit = t->parms.hop_limit;
- ipv6h->nexthdr = NEXTHDR_GRE;
- ipv6h->saddr = t->parms.laddr;
- ipv6h->daddr = t->parms.raddr;
-
- p = (__be16 *)(ipv6h + 1);
- p[0] = t->parms.o_flags;
- p[1] = htons(type);
-
- /*
- * Set the source hardware address.
- */
-
- if (saddr)
- memcpy(&ipv6h->saddr, saddr, sizeof(struct in6_addr));
- if (daddr)
- memcpy(&ipv6h->daddr, daddr, sizeof(struct in6_addr));
- if (!ipv6_addr_any(&ipv6h->daddr))
- return t->hlen;
-
- return -t->hlen;
-}
-
-static const struct header_ops ip6gre_header_ops = {
- .create = ip6gre_header,
-};
-
-static const struct net_device_ops ip6gre_netdev_ops = {
- .ndo_init = ip6gre_tunnel_init,
- .ndo_uninit = ip6gre_tunnel_uninit,
- .ndo_start_xmit = ip6gre_tunnel_xmit,
- .ndo_do_ioctl = ip6gre_tunnel_ioctl,
-#ifdef HAVE_RHEL7_MAX_MTU
- .ndo_size = sizeof(struct net_device_ops),
- .extended.ndo_change_mtu = ip6_tnl_change_mtu,
-#else
- .ndo_change_mtu = ip6_tnl_change_mtu,
-#endif
- .ndo_get_stats64 = ip_tunnel_get_stats64,
-#ifdef HAVE_NDO_GET_IFLINK
- .ndo_get_iflink = ip6_tnl_get_iflink,
-#endif
-};
-
-#ifdef HAVE_NEEDS_FREE_NETDEV
-static void ip6gre_dev_free(struct net_device *dev)
-{
- struct ip6_tnl *t = netdev_priv(dev);
-
- dst_cache_destroy(&t->dst_cache);
- free_percpu(dev->tstats);
-}
-
-#endif
-static void ip6gre_tunnel_setup(struct net_device *dev)
-{
- dev->netdev_ops = &ip6gre_netdev_ops;
-#ifndef HAVE_NEEDS_FREE_NETDEV
- dev->destructor = free_netdev;
-#else
- dev->needs_free_netdev = true;
- dev->priv_destructor = ip6gre_dev_free;
-#endif
-
- dev->type = ARPHRD_IP6GRE;
-
- dev->flags |= IFF_NOARP;
- dev->addr_len = sizeof(struct in6_addr);
- netif_keep_dst(dev);
- /* This perm addr will be used as interface identifier by IPv6 */
- dev->addr_assign_type = NET_ADDR_RANDOM;
- eth_random_addr(dev->perm_addr);
-}
-
-#define GRE6_FEATURES (NETIF_F_SG | \
- NETIF_F_FRAGLIST | \
- NETIF_F_HIGHDMA | \
- NETIF_F_HW_CSUM)
-
-static void ip6gre_tnl_init_features(struct net_device *dev)
-{
- struct ip6_tnl *nt = netdev_priv(dev);
-
- dev->features |= GRE6_FEATURES;
- dev->hw_features |= GRE6_FEATURES;
-
- if (!(nt->parms.o_flags & TUNNEL_SEQ)) {
- /* TCP offload with GRE SEQ is not supported, nor
- * can we support 2 levels of outer headers requiring
- * an update.
- */
- if (!(nt->parms.o_flags & TUNNEL_CSUM) ||
- nt->encap.type == TUNNEL_ENCAP_NONE) {
- dev->features |= NETIF_F_GSO_SOFTWARE;
- dev->hw_features |= NETIF_F_GSO_SOFTWARE;
- }
-
- /* Can use a lockless transmit, unless we generate
- * output sequences
- */
- dev->features |= NETIF_F_LLTX;
- }
-}
-
-static int ip6gre_tunnel_init_common(struct net_device *dev)
-{
- struct ip6_tnl *tunnel;
- int ret;
- int t_hlen;
-
- tunnel = netdev_priv(dev);
-
- tunnel->dev = dev;
- tunnel->net = dev_net(dev);
- strcpy(tunnel->parms.name, dev->name);
-
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
-
- ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
- if (ret) {
- free_percpu(dev->tstats);
- dev->tstats = NULL;
- return ret;
- }
-
- t_hlen = ip6gre_calc_hlen(tunnel);
- dev->mtu = ETH_DATA_LEN - t_hlen;
- if (dev->type == ARPHRD_ETHER)
- dev->mtu -= ETH_HLEN;
- if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
- dev->mtu -= 8;
-
- if (tunnel->parms.collect_md) {
- dev->features |= NETIF_F_NETNS_LOCAL;
- netif_keep_dst(dev);
- }
- ip6gre_tnl_init_features(dev);
-
- return 0;
-}
-
-static int ip6gre_tunnel_init(struct net_device *dev)
-{
- struct ip6_tnl *tunnel;
- int ret;
-
- ret = ip6gre_tunnel_init_common(dev);
- if (ret)
- return ret;
-
- tunnel = netdev_priv(dev);
-
- if (tunnel->parms.collect_md)
- return 0;
-
- memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr));
- memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr));
-
- if (ipv6_addr_any(&tunnel->parms.raddr))
- dev->header_ops = &ip6gre_header_ops;
-
- return 0;
-}
-
-static void ip6gre_fb_tunnel_init(struct net_device *dev)
-{
- struct ip6_tnl *tunnel = netdev_priv(dev);
-
- tunnel->dev = dev;
- tunnel->net = dev_net(dev);
- strcpy(tunnel->parms.name, dev->name);
-
- tunnel->hlen = sizeof(struct ipv6hdr) + 4;
-
- dev_hold(dev);
-}
-
-static struct inet6_protocol ip6gre_protocol __read_mostly = {
- .handler = gre_rcv,
- .err_handler = ip6gre_err,
- .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
-};
-
-static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head)
-{
- struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
- struct net_device *dev, *aux;
- int prio;
-
- for_each_netdev_safe(net, dev, aux)
- if (dev->rtnl_link_ops == &ip6gre_link_ops ||
- dev->rtnl_link_ops == &ip6gre_tap_ops ||
- dev->rtnl_link_ops == &ip6erspan_tap_ops)
- unregister_netdevice_queue(dev, head);
-
- for (prio = 0; prio < 4; prio++) {
- int h;
- for (h = 0; h < IP6_GRE_HASH_SIZE; h++) {
- struct ip6_tnl *t;
-
- t = rtnl_dereference(ign->tunnels[prio][h]);
-
- while (t) {
- /* If dev is in the same netns, it has already
- * been added to the list by the previous loop.
- */
- if (!net_eq(dev_net(t->dev), net))
- unregister_netdevice_queue(t->dev,
- head);
- t = rtnl_dereference(t->next);
- }
- }
- }
-}
-
-static int __net_init ip6gre_init_net(struct net *net)
-{
- struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
- int err;
-
- ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl),
- "ovs-ip6gre0",
- NET_NAME_UNKNOWN,
- ip6gre_tunnel_setup);
- if (!ign->fb_tunnel_dev) {
- err = -ENOMEM;
- goto err_alloc_dev;
- }
- dev_net_set(ign->fb_tunnel_dev, net);
- /* FB netdevice is special: we have one, and only one per netns.
- * Allowing to move it to another netns is clearly unsafe.
- */
- ign->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
-
-
- ip6gre_fb_tunnel_init(ign->fb_tunnel_dev);
- ign->fb_tunnel_dev->rtnl_link_ops = &ip6gre_link_ops;
-
- err = register_netdev(ign->fb_tunnel_dev);
- if (err)
- goto err_reg_dev;
-
- rcu_assign_pointer(ign->tunnels_wc[0],
- netdev_priv(ign->fb_tunnel_dev));
- return 0;
-
-err_reg_dev:
- free_netdev(ign->fb_tunnel_dev);
-err_alloc_dev:
- return err;
-}
-
-static void __net_exit ip6gre_exit_batch_net(struct list_head *net_list)
-{
- struct net *net;
- LIST_HEAD(list);
-
- rtnl_lock();
- list_for_each_entry(net, net_list, exit_list)
- ip6gre_destroy_tunnels(net, &list);
- unregister_netdevice_many(&list);
- rtnl_unlock();
-}
-
-enum {
-#ifndef HAVE_IFLA_GRE_ENCAP_DPORT
- IFLA_GRE_ENCAP_TYPE = IFLA_GRE_FLAGS + 1,
- IFLA_GRE_ENCAP_FLAGS,
- IFLA_GRE_ENCAP_SPORT,
- IFLA_GRE_ENCAP_DPORT,
-#endif
-#ifndef HAVE_IFLA_GRE_COLLECT_METADATA
- IFLA_GRE_COLLECT_METADATA = IFLA_GRE_ENCAP_DPORT + 1,
-#endif
-#ifndef HAVE_IFLA_GRE_IGNORE_DF
- IFLA_GRE_IGNORE_DF = IFLA_GRE_COLLECT_METADATA + 1,
-#endif
-#ifndef HAVE_IFLA_GRE_FWMARK
- IFLA_GRE_FWMARK = IFLA_GRE_IGNORE_DF + 1,
-#endif
-#ifndef HAVE_IFLA_GRE_ERSPAN_INDEX
- IFLA_GRE_ERSPAN_INDEX = IFLA_GRE_FWMARK + 1,
-#endif
-#ifndef HAVE_IFLA_GRE_ERSPAN_HWID
- IFLA_GRE_ERSPAN_VER = IFLA_GRE_ERSPAN_INDEX + 1,
- IFLA_GRE_ERSPAN_DIR,
- IFLA_GRE_ERSPAN_HWID,
-#endif
-};
-
-#define RPL_IFLA_GRE_MAX (IFLA_GRE_ERSPAN_HWID + 1)
-
-static struct pernet_operations ip6gre_net_ops = {
- .init = ip6gre_init_net,
- .exit_batch = ip6gre_exit_batch_net,
- .id = &ip6gre_net_id,
- .size = sizeof(struct ip6gre_net),
-};
-#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK
-static int rpl_ip6gre_tunnel_validate(struct nlattr *tb[],
- struct nlattr *data[],
- struct netlink_ext_ack *extack)
-#else
-static int rpl_ip6gre_tunnel_validate(struct nlattr *tb[],
- struct nlattr *data[])
-#endif
-{
- __be16 flags;
-
- if (!data)
- return 0;
-
- flags = 0;
- if (data[IFLA_GRE_IFLAGS])
- flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
- if (data[IFLA_GRE_OFLAGS])
- flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
- if (flags & (GRE_VERSION|GRE_ROUTING))
- return -EINVAL;
-
- return 0;
-}
-#define ip6gre_tunnel_validate rpl_ip6gre_tunnel_validate
-
-#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK
-static int rpl_ip6gre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
- struct netlink_ext_ack *extack)
-#else
-static int rpl_ip6gre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
-#endif
-{
- struct in6_addr daddr;
-
- if (tb[IFLA_ADDRESS]) {
- if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
- return -EINVAL;
- if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
- return -EADDRNOTAVAIL;
- }
-
- if (!data)
- goto out;
-
- if (data[IFLA_GRE_REMOTE]) {
- daddr = nla_get_in6_addr(data[IFLA_GRE_REMOTE]);
- if (ipv6_addr_any(&daddr))
- return -EINVAL;
- }
-
-out:
-#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK
- return ip6gre_tunnel_validate(tb, data, extack);
-#else
- return ip6gre_tunnel_validate(tb, data);
-#endif
-}
-#define ip6gre_tap_validate rpl_ip6gre_tap_validate
-
-#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK
-static int rpl_ip6erspan_tap_validate(struct nlattr *tb[],
- struct nlattr *data[],
- struct netlink_ext_ack *extack)
-#else
-static int rpl_ip6erspan_tap_validate(struct nlattr *tb[],
- struct nlattr *data[])
-#endif
-{
- __be16 flags = 0;
- int ret, ver = 0;
-
- if (!data)
- return 0;
-
-#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK
- ret = ip6gre_tap_validate(tb, data, extack);
-#else
- ret = ip6gre_tap_validate(tb, data);
-#endif
- if (ret)
- return ret;
-
- /* ERSPAN should only have GRE sequence and key flag */
- if (data[IFLA_GRE_OFLAGS])
- flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
- if (data[IFLA_GRE_IFLAGS])
- flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
- if (!data[IFLA_GRE_COLLECT_METADATA] &&
- flags != (GRE_SEQ | GRE_KEY))
- return -EINVAL;
-
- /* ERSPAN Session ID only has 10-bit. Since we reuse
- * 32-bit key field as ID, check it's range.
- */
- if (data[IFLA_GRE_IKEY] &&
- (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
- return -EINVAL;
-
- if (data[IFLA_GRE_OKEY] &&
- (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
- return -EINVAL;
-
- if (data[IFLA_GRE_ERSPAN_VER]) {
- ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
- if (ver != 1 && ver != 2)
- return -EINVAL;
- }
-
- if (ver == 1) {
- if (data[IFLA_GRE_ERSPAN_INDEX]) {
- u32 index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
-
- if (index & ~INDEX_MASK)
- return -EINVAL;
- }
- } else if (ver == 2) {
- if (data[IFLA_GRE_ERSPAN_DIR]) {
- u16 dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
-
- if (dir & ~(DIR_MASK >> DIR_OFFSET))
- return -EINVAL;
- }
-
- if (data[IFLA_GRE_ERSPAN_HWID]) {
- u16 hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
-
- if (hwid & ~(HWID_MASK >> HWID_OFFSET))
- return -EINVAL;
- }
- }
-
- return 0;
-}
-#define ip6erspan_tap_validate rpl_ip6erspan_tap_validate
-
-static void ip6gre_netlink_parms(struct nlattr *data[],
- struct __ip6_tnl_parm *parms)
-{
-#if 0
- /* Do not use in case of OVS - our vport needs to set a parm
- * directly and this erases it
- */
- memset(parms, 0, sizeof(*parms));
-
-#endif
- if (!data)
- return;
-
- if (data[IFLA_GRE_LINK])
- parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
-
- if (data[IFLA_GRE_IFLAGS])
- parms->i_flags = gre_flags_to_tnl_flags(
- nla_get_be16(data[IFLA_GRE_IFLAGS]));
-
- if (data[IFLA_GRE_OFLAGS])
- parms->o_flags = gre_flags_to_tnl_flags(
- nla_get_be16(data[IFLA_GRE_OFLAGS]));
-
- if (data[IFLA_GRE_IKEY])
- parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
-
- if (data[IFLA_GRE_OKEY])
- parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
-
- if (data[IFLA_GRE_LOCAL])
- parms->laddr = nla_get_in6_addr(data[IFLA_GRE_LOCAL]);
-
- if (data[IFLA_GRE_REMOTE])
- parms->raddr = nla_get_in6_addr(data[IFLA_GRE_REMOTE]);
-
- if (data[IFLA_GRE_TTL])
- parms->hop_limit = nla_get_u8(data[IFLA_GRE_TTL]);
-
- if (data[IFLA_GRE_ENCAP_LIMIT])
- parms->encap_limit = nla_get_u8(data[IFLA_GRE_ENCAP_LIMIT]);
-
- if (data[IFLA_GRE_FLOWINFO])
- parms->flowinfo = nla_get_be32(data[IFLA_GRE_FLOWINFO]);
-
- if (data[IFLA_GRE_FLAGS])
- parms->flags = nla_get_u32(data[IFLA_GRE_FLAGS]);
-
- if (data[IFLA_GRE_FWMARK])
- parms->fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
-
- if (data[IFLA_GRE_COLLECT_METADATA])
- parms->collect_md = true;
-
- parms->erspan_ver = 1;
- if (data[IFLA_GRE_ERSPAN_VER])
- parms->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
-
- if (parms->erspan_ver == 1) {
- if (data[IFLA_GRE_ERSPAN_INDEX])
- parms->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
- } else if (parms->erspan_ver == 2) {
- if (data[IFLA_GRE_ERSPAN_DIR])
- parms->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
- if (data[IFLA_GRE_ERSPAN_HWID])
- parms->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
- }
-}
-
-static int ip6gre_tap_init(struct net_device *dev)
-{
- int ret;
-
- ret = ip6gre_tunnel_init_common(dev);
- if (ret)
- return ret;
-
- dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
-
- return 0;
-}
-
-static const struct net_device_ops ip6gre_tap_netdev_ops = {
- .ndo_init = ip6gre_tap_init,
- .ndo_uninit = ip6gre_tunnel_uninit,
- .ndo_start_xmit = ip6gre_tunnel_xmit,
- .ndo_set_mac_address = eth_mac_addr,
- .ndo_validate_addr = eth_validate_addr,
-#ifdef HAVE_RHEL7_MAX_MTU
- .ndo_size = sizeof(struct net_device_ops),
- .extended.ndo_change_mtu = ip6_tnl_change_mtu,
-#else
- .ndo_change_mtu = ip6_tnl_change_mtu,
-#endif
- .ndo_get_stats64 = ip_tunnel_get_stats64,
-#ifdef HAVE_NDO_GET_IFLINK
- .ndo_get_iflink = ip6_tnl_get_iflink,
-#endif
-};
-
-static int ip6erspan_calc_hlen(struct ip6_tnl *tunnel)
-{
- int t_hlen;
-
- tunnel->tun_hlen = 8;
- tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
- erspan_hdr_len(tunnel->parms.erspan_ver);
-
- t_hlen = tunnel->hlen + sizeof(struct ipv6hdr);
- tunnel->dev->hard_header_len = LL_MAX_HEADER + t_hlen;
- return t_hlen;
-}
-
-static int ip6erspan_tap_init(struct net_device *dev)
-{
- struct ip6_tnl *tunnel;
- int t_hlen;
- int ret;
-
- tunnel = netdev_priv(dev);
-
- tunnel->dev = dev;
- tunnel->net = dev_net(dev);
- strcpy(tunnel->parms.name, dev->name);
-
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
-
- ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
- if (ret) {
- free_percpu(dev->tstats);
- dev->tstats = NULL;
- return ret;
- }
-
- t_hlen = ip6erspan_calc_hlen(tunnel);
- dev->mtu = ETH_DATA_LEN - t_hlen;
- if (dev->type == ARPHRD_ETHER)
- dev->mtu -= ETH_HLEN;
- if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
- dev->mtu -= 8;
-
- dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
- tunnel = netdev_priv(dev);
- ip6erspan_tnl_link_config(tunnel, 1);
-
- return 0;
-}
-
-static const struct net_device_ops ip6erspan_netdev_ops = {
- .ndo_init = ip6erspan_tap_init,
- .ndo_uninit = ip6erspan_tunnel_uninit,
- .ndo_start_xmit = ip6erspan_tunnel_xmit,
- .ndo_set_mac_address = eth_mac_addr,
- .ndo_validate_addr = eth_validate_addr,
-#ifdef HAVE_RHEL7_MAX_MTU
- .ndo_size = sizeof(struct net_device_ops),
- .extended.ndo_change_mtu = ip6_tnl_change_mtu,
-#else
- .ndo_change_mtu = ip6_tnl_change_mtu,
-#endif
- .ndo_get_stats64 = ip_tunnel_get_stats64,
-#ifdef HAVE_NDO_GET_IFLINK
- .ndo_get_iflink = ip6_tnl_get_iflink,
-#endif
-};
-
-static void ip6gre_tap_setup(struct net_device *dev)
-{
-
- ether_setup(dev);
-#ifdef HAVE_NET_DEVICE_MAX_MTU
- dev->max_mtu = 0;
-#endif
- dev->netdev_ops = &ip6gre_tap_netdev_ops;
-#ifndef HAVE_NEEDS_FREE_NETDEV
- dev->destructor = free_netdev;
-#else
- dev->needs_free_netdev = true;
- dev->priv_destructor = ip6gre_dev_free;
-#endif
-
- dev->features |= NETIF_F_NETNS_LOCAL;
- dev->priv_flags &= ~IFF_TX_SKB_SHARING;
- dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
- netif_keep_dst(dev);
-}
-
-static bool ip6gre_netlink_encap_parms(struct nlattr *data[],
- struct ip_tunnel_encap *ipencap)
-{
- bool ret = false;
-
- memset(ipencap, 0, sizeof(*ipencap));
-
- if (!data)
- return ret;
-
- if (data[IFLA_GRE_ENCAP_TYPE]) {
- ret = true;
- ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
- }
-
- if (data[IFLA_GRE_ENCAP_FLAGS]) {
- ret = true;
- ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
- }
-
- if (data[IFLA_GRE_ENCAP_SPORT]) {
- ret = true;
- ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
- }
-
- if (data[IFLA_GRE_ENCAP_DPORT]) {
- ret = true;
- ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
- }
-
- return ret;
-}
-
-#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
-static int rpl_ip6gre_newlink_common(struct net *src_net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[],
- struct netlink_ext_ack *extack)
-#else
-static int rpl_ip6gre_newlink_common(struct net *src_net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[])
-#endif
-{
- struct ip6_tnl *nt;
- struct ip_tunnel_encap ipencap;
- int err;
-
- nt = netdev_priv(dev);
-
- if (ip6gre_netlink_encap_parms(data, &ipencap)) {
- int err = ip6_tnl_encap_setup(nt, &ipencap);
-
- if (err < 0)
- return err;
- }
-
- if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
- eth_hw_addr_random(dev);
-
- nt->dev = dev;
- nt->net = dev_net(dev);
-
- err = register_netdevice(dev);
- if (err)
- goto out;
-
- if (tb[IFLA_MTU])
- ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
-
- dev_hold(dev);
-
-out:
- return err;
-}
-#define ip6gre_newlink_common rpl_ip6gre_newlink_common
-
-#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
-static int rpl_ip6gre_newlink(struct net *src_net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[],
- struct netlink_ext_ack *extack)
-#else
-static int rpl_ip6gre_newlink(struct net *src_net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[])
-#endif
-{
- struct ip6_tnl *nt = netdev_priv(dev);
- struct net *net = dev_net(dev);
- struct ip6gre_net *ign;
- int err;
-
- ip6gre_netlink_parms(data, &nt->parms);
- ign = net_generic(net, ip6gre_net_id);
-
- if (nt->parms.collect_md) {
- if (rtnl_dereference(ign->collect_md_tun))
- return -EEXIST;
- } else {
- if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
- return -EEXIST;
- }
-
-#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
- err = ip6gre_newlink_common(src_net, dev, tb, data, extack);
-#else
- err = ip6gre_newlink_common(src_net, dev, tb, data);
-#endif
- if (!err) {
- ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
- ip6gre_tunnel_link_md(ign, nt);
- ip6gre_tunnel_link(net_generic(net, ip6gre_net_id), nt);
- }
- return err;
-}
-
-#define ip6gre_newlink rpl_ip6gre_newlink
-
-#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
-static struct ip6_tnl *
-rpl_ip6gre_changelink_common(struct net_device *dev, struct nlattr *tb[],
- struct nlattr *data[], struct __ip6_tnl_parm *p_p,
- struct netlink_ext_ack *extack)
-#else
-static struct ip6_tnl *
-rpl_ip6gre_changelink_common(struct net_device *dev, struct nlattr *tb[],
- struct nlattr *data[], struct __ip6_tnl_parm *p_p)
-#endif
-{
- struct ip6_tnl *t, *nt = netdev_priv(dev);
- struct net *net = nt->net;
- struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
- struct ip_tunnel_encap ipencap;
-
- if (dev == ign->fb_tunnel_dev)
- return ERR_PTR(-EINVAL);
-
- if (ip6gre_netlink_encap_parms(data, &ipencap)) {
- int err = ip6_tnl_encap_setup(nt, &ipencap);
-
- if (err < 0)
- return ERR_PTR(err);
- }
-
- ip6gre_netlink_parms(data, p_p);
-
- t = ip6gre_tunnel_locate(net, p_p, 0);
-
- if (t) {
- if (t->dev != dev)
- return ERR_PTR(-EEXIST);
- } else {
- t = nt;
- }
-
- return t;
-}
-#define ip6gre_changelink_common rpl_ip6gre_changelink_common
-
-#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
-static int rpl_ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
- struct nlattr *data[],
- struct netlink_ext_ack *extack)
-#else
-static int rpl_ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
- struct nlattr *data[])
-#endif
-{
- struct ip6gre_net *ign = net_generic(dev_net(dev), ip6gre_net_id);
- struct __ip6_tnl_parm p;
- struct ip6_tnl *t;
-
-#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
- t = ip6gre_changelink_common(dev, tb, data, &p, extack);
-#else
- t = ip6gre_changelink_common(dev, tb, data, &p);
-#endif
- if (IS_ERR(t))
- return PTR_ERR(t);
-
- ip6gre_tunnel_unlink_md(ign, t);
- ip6gre_tunnel_unlink(ign, t);
- ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]);
- ip6gre_tunnel_link_md(ign, t);
- ip6gre_tunnel_link(ign, t);
- return 0;
-}
-#define ip6gre_changelink rpl_ip6gre_changelink
-
-static void ip6gre_dellink(struct net_device *dev, struct list_head *head)
-{
- struct net *net = dev_net(dev);
- struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
-
- if (dev != ign->fb_tunnel_dev)
- unregister_netdevice_queue(dev, head);
-}
-
-static size_t ip6gre_get_size(const struct net_device *dev)
-{
- return
- /* IFLA_GRE_LINK */
- nla_total_size(4) +
- /* IFLA_GRE_IFLAGS */
- nla_total_size(2) +
- /* IFLA_GRE_OFLAGS */
- nla_total_size(2) +
- /* IFLA_GRE_IKEY */
- nla_total_size(4) +
- /* IFLA_GRE_OKEY */
- nla_total_size(4) +
- /* IFLA_GRE_LOCAL */
- nla_total_size(sizeof(struct in6_addr)) +
- /* IFLA_GRE_REMOTE */
- nla_total_size(sizeof(struct in6_addr)) +
- /* IFLA_GRE_TTL */
- nla_total_size(1) +
- /* IFLA_GRE_ENCAP_LIMIT */
- nla_total_size(1) +
- /* IFLA_GRE_FLOWINFO */
- nla_total_size(4) +
- /* IFLA_GRE_FLAGS */
- nla_total_size(4) +
- /* IFLA_GRE_ENCAP_TYPE */
- nla_total_size(2) +
- /* IFLA_GRE_ENCAP_FLAGS */
- nla_total_size(2) +
- /* IFLA_GRE_ENCAP_SPORT */
- nla_total_size(2) +
- /* IFLA_GRE_ENCAP_DPORT */
- nla_total_size(2) +
- /* IFLA_GRE_COLLECT_METADATA */
- nla_total_size(0) +
- /* IFLA_GRE_FWMARK */
- nla_total_size(4) +
- /* IFLA_GRE_ERSPAN_INDEX */
- nla_total_size(4) +
- 0;
-}
-
-static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
-{
- struct ip6_tnl *t = netdev_priv(dev);
- struct __ip6_tnl_parm *p = &t->parms;
-
- if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
- nla_put_be16(skb, IFLA_GRE_IFLAGS,
- gre_tnl_flags_to_gre_flags(p->i_flags)) ||
- nla_put_be16(skb, IFLA_GRE_OFLAGS,
- gre_tnl_flags_to_gre_flags(p->o_flags)) ||
- nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
- nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
- nla_put_in6_addr(skb, IFLA_GRE_LOCAL, &p->laddr) ||
- nla_put_in6_addr(skb, IFLA_GRE_REMOTE, &p->raddr) ||
- nla_put_u8(skb, IFLA_GRE_TTL, p->hop_limit) ||
- nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) ||
- nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) ||
- nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags) ||
- nla_put_u32(skb, IFLA_GRE_FWMARK, p->fwmark) ||
- nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index))
- goto nla_put_failure;
-
- if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
- t->encap.type) ||
- nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
- t->encap.sport) ||
- nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
- t->encap.dport) ||
- nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
- t->encap.flags))
- goto nla_put_failure;
-
- if (p->collect_md) {
- if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
- goto nla_put_failure;
- }
-
- if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, p->erspan_ver))
- goto nla_put_failure;
-
- if (p->erspan_ver == 1) {
- if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index))
- goto nla_put_failure;
- } else if (p->erspan_ver == 2) {
- if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, p->dir))
- goto nla_put_failure;
- if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, p->hwid))
- goto nla_put_failure;
- }
-
- return 0;
-
-nla_put_failure:
- return -EMSGSIZE;
-}
-
-static const struct nla_policy ip6gre_policy[RPL_IFLA_GRE_MAX + 1] = {
- [IFLA_GRE_LINK] = { .type = NLA_U32 },
- [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
- [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
- [IFLA_GRE_IKEY] = { .type = NLA_U32 },
- [IFLA_GRE_OKEY] = { .type = NLA_U32 },
- [IFLA_GRE_LOCAL] = { .len = sizeof_field(struct ipv6hdr, saddr) },
- [IFLA_GRE_REMOTE] = { .len = sizeof_field(struct ipv6hdr, daddr) },
- [IFLA_GRE_TTL] = { .type = NLA_U8 },
- [IFLA_GRE_ENCAP_LIMIT] = { .type = NLA_U8 },
- [IFLA_GRE_FLOWINFO] = { .type = NLA_U32 },
- [IFLA_GRE_FLAGS] = { .type = NLA_U32 },
- [IFLA_GRE_ENCAP_TYPE] = { .type = NLA_U16 },
- [IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 },
- [IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 },
- [IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 },
- [IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG },
- [IFLA_GRE_FWMARK] = { .type = NLA_U32 },
- [IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 },
- [IFLA_GRE_ERSPAN_VER] = { .type = NLA_U8 },
- [IFLA_GRE_ERSPAN_DIR] = { .type = NLA_U8 },
- [IFLA_GRE_ERSPAN_HWID] = { .type = NLA_U16 },
-};
-
-static void ip6erspan_tap_setup(struct net_device *dev)
-{
- ether_setup(dev);
-
- dev->netdev_ops = &ip6erspan_netdev_ops;
-#ifndef HAVE_NEEDS_FREE_NETDEV
- dev->destructor = free_netdev;
-#else
- dev->needs_free_netdev = true;
- dev->priv_destructor = ip6gre_dev_free;
-#endif
-
- dev->features |= NETIF_F_NETNS_LOCAL;
- dev->priv_flags &= ~IFF_TX_SKB_SHARING;
- dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
- netif_keep_dst(dev);
-}
-
-#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
-static int rpl_ip6erspan_newlink(struct net *src_net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[],
- struct netlink_ext_ack *extack)
-#else
-static int rpl_ip6erspan_newlink(struct net *src_net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[])
-#endif
-{
- struct ip6_tnl *nt = netdev_priv(dev);
- struct net *net = dev_net(dev);
- struct ip6gre_net *ign;
- int err;
-
- ip6gre_netlink_parms(data, &nt->parms);
- ign = net_generic(net, ip6gre_net_id);
-
- if (nt->parms.collect_md) {
- if (rtnl_dereference(ign->collect_md_tun_erspan))
- return -EEXIST;
- } else {
- if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
- return -EEXIST;
- }
-
-#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
- err = ip6gre_newlink_common(src_net, dev, tb, data, extack);
-#else
- err = ip6gre_newlink_common(src_net, dev, tb, data);
-#endif
- if (!err) {
- ip6erspan_tnl_link_config(nt, !tb[IFLA_MTU]);
- ip6erspan_tunnel_link_md(ign, nt);
- ip6gre_tunnel_link(net_generic(net, ip6gre_net_id), nt);
- }
- return err;
-}
-#define ip6erspan_newlink rpl_ip6erspan_newlink
-
-static void ip6erspan_tnl_link_config(struct ip6_tnl *t, int set_mtu)
-{
- ip6gre_tnl_link_config_common(t);
- ip6gre_tnl_link_config_route(t, set_mtu, ip6erspan_calc_hlen(t));
-}
-
-static int ip6erspan_tnl_change(struct ip6_tnl *t,
- const struct __ip6_tnl_parm *p, int set_mtu)
-{
- ip6gre_tnl_copy_tnl_parm(t, p);
- ip6erspan_tnl_link_config(t, set_mtu);
- return 0;
-}
-
-#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
-static int rpl_ip6erspan_changelink(struct net_device *dev, struct nlattr *tb[],
- struct nlattr *data[],
- struct netlink_ext_ack *extack)
-#else
-static int rpl_ip6erspan_changelink(struct net_device *dev, struct nlattr *tb[],
- struct nlattr *data[])
-#endif
-{
- struct ip6gre_net *ign = net_generic(dev_net(dev), ip6gre_net_id);
- struct __ip6_tnl_parm p;
- struct ip6_tnl *t;
-#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
- t = ip6gre_changelink_common(dev, tb, data, &p, extack);
-#else
- t = ip6gre_changelink_common(dev, tb, data, &p);
-#endif
- if (IS_ERR(t))
- return PTR_ERR(t);
-
- ip6gre_tunnel_unlink_md(ign, t);
- ip6gre_tunnel_unlink(ign, t);
- ip6erspan_tnl_change(t, &p, !tb[IFLA_MTU]);
- ip6erspan_tunnel_link_md(ign, t);
- ip6gre_tunnel_link(ign, t);
- return 0;
-}
-#define ip6erspan_changelink rpl_ip6erspan_changelink
-
-static struct rtnl_link_ops ip6gre_link_ops __read_mostly = {
- .kind = "ip6gre",
- .maxtype = RPL_IFLA_GRE_MAX,
- .policy = ip6gre_policy,
- .priv_size = sizeof(struct ip6_tnl),
- .setup = ip6gre_tunnel_setup,
- .validate = ip6gre_tunnel_validate,
- .newlink = ip6gre_newlink,
- .changelink = ip6gre_changelink,
- .dellink = ip6gre_dellink,
- .get_size = ip6gre_get_size,
- .fill_info = ip6gre_fill_info,
-#ifdef HAVE_GET_LINK_NET
- .get_link_net = ip6_tnl_get_link_net,
-#endif
-};
-
-static struct rtnl_link_ops ip6gre_tap_ops __read_mostly = {
- .kind = "ip6gretap",
- .maxtype = RPL_IFLA_GRE_MAX,
- .policy = ip6gre_policy,
- .priv_size = sizeof(struct ip6_tnl),
- .setup = ip6gre_tap_setup,
- .validate = ip6gre_tap_validate,
- .newlink = ip6gre_newlink,
- .changelink = ip6gre_changelink,
- .dellink = ip6gre_dellink,
- .get_size = ip6gre_get_size,
- .fill_info = ip6gre_fill_info,
-#ifdef HAVE_GET_LINK_NET
- .get_link_net = ip6_tnl_get_link_net,
-#endif
-};
-
-static struct rtnl_link_ops ip6erspan_tap_ops __read_mostly = {
- .kind = "ip6erspan",
- .maxtype = RPL_IFLA_GRE_MAX,
- .policy = ip6gre_policy,
- .priv_size = sizeof(struct ip6_tnl),
- .setup = ip6erspan_tap_setup,
- .validate = ip6erspan_tap_validate,
- .newlink = ip6erspan_newlink,
- .changelink = ip6erspan_changelink,
- .dellink = ip6gre_dellink,
- .get_size = ip6gre_get_size,
- .fill_info = ip6gre_fill_info,
-#ifdef HAVE_GET_LINK_NET
- .get_link_net = ip6_tnl_get_link_net,
-#endif
-};
-
-struct net_device *ip6erspan_fb_dev_create(struct net *net, const char *name,
- u8 name_assign_type)
-{
- struct nlattr *tb[IFLA_MAX + 1];
- struct net_device *dev;
- LIST_HEAD(list_kill);
- struct ip6_tnl *t;
- int err;
-
- memset(&tb, 0, sizeof(tb));
-
- dev = rtnl_create_link(net, (char *)name, name_assign_type,
- &ip6erspan_tap_ops, tb);
- if (IS_ERR(dev))
- return dev;
-
- t = netdev_priv(dev);
- t->parms.collect_md = true;
-
-#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
- err = ip6erspan_newlink(net, dev, tb, NULL, NULL);
-#else
- err = ip6erspan_newlink(net, dev, tb, NULL);
-#endif
- if (err < 0) {
- free_netdev(dev);
- return ERR_PTR(err);
- }
-
- /* openvswitch users expect packet sizes to be unrestricted,
- * so set the largest MTU we can.
- */
- err = ip6_tnl_change_mtu(dev, 64000);
- if (err)
- goto out;
-
- return dev;
-out:
- ip6gre_dellink(dev, &list_kill);
- unregister_netdevice_many(&list_kill);
- return ERR_PTR(err);
-}
-
-static struct vport_ops ovs_erspan6_vport_ops;
-
-static struct vport *erspan6_tnl_create(const struct vport_parms *parms)
-{
- struct net *net = ovs_dp_get_net(parms->dp);
- struct net_device *dev;
- struct vport *vport;
- int err;
-
- vport = ovs_vport_alloc(0, &ovs_erspan6_vport_ops, parms);
- if (IS_ERR(vport))
- return vport;
-
- rtnl_lock();
- dev = ip6erspan_fb_dev_create(net, parms->name, NET_NAME_USER);
- if (IS_ERR(dev)) {
- rtnl_unlock();
- ovs_vport_free(vport);
- return ERR_CAST(dev);
- }
-
- err = dev_change_flags(dev, dev->flags | IFF_UP, NULL);
- if (err < 0) {
- rtnl_delete_link(dev);
- rtnl_unlock();
- ovs_vport_free(vport);
- return ERR_PTR(err);
- }
-
- rtnl_unlock();
- return vport;
-}
-
-static struct vport *erspan6_create(const struct vport_parms *parms)
-{
- struct vport *vport;
-
- vport = erspan6_tnl_create(parms);
- if (IS_ERR(vport))
- return vport;
-
- return ovs_netdev_link(vport, parms->name);
-}
-
-#ifndef OVS_VPORT_TYPE_IP6ERSPAN
-/* Just until integration */
-#define OVS_VPORT_TYPE_IP6ERSPAN 108
-#endif
-static struct vport_ops ovs_erspan6_vport_ops = {
- .type = OVS_VPORT_TYPE_IP6ERSPAN,
- .create = erspan6_create,
- .send = __ip6erspan_tunnel_xmit,
-#ifndef USE_UPSTREAM_TUNNEL
- .fill_metadata_dst = gre_fill_metadata_dst,
-#endif
- .destroy = ovs_netdev_tunnel_destroy,
-};
-
-struct net_device *ip6gre_fb_dev_create(struct net *net, const char *name,
- u8 name_assign_type)
-{
- struct nlattr *tb[IFLA_MAX + 1];
- struct net_device *dev;
- LIST_HEAD(list_kill);
- struct ip6_tnl *t;
- int err;
-
- memset(&tb, 0, sizeof(tb));
-
- dev = rtnl_create_link(net, (char *)name, name_assign_type,
- &ip6gre_tap_ops, tb);
- if (IS_ERR(dev))
- return dev;
-
- t = netdev_priv(dev);
- t->parms.collect_md = true;
-
-#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
- err = ip6gre_newlink(net, dev, tb, NULL, NULL);
-#else
- err = ip6gre_newlink(net, dev, tb, NULL);
-#endif
- if (err < 0) {
- free_netdev(dev);
- return ERR_PTR(err);
- }
-
- /* openvswitch users expect packet sizes to be unrestricted,
- * so set the largest MTU we can.
- */
- err = ip6_tnl_change_mtu(dev, 64000);
- if (err)
- goto out;
-
- return dev;
-out:
- ip6gre_dellink(dev, &list_kill);
- unregister_netdevice_many(&list_kill);
- return ERR_PTR(err);
-}
-
-static struct vport_ops ovs_ip6gre_vport_ops;
-
-static struct vport *ip6gre_tnl_create(const struct vport_parms *parms)
-{
- struct net *net = ovs_dp_get_net(parms->dp);
- struct net_device *dev;
- struct vport *vport;
- int err;
-
- vport = ovs_vport_alloc(0, &ovs_ip6gre_vport_ops, parms);
- if (IS_ERR(vport))
- return vport;
-
- rtnl_lock();
- dev = ip6gre_fb_dev_create(net, parms->name, NET_NAME_USER);
- if (IS_ERR(dev)) {
- rtnl_unlock();
- ovs_vport_free(vport);
- return ERR_CAST(dev);
- }
-
- err = dev_change_flags(dev, dev->flags | IFF_UP, NULL);
- if (err < 0) {
- rtnl_delete_link(dev);
- rtnl_unlock();
- ovs_vport_free(vport);
- return ERR_PTR(err);
- }
-
- rtnl_unlock();
- return vport;
-}
-
-static struct vport *ip6gre_create(const struct vport_parms *parms)
-{
- struct vport *vport;
-
- vport = ip6gre_tnl_create(parms);
- if (IS_ERR(vport))
- return vport;
-
- return ovs_netdev_link(vport, parms->name);
-}
-
-static struct vport_ops ovs_ip6gre_vport_ops = {
- .type = OVS_VPORT_TYPE_IP6GRE,
- .create = ip6gre_create,
- .send = __ip6gre_tunnel_xmit,
-#ifndef USE_UPSTREAM_TUNNEL
- .fill_metadata_dst = gre_fill_metadata_dst,
-#endif
- .destroy = ovs_netdev_tunnel_destroy,
-};
-
-
-/*
- * And now the modules code and kernel interface.
- */
-
-int rpl_ip6gre_init(void)
-{
- int err;
-
- err = register_pernet_device(&ip6gre_net_ops);
- if (err < 0) {
- if (err == -EEXIST)
- goto ip6_gre_loaded;
- else
- goto out;
- }
-
- err = inet6_add_protocol(&ip6gre_protocol, IPPROTO_GRE);
- if (err < 0) {
- pr_info("%s: can't add protocol\n", __func__);
- unregister_pernet_device(&ip6gre_net_ops);
- /*
- * inet6_add_protocol will return a -1 if it fails
- * to grab the pointer but the vport initialization
- * expects a return value of -EEXIST. Set err to
- * -EEXIST here to ensure proper handling.
- */
- err = -EEXIST;
- goto ip6_gre_loaded;
- }
-
- pr_info("GRE over IPv6 tunneling driver\n");
- ovs_vport_ops_register(&ovs_ip6gre_vport_ops);
- ovs_vport_ops_register(&ovs_erspan6_vport_ops);
- return err;
-
-ip6_gre_loaded:
- /* Since IPv6 GRE only allows single receiver to be registerd,
- * we skip here so only transmit works, see:
- *
- * commit f9242b6b28d61295f2bf7e8adfb1060b382e5381
- * Author: David S. Miller <davem@davemloft.net>
- * Date: Tue Jun 19 18:56:21 2012 -0700
- *
- * inet: Sanitize inet{,6} protocol demux.
- *
- * OVS GRE receive part is disabled.
- */
- pr_info("GRE TX only over IPv6 tunneling driver\n");
- ip6_gre_loaded = true;
- ovs_vport_ops_register(&ovs_ip6gre_vport_ops);
- ovs_vport_ops_register(&ovs_erspan6_vport_ops);
-out:
- return err;
-}
-
-void rpl_ip6gre_fini(void)
-{
- ovs_vport_ops_unregister(&ovs_erspan6_vport_ops);
- ovs_vport_ops_unregister(&ovs_ip6gre_vport_ops);
- if (!ip6_gre_loaded) {
- inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE);
- unregister_pernet_device(&ip6gre_net_ops);
- }
-}
-#endif /* USE_UPSTREAM_TUNNEL */
diff --git a/datapath/linux/compat/ip6_output.c b/datapath/linux/compat/ip6_output.c
deleted file mode 100644
index 688884275..000000000
--- a/datapath/linux/compat/ip6_output.c
+++ /dev/null
@@ -1,470 +0,0 @@
-/*
- * Backported from upstream commit 9ef2e965e554
- * ("ipv6: drop frames with attached skb->sk in forwarding")
- *
- * IPv6 output functions
- * Linux INET6 implementation
- *
- * Authors:
- * Pedro Roque <roque@di.fc.ul.pt>
- *
- * Based on linux/net/ipv4/ip_output.c
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * Changes:
- * A.N.Kuznetsov : airthmetics in fragmentation.
- * extension headers are implemented.
- * route changes now work.
- * ip6_forward does not confuse sniffers.
- * etc.
- *
- * H. von Brand : Added missing #include <linux/string.h>
- * Imran Patel : frag id should be in NBO
- * Kazunori MIYAZAWA @USAGI
- * : add ip6_append_data and related functions
- * for datagram xmit
- */
-
-#include <linux/version.h>
-
-#ifndef HAVE_NF_IPV6_OPS_FRAGMENT
-
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/socket.h>
-#include <linux/net.h>
-#include <linux/netdevice.h>
-#include <linux/if_arp.h>
-#include <linux/in6.h>
-#include <linux/tcp.h>
-#include <linux/random.h>
-#include <linux/route.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv6.h>
-
-#include <net/sock.h>
-#include <net/snmp.h>
-
-#include <net/ipv6.h>
-#include <net/ndisc.h>
-#include <net/protocol.h>
-#include <net/ip6_route.h>
-#include <net/addrconf.h>
-#include <net/rawv6.h>
-#include <net/icmp.h>
-#include <net/xfrm.h>
-#include <net/checksum.h>
-#include <linux/mroute6.h>
-
-#define IP_IDENTS_SZ 2048u
-
-static atomic_t *ip_idents __read_mostly;
-static u32 *ip_tstamps __read_mostly;
-
-int __init ip6_output_init(void);
-void ip6_output_exit(void);
-
-/* In order to protect privacy, we add a perturbation to identifiers
- * if one generator is seldom used. This makes hard for an attacker
- * to infer how many packets were sent between two points in time.
- */
-static u32 rpl_ip_idents_reserve(u32 hash, int segs)
-{
- u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ;
- atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
- u32 old = ACCESS_ONCE(*p_tstamp);
- u32 now = (u32)jiffies;
- u32 delta = 0;
-
- if (old != now && cmpxchg(p_tstamp, old, now) == old)
- delta = prandom_u32_max(now - old);
-
- return atomic_add_return(segs + delta, p_id) - segs;
-}
-
-static u32 rpl___ipv6_select_ident(struct net *net, u32 hashrnd,
- const struct in6_addr *dst,
- const struct in6_addr *src)
-{
- u32 hash, id;
-
- hash = __ipv6_addr_jhash(dst, hashrnd);
- hash = __ipv6_addr_jhash(src, hash);
- hash ^= net_hash_mix(net);
-
- /* Treat id of 0 as unset and if we get 0 back from ip_idents_reserve,
- * set the hight order instead thus minimizing possible future
- * collisions.
- */
- id = rpl_ip_idents_reserve(hash, 1);
- if (unlikely(!id))
- id = 1 << 31;
-
- return id;
-}
-
-static __be32 rpl_ipv6_select_ident(struct net *net,
- const struct in6_addr *daddr,
- const struct in6_addr *saddr)
-{
- static u32 ip6_idents_hashrnd __read_mostly;
- u32 id;
-
- net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd));
-
- id = rpl___ipv6_select_ident(net, ip6_idents_hashrnd, daddr, saddr);
- return htonl(id);
-}
-
-static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
-{
- to->pkt_type = from->pkt_type;
- to->priority = from->priority;
- to->protocol = from->protocol;
- skb_dst_drop(to);
- skb_dst_set(to, dst_clone(skb_dst(from)));
- to->dev = from->dev;
- to->mark = from->mark;
-
-#ifdef CONFIG_NET_SCHED
- to->tc_index = from->tc_index;
-#endif
- nf_copy(to, from);
- skb_copy_secmark(to, from);
-}
-
-#ifdef HAVE_IP_FRAGMENT_TAKES_SOCK
-#define OUTPUT(skb) output(skb->sk, skb)
-#else
-#define OUTPUT(skb) output(skb)
-#endif
-
-int ip6_fragment(struct sock *sk, struct sk_buff *skb,
- int (*output)(OVS_VPORT_OUTPUT_PARAMS))
-{
- struct sk_buff *frag;
- struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
- struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
- inet6_sk(skb->sk) : NULL;
- struct ipv6hdr *tmp_hdr;
- struct frag_hdr *fh;
- unsigned int mtu, hlen, left, len;
- int hroom, troom;
- __be32 frag_id;
- int ptr, offset = 0, err = 0;
- u8 *prevhdr, nexthdr = 0;
- struct net *net = dev_net(skb_dst(skb)->dev);
-
- hlen = ip6_find_1stfragopt(skb, &prevhdr);
- nexthdr = *prevhdr;
-
- mtu = ip6_skb_dst_mtu(skb);
-
- /* We must not fragment if the socket is set to force MTU discovery
- * or if the skb it not generated by a local socket.
- */
- if (unlikely(!skb->ignore_df && skb->len > mtu))
- goto fail_toobig;
-
- if (IP6CB(skb)->frag_max_size) {
- if (IP6CB(skb)->frag_max_size > mtu)
- goto fail_toobig;
-
- /* don't send fragments larger than what we received */
- mtu = IP6CB(skb)->frag_max_size;
- if (mtu < IPV6_MIN_MTU)
- mtu = IPV6_MIN_MTU;
- }
-
- if (np && np->frag_size < mtu) {
- if (np->frag_size)
- mtu = np->frag_size;
- }
- mtu -= hlen + sizeof(struct frag_hdr);
-
- frag_id = rpl_ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
- &ipv6_hdr(skb)->saddr);
-
- hroom = LL_RESERVED_SPACE(rt->dst.dev);
- if (skb_has_frag_list(skb)) {
- int first_len = skb_pagelen(skb);
- struct sk_buff *frag2;
-
- if (first_len - hlen > mtu ||
- ((first_len - hlen) & 7) ||
- skb_cloned(skb) ||
- skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
- goto slow_path;
-
- skb_walk_frags(skb, frag) {
- /* Correct geometry. */
- if (frag->len > mtu ||
- ((frag->len & 7) && frag->next) ||
- skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
- goto slow_path_clean;
-
- /* Partially cloned skb? */
- if (skb_shared(frag))
- goto slow_path_clean;
-
- BUG_ON(frag->sk);
- if (skb->sk) {
- frag->sk = skb->sk;
- frag->destructor = sock_wfree;
- }
- skb->truesize -= frag->truesize;
- }
-
- err = 0;
- offset = 0;
- /* BUILD HEADER */
-
- *prevhdr = NEXTHDR_FRAGMENT;
- tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
- if (!tmp_hdr) {
- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_FRAGFAILS);
- err = -ENOMEM;
- goto fail;
- }
- frag = skb_shinfo(skb)->frag_list;
- skb_frag_list_init(skb);
-
- __skb_pull(skb, hlen);
- fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
- __skb_push(skb, hlen);
- skb_reset_network_header(skb);
- memcpy(skb_network_header(skb), tmp_hdr, hlen);
-
- fh->nexthdr = nexthdr;
- fh->reserved = 0;
- fh->frag_off = htons(IP6_MF);
- fh->identification = frag_id;
-
- first_len = skb_pagelen(skb);
- skb->data_len = first_len - skb_headlen(skb);
- skb->len = first_len;
- ipv6_hdr(skb)->payload_len = htons(first_len -
- sizeof(struct ipv6hdr));
-
- dst_hold(&rt->dst);
-
- for (;;) {
- /* Prepare header of the next frame,
- * before previous one went down. */
- if (frag) {
- frag->ip_summed = CHECKSUM_NONE;
- skb_reset_transport_header(frag);
- fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
- __skb_push(frag, hlen);
- skb_reset_network_header(frag);
- memcpy(skb_network_header(frag), tmp_hdr,
- hlen);
- offset += skb->len - hlen - sizeof(struct frag_hdr);
- fh->nexthdr = nexthdr;
- fh->reserved = 0;
- fh->frag_off = htons(offset);
- if (frag->next)
- fh->frag_off |= htons(IP6_MF);
- fh->identification = frag_id;
- ipv6_hdr(frag)->payload_len =
- htons(frag->len -
- sizeof(struct ipv6hdr));
- ip6_copy_metadata(frag, skb);
- }
-
- err = OUTPUT(skb);
- if (!err)
- IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
- IPSTATS_MIB_FRAGCREATES);
-
- if (err || !frag)
- break;
-
- skb = frag;
- frag = skb->next;
- skb->next = NULL;
- }
-
- kfree(tmp_hdr);
-
- if (err == 0) {
- IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
- IPSTATS_MIB_FRAGOKS);
- ip6_rt_put(rt);
- return 0;
- }
-
- kfree_skb_list(frag);
-
- IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
- IPSTATS_MIB_FRAGFAILS);
- ip6_rt_put(rt);
- return err;
-
-slow_path_clean:
- skb_walk_frags(skb, frag2) {
- if (frag2 == frag)
- break;
- frag2->sk = NULL;
- frag2->destructor = NULL;
- skb->truesize += frag2->truesize;
- }
- }
-
-slow_path:
- if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
- skb_checksum_help(skb))
- goto fail;
-
- left = skb->len - hlen; /* Space per frame */
- ptr = hlen; /* Where to start from */
-
- /*
- * Fragment the datagram.
- */
-
- *prevhdr = NEXTHDR_FRAGMENT;
- troom = rt->dst.dev->needed_tailroom;
-
- /*
- * Keep copying data until we run out.
- */
- while (left > 0) {
- len = left;
- /* IF: it doesn't fit, use 'mtu' - the data space left */
- if (len > mtu)
- len = mtu;
- /* IF: we are not sending up to and including the packet end
- then align the next start on an eight byte boundary */
- if (len < left) {
- len &= ~7;
- }
-
- /* Allocate buffer */
- frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
- hroom + troom, GFP_ATOMIC);
- if (!frag) {
- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_FRAGFAILS);
- err = -ENOMEM;
- goto fail;
- }
-
- /*
- * Set up data on packet
- */
-
- ip6_copy_metadata(frag, skb);
- skb_reserve(frag, hroom);
- skb_put(frag, len + hlen + sizeof(struct frag_hdr));
- skb_reset_network_header(frag);
- fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
- frag->transport_header = (frag->network_header + hlen +
- sizeof(struct frag_hdr));
-
- /*
- * Charge the memory for the fragment to any owner
- * it might possess
- */
- if (skb->sk)
- skb_set_owner_w(frag, skb->sk);
-
- /*
- * Copy the packet header into the new buffer.
- */
- skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
-
- /*
- * Build fragment header.
- */
- fh->nexthdr = nexthdr;
- fh->reserved = 0;
- fh->identification = frag_id;
-
- /*
- * Copy a block of the IP datagram.
- */
- BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
- len));
- left -= len;
-
- fh->frag_off = htons(offset);
- if (left > 0)
- fh->frag_off |= htons(IP6_MF);
- ipv6_hdr(frag)->payload_len = htons(frag->len -
- sizeof(struct ipv6hdr));
-
- ptr += len;
- offset += len;
-
- /*
- * Put this fragment into the sending queue.
- */
- err = OUTPUT(frag);
- if (err)
- goto fail;
-
- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_FRAGCREATES);
- }
- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_FRAGOKS);
- consume_skb(skb);
- return err;
-
-fail_toobig:
- if (skb->sk && dst_allfrag(skb_dst(skb)))
- sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
-
- skb->dev = skb_dst(skb)->dev;
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
- err = -EMSGSIZE;
-
-fail:
- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_FRAGFAILS);
- kfree_skb(skb);
- return err;
-}
-#undef OUTPUT
-
-int __init ip6_output_init(void)
-{
- ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL);
- if (!ip_idents) {
- pr_warn("IP: failed to allocate ip_idents\n");
- goto error;
- }
-
- prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));
-
- ip_tstamps = kcalloc(IP_IDENTS_SZ, sizeof(*ip_tstamps), GFP_KERNEL);
- if (!ip_tstamps) {
- pr_warn("IP: failed to allocate ip_tstamps\n");
- goto error_ip_idents_free;
- }
-
- return 0;
-
-error_ip_idents_free:
- kfree(ip_idents);
-error:
- return -ENOMEM;
-}
-
-void ip6_output_exit(void)
-{
- kfree(ip_tstamps);
- kfree(ip_idents);
-}
-
-#endif /* !HAVE_NF_IPV6_OPS_FRAGMENT */
diff --git a/datapath/linux/compat/ip6_tunnel.c b/datapath/linux/compat/ip6_tunnel.c
deleted file mode 100644
index 984a51bfb..000000000
--- a/datapath/linux/compat/ip6_tunnel.c
+++ /dev/null
@@ -1,2213 +0,0 @@
-/*
- * IPv6 tunneling device
- * Linux INET6 implementation
- *
- * Authors:
- * Ville Nuorvala <vnuorval@tcs.hut.fi>
- * Yasuyuki Kozakai <kozakai@linux-ipv6.org>
- *
- * Based on:
- * linux/net/ipv6/sit.c and linux/net/ipv4/ipip.c
- *
- * RFC 2473
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#ifndef USE_UPSTREAM_TUNNEL
-#include <linux/module.h>
-#include <linux/capability.h>
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/sockios.h>
-#include <linux/icmp.h>
-#include <linux/if.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/net.h>
-#include <linux/in6.h>
-#include <linux/netdevice.h>
-#include <linux/if_arp.h>
-#include <linux/icmpv6.h>
-#include <linux/init.h>
-#include <linux/route.h>
-#include <linux/rtnetlink.h>
-#include <linux/netfilter_ipv6.h>
-#include <linux/slab.h>
-#include <linux/hash.h>
-#include <linux/etherdevice.h>
-
-#include <linux/uaccess.h>
-#include <linux/atomic.h>
-
-#include <net/icmp.h>
-#include <net/ip.h>
-#include <net/ip_tunnels.h>
-#include <net/ipv6.h>
-#include <net/ip6_route.h>
-#include <net/addrconf.h>
-#include <net/ip6_tunnel.h>
-#include <net/xfrm.h>
-#include <net/dsfield.h>
-#include <net/inet_ecn.h>
-#include <net/net_namespace.h>
-#include <net/netns/generic.h>
-#include "gso.h"
-#include <net/dst_metadata.h>
-
-#include "vport-netdev.h"
-
-#define IP6_TUNNEL_HASH_SIZE_SHIFT 5
-#define IP6_TUNNEL_HASH_SIZE (1 << IP6_TUNNEL_HASH_SIZE_SHIFT)
-
-enum {
-#ifndef HAVE_IFLA_IPTUN_ENCAP_TYPE
- IFLA_IPTUN_ENCAP_TYPE = IFLA_IPTUN_6RD_RELAY_PREFIXLEN + 1,
- IFLA_IPTUN_ENCAP_FLAGS,
- IFLA_IPTUN_ENCAP_SPORT,
- IFLA_IPTUN_ENCAP_DPORT,
-#endif
-#ifndef HAVE_IFLA_IPTUN_COLLECT_METADATA
- IFLA_IPTUN_COLLECT_METADATA = IFLA_IPTUN_ENCAP_DPORT + 1,
-#endif
-#ifndef HAVE_IFLA_IPTUN_FWMARK
- IFLA_IPTUN_FWMARK = IFLA_IPTUN_COLLECT_METADATA + 1,
-#endif
- RPL__IFLA_IPTUN_MAX = IFLA_IPTUN_FWMARK + 1,
-};
-
-#define RPL_IFLA_IPTUN_MAX RPL__IFLA_IPTUN_MAX
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4,7,0)
-/* Undef the one from ip_tunnels.h - we need a different one here */
-/* At least I think... */
-#undef iptunnel_handle_offloads
-/* gre_handle_offloads() has different return type on older kernsl. */
-static void gre_nop_fix(struct sk_buff *skb) { }
-
-static void gre_csum_fix(struct sk_buff *skb)
-{
- struct gre_base_hdr *greh;
- __be32 *options;
- int gre_offset = skb_transport_offset(skb);
-
- greh = (struct gre_base_hdr *)skb_transport_header(skb);
- options = ((__be32 *)greh + 1);
-
- *options = 0;
- *(__sum16 *)options = csum_fold(skb_checksum(skb, gre_offset,
- skb->len - gre_offset, 0));
-}
-
-#define iptunnel_handle_offloads rpl__iptunnel_handle_offloads
-static int rpl__iptunnel_handle_offloads(struct sk_buff *skb, bool gre_csum,
- int __always_unused ignored)
-{
- int type = gre_csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE;
- gso_fix_segment_t fix_segment;
-
- if (gre_csum)
- fix_segment = gre_csum_fix;
- else
- fix_segment = gre_nop_fix;
-
- return ovs_iptunnel_handle_offloads(skb, type, fix_segment);
-}
-
-#endif
-static bool log_ecn_error = true;
-
-static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
-{
- u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);
-
- return hash_32(hash, IP6_TUNNEL_HASH_SIZE_SHIFT);
-}
-
-static int ip6_tnl_dev_init(struct net_device *dev);
-static void ip6_tnl_dev_setup(struct net_device *dev);
-static struct rtnl_link_ops ip6_link_ops __read_mostly;
-
-static unsigned int ip6_tnl_net_id __read_mostly;
-struct ip6_tnl_net {
- /* the IPv6 tunnel fallback device */
- struct net_device *fb_tnl_dev;
- /* lists for storing tunnels in use */
- struct ip6_tnl __rcu *tnls_r_l[IP6_TUNNEL_HASH_SIZE];
- struct ip6_tnl __rcu *tnls_wc[1];
- struct ip6_tnl __rcu **tnls[2];
- struct ip6_tnl __rcu *collect_md_tun;
-};
-
-static struct net_device_stats *ip6_get_stats(struct net_device *dev)
-{
- struct pcpu_sw_netstats tmp, sum = { 0 };
- int i;
-
- for_each_possible_cpu(i) {
- unsigned int start;
- const struct pcpu_sw_netstats *tstats =
- per_cpu_ptr(dev->tstats, i);
-
- do {
- start = u64_stats_fetch_begin_irq(&tstats->syncp);
- tmp.rx_packets = tstats->rx_packets;
- tmp.rx_bytes = tstats->rx_bytes;
- tmp.tx_packets = tstats->tx_packets;
- tmp.tx_bytes = tstats->tx_bytes;
- } while (u64_stats_fetch_retry_irq(&tstats->syncp, start));
-
- sum.rx_packets += tmp.rx_packets;
- sum.rx_bytes += tmp.rx_bytes;
- sum.tx_packets += tmp.tx_packets;
- sum.tx_bytes += tmp.tx_bytes;
- }
- dev->stats.rx_packets = sum.rx_packets;
- dev->stats.rx_bytes = sum.rx_bytes;
- dev->stats.tx_packets = sum.tx_packets;
- dev->stats.tx_bytes = sum.tx_bytes;
- return &dev->stats;
-}
-
-/**
- * ip6_tnl_lookup - fetch tunnel matching the end-point addresses
- * @remote: the address of the tunnel exit-point
- * @local: the address of the tunnel entry-point
- *
- * Return:
- * tunnel matching given end-points if found,
- * else fallback tunnel if its device is up,
- * else %NULL
- **/
-
-#define for_each_ip6_tunnel_rcu(start) \
- for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
-
-static struct ip6_tnl *
-ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local)
-{
- unsigned int hash = HASH(remote, local);
- struct ip6_tnl *t;
- struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
- struct in6_addr any;
-
- for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
- if (ipv6_addr_equal(local, &t->parms.laddr) &&
- ipv6_addr_equal(remote, &t->parms.raddr) &&
- (t->dev->flags & IFF_UP))
- return t;
- }
-
- memset(&any, 0, sizeof(any));
- hash = HASH(&any, local);
- for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
- if (ipv6_addr_equal(local, &t->parms.laddr) &&
- ipv6_addr_any(&t->parms.raddr) &&
- (t->dev->flags & IFF_UP))
- return t;
- }
-
- hash = HASH(remote, &any);
- for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
- if (ipv6_addr_equal(remote, &t->parms.raddr) &&
- ipv6_addr_any(&t->parms.laddr) &&
- (t->dev->flags & IFF_UP))
- return t;
- }
-
- t = rcu_dereference(ip6n->collect_md_tun);
- if (t && t->dev->flags & IFF_UP)
- return t;
-
- t = rcu_dereference(ip6n->tnls_wc[0]);
- if (t && (t->dev->flags & IFF_UP))
- return t;
-
- return NULL;
-}
-
-/**
- * ip6_tnl_bucket - get head of list matching given tunnel parameters
- * @p: parameters containing tunnel end-points
- *
- * Description:
- * ip6_tnl_bucket() returns the head of the list matching the
- * &struct in6_addr entries laddr and raddr in @p.
- *
- * Return: head of IPv6 tunnel list
- **/
-
-static struct ip6_tnl __rcu **
-ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct __ip6_tnl_parm *p)
-{
- const struct in6_addr *remote = &p->raddr;
- const struct in6_addr *local = &p->laddr;
- unsigned int h = 0;
- int prio = 0;
-
- if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) {
- prio = 1;
- h = HASH(remote, local);
- }
- return &ip6n->tnls[prio][h];
-}
-
-/**
- * ip6_tnl_link - add tunnel to hash table
- * @t: tunnel to be added
- **/
-
-static void
-ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
-{
- struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms);
-
- if (t->parms.collect_md)
- rcu_assign_pointer(ip6n->collect_md_tun, t);
- rcu_assign_pointer(t->next , rtnl_dereference(*tp));
- rcu_assign_pointer(*tp, t);
-}
-
-/**
- * ip6_tnl_unlink - remove tunnel from hash table
- * @t: tunnel to be removed
- **/
-
-static void
-ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
-{
- struct ip6_tnl __rcu **tp;
- struct ip6_tnl *iter;
-
- if (t->parms.collect_md)
- rcu_assign_pointer(ip6n->collect_md_tun, NULL);
-
- for (tp = ip6_tnl_bucket(ip6n, &t->parms);
- (iter = rtnl_dereference(*tp)) != NULL;
- tp = &iter->next) {
- if (t == iter) {
- rcu_assign_pointer(*tp, t->next);
- break;
- }
- }
-}
-
-#ifdef HAVE_NEEDS_FREE_NETDEV
-static void ip6_dev_free(struct net_device *dev)
-{
- struct ip6_tnl *t = netdev_priv(dev);
-
- gro_cells_destroy(&t->gro_cells);
- dst_cache_destroy(&t->dst_cache);
- free_percpu(dev->tstats);
-}
-
-#endif
-static int ip6_tnl_create2(struct net_device *dev)
-{
- struct ip6_tnl *t = netdev_priv(dev);
- struct net *net = dev_net(dev);
- struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
- int err;
-
- t = netdev_priv(dev);
-
- dev->rtnl_link_ops = &ip6_link_ops;
- err = register_netdevice(dev);
- if (err < 0)
- goto out;
-
- strcpy(t->parms.name, dev->name);
-
- dev_hold(dev);
- ip6_tnl_link(ip6n, t);
- return 0;
-
-out:
- return err;
-}
-
-/**
- * ip6_tnl_create - create a new tunnel
- * @p: tunnel parameters
- * @pt: pointer to new tunnel
- *
- * Description:
- * Create tunnel matching given parameters.
- *
- * Return:
- * created tunnel or error pointer
- **/
-
-static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
-{
- struct net_device *dev;
- struct ip6_tnl *t;
- char name[IFNAMSIZ];
- int err = -ENOMEM;
-
- if (p->name[0])
- strlcpy(name, p->name, IFNAMSIZ);
- else
- strlcpy(name, "ovs-ip6tnl%d", IFNAMSIZ);
-
- dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN,
- ip6_tnl_dev_setup);
- if (!dev)
- goto failed;
-
- dev_net_set(dev, net);
-
- t = netdev_priv(dev);
- t->parms = *p;
- t->net = dev_net(dev);
- err = ip6_tnl_create2(dev);
- if (err < 0)
- goto failed_free;
-
- return t;
-
-failed_free:
- free_netdev(dev);
-failed:
- return ERR_PTR(err);
-}
-
-/**
- * ip6_tnl_locate - find or create tunnel matching given parameters
- * @p: tunnel parameters
- * @create: != 0 if allowed to create new tunnel if no match found
- *
- * Description:
- * ip6_tnl_locate() first tries to locate an existing tunnel
- * based on @parms. If this is unsuccessful, but @create is set a new
- * tunnel device is created and registered for use.
- *
- * Return:
- * matching tunnel or error pointer
- **/
-
-static struct ip6_tnl *ip6_tnl_locate(struct net *net,
- struct __ip6_tnl_parm *p, int create)
-{
- const struct in6_addr *remote = &p->raddr;
- const struct in6_addr *local = &p->laddr;
- struct ip6_tnl __rcu **tp;
- struct ip6_tnl *t;
- struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
-
- for (tp = ip6_tnl_bucket(ip6n, p);
- (t = rtnl_dereference(*tp)) != NULL;
- tp = &t->next) {
- if (ipv6_addr_equal(local, &t->parms.laddr) &&
- ipv6_addr_equal(remote, &t->parms.raddr)) {
- if (create)
- return ERR_PTR(-EEXIST);
-
- return t;
- }
- }
- if (!create)
- return ERR_PTR(-ENODEV);
- return ip6_tnl_create(net, p);
-}
-
-/**
- * ip6_tnl_dev_uninit - tunnel device uninitializer
- * @dev: the device to be destroyed
- *
- * Description:
- * ip6_tnl_dev_uninit() removes tunnel from its list
- **/
-
-static void
-ip6_tnl_dev_uninit(struct net_device *dev)
-{
- struct ip6_tnl *t = netdev_priv(dev);
- struct net *net = t->net;
- struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
-
- if (dev == ip6n->fb_tnl_dev)
- RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
- else
- ip6_tnl_unlink(ip6n, t);
- dst_cache_reset(&t->dst_cache);
- dev_put(dev);
-}
-
-/**
- * parse_tvl_tnl_enc_lim - handle encapsulation limit option
- * @skb: received socket buffer
- *
- * Return:
- * 0 if none was found,
- * else index to encapsulation limit
- **/
-
-__u16 rpl_ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
-{
- const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)raw;
- unsigned int nhoff = raw - skb->data;
- unsigned int off = nhoff + sizeof(*ipv6h);
- u8 next, nexthdr = ipv6h->nexthdr;
-
- while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) {
- struct ipv6_opt_hdr *hdr;
- u16 optlen;
-
- if (!pskb_may_pull(skb, off + sizeof(*hdr)))
- break;
-
- hdr = (struct ipv6_opt_hdr *)(skb->data + off);
- if (nexthdr == NEXTHDR_FRAGMENT) {
- struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr;
- if (frag_hdr->frag_off)
- break;
- optlen = 8;
- } else if (nexthdr == NEXTHDR_AUTH) {
- optlen = (hdr->hdrlen + 2) << 2;
- } else {
- optlen = ipv6_optlen(hdr);
- }
- /* cache hdr->nexthdr, since pskb_may_pull() might
- * invalidate hdr
- */
- next = hdr->nexthdr;
- if (nexthdr == NEXTHDR_DEST) {
- u16 i = 2;
-
- /* Remember : hdr is no longer valid at this point. */
- if (!pskb_may_pull(skb, off + optlen))
- break;
-
- while (1) {
- struct ipv6_tlv_tnl_enc_lim *tel;
-
- /* No more room for encapsulation limit */
- if (i + sizeof(*tel) > optlen)
- break;
-
- tel = (struct ipv6_tlv_tnl_enc_lim *)(skb->data + off + i);
- /* return index of option if found and valid */
- if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT &&
- tel->length == 1)
- return i + off - nhoff;
- /* else jump to next option */
- if (tel->type)
- i += tel->length + 2;
- else
- i++;
- }
- }
- nexthdr = next;
- off += optlen;
- }
- return 0;
-}
-
-static int
-ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
- u8 type, u8 code, int offset, __be32 info)
-{
- return PACKET_REJECT;
-}
-
-static int
-ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
- u8 type, u8 code, int offset, __be32 info)
-{
- return PACKET_REJECT;
-}
-
-static int ip4ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
- const struct ipv6hdr *ipv6h,
- struct sk_buff *skb)
-{
- __u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK;
-
- if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
- ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield);
-
- return IP6_ECN_decapsulate(ipv6h, skb);
-}
-
-static int ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
- const struct ipv6hdr *ipv6h,
- struct sk_buff *skb)
-{
- if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
- ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb));
-
- return IP6_ECN_decapsulate(ipv6h, skb);
-}
-
-__u32 rpl_ip6_tnl_get_cap(struct ip6_tnl *t,
- const struct in6_addr *laddr,
- const struct in6_addr *raddr)
-{
- struct __ip6_tnl_parm *p = &t->parms;
- int ltype = ipv6_addr_type(laddr);
- int rtype = ipv6_addr_type(raddr);
- __u32 flags = 0;
-
- if (ltype == IPV6_ADDR_ANY || rtype == IPV6_ADDR_ANY) {
- flags = IP6_TNL_F_CAP_PER_PACKET;
- } else if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
- rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
- !((ltype|rtype) & IPV6_ADDR_LOOPBACK) &&
- (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) {
- if (ltype&IPV6_ADDR_UNICAST)
- flags |= IP6_TNL_F_CAP_XMIT;
- if (rtype&IPV6_ADDR_UNICAST)
- flags |= IP6_TNL_F_CAP_RCV;
- }
- return flags;
-}
-
-/* called with rcu_read_lock() */
-int rpl_ip6_tnl_rcv_ctl(struct ip6_tnl *t,
- const struct in6_addr *laddr,
- const struct in6_addr *raddr)
-{
- struct __ip6_tnl_parm *p = &t->parms;
- int ret = 0;
- struct net *net = t->net;
-
- if ((p->flags & IP6_TNL_F_CAP_RCV) ||
- ((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
- (rpl_ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_RCV))) {
- struct net_device *ldev = NULL;
-
- if (p->link)
- ldev = dev_get_by_index_rcu(net, p->link);
-
- if ((ipv6_addr_is_multicast(laddr) ||
- likely(ipv6_chk_addr(net, laddr, ldev, 0))) &&
- ((p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) ||
- likely(!ipv6_chk_addr(net, raddr, NULL, 0))))
- ret = 1;
- }
- return ret;
-}
-
/* Common receive path for ip6 tunnels: validate the checksum/sequence
 * expectations configured on the tunnel, account the packet in the
 * per-cpu stats and hand it to the OVS vport layer.
 *
 * Always returns 0; on the drop paths the caller (OVS) still owns and
 * frees both tun_dst and the skb (see comment at the drop label).
 */
static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
			 const struct tnl_ptk_info *tpi,
			 struct metadata_dst *tun_dst,
			 int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
						     const struct ipv6hdr *ipv6h,
						     struct sk_buff *skb),
			 bool log_ecn_err)
{
	struct pcpu_sw_netstats *tstats;

	/* The packet's TUNNEL_CSUM flag must match the tunnel's
	 * configured expectation, in both directions.
	 */
	if ((!(tpi->flags & TUNNEL_CSUM) &&
	     (tunnel->parms.i_flags & TUNNEL_CSUM)) ||
	    ((tpi->flags & TUNNEL_CSUM) &&
	     !(tunnel->parms.i_flags & TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	/* Enforce in-order delivery when sequence numbers are enabled. */
	if (tunnel->parms.i_flags & TUNNEL_SEQ) {
		if (!(tpi->flags & TUNNEL_SEQ) ||
		    (tunnel->i_seqno &&
		     (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

#if 0
	/* Upstream's netdev delivery and ECN-decap path; compiled out in
	 * the OVS compat build, which delivers via netdev_port_receive()
	 * below instead.
	 */
	/* Warning: All skb pointers will be invalidated! */
	if (tunnel->dev->type == ARPHRD_ETHER) {
		if (!pskb_may_pull(skb, ETH_HLEN)) {
			tunnel->dev->stats.rx_length_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}

		ipv6h = ipv6_hdr(skb);
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	skb_reset_network_header(skb);
	memset(skb->cb, 0, sizeof(struct inet6_skb_parm));

	__skb_tunnel_rx(skb, tunnel->dev, tunnel->net);

	err = dscp_ecn_decapsulate(tunnel, ipv6h, skb);
	if (unlikely(err)) {
		if (log_ecn_err)
			net_info_ratelimited("non-ECT from %pI6 with DS=%#x\n",
					     &ipv6h->saddr,
					     ipv6_get_dsfield(ipv6h));
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

#endif
	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	skb_reset_mac_header(skb);
	skb_scrub_packet(skb, false);
	skb->protocol = eth_type_trans(skb, tunnel->dev);
	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);

	/* Attach the tunnel metadata and deliver to the OVS vport. */
	ovs_skb_dst_set(skb, (struct dst_entry *)tun_dst);
	netdev_port_receive(skb, &tun_dst->u.tun_info);
	return 0;

drop:
	/* In OVS case caller will free tun_dst and skb */
#if 0
	if (tun_dst)
		dst_release((struct dst_entry *)tun_dst);
	kfree_skb(skb);
#endif
	return 0;
}
-
/* Exported OVS entry point for IPv6-in-IPv6 receive: delegates to the
 * common receive path with the v6 DSCP/ECN handler.
 */
int rpl_ip6_tnl_rcv(struct ip6_tnl *t, struct sk_buff *skb,
		    const struct tnl_ptk_info *tpi,
		    struct metadata_dst *tun_dst,
		    bool log_ecn_err)
{
	return __ip6_tnl_rcv(t, skb, tpi, tun_dst, ip6ip6_dscp_ecn_decapsulate,
			     log_ecn_err);
}
-
/* Pre-built packet-type info for the protocol-less ipXip6 decap paths:
 * only the inner EtherType matters — no GRE-style flags/key/seq.
 */
static const struct tnl_ptk_info tpi_v6 = {
	/* no tunnel info required for ipxip6. */
	.proto = htons(ETH_P_IPV6),
};

static const struct tnl_ptk_info tpi_v4 = {
	/* no tunnel info required for ipxip6. */
	.proto = htons(ETH_P_IP),
};
-
/* Protocol handler for ipip6 / ip6ip6 packets: look up the matching
 * tunnel, validate xfrm policy and address ownership, strip the outer
 * header and hand off to the common receive path.
 *
 * Returns 0 when the packet was consumed (including drops) and -1 when
 * no tunnel matched, so the caller can try other handlers.
 */
static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
		      const struct tnl_ptk_info *tpi,
		      int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
						  const struct ipv6hdr *ipv6h,
						  struct sk_buff *skb))
{
	struct ip6_tnl *t;
	const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	struct metadata_dst *tun_dst = NULL;
	int ret = -1;

	rcu_read_lock();
	t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr);

	if (t) {
		u8 tproto = READ_ONCE(t->parms.proto);

		/* Tunnel must carry this inner protocol (0 = any). */
		if (tproto != ipproto && tproto != 0)
			goto drop;
		if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
			goto drop;
		if (!rpl_ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr))
			goto drop;
		if (iptunnel_pull_header(skb, 0, tpi->proto, false))
			goto drop;
		/* collect-md tunnels need per-packet tunnel metadata. */
		if (t->parms.collect_md) {
			ovs_ipv6_tun_rx_dst(tun_dst, skb, 0, 0, 0);
			if (!tun_dst)
				goto drop;
		}
		ret = __ip6_tnl_rcv(t, skb, tpi, tun_dst, dscp_ecn_decapsulate,
				    log_ecn_error);
	}

	rcu_read_unlock();

	return ret;

drop:
	rcu_read_unlock();
	kfree_skb(skb);
	return 0;
}
-
/* Inner-IPv4 handler (IPPROTO_IPIP carried over IPv6). */
static int ip4ip6_rcv(struct sk_buff *skb)
{
	return ipxip6_rcv(skb, IPPROTO_IPIP, &tpi_v4,
			  ip4ip6_dscp_ecn_decapsulate);
}
-
/* Inner-IPv6 handler (IPPROTO_IPV6 carried over IPv6). */
static int ip6ip6_rcv(struct sk_buff *skb)
{
	return ipxip6_rcv(skb, IPPROTO_IPV6, &tpi_v6,
			  ip6ip6_dscp_ecn_decapsulate);
}
-
/* Scratch space for building the RFC 2473 tunnel-encapsulation-limit
 * destination option: the generic txoptions header plus the 8-byte
 * option buffer that ops.dst1opt points into.
 */
struct ipv6_tel_txoption {
	struct ipv6_txoptions ops;
	__u8 dst_opt[8];
};
-
-static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
-{
- memset(opt, 0, sizeof(struct ipv6_tel_txoption));
-
- opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT;
- opt->dst_opt[3] = 1;
- opt->dst_opt[4] = encap_limit;
- opt->dst_opt[5] = IPV6_TLV_PADN;
- opt->dst_opt[6] = 1;
-
- opt->ops.dst1opt = (struct ipv6_opt_hdr *) opt->dst_opt;
- opt->ops.opt_nflen = 8;
-}
-
/**
 * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
 *   @t: the outgoing tunnel device
 *   @hdr: IPv6 header from the incoming packet
 *
 * Description:
 *   Avoid trivial tunneling loop by checking that tunnel exit-point
 *   doesn't match source of incoming packet.
 *
 * Return:
 *   1 if conflict,
 *   0 else
 **/

static inline bool
ip6_tnl_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr)
{
	/* Packet sourced from our own exit point would loop forever. */
	return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
}
-
/* Decide whether this tunnel may transmit between @laddr and @raddr.
 * collect-md tunnels always may; otherwise the tunnel must be
 * xmit-capable, @laddr must be configured locally, and @raddr must not
 * be local (unless IP6_TNL_F_ALLOW_LOCAL_REMOTE is set).
 */
int rpl_ip6_tnl_xmit_ctl(struct ip6_tnl *t,
			 const struct in6_addr *laddr,
			 const struct in6_addr *raddr)
{
	struct __ip6_tnl_parm *p = &t->parms;
	int ret = 0;
	struct net *net = t->net;

	if (t->parms.collect_md)
		return 1;

	if ((p->flags & IP6_TNL_F_CAP_XMIT) ||
	    ((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
	     (rpl_ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_XMIT))) {
		struct net_device *ldev = NULL;

		rcu_read_lock();
		if (p->link)
			ldev = dev_get_by_index_rcu(net, p->link);

		if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0)))
			pr_warn("%s xmit: Local address not yet configured!\n",
				p->name);
		else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) &&
			 !ipv6_addr_is_multicast(raddr) &&
			 unlikely(ipv6_chk_addr(net, raddr, NULL, 0)))
			pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
				p->name);
		else
			ret = 1;
		rcu_read_unlock();
	}
	return ret;
}
-
/* Prepend one IPv6 extension header to the skb: copy the option in,
 * chain the previous next-header value behind it, and advance *proto
 * to the new header's type.
 */
static void ipv6_push_exthdr(struct sk_buff *skb, u8 *proto,
			     u8 type, struct ipv6_opt_hdr *opt)
{
	struct ipv6_opt_hdr *h =
		(struct ipv6_opt_hdr *)skb_push(skb, ipv6_optlen(opt));

	memcpy(h, opt, ipv6_optlen(opt));
	h->nexthdr = *proto;
	*proto = type;
}
-
/* Push the destination-options header, if any; used here only for the
 * tunnel encapsulation limit option built by init_tel_txopt().
 */
void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt,
			 u8 *proto)
{
	if (opt->dst1opt)
		ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst1opt);
}
-
/**
 * ip6_tnl_xmit - encapsulate packet and send
 *   @skb: the outgoing socket buffer
 *   @dev: the outgoing tunnel device
 *   @dsfield: dscp code for outer header
 *   @fl6: flow of tunneled packet
 *   @encap_limit: encapsulation limit
 *   @pmtu: Path MTU is stored if packet is too big
 *   @proto: next header value
 *
 * Description:
 *   Build new header and do some sanity checks on the packet before sending
 *   it.
 *
 * Return:
 *   0 on success
 *   -1 fail
 *   %-EMSGSIZE message too big. return mtu in this case.
 **/

int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
		 struct flowi6 *fl6, int encap_limit, __u32 *pmtu,
		 __u8 proto)
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct net *net = t->net;
	struct net_device_stats *stats = &t->dev->stats;
	struct ipv6hdr *ipv6h;
	struct ipv6_tel_txoption opt;
	struct dst_entry *dst = NULL, *ndst = NULL;
	struct net_device *tdev;
	int mtu;
	unsigned int eth_hlen = t->dev->type == ARPHRD_ETHER ? ETH_HLEN : 0;
	unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen;
	unsigned int max_headroom = psh_hlen;
	bool use_cache = false;
	u8 hop_limit;
	int err = -1;

	/* collect-md tunnels take the TTL from per-packet metadata and
	 * always do a fresh route lookup.
	 */
	if (t->parms.collect_md) {
		hop_limit = skb_tunnel_info(skb)->key.ttl;
		goto route_lookup;
	} else {
		hop_limit = t->parms.hop_limit;
	}

	/* NBMA tunnel */
	if (ipv6_addr_any(&t->parms.raddr)) {
		if (skb->protocol == htons(ETH_P_IPV6)) {
			struct in6_addr *addr6;
			struct neighbour *neigh;
			int addr_type;

			if (!skb_dst(skb))
				goto tx_err_link_failure;

			/* Derive the tunnel destination from the inner
			 * packet's neighbour entry.
			 */
			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (!neigh)
				goto tx_err_link_failure;

			addr6 = (struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY)
				addr6 = &ipv6_hdr(skb)->daddr;

			memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
			neigh_release(neigh);
		}
	} else if (t->parms.proto != 0 && !(t->parms.flags &
					    (IP6_TNL_F_USE_ORIG_TCLASS |
					     IP6_TNL_F_USE_ORIG_FWMARK))) {
		/* enable the cache only if neither the outer protocol nor the
		 * routing decision depends on the current inner header value
		 */
		use_cache = true;
	}

	if (use_cache)
		dst = dst_cache_get(&t->dst_cache);

	if (!rpl_ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr))
		goto tx_err_link_failure;

	if (!dst) {
route_lookup:
		/* add dsfield to flowlabel for route lookup */
		fl6->flowlabel = ip6_make_flowinfo(dsfield, fl6->flowlabel);

		dst = ip6_route_output(net, NULL, fl6);

		if (dst->error)
			goto tx_err_link_failure;
		dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			dst = NULL;
			goto tx_err_link_failure;
		}
		/* collect-md: pick a source address for the route found. */
		if (t->parms.collect_md &&
		    ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
				       &fl6->daddr, 0, &fl6->saddr))
			goto tx_err_link_failure;
		ndst = dst;
	}

	tdev = dst->dev;

	/* Routing back onto the tunnel device itself would loop. */
	if (tdev == dev) {
		stats->collisions++;
		net_warn_ratelimited("%s: Local routing loop detected!\n",
				     t->parms.name);
		goto tx_err_dst_release;
	}
	mtu = dst_mtu(dst) - eth_hlen - psh_hlen - t->tun_hlen;
	if (encap_limit >= 0) {
		/* the encap-limit destination option costs 8 bytes */
		max_headroom += 8;
		mtu -= 8;
	}
	if (skb->protocol == htons(ETH_P_IPV6)) {
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	} else if (mtu < 576) {
		mtu = 576;
	}

// FIX ME
//	skb_dst_update_pmtu(skb, mtu);
	if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
		*pmtu = mtu;
		err = -EMSGSIZE;
		goto tx_err_dst_release;
	}

	/* Decay the recent-error counter; while it is non-zero keep
	 * signalling link failure back to the sender.
	 */
	if (t->err_count > 0) {
		if (time_before(jiffies,
				t->err_time + IP6TUNNEL_ERR_TIMEO)) {
			t->err_count--;

			dst_link_failure(skb);
		} else {
			t->err_count = 0;
		}
	}

	skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom += LL_RESERVED_SPACE(tdev);

	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
		struct sk_buff *new_skb;

		new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb)
			goto tx_err_dst_release;

		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		consume_skb(skb);
		skb = new_skb;
	}

	if (t->parms.collect_md) {
		if (t->encap.type != TUNNEL_ENCAP_NONE)
			goto tx_err_dst_release;
	} else {
		if (use_cache && ndst)
			dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr);
	}
	skb_dst_set(skb, dst);

	if (encap_limit >= 0) {
		init_tel_txopt(&opt, encap_limit);
		ipv6_push_frag_opts(skb, &opt.ops, &proto);
	}
	hop_limit = hop_limit ? : ip6_dst_hoplimit(dst);

	/* Calculate max headroom for all the headers and adjust
	 * needed_headroom if necessary.
	 */
	max_headroom = LL_RESERVED_SPACE(dst->dev) + sizeof(struct ipv6hdr)
			+ dst->header_len + t->hlen;
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	err = ip6_tnl_encap(skb, t, &proto, fl6);
	if (err)
		return err;

	/* Finally build the outer IPv6 header and transmit. */
	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	ipv6h = ipv6_hdr(skb);
	ip6_flow_hdr(ipv6h, dsfield,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6));
	ipv6h->hop_limit = hop_limit;
	ipv6h->nexthdr = proto;
	ipv6h->saddr = fl6->saddr;
	ipv6h->daddr = fl6->daddr;
	ip6tunnel_xmit(NULL, skb, dev);
	return 0;
tx_err_link_failure:
	stats->tx_carrier_errors++;
	dst_link_failure(skb);
tx_err_dst_release:
	dst_release(dst);
	return err;
}
EXPORT_SYMBOL(ip6_tnl_xmit);
-
/* Encapsulate an IPv4 packet into this IPv6 tunnel.  Returns 0 on
 * success, -1 on failure (emitting an ICMP FRAG_NEEDED first when the
 * inner packet exceeds the tunnel PMTU).
 */
static inline int
ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	const struct iphdr *iph = ip_hdr(skb);
	int encap_limit = -1;
	struct flowi6 fl6;
	__u8 dsfield;
	__u32 mtu;
	u8 tproto;
	int err;

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	tproto = READ_ONCE(t->parms.proto);
	if (tproto != IPPROTO_IPIP && tproto != 0)
		return -1;

	if (t->parms.collect_md) {
		/* Flow parameters come from per-packet tunnel metadata. */
		struct ip_tunnel_info *tun_info;
		const struct ip_tunnel_key *key;

		tun_info = skb_tunnel_info(skb);
		if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
			     ip_tunnel_info_af(tun_info) != AF_INET6))
			return -1;
		key = &tun_info->key;
		memset(&fl6, 0, sizeof(fl6));
		fl6.flowi6_proto = IPPROTO_IPIP;
		fl6.daddr = key->u.ipv6.dst;
		fl6.flowlabel = key->label;
		dsfield = key->tos;
	} else {
		/* Flow parameters come from the tunnel configuration. */
		if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
			encap_limit = t->parms.encap_limit;

		memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
		fl6.flowi6_proto = IPPROTO_IPIP;

		if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
			dsfield = ipv4_get_dsfield(iph);
		else
			dsfield = ip6_tclass(t->parms.flowinfo);
		if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
			fl6.flowi6_mark = skb->mark;
		else
			fl6.flowi6_mark = t->parms.fwmark;
	}

// FIX ME
//	fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);

	if (iptunnel_handle_offloads(skb, true, SKB_GSO_IPXIP6))
		return -1;

	dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph));

	skb_set_inner_ipproto(skb, IPPROTO_IPIP);

	err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
			   IPPROTO_IPIP);
	if (err != 0) {
		/* XXX: send ICMP error even if DF is not set. */
		if (err == -EMSGSIZE)
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
				  htonl(mtu));
		return -1;
	}

	return 0;
}
-
/* Encapsulate an IPv6 packet into this IPv6 tunnel, honouring the
 * inbound tunnel-encapsulation-limit option if present.  Returns 0 on
 * success, -1 on failure (emitting ICMPv6 PKT_TOOBIG on PMTU excess).
 */
static inline int
ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	int encap_limit = -1;
	__u16 offset;
	struct flowi6 fl6;
	__u8 dsfield;
	__u32 mtu;
	u8 tproto;
	int err;

	tproto = READ_ONCE(t->parms.proto);
	if ((tproto != IPPROTO_IPV6 && tproto != 0) ||
	    ip6_tnl_addr_conflict(t, ipv6h))
		return -1;

	if (t->parms.collect_md) {
		/* Flow parameters come from per-packet tunnel metadata. */
		struct ip_tunnel_info *tun_info;
		const struct ip_tunnel_key *key;

		tun_info = skb_tunnel_info(skb);
		if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
			     ip_tunnel_info_af(tun_info) != AF_INET6))
			return -1;
		key = &tun_info->key;
		memset(&fl6, 0, sizeof(fl6));
		fl6.flowi6_proto = IPPROTO_IPV6;
		fl6.daddr = key->u.ipv6.dst;
		fl6.flowlabel = key->label;
		dsfield = key->tos;
	} else {
		offset = rpl_ip6_tnl_parse_tlv_enc_lim(skb,
						       skb_network_header(skb));
		/*
		 * ip6_tnl_parse_tlv_enc_lim() might
		 * have reallocated skb->head
		 */
		ipv6h = ipv6_hdr(skb);
		if (offset > 0) {
			struct ipv6_tlv_tnl_enc_lim *tel;

			tel = (void *)&skb_network_header(skb)[offset];
			/* A zero limit means we may not nest further. */
			if (tel->encap_limit == 0) {
				icmpv6_send(skb, ICMPV6_PARAMPROB,
					    ICMPV6_HDR_FIELD, offset + 2);
				return -1;
			}
			encap_limit = tel->encap_limit - 1;
		} else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) {
			encap_limit = t->parms.encap_limit;
		}

		memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
		fl6.flowi6_proto = IPPROTO_IPV6;

		if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
			dsfield = ipv6_get_dsfield(ipv6h);
		else
			dsfield = ip6_tclass(t->parms.flowinfo);
		if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
			fl6.flowlabel |= ip6_flowlabel(ipv6h);
		if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
			fl6.flowi6_mark = skb->mark;
		else
			fl6.flowi6_mark = t->parms.fwmark;
	}

// FIX ME
//	fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);

	if (iptunnel_handle_offloads(skb, true, SKB_GSO_IPXIP6))
		return -1;

	dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h));

	skb_set_inner_ipproto(skb, IPPROTO_IPV6);

	err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
			   IPPROTO_IPV6);
	if (err != 0) {
		if (err == -EMSGSIZE)
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		return -1;
	}

	return 0;
}
-
-static netdev_tx_t
-ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev)
-{
- struct ip6_tnl *t = netdev_priv(dev);
- struct net_device_stats *stats = &t->dev->stats;
- int ret;
-
- switch (skb->protocol) {
- case htons(ETH_P_IP):
- ret = ip4ip6_tnl_xmit(skb, dev);
- break;
- case htons(ETH_P_IPV6):
- ret = ip6ip6_tnl_xmit(skb, dev);
- break;
- default:
- goto tx_err;
- }
-
- if (ret < 0)
- goto tx_err;
-
- return NETDEV_TX_OK;
-
-tx_err:
- stats->tx_errors++;
- stats->tx_dropped++;
- kfree_skb(skb);
- return NETDEV_TX_OK;
-}
-
/* Recompute everything derived from the tunnel parameters: device
 * addresses, the cached flowi template, capability flags, and the
 * device MTU based on the route to the remote endpoint.
 */
static void ip6_tnl_link_config(struct ip6_tnl *t)
{
	struct net_device *dev = t->dev;
	struct __ip6_tnl_parm *p = &t->parms;
	struct flowi6 *fl6 = &t->fl.u.ip6;
	int t_hlen;

	memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
	memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));

	/* Set up flowi template */
	fl6->saddr = p->laddr;
	fl6->daddr = p->raddr;
	fl6->flowi6_oif = p->link;
	fl6->flowlabel = 0;

	if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
		fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
	if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL))
		fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;

	/* Re-derive the capability bits from the current addresses. */
	p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET);
	p->flags |= rpl_ip6_tnl_get_cap(t, &p->laddr, &p->raddr);

	if (p->flags&IP6_TNL_F_CAP_XMIT && p->flags&IP6_TNL_F_CAP_RCV)
		dev->flags |= IFF_POINTOPOINT;
	else
		dev->flags &= ~IFF_POINTOPOINT;

	t->tun_hlen = 0;
	t->hlen = t->encap_hlen + t->tun_hlen;
	t_hlen = t->hlen + sizeof(struct ipv6hdr);

	if (p->flags & IP6_TNL_F_CAP_XMIT) {
		int strict = (ipv6_addr_type(&p->raddr) &
			      (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));

		struct rt6_info *rt = rt6_lookup(t->net,
						 &p->raddr, &p->laddr,
						 p->link, strict);

		if (!rt)
			return;

		/* Size the device MTU off the underlay route, keeping
		 * room for the encap-limit option unless suppressed.
		 */
		if (rt->dst.dev) {
			dev->hard_header_len = rt->dst.dev->hard_header_len +
				t_hlen;

			dev->mtu = rt->dst.dev->mtu - t_hlen;
			if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
				dev->mtu -= 8;

			if (dev->mtu < IPV6_MIN_MTU)
				dev->mtu = IPV6_MIN_MTU;
		}
		ip6_rt_put(rt);
	}
}
-
/**
 * ip6_tnl_change - update the tunnel parameters
 *   @t: tunnel to be changed
 *   @p: tunnel configuration parameters
 *
 * Description:
 *   ip6_tnl_change() updates the tunnel parameters
 **/

static int
ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
{
	t->parms.laddr = p->laddr;
	t->parms.raddr = p->raddr;
	t->parms.flags = p->flags;
	t->parms.hop_limit = p->hop_limit;
	t->parms.encap_limit = p->encap_limit;
	t->parms.flowinfo = p->flowinfo;
	t->parms.link = p->link;
	t->parms.proto = p->proto;
	t->parms.fwmark = p->fwmark;
	/* Cached routes may point at the old endpoints — drop them, then
	 * rebuild all derived state.
	 */
	dst_cache_reset(&t->dst_cache);
	ip6_tnl_link_config(t);
	return 0;
}
-
/* Apply new parameters to a live tunnel: unhash it, wait for RCU
 * readers to drain, apply the change, then rehash under the (possibly
 * new) addresses and notify userspace.
 */
static int ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
{
	struct net *net = t->net;
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
	int err;

	ip6_tnl_unlink(ip6n, t);
	synchronize_net();
	err = ip6_tnl_change(t, p);
	ip6_tnl_link(ip6n, t);
	netdev_state_change(t->dev);
	return err;
}
-
/* Restricted update path for the per-netns fallback device. */
static int ip6_tnl0_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
{
	/* for default tnl0 device allow to change only the proto */
	t->parms.proto = p->proto;
	netdev_state_change(t->dev);
	return 0;
}
-
-static void
-ip6_tnl_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm *u)
-{
- p->laddr = u->laddr;
- p->raddr = u->raddr;
- p->flags = u->flags;
- p->hop_limit = u->hop_limit;
- p->encap_limit = u->encap_limit;
- p->flowinfo = u->flowinfo;
- p->link = u->link;
- p->proto = u->proto;
- memcpy(p->name, u->name, sizeof(u->name));
-}
-
-static void
-ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p)
-{
- u->laddr = p->laddr;
- u->raddr = p->raddr;
- u->flags = p->flags;
- u->hop_limit = p->hop_limit;
- u->encap_limit = p->encap_limit;
- u->flowinfo = p->flowinfo;
- u->link = p->link;
- u->proto = p->proto;
- memcpy(u->name, p->name, sizeof(u->name));
-}
-
/**
 * ip6_tnl_ioctl - configure ipv6 tunnels from userspace
 *   @dev: virtual device associated with tunnel
 *   @ifr: parameters passed from userspace
 *   @cmd: command to be performed
 *
 * Description:
 *   ip6_tnl_ioctl() is used for managing IPv6 tunnels
 *   from userspace.
 *
 *   The possible commands are the following:
 *     %SIOCGETTUNNEL: get tunnel parameters for device
 *     %SIOCADDTUNNEL: add tunnel matching given tunnel parameters
 *     %SIOCCHGTUNNEL: change tunnel parameters to those given
 *     %SIOCDELTUNNEL: delete tunnel
 *
 *   The fallback device "ovs-ip6tnl0", created during module
 *   initialization, can be used for creating other tunnel devices.
 *
 * Return:
 *   0 on success,
 *   %-EFAULT if unable to copy data to or from userspace,
 *   %-EPERM if current process hasn't %CAP_NET_ADMIN set
 *   %-EINVAL if passed tunnel parameters are invalid,
 *   %-EEXIST if changing a tunnel's parameters would cause a conflict
 *   %-ENODEV if attempting to change or delete a nonexisting device
 **/

static int
ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip6_tnl_parm p;
	struct __ip6_tnl_parm p1;
	struct ip6_tnl *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);

	memset(&p1, 0, sizeof(p1));

	switch (cmd) {
	case SIOCGETTUNNEL:
		/* On the fallback device the user-supplied parameters
		 * select which tunnel to report; elsewhere report the
		 * device's own.
		 */
		if (dev == ip6n->fb_tnl_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			ip6_tnl_parm_from_user(&p1, &p);
			t = ip6_tnl_locate(net, &p1, 0);
			if (IS_ERR(t))
				t = netdev_priv(dev);
		} else {
			memset(&p, 0, sizeof(p));
		}
		ip6_tnl_parm_to_user(&p, &t->parms);
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) {
			err = -EFAULT;
		}
		break;
	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			break;
		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			break;
		err = -EINVAL;
		if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP &&
		    p.proto != 0)
			break;
		ip6_tnl_parm_from_user(&p1, &p);
		/* For SIOCADDTUNNEL, create the tunnel if not found. */
		t = ip6_tnl_locate(net, &p1, cmd == SIOCADDTUNNEL);
		if (cmd == SIOCCHGTUNNEL) {
			if (!IS_ERR(t)) {
				/* p1 matches another existing tunnel. */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else
				t = netdev_priv(dev);
			if (dev == ip6n->fb_tnl_dev)
				err = ip6_tnl0_update(t, &p1);
			else
				err = ip6_tnl_update(t, &p1);
		}
		if (!IS_ERR(t)) {
			err = 0;
			/* Report the parameters actually in effect. */
			ip6_tnl_parm_to_user(&p, &t->parms);
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
				err = -EFAULT;

		} else {
			err = PTR_ERR(t);
		}
		break;
	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			break;

		if (dev == ip6n->fb_tnl_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				break;
			err = -ENOENT;
			ip6_tnl_parm_from_user(&p1, &p);
			t = ip6_tnl_locate(net, &p1, 0);
			if (IS_ERR(t))
				break;
			err = -EPERM;
			/* The fallback tunnel itself may not be deleted. */
			if (t->dev == ip6n->fb_tnl_dev)
				break;
			dev = t->dev;
		}
		err = 0;
		unregister_netdevice(dev);
		break;
	default:
		err = -EINVAL;
	}
	return err;
}
-
-/**
- * ip6_tnl_change_mtu - change mtu manually for tunnel device
- * @dev: virtual device associated with tunnel
- * @new_mtu: the new mtu
- *
- * Return:
- * 0 on success,
- * %-EINVAL if mtu too small
- **/
-
-int rpl_ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
-{
- struct ip6_tnl *tnl = netdev_priv(dev);
-
- if (tnl->parms.proto == IPPROTO_IPV6) {
- if (new_mtu < IPV6_MIN_MTU)
- return -EINVAL;
- } else {
- if (new_mtu < ETH_MIN_MTU)
- return -EINVAL;
- }
- if (new_mtu > 0xFFF8 - dev->hard_header_len)
- return -EINVAL;
- dev->mtu = new_mtu;
- return 0;
-}
-
#ifdef HAVE_NDO_GET_IFLINK
/* ndo_get_iflink: report the underlay device this tunnel is bound to
 * (0 when not bound to a specific link).
 */
int rpl_ip6_tnl_get_iflink(const struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);

	return t->parms.link;
}

#endif
/* Registry of optional encapsulation handlers (e.g. FOU/GUE over UDP),
 * indexed by encap type.  Slots are claimed and released atomically.
 */
const struct ip6_tnl_encap_ops __rcu *
		rpl_ip6tun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly;

int rpl_ip6_tnl_encap_add_ops(const struct ip6_tnl_encap_ops *ops,
			      unsigned int num)
{
	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	/* Claim the slot only if it is currently empty. */
	return !cmpxchg((const struct ip6_tnl_encap_ops **)
			&rpl_ip6tun_encaps[num],
			NULL, ops) ? 0 : -1;
}
-
/* Unregister an encapsulation handler previously added with
 * rpl_ip6_tnl_encap_add_ops().
 */
int rpl_ip6_tnl_encap_del_ops(const struct ip6_tnl_encap_ops *ops,
			      unsigned int num)
{
	int ret;

	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	/* Release the slot only if it still holds @ops. */
	ret = (cmpxchg((const struct ip6_tnl_encap_ops **)
		       &rpl_ip6tun_encaps[num],
		       ops, NULL) == ops) ? 0 : -1;

	/* Let in-flight readers finish before the caller may free ops. */
	synchronize_net();

	return ret;
}
-
-int rpl_ip6_tnl_encap_setup(struct ip6_tnl *t,
- struct ip_tunnel_encap *ipencap)
-{
- int hlen;
-
- memset(&t->encap, 0, sizeof(t->encap));
-
- hlen = ip6_encap_hlen(ipencap);
- if (hlen < 0)
- return hlen;
-
- t->encap.type = ipencap->type;
- t->encap.sport = ipencap->sport;
- t->encap.dport = ipencap->dport;
- t->encap.flags = ipencap->flags;
-
- t->encap_hlen = hlen;
- t->hlen = t->encap_hlen + t->tun_hlen;
-
- return 0;
-}
-
/* netdev callbacks for ip6 tunnel devices; the #ifdefs paper over
 * RHEL7's extended net_device_ops layout and older kernels lacking
 * ndo_get_iflink.
 */
static const struct net_device_ops ip6_tnl_netdev_ops = {
	.ndo_init	= ip6_tnl_dev_init,
	.ndo_uninit	= ip6_tnl_dev_uninit,
	.ndo_start_xmit = ip6_tnl_start_xmit,
	.ndo_do_ioctl	= ip6_tnl_ioctl,
#ifdef HAVE_RHEL7_MAX_MTU
	.ndo_size	= sizeof(struct net_device_ops),
	.extended.ndo_change_mtu = ip6_tnl_change_mtu,
#else
	.ndo_change_mtu = ip6_tnl_change_mtu,
#endif
	.ndo_get_stats	= ip6_get_stats,
#ifdef HAVE_NDO_GET_IFLINK
	.ndo_get_iflink = ip6_tnl_get_iflink,
#endif
};
-
/* Offload feature set advertised by all ipXipX tunnel devices. */
#define IPXIPX_FEATURES (NETIF_F_SG |		\
			 NETIF_F_FRAGLIST |	\
			 NETIF_F_HIGHDMA |	\
			 NETIF_F_GSO_SOFTWARE |	\
			 NETIF_F_HW_CSUM)
-
/**
 * ip6_tnl_dev_setup - setup virtual tunnel device
 *   @dev: virtual device associated with tunnel
 *
 * Description:
 *   Initialize function pointers and device parameters
 **/

static void ip6_tnl_dev_setup(struct net_device *dev)
{
	dev->netdev_ops = &ip6_tnl_netdev_ops;
	/* Kernels before needs_free_netdev use a single destructor. */
#ifndef HAVE_NEEDS_FREE_NETDEV
	dev->destructor = free_netdev;
#else
	dev->needs_free_netdev = true;
	dev->priv_destructor = ip6_dev_free;
#endif

	dev->type = ARPHRD_TUNNEL6;
	dev->flags |= IFF_NOARP;
	dev->addr_len = sizeof(struct in6_addr);
	dev->features |= NETIF_F_LLTX;
	netif_keep_dst(dev);

	dev->features |= IPXIPX_FEATURES;
	dev->hw_features |= IPXIPX_FEATURES;

	/* This perm addr will be used as interface identifier by IPv6 */
	dev->addr_assign_type = NET_ADDR_RANDOM;
	eth_random_addr(dev->perm_addr);
}
-
-
/**
 * ip6_tnl_dev_init_gen - general initializer for all tunnel devices
 *   @dev: virtual device associated with tunnel
 **/

static inline int
ip6_tnl_dev_init_gen(struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	int ret;
	int t_hlen;

	t->dev = dev;
	t->net = dev_net(dev);
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	ret = dst_cache_init(&t->dst_cache, GFP_KERNEL);
	if (ret)
		goto free_stats;

	ret = gro_cells_init(&t->gro_cells, dev);
	if (ret)
		goto destroy_dst;

	t->tun_hlen = 0;
	t->hlen = t->encap_hlen + t->tun_hlen;
	t_hlen = t->hlen + sizeof(struct ipv6hdr);

	dev->type = ARPHRD_TUNNEL6;
	dev->hard_header_len = LL_MAX_HEADER + t_hlen;
	/* MTU budget: Ethernet payload minus tunnel headers, minus the
	 * 8-byte encap-limit option unless it is suppressed.
	 */
	dev->mtu = ETH_DATA_LEN - t_hlen;
	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
		dev->mtu -= 8;
#ifdef HAVE_NET_DEVICE_MAX_MTU
	dev->min_mtu = ETH_MIN_MTU;
	dev->max_mtu = 0xFFF8 - dev->hard_header_len;
#endif

	return 0;

destroy_dst:
	dst_cache_destroy(&t->dst_cache);
free_stats:
	free_percpu(dev->tstats);
	dev->tstats = NULL;

	return ret;
}
-
/**
 * ip6_tnl_dev_init - initializer for all non fallback tunnel devices
 *   @dev: virtual device associated with tunnel
 **/

static int ip6_tnl_dev_init(struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	int err = ip6_tnl_dev_init_gen(dev);

	if (err)
		return err;
	ip6_tnl_link_config(t);
	/* collect-md tunnels are pinned to their netns and keep dsts. */
	if (t->parms.collect_md) {
		dev->features |= NETIF_F_NETNS_LOCAL;
		netif_keep_dst(dev);
	}
	return 0;
}
-
/**
 * ip6_fb_tnl_dev_init - initializer for fallback tunnel device
 *   @dev: fallback device
 *
 * Return: 0
 **/

static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct net *net = dev_net(dev);
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);

	t->parms.proto = IPPROTO_IPV6;
	/* Extra reference: the fallback device lives for the netns. */
	dev_hold(dev);

	rcu_assign_pointer(ip6n->tnls_wc[0], t);
	return 0;
}
-
/* rtnl_link_ops validate callback: only the tunnel protocol needs an
 * explicit check; the #ifdef covers kernels with/without extack.
 */
#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK
static int rpl_ip6_tnl_validate(struct nlattr *tb[], struct nlattr *data[],
				struct netlink_ext_ack *extack)
#else
static int rpl_ip6_tnl_validate(struct nlattr *tb[], struct nlattr *data[])
#endif
{
	u8 proto;

	if (!data || !data[IFLA_IPTUN_PROTO])
		return 0;

	proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
	/* 0 means "any inner protocol". */
	if (proto != IPPROTO_IPV6 &&
	    proto != IPPROTO_IPIP &&
	    proto != 0)
		return -EINVAL;

	return 0;
}
#define ip6_tnl_validate rpl_ip6_tnl_validate
-
/* Extract tunnel parameters from IFLA_IPTUN_* netlink attributes;
 * absent attributes leave the corresponding field zeroed.
 */
static void ip6_tnl_netlink_parms(struct nlattr *data[],
				  struct __ip6_tnl_parm *parms)
{
	memset(parms, 0, sizeof(*parms));

	if (!data)
		return;

	if (data[IFLA_IPTUN_LINK])
		parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);

	if (data[IFLA_IPTUN_LOCAL])
		parms->laddr = nla_get_in6_addr(data[IFLA_IPTUN_LOCAL]);

	if (data[IFLA_IPTUN_REMOTE])
		parms->raddr = nla_get_in6_addr(data[IFLA_IPTUN_REMOTE]);

	if (data[IFLA_IPTUN_TTL])
		parms->hop_limit = nla_get_u8(data[IFLA_IPTUN_TTL]);

	if (data[IFLA_IPTUN_ENCAP_LIMIT])
		parms->encap_limit = nla_get_u8(data[IFLA_IPTUN_ENCAP_LIMIT]);

	if (data[IFLA_IPTUN_FLOWINFO])
		parms->flowinfo = nla_get_be32(data[IFLA_IPTUN_FLOWINFO]);

	if (data[IFLA_IPTUN_FLAGS])
		parms->flags = nla_get_u32(data[IFLA_IPTUN_FLAGS]);

	if (data[IFLA_IPTUN_PROTO])
		parms->proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);

	if (data[IFLA_IPTUN_COLLECT_METADATA])
		parms->collect_md = true;

	if (data[IFLA_IPTUN_FWMARK])
		parms->fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]);
}
-
-static bool ip6_tnl_netlink_encap_parms(struct nlattr *data[],
- struct ip_tunnel_encap *ipencap)
-{
- bool ret = false;
-
- memset(ipencap, 0, sizeof(*ipencap));
-
- if (!data)
- return ret;
-
- if (data[IFLA_IPTUN_ENCAP_TYPE]) {
- ret = true;
- ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]);
- }
-
- if (data[IFLA_IPTUN_ENCAP_FLAGS]) {
- ret = true;
- ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]);
- }
-
- if (data[IFLA_IPTUN_ENCAP_SPORT]) {
- ret = true;
- ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]);
- }
-
- if (data[IFLA_IPTUN_ENCAP_DPORT]) {
- ret = true;
- ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]);
- }
-
- return ret;
-}
-
/* rtnl newlink callback: parse encap + tunnel attributes, refuse
 * duplicates (only one collect-md tunnel per netns), then create and
 * register the device.
 */
#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
static int rpl_ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
			       struct nlattr *tb[], struct nlattr *data[],
			       struct netlink_ext_ack *extack)
#else
static int rpl_ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
			       struct nlattr *tb[], struct nlattr *data[])
#endif
{
	struct net *net = dev_net(dev);
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
	struct ip_tunnel_encap ipencap;
	struct ip6_tnl *nt, *t;
	int err;

	nt = netdev_priv(dev);

	if (ip6_tnl_netlink_encap_parms(data, &ipencap)) {
		err = ip6_tnl_encap_setup(nt, &ipencap);
		if (err < 0)
			return err;
	}

	ip6_tnl_netlink_parms(data, &nt->parms);

	if (nt->parms.collect_md) {
		/* At most one collect-md tunnel per namespace. */
		if (rtnl_dereference(ip6n->collect_md_tun))
			return -EEXIST;
	} else {
		t = ip6_tnl_locate(net, &nt->parms, 0);
		if (!IS_ERR(t))
			return -EEXIST;
	}

	err = ip6_tnl_create2(dev);
	if (!err && tb[IFLA_MTU])
		ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));

	return err;
}
#define ip6_tnl_newlink rpl_ip6_tnl_newlink
-
/* rtnl changelink callback: re-parse attributes and apply them to an
 * existing tunnel.  The fallback device and collect-md mode cannot be
 * changed this way.
 */
#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
static int rpl_ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
				  struct nlattr *data[],
				  struct netlink_ext_ack *extack)
#else
static int rpl_ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
				  struct nlattr *data[])
#endif
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct __ip6_tnl_parm p;
	struct net *net = t->net;
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
	struct ip_tunnel_encap ipencap;

	if (dev == ip6n->fb_tnl_dev)
		return -EINVAL;

	if (ip6_tnl_netlink_encap_parms(data, &ipencap)) {
		int err = ip6_tnl_encap_setup(t, &ipencap);

		if (err < 0)
			return err;
	}
	ip6_tnl_netlink_parms(data, &p);
	if (p.collect_md)
		return -EINVAL;

	/* The new parameters must not collide with another tunnel. */
	t = ip6_tnl_locate(net, &p, 0);
	if (!IS_ERR(t)) {
		if (t->dev != dev)
			return -EEXIST;
	} else
		t = netdev_priv(dev);

	return ip6_tnl_update(t, &p);
}
#define ip6_tnl_changelink rpl_ip6_tnl_changelink
-
-static void ip6_tnl_dellink(struct net_device *dev, struct list_head *head)
-{
- struct net *net = dev_net(dev);
- struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
-
- if (dev != ip6n->fb_tnl_dev)
- unregister_netdevice_queue(dev, head);
-}
-
-static size_t ip6_tnl_get_size(const struct net_device *dev)
-{
- return
- /* IFLA_IPTUN_LINK */
- nla_total_size(4) +
- /* IFLA_IPTUN_LOCAL */
- nla_total_size(sizeof(struct in6_addr)) +
- /* IFLA_IPTUN_REMOTE */
- nla_total_size(sizeof(struct in6_addr)) +
- /* IFLA_IPTUN_TTL */
- nla_total_size(1) +
- /* IFLA_IPTUN_ENCAP_LIMIT */
- nla_total_size(1) +
- /* IFLA_IPTUN_FLOWINFO */
- nla_total_size(4) +
- /* IFLA_IPTUN_FLAGS */
- nla_total_size(4) +
- /* IFLA_IPTUN_PROTO */
- nla_total_size(1) +
- /* IFLA_IPTUN_ENCAP_TYPE */
- nla_total_size(2) +
- /* IFLA_IPTUN_ENCAP_FLAGS */
- nla_total_size(2) +
- /* IFLA_IPTUN_ENCAP_SPORT */
- nla_total_size(2) +
- /* IFLA_IPTUN_ENCAP_DPORT */
- nla_total_size(2) +
- /* IFLA_IPTUN_COLLECT_METADATA */
- nla_total_size(0) +
- /* IFLA_IPTUN_FWMARK */
- nla_total_size(4) +
- 0;
-}
-
-static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev)
-{
- struct ip6_tnl *tunnel = netdev_priv(dev);
- struct __ip6_tnl_parm *parm = &tunnel->parms;
-
- if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
- nla_put_in6_addr(skb, IFLA_IPTUN_LOCAL, &parm->laddr) ||
- nla_put_in6_addr(skb, IFLA_IPTUN_REMOTE, &parm->raddr) ||
- nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) ||
- nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) ||
- nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) ||
- nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags) ||
- nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto) ||
- nla_put_u32(skb, IFLA_IPTUN_FWMARK, parm->fwmark))
- goto nla_put_failure;
-
- if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, tunnel->encap.type) ||
- nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT, tunnel->encap.sport) ||
- nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT, tunnel->encap.dport) ||
- nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS, tunnel->encap.flags))
- goto nla_put_failure;
-
- if (parm->collect_md)
- if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA))
- goto nla_put_failure;
-
- return 0;
-
-nla_put_failure:
- return -EMSGSIZE;
-}
-
-#ifdef HAVE_GET_LINK_NET
-struct net *rpl_ip6_tnl_get_link_net(const struct net_device *dev)
-{
- struct ip6_tnl *tunnel = netdev_priv(dev);
-
- return tunnel->net;
-}
-
-#endif
-static const struct nla_policy ip6_tnl_policy[RPL_IFLA_IPTUN_MAX + 1] = {
- [IFLA_IPTUN_LINK] = { .type = NLA_U32 },
- [IFLA_IPTUN_LOCAL] = { .len = sizeof(struct in6_addr) },
- [IFLA_IPTUN_REMOTE] = { .len = sizeof(struct in6_addr) },
- [IFLA_IPTUN_TTL] = { .type = NLA_U8 },
- [IFLA_IPTUN_ENCAP_LIMIT] = { .type = NLA_U8 },
- [IFLA_IPTUN_FLOWINFO] = { .type = NLA_U32 },
- [IFLA_IPTUN_FLAGS] = { .type = NLA_U32 },
- [IFLA_IPTUN_PROTO] = { .type = NLA_U8 },
- [IFLA_IPTUN_ENCAP_TYPE] = { .type = NLA_U16 },
- [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 },
- [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 },
- [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 },
- [IFLA_IPTUN_COLLECT_METADATA] = { .type = NLA_FLAG },
- [IFLA_IPTUN_FWMARK] = { .type = NLA_U32 },
-};
-
-static struct rtnl_link_ops ip6_link_ops __read_mostly = {
- .kind = "ip6tnl",
- .maxtype = RPL_IFLA_IPTUN_MAX,
- .policy = ip6_tnl_policy,
- .priv_size = sizeof(struct ip6_tnl),
- .setup = ip6_tnl_dev_setup,
- .validate = ip6_tnl_validate,
- .newlink = ip6_tnl_newlink,
- .changelink = ip6_tnl_changelink,
- .dellink = ip6_tnl_dellink,
- .get_size = ip6_tnl_get_size,
- .fill_info = ip6_tnl_fill_info,
-#ifdef HAVE_GET_LINK_NET
- .get_link_net = ip6_tnl_get_link_net,
-#endif
-};
-
-static struct xfrm6_tunnel ip4ip6_handler __read_mostly = {
- .handler = ip4ip6_rcv,
- .err_handler = ip4ip6_err,
- .priority = 1,
-};
-
-static struct xfrm6_tunnel ip6ip6_handler __read_mostly = {
- .handler = ip6ip6_rcv,
- .err_handler = ip6ip6_err,
- .priority = 1,
-};
-
-static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head *list)
-{
- struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
- struct net_device *dev, *aux;
- int h;
- struct ip6_tnl *t;
-
- for_each_netdev_safe(net, dev, aux)
- if (dev->rtnl_link_ops == &ip6_link_ops)
- unregister_netdevice_queue(dev, list);
-
- for (h = 0; h < IP6_TUNNEL_HASH_SIZE; h++) {
- t = rtnl_dereference(ip6n->tnls_r_l[h]);
- while (t) {
- /* If dev is in the same netns, it has already
- * been added to the list by the previous loop.
- */
- if (!net_eq(dev_net(t->dev), net))
- unregister_netdevice_queue(t->dev, list);
- t = rtnl_dereference(t->next);
- }
- }
-}
-
-static int __net_init ip6_tnl_init_net(struct net *net)
-{
- struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
- struct ip6_tnl *t = NULL;
- int err;
-
- ip6n->tnls[0] = ip6n->tnls_wc;
- ip6n->tnls[1] = ip6n->tnls_r_l;
-
- err = -ENOMEM;
- ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ovs-ip6tnl0",
- NET_NAME_UNKNOWN, ip6_tnl_dev_setup);
-
- if (!ip6n->fb_tnl_dev)
- goto err_alloc_dev;
- dev_net_set(ip6n->fb_tnl_dev, net);
- ip6n->fb_tnl_dev->rtnl_link_ops = &ip6_link_ops;
- /* FB netdevice is special: we have one, and only one per netns.
- * Allowing to move it to another netns is clearly unsafe.
- */
- ip6n->fb_tnl_dev->features |= NETIF_F_NETNS_LOCAL;
-
- err = ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev);
- if (err < 0)
- goto err_register;
-
- err = register_netdev(ip6n->fb_tnl_dev);
- if (err < 0)
- goto err_register;
-
- t = netdev_priv(ip6n->fb_tnl_dev);
-
- strcpy(t->parms.name, ip6n->fb_tnl_dev->name);
- return 0;
-
-err_register:
- free_netdev(ip6n->fb_tnl_dev);
-err_alloc_dev:
- return err;
-}
-
-static void __net_exit ip6_tnl_exit_batch_net(struct list_head *net_list)
-{
- struct net *net;
- LIST_HEAD(list);
-
- rtnl_lock();
- list_for_each_entry(net, net_list, exit_list)
- ip6_tnl_destroy_tunnels(net, &list);
- unregister_netdevice_many(&list);
- rtnl_unlock();
-}
-
-static struct pernet_operations ip6_tnl_net_ops = {
- .init = ip6_tnl_init_net,
- .exit_batch = ip6_tnl_exit_batch_net,
- .id = &ip6_tnl_net_id,
- .size = sizeof(struct ip6_tnl_net),
-};
-
-/**
- * ip6_tunnel_init - register protocol and reserve needed resources
- *
- * Return: 0 on success
- **/
-
-int rpl_ip6_tunnel_init(void)
-{
- int err;
-
-#if 0
- if (!ipv6_mod_enabled())
- return -EOPNOTSUPP;
-#endif
- err = register_pernet_device(&ip6_tnl_net_ops);
- if (err < 0) {
- pr_err("%s: can't register ip6_tnl pernet device\n",
- __func__);
- goto out_pernet;
- }
-
- err = xfrm6_tunnel_register(&ip4ip6_handler, AF_INET);
- if (err < 0) {
- pr_err("%s: can't register ip4ip6\n", __func__);
- goto out_ip4ip6;
- }
-
- err = xfrm6_tunnel_register(&ip6ip6_handler, AF_INET6);
- if (err < 0) {
- pr_err("%s: can't register ip6ip6\n", __func__);
- goto out_ip6ip6;
- }
-
- err = rtnl_link_register(&ip6_link_ops);
- if (err < 0) {
- pr_err("%s: can't register ip6_lin_ops\n",
- __func__);
- goto rtnl_link_failed;
- }
- return 0;
-
-rtnl_link_failed:
- xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6);
-out_ip6ip6:
- xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET);
-out_ip4ip6:
- unregister_pernet_device(&ip6_tnl_net_ops);
-out_pernet:
- return err;
-}
-
-/**
- * ip6_tunnel_cleanup - free resources and unregister protocol
- **/
-
-void rpl_ip6_tunnel_cleanup(void)
-{
- rtnl_link_unregister(&ip6_link_ops);
- if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET))
- pr_info("%s: can't deregister ip4ip6\n", __func__);
-
- if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6))
- pr_info("%s: can't deregister ip6ip6\n", __func__);
-
- unregister_pernet_device(&ip6_tnl_net_ops);
-}
-
-#endif /* USE_UPSTREAM_TUNNEL */
diff --git a/datapath/linux/compat/ip_fragment.c b/datapath/linux/compat/ip_fragment.c
deleted file mode 100644
index f910b99b4..000000000
--- a/datapath/linux/compat/ip_fragment.c
+++ /dev/null
@@ -1,831 +0,0 @@
-/*
- * IP fragmentation backport, heavily based on linux/net/ipv4/ip_fragment.c,
- * copied from Linux 192132b9a034 net: Add support for VRFs to inetpeer cache
- *
- * INET An implementation of the TCP/IP protocol suite for the LINUX
- * operating system. INET is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * The IP fragmentation functionality.
- *
- * Authors: Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG>
- * Alan Cox <alan@lxorguk.ukuu.org.uk>
- *
- * Fixes:
- * Alan Cox : Split from ip.c , see ip_input.c for history.
- * David S. Miller : Begin massive cleanup...
- * Andi Kleen : Add sysctls.
- * xxxx : Overlapfrag bug.
- * Ultima : ip_expire() kernel panic.
- * Bill Hawes : Frag accounting and evictor fixes.
- * John McDonald : 0 length frag bug.
- * Alexey Kuznetsov: SMP races, threading, cleanup.
- * Patrick McHardy : LRU queue of frag heads for evictor.
- */
-
-#include <linux/version.h>
-
-#ifndef HAVE_CORRECT_MRU_HANDLING
-
-#define pr_fmt(fmt) "IPv4: " fmt
-
-#include <linux/compiler.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/jiffies.h>
-#include <linux/skbuff.h>
-#include <linux/list.h>
-#include <linux/ip.h>
-#include <linux/icmp.h>
-#include <linux/netdevice.h>
-#include <linux/jhash.h>
-#include <linux/random.h>
-#include <linux/slab.h>
-#include <net/route.h>
-#include <net/dst.h>
-#include <net/sock.h>
-#include <net/ip.h>
-#include <net/icmp.h>
-#include <net/checksum.h>
-#include <net/inetpeer.h>
-#include <net/inet_frag.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include <linux/inet.h>
-#include <linux/netfilter_ipv4.h>
-#include <net/inet_ecn.h>
-#include <net/vrf.h>
-#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
-#include <net/netns/generic.h>
-#include "datapath.h"
-
-/* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6
- * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c
- * as well. Or notify me, at least. --ANK
- */
-
-static int sysctl_ipfrag_max_dist __read_mostly = 64;
-static const char ip_frag_cache_name[] = "ovs-frag4";
-
-struct ipfrag_skb_cb
-{
- struct inet_skb_parm h;
- int offset;
-};
-
-#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb))
-
-/* Describe an entry in the "incomplete datagrams" queue. */
-struct ipq {
- struct inet_frag_queue q;
-
- u32 user;
- __be32 saddr;
- __be32 daddr;
- __be16 id;
- u8 protocol;
- u8 ecn; /* RFC3168 support */
- u16 max_df_size; /* largest frag with DF set seen */
- int iif;
- int vif; /* VRF device index */
- unsigned int rid;
- struct inet_peer *peer;
-};
-
-static u8 ip4_frag_ecn(u8 tos)
-{
- return 1 << (tos & INET_ECN_MASK);
-}
-
-static struct inet_frags ip4_frags;
-
-static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
- struct net_device *dev);
-
-struct ip4_create_arg {
- struct iphdr *iph;
- u32 user;
- int vif;
-};
-
-static struct netns_frags *get_netns_frags_from_net(struct net *net)
-{
-#ifdef HAVE_INET_FRAG_LRU_MOVE
- struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
- return &(ovs_net->ipv4_frags);
-#else
- return &(net->ipv4.frags);
-#endif
-}
-
-static struct net *get_net_from_netns_frags(struct netns_frags *frags)
-{
- struct net *net;
-#ifdef HAVE_INET_FRAG_LRU_MOVE
- struct ovs_net *ovs_net;
-
- ovs_net = container_of(frags, struct ovs_net, ipv4_frags);
- net = ovs_net->net;
-#else
- net = container_of(frags, struct net, ipv4.frags);
-#endif
- return net;
-}
-
-void ovs_netns_frags_init(struct net *net)
-{
-#ifdef HAVE_INET_FRAG_LRU_MOVE
- struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
-
- ovs_net->ipv4_frags.high_thresh = 4 * 1024 * 1024;
- ovs_net->ipv4_frags.low_thresh = 3 * 1024 * 1024;
- ovs_net->ipv4_frags.timeout = IP_FRAG_TIME;
- inet_frags_init_net(&(ovs_net->ipv4_frags));
- ovs_net->net = net;
-#endif
-}
-
-void ovs_netns_frags_exit(struct net *net)
-{
- struct netns_frags *frags;
-
- frags = get_netns_frags_from_net(net);
- inet_frags_exit_net(frags, &ip4_frags);
-}
-
-static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
-{
- net_get_random_once(&ip4_frags.rnd, sizeof(ip4_frags.rnd));
- return jhash_3words((__force u32)id << 16 | prot,
- (__force u32)saddr, (__force u32)daddr,
- ip4_frags.rnd);
-}
-/* fb3cfe6e75b9 ("inet: frag: remove hash size assumptions from callers")
- * shifted this logic into inet_fragment, but prior kernels still need this.
- */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3,17,0)
-#define ipqhashfn(a, b, c, d) (ipqhashfn(a, b, c, d) & (INETFRAGS_HASHSZ - 1))
-#endif
-
-#ifdef HAVE_INET_FRAGS_CONST
-static unsigned int ip4_hashfn(const struct inet_frag_queue *q)
-#else
-static unsigned int ip4_hashfn(struct inet_frag_queue *q)
-#endif
-{
- const struct ipq *ipq;
-
- ipq = container_of(q, struct ipq, q);
- return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol);
-}
-
-#ifdef HAVE_INET_FRAGS_CONST
-static bool ip4_frag_match(const struct inet_frag_queue *q, const void *a)
-#else
-static bool ip4_frag_match(struct inet_frag_queue *q, void *a)
-#endif
-{
- const struct ipq *qp;
- const struct ip4_create_arg *arg = a;
-
- qp = container_of(q, struct ipq, q);
- return qp->id == arg->iph->id &&
- qp->saddr == arg->iph->saddr &&
- qp->daddr == arg->iph->daddr &&
- qp->protocol == arg->iph->protocol &&
- qp->user == arg->user &&
- qp->vif == arg->vif;
-}
-
-#ifdef HAVE_INET_FRAGS_CONST
-static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
-#else
-static void ip4_frag_init(struct inet_frag_queue *q, void *a)
-#endif
-{
- struct ipq *qp = container_of(q, struct ipq, q);
- struct net *net = get_net_from_netns_frags(q->net);
-
- const struct ip4_create_arg *arg = a;
-
- qp->protocol = arg->iph->protocol;
- qp->id = arg->iph->id;
- qp->ecn = ip4_frag_ecn(arg->iph->tos);
- qp->saddr = arg->iph->saddr;
- qp->daddr = arg->iph->daddr;
- qp->vif = arg->vif;
- qp->user = arg->user;
- qp->peer = sysctl_ipfrag_max_dist ?
- inet_getpeer_v4(net->ipv4.peers, arg->iph->saddr, arg->vif, 1) :
- NULL;
-}
-
-static void ip4_frag_free(struct inet_frag_queue *q)
-{
- struct ipq *qp;
-
- qp = container_of(q, struct ipq, q);
- if (qp->peer)
- inet_putpeer(qp->peer);
-}
-
-
-/* Destruction primitives. */
-
-static void ipq_put(struct ipq *ipq)
-{
- inet_frag_put(&ipq->q, &ip4_frags);
-}
-
-/* Kill ipq entry. It is not destroyed immediately,
- * because caller (and someone more) holds reference count.
- */
-static void ipq_kill(struct ipq *ipq)
-{
- inet_frag_kill(&ipq->q, &ip4_frags);
-}
-
-static bool frag_expire_skip_icmp(u32 user)
-{
- return user == IP_DEFRAG_AF_PACKET ||
- ip_defrag_user_in_between(user, IP_DEFRAG_CONNTRACK_IN,
- __IP_DEFRAG_CONNTRACK_IN_END) ||
- ip_defrag_user_in_between(user, IP_DEFRAG_CONNTRACK_BRIDGE_IN,
- __IP_DEFRAG_CONNTRACK_BRIDGE_IN);
-}
-
-/*
- * Oops, a fragment queue timed out. Kill it and send an ICMP reply.
- */
-static void ip_expire(unsigned long arg)
-{
- struct ipq *qp;
- struct net *net;
-
- qp = container_of((struct inet_frag_queue *) arg, struct ipq, q);
- net = get_net_from_netns_frags(qp->q.net);
-
- spin_lock(&qp->q.lock);
-
- if (qp_flags(qp) & INET_FRAG_COMPLETE)
- goto out;
-
- ipq_kill(qp);
- IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
-
- if (!inet_frag_evicting(&qp->q)) {
- struct sk_buff *head = qp->q.fragments;
- const struct iphdr *iph;
- int err;
-
- IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT);
-
- if (!(qp_flags(qp) & INET_FRAG_FIRST_IN) || !qp->q.fragments)
- goto out;
-
- rcu_read_lock();
- head->dev = dev_get_by_index_rcu(net, qp->iif);
- if (!head->dev)
- goto out_rcu_unlock;
-
- /* skb has no dst, perform route lookup again */
- iph = ip_hdr(head);
- err = ip_route_input_noref(head, iph->daddr, iph->saddr,
- iph->tos, head->dev);
- if (err)
- goto out_rcu_unlock;
-
- /* Only an end host needs to send an ICMP
- * "Fragment Reassembly Timeout" message, per RFC792.
- */
- if (frag_expire_skip_icmp(qp->user) &&
- (skb_rtable(head)->rt_type != RTN_LOCAL))
- goto out_rcu_unlock;
-
- /* Send an ICMP "Fragment Reassembly Timeout" message. */
- icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
-out_rcu_unlock:
- rcu_read_unlock();
- }
-out:
- spin_unlock(&qp->q.lock);
- ipq_put(qp);
-}
-
-#ifdef HAVE_INET_FRAG_EVICTOR
-/* Memory limiting on fragments. Evictor trashes the oldest
- * fragment queue until we are back under the threshold.
- *
- * Necessary for kernels earlier than v3.17. Replaced in commit
- * b13d3cbfb8e8 ("inet: frag: move eviction of queues to work queue").
- */
-static void ip_evictor(struct net *net)
-{
- int evicted;
- struct netns_frags *frags;
-
- frags = get_netns_frags_from_net(net);
- evicted = inet_frag_evictor(frags, &ip4_frags, false);
- if (evicted)
- IP_ADD_STATS_BH(net, IPSTATS_MIB_REASMFAILS, evicted);
-}
-#endif
-
-/* Find the correct entry in the "incomplete datagrams" queue for
- * this IP datagram, and create new one, if nothing is found.
- */
-static struct ipq *ip_find(struct net *net, struct iphdr *iph,
- u32 user, int vif)
-{
- struct inet_frag_queue *q;
- struct ip4_create_arg arg;
- unsigned int hash;
- struct netns_frags *frags;
-
- arg.iph = iph;
- arg.user = user;
- arg.vif = vif;
-
-#ifdef HAVE_INET_FRAGS_WITH_RWLOCK
- read_lock(&ip4_frags.lock);
-#endif
- hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
-
- frags = get_netns_frags_from_net(net);
- q = inet_frag_find(frags, &ip4_frags, &arg, hash);
- if (IS_ERR_OR_NULL(q)) {
- inet_frag_maybe_warn_overflow(q, pr_fmt());
- return NULL;
- }
- return container_of(q, struct ipq, q);
-}
-
-/* Is the fragment too far ahead to be part of ipq? */
-static int ip_frag_too_far(struct ipq *qp)
-{
- struct inet_peer *peer = qp->peer;
- unsigned int max = sysctl_ipfrag_max_dist;
- unsigned int start, end;
-
- int rc;
-
- if (!peer || !max)
- return 0;
-
- start = qp->rid;
- end = atomic_inc_return(&peer->rid);
- qp->rid = end;
-
- rc = qp->q.fragments && (end - start) > max;
-
- if (rc) {
- struct net *net;
-
- net = get_net_from_netns_frags(qp->q.net);
- IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
- }
-
- return rc;
-}
-
-static int ip_frag_reinit(struct ipq *qp)
-{
- struct sk_buff *fp;
- unsigned int sum_truesize = 0;
-
- if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) {
- atomic_inc(&qp->q.refcnt);
- return -ETIMEDOUT;
- }
-
- fp = qp->q.fragments;
- do {
- struct sk_buff *xp = fp->next;
-
- sum_truesize += fp->truesize;
- kfree_skb(fp);
- fp = xp;
- } while (fp);
- sub_frag_mem_limit(qp->q.net, sum_truesize);
-
- qp_flags(qp) = 0;
- qp->q.len = 0;
- qp->q.meat = 0;
- qp->q.fragments = NULL;
- qp->q.fragments_tail = NULL;
- qp->iif = 0;
- qp->ecn = 0;
-
- return 0;
-}
-
-/* Add new segment to existing queue. */
-static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
-{
- struct sk_buff *prev, *next;
- struct net_device *dev;
- unsigned int fragsize;
- int flags, offset;
- int ihl, end;
- int err = -ENOENT;
- u8 ecn;
-
- if (qp_flags(qp) & INET_FRAG_COMPLETE)
- goto err;
-
- if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) &&
- unlikely(ip_frag_too_far(qp)) &&
- unlikely(err = ip_frag_reinit(qp))) {
- ipq_kill(qp);
- goto err;
- }
-
- ecn = ip4_frag_ecn(ip_hdr(skb)->tos);
- offset = ntohs(ip_hdr(skb)->frag_off);
- flags = offset & ~IP_OFFSET;
- offset &= IP_OFFSET;
- offset <<= 3; /* offset is in 8-byte chunks */
- ihl = ip_hdrlen(skb);
-
- /* Determine the position of this fragment. */
- end = offset + skb->len - skb_network_offset(skb) - ihl;
- err = -EINVAL;
-
- /* Is this the final fragment? */
- if ((flags & IP_MF) == 0) {
- /* If we already have some bits beyond end
- * or have different end, the segment is corrupted.
- */
- if (end < qp->q.len ||
- ((qp_flags(qp) & INET_FRAG_LAST_IN) && end != qp->q.len))
- goto err;
- qp_flags(qp) |= INET_FRAG_LAST_IN;
- qp->q.len = end;
- } else {
- if (end&7) {
- end &= ~7;
- if (skb->ip_summed != CHECKSUM_UNNECESSARY)
- skb->ip_summed = CHECKSUM_NONE;
- }
- if (end > qp->q.len) {
- /* Some bits beyond end -> corruption. */
- if (qp_flags(qp) & INET_FRAG_LAST_IN)
- goto err;
- qp->q.len = end;
- }
- }
- if (end == offset)
- goto err;
-
- err = -ENOMEM;
- if (!pskb_pull(skb, skb_network_offset(skb) + ihl))
- goto err;
-
- err = pskb_trim_rcsum(skb, end - offset);
- if (err)
- goto err;
-
- /* Find out which fragments are in front and at the back of us
- * in the chain of fragments so far. We must know where to put
- * this fragment, right?
- */
- prev = qp->q.fragments_tail;
- if (!prev || FRAG_CB(prev)->offset < offset) {
- next = NULL;
- goto found;
- }
- prev = NULL;
- for (next = qp->q.fragments; next != NULL; next = next->next) {
- if (FRAG_CB(next)->offset >= offset)
- break; /* bingo! */
- prev = next;
- }
-
-found:
- /* We found where to put this one. Check for overlap with
- * preceding fragment, and, if needed, align things so that
- * any overlaps are eliminated.
- */
- if (prev) {
- int i = (FRAG_CB(prev)->offset + prev->len) - offset;
-
- if (i > 0) {
- offset += i;
- err = -EINVAL;
- if (end <= offset)
- goto err;
- err = -ENOMEM;
- if (!pskb_pull(skb, i))
- goto err;
- if (skb->ip_summed != CHECKSUM_UNNECESSARY)
- skb->ip_summed = CHECKSUM_NONE;
- }
- }
-
- err = -ENOMEM;
-
- while (next && FRAG_CB(next)->offset < end) {
- int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */
-
- if (i < next->len) {
- /* Eat head of the next overlapped fragment
- * and leave the loop. The next ones cannot overlap.
- */
- if (!pskb_pull(next, i))
- goto err;
- FRAG_CB(next)->offset += i;
- qp->q.meat -= i;
- if (next->ip_summed != CHECKSUM_UNNECESSARY)
- next->ip_summed = CHECKSUM_NONE;
- break;
- } else {
- struct sk_buff *free_it = next;
-
- /* Old fragment is completely overridden with
- * new one drop it.
- */
- next = next->next;
-
- if (prev)
- prev->next = next;
- else
- qp->q.fragments = next;
-
- qp->q.meat -= free_it->len;
- sub_frag_mem_limit(qp->q.net, free_it->truesize);
- kfree_skb(free_it);
- }
- }
-
- FRAG_CB(skb)->offset = offset;
-
- /* Insert this fragment in the chain of fragments. */
- skb->next = next;
- if (!next)
- qp->q.fragments_tail = skb;
- if (prev)
- prev->next = skb;
- else
- qp->q.fragments = skb;
-
- dev = skb->dev;
- if (dev) {
- qp->iif = dev->ifindex;
- skb->dev = NULL;
- }
- qp->q.stamp = skb->tstamp;
- qp->q.meat += skb->len;
- qp->ecn |= ecn;
- add_frag_mem_limit(qp->q.net, skb->truesize);
- if (offset == 0)
- qp_flags(qp) |= INET_FRAG_FIRST_IN;
-
- fragsize = skb->len + ihl;
-
- if (fragsize > qp->q.max_size)
- qp->q.max_size = fragsize;
-
- if (ip_hdr(skb)->frag_off & htons(IP_DF) &&
- fragsize > qp->max_df_size)
- qp->max_df_size = fragsize;
-
- if (qp_flags(qp) == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
- qp->q.meat == qp->q.len) {
- unsigned long orefdst = skb->_skb_refdst;
-
- skb->_skb_refdst = 0UL;
- err = ip_frag_reasm(qp, prev, dev);
- skb->_skb_refdst = orefdst;
- return err;
- }
-
- skb_dst_drop(skb);
- inet_frag_lru_move(&qp->q);
- return -EINPROGRESS;
-
-err:
- kfree_skb(skb);
- return err;
-}
-
-
-/* Build a new IP datagram from all its fragments. */
-
-static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
- struct net_device *dev)
-{
- struct net *net = get_net_from_netns_frags(qp->q.net);
- struct iphdr *iph;
- struct sk_buff *fp, *head = qp->q.fragments;
- int len;
- int ihlen;
- int err;
- u8 ecn;
-
- ipq_kill(qp);
-
- ecn = ip_frag_ecn_table[qp->ecn];
- if (unlikely(ecn == 0xff)) {
- err = -EINVAL;
- goto out_fail;
- }
- /* Make the one we just received the head. */
- if (prev) {
- head = prev->next;
- fp = skb_clone(head, GFP_ATOMIC);
- if (!fp)
- goto out_nomem;
-
- fp->next = head->next;
- if (!fp->next)
- qp->q.fragments_tail = fp;
- prev->next = fp;
-
- skb_morph(head, qp->q.fragments);
- head->next = qp->q.fragments->next;
-
- consume_skb(qp->q.fragments);
- qp->q.fragments = head;
- }
-
- WARN_ON(!head);
- WARN_ON(FRAG_CB(head)->offset != 0);
-
- /* Allocate a new buffer for the datagram. */
- ihlen = ip_hdrlen(head);
- len = ihlen + qp->q.len;
-
- err = -E2BIG;
- if (len > 65535)
- goto out_oversize;
-
- /* Head of list must not be cloned. */
- if (skb_unclone(head, GFP_ATOMIC))
- goto out_nomem;
-
- /* If the first fragment is fragmented itself, we split
- * it to two chunks: the first with data and paged part
- * and the second, holding only fragments. */
- if (skb_has_frag_list(head)) {
- struct sk_buff *clone;
- int i, plen = 0;
-
- clone = alloc_skb(0, GFP_ATOMIC);
- if (!clone)
- goto out_nomem;
- clone->next = head->next;
- head->next = clone;
- skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
- skb_frag_list_init(head);
- for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
- plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
- clone->len = clone->data_len = head->data_len - plen;
- head->data_len -= clone->len;
- head->len -= clone->len;
- clone->csum = 0;
- clone->ip_summed = head->ip_summed;
- add_frag_mem_limit(qp->q.net, clone->truesize);
- }
-
- skb_shinfo(head)->frag_list = head->next;
- skb_push(head, head->data - skb_network_header(head));
-
- for (fp=head->next; fp; fp = fp->next) {
- head->data_len += fp->len;
- head->len += fp->len;
- if (head->ip_summed != fp->ip_summed)
- head->ip_summed = CHECKSUM_NONE;
- else if (head->ip_summed == CHECKSUM_COMPLETE)
- head->csum = csum_add(head->csum, fp->csum);
- head->truesize += fp->truesize;
- }
- sub_frag_mem_limit(qp->q.net, head->truesize);
-
- head->next = NULL;
- head->dev = dev;
- head->tstamp = qp->q.stamp;
- IPCB(head)->frag_max_size = max(qp->max_df_size, qp->q.max_size);
-
- iph = ip_hdr(head);
- iph->tot_len = htons(len);
- iph->tos |= ecn;
-
- /* When we set IP_DF on a refragmented skb we must also force a
- * call to ip_fragment to avoid forwarding a DF-skb of size s while
- * original sender only sent fragments of size f (where f < s).
- *
- * We only set DF/IPSKB_FRAG_PMTU if such DF fragment was the largest
- * frag seen to avoid sending tiny DF-fragments in case skb was built
- * from one very small df-fragment and one large non-df frag.
- */
- if (qp->max_df_size == qp->q.max_size) {
- IPCB(head)->flags |= IPSKB_FRAG_PMTU;
- iph->frag_off = htons(IP_DF);
- } else {
- iph->frag_off = 0;
- }
-
- ip_send_check(iph);
-
- IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS);
- qp->q.fragments = NULL;
- qp->q.fragments_tail = NULL;
- return 0;
-
-out_nomem:
- net_dbg_ratelimited("queue_glue: no memory for gluing queue %p\n", qp);
- err = -ENOMEM;
- goto out_fail;
-out_oversize:
- net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->saddr);
-out_fail:
- IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
- return err;
-}
-
-/* Process an incoming IP datagram fragment. */
-int rpl_ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
-{
- struct net_device *dev = skb->dev ? : skb_dst(skb)->dev;
- int vif = vrf_master_ifindex_rcu(dev);
- struct ipq *qp;
-
- IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS);
- skb_orphan(skb);
-
-#ifdef HAVE_INET_FRAG_EVICTOR
- /* Start by cleaning up the memory. */
- ip_evictor(net);
-#endif
-
- /* Lookup (or create) queue header */
- qp = ip_find(net, ip_hdr(skb), user, vif);
- if (qp) {
- int ret;
-
- spin_lock(&qp->q.lock);
-
- ret = ip_frag_queue(qp, skb);
-
- spin_unlock(&qp->q.lock);
- ipq_put(qp);
- return ret;
- }
-
- IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
- kfree_skb(skb);
- return -ENOMEM;
-}
-
-#ifdef HAVE_DEFRAG_ENABLE_TAKES_NET
-static int __net_init ipv4_frags_init_net(struct net *net)
-{
- return nf_defrag_ipv4_enable(net);
-}
-#endif
-
-static void __net_exit ipv4_frags_exit_net(struct net *net)
-{
-}
-
-static struct pernet_operations ip4_frags_ops = {
-#ifdef HAVE_DEFRAG_ENABLE_TAKES_NET
- .init = ipv4_frags_init_net,
-#endif
- .exit = ipv4_frags_exit_net,
-};
-
-int __init rpl_ipfrag_init(void)
-{
-#ifndef HAVE_DEFRAG_ENABLE_TAKES_NET
- nf_defrag_ipv4_enable();
-#endif
- register_pernet_subsys(&ip4_frags_ops);
- ip4_frags.hashfn = ip4_hashfn;
- ip4_frags.constructor = ip4_frag_init;
- ip4_frags.destructor = ip4_frag_free;
- ip4_frags.skb_free = NULL;
- ip4_frags.qsize = sizeof(struct ipq);
- ip4_frags.match = ip4_frag_match;
- ip4_frags.frag_expire = ip_expire;
-#ifdef HAVE_INET_FRAGS_WITH_FRAGS_WORK
- ip4_frags.frags_cache_name = ip_frag_cache_name;
-#endif
-#if RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(8,0)
- ip4_frags.secret_interval = 10 * 60 * HZ;
-#endif
- if (inet_frags_init(&ip4_frags)) {
- pr_warn("IP: failed to allocate ip4_frags cache\n");
- return -ENOMEM;
- }
- return 0;
-}
-
-void rpl_ipfrag_fini(void)
-{
- inet_frags_fini(&ip4_frags);
- unregister_pernet_subsys(&ip4_frags_ops);
-}
-
-#endif /* !HAVE_CORRECT_MRU_HANDLING */
diff --git a/datapath/linux/compat/ip_gre.c b/datapath/linux/compat/ip_gre.c
deleted file mode 100644
index c194ffe00..000000000
--- a/datapath/linux/compat/ip_gre.c
+++ /dev/null
@@ -1,1450 +0,0 @@
-/*
- * Linux NET3: GRE over IP protocol decoder.
- *
- * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#ifndef USE_UPSTREAM_TUNNEL
-#include <linux/capability.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/kconfig.h>
-#include <linux/slab.h>
-#include <linux/uaccess.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <linux/netdev_features.h>
-#include <linux/in.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include <linux/if_arp.h>
-#include <linux/mroute.h>
-#include <linux/if_vlan.h>
-#include <linux/init.h>
-#include <linux/in6.h>
-#include <linux/inetdevice.h>
-#include <linux/igmp.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/etherdevice.h>
-#include <linux/if_ether.h>
-
-#include <net/sock.h>
-#include <net/ip.h>
-#include <net/icmp.h>
-#include <net/protocol.h>
-#include <net/ip_tunnels.h>
-#include <net/arp.h>
-#include <net/checksum.h>
-#include <net/dsfield.h>
-#include <net/inet_ecn.h>
-#include <net/xfrm.h>
-#include <net/net_namespace.h>
-#include <net/netns/generic.h>
-#include <net/rtnetlink.h>
-#include <net/gre.h>
-#include <net/dst_metadata.h>
-#include <net/erspan.h>
-
-#if IS_ENABLED(CONFIG_IPV6)
-#include <net/ipv6.h>
-#include <net/ip6_fib.h>
-#include <net/ip6_route.h>
-#endif
-
-#include "gso.h"
-#include "vport-netdev.h"
-
-static int gre_tap_net_id __read_mostly;
-static unsigned int erspan_net_id __read_mostly;
-static void erspan_build_header(struct sk_buff *skb,
- __be32 id, u32 index,
- bool truncate, bool is_ipv4);
-
-static bool ip_gre_loaded = false;
-
-/* Normally in net/core/dst.c but move it here */
-struct dst_ops md_dst_ops = {
- .family = AF_UNSPEC,
-};
-
-#ifndef ip_gre_calc_hlen
-#define ip_gre_calc_hlen gre_calc_hlen
-#endif
-
-static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
- int gre_hdr_len)
-{
- struct net *net = dev_net(skb->dev);
- struct metadata_dst *tun_dst = NULL;
- struct erspan_base_hdr *ershdr;
- struct erspan_metadata *pkt_md;
- struct ip_tunnel_net *itn;
- struct ip_tunnel *tunnel;
- const struct iphdr *iph;
- struct erspan_md2 *md2;
- int ver;
- int len;
-
- itn = net_generic(net, erspan_net_id);
- len = gre_hdr_len + sizeof(*ershdr);
-
- /* Check based hdr len */
- if (unlikely(!pskb_may_pull(skb, len)))
- return PACKET_REJECT;
-
- iph = ip_hdr(skb);
- ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
- ver = ershdr->ver;
-
- /* The original GRE header does not have key field,
- * Use ERSPAN 10-bit session ID as key.
- */
- tpi->key = cpu_to_be32(get_session_id(ershdr));
- tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
- tpi->flags,
- iph->saddr, iph->daddr, tpi->key);
-
- if (tunnel) {
- len = gre_hdr_len + erspan_hdr_len(ver);
- if (unlikely(!pskb_may_pull(skb, len)))
- return PACKET_REJECT;
-
- ershdr = (struct erspan_base_hdr *)skb->data;
- pkt_md = (struct erspan_metadata *)(ershdr + 1);
-
- if (__iptunnel_pull_header(skb,
- len,
- htons(ETH_P_TEB),
- false, false) < 0)
- goto drop;
-
- if (tunnel->collect_md) {
- struct ip_tunnel_info *info;
- struct erspan_metadata *md;
- __be64 tun_id;
- __be16 flags;
-
- tpi->flags |= TUNNEL_KEY;
- flags = tpi->flags;
- tun_id = key32_to_tunnel_id(tpi->key);
-
- tun_dst = rpl_ip_tun_rx_dst(skb, flags, tun_id, sizeof(*md));
- if (!tun_dst)
- return PACKET_REJECT;
-
- md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
- md->version = ver;
- md2 = &md->u.md2;
- memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
- ERSPAN_V2_MDSIZE);
-
- info = &tun_dst->u.tun_info;
- info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
- info->options_len = sizeof(*md);
- }
-
- skb_reset_mac_header(skb);
- ovs_ip_tunnel_rcv(tunnel->dev, skb, tun_dst);
- kfree(tun_dst);
- return PACKET_RCVD;
- }
-drop:
- kfree_skb(skb);
- return PACKET_RCVD;
-}
-
-
-static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
- struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
-{
- struct metadata_dst tun_dst;
- const struct iphdr *iph;
- struct ip_tunnel *tunnel;
-
- iph = ip_hdr(skb);
- tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
- iph->saddr, iph->daddr, tpi->key);
-
- if (tunnel) {
- if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
- raw_proto, false) < 0)
- goto drop;
-
- if (tunnel->dev->type != ARPHRD_NONE)
- skb_pop_mac_header(skb);
- else
- skb_reset_mac_header(skb);
- if (tunnel->collect_md) {
- __be16 flags;
- __be64 tun_id;
-
- flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
- tun_id = key32_to_tunnel_id(tpi->key);
- ovs_ip_tun_rx_dst(&tun_dst, skb, flags, tun_id, 0);
- }
-
- ovs_ip_tunnel_rcv(tunnel->dev, skb, &tun_dst);
- return PACKET_RCVD;
- }
- return PACKET_NEXT;
-
-drop:
- kfree_skb(skb);
- return PACKET_RCVD;
-}
-
-
-static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
- int hdr_len)
-{
- struct net *net = dev_net(skb->dev);
- struct ip_tunnel_net *itn;
- int res;
-
- if (tpi->proto == htons(ETH_P_TEB))
- itn = net_generic(net, gre_tap_net_id);
- else if (tpi->proto == htons(ETH_P_ERSPAN) ||
- tpi->proto == htons(ETH_P_ERSPAN2))
- itn = net_generic(net, erspan_net_id);
- else
- return PACKET_RCVD;
-
- res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
-
- return res;
-}
-
-static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
- const struct iphdr *tnl_params,
- __be16 proto)
-{
- struct ip_tunnel *tunnel = netdev_priv(dev);
- struct tnl_ptk_info tpi;
-
- tpi.flags = tunnel->parms.o_flags;
- tpi.proto = proto;
- tpi.key = tunnel->parms.o_key;
- if (tunnel->parms.o_flags & TUNNEL_SEQ)
- tunnel->o_seqno++;
- tpi.seq = htonl(tunnel->o_seqno);
-
- /* Push GRE header. */
- gre_build_header(skb, &tpi, tunnel->hlen);
-
- ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
-}
-
-static int gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *unused_tpi)
-{
- struct tnl_ptk_info tpi;
- bool csum_err = false;
- int hdr_len;
-
- hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
- if (hdr_len < 0)
- goto drop;
-
- if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
- tpi.proto == htons(ETH_P_ERSPAN2))) {
- if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
- return 0;
- goto drop;
- }
-
- if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
- return 0;
-drop:
-
- kfree_skb(skb);
- return 0;
-}
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4,7,0)
-#include "gso.h"
-/* gre_handle_offloads() has different return type on older kernsl. */
-static void gre_nop_fix(struct sk_buff *skb) { }
-
-static void gre_csum_fix(struct sk_buff *skb)
-{
- struct gre_base_hdr *greh;
- __be32 *options;
- int gre_offset = skb_transport_offset(skb);
-
- greh = (struct gre_base_hdr *)skb_transport_header(skb);
- options = ((__be32 *)greh + 1);
-
- *options = 0;
- *(__sum16 *)options = csum_fold(skb_checksum(skb, gre_offset,
- skb->len - gre_offset, 0));
-}
-
-#define gre_handle_offloads rpl_gre_handle_offloads
-static int rpl_gre_handle_offloads(struct sk_buff *skb, bool gre_csum)
-{
- int type = gre_csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE;
- gso_fix_segment_t fix_segment;
-
- if (gre_csum)
- fix_segment = gre_csum_fix;
- else
- fix_segment = gre_nop_fix;
-
- return ovs_iptunnel_handle_offloads(skb, type, fix_segment);
-}
-#else
-static int gre_handle_offloads(struct sk_buff *skb, bool csum)
-{
- return iptunnel_handle_offloads(skb, csum,
- csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
-}
-#endif
-
-static bool is_gre_gso(struct sk_buff *skb)
-{
- return skb_shinfo(skb)->gso_type &
- (SKB_GSO_GRE | SKB_GSO_GRE_CSUM);
-}
-
-static void build_header(struct sk_buff *skb, int hdr_len, __be16 flags,
- __be16 proto, __be32 key, __be32 seq)
-{
- struct gre_base_hdr *greh;
-
- skb_push(skb, hdr_len);
-
- skb_reset_transport_header(skb);
- greh = (struct gre_base_hdr *)skb->data;
- greh->flags = tnl_flags_to_gre_flags(flags);
- greh->protocol = proto;
-
- if (flags & (TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_SEQ)) {
- __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
-
- if (flags & TUNNEL_SEQ) {
- *ptr = seq;
- ptr--;
- }
- if (flags & TUNNEL_KEY) {
- *ptr = key;
- ptr--;
- }
- if (flags & TUNNEL_CSUM && !is_gre_gso(skb)) {
- *ptr = 0;
- *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
- skb->len, 0));
- }
- }
- ovs_skb_set_inner_protocol(skb, proto);
-}
-
-static struct rtable *gre_get_rt(struct sk_buff *skb,
- struct net_device *dev,
- struct flowi4 *fl,
- const struct ip_tunnel_key *key)
-{
- struct net *net = dev_net(dev);
-
- memset(fl, 0, sizeof(*fl));
- fl->daddr = key->u.ipv4.dst;
- fl->saddr = key->u.ipv4.src;
- fl->flowi4_tos = RT_TOS(key->tos);
- fl->flowi4_mark = skb->mark;
- fl->flowi4_proto = IPPROTO_GRE;
-
- return ip_route_output_key(net, fl);
-}
-
-static struct rtable *prepare_fb_xmit(struct sk_buff *skb,
- struct net_device *dev,
- struct flowi4 *fl,
- int tunnel_hlen)
-{
- struct ip_tunnel_info *tun_info;
- const struct ip_tunnel_key *key;
- struct rtable *rt = NULL;
- int min_headroom;
- bool use_cache;
- int err;
-
- tun_info = skb_tunnel_info(skb);
- key = &tun_info->key;
- use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
-
- if (use_cache)
- rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr);
- if (!rt) {
- rt = gre_get_rt(skb, dev, fl, key);
- if (IS_ERR(rt))
- goto err_free_skb;
- if (use_cache)
- dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
- fl->saddr);
- }
-
- min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
- + tunnel_hlen + sizeof(struct iphdr);
- if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
- int head_delta = SKB_DATA_ALIGN(min_headroom -
- skb_headroom(skb) +
- 16);
- err = pskb_expand_head(skb, max_t(int, head_delta, 0),
- 0, GFP_ATOMIC);
- if (unlikely(err))
- goto err_free_rt;
- }
- return rt;
-
-err_free_rt:
- ip_rt_put(rt);
-err_free_skb:
- kfree_skb(skb);
- dev->stats.tx_dropped++;
- return NULL;
-}
-
-netdev_tx_t rpl_gre_fb_xmit(struct sk_buff *skb)
-{
- struct net_device *dev = skb->dev;
- struct ip_tunnel_info *tun_info;
- const struct ip_tunnel_key *key;
- struct flowi4 fl;
- struct rtable *rt;
- int min_headroom;
- int tunnel_hlen;
- __be16 df, flags;
- int err;
-
- tun_info = skb_tunnel_info(skb);
- if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
- ip_tunnel_info_af(tun_info) != AF_INET))
- goto err_free_skb;
-
- key = &tun_info->key;
-
- rt = gre_get_rt(skb, dev, &fl, key);
- if (IS_ERR(rt))
- goto err_free_skb;
-
- tunnel_hlen = ip_gre_calc_hlen(key->tun_flags);
-
- min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
- + tunnel_hlen + sizeof(struct iphdr)
- + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
- if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
- int head_delta = SKB_DATA_ALIGN(min_headroom -
- skb_headroom(skb) +
- 16);
- err = pskb_expand_head(skb, max_t(int, head_delta, 0),
- 0, GFP_ATOMIC);
- if (unlikely(err))
- goto err_free_rt;
- }
-
- if (skb_vlan_tag_present(skb)) {
- skb = __vlan_hwaccel_push_inside(skb);
- if (unlikely(!skb)) {
- err = -ENOMEM;
- goto err_free_rt;
- }
- }
-
- /* Push Tunnel header. */
- err = gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM));
- if (err)
- goto err_free_rt;
-
- flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
- build_header(skb, tunnel_hlen, flags, htons(ETH_P_TEB),
- tunnel_id_to_key32(tun_info->key.tun_id), 0);
-
- df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
- iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
- key->tos, key->ttl, df, false);
- return NETDEV_TX_OK;
-
-err_free_rt:
- ip_rt_put(rt);
-err_free_skb:
- kfree_skb(skb);
- dev->stats.tx_dropped++;
- return NETDEV_TX_OK;
-}
-EXPORT_SYMBOL(rpl_gre_fb_xmit);
-
-static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
- __be16 proto)
-{
- struct ip_tunnel *tunnel = netdev_priv(dev);
- struct ip_tunnel_info *tun_info;
- const struct ip_tunnel_key *key;
- struct erspan_metadata *md;
- struct rtable *rt = NULL;
- struct tnl_ptk_info tpi;
- bool truncate = false;
- struct flowi4 fl;
- int tunnel_hlen;
- int version;
- __be16 df;
- int nhoff;
- int thoff;
-
- tun_info = skb_tunnel_info(skb);
- if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
- ip_tunnel_info_af(tun_info) != AF_INET))
- goto err_free_skb;
-
- key = &tun_info->key;
- if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
- goto err_free_rt;
- md = ip_tunnel_info_opts(tun_info);
- if (!md)
- goto err_free_rt;
-
- /* ERSPAN has fixed 8 byte GRE header */
- version = md->version;
- tunnel_hlen = 8 + erspan_hdr_len(version);
-
- rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
- if (!rt)
- return;
-
- if (gre_handle_offloads(skb, false))
- goto err_free_rt;
-
- if (skb->len > dev->mtu + dev->hard_header_len) {
- pskb_trim(skb, dev->mtu + dev->hard_header_len);
- truncate = true;
- }
-
- nhoff = skb_network_header(skb) - skb_mac_header(skb);
- if (skb->protocol == htons(ETH_P_IP) &&
- (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
- truncate = true;
-
- thoff = skb_transport_header(skb) - skb_mac_header(skb);
- if (skb->protocol == htons(ETH_P_IPV6) &&
- (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff))
- truncate = true;
-
- if (version == 1) {
- erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
- ntohl(md->u.index), truncate, true);
- tpi.hdr_len = ERSPAN_V1_MDSIZE;
- tpi.proto = htons(ETH_P_ERSPAN);
- } else if (version == 2) {
- erspan_build_header_v2(skb,
- ntohl(tunnel_id_to_key32(key->tun_id)),
- md->u.md2.dir,
- get_hwid(&md->u.md2),
- truncate, true);
- tpi.hdr_len = ERSPAN_V2_MDSIZE;
- tpi.proto = htons(ETH_P_ERSPAN2);
- } else {
- goto err_free_rt;
- }
-
- tpi.flags = TUNNEL_SEQ;
- tpi.key = tunnel_id_to_key32(key->tun_id);
- tpi.seq = htonl(tunnel->o_seqno++);
-
- gre_build_header(skb, &tpi, 8);
-
- df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
-
- iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
- key->tos, key->ttl, df, false);
- return;
-
-err_free_rt:
- ip_rt_put(rt);
-err_free_skb:
- kfree_skb(skb);
- dev->stats.tx_dropped++;
-}
-
-#define GRE_FEATURES (NETIF_F_SG | \
- NETIF_F_FRAGLIST | \
- NETIF_F_HIGHDMA | \
- NETIF_F_HW_CSUM | \
- NETIF_F_NETNS_LOCAL)
-
-static void __gre_tunnel_init(struct net_device *dev)
-{
- struct ip_tunnel *tunnel;
-
- tunnel = netdev_priv(dev);
- tunnel->tun_hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
- tunnel->parms.iph.protocol = IPPROTO_GRE;
-
- tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
-
- dev->features |= GRE_FEATURES;
- dev->hw_features |= GRE_FEATURES;
-
- if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
- /* TCP offload with GRE SEQ is not supported, nor
- * can we support 2 levels of outer headers requiring
- * an update.
- */
- if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
- (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
- dev->features |= NETIF_F_GSO_SOFTWARE;
- dev->hw_features |= NETIF_F_GSO_SOFTWARE;
- }
-
- /* Can use a lockless transmit, unless we generate
- * output sequences
- */
- dev->features |= NETIF_F_LLTX;
- }
-}
-
-static int __gre_rcv(struct sk_buff *skb)
-{
- return gre_rcv(skb, NULL);
-}
-
-void __gre_err(struct sk_buff *skb, u32 info)
-{
- pr_warn("%s: GRE receive error\n", __func__);
-}
-
-static const struct gre_protocol ipgre_protocol = {
- .handler = __gre_rcv,
- .err_handler = __gre_err,
-};
-
-#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK
-static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
- struct netlink_ext_ack *extack)
-#else
-static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
-#endif
-{
- __be16 flags;
-
- if (!data)
- return 0;
-
- flags = 0;
- if (data[IFLA_GRE_IFLAGS])
- flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
- if (data[IFLA_GRE_OFLAGS])
- flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
- if (flags & (GRE_VERSION|GRE_ROUTING))
- return -EINVAL;
-
- return 0;
-}
-
-#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK
-static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
- struct netlink_ext_ack *extack)
-#else
-static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
-#endif
-{
- __be32 daddr;
-
- if (tb[IFLA_ADDRESS]) {
- if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
- return -EINVAL;
- if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
- return -EADDRNOTAVAIL;
- }
-
- if (!data)
- goto out;
-
- if (data[IFLA_GRE_REMOTE]) {
- memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
- if (!daddr)
- return -EINVAL;
- }
-
-out:
-#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK
- return ipgre_tunnel_validate(tb, data, NULL);
-#else
- return ipgre_tunnel_validate(tb, data);
-#endif
-}
-
-enum {
-#ifndef HAVE_IFLA_GRE_ENCAP_DPORT
- IFLA_GRE_ENCAP_TYPE = IFLA_GRE_FLAGS + 1,
- IFLA_GRE_ENCAP_FLAGS,
- IFLA_GRE_ENCAP_SPORT,
- IFLA_GRE_ENCAP_DPORT,
-#endif
-#ifndef HAVE_IFLA_GRE_COLLECT_METADATA
- IFLA_GRE_COLLECT_METADATA = IFLA_GRE_ENCAP_DPORT + 1,
-#endif
-#ifndef HAVE_IFLA_GRE_IGNORE_DF
- IFLA_GRE_IGNORE_DF = IFLA_GRE_COLLECT_METADATA + 1,
-#endif
-#ifndef HAVE_IFLA_GRE_FWMARK
- IFLA_GRE_FWMARK = IFLA_GRE_IGNORE_DF + 1,
-#endif
-#ifndef HAVE_IFLA_GRE_ERSPAN_INDEX
- IFLA_GRE_ERSPAN_INDEX = IFLA_GRE_FWMARK + 1,
-#endif
-#ifndef HAVE_IFLA_GRE_ERSPAN_HWID
- IFLA_GRE_ERSPAN_VER = IFLA_GRE_ERSPAN_INDEX + 1,
- IFLA_GRE_ERSPAN_DIR,
- IFLA_GRE_ERSPAN_HWID,
-#endif
-};
-
-#define RPL_IFLA_GRE_MAX (IFLA_GRE_ERSPAN_HWID + 1)
-
-#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK
-static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
- struct netlink_ext_ack *extack)
-#else
-static int erspan_validate(struct nlattr *tb[], struct nlattr *data[])
-#endif
-{
- __be16 flags = 0;
- int ret;
-
- if (!data)
- return 0;
-
-#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK
- ret = ipgre_tap_validate(tb, data, NULL);
-#else
- ret = ipgre_tap_validate(tb, data);
-#endif
- if (ret)
- return ret;
-
- /* ERSPAN should only have GRE sequence and key flag */
- if (data[IFLA_GRE_OFLAGS])
- flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
- if (data[IFLA_GRE_IFLAGS])
- flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
- if (!data[IFLA_GRE_COLLECT_METADATA] &&
- flags != (GRE_SEQ | GRE_KEY))
- return -EINVAL;
-
- /* ERSPAN Session ID only has 10-bit. Since we reuse
- * 32-bit key field as ID, check it's range.
- */
- if (data[IFLA_GRE_OKEY] &&
- (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
- return -EINVAL;
-
- return 0;
-}
-
-static int ipgre_netlink_parms(struct net_device *dev,
- struct nlattr *data[],
- struct nlattr *tb[],
- struct ip_tunnel_parm *parms)
-{
- struct ip_tunnel *t = netdev_priv(dev);
-
- memset(parms, 0, sizeof(*parms));
-
- parms->iph.protocol = IPPROTO_GRE;
-
- if (!data)
- return 0;
-
- if (data[IFLA_GRE_LINK])
- parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
-
- if (data[IFLA_GRE_IFLAGS])
- parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
-
- if (data[IFLA_GRE_OFLAGS])
- parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
-
- if (data[IFLA_GRE_IKEY])
- parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
-
- if (data[IFLA_GRE_OKEY])
- parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
-
- if (data[IFLA_GRE_LOCAL])
- parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);
-
- if (data[IFLA_GRE_REMOTE])
- parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);
-
- if (data[IFLA_GRE_TTL])
- parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
-
- if (data[IFLA_GRE_TOS])
- parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
-
- if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
- if (t->ignore_df)
- return -EINVAL;
- parms->iph.frag_off = htons(IP_DF);
- }
-
- if (data[IFLA_GRE_COLLECT_METADATA]) {
- t->collect_md = true;
- if (dev->type == ARPHRD_IPGRE)
- dev->type = ARPHRD_NONE;
- }
-
- if (data[IFLA_GRE_IGNORE_DF]) {
- if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
- && (parms->iph.frag_off & htons(IP_DF)))
- return -EINVAL;
- t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
- }
-
- if (data[IFLA_GRE_ERSPAN_INDEX]) {
- t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
-
- if (t->index & ~INDEX_MASK)
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int gre_tap_init(struct net_device *dev)
-{
- __gre_tunnel_init(dev);
- dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
-
- return ip_tunnel_init(dev);
-}
-
-static netdev_tx_t gre_dev_xmit(struct sk_buff *skb, struct net_device *dev)
-{
- /* Drop All packets coming from networking stack. OVS-CB is
- * not initialized for these packets.
- */
-
- dev_kfree_skb(skb);
- dev->stats.tx_dropped++;
- return NETDEV_TX_OK;
-}
-
-static netdev_tx_t erspan_xmit(struct sk_buff *skb,
- struct net_device *dev)
-{
- struct ip_tunnel *tunnel = netdev_priv(dev);
- bool truncate = false;
-
- if (tunnel->collect_md) {
- erspan_fb_xmit(skb, dev, skb->protocol);
- return NETDEV_TX_OK;
- }
-
- if (gre_handle_offloads(skb, false))
- goto free_skb;
-
- if (skb_cow_head(skb, dev->needed_headroom))
- goto free_skb;
-
- if (skb->len > dev->mtu + dev->hard_header_len) {
- pskb_trim(skb, dev->mtu + dev->hard_header_len);
- truncate = true;
- }
-
- /* Push ERSPAN header */
- if (tunnel->erspan_ver == 1)
- erspan_build_header(skb, ntohl(tunnel->parms.o_key),
- tunnel->index,
- truncate, true);
- else if (tunnel->erspan_ver == 2)
- erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
- tunnel->dir, tunnel->hwid,
- truncate, true);
- else
- goto free_skb;
-
- tunnel->parms.o_flags &= ~TUNNEL_KEY;
- __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN));
- return NETDEV_TX_OK;
-
-free_skb:
- kfree_skb(skb);
- dev->stats.tx_dropped++;
- return NETDEV_TX_OK;
-}
-
-static netdev_tx_t __erspan_fb_xmit(struct sk_buff *skb)
-{
- erspan_fb_xmit(skb, skb->dev, skb->protocol);
- return NETDEV_TX_OK;
-}
-
-int ovs_gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
-{
- struct ip_tunnel_info *info = skb_tunnel_info(skb);
- struct rtable *rt;
- struct flowi4 fl4;
-
- if (ip_tunnel_info_af(info) != AF_INET)
- return -EINVAL;
-
- rt = gre_get_rt(skb, dev, &fl4, &info->key);
- if (IS_ERR(rt))
- return PTR_ERR(rt);
-
- ip_rt_put(rt);
- info->key.u.ipv4.src = fl4.saddr;
- return 0;
-}
-EXPORT_SYMBOL_GPL(ovs_gre_fill_metadata_dst);
-
-static int erspan_tunnel_init(struct net_device *dev)
-{
- struct ip_tunnel *tunnel = netdev_priv(dev);
-
- tunnel->tun_hlen = 8;
- tunnel->parms.iph.protocol = IPPROTO_GRE;
- tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
- erspan_hdr_len(tunnel->erspan_ver);
-
- dev->features |= GRE_FEATURES;
- dev->hw_features |= GRE_FEATURES;
- dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
- netif_keep_dst(dev);
-
- return ip_tunnel_init(dev);
-}
-
-static const struct net_device_ops gre_tap_netdev_ops = {
- .ndo_init = gre_tap_init,
- .ndo_uninit = rpl_ip_tunnel_uninit,
- .ndo_start_xmit = gre_dev_xmit,
- .ndo_set_mac_address = eth_mac_addr,
- .ndo_validate_addr = eth_validate_addr,
-#ifdef HAVE_RHEL7_MAX_MTU
- .ndo_size = sizeof(struct net_device_ops),
- .extended.ndo_change_mtu = ip_tunnel_change_mtu,
-#else
- .ndo_change_mtu = ip_tunnel_change_mtu,
-#endif
- .ndo_get_stats64 = ip_tunnel_get_stats64,
-#ifdef HAVE_NDO_GET_IFLINK
- .ndo_get_iflink = rpl_ip_tunnel_get_iflink,
-#endif
-#ifdef HAVE_NDO_FILL_METADATA_DST
- .ndo_fill_metadata_dst = gre_fill_metadata_dst,
-#endif
-};
-
-static const struct net_device_ops erspan_netdev_ops = {
- .ndo_init = erspan_tunnel_init,
- .ndo_uninit = rpl_ip_tunnel_uninit,
- .ndo_start_xmit = erspan_xmit,
- .ndo_set_mac_address = eth_mac_addr,
- .ndo_validate_addr = eth_validate_addr,
-#ifdef HAVE_RHEL7_MAX_MTU
- .ndo_size = sizeof(struct net_device_ops),
- .extended.ndo_change_mtu = ip_tunnel_change_mtu,
-#else
- .ndo_change_mtu = ip_tunnel_change_mtu,
-#endif
- .ndo_get_stats64 = ip_tunnel_get_stats64,
-#ifdef HAVE_NDO_GET_IFLINK
- .ndo_get_iflink = rpl_ip_tunnel_get_iflink,
-#endif
-#ifdef HAVE_NDO_FILL_METADATA_DST
- .ndo_fill_metadata_dst = gre_fill_metadata_dst,
-#endif
-};
-
-static void ipgre_tap_setup(struct net_device *dev)
-{
- ether_setup(dev);
-#ifdef HAVE_NET_DEVICE_MAX_MTU
- dev->max_mtu = 0;
-#endif
- dev->netdev_ops = &gre_tap_netdev_ops;
- dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
- ip_tunnel_setup(dev, gre_tap_net_id);
-}
-
-static void erspan_setup(struct net_device *dev)
-{
- struct ip_tunnel *t = netdev_priv(dev);
-
- eth_hw_addr_random(dev);
- ether_setup(dev);
-#ifdef HAVE_NET_DEVICE_MAX_MTU
- dev->max_mtu = 0;
-#endif
- dev->netdev_ops = &erspan_netdev_ops;
- dev->priv_flags &= ~IFF_TX_SKB_SHARING;
- dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
- ip_tunnel_setup(dev, erspan_net_id);
- t->erspan_ver = 1;
-}
-
-#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
-static int ipgre_newlink(struct net *src_net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[],
- struct netlink_ext_ack *extack)
-#else
-static int ipgre_newlink(struct net *src_net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[])
-#endif
-{
- struct ip_tunnel_parm p;
- int err;
-
- ipgre_netlink_parms(dev, data, tb, &p);
- err = ip_tunnel_newlink(dev, tb, &p);
- return err;
-
-}
-
-static size_t ipgre_get_size(const struct net_device *dev)
-{
- return
- /* IFLA_GRE_LINK */
- nla_total_size(4) +
- /* IFLA_GRE_IFLAGS */
- nla_total_size(2) +
- /* IFLA_GRE_OFLAGS */
- nla_total_size(2) +
- /* IFLA_GRE_IKEY */
- nla_total_size(4) +
- /* IFLA_GRE_OKEY */
- nla_total_size(4) +
- /* IFLA_GRE_LOCAL */
- nla_total_size(4) +
- /* IFLA_GRE_REMOTE */
- nla_total_size(4) +
- /* IFLA_GRE_TTL */
- nla_total_size(1) +
- /* IFLA_GRE_TOS */
- nla_total_size(1) +
- /* IFLA_GRE_PMTUDISC */
- nla_total_size(1) +
- /* IFLA_GRE_ENCAP_TYPE */
- nla_total_size(2) +
- /* IFLA_GRE_ENCAP_FLAGS */
- nla_total_size(2) +
- /* IFLA_GRE_ENCAP_SPORT */
- nla_total_size(2) +
- /* IFLA_GRE_ENCAP_DPORT */
- nla_total_size(2) +
- /* IFLA_GRE_COLLECT_METADATA */
- nla_total_size(0) +
- /* IFLA_GRE_ERSPAN_INDEX */
- nla_total_size(4) +
- /* IFLA_GRE_ERSPAN_VER */
- nla_total_size(1) +
- /* IFLA_GRE_ERSPAN_DIR */
- nla_total_size(1) +
- /* IFLA_GRE_ERSPAN_HWID */
- nla_total_size(2) +
- 0;
-}
-
-static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
-{
- struct ip_tunnel *t = netdev_priv(dev);
- struct ip_tunnel_parm *p = &t->parms;
-
- if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
- nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) ||
- nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) ||
- nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
- nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
- nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
- nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
- nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
- nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
- nla_put_u8(skb, IFLA_GRE_PMTUDISC,
- !!(p->iph.frag_off & htons(IP_DF))))
- goto nla_put_failure;
-
- if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
- goto nla_put_failure;
-
- if (t->erspan_ver == 1) {
- if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
- goto nla_put_failure;
- } else if (t->erspan_ver == 2) {
- if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
- goto nla_put_failure;
- if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
- goto nla_put_failure;
- }
-
- return 0;
-
-nla_put_failure:
- return -EMSGSIZE;
-}
-
-static const struct nla_policy ipgre_policy[RPL_IFLA_GRE_MAX + 1] = {
- [IFLA_GRE_LINK] = { .type = NLA_U32 },
- [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
- [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
- [IFLA_GRE_IKEY] = { .type = NLA_U32 },
- [IFLA_GRE_OKEY] = { .type = NLA_U32 },
- [IFLA_GRE_LOCAL] = { .len = sizeof_field(struct iphdr, saddr) },
- [IFLA_GRE_REMOTE] = { .len = sizeof_field(struct iphdr, daddr) },
- [IFLA_GRE_TTL] = { .type = NLA_U8 },
- [IFLA_GRE_TOS] = { .type = NLA_U8 },
- [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
- [IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 },
- [IFLA_GRE_ERSPAN_VER] = { .type = NLA_U8 },
- [IFLA_GRE_ERSPAN_DIR] = { .type = NLA_U8 },
- [IFLA_GRE_ERSPAN_HWID] = { .type = NLA_U16 },
-};
-
-static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
- .kind = "ovs_gretap",
- .maxtype = RPL_IFLA_GRE_MAX,
- .policy = ipgre_policy,
- .priv_size = sizeof(struct ip_tunnel),
- .setup = ipgre_tap_setup,
- .validate = ipgre_tap_validate,
- .newlink = ipgre_newlink,
- .dellink = ip_tunnel_dellink,
- .get_size = ipgre_get_size,
- .fill_info = ipgre_fill_info,
-#ifdef HAVE_GET_LINK_NET
- .get_link_net = ip_tunnel_get_link_net,
-#endif
-};
-
-static struct rtnl_link_ops erspan_link_ops __read_mostly = {
- .kind = "erspan",
- .maxtype = RPL_IFLA_GRE_MAX,
- .policy = ipgre_policy,
- .priv_size = sizeof(struct ip_tunnel),
- .setup = erspan_setup,
- .validate = erspan_validate,
- .newlink = ipgre_newlink,
- .dellink = ip_tunnel_dellink,
- .get_size = ipgre_get_size,
- .fill_info = ipgre_fill_info,
-#ifdef HAVE_GET_LINK_NET
- .get_link_net = ip_tunnel_get_link_net,
-#endif
-};
-
-struct net_device *rpl_gretap_fb_dev_create(struct net *net, const char *name,
- u8 name_assign_type)
-{
- struct nlattr *tb[IFLA_MAX + 1];
- struct net_device *dev;
- LIST_HEAD(list_kill);
- struct ip_tunnel *t;
- int err;
-
- memset(&tb, 0, sizeof(tb));
-
- dev = rtnl_create_link(net, (char *)name, name_assign_type,
- &ipgre_tap_ops, tb);
- if (IS_ERR(dev))
- return dev;
-
- t = netdev_priv(dev);
- t->collect_md = true;
- /* Configure flow based GRE device. */
-#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
- err = ipgre_newlink(net, dev, tb, NULL, NULL);
-#else
- err = ipgre_newlink(net, dev, tb, NULL);
-#endif
- if (err < 0) {
- free_netdev(dev);
- return ERR_PTR(err);
- }
-
- /* openvswitch users expect packet sizes to be unrestricted,
- * so set the largest MTU we can.
- */
- err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
- if (err)
- goto out;
-
- return dev;
-out:
- ip_tunnel_dellink(dev, &list_kill);
- unregister_netdevice_many(&list_kill);
- return ERR_PTR(err);
-}
-EXPORT_SYMBOL_GPL(rpl_gretap_fb_dev_create);
-
-static int __net_init erspan_init_net(struct net *net)
-{
- return ip_tunnel_init_net(net, erspan_net_id,
- &erspan_link_ops, NULL);
-}
-
-static void __net_exit erspan_exit_net(struct net *net)
-{
- struct ip_tunnel_net *itn = net_generic(net, erspan_net_id);
-
- ip_tunnel_delete_net(itn, &erspan_link_ops);
-}
-
-static struct pernet_operations erspan_net_ops = {
- .init = erspan_init_net,
- .exit = erspan_exit_net,
- .id = &erspan_net_id,
- .size = sizeof(struct ip_tunnel_net),
-};
-
-static int __net_init ipgre_tap_init_net(struct net *net)
-{
- return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "ovs-gretap0");
-}
-
-static void __net_exit ipgre_tap_exit_net(struct net *net)
-{
- struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);
-
- ip_tunnel_delete_net(itn, &ipgre_tap_ops);
-}
-
-static struct pernet_operations ipgre_tap_net_ops = {
- .init = ipgre_tap_init_net,
- .exit = ipgre_tap_exit_net,
- .id = &gre_tap_net_id,
- .size = sizeof(struct ip_tunnel_net),
-};
-
-static struct net_device *erspan_fb_dev_create(struct net *net,
- const char *name,
- u8 name_assign_type)
-{
- struct nlattr *tb[IFLA_MAX + 1];
- struct net_device *dev;
- LIST_HEAD(list_kill);
- struct ip_tunnel *t;
- int err;
-
- memset(&tb, 0, sizeof(tb));
-
- dev = rtnl_create_link(net, (char *)name, name_assign_type,
- &erspan_link_ops, tb);
- if (IS_ERR(dev))
- return dev;
-
- t = netdev_priv(dev);
- t->collect_md = true;
- /* Configure flow based GRE device. */
-#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
- err = ipgre_newlink(net, dev, tb, NULL, NULL);
-#else
- err = ipgre_newlink(net, dev, tb, NULL);
-#endif
- if (err < 0) {
- free_netdev(dev);
- return ERR_PTR(err);
- }
-
- /* openvswitch users expect packet sizes to be unrestricted,
- * so set the largest MTU we can.
- */
- err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
- if (err)
- goto out;
-
- return dev;
-out:
- ip_tunnel_dellink(dev, &list_kill);
- unregister_netdevice_many(&list_kill);
- return ERR_PTR(err);
-}
-
-static struct vport_ops ovs_erspan_vport_ops;
-
-static struct vport *erspan_tnl_create(const struct vport_parms *parms)
-{
- struct net *net = ovs_dp_get_net(parms->dp);
- struct net_device *dev;
- struct vport *vport;
- int err;
-
- vport = ovs_vport_alloc(0, &ovs_erspan_vport_ops, parms);
- if (IS_ERR(vport))
- return vport;
-
- rtnl_lock();
- dev = erspan_fb_dev_create(net, parms->name, NET_NAME_USER);
- if (IS_ERR(dev)) {
- rtnl_unlock();
- ovs_vport_free(vport);
- return ERR_CAST(dev);
- }
-
- err = dev_change_flags(dev, dev->flags | IFF_UP, NULL);
- if (err < 0) {
- rtnl_delete_link(dev);
- rtnl_unlock();
- ovs_vport_free(vport);
- return ERR_PTR(err);
- }
-
- rtnl_unlock();
- return vport;
-}
-
-static struct vport *erspan_create(const struct vport_parms *parms)
-{
- struct vport *vport;
-
- vport = erspan_tnl_create(parms);
- if (IS_ERR(vport))
- return vport;
-
- return ovs_netdev_link(vport, parms->name);
-}
-
-static struct vport_ops ovs_erspan_vport_ops = {
- .type = OVS_VPORT_TYPE_ERSPAN,
- .create = erspan_create,
- .send = __erspan_fb_xmit,
-#ifndef USE_UPSTREAM_TUNNEL
- .fill_metadata_dst = gre_fill_metadata_dst,
-#endif
- .destroy = ovs_netdev_tunnel_destroy,
-};
-
-static struct vport_ops ovs_ipgre_vport_ops;
-
-static struct vport *ipgre_tnl_create(const struct vport_parms *parms)
-{
- struct net *net = ovs_dp_get_net(parms->dp);
- struct net_device *dev;
- struct vport *vport;
- int err;
-
- vport = ovs_vport_alloc(0, &ovs_ipgre_vport_ops, parms);
- if (IS_ERR(vport))
- return vport;
-
- rtnl_lock();
- dev = gretap_fb_dev_create(net, parms->name, NET_NAME_USER);
- if (IS_ERR(dev)) {
- rtnl_unlock();
- ovs_vport_free(vport);
- return ERR_CAST(dev);
- }
-
- err = dev_change_flags(dev, dev->flags | IFF_UP, NULL);
- if (err < 0) {
- rtnl_delete_link(dev);
- rtnl_unlock();
- ovs_vport_free(vport);
- return ERR_PTR(err);
- }
-
- rtnl_unlock();
- return vport;
-}
-
-static struct vport *ipgre_create(const struct vport_parms *parms)
-{
- struct vport *vport;
-
- vport = ipgre_tnl_create(parms);
- if (IS_ERR(vport))
- return vport;
-
- return ovs_netdev_link(vport, parms->name);
-}
-
-static struct vport_ops ovs_ipgre_vport_ops = {
- .type = OVS_VPORT_TYPE_GRE,
- .create = ipgre_create,
- .send = gre_fb_xmit,
-#ifndef USE_UPSTREAM_TUNNEL
- .fill_metadata_dst = gre_fill_metadata_dst,
-#endif
- .destroy = ovs_netdev_tunnel_destroy,
-};
-
-int rpl_ipgre_init(void)
-{
- int err;
-
- err = register_pernet_device(&ipgre_tap_net_ops);
- if (err < 0) {
- if (err == -EEXIST)
- goto ip_gre_loaded;
- else
- goto pnet_tap_failed;
- }
-
- err = register_pernet_device(&erspan_net_ops);
- if (err < 0) {
- if (err == -EEXIST)
- goto ip_gre_loaded;
- else
- goto pnet_erspan_failed;
- }
-
- err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
- if (err < 0) {
- pr_info("%s: can't add protocol\n", __func__);
- if (err == -EBUSY) {
- goto ip_gre_loaded;
- } else {
- goto add_proto_failed;
- }
- }
-
- pr_info("GRE over IPv4 tunneling driver\n");
- ovs_vport_ops_register(&ovs_ipgre_vport_ops);
- ovs_vport_ops_register(&ovs_erspan_vport_ops);
- return 0;
-
-ip_gre_loaded:
- /* Since GRE only allows single receiver to be registerd,
- * we skip here so only gre transmit works, see:
- *
- * commit 9f57c67c379d88a10e8ad676426fee5ae7341b14
- * Author: Pravin B Shelar <pshelar@nicira.com>
- * Date: Fri Aug 7 23:51:52 2015 -0700
- * gre: Remove support for sharing GRE protocol hook
- *
- * OVS GRE receive part is disabled.
- */
- pr_info("GRE TX only over IPv4 tunneling driver\n");
- ip_gre_loaded = true;
- ovs_vport_ops_register(&ovs_ipgre_vport_ops);
- ovs_vport_ops_register(&ovs_erspan_vport_ops);
- return 0;
-
-add_proto_failed:
- unregister_pernet_device(&erspan_net_ops);
-pnet_erspan_failed:
- unregister_pernet_device(&ipgre_tap_net_ops);
-pnet_tap_failed:
- pr_err("Error while initializing GRE %d\n", err);
- return err;
-}
-
-void rpl_ipgre_fini(void)
-{
- ovs_vport_ops_unregister(&ovs_erspan_vport_ops);
- ovs_vport_ops_unregister(&ovs_ipgre_vport_ops);
-
- if (!ip_gre_loaded) {
- gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
- unregister_pernet_device(&erspan_net_ops);
- unregister_pernet_device(&ipgre_tap_net_ops);
- }
-}
-
-#endif
diff --git a/datapath/linux/compat/ip_output.c b/datapath/linux/compat/ip_output.c
deleted file mode 100644
index e2f869f9a..000000000
--- a/datapath/linux/compat/ip_output.c
+++ /dev/null
@@ -1,418 +0,0 @@
-/*
- * IP fragmentation backport, heavily based on linux/net/ipv4/ip_output.c,
- * copied from Linux ae7ef81ef000 ("skbuff: introduce skb_gso_validate_mtu")
- *
- * INET An implementation of the TCP/IP protocol suite for the LINUX
- * operating system. INET is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * The Internet Protocol (IP) output module.
- *
- * Authors: Ross Biro
- * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
- * Donald Becker, <becker@super.org>
- * Alan Cox, <Alan.Cox@linux.org>
- * Richard Underwood
- * Stefan Becker, <stefanb@yello.ping.de>
- * Jorge Cwik, <jorge@laser.satlink.net>
- * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
- * Hirokazu Takahashi, <taka@valinux.co.jp>
- *
- * See ip_input.c for original log
- *
- * Fixes:
- * Alan Cox : Missing nonblock feature in ip_build_xmit.
- * Mike Kilburn : htons() missing in ip_build_xmit.
- * Bradford Johnson: Fix faulty handling of some frames when
- * no route is found.
- * Alexander Demenshin: Missing sk/skb free in ip_queue_xmit
- * (in case if packet not accepted by
- * output firewall rules)
- * Mike McLagan : Routing by source
- * Alexey Kuznetsov: use new route cache
- * Andi Kleen: Fix broken PMTU recovery and remove
- * some redundant tests.
- * Vitaly E. Lavrov : Transparent proxy revived after year coma.
- * Andi Kleen : Replace ip_reply with ip_send_reply.
- * Andi Kleen : Split fast and slow ip_build_xmit path
- * for decreased register pressure on x86
- * and more readibility.
- * Marc Boucher : When call_out_firewall returns FW_QUEUE,
- * silently drop skb instead of failing with -EPERM.
- * Detlev Wengorz : Copy protocol for fragments.
- * Hirokazu Takahashi: HW checksumming for outgoing UDP
- * datagrams.
- * Hirokazu Takahashi: sendfile() on UDP works now.
- */
-
-#ifndef HAVE_CORRECT_MRU_HANDLING
-#include <asm/uaccess.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/highmem.h>
-#include <linux/slab.h>
-
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/in.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/proc_fs.h>
-#include <linux/stat.h>
-#include <linux/init.h>
-
-#include <net/snmp.h>
-#include <net/ip.h>
-#include <net/protocol.h>
-#include <net/route.h>
-#include <net/xfrm.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/arp.h>
-#include <net/icmp.h>
-#include <net/checksum.h>
-#include <net/inetpeer.h>
-#include <linux/igmp.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_bridge.h>
-#include <linux/netlink.h>
-#include <linux/tcp.h>
-
-static inline void rpl_ip_options_fragment(struct sk_buff *skb)
-{
- unsigned char *optptr = skb_network_header(skb) + sizeof(struct iphdr);
- struct ip_options *opt = &(IPCB(skb)->opt);
- int l = opt->optlen;
- int optlen;
-
- while (l > 0) {
- switch (*optptr) {
- case IPOPT_END:
- return;
- case IPOPT_NOOP:
- l--;
- optptr++;
- continue;
- }
- optlen = optptr[1];
- if (optlen < 2 || optlen > l)
- return;
- if (!IPOPT_COPIED(*optptr))
- memset(optptr, IPOPT_NOOP, optlen);
- l -= optlen;
- optptr += optlen;
- }
- opt->ts = 0;
- opt->rr = 0;
- opt->rr_needaddr = 0;
- opt->ts_needaddr = 0;
- opt->ts_needtime = 0;
-}
-#define ip_options_fragment rpl_ip_options_fragment
-
-static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
-{
- to->pkt_type = from->pkt_type;
- to->priority = from->priority;
- to->protocol = from->protocol;
- skb_dst_drop(to);
- skb_dst_copy(to, from);
- to->dev = from->dev;
- to->mark = from->mark;
-
- /* Copy the flags to each fragment. */
- IPCB(to)->flags = IPCB(from)->flags;
-
-#ifdef CONFIG_NET_SCHED
- to->tc_index = from->tc_index;
-#endif
- nf_copy(to, from);
-#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
- to->ipvs_property = from->ipvs_property;
-#endif
- skb_copy_secmark(to, from);
-}
-
-#ifdef HAVE_IP_DO_FRAGMENT_USING_NET
-#define OUTPUT(net, sk, skb) output(net, sk, skb)
-#elif defined(HAVE_IP_FRAGMENT_TAKES_SOCK)
-#define OUTPUT(net, sk, skb) output(sk, skb)
-#else
-#define OUTPUT(net, sk, skb) output(skb)
-#endif
-
-/*
- * This IP datagram is too large to be sent in one piece. Break it up into
- * smaller pieces (each of size equal to IP header plus
- * a block of the data of the original IP data part) that will yet fit in a
- * single device frame, and queue such a frame for sending.
- */
-
-int rpl_ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
- int (*output)(OVS_VPORT_OUTPUT_PARAMS))
-{
- struct iphdr *iph;
- int ptr;
- struct net_device *dev;
- struct sk_buff *skb2;
- unsigned int mtu, hlen, left, len, ll_rs;
- int offset;
- __be16 not_last_frag;
- struct rtable *rt = skb_rtable(skb);
- int err = 0;
-
- dev = rt->dst.dev;
-
- /* for offloaded checksums cleanup checksum before fragmentation */
- if (skb->ip_summed == CHECKSUM_PARTIAL &&
- (err = skb_checksum_help(skb)))
- goto fail;
-
- /*
- * Point into the IP datagram header.
- */
-
- iph = ip_hdr(skb);
-
- mtu = ip_skb_dst_mtu(skb);
- if (IPCB(skb)->frag_max_size && IPCB(skb)->frag_max_size < mtu)
- mtu = IPCB(skb)->frag_max_size;
-
- /*
- * Setup starting values.
- */
-
- hlen = iph->ihl * 4;
- mtu = mtu - hlen; /* Size of data space */
- IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
-
- /* When frag_list is given, use it. First, check its validity:
- * some transformers could create wrong frag_list or break existing
- * one, it is not prohibited. In this case fall back to copying.
- *
- * LATER: this step can be merged to real generation of fragments,
- * we can switch to copy when see the first bad fragment.
- */
- if (skb_has_frag_list(skb)) {
- struct sk_buff *frag, *frag2;
- int first_len = skb_pagelen(skb);
-
- if (first_len - hlen > mtu ||
- ((first_len - hlen) & 7) ||
- ip_is_fragment(iph) ||
- skb_cloned(skb))
- goto slow_path;
-
- skb_walk_frags(skb, frag) {
- /* Correct geometry. */
- if (frag->len > mtu ||
- ((frag->len & 7) && frag->next) ||
- skb_headroom(frag) < hlen)
- goto slow_path_clean;
-
- /* Partially cloned skb? */
- if (skb_shared(frag))
- goto slow_path_clean;
-
- BUG_ON(frag->sk);
- if (skb->sk) {
- frag->sk = skb->sk;
- frag->destructor = sock_wfree;
- }
- skb->truesize -= frag->truesize;
- }
-
- /* Everything is OK. Generate! */
-
- err = 0;
- offset = 0;
- frag = skb_shinfo(skb)->frag_list;
- skb_frag_list_init(skb);
- skb->data_len = first_len - skb_headlen(skb);
- skb->len = first_len;
- iph->tot_len = htons(first_len);
- iph->frag_off = htons(IP_MF);
- ip_send_check(iph);
-
- for (;;) {
- /* Prepare header of the next frame,
- * before previous one went down. */
- if (frag) {
- frag->ip_summed = CHECKSUM_NONE;
- skb_reset_transport_header(frag);
- __skb_push(frag, hlen);
- skb_reset_network_header(frag);
- memcpy(skb_network_header(frag), iph, hlen);
- iph = ip_hdr(frag);
- iph->tot_len = htons(frag->len);
- ip_copy_metadata(frag, skb);
- if (offset == 0)
- ip_options_fragment(frag);
- offset += skb->len - hlen;
- iph->frag_off = htons(offset>>3);
- if (frag->next)
- iph->frag_off |= htons(IP_MF);
- /* Ready, complete checksum */
- ip_send_check(iph);
- }
-
- err = OUTPUT(net, sk, skb);
-
- if (!err)
- IP_INC_STATS(net, IPSTATS_MIB_FRAGCREATES);
- if (err || !frag)
- break;
-
- skb = frag;
- frag = skb->next;
- skb->next = NULL;
- }
-
- if (err == 0) {
- IP_INC_STATS(net, IPSTATS_MIB_FRAGOKS);
- return 0;
- }
-
- while (frag) {
- skb = frag->next;
- kfree_skb(frag);
- frag = skb;
- }
- IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
- return err;
-
-slow_path_clean:
- skb_walk_frags(skb, frag2) {
- if (frag2 == frag)
- break;
- frag2->sk = NULL;
- frag2->destructor = NULL;
- skb->truesize += frag2->truesize;
- }
- }
-
-slow_path:
- iph = ip_hdr(skb);
-
- left = skb->len - hlen; /* Space per frame */
- ptr = hlen; /* Where to start from */
-
- ll_rs = LL_RESERVED_SPACE(rt->dst.dev);
-
- /*
- * Fragment the datagram.
- */
-
- offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
- not_last_frag = iph->frag_off & htons(IP_MF);
-
- /*
- * Keep copying data until we run out.
- */
-
- while (left > 0) {
- len = left;
- /* IF: it doesn't fit, use 'mtu' - the data space left */
- if (len > mtu)
- len = mtu;
- /* IF: we are not sending up to and including the packet end
- then align the next start on an eight byte boundary */
- if (len < left) {
- len &= ~7;
- }
-
- /* Allocate buffer */
- skb2 = alloc_skb(len + hlen + ll_rs, GFP_ATOMIC);
- if (!skb2) {
- err = -ENOMEM;
- goto fail;
- }
-
- /*
- * Set up data on packet
- */
-
- ip_copy_metadata(skb2, skb);
- skb_reserve(skb2, ll_rs);
- skb_put(skb2, len + hlen);
- skb_reset_network_header(skb2);
- skb2->transport_header = skb2->network_header + hlen;
-
- /*
- * Charge the memory for the fragment to any owner
- * it might possess
- */
-
- if (skb->sk)
- skb_set_owner_w(skb2, skb->sk);
-
- /*
- * Copy the packet header into the new buffer.
- */
-
- skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen);
-
- /*
- * Copy a block of the IP datagram.
- */
- if (skb_copy_bits(skb, ptr, skb_transport_header(skb2), len))
- BUG();
- left -= len;
-
- /*
- * Fill in the new header fields.
- */
- iph = ip_hdr(skb2);
- iph->frag_off = htons((offset >> 3));
-
- if (IPCB(skb)->flags & IPSKB_FRAG_PMTU)
- iph->frag_off |= htons(IP_DF);
-
- /* ANK: dirty, but effective trick. Upgrade options only if
- * the segment to be fragmented was THE FIRST (otherwise,
- * options are already fixed) and make it ONCE
- * on the initial skb, so that all the following fragments
- * will inherit fixed options.
- */
- if (offset == 0)
- ip_options_fragment(skb);
-
- /*
- * Added AC : If we are fragmenting a fragment that's not the
- * last fragment then keep MF on each bit
- */
- if (left > 0 || not_last_frag)
- iph->frag_off |= htons(IP_MF);
- ptr += len;
- offset += len;
-
- /*
- * Put this fragment into the sending queue.
- */
- iph->tot_len = htons(len + hlen);
-
- ip_send_check(iph);
-
- err = OUTPUT(net, sk, skb2);
- if (err)
- goto fail;
-
- IP_INC_STATS(net, IPSTATS_MIB_FRAGCREATES);
- }
- consume_skb(skb);
- IP_INC_STATS(net, IPSTATS_MIB_FRAGOKS);
- return err;
-
-fail:
- kfree_skb(skb);
- IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
- return err;
-}
-EXPORT_SYMBOL(rpl_ip_do_fragment);
-
-#endif /* HAVE_CORRECT_MRU_HANDLING */
diff --git a/datapath/linux/compat/ip_tunnel.c b/datapath/linux/compat/ip_tunnel.c
deleted file mode 100644
index e7a039358..000000000
--- a/datapath/linux/compat/ip_tunnel.c
+++ /dev/null
@@ -1,776 +0,0 @@
-/*
- * Copyright (c) 2013,2018 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/capability.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/uaccess.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <linux/in.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include <linux/if_arp.h>
-#include <linux/mroute.h>
-#include <linux/init.h>
-#include <linux/in6.h>
-#include <linux/inetdevice.h>
-#include <linux/igmp.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/etherdevice.h>
-#include <linux/if_ether.h>
-#include <linux/if_vlan.h>
-#include <linux/rculist.h>
-#include <linux/err.h>
-
-#include <net/sock.h>
-#include <net/ip.h>
-#include <net/icmp.h>
-#include <net/protocol.h>
-#include <net/ip_tunnels.h>
-#include <net/arp.h>
-#include <net/checksum.h>
-#include <net/dsfield.h>
-#include <net/inet_ecn.h>
-#include <net/xfrm.h>
-#include <net/net_namespace.h>
-#include <net/netns/generic.h>
-#include <net/rtnetlink.h>
-#include <net/dst_metadata.h>
-
-#if IS_ENABLED(CONFIG_IPV6)
-#include <net/ipv6.h>
-#include <net/ip6_fib.h>
-#include <net/ip6_route.h>
-#endif
-
-#include "compat.h"
-
-#ifndef USE_UPSTREAM_TUNNEL
-const struct ip_tunnel_encap_ops __rcu *
- rpl_iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly;
-
-static unsigned int rpl_ip_tunnel_hash(__be32 key, __be32 remote)
-{
- return hash_32((__force u32)key ^ (__force u32)remote,
- IP_TNL_HASH_BITS);
-}
-
-static bool rpl_ip_tunnel_key_match(const struct ip_tunnel_parm *p,
- __be16 flags, __be32 key)
-{
- if (p->i_flags & TUNNEL_KEY) {
- if (flags & TUNNEL_KEY)
- return key == p->i_key;
- else
- /* key expected, none present */
- return false;
- } else
- return !(flags & TUNNEL_KEY);
-}
-
-static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
- struct ip_tunnel_parm *parms)
-{
- unsigned int h;
- __be32 remote;
- __be32 i_key = parms->i_key;
-
- if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
- remote = parms->iph.daddr;
- else
- remote = 0;
-
- if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
- i_key = 0;
-
- h = rpl_ip_tunnel_hash(i_key, remote);
- return &itn->tunnels[h];
-}
-
-static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
-{
- struct hlist_head *head = ip_bucket(itn, &t->parms);
-
- if (t->collect_md)
- rcu_assign_pointer(itn->collect_md_tun, t);
- hlist_add_head_rcu(&t->hash_node, head);
-}
-
-static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
-{
- if (t->collect_md)
- rcu_assign_pointer(itn->collect_md_tun, NULL);
- hlist_del_init_rcu(&t->hash_node);
-}
-
-static struct net_device *__ip_tunnel_create(struct net *net,
- const struct rtnl_link_ops *ops,
- struct ip_tunnel_parm *parms)
-{
- int err;
- struct ip_tunnel *tunnel;
- struct net_device *dev;
- char name[IFNAMSIZ];
-
- if (parms->name[0])
- strlcpy(name, parms->name, IFNAMSIZ);
- else {
- if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
- err = -E2BIG;
- goto failed;
- }
- strlcpy(name, ops->kind, IFNAMSIZ);
- strncat(name, "%d", 2);
- }
-
- ASSERT_RTNL();
- dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
- if (!dev) {
- err = -ENOMEM;
- goto failed;
- }
- dev_net_set(dev, net);
-
- dev->rtnl_link_ops = ops;
-
- tunnel = netdev_priv(dev);
- tunnel->parms = *parms;
- tunnel->net = net;
-
- err = register_netdevice(dev);
- if (err)
- goto failed_free;
-
- return dev;
-
-failed_free:
- free_netdev(dev);
-failed:
- return ERR_PTR(err);
-}
-
-static inline void init_tunnel_flow(struct flowi4 *fl4,
- int proto,
- __be32 daddr, __be32 saddr,
- __be32 key, __u8 tos, int oif)
-{
- memset(fl4, 0, sizeof(*fl4));
- fl4->flowi4_oif = oif;
- fl4->daddr = daddr;
- fl4->saddr = saddr;
- fl4->flowi4_tos = tos;
- fl4->flowi4_proto = proto;
- fl4->fl4_gre_key = key;
-}
-
-static int ip_tunnel_bind_dev(struct net_device *dev)
-{
- struct net_device *tdev = NULL;
- struct ip_tunnel *tunnel = netdev_priv(dev);
- const struct iphdr *iph;
- int hlen = LL_MAX_HEADER;
- int mtu = ETH_DATA_LEN;
- int t_hlen = tunnel->hlen + sizeof(struct iphdr);
-
- iph = &tunnel->parms.iph;
-
- /* Guess output device to choose reasonable mtu and needed_headroom */
- if (iph->daddr) {
- struct flowi4 fl4;
- struct rtable *rt;
-
- init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
- iph->saddr, tunnel->parms.o_key,
- RT_TOS(iph->tos), tunnel->parms.link);
- rt = ip_route_output_key(tunnel->net, &fl4);
-
- if (!IS_ERR(rt)) {
- tdev = rt->dst.dev;
- ip_rt_put(rt);
- }
- if (dev->type != ARPHRD_ETHER)
- dev->flags |= IFF_POINTOPOINT;
-
- dst_cache_reset(&tunnel->dst_cache);
- }
-
- if (!tdev && tunnel->parms.link)
- tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
-
- if (tdev) {
- hlen = tdev->hard_header_len + tdev->needed_headroom;
- mtu = tdev->mtu;
- }
-
- dev->needed_headroom = t_hlen + hlen;
- mtu -= (dev->hard_header_len + t_hlen);
-
- if (mtu < 68)
- mtu = 68;
-
- return mtu;
-}
-
-int rpl___ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
-{
- struct ip_tunnel *tunnel = netdev_priv(dev);
- int t_hlen = tunnel->hlen + sizeof(struct iphdr);
- int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
-
- if (new_mtu < 68)
- return -EINVAL;
-
- if (new_mtu > max_mtu) {
- if (strict)
- return -EINVAL;
-
- new_mtu = max_mtu;
- }
-
- dev->mtu = new_mtu;
- return 0;
-}
-
-int rpl_ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
-{
- return rpl___ip_tunnel_change_mtu(dev, new_mtu, true);
-}
-
-static int rpl_tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
- struct rtable *rt, __be16 df,
- const struct iphdr *inner_iph)
-{
- struct ip_tunnel *tunnel = netdev_priv(dev);
- int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
- int mtu;
-
- if (df)
- mtu = dst_mtu(&rt->dst) - dev->hard_header_len
- - sizeof(struct iphdr) - tunnel->hlen;
- else
- mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
-
- if (skb_valid_dst(skb))
-#ifndef HAVE_DST_OPS_CONFIRM_NEIGH
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
-#else
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb),
- NULL, skb, mtu, false);
-#endif
-
- if (skb->protocol == htons(ETH_P_IP)) {
- if (!skb_is_gso(skb) &&
- (inner_iph->frag_off & htons(IP_DF)) &&
- mtu < pkt_size) {
- memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
- return -E2BIG;
- }
- }
-#if IS_ENABLED(CONFIG_IPV6)
- else if (skb->protocol == htons(ETH_P_IPV6)) {
- struct rt6_info *rt6;
-
- rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) :
- NULL;
-
- if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
- mtu >= IPV6_MIN_MTU) {
- if ((tunnel->parms.iph.daddr &&
- !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
- rt6->rt6i_dst.plen == 128) {
- rt6->rt6i_flags |= RTF_MODIFIED;
- dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
- }
- }
-
- if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
- mtu < pkt_size) {
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
- return -E2BIG;
- }
- }
-#endif
- return 0;
-}
-
-void rpl_ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
- const struct iphdr *tnl_params, const u8 protocol)
-{
- struct ip_tunnel *tunnel = netdev_priv(dev);
- const struct iphdr *inner_iph;
- struct flowi4 fl4;
- u8 tos, ttl;
- __be16 df;
- struct rtable *rt; /* Route to the other host */
- unsigned int max_headroom; /* The extra header space needed */
- __be32 dst;
- bool connected;
-
- inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
- connected = (tunnel->parms.iph.daddr != 0);
-
- dst = tnl_params->daddr;
- if (dst == 0) {
- /* NBMA tunnel */
-
- if (skb_dst(skb) == NULL) {
- dev->stats.tx_fifo_errors++;
- goto tx_error;
- }
-
- if (skb->protocol == htons(ETH_P_IP)) {
- rt = skb_rtable(skb);
- dst = rt_nexthop(rt, inner_iph->daddr);
- }
-#if IS_ENABLED(CONFIG_IPV6)
- else if (skb->protocol == htons(ETH_P_IPV6)) {
- const struct in6_addr *addr6;
- struct neighbour *neigh;
- bool do_tx_error_icmp;
- int addr_type;
-
- neigh = dst_neigh_lookup(skb_dst(skb),
- &ipv6_hdr(skb)->daddr);
- if (neigh == NULL)
- goto tx_error;
-
- addr6 = (const struct in6_addr *)&neigh->primary_key;
- addr_type = ipv6_addr_type(addr6);
-
- if (addr_type == IPV6_ADDR_ANY) {
- addr6 = &ipv6_hdr(skb)->daddr;
- addr_type = ipv6_addr_type(addr6);
- }
-
- if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
- do_tx_error_icmp = true;
- else {
- do_tx_error_icmp = false;
- dst = addr6->s6_addr32[3];
- }
- neigh_release(neigh);
- if (do_tx_error_icmp)
- goto tx_error_icmp;
- }
-#endif
- else
- goto tx_error;
-
- connected = false;
- }
-
- tos = tnl_params->tos;
- if (tos & 0x1) {
- tos &= ~0x1;
- if (skb->protocol == htons(ETH_P_IP)) {
- tos = inner_iph->tos;
- connected = false;
- } else if (skb->protocol == htons(ETH_P_IPV6)) {
- tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
- connected = false;
- }
- }
-
- init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
- tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
-
- if (ovs_ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
- goto tx_error;
-
- rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr) :
- NULL;
-
- if (!rt) {
- rt = ip_route_output_key(tunnel->net, &fl4);
-
- if (IS_ERR(rt)) {
- dev->stats.tx_carrier_errors++;
- goto tx_error;
- }
- if (connected)
- dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
- fl4.saddr);
- }
-
- if (rt->dst.dev == dev) {
- ip_rt_put(rt);
- dev->stats.collisions++;
- goto tx_error;
- }
-
- if (rpl_tnl_update_pmtu(dev, skb, rt,
- tnl_params->frag_off, inner_iph)) {
- ip_rt_put(rt);
- goto tx_error;
- }
-
- if (tunnel->err_count > 0) {
- if (time_before(jiffies,
- tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
- tunnel->err_count--;
-
- memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
- dst_link_failure(skb);
- } else
- tunnel->err_count = 0;
- }
-
- tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
- ttl = tnl_params->ttl;
- if (ttl == 0) {
- if (skb->protocol == htons(ETH_P_IP))
- ttl = inner_iph->ttl;
-#if IS_ENABLED(CONFIG_IPV6)
- else if (skb->protocol == htons(ETH_P_IPV6))
- ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
-#endif
- else
- ttl = ip4_dst_hoplimit(&rt->dst);
- }
-
- df = tnl_params->frag_off;
- if (skb->protocol == htons(ETH_P_IP))
- df |= (inner_iph->frag_off&htons(IP_DF));
-
- max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
- + rt->dst.header_len;
- if (max_headroom > dev->needed_headroom)
- dev->needed_headroom = max_headroom;
-
- if (skb_cow_head(skb, dev->needed_headroom)) {
- ip_rt_put(rt);
- dev->stats.tx_dropped++;
- kfree_skb(skb);
- return;
- }
-
- iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol,
- tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
-
- return;
-
-#if IS_ENABLED(CONFIG_IPV6)
-tx_error_icmp:
- dst_link_failure(skb);
-#endif
-tx_error:
- dev->stats.tx_errors++;
- kfree_skb(skb);
-}
-EXPORT_SYMBOL_GPL(rpl_ip_tunnel_xmit);
-
-static void ip_tunnel_dev_free(struct net_device *dev)
-{
- free_percpu(dev->tstats);
-#ifndef HAVE_NEEDS_FREE_NETDEV
- free_netdev(dev);
-#endif
-}
-
-void rpl_ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
-{
- struct ip_tunnel *tunnel = netdev_priv(dev);
- struct ip_tunnel_net *itn;
-
- itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
-
- if (itn->fb_tunnel_dev != dev) {
- ip_tunnel_del(itn, netdev_priv(dev));
- unregister_netdevice_queue(dev, head);
- }
-}
-
-int rpl_ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
- struct rtnl_link_ops *ops, char *devname)
-{
- struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
- struct ip_tunnel_parm parms;
- unsigned int i;
-
- for (i = 0; i < IP_TNL_HASH_SIZE; i++)
- INIT_HLIST_HEAD(&itn->tunnels[i]);
-
- if (!ops) {
- itn->fb_tunnel_dev = NULL;
- return 0;
- }
-
- memset(&parms, 0, sizeof(parms));
- if (devname)
- strlcpy(parms.name, devname, IFNAMSIZ);
-
- rtnl_lock();
- itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
- /* FB netdevice is special: we have one, and only one per netns.
- * * Allowing to move it to another netns is clearly unsafe.
- * */
- if (!IS_ERR(itn->fb_tunnel_dev)) {
- itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
- itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
- ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
- }
- rtnl_unlock();
-
- return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
-}
-
-static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
- struct rtnl_link_ops *ops)
-{
- struct net *net = dev_net(itn->fb_tunnel_dev);
- struct net_device *dev, *aux;
- int h;
-
- for_each_netdev_safe(net, dev, aux)
- if (dev->rtnl_link_ops == ops)
- unregister_netdevice_queue(dev, head);
-
- for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
- struct ip_tunnel *t;
- struct hlist_node *n;
- struct hlist_head *thead = &itn->tunnels[h];
-
- hlist_for_each_entry_safe(t, n, thead, hash_node)
- /* If dev is in the same netns, it has already
- * been added to the list by the previous loop.
- */
- if (!net_eq(dev_net(t->dev), net))
- unregister_netdevice_queue(t->dev, head);
- }
-}
-
-void rpl_ip_tunnel_delete_net(struct ip_tunnel_net *itn,
- struct rtnl_link_ops *ops)
-{
- LIST_HEAD(list);
-
- rtnl_lock();
- ip_tunnel_destroy(itn, &list, ops);
- unregister_netdevice_many(&list);
- rtnl_unlock();
-}
-
-int rpl_ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
- struct ip_tunnel_parm *p)
-{
- struct ip_tunnel *nt;
- struct net *net = dev_net(dev);
- struct ip_tunnel_net *itn;
- int mtu;
- int err;
-
- nt = netdev_priv(dev);
- itn = net_generic(net, nt->ip_tnl_net_id);
-
- if (nt->collect_md) {
- if (rtnl_dereference(itn->collect_md_tun))
- return -EEXIST;
- } else {
- return -EOPNOTSUPP;
- }
-
- nt->net = net;
- nt->parms = *p;
- err = register_netdevice(dev);
- if (err)
- goto out;
-
- if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
- eth_hw_addr_random(dev);
-
- mtu = ip_tunnel_bind_dev(dev);
- if (!tb[IFLA_MTU])
- dev->mtu = mtu;
-
- ip_tunnel_add(itn, nt);
-out:
- return err;
-}
-
-int rpl_ip_tunnel_init(struct net_device *dev)
-{
- struct ip_tunnel *tunnel = netdev_priv(dev);
- struct iphdr *iph = &tunnel->parms.iph;
- int err;
-
-#ifndef HAVE_NEEDS_FREE_NETDEV
- dev->destructor = ip_tunnel_dev_free;
-#else
- dev->needs_free_netdev = true;
- dev->priv_destructor = ip_tunnel_dev_free;
-#endif
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
-
- err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
- if (err) {
- free_percpu(dev->tstats);
- return err;
- }
-
- err = gro_cells_init(&tunnel->gro_cells, dev);
- if (err) {
- dst_cache_destroy(&tunnel->dst_cache);
- free_percpu(dev->tstats);
- return err;
- }
-
- tunnel->dev = dev;
- tunnel->net = dev_net(dev);
- strcpy(tunnel->parms.name, dev->name);
- iph->version = 4;
- iph->ihl = 5;
-
- if (tunnel->collect_md) {
- dev->features |= NETIF_F_NETNS_LOCAL;
- netif_keep_dst(dev);
- }
- return 0;
-}
-
-void rpl_ip_tunnel_uninit(struct net_device *dev)
-{
- struct ip_tunnel *tunnel = netdev_priv(dev);
- struct net *net = tunnel->net;
- struct ip_tunnel_net *itn;
-
- itn = net_generic(net, tunnel->ip_tnl_net_id);
- if (itn->fb_tunnel_dev != dev)
- ip_tunnel_del(itn, netdev_priv(dev));
-}
-
-/* Do least required initialization, rest of init is done in tunnel_init call */
-void rpl_ip_tunnel_setup(struct net_device *dev, int net_id)
-{
- struct ip_tunnel *tunnel = netdev_priv(dev);
-
- tunnel->ip_tnl_net_id = net_id;
-}
-
-int rpl_ip_tunnel_get_iflink(const struct net_device *dev)
-{
- struct ip_tunnel *tunnel = netdev_priv(dev);
-
- return tunnel->parms.link;
-}
-
-struct net *rpl_ip_tunnel_get_link_net(const struct net_device *dev)
-{
- struct ip_tunnel *tunnel = netdev_priv(dev);
-
- return tunnel->net;
-}
-
-struct ip_tunnel *rpl_ip_tunnel_lookup(struct ip_tunnel_net *itn,
- int link, __be16 flags,
- __be32 remote, __be32 local,
- __be32 key)
-{
- unsigned int hash;
- struct ip_tunnel *t, *cand = NULL;
- struct hlist_head *head;
-
- hash = rpl_ip_tunnel_hash(key, remote);
- head = &itn->tunnels[hash];
-
- hlist_for_each_entry_rcu(t, head, hash_node) {
- if (local != t->parms.iph.saddr ||
- remote != t->parms.iph.daddr ||
- !(t->dev->flags & IFF_UP))
- continue;
-
- if (!rpl_ip_tunnel_key_match(&t->parms, flags, key))
- continue;
-
- if (t->parms.link == link)
- return t;
- else
- cand = t;
- }
-
- hlist_for_each_entry_rcu(t, head, hash_node) {
- if (remote != t->parms.iph.daddr ||
- t->parms.iph.saddr != 0 ||
- !(t->dev->flags & IFF_UP))
- continue;
-
- if (!rpl_ip_tunnel_key_match(&t->parms, flags, key))
- continue;
-
- if (t->parms.link == link)
- return t;
- else if (!cand)
- cand = t;
- }
-
- hash = rpl_ip_tunnel_hash(key, 0);
- head = &itn->tunnels[hash];
-
- hlist_for_each_entry_rcu(t, head, hash_node) {
- if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
- (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
- continue;
-
- if (!(t->dev->flags & IFF_UP))
- continue;
-
- if (!rpl_ip_tunnel_key_match(&t->parms, flags, key))
- continue;
-
- if (t->parms.link == link)
- return t;
- else if (!cand)
- cand = t;
- }
-
- if (flags & TUNNEL_NO_KEY)
- goto skip_key_lookup;
-
- hlist_for_each_entry_rcu(t, head, hash_node) {
- if (t->parms.i_key != key ||
- t->parms.iph.saddr != 0 ||
- t->parms.iph.daddr != 0 ||
- !(t->dev->flags & IFF_UP))
- continue;
-
- if (t->parms.link == link)
- return t;
- else if (!cand)
- cand = t;
- }
-
-skip_key_lookup:
- if (cand)
- return cand;
-
- t = rcu_dereference(itn->collect_md_tun);
- if (t)
- return t;
-
- if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
- return netdev_priv(itn->fb_tunnel_dev);
-
-
- return NULL;
-}
-EXPORT_SYMBOL_GPL(rpl_ip_tunnel_lookup);
-
-#endif
diff --git a/datapath/linux/compat/ip_tunnels_core.c b/datapath/linux/compat/ip_tunnels_core.c
deleted file mode 100644
index a3b1f7fc1..000000000
--- a/datapath/linux/compat/ip_tunnels_core.c
+++ /dev/null
@@ -1,330 +0,0 @@
-/*
- * Copyright (c) 2007-2013 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
- */
-
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/if_vlan.h>
-#include <linux/in.h>
-#include <linux/in_route.h>
-#include <linux/inetdevice.h>
-#include <linux/jhash.h>
-#include <linux/list.h>
-#include <linux/kernel.h>
-#include <linux/version.h>
-#include <linux/workqueue.h>
-#include <linux/rculist.h>
-#include <net/ip_tunnels.h>
-#include <net/ip6_tunnel.h>
-#include <net/route.h>
-#include <net/xfrm.h>
-
-#include "compat.h"
-#include "gso.h"
-#include "vport-netdev.h"
-
-#ifndef USE_UPSTREAM_TUNNEL
-void rpl_iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
- __be32 src, __be32 dst, __u8 proto, __u8 tos, __u8 ttl,
- __be16 df, bool xnet)
-{
- struct net_device *dev = skb->dev;
- int pkt_len = skb->len - skb_inner_network_offset(skb);
- struct iphdr *iph;
- int err;
-
- skb_scrub_packet(skb, xnet);
-
- skb_clear_hash(skb);
- skb_dst_set(skb, &rt->dst);
-
-#if 0
- /* Do not clear ovs_skb_cb. It will be done in gso code. */
- memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
-#endif
-
- /* Push down and install the IP header. */
- __skb_push(skb, sizeof(struct iphdr));
- skb_reset_network_header(skb);
-
- iph = ip_hdr(skb);
-
- iph->version = 4;
- iph->ihl = sizeof(struct iphdr) >> 2;
- iph->frag_off = df;
- iph->protocol = proto;
- iph->tos = tos;
- iph->daddr = dst;
- iph->saddr = src;
- iph->ttl = ttl;
-
-#ifdef HAVE_IP_SELECT_IDENT_USING_DST_ENTRY
- __ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1);
-#elif defined(HAVE_IP_SELECT_IDENT_USING_NET)
- __ip_select_ident(dev_net(rt->dst.dev), iph,
- skb_shinfo(skb)->gso_segs ?: 1);
-#else
- __ip_select_ident(iph, skb_shinfo(skb)->gso_segs ?: 1);
-#endif
-
- err = ip_local_out(dev_net(rt->dst.dev), sk, skb);
- if (unlikely(net_xmit_eval(err)))
- pkt_len = 0;
- iptunnel_xmit_stats(dev, pkt_len);
-}
-EXPORT_SYMBOL_GPL(rpl_iptunnel_xmit);
-
-int ovs_iptunnel_handle_offloads(struct sk_buff *skb,
- int gso_type_mask,
- void (*fix_segment)(struct sk_buff *))
-{
- int err;
-
- if (likely(!skb_is_encapsulated(skb))) {
- skb_reset_inner_headers(skb);
- skb->encapsulation = 1;
- } else if (skb_is_gso(skb)) {
- err = -ENOSYS;
- goto error;
- }
-
- if (skb_is_gso(skb)) {
- err = skb_unclone(skb, GFP_ATOMIC);
- if (unlikely(err))
- goto error;
- skb_shinfo(skb)->gso_type |= gso_type_mask;
-
-#ifndef USE_UPSTREAM_TUNNEL_GSO
- if (gso_type_mask)
- fix_segment = NULL;
-
- OVS_GSO_CB(skb)->fix_segment = fix_segment;
-#endif
- return 0;
- }
-
- if (skb->ip_summed != CHECKSUM_PARTIAL) {
- skb->ip_summed = CHECKSUM_NONE;
- skb->encapsulation = 0;
- }
-
- return 0;
-error:
- return err;
-}
-EXPORT_SYMBOL_GPL(ovs_iptunnel_handle_offloads);
-
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4,7,0)
-struct sk_buff *rpl_iptunnel_handle_offloads(struct sk_buff *skb,
- bool csum_help,
- int gso_type_mask)
-#else
-int rpl_iptunnel_handle_offloads(struct sk_buff *skb,
- bool csum_help,
- int gso_type_mask)
-#endif
-{
- int err;
-
- if (likely(!skb->encapsulation)) {
- skb_reset_inner_headers(skb);
- skb->encapsulation = 1;
- }
-
- if (skb_is_gso(skb)) {
- err = skb_unclone(skb, GFP_ATOMIC);
- if (unlikely(err))
- goto error;
- skb_shinfo(skb)->gso_type |= gso_type_mask;
- goto out;
- }
-
- /* If packet is not gso and we are resolving any partial checksum,
- * clear encapsulation flag. This allows setting CHECKSUM_PARTIAL
- * on the outer header without confusing devices that implement
- * NETIF_F_IP_CSUM with encapsulation.
- */
- if (csum_help)
- skb->encapsulation = 0;
-
- if (skb->ip_summed == CHECKSUM_PARTIAL && csum_help) {
- err = skb_checksum_help(skb);
- if (unlikely(err))
- goto error;
- } else if (skb->ip_summed != CHECKSUM_PARTIAL)
- skb->ip_summed = CHECKSUM_NONE;
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4,7,0)
-out:
- return skb;
-error:
- kfree_skb(skb);
- return ERR_PTR(err);
-#else
-out:
-error:
- return 0;
-#endif
-}
-EXPORT_SYMBOL_GPL(rpl_iptunnel_handle_offloads);
-
-int rpl___iptunnel_pull_header(struct sk_buff *skb, int hdr_len,
- __be16 inner_proto, bool raw_proto, bool xnet)
-{
- if (unlikely(!pskb_may_pull(skb, hdr_len)))
- return -ENOMEM;
-
- skb_pull_rcsum(skb, hdr_len);
-
- if (!raw_proto && inner_proto == htons(ETH_P_TEB)) {
- struct ethhdr *eh;
-
- if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
- return -ENOMEM;
-
- eh = (struct ethhdr *)skb->data;
- if (likely(eth_proto_is_802_3(eh->h_proto)))
- skb->protocol = eh->h_proto;
- else
- skb->protocol = htons(ETH_P_802_2);
-
- } else {
- skb->protocol = inner_proto;
- }
-
- skb_clear_hash_if_not_l4(skb);
- skb->vlan_tci = 0;
- skb_set_queue_mapping(skb, 0);
- skb_scrub_packet(skb, xnet);
-
- return iptunnel_pull_offloads(skb);
-}
-EXPORT_SYMBOL_GPL(rpl___iptunnel_pull_header);
-#endif /* USE_UPSTREAM_TUNNEL */
-
-bool ovs_skb_is_encapsulated(struct sk_buff *skb)
-{
- /* checking for inner protocol should be sufficient on newer kernel, but
- * old kernel just set encapsulation bit.
- */
- return ovs_skb_get_inner_protocol(skb) || skb->encapsulation;
-}
-EXPORT_SYMBOL_GPL(ovs_skb_is_encapsulated);
-
-/* derived from ip_tunnel_rcv(). */
-void ovs_ip_tunnel_rcv(struct net_device *dev, struct sk_buff *skb,
- struct metadata_dst *tun_dst)
-{
- struct pcpu_sw_netstats *tstats;
-
- tstats = this_cpu_ptr((struct pcpu_sw_netstats __percpu *)dev->tstats);
- u64_stats_update_begin(&tstats->syncp);
- tstats->rx_packets++;
- tstats->rx_bytes += skb->len;
- u64_stats_update_end(&tstats->syncp);
-
- skb_reset_mac_header(skb);
- skb_scrub_packet(skb, false);
- skb->protocol = eth_type_trans(skb, dev);
- skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
-
- ovs_skb_dst_set(skb, (struct dst_entry *)tun_dst);
-
-#ifndef USE_UPSTREAM_TUNNEL
- netdev_port_receive(skb, &tun_dst->u.tun_info);
-#else
- netif_rx(skb);
-#endif
-}
-
-#ifndef HAVE_PCPU_SW_NETSTATS
-#define netdev_stats_to_stats64 rpl_netdev_stats_to_stats64
-static void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
- const struct net_device_stats *netdev_stats)
-{
-#if BITS_PER_LONG == 64
- BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats));
- memcpy(stats64, netdev_stats, sizeof(*stats64));
-#else
- size_t i, n = sizeof(*stats64) / sizeof(u64);
- const unsigned long *src = (const unsigned long *)netdev_stats;
- u64 *dst = (u64 *)stats64;
-
- BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) !=
- sizeof(*stats64) / sizeof(u64));
- for (i = 0; i < n; i++)
- dst[i] = src[i];
-#endif
-}
-#endif
-
-#if !defined(HAVE_VOID_NDO_GET_STATS64) && !defined(HAVE_RHEL7_MAX_MTU)
-struct rtnl_link_stats64 *rpl_ip_tunnel_get_stats64(struct net_device *dev,
- struct rtnl_link_stats64 *tot)
-#else
-void rpl_ip_tunnel_get_stats64(struct net_device *dev,
- struct rtnl_link_stats64 *tot)
-#endif
-{
- int i;
-
- netdev_stats_to_stats64(tot, &dev->stats);
-
- for_each_possible_cpu(i) {
- const struct pcpu_sw_netstats *tstats =
- per_cpu_ptr((struct pcpu_sw_netstats __percpu *)dev->tstats, i);
- u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
- unsigned int start;
-
- do {
- start = u64_stats_fetch_begin_irq(&tstats->syncp);
- rx_packets = tstats->rx_packets;
- tx_packets = tstats->tx_packets;
- rx_bytes = tstats->rx_bytes;
- tx_bytes = tstats->tx_bytes;
- } while (u64_stats_fetch_retry_irq(&tstats->syncp, start));
-
- tot->rx_packets += rx_packets;
- tot->tx_packets += tx_packets;
- tot->rx_bytes += rx_bytes;
- tot->tx_bytes += tx_bytes;
- }
-
-#if !defined(HAVE_VOID_NDO_GET_STATS64) && !defined(HAVE_RHEL7_MAX_MTU)
- return tot;
-#endif
-}
-
-void rpl_ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb,
- struct net_device *dev)
-{
- int pkt_len, err;
-
- pkt_len = skb->len - skb_inner_network_offset(skb);
-#ifdef HAVE_IP6_LOCAL_OUT_SK
- err = ip6_local_out_sk(sk, skb);
-#else
- err = ip6_local_out(dev_net(skb_dst(skb)->dev), sk, skb);
-#endif
- if (net_xmit_eval(err))
- pkt_len = -1;
-
- iptunnel_xmit_stats(dev, pkt_len);
-}
-EXPORT_SYMBOL_GPL(rpl_ip6tunnel_xmit);
diff --git a/datapath/linux/compat/lisp.c b/datapath/linux/compat/lisp.c
deleted file mode 100644
index 49c60f4ed..000000000
--- a/datapath/linux/compat/lisp.c
+++ /dev/null
@@ -1,816 +0,0 @@
-/*
- * Copyright (c) 2015 Nicira, Inc.
- * Copyright (c) 2013 Cisco Systems, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/version.h>
-
-#include <linux/etherdevice.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/net.h>
-#include <linux/module.h>
-#include <linux/rculist.h>
-#include <linux/udp.h>
-
-#include <net/icmp.h>
-#include <net/ip.h>
-#include <net/lisp.h>
-#include <net/net_namespace.h>
-#include <net/netns/generic.h>
-#include <net/route.h>
-#include <net/udp.h>
-#include <net/udp_tunnel.h>
-#include <net/xfrm.h>
-
-#include "datapath.h"
-#include "gso.h"
-#include "vport.h"
-#include "vport-netdev.h"
-
-#define LISP_UDP_PORT 4341
-#define LISP_NETDEV_VER "0.1"
-static int lisp_net_id;
-
-/* Pseudo network device */
-struct lisp_dev {
- struct net *net; /* netns for packet i/o */
- struct net_device *dev; /* netdev for lisp tunnel */
- struct socket __rcu *sock;
- __be16 dst_port;
- struct list_head next;
-};
-
-/* per-network namespace private data for this module */
-struct lisp_net {
- struct list_head lisp_list;
-};
-
-/*
- * LISP encapsulation header:
- *
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * |N|L|E|V|I|flags| Nonce/Map-Version |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * | Instance ID/Locator Status Bits |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- *
- */
-
-/**
- * struct lisphdr - LISP header
- * @nonce_present: Flag indicating the presence of a 24 bit nonce value.
- * @locator_status_bits_present: Flag indicating the presence of Locator Status
- * Bits (LSB).
- * @solicit_echo_nonce: Flag indicating the use of the echo noncing mechanism.
- * @map_version_present: Flag indicating the use of mapping versioning.
- * @instance_id_present: Flag indicating the presence of a 24 bit Instance ID.
- * @reserved_flags: 3 bits reserved for future flags.
- * @nonce: 24 bit nonce value.
- * @map_version: 24 bit mapping version.
- * @locator_status_bits: Locator Status Bits: 32 bits when instance_id_present
- * is not set, 8 bits when it is.
- * @instance_id: 24 bit Instance ID
- */
-struct lisphdr {
-#ifdef __LITTLE_ENDIAN_BITFIELD
- __u8 reserved_flags:3;
- __u8 instance_id_present:1;
- __u8 map_version_present:1;
- __u8 solicit_echo_nonce:1;
- __u8 locator_status_bits_present:1;
- __u8 nonce_present:1;
-#else
- __u8 nonce_present:1;
- __u8 locator_status_bits_present:1;
- __u8 solicit_echo_nonce:1;
- __u8 map_version_present:1;
- __u8 instance_id_present:1;
- __u8 reserved_flags:3;
-#endif
- union {
- __u8 nonce[3];
- __u8 map_version[3];
- } u1;
- union {
- __be32 locator_status_bits;
- struct {
- __u8 instance_id[3];
- __u8 locator_status_bits;
- } word2;
- } u2;
-};
-
-#define LISP_HLEN (sizeof(struct udphdr) + sizeof(struct lisphdr))
-#define LISP_MAX_MTU (IP_MAX_MTU - LISP_HLEN - sizeof(struct iphdr))
-
-static inline struct lisphdr *lisp_hdr(const struct sk_buff *skb)
-{
- return (struct lisphdr *)(udp_hdr(skb) + 1);
-}
-
-/* Convert 64 bit tunnel ID to 24 bit Instance ID. */
-static void tunnel_id_to_instance_id(__be64 tun_id, __u8 *iid)
-{
-
-#ifdef __BIG_ENDIAN
- iid[0] = (__force __u8)(tun_id >> 16);
- iid[1] = (__force __u8)(tun_id >> 8);
- iid[2] = (__force __u8)tun_id;
-#else
- iid[0] = (__force __u8)((__force u64)tun_id >> 40);
- iid[1] = (__force __u8)((__force u64)tun_id >> 48);
- iid[2] = (__force __u8)((__force u64)tun_id >> 56);
-#endif
-}
-
-/* Convert 24 bit Instance ID to 64 bit tunnel ID. */
-static __be64 instance_id_to_tunnel_id(__u8 *iid)
-{
-#ifdef __BIG_ENDIAN
- return (iid[0] << 16) | (iid[1] << 8) | iid[2];
-#else
- return (__force __be64)(((__force u64)iid[0] << 40) |
- ((__force u64)iid[1] << 48) |
- ((__force u64)iid[2] << 56));
-#endif
-}
-
-/* Compute source UDP port for outgoing packet.
- * Currently we use the flow hash.
- */
-static u16 get_src_port(struct net *net, struct sk_buff *skb)
-{
- u32 hash = skb_get_hash(skb);
- unsigned int range;
- int high;
- int low;
-
- if (!hash) {
- if (skb->protocol == htons(ETH_P_IP)) {
- struct iphdr *iph;
- int size = (sizeof(iph->saddr) * 2) / sizeof(u32);
-
- iph = (struct iphdr *) skb_network_header(skb);
- hash = jhash2((const u32 *)&iph->saddr, size, 0);
- } else if (skb->protocol == htons(ETH_P_IPV6)) {
- struct ipv6hdr *ipv6hdr;
-
- ipv6hdr = (struct ipv6hdr *) skb_network_header(skb);
- hash = jhash2((const u32 *)&ipv6hdr->saddr,
- (sizeof(struct in6_addr) * 2) / sizeof(u32), 0);
- } else {
- pr_warn_once("LISP inner protocol is not IP when "
- "calculating hash.\n");
- }
- }
-
- inet_get_local_port_range(net, &low, &high);
- range = (high - low) + 1;
- return (((u64) hash * range) >> 32) + low;
-}
-
-static void lisp_build_header(struct sk_buff *skb,
- const struct ip_tunnel_key *tun_key)
-{
- struct lisphdr *lisph;
-
- lisph = (struct lisphdr *)__skb_push(skb, sizeof(struct lisphdr));
- lisph->nonce_present = 0; /* We don't support echo nonce algorithm */
- lisph->locator_status_bits_present = 1; /* Set LSB */
- lisph->solicit_echo_nonce = 0; /* No echo noncing */
- lisph->map_version_present = 0; /* No mapping versioning, nonce instead */
- lisph->instance_id_present = 1; /* Store the tun_id as Instance ID */
- lisph->reserved_flags = 0; /* Reserved flags, set to 0 */
-
- lisph->u1.nonce[0] = 0;
- lisph->u1.nonce[1] = 0;
- lisph->u1.nonce[2] = 0;
-
- tunnel_id_to_instance_id(tun_key->tun_id, &lisph->u2.word2.instance_id[0]);
- lisph->u2.word2.locator_status_bits = 1;
-}
-
-/* Called with rcu_read_lock and BH disabled. */
-static int lisp_rcv(struct sock *sk, struct sk_buff *skb)
-{
- struct lisp_dev *lisp_dev;
- struct net_device *dev;
- struct lisphdr *lisph;
- struct iphdr *inner_iph;
- struct metadata_dst *tun_dst;
-#ifndef USE_UPSTREAM_TUNNEL
- struct metadata_dst temp;
-#endif
- __be64 key;
- struct ethhdr *ethh;
- __be16 protocol;
-
- dev = rcu_dereference_sk_user_data(sk);
- if (unlikely(!dev))
- goto error;
-
- lisp_dev = netdev_priv(dev);
- if (iptunnel_pull_header(skb, LISP_HLEN, 0,
- !net_eq(lisp_dev->net, dev_net(lisp_dev->dev))))
- goto error;
-
- lisph = lisp_hdr(skb);
-
- if (lisph->instance_id_present != 1)
- key = 0;
- else
- key = instance_id_to_tunnel_id(&lisph->u2.word2.instance_id[0]);
-
- /* Save outer tunnel values */
-#ifndef USE_UPSTREAM_TUNNEL
- tun_dst = &temp;
- ovs_udp_tun_rx_dst(tun_dst, skb, AF_INET, TUNNEL_KEY, key, 0);
-#else
- tun_dst = udp_tun_rx_dst(skb, AF_INET, TUNNEL_KEY, key, 0);
-#endif
- /* Drop non-IP inner packets */
- inner_iph = (struct iphdr *)(lisph + 1);
- switch (inner_iph->version) {
- case 4:
- protocol = htons(ETH_P_IP);
- break;
- case 6:
- protocol = htons(ETH_P_IPV6);
- break;
- default:
- goto error;
- }
- skb->protocol = protocol;
-
- /* Add Ethernet header */
- ethh = (struct ethhdr *)skb_push(skb, ETH_HLEN);
- memset(ethh, 0, ETH_HLEN);
- ethh->h_dest[0] = 0x02;
- ethh->h_source[0] = 0x02;
- ethh->h_proto = protocol;
-
- ovs_ip_tunnel_rcv(dev, skb, tun_dst);
- goto out;
-
-error:
- kfree_skb(skb);
-out:
- return 0;
-}
-
-static struct rtable *lisp_get_rt(struct sk_buff *skb,
- struct net_device *dev,
- struct flowi4 *fl,
- const struct ip_tunnel_key *key)
-{
- struct net *net = dev_net(dev);
-
- /* Route lookup */
- memset(fl, 0, sizeof(*fl));
- fl->daddr = key->u.ipv4.dst;
- fl->saddr = key->u.ipv4.src;
- fl->flowi4_tos = RT_TOS(key->tos);
- fl->flowi4_mark = skb->mark;
- fl->flowi4_proto = IPPROTO_UDP;
-
- return ip_route_output_key(net, fl);
-}
-
-/* this is to handle the return type change in handle-offload
- * functions.
- */
-#if !defined(HAVE_UDP_TUNNEL_HANDLE_OFFLOAD_RET_SKB) || !defined(USE_UPSTREAM_TUNNEL)
-static struct sk_buff *
-__udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum)
-{
- int err;
-
- err = udp_tunnel_handle_offloads(skb, udp_csum);
- if (err) {
- kfree_skb(skb);
- return NULL;
- }
- return skb;
-}
-#else
-#define __udp_tunnel_handle_offloads udp_tunnel_handle_offloads
-#endif
-
-netdev_tx_t rpl_lisp_xmit(struct sk_buff *skb)
-{
- struct net_device *dev = skb->dev;
- struct lisp_dev *lisp_dev = netdev_priv(dev);
- struct net *net = lisp_dev->net;
- int network_offset = skb_network_offset(skb);
- struct ip_tunnel_info *info;
- struct ip_tunnel_key *tun_key;
- __be16 src_port, dst_port;
- struct rtable *rt;
- int min_headroom;
- struct socket *sock;
- struct flowi4 fl;
- __be16 df;
- int err;
-
- info = skb_tunnel_info(skb);
- if (unlikely(!info)) {
- err = -EINVAL;
- goto error;
- }
-
- sock = rcu_dereference(lisp_dev->sock);
- if (!sock) {
- err = -EIO;
- goto error;
- }
-
- if (skb->protocol != htons(ETH_P_IP) &&
- skb->protocol != htons(ETH_P_IPV6)) {
- err = 0;
- goto error;
- }
-
- tun_key = &info->key;
-
- rt = lisp_get_rt(skb, dev, &fl, tun_key);
- if (IS_ERR(rt)) {
- err = PTR_ERR(rt);
- goto error;
- }
-
- min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
- + sizeof(struct iphdr) + LISP_HLEN;
-
- if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
- int head_delta = SKB_DATA_ALIGN(min_headroom -
- skb_headroom(skb) +
- 16);
-
- err = pskb_expand_head(skb, max_t(int, head_delta, 0),
- 0, GFP_ATOMIC);
- if (unlikely(err))
- goto err_free_rt;
- }
-
- /* Reset l2 headers. */
- skb_pull(skb, network_offset);
- skb_reset_mac_header(skb);
- skb->vlan_tci = 0;
-
- if (skb_is_gso(skb) && skb_is_encapsulated(skb))
- goto err_free_rt;
-
- skb = __udp_tunnel_handle_offloads(skb, false);
- if (!skb)
- return NETDEV_TX_OK;
-
- src_port = htons(get_src_port(net, skb));
- dst_port = lisp_dev->dst_port;
-
- lisp_build_header(skb, tun_key);
-
- skb->ignore_df = 1;
-
- ovs_skb_set_inner_protocol(skb, skb->protocol);
-
- df = tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
- udp_tunnel_xmit_skb(rt, sock->sk, skb,
- fl.saddr, tun_key->u.ipv4.dst,
- tun_key->tos, tun_key->ttl,
- df, src_port, dst_port, false, true);
-
- return NETDEV_TX_OK;
-
-err_free_rt:
- ip_rt_put(rt);
-error:
- kfree_skb(skb);
- return NETDEV_TX_OK;
-}
-EXPORT_SYMBOL(rpl_lisp_xmit);
-
-/* Setup stats when device is created */
-static int lisp_init(struct net_device *dev)
-{
- dev->tstats = (typeof(dev->tstats)) netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
-
- return 0;
-}
-
-static void lisp_uninit(struct net_device *dev)
-{
- free_percpu(dev->tstats);
-}
-
-static struct socket *create_sock(struct net *net, bool ipv6,
- __be16 port)
-{
- struct socket *sock;
- struct udp_port_cfg udp_conf;
- int err;
-
- memset(&udp_conf, 0, sizeof(udp_conf));
-
- if (ipv6) {
- udp_conf.family = AF_INET6;
- } else {
- udp_conf.family = AF_INET;
- udp_conf.local_ip.s_addr = htonl(INADDR_ANY);
- }
-
- udp_conf.local_udp_port = port;
-
- /* Open UDP socket */
- err = udp_sock_create(net, &udp_conf, &sock);
- if (err < 0)
- return ERR_PTR(err);
-
- return sock;
-}
-
-static int lisp_open(struct net_device *dev)
-{
- struct lisp_dev *lisp = netdev_priv(dev);
- struct udp_tunnel_sock_cfg tunnel_cfg;
- struct net *net = lisp->net;
- struct socket *sock;
-
- sock = create_sock(net, false, lisp->dst_port);
- if (IS_ERR(sock))
- return PTR_ERR(sock);
-
- rcu_assign_pointer(lisp->sock, sock);
- /* Mark socket as an encapsulation socket */
- memset(&tunnel_cfg, 0, sizeof(tunnel_cfg));
- tunnel_cfg.sk_user_data = dev;
- tunnel_cfg.encap_type = 1;
- tunnel_cfg.encap_rcv = lisp_rcv;
- tunnel_cfg.encap_destroy = NULL;
- setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
- return 0;
-}
-
-static int lisp_stop(struct net_device *dev)
-{
- struct lisp_dev *lisp = netdev_priv(dev);
- struct socket *socket;
-
- socket = rtnl_dereference(lisp->sock);
- if (!socket)
- return 0;
-
- rcu_assign_pointer(lisp->sock, NULL);
-
- synchronize_net();
- udp_tunnel_sock_release(socket);
- return 0;
-}
-
-static netdev_tx_t lisp_dev_xmit(struct sk_buff *skb, struct net_device *dev)
-{
-#ifdef USE_UPSTREAM_TUNNEL
- return rpl_lisp_xmit(skb);
-#else
- /* Drop All packets coming from networking stack. OVS-CB is
- * not initialized for these packets.
- */
-
- dev_kfree_skb(skb);
- dev->stats.tx_dropped++;
- return NETDEV_TX_OK;
-#endif
-}
-
-static int lisp_change_mtu(struct net_device *dev, int new_mtu)
-{
- if (new_mtu < 68 || new_mtu > LISP_MAX_MTU)
- return -EINVAL;
-
- dev->mtu = new_mtu;
- return 0;
-}
-
-static int egress_ipv4_tun_info(struct net_device *dev, struct sk_buff *skb,
- struct ip_tunnel_info *info,
- __be16 sport, __be16 dport)
-{
- struct rtable *rt;
- struct flowi4 fl4;
-
- rt = lisp_get_rt(skb, dev, &fl4, &info->key);
- if (IS_ERR(rt))
- return PTR_ERR(rt);
- ip_rt_put(rt);
-
- info->key.u.ipv4.src = fl4.saddr;
- info->key.tp_src = sport;
- info->key.tp_dst = dport;
- return 0;
-}
-
-int ovs_lisp_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
-{
- struct lisp_dev *lisp = netdev_priv(dev);
- struct net *net = lisp->net;
- struct ip_tunnel_info *info = skb_tunnel_info(skb);
- __be16 sport, dport;
-
- sport = htons(get_src_port(net, skb));
- dport = lisp->dst_port;
-
- if (ip_tunnel_info_af(info) == AF_INET)
- return egress_ipv4_tun_info(dev, skb, info, sport, dport);
- return -EINVAL;
-}
-EXPORT_SYMBOL_GPL(ovs_lisp_fill_metadata_dst);
-
-static const struct net_device_ops lisp_netdev_ops = {
- .ndo_init = lisp_init,
- .ndo_uninit = lisp_uninit,
- .ndo_get_stats64 = ip_tunnel_get_stats64,
- .ndo_open = lisp_open,
- .ndo_stop = lisp_stop,
- .ndo_start_xmit = lisp_dev_xmit,
-#ifdef HAVE_RHEL7_MAX_MTU
- .ndo_size = sizeof(struct net_device_ops),
- .extended.ndo_change_mtu = lisp_change_mtu,
-#else
- .ndo_change_mtu = lisp_change_mtu,
-#endif
- .ndo_validate_addr = eth_validate_addr,
- .ndo_set_mac_address = eth_mac_addr,
-#ifdef USE_UPSTREAM_TUNNEL
-#ifdef HAVE_NDO_FILL_METADATA_DST
- .ndo_fill_metadata_dst = lisp_fill_metadata_dst,
-#endif
-#endif
-};
-
-static void lisp_get_drvinfo(struct net_device *dev,
- struct ethtool_drvinfo *drvinfo)
-{
- strlcpy(drvinfo->version, LISP_NETDEV_VER, sizeof(drvinfo->version));
- strlcpy(drvinfo->driver, "lisp", sizeof(drvinfo->driver));
-}
-
-static const struct ethtool_ops lisp_ethtool_ops = {
- .get_drvinfo = lisp_get_drvinfo,
- .get_link = ethtool_op_get_link,
-};
-
-/* Info for udev, that this is a virtual tunnel endpoint */
-static struct device_type lisp_type = {
- .name = "lisp",
-};
-
-/* Initialize the device structure. */
-static void lisp_setup(struct net_device *dev)
-{
- ether_setup(dev);
-
- dev->netdev_ops = &lisp_netdev_ops;
- dev->ethtool_ops = &lisp_ethtool_ops;
-#ifndef HAVE_NEEDS_FREE_NETDEV
- dev->destructor = free_netdev;
-#else
- dev->needs_free_netdev = true;
-#endif
-
- SET_NETDEV_DEVTYPE(dev, &lisp_type);
-
- dev->features |= NETIF_F_LLTX | NETIF_F_NETNS_LOCAL;
- dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM;
- dev->features |= NETIF_F_RXCSUM;
- dev->features |= NETIF_F_GSO_SOFTWARE;
-
- dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
- dev->hw_features |= NETIF_F_GSO_SOFTWARE;
-#ifdef USE_UPSTREAM_TUNNEL
- netif_keep_dst(dev);
-#endif
- dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
- eth_hw_addr_random(dev);
-}
-
-static const struct nla_policy lisp_policy[IFLA_LISP_MAX + 1] = {
- [IFLA_LISP_PORT] = { .type = NLA_U16 },
-};
-
-#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK
-static int lisp_validate(struct nlattr *tb[], struct nlattr *data[],
- struct netlink_ext_ack __always_unused *extack)
-#else
-static int lisp_validate(struct nlattr *tb[], struct nlattr *data[])
-#endif
-{
- if (tb[IFLA_ADDRESS]) {
- if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
- return -EINVAL;
-
- if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
- return -EADDRNOTAVAIL;
- }
-
- return 0;
-}
-
-static struct lisp_dev *find_dev(struct net *net, __be16 dst_port)
-{
- struct lisp_net *ln = net_generic(net, lisp_net_id);
- struct lisp_dev *dev;
-
- list_for_each_entry(dev, &ln->lisp_list, next) {
- if (dev->dst_port == dst_port)
- return dev;
- }
- return NULL;
-}
-
-static int lisp_configure(struct net *net, struct net_device *dev,
- __be16 dst_port)
-{
- struct lisp_net *ln = net_generic(net, lisp_net_id);
- struct lisp_dev *lisp = netdev_priv(dev);
- int err;
-
- lisp->net = net;
- lisp->dev = dev;
-
- lisp->dst_port = dst_port;
-
- if (find_dev(net, dst_port))
- return -EBUSY;
-
- err = lisp_change_mtu(dev, LISP_MAX_MTU);
- if (err)
- return err;
-
- err = register_netdevice(dev);
- if (err)
- return err;
-
- list_add(&lisp->next, &ln->lisp_list);
- return 0;
-}
-
-#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
-static int lisp_newlink(struct net *net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[],
- struct netlink_ext_ack __always_unused *extack)
-#else
-static int lisp_newlink(struct net *net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[])
-#endif
-{
- __be16 dst_port = htons(LISP_UDP_PORT);
-
- if (data[IFLA_LISP_PORT])
- dst_port = nla_get_be16(data[IFLA_LISP_PORT]);
-
- return lisp_configure(net, dev, dst_port);
-}
-
-static void lisp_dellink(struct net_device *dev, struct list_head *head)
-{
- struct lisp_dev *lisp = netdev_priv(dev);
-
- list_del(&lisp->next);
- unregister_netdevice_queue(dev, head);
-}
-
-static size_t lisp_get_size(const struct net_device *dev)
-{
- return nla_total_size(sizeof(__be32)); /* IFLA_LISP_PORT */
-}
-
-static int lisp_fill_info(struct sk_buff *skb, const struct net_device *dev)
-{
- struct lisp_dev *lisp = netdev_priv(dev);
-
- if (nla_put_be16(skb, IFLA_LISP_PORT, lisp->dst_port))
- goto nla_put_failure;
-
- return 0;
-
-nla_put_failure:
- return -EMSGSIZE;
-}
-
-static struct rtnl_link_ops lisp_link_ops __read_mostly = {
- .kind = "lisp",
- .maxtype = IFLA_LISP_MAX,
- .policy = lisp_policy,
- .priv_size = sizeof(struct lisp_dev),
- .setup = lisp_setup,
- .validate = lisp_validate,
- .newlink = lisp_newlink,
- .dellink = lisp_dellink,
- .get_size = lisp_get_size,
- .fill_info = lisp_fill_info,
-};
-
-struct net_device *rpl_lisp_dev_create_fb(struct net *net, const char *name,
- u8 name_assign_type, u16 dst_port)
-{
- struct nlattr *tb[IFLA_MAX + 1];
- struct net_device *dev;
- int err;
-
- memset(tb, 0, sizeof(tb));
- dev = rtnl_create_link(net, (char *) name, name_assign_type,
- &lisp_link_ops, tb);
- if (IS_ERR(dev))
- return dev;
-
- err = lisp_configure(net, dev, htons(dst_port));
- if (err) {
- free_netdev(dev);
- return ERR_PTR(err);
- }
- return dev;
-}
-EXPORT_SYMBOL_GPL(rpl_lisp_dev_create_fb);
-
-static int lisp_init_net(struct net *net)
-{
- struct lisp_net *ln = net_generic(net, lisp_net_id);
-
- INIT_LIST_HEAD(&ln->lisp_list);
- return 0;
-}
-
-static void lisp_exit_net(struct net *net)
-{
- struct lisp_net *ln = net_generic(net, lisp_net_id);
- struct lisp_dev *lisp, *next;
- struct net_device *dev, *aux;
- LIST_HEAD(list);
-
- rtnl_lock();
-
- /* gather any lisp devices that were moved into this ns */
- for_each_netdev_safe(net, dev, aux)
- if (dev->rtnl_link_ops == &lisp_link_ops)
- unregister_netdevice_queue(dev, &list);
-
- list_for_each_entry_safe(lisp, next, &ln->lisp_list, next) {
- /* If lisp->dev is in the same netns, it was already added
- * to the lisp by the previous loop.
- */
- if (!net_eq(dev_net(lisp->dev), net))
- unregister_netdevice_queue(lisp->dev, &list);
- }
-
- /* unregister the devices gathered above */
- unregister_netdevice_many(&list);
- rtnl_unlock();
-}
-
-static struct pernet_operations lisp_net_ops = {
- .init = lisp_init_net,
- .exit = lisp_exit_net,
- .id = &lisp_net_id,
- .size = sizeof(struct lisp_net),
-};
-
-int rpl_lisp_init_module(void)
-{
- int rc;
-
- rc = register_pernet_subsys(&lisp_net_ops);
- if (rc)
- goto out1;
-
- rc = rtnl_link_register(&lisp_link_ops);
- if (rc)
- goto out2;
-
- pr_info("LISP tunneling driver\n");
- return 0;
-out2:
- unregister_pernet_subsys(&lisp_net_ops);
-out1:
- pr_err("Error while initializing LISP %d\n", rc);
- return rc;
-}
-
-void rpl_lisp_cleanup_module(void)
-{
- rtnl_link_unregister(&lisp_link_ops);
- unregister_pernet_subsys(&lisp_net_ops);
-}
diff --git a/datapath/linux/compat/netdevice.c b/datapath/linux/compat/netdevice.c
deleted file mode 100644
index c0ffbbd31..000000000
--- a/datapath/linux/compat/netdevice.c
+++ /dev/null
@@ -1,167 +0,0 @@
-#include <linux/netdevice.h>
-#include <linux/if_vlan.h>
-#include <net/mpls.h>
-
-#include "gso.h"
-
-#ifdef OVS_USE_COMPAT_GSO_SEGMENTATION
-struct sk_buff *rpl__skb_gso_segment(struct sk_buff *skb,
- netdev_features_t features,
- bool tx_path)
-{
- int vlan_depth = ETH_HLEN;
- __be16 type = skb->protocol;
- __be16 skb_proto;
- struct sk_buff *skb_gso;
-
- while (type == htons(ETH_P_8021Q)) {
- struct vlan_hdr *vh;
-
- if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
- return ERR_PTR(-EINVAL);
-
- vh = (struct vlan_hdr *)(skb->data + vlan_depth);
- type = vh->h_vlan_encapsulated_proto;
- vlan_depth += VLAN_HLEN;
- }
-
- if (eth_p_mpls(type))
- type = ovs_skb_get_inner_protocol(skb);
-
- /* this hack needed to get regular skb_gso_segment() */
- skb_proto = skb->protocol;
- skb->protocol = type;
-
-#ifdef HAVE___SKB_GSO_SEGMENT
-#undef __skb_gso_segment
- skb_gso = __skb_gso_segment(skb, features, tx_path);
-#else
-#undef skb_gso_segment
- skb_gso = skb_gso_segment(skb, features);
-#endif
-
- skb->protocol = skb_proto;
- return skb_gso;
-}
-EXPORT_SYMBOL_GPL(rpl__skb_gso_segment);
-
-#endif /* OVS_USE_COMPAT_GSO_SEGMENTATION */
-
-#ifdef HAVE_UDP_OFFLOAD
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4,0,0)
-struct sk_buff **rpl_eth_gro_receive(struct sk_buff **head,
- struct sk_buff *skb)
-{
- struct sk_buff *p, **pp = NULL;
- struct ethhdr *eh, *eh2;
- unsigned int hlen, off_eth;
- const struct packet_offload *ptype;
- __be16 type;
- int flush = 1;
-
- off_eth = skb_gro_offset(skb);
- hlen = off_eth + sizeof(*eh);
- eh = skb_gro_header_fast(skb, off_eth);
- if (skb_gro_header_hard(skb, hlen)) {
- eh = skb_gro_header_slow(skb, hlen, off_eth);
- if (unlikely(!eh))
- goto out;
- }
-
- flush = 0;
-
- for (p = *head; p; p = p->next) {
- if (!NAPI_GRO_CB(p)->same_flow)
- continue;
-
- eh2 = (struct ethhdr *)(p->data + off_eth);
- if (compare_ether_header(eh, eh2)) {
- NAPI_GRO_CB(p)->same_flow = 0;
- continue;
- }
- }
-
- type = eh->h_proto;
-
- rcu_read_lock();
- ptype = gro_find_receive_by_type(type);
- if (ptype == NULL) {
- flush = 1;
- goto out_unlock;
- }
-
- skb_gro_pull(skb, sizeof(*eh));
- skb_gro_postpull_rcsum(skb, eh, sizeof(*eh));
- pp = ptype->callbacks.gro_receive(head, skb);
-
-out_unlock:
- rcu_read_unlock();
-out:
- NAPI_GRO_CB(skb)->flush |= flush;
-
- return pp;
-}
-
-int rpl_eth_gro_complete(struct sk_buff *skb, int nhoff)
-{
- struct ethhdr *eh = (struct ethhdr *)(skb->data + nhoff);
- __be16 type = eh->h_proto;
- struct packet_offload *ptype;
- int err = -ENOSYS;
-
- if (skb->encapsulation)
- skb_set_inner_mac_header(skb, nhoff);
-
- rcu_read_lock();
- ptype = gro_find_complete_by_type(type);
- if (ptype != NULL)
- err = ptype->callbacks.gro_complete(skb, nhoff +
- sizeof(struct ethhdr));
-
- rcu_read_unlock();
- return err;
-}
-
-#endif
-#endif /* HAVE_UDP_OFFLOAD */
-
-#ifndef HAVE_RTNL_LINK_STATS64
-#undef dev_get_stats
-struct rtnl_link_stats64 *rpl_dev_get_stats(struct net_device *dev,
- struct rtnl_link_stats64 *storage)
-{
- const struct net_device_stats *stats = dev_get_stats(dev);
-
-#define copy(s) storage->s = stats->s
-
- copy(rx_packets);
- copy(tx_packets);
- copy(rx_bytes);
- copy(tx_bytes);
- copy(rx_errors);
- copy(tx_errors);
- copy(rx_dropped);
- copy(tx_dropped);
- copy(multicast);
- copy(collisions);
-
- copy(rx_length_errors);
- copy(rx_over_errors);
- copy(rx_crc_errors);
- copy(rx_frame_errors);
- copy(rx_fifo_errors);
- copy(rx_missed_errors);
-
- copy(tx_aborted_errors);
- copy(tx_carrier_errors);
- copy(tx_fifo_errors);
- copy(tx_heartbeat_errors);
- copy(tx_window_errors);
-
- copy(rx_compressed);
- copy(tx_compressed);
-
-#undef copy
- return storage;
-}
-#endif
diff --git a/datapath/linux/compat/nf_conncount.c b/datapath/linux/compat/nf_conncount.c
deleted file mode 100644
index 97bdfb933..000000000
--- a/datapath/linux/compat/nf_conncount.c
+++ /dev/null
@@ -1,621 +0,0 @@
-/*
- * Backported from upstream commit 5c789e131cbb ("netfilter:
- * nf_conncount: Add list lock and gc worker, and RCU for init tree search")
- *
- * count the number of connections matching an arbitrary key.
- *
- * (C) 2017 Red Hat GmbH
- * Author: Florian Westphal <fw@strlen.de>
- *
- * split from xt_connlimit.c:
- * (c) 2000 Gerd Knorr <kraxel@bytesex.org>
- * Nov 2002: Martin Bene <martin.bene@icomedias.com>:
- * only ignore TIME_WAIT or gone connections
- * (C) CC Computer Consultants GmbH, 2007
- */
-#ifndef HAVE_UPSTREAM_NF_CONNCOUNT
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/in.h>
-#include <linux/in6.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/jhash.h>
-#include <linux/slab.h>
-#include <linux/list.h>
-#include <linux/rbtree.h>
-#include <linux/module.h>
-#include <linux/random.h>
-#include <linux/skbuff.h>
-#include <linux/spinlock.h>
-#include <linux/netfilter/nf_conntrack_tcp.h>
-#include <linux/netfilter/x_tables.h>
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_count.h>
-#include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack_tuple.h>
-#include <net/netfilter/nf_conntrack_zones.h>
-
-#define CONNCOUNT_SLOTS 256U
-
-#define CONNCOUNT_GC_MAX_NODES 8
-#define MAX_KEYLEN 5
-
-/* we will save the tuples of all connections we care about */
-struct nf_conncount_tuple {
- struct list_head node;
- struct nf_conntrack_tuple tuple;
- struct nf_conntrack_zone zone;
- int cpu;
- u32 jiffies32;
-};
-
-struct nf_conncount_rb {
- struct rb_node node;
- struct nf_conncount_list list;
- u32 key[MAX_KEYLEN];
- struct rcu_head rcu_head;
-};
-
-static spinlock_t nf_conncount_locks[CONNCOUNT_SLOTS] __cacheline_aligned_in_smp;
-
-struct nf_conncount_data {
- unsigned int keylen;
- struct rb_root root[CONNCOUNT_SLOTS];
- struct net *net;
- struct work_struct gc_work;
- unsigned long pending_trees[BITS_TO_LONGS(CONNCOUNT_SLOTS)];
- unsigned int gc_tree;
-};
-
-static u_int32_t conncount_rnd __read_mostly;
-static struct kmem_cache *conncount_rb_cachep __read_mostly;
-static struct kmem_cache *conncount_conn_cachep __read_mostly;
-
-static inline bool already_closed(const struct nf_conn *conn)
-{
- if (nf_ct_protonum(conn) == IPPROTO_TCP)
- return conn->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT ||
- conn->proto.tcp.state == TCP_CONNTRACK_CLOSE;
- else
- return false;
-}
-
-static int key_diff(const u32 *a, const u32 *b, unsigned int klen)
-{
- return memcmp(a, b, klen * sizeof(u32));
-}
-
-static void conn_free(struct nf_conncount_list *list,
- struct nf_conncount_tuple *conn)
-{
- lockdep_assert_held(&list->list_lock);
-
- list->count--;
- list_del(&conn->node);
-
- kmem_cache_free(conncount_conn_cachep, conn);
-}
-
-static const struct nf_conntrack_tuple_hash *
-find_or_evict(struct net *net, struct nf_conncount_list *list,
- struct nf_conncount_tuple *conn)
-{
- const struct nf_conntrack_tuple_hash *found;
- unsigned long a, b;
- int cpu = raw_smp_processor_id();
- u32 age;
-
- found = nf_conntrack_find_get(net, &conn->zone, &conn->tuple);
- if (found)
- return found;
- b = conn->jiffies32;
- a = (u32)jiffies;
-
- /* conn might have been added just before by another cpu and
- * might still be unconfirmed. In this case, nf_conntrack_find()
- * returns no result. Thus only evict if this cpu added the
- * stale entry or if the entry is older than two jiffies.
- */
- age = a - b;
- if (conn->cpu == cpu || age >= 2) {
- conn_free(list, conn);
- return ERR_PTR(-ENOENT);
- }
-
- return ERR_PTR(-EAGAIN);
-}
-
-static int __nf_conncount_add(struct net *net,
- struct nf_conncount_list *list,
- const struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_zone *zone)
-{
- const struct nf_conntrack_tuple_hash *found;
- struct nf_conncount_tuple *conn, *conn_n;
- struct nf_conn *found_ct;
- unsigned int collect = 0;
-
- /* check the saved connections */
- list_for_each_entry_safe(conn, conn_n, &list->head, node) {
- if (collect > CONNCOUNT_GC_MAX_NODES)
- break;
-
- found = find_or_evict(net, list, conn);
- if (IS_ERR(found)) {
- /* Not found, but might be about to be confirmed */
- if (PTR_ERR(found) == -EAGAIN) {
- if (nf_ct_tuple_equal(&conn->tuple, tuple) &&
- nf_ct_zone_id(&conn->zone, conn->zone.dir) ==
- nf_ct_zone_id(zone, zone->dir))
- return 0; /* already exists */
- } else {
- collect++;
- }
- continue;
- }
-
- found_ct = nf_ct_tuplehash_to_ctrack(found);
-
- if (nf_ct_tuple_equal(&conn->tuple, tuple) &&
- nf_ct_zone_equal(found_ct, zone, zone->dir)) {
- /*
- * We should not see tuples twice unless someone hooks
- * this into a table without "-p tcp --syn".
- *
- * Attempt to avoid a re-add in this case.
- */
- nf_ct_put(found_ct);
- return 0;
- } else if (already_closed(found_ct)) {
- /*
- * we do not care about connections which are
- * closed already -> ditch it
- */
- nf_ct_put(found_ct);
- conn_free(list, conn);
- collect++;
- continue;
- }
-
- nf_ct_put(found_ct);
- }
-
- if (WARN_ON_ONCE(list->count > INT_MAX))
- return -EOVERFLOW;
-
- conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
- if (conn == NULL)
- return -ENOMEM;
-
- conn->tuple = *tuple;
- conn->zone = *zone;
- conn->cpu = raw_smp_processor_id();
- conn->jiffies32 = (u32)jiffies;
- list_add_tail(&conn->node, &list->head);
- list->count++;
- return 0;
-}
-
-int nf_conncount_add(struct net *net,
- struct nf_conncount_list *list,
- const struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_zone *zone)
-{
- int ret;
-
- /* check the saved connections */
- spin_lock_bh(&list->list_lock);
- ret = __nf_conncount_add(net, list, tuple, zone);
- spin_unlock_bh(&list->list_lock);
-
- return ret;
-}
-
-static void nf_conncount_list_init(struct nf_conncount_list *list)
-{
- spin_lock_init(&list->list_lock);
- INIT_LIST_HEAD(&list->head);
- list->count = 0;
-}
-
-/* Return true if the list is empty. Must be called with BH disabled. */
-static bool nf_conncount_gc_list(struct net *net,
- struct nf_conncount_list *list)
-{
- const struct nf_conntrack_tuple_hash *found;
- struct nf_conncount_tuple *conn, *conn_n;
- struct nf_conn *found_ct;
- unsigned int collected = 0;
- bool ret = false;
-
- /* don't bother if other cpu is already doing GC */
- if (!spin_trylock(&list->list_lock))
- return false;
-
- list_for_each_entry_safe(conn, conn_n, &list->head, node) {
- found = find_or_evict(net, list, conn);
- if (IS_ERR(found)) {
- if (PTR_ERR(found) == -ENOENT)
- collected++;
- continue;
- }
-
- found_ct = nf_ct_tuplehash_to_ctrack(found);
- if (already_closed(found_ct)) {
- /*
- * we do not care about connections which are
- * closed already -> ditch it
- */
- nf_ct_put(found_ct);
- conn_free(list, conn);
- collected++;
- continue;
- }
-
- nf_ct_put(found_ct);
- if (collected > CONNCOUNT_GC_MAX_NODES)
- break;
- }
-
- if (!list->count)
- ret = true;
- spin_unlock(&list->list_lock);
-
- return ret;
-}
-
-static void __tree_nodes_free(struct rcu_head *h)
-{
- struct nf_conncount_rb *rbconn;
-
- rbconn = container_of(h, struct nf_conncount_rb, rcu_head);
- kmem_cache_free(conncount_rb_cachep, rbconn);
-}
-
-/* caller must hold tree nf_conncount_locks[] lock */
-static void tree_nodes_free(struct rb_root *root,
- struct nf_conncount_rb *gc_nodes[],
- unsigned int gc_count)
-{
- struct nf_conncount_rb *rbconn;
-
- while (gc_count) {
- rbconn = gc_nodes[--gc_count];
- spin_lock(&rbconn->list.list_lock);
- if (!rbconn->list.count) {
- rb_erase(&rbconn->node, root);
- call_rcu(&rbconn->rcu_head, __tree_nodes_free);
- }
- spin_unlock(&rbconn->list.list_lock);
- }
-}
-
-static void schedule_gc_worker(struct nf_conncount_data *data, int tree)
-{
- set_bit(tree, data->pending_trees);
- schedule_work(&data->gc_work);
-}
-
-static unsigned int
-insert_tree(struct net *net,
- struct nf_conncount_data *data,
- struct rb_root *root,
- unsigned int hash,
- const u32 *key,
- const struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_zone *zone)
-{
- struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES];
- struct rb_node **rbnode, *parent;
- struct nf_conncount_rb *rbconn;
- struct nf_conncount_tuple *conn;
- unsigned int count = 0, gc_count = 0;
- u8 keylen = data->keylen;
- bool do_gc = true;
-
- spin_lock_bh(&nf_conncount_locks[hash]);
-restart:
- parent = NULL;
- rbnode = &(root->rb_node);
- while (*rbnode) {
- int diff;
- rbconn = rb_entry(*rbnode, struct nf_conncount_rb, node);
-
- parent = *rbnode;
- diff = key_diff(key, rbconn->key, keylen);
- if (diff < 0) {
- rbnode = &((*rbnode)->rb_left);
- } else if (diff > 0) {
- rbnode = &((*rbnode)->rb_right);
- } else {
- int ret;
-
- ret = nf_conncount_add(net, &rbconn->list, tuple, zone);
- if (ret)
- count = 0; /* hotdrop */
- else
- count = rbconn->list.count;
- tree_nodes_free(root, gc_nodes, gc_count);
- goto out_unlock;
- }
-
- if (gc_count >= ARRAY_SIZE(gc_nodes))
- continue;
-
- if (do_gc && nf_conncount_gc_list(net, &rbconn->list))
- gc_nodes[gc_count++] = rbconn;
- }
-
- if (gc_count) {
- tree_nodes_free(root, gc_nodes, gc_count);
- schedule_gc_worker(data, hash);
- gc_count = 0;
- do_gc = false;
- goto restart;
- }
-
- /* expected case: match, insert new node */
- rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC);
- if (rbconn == NULL)
- goto out_unlock;
-
- conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
- if (conn == NULL) {
- kmem_cache_free(conncount_rb_cachep, rbconn);
- goto out_unlock;
- }
-
- conn->tuple = *tuple;
- conn->zone = *zone;
- memcpy(rbconn->key, key, sizeof(u32) * keylen);
-
- nf_conncount_list_init(&rbconn->list);
- list_add(&conn->node, &rbconn->list.head);
- count = 1;
- rbconn->list.count = count;
-
- rb_link_node_rcu(&rbconn->node, parent, rbnode);
- rb_insert_color(&rbconn->node, root);
-out_unlock:
- spin_unlock_bh(&nf_conncount_locks[hash]);
- return count;
-}
-
-static unsigned int
-count_tree(struct net *net,
- struct nf_conncount_data *data,
- const u32 *key,
- const struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_zone *zone)
-{
- struct rb_root *root;
- struct rb_node *parent;
- struct nf_conncount_rb *rbconn;
- unsigned int hash;
- u8 keylen = data->keylen;
-
- hash = jhash2(key, data->keylen, conncount_rnd) % CONNCOUNT_SLOTS;
- root = &data->root[hash];
-
- parent = rcu_dereference_raw(root->rb_node);
- while (parent) {
- int diff;
-
- rbconn = rb_entry(parent, struct nf_conncount_rb, node);
-
- diff = key_diff(key, rbconn->key, keylen);
- if (diff < 0) {
- parent = rcu_dereference_raw(parent->rb_left);
- } else if (diff > 0) {
- parent = rcu_dereference_raw(parent->rb_right);
- } else {
- int ret;
-
- if (!tuple) {
- nf_conncount_gc_list(net, &rbconn->list);
- return rbconn->list.count;
- }
-
- spin_lock_bh(&rbconn->list.list_lock);
- /* Node might be about to be free'd.
- * We need to defer to insert_tree() in this case.
- */
- if (rbconn->list.count == 0) {
- spin_unlock_bh(&rbconn->list.list_lock);
- break;
- }
-
- /* same source network -> be counted! */
- ret = __nf_conncount_add(net, &rbconn->list, tuple, zone);
- spin_unlock_bh(&rbconn->list.list_lock);
- if (ret)
- return 0; /* hotdrop */
- else
- return rbconn->list.count;
- }
- }
-
- if (!tuple)
- return 0;
-
- return insert_tree(net, data, root, hash, key, tuple, zone);
-}
-
-static void tree_gc_worker(struct work_struct *work)
-{
- struct nf_conncount_data *data = container_of(work, struct nf_conncount_data, gc_work);
- struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES], *rbconn;
- struct rb_root *root;
- struct rb_node *node;
- unsigned int tree, next_tree, gc_count = 0;
-
- tree = data->gc_tree % CONNCOUNT_SLOTS;
- root = &data->root[tree];
-
- local_bh_disable();
- rcu_read_lock();
- for (node = rb_first(root); node != NULL; node = rb_next(node)) {
- rbconn = rb_entry(node, struct nf_conncount_rb, node);
- if (nf_conncount_gc_list(data->net, &rbconn->list))
- gc_count++;
- }
- rcu_read_unlock();
- local_bh_enable();
-
- cond_resched();
-
- spin_lock_bh(&nf_conncount_locks[tree]);
- if (gc_count < ARRAY_SIZE(gc_nodes))
- goto next; /* do not bother */
-
- gc_count = 0;
- node = rb_first(root);
- while (node != NULL) {
- rbconn = rb_entry(node, struct nf_conncount_rb, node);
- node = rb_next(node);
-
- if (rbconn->list.count > 0)
- continue;
-
- gc_nodes[gc_count++] = rbconn;
- if (gc_count >= ARRAY_SIZE(gc_nodes)) {
- tree_nodes_free(root, gc_nodes, gc_count);
- gc_count = 0;
- }
- }
-
- tree_nodes_free(root, gc_nodes, gc_count);
-next:
-
- clear_bit(tree, data->pending_trees);
-
- next_tree = (tree + 1) % CONNCOUNT_SLOTS;
- next_tree = find_next_bit(data->pending_trees, CONNCOUNT_SLOTS, next_tree);
-
- if (next_tree < CONNCOUNT_SLOTS) {
- data->gc_tree = next_tree;
- schedule_work(work);
- }
-
- spin_unlock_bh(&nf_conncount_locks[tree]);
-}
-
-/* Count and return number of conntrack entries in 'net' with particular 'key'.
- * If 'tuple' is not null, insert it into the accounting data structure.
- * Call with RCU read lock.
- */
-unsigned int rpl_nf_conncount_count(struct net *net,
- struct nf_conncount_data *data,
- const u32 *key,
- const struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_zone *zone)
-{
- return count_tree(net, data, key, tuple, zone);
-}
-EXPORT_SYMBOL_GPL(rpl_nf_conncount_count);
-
-struct nf_conncount_data *rpl_nf_conncount_init(struct net *net, unsigned int family,
- unsigned int keylen)
-{
- struct nf_conncount_data *data;
- int ret, i;
-
- if (keylen % sizeof(u32) ||
- keylen / sizeof(u32) > MAX_KEYLEN ||
- keylen == 0)
- return ERR_PTR(-EINVAL);
-
- net_get_random_once(&conncount_rnd, sizeof(conncount_rnd));
-
- data = kmalloc(sizeof(*data), GFP_KERNEL);
- if (!data)
- return ERR_PTR(-ENOMEM);
-
- ret = nf_ct_netns_get(net, family);
- if (ret < 0) {
- kfree(data);
- return ERR_PTR(ret);
- }
-
- for (i = 0; i < ARRAY_SIZE(data->root); ++i)
- data->root[i] = RB_ROOT;
-
- data->keylen = keylen / sizeof(u32);
- data->net = net;
- INIT_WORK(&data->gc_work, tree_gc_worker);
-
- return data;
-}
-EXPORT_SYMBOL_GPL(rpl_nf_conncount_init);
-
-static void nf_conncount_cache_free(struct nf_conncount_list *list)
-{
- struct nf_conncount_tuple *conn, *conn_n;
-
- list_for_each_entry_safe(conn, conn_n, &list->head, node)
- kmem_cache_free(conncount_conn_cachep, conn);
-}
-
-static void destroy_tree(struct rb_root *r)
-{
- struct nf_conncount_rb *rbconn;
- struct rb_node *node;
-
- while ((node = rb_first(r)) != NULL) {
- rbconn = rb_entry(node, struct nf_conncount_rb, node);
-
- rb_erase(node, r);
-
- nf_conncount_cache_free(&rbconn->list);
-
- kmem_cache_free(conncount_rb_cachep, rbconn);
- }
-}
-
-void rpl_nf_conncount_destroy(struct net *net, unsigned int family,
- struct nf_conncount_data *data)
-{
- unsigned int i;
-
- cancel_work_sync(&data->gc_work);
- nf_ct_netns_put(net, family);
-
- for (i = 0; i < ARRAY_SIZE(data->root); ++i)
- destroy_tree(&data->root[i]);
-
- kfree(data);
-}
-EXPORT_SYMBOL_GPL(rpl_nf_conncount_destroy);
-
-int rpl_nf_conncount_modinit(void)
-{
- int i;
-
- for (i = 0; i < CONNCOUNT_SLOTS; ++i)
- spin_lock_init(&nf_conncount_locks[i]);
-
- conncount_conn_cachep = kmem_cache_create("nf_conncount_tuple",
- sizeof(struct nf_conncount_tuple),
- 0, 0, NULL);
- if (!conncount_conn_cachep)
- return -ENOMEM;
-
- conncount_rb_cachep = kmem_cache_create("nf_conncount_rb",
- sizeof(struct nf_conncount_rb),
- 0, 0, NULL);
- if (!conncount_rb_cachep) {
- kmem_cache_destroy(conncount_conn_cachep);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-void rpl_nf_conncount_modexit(void)
-{
- kmem_cache_destroy(conncount_conn_cachep);
- kmem_cache_destroy(conncount_rb_cachep);
-}
-
-#endif /* HAVE_UPSTREAM_NF_CONNCOUNT */
diff --git a/datapath/linux/compat/nf_conntrack_core.c b/datapath/linux/compat/nf_conntrack_core.c
deleted file mode 100644
index a7d3d4331..000000000
--- a/datapath/linux/compat/nf_conntrack_core.c
+++ /dev/null
@@ -1,13 +0,0 @@
-#include <linux/version.h>
-
-#ifndef HAVE_NF_CT_ZONE_INIT
-
-#include <net/netfilter/nf_conntrack_zones.h>
-
-/* Built-in default zone used e.g. by modules. */
-const struct nf_conntrack_zone nf_ct_zone_dflt = {
- .id = NF_CT_DEFAULT_ZONE_ID,
- .dir = NF_CT_DEFAULT_ZONE_DIR,
-};
-
-#endif /* HAVE_NF_CT_ZONE_INIT */
diff --git a/datapath/linux/compat/nf_conntrack_proto.c b/datapath/linux/compat/nf_conntrack_proto.c
deleted file mode 100644
index fe291dbf2..000000000
--- a/datapath/linux/compat/nf_conntrack_proto.c
+++ /dev/null
@@ -1,114 +0,0 @@
-#include <linux/types.h>
-
-#include <net/netfilter/nf_conntrack.h>
-#ifdef HAVE_NF_CONNTRACK_L3PROATO_H
-#include <net/netfilter/nf_conntrack_l3proto.h>
-#endif
-
-/*
- * Upstream net-next commmit 7e35ec0e8044
- * ("netfilter: conntrack: move nf_ct_netns_{get,put}() to core")
- * is introduced in v4.15, and it supports NFPROTO_INET in
- * nf_ct_netns_{get,put}() that OVS conntrack uses this feature.
- *
- * However, we only need this feature if the underlying nf_conntrack_l3proto
- * supports net_ns_get/put. Thus, we just mock the functions if
- * HAVE_NET_NS_GET is false.
- */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4,15,0)
-#ifdef HAVE_NET_NS_GET
-static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
-{
- const struct nf_conntrack_l3proto *l3proto;
- int ret;
-
- might_sleep();
-
- ret = nf_ct_l3proto_try_module_get(nfproto);
- if (ret < 0)
- return ret;
-
- /* we already have a reference, can't fail */
- rcu_read_lock();
- l3proto = __nf_ct_l3proto_find(nfproto);
- rcu_read_unlock();
-
- if (!l3proto->net_ns_get)
- return 0;
-
- ret = l3proto->net_ns_get(net);
- if (ret < 0)
- nf_ct_l3proto_module_put(nfproto);
-
- return ret;
-}
-
-int rpl_nf_ct_netns_get(struct net *net, u8 nfproto)
-{
- int err;
-
- if (nfproto == NFPROTO_INET) {
- err = nf_ct_netns_do_get(net, NFPROTO_IPV4);
- if (err < 0)
- goto err1;
- err = nf_ct_netns_do_get(net, NFPROTO_IPV6);
- if (err < 0)
- goto err2;
- } else {
- err = nf_ct_netns_do_get(net, nfproto);
- if (err < 0)
- goto err1;
- }
- return 0;
-
-err2:
- nf_ct_netns_put(net, NFPROTO_IPV4);
-err1:
- return err;
-}
-EXPORT_SYMBOL_GPL(rpl_nf_ct_netns_get);
-
-static void nf_ct_netns_do_put(struct net *net, u8 nfproto)
-{
- const struct nf_conntrack_l3proto *l3proto;
-
- might_sleep();
-
- /* same as nf_conntrack_netns_get(), reference assumed */
- rcu_read_lock();
- l3proto = __nf_ct_l3proto_find(nfproto);
- rcu_read_unlock();
-
- if (WARN_ON(!l3proto))
- return;
-
- if (l3proto->net_ns_put)
- l3proto->net_ns_put(net);
-
- nf_ct_l3proto_module_put(nfproto);
-}
-
-void rpl_nf_ct_netns_put(struct net *net, uint8_t nfproto)
-{
- if (nfproto == NFPROTO_INET) {
- nf_ct_netns_do_put(net, NFPROTO_IPV4);
- nf_ct_netns_do_put(net, NFPROTO_IPV6);
- } else
- nf_ct_netns_do_put(net, nfproto);
-}
-EXPORT_SYMBOL_GPL(rpl_nf_ct_netns_put);
-
-#else /* !HAVE_NET_NS_GET */
-void rpl_nf_ct_netns_put(struct net *net, uint8_t nfproto)
-{
-}
-EXPORT_SYMBOL_GPL(rpl_nf_ct_netns_put);
-
-int rpl_nf_ct_netns_get(struct net *net, u8 nfproto)
-{
- return 0;
-}
-EXPORT_SYMBOL_GPL(rpl_nf_ct_netns_get);
-
-#endif /* HAVE_NET_NS_GET */
-#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4,15,0) */
diff --git a/datapath/linux/compat/nf_conntrack_reasm.c b/datapath/linux/compat/nf_conntrack_reasm.c
deleted file mode 100644
index 77b4b2548..000000000
--- a/datapath/linux/compat/nf_conntrack_reasm.c
+++ /dev/null
@@ -1,740 +0,0 @@
-/*
- * Backported from upstream commit 5b490047240f
- * ("ipv6: Export nf_ct_frag6_gather()")
- *
- * IPv6 fragment reassembly for connection tracking
- *
- * Copyright (C)2004 USAGI/WIDE Project
- *
- * Author:
- * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *
- * Based on: net/ipv6/reassembly.c
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#define pr_fmt(fmt) "IPv6-nf: " fmt
-
-#include <linux/version.h>
-
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/jiffies.h>
-#include <linux/net.h>
-#include <linux/list.h>
-#include <linux/netdevice.h>
-#include <linux/in6.h>
-#include <linux/ipv6.h>
-#include <linux/icmpv6.h>
-#include <linux/random.h>
-#include <linux/slab.h>
-
-#include <net/sock.h>
-#include <net/snmp.h>
-#include <net/inet_frag.h>
-
-#include <net/ipv6.h>
-#include <net/ipv6_frag.h>
-#include <net/protocol.h>
-#include <net/transp_v6.h>
-#include <net/rawv6.h>
-#include <net/ndisc.h>
-#include <net/addrconf.h>
-#include <net/inet_ecn.h>
-#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv6.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
-#include <net/netns/generic.h>
-#include "datapath.h"
-
-#if defined(HAVE_INET_FRAGS_WITH_FRAGS_WORK) || !defined(HAVE_INET_FRAGS_RND)
-
-static const char nf_frags_cache_name[] = "ovs-frag6";
-
-#endif
-
-#ifdef OVS_NF_DEFRAG6_BACKPORT
-struct nf_ct_frag6_skb_cb
-{
- struct inet6_skb_parm h;
- int offset;
-};
-
-#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb*)((skb)->cb))
-
-static struct inet_frags nf_frags;
-
-static struct netns_frags *get_netns_frags6_from_net(struct net *net)
-{
-#ifdef HAVE_INET_FRAG_LRU_MOVE
- struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
- return &(ovs_net->nf_frags);
-#else
- return &(net->nf_frag.frags);
-#endif
-}
-
-static struct net *get_net_from_netns_frags6(struct netns_frags *frags)
-{
- struct net *net;
-#ifdef HAVE_INET_FRAG_LRU_MOVE
- struct ovs_net *ovs_net;
-
- ovs_net = container_of(frags, struct ovs_net, nf_frags);
- net = ovs_net->net;
-#else
- net = container_of(frags, struct net, nf_frag.frags);
-#endif
- return net;
-}
-
-static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
-{
- return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
-}
-
-#ifdef HAVE_INET_FRAGS_RND
-static unsigned int nf_hash_frag(__be32 id, const struct in6_addr *saddr,
- const struct in6_addr *daddr)
-{
- net_get_random_once(&nf_frags.rnd, sizeof(nf_frags.rnd));
- return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
- (__force u32)id, nf_frags.rnd);
-}
-/* fb3cfe6e75b9 ("inet: frag: remove hash size assumptions from callers")
- * shifted this logic into inet_fragment, but prior kernels still need this.
- */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3,17,0)
-#define nf_hash_frag(a, b, c) (nf_hash_frag(a, b, c) & (INETFRAGS_HASHSZ - 1))
-#endif
-
-#ifdef HAVE_INET_FRAGS_CONST
-static unsigned int nf_hashfn(const struct inet_frag_queue *q)
-#else
-static unsigned int nf_hashfn(struct inet_frag_queue *q)
-#endif
-{
- const struct frag_queue *nq;
-
- nq = container_of(q, struct frag_queue, q);
- return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr);
-}
-
-#endif /* HAVE_INET_FRAGS_RND */
-static void nf_ct_frag6_expire(unsigned long data)
-{
- struct frag_queue *fq;
- struct net *net;
-
- fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
- net = get_net_from_netns_frags6(fq->q.net);
-
-#ifdef HAVE_INET_FRAGS_RND
- ip6_expire_frag_queue(net, fq, &nf_frags);
-#else
-#ifdef HAVE_IPV6_FRAG_H
- ip6frag_expire_frag_queue(net, fq);
-#else
- ip6_expire_frag_queue(net, fq);
-#endif
-#endif
-}
-
-#ifdef HAVE_INET_FRAGS_RND
-/* Creation primitives. */
-static inline struct frag_queue *fq_find(struct net *net, __be32 id,
- u32 user, struct in6_addr *src,
- struct in6_addr *dst, u8 ecn)
-{
- struct inet_frag_queue *q;
- struct ip6_create_arg arg;
- unsigned int hash;
- struct netns_frags *frags;
-
- arg.id = id;
- arg.user = user;
- arg.src = src;
- arg.dst = dst;
- arg.ecn = ecn;
-
-#ifdef HAVE_INET_FRAGS_WITH_RWLOCK
- read_lock_bh(&nf_frags.lock);
-#else
- local_bh_disable();
-#endif
- hash = nf_hash_frag(id, src, dst);
-
- frags = get_netns_frags6_from_net(net);
- q = inet_frag_find(frags, &nf_frags, &arg, hash);
- local_bh_enable();
- if (IS_ERR_OR_NULL(q)) {
- inet_frag_maybe_warn_overflow(q, pr_fmt());
- return NULL;
- }
- return container_of(q, struct frag_queue, q);
-}
-#else
-static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user,
- const struct ipv6hdr *hdr, int iif)
-{
- struct frag_v6_compare_key key = {
- .id = id,
- .saddr = hdr->saddr,
- .daddr = hdr->daddr,
- .user = user,
- .iif = iif,
- };
- struct inet_frag_queue *q;
-
- q = inet_frag_find(&net->nf_frag.frags, &key);
- if (!q)
- return NULL;
-
- return container_of(q, struct frag_queue, q);
-}
-
-#endif /* HAVE_INET_FRAGS_RND */
-
-static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
- const struct frag_hdr *fhdr, int nhoff)
-{
- struct sk_buff *prev, *next;
- unsigned int payload_len;
- int offset, end;
- u8 ecn;
-
- if (qp_flags(fq) & INET_FRAG_COMPLETE) {
- pr_debug("Already completed\n");
- goto err;
- }
-
- payload_len = ntohs(ipv6_hdr(skb)->payload_len);
-
- offset = ntohs(fhdr->frag_off) & ~0x7;
- end = offset + (payload_len -
- ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
-
- if ((unsigned int)end > IPV6_MAXPLEN) {
- pr_debug("offset is too large.\n");
- return -1;
- }
-
- ecn = ip6_frag_ecn(ipv6_hdr(skb));
-
- if (skb->ip_summed == CHECKSUM_COMPLETE) {
- const unsigned char *nh = skb_network_header(skb);
- skb->csum = csum_sub(skb->csum,
- csum_partial(nh, (u8 *)(fhdr + 1) - nh,
- 0));
- }
-
- /* Is this the final fragment? */
- if (!(fhdr->frag_off & htons(IP6_MF))) {
- /* If we already have some bits beyond end
- * or have different end, the segment is corrupted.
- */
- if (end < fq->q.len ||
- ((qp_flags(fq) & INET_FRAG_LAST_IN) && end != fq->q.len)) {
- pr_debug("already received last fragment\n");
- goto err;
- }
- qp_flags(fq) |= INET_FRAG_LAST_IN;
- fq->q.len = end;
- } else {
- /* Check if the fragment is rounded to 8 bytes.
- * Required by the RFC.
- */
- if (end & 0x7) {
- /* RFC2460 says always send parameter problem in
- * this case. -DaveM
- */
- pr_debug("end of fragment not rounded to 8 bytes.\n");
- return -1;
- }
- if (end > fq->q.len) {
- /* Some bits beyond end -> corruption. */
- if (qp_flags(fq) & INET_FRAG_LAST_IN) {
- pr_debug("last packet already reached.\n");
- goto err;
- }
- fq->q.len = end;
- }
- }
-
- if (end == offset)
- goto err;
-
- /* Point into the IP datagram 'data' part. */
- if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) {
- pr_debug("queue: message is too short.\n");
- goto err;
- }
- if (pskb_trim_rcsum(skb, end - offset)) {
- pr_debug("Can't trim\n");
- goto err;
- }
-
- /* Find out which fragments are in front and at the back of us
- * in the chain of fragments so far. We must know where to put
- * this fragment, right?
- */
- prev = fq->q.fragments_tail;
- if (!prev || NFCT_FRAG6_CB(prev)->offset < offset) {
- next = NULL;
- goto found;
- }
- prev = NULL;
- for (next = fq->q.fragments; next != NULL; next = next->next) {
- if (NFCT_FRAG6_CB(next)->offset >= offset)
- break; /* bingo! */
- prev = next;
- }
-
-found:
- /* RFC5722, Section 4:
- * When reassembling an IPv6 datagram, if
- * one or more its constituent fragments is determined to be an
- * overlapping fragment, the entire datagram (and any constituent
- * fragments, including those not yet received) MUST be silently
- * discarded.
- */
-
- /* Check for overlap with preceding fragment. */
- if (prev &&
- (NFCT_FRAG6_CB(prev)->offset + prev->len) > offset)
- goto discard_fq;
-
- /* Look for overlap with succeeding segment. */
- if (next && NFCT_FRAG6_CB(next)->offset < end)
- goto discard_fq;
-
- NFCT_FRAG6_CB(skb)->offset = offset;
-
- /* Insert this fragment in the chain of fragments. */
- skb->next = next;
- if (!next)
- fq->q.fragments_tail = skb;
- if (prev)
- prev->next = skb;
- else
- fq->q.fragments = skb;
-
- if (skb->dev) {
- fq->iif = skb->dev->ifindex;
- skb->dev = NULL;
- }
- fq->q.stamp = skb->tstamp;
- fq->q.meat += skb->len;
- fq->ecn |= ecn;
- if (payload_len > fq->q.max_size)
- fq->q.max_size = payload_len;
- add_frag_mem_limit(fq->q.net, skb->truesize);
-
- /* The first fragment.
- * nhoffset is obtained from the first fragment, of course.
- */
- if (offset == 0) {
- fq->nhoffset = nhoff;
- qp_flags(fq) |= INET_FRAG_FIRST_IN;
- }
-
- inet_frag_lru_move(&fq->q);
- return 0;
-
-discard_fq:
-#ifdef HAVE_INET_FRAGS_RND
- inet_frag_kill(&fq->q, &nf_frags);
-#else
- inet_frag_kill(&fq->q);
-#endif
-err:
- return -1;
-}
-
-/*
- * Check if this packet is complete.
- *
- * It is called with locked fq, and caller must check that
- * queue is eligible for reassembly i.e. it is not COMPLETE,
- * the last and the first frames arrived and all the bits are here.
- *
- * returns true if *prev skb has been transformed into the reassembled
- * skb, false otherwise.
- */
-static bool
-nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev)
-{
- struct sk_buff *fp, *head = fq->q.fragments;
- int payload_len;
- u8 ecn;
-
-#ifdef HAVE_INET_FRAGS_RND
- inet_frag_kill(&fq->q, &nf_frags);
-#else
- inet_frag_kill(&fq->q);
-#endif
-
- WARN_ON(head == NULL);
- WARN_ON(NFCT_FRAG6_CB(head)->offset != 0);
-
- ecn = ip_frag_ecn_table[fq->ecn];
- if (unlikely(ecn == 0xff))
- return false;
-
- /* Unfragmented part is taken from the first segment. */
- payload_len = ((head->data - skb_network_header(head)) -
- sizeof(struct ipv6hdr) + fq->q.len -
- sizeof(struct frag_hdr));
- if (payload_len > IPV6_MAXPLEN) {
- net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n",
- payload_len);
- return false;
- }
-
- /* Head of list must not be cloned. */
- if (skb_unclone(head, GFP_ATOMIC))
- return false;
-
- /* If the first fragment is fragmented itself, we split
- * it to two chunks: the first with data and paged part
- * and the second, holding only fragments. */
- if (skb_has_frag_list(head)) {
- struct sk_buff *clone;
- int i, plen = 0;
-
- clone = alloc_skb(0, GFP_ATOMIC);
- if (clone == NULL)
- return false;
-
- clone->next = head->next;
- head->next = clone;
- skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
- skb_frag_list_init(head);
- for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
- plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
- clone->len = clone->data_len = head->data_len - plen;
- head->data_len -= clone->len;
- head->len -= clone->len;
- clone->csum = 0;
- clone->ip_summed = head->ip_summed;
-
- add_frag_mem_limit(fq->q.net, clone->truesize);
- }
-
- /* morph head into last received skb: prev.
- *
- * This allows callers of ipv6 conntrack defrag to continue
- * to use the last skb(frag) passed into the reasm engine.
- * The last skb frag 'silently' turns into the full reassembled skb.
- *
- * Since prev is also part of q->fragments we have to clone it first.
- */
- if (head != prev) {
- struct sk_buff *iter;
-
- fp = skb_clone(prev, GFP_ATOMIC);
- if (!fp)
- return false;
-
- fp->next = prev->next;
-
- iter = head;
- while (iter) {
- if (iter->next == prev) {
- iter->next = fp;
- break;
- }
- iter = iter->next;
- }
-
- skb_morph(prev, head);
- prev->next = head->next;
- consume_skb(head);
- head = prev;
- }
-
- /* We have to remove fragment header from datagram and to relocate
- * header in order to calculate ICV correctly. */
- skb_network_header(head)[fq->nhoffset] = skb_transport_header(head)[0];
- memmove(head->head + sizeof(struct frag_hdr), head->head,
- (head->data - head->head) - sizeof(struct frag_hdr));
- head->mac_header += sizeof(struct frag_hdr);
- head->network_header += sizeof(struct frag_hdr);
-
- skb_shinfo(head)->frag_list = head->next;
- skb_reset_transport_header(head);
- skb_push(head, head->data - skb_network_header(head));
-
- for (fp=head->next; fp; fp = fp->next) {
- head->data_len += fp->len;
- head->len += fp->len;
- if (head->ip_summed != fp->ip_summed)
- head->ip_summed = CHECKSUM_NONE;
- else if (head->ip_summed == CHECKSUM_COMPLETE)
- head->csum = csum_add(head->csum, fp->csum);
- head->truesize += fp->truesize;
- }
- sub_frag_mem_limit(fq->q.net, head->truesize);
-
- head->ignore_df = 1;
- head->next = NULL;
- head->dev = dev;
- head->tstamp = fq->q.stamp;
- ipv6_hdr(head)->payload_len = htons(payload_len);
- ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
- IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size;
-
- /* Yes, and fold redundant checksum back. 8) */
- if (head->ip_summed == CHECKSUM_COMPLETE)
- head->csum = csum_partial(skb_network_header(head),
- skb_network_header_len(head),
- head->csum);
-
- fq->q.fragments = NULL;
- fq->q.fragments_tail = NULL;
-
- return true;
-}
-
-/*
- * find the header just before Fragment Header.
- *
- * if success return 0 and set ...
- * (*prevhdrp): the value of "Next Header Field" in the header
- * just before Fragment Header.
- * (*prevhoff): the offset of "Next Header Field" in the header
- * just before Fragment Header.
- * (*fhoff) : the offset of Fragment Header.
- *
- * Based on ipv6_skip_hdr() in net/ipv6/exthdr.c
- *
- */
-static int
-find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
-{
- u8 nexthdr = ipv6_hdr(skb)->nexthdr;
- const int netoff = skb_network_offset(skb);
- u8 prev_nhoff = netoff + offsetof(struct ipv6hdr, nexthdr);
- int start = netoff + sizeof(struct ipv6hdr);
- int len = skb->len - start;
- u8 prevhdr = NEXTHDR_IPV6;
-
- while (nexthdr != NEXTHDR_FRAGMENT) {
- struct ipv6_opt_hdr hdr;
- int hdrlen;
-
- if (!ipv6_ext_hdr(nexthdr)) {
- return -1;
- }
- if (nexthdr == NEXTHDR_NONE) {
- pr_debug("next header is none\n");
- return -1;
- }
- if (len < (int)sizeof(struct ipv6_opt_hdr)) {
- pr_debug("too short\n");
- return -1;
- }
- if (skb_copy_bits(skb, start, &hdr, sizeof(hdr)))
- BUG();
- if (nexthdr == NEXTHDR_AUTH)
- hdrlen = (hdr.hdrlen+2)<<2;
- else
- hdrlen = ipv6_optlen(&hdr);
-
- prevhdr = nexthdr;
- prev_nhoff = start;
-
- nexthdr = hdr.nexthdr;
- len -= hdrlen;
- start += hdrlen;
- }
-
- if (len < 0)
- return -1;
-
- *prevhdrp = prevhdr;
- *prevhoff = prev_nhoff;
- *fhoff = start;
-
- return 0;
-}
-
-int rpl_nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
-{
- struct net_device *dev = skb->dev;
- int fhoff, nhoff, ret;
- struct frag_hdr *fhdr;
- struct frag_queue *fq;
- struct ipv6hdr *hdr;
- u8 prevhdr;
- struct netns_frags *frags;
-
- /* Jumbo payload inhibits frag. header */
- if (ipv6_hdr(skb)->payload_len == 0) {
- pr_debug("payload len = 0\n");
- return -EINVAL;
- }
-
- if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0)
- return -EINVAL;
-
- if (!pskb_may_pull(skb, fhoff + sizeof(*fhdr)))
- return -ENOMEM;
-
- skb_set_transport_header(skb, fhoff);
- hdr = ipv6_hdr(skb);
- fhdr = (struct frag_hdr *)skb_transport_header(skb);
-
-/* See ip_evictor(). */
- frags = get_netns_frags6_from_net(net);
-#ifdef HAVE_INET_FRAG_EVICTOR
- local_bh_disable();
- inet_frag_evictor(frags, &nf_frags, false);
- local_bh_enable();
-#endif
-
- skb_orphan(skb);
-#ifdef HAVE_INET_FRAGS_RND
- fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
- ip6_frag_ecn(hdr));
-#else
- fq = fq_find(net, fhdr->identification, user, hdr,
- skb->dev ? skb->dev->ifindex : 0);
-#endif
- if (fq == NULL)
- return -ENOMEM;
-
- spin_lock_bh(&fq->q.lock);
-
- if (nf_ct_frag6_queue(fq, skb, fhdr, nhoff) < 0) {
- ret = -EINVAL;
- goto out_unlock;
- }
-
- /* after queue has assumed skb ownership, only 0 or -EINPROGRESS
- * must be returned.
- */
- ret = -EINPROGRESS;
- if (qp_flags(fq) == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
- fq->q.meat == fq->q.len &&
- nf_ct_frag6_reasm(fq, skb, dev))
- ret = 0;
-
-out_unlock:
- spin_unlock_bh(&fq->q.lock);
-#ifdef HAVE_INET_FRAGS_RND
- inet_frag_put(&fq->q, &nf_frags);
-#else
- inet_frag_put(&fq->q);
-#endif
- return ret;
-}
-
-#ifdef HAVE_DEFRAG_ENABLE_TAKES_NET
-static int nf_ct_net_init(struct net *net)
-{
- return nf_defrag_ipv6_enable(net);
-}
-#endif
-
-static void nf_ct_net_exit(struct net *net)
-{
-}
-
-void ovs_netns_frags6_init(struct net *net)
-{
-#ifdef HAVE_INET_FRAG_LRU_MOVE
- struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
-
- ovs_net->nf_frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
- ovs_net->nf_frags.low_thresh = IPV6_FRAG_LOW_THRESH;
- ovs_net->nf_frags.timeout = IPV6_FRAG_TIMEOUT;
-
- inet_frags_init_net(&(ovs_net->nf_frags));
-#endif
-}
-
-void ovs_netns_frags6_exit(struct net *net)
-{
-#ifdef HAVE_INET_FRAGS_RND
- struct netns_frags *frags;
-
- frags = get_netns_frags6_from_net(net);
- inet_frags_exit_net(frags, &nf_frags);
-#endif
-}
-
-static struct pernet_operations nf_ct_net_ops = {
-#ifdef HAVE_DEFRAG_ENABLE_TAKES_NET
- .init = nf_ct_net_init,
-#endif
- .exit = nf_ct_net_exit,
-};
-
-#ifdef HAVE_IPV6_FRAG_H
-static const struct rhashtable_params nfct_rhash_params = {
- .head_offset = offsetof(struct inet_frag_queue, node),
- .hashfn = ip6frag_key_hashfn,
- .obj_hashfn = ip6frag_obj_hashfn,
- .obj_cmpfn = ip6frag_obj_cmpfn,
- .automatic_shrinking = true,
-};
-#endif
-
-int rpl_nf_ct_frag6_init(void)
-{
- int ret = 0;
-
-#ifndef HAVE_DEFRAG_ENABLE_TAKES_NET
- nf_defrag_ipv6_enable();
-#endif
-#ifdef HAVE_INET_FRAGS_RND
- nf_frags.hashfn = nf_hashfn;
- nf_frags.match = ip6_frag_match;
- nf_frags.constructor = ip6_frag_init;
-#else
-#ifdef HAVE_IPV6_FRAG_H
- nf_frags.rhash_params = nfct_rhash_params;
- nf_frags.constructor = ip6frag_init;
-#else
- nf_frags.rhash_params = ip6_rhash_params;
- nf_frags.constructor = ip6_frag_init;
-#endif
-#endif /* HAVE_INET_FRAGS_RND */
- nf_frags.destructor = NULL;
- nf_frags.qsize = sizeof(struct frag_queue);
- nf_frags.frag_expire = nf_ct_frag6_expire;
-#if defined(HAVE_INET_FRAGS_WITH_FRAGS_WORK) || !defined(HAVE_INET_FRAGS_RND)
- nf_frags.frags_cache_name = nf_frags_cache_name;
-#endif
-#if RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(8,0)
- nf_frags.secret_interval = 10 * 60 * HZ;
-#endif
- ret = inet_frags_init(&nf_frags);
- if (ret)
- goto out;
- ret = register_pernet_subsys(&nf_ct_net_ops);
- if (ret)
- inet_frags_fini(&nf_frags);
-
-out:
- return ret;
-}
-
-void rpl_nf_ct_frag6_cleanup(void)
-{
- unregister_pernet_subsys(&nf_ct_net_ops);
- inet_frags_fini(&nf_frags);
-}
-
-#endif /* OVS_NF_DEFRAG6_BACKPORT */
diff --git a/datapath/linux/compat/nf_conntrack_timeout.c b/datapath/linux/compat/nf_conntrack_timeout.c
deleted file mode 100644
index c02baff57..000000000
--- a/datapath/linux/compat/nf_conntrack_timeout.c
+++ /dev/null
@@ -1,102 +0,0 @@
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack_extend.h>
-#include <net/netfilter/nf_conntrack_timeout.h>
-
-#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
-#ifndef HAVE_NF_CT_SET_TIMEOUT
-static void rpl__nf_ct_timeout_put(struct nf_ct_timeout *timeout)
-{
- typeof(nf_ct_timeout_put_hook) timeout_put;
-
- timeout_put = rcu_dereference(nf_ct_timeout_put_hook);
- if (timeout_put)
- timeout_put(timeout);
-}
-
-int rpl_nf_ct_set_timeout(struct net *net, struct nf_conn *ct,
- u8 l3num, u8 l4num, const char *timeout_name)
-{
- typeof(nf_ct_timeout_find_get_hook) timeout_find_get;
- struct nf_ct_timeout *timeout;
- struct nf_conn_timeout *timeout_ext;
- const char *errmsg = NULL;
- int ret = 0;
-
- rcu_read_lock();
- timeout_find_get = rcu_dereference(nf_ct_timeout_find_get_hook);
- if (!timeout_find_get) {
- ret = -ENOENT;
- errmsg = "Timeout policy base is empty";
- goto out;
- }
-
-#ifdef HAVE_NF_CT_TIMEOUT_FIND_GET_HOOK_NET
- timeout = timeout_find_get(net, timeout_name);
-#else
- timeout = timeout_find_get(timeout_name);
-#endif
- if (!timeout) {
- ret = -ENOENT;
- pr_info_ratelimited("No such timeout policy \"%s\"\n",
- timeout_name);
- goto out;
- }
-
- if (timeout->l3num != l3num) {
- ret = -EINVAL;
- pr_info_ratelimited("Timeout policy `%s' can only be used by "
- "L%d protocol number %d\n",
- timeout_name, 3, timeout->l3num);
- goto err_put_timeout;
- }
- /* Make sure the timeout policy matches any existing protocol tracker,
- * otherwise default to generic.
- */
- if (timeout->l4proto->l4proto != l4num) {
- ret = -EINVAL;
- pr_info_ratelimited("Timeout policy `%s' can only be used by "
- "L%d protocol number %d\n",
- timeout_name, 4, timeout->l4proto->l4proto);
- goto err_put_timeout;
- }
- timeout_ext = nf_ct_timeout_ext_add(ct, timeout, GFP_ATOMIC);
- if (!timeout_ext) {
- ret = -ENOMEM;
- goto err_put_timeout;
- }
-
- rcu_read_unlock();
- return ret;
-
-err_put_timeout:
- rpl__nf_ct_timeout_put(timeout);
-out:
- rcu_read_unlock();
- if (errmsg)
- pr_info_ratelimited("%s\n", errmsg);
- return ret;
-}
-EXPORT_SYMBOL_GPL(rpl_nf_ct_set_timeout);
-
-void rpl_nf_ct_destroy_timeout(struct nf_conn *ct)
-{
- struct nf_conn_timeout *timeout_ext;
- typeof(nf_ct_timeout_put_hook) timeout_put;
-
- rcu_read_lock();
- timeout_put = rcu_dereference(nf_ct_timeout_put_hook);
-
- if (timeout_put) {
- timeout_ext = nf_ct_timeout_find(ct);
- if (timeout_ext) {
- timeout_put(timeout_ext->timeout);
- RCU_INIT_POINTER(timeout_ext->timeout, NULL);
- }
- }
- rcu_read_unlock();
-}
-EXPORT_SYMBOL_GPL(rpl_nf_ct_destroy_timeout);
-
-#endif /* HAVE_NF_CT_SET_TIMEOUT */
-#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
diff --git a/datapath/linux/compat/reciprocal_div.c b/datapath/linux/compat/reciprocal_div.c
deleted file mode 100644
index 818502a0f..000000000
--- a/datapath/linux/compat/reciprocal_div.c
+++ /dev/null
@@ -1,27 +0,0 @@
-#include <linux/kernel.h>
-#include <asm/div64.h>
-#include <linux/module.h>
-#include <linux/reciprocal_div.h>
-
-/*
- * For a description of the algorithm please have a look at
- * include/linux/reciprocal_div.h
- */
-
-struct reciprocal_value rpl_reciprocal_value(u32 d)
-{
- struct reciprocal_value R;
- u64 m;
- int l;
-
- l = fls(d - 1);
- m = ((1ULL << 32) * ((1ULL << l) - d));
- do_div(m, d);
- ++m;
- R.m = (u32)m;
- R.sh1 = min(l, 1);
- R.sh2 = max(l - 1, 0);
-
- return R;
-}
-EXPORT_SYMBOL_GPL(rpl_reciprocal_value);
diff --git a/datapath/linux/compat/skbuff-openvswitch.c b/datapath/linux/compat/skbuff-openvswitch.c
deleted file mode 100644
index 4cdeedc58..000000000
--- a/datapath/linux/compat/skbuff-openvswitch.c
+++ /dev/null
@@ -1,310 +0,0 @@
-#include <linux/module.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <linux/if_vlan.h>
-#include <linux/kconfig.h>
-
-#include "gso.h"
-
-#if !defined(HAVE_SKB_WARN_LRO) && defined(NETIF_F_LRO)
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-void __skb_warn_lro_forwarding(const struct sk_buff *skb)
-{
- if (net_ratelimit())
- pr_warn("%s: received packets cannot be forwarded while LRO is enabled\n",
- skb->dev->name);
-}
-
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3,14,0)
-
-static inline bool head_frag(const struct sk_buff *skb)
-{
- return skb->head_frag;
-}
-
- /**
- * skb_zerocopy_headlen - Calculate headroom needed for skb_zerocopy()
- * @from: source buffer
- *
- * Calculates the amount of linear headroom needed in the 'to' skb passed
- * into skb_zerocopy().
- */
-unsigned int
-rpl_skb_zerocopy_headlen(const struct sk_buff *from)
-{
- unsigned int hlen = 0;
-
- if (!head_frag(from) ||
- skb_headlen(from) < L1_CACHE_BYTES ||
- skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS)
- hlen = skb_headlen(from);
-
- if (skb_has_frag_list(from))
- hlen = from->len;
-
- return hlen;
-}
-EXPORT_SYMBOL_GPL(rpl_skb_zerocopy_headlen);
-
-#ifndef HAVE_SKB_ZEROCOPY
-/**
- * skb_zerocopy - Zero copy skb to skb
- * @to: destination buffer
- * @source: source buffer
- * @len: number of bytes to copy from source buffer
- * @hlen: size of linear headroom in destination buffer
- *
- * Copies up to `len` bytes from `from` to `to` by creating references
- * to the frags in the source buffer.
- *
- * The `hlen` as calculated by skb_zerocopy_headlen() specifies the
- * headroom in the `to` buffer.
- *
- * Return value:
- * 0: everything is OK
- * -ENOMEM: couldn't orphan frags of @from due to lack of memory
- * -EFAULT: skb_copy_bits() found some problem with skb geometry
- */
-int
-rpl_skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen)
-{
- int i, j = 0;
- int plen = 0; /* length of skb->head fragment */
- int ret;
- struct page *page;
- unsigned int offset;
-
- BUG_ON(!head_frag(from) && !hlen);
-
- /* dont bother with small payloads */
- if (len <= skb_tailroom(to))
- return skb_copy_bits(from, 0, skb_put(to, len), len);
-
- if (hlen) {
- ret = skb_copy_bits(from, 0, skb_put(to, hlen), hlen);
- if (unlikely(ret))
- return ret;
- len -= hlen;
- } else {
- plen = min_t(int, skb_headlen(from), len);
- if (plen) {
- page = virt_to_head_page(from->head);
- offset = from->data - (unsigned char *)page_address(page);
- __skb_fill_page_desc(to, 0, page, offset, plen);
- get_page(page);
- j = 1;
- len -= plen;
- }
- }
-
- to->truesize += len + plen;
- to->len += len + plen;
- to->data_len += len + plen;
-
- if (unlikely(skb_orphan_frags(from, GFP_ATOMIC))) {
- skb_tx_error(from);
- return -ENOMEM;
- }
-
- for (i = 0; i < skb_shinfo(from)->nr_frags; i++) {
- if (!len)
- break;
- skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i];
- skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len);
- len -= skb_shinfo(to)->frags[j].size;
- skb_frag_ref(to, j);
- j++;
- }
- skb_shinfo(to)->nr_frags = j;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(rpl_skb_zerocopy);
-#endif
-#endif
-
-#ifndef HAVE_SKB_ENSURE_WRITABLE
-int rpl_skb_ensure_writable(struct sk_buff *skb, int write_len)
-{
- if (!pskb_may_pull(skb, write_len))
- return -ENOMEM;
-
- if (!skb_cloned(skb) || skb_clone_writable(skb, write_len))
- return 0;
-
- return pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
-}
-EXPORT_SYMBOL_GPL(rpl_skb_ensure_writable);
-#endif
-
-#if !defined(HAVE___SKB_VLAN_POP) || !defined(HAVE_SKB_VLAN_POP)
-/* remove VLAN header from packet and update csum accordingly. */
-int rpl___skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci)
-{
- struct vlan_hdr *vhdr;
- unsigned int offset = skb->data - skb_mac_header(skb);
- int err;
-
- __skb_push(skb, offset);
- err = skb_ensure_writable(skb, VLAN_ETH_HLEN);
- if (unlikely(err))
- goto pull;
-
- skb_postpull_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN);
-
- vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN);
- *vlan_tci = ntohs(vhdr->h_vlan_TCI);
-
- memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN);
- __skb_pull(skb, VLAN_HLEN);
-
- vlan_set_encap_proto(skb, vhdr);
- skb->mac_header += VLAN_HLEN;
-
- if (skb_network_offset(skb) < ETH_HLEN)
- skb_set_network_header(skb, ETH_HLEN);
-
- skb_reset_mac_len(skb);
-pull:
- __skb_pull(skb, offset);
-
- return err;
-}
-#endif
-
-#ifndef HAVE_SKB_VLAN_POP
-int rpl_skb_vlan_pop(struct sk_buff *skb)
-{
- u16 vlan_tci;
- __be16 vlan_proto;
- int err;
-
- if (likely(skb_vlan_tag_present(skb))) {
- skb->vlan_tci = 0;
- } else {
- if (unlikely((skb->protocol != htons(ETH_P_8021Q) &&
- skb->protocol != htons(ETH_P_8021AD)) ||
- skb->len < VLAN_ETH_HLEN))
- return 0;
-
- err = rpl___skb_vlan_pop(skb, &vlan_tci);
- if (err)
- return err;
- }
- /* move next vlan tag to hw accel tag */
- if (likely((skb->protocol != htons(ETH_P_8021Q) &&
- skb->protocol != htons(ETH_P_8021AD)) ||
- skb->len < VLAN_ETH_HLEN))
- return 0;
-
- vlan_proto = htons(ETH_P_8021Q);
- err = __skb_vlan_pop(skb, &vlan_tci);
- if (unlikely(err))
- return err;
-
- __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
- return 0;
-}
-EXPORT_SYMBOL_GPL(rpl_skb_vlan_pop);
-#endif
-
-#ifndef HAVE_SKB_VLAN_PUSH
-int rpl_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
-{
- if (skb_vlan_tag_present(skb)) {
- unsigned int offset = skb->data - skb_mac_header(skb);
- int err;
-
- /* __vlan_insert_tag expect skb->data pointing to mac header.
- * So change skb->data before calling it and change back to
- * original position later
- */
- __skb_push(skb, offset);
- err = __vlan_insert_tag(skb, skb->vlan_proto,
- skb_vlan_tag_get(skb));
- if (err)
- return err;
- skb->mac_len += VLAN_HLEN;
- __skb_pull(skb, offset);
-
- if (skb->ip_summed == CHECKSUM_COMPLETE)
- skb->csum = csum_add(skb->csum, csum_partial(skb->data
- + (2 * ETH_ALEN), VLAN_HLEN, 0));
- }
- __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
- return 0;
-}
-EXPORT_SYMBOL_GPL(rpl_skb_vlan_push);
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
-int rpl_pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
- gfp_t gfp_mask)
-{
- int err;
- int inner_mac_offset, inner_nw_offset, inner_transport_offset;
-
- inner_mac_offset = skb_inner_mac_offset(skb);
- inner_nw_offset = skb_inner_network_offset(skb);
- inner_transport_offset = skb_inner_transport_offset(skb);
-
-#undef pskb_expand_head
- err = pskb_expand_head(skb, nhead, ntail, gfp_mask);
- if (err)
- return err;
-
- skb_set_inner_mac_header(skb, inner_mac_offset);
- skb_set_inner_network_header(skb, inner_nw_offset);
- skb_set_inner_transport_header(skb, inner_transport_offset);
-
- return 0;
-}
-EXPORT_SYMBOL(rpl_pskb_expand_head);
-
-#endif
-
-#ifndef HAVE_KFREE_SKB_LIST
-void rpl_kfree_skb_list(struct sk_buff *segs)
-{
- while (segs) {
- struct sk_buff *next = segs->next;
-
- kfree_skb(segs);
- segs = next;
- }
-}
-EXPORT_SYMBOL(rpl_kfree_skb_list);
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4,1,0)
-
-#define nf_reset_trace rpl_nf_reset_trace
-static void nf_reset_trace(struct sk_buff *skb)
-{
-#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || defined(CONFIG_NF_TABLES)
- skb->nf_trace = 0;
-#endif
-}
-
-void rpl_skb_scrub_packet(struct sk_buff *skb, bool xnet)
-{
- skb->tstamp.tv64 = 0;
- skb->pkt_type = PACKET_HOST;
- skb->skb_iif = 0;
- skb->ignore_df = 0;
- skb_dst_drop(skb);
- secpath_reset(skb);
- nf_reset(skb);
- nf_reset_trace(skb);
-
- if (!xnet)
- return;
-
- skb_orphan(skb);
- skb->mark = 0;
-}
-#endif
diff --git a/datapath/linux/compat/socket.c b/datapath/linux/compat/socket.c
deleted file mode 100644
index 7f61e4456..000000000
--- a/datapath/linux/compat/socket.c
+++ /dev/null
@@ -1,32 +0,0 @@
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/socket.h>
-#include <linux/udp.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <net/ip_tunnels.h>
-#include <net/udp.h>
-#include <net/udp_tunnel.h>
-#include <net/net_namespace.h>
-
-
-#ifndef HAVE_SOCK_CREATE_KERN_NET
-#undef sock_create_kern
-
-int ovs_sock_create_kern(struct net *net, int family, int type, int protocol, struct socket **res)
-{
- int err;
-
- err = sock_create_kern(family, type, protocol, res);
- if (err < 0)
- return err;
-
- sk_change_net((*res)->sk, net);
- return err;
-}
-#undef sk_release_kernel
-void ovs_sock_release(struct socket *sock)
-{
- sk_release_kernel(sock->sk);
-}
-#endif
diff --git a/datapath/linux/compat/stt.c b/datapath/linux/compat/stt.c
deleted file mode 100644
index 39a294764..000000000
--- a/datapath/linux/compat/stt.c
+++ /dev/null
@@ -1,2129 +0,0 @@
-/*
- * Stateless TCP Tunnel (STT) vport.
- *
- * Copyright (c) 2015 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <asm/unaligned.h>
-
-#include <linux/delay.h>
-#include <linux/if.h>
-#include <linux/if_vlan.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/jhash.h>
-#include <linux/list.h>
-#include <linux/log2.h>
-#include <linux/module.h>
-#include <linux/net.h>
-#include <linux/netfilter.h>
-#include <linux/percpu.h>
-#include <linux/skbuff.h>
-#include <linux/tcp.h>
-#include <linux/workqueue.h>
-
-#include <net/dst_metadata.h>
-#include <net/icmp.h>
-#include <net/inet_ecn.h>
-#include <net/ip.h>
-#include <net/ip_tunnels.h>
-#include <net/ip6_checksum.h>
-#include <net/net_namespace.h>
-#include <net/netns/generic.h>
-#include <net/sock.h>
-#include <net/stt.h>
-#include <net/tcp.h>
-#include <net/udp.h>
-
-#include "gso.h"
-#include "compat.h"
-
-#define STT_NETDEV_VER "0.1"
-#define STT_DST_PORT 7471
-
-#ifdef OVS_STT
-#ifdef CONFIG_SLUB
-/*
- * We saw better performance with skipping zero copy in case of SLUB.
- * So skip zero copy for SLUB case.
- */
-#define SKIP_ZERO_COPY
-#endif
-
-#define STT_VER 0
-
-/* @list: Per-net list of STT ports.
- * @rcv: The callback is called on STT packet recv, STT reassembly can generate
- * multiple packets, in this case first packet has tunnel outer header, rest
- * of the packets are inner packet segments with no stt header.
- * @rcv_data: user data.
- * @sock: Fake TCP socket for the STT port.
- */
-struct stt_dev {
- struct net_device *dev;
- struct net *net;
- struct list_head next;
- struct list_head up_next;
- struct socket *sock;
- __be16 dst_port;
-};
-
-#define STT_CSUM_VERIFIED BIT(0)
-#define STT_CSUM_PARTIAL BIT(1)
-#define STT_PROTO_IPV4 BIT(2)
-#define STT_PROTO_TCP BIT(3)
-#define STT_PROTO_TYPES (STT_PROTO_IPV4 | STT_PROTO_TCP)
-
-#ifdef HAVE_SKB_GSO_UDP
-#define SUPPORTED_GSO_TYPES (SKB_GSO_TCPV4 | SKB_GSO_UDP | SKB_GSO_DODGY | \
- SKB_GSO_TCPV6)
-#else
-#define SUPPORTED_GSO_TYPES (SKB_GSO_TCPV4 | SKB_GSO_DODGY | \
- SKB_GSO_TCPV6)
-#endif
-
-/* The length and offset of a fragment are encoded in the sequence number.
- * STT_SEQ_LEN_SHIFT is the left shift needed to store the length.
- * STT_SEQ_OFFSET_MASK is the mask to extract the offset.
- */
-#define STT_SEQ_LEN_SHIFT 16
-#define STT_SEQ_OFFSET_MASK (BIT(STT_SEQ_LEN_SHIFT) - 1)
-
-/* The maximum amount of memory used to store packets waiting to be reassembled
- * on a given CPU. Once this threshold is exceeded we will begin freeing the
- * least recently used fragments.
- */
-#define REASM_HI_THRESH (4 * 1024 * 1024)
-/* The target for the high memory evictor. Once we have exceeded
- * REASM_HI_THRESH, we will continue freeing fragments until we hit
- * this limit.
- */
-#define REASM_LO_THRESH (3 * 1024 * 1024)
-/* The length of time a given packet has to be reassembled from the time the
- * first fragment arrives. Once this limit is exceeded it becomes available
- * for cleaning.
- */
-#define FRAG_EXP_TIME (30 * HZ)
-/* Number of hash entries. Each entry has only a single slot to hold a packet
- * so if there are collisions, we will drop packets. This is allocated
- * per-cpu and each entry consists of struct pkt_frag.
- */
-#define FRAG_HASH_SHIFT 8
-#define FRAG_HASH_ENTRIES BIT(FRAG_HASH_SHIFT)
-#define FRAG_HASH_SEGS ((sizeof(u32) * 8) / FRAG_HASH_SHIFT)
-
-#define CLEAN_PERCPU_INTERVAL (30 * HZ)
-
-struct pkt_key {
- __be32 saddr;
- __be32 daddr;
- __be32 pkt_seq;
- u32 mark;
-};
-
-struct pkt_frag {
- struct sk_buff *skbs;
- unsigned long timestamp;
- struct list_head lru_node;
- struct pkt_key key;
-};
-
-struct stt_percpu {
- struct pkt_frag *frag_hash;
- struct list_head frag_lru;
- unsigned int frag_mem_used;
-
- /* Protect frags table. */
- spinlock_t lock;
-};
-
-struct first_frag {
- struct sk_buff *last_skb;
- unsigned int mem_used;
- u16 tot_len;
- u16 rcvd_len;
- bool set_ecn_ce;
-};
-
-struct frag_skb_cb {
- u16 offset;
-
- /* Only valid for the first skb in the chain. */
- struct first_frag first;
-};
-
-#define FRAG_CB(skb) ((struct frag_skb_cb *)(skb)->cb)
-
-/* per-network namespace private data for this module */
-struct stt_net {
- struct list_head stt_list;
- struct list_head stt_up_list; /* Devices which are in IFF_UP state. */
- int n_tunnels;
-#ifdef HAVE_NF_REGISTER_NET_HOOK
- bool nf_hook_reg_done;
-#endif
-};
-
-static int stt_net_id;
-
-static struct stt_percpu __percpu *stt_percpu_data __read_mostly;
-static u32 frag_hash_seed __read_mostly;
-
-/* Protects sock-hash and refcounts. */
-static DEFINE_MUTEX(stt_mutex);
-
-static int n_tunnels;
-static DEFINE_PER_CPU(u32, pkt_seq_counter);
-
-static void clean_percpu(struct work_struct *work);
-static DECLARE_DELAYED_WORK(clean_percpu_wq, clean_percpu);
-
-static struct stt_dev *stt_find_up_dev(struct net *net, __be16 port)
-{
- struct stt_net *sn = net_generic(net, stt_net_id);
- struct stt_dev *stt_dev;
-
- list_for_each_entry_rcu(stt_dev, &sn->stt_up_list, up_next) {
- if (stt_dev->dst_port == port)
- return stt_dev;
- }
- return NULL;
-}
-
-static __be32 ack_seq(void)
-{
-#if NR_CPUS <= 65536
- u32 pkt_seq, ack;
-
- pkt_seq = this_cpu_read(pkt_seq_counter);
- ack = pkt_seq << ilog2(NR_CPUS) | smp_processor_id();
- this_cpu_inc(pkt_seq_counter);
-
- return (__force __be32)ack;
-#else
-#error "Support for greater than 64k CPUs not implemented"
-#endif
-}
-
-static int clear_gso(struct sk_buff *skb)
-{
- struct skb_shared_info *shinfo = skb_shinfo(skb);
- int err;
-
- if (shinfo->gso_type == 0 && shinfo->gso_size == 0 &&
- shinfo->gso_segs == 0)
- return 0;
-
- err = skb_unclone(skb, GFP_ATOMIC);
- if (unlikely(err))
- return err;
-
- shinfo = skb_shinfo(skb);
- shinfo->gso_type = 0;
- shinfo->gso_size = 0;
- shinfo->gso_segs = 0;
- return 0;
-}
-
-static void copy_skb_metadata(struct sk_buff *to, struct sk_buff *from)
-{
- to->protocol = from->protocol;
- to->tstamp = from->tstamp;
- to->priority = from->priority;
- to->mark = from->mark;
- to->vlan_tci = from->vlan_tci;
- to->vlan_proto = from->vlan_proto;
- skb_copy_secmark(to, from);
-}
-
-static void update_headers(struct sk_buff *skb, bool head,
- unsigned int l4_offset, unsigned int hdr_len,
- bool ipv4, u32 tcp_seq)
-{
- u16 old_len, new_len;
- __be32 delta;
- struct tcphdr *tcph;
- int gso_size;
-
- if (ipv4) {
- struct iphdr *iph = (struct iphdr *)(skb->data + ETH_HLEN);
-
- old_len = ntohs(iph->tot_len);
- new_len = skb->len - ETH_HLEN;
- iph->tot_len = htons(new_len);
-
- ip_send_check(iph);
- } else {
- struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + ETH_HLEN);
-
- old_len = ntohs(ip6h->payload_len);
- new_len = skb->len - ETH_HLEN - sizeof(struct ipv6hdr);
- ip6h->payload_len = htons(new_len);
- }
-
- tcph = (struct tcphdr *)(skb->data + l4_offset);
- if (!head) {
- tcph->seq = htonl(tcp_seq);
- tcph->cwr = 0;
- }
-
- if (skb->next) {
- tcph->fin = 0;
- tcph->psh = 0;
- }
-
- delta = htonl(~old_len + new_len);
- tcph->check = ~csum_fold((__force __wsum)((__force u32)tcph->check +
- (__force u32)delta));
-
- gso_size = skb_shinfo(skb)->gso_size;
- if (gso_size && skb->len - hdr_len <= gso_size)
- BUG_ON(clear_gso(skb));
-}
-
-static bool can_segment(struct sk_buff *head, bool ipv4, bool tcp, bool csum_partial)
-{
- /* If no offloading is in use then we don't have enough information
- * to process the headers.
- */
- if (!csum_partial)
- goto linearize;
-
- /* Handling UDP packets requires IP fragmentation, which means that
- * the L4 checksum can no longer be calculated by hardware (since the
- * fragments are in different packets. If we have to compute the
- * checksum it's faster just to linearize and large UDP packets are
- * pretty uncommon anyways, so it's not worth dealing with for now.
- */
- if (!tcp)
- goto linearize;
-
- if (ipv4) {
- struct iphdr *iph = (struct iphdr *)(head->data + ETH_HLEN);
-
- /* It's difficult to get the IP IDs exactly right here due to
- * varying segment sizes and potentially multiple layers of
- * segmentation. IP ID isn't important when DF is set and DF
- * is generally set for TCP packets, so just linearize if it's
- * not.
- */
- if (!(iph->frag_off & htons(IP_DF)))
- goto linearize;
- } else {
- struct ipv6hdr *ip6h = (struct ipv6hdr *)(head->data + ETH_HLEN);
-
- /* Jumbograms require more processing to update and we'll
- * probably never see them, so just linearize.
- */
- if (ip6h->payload_len == 0)
- goto linearize;
- }
- return true;
-
-linearize:
- return false;
-}
-
-static int copy_headers(struct sk_buff *head, struct sk_buff *frag,
- int hdr_len)
-{
- u16 csum_start;
-
- if (skb_cloned(frag) || skb_headroom(frag) < hdr_len) {
- int extra_head = hdr_len - skb_headroom(frag);
-
- extra_head = extra_head > 0 ? extra_head : 0;
- if (unlikely(pskb_expand_head(frag, extra_head, 0,
- GFP_ATOMIC)))
- return -ENOMEM;
- }
-
- memcpy(__skb_push(frag, hdr_len), head->data, hdr_len);
-
- csum_start = head->csum_start - skb_headroom(head);
- frag->csum_start = skb_headroom(frag) + csum_start;
- frag->csum_offset = head->csum_offset;
- frag->ip_summed = head->ip_summed;
-
- skb_shinfo(frag)->gso_size = skb_shinfo(head)->gso_size;
- skb_shinfo(frag)->gso_type = skb_shinfo(head)->gso_type;
- skb_shinfo(frag)->gso_segs = 0;
-
- copy_skb_metadata(frag, head);
- return 0;
-}
-
-static int skb_list_segment(struct sk_buff *head, bool ipv4, int l4_offset)
-{
- struct sk_buff *skb;
- struct tcphdr *tcph;
- int seg_len;
- int hdr_len;
- int tcp_len;
- u32 seq;
-
- if (unlikely(!pskb_may_pull(head, l4_offset + sizeof(*tcph))))
- return -ENOMEM;
-
- tcph = (struct tcphdr *)(head->data + l4_offset);
- tcp_len = tcph->doff * 4;
- hdr_len = l4_offset + tcp_len;
-
- if (unlikely((tcp_len < sizeof(struct tcphdr)) ||
- (head->len < hdr_len)))
- return -EINVAL;
-
- if (unlikely(!pskb_may_pull(head, hdr_len)))
- return -ENOMEM;
-
- tcph = (struct tcphdr *)(head->data + l4_offset);
- /* Update header of each segment. */
- seq = ntohl(tcph->seq);
- seg_len = skb_pagelen(head) - hdr_len;
-
- skb = skb_shinfo(head)->frag_list;
- skb_shinfo(head)->frag_list = NULL;
- head->next = skb;
- for (; skb; skb = skb->next) {
- int err;
-
- head->len -= skb->len;
- head->data_len -= skb->len;
- head->truesize -= skb->truesize;
-
- seq += seg_len;
- seg_len = skb->len;
- err = copy_headers(head, skb, hdr_len);
- if (err)
- return err;
- update_headers(skb, false, l4_offset, hdr_len, ipv4, seq);
- }
- update_headers(head, true, l4_offset, hdr_len, ipv4, 0);
- return 0;
-}
-
-#ifndef SKIP_ZERO_COPY
-static struct sk_buff *normalize_frag_list(struct sk_buff *head,
- struct sk_buff **skbp)
-{
- struct sk_buff *skb = *skbp;
- struct sk_buff *last;
-
- do {
- struct sk_buff *frags;
-
- if (skb_shared(skb)) {
- struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
-
- if (unlikely(!nskb))
- return ERR_PTR(-ENOMEM);
-
- nskb->next = skb->next;
- consume_skb(skb);
- skb = nskb;
- *skbp = skb;
- }
-
- if (head) {
- head->len -= skb->len;
- head->data_len -= skb->len;
- head->truesize -= skb->truesize;
- }
-
- frags = skb_shinfo(skb)->frag_list;
- if (frags) {
- int err;
-
- err = skb_unclone(skb, GFP_ATOMIC);
- if (unlikely(err))
- return ERR_PTR(err);
-
- last = normalize_frag_list(skb, &frags);
- if (IS_ERR(last))
- return last;
-
- skb_shinfo(skb)->frag_list = NULL;
- last->next = skb->next;
- skb->next = frags;
- } else {
- last = skb;
- }
-
- skbp = &skb->next;
- } while ((skb = skb->next));
-
- return last;
-}
-
-/* Takes a linked list of skbs, which potentially contain frag_list
- * (whose members in turn potentially contain frag_lists, etc.) and
- * converts them into a single linear linked list.
- */
-static int straighten_frag_list(struct sk_buff **skbp)
-{
- struct sk_buff *err_skb;
-
- err_skb = normalize_frag_list(NULL, skbp);
- if (IS_ERR(err_skb))
- return PTR_ERR(err_skb);
-
- return 0;
-}
-
-static int coalesce_skb(struct sk_buff **headp)
-{
- struct sk_buff *frag, *head, *prev;
- int err;
-
- err = straighten_frag_list(headp);
- if (unlikely(err))
- return err;
- head = *headp;
-
- /* Coalesce frag list. */
- prev = head;
- for (frag = head->next; frag; frag = frag->next) {
- bool headstolen;
- int delta;
-
- if (unlikely(skb_unclone(prev, GFP_ATOMIC)))
- return -ENOMEM;
-
- if (!skb_try_coalesce(prev, frag, &headstolen, &delta)) {
- prev = frag;
- continue;
- }
-
- prev->next = frag->next;
- frag->len = 0;
- frag->data_len = 0;
- frag->truesize -= delta;
- kfree_skb_partial(frag, headstolen);
- frag = prev;
- }
-
- if (!head->next)
- return 0;
-
- for (frag = head->next; frag; frag = frag->next) {
- head->len += frag->len;
- head->data_len += frag->len;
- head->truesize += frag->truesize;
- }
-
- skb_shinfo(head)->frag_list = head->next;
- head->next = NULL;
- return 0;
-}
-#else
-static int coalesce_skb(struct sk_buff **headp)
-{
- struct sk_buff *frag, *head = *headp, *next;
- int delta = FRAG_CB(head)->first.tot_len - skb_headlen(head);
- int err;
-
- if (unlikely(!head->next))
- return 0;
-
- err = pskb_expand_head(head, 0, delta, GFP_ATOMIC);
- if (unlikely(err))
- return err;
-
- if (unlikely(!__pskb_pull_tail(head, head->data_len)))
- BUG();
-
- for (frag = head->next; frag; frag = next) {
- skb_copy_bits(frag, 0, skb_put(head, frag->len), frag->len);
- next = frag->next;
- kfree_skb(frag);
- }
-
- head->next = NULL;
- head->truesize = SKB_TRUESIZE(head->len);
- return 0;
-}
-#endif
-
-static int __try_to_segment(struct sk_buff *skb, bool csum_partial,
- bool ipv4, bool tcp, int l4_offset)
-{
- if (can_segment(skb, ipv4, tcp, csum_partial))
- return skb_list_segment(skb, ipv4, l4_offset);
- else
- return skb_linearize(skb);
-}
-
-static int try_to_segment(struct sk_buff *skb)
-{
- struct stthdr *stth = stt_hdr(skb);
- bool csum_partial = !!(stth->flags & STT_CSUM_PARTIAL);
- bool ipv4 = !!(stth->flags & STT_PROTO_IPV4);
- bool tcp = !!(stth->flags & STT_PROTO_TCP);
- int l4_offset = stth->l4_offset;
-
- return __try_to_segment(skb, csum_partial, ipv4, tcp, l4_offset);
-}
-
-static int segment_skb(struct sk_buff **headp, bool csum_partial,
- bool ipv4, bool tcp, int l4_offset)
-{
-#ifndef SKIP_ZERO_COPY
- int err;
-
- err = coalesce_skb(headp);
- if (err)
- return err;
-#endif
-
- if (skb_shinfo(*headp)->frag_list)
- return __try_to_segment(*headp, csum_partial,
- ipv4, tcp, l4_offset);
- return 0;
-}
-
-static int __push_stt_header(struct sk_buff *skb, __be64 tun_id,
- __be16 s_port, __be16 d_port,
- __be32 saddr, __be32 dst,
- __be16 l3_proto, u8 l4_proto,
- int dst_mtu)
-{
- int data_len = skb->len + sizeof(struct stthdr) + STT_ETH_PAD;
- unsigned short encap_mss;
- struct tcphdr *tcph;
- struct stthdr *stth;
-
- skb_push(skb, STT_HEADER_LEN);
- skb_reset_transport_header(skb);
- tcph = tcp_hdr(skb);
- memset(tcph, 0, STT_HEADER_LEN);
- stth = stt_hdr(skb);
-
- if (skb->ip_summed == CHECKSUM_PARTIAL) {
- stth->flags |= STT_CSUM_PARTIAL;
-
- stth->l4_offset = skb->csum_start -
- (skb_headroom(skb) +
- STT_HEADER_LEN);
-
- if (l3_proto == htons(ETH_P_IP))
- stth->flags |= STT_PROTO_IPV4;
-
- if (l4_proto == IPPROTO_TCP)
- stth->flags |= STT_PROTO_TCP;
-
- stth->mss = htons(skb_shinfo(skb)->gso_size);
- } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
- stth->flags |= STT_CSUM_VERIFIED;
- }
-
- stth->vlan_tci = htons(skb->vlan_tci);
- skb->vlan_tci = 0;
- put_unaligned(tun_id, &stth->key);
-
- tcph->source = s_port;
- tcph->dest = d_port;
- tcph->doff = sizeof(struct tcphdr) / 4;
- tcph->ack = 1;
- tcph->psh = 1;
- tcph->window = htons(USHRT_MAX);
- tcph->seq = htonl(data_len << STT_SEQ_LEN_SHIFT);
- tcph->ack_seq = ack_seq();
- tcph->check = ~tcp_v4_check(skb->len, saddr, dst, 0);
-
- skb->csum_start = skb_transport_header(skb) - skb->head;
- skb->csum_offset = offsetof(struct tcphdr, check);
- skb->ip_summed = CHECKSUM_PARTIAL;
-
- encap_mss = dst_mtu - sizeof(struct iphdr) - sizeof(struct tcphdr);
- if (data_len > encap_mss) {
- if (unlikely(skb_unclone(skb, GFP_ATOMIC)))
- return -EINVAL;
-
- skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
- skb_shinfo(skb)->gso_size = encap_mss;
- skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(data_len, encap_mss);
- } else {
- if (unlikely(clear_gso(skb)))
- return -EINVAL;
- }
- return 0;
-}
-
-static struct sk_buff *push_stt_header(struct sk_buff *head, __be64 tun_id,
- __be16 s_port, __be16 d_port,
- __be32 saddr, __be32 dst,
- __be16 l3_proto, u8 l4_proto,
- int dst_mtu)
-{
- struct sk_buff *skb;
-
- if (skb_shinfo(head)->frag_list) {
- bool ipv4 = (l3_proto == htons(ETH_P_IP));
- bool tcp = (l4_proto == IPPROTO_TCP);
- bool csum_partial = (head->ip_summed == CHECKSUM_PARTIAL);
- int l4_offset = skb_transport_offset(head);
-
- /* Need to call skb_orphan() to report currect true-size.
- * calling skb_orphan() in this layer is odd but SKB with
- * frag-list should not be associated with any socket, so
- * skb-orphan should be no-op. */
- skb_orphan(head);
- if (unlikely(segment_skb(&head, csum_partial,
- ipv4, tcp, l4_offset)))
- goto error;
- }
-
- for (skb = head; skb; skb = skb->next) {
- if (__push_stt_header(skb, tun_id, s_port, d_port, saddr, dst,
- l3_proto, l4_proto, dst_mtu))
- goto error;
- }
-
- return head;
-error:
- kfree_skb_list(head);
- return NULL;
-}
-
-static int stt_can_offload(struct sk_buff *skb, __be16 l3_proto, u8 l4_proto)
-{
- if (skb_is_gso(skb) && skb->ip_summed != CHECKSUM_PARTIAL) {
- int csum_offset;
- __sum16 *csum;
- int len;
-
- if (l4_proto == IPPROTO_TCP)
- csum_offset = offsetof(struct tcphdr, check);
- else if (l4_proto == IPPROTO_UDP)
- csum_offset = offsetof(struct udphdr, check);
- else
- return 0;
-
- len = skb->len - skb_transport_offset(skb);
- csum = (__sum16 *)(skb_transport_header(skb) + csum_offset);
-
- if (unlikely(!pskb_may_pull(skb, skb_transport_offset(skb) +
- csum_offset + sizeof(*csum))))
- return -EINVAL;
-
- if (l3_proto == htons(ETH_P_IP)) {
- struct iphdr *iph = ip_hdr(skb);
-
- *csum = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
- len, l4_proto, 0);
- } else if (l3_proto == htons(ETH_P_IPV6)) {
- struct ipv6hdr *ip6h = ipv6_hdr(skb);
-
- *csum = ~csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
- len, l4_proto, 0);
- } else {
- return 0;
- }
- skb->csum_start = skb_transport_header(skb) - skb->head;
- skb->csum_offset = csum_offset;
- skb->ip_summed = CHECKSUM_PARTIAL;
- }
-
- if (skb->ip_summed == CHECKSUM_PARTIAL) {
- /* Assume receiver can only offload TCP/UDP over IPv4/6,
- * and require 802.1Q VLANs to be accelerated.
- */
- if (l3_proto != htons(ETH_P_IP) &&
- l3_proto != htons(ETH_P_IPV6))
- return 0;
-
- if (l4_proto != IPPROTO_TCP && l4_proto != IPPROTO_UDP)
- return 0;
-
- /* L4 offset must fit in a 1-byte field. */
- if (skb->csum_start - skb_headroom(skb) > 255)
- return 0;
-
- if (skb_shinfo(skb)->gso_type & ~SUPPORTED_GSO_TYPES)
- return 0;
- }
- /* Total size of encapsulated packet must fit in 16 bits. */
- if (skb->len + STT_HEADER_LEN + sizeof(struct iphdr) > 65535)
- return 0;
-
- if (skb_vlan_tag_present(skb) && skb->vlan_proto != htons(ETH_P_8021Q))
- return 0;
- return 1;
-}
-
-static bool need_linearize(const struct sk_buff *skb)
-{
- struct skb_shared_info *shinfo = skb_shinfo(skb);
- int i;
-
- if (unlikely(shinfo->frag_list))
- return true;
-
- /* Generally speaking we should linearize if there are paged frags.
- * However, if all of the refcounts are 1 we know nobody else can
- * change them from underneath us and we can skip the linearization.
- */
- for (i = 0; i < shinfo->nr_frags; i++)
- if (unlikely(page_count(skb_frag_page(&shinfo->frags[i])) > 1))
- return true;
-
- return false;
-}
-
-static struct sk_buff *handle_offloads(struct sk_buff *skb, int min_headroom)
-{
- int err;
-
- if (skb_vlan_tag_present(skb) && skb->vlan_proto != htons(ETH_P_8021Q)) {
-
- min_headroom += VLAN_HLEN;
- if (skb_headroom(skb) < min_headroom) {
- int head_delta = SKB_DATA_ALIGN(min_headroom -
- skb_headroom(skb) + 16);
-
- err = pskb_expand_head(skb, max_t(int, head_delta, 0),
- 0, GFP_ATOMIC);
- if (unlikely(err))
- goto error;
- }
-
- skb = __vlan_hwaccel_push_inside(skb);
- if (!skb) {
- err = -ENOMEM;
- goto error;
- }
- }
-
- if (skb_is_gso(skb)) {
- struct sk_buff *nskb;
- char cb[sizeof(skb->cb)];
-
- memcpy(cb, skb->cb, sizeof(cb));
-
- nskb = __skb_gso_segment(skb, 0, false);
- if (IS_ERR(nskb)) {
- err = PTR_ERR(nskb);
- goto error;
- }
-
- consume_skb(skb);
- skb = nskb;
- while (nskb) {
- memcpy(nskb->cb, cb, sizeof(cb));
- nskb = nskb->next;
- }
- } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
- /* Pages aren't locked and could change at any time.
- * If this happens after we compute the checksum, the
- * checksum will be wrong. We linearize now to avoid
- * this problem.
- */
- if (unlikely(need_linearize(skb))) {
- err = __skb_linearize(skb);
- if (unlikely(err))
- goto error;
- }
-
- err = skb_checksum_help(skb);
- if (unlikely(err))
- goto error;
- }
- skb->ip_summed = CHECKSUM_NONE;
-
- return skb;
-error:
- kfree_skb(skb);
- return ERR_PTR(err);
-}
-
-static void skb_list_xmit(struct rtable *rt, struct sk_buff *skb, __be32 src,
- __be32 dst, __u8 tos, __u8 ttl, __be16 df)
-{
- while (skb) {
- struct sk_buff *next = skb->next;
-
- if (next)
- dst_clone(&rt->dst);
-
- skb->next = NULL;
- iptunnel_xmit(NULL, rt, skb, src, dst, IPPROTO_TCP,
- tos, ttl, df, false);
-
- skb = next;
- }
-}
-
-static u8 parse_ipv6_l4_proto(struct sk_buff *skb)
-{
- unsigned int nh_ofs = skb_network_offset(skb);
- int payload_ofs;
- struct ipv6hdr *nh;
- uint8_t nexthdr;
- __be16 frag_off;
-
- if (unlikely(!pskb_may_pull(skb, nh_ofs + sizeof(struct ipv6hdr))))
- return 0;
-
- nh = ipv6_hdr(skb);
- nexthdr = nh->nexthdr;
- payload_ofs = (u8 *)(nh + 1) - skb->data;
-
- payload_ofs = ipv6_skip_exthdr(skb, payload_ofs, &nexthdr, &frag_off);
- if (unlikely(payload_ofs < 0))
- return 0;
-
- return nexthdr;
-}
-
-static u8 skb_get_l4_proto(struct sk_buff *skb, __be16 l3_proto)
-{
- if (l3_proto == htons(ETH_P_IP)) {
- unsigned int nh_ofs = skb_network_offset(skb);
-
- if (unlikely(!pskb_may_pull(skb, nh_ofs + sizeof(struct iphdr))))
- return 0;
-
- return ip_hdr(skb)->protocol;
- } else if (l3_proto == htons(ETH_P_IPV6)) {
- return parse_ipv6_l4_proto(skb);
- }
- return 0;
-}
-
-static int stt_xmit_skb(struct sk_buff *skb, struct rtable *rt,
- __be32 src, __be32 dst, __u8 tos,
- __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port,
- __be64 tun_id)
-{
- struct ethhdr *eh = eth_hdr(skb);
- int ret = 0, min_headroom;
- __be16 inner_l3_proto;
- u8 inner_l4_proto;
-
- inner_l3_proto = eh->h_proto;
- inner_l4_proto = skb_get_l4_proto(skb, inner_l3_proto);
-
- min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
- + STT_HEADER_LEN + sizeof(struct iphdr);
-
- if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
- int head_delta = SKB_DATA_ALIGN(min_headroom -
- skb_headroom(skb) +
- 16);
-
- ret = pskb_expand_head(skb, max_t(int, head_delta, 0),
- 0, GFP_ATOMIC);
- if (unlikely(ret))
- goto err_free_rt;
- }
-
- ret = stt_can_offload(skb, inner_l3_proto, inner_l4_proto);
- if (ret < 0)
- goto err_free_rt;
- if (!ret) {
- skb = handle_offloads(skb, min_headroom);
- if (IS_ERR(skb)) {
- ret = PTR_ERR(skb);
- skb = NULL;
- goto err_free_rt;
- }
- }
-
- ret = 0;
- while (skb) {
- struct sk_buff *next_skb = skb->next;
-
- skb->next = NULL;
-
- if (next_skb)
- dst_clone(&rt->dst);
-
- /* Push STT and TCP header. */
- skb = push_stt_header(skb, tun_id, src_port, dst_port, src,
- dst, inner_l3_proto, inner_l4_proto,
- dst_mtu(&rt->dst));
- if (unlikely(!skb)) {
- ip_rt_put(rt);
- goto next;
- }
-
- /* Push IP header. */
- skb_list_xmit(rt, skb, src, dst, tos, ttl, df);
-
-next:
- skb = next_skb;
- }
-
- return 0;
-
-err_free_rt:
- ip_rt_put(rt);
- kfree_skb(skb);
- return ret;
-}
-
-static struct rtable *stt_get_rt(struct sk_buff *skb,
- struct net_device *dev,
- struct flowi4 *fl,
- const struct ip_tunnel_key *key,
- __be16 dport, __be16 sport)
-{
- struct net *net = dev_net(dev);
-
- /* Route lookup */
- memset(fl, 0, sizeof(*fl));
- fl->daddr = key->u.ipv4.dst;
- fl->saddr = key->u.ipv4.src;
- fl->flowi4_tos = RT_TOS(key->tos);
- fl->flowi4_mark = skb->mark;
- fl->flowi4_proto = IPPROTO_TCP;
- fl->fl4_dport = dport;
- fl->fl4_sport = sport;
-
- return ip_route_output_key(net, fl);
-}
-
-netdev_tx_t ovs_stt_xmit(struct sk_buff *skb)
-{
- struct net_device *dev = skb->dev;
- struct stt_dev *stt_dev = netdev_priv(dev);
- struct net *net = stt_dev->net;
- __be16 dport = stt_dev->dst_port;
- struct ip_tunnel_key *tun_key;
- struct ip_tunnel_info *tun_info;
- struct rtable *rt;
- struct flowi4 fl;
- __be16 sport;
- __be16 df;
- int err;
-
- tun_info = skb_tunnel_info(skb);
- if (unlikely(!tun_info)) {
- err = -EINVAL;
- goto error;
- }
-
- tun_key = &tun_info->key;
-
- sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true);
- rt = stt_get_rt(skb, dev, &fl, tun_key, dport, sport);
- if (IS_ERR(rt)) {
- err = PTR_ERR(rt);
- goto error;
- }
-
- df = tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
- skb->ignore_df = 1;
-
- stt_xmit_skb(skb, rt, fl.saddr, tun_key->u.ipv4.dst,
- tun_key->tos, tun_key->ttl,
- df, sport, dport, tun_key->tun_id);
- return NETDEV_TX_OK;
-error:
- kfree_skb(skb);
- dev->stats.tx_errors++;
- return err;
-}
-EXPORT_SYMBOL(ovs_stt_xmit);
-
-static void free_frag(struct stt_percpu *stt_percpu,
- struct pkt_frag *frag)
-{
- stt_percpu->frag_mem_used -= FRAG_CB(frag->skbs)->first.mem_used;
- kfree_skb_list(frag->skbs);
- list_del(&frag->lru_node);
- frag->skbs = NULL;
-}
-
-static void evict_frags(struct stt_percpu *stt_percpu)
-{
- while (!list_empty(&stt_percpu->frag_lru) &&
- stt_percpu->frag_mem_used > REASM_LO_THRESH) {
- struct pkt_frag *frag;
-
- frag = list_first_entry(&stt_percpu->frag_lru,
- struct pkt_frag,
- lru_node);
- free_frag(stt_percpu, frag);
- }
-}
-
-static bool pkt_key_match(struct net *net,
- const struct pkt_frag *a, const struct pkt_key *b)
-{
- return a->key.saddr == b->saddr && a->key.daddr == b->daddr &&
- a->key.pkt_seq == b->pkt_seq && a->key.mark == b->mark &&
- net_eq(dev_net(a->skbs->dev), net);
-}
-
-static u32 pkt_key_hash(const struct net *net, const struct pkt_key *key)
-{
- u32 initval = frag_hash_seed ^ (u32)(unsigned long)net ^ key->mark;
-
- return jhash_3words((__force u32)key->saddr, (__force u32)key->daddr,
- (__force u32)key->pkt_seq, initval);
-}
-
-static struct pkt_frag *lookup_frag(struct net *net,
- struct stt_percpu *stt_percpu,
- const struct pkt_key *key, u32 hash)
-{
- struct pkt_frag *frag, *victim_frag = NULL;
- int i;
-
- for (i = 0; i < FRAG_HASH_SEGS; i++) {
- frag = &stt_percpu->frag_hash[hash & (FRAG_HASH_ENTRIES - 1)];
-
- if (frag->skbs &&
- time_before(jiffies, frag->timestamp + FRAG_EXP_TIME) &&
- pkt_key_match(net, frag, key))
- return frag;
-
- if (!victim_frag ||
- (victim_frag->skbs &&
- (!frag->skbs ||
- time_before(frag->timestamp, victim_frag->timestamp))))
- victim_frag = frag;
-
- hash >>= FRAG_HASH_SHIFT;
- }
-
- if (victim_frag->skbs)
- free_frag(stt_percpu, victim_frag);
-
- return victim_frag;
-}
-
-#ifdef SKIP_ZERO_COPY
-static int __copy_skb(struct sk_buff *to, struct sk_buff *from,
- int *delta, bool *headstolen)
-{
- int err;
-
- if (unlikely(to->next))
- return -EINVAL;
-
- if (unlikely(FRAG_CB(to)->offset))
- return -EINVAL;
-
- if (unlikely(skb_unclone(to, GFP_ATOMIC)))
- return -ENOMEM;
-
- if (skb_try_coalesce(to, from, headstolen, delta))
- return 0;
-
- *headstolen = false;
- err = pskb_expand_head(to, 0, to->data_len + from->len, GFP_ATOMIC);
- if (unlikely(err))
- return err;
-
- if (unlikely(!__pskb_pull_tail(to, to->data_len)))
- BUG();
-
- skb_copy_bits(from, 0, skb_put(to, from->len), from->len);
-
- *delta = from->len;
- to->truesize += from->len;
- return 0;
-}
-#else
-static int __copy_skb(struct sk_buff *to, struct sk_buff *from,
- int *delta, bool *headstolen)
-{
- *headstolen = false;
- return -EINVAL;
-}
-#endif
-
-static struct sk_buff *reassemble(struct sk_buff *skb)
-{
- struct iphdr *iph = ip_hdr(skb);
- struct tcphdr *tcph = tcp_hdr(skb);
- u32 seq = ntohl(tcph->seq);
- struct stt_percpu *stt_percpu;
- struct sk_buff *last_skb, *copied_skb = NULL;
- struct pkt_frag *frag;
- struct pkt_key key;
- int tot_len, delta = skb->truesize;
- bool headstolen;
- u32 hash;
-
- tot_len = seq >> STT_SEQ_LEN_SHIFT;
- FRAG_CB(skb)->offset = seq & STT_SEQ_OFFSET_MASK;
-
- if (unlikely(skb->len == 0))
- goto out_free;
-
- if (unlikely(FRAG_CB(skb)->offset + skb->len > tot_len))
- goto out_free;
-
- if (tot_len == skb->len)
- goto out;
-
- key.saddr = iph->saddr;
- key.daddr = iph->daddr;
- key.pkt_seq = tcph->ack_seq;
- key.mark = skb->mark;
- hash = pkt_key_hash(dev_net(skb->dev), &key);
-
- stt_percpu = per_cpu_ptr(stt_percpu_data, smp_processor_id());
-
- spin_lock(&stt_percpu->lock);
-
- if (unlikely(stt_percpu->frag_mem_used + skb->truesize > REASM_HI_THRESH))
- evict_frags(stt_percpu);
-
- frag = lookup_frag(dev_net(skb->dev), stt_percpu, &key, hash);
- if (!frag->skbs) {
- frag->skbs = skb;
- frag->key = key;
- frag->timestamp = jiffies;
- FRAG_CB(skb)->first.last_skb = skb;
- FRAG_CB(skb)->first.mem_used = skb->truesize;
- FRAG_CB(skb)->first.tot_len = tot_len;
- FRAG_CB(skb)->first.rcvd_len = skb->len;
- FRAG_CB(skb)->first.set_ecn_ce = false;
- list_add_tail(&frag->lru_node, &stt_percpu->frag_lru);
- stt_percpu->frag_mem_used += skb->truesize;
- skb = NULL;
- goto unlock;
- }
-
- /* Optimize for the common case where fragments are received in-order
- * and not overlapping.
- */
- last_skb = FRAG_CB(frag->skbs)->first.last_skb;
- if (likely(FRAG_CB(last_skb)->offset + last_skb->len ==
- FRAG_CB(skb)->offset)) {
-
- if (!__copy_skb(frag->skbs, skb, &delta, &headstolen)) {
- copied_skb = skb;
- } else {
- last_skb->next = skb;
- FRAG_CB(frag->skbs)->first.last_skb = skb;
- }
- } else {
- struct sk_buff *prev = NULL, *next;
-
- for (next = frag->skbs; next; next = next->next) {
- if (FRAG_CB(next)->offset >= FRAG_CB(skb)->offset)
- break;
- prev = next;
- }
-
- /* Overlapping fragments aren't allowed. We shouldn't start
- * before the end of the previous fragment.
- */
- if (prev &&
- FRAG_CB(prev)->offset + prev->len > FRAG_CB(skb)->offset)
- goto unlock_free;
-
- /* We also shouldn't end after the beginning of the next
- * fragment.
- */
- if (next &&
- FRAG_CB(skb)->offset + skb->len > FRAG_CB(next)->offset)
- goto unlock_free;
-
- if (prev) {
- prev->next = skb;
- } else {
- FRAG_CB(skb)->first = FRAG_CB(frag->skbs)->first;
- frag->skbs = skb;
- }
-
- if (next)
- skb->next = next;
- else
- FRAG_CB(frag->skbs)->first.last_skb = skb;
- }
-
- FRAG_CB(frag->skbs)->first.set_ecn_ce |= INET_ECN_is_ce(iph->tos);
- FRAG_CB(frag->skbs)->first.rcvd_len += skb->len;
- stt_percpu->frag_mem_used += delta;
- FRAG_CB(frag->skbs)->first.mem_used += delta;
-
- if (FRAG_CB(frag->skbs)->first.tot_len ==
- FRAG_CB(frag->skbs)->first.rcvd_len) {
- struct sk_buff *frag_head = frag->skbs;
-
- frag_head->tstamp = skb->tstamp;
- if (FRAG_CB(frag_head)->first.set_ecn_ce)
- INET_ECN_set_ce(frag_head);
-
- list_del(&frag->lru_node);
- stt_percpu->frag_mem_used -= FRAG_CB(frag_head)->first.mem_used;
- frag->skbs = NULL;
- skb = frag_head;
- } else {
- list_move_tail(&frag->lru_node, &stt_percpu->frag_lru);
- skb = NULL;
- }
-
- if (copied_skb)
- kfree_skb_partial(copied_skb, headstolen);
- goto unlock;
-
-unlock_free:
- kfree_skb(skb);
- skb = NULL;
-unlock:
- spin_unlock(&stt_percpu->lock);
- return skb;
-out_free:
- kfree_skb(skb);
- skb = NULL;
-out:
- return skb;
-}
-
-static bool validate_checksum(struct sk_buff *skb)
-{
- struct iphdr *iph = ip_hdr(skb);
-
- if (skb_csum_unnecessary(skb))
- return true;
-
- if (skb->ip_summed == CHECKSUM_COMPLETE &&
- !tcp_v4_check(skb->len, iph->saddr, iph->daddr, skb->csum))
- return true;
-
- skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, skb->len,
- IPPROTO_TCP, 0);
-
- return __skb_checksum_complete(skb) == 0;
-}
-
-static bool set_offloads(struct sk_buff *skb)
-{
- struct stthdr *stth = stt_hdr(skb);
- unsigned int gso_type = 0;
- int l3_header_size;
- int l4_header_size;
- u16 csum_offset;
- u8 proto_type;
-
- if (stth->vlan_tci)
- __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
- ntohs(stth->vlan_tci));
-
- if (!(stth->flags & STT_CSUM_PARTIAL)) {
- if (stth->flags & STT_CSUM_VERIFIED)
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- else
- skb->ip_summed = CHECKSUM_NONE;
-
- return clear_gso(skb) == 0;
- }
-
- proto_type = stth->flags & STT_PROTO_TYPES;
-
- switch (proto_type) {
- case (STT_PROTO_IPV4 | STT_PROTO_TCP):
- /* TCP/IPv4 */
- csum_offset = offsetof(struct tcphdr, check);
- gso_type = SKB_GSO_TCPV4;
- l3_header_size = sizeof(struct iphdr);
- l4_header_size = sizeof(struct tcphdr);
- skb->protocol = htons(ETH_P_IP);
- break;
- case STT_PROTO_TCP:
- /* TCP/IPv6 */
- csum_offset = offsetof(struct tcphdr, check);
- gso_type = SKB_GSO_TCPV6;
- l3_header_size = sizeof(struct ipv6hdr);
- l4_header_size = sizeof(struct tcphdr);
- skb->protocol = htons(ETH_P_IPV6);
- break;
- case STT_PROTO_IPV4:
- /* UDP/IPv4 */
- csum_offset = offsetof(struct udphdr, check);
-#ifdef HAVE_SKB_GSO_UDP
- gso_type = SKB_GSO_UDP;
-#endif
- l3_header_size = sizeof(struct iphdr);
- l4_header_size = sizeof(struct udphdr);
- skb->protocol = htons(ETH_P_IP);
- break;
- default:
- /* UDP/IPv6 */
- csum_offset = offsetof(struct udphdr, check);
-#ifdef HAVE_SKB_GSO_UDP
- gso_type = SKB_GSO_UDP;
-#endif
- l3_header_size = sizeof(struct ipv6hdr);
- l4_header_size = sizeof(struct udphdr);
- skb->protocol = htons(ETH_P_IPV6);
- }
-
- if (unlikely(stth->l4_offset < ETH_HLEN + l3_header_size))
- return false;
-
- if (unlikely(!pskb_may_pull(skb, stth->l4_offset + l4_header_size)))
- return false;
-
- stth = stt_hdr(skb);
-
- skb->csum_start = skb_headroom(skb) + stth->l4_offset;
- skb->csum_offset = csum_offset;
- skb->ip_summed = CHECKSUM_PARTIAL;
-
- if (stth->mss) {
- if (unlikely(skb_unclone(skb, GFP_ATOMIC)))
- return false;
-
- skb_shinfo(skb)->gso_type = gso_type | SKB_GSO_DODGY;
- skb_shinfo(skb)->gso_size = ntohs(stth->mss);
- skb_shinfo(skb)->gso_segs = 0;
- } else {
- if (unlikely(clear_gso(skb)))
- return false;
- }
-
- return true;
-}
-
-static void rcv_list(struct net_device *dev, struct sk_buff *skb,
- struct metadata_dst *tun_dst)
-{
- struct sk_buff *next;
-
- do {
- next = skb->next;
- skb->next = NULL;
- if (next) {
- ovs_dst_hold((struct dst_entry *)tun_dst);
- ovs_skb_dst_set(next, (struct dst_entry *)tun_dst);
- }
- ovs_ip_tunnel_rcv(dev, skb, tun_dst);
- } while ((skb = next));
-}
-
-#ifndef USE_UPSTREAM_TUNNEL
-static int __stt_rcv(struct stt_dev *stt_dev, struct sk_buff *skb)
-{
- struct metadata_dst tun_dst;
-
- ovs_ip_tun_rx_dst(&tun_dst, skb, TUNNEL_KEY | TUNNEL_CSUM,
- get_unaligned(&stt_hdr(skb)->key), 0);
- tun_dst.u.tun_info.key.tp_src = tcp_hdr(skb)->source;
- tun_dst.u.tun_info.key.tp_dst = tcp_hdr(skb)->dest;
-
- rcv_list(stt_dev->dev, skb, &tun_dst);
- return 0;
-}
-#else
-static int __stt_rcv(struct stt_dev *stt_dev, struct sk_buff *skb)
-{
- struct metadata_dst *tun_dst;
- __be16 flags;
- __be64 tun_id;
-
- flags = TUNNEL_KEY | TUNNEL_CSUM;
- tun_id = get_unaligned(&stt_hdr(skb)->key);
- tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
- if (!tun_dst)
- return -ENOMEM;
- tun_dst->u.tun_info.key.tp_src = tcp_hdr(skb)->source;
- tun_dst->u.tun_info.key.tp_dst = tcp_hdr(skb)->dest;
-
- rcv_list(stt_dev->dev, skb, tun_dst);
- return 0;
-}
-#endif
-
-static void stt_rcv(struct stt_dev *stt_dev, struct sk_buff *skb)
-{
- int err;
-
- if (unlikely(!validate_checksum(skb)))
- goto drop;
-
- __skb_pull(skb, sizeof(struct tcphdr));
- skb = reassemble(skb);
- if (!skb)
- return;
-
- if (skb->next && coalesce_skb(&skb))
- goto drop;
-
- err = iptunnel_pull_header(skb,
- sizeof(struct stthdr) + STT_ETH_PAD,
- htons(ETH_P_TEB),
- !net_eq(stt_dev->net, dev_net(stt_dev->dev)));
- if (unlikely(err))
- goto drop;
-
- if (unlikely(stt_hdr(skb)->version != 0))
- goto drop;
-
- if (unlikely(!set_offloads(skb)))
- goto drop;
-
- if (skb_shinfo(skb)->frag_list && try_to_segment(skb))
- goto drop;
-
- err = __stt_rcv(stt_dev, skb);
- if (err)
- goto drop;
- return;
-drop:
- /* Consume bad packet */
- kfree_skb_list(skb);
- stt_dev->dev->stats.rx_errors++;
-}
-
-static void tcp_sock_release(struct socket *sock)
-{
- kernel_sock_shutdown(sock, SHUT_RDWR);
- sock_release(sock);
-}
-
-static int tcp_sock_create4(struct net *net, __be16 port,
- struct socket **sockp)
-{
- struct sockaddr_in tcp_addr;
- struct socket *sock = NULL;
- int err;
-
- err = sock_create_kern(net, AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
- if (err < 0)
- goto error;
-
- memset(&tcp_addr, 0, sizeof(tcp_addr));
- tcp_addr.sin_family = AF_INET;
- tcp_addr.sin_addr.s_addr = htonl(INADDR_ANY);
- tcp_addr.sin_port = port;
- err = kernel_bind(sock, (struct sockaddr *)&tcp_addr,
- sizeof(tcp_addr));
- if (err < 0)
- goto error;
-
- *sockp = sock;
- return 0;
-
-error:
- if (sock)
- tcp_sock_release(sock);
- *sockp = NULL;
- return err;
-}
-
-static void schedule_clean_percpu(void)
-{
- schedule_delayed_work(&clean_percpu_wq, CLEAN_PERCPU_INTERVAL);
-}
-
-static void clean_percpu(struct work_struct *work)
-{
- int i;
-
- for_each_possible_cpu(i) {
- struct stt_percpu *stt_percpu = per_cpu_ptr(stt_percpu_data, i);
- int j;
-
- for (j = 0; j < FRAG_HASH_ENTRIES; j++) {
- struct pkt_frag *frag;
-
- frag = &stt_percpu->frag_hash[j];
- if (!frag->skbs ||
- time_before(jiffies, frag->timestamp + FRAG_EXP_TIME))
- continue;
-
- spin_lock_bh(&stt_percpu->lock);
-
- if (frag->skbs &&
- time_after(jiffies, frag->timestamp + FRAG_EXP_TIME))
- free_frag(stt_percpu, frag);
-
- spin_unlock_bh(&stt_percpu->lock);
- }
- }
- schedule_clean_percpu();
-}
-
-#ifdef HAVE_NF_HOOKFN_ARG_OPS
-#define FIRST_PARAM const struct nf_hook_ops *ops
-#else
-#ifdef HAVE_NF_HOOKFN_ARG_PRIV
-#define FIRST_PARAM void *priv
-#else
-#define FIRST_PARAM unsigned int hooknum
-#endif
-#endif
-
-#ifdef HAVE_NF_HOOK_STATE
-#if RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(7,0) && RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(8,0)
-/* RHEL nfhook hacks. */
-#ifndef __GENKSYMS__
-#define LAST_PARAM const struct net_device *in, const struct net_device *out, \
- const struct nf_hook_state *state
-#else
-#define LAST_PARAM const struct net_device *in, const struct net_device *out, \
- int (*okfn)(struct sk_buff *)
-#endif
-#else
-#define LAST_PARAM const struct nf_hook_state *state
-#endif
-#else
-#define LAST_PARAM const struct net_device *in, const struct net_device *out, \
- int (*okfn)(struct sk_buff *)
-#endif
-
-static unsigned int nf_ip_hook(FIRST_PARAM, struct sk_buff *skb, LAST_PARAM)
-{
- struct stt_dev *stt_dev;
- int ip_hdr_len;
-
- if (ip_hdr(skb)->protocol != IPPROTO_TCP)
- return NF_ACCEPT;
-
- ip_hdr_len = ip_hdrlen(skb);
- if (unlikely(!pskb_may_pull(skb, ip_hdr_len + sizeof(struct tcphdr))))
- return NF_ACCEPT;
-
- skb_set_transport_header(skb, ip_hdr_len);
-
- stt_dev = stt_find_up_dev(dev_net(skb->dev), tcp_hdr(skb)->dest);
- if (!stt_dev)
- return NF_ACCEPT;
-
- __skb_pull(skb, ip_hdr_len);
- stt_rcv(stt_dev, skb);
- return NF_STOLEN;
-}
-
-static struct nf_hook_ops nf_hook_ops __read_mostly = {
- .hook = nf_ip_hook,
-#ifdef HAVE_NF_HOOKS_OPS_OWNER
- .owner = THIS_MODULE,
-#endif
- .pf = NFPROTO_IPV4,
- .hooknum = NF_INET_LOCAL_IN,
- .priority = INT_MAX,
-};
-
-static int stt_start(struct net *net)
-{
- struct stt_net *sn = net_generic(net, stt_net_id);
- int err;
- int i;
-
- if (n_tunnels) {
- n_tunnels++;
- return 0;
- }
- get_random_bytes(&frag_hash_seed, sizeof(u32));
-
- stt_percpu_data = alloc_percpu(struct stt_percpu);
- if (!stt_percpu_data) {
- err = -ENOMEM;
- goto error;
- }
-
- for_each_possible_cpu(i) {
- struct stt_percpu *stt_percpu = per_cpu_ptr(stt_percpu_data, i);
- struct pkt_frag *frag_hash;
-
- spin_lock_init(&stt_percpu->lock);
- INIT_LIST_HEAD(&stt_percpu->frag_lru);
- get_random_bytes(&per_cpu(pkt_seq_counter, i), sizeof(u32));
-
- frag_hash = kvmalloc_array(sizeof(struct pkt_frag),
- FRAG_HASH_ENTRIES,
- GFP_KERNEL | __GFP_ZERO);
- if (!frag_hash) {
- err = -ENOMEM;
- goto free_percpu;
- }
- stt_percpu->frag_hash = frag_hash;
- }
- schedule_clean_percpu();
- n_tunnels++;
-
- if (sn->n_tunnels) {
- sn->n_tunnels++;
- return 0;
- }
-#ifdef HAVE_NF_REGISTER_NET_HOOK
- /* On kernel which support per net nf-hook, nf_register_hook() takes
- * rtnl-lock, which results in dead lock in stt-dev-create. Therefore
- * use this new API.
- */
-
- if (sn->nf_hook_reg_done)
- goto out;
-
- err = nf_register_net_hook(net, &nf_hook_ops);
- if (!err)
- sn->nf_hook_reg_done = true;
-#else
- /* Register STT only on very first STT device addition. */
- if (!list_empty(&nf_hook_ops.list))
- goto out;
-
- err = nf_register_hook(&nf_hook_ops);
-#endif
- if (err)
- goto dec_n_tunnel;
-out:
- sn->n_tunnels++;
- return 0;
-
-dec_n_tunnel:
- n_tunnels--;
-free_percpu:
- for_each_possible_cpu(i) {
- struct stt_percpu *stt_percpu = per_cpu_ptr(stt_percpu_data, i);
-
- if (stt_percpu->frag_hash)
- kvfree(stt_percpu->frag_hash);
- }
-
- free_percpu(stt_percpu_data);
-
-error:
- return err;
-}
-
-static void stt_cleanup(struct net *net)
-{
- struct stt_net *sn = net_generic(net, stt_net_id);
- int i;
-
- sn->n_tunnels--;
- n_tunnels--;
- if (n_tunnels)
- return;
-
- cancel_delayed_work_sync(&clean_percpu_wq);
- for_each_possible_cpu(i) {
- struct stt_percpu *stt_percpu = per_cpu_ptr(stt_percpu_data, i);
- int j;
-
- for (j = 0; j < FRAG_HASH_ENTRIES; j++) {
- struct pkt_frag *frag;
-
- frag = &stt_percpu->frag_hash[j];
- kfree_skb_list(frag->skbs);
- }
-
- kvfree(stt_percpu->frag_hash);
- }
-
- free_percpu(stt_percpu_data);
-}
-
-static netdev_tx_t stt_dev_xmit(struct sk_buff *skb, struct net_device *dev)
-{
-#ifdef USE_UPSTREAM_TUNNEL
- return ovs_stt_xmit(skb);
-#else
- /* Drop All packets coming from networking stack. OVS-CB is
- * not initialized for these packets.
- */
- dev_kfree_skb(skb);
- dev->stats.tx_dropped++;
- return NETDEV_TX_OK;
-#endif
-}
-
-/* Setup stats when device is created */
-static int stt_init(struct net_device *dev)
-{
- dev->tstats = (typeof(dev->tstats)) netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
-
- return 0;
-}
-
-static void stt_uninit(struct net_device *dev)
-{
- free_percpu(dev->tstats);
-}
-
-static int stt_open(struct net_device *dev)
-{
- struct stt_dev *stt = netdev_priv(dev);
- struct net *net = stt->net;
- struct stt_net *sn = net_generic(net, stt_net_id);
- int err;
-
- err = stt_start(net);
- if (err)
- return err;
-
- err = tcp_sock_create4(net, stt->dst_port, &stt->sock);
- if (err)
- return err;
- list_add_rcu(&stt->up_next, &sn->stt_up_list);
- return 0;
-}
-
-static int stt_stop(struct net_device *dev)
-{
- struct stt_dev *stt_dev = netdev_priv(dev);
- struct net *net = stt_dev->net;
-
- list_del_rcu(&stt_dev->up_next);
- synchronize_net();
- tcp_sock_release(stt_dev->sock);
- stt_dev->sock = NULL;
- stt_cleanup(net);
- return 0;
-}
-
-static int __stt_change_mtu(struct net_device *dev, int new_mtu, bool strict)
-{
- int max_mtu = IP_MAX_MTU - STT_HEADER_LEN - sizeof(struct iphdr)
- - dev->hard_header_len;
-
- if (new_mtu < 68)
- return -EINVAL;
-
- if (new_mtu > max_mtu) {
- if (strict)
- return -EINVAL;
-
- new_mtu = max_mtu;
- }
-
- dev->mtu = new_mtu;
- return 0;
-}
-
-static int stt_change_mtu(struct net_device *dev, int new_mtu)
-{
- return __stt_change_mtu(dev, new_mtu, true);
-}
-
-int ovs_stt_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
-{
- struct ip_tunnel_info *info = skb_tunnel_info(skb);
- struct stt_dev *stt_dev = netdev_priv(dev);
- struct net *net = stt_dev->net;
- __be16 dport = stt_dev->dst_port;
- __be16 sport;
- struct flowi4 fl4;
- struct rtable *rt;
-
- if (ip_tunnel_info_af(info) != AF_INET)
- return -EINVAL;
-
- sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true);
- rt = stt_get_rt(skb, dev, &fl4, &info->key, dport, sport);
- if (IS_ERR(rt))
- return PTR_ERR(rt);
-
- ip_rt_put(rt);
-
- info->key.u.ipv4.src = fl4.saddr;
- info->key.tp_src = sport;
- info->key.tp_dst = dport;
- return 0;
-}
-EXPORT_SYMBOL_GPL(ovs_stt_fill_metadata_dst);
-
-static const struct net_device_ops stt_netdev_ops = {
- .ndo_init = stt_init,
- .ndo_uninit = stt_uninit,
- .ndo_open = stt_open,
- .ndo_stop = stt_stop,
- .ndo_start_xmit = stt_dev_xmit,
- .ndo_get_stats64 = ip_tunnel_get_stats64,
-#ifdef HAVE_RHEL7_MAX_MTU
- .ndo_size = sizeof(struct net_device_ops),
- .extended.ndo_change_mtu = stt_change_mtu,
-#else
- .ndo_change_mtu = stt_change_mtu,
-#endif
- .ndo_validate_addr = eth_validate_addr,
- .ndo_set_mac_address = eth_mac_addr,
-#ifdef USE_UPSTREAM_TUNNEL
-#ifdef HAVE_NDO_FILL_METADATA_DST
- .ndo_fill_metadata_dst = stt_fill_metadata_dst,
-#endif
-#endif
-};
-
-static void stt_get_drvinfo(struct net_device *dev,
- struct ethtool_drvinfo *drvinfo)
-{
- strlcpy(drvinfo->version, STT_NETDEV_VER, sizeof(drvinfo->version));
- strlcpy(drvinfo->driver, "stt", sizeof(drvinfo->driver));
-}
-
-static const struct ethtool_ops stt_ethtool_ops = {
- .get_drvinfo = stt_get_drvinfo,
- .get_link = ethtool_op_get_link,
-};
-
-/* Info for udev, that this is a virtual tunnel endpoint */
-static struct device_type stt_type = {
- .name = "stt",
-};
-
-/* Initialize the device structure. */
-static void stt_setup(struct net_device *dev)
-{
- ether_setup(dev);
-
- dev->netdev_ops = &stt_netdev_ops;
- dev->ethtool_ops = &stt_ethtool_ops;
-#ifndef HAVE_NEEDS_FREE_NETDEV
- dev->destructor = free_netdev;
-#else
- dev->needs_free_netdev = true;
-#endif
-
- SET_NETDEV_DEVTYPE(dev, &stt_type);
-
- dev->features |= NETIF_F_LLTX | NETIF_F_NETNS_LOCAL;
- dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM;
- dev->features |= NETIF_F_RXCSUM;
- dev->features |= NETIF_F_GSO_SOFTWARE;
-
- dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
- dev->hw_features |= NETIF_F_GSO_SOFTWARE;
-
-#ifdef USE_UPSTREAM_TUNNEL
- netif_keep_dst(dev);
-#endif
- dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
- eth_hw_addr_random(dev);
-}
-
-static const struct nla_policy stt_policy[IFLA_STT_MAX + 1] = {
- [IFLA_STT_PORT] = { .type = NLA_U16 },
-};
-
-#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK
-static int stt_validate(struct nlattr *tb[], struct nlattr *data[],
- struct netlink_ext_ack __always_unused *extack)
-#else
-static int stt_validate(struct nlattr *tb[], struct nlattr *data[])
-#endif
-{
- if (tb[IFLA_ADDRESS]) {
- if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
- return -EINVAL;
-
- if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
- return -EADDRNOTAVAIL;
- }
-
- return 0;
-}
-
-static struct stt_dev *find_dev(struct net *net, __be16 dst_port)
-{
- struct stt_net *sn = net_generic(net, stt_net_id);
- struct stt_dev *dev;
-
- list_for_each_entry(dev, &sn->stt_list, next) {
- if (dev->dst_port == dst_port)
- return dev;
- }
- return NULL;
-}
-
-static int stt_configure(struct net *net, struct net_device *dev,
- __be16 dst_port)
-{
- struct stt_net *sn = net_generic(net, stt_net_id);
- struct stt_dev *stt = netdev_priv(dev);
- int err;
-
- stt->net = net;
- stt->dev = dev;
-
- stt->dst_port = dst_port;
-
- if (find_dev(net, dst_port))
- return -EBUSY;
-
- err = __stt_change_mtu(dev, IP_MAX_MTU, false);
- if (err)
- return err;
-
- err = register_netdevice(dev);
- if (err)
- return err;
-
- list_add(&stt->next, &sn->stt_list);
- return 0;
-}
-
-#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
-static int stt_newlink(struct net *net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[],
- struct netlink_ext_ack __always_unused *extack)
-#else
-static int stt_newlink(struct net *net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[])
-#endif
-{
- __be16 dst_port = htons(STT_DST_PORT);
-
- if (data[IFLA_STT_PORT])
- dst_port = nla_get_be16(data[IFLA_STT_PORT]);
-
- return stt_configure(net, dev, dst_port);
-}
-
-static void stt_dellink(struct net_device *dev, struct list_head *head)
-{
- struct stt_dev *stt = netdev_priv(dev);
-
- list_del(&stt->next);
- unregister_netdevice_queue(dev, head);
-}
-
-static size_t stt_get_size(const struct net_device *dev)
-{
- return nla_total_size(sizeof(__be32)); /* IFLA_STT_PORT */
-}
-
-static int stt_fill_info(struct sk_buff *skb, const struct net_device *dev)
-{
- struct stt_dev *stt = netdev_priv(dev);
-
- if (nla_put_be16(skb, IFLA_STT_PORT, stt->dst_port))
- goto nla_put_failure;
-
- return 0;
-
-nla_put_failure:
- return -EMSGSIZE;
-}
-
-static struct rtnl_link_ops stt_link_ops __read_mostly = {
- .kind = "stt",
- .maxtype = IFLA_STT_MAX,
- .policy = stt_policy,
- .priv_size = sizeof(struct stt_dev),
- .setup = stt_setup,
- .validate = stt_validate,
- .newlink = stt_newlink,
- .dellink = stt_dellink,
- .get_size = stt_get_size,
- .fill_info = stt_fill_info,
-};
-
-struct net_device *ovs_stt_dev_create_fb(struct net *net, const char *name,
- u8 name_assign_type, u16 dst_port)
-{
- struct nlattr *tb[IFLA_MAX + 1];
- struct net_device *dev;
- int err;
-
- memset(tb, 0, sizeof(tb));
- dev = rtnl_create_link(net, (char *) name, name_assign_type,
- &stt_link_ops, tb);
- if (IS_ERR(dev))
- return dev;
-
- err = stt_configure(net, dev, htons(dst_port));
- if (err) {
- free_netdev(dev);
- return ERR_PTR(err);
- }
- return dev;
-}
-EXPORT_SYMBOL_GPL(ovs_stt_dev_create_fb);
-
-static int stt_init_net(struct net *net)
-{
- struct stt_net *sn = net_generic(net, stt_net_id);
-
- INIT_LIST_HEAD(&sn->stt_list);
- INIT_LIST_HEAD(&sn->stt_up_list);
-#ifdef HAVE_NF_REGISTER_NET_HOOK
- sn->nf_hook_reg_done = false;
-#endif
- return 0;
-}
-
-static void stt_exit_net(struct net *net)
-{
- struct stt_net *sn = net_generic(net, stt_net_id);
- struct stt_dev *stt, *next;
- struct net_device *dev, *aux;
- LIST_HEAD(list);
-
-#ifdef HAVE_NF_REGISTER_NET_HOOK
- /* Ideally this should be done from stt_stop(), But on some kernels
- * nf-unreg operation needs RTNL-lock, which can cause deallock.
- * So it is done from here. */
- if (sn->nf_hook_reg_done)
- nf_unregister_net_hook(net, &nf_hook_ops);
-#endif
-
- rtnl_lock();
-
- /* gather any stt devices that were moved into this ns */
- for_each_netdev_safe(net, dev, aux)
- if (dev->rtnl_link_ops == &stt_link_ops)
- unregister_netdevice_queue(dev, &list);
-
- list_for_each_entry_safe(stt, next, &sn->stt_list, next) {
- /* If stt->dev is in the same netns, it was already added
- * to the stt by the previous loop.
- */
- if (!net_eq(dev_net(stt->dev), net))
- unregister_netdevice_queue(stt->dev, &list);
- }
-
- /* unregister the devices gathered above */
- unregister_netdevice_many(&list);
- rtnl_unlock();
-}
-
-static struct pernet_operations stt_net_ops = {
- .init = stt_init_net,
- .exit = stt_exit_net,
- .id = &stt_net_id,
- .size = sizeof(struct stt_net),
-};
-
-int stt_init_module(void)
-{
- int rc;
-
- rc = register_pernet_subsys(&stt_net_ops);
- if (rc)
- goto out1;
-
- rc = rtnl_link_register(&stt_link_ops);
- if (rc)
- goto out2;
-
-#ifdef HAVE_LIST_IN_NF_HOOK_OPS
- INIT_LIST_HEAD(&nf_hook_ops.list);
-#endif
- pr_info("STT tunneling driver\n");
- return 0;
-out2:
- unregister_pernet_subsys(&stt_net_ops);
-out1:
- pr_err("Error while initializing STT %d\n", rc);
- return rc;
-}
-
-void stt_cleanup_module(void)
-{
-#ifndef HAVE_NF_REGISTER_NET_HOOK
- if (!list_empty(&nf_hook_ops.list))
- nf_unregister_hook(&nf_hook_ops);
-#endif
- rtnl_link_unregister(&stt_link_ops);
- unregister_pernet_subsys(&stt_net_ops);
-}
-#endif
diff --git a/datapath/linux/compat/udp.c b/datapath/linux/compat/udp.c
deleted file mode 100644
index 38bf332db..000000000
--- a/datapath/linux/compat/udp.c
+++ /dev/null
@@ -1,46 +0,0 @@
-#include <linux/version.h>
-
-#ifndef USE_UPSTREAM_TUNNEL
-
-#include <net/udp.h>
-
-/* Function to set UDP checksum for an IPv4 UDP packet. This is intended
- * for the simple case like when setting the checksum for a UDP tunnel.
- */
-void rpl_udp_set_csum(bool nocheck, struct sk_buff *skb,
- __be32 saddr, __be32 daddr, int len)
-{
- struct udphdr *uh = udp_hdr(skb);
-
-
- if (nocheck) {
- uh->check = 0;
- } else if (skb_is_gso(skb)) {
- uh->check = ~udp_v4_check(len, saddr, daddr, 0);
- } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
- uh->check = 0;
- uh->check = udp_v4_check(len, saddr, daddr, lco_csum(skb));
- if (uh->check == 0)
- uh->check = CSUM_MANGLED_0;
- } else {
- skb->ip_summed = CHECKSUM_PARTIAL;
- skb->csum_start = skb_transport_header(skb) - skb->head;
- skb->csum_offset = offsetof(struct udphdr, check);
- uh->check = ~udp_v4_check(len, saddr, daddr, 0);
- }
-}
-EXPORT_SYMBOL_GPL(rpl_udp_set_csum);
-
-#endif /* Linux version < 3.16 */
-
-#ifdef OVS_CHECK_UDP_TUNNEL_ZERO_CSUM
-void rpl_udp6_csum_zero_error(struct sk_buff *skb)
-{
- /* RFC 2460 section 8.1 says that we SHOULD log
- * this error. Well, it is reasonable.
- */
- net_dbg_ratelimited("IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n",
- &ipv6_hdr(skb)->saddr, ntohs(udp_hdr(skb)->source),
- &ipv6_hdr(skb)->daddr, ntohs(udp_hdr(skb)->dest));
-}
-#endif
diff --git a/datapath/linux/compat/udp_tunnel.c b/datapath/linux/compat/udp_tunnel.c
deleted file mode 100644
index 852069f62..000000000
--- a/datapath/linux/compat/udp_tunnel.c
+++ /dev/null
@@ -1,292 +0,0 @@
-#include <linux/version.h>
-
-#ifndef USE_UPSTREAM_TUNNEL
-
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/socket.h>
-#include <linux/udp.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <net/ip_tunnels.h>
-#include <net/udp.h>
-#include <net/udp_tunnel.h>
-#include <net/net_namespace.h>
-#include <net/ip6_checksum.h>
-#include <net/ip6_tunnel.h>
-
-#include "gso.h"
-
-int rpl_udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
- struct socket **sockp)
-{
- int err;
- struct socket *sock = NULL;
- struct sockaddr_in udp_addr;
-
- err = sock_create_kern(net, AF_INET, SOCK_DGRAM, 0, &sock);
- if (err < 0)
- goto error;
-
- udp_addr.sin_family = AF_INET;
- udp_addr.sin_addr = cfg->local_ip;
- udp_addr.sin_port = cfg->local_udp_port;
- err = kernel_bind(sock, (struct sockaddr *)&udp_addr,
- sizeof(udp_addr));
- if (err < 0)
- goto error;
-
- if (cfg->peer_udp_port) {
- udp_addr.sin_family = AF_INET;
- udp_addr.sin_addr = cfg->peer_ip;
- udp_addr.sin_port = cfg->peer_udp_port;
- err = kernel_connect(sock, (struct sockaddr *)&udp_addr,
- sizeof(udp_addr), 0);
- if (err < 0)
- goto error;
- }
-#ifdef HAVE_SK_NO_CHECK_TX
- sock->sk->sk_no_check_tx = !cfg->use_udp_checksums;
-#endif
- *sockp = sock;
- return 0;
-
-error:
- if (sock) {
- kernel_sock_shutdown(sock, SHUT_RDWR);
- sock_release(sock);
- }
- *sockp = NULL;
- return err;
-}
-EXPORT_SYMBOL(rpl_udp_sock_create4);
-
-int rpl_udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
- struct socket **sockp)
-{
- struct sockaddr_in6 udp6_addr;
- int err;
- struct socket *sock = NULL;
-
- err = sock_create_kern(net, AF_INET6, SOCK_DGRAM, 0, &sock);
- if (err < 0)
- goto error;
-
- if (cfg->ipv6_v6only) {
- int val = 1;
-
- err = kernel_setsockopt(sock, IPPROTO_IPV6, IPV6_V6ONLY,
- (char *) &val, sizeof(val));
- if (err < 0)
- goto error;
- }
-
- udp6_addr.sin6_family = AF_INET6;
- memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6,
- sizeof(udp6_addr.sin6_addr));
- udp6_addr.sin6_port = cfg->local_udp_port;
- err = kernel_bind(sock, (struct sockaddr *)&udp6_addr,
- sizeof(udp6_addr));
- if (err < 0)
- goto error;
-
- if (cfg->peer_udp_port) {
- udp6_addr.sin6_family = AF_INET6;
- memcpy(&udp6_addr.sin6_addr, &cfg->peer_ip6,
- sizeof(udp6_addr.sin6_addr));
- udp6_addr.sin6_port = cfg->peer_udp_port;
- err = kernel_connect(sock,
- (struct sockaddr *)&udp6_addr,
- sizeof(udp6_addr), 0);
- }
- if (err < 0)
- goto error;
-
- udp_set_no_check6_tx(sock->sk, !cfg->use_udp6_tx_checksums);
- udp_set_no_check6_rx(sock->sk, !cfg->use_udp6_rx_checksums);
-
- *sockp = sock;
- return 0;
-
-error:
- if (sock) {
- kernel_sock_shutdown(sock, SHUT_RDWR);
- sock_release(sock);
- }
- *sockp = NULL;
- return err;
-}
-EXPORT_SYMBOL_GPL(rpl_udp_sock_create6);
-
-void rpl_setup_udp_tunnel_sock(struct net *net, struct socket *sock,
- struct udp_tunnel_sock_cfg *cfg)
-{
- struct sock *sk = sock->sk;
-
- /* Disable multicast loopback */
- inet_sk(sk)->mc_loop = 0;
-
- rcu_assign_sk_user_data(sk, cfg->sk_user_data);
-
- udp_sk(sk)->encap_type = cfg->encap_type;
- udp_sk(sk)->encap_rcv = cfg->encap_rcv;
- udp_sk(sk)->encap_destroy = cfg->encap_destroy;
-#ifdef HAVE_UDP_TUNNEL_SOCK_CFG_GRO_RECEIVE
- udp_sk(sk)->gro_receive = cfg->gro_receive;
- udp_sk(sk)->gro_complete = cfg->gro_complete;
-#endif
-
- udp_tunnel_encap_enable(sock);
-}
-EXPORT_SYMBOL_GPL(rpl_setup_udp_tunnel_sock);
-
-void rpl_udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk,
- struct sk_buff *skb, __be32 src, __be32 dst,
- __u8 tos, __u8 ttl, __be16 df, __be16 src_port,
- __be16 dst_port, bool xnet, bool nocheck)
-{
- struct udphdr *uh;
-
- __skb_push(skb, sizeof(*uh));
- skb_reset_transport_header(skb);
- uh = udp_hdr(skb);
-
- uh->dest = dst_port;
- uh->source = src_port;
- uh->len = htons(skb->len);
-
- udp_set_csum(nocheck, skb, src, dst, skb->len);
-
- iptunnel_xmit(sk, rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df, xnet);
-}
-EXPORT_SYMBOL_GPL(rpl_udp_tunnel_xmit_skb);
-
-void rpl_udp_tunnel_sock_release(struct socket *sock)
-{
- rcu_assign_sk_user_data(sock->sk, NULL);
- kernel_sock_shutdown(sock, SHUT_RDWR);
- sock_release(sock);
-}
-EXPORT_SYMBOL_GPL(rpl_udp_tunnel_sock_release);
-
-#if IS_ENABLED(CONFIG_IPV6)
-
-#define udp_v6_check rpl_udp_v6_check
-static __sum16 udp_v6_check(int len,
- const struct in6_addr *saddr,
- const struct in6_addr *daddr,
- __wsum base)
-{
- return csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, base);
-}
-
-#define udp6_set_csum rpl_udp6_set_csum
-static void udp6_set_csum(bool nocheck, struct sk_buff *skb,
- const struct in6_addr *saddr,
- const struct in6_addr *daddr, int len)
-{
- struct udphdr *uh = udp_hdr(skb);
-
- if (nocheck)
- uh->check = 0;
- else if (skb_is_gso(skb))
- uh->check = ~udp_v6_check(len, saddr, daddr, 0);
- else if (skb->ip_summed == CHECKSUM_PARTIAL) {
- uh->check = 0;
- uh->check = udp_v6_check(len, saddr, daddr, lco_csum(skb));
- if (uh->check == 0)
- uh->check = CSUM_MANGLED_0;
- } else {
- skb->ip_summed = CHECKSUM_PARTIAL;
- skb->csum_start = skb_transport_header(skb) - skb->head;
- skb->csum_offset = offsetof(struct udphdr, check);
- uh->check = ~udp_v6_check(len, saddr, daddr, 0);
- }
-}
-
-#define ip6_flow_hdr rpl_ip6_flow_hdr
-static inline void ip6_flow_hdr(struct ipv6hdr *hdr, unsigned int tclass,
- __be32 flowlabel)
-{
- *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | flowlabel;
-}
-
-int rpl_udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb,
- struct net_device *dev, struct in6_addr *saddr,
- struct in6_addr *daddr,
- __u8 prio, __u8 ttl, __be32 label, __be16 src_port,
- __be16 dst_port, bool nocheck)
-{
- struct udphdr *uh;
- struct ipv6hdr *ip6h;
-
- __skb_push(skb, sizeof(*uh));
- skb_reset_transport_header(skb);
- uh = udp_hdr(skb);
-
- uh->dest = dst_port;
- uh->source = src_port;
-
- uh->len = htons(skb->len);
-
- memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
- IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED
- | IPSKB_REROUTED);
- skb_dst_set(skb, dst);
-
- udp6_set_csum(nocheck, skb, saddr, daddr, skb->len);
-
- __skb_push(skb, sizeof(*ip6h));
- skb_reset_network_header(skb);
- ip6h = ipv6_hdr(skb);
- ip6_flow_hdr(ip6h, prio, label);
- ip6h->payload_len = htons(skb->len);
- ip6h->nexthdr = IPPROTO_UDP;
- ip6h->hop_limit = ttl;
- ip6h->daddr = *daddr;
- ip6h->saddr = *saddr;
-
- ip6tunnel_xmit(sk, skb, dev);
- return 0;
-}
-#endif
-
-#ifndef USE_UPSTREAM_TUNNEL_GSO
-void ovs_udp_gso(struct sk_buff *skb)
-{
- int udp_offset = skb_transport_offset(skb);
- struct udphdr *uh;
-
- uh = udp_hdr(skb);
- uh->len = htons(skb->len - udp_offset);
-}
-EXPORT_SYMBOL_GPL(ovs_udp_gso);
-
-void ovs_udp_csum_gso(struct sk_buff *skb)
-{
- int udp_offset = skb_transport_offset(skb);
-
- ovs_udp_gso(skb);
-
- if (!OVS_GSO_CB(skb)->ipv6) {
- struct iphdr *iph = ip_hdr(skb);
-
- /* csum segment if tunnel sets skb with csum. The cleanest way
- * to do this just to set it up from scratch. */
- udp_set_csum(false, skb, iph->saddr, iph->daddr,
- skb->len - udp_offset);
-#if IS_ENABLED(CONFIG_IPV6)
- } else {
- struct ipv6hdr *ip6h;
-
- ip6h = ipv6_hdr(skb);
- udp6_set_csum(false, skb, &ip6h->saddr, &ip6h->daddr,
- skb->len - udp_offset);
-#endif
- }
-}
-EXPORT_SYMBOL_GPL(ovs_udp_csum_gso);
-#endif /* USE_UPSTREAM_TUNNEL_GSO */
-
-#endif
diff --git a/datapath/linux/compat/utils.c b/datapath/linux/compat/utils.c
deleted file mode 100644
index a4a98ba65..000000000
--- a/datapath/linux/compat/utils.c
+++ /dev/null
@@ -1,112 +0,0 @@
-#include <linux/module.h>
-#include <linux/jiffies.h>
-#include <linux/kernel.h>
-#include <linux/ctype.h>
-#include <linux/inet.h>
-#include <linux/mm.h>
-#include <linux/net.h>
-#include <net/checksum.h>
-#include <net/ip.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/percpu.h>
-#include <linux/init.h>
-#include <linux/ratelimit.h>
-
-#include <net/sock.h>
-
-#include <asm/byteorder.h>
-#include <asm/uaccess.h>
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3,13,0)
-
-bool rpl___net_get_random_once(void *buf, int nbytes, bool *done,
- atomic_t *done_key)
-{
- static DEFINE_SPINLOCK(lock);
- unsigned long flags;
-
- spin_lock_irqsave(&lock, flags);
- if (*done) {
- spin_unlock_irqrestore(&lock, flags);
- return false;
- }
-
- get_random_bytes(buf, nbytes);
- *done = true;
- spin_unlock_irqrestore(&lock, flags);
-
- atomic_set(done_key, 1);
-
- return true;
-}
-EXPORT_SYMBOL_GPL(rpl___net_get_random_once);
-
-#endif
-
-#ifdef NEED_ALLOC_PERCPU_GFP
-void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp)
-{
- void __percpu *p;
- int i;
-
- /* older kernel do not allow all GFP flags, specifically atomic
- * allocation.
- */
- if (gfp & ~(GFP_KERNEL | __GFP_ZERO))
- return NULL;
- p = __alloc_percpu(size, align);
- if (!p)
- return p;
-
- if (!(gfp & __GFP_ZERO))
- return p;
-
- for_each_possible_cpu(i) {
- void *d;
-
- d = per_cpu_ptr(p, i);
- memset(d, 0, size);
- }
- return p;
-}
-#endif
-
-#ifndef HAVE_NLA_PUT_64BIT
-int rpl_nla_put_64bit(struct sk_buff *skb, int attrtype, int attrlen,
- const void *data, int padattr)
-{
- size_t len;
-
- if (nla_need_padding_for_64bit(skb))
- len = nla_total_size_64bit(attrlen);
- else
- len = nla_total_size(attrlen);
- if (unlikely(skb_tailroom(skb) < len))
- return -EMSGSIZE;
-
- __nla_put_64bit(skb, attrtype, attrlen, data, padattr);
- return 0;
-}
-EXPORT_SYMBOL_GPL(rpl_nla_put_64bit);
-
-void rpl___nla_put_64bit(struct sk_buff *skb, int attrtype, int attrlen,
- const void *data, int padattr)
-{
- struct nlattr *nla;
-
- nla = __nla_reserve_64bit(skb, attrtype, attrlen, padattr);
- memcpy(nla_data(nla), data, attrlen);
-}
-EXPORT_SYMBOL_GPL(rpl___nla_put_64bit);
-
-struct nlattr *rpl___nla_reserve_64bit(struct sk_buff *skb, int attrtype,
- int attrlen, int padattr)
-{
- if (nla_need_padding_for_64bit(skb))
- nla_align_64bit(skb, padattr);
-
- return __nla_reserve(skb, attrtype, attrlen);
-}
-EXPORT_SYMBOL_GPL(rpl___nla_reserve_64bit);
-#endif
diff --git a/datapath/linux/compat/vxlan.c b/datapath/linux/compat/vxlan.c
deleted file mode 100644
index e65d955e9..000000000
--- a/datapath/linux/compat/vxlan.c
+++ /dev/null
@@ -1,2382 +0,0 @@
-/*
- * VXLAN: Virtual eXtensible Local Area Network
- *
- * Copyright (c) 2012-2013 Vyatta Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/slab.h>
-#include <linux/skbuff.h>
-#include <linux/rculist.h>
-#include <linux/netdevice.h>
-#include <linux/netdev_features.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-#include <linux/igmp.h>
-#include <linux/etherdevice.h>
-#include <linux/if_ether.h>
-#include <linux/if_vlan.h>
-#include <linux/hash.h>
-#include <linux/ethtool.h>
-#include <net/arp.h>
-#include <net/dst_metadata.h>
-#include <net/ndisc.h>
-#include <net/ip.h>
-#include <net/ip_tunnels.h>
-#include <net/icmp.h>
-#include <net/udp.h>
-#include <net/udp_tunnel.h>
-#include <net/rtnetlink.h>
-#include <net/route.h>
-#include <net/dsfield.h>
-#include <net/inet_ecn.h>
-#include <net/net_namespace.h>
-#include <net/netns/generic.h>
-#include <net/protocol.h>
-
-#if IS_ENABLED(CONFIG_IPV6)
-#include <net/ipv6.h>
-#include <net/addrconf.h>
-#include <net/ip6_tunnel.h>
-#include <net/ip6_checksum.h>
-#include <net/ip6_route.h>
-#endif
-
-#include <net/tun_proto.h>
-#include <net/vxlan.h>
-#include "gso.h"
-#include "vport-netdev.h"
-#include "compat.h"
-
-#ifndef USE_UPSTREAM_TUNNEL
-#define VXLAN_VERSION "0.1"
-
-#define PORT_HASH_BITS 8
-#define PORT_HASH_SIZE (1<<PORT_HASH_BITS)
-#define FDB_AGE_DEFAULT 300 /* 5 min */
-#define FDB_AGE_INTERVAL (10 * HZ) /* rescan interval */
-
-/* UDP port for VXLAN traffic.
- * The IANA assigned port is 4789, but the Linux default is 8472
- * for compatibility with early adopters.
- */
-static unsigned short vxlan_port __read_mostly = 8472;
-module_param_named(udp_port, vxlan_port, ushort, 0444);
-MODULE_PARM_DESC(udp_port, "Destination UDP port");
-
-static int vxlan_net_id;
-static struct rtnl_link_ops vxlan_link_ops;
-
-static const u8 all_zeros_mac[ETH_ALEN + 2];
-
-static int vxlan_sock_add(struct vxlan_dev *vxlan);
-
-/* per-network namespace private data for this module */
-struct vxlan_net {
- struct list_head vxlan_list;
- struct hlist_head sock_list[PORT_HASH_SIZE];
- spinlock_t sock_lock;
-};
-
-/* Forwarding table entry */
-struct vxlan_fdb {
- struct hlist_node hlist; /* linked list of entries */
- struct rcu_head rcu;
- unsigned long updated; /* jiffies */
- unsigned long used;
- struct list_head remotes;
- u8 eth_addr[ETH_ALEN];
- u16 state; /* see ndm_state */
- u8 flags; /* see ndm_flags */
-};
-
-/* salt for hash table */
-static u32 vxlan_salt __read_mostly;
-
-static inline bool vxlan_collect_metadata(struct vxlan_sock *vs)
-{
- return vs->flags & VXLAN_F_COLLECT_METADATA ||
- ip_tunnel_collect_metadata();
-}
-
-#if IS_ENABLED(CONFIG_IPV6)
-static inline
-bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b)
-{
- if (a->sa.sa_family != b->sa.sa_family)
- return false;
- if (a->sa.sa_family == AF_INET6)
- return ipv6_addr_equal(&a->sin6.sin6_addr, &b->sin6.sin6_addr);
- else
- return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr;
-}
-
-static inline bool vxlan_addr_any(const union vxlan_addr *ipa)
-{
- if (ipa->sa.sa_family == AF_INET6)
- return ipv6_addr_any(&ipa->sin6.sin6_addr);
- else
- return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY);
-}
-
-static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa)
-{
- if (ipa->sa.sa_family == AF_INET6)
- return ipv6_addr_is_multicast(&ipa->sin6.sin6_addr);
- else
- return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr));
-}
-
-#else /* !CONFIG_IPV6 */
-
-static inline
-bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b)
-{
- return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr;
-}
-
-static inline bool vxlan_addr_any(const union vxlan_addr *ipa)
-{
- return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY);
-}
-
-static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa)
-{
- return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr));
-}
-#endif
-
-/* Virtual Network hash table head */
-static inline struct hlist_head *vni_head(struct vxlan_sock *vs, __be32 vni)
-{
- return &vs->vni_list[hash_32((__force u32)vni, VNI_HASH_BITS)];
-}
-
-/* Socket hash table head */
-static inline struct hlist_head *vs_head(struct net *net, __be16 port)
-{
- struct vxlan_net *vn = net_generic(net, vxlan_net_id);
-
- return &vn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)];
-}
-
-/* Find VXLAN socket based on network namespace, address family and UDP port
- * and enabled unshareable flags.
- */
-static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
- __be16 port, u32 flags)
-{
- struct vxlan_sock *vs;
-
- flags &= VXLAN_F_RCV_FLAGS;
-
- hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
- if (inet_sk(vs->sock->sk)->inet_sport == port &&
- vxlan_get_sk_family(vs) == family &&
- vs->flags == flags)
- return vs;
- }
- return NULL;
-}
-
-static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, __be32 vni)
-{
- struct vxlan_dev *vxlan;
-
- /* For flow based devices, map all packets to VNI 0 */
- if (vs->flags & VXLAN_F_COLLECT_METADATA)
- vni = 0;
-
- hlist_for_each_entry_rcu(vxlan, vni_head(vs, vni), hlist) {
- if (vxlan->default_dst.remote_vni == vni)
- return vxlan;
- }
-
- return NULL;
-}
-
-/* Look up VNI in a per net namespace table */
-static struct vxlan_dev *vxlan_find_vni(struct net *net, __be32 vni,
- sa_family_t family, __be16 port,
- u32 flags)
-{
- struct vxlan_sock *vs;
-
- vs = vxlan_find_sock(net, family, port, flags);
- if (!vs)
- return NULL;
-
- return vxlan_vs_find_vni(vs, vni);
-}
-
-static int vxlan_fdb_create(struct vxlan_dev *vxlan,
- const u8 *mac, union vxlan_addr *ip,
- __u16 state, __u16 flags,
- __be16 port, __be32 vni, __u32 ifindex,
- __u8 ndm_flags)
-{
- return -EINVAL;
-}
-
-static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f)
-{
-
-}
-
-static inline size_t vxlan_nlmsg_size(void)
-{
- return NLMSG_ALIGN(sizeof(struct ndmsg))
- + nla_total_size(ETH_ALEN) /* NDA_LLADDR */
- + nla_total_size(sizeof(struct in6_addr)) /* NDA_DST */
- + nla_total_size(sizeof(__be16)) /* NDA_PORT */
- + nla_total_size(sizeof(__be32)) /* NDA_VNI */
- + nla_total_size(sizeof(__u32)) /* NDA_IFINDEX */
- + nla_total_size(sizeof(__s32)) /* NDA_LINK_NETNSID */
- + nla_total_size(sizeof(struct nda_cacheinfo));
-}
-
-#ifdef HAVE_UDP_OFFLOAD
-#ifdef HAVE_NETIF_F_GSO_TUNNEL_REMCSUM
-
-static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb,
- unsigned int off,
- struct vxlanhdr *vh, size_t hdrlen,
- __be32 vni_field,
- struct gro_remcsum *grc,
- bool nopartial)
-{
- size_t start, offset;
-
- if (skb->remcsum_offload)
- return vh;
-
- if (!NAPI_GRO_CB(skb)->csum_valid)
- return NULL;
-
- start = vxlan_rco_start(vni_field);
- offset = start + vxlan_rco_offset(vni_field);
-
- vh = skb_gro_remcsum_process(skb, (void *)vh, off, hdrlen,
- start, offset, grc, nopartial);
-
- skb->remcsum_offload = 1;
-
- return vh;
-}
-#else
-static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb,
- unsigned int off,
- struct vxlanhdr *vh, size_t hdrlen,
- u32 data, struct gro_remcsum *grc,
- bool nopartial)
-{
- return NULL;
-}
-#endif
-
-#ifndef HAVE_UDP_OFFLOAD_ARG_UOFF
-static struct sk_buff **vxlan_gro_receive(struct sk_buff **head,
- struct sk_buff *skb)
-#else
-static struct sk_buff **vxlan_gro_receive(struct sk_buff **head,
- struct sk_buff *skb,
- struct udp_offload *uoff)
-#endif
-{
-#ifdef HAVE_UDP_OFFLOAD_ARG_UOFF
- struct vxlan_sock *vs = container_of(uoff, struct vxlan_sock,
- udp_offloads);
-#else
- struct vxlan_sock *vs = NULL;
-#endif
- struct sk_buff *p, **pp = NULL;
- struct vxlanhdr *vh, *vh2;
- unsigned int hlen, off_vx;
- int flush = 1;
- __be32 flags;
- struct gro_remcsum grc;
-
- skb_gro_remcsum_init(&grc);
-
- off_vx = skb_gro_offset(skb);
- hlen = off_vx + sizeof(*vh);
- vh = skb_gro_header_fast(skb, off_vx);
- if (skb_gro_header_hard(skb, hlen)) {
- vh = skb_gro_header_slow(skb, hlen, off_vx);
- if (unlikely(!vh))
- goto out;
- }
-
- skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr));
-
- flags = vh->vx_flags;
-
- if ((flags & VXLAN_HF_RCO) && vs && (vs->flags & VXLAN_F_REMCSUM_RX)) {
- vh = vxlan_gro_remcsum(skb, off_vx, vh, sizeof(struct vxlanhdr),
- vh->vx_vni, &grc,
- !!(vs->flags &
- VXLAN_F_REMCSUM_NOPARTIAL));
-
- if (!vh)
- goto out;
- }
-
- skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */
-
- for (p = *head; p; p = p->next) {
- if (!NAPI_GRO_CB(p)->same_flow)
- continue;
-
- vh2 = (struct vxlanhdr *)(p->data + off_vx);
- if (vh->vx_flags != vh2->vx_flags ||
- vh->vx_vni != vh2->vx_vni) {
- NAPI_GRO_CB(p)->same_flow = 0;
- continue;
- }
- }
-
- pp = eth_gro_receive(head, skb);
- flush = 0;
-
-out:
- skb_gro_remcsum_cleanup(skb, &grc);
- NAPI_GRO_CB(skb)->flush |= flush;
-
- return pp;
-}
-
-#ifndef HAVE_UDP_OFFLOAD_ARG_UOFF
-static int vxlan_gro_complete(struct sk_buff *skb, int nhoff)
-#else
-static int vxlan_gro_complete(struct sk_buff *skb, int nhoff,
- struct udp_offload *uoff)
-#endif
-{
- /* Sets 'skb->inner_mac_header' since we are always called with
- * 'skb->encapsulation' set.
- */
- udp_tunnel_gro_complete(skb, nhoff);
-
- return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr));
-}
-#endif
-
-/* Notify netdevs that UDP port started listening */
-static void vxlan_notify_add_rx_port(struct vxlan_sock *vs)
-{
- struct net_device *dev;
- struct sock *sk = vs->sock->sk;
- struct net *net = sock_net(sk);
- sa_family_t sa_family = vxlan_get_sk_family(vs);
-
-
- if (sa_family == AF_INET) {
- int err;
-
- err = udp_add_offload(net, &vs->udp_offloads);
- if (err)
- pr_warn("vxlan: udp_add_offload failed with status %d\n", err);
- }
-
- rcu_read_lock();
- for_each_netdev_rcu(net, dev) {
-#ifdef HAVE_NDO_ADD_VXLAN_PORT
- __be16 port = inet_sk(sk)->inet_sport;
-
- if (dev->netdev_ops->ndo_add_vxlan_port)
- dev->netdev_ops->ndo_add_vxlan_port(dev, sa_family,
- port);
-#elif defined(HAVE_NDO_UDP_TUNNEL_ADD)
- struct udp_tunnel_info ti;
- if (vs->flags & VXLAN_F_GPE)
- ti.type = UDP_TUNNEL_TYPE_VXLAN_GPE;
- else
- ti.type = UDP_TUNNEL_TYPE_VXLAN;
- ti.sa_family = sa_family;
- ti.port = inet_sk(sk)->inet_sport;
-
- if (dev->netdev_ops->ndo_udp_tunnel_add)
- dev->netdev_ops->ndo_udp_tunnel_add(dev, &ti);
-#endif
- }
- rcu_read_unlock();
-}
-
-/* Notify netdevs that UDP port is no more listening */
-static void vxlan_notify_del_rx_port(struct vxlan_sock *vs)
-{
- struct net_device *dev;
- struct sock *sk = vs->sock->sk;
- struct net *net = sock_net(sk);
- sa_family_t sa_family = vxlan_get_sk_family(vs);
-
- rcu_read_lock();
- for_each_netdev_rcu(net, dev) {
-#ifdef HAVE_NDO_ADD_VXLAN_PORT
- __be16 port = inet_sk(sk)->inet_sport;
-
- if (dev->netdev_ops->ndo_del_vxlan_port)
- dev->netdev_ops->ndo_del_vxlan_port(dev, sa_family,
- port);
-#elif defined(HAVE_NDO_UDP_TUNNEL_ADD)
- struct udp_tunnel_info ti;
- if (vs->flags & VXLAN_F_GPE)
- ti.type = UDP_TUNNEL_TYPE_VXLAN_GPE;
- else
- ti.type = UDP_TUNNEL_TYPE_VXLAN;
- ti.port = inet_sk(sk)->inet_sport;
- ti.sa_family = sa_family;
-
- if (dev->netdev_ops->ndo_udp_tunnel_del)
- dev->netdev_ops->ndo_udp_tunnel_del(dev, &ti);
-#endif
- }
- rcu_read_unlock();
-
- if (sa_family == AF_INET) {
- udp_del_offload(&vs->udp_offloads);
- }
-}
-
-/* See if multicast group is already in use by other ID */
-static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev)
-{
- struct vxlan_dev *vxlan;
- struct vxlan_sock *sock4;
- struct vxlan_sock *sock6 = NULL;
- unsigned short family = dev->default_dst.remote_ip.sa.sa_family;
-
- sock4 = rtnl_dereference(dev->vn4_sock);
-
- /* The vxlan_sock is only used by dev, leaving group has
- * no effect on other vxlan devices.
- */
- if (family == AF_INET && sock4 && atomic_read(&sock4->refcnt) == 1)
- return false;
-#if IS_ENABLED(CONFIG_IPV6)
- sock6 = rtnl_dereference(dev->vn6_sock);
- if (family == AF_INET6 && sock6 && atomic_read(&sock6->refcnt) == 1)
- return false;
-#endif
-
- list_for_each_entry(vxlan, &vn->vxlan_list, next) {
- if (!netif_running(vxlan->dev) || vxlan == dev)
- continue;
-
- if (family == AF_INET &&
- rtnl_dereference(vxlan->vn4_sock) != sock4)
- continue;
-#if IS_ENABLED(CONFIG_IPV6)
- if (family == AF_INET6 &&
- rtnl_dereference(vxlan->vn6_sock) != sock6)
- continue;
-#endif
-
- if (!vxlan_addr_equal(&vxlan->default_dst.remote_ip,
- &dev->default_dst.remote_ip))
- continue;
-
- if (vxlan->default_dst.remote_ifindex !=
- dev->default_dst.remote_ifindex)
- continue;
-
- return true;
- }
-
- return false;
-}
-
-static bool __vxlan_sock_release_prep(struct vxlan_sock *vs)
-{
- struct vxlan_net *vn;
-
- if (!vs)
- return false;
- if (!atomic_dec_and_test(&vs->refcnt))
- return false;
-
- vn = net_generic(sock_net(vs->sock->sk), vxlan_net_id);
- spin_lock(&vn->sock_lock);
- hlist_del_rcu(&vs->hlist);
- vxlan_notify_del_rx_port(vs);
- spin_unlock(&vn->sock_lock);
-
- return true;
-}
-
-static void vxlan_sock_release(struct vxlan_dev *vxlan)
-{
- struct vxlan_sock *sock4 = rtnl_dereference(vxlan->vn4_sock);
-#if IS_ENABLED(CONFIG_IPV6)
- struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock);
-
- rcu_assign_pointer(vxlan->vn6_sock, NULL);
-#endif
-
- rcu_assign_pointer(vxlan->vn4_sock, NULL);
- synchronize_net();
-
- if (__vxlan_sock_release_prep(sock4)) {
- udp_tunnel_sock_release(sock4->sock);
- kfree(sock4);
- }
-
-#if IS_ENABLED(CONFIG_IPV6)
- if (__vxlan_sock_release_prep(sock6)) {
- udp_tunnel_sock_release(sock6->sock);
- kfree(sock6);
- }
-#endif
-}
-
-/* Update multicast group membership when first VNI on
- * multicast address is brought up
- */
-static int vxlan_igmp_join(struct vxlan_dev *vxlan)
-{
- return -EINVAL;
-}
-
-/* Inverse of vxlan_igmp_join when last VNI is brought down */
-static int vxlan_igmp_leave(struct vxlan_dev *vxlan)
-{
- return -EINVAL;
-}
-
-static bool vxlan_remcsum(struct vxlanhdr *unparsed,
- struct sk_buff *skb, u32 vxflags)
-{
-#ifndef USE_UPSTREAM_TUNNEL
- return false;
-#else
- size_t start, offset;
-
- if (!(unparsed->vx_flags & VXLAN_HF_RCO) || skb->remcsum_offload)
- goto out;
-
- start = vxlan_rco_start(unparsed->vx_vni);
- offset = start + vxlan_rco_offset(unparsed->vx_vni);
-
- if (!pskb_may_pull(skb, offset + sizeof(u16)))
- return false;
-
- skb_remcsum_process(skb, (void *)(vxlan_hdr(skb) + 1), start, offset,
- !!(vxflags & VXLAN_F_REMCSUM_NOPARTIAL));
-out:
- unparsed->vx_flags &= ~VXLAN_HF_RCO;
- unparsed->vx_vni &= VXLAN_VNI_MASK;
- return true;
-#endif
-}
-
-static void vxlan_parse_gbp_hdr(struct vxlanhdr *unparsed,
- struct sk_buff *skb, u32 vxflags,
- struct vxlan_metadata *md)
-{
- struct vxlanhdr_gbp *gbp = (struct vxlanhdr_gbp *)unparsed;
- struct metadata_dst *tun_dst;
-
- if (!(unparsed->vx_flags & VXLAN_HF_GBP))
- goto out;
-
- md->gbp = ntohs(gbp->policy_id);
-
- tun_dst = (struct metadata_dst *)skb_dst(skb);
- if (tun_dst) {
- tun_dst->u.tun_info.key.tun_flags |= TUNNEL_VXLAN_OPT;
- tun_dst->u.tun_info.options_len = sizeof(*md);
- }
- if (gbp->dont_learn)
- md->gbp |= VXLAN_GBP_DONT_LEARN;
-
- if (gbp->policy_applied)
- md->gbp |= VXLAN_GBP_POLICY_APPLIED;
-
- /* In flow-based mode, GBP is carried in dst_metadata */
- if (!(vxflags & VXLAN_F_COLLECT_METADATA))
- skb->mark = md->gbp;
-out:
- unparsed->vx_flags &= ~VXLAN_GBP_USED_BITS;
-}
-
-static bool vxlan_parse_gpe_hdr(struct vxlanhdr *unparsed,
- __be16 *protocol,
- struct sk_buff *skb, u32 vxflags)
-{
- struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)unparsed;
-
- /* Need to have Next Protocol set for interfaces in GPE mode. */
- if (!gpe->np_applied)
- return false;
- /* "The initial version is 0. If a receiver does not support the
- * version indicated it MUST drop the packet.
- */
- if (gpe->version != 0)
- return false;
- /* "When the O bit is set to 1, the packet is an OAM packet and OAM
- * processing MUST occur." However, we don't implement OAM
- * processing, thus drop the packet.
- */
- if (gpe->oam_flag)
- return false;
-
- *protocol = tun_p_to_eth_p(gpe->next_protocol);
- if (!*protocol)
- return false;
-
- unparsed->vx_flags &= ~VXLAN_GPE_USED_BITS;
- return true;
-}
-
-static bool vxlan_set_mac(struct vxlan_dev *vxlan,
- struct vxlan_sock *vs,
- struct sk_buff *skb)
-{
- return true;
-}
-
-static bool vxlan_ecn_decapsulate(struct vxlan_sock *vs, void *oiph,
- struct sk_buff *skb)
-{
- int err = 0;
-
- if (vxlan_get_sk_family(vs) == AF_INET)
- err = IP_ECN_decapsulate(oiph, skb);
-#if IS_ENABLED(CONFIG_IPV6)
- else
- err = IP6_ECN_decapsulate(oiph, skb);
-#endif
- return err <= 1;
-}
-
-/* Callback from net/ipv4/udp.c to receive packets */
-static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
-{
- union {
- struct metadata_dst dst;
- char buf[sizeof(struct metadata_dst) + sizeof(struct vxlan_metadata)];
- } buf;
-
- struct pcpu_sw_netstats *stats;
- struct vxlan_dev *vxlan;
- struct vxlan_sock *vs;
- struct vxlanhdr unparsed;
- struct vxlan_metadata _md;
- struct vxlan_metadata *md = &_md;
- __be16 protocol = htons(ETH_P_TEB);
- bool raw_proto = false;
- void *oiph;
-
- /* Need UDP and VXLAN header to be present */
- if (!pskb_may_pull(skb, VXLAN_HLEN))
- goto drop;
-
- unparsed = *vxlan_hdr(skb);
- /* VNI flag always required to be set */
- if (!(unparsed.vx_flags & VXLAN_HF_VNI)) {
- netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n",
- ntohl(vxlan_hdr(skb)->vx_flags),
- ntohl(vxlan_hdr(skb)->vx_vni));
- /* Return non vxlan pkt */
- goto drop;
- }
-
- unparsed.vx_flags &= ~VXLAN_HF_VNI;
- unparsed.vx_vni &= ~VXLAN_VNI_MASK;
-
- vs = rcu_dereference_sk_user_data(sk);
- if (!vs)
- goto drop;
-
-#if IS_ENABLED(CONFIG_IPV6)
-#ifdef OVS_CHECK_UDP_TUNNEL_ZERO_CSUM
- if (vxlan_get_sk_family(vs) == AF_INET6 &&
- !udp_hdr(skb)->check &&
- !(vs->flags & VXLAN_F_UDP_ZERO_CSUM6_RX)) {
- udp6_csum_zero_error(skb);
- goto drop;
- }
-#endif
-#endif
- vxlan = vxlan_vs_find_vni(vs, vxlan_vni(vxlan_hdr(skb)->vx_vni));
- if (!vxlan)
- goto drop;
-
- /* For backwards compatibility, only allow reserved fields to be
- * used by VXLAN extensions if explicitly requested.
- */
- if (vs->flags & VXLAN_F_GPE) {
- if (!vxlan_parse_gpe_hdr(&unparsed, &protocol, skb, vs->flags))
- goto drop;
- raw_proto = true;
- }
-
- if (__iptunnel_pull_header(skb, VXLAN_HLEN, protocol, raw_proto,
- !net_eq(vxlan->net, dev_net(vxlan->dev))))
- goto drop;
-
- if (vxlan_collect_metadata(vs)) {
- __be32 vni = vxlan_vni(vxlan_hdr(skb)->vx_vni);
- struct metadata_dst *tun_dst;
-
- tun_dst = &buf.dst;
- ovs_udp_tun_rx_dst(tun_dst, skb,
- vxlan_get_sk_family(vs), TUNNEL_KEY,
- vxlan_vni_to_tun_id(vni), sizeof(*md));
-
- if (!tun_dst)
- goto drop;
-
- md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
-
- ovs_skb_dst_set(skb, (struct dst_entry *)tun_dst);
- } else {
- memset(md, 0, sizeof(*md));
- }
-
- if (vs->flags & VXLAN_F_REMCSUM_RX)
- if (!vxlan_remcsum(&unparsed, skb, vs->flags))
- goto drop;
-
- if (vs->flags & VXLAN_F_GBP)
- vxlan_parse_gbp_hdr(&unparsed, skb, vs->flags, md);
- /* Note that GBP and GPE can never be active together. This is
- * ensured in vxlan_dev_configure.
- */
-
- if (unparsed.vx_flags || unparsed.vx_vni) {
- /* If there are any unprocessed flags remaining treat
- * this as a malformed packet. This behavior diverges from
- * VXLAN RFC (RFC7348) which stipulates that bits in reserved
- * in reserved fields are to be ignored. The approach here
- * maintains compatibility with previous stack code, and also
- * is more robust and provides a little more security in
- * adding extensions to VXLAN.
- */
- goto drop;
- }
-
- if (!raw_proto) {
- if (!vxlan_set_mac(vxlan, vs, skb))
- goto drop;
- skb_reset_mac_header(skb);
- skb->protocol = eth_type_trans(skb, vxlan->dev);
- skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
- } else {
- skb_reset_mac_header(skb);
- skb->dev = vxlan->dev;
- skb->pkt_type = PACKET_HOST;
- }
-
- oiph = skb_network_header(skb);
- skb_reset_network_header(skb);
-
- if (!vxlan_ecn_decapsulate(vs, oiph, skb)) {
- ++vxlan->dev->stats.rx_frame_errors;
- ++vxlan->dev->stats.rx_errors;
- goto drop;
- }
-
- stats = this_cpu_ptr(vxlan->dev->tstats);
- u64_stats_update_begin(&stats->syncp);
- stats->rx_packets++;
- stats->rx_bytes += skb->len;
- u64_stats_update_end(&stats->syncp);
-
- netdev_port_receive(skb, skb_tunnel_info(skb));
- return 0;
-
-drop:
- /* Consume bad packet */
- kfree_skb(skb);
- return 0;
-}
-
-static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags,
- struct vxlan_metadata *md)
-{
- struct vxlanhdr_gbp *gbp;
-
- if (!md->gbp)
- return;
-
- gbp = (struct vxlanhdr_gbp *)vxh;
- vxh->vx_flags |= VXLAN_HF_GBP;
-
- if (md->gbp & VXLAN_GBP_DONT_LEARN)
- gbp->dont_learn = 1;
-
- if (md->gbp & VXLAN_GBP_POLICY_APPLIED)
- gbp->policy_applied = 1;
-
- gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK);
-}
-
-static int vxlan_build_gpe_hdr(struct vxlanhdr *vxh, u32 vxflags,
- __be16 protocol)
-{
- struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)vxh;
-
- gpe->np_applied = 1;
- gpe->next_protocol = tun_p_from_eth_p(protocol);
- if (!gpe->next_protocol)
- return -EPFNOSUPPORT;
- return 0;
-}
-
-static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
- int iphdr_len, __be32 vni,
- struct vxlan_metadata *md, u32 vxflags,
- bool udp_sum)
-{
- void (*fix_segment)(struct sk_buff *);
- struct vxlanhdr *vxh;
- int min_headroom;
- int err;
- int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
- __be16 inner_protocol = htons(ETH_P_TEB);
-
- if ((vxflags & VXLAN_F_REMCSUM_TX) &&
- skb->ip_summed == CHECKSUM_PARTIAL) {
- int csum_start = skb_checksum_start_offset(skb);
-
- if (csum_start <= VXLAN_MAX_REMCSUM_START &&
- !(csum_start & VXLAN_RCO_SHIFT_MASK) &&
- (skb->csum_offset == offsetof(struct udphdr, check) ||
- skb->csum_offset == offsetof(struct tcphdr, check)))
- type |= SKB_GSO_TUNNEL_REMCSUM;
- }
-
- min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len
- + VXLAN_HLEN + iphdr_len
- + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
-
- /* Need space for new headers (invalidates iph ptr) */
- err = skb_cow_head(skb, min_headroom);
- if (unlikely(err))
- goto out_free;
-
- if (skb_vlan_tag_present(skb))
- skb = __vlan_hwaccel_push_inside(skb);
- if (WARN_ON(!skb))
- return -ENOMEM;
-
- type |= udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
-#ifndef USE_UPSTREAM_TUNNEL_GSO
- fix_segment = !udp_sum ? ovs_udp_gso : ovs_udp_csum_gso;
-#else
- fix_segment = NULL;
-#endif
- err = ovs_iptunnel_handle_offloads(skb, type, fix_segment);
- if (err)
- goto out_free;
-
- vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
- vxh->vx_flags = VXLAN_HF_VNI;
- vxh->vx_vni = vxlan_vni_field(vni);
-
- if (type & SKB_GSO_TUNNEL_REMCSUM) {
- unsigned int start;
-
- start = skb_checksum_start_offset(skb) - sizeof(struct vxlanhdr);
- vxh->vx_vni |= vxlan_compute_rco(start, skb->csum_offset);
- vxh->vx_flags |= VXLAN_HF_RCO;
-
- if (!skb_is_gso(skb)) {
- skb->ip_summed = CHECKSUM_NONE;
- skb->encapsulation = 0;
- }
- }
-
- if (vxflags & VXLAN_F_GBP)
- vxlan_build_gbp_hdr(vxh, vxflags, md);
- if (vxflags & VXLAN_F_GPE) {
- err = vxlan_build_gpe_hdr(vxh, vxflags, skb->protocol);
- if (err < 0)
- goto out_free;
- inner_protocol = skb->protocol;
- }
-
- ovs_skb_set_inner_protocol(skb, inner_protocol);
- return 0;
-
-out_free:
- kfree_skb(skb);
- return err;
-}
-
-static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
- struct sk_buff *skb, int oif, u8 tos,
- __be32 daddr, __be32 *saddr,
- __be16 dport, __be16 sport,
- struct dst_cache *dst_cache,
- const struct ip_tunnel_info *info)
-{
- bool use_cache = (dst_cache && ip_tunnel_dst_cache_usable(skb, info));
- struct rtable *rt = NULL;
- struct flowi4 fl4;
-
- if (tos && !info)
- use_cache = false;
- if (use_cache) {
- rt = dst_cache_get_ip4(dst_cache, saddr);
- if (rt)
- return rt;
- }
-
- memset(&fl4, 0, sizeof(fl4));
- fl4.flowi4_oif = oif;
- fl4.flowi4_tos = RT_TOS(tos);
- fl4.flowi4_mark = skb->mark;
- fl4.flowi4_proto = IPPROTO_UDP;
- fl4.daddr = daddr;
- fl4.saddr = *saddr;
- fl4.fl4_dport = dport;
- fl4.fl4_sport = sport;
-
- rt = ip_route_output_key(vxlan->net, &fl4);
- if (!IS_ERR(rt)) {
- *saddr = fl4.saddr;
- if (use_cache)
- dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr);
- }
- return rt;
-}
-
-#if IS_ENABLED(CONFIG_IPV6)
-static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
- struct sk_buff *skb, int oif, u8 tos,
- __be32 label,
- const struct in6_addr *daddr,
- struct in6_addr *saddr,
- __be16 dport, __be16 sport,
- struct dst_cache *dst_cache,
- const struct ip_tunnel_info *info)
-{
- struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
- bool use_cache = (dst_cache && ip_tunnel_dst_cache_usable(skb, info));
- struct dst_entry *ndst;
- struct flowi6 fl6;
-#if !defined(HAVE_IPV6_STUB_WITH_DST_ENTRY) || \
- !defined(HAVE_IPV6_DST_LOOKUP_FLOW)
- int err;
-#endif
-
- if (!sock6)
- return ERR_PTR(-EIO);
-
- if (tos && !info)
- use_cache = false;
- if (use_cache) {
- ndst = dst_cache_get_ip6(dst_cache, saddr);
- if (ndst)
- return ndst;
- }
-
- memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_oif = oif;
- fl6.daddr = *daddr;
- fl6.saddr = *saddr;
- fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tos), label);
- fl6.flowi6_mark = skb->mark;
- fl6.flowi6_proto = IPPROTO_UDP;
- fl6.fl6_dport = dport;
- fl6.fl6_sport = sport;
-
-#if defined(HAVE_IPV6_STUB_WITH_DST_ENTRY) && defined(HAVE_IPV6_DST_LOOKUP_FLOW)
-#ifdef HAVE_IPV6_DST_LOOKUP_FLOW_NET
- ndst = ipv6_stub->ipv6_dst_lookup_flow(vxlan->net, sock6->sock->sk,
- &fl6, NULL);
-#else
- ndst = ipv6_stub->ipv6_dst_lookup_flow(sock6->sock->sk, &fl6, NULL);
-#endif
- if (unlikely(IS_ERR(ndst))) {
-#elif defined(HAVE_IPV6_DST_LOOKUP_FLOW_NET)
- err = ipv6_stub->ipv6_dst_lookup_flow(vxlan->net, sock6->sock->sk,
- &ndst, &fl6);
-#elif defined(HAVE_IPV6_DST_LOOKUP_FLOW)
- err = ipv6_stub->ipv6_dst_lookup_flow(sock6->sock->sk, &ndst, &fl6);
-#elif defined(HAVE_IPV6_DST_LOOKUP_NET)
- err = ipv6_stub->ipv6_dst_lookup(vxlan->net, sock6->sock->sk,
- &ndst, &fl6);
-#elif defined(HAVE_IPV6_STUB)
- err = ipv6_stub->ipv6_dst_lookup(vxlan->vn6_sock->sock->sk,
- &ndst, &fl6);
-#else
- err = ip6_dst_lookup(vxlan->vn6_sock->sock->sk, &ndst, &fl6);
-#endif
-#if defined(HAVE_IPV6_STUB_WITH_DST_ENTRY) && defined(HAVE_IPV6_DST_LOOKUP_FLOW)
- return ERR_PTR(-ENETUNREACH);
- }
-#else
- if (err < 0)
- return ERR_PTR(err);
-#endif
-
- *saddr = fl6.saddr;
- if (use_cache)
- dst_cache_set_ip6(dst_cache, ndst, saddr);
- return ndst;
-}
-#endif
-
-/* Bypass encapsulation if the destination is local */
-static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
- struct vxlan_dev *dst_vxlan)
-{
- skb->dev->stats.rx_dropped++;
- kfree_skb(skb);
-}
-
-static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
- struct vxlan_rdst *rdst, bool did_rsc)
-{
- struct dst_cache *dst_cache;
- struct ip_tunnel_info *info;
- struct vxlan_dev *vxlan = netdev_priv(dev);
- struct sock *sk;
- struct rtable *rt = NULL;
- const struct iphdr *old_iph;
- union vxlan_addr *dst;
- union vxlan_addr remote_ip, local_ip;
- union vxlan_addr *src;
- struct vxlan_metadata _md;
- struct vxlan_metadata *md = &_md;
- __be16 src_port = 0, dst_port;
- __be32 vni, label;
- __be16 df = 0;
- __u8 tos, ttl;
- int err;
- u32 flags = vxlan->flags;
- bool udp_sum = false;
- bool xnet = !net_eq(vxlan->net, dev_net(vxlan->dev));
-
- info = skb_tunnel_info(skb);
-
- if (rdst) {
- dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port;
- vni = rdst->remote_vni;
- dst = &rdst->remote_ip;
- src = &vxlan->cfg.saddr;
- dst_cache = &rdst->dst_cache;
- } else {
- if (!info) {
- WARN_ONCE(1, "%s: Missing encapsulation instructions\n",
- dev->name);
- goto drop;
- }
- dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port;
- vni = vxlan_tun_id_to_vni(info->key.tun_id);
- remote_ip.sa.sa_family = ip_tunnel_info_af(info);
- if (remote_ip.sa.sa_family == AF_INET) {
- remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst;
- local_ip.sin.sin_addr.s_addr = info->key.u.ipv4.src;
- } else {
- remote_ip.sin6.sin6_addr = info->key.u.ipv6.dst;
- local_ip.sin6.sin6_addr = info->key.u.ipv6.src;
- }
- dst = &remote_ip;
- src = &local_ip;
- dst_cache = &info->dst_cache;
- }
-
- if (vxlan_addr_any(dst)) {
- if (did_rsc) {
- /* short-circuited back to local bridge */
- vxlan_encap_bypass(skb, vxlan, vxlan);
- return;
- }
- goto drop;
- }
-
- old_iph = ip_hdr(skb);
-
- ttl = vxlan->cfg.ttl;
- if (!ttl && vxlan_addr_multicast(dst))
- ttl = 1;
-
- tos = vxlan->cfg.tos;
- if (tos == 1)
- tos = ip_tunnel_get_dsfield(old_iph, skb);
-
- label = vxlan->cfg.label;
- src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
- vxlan->cfg.port_max, true);
-
- if (info) {
- ttl = info->key.ttl;
- tos = info->key.tos;
- label = info->key.label;
- udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM);
-
- if (info->options_len &&
- info->key.tun_flags & TUNNEL_VXLAN_OPT)
- md = ip_tunnel_info_opts(info);
- } else {
- md->gbp = skb->mark;
- }
-
- if (dst->sa.sa_family == AF_INET) {
- struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock);
-
- if (!sock4)
- goto drop;
- sk = sock4->sock->sk;
-
- rt = vxlan_get_route(vxlan, skb,
- rdst ? rdst->remote_ifindex : 0, tos,
- dst->sin.sin_addr.s_addr,
- &src->sin.sin_addr.s_addr,
- dst_port, src_port,
- dst_cache, info);
- if (IS_ERR(rt)) {
- netdev_dbg(dev, "no route to %pI4\n",
- &dst->sin.sin_addr.s_addr);
- dev->stats.tx_carrier_errors++;
- goto tx_error;
- }
-
- if (rt->dst.dev == dev) {
- netdev_dbg(dev, "circular route to %pI4\n",
- &dst->sin.sin_addr.s_addr);
- dev->stats.collisions++;
- goto rt_tx_error;
- }
-
- /* Bypass encapsulation if the destination is local */
- if (!info && rt->rt_flags & RTCF_LOCAL &&
- !(rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
- struct vxlan_dev *dst_vxlan;
-
- ip_rt_put(rt);
- dst_vxlan = vxlan_find_vni(vxlan->net, vni,
- dst->sa.sa_family, dst_port,
- vxlan->flags);
- if (!dst_vxlan)
- goto tx_error;
- vxlan_encap_bypass(skb, vxlan, dst_vxlan);
- return;
- }
-
- if (!info)
- udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM_TX);
- else if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT)
- df = htons(IP_DF);
-
- tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
- ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
- err = vxlan_build_skb(skb, &rt->dst, sizeof(struct iphdr),
- vni, md, flags, udp_sum);
- if (err < 0)
- goto xmit_tx_error;
-
- udp_tunnel_xmit_skb(rt, sk, skb, src->sin.sin_addr.s_addr,
- dst->sin.sin_addr.s_addr, tos, ttl, df,
- src_port, dst_port, xnet, !udp_sum);
-#if IS_ENABLED(CONFIG_IPV6)
- } else {
- struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
- struct dst_entry *ndst;
- u32 rt6i_flags;
-
- if (!sock6)
- goto drop;
- sk = sock6->sock->sk;
-
- ndst = vxlan6_get_route(vxlan, skb,
- rdst ? rdst->remote_ifindex : 0, tos,
- label, &dst->sin6.sin6_addr,
- &src->sin6.sin6_addr,
- dst_port, src_port,
- dst_cache, info);
- if (IS_ERR(ndst)) {
- netdev_dbg(dev, "no route to %pI6\n",
- &dst->sin6.sin6_addr);
- dev->stats.tx_carrier_errors++;
- goto tx_error;
- }
-
- if (ndst->dev == dev) {
- netdev_dbg(dev, "circular route to %pI6\n",
- &dst->sin6.sin6_addr);
- dst_release(ndst);
- dev->stats.collisions++;
- goto tx_error;
- }
-
- /* Bypass encapsulation if the destination is local */
- rt6i_flags = ((struct rt6_info *)ndst)->rt6i_flags;
- if (!info && rt6i_flags & RTF_LOCAL &&
- !(rt6i_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
- struct vxlan_dev *dst_vxlan;
-
- dst_release(ndst);
- dst_vxlan = vxlan_find_vni(vxlan->net, vni,
- dst->sa.sa_family, dst_port,
- vxlan->flags);
- if (!dst_vxlan)
- goto tx_error;
- vxlan_encap_bypass(skb, vxlan, dst_vxlan);
- return;
- }
-
- if (!info)
- udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX);
-
- tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
- ttl = ttl ? : ip6_dst_hoplimit(ndst);
- skb_scrub_packet(skb, xnet);
- err = vxlan_build_skb(skb, ndst, sizeof(struct ipv6hdr),
- vni, md, flags, udp_sum);
- if (err < 0) {
- dst_release(ndst);
- return;
- }
- udp_tunnel6_xmit_skb(ndst, sk, skb, dev,
- &src->sin6.sin6_addr,
- &dst->sin6.sin6_addr, tos, ttl,
- label, src_port, dst_port, !udp_sum);
-#endif
- }
-
- return;
-
-drop:
- dev->stats.tx_dropped++;
- goto tx_free;
-
-xmit_tx_error:
- /* skb is already freed. */
- skb = NULL;
-rt_tx_error:
- ip_rt_put(rt);
-tx_error:
- dev->stats.tx_errors++;
-tx_free:
- dev_kfree_skb(skb);
-}
-
-/* Transmit local packets over Vxlan
- *
- * Outer IP header inherits ECN and DF from inner header.
- * Outer UDP destination is the VXLAN assigned port.
- * source port is based on hash of flow
- */
-netdev_tx_t rpl_vxlan_xmit(struct sk_buff *skb)
-{
- struct net_device *dev = skb->dev;
- struct vxlan_dev *vxlan = netdev_priv(dev);
- const struct ip_tunnel_info *info;
-
- info = skb_tunnel_info(skb);
- skb_reset_mac_header(skb);
- if (vxlan->flags & VXLAN_F_COLLECT_METADATA) {
- if (info && info->mode & IP_TUNNEL_INFO_TX) {
- vxlan_xmit_one(skb, dev, NULL, false);
- return NETDEV_TX_OK;
- }
- }
-
- dev->stats.tx_dropped++;
- kfree_skb(skb);
- return NETDEV_TX_OK;
-}
-EXPORT_SYMBOL_GPL(rpl_vxlan_xmit);
-
-/* Walk the forwarding table and purge stale entries */
-#ifdef HAVE_INIT_TIMER_DEFERRABLE
-static void vxlan_cleanup(unsigned long arg)
-{
- struct vxlan_dev *vxlan = (struct vxlan_dev *) arg;
-#else
-static void vxlan_cleanup(struct timer_list *t)
-{
- struct vxlan_dev *vxlan = from_timer(vxlan, t, age_timer);
-#endif
- unsigned long next_timer = jiffies + FDB_AGE_INTERVAL;
- unsigned int h;
-
- if (!netif_running(vxlan->dev))
- return;
-
- for (h = 0; h < FDB_HASH_SIZE; ++h) {
- struct hlist_node *p, *n;
-
- spin_lock_bh(&vxlan->hash_lock);
- hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) {
- struct vxlan_fdb *f
- = container_of(p, struct vxlan_fdb, hlist);
- unsigned long timeout;
-
- if (f->state & NUD_PERMANENT)
- continue;
-
- timeout = f->used + vxlan->cfg.age_interval * HZ;
- if (time_before_eq(timeout, jiffies)) {
- netdev_dbg(vxlan->dev,
- "garbage collect %pM\n",
- f->eth_addr);
- f->state = NUD_STALE;
- vxlan_fdb_destroy(vxlan, f);
- } else if (time_before(timeout, next_timer))
- next_timer = timeout;
- }
- spin_unlock_bh(&vxlan->hash_lock);
- }
-
- mod_timer(&vxlan->age_timer, next_timer);
-}
-
-static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan)
-{
- struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
- __be32 vni = vxlan->default_dst.remote_vni;
-
- spin_lock(&vn->sock_lock);
- hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni));
- spin_unlock(&vn->sock_lock);
-}
-
-/* Setup stats when device is created */
-static int vxlan_init(struct net_device *dev)
-{
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
-
- return 0;
-}
-
-static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan)
-{
-}
-
-static void vxlan_uninit(struct net_device *dev)
-{
- struct vxlan_dev *vxlan = netdev_priv(dev);
-
- vxlan_fdb_delete_default(vxlan);
-
- free_percpu(dev->tstats);
-}
-
-/* Start ageing timer and join group when device is brought up */
-static int vxlan_open(struct net_device *dev)
-{
- struct vxlan_dev *vxlan = netdev_priv(dev);
- int ret;
-
- ret = vxlan_sock_add(vxlan);
- if (ret < 0)
- return ret;
-
- if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip)) {
- ret = vxlan_igmp_join(vxlan);
- if (ret == -EADDRINUSE)
- ret = 0;
- if (ret) {
- vxlan_sock_release(vxlan);
- return ret;
- }
- }
-
- if (vxlan->cfg.age_interval)
- mod_timer(&vxlan->age_timer, jiffies + FDB_AGE_INTERVAL);
-
- return ret;
-}
-
-/* Purge the forwarding table */
-static void vxlan_flush(struct vxlan_dev *vxlan)
-{
- unsigned int h;
-
- spin_lock_bh(&vxlan->hash_lock);
- for (h = 0; h < FDB_HASH_SIZE; ++h) {
- struct hlist_node *p, *n;
- hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) {
- struct vxlan_fdb *f
- = container_of(p, struct vxlan_fdb, hlist);
- /* the all_zeros_mac entry is deleted at vxlan_uninit */
- if (!is_zero_ether_addr(f->eth_addr))
- vxlan_fdb_destroy(vxlan, f);
- }
- }
- spin_unlock_bh(&vxlan->hash_lock);
-}
-
-/* Cleanup timer and forwarding table on shutdown */
-static int vxlan_stop(struct net_device *dev)
-{
- struct vxlan_dev *vxlan = netdev_priv(dev);
- struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
- int ret = 0;
-
- if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip) &&
- !vxlan_group_used(vn, vxlan))
- ret = vxlan_igmp_leave(vxlan);
-
- del_timer_sync(&vxlan->age_timer);
-
- vxlan_flush(vxlan);
- vxlan_sock_release(vxlan);
-
- return ret;
-}
-
-/* Stub, nothing needs to be done. */
-static void vxlan_set_multicast_list(struct net_device *dev)
-{
-}
-
-static int __vxlan_change_mtu(struct net_device *dev,
- struct net_device *lowerdev,
- struct vxlan_rdst *dst, int new_mtu, bool strict)
-{
- int max_mtu = IP_MAX_MTU;
-
- if (lowerdev)
- max_mtu = lowerdev->mtu;
-
- if (dst->remote_ip.sa.sa_family == AF_INET6)
- max_mtu -= VXLAN6_HEADROOM;
- else
- max_mtu -= VXLAN_HEADROOM;
-
- if (new_mtu < 68)
- return -EINVAL;
-
- if (new_mtu > max_mtu) {
- if (strict)
- return -EINVAL;
-
- new_mtu = max_mtu;
- }
-
- dev->mtu = new_mtu;
- return 0;
-}
-
-static int vxlan_change_mtu(struct net_device *dev, int new_mtu)
-{
- struct vxlan_dev *vxlan = netdev_priv(dev);
- struct vxlan_rdst *dst = &vxlan->default_dst;
- struct net_device *lowerdev = __dev_get_by_index(vxlan->net,
- dst->remote_ifindex);
- return __vxlan_change_mtu(dev, lowerdev, dst, new_mtu, true);
-}
-
-int ovs_vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
-{
- struct vxlan_dev *vxlan = netdev_priv(dev);
- struct ip_tunnel_info *info = skb_tunnel_info(skb);
- __be16 sport, dport;
-
- sport = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
- vxlan->cfg.port_max, true);
- dport = info->key.tp_dst ? : vxlan->cfg.dst_port;
-
- if (ip_tunnel_info_af(info) == AF_INET) {
- struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock);
- struct rtable *rt;
-
- if (!sock4)
- return -EINVAL;
- rt = vxlan_get_route(vxlan, skb, 0, info->key.tos,
- info->key.u.ipv4.dst,
- &info->key.u.ipv4.src,
- dport, sport, NULL, info);
- if (IS_ERR(rt))
- return PTR_ERR(rt);
- ip_rt_put(rt);
- } else {
-#if IS_ENABLED(CONFIG_IPV6)
- struct dst_entry *ndst;
-
- ndst = vxlan6_get_route(vxlan, skb, 0, info->key.tos,
- info->key.label, &info->key.u.ipv6.dst,
- &info->key.u.ipv6.src,
- dport, sport, NULL, info);
- if (IS_ERR(ndst))
- return PTR_ERR(ndst);
- dst_release(ndst);
-#else /* !CONFIG_IPV6 */
- return -EPFNOSUPPORT;
-#endif
- }
- info->key.tp_src = sport;
- info->key.tp_dst = dport;
- return 0;
-}
-EXPORT_SYMBOL_GPL(ovs_vxlan_fill_metadata_dst);
-
-static netdev_tx_t vxlan_dev_xmit(struct sk_buff *skb, struct net_device *dev)
-{
- /* Drop All packets coming from networking stack. OVS-CB is
- * not initialized for these packets.
- */
-
- dev_kfree_skb(skb);
- dev->stats.tx_dropped++;
- return NETDEV_TX_OK;
-}
-
-static const struct net_device_ops vxlan_netdev_ether_ops = {
- .ndo_init = vxlan_init,
- .ndo_uninit = vxlan_uninit,
- .ndo_open = vxlan_open,
- .ndo_stop = vxlan_stop,
- .ndo_start_xmit = vxlan_dev_xmit,
- .ndo_get_stats64 = ip_tunnel_get_stats64,
- .ndo_set_rx_mode = vxlan_set_multicast_list,
-#ifdef HAVE_RHEL7_MAX_MTU
- .ndo_size = sizeof(struct net_device_ops),
- .extended.ndo_change_mtu = vxlan_change_mtu,
-#else
- .ndo_change_mtu = vxlan_change_mtu,
-#endif
- .ndo_validate_addr = eth_validate_addr,
- .ndo_set_mac_address = eth_mac_addr,
-#ifdef HAVE_NDO_FILL_METADATA_DST
- .ndo_fill_metadata_dst = ovs_vxlan_fill_metadata_dst,
-#endif
-};
-
-static const struct net_device_ops vxlan_netdev_raw_ops = {
- .ndo_init = vxlan_init,
- .ndo_uninit = vxlan_uninit,
- .ndo_open = vxlan_open,
- .ndo_stop = vxlan_stop,
- .ndo_start_xmit = vxlan_dev_xmit,
- .ndo_get_stats64 = ip_tunnel_get_stats64,
-#ifdef HAVE_RHEL7_MAX_MTU
- .ndo_size = sizeof(struct net_device_ops),
- .extended.ndo_change_mtu = vxlan_change_mtu,
-#else
- .ndo_change_mtu = vxlan_change_mtu,
-#endif
-#ifdef HAVE_NDO_FILL_METADATA_DST
- .ndo_fill_metadata_dst = ovs_vxlan_fill_metadata_dst,
-#endif
-};
-
-/* Info for udev, that this is a virtual tunnel endpoint */
-static struct device_type vxlan_type = {
- .name = "vxlan",
-};
-
-/* Calls the ndo_add_vxlan_port or ndo_udp_tunnel_add of the caller
- * in order to supply the listening VXLAN udp ports. Callers are
- * expected to implement the ndo_add_vxlan_port.
- */
-static void vxlan_push_rx_ports(struct net_device *dev)
-{
-#ifdef HAVE_NDO_ADD_VXLAN_PORT
- struct vxlan_sock *vs;
- struct net *net = dev_net(dev);
- struct vxlan_net *vn = net_generic(net, vxlan_net_id);
- sa_family_t sa_family;
- __be16 port;
- unsigned int i;
-
- if (!dev->netdev_ops->ndo_add_vxlan_port)
- return;
-
- spin_lock(&vn->sock_lock);
- for (i = 0; i < PORT_HASH_SIZE; ++i) {
- hlist_for_each_entry_rcu(vs, &vn->sock_list[i], hlist) {
- port = inet_sk(vs->sock->sk)->inet_sport;
- sa_family = vxlan_get_sk_family(vs);
- dev->netdev_ops->ndo_add_vxlan_port(dev, sa_family,
- port);
- }
- }
- spin_unlock(&vn->sock_lock);
-#elif defined(HAVE_NDO_UDP_TUNNEL_ADD)
- struct vxlan_sock *vs;
- struct net *net = dev_net(dev);
- struct vxlan_net *vn = net_generic(net, vxlan_net_id);
- unsigned int i;
-
- if (!dev->netdev_ops->ndo_udp_tunnel_add)
- return;
-
- spin_lock(&vn->sock_lock);
- for (i = 0; i < PORT_HASH_SIZE; ++i) {
- hlist_for_each_entry_rcu(vs, &vn->sock_list[i], hlist) {
- struct udp_tunnel_info ti;
- if (vs->flags & VXLAN_F_GPE)
- ti.type = UDP_TUNNEL_TYPE_VXLAN_GPE;
- else
- ti.type = UDP_TUNNEL_TYPE_VXLAN;
- ti.port = inet_sk(vs->sock->sk)->inet_sport;
- ti.sa_family = vxlan_get_sk_family(vs);
-
- dev->netdev_ops->ndo_udp_tunnel_add(dev, &ti);
- }
- }
- spin_unlock(&vn->sock_lock);
-#endif
-}
-
-/* Initialize the device structure. */
-static void vxlan_setup(struct net_device *dev)
-{
- struct vxlan_dev *vxlan = netdev_priv(dev);
- unsigned int h;
-
- eth_hw_addr_random(dev);
- ether_setup(dev);
-
-#ifndef HAVE_NEEDS_FREE_NETDEV
- dev->destructor = free_netdev;
-#else
- dev->needs_free_netdev = true;
-#endif
- SET_NETDEV_DEVTYPE(dev, &vxlan_type);
-
- dev->features |= NETIF_F_LLTX;
- dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM;
- dev->features |= NETIF_F_RXCSUM;
- dev->features |= NETIF_F_GSO_SOFTWARE;
-
- dev->vlan_features = dev->features;
- dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
- dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
- dev->hw_features |= NETIF_F_GSO_SOFTWARE;
- dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
-#if 0
- netif_keep_dst(dev);
-#endif
- dev->priv_flags |= IFF_NO_QUEUE;
-
- INIT_LIST_HEAD(&vxlan->next);
- spin_lock_init(&vxlan->hash_lock);
-
-#ifdef HAVE_INIT_TIMER_DEFERRABLE
- init_timer_deferrable(&vxlan->age_timer);
- vxlan->age_timer.function = vxlan_cleanup;
- vxlan->age_timer.data = (unsigned long) vxlan;
-#else
- timer_setup(&vxlan->age_timer, vxlan_cleanup, TIMER_DEFERRABLE);
-#endif
-
- vxlan->cfg.dst_port = htons(vxlan_port);
-
- vxlan->dev = dev;
-
- for (h = 0; h < FDB_HASH_SIZE; ++h)
- INIT_HLIST_HEAD(&vxlan->fdb_head[h]);
-}
-
-static void vxlan_ether_setup(struct net_device *dev)
-{
- dev->priv_flags &= ~IFF_TX_SKB_SHARING;
- dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
- dev->netdev_ops = &vxlan_netdev_ether_ops;
-}
-
-static void vxlan_raw_setup(struct net_device *dev)
-{
- dev->header_ops = NULL;
- dev->type = ARPHRD_NONE;
- dev->hard_header_len = 0;
- dev->addr_len = 0;
- dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
- dev->netdev_ops = &vxlan_netdev_raw_ops;
-}
-
-static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
- [IFLA_VXLAN_ID] = { .type = NLA_U32 },
- [IFLA_VXLAN_GROUP] = { .len = sizeof_field(struct iphdr, daddr) },
- [IFLA_VXLAN_GROUP6] = { .len = sizeof(struct in6_addr) },
- [IFLA_VXLAN_LINK] = { .type = NLA_U32 },
- [IFLA_VXLAN_LOCAL] = { .len = sizeof_field(struct iphdr, saddr) },
- [IFLA_VXLAN_LOCAL6] = { .len = sizeof(struct in6_addr) },
- [IFLA_VXLAN_TOS] = { .type = NLA_U8 },
- [IFLA_VXLAN_TTL] = { .type = NLA_U8 },
- [IFLA_VXLAN_LABEL] = { .type = NLA_U32 },
- [IFLA_VXLAN_LEARNING] = { .type = NLA_U8 },
- [IFLA_VXLAN_AGEING] = { .type = NLA_U32 },
- [IFLA_VXLAN_LIMIT] = { .type = NLA_U32 },
- [IFLA_VXLAN_PORT_RANGE] = { .len = sizeof(struct ifla_vxlan_port_range) },
- [IFLA_VXLAN_PROXY] = { .type = NLA_U8 },
- [IFLA_VXLAN_RSC] = { .type = NLA_U8 },
- [IFLA_VXLAN_L2MISS] = { .type = NLA_U8 },
- [IFLA_VXLAN_L3MISS] = { .type = NLA_U8 },
- [IFLA_VXLAN_COLLECT_METADATA] = { .type = NLA_U8 },
- [IFLA_VXLAN_PORT] = { .type = NLA_U16 },
- [IFLA_VXLAN_UDP_CSUM] = { .type = NLA_U8 },
- [IFLA_VXLAN_UDP_ZERO_CSUM6_TX] = { .type = NLA_U8 },
- [IFLA_VXLAN_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 },
- [IFLA_VXLAN_REMCSUM_TX] = { .type = NLA_U8 },
- [IFLA_VXLAN_REMCSUM_RX] = { .type = NLA_U8 },
- [IFLA_VXLAN_GBP] = { .type = NLA_FLAG, },
- [IFLA_VXLAN_GPE] = { .type = NLA_FLAG, },
- [IFLA_VXLAN_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG },
-};
-
-#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK
-static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
- struct netlink_ext_ack *extack)
-#else
-static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[])
-#endif
-{
- if (tb[IFLA_ADDRESS]) {
- if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
- pr_debug("invalid link address (not ethernet)\n");
- return -EINVAL;
- }
-
- if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
- pr_debug("invalid all zero ethernet address\n");
- return -EADDRNOTAVAIL;
- }
- }
-
- if (!data)
- return -EINVAL;
-
- if (data[IFLA_VXLAN_ID]) {
- __u32 id = nla_get_u32(data[IFLA_VXLAN_ID]);
- if (id >= VXLAN_VID_MASK)
- return -ERANGE;
- }
-
- if (data[IFLA_VXLAN_PORT_RANGE]) {
- const struct ifla_vxlan_port_range *p
- = nla_data(data[IFLA_VXLAN_PORT_RANGE]);
-
- if (ntohs(p->high) < ntohs(p->low)) {
- pr_debug("port range %u .. %u not valid\n",
- ntohs(p->low), ntohs(p->high));
- return -EINVAL;
- }
- }
-
- return 0;
-}
-
-static void vxlan_get_drvinfo(struct net_device *netdev,
- struct ethtool_drvinfo *drvinfo)
-{
- strlcpy(drvinfo->version, VXLAN_VERSION, sizeof(drvinfo->version));
- strlcpy(drvinfo->driver, "vxlan", sizeof(drvinfo->driver));
-}
-
-static const struct ethtool_ops vxlan_ethtool_ops = {
- .get_drvinfo = vxlan_get_drvinfo,
- .get_link = ethtool_op_get_link,
-};
-
-static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
- __be16 port, u32 flags)
-{
- struct socket *sock;
- struct udp_port_cfg udp_conf;
- int err;
-
- memset(&udp_conf, 0, sizeof(udp_conf));
-
- if (ipv6) {
- udp_conf.family = AF_INET6;
- udp_conf.use_udp6_rx_checksums =
- !(flags & VXLAN_F_UDP_ZERO_CSUM6_RX);
- udp_conf.ipv6_v6only = 1;
- } else {
- udp_conf.family = AF_INET;
- }
-
- udp_conf.local_udp_port = port;
-
- /* Open UDP socket */
- err = udp_sock_create(net, &udp_conf, &sock);
- if (err < 0)
- return ERR_PTR(err);
-
- return sock;
-}
-
-/* Create new listen socket if needed */
-static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,
- __be16 port, u32 flags)
-{
- struct vxlan_net *vn = net_generic(net, vxlan_net_id);
- struct vxlan_sock *vs;
- struct socket *sock;
- unsigned int h;
- struct udp_tunnel_sock_cfg tunnel_cfg;
-
- vs = kzalloc(sizeof(*vs), GFP_KERNEL);
- if (!vs)
- return ERR_PTR(-ENOMEM);
-
- for (h = 0; h < VNI_HASH_SIZE; ++h)
- INIT_HLIST_HEAD(&vs->vni_list[h]);
-
- sock = vxlan_create_sock(net, ipv6, port, flags);
- if (IS_ERR(sock)) {
- kfree(vs);
- return ERR_CAST(sock);
- }
-
- vs->sock = sock;
- atomic_set(&vs->refcnt, 1);
- vs->flags = (flags & VXLAN_F_RCV_FLAGS);
-
-#ifdef HAVE_UDP_OFFLOAD
- vs->udp_offloads.port = port;
- vs->udp_offloads.callbacks.gro_receive = vxlan_gro_receive;
- vs->udp_offloads.callbacks.gro_complete = vxlan_gro_complete;
-#endif
-
- spin_lock(&vn->sock_lock);
- hlist_add_head_rcu(&vs->hlist, vs_head(net, port));
- vxlan_notify_add_rx_port(vs);
- spin_unlock(&vn->sock_lock);
-
- /* Mark socket as an encapsulation socket. */
- memset(&tunnel_cfg, 0, sizeof(tunnel_cfg));
- tunnel_cfg.sk_user_data = vs;
- tunnel_cfg.encap_type = 1;
- tunnel_cfg.encap_rcv = vxlan_rcv;
- tunnel_cfg.encap_destroy = NULL;
-#ifdef HAVE_UDP_TUNNEL_SOCK_CFG_GRO_RECEIVE
- tunnel_cfg.gro_receive = vxlan_gro_receive;
- tunnel_cfg.gro_complete = vxlan_gro_complete;
-#endif
- setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
-
- return vs;
-}
-
-static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
-{
- struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
- struct vxlan_sock *vs = NULL;
-
- if (!vxlan->cfg.no_share) {
- spin_lock(&vn->sock_lock);
- vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET,
- vxlan->cfg.dst_port, vxlan->flags);
- if (vs && !atomic_add_unless(&vs->refcnt, 1, 0)) {
- spin_unlock(&vn->sock_lock);
- return -EBUSY;
- }
- spin_unlock(&vn->sock_lock);
- }
- if (!vs)
- vs = vxlan_socket_create(vxlan->net, ipv6,
- vxlan->cfg.dst_port, vxlan->flags);
- if (IS_ERR(vs))
- return PTR_ERR(vs);
-#if IS_ENABLED(CONFIG_IPV6)
- if (ipv6)
- rcu_assign_pointer(vxlan->vn6_sock, vs);
- else
-#endif
- rcu_assign_pointer(vxlan->vn4_sock, vs);
- vxlan_vs_add_dev(vs, vxlan);
- return 0;
-}
-
-static int vxlan_sock_add(struct vxlan_dev *vxlan)
-{
- bool metadata = vxlan->flags & VXLAN_F_COLLECT_METADATA;
- bool ipv6 = vxlan->flags & VXLAN_F_IPV6 || metadata;
- bool ipv4 = !ipv6 || metadata;
- int ret = 0;
-
- RCU_INIT_POINTER(vxlan->vn4_sock, NULL);
-#if IS_ENABLED(CONFIG_IPV6)
- RCU_INIT_POINTER(vxlan->vn6_sock, NULL);
- if (ipv6) {
- ret = __vxlan_sock_add(vxlan, true);
- if (ret < 0 && ret != -EAFNOSUPPORT)
- ipv4 = false;
- }
-#endif
- if (ipv4)
- ret = __vxlan_sock_add(vxlan, false);
- if (ret < 0)
- vxlan_sock_release(vxlan);
- return ret;
-}
-
-static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
- struct vxlan_config *conf)
-{
- struct vxlan_net *vn = net_generic(src_net, vxlan_net_id);
- struct vxlan_dev *vxlan = netdev_priv(dev), *tmp;
- struct vxlan_rdst *dst = &vxlan->default_dst;
- unsigned short needed_headroom = ETH_HLEN;
- int err;
- bool use_ipv6 = false;
- __be16 default_port = vxlan->cfg.dst_port;
- struct net_device *lowerdev = NULL;
-
- if (conf->flags & VXLAN_F_GPE) {
- if (conf->flags & ~VXLAN_F_ALLOWED_GPE)
- return -EINVAL;
- /* For now, allow GPE only together with COLLECT_METADATA.
- * This can be relaxed later; in such case, the other side
- * of the PtP link will have to be provided.
- */
- if (!(conf->flags & VXLAN_F_COLLECT_METADATA))
- return -EINVAL;
-
- vxlan_raw_setup(dev);
- } else {
- vxlan_ether_setup(dev);
- }
-
- vxlan->net = src_net;
-
- dst->remote_vni = conf->vni;
-
- memcpy(&dst->remote_ip, &conf->remote_ip, sizeof(conf->remote_ip));
-
- /* Unless IPv6 is explicitly requested, assume IPv4 */
- if (!dst->remote_ip.sa.sa_family)
- dst->remote_ip.sa.sa_family = AF_INET;
-
- if (dst->remote_ip.sa.sa_family == AF_INET6 ||
- vxlan->cfg.saddr.sa.sa_family == AF_INET6) {
- if (!IS_ENABLED(CONFIG_IPV6))
- return -EPFNOSUPPORT;
- use_ipv6 = true;
- vxlan->flags |= VXLAN_F_IPV6;
- }
-
- if (conf->label && !use_ipv6) {
- pr_info("label only supported in use with IPv6\n");
- return -EINVAL;
- }
-
- if (conf->remote_ifindex) {
- lowerdev = __dev_get_by_index(src_net, conf->remote_ifindex);
- dst->remote_ifindex = conf->remote_ifindex;
-
- if (!lowerdev) {
- pr_info("ifindex %d does not exist\n", dst->remote_ifindex);
- return -ENODEV;
- }
-
-#if IS_ENABLED(CONFIG_IPV6)
- if (use_ipv6) {
- struct inet6_dev *idev = __in6_dev_get(lowerdev);
- if (idev && idev->cnf.disable_ipv6) {
- pr_info("IPv6 is disabled via sysctl\n");
- return -EPERM;
- }
- }
-#endif
-
- if (!conf->mtu)
- dev->mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
-
- needed_headroom = lowerdev->hard_header_len;
- }
-
- if (conf->mtu) {
- err = __vxlan_change_mtu(dev, lowerdev, dst, conf->mtu, false);
- if (err)
- return err;
- }
-
- if (use_ipv6 || conf->flags & VXLAN_F_COLLECT_METADATA)
- needed_headroom += VXLAN6_HEADROOM;
- else
- needed_headroom += VXLAN_HEADROOM;
- dev->needed_headroom = needed_headroom;
-
- memcpy(&vxlan->cfg, conf, sizeof(*conf));
- if (!vxlan->cfg.dst_port) {
- if (conf->flags & VXLAN_F_GPE)
- vxlan->cfg.dst_port = 4790; /* IANA assigned VXLAN-GPE port */
- else
- vxlan->cfg.dst_port = default_port;
- }
- vxlan->flags |= conf->flags;
-
- if (!vxlan->cfg.age_interval)
- vxlan->cfg.age_interval = FDB_AGE_DEFAULT;
-
- list_for_each_entry(tmp, &vn->vxlan_list, next) {
- if (tmp->cfg.vni == conf->vni &&
- (tmp->default_dst.remote_ip.sa.sa_family == AF_INET6 ||
- tmp->cfg.saddr.sa.sa_family == AF_INET6) == use_ipv6 &&
- tmp->cfg.dst_port == vxlan->cfg.dst_port &&
- (tmp->flags & VXLAN_F_RCV_FLAGS) ==
- (vxlan->flags & VXLAN_F_RCV_FLAGS))
- return -EEXIST;
- }
-
- dev->ethtool_ops = &vxlan_ethtool_ops;
-
- /* create an fdb entry for a valid default destination */
- if (!vxlan_addr_any(&vxlan->default_dst.remote_ip)) {
- err = vxlan_fdb_create(vxlan, all_zeros_mac,
- &vxlan->default_dst.remote_ip,
- NUD_REACHABLE|NUD_PERMANENT,
- NLM_F_EXCL|NLM_F_CREATE,
- vxlan->cfg.dst_port,
- vxlan->default_dst.remote_vni,
- vxlan->default_dst.remote_ifindex,
- NTF_SELF);
- if (err)
- return err;
- }
-
- err = register_netdevice(dev);
- if (err) {
- vxlan_fdb_delete_default(vxlan);
- return err;
- }
-
- list_add(&vxlan->next, &vn->vxlan_list);
-
- return 0;
-}
-
-#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
-static int vxlan_newlink(struct net *src_net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[],
- struct netlink_ext_ack *extack)
-#else
-static int vxlan_newlink(struct net *src_net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[])
-#endif
-{
- pr_info("unsupported operation\n");
- return -EINVAL;
-}
-
-static void vxlan_dellink(struct net_device *dev, struct list_head *head)
-{
- struct vxlan_dev *vxlan = netdev_priv(dev);
- struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
-
- spin_lock(&vn->sock_lock);
- if (!hlist_unhashed(&vxlan->hlist))
- hlist_del_rcu(&vxlan->hlist);
- spin_unlock(&vn->sock_lock);
-
- list_del(&vxlan->next);
- unregister_netdevice_queue(dev, head);
-}
-
-static size_t vxlan_get_size(const struct net_device *dev)
-{
-
- return nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_ID */
- nla_total_size(sizeof(struct in6_addr)) + /* IFLA_VXLAN_GROUP{6} */
- nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LINK */
- nla_total_size(sizeof(struct in6_addr)) + /* IFLA_VXLAN_LOCAL{6} */
- nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TTL */
- nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TOS */
- nla_total_size(sizeof(__be32)) + /* IFLA_VXLAN_LABEL */
- nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LEARNING */
- nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_PROXY */
- nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_RSC */
- nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_L2MISS */
- nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_L3MISS */
- nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_COLLECT_METADATA */
- nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_AGEING */
- nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LIMIT */
- nla_total_size(sizeof(struct ifla_vxlan_port_range)) +
- nla_total_size(sizeof(__be16)) + /* IFLA_VXLAN_PORT */
- nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_CSUM */
- nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_TX */
- nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_RX */
- nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_TX */
- nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_RX */
- 0;
-}
-
-static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
-{
- const struct vxlan_dev *vxlan = netdev_priv(dev);
- const struct vxlan_rdst *dst = &vxlan->default_dst;
- struct ifla_vxlan_port_range ports = {
- .low = htons(vxlan->cfg.port_min),
- .high = htons(vxlan->cfg.port_max),
- };
-
- if (nla_put_u32(skb, IFLA_VXLAN_ID, be32_to_cpu(dst->remote_vni)))
- goto nla_put_failure;
-
- if (!vxlan_addr_any(&dst->remote_ip)) {
- if (dst->remote_ip.sa.sa_family == AF_INET) {
- if (nla_put_in_addr(skb, IFLA_VXLAN_GROUP,
- dst->remote_ip.sin.sin_addr.s_addr))
- goto nla_put_failure;
-#if IS_ENABLED(CONFIG_IPV6)
- } else {
- if (nla_put_in6_addr(skb, IFLA_VXLAN_GROUP6,
- &dst->remote_ip.sin6.sin6_addr))
- goto nla_put_failure;
-#endif
- }
- }
-
- if (dst->remote_ifindex && nla_put_u32(skb, IFLA_VXLAN_LINK, dst->remote_ifindex))
- goto nla_put_failure;
-
- if (!vxlan_addr_any(&vxlan->cfg.saddr)) {
- if (vxlan->cfg.saddr.sa.sa_family == AF_INET) {
- if (nla_put_in_addr(skb, IFLA_VXLAN_LOCAL,
- vxlan->cfg.saddr.sin.sin_addr.s_addr))
- goto nla_put_failure;
-#if IS_ENABLED(CONFIG_IPV6)
- } else {
- if (nla_put_in6_addr(skb, IFLA_VXLAN_LOCAL6,
- &vxlan->cfg.saddr.sin6.sin6_addr))
- goto nla_put_failure;
-#endif
- }
- }
-
- if (nla_put_u8(skb, IFLA_VXLAN_TTL, vxlan->cfg.ttl) ||
- nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->cfg.tos) ||
- nla_put_be32(skb, IFLA_VXLAN_LABEL, vxlan->cfg.label) ||
- nla_put_u8(skb, IFLA_VXLAN_LEARNING,
- !!(vxlan->flags & VXLAN_F_LEARN)) ||
- nla_put_u8(skb, IFLA_VXLAN_PROXY,
- !!(vxlan->flags & VXLAN_F_PROXY)) ||
- nla_put_u8(skb, IFLA_VXLAN_RSC, !!(vxlan->flags & VXLAN_F_RSC)) ||
- nla_put_u8(skb, IFLA_VXLAN_L2MISS,
- !!(vxlan->flags & VXLAN_F_L2MISS)) ||
- nla_put_u8(skb, IFLA_VXLAN_L3MISS,
- !!(vxlan->flags & VXLAN_F_L3MISS)) ||
- nla_put_u8(skb, IFLA_VXLAN_COLLECT_METADATA,
- !!(vxlan->flags & VXLAN_F_COLLECT_METADATA)) ||
- nla_put_u32(skb, IFLA_VXLAN_AGEING, vxlan->cfg.age_interval) ||
- nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->cfg.addrmax) ||
- nla_put_be16(skb, IFLA_VXLAN_PORT, vxlan->cfg.dst_port) ||
- nla_put_u8(skb, IFLA_VXLAN_UDP_CSUM,
- !(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM_TX)) ||
- nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
- !!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) ||
- nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
- !!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_RX)) ||
- nla_put_u8(skb, IFLA_VXLAN_REMCSUM_TX,
- !!(vxlan->flags & VXLAN_F_REMCSUM_TX)) ||
- nla_put_u8(skb, IFLA_VXLAN_REMCSUM_RX,
- !!(vxlan->flags & VXLAN_F_REMCSUM_RX)))
- goto nla_put_failure;
-
- if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports))
- goto nla_put_failure;
-
- if (vxlan->flags & VXLAN_F_GBP &&
- nla_put_flag(skb, IFLA_VXLAN_GBP))
- goto nla_put_failure;
-
- if (vxlan->flags & VXLAN_F_GPE &&
- nla_put_flag(skb, IFLA_VXLAN_GPE))
- goto nla_put_failure;
-
- if (vxlan->flags & VXLAN_F_REMCSUM_NOPARTIAL &&
- nla_put_flag(skb, IFLA_VXLAN_REMCSUM_NOPARTIAL))
- goto nla_put_failure;
-
- return 0;
-
-nla_put_failure:
- return -EMSGSIZE;
-}
-
-#ifdef HAVE_GET_LINK_NET
-static struct net *vxlan_get_link_net(const struct net_device *dev)
-{
- struct vxlan_dev *vxlan = netdev_priv(dev);
-
- return vxlan->net;
-}
-#endif
-
-static struct rtnl_link_ops vxlan_link_ops __read_mostly = {
- .kind = "ovs_vxlan",
- .maxtype = IFLA_VXLAN_MAX,
- .policy = vxlan_policy,
- .priv_size = sizeof(struct vxlan_dev),
- .setup = vxlan_setup,
- .validate = vxlan_validate,
- .newlink = vxlan_newlink,
- .dellink = vxlan_dellink,
- .get_size = vxlan_get_size,
- .fill_info = vxlan_fill_info,
-#ifdef HAVE_GET_LINK_NET
- .get_link_net = vxlan_get_link_net,
-#endif
-};
-
-struct net_device *rpl_vxlan_dev_create(struct net *net, const char *name,
- u8 name_assign_type,
- struct vxlan_config *conf)
-{
- struct nlattr *tb[IFLA_MAX + 1];
- struct net_device *dev;
- int err;
-
- memset(&tb, 0, sizeof(tb));
-
- dev = rtnl_create_link(net, name, name_assign_type,
- &vxlan_link_ops, tb);
- if (IS_ERR(dev))
- return dev;
-
- err = vxlan_dev_configure(net, dev, conf);
- if (err < 0) {
- free_netdev(dev);
- return ERR_PTR(err);
- }
-
- err = rtnl_configure_link(dev, NULL);
- if (err < 0) {
- LIST_HEAD(list_kill);
-
- vxlan_dellink(dev, &list_kill);
- unregister_netdevice_many(&list_kill);
- return ERR_PTR(err);
- }
-
- return dev;
-}
-EXPORT_SYMBOL_GPL(rpl_vxlan_dev_create);
-
-static void vxlan_handle_lowerdev_unregister(struct vxlan_net *vn,
- struct net_device *dev)
-{
- struct vxlan_dev *vxlan, *next;
- LIST_HEAD(list_kill);
-
- list_for_each_entry_safe(vxlan, next, &vn->vxlan_list, next) {
- struct vxlan_rdst *dst = &vxlan->default_dst;
-
- /* In case we created vxlan device with carrier
- * and we loose the carrier due to module unload
- * we also need to remove vxlan device. In other
- * cases, it's not necessary and remote_ifindex
- * is 0 here, so no matches.
- */
- if (dst->remote_ifindex == dev->ifindex)
- vxlan_dellink(vxlan->dev, &list_kill);
- }
-
- unregister_netdevice_many(&list_kill);
-}
-
-static int vxlan_netdevice_event(struct notifier_block *unused,
- unsigned long event, void *ptr)
-{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
-
- if (event == NETDEV_UNREGISTER)
- vxlan_handle_lowerdev_unregister(vn, dev);
- else if (event == NETDEV_OFFLOAD_PUSH_VXLAN)
- vxlan_push_rx_ports(dev);
-
- return NOTIFY_DONE;
-}
-
-static struct notifier_block vxlan_notifier_block __read_mostly = {
- .notifier_call = vxlan_netdevice_event,
-};
-
-static __net_init int vxlan_init_net(struct net *net)
-{
- struct vxlan_net *vn = net_generic(net, vxlan_net_id);
- unsigned int h;
-
- INIT_LIST_HEAD(&vn->vxlan_list);
- spin_lock_init(&vn->sock_lock);
-
- for (h = 0; h < PORT_HASH_SIZE; ++h)
- INIT_HLIST_HEAD(&vn->sock_list[h]);
-
- return 0;
-}
-
-static void __net_exit vxlan_exit_net(struct net *net)
-{
- struct vxlan_net *vn = net_generic(net, vxlan_net_id);
- struct vxlan_dev *vxlan, *next;
- struct net_device *dev, *aux;
- LIST_HEAD(list);
-
- rtnl_lock();
- for_each_netdev_safe(net, dev, aux)
- if (dev->rtnl_link_ops == &vxlan_link_ops)
- unregister_netdevice_queue(dev, &list);
-
- list_for_each_entry_safe(vxlan, next, &vn->vxlan_list, next) {
- /* If vxlan->dev is in the same netns, it has already been added
- * to the list by the previous loop.
- */
- if (!net_eq(dev_net(vxlan->dev), net)) {
- unregister_netdevice_queue(vxlan->dev, &list);
- }
- }
-
- unregister_netdevice_many(&list);
- rtnl_unlock();
-}
-
-static struct pernet_operations vxlan_net_ops = {
- .init = vxlan_init_net,
- .exit = vxlan_exit_net,
- .id = &vxlan_net_id,
- .size = sizeof(struct vxlan_net),
-};
-
-int rpl_vxlan_init_module(void)
-{
- int rc;
-
- get_random_bytes(&vxlan_salt, sizeof(vxlan_salt));
-
- rc = register_pernet_subsys(&vxlan_net_ops);
- if (rc)
- goto out1;
-
- rc = register_netdevice_notifier(&vxlan_notifier_block);
- if (rc)
- goto out2;
-
- rc = rtnl_link_register(&vxlan_link_ops);
- if (rc)
- goto out3;
-
- pr_info("VxLAN tunneling driver\n");
- return 0;
-out3:
- unregister_netdevice_notifier(&vxlan_notifier_block);
-out2:
- unregister_pernet_subsys(&vxlan_net_ops);
-out1:
- pr_err("Error while initializing VxLAN %d\n", rc);
- return rc;
-}
-
-void rpl_vxlan_cleanup_module(void)
-{
- rtnl_link_unregister(&vxlan_link_ops);
- unregister_netdevice_notifier(&vxlan_notifier_block);
- unregister_pernet_subsys(&vxlan_net_ops);
- /* rcu_barrier() is called by netns */
-}
-#endif
diff --git a/datapath/meter.c b/datapath/meter.c
deleted file mode 100644
index 92c9c3671..000000000
--- a/datapath/meter.c
+++ /dev/null
@@ -1,639 +0,0 @@
-/*
- * Copyright (c) 2017 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/if.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <linux/kernel.h>
-#include <linux/openvswitch.h>
-#include <linux/overflow.h>
-#include <linux/netlink.h>
-#include <linux/rculist.h>
-
-#include <net/netlink.h>
-#include <net/genetlink.h>
-#include <linux/mm.h>
-
-#include "datapath.h"
-#include "meter.h"
-
-#define METER_HASH_BUCKETS 1024
-
-static const struct nla_policy meter_policy[OVS_METER_ATTR_MAX + 1] = {
- [OVS_METER_ATTR_ID] = { .type = NLA_U32, },
- [OVS_METER_ATTR_KBPS] = { .type = NLA_FLAG },
- [OVS_METER_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
- [OVS_METER_ATTR_BANDS] = { .type = NLA_NESTED },
- [OVS_METER_ATTR_USED] = { .type = NLA_U64 },
- [OVS_METER_ATTR_CLEAR] = { .type = NLA_FLAG },
- [OVS_METER_ATTR_MAX_METERS] = { .type = NLA_U32 },
- [OVS_METER_ATTR_MAX_BANDS] = { .type = NLA_U32 },
-};
-
-static const struct nla_policy band_policy[OVS_BAND_ATTR_MAX + 1] = {
- [OVS_BAND_ATTR_TYPE] = { .type = NLA_U32, },
- [OVS_BAND_ATTR_RATE] = { .type = NLA_U32, },
- [OVS_BAND_ATTR_BURST] = { .type = NLA_U32, },
- [OVS_BAND_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
-};
-
-static void ovs_meter_free(struct dp_meter *meter)
-{
- if (!meter)
- return;
-
- kfree_rcu(meter, rcu);
-}
-
-static struct hlist_head *meter_hash_bucket(const struct datapath *dp,
- u32 meter_id)
-{
- return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)];
-}
-
-/* Call with ovs_mutex or RCU read lock. */
-static struct dp_meter *lookup_meter(const struct datapath *dp,
- u32 meter_id)
-{
- struct dp_meter *meter;
- struct hlist_head *head;
-
- head = meter_hash_bucket(dp, meter_id);
- hlist_for_each_entry_rcu(meter, head, dp_hash_node) {
- if (meter->id == meter_id)
- return meter;
- }
- return NULL;
-}
-
-static void attach_meter(struct datapath *dp, struct dp_meter *meter)
-{
- struct hlist_head *head = meter_hash_bucket(dp, meter->id);
-
- hlist_add_head_rcu(&meter->dp_hash_node, head);
-}
-
-static void detach_meter(struct dp_meter *meter)
-{
- ASSERT_OVSL();
- if (meter)
- hlist_del_rcu(&meter->dp_hash_node);
-}
-
-static struct sk_buff *
-ovs_meter_cmd_reply_start(struct genl_info *info, u8 cmd,
- struct ovs_header **ovs_reply_header)
-{
- struct sk_buff *skb;
- struct ovs_header *ovs_header = info->userhdr;
-
- skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
- if (!skb)
- return ERR_PTR(-ENOMEM);
-
- *ovs_reply_header = genlmsg_put(skb, info->snd_portid,
- info->snd_seq,
- &dp_meter_genl_family, 0, cmd);
- if (!*ovs_reply_header) {
- nlmsg_free(skb);
- return ERR_PTR(-EMSGSIZE);
- }
- (*ovs_reply_header)->dp_ifindex = ovs_header->dp_ifindex;
-
- return skb;
-}
-
-static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id,
- struct dp_meter *meter)
-{
- struct nlattr *nla;
- struct dp_meter_band *band;
- u16 i;
-
- if (nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id))
- goto error;
-
- if (!meter)
- return 0;
-
- if (nla_put(reply, OVS_METER_ATTR_STATS,
- sizeof(struct ovs_flow_stats), &meter->stats) ||
- nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used,
- OVS_METER_ATTR_PAD))
- goto error;
-
- nla = nla_nest_start_noflag(reply, OVS_METER_ATTR_BANDS);
- if (!nla)
- goto error;
-
- band = meter->bands;
-
- for (i = 0; i < meter->n_bands; ++i, ++band) {
- struct nlattr *band_nla;
-
- band_nla = nla_nest_start_noflag(reply, OVS_BAND_ATTR_UNSPEC);
- if (!band_nla || nla_put(reply, OVS_BAND_ATTR_STATS,
- sizeof(struct ovs_flow_stats),
- &band->stats))
- goto error;
- nla_nest_end(reply, band_nla);
- }
- nla_nest_end(reply, nla);
-
- return 0;
-error:
- return -EMSGSIZE;
-}
-
-static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info)
-{
- struct sk_buff *reply;
- struct ovs_header *ovs_reply_header;
- struct nlattr *nla, *band_nla;
- int err;
-
- reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_FEATURES,
- &ovs_reply_header);
- if (IS_ERR(reply))
- return PTR_ERR(reply);
-
- if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, U32_MAX) ||
- nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS))
- goto nla_put_failure;
-
- nla = nla_nest_start_noflag(reply, OVS_METER_ATTR_BANDS);
- if (!nla)
- goto nla_put_failure;
-
- band_nla = nla_nest_start_noflag(reply, OVS_BAND_ATTR_UNSPEC);
- if (!band_nla)
- goto nla_put_failure;
- /* Currently only DROP band type is supported. */
- if (nla_put_u32(reply, OVS_BAND_ATTR_TYPE, OVS_METER_BAND_TYPE_DROP))
- goto nla_put_failure;
- nla_nest_end(reply, band_nla);
- nla_nest_end(reply, nla);
-
- genlmsg_end(reply, ovs_reply_header);
- return genlmsg_reply(reply, info);
-
-nla_put_failure:
- nlmsg_free(reply);
- err = -EMSGSIZE;
- return err;
-}
-
-#ifndef HAVE_KTIME_GET_NS
-#ifndef ktime_to_ns
-#define ktime_to_ns(kt) ((kt).tv64)
-#endif
-static inline u64 ktime_get_ns(void)
-{
- return ktime_to_ns(ktime_get());
-}
-#endif
-
-static struct dp_meter *dp_meter_create(struct nlattr **a)
-{
- struct nlattr *nla;
- int rem;
- u16 n_bands = 0;
- struct dp_meter *meter;
- struct dp_meter_band *band;
- int err;
-
- /* Validate attributes, count the bands. */
- if (!a[OVS_METER_ATTR_BANDS])
- return ERR_PTR(-EINVAL);
-
- nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem)
- if (++n_bands > DP_MAX_BANDS)
- return ERR_PTR(-EINVAL);
-
- /* Allocate and set up the meter before locking anything. */
- meter = kzalloc(struct_size(meter, bands, n_bands), GFP_KERNEL);
- if (!meter)
- return ERR_PTR(-ENOMEM);
-
- meter->id = nla_get_u32(a[OVS_METER_ATTR_ID]);
- meter->used = div_u64(ktime_get_ns(), 1000 * 1000);
- meter->kbps = a[OVS_METER_ATTR_KBPS] ? 1 : 0;
- meter->keep_stats = !a[OVS_METER_ATTR_CLEAR];
- spin_lock_init(&meter->lock);
- if (meter->keep_stats && a[OVS_METER_ATTR_STATS]) {
- meter->stats = *(struct ovs_flow_stats *)
- nla_data(a[OVS_METER_ATTR_STATS]);
- }
- meter->n_bands = n_bands;
-
- /* Set up meter bands. */
- band = meter->bands;
- nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem) {
- struct nlattr *attr[OVS_BAND_ATTR_MAX + 1];
- u32 band_max_delta_t;
-
- err = nla_parse_deprecated_strict((struct nlattr **)&attr,
- OVS_BAND_ATTR_MAX,
- nla_data(nla),
- nla_len(nla),
- band_policy, NULL);
- if (err)
- goto exit_free_meter;
-
- if (!attr[OVS_BAND_ATTR_TYPE] ||
- !attr[OVS_BAND_ATTR_RATE] ||
- !attr[OVS_BAND_ATTR_BURST]) {
- err = -EINVAL;
- goto exit_free_meter;
- }
-
- band->type = nla_get_u32(attr[OVS_BAND_ATTR_TYPE]);
- band->rate = nla_get_u32(attr[OVS_BAND_ATTR_RATE]);
- if (band->rate == 0) {
- err = -EINVAL;
- goto exit_free_meter;
- }
-
- band->burst_size = nla_get_u32(attr[OVS_BAND_ATTR_BURST]);
- /* Figure out max delta_t that is enough to fill any bucket.
- * Keep max_delta_t size to the bucket units:
- * pkts => 1/1000 packets, kilobits => bits.
- *
- * Start with a full bucket.
- */
- band->bucket = (band->burst_size + band->rate) * 1000;
- band_max_delta_t = band->bucket / band->rate;
- if (band_max_delta_t > meter->max_delta_t)
- meter->max_delta_t = band_max_delta_t;
- band++;
- }
-
- return meter;
-
-exit_free_meter:
- kfree(meter);
- return ERR_PTR(err);
-}
-
-static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
-{
- struct nlattr **a = info->attrs;
- struct dp_meter *meter, *old_meter;
- struct sk_buff *reply;
- struct ovs_header *ovs_reply_header;
- struct ovs_header *ovs_header = info->userhdr;
- struct datapath *dp;
- int err;
- u32 meter_id;
- bool failed;
-
- if (!a[OVS_METER_ATTR_ID]) {
- return -ENODEV;
- }
-
- meter = dp_meter_create(a);
- if (IS_ERR_OR_NULL(meter))
- return PTR_ERR(meter);
-
- reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_SET,
- &ovs_reply_header);
- if (IS_ERR(reply)) {
- err = PTR_ERR(reply);
- goto exit_free_meter;
- }
-
- ovs_lock();
- dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
- if (!dp) {
- err = -ENODEV;
- goto exit_unlock;
- }
-
- meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
-
- /* Cannot fail after this. */
- old_meter = lookup_meter(dp, meter_id);
- detach_meter(old_meter);
- attach_meter(dp, meter);
- ovs_unlock();
-
- /* Build response with the meter_id and stats from
- * the old meter, if any.
- */
- failed = nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id);
- WARN_ON(failed);
- if (old_meter) {
- spin_lock_bh(&old_meter->lock);
- if (old_meter->keep_stats) {
- err = ovs_meter_cmd_reply_stats(reply, meter_id,
- old_meter);
- WARN_ON(err);
- }
- spin_unlock_bh(&old_meter->lock);
- ovs_meter_free(old_meter);
- }
-
- genlmsg_end(reply, ovs_reply_header);
- return genlmsg_reply(reply, info);
-
-exit_unlock:
- ovs_unlock();
- nlmsg_free(reply);
-exit_free_meter:
- kfree(meter);
- return err;
-}
-
-static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
-{
- struct nlattr **a = info->attrs;
- u32 meter_id;
- struct ovs_header *ovs_header = info->userhdr;
- struct ovs_header *ovs_reply_header;
- struct datapath *dp;
- int err;
- struct sk_buff *reply;
- struct dp_meter *meter;
-
- if (!a[OVS_METER_ATTR_ID])
- return -EINVAL;
-
- meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
-
- reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_GET,
- &ovs_reply_header);
- if (IS_ERR(reply))
- return PTR_ERR(reply);
-
- ovs_lock();
-
- dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
- if (!dp) {
- err = -ENODEV;
- goto exit_unlock;
- }
-
- /* Locate meter, copy stats. */
- meter = lookup_meter(dp, meter_id);
- if (!meter) {
- err = -ENOENT;
- goto exit_unlock;
- }
-
- spin_lock_bh(&meter->lock);
- err = ovs_meter_cmd_reply_stats(reply, meter_id, meter);
- spin_unlock_bh(&meter->lock);
- if (err)
- goto exit_unlock;
-
- ovs_unlock();
-
- genlmsg_end(reply, ovs_reply_header);
- return genlmsg_reply(reply, info);
-
-exit_unlock:
- ovs_unlock();
- nlmsg_free(reply);
- return err;
-}
-
-static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
-{
- struct nlattr **a = info->attrs;
- u32 meter_id;
- struct ovs_header *ovs_header = info->userhdr;
- struct ovs_header *ovs_reply_header;
- struct datapath *dp;
- int err;
- struct sk_buff *reply;
- struct dp_meter *old_meter;
-
- if (!a[OVS_METER_ATTR_ID])
- return -EINVAL;
- meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
-
- reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_DEL,
- &ovs_reply_header);
- if (IS_ERR(reply))
- return PTR_ERR(reply);
-
- ovs_lock();
-
- dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
- if (!dp) {
- err = -ENODEV;
- goto exit_unlock;
- }
-
- old_meter = lookup_meter(dp, meter_id);
- if (old_meter) {
- spin_lock_bh(&old_meter->lock);
- err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter);
- WARN_ON(err);
- spin_unlock_bh(&old_meter->lock);
- detach_meter(old_meter);
- }
- ovs_unlock();
- ovs_meter_free(old_meter);
- genlmsg_end(reply, ovs_reply_header);
- return genlmsg_reply(reply, info);
-
-exit_unlock:
- ovs_unlock();
- nlmsg_free(reply);
- return err;
-}
-
-/* Meter action execution.
- *
- * Return true 'meter_id' drop band is triggered. The 'skb' should be
- * dropped by the caller'.
- */
-bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
- struct sw_flow_key *key, u32 meter_id)
-{
- struct dp_meter *meter;
- struct dp_meter_band *band;
- long long int now_ms = div_u64(ktime_get_ns(), 1000 * 1000);
- long long int long_delta_ms;
- u32 delta_ms;
- u32 cost;
- int i, band_exceeded_max = -1;
- u32 band_exceeded_rate = 0;
-
- meter = lookup_meter(dp, meter_id);
- /* Do not drop the packet when there is no meter. */
- if (!meter)
- return false;
-
- /* Lock the meter while using it. */
- spin_lock(&meter->lock);
-
- long_delta_ms = (now_ms - meter->used); /* ms */
-
- /* Make sure delta_ms will not be too large, so that bucket will not
- * wrap around below.
- */
- delta_ms = (long_delta_ms > (long long int)meter->max_delta_t)
- ? meter->max_delta_t : (u32)long_delta_ms;
-
- /* Update meter statistics.
- */
- meter->used = now_ms;
- meter->stats.n_packets += 1;
- meter->stats.n_bytes += skb->len;
-
- /* Bucket rate is either in kilobits per second, or in packets per
- * second. We maintain the bucket in the units of either bits or
- * 1/1000th of a packet, correspondingly.
- * Then, when rate is multiplied with milliseconds, we get the
- * bucket units:
- * msec * kbps = bits, and
- * msec * packets/sec = 1/1000 packets.
- *
- * 'cost' is the number of bucket units in this packet.
- */
- cost = (meter->kbps) ? skb->len * 8 : 1000;
-
- /* Update all bands and find the one hit with the highest rate. */
- for (i = 0; i < meter->n_bands; ++i) {
- long long int max_bucket_size;
-
- band = &meter->bands[i];
- max_bucket_size = (band->burst_size + band->rate) * 1000LL;
-
- band->bucket += delta_ms * band->rate;
- if (band->bucket > max_bucket_size)
- band->bucket = max_bucket_size;
-
- if (band->bucket >= cost) {
- band->bucket -= cost;
- } else if (band->rate > band_exceeded_rate) {
- band_exceeded_rate = band->rate;
- band_exceeded_max = i;
- }
- }
-
- if (band_exceeded_max >= 0) {
- /* Update band statistics. */
- band = &meter->bands[band_exceeded_max];
- band->stats.n_packets += 1;
- band->stats.n_bytes += skb->len;
-
- /* Drop band triggered, let the caller drop the 'skb'. */
- if (band->type == OVS_METER_BAND_TYPE_DROP) {
- spin_unlock(&meter->lock);
- return true;
- }
- }
-
- spin_unlock(&meter->lock);
- return false;
-}
-
-static struct genl_ops dp_meter_genl_ops[] = {
- { .cmd = OVS_METER_CMD_FEATURES,
-#ifdef HAVE_GENL_VALIDATE_FLAGS
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-#endif
- .flags = 0, /* OK for unprivileged users. */
-#ifdef HAVE_GENL_OPS_POLICY
- .policy = meter_policy,
-#endif
- .doit = ovs_meter_cmd_features
- },
- { .cmd = OVS_METER_CMD_SET,
-#ifdef HAVE_GENL_VALIDATE_FLAGS
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-#endif
- .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
- * privilege.
- */
-#ifdef HAVE_GENL_OPS_POLICY
- .policy = meter_policy,
-#endif
- .doit = ovs_meter_cmd_set,
- },
- { .cmd = OVS_METER_CMD_GET,
-#ifdef HAVE_GENL_VALIDATE_FLAGS
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-#endif
- .flags = 0, /* OK for unprivileged users. */
-#ifdef HAVE_GENL_OPS_POLICY
- .policy = meter_policy,
-#endif
- .doit = ovs_meter_cmd_get,
- },
- { .cmd = OVS_METER_CMD_DEL,
-#ifdef HAVE_GENL_VALIDATE_FLAGS
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-#endif
- .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
- * privilege.
- */
-#ifdef HAVE_GENL_OPS_POLICY
- .policy = meter_policy,
-#endif
- .doit = ovs_meter_cmd_del
- },
-};
-
-static const struct genl_multicast_group ovs_meter_multicast_group = {
- .name = OVS_METER_MCGROUP,
-};
-
-struct genl_family dp_meter_genl_family __ro_after_init = {
- .hdrsize = sizeof(struct ovs_header),
- .name = OVS_METER_FAMILY,
- .version = OVS_METER_VERSION,
- .maxattr = OVS_METER_ATTR_MAX,
-#ifndef HAVE_GENL_OPS_POLICY
- .policy = meter_policy,
-#endif
- .netnsok = true,
- .parallel_ops = true,
- .ops = dp_meter_genl_ops,
- .n_ops = ARRAY_SIZE(dp_meter_genl_ops),
- .mcgrps = &ovs_meter_multicast_group,
- .n_mcgrps = 1,
- .module = THIS_MODULE,
-};
-
-int ovs_meters_init(struct datapath *dp)
-{
- int i;
-
- dp->meters = kmalloc_array(METER_HASH_BUCKETS,
- sizeof(struct hlist_head), GFP_KERNEL);
-
- if (!dp->meters)
- return -ENOMEM;
-
- for (i = 0; i < METER_HASH_BUCKETS; i++)
- INIT_HLIST_HEAD(&dp->meters[i]);
-
- return 0;
-}
-
-void ovs_meters_exit(struct datapath *dp)
-{
- int i;
-
- for (i = 0; i < METER_HASH_BUCKETS; i++) {
- struct hlist_head *head = &dp->meters[i];
- struct dp_meter *meter;
- struct hlist_node *n;
-
- hlist_for_each_entry_safe(meter, n, head, dp_hash_node)
- kfree(meter);
- }
-
- kfree(dp->meters);
-}
diff --git a/datapath/meter.h b/datapath/meter.h
deleted file mode 100644
index 964ace265..000000000
--- a/datapath/meter.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2017 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-
-#ifndef METER_H
-#define METER_H 1
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/netlink.h>
-#include <linux/openvswitch.h>
-#include <linux/genetlink.h>
-#include <linux/skbuff.h>
-
-#include "flow.h"
-struct datapath;
-
-#define DP_MAX_BANDS 1
-
-struct dp_meter_band {
- u32 type;
- u32 rate;
- u32 burst_size;
- u32 bucket; /* 1/1000 packets, or in bits */
- struct ovs_flow_stats stats;
-};
-
-struct dp_meter {
- spinlock_t lock; /* Per meter lock */
- struct rcu_head rcu;
- struct hlist_node dp_hash_node; /*Element in datapath->meters
- * hash table.
- */
- u32 id;
- u16 kbps:1, keep_stats:1;
- u16 n_bands;
- u32 max_delta_t;
- u64 used;
- struct ovs_flow_stats stats;
- struct dp_meter_band bands[];
-};
-
-extern struct genl_family dp_meter_genl_family;
-int ovs_meters_init(struct datapath *dp);
-void ovs_meters_exit(struct datapath *dp);
-bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
- struct sw_flow_key *key, u32 meter_id);
-
-#endif /* meter.h */
diff --git a/datapath/nsh.c b/datapath/nsh.c
deleted file mode 100644
index 9e583edbe..000000000
--- a/datapath/nsh.c
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Network Service Header
- *
- * Copyright (c) 2017 Red Hat, Inc. -- Jiri Benc <jbenc@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <net/nsh.h>
-#include <net/tun_proto.h>
-
-int ovs_nsh_push(struct sk_buff *skb, const struct nshhdr *pushed_nh)
-{
- struct nshhdr *nh;
- size_t length = nsh_hdr_len(pushed_nh);
- u8 next_proto;
-
- if (skb->mac_len) {
- next_proto = TUN_P_ETHERNET;
- } else {
- next_proto = tun_p_from_eth_p(skb->protocol);
- if (!next_proto)
- return -EAFNOSUPPORT;
- }
-
- /* Add the NSH header */
- if (skb_cow_head(skb, length) < 0)
- return -ENOMEM;
-
- skb_push(skb, length);
- nh = (struct nshhdr *)(skb->data);
- memcpy(nh, pushed_nh, length);
- nh->np = next_proto;
- skb_postpush_rcsum(skb, nh, length);
-
- skb->protocol = htons(ETH_P_NSH);
- skb_reset_mac_header(skb);
- skb_reset_network_header(skb);
- skb_reset_mac_len(skb);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(ovs_nsh_push);
-
-int ovs_nsh_pop(struct sk_buff *skb)
-{
- struct nshhdr *nh;
- size_t length;
- __be16 inner_proto;
-
- if (!pskb_may_pull(skb, NSH_BASE_HDR_LEN))
- return -ENOMEM;
- nh = (struct nshhdr *)(skb->data);
- length = nsh_hdr_len(nh);
- inner_proto = tun_p_to_eth_p(nh->np);
- if (!pskb_may_pull(skb, length))
- return -ENOMEM;
-
- if (!inner_proto)
- return -EAFNOSUPPORT;
-
- skb_pull_rcsum(skb, length);
- skb_reset_mac_header(skb);
- skb_reset_network_header(skb);
- skb_reset_mac_len(skb);
- skb->protocol = inner_proto;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(ovs_nsh_pop);
-
-static struct sk_buff *nsh_gso_segment(struct sk_buff *skb,
- netdev_features_t features)
-{
- struct sk_buff *segs = ERR_PTR(-EINVAL);
- unsigned int nsh_len, mac_len;
- __be16 proto;
- int nhoff;
-
- skb_reset_network_header(skb);
-
- nhoff = skb->network_header - skb->mac_header;
- mac_len = skb->mac_len;
-
- if (unlikely(!pskb_may_pull(skb, NSH_BASE_HDR_LEN)))
- goto out;
- nsh_len = nsh_hdr_len(nsh_hdr(skb));
- if (unlikely(!pskb_may_pull(skb, nsh_len)))
- goto out;
-
- proto = tun_p_to_eth_p(nsh_hdr(skb)->np);
- if (!proto)
- goto out;
-
- __skb_pull(skb, nsh_len);
-
- skb_reset_mac_header(skb);
- skb_reset_mac_len(skb);
- skb->protocol = proto;
-
- features &= NETIF_F_SG;
- segs = skb_mac_gso_segment(skb, features);
- if (IS_ERR_OR_NULL(segs)) {
- skb_gso_error_unwind(skb, htons(ETH_P_NSH), nsh_len,
- skb->network_header - nhoff,
- mac_len);
- goto out;
- }
-
- for (skb = segs; skb; skb = skb->next) {
- skb->protocol = htons(ETH_P_NSH);
- __skb_push(skb, nsh_len);
- skb_set_mac_header(skb, -nhoff);
- skb->network_header = skb->mac_header + mac_len;
- skb->mac_len = mac_len;
- }
-
-out:
- return segs;
-}
-
-static struct packet_offload nsh_packet_offload __read_mostly = {
- .type = htons(ETH_P_NSH),
- .callbacks = {
- .gso_segment = nsh_gso_segment,
- },
-};
-
-int ovs_nsh_init(void)
-{
- dev_add_offload(&nsh_packet_offload);
- return 0;
-}
-
-void ovs_nsh_cleanup(void)
-{
- dev_remove_offload(&nsh_packet_offload);
-}
diff --git a/datapath/vport-geneve.c b/datapath/vport-geneve.c
deleted file mode 100644
index a5b91246f..000000000
--- a/datapath/vport-geneve.c
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
- * Copyright (c) 2015 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/net.h>
-#include <linux/rculist.h>
-#include <linux/udp.h>
-#include <linux/if_vlan.h>
-#include <linux/module.h>
-
-#include <net/geneve.h>
-#include <net/icmp.h>
-#include <net/ip.h>
-#include <net/route.h>
-#include <net/udp.h>
-#include <net/xfrm.h>
-
-#include "datapath.h"
-#include "vport.h"
-#include "vport-netdev.h"
-
-static struct vport_ops ovs_geneve_vport_ops;
-/**
- * struct geneve_port - Keeps track of open UDP ports
- * @dst_port: destination port.
- */
-struct geneve_port {
- u16 dst_port;
-};
-
-static inline struct geneve_port *geneve_vport(const struct vport *vport)
-{
- return vport_priv(vport);
-}
-
-static int geneve_get_options(const struct vport *vport,
- struct sk_buff *skb)
-{
- struct geneve_port *geneve_port = geneve_vport(vport);
-
- if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, geneve_port->dst_port))
- return -EMSGSIZE;
- return 0;
-}
-
-static struct vport *geneve_tnl_create(const struct vport_parms *parms)
-{
- struct net *net = ovs_dp_get_net(parms->dp);
- struct nlattr *options = parms->options;
- struct geneve_port *geneve_port;
- struct net_device *dev;
- struct vport *vport;
- struct nlattr *a;
- u16 dst_port;
- int err;
-
- if (!options) {
- err = -EINVAL;
- goto error;
- }
-
- a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT);
- if (a && nla_len(a) == sizeof(u16)) {
- dst_port = nla_get_u16(a);
- } else {
- /* Require destination port from userspace. */
- err = -EINVAL;
- goto error;
- }
-
- vport = ovs_vport_alloc(sizeof(struct geneve_port),
- &ovs_geneve_vport_ops, parms);
- if (IS_ERR(vport))
- return vport;
-
- geneve_port = geneve_vport(vport);
- geneve_port->dst_port = dst_port;
-
- rtnl_lock();
- dev = geneve_dev_create_fb(net, parms->name, NET_NAME_USER, dst_port);
- if (IS_ERR(dev)) {
- rtnl_unlock();
- ovs_vport_free(vport);
- return ERR_CAST(dev);
- }
-
- err = dev_change_flags(dev, dev->flags | IFF_UP, NULL);
- if (err < 0) {
- rtnl_delete_link(dev);
- rtnl_unlock();
- ovs_vport_free(vport);
- goto error;
- }
-
- rtnl_unlock();
- return vport;
-error:
- return ERR_PTR(err);
-}
-
-static struct vport *geneve_create(const struct vport_parms *parms)
-{
- struct vport *vport;
-
- vport = geneve_tnl_create(parms);
- if (IS_ERR(vport))
- return vport;
-
- return ovs_netdev_link(vport, parms->name);
-}
-
-static struct vport_ops ovs_geneve_vport_ops = {
- .type = OVS_VPORT_TYPE_GENEVE,
- .create = geneve_create,
- .destroy = ovs_netdev_tunnel_destroy,
- .get_options = geneve_get_options,
-#ifndef USE_UPSTREAM_TUNNEL
- .fill_metadata_dst = geneve_fill_metadata_dst,
-#endif
- .send = geneve_xmit,
-};
-
-static int __init ovs_geneve_tnl_init(void)
-{
- return ovs_vport_ops_register(&ovs_geneve_vport_ops);
-}
-
-static void __exit ovs_geneve_tnl_exit(void)
-{
- ovs_vport_ops_unregister(&ovs_geneve_vport_ops);
-}
-
-module_init(ovs_geneve_tnl_init);
-module_exit(ovs_geneve_tnl_exit);
-
-MODULE_DESCRIPTION("OVS: Geneve switching port");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("vport-type-5");
diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c
deleted file mode 100644
index 07a8c19df..000000000
--- a/datapath/vport-gre.c
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Copyright (c) 2007-2015 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/if.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <linux/if_tunnel.h>
-#include <linux/if_vlan.h>
-#include <linux/in.h>
-#include <linux/in_route.h>
-#include <linux/inetdevice.h>
-#include <linux/jhash.h>
-#include <linux/list.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/workqueue.h>
-#include <linux/rculist.h>
-#include <net/route.h>
-#include <net/xfrm.h>
-
-#include <net/icmp.h>
-#include <net/ip.h>
-#include <net/ip_tunnels.h>
-#include <net/gre.h>
-#include <net/net_namespace.h>
-#include <net/netns/generic.h>
-#include <net/protocol.h>
-
-#include "datapath.h"
-#include "vport.h"
-#include "vport-netdev.h"
-
-static struct vport_ops ovs_gre_vport_ops;
-
-static struct vport *gre_tnl_create(const struct vport_parms *parms)
-{
- struct net *net = ovs_dp_get_net(parms->dp);
- struct net_device *dev;
- struct vport *vport;
- int err;
-
- vport = ovs_vport_alloc(0, &ovs_gre_vport_ops, parms);
- if (IS_ERR(vport))
- return vport;
-
- rtnl_lock();
- dev = gretap_fb_dev_create(net, parms->name, NET_NAME_USER);
- if (IS_ERR(dev)) {
- rtnl_unlock();
- ovs_vport_free(vport);
- return ERR_CAST(dev);
- }
-
- err = dev_change_flags(dev, dev->flags | IFF_UP, NULL);
- if (err < 0) {
- rtnl_delete_link(dev);
- rtnl_unlock();
- ovs_vport_free(vport);
- return ERR_PTR(err);
- }
-
- rtnl_unlock();
- return vport;
-}
-
-static struct vport *gre_create(const struct vport_parms *parms)
-{
- struct vport *vport;
-
- vport = gre_tnl_create(parms);
- if (IS_ERR(vport))
- return vport;
-
- return ovs_netdev_link(vport, parms->name);
-}
-
-static struct vport_ops ovs_gre_vport_ops = {
- .type = OVS_VPORT_TYPE_GRE,
- .create = gre_create,
- .send = gre_fb_xmit,
-#ifndef USE_UPSTREAM_TUNNEL
- .fill_metadata_dst = gre_fill_metadata_dst,
-#endif
- .destroy = ovs_netdev_tunnel_destroy,
-};
-
-static int __init ovs_gre_tnl_init(void)
-{
- return ovs_vport_ops_register(&ovs_gre_vport_ops);
-}
-
-static void __exit ovs_gre_tnl_exit(void)
-{
- ovs_vport_ops_unregister(&ovs_gre_vport_ops);
-}
-
-module_init(ovs_gre_tnl_init);
-module_exit(ovs_gre_tnl_exit);
-
-MODULE_DESCRIPTION("OVS: GRE switching port");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("vport-type-3");
diff --git a/datapath/vport-internal_dev.c b/datapath/vport-internal_dev.c
deleted file mode 100644
index dbc200231..000000000
--- a/datapath/vport-internal_dev.c
+++ /dev/null
@@ -1,340 +0,0 @@
-/*
- * Copyright (c) 2007-2012 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
- */
-
-#include <linux/if_vlan.h>
-#include <linux/kernel.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/ethtool.h>
-#include <linux/skbuff.h>
-
-#include <net/dst.h>
-#include <net/xfrm.h>
-#include <net/rtnetlink.h>
-
-#include "datapath.h"
-#include "vport-internal_dev.h"
-#include "vport-netdev.h"
-
-struct internal_dev {
- struct vport *vport;
-};
-
-static struct vport_ops ovs_internal_vport_ops;
-
-static struct internal_dev *internal_dev_priv(struct net_device *netdev)
-{
- return netdev_priv(netdev);
-}
-
-/* Called with rcu_read_lock_bh. */
-static netdev_tx_t
-internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev)
-{
- int len, err;
-
- len = skb->len;
- rcu_read_lock();
- err = ovs_vport_receive(internal_dev_priv(netdev)->vport, skb, NULL);
- rcu_read_unlock();
-
- if (likely(!err)) {
- struct pcpu_sw_netstats *tstats = this_cpu_ptr(netdev->tstats);
-
- u64_stats_update_begin(&tstats->syncp);
- tstats->tx_bytes += len;
- tstats->tx_packets++;
- u64_stats_update_end(&tstats->syncp);
- } else {
- netdev->stats.tx_errors++;
- }
- return NETDEV_TX_OK;
-}
-
-static int internal_dev_open(struct net_device *netdev)
-{
- netif_start_queue(netdev);
- return 0;
-}
-
-static int internal_dev_stop(struct net_device *netdev)
-{
- netif_stop_queue(netdev);
- return 0;
-}
-
-static void internal_dev_getinfo(struct net_device *netdev,
- struct ethtool_drvinfo *info)
-{
- strlcpy(info->driver, "openvswitch", sizeof(info->driver));
-}
-
-static const struct ethtool_ops internal_dev_ethtool_ops = {
- .get_drvinfo = internal_dev_getinfo,
- .get_link = ethtool_op_get_link,
-};
-
-#if !defined(HAVE_NET_DEVICE_WITH_MAX_MTU) && !defined(HAVE_RHEL7_MAX_MTU)
-static int internal_dev_change_mtu(struct net_device *dev, int new_mtu)
-{
- if (new_mtu < ETH_MIN_MTU) {
- net_err_ratelimited("%s: Invalid MTU %d requested, hw min %d\n",
- dev->name, new_mtu, ETH_MIN_MTU);
- return -EINVAL;
- }
-
- if (new_mtu > ETH_MAX_MTU) {
- net_err_ratelimited("%s: Invalid MTU %d requested, hw max %d\n",
- dev->name, new_mtu, ETH_MAX_MTU);
- return -EINVAL;
- }
-
- dev->mtu = new_mtu;
- return 0;
-}
-#endif
-
-static void internal_dev_destructor(struct net_device *dev)
-{
- struct vport *vport = ovs_internal_dev_get_vport(dev);
-
- ovs_vport_free(vport);
-#ifndef HAVE_NEEDS_FREE_NETDEV
- free_netdev(dev);
-#endif
-}
-
-static void
-internal_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
-{
- int i;
-
- memset(stats, 0, sizeof(*stats));
- stats->rx_errors = dev->stats.rx_errors;
- stats->tx_errors = dev->stats.tx_errors;
- stats->tx_dropped = dev->stats.tx_dropped;
- stats->rx_dropped = dev->stats.rx_dropped;
-
- for_each_possible_cpu(i) {
- const struct pcpu_sw_netstats *percpu_stats;
- struct pcpu_sw_netstats local_stats;
- unsigned int start;
-
- percpu_stats = per_cpu_ptr(dev->tstats, i);
-
- do {
- start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
- local_stats = *percpu_stats;
- } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));
-
- stats->rx_bytes += local_stats.rx_bytes;
- stats->rx_packets += local_stats.rx_packets;
- stats->tx_bytes += local_stats.tx_bytes;
- stats->tx_packets += local_stats.tx_packets;
- }
-}
-
-static const struct net_device_ops internal_dev_netdev_ops = {
- .ndo_open = internal_dev_open,
- .ndo_stop = internal_dev_stop,
- .ndo_start_xmit = internal_dev_xmit,
- .ndo_set_mac_address = eth_mac_addr,
-#if !defined(HAVE_NET_DEVICE_WITH_MAX_MTU) && !defined(HAVE_RHEL7_MAX_MTU)
- .ndo_change_mtu = internal_dev_change_mtu,
-#endif
- .ndo_get_stats64 = (void *)internal_get_stats,
-};
-
-static struct rtnl_link_ops internal_dev_link_ops __read_mostly = {
- .kind = "openvswitch",
-};
-
-static void do_setup(struct net_device *netdev)
-{
- ether_setup(netdev);
-
-#ifdef HAVE_NET_DEVICE_WITH_MAX_MTU
- netdev->max_mtu = ETH_MAX_MTU;
-#elif defined(HAVE_RHEL7_MAX_MTU)
- netdev->extended->max_mtu = ETH_MAX_MTU;
-#endif
- netdev->netdev_ops = &internal_dev_netdev_ops;
-
- netdev->priv_flags &= ~IFF_TX_SKB_SHARING;
- netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH |
- IFF_NO_QUEUE;
-#ifndef HAVE_NEEDS_FREE_NETDEV
- netdev->destructor = internal_dev_destructor;
-#else
- netdev->needs_free_netdev = true;
- netdev->priv_destructor = internal_dev_destructor;
-#endif /* HAVE_NEEDS_FREE_NETDEV */
- netdev->ethtool_ops = &internal_dev_ethtool_ops;
- netdev->rtnl_link_ops = &internal_dev_link_ops;
-
-#ifndef HAVE_IFF_NO_QUEUE
- netdev->tx_queue_len = 0;
-#endif
-
- netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST |
- NETIF_F_HIGHDMA | NETIF_F_HW_CSUM |
- NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL;
-
- netdev->vlan_features = netdev->features;
- netdev->hw_enc_features = netdev->features;
- netdev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
- netdev->hw_features = netdev->features & ~NETIF_F_LLTX;
-
- eth_hw_addr_random(netdev);
-}
-
-static struct vport *internal_dev_create(const struct vport_parms *parms)
-{
- struct vport *vport;
- struct internal_dev *internal_dev;
- int err;
-
- vport = ovs_vport_alloc(0, &ovs_internal_vport_ops, parms);
- if (IS_ERR(vport)) {
- err = PTR_ERR(vport);
- goto error;
- }
-
- vport->dev = alloc_netdev(sizeof(struct internal_dev),
- parms->name, NET_NAME_USER, do_setup);
- if (!vport->dev) {
- err = -ENOMEM;
- goto error_free_vport;
- }
- vport->dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!vport->dev->tstats) {
- err = -ENOMEM;
- goto error_free_netdev;
- }
-
- dev_net_set(vport->dev, ovs_dp_get_net(vport->dp));
- internal_dev = internal_dev_priv(vport->dev);
- internal_dev->vport = vport;
-
- /* Restrict bridge port to current netns. */
- if (vport->port_no == OVSP_LOCAL)
- vport->dev->features |= NETIF_F_NETNS_LOCAL;
-
- rtnl_lock();
- err = register_netdevice(vport->dev);
- if (err)
- goto error_unlock;
-
- dev_set_promiscuity(vport->dev, 1);
- rtnl_unlock();
- netif_start_queue(vport->dev);
-
- return vport;
-
-error_unlock:
- rtnl_unlock();
- free_percpu(vport->dev->tstats);
-error_free_netdev:
- free_netdev(vport->dev);
-error_free_vport:
- ovs_vport_free(vport);
-error:
- return ERR_PTR(err);
-}
-
-static void internal_dev_destroy(struct vport *vport)
-{
- netif_stop_queue(vport->dev);
- rtnl_lock();
- dev_set_promiscuity(vport->dev, -1);
-
- /* unregister_netdevice() waits for an RCU grace period. */
- unregister_netdevice(vport->dev);
- free_percpu(vport->dev->tstats);
- rtnl_unlock();
-}
-
-static netdev_tx_t internal_dev_recv(struct sk_buff *skb)
-{
- struct net_device *netdev = skb->dev;
- struct pcpu_sw_netstats *stats;
-
- if (unlikely(!(netdev->flags & IFF_UP))) {
- kfree_skb(skb);
- netdev->stats.rx_dropped++;
- return NETDEV_TX_OK;
- }
-
- skb_dst_drop(skb);
- nf_reset_ct(skb);
- secpath_reset(skb);
-
- skb->pkt_type = PACKET_HOST;
- skb->protocol = eth_type_trans(skb, netdev);
- skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
-
- stats = this_cpu_ptr(netdev->tstats);
- u64_stats_update_begin(&stats->syncp);
- stats->rx_packets++;
- stats->rx_bytes += skb->len;
- u64_stats_update_end(&stats->syncp);
-
- netif_rx(skb);
- return NETDEV_TX_OK;
-}
-
-static struct vport_ops ovs_internal_vport_ops = {
- .type = OVS_VPORT_TYPE_INTERNAL,
- .create = internal_dev_create,
- .destroy = internal_dev_destroy,
- .send = internal_dev_recv,
-};
-
-int ovs_is_internal_dev(const struct net_device *netdev)
-{
- return netdev->netdev_ops == &internal_dev_netdev_ops;
-}
-
-struct vport *ovs_internal_dev_get_vport(struct net_device *netdev)
-{
- if (!ovs_is_internal_dev(netdev))
- return NULL;
-
- return internal_dev_priv(netdev)->vport;
-}
-
-int ovs_internal_dev_rtnl_link_register(void)
-{
- int err;
-
- err = rtnl_link_register(&internal_dev_link_ops);
- if (err < 0)
- return err;
-
- err = ovs_vport_ops_register(&ovs_internal_vport_ops);
- if (err < 0)
- rtnl_link_unregister(&internal_dev_link_ops);
-
- return err;
-}
-
-void ovs_internal_dev_rtnl_link_unregister(void)
-{
- ovs_vport_ops_unregister(&ovs_internal_vport_ops);
- rtnl_link_unregister(&internal_dev_link_ops);
-}
diff --git a/datapath/vport-internal_dev.h b/datapath/vport-internal_dev.h
deleted file mode 100644
index 1b179a190..000000000
--- a/datapath/vport-internal_dev.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (c) 2007-2011 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
- */
-
-#ifndef VPORT_INTERNAL_DEV_H
-#define VPORT_INTERNAL_DEV_H 1
-
-#include "datapath.h"
-#include "vport.h"
-
-int ovs_is_internal_dev(const struct net_device *);
-struct vport *ovs_internal_dev_get_vport(struct net_device *);
-int ovs_internal_dev_rtnl_link_register(void);
-void ovs_internal_dev_rtnl_link_unregister(void);
-
-#endif /* vport-internal_dev.h */
diff --git a/datapath/vport-lisp.c b/datapath/vport-lisp.c
deleted file mode 100644
index 5e2bcda88..000000000
--- a/datapath/vport-lisp.c
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * Copyright (c) 2015 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/net.h>
-#include <linux/rculist.h>
-#include <linux/udp.h>
-#include <linux/if_vlan.h>
-#include <linux/module.h>
-
-#include <net/lisp.h>
-#include <net/icmp.h>
-#include <net/ip.h>
-#include <net/route.h>
-#include <net/udp.h>
-#include <net/xfrm.h>
-
-#include "datapath.h"
-#include "vport.h"
-#include "vport-netdev.h"
-
-static struct vport_ops ovs_lisp_vport_ops;
-/**
- * struct lisp_port - Keeps track of open UDP ports
- * @dst_port: destination port.
- */
-struct lisp_port {
- u16 port_no;
-};
-
-static inline struct lisp_port *lisp_vport(const struct vport *vport)
-{
- return vport_priv(vport);
-}
-
-static int lisp_get_options(const struct vport *vport,
- struct sk_buff *skb)
-{
- struct lisp_port *lisp_port = lisp_vport(vport);
-
- if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, lisp_port->port_no))
- return -EMSGSIZE;
- return 0;
-}
-
-static struct vport *lisp_tnl_create(const struct vport_parms *parms)
-{
- struct net *net = ovs_dp_get_net(parms->dp);
- struct nlattr *options = parms->options;
- struct lisp_port *lisp_port;
- struct net_device *dev;
- struct vport *vport;
- struct nlattr *a;
- u16 dst_port;
- int err;
-
- if (!options) {
- err = -EINVAL;
- goto error;
- }
-
- a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT);
- if (a && nla_len(a) == sizeof(u16)) {
- dst_port = nla_get_u16(a);
- } else {
- /* Require destination port from userspace. */
- err = -EINVAL;
- goto error;
- }
-
- vport = ovs_vport_alloc(sizeof(struct lisp_port),
- &ovs_lisp_vport_ops, parms);
- if (IS_ERR(vport))
- return vport;
-
- lisp_port = lisp_vport(vport);
- lisp_port->port_no = dst_port;
-
- rtnl_lock();
- dev = lisp_dev_create_fb(net, parms->name, NET_NAME_USER, dst_port);
- if (IS_ERR(dev)) {
- rtnl_unlock();
- ovs_vport_free(vport);
- return ERR_CAST(dev);
- }
- err = dev_change_flags(dev, dev->flags | IFF_UP, NULL);
- if (err < 0) {
- rtnl_delete_link(dev);
- rtnl_unlock();
- ovs_vport_free(vport);
- goto error;
- }
-
- rtnl_unlock();
- return vport;
-error:
- return ERR_PTR(err);
-}
-
-static struct vport *lisp_create(const struct vport_parms *parms)
-{
- struct vport *vport;
-
- vport = lisp_tnl_create(parms);
- if (IS_ERR(vport))
- return vport;
-
- return ovs_netdev_link(vport, parms->name);
-}
-
-static struct vport_ops ovs_lisp_vport_ops = {
- .type = OVS_VPORT_TYPE_LISP,
- .create = lisp_create,
- .destroy = ovs_netdev_tunnel_destroy,
- .get_options = lisp_get_options,
-#ifndef USE_UPSTREAM_TUNNEL
- .fill_metadata_dst = lisp_fill_metadata_dst,
-#endif
- .send = lisp_xmit,
-};
-
-static int __init ovs_lisp_tnl_init(void)
-{
- return ovs_vport_ops_register(&ovs_lisp_vport_ops);
-}
-
-static void __exit ovs_lisp_tnl_exit(void)
-{
- ovs_vport_ops_unregister(&ovs_lisp_vport_ops);
-}
-
-module_init(ovs_lisp_tnl_init);
-module_exit(ovs_lisp_tnl_exit);
-
-MODULE_DESCRIPTION("OVS: Lisp switching port");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("vport-type-105");
diff --git a/datapath/vport-netdev.c b/datapath/vport-netdev.c
deleted file mode 100644
index 4eb881671..000000000
--- a/datapath/vport-netdev.c
+++ /dev/null
@@ -1,230 +0,0 @@
-/*
- * Copyright (c) 2007-2012 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/if_arp.h>
-#include <linux/if_bridge.h>
-#include <linux/if_vlan.h>
-#include <linux/kernel.h>
-#include <linux/llc.h>
-#include <linux/rtnetlink.h>
-#include <linux/skbuff.h>
-#include <linux/openvswitch.h>
-#include <linux/export.h>
-
-#include <net/ip_tunnels.h>
-#include <net/rtnetlink.h>
-
-#include "datapath.h"
-#include "gso.h"
-#include "vport.h"
-#include "vport-internal_dev.h"
-#include "vport-netdev.h"
-
-static struct vport_ops ovs_netdev_vport_ops;
-
-/* Must be called with rcu_read_lock. */
-void netdev_port_receive(struct sk_buff *skb, struct ip_tunnel_info *tun_info)
-{
- struct vport *vport;
-
- vport = ovs_netdev_get_vport(skb->dev);
- if (unlikely(!vport))
- goto error;
-
- if (unlikely(skb_warn_if_lro(skb)))
- goto error;
-
- /* Make our own copy of the packet. Otherwise we will mangle the
- * packet for anyone who came before us (e.g. tcpdump via AF_PACKET).
- */
- skb = skb_share_check(skb, GFP_ATOMIC);
- if (unlikely(!skb))
- return;
-
- if (skb->dev->type == ARPHRD_ETHER) {
- skb_push(skb, ETH_HLEN);
- skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
- }
- ovs_vport_receive(vport, skb, tun_info);
- return;
-error:
- kfree_skb(skb);
-}
-
-/* Called with rcu_read_lock and bottom-halves disabled. */
-static rx_handler_result_t netdev_frame_hook(struct sk_buff **pskb)
-{
- struct sk_buff *skb = *pskb;
-
- if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
- return RX_HANDLER_PASS;
-
-#ifndef USE_UPSTREAM_TUNNEL
- netdev_port_receive(skb, NULL);
-#else
- netdev_port_receive(skb, skb_tunnel_info(skb));
-#endif
- return RX_HANDLER_CONSUMED;
-}
-
-static struct net_device *get_dpdev(const struct datapath *dp)
-{
- struct vport *local;
-
- local = ovs_vport_ovsl(dp, OVSP_LOCAL);
- BUG_ON(!local);
- return local->dev;
-}
-
-struct vport *ovs_netdev_link(struct vport *vport, const char *name)
-{
- int err;
-
- vport->dev = dev_get_by_name(ovs_dp_get_net(vport->dp), name);
- if (!vport->dev) {
- err = -ENODEV;
- goto error_free_vport;
- }
-
- if (vport->dev->flags & IFF_LOOPBACK ||
- (vport->dev->type != ARPHRD_ETHER &&
- vport->dev->type != ARPHRD_NONE) ||
- ovs_is_internal_dev(vport->dev)) {
- err = -EINVAL;
- goto error_put;
- }
-
- rtnl_lock();
- err = netdev_master_upper_dev_link(vport->dev,
- get_dpdev(vport->dp),
- NULL, NULL, NULL);
- if (err)
- goto error_unlock;
-
- err = netdev_rx_handler_register(vport->dev, netdev_frame_hook,
- vport);
- if (err)
- goto error_master_upper_dev_unlink;
-
- dev_disable_lro(vport->dev);
- dev_set_promiscuity(vport->dev, 1);
- vport->dev->priv_flags |= IFF_OVS_DATAPATH;
- rtnl_unlock();
-
- return vport;
-
-error_master_upper_dev_unlink:
- netdev_upper_dev_unlink(vport->dev, get_dpdev(vport->dp));
-error_unlock:
- rtnl_unlock();
-error_put:
- dev_put(vport->dev);
-error_free_vport:
- ovs_vport_free(vport);
- return ERR_PTR(err);
-}
-EXPORT_SYMBOL_GPL(ovs_netdev_link);
-
-static struct vport *netdev_create(const struct vport_parms *parms)
-{
- struct vport *vport;
-
- vport = ovs_vport_alloc(0, &ovs_netdev_vport_ops, parms);
- if (IS_ERR(vport))
- return vport;
-
- return ovs_netdev_link(vport, parms->name);
-}
-
-static void vport_netdev_free(struct rcu_head *rcu)
-{
- struct vport *vport = container_of(rcu, struct vport, rcu);
-
- if (vport->dev)
- dev_put(vport->dev);
- ovs_vport_free(vport);
-}
-
-void ovs_netdev_detach_dev(struct vport *vport)
-{
- ASSERT_RTNL();
- vport->dev->priv_flags &= ~IFF_OVS_DATAPATH;
- netdev_rx_handler_unregister(vport->dev);
- netdev_upper_dev_unlink(vport->dev,
- netdev_master_upper_dev_get(vport->dev));
- dev_set_promiscuity(vport->dev, -1);
-}
-
-static void netdev_destroy(struct vport *vport)
-{
- rtnl_lock();
- if (vport->dev->priv_flags & IFF_OVS_DATAPATH)
- ovs_netdev_detach_dev(vport);
- rtnl_unlock();
-
- call_rcu(&vport->rcu, vport_netdev_free);
-}
-
-void ovs_netdev_tunnel_destroy(struct vport *vport)
-{
- rtnl_lock();
- if (vport->dev->priv_flags & IFF_OVS_DATAPATH)
- ovs_netdev_detach_dev(vport);
-
- /* We can be invoked by both explicit vport deletion and
- * underlying netdev deregistration; delete the link only
- * if it's not already shutting down.
- */
- if (vport->dev->reg_state == NETREG_REGISTERED)
- rtnl_delete_link(vport->dev);
- dev_put(vport->dev);
- vport->dev = NULL;
- rtnl_unlock();
-
- call_rcu(&vport->rcu, vport_netdev_free);
-}
-EXPORT_SYMBOL_GPL(ovs_netdev_tunnel_destroy);
-
-/* Returns null if this device is not attached to a datapath. */
-struct vport *ovs_netdev_get_vport(struct net_device *dev)
-{
- if (likely(dev->priv_flags & IFF_OVS_DATAPATH))
- return (struct vport *)
- rcu_dereference_rtnl(dev->rx_handler_data);
- else
- return NULL;
-}
-
-static struct vport_ops ovs_netdev_vport_ops = {
- .type = OVS_VPORT_TYPE_NETDEV,
- .create = netdev_create,
- .destroy = netdev_destroy,
- .send = dev_queue_xmit,
-};
-
-int __init ovs_netdev_init(void)
-{
- return ovs_vport_ops_register(&ovs_netdev_vport_ops);
-}
-
-void ovs_netdev_exit(void)
-{
- ovs_vport_ops_unregister(&ovs_netdev_vport_ops);
-}
diff --git a/datapath/vport-netdev.h b/datapath/vport-netdev.h
deleted file mode 100644
index 04ad190c9..000000000
--- a/datapath/vport-netdev.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2007-2015 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
- */
-
-#ifndef VPORT_NETDEV_H
-#define VPORT_NETDEV_H 1
-
-#include <linux/netdevice.h>
-#include <linux/rcupdate.h>
-
-#include "vport.h"
-
-struct vport *ovs_netdev_get_vport(struct net_device *dev);
-
-struct vport *ovs_netdev_link(struct vport *vport, const char *name);
-void ovs_netdev_detach_dev(struct vport *);
-
-int __init ovs_netdev_init(void);
-void ovs_netdev_exit(void);
-
-void ovs_netdev_tunnel_destroy(struct vport *vport);
-
-void netdev_port_receive(struct sk_buff *skb, struct ip_tunnel_info *tun_info);
-
-#endif /* vport_netdev.h */
diff --git a/datapath/vport-stt.c b/datapath/vport-stt.c
deleted file mode 100644
index 71bbeda63..000000000
--- a/datapath/vport-stt.c
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- * Copyright (c) 2015 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/net.h>
-#include <linux/rculist.h>
-#include <linux/udp.h>
-#include <linux/if_vlan.h>
-#include <linux/module.h>
-
-#include <net/stt.h>
-#include <net/icmp.h>
-#include <net/ip.h>
-#include <net/route.h>
-#include <net/udp.h>
-#include <net/xfrm.h>
-
-#include "datapath.h"
-#include "vport.h"
-#include "vport-netdev.h"
-
-#ifdef OVS_STT
-static struct vport_ops ovs_stt_vport_ops;
-/**
- * struct stt_port - Keeps track of open UDP ports
- * @dst_port: destination port.
- */
-struct stt_port {
- u16 port_no;
-};
-
-static inline struct stt_port *stt_vport(const struct vport *vport)
-{
- return vport_priv(vport);
-}
-
-static int stt_get_options(const struct vport *vport,
- struct sk_buff *skb)
-{
- struct stt_port *stt_port = stt_vport(vport);
-
- if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, stt_port->port_no))
- return -EMSGSIZE;
- return 0;
-}
-
-static struct vport *stt_tnl_create(const struct vport_parms *parms)
-{
- struct net *net = ovs_dp_get_net(parms->dp);
- struct nlattr *options = parms->options;
- struct stt_port *stt_port;
- struct net_device *dev;
- struct vport *vport;
- struct nlattr *a;
- u16 dst_port;
- int err;
-
- if (!options) {
- err = -EINVAL;
- goto error;
- }
-
- a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT);
- if (a && nla_len(a) == sizeof(u16)) {
- dst_port = nla_get_u16(a);
- } else {
- /* Require destination port from userspace. */
- err = -EINVAL;
- goto error;
- }
-
- vport = ovs_vport_alloc(sizeof(struct stt_port),
- &ovs_stt_vport_ops, parms);
- if (IS_ERR(vport))
- return vport;
-
- stt_port = stt_vport(vport);
- stt_port->port_no = dst_port;
-
- rtnl_lock();
- dev = stt_dev_create_fb(net, parms->name, NET_NAME_USER, dst_port);
- if (IS_ERR(dev)) {
- rtnl_unlock();
- ovs_vport_free(vport);
- return ERR_CAST(dev);
- }
-
- err = dev_change_flags(dev, dev->flags | IFF_UP, NULL);
- if (err < 0) {
- rtnl_delete_link(dev);
- rtnl_unlock();
- ovs_vport_free(vport);
- goto error;
- }
-
- rtnl_unlock();
- return vport;
-error:
- return ERR_PTR(err);
-}
-
-static struct vport *stt_create(const struct vport_parms *parms)
-{
- struct vport *vport;
-
- vport = stt_tnl_create(parms);
- if (IS_ERR(vport))
- return vport;
-
- return ovs_netdev_link(vport, parms->name);
-}
-
-static struct vport_ops ovs_stt_vport_ops = {
- .type = OVS_VPORT_TYPE_STT,
- .create = stt_create,
- .destroy = ovs_netdev_tunnel_destroy,
- .get_options = stt_get_options,
-#ifndef USE_UPSTREAM_TUNNEL
- .fill_metadata_dst = stt_fill_metadata_dst,
-#endif
- .send = ovs_stt_xmit,
-};
-
-static int __init ovs_stt_tnl_init(void)
-{
- return ovs_vport_ops_register(&ovs_stt_vport_ops);
-}
-
-static void __exit ovs_stt_tnl_exit(void)
-{
- ovs_vport_ops_unregister(&ovs_stt_vport_ops);
-}
-
-module_init(ovs_stt_tnl_init);
-module_exit(ovs_stt_tnl_exit);
-
-MODULE_DESCRIPTION("OVS: STT switching port");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("vport-type-106");
-#endif
diff --git a/datapath/vport-vxlan.c b/datapath/vport-vxlan.c
deleted file mode 100644
index 79331c968..000000000
--- a/datapath/vport-vxlan.c
+++ /dev/null
@@ -1,216 +0,0 @@
-/*
- * Copyright (c) 2015,2017 Nicira, Inc.
- * Copyright (c) 2013 Cisco Systems, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
- */
-
-#include <linux/kernel.h>
-#include <linux/skbuff.h>
-#include <linux/openvswitch.h>
-#include <linux/module.h>
-#include <net/udp.h>
-#include <net/ip_tunnels.h>
-#include <net/rtnetlink.h>
-#include <net/vxlan.h>
-
-#include "datapath.h"
-#include "vport.h"
-#include "vport-netdev.h"
-
-static struct vport_ops ovs_vxlan_netdev_vport_ops;
-
-static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
-{
- struct vxlan_dev *vxlan = netdev_priv(vport->dev);
- __be16 dst_port = vxlan->cfg.dst_port;
-
- if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port)))
- return -EMSGSIZE;
-
-#ifdef HAVE_VXLAN_DEV_CFG
- if (vxlan->cfg.flags & VXLAN_F_GBP) {
-#else
- if (vxlan->flags & VXLAN_F_GBP) {
-#endif
- struct nlattr *exts;
-
- exts = nla_nest_start_noflag(skb, OVS_TUNNEL_ATTR_EXTENSION);
- if (!exts)
- return -EMSGSIZE;
-
-#ifdef HAVE_VXLAN_DEV_CFG
- if (vxlan->cfg.flags & VXLAN_F_GBP &&
-#else
- if (vxlan->flags & VXLAN_F_GBP &&
-#endif
- nla_put_flag(skb, OVS_VXLAN_EXT_GBP))
- return -EMSGSIZE;
-
- nla_nest_end(skb, exts);
-#ifdef HAVE_VXLAN_DEV_CFG
- } else if (vxlan->cfg.flags & VXLAN_F_GPE) {
-#else
- } else if (vxlan->flags & VXLAN_F_GPE) {
-#endif
- struct nlattr *exts;
-
- exts = nla_nest_start(skb, OVS_TUNNEL_ATTR_EXTENSION);
- if (!exts)
- return -EMSGSIZE;
-
-#ifdef HAVE_VXLAN_DEV_CFG
- if (vxlan->cfg.flags & VXLAN_F_GPE &&
-#else
- if (vxlan->flags & VXLAN_F_GPE &&
-#endif
- nla_put_flag(skb, OVS_VXLAN_EXT_GPE))
- return -EMSGSIZE;
-
- nla_nest_end(skb, exts);
- }
-
- return 0;
-}
-
-static const struct nla_policy exts_policy[OVS_VXLAN_EXT_MAX + 1] = {
- [OVS_VXLAN_EXT_GBP] = { .type = NLA_FLAG, },
- [OVS_VXLAN_EXT_GPE] = { .type = NLA_FLAG, },
-};
-
-static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr,
- struct vxlan_config *conf)
-{
- struct nlattr *exts[OVS_VXLAN_EXT_MAX + 1];
- int err;
-
- if (nla_len(attr) < sizeof(struct nlattr))
- return -EINVAL;
-
- err = nla_parse_nested_deprecated(exts, OVS_VXLAN_EXT_MAX, attr,
- exts_policy, NULL);
- if (err < 0)
- return err;
-
- if (exts[OVS_VXLAN_EXT_GBP])
- conf->flags |= VXLAN_F_GBP;
- else if (exts[OVS_VXLAN_EXT_GPE])
- conf->flags |= VXLAN_F_GPE;
-
- return 0;
-}
-
-static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
-{
- struct net *net = ovs_dp_get_net(parms->dp);
- struct nlattr *options = parms->options;
- struct net_device *dev;
- struct vport *vport;
- struct nlattr *a;
- int err;
- struct vxlan_config conf = {
- .no_share = true,
- .flags = VXLAN_F_COLLECT_METADATA | VXLAN_F_UDP_ZERO_CSUM6_RX,
- /* Don't restrict the packets that can be sent by MTU */
- .mtu = IP_MAX_MTU,
- };
-
- if (!options) {
- err = -EINVAL;
- goto error;
- }
-
- a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT);
- if (a && nla_len(a) == sizeof(u16)) {
- conf.dst_port = htons(nla_get_u16(a));
- } else {
- /* Require destination port from userspace. */
- err = -EINVAL;
- goto error;
- }
-
- vport = ovs_vport_alloc(0, &ovs_vxlan_netdev_vport_ops, parms);
- if (IS_ERR(vport))
- return vport;
-
- a = nla_find_nested(options, OVS_TUNNEL_ATTR_EXTENSION);
- if (a) {
- err = vxlan_configure_exts(vport, a, &conf);
- if (err) {
- ovs_vport_free(vport);
- goto error;
- }
- }
-
- rtnl_lock();
- dev = vxlan_dev_create(net, parms->name, NET_NAME_USER, &conf);
- if (IS_ERR(dev)) {
- rtnl_unlock();
- ovs_vport_free(vport);
- return ERR_CAST(dev);
- }
-
- err = dev_change_flags(dev, dev->flags | IFF_UP, NULL);
- if (err < 0) {
- rtnl_delete_link(dev);
- rtnl_unlock();
- ovs_vport_free(vport);
- goto error;
- }
-
- rtnl_unlock();
- return vport;
-error:
- return ERR_PTR(err);
-}
-
-static struct vport *vxlan_create(const struct vport_parms *parms)
-{
- struct vport *vport;
-
- vport = vxlan_tnl_create(parms);
- if (IS_ERR(vport))
- return vport;
-
- return ovs_netdev_link(vport, parms->name);
-}
-
-static struct vport_ops ovs_vxlan_netdev_vport_ops = {
- .type = OVS_VPORT_TYPE_VXLAN,
- .create = vxlan_create,
- .destroy = ovs_netdev_tunnel_destroy,
- .get_options = vxlan_get_options,
-#ifndef USE_UPSTREAM_TUNNEL
- .fill_metadata_dst = vxlan_fill_metadata_dst,
-#endif
- .send = vxlan_xmit,
-};
-
-static int __init ovs_vxlan_tnl_init(void)
-{
- return ovs_vport_ops_register(&ovs_vxlan_netdev_vport_ops);
-}
-
-static void __exit ovs_vxlan_tnl_exit(void)
-{
- ovs_vport_ops_unregister(&ovs_vxlan_netdev_vport_ops);
-}
-
-module_init(ovs_vxlan_tnl_init);
-module_exit(ovs_vxlan_tnl_exit);
-
-MODULE_DESCRIPTION("OVS: VXLAN switching port");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("vport-type-4");
diff --git a/datapath/vport.c b/datapath/vport.c
deleted file mode 100644
index bd62c5612..000000000
--- a/datapath/vport.c
+++ /dev/null
@@ -1,614 +0,0 @@
-/*
- * Copyright (c) 2007-2015 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
- */
-
-#include <linux/etherdevice.h>
-#include <linux/if.h>
-#include <linux/if_vlan.h>
-#include <linux/jhash.h>
-#include <linux/kernel.h>
-#include <linux/list.h>
-#include <linux/mutex.h>
-#include <linux/percpu.h>
-#include <linux/rcupdate.h>
-#include <linux/rtnetlink.h>
-#include <linux/compat.h>
-#include <linux/module.h>
-#include <linux/if_link.h>
-#include <net/net_namespace.h>
-#include <net/lisp.h>
-#include <net/gre.h>
-#include <net/geneve.h>
-#include <net/stt.h>
-#include <net/vxlan.h>
-
-#include "datapath.h"
-#include "gso.h"
-#include "vport.h"
-#include "vport-internal_dev.h"
-
-static LIST_HEAD(vport_ops_list);
-static bool compat_gre_loaded = false;
-static bool compat_ip6_tunnel_loaded = false;
-
-/* Protected by RCU read lock for reading, ovs_mutex for writing. */
-static struct hlist_head *dev_table;
-#define VPORT_HASH_BUCKETS 1024
-
-/**
- * ovs_vport_init - initialize vport subsystem
- *
- * Called at module load time to initialize the vport subsystem.
- */
-int ovs_vport_init(void)
-{
- int err;
-
- dev_table = kcalloc(VPORT_HASH_BUCKETS, sizeof(struct hlist_head),
- GFP_KERNEL);
- if (!dev_table)
- return -ENOMEM;
-
- err = lisp_init_module();
- if (err)
- goto err_lisp;
- err = gre_init();
- if (err && err != -EEXIST) {
- goto err_gre;
- } else {
- if (err == -EEXIST) {
- pr_warn("Cannot take GRE protocol rx entry"\
- "- The GRE/ERSPAN rx feature not supported\n");
- /* continue GRE tx */
- }
-
- err = ipgre_init();
- if (err && err != -EEXIST)
- goto err_ipgre;
- compat_gre_loaded = true;
- }
- err = ip6gre_init();
- if (err && err != -EEXIST) {
- goto err_ip6gre;
- } else {
- if (err == -EEXIST) {
- pr_warn("IPv6 GRE/ERSPAN Rx mode is not supported\n");
- goto skip_ip6_tunnel_init;
- }
- }
-
- err = ip6_tunnel_init();
- if (err)
- goto err_ip6_tunnel;
- else
- compat_ip6_tunnel_loaded = true;
-
-skip_ip6_tunnel_init:
- err = geneve_init_module();
- if (err)
- goto err_geneve;
- err = vxlan_init_module();
- if (err)
- goto err_vxlan;
- err = ovs_stt_init_module();
- if (err)
- goto err_stt;
-
- return 0;
- ovs_stt_cleanup_module();
-err_stt:
- vxlan_cleanup_module();
-err_vxlan:
- geneve_cleanup_module();
-err_geneve:
- ip6_tunnel_cleanup();
-err_ip6_tunnel:
- ip6gre_fini();
-err_ip6gre:
- ipgre_fini();
-err_ipgre:
- gre_exit();
-err_gre:
- lisp_cleanup_module();
-err_lisp:
- kfree(dev_table);
- return err;
-}
-
-/**
- * ovs_vport_exit - shutdown vport subsystem
- *
- * Called at module exit time to shutdown the vport subsystem.
- */
-void ovs_vport_exit(void)
-{
- if (compat_gre_loaded) {
- gre_exit();
- ipgre_fini();
- }
- ovs_stt_cleanup_module();
- vxlan_cleanup_module();
- geneve_cleanup_module();
- if (compat_ip6_tunnel_loaded)
- ip6_tunnel_cleanup();
- ip6gre_fini();
- lisp_cleanup_module();
- kfree(dev_table);
-}
-
-static struct hlist_head *hash_bucket(const struct net *net, const char *name)
-{
- unsigned int hash = jhash(name, strlen(name), (unsigned long) net);
- return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)];
-}
-
-int __ovs_vport_ops_register(struct vport_ops *ops)
-{
- int err = -EEXIST;
- struct vport_ops *o;
-
- ovs_lock();
- list_for_each_entry(o, &vport_ops_list, list)
- if (ops->type == o->type)
- goto errout;
-
- list_add_tail(&ops->list, &vport_ops_list);
- err = 0;
-errout:
- ovs_unlock();
- return err;
-}
-EXPORT_SYMBOL_GPL(__ovs_vport_ops_register);
-
-void ovs_vport_ops_unregister(struct vport_ops *ops)
-{
- ovs_lock();
- list_del(&ops->list);
- ovs_unlock();
-}
-EXPORT_SYMBOL_GPL(ovs_vport_ops_unregister);
-
-/**
- * ovs_vport_locate - find a port that has already been created
- *
- * @name: name of port to find
- *
- * Must be called with ovs or RCU read lock.
- */
-struct vport *ovs_vport_locate(const struct net *net, const char *name)
-{
- struct hlist_head *bucket = hash_bucket(net, name);
- struct vport *vport;
-
- hlist_for_each_entry_rcu(vport, bucket, hash_node)
- if (!strcmp(name, ovs_vport_name(vport)) &&
- net_eq(ovs_dp_get_net(vport->dp), net))
- return vport;
-
- return NULL;
-}
-
-/**
- * ovs_vport_alloc - allocate and initialize new vport
- *
- * @priv_size: Size of private data area to allocate.
- * @ops: vport device ops
- *
- * Allocate and initialize a new vport defined by @ops. The vport will contain
- * a private data area of size @priv_size that can be accessed using
- * vport_priv(). vports that are no longer needed should be released with
- * vport_free().
- */
-struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
- const struct vport_parms *parms)
-{
- struct vport *vport;
- size_t alloc_size;
-
- alloc_size = sizeof(struct vport);
- if (priv_size) {
- alloc_size = ALIGN(alloc_size, VPORT_ALIGN);
- alloc_size += priv_size;
- }
-
- vport = kzalloc(alloc_size, GFP_KERNEL);
- if (!vport)
- return ERR_PTR(-ENOMEM);
-
- vport->dp = parms->dp;
- vport->port_no = parms->port_no;
- vport->ops = ops;
- INIT_HLIST_NODE(&vport->dp_hash_node);
-
- if (ovs_vport_set_upcall_portids(vport, parms->upcall_portids)) {
- kfree(vport);
- return ERR_PTR(-EINVAL);
- }
-
- return vport;
-}
-EXPORT_SYMBOL_GPL(ovs_vport_alloc);
-
-/**
- * ovs_vport_free - uninitialize and free vport
- *
- * @vport: vport to free
- *
- * Frees a vport allocated with vport_alloc() when it is no longer needed.
- *
- * The caller must ensure that an RCU grace period has passed since the last
- * time @vport was in a datapath.
- */
-void ovs_vport_free(struct vport *vport)
-{
- /* vport is freed from RCU callback or error path, Therefore
- * it is safe to use raw dereference.
- */
- kfree(rcu_dereference_raw(vport->upcall_portids));
- kfree(vport);
-}
-EXPORT_SYMBOL_GPL(ovs_vport_free);
-
-static struct vport_ops *ovs_vport_lookup(const struct vport_parms *parms)
-{
- struct vport_ops *ops;
-
- list_for_each_entry(ops, &vport_ops_list, list)
- if (ops->type == parms->type)
- return ops;
-
- return NULL;
-}
-
-/**
- * ovs_vport_add - add vport device (for kernel callers)
- *
- * @parms: Information about new vport.
- *
- * Creates a new vport with the specified configuration (which is dependent on
- * device type). ovs_mutex must be held.
- */
-struct vport *ovs_vport_add(const struct vport_parms *parms)
-{
- struct vport_ops *ops;
- struct vport *vport;
-
- ops = ovs_vport_lookup(parms);
- if (ops) {
- struct hlist_head *bucket;
-
- if (!try_module_get(ops->owner))
- return ERR_PTR(-EAFNOSUPPORT);
-
- vport = ops->create(parms);
- if (IS_ERR(vport)) {
- module_put(ops->owner);
- return vport;
- }
-
- bucket = hash_bucket(ovs_dp_get_net(vport->dp),
- ovs_vport_name(vport));
- hlist_add_head_rcu(&vport->hash_node, bucket);
- return vport;
- }
-
- if (parms->type == OVS_VPORT_TYPE_GRE && !compat_gre_loaded) {
- pr_warn("GRE protocol already loaded!\n");
- return ERR_PTR(-EAFNOSUPPORT);
- }
- /* Unlock to attempt module load and return -EAGAIN if load
- * was successful as we need to restart the port addition
- * workflow.
- */
- ovs_unlock();
- request_module("vport-type-%d", parms->type);
- ovs_lock();
-
- if (!ovs_vport_lookup(parms))
- return ERR_PTR(-EAFNOSUPPORT);
- else
- return ERR_PTR(-EAGAIN);
-}
-
-/**
- * ovs_vport_set_options - modify existing vport device (for kernel callers)
- *
- * @vport: vport to modify.
- * @options: New configuration.
- *
- * Modifies an existing device with the specified configuration (which is
- * dependent on device type). ovs_mutex must be held.
- */
-int ovs_vport_set_options(struct vport *vport, struct nlattr *options)
-{
- if (!vport->ops->set_options)
- return -EOPNOTSUPP;
- return vport->ops->set_options(vport, options);
-}
-
-/**
- * ovs_vport_del - delete existing vport device
- *
- * @vport: vport to delete.
- *
- * Detaches @vport from its datapath and destroys it. ovs_mutex must be
- * held.
- */
-void ovs_vport_del(struct vport *vport)
-{
- ASSERT_OVSL();
-
- hlist_del_rcu(&vport->hash_node);
- module_put(vport->ops->owner);
- vport->ops->destroy(vport);
-}
-
-/**
- * ovs_vport_get_stats - retrieve device stats
- *
- * @vport: vport from which to retrieve the stats
- * @stats: location to store stats
- *
- * Retrieves transmit, receive, and error stats for the given device.
- *
- * Must be called with ovs_mutex or rcu_read_lock.
- */
-void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
-{
- const struct rtnl_link_stats64 *dev_stats;
- struct rtnl_link_stats64 temp;
-
- dev_stats = dev_get_stats(vport->dev, &temp);
- stats->rx_errors = dev_stats->rx_errors;
- stats->tx_errors = dev_stats->tx_errors;
- stats->tx_dropped = dev_stats->tx_dropped;
- stats->rx_dropped = dev_stats->rx_dropped;
-
- stats->rx_bytes = dev_stats->rx_bytes;
- stats->rx_packets = dev_stats->rx_packets;
- stats->tx_bytes = dev_stats->tx_bytes;
- stats->tx_packets = dev_stats->tx_packets;
-}
-
-/**
- * ovs_vport_get_options - retrieve device options
- *
- * @vport: vport from which to retrieve the options.
- * @skb: sk_buff where options should be appended.
- *
- * Retrieves the configuration of the given device, appending an
- * %OVS_VPORT_ATTR_OPTIONS attribute that in turn contains nested
- * vport-specific attributes to @skb.
- *
- * Returns 0 if successful, -EMSGSIZE if @skb has insufficient room, or another
- * negative error code if a real error occurred. If an error occurs, @skb is
- * left unmodified.
- *
- * Must be called with ovs_mutex or rcu_read_lock.
- */
-int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)
-{
- struct nlattr *nla;
- int err;
-
- if (!vport->ops->get_options)
- return 0;
-
- nla = nla_nest_start_noflag(skb, OVS_VPORT_ATTR_OPTIONS);
- if (!nla)
- return -EMSGSIZE;
-
- err = vport->ops->get_options(vport, skb);
- if (err) {
- nla_nest_cancel(skb, nla);
- return err;
- }
-
- nla_nest_end(skb, nla);
- return 0;
-}
-
-/**
- * ovs_vport_set_upcall_portids - set upcall portids of @vport.
- *
- * @vport: vport to modify.
- * @ids: new configuration, an array of port ids.
- *
- * Sets the vport's upcall_portids to @ids.
- *
- * Returns 0 if successful, -EINVAL if @ids is zero length or cannot be parsed
- * as an array of U32.
- *
- * Must be called with ovs_mutex.
- */
-int ovs_vport_set_upcall_portids(struct vport *vport, const struct nlattr *ids)
-{
- struct vport_portids *old, *vport_portids;
-
- if (!nla_len(ids) || nla_len(ids) % sizeof(u32))
- return -EINVAL;
-
- old = ovsl_dereference(vport->upcall_portids);
-
- vport_portids = kmalloc(sizeof(*vport_portids) + nla_len(ids),
- GFP_KERNEL);
- if (!vport_portids)
- return -ENOMEM;
-
- vport_portids->n_ids = nla_len(ids) / sizeof(u32);
- vport_portids->rn_ids = reciprocal_value(vport_portids->n_ids);
- nla_memcpy(vport_portids->ids, ids, nla_len(ids));
-
- rcu_assign_pointer(vport->upcall_portids, vport_portids);
-
- if (old)
- kfree_rcu(old, rcu);
- return 0;
-}
-
-/**
- * ovs_vport_get_upcall_portids - get the upcall_portids of @vport.
- *
- * @vport: vport from which to retrieve the portids.
- * @skb: sk_buff where portids should be appended.
- *
- * Retrieves the configuration of the given vport, appending the
- * %OVS_VPORT_ATTR_UPCALL_PID attribute which is the array of upcall
- * portids to @skb.
- *
- * Returns 0 if successful, -EMSGSIZE if @skb has insufficient room.
- * If an error occurs, @skb is left unmodified. Must be called with
- * ovs_mutex or rcu_read_lock.
- */
-int ovs_vport_get_upcall_portids(const struct vport *vport,
- struct sk_buff *skb)
-{
- struct vport_portids *ids;
-
- ids = rcu_dereference_ovsl(vport->upcall_portids);
-
- if (vport->dp->user_features & OVS_DP_F_VPORT_PIDS)
- return nla_put(skb, OVS_VPORT_ATTR_UPCALL_PID,
- ids->n_ids * sizeof(u32), (void *)ids->ids);
- else
- return nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, ids->ids[0]);
-}
-
-/**
- * ovs_vport_find_upcall_portid - find the upcall portid to send upcall.
- *
- * @vport: vport from which the missed packet is received.
- * @skb: skb that the missed packet was received.
- *
- * Uses the skb_get_hash() to select the upcall portid to send the
- * upcall.
- *
- * Returns the portid of the target socket. Must be called with rcu_read_lock.
- */
-u32 ovs_vport_find_upcall_portid(const struct vport *vport, struct sk_buff *skb)
-{
- struct vport_portids *ids;
- u32 ids_index;
- u32 hash;
-
- ids = rcu_dereference(vport->upcall_portids);
-
- /* If there is only one portid, select it in the fast-path. */
- if (ids->n_ids == 1)
- return ids->ids[0];
-
- hash = skb_get_hash(skb);
- ids_index = hash - ids->n_ids * reciprocal_divide(hash, ids->rn_ids);
- return ids->ids[ids_index];
-}
-
-/**
- * ovs_vport_receive - pass up received packet to the datapath for processing
- *
- * @vport: vport that received the packet
- * @skb: skb that was received
- * @tun_key: tunnel (if any) that carried packet
- *
- * Must be called with rcu_read_lock. The packet cannot be shared and
- * skb->data should point to the Ethernet header.
- */
-int ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
- const struct ip_tunnel_info *tun_info)
-{
- struct sw_flow_key key;
- int error;
-
- OVS_CB(skb)->input_vport = vport;
- OVS_CB(skb)->mru = 0;
- OVS_CB(skb)->cutlen = 0;
- if (unlikely(dev_net(skb->dev) != ovs_dp_get_net(vport->dp))) {
- u32 mark;
-
- mark = skb->mark;
- skb_scrub_packet(skb, true);
- skb->mark = mark;
- tun_info = NULL;
- }
-
- ovs_skb_init_inner_protocol(skb);
- skb_clear_ovs_gso_cb(skb);
- /* Extract flow from 'skb' into 'key'. */
- error = ovs_flow_key_extract(tun_info, skb, &key);
- if (unlikely(error)) {
- kfree_skb(skb);
- return error;
- }
- ovs_dp_process_packet(skb, &key);
- return 0;
-}
-
-static int packet_length(const struct sk_buff *skb,
- struct net_device *dev)
-{
- int length = skb->len - dev->hard_header_len;
-
- if (!skb_vlan_tag_present(skb) &&
- eth_type_vlan(skb->protocol))
- length -= VLAN_HLEN;
-
- /* Don't subtract for multiple VLAN tags. Most (all?) drivers allow
- * (ETH_LEN + VLAN_HLEN) in addition to the mtu value, but almost none
- * account for 802.1ad. e.g. is_skb_forwardable().
- */
-
- return length > 0 ? length: 0;
-}
-
-void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto)
-{
- int mtu = vport->dev->mtu;
-
- switch (vport->dev->type) {
- case ARPHRD_NONE:
- if (mac_proto == MAC_PROTO_ETHERNET) {
- skb_reset_network_header(skb);
- skb_reset_mac_len(skb);
- skb->protocol = htons(ETH_P_TEB);
- } else if (mac_proto != MAC_PROTO_NONE) {
- WARN_ON_ONCE(1);
- goto drop;
- }
- break;
- case ARPHRD_ETHER:
- if (mac_proto != MAC_PROTO_ETHERNET)
- goto drop;
- break;
- default:
- goto drop;
- }
-
- if (unlikely(packet_length(skb, vport->dev) > mtu &&
- !skb_is_gso(skb))) {
- net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n",
- vport->dev->name,
- packet_length(skb, vport->dev), mtu);
- vport->dev->stats.tx_errors++;
- goto drop;
- }
-
- skb->dev = vport->dev;
- vport->ops->send(skb);
- return;
-
-drop:
- kfree_skb(skb);
-}
diff --git a/datapath/vport.h b/datapath/vport.h
deleted file mode 100644
index d630c34bc..000000000
--- a/datapath/vport.h
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- * Copyright (c) 2007-2015 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
- */
-
-#ifndef VPORT_H
-#define VPORT_H 1
-
-#include <linux/if_tunnel.h>
-#include <linux/list.h>
-#include <linux/netlink.h>
-#include <linux/openvswitch.h>
-#include <linux/reciprocal_div.h>
-#include <linux/skbuff.h>
-#include <linux/spinlock.h>
-#include <linux/u64_stats_sync.h>
-
-#include "datapath.h"
-
-struct vport;
-struct vport_parms;
-
-/* The following definitions are for users of the vport subsytem: */
-
-int ovs_vport_init(void);
-void ovs_vport_exit(void);
-
-struct vport *ovs_vport_add(const struct vport_parms *);
-void ovs_vport_del(struct vport *);
-
-struct vport *ovs_vport_locate(const struct net *net, const char *name);
-
-void ovs_vport_get_stats(struct vport *, struct ovs_vport_stats *);
-
-int ovs_vport_set_options(struct vport *, struct nlattr *options);
-int ovs_vport_get_options(const struct vport *, struct sk_buff *);
-
-int ovs_vport_set_upcall_portids(struct vport *, const struct nlattr *pids);
-int ovs_vport_get_upcall_portids(const struct vport *, struct sk_buff *);
-u32 ovs_vport_find_upcall_portid(const struct vport *, struct sk_buff *);
-
-/**
- * struct vport_portids - array of netlink portids of a vport.
- * must be protected by rcu.
- * @rn_ids: The reciprocal value of @n_ids.
- * @rcu: RCU callback head for deferred destruction.
- * @n_ids: Size of @ids array.
- * @ids: Array storing the Netlink socket pids to be used for packets received
- * on this port that miss the flow table.
- */
-struct vport_portids {
- struct reciprocal_value rn_ids;
- struct rcu_head rcu;
- u32 n_ids;
- u32 ids[];
-};
-
-/**
- * struct vport - one port within a datapath
- * @dev: Pointer to net_device.
- * @dp: Datapath to which this port belongs.
- * @upcall_portids: RCU protected 'struct vport_portids'.
- * @port_no: Index into @dp's @ports array.
- * @hash_node: Element in @dev_table hash table in vport.c.
- * @dp_hash_node: Element in @datapath->ports hash table in datapath.c.
- * @ops: Class structure.
- * @detach_list: list used for detaching vport in net-exit call.
- * @rcu: RCU callback head for deferred destruction.
- */
-struct vport {
- struct net_device *dev;
- struct datapath *dp;
- struct vport_portids __rcu *upcall_portids;
- u16 port_no;
-
- struct hlist_node hash_node;
- struct hlist_node dp_hash_node;
- const struct vport_ops *ops;
-
- struct list_head detach_list;
- struct rcu_head rcu;
-};
-
-/**
- * struct vport_parms - parameters for creating a new vport
- *
- * @name: New vport's name.
- * @type: New vport's type.
- * @options: %OVS_VPORT_ATTR_OPTIONS attribute from Netlink message, %NULL if
- * none was supplied.
- * @dp: New vport's datapath.
- * @port_no: New vport's port number.
- */
-struct vport_parms {
- const char *name;
- enum ovs_vport_type type;
- struct nlattr *options;
-
- /* For ovs_vport_alloc(). */
- struct datapath *dp;
- u16 port_no;
- struct nlattr *upcall_portids;
-};
-
-/**
- * struct vport_ops - definition of a type of virtual port
- *
- * @type: %OVS_VPORT_TYPE_* value for this type of virtual port.
- * @create: Create a new vport configured as specified. On success returns
- * a new vport allocated with ovs_vport_alloc(), otherwise an ERR_PTR() value.
- * @destroy: Destroys a vport. Must call vport_free() on the vport but not
- * before an RCU grace period has elapsed.
- * @set_options: Modify the configuration of an existing vport. May be %NULL
- * if modification is not supported.
- * @get_options: Appends vport-specific attributes for the configuration of an
- * existing vport to a &struct sk_buff. May be %NULL for a vport that does not
- * have any configuration.
- * @send: Send a packet on the device.
- * zero for dropped packets or negative for error.
- */
-struct vport_ops {
- enum ovs_vport_type type;
-
- /* Called with ovs_mutex. */
- struct vport *(*create)(const struct vport_parms *);
- void (*destroy)(struct vport *);
-
- int (*set_options)(struct vport *, struct nlattr *);
- int (*get_options)(const struct vport *, struct sk_buff *);
-
- netdev_tx_t (*send)(struct sk_buff *skb);
-#ifndef USE_UPSTREAM_TUNNEL
- int (*fill_metadata_dst)(struct net_device *dev, struct sk_buff *skb);
-#endif
- struct module *owner;
- struct list_head list;
-};
-
-struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *,
- const struct vport_parms *);
-void ovs_vport_free(struct vport *);
-
-#define VPORT_ALIGN 8
-
-/**
- * vport_priv - access private data area of vport
- *
- * @vport: vport to access
- *
- * If a nonzero size was passed in priv_size of vport_alloc() a private data
- * area was allocated on creation. This allows that area to be accessed and
- * used for any purpose needed by the vport implementer.
- */
-static inline void *vport_priv(const struct vport *vport)
-{
- return (u8 *)(uintptr_t)vport + ALIGN(sizeof(struct vport), VPORT_ALIGN);
-}
-
-/**
- * vport_from_priv - lookup vport from private data pointer
- *
- * @priv: Start of private data area.
- *
- * It is sometimes useful to translate from a pointer to the private data
- * area to the vport, such as in the case where the private data pointer is
- * the result of a hash table lookup. @priv must point to the start of the
- * private data area.
- */
-static inline struct vport *vport_from_priv(void *priv)
-{
- return (struct vport *)((u8 *)priv - ALIGN(sizeof(struct vport), VPORT_ALIGN));
-}
-
-int ovs_vport_receive(struct vport *, struct sk_buff *,
- const struct ip_tunnel_info *);
-
-static inline const char *ovs_vport_name(struct vport *vport)
-{
- return vport->dev->name;
-}
-
-int __ovs_vport_ops_register(struct vport_ops *ops);
-#define ovs_vport_ops_register(ops) \
- ({ \
- (ops)->owner = THIS_MODULE; \
- __ovs_vport_ops_register(ops); \
- })
-
-void ovs_vport_ops_unregister(struct vport_ops *ops);
-void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto);
-
-#endif /* vport.h */
diff --git a/debian/copyright.in b/debian/copyright.in
index 64d23795d..ff38792d9 100644
--- a/debian/copyright.in
+++ b/debian/copyright.in
@@ -81,7 +81,7 @@ Copyright: (c) 1990,1993, The Regents of the University of California
License: BSD-3-clause
Files:
- datapath/linux/compat/include/linux/openvswitch.h
+ include/linux/openvswitch.h
Copyright: (c) 2007-2017 Nicira, Inc.
License: Apache-2.0-or-GPL-2.0
diff --git a/include/automake.mk b/include/automake.mk
index e982da87d..1e3390ae0 100644
--- a/include/automake.mk
+++ b/include/automake.mk
@@ -1,6 +1,6 @@
BUILT_SOURCES += include/odp-netlink.h include/odp-netlink-macros.h
-include/odp-netlink.h: datapath/linux/compat/include/linux/openvswitch.h \
+include/odp-netlink.h: include/linux/openvswitch.h \
build-aux/extract-odp-netlink-h
$(AM_V_GEN)sed -f $(srcdir)/build-aux/extract-odp-netlink-h < $< > $@
diff --git a/include/linux/automake.mk b/include/linux/automake.mk
index f857c7e08..cdae5eedc 100644
--- a/include/linux/automake.mk
+++ b/include/linux/automake.mk
@@ -1,6 +1,7 @@
noinst_HEADERS += \
include/linux/netlink.h \
include/linux/netfilter/nf_conntrack_sctp.h \
+ include/linux/openvswitch.h \
include/linux/pkt_cls.h \
include/linux/gen_stats.h \
include/linux/tc_act/tc_mpls.h \
diff --git a/datapath/linux/compat/include/linux/openvswitch.h b/include/linux/openvswitch.h
index 8bb5abdc8..8bb5abdc8 100644
--- a/datapath/linux/compat/include/linux/openvswitch.h
+++ b/include/linux/openvswitch.h
diff --git a/utilities/docker/debian/build-kernel-modules.sh b/utilities/docker/debian/build-kernel-modules.sh
index 872ba1eb8..aaee73ff7 100755
--- a/utilities/docker/debian/build-kernel-modules.sh
+++ b/utilities/docker/debian/build-kernel-modules.sh
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-KERNEL_VERSION=$1
+KERNEL_VERSION=host
OVS_BRANCH=$2
GITHUB_SRC=$3
@@ -39,18 +39,13 @@ cd ovs
config="./configure --localstatedir="/var" --sysconfdir="/etc" --prefix="/usr"
--enable-ssl"
-if [ $KERNEL_VERSION = "host" ]; then
- eval $config
-else
- withlinux=" --with-linux=/lib/modules/$KERNEL_VERSION/build"
- eval $config$withlinux
-fi
+eval $config
-make -j8; make install; make modules_install
+make -j8; make install
# remove deps to make the container light weight.
apt-get remove --purge -y ${build_deps}
apt-get autoremove -y --purge
cd ..; rm -rf ovs
basic_utils="vim kmod net-tools uuid-runtime iproute2"
-apt-get install -y ${basic_utils} \ No newline at end of file
+apt-get install -y ${basic_utils}
diff --git a/utilities/ovs-dev.py b/utilities/ovs-dev.py
index 534c5e7f1..d64e464f4 100755
--- a/utilities/ovs-dev.py
+++ b/utilities/ovs-dev.py
@@ -106,7 +106,7 @@ def conf():
pass # Directory exists.
os.chdir(BUILD_GCC)
- _sh(*(configure + ["--with-linux=/lib/modules/%s/build" % uname()]))
+ _sh(*(configure))
try:
_sh("clang --version", check=True)
@@ -184,12 +184,9 @@ def tag():
ctags = ['ctags', '-R', '-f', '.tags']
try:
- _sh(*(ctags + ['--exclude="datapath/"']))
+ _sh(*ctags)
except:
- try:
- _sh(*ctags) # Some versions of ctags don't have --exclude
- except:
- pass
+ pass
try:
_sh('cscope', '-R', '-b')
@@ -351,7 +348,7 @@ Basic Configuration:
# First install the basic requirements needed to build Open vSwitch.
sudo apt-get install git build-essential libtool autoconf pkg-config \\
- libssl-dev gdb libcap-ng-dev linux-headers-`uname -r`
+ libssl-dev gdb libcap-ng-dev
# Next clone the Open vSwitch source.
git clone https://github.com/openvswitch/ovs.git %(ovs)s
@@ -362,14 +359,6 @@ Basic Configuration:
# Build the switch.
%(v)s conf make
- # Install the kernel module
- sudo insmod %(ovs)s/datapath/linux/openvswitch.ko
-
- # If needed, manually load all required vport modules:
- sudo insmod %(ovs)s/datapath/linux/vport-vxlan.ko
- sudo insmod %(ovs)s/datapath/linux/vport-geneve.ko
- [...]
-
# Run the switch.
%(v)s run