diff options
author | Mark Brown <broonie@kernel.org> | 2021-07-27 03:36:23 +0100 |
---|---|---|
committer | Mark Brown <broonie@kernel.org> | 2021-07-27 03:36:23 +0100 |
commit | 5a0b5fbe7b41515ebf5e5cb1f70a6bd220f44df2 (patch) | |
tree | 4c87298c3ad3c7b58c169e56cace092b80442372 | |
parent | c72249d40e91c77de63b782a84b115038341fde7 (diff) | |
parent | 268ca4129d8da764fdf72916f762a1145c6ea743 (diff) | |
download | linux-next-5a0b5fbe7b41515ebf5e5cb1f70a6bd220f44df2.tar.gz |
Merge remote-tracking branch 'net-next/master'
# Conflicts:
# drivers/bus/mhi/pci_generic.c
417 files changed, 17415 insertions, 6450 deletions
diff --git a/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml b/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml index f84e31348d80..a7b5807c5543 100644 --- a/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml +++ b/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml @@ -107,6 +107,9 @@ properties: can-transceiver: $ref: can-transceiver.yaml# + phys: + maxItems: 1 + required: - compatible - reg diff --git a/Documentation/devicetree/bindings/net/fsl,fec.yaml b/Documentation/devicetree/bindings/net/fsl,fec.yaml new file mode 100644 index 000000000000..dbcbec95fc9e --- /dev/null +++ b/Documentation/devicetree/bindings/net/fsl,fec.yaml @@ -0,0 +1,217 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/fsl,fec.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Freescale Fast Ethernet Controller (FEC) + +maintainers: + - Joakim Zhang <qiangqing.zhang@nxp.com> + +allOf: + - $ref: ethernet-controller.yaml# + +properties: + compatible: + oneOf: + - enum: + - fsl,imx25-fec + - fsl,imx27-fec + - fsl,imx28-fec + - fsl,imx6q-fec + - fsl,mvf600-fec + - items: + - enum: + - fsl,imx53-fec + - fsl,imx6sl-fec + - const: fsl,imx25-fec + - items: + - enum: + - fsl,imx35-fec + - fsl,imx51-fec + - const: fsl,imx27-fec + - items: + - enum: + - fsl,imx6ul-fec + - fsl,imx6sx-fec + - const: fsl,imx6q-fec + - items: + - enum: + - fsl,imx7d-fec + - const: fsl,imx6sx-fec + + reg: + maxItems: 1 + + interrupts: + minItems: 1 + maxItems: 4 + + interrupt-names: + oneOf: + - items: + - const: int0 + - items: + - const: int0 + - const: pps + - items: + - const: int0 + - const: int1 + - const: int2 + - items: + - const: int0 + - const: int1 + - const: int2 + - const: pps + + clocks: + minItems: 2 + maxItems: 5 + description: + The "ipg", for MAC ipg_clk_s, ipg_clk_mac_s that are for register accessing. + The "ahb", for MAC ipg_clk, ipg_clk_mac that are bus clock. 
+ The "ptp"(option), for IEEE1588 timer clock that requires the clock. + The "enet_clk_ref"(option), for MAC transmit/receiver reference clock like + RGMII TXC clock or RMII reference clock. It depends on board design, + the clock is required if RGMII TXC and RMII reference clock source from + SOC internal PLL. + The "enet_out"(option), output clock for external device, like supply clock + for PHY. The clock is required if PHY clock source from SOC. + + clock-names: + minItems: 2 + maxItems: 5 + items: + enum: + - ipg + - ahb + - ptp + - enet_clk_ref + - enet_out + + phy-mode: true + + phy-handle: true + + fixed-link: true + + local-mac-address: true + + mac-address: true + + phy-supply: + description: + Regulator that powers the Ethernet PHY. + + fsl,num-tx-queues: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + The property is valid for enet-avb IP, which supports hw multi queues. + Should specify the tx queue number, otherwise set tx queue number to 1. + enum: [1, 2, 3] + + fsl,num-rx-queues: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + The property is valid for enet-avb IP, which supports hw multi queues. + Should specify the rx queue number, otherwise set rx queue number to 1. + enum: [1, 2, 3] + + fsl,magic-packet: + $ref: /schemas/types.yaml#/definitions/flag + description: + If present, indicates that the hardware supports waking up via magic packet. + + fsl,err006687-workaround-present: + $ref: /schemas/types.yaml#/definitions/flag + description: + If present indicates that the system has the hardware workaround for + ERR006687 applied and does not need a software workaround. + + fsl,stop-mode: + $ref: /schemas/types.yaml#/definitions/phandle-array + description: + Register bits of stop mode control, the format is <&gpr req_gpr req_bit>. + gpr is the phandle to general purpose register node. + req_gpr is the gpr register offset for ENET stop request. + req_bit is the gpr bit offset for ENET stop request. 
+ + mdio: + type: object + description: + Specifies the mdio bus in the FEC, used as a container for phy nodes. + + # Deprecated optional properties: + # To avoid these, create a phy node according to ethernet-phy.yaml in the same + # directory, and point the FEC's "phy-handle" property to it. Then use + # the phy's reset binding, again described by ethernet-phy.yaml. + + phy-reset-gpios: + deprecated: true + description: + Should specify the gpio for phy reset. + + phy-reset-duration: + deprecated: true + description: + Reset duration in milliseconds. Should present only if property + "phy-reset-gpios" is available. Missing the property will have the + duration be 1 millisecond. Numbers greater than 1000 are invalid + and 1 millisecond will be used instead. + + phy-reset-active-high: + deprecated: true + description: + If present then the reset sequence using the GPIO specified in the + "phy-reset-gpios" property is reversed (H=reset state, L=operation state). + + phy-reset-post-delay: + deprecated: true + description: + Post reset delay in milliseconds. If present then a delay of phy-reset-post-delay + milliseconds will be observed after the phy-reset-gpios has been toggled. + Can be omitted thus no delay is observed. Delay is in range of 1ms to 1000ms. + Other delays are invalid. + +required: + - compatible + - reg + - interrupts + +# FIXME: We had better set additionalProperties to false to avoid invalid or at +# least undocumented properties. However, PHY may have a deprecated option to +# place PHY OF properties in the MAC node, such as Micrel PHY, and we can find +# these boards which is based on i.MX6QDL. 
+additionalProperties: false + +examples: + - | + ethernet@83fec000 { + compatible = "fsl,imx51-fec", "fsl,imx27-fec"; + reg = <0x83fec000 0x4000>; + interrupts = <87>; + phy-mode = "mii"; + phy-reset-gpios = <&gpio2 14 0>; + phy-supply = <&reg_fec_supply>; + }; + + ethernet@83fed000 { + compatible = "fsl,imx51-fec", "fsl,imx27-fec"; + reg = <0x83fed000 0x4000>; + interrupts = <87>; + phy-mode = "mii"; + phy-reset-gpios = <&gpio2 14 0>; + phy-supply = <&reg_fec_supply>; + phy-handle = <&ethphy0>; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + ethphy0: ethernet-phy@0 { + compatible = "ethernet-phy-ieee802.3-c22"; + reg = <0>; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/net/fsl-fec.txt b/Documentation/devicetree/bindings/net/fsl-fec.txt deleted file mode 100644 index 9b543789cd52..000000000000 --- a/Documentation/devicetree/bindings/net/fsl-fec.txt +++ /dev/null @@ -1,95 +0,0 @@ -* Freescale Fast Ethernet Controller (FEC) - -Required properties: -- compatible : Should be "fsl,<soc>-fec" -- reg : Address and length of the register set for the device -- interrupts : Should contain fec interrupt -- phy-mode : See ethernet.txt file in the same directory - -Optional properties: -- phy-supply : regulator that powers the Ethernet PHY. -- phy-handle : phandle to the PHY device connected to this device. -- fixed-link : Assume a fixed link. See fixed-link.txt in the same directory. - Use instead of phy-handle. -- fsl,num-tx-queues : The property is valid for enet-avb IP, which supports - hw multi queues. Should specify the tx queue number, otherwise set tx queue - number to 1. -- fsl,num-rx-queues : The property is valid for enet-avb IP, which supports - hw multi queues. Should specify the rx queue number, otherwise set rx queue - number to 1. -- fsl,magic-packet : If present, indicates that the hardware supports waking - up via magic packet. 
-- fsl,err006687-workaround-present: If present indicates that the system has - the hardware workaround for ERR006687 applied and does not need a software - workaround. -- fsl,stop-mode: register bits of stop mode control, the format is - <&gpr req_gpr req_bit>. - gpr is the phandle to general purpose register node. - req_gpr is the gpr register offset for ENET stop request. - req_bit is the gpr bit offset for ENET stop request. - -interrupt-names: names of the interrupts listed in interrupts property in - the same order. The defaults if not specified are - __Number of interrupts__ __Default__ - 1 "int0" - 2 "int0", "pps" - 3 "int0", "int1", "int2" - 4 "int0", "int1", "int2", "pps" - The order may be changed as long as they correspond to the interrupts - property. Currently, only i.mx7 uses "int1" and "int2". They correspond to - tx/rx queues 1 and 2. "int0" will be used for queue 0 and ENET_MII interrupts. - For imx6sx, "int0" handles all 3 queues and ENET_MII. "pps" is for the pulse - per second interrupt associated with 1588 precision time protocol(PTP). - -Optional subnodes: -- mdio : specifies the mdio bus in the FEC, used as a container for phy nodes - according to phy.txt in the same directory - -Deprecated optional properties: - To avoid these, create a phy node according to phy.txt in the same - directory, and point the fec's "phy-handle" property to it. Then use - the phy's reset binding, again described by phy.txt. -- phy-reset-gpios : Should specify the gpio for phy reset -- phy-reset-duration : Reset duration in milliseconds. Should present - only if property "phy-reset-gpios" is available. Missing the property - will have the duration be 1 millisecond. Numbers greater than 1000 are - invalid and 1 millisecond will be used instead. -- phy-reset-active-high : If present then the reset sequence using the GPIO - specified in the "phy-reset-gpios" property is reversed (H=reset state, - L=operation state). 
-- phy-reset-post-delay : Post reset delay in milliseconds. If present then - a delay of phy-reset-post-delay milliseconds will be observed after the - phy-reset-gpios has been toggled. Can be omitted thus no delay is - observed. Delay is in range of 1ms to 1000ms. Other delays are invalid. - -Example: - -ethernet@83fec000 { - compatible = "fsl,imx51-fec", "fsl,imx27-fec"; - reg = <0x83fec000 0x4000>; - interrupts = <87>; - phy-mode = "mii"; - phy-reset-gpios = <&gpio2 14 GPIO_ACTIVE_LOW>; /* GPIO2_14 */ - local-mac-address = [00 04 9F 01 1B B9]; - phy-supply = <&reg_fec_supply>; -}; - -Example with phy specified: - -ethernet@83fec000 { - compatible = "fsl,imx51-fec", "fsl,imx27-fec"; - reg = <0x83fec000 0x4000>; - interrupts = <87>; - phy-mode = "mii"; - phy-reset-gpios = <&gpio2 14 GPIO_ACTIVE_LOW>; /* GPIO2_14 */ - local-mac-address = [00 04 9F 01 1B B9]; - phy-supply = <&reg_fec_supply>; - phy-handle = <&ethphy>; - mdio { - clock-frequency = <5000000>; - ethphy: ethernet-phy@6 { - compatible = "ethernet-phy-ieee802.3-c22"; - reg = <6>; - max-speed = <100>; - }; - }; -}; diff --git a/Documentation/devicetree/bindings/net/qcom,ipa.yaml b/Documentation/devicetree/bindings/net/qcom,ipa.yaml index ed88ba4b94df..4853ab7017bd 100644 --- a/Documentation/devicetree/bindings/net/qcom,ipa.yaml +++ b/Documentation/devicetree/bindings/net/qcom,ipa.yaml @@ -87,16 +87,18 @@ properties: - const: ipa-setup-ready interconnects: + minItems: 2 items: - - description: Interconnect path between IPA and main memory - - description: Interconnect path between IPA and internal memory - - description: Interconnect path between IPA and the AP subsystem + - description: Path leading to system memory + - description: Path between the AP and IPA config space + - description: Path leading to internal memory interconnect-names: + minItems: 2 items: - const: memory - - const: imem - const: config + - const: imem qcom,smem-states: $ref: /schemas/types.yaml#/definitions/phandle-array @@ -207,11 +209,11 @@ 
examples: interconnects = <&rsc_hlos MASTER_IPA &rsc_hlos SLAVE_EBI1>, - <&rsc_hlos MASTER_IPA &rsc_hlos SLAVE_IMEM>, - <&rsc_hlos MASTER_APPSS_PROC &rsc_hlos SLAVE_IPA_CFG>; + <&rsc_hlos MASTER_APPSS_PROC &rsc_hlos SLAVE_IPA_CFG>, + <&rsc_hlos MASTER_IPA &rsc_hlos SLAVE_IMEM>; interconnect-names = "memory", - "imem", - "config"; + "config", + "imem"; qcom,smem-states = <&ipa_smp2p_out 0>, <&ipa_smp2p_out 1>; diff --git a/Documentation/driver-api/nfc/nfc-hci.rst b/Documentation/driver-api/nfc/nfc-hci.rst index eb8a1a14e919..f10fe53aa9fe 100644 --- a/Documentation/driver-api/nfc/nfc-hci.rst +++ b/Documentation/driver-api/nfc/nfc-hci.rst @@ -181,7 +181,7 @@ xmit_from_hci(): The llc must be registered with nfc before it can be used. Do that by calling:: - nfc_llc_register(const char *name, struct nfc_llc_ops *ops); + nfc_llc_register(const char *name, const struct nfc_llc_ops *ops); Again, note that the llc does not handle the physical link. It is thus very easy to mix any physical link with any llc for a given chip driver. diff --git a/Documentation/networking/devlink/hns3.rst b/Documentation/networking/devlink/hns3.rst new file mode 100644 index 000000000000..4562a6e4782f --- /dev/null +++ b/Documentation/networking/devlink/hns3.rst @@ -0,0 +1,25 @@ +.. SPDX-License-Identifier: GPL-2.0 + +==================== +hns3 devlink support +==================== + +This document describes the devlink features implemented by the ``hns3`` +device driver. + +The ``hns3`` driver supports reloading via ``DEVLINK_CMD_RELOAD``. + +Info versions +============= + +The ``hns3`` driver reports the following versions + +.. list-table:: devlink info versions implemented + :widths: 10 10 80 + + * - Name + - Type + - Description + * - ``fw`` + - running + - Used to represent the firmware version. 
diff --git a/Documentation/networking/devlink/index.rst b/Documentation/networking/devlink/index.rst index b3b9e0692088..03f56ed2961f 100644 --- a/Documentation/networking/devlink/index.rst +++ b/Documentation/networking/devlink/index.rst @@ -34,6 +34,7 @@ parameters, info versions, and other features it supports. :maxdepth: 1 bnxt + hns3 ionic ice mlx4 diff --git a/Documentation/networking/ioam6-sysctl.rst b/Documentation/networking/ioam6-sysctl.rst new file mode 100644 index 000000000000..c18cab2c481a --- /dev/null +++ b/Documentation/networking/ioam6-sysctl.rst @@ -0,0 +1,26 @@ +.. SPDX-License-Identifier: GPL-2.0 + +===================== +IOAM6 Sysfs variables +===================== + + +/proc/sys/net/conf/<iface>/ioam6_* variables: +============================================= + +ioam6_enabled - BOOL + Accept (= enabled) or ignore (= disabled) IPv6 IOAM options on ingress + for this interface. + + * 0 - disabled (default) + * 1 - enabled + +ioam6_id - SHORT INTEGER + Define the IOAM id of this interface. + + Default is ~0. + +ioam6_id_wide - INTEGER + Define the wide IOAM id of this interface. + + Default is ~0. diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 316c7dfa9693..d91ab28718d4 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1926,6 +1926,23 @@ fib_notify_on_flag_change - INTEGER - 1 - Emit notifications. - 2 - Emit notifications only for RTM_F_OFFLOAD_FAILED flag change. +ioam6_id - INTEGER + Define the IOAM id of this node. Uses only 24 bits out of 32 in total. + + Min: 0 + Max: 0xFFFFFF + + Default: 0xFFFFFF + +ioam6_id_wide - LONG INTEGER + Define the wide IOAM id of this node. Uses only 56 bits out of 64 in + total. Can be different from ioam6_id. 
+ Min: 0 + Max: 0xFFFFFFFFFFFFFF + + Default: 0xFFFFFFFFFFFFFF + IPv6 Fragmentation: ip6frag_high_thresh - INTEGER diff --git a/MAINTAINERS b/MAINTAINERS index 85b93585a635..8a1a8005dd27 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10397,6 +10397,7 @@ F: net/core/skmsg.c F: net/core/sock_map.c F: net/ipv4/tcp_bpf.c F: net/ipv4/udp_bpf.c +F: net/unix/unix_bpf.c LANDLOCK SECURITY MODULE M: Mickaël Salaün <mic@digikod.net> @@ -11336,6 +11337,12 @@ W: https://linuxtv.org T: git git://linuxtv.org/media_tree.git F: drivers/media/radio/radio-maxiradio* +MAXLINEAR ETHERNET PHY DRIVER +M: Xu Liang <lxu@maxlinear.com> +L: netdev@vger.kernel.org +S: Supported +F: drivers/net/phy/mxl-gpy.c + MCAN MMIO DEVICE DRIVER M: Chandrasekar Ramakrishnan <rcsekar@samsung.com> L: linux-can@vger.kernel.org diff --git a/arch/arm/boot/dts/imx35.dtsi b/arch/arm/boot/dts/imx35.dtsi index 98ccc81ca6d9..8e41c8b7bd70 100644 --- a/arch/arm/boot/dts/imx35.dtsi +++ b/arch/arm/boot/dts/imx35.dtsi @@ -189,7 +189,7 @@ status = "disabled"; }; - fec: fec@50038000 { + fec: ethernet@50038000 { compatible = "fsl,imx35-fec", "fsl,imx27-fec"; reg = <0x50038000 0x4000>; clocks = <&clks 46>, <&clks 8>; diff --git a/arch/arm/boot/dts/imx6q-novena.dts b/arch/arm/boot/dts/imx6q-novena.dts index 52e3567d1859..225cf6b7a7a4 100644 --- a/arch/arm/boot/dts/imx6q-novena.dts +++ b/arch/arm/boot/dts/imx6q-novena.dts @@ -222,20 +222,30 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet_novena>; phy-mode = "rgmii"; + phy-handle = <&ethphy>; phy-reset-gpios = <&gpio3 23 GPIO_ACTIVE_LOW>; - rxc-skew-ps = <3000>; - rxdv-skew-ps = <0>; - txc-skew-ps = <3000>; - txen-skew-ps = <0>; - rxd0-skew-ps = <0>; - rxd1-skew-ps = <0>; - rxd2-skew-ps = <0>; - rxd3-skew-ps = <0>; - txd0-skew-ps = <3000>; - txd1-skew-ps = <3000>; - txd2-skew-ps = <3000>; - txd3-skew-ps = <3000>; status = "okay"; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + ethphy: ethernet-phy { + compatible = "ethernet-phy-ieee802.3-c22"; + rxc-skew-ps 
= <3000>; + rxdv-skew-ps = <0>; + txc-skew-ps = <3000>; + txen-skew-ps = <0>; + rxd0-skew-ps = <0>; + rxd1-skew-ps = <0>; + rxd2-skew-ps = <0>; + rxd3-skew-ps = <0>; + txd0-skew-ps = <3000>; + txd1-skew-ps = <3000>; + txd2-skew-ps = <3000>; + txd3-skew-ps = <3000>; + }; + }; }; &hdmi { diff --git a/arch/arm/boot/dts/imx6qdl-aristainetos2.dtsi b/arch/arm/boot/dts/imx6qdl-aristainetos2.dtsi index ead7ba27e105..563bf9d44fe0 100644 --- a/arch/arm/boot/dts/imx6qdl-aristainetos2.dtsi +++ b/arch/arm/boot/dts/imx6qdl-aristainetos2.dtsi @@ -316,12 +316,22 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; phy-mode = "rgmii"; + phy-handle = <&ethphy>; phy-reset-gpios = <&gpio7 18 GPIO_ACTIVE_LOW>; - txd0-skew-ps = <0>; - txd1-skew-ps = <0>; - txd2-skew-ps = <0>; - txd3-skew-ps = <0>; status = "okay"; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + ethphy: ethernet-phy { + compatible = "ethernet-phy-ieee802.3-c22"; + txd0-skew-ps = <0>; + txd1-skew-ps = <0>; + txd2-skew-ps = <0>; + txd3-skew-ps = <0>; + }; + }; }; &gpmi { diff --git a/arch/arm/boot/dts/imx6qdl-nit6xlite.dtsi b/arch/arm/boot/dts/imx6qdl-nit6xlite.dtsi index d526f01a2c52..ac34709e9741 100644 --- a/arch/arm/boot/dts/imx6qdl-nit6xlite.dtsi +++ b/arch/arm/boot/dts/imx6qdl-nit6xlite.dtsi @@ -190,23 +190,33 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; phy-mode = "rgmii"; + phy-handle = <&ethphy>; phy-reset-gpios = <&gpio1 27 GPIO_ACTIVE_LOW>; - txen-skew-ps = <0>; - txc-skew-ps = <3000>; - rxdv-skew-ps = <0>; - rxc-skew-ps = <3000>; - rxd0-skew-ps = <0>; - rxd1-skew-ps = <0>; - rxd2-skew-ps = <0>; - rxd3-skew-ps = <0>; - txd0-skew-ps = <0>; - txd1-skew-ps = <0>; - txd2-skew-ps = <0>; - txd3-skew-ps = <0>; interrupts-extended = <&gpio1 6 IRQ_TYPE_LEVEL_HIGH>, <&intc 0 119 IRQ_TYPE_LEVEL_HIGH>; fsl,err006687-workaround-present; status = "okay"; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + ethphy: ethernet-phy { + compatible = "ethernet-phy-ieee802.3-c22"; + txen-skew-ps = 
<0>; + txc-skew-ps = <3000>; + rxdv-skew-ps = <0>; + rxc-skew-ps = <3000>; + rxd0-skew-ps = <0>; + rxd1-skew-ps = <0>; + rxd2-skew-ps = <0>; + rxd3-skew-ps = <0>; + txd0-skew-ps = <0>; + txd1-skew-ps = <0>; + txd2-skew-ps = <0>; + txd3-skew-ps = <0>; + }; + }; }; &hdmi { diff --git a/arch/arm/boot/dts/imx6qdl-nitrogen6_max.dtsi b/arch/arm/boot/dts/imx6qdl-nitrogen6_max.dtsi index a0917823c244..c96f4d7e1e0d 100644 --- a/arch/arm/boot/dts/imx6qdl-nitrogen6_max.dtsi +++ b/arch/arm/boot/dts/imx6qdl-nitrogen6_max.dtsi @@ -332,23 +332,33 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; phy-mode = "rgmii"; + phy-handle = <&ethphy>; phy-reset-gpios = <&gpio1 27 GPIO_ACTIVE_LOW>; - txen-skew-ps = <0>; - txc-skew-ps = <3000>; - rxdv-skew-ps = <0>; - rxc-skew-ps = <3000>; - rxd0-skew-ps = <0>; - rxd1-skew-ps = <0>; - rxd2-skew-ps = <0>; - rxd3-skew-ps = <0>; - txd0-skew-ps = <0>; - txd1-skew-ps = <0>; - txd2-skew-ps = <0>; - txd3-skew-ps = <0>; interrupts-extended = <&gpio1 6 IRQ_TYPE_LEVEL_HIGH>, <&intc 0 119 IRQ_TYPE_LEVEL_HIGH>; fsl,err006687-workaround-present; status = "okay"; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + ethphy: ethernet-phy { + compatible = "ethernet-phy-ieee802.3-c22"; + txen-skew-ps = <0>; + txc-skew-ps = <3000>; + rxdv-skew-ps = <0>; + rxc-skew-ps = <3000>; + rxd0-skew-ps = <0>; + rxd1-skew-ps = <0>; + rxd2-skew-ps = <0>; + rxd3-skew-ps = <0>; + txd0-skew-ps = <0>; + txd1-skew-ps = <0>; + txd2-skew-ps = <0>; + txd3-skew-ps = <0>; + }; + }; }; &hdmi { diff --git a/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi b/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi index 1243677b5f97..49da30d7510c 100644 --- a/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi +++ b/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi @@ -265,23 +265,33 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; phy-mode = "rgmii"; + phy-handle = <&ethphy>; phy-reset-gpios = <&gpio1 27 GPIO_ACTIVE_LOW>; - txen-skew-ps = <0>; - txc-skew-ps = <3000>; - rxdv-skew-ps = <0>; - rxc-skew-ps = 
<3000>; - rxd0-skew-ps = <0>; - rxd1-skew-ps = <0>; - rxd2-skew-ps = <0>; - rxd3-skew-ps = <0>; - txd0-skew-ps = <0>; - txd1-skew-ps = <0>; - txd2-skew-ps = <0>; - txd3-skew-ps = <0>; interrupts-extended = <&gpio1 6 IRQ_TYPE_LEVEL_HIGH>, <&intc 0 119 IRQ_TYPE_LEVEL_HIGH>; fsl,err006687-workaround-present; status = "okay"; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + ethphy: ethernet-phy { + compatible = "ethernet-phy-ieee802.3-c22"; + txen-skew-ps = <0>; + txc-skew-ps = <3000>; + rxdv-skew-ps = <0>; + rxc-skew-ps = <3000>; + rxd0-skew-ps = <0>; + rxd1-skew-ps = <0>; + rxd2-skew-ps = <0>; + rxd3-skew-ps = <0>; + txd0-skew-ps = <0>; + txd1-skew-ps = <0>; + txd2-skew-ps = <0>; + txd3-skew-ps = <0>; + }; + }; }; &hdmi { diff --git a/arch/arm/boot/dts/imx6qdl-sabrelite.dtsi b/arch/arm/boot/dts/imx6qdl-sabrelite.dtsi index fdc3aa9d544d..eb9a0b104f1c 100644 --- a/arch/arm/boot/dts/imx6qdl-sabrelite.dtsi +++ b/arch/arm/boot/dts/imx6qdl-sabrelite.dtsi @@ -324,20 +324,30 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; phy-mode = "rgmii"; + phy-handle = <&ethphy>; phy-reset-gpios = <&gpio3 23 GPIO_ACTIVE_LOW>; - txen-skew-ps = <0>; - txc-skew-ps = <3000>; - rxdv-skew-ps = <0>; - rxc-skew-ps = <3000>; - rxd0-skew-ps = <0>; - rxd1-skew-ps = <0>; - rxd2-skew-ps = <0>; - rxd3-skew-ps = <0>; - txd0-skew-ps = <0>; - txd1-skew-ps = <0>; - txd2-skew-ps = <0>; - txd3-skew-ps = <0>; status = "okay"; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + ethphy: ethernet-phy { + compatible = "ethernet-phy-ieee802.3-c22"; + txen-skew-ps = <0>; + txc-skew-ps = <3000>; + rxdv-skew-ps = <0>; + rxc-skew-ps = <3000>; + rxd0-skew-ps = <0>; + rxd1-skew-ps = <0>; + rxd2-skew-ps = <0>; + rxd3-skew-ps = <0>; + txd0-skew-ps = <0>; + txd1-skew-ps = <0>; + txd2-skew-ps = <0>; + txd3-skew-ps = <0>; + }; + }; }; &hdmi { diff --git a/arch/arm/boot/dts/imx7-mba7.dtsi b/arch/arm/boot/dts/imx7-mba7.dtsi index c6d1c63f7905..5e6bef230dc7 100644 --- 
a/arch/arm/boot/dts/imx7-mba7.dtsi +++ b/arch/arm/boot/dts/imx7-mba7.dtsi @@ -216,7 +216,6 @@ phy-mode = "rgmii-id"; phy-reset-gpios = <&gpio7 15 GPIO_ACTIVE_LOW>; phy-reset-duration = <1>; - phy-reset-delay = <1>; phy-supply = <&reg_fec1_pwdn>; phy-handle = <&ethphy1_0>; fsl,magic-packet; diff --git a/arch/arm/boot/dts/imx7d-mba7.dts b/arch/arm/boot/dts/imx7d-mba7.dts index 23856a8d4b8c..36ef6a3cdb0b 100644 --- a/arch/arm/boot/dts/imx7d-mba7.dts +++ b/arch/arm/boot/dts/imx7d-mba7.dts @@ -23,7 +23,6 @@ phy-mode = "rgmii-id"; phy-reset-gpios = <&gpio2 28 GPIO_ACTIVE_LOW>; phy-reset-duration = <1>; - phy-reset-delay = <1>; phy-supply = <&reg_fec2_pwdn>; phy-handle = <&ethphy2_0>; fsl,magic-packet; diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi b/arch/arm64/boot/dts/qcom/sc7180.dtsi index 9b65896d62dc..5b10eb4be2b1 100644 --- a/arch/arm64/boot/dts/qcom/sc7180.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi @@ -110,6 +110,11 @@ no-map; }; + ipa_fw_mem: memory@8b700000 { + reg = <0 0x8b700000 0 0x10000>; + no-map; + }; + rmtfs_mem: memory@94600000 { compatible = "qcom,rmtfs-mem"; reg = <0x0 0x94600000 0x0 0x200000>; diff --git a/arch/arm64/boot/dts/qcom/sc7280.dtsi b/arch/arm64/boot/dts/qcom/sc7280.dtsi index 029723a75ede..5764c5b5cae1 100644 --- a/arch/arm64/boot/dts/qcom/sc7280.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280.dtsi @@ -69,6 +69,11 @@ no-map; reg = <0x0 0x80b00000 0x0 0x100000>; }; + + ipa_fw_mem: memory@8b700000 { + reg = <0 0x8b700000 0 0x10000>; + no-map; + }; }; cpus { @@ -568,6 +573,43 @@ qcom,bcm-voters = <&apps_bcm_voter>; }; + ipa: ipa@1e40000 { + compatible = "qcom,sc7280-ipa"; + + iommus = <&apps_smmu 0x480 0x0>, + <&apps_smmu 0x482 0x0>; + reg = <0 0x1e40000 0 0x8000>, + <0 0x1e50000 0 0x4ad0>, + <0 0x1e04000 0 0x23000>; + reg-names = "ipa-reg", + "ipa-shared", + "gsi"; + + interrupts-extended = <&intc 0 654 IRQ_TYPE_EDGE_RISING>, + <&intc 0 432 IRQ_TYPE_LEVEL_HIGH>, + <&ipa_smp2p_in 0 IRQ_TYPE_EDGE_RISING>, + <&ipa_smp2p_in 1 IRQ_TYPE_EDGE_RISING>; + 
interrupt-names = "ipa", + "gsi", + "ipa-clock-query", + "ipa-setup-ready"; + + clocks = <&rpmhcc RPMH_IPA_CLK>; + clock-names = "core"; + + interconnects = <&aggre2_noc MASTER_IPA 0 &mc_virt SLAVE_EBI1 0>, + <&gem_noc MASTER_APPSS_PROC 0 &cnoc2 SLAVE_IPA_CFG 0>; + interconnect-names = "memory", + "config"; + + qcom,smem-states = <&ipa_smp2p_out 0>, + <&ipa_smp2p_out 1>; + qcom,smem-state-names = "ipa-clock-enabled-valid", + "ipa-clock-enabled"; + + status = "disabled"; + }; + tcsr_mutex: hwlock@1f40000 { compatible = "qcom,tcsr-mutex", "syscon"; reg = <0 0x01f40000 0 0x40000>; diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index 23a9fb73c04f..79c1a750e357 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -5,6 +5,9 @@ #ifndef __ASM_COMPAT_H #define __ASM_COMPAT_H +#define compat_mode_t compat_mode_t +typedef u16 compat_mode_t; + #include <asm-generic/compat.h> #ifdef CONFIG_COMPAT @@ -27,13 +30,9 @@ typedef u16 __compat_uid_t; typedef u16 __compat_gid_t; typedef u16 __compat_uid16_t; typedef u16 __compat_gid16_t; -typedef u32 __compat_uid32_t; -typedef u32 __compat_gid32_t; -typedef u16 compat_mode_t; typedef u32 compat_dev_t; typedef s32 compat_nlink_t; typedef u16 compat_ipc_pid_t; -typedef u32 compat_caddr_t; typedef __kernel_fsid_t compat_fsid_t; struct compat_stat { @@ -103,13 +102,6 @@ struct compat_statfs { #define COMPAT_RLIM_INFINITY 0xffffffff -typedef u32 compat_old_sigset_t; - -#define _COMPAT_NSIG 64 -#define _COMPAT_NSIG_BPW 32 - -typedef u32 compat_sigset_word; - #define COMPAT_OFF_T_MAX 0x7fffffff #define compat_user_stack_pointer() (user_stack_pointer(task_pt_regs(current))) diff --git a/arch/mips/include/asm/compat.h b/arch/mips/include/asm/compat.h index 65975712a22d..53f015a1b0a7 100644 --- a/arch/mips/include/asm/compat.h +++ b/arch/mips/include/asm/compat.h @@ -9,20 +9,25 @@ #include <asm/page.h> #include <asm/ptrace.h> +typedef s32 __compat_uid_t; +typedef s32 __compat_gid_t; 
+typedef __compat_uid_t __compat_uid32_t; +typedef __compat_gid_t __compat_gid32_t; +#define __compat_uid32_t __compat_uid32_t +#define __compat_gid32_t __compat_gid32_t + +#define _COMPAT_NSIG 128 /* Don't ask !$@#% ... */ +#define _COMPAT_NSIG_BPW 32 +typedef u32 compat_sigset_word; + #include <asm-generic/compat.h> #define COMPAT_USER_HZ 100 #define COMPAT_UTS_MACHINE "mips\0\0\0" -typedef s32 __compat_uid_t; -typedef s32 __compat_gid_t; -typedef __compat_uid_t __compat_uid32_t; -typedef __compat_gid_t __compat_gid32_t; -typedef u32 compat_mode_t; typedef u32 compat_dev_t; typedef u32 compat_nlink_t; typedef s32 compat_ipc_pid_t; -typedef s32 compat_caddr_t; typedef struct { s32 val[2]; } compat_fsid_t; @@ -89,13 +94,6 @@ struct compat_statfs { #define COMPAT_RLIM_INFINITY 0x7fffffffUL -typedef u32 compat_old_sigset_t; /* at least 32 bits */ - -#define _COMPAT_NSIG 128 /* Don't ask !$@#% ... */ -#define _COMPAT_NSIG_BPW 32 - -typedef u32 compat_sigset_word; - #define COMPAT_OFF_T_MAX 0x7fffffff static inline void __user *arch_compat_alloc_user_space(long len) diff --git a/arch/parisc/include/asm/compat.h b/arch/parisc/include/asm/compat.h index 1a609d38f667..b5d90e82b65d 100644 --- a/arch/parisc/include/asm/compat.h +++ b/arch/parisc/include/asm/compat.h @@ -8,6 +8,9 @@ #include <linux/sched.h> #include <linux/thread_info.h> +#define compat_mode_t compat_mode_t +typedef u16 compat_mode_t; + #include <asm-generic/compat.h> #define COMPAT_USER_HZ 100 @@ -15,13 +18,9 @@ typedef u32 __compat_uid_t; typedef u32 __compat_gid_t; -typedef u32 __compat_uid32_t; -typedef u32 __compat_gid32_t; -typedef u16 compat_mode_t; typedef u32 compat_dev_t; typedef u16 compat_nlink_t; typedef u16 compat_ipc_pid_t; -typedef u32 compat_caddr_t; struct compat_stat { compat_dev_t st_dev; /* dev_t is 32 bits on parisc */ @@ -96,13 +95,6 @@ struct compat_sigcontext { #define COMPAT_RLIM_INFINITY 0xffffffff -typedef u32 compat_old_sigset_t; /* at least 32 bits */ - -#define _COMPAT_NSIG 64 
-#define _COMPAT_NSIG_BPW 32 - -typedef u32 compat_sigset_word; - #define COMPAT_OFF_T_MAX 0x7fffffff struct compat_ipc64_perm { diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h index 9191fc29e6ed..e33dcf134cdd 100644 --- a/arch/powerpc/include/asm/compat.h +++ b/arch/powerpc/include/asm/compat.h @@ -19,13 +19,9 @@ typedef u32 __compat_uid_t; typedef u32 __compat_gid_t; -typedef u32 __compat_uid32_t; -typedef u32 __compat_gid32_t; -typedef u32 compat_mode_t; typedef u32 compat_dev_t; typedef s16 compat_nlink_t; typedef u16 compat_ipc_pid_t; -typedef u32 compat_caddr_t; typedef __kernel_fsid_t compat_fsid_t; struct compat_stat { @@ -85,13 +81,6 @@ struct compat_statfs { #define COMPAT_RLIM_INFINITY 0xffffffff -typedef u32 compat_old_sigset_t; - -#define _COMPAT_NSIG 64 -#define _COMPAT_NSIG_BPW 32 - -typedef u32 compat_sigset_word; - #define COMPAT_OFF_T_MAX 0x7fffffff static inline void __user *arch_compat_alloc_user_space(long len) diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h index 20f169b6db4e..36dbf5043fc0 100644 --- a/arch/s390/include/asm/ccwgroup.h +++ b/arch/s390/include/asm/ccwgroup.h @@ -53,8 +53,6 @@ extern int ccwgroup_driver_register (struct ccwgroup_driver *cdriver); extern void ccwgroup_driver_unregister (struct ccwgroup_driver *cdriver); int ccwgroup_create_dev(struct device *root, struct ccwgroup_driver *gdrv, int num_devices, const char *buf); -struct ccwgroup_device *get_ccwgroupdev_by_busid(struct ccwgroup_driver *gdrv, - char *bus_id); extern int ccwgroup_set_online(struct ccwgroup_device *gdev); extern int ccwgroup_set_offline(struct ccwgroup_device *gdev); diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index ea5b9c34b7be..8d49505b4a43 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -9,6 +9,9 @@ #include <linux/sched/task_stack.h> #include <linux/thread_info.h> +#define compat_mode_t compat_mode_t +typedef u16 
compat_mode_t; + #include <asm-generic/compat.h> #define __TYPE_IS_PTR(t) (!__builtin_types_compatible_p( \ @@ -55,13 +58,9 @@ typedef u16 __compat_uid_t; typedef u16 __compat_gid_t; -typedef u32 __compat_uid32_t; -typedef u32 __compat_gid32_t; -typedef u16 compat_mode_t; typedef u16 compat_dev_t; typedef u16 compat_nlink_t; typedef u16 compat_ipc_pid_t; -typedef u32 compat_caddr_t; typedef __kernel_fsid_t compat_fsid_t; typedef struct { @@ -155,13 +154,6 @@ struct compat_statfs64 { #define COMPAT_RLIM_INFINITY 0xffffffff -typedef u32 compat_old_sigset_t; /* at least 32 bits */ - -#define _COMPAT_NSIG 64 -#define _COMPAT_NSIG_BPW 32 - -typedef u32 compat_sigset_word; - #define COMPAT_OFF_T_MAX 0x7fffffff /* diff --git a/arch/sparc/include/asm/compat.h b/arch/sparc/include/asm/compat.h index b85842cda99f..8b63410e830f 100644 --- a/arch/sparc/include/asm/compat.h +++ b/arch/sparc/include/asm/compat.h @@ -6,6 +6,9 @@ */ #include <linux/types.h> +#define compat_mode_t compat_mode_t +typedef u16 compat_mode_t; + #include <asm-generic/compat.h> #define COMPAT_USER_HZ 100 @@ -13,13 +16,9 @@ typedef u16 __compat_uid_t; typedef u16 __compat_gid_t; -typedef u32 __compat_uid32_t; -typedef u32 __compat_gid32_t; -typedef u16 compat_mode_t; typedef u16 compat_dev_t; typedef s16 compat_nlink_t; typedef u16 compat_ipc_pid_t; -typedef u32 compat_caddr_t; typedef __kernel_fsid_t compat_fsid_t; struct compat_stat { @@ -115,13 +114,6 @@ struct compat_statfs { #define COMPAT_RLIM_INFINITY 0x7fffffff -typedef u32 compat_old_sigset_t; - -#define _COMPAT_NSIG 64 -#define _COMPAT_NSIG_BPW 32 - -typedef u32 compat_sigset_word; - #define COMPAT_OFF_T_MAX 0x7fffffff #ifdef CONFIG_COMPAT diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index be09c7eac89f..4ae01cdb99de 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -12,6 +12,9 @@ #include <asm/user32.h> #include <asm/unistd.h> +#define compat_mode_t compat_mode_t +typedef u16 
compat_mode_t; + #include <asm-generic/compat.h> #define COMPAT_USER_HZ 100 @@ -19,13 +22,9 @@ typedef u16 __compat_uid_t; typedef u16 __compat_gid_t; -typedef u32 __compat_uid32_t; -typedef u32 __compat_gid32_t; -typedef u16 compat_mode_t; typedef u16 compat_dev_t; typedef u16 compat_nlink_t; typedef u16 compat_ipc_pid_t; -typedef u32 compat_caddr_t; typedef __kernel_fsid_t compat_fsid_t; struct compat_stat { @@ -92,13 +91,6 @@ struct compat_statfs { #define COMPAT_RLIM_INFINITY 0xffffffff -typedef u32 compat_old_sigset_t; /* at least 32 bits */ - -#define _COMPAT_NSIG 64 -#define _COMPAT_NSIG_BPW 32 - -typedef u32 compat_sigset_word; - #define COMPAT_OFF_T_MAX 0x7fffffff struct compat_ipc64_perm { diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index 6fd8410a3910..2dfb5fea13af 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -29,6 +29,7 @@ typedef struct { #define SA_X32_ABI 0x01000000u #ifndef CONFIG_COMPAT +#define compat_sigset_t compat_sigset_t typedef sigset_t compat_sigset_t; #endif diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 4b951458c9fc..333650b9372a 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1954,6 +1954,9 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i if (flags & BPF_TRAMP_F_CALL_ORIG) stack_size += 8; /* room for return value of orig_call */ + if (flags & BPF_TRAMP_F_IP_ARG) + stack_size += 8; /* room for IP address argument */ + if (flags & BPF_TRAMP_F_SKIP_FRAME) /* skip patched call instruction and point orig_call to actual * body of the kernel function. 
@@ -1967,6 +1970,22 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */ EMIT1(0x53); /* push rbx */ + if (flags & BPF_TRAMP_F_IP_ARG) { + /* Store IP address of the traced function: + * mov rax, QWORD PTR [rbp + 8] + * sub rax, X86_PATCH_SIZE + * mov QWORD PTR [rbp - stack_size], rax + */ + emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, 8); + EMIT4(0x48, 0x83, 0xe8, X86_PATCH_SIZE); + emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -stack_size); + + /* Continue with stack_size for regs storage, stack will + * be correctly restored with 'leave' instruction. + */ + stack_size -= 8; + } + save_regs(m, &prog, nr_args, stack_size); if (flags & BPF_TRAMP_F_CALL_ORIG) { diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index 9e4bd751db79..81ce81a75fc6 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -3536,7 +3536,7 @@ static int idt77252_preset(struct idt77252_dev *card) return -1; } if (!(pci_command & PCI_COMMAND_IO)) { - printk("%s: PCI_COMMAND: %04x (???)\n", + printk("%s: PCI_COMMAND: %04x (?)\n", card->name, pci_command); deinit_card(card); return (-1); diff --git a/drivers/bus/mhi/pci_generic.c b/drivers/bus/mhi/pci_generic.c index 4dd1077354af..b33b9d75e8af 100644 --- a/drivers/bus/mhi/pci_generic.c +++ b/drivers/bus/mhi/pci_generic.c @@ -32,6 +32,7 @@ * @edl: emergency download mode firmware path (if any) * @bar_num: PCI base address register to use for MHI MMIO register space * @dma_data_width: DMA transfer word size (32 or 64 bits) + * @mru_default: default MRU size for MBIM network packets * @sideband_wake: Devices using dedicated sideband GPIO for wakeup instead * of inband wake support (such as sdx24) */ @@ -42,6 +43,7 @@ struct mhi_pci_dev_info { const char *edl; unsigned int bar_num; unsigned int dma_data_width; + unsigned int mru_default; bool sideband_wake; }; @@ -272,6 +274,7 @@ static const struct mhi_pci_dev_info 
mhi_qcom_sdx55_info = { .config = &modem_qcom_v1_mhiv_config, .bar_num = MHI_PCI_DEFAULT_BAR_NUM, .dma_data_width = 32, + .mru_default = 32768, .sideband_wake = false, }; @@ -664,6 +667,7 @@ static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) mhi_cntrl->status_cb = mhi_pci_status_cb; mhi_cntrl->runtime_get = mhi_pci_runtime_get; mhi_cntrl->runtime_put = mhi_pci_runtime_put; + mhi_cntrl->mru = info->mru_default; if (info->sideband_wake) { mhi_cntrl->wake_get = mhi_pci_wake_get_nop; diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig index e355d3974977..fff259247d52 100644 --- a/drivers/net/can/Kconfig +++ b/drivers/net/can/Kconfig @@ -97,7 +97,8 @@ config CAN_AT91 config CAN_FLEXCAN tristate "Support for Freescale FLEXCAN based chips" - depends on OF && HAS_IOMEM + depends on OF || COLDFIRE || COMPILE_TEST + depends on HAS_IOMEM help Say Y here if you want to support for Freescale FlexCAN. diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c index 04d0bb3ffe89..b06af90a9964 100644 --- a/drivers/net/can/at91_can.c +++ b/drivers/net/can/at91_can.c @@ -43,14 +43,14 @@ enum at91_reg { }; /* Mailbox registers (0 <= i <= 15) */ -#define AT91_MMR(i) (enum at91_reg)(0x200 + ((i) * 0x20)) -#define AT91_MAM(i) (enum at91_reg)(0x204 + ((i) * 0x20)) -#define AT91_MID(i) (enum at91_reg)(0x208 + ((i) * 0x20)) -#define AT91_MFID(i) (enum at91_reg)(0x20C + ((i) * 0x20)) -#define AT91_MSR(i) (enum at91_reg)(0x210 + ((i) * 0x20)) -#define AT91_MDL(i) (enum at91_reg)(0x214 + ((i) * 0x20)) -#define AT91_MDH(i) (enum at91_reg)(0x218 + ((i) * 0x20)) -#define AT91_MCR(i) (enum at91_reg)(0x21C + ((i) * 0x20)) +#define AT91_MMR(i) ((enum at91_reg)(0x200 + ((i) * 0x20))) +#define AT91_MAM(i) ((enum at91_reg)(0x204 + ((i) * 0x20))) +#define AT91_MID(i) ((enum at91_reg)(0x208 + ((i) * 0x20))) +#define AT91_MFID(i) ((enum at91_reg)(0x20C + ((i) * 0x20))) +#define AT91_MSR(i) ((enum at91_reg)(0x210 + ((i) * 0x20))) +#define AT91_MDL(i) ((enum 
at91_reg)(0x214 + ((i) * 0x20))) +#define AT91_MDH(i) ((enum at91_reg)(0x218 + ((i) * 0x20))) +#define AT91_MCR(i) ((enum at91_reg)(0x21C + ((i) * 0x20))) /* Register bits */ #define AT91_MR_CANEN BIT(0) @@ -87,19 +87,19 @@ enum at91_mb_mode { }; /* Interrupt mask bits */ -#define AT91_IRQ_ERRA (1 << 16) -#define AT91_IRQ_WARN (1 << 17) -#define AT91_IRQ_ERRP (1 << 18) -#define AT91_IRQ_BOFF (1 << 19) -#define AT91_IRQ_SLEEP (1 << 20) -#define AT91_IRQ_WAKEUP (1 << 21) -#define AT91_IRQ_TOVF (1 << 22) -#define AT91_IRQ_TSTP (1 << 23) -#define AT91_IRQ_CERR (1 << 24) -#define AT91_IRQ_SERR (1 << 25) -#define AT91_IRQ_AERR (1 << 26) -#define AT91_IRQ_FERR (1 << 27) -#define AT91_IRQ_BERR (1 << 28) +#define AT91_IRQ_ERRA BIT(16) +#define AT91_IRQ_WARN BIT(17) +#define AT91_IRQ_ERRP BIT(18) +#define AT91_IRQ_BOFF BIT(19) +#define AT91_IRQ_SLEEP BIT(20) +#define AT91_IRQ_WAKEUP BIT(21) +#define AT91_IRQ_TOVF BIT(22) +#define AT91_IRQ_TSTP BIT(23) +#define AT91_IRQ_CERR BIT(24) +#define AT91_IRQ_SERR BIT(25) +#define AT91_IRQ_AERR BIT(26) +#define AT91_IRQ_FERR BIT(27) +#define AT91_IRQ_BERR BIT(28) #define AT91_IRQ_ERR_ALL (0x1fff0000) #define AT91_IRQ_ERR_FRAME (AT91_IRQ_CERR | AT91_IRQ_SERR | \ @@ -163,7 +163,7 @@ static const struct can_bittiming_const at91_bittiming_const = { .tseg2_min = 2, .tseg2_max = 8, .sjw_max = 4, - .brp_min = 2, + .brp_min = 2, .brp_max = 128, .brp_inc = 1, }; @@ -281,19 +281,20 @@ static inline u32 at91_read(const struct at91_priv *priv, enum at91_reg reg) } static inline void at91_write(const struct at91_priv *priv, enum at91_reg reg, - u32 value) + u32 value) { writel_relaxed(value, priv->reg_base + reg); } static inline void set_mb_mode_prio(const struct at91_priv *priv, - unsigned int mb, enum at91_mb_mode mode, int prio) + unsigned int mb, enum at91_mb_mode mode, + int prio) { at91_write(priv, AT91_MMR(mb), (mode << 24) | (prio << 16)); } static inline void set_mb_mode(const struct at91_priv *priv, unsigned int mb, - enum at91_mb_mode 
mode) + enum at91_mb_mode mode) { set_mb_mode_prio(priv, mb, mode, 0); } @@ -316,8 +317,7 @@ static void at91_setup_mailboxes(struct net_device *dev) unsigned int i; u32 reg_mid; - /* - * Due to a chip bug (errata 50.2.6.3 & 50.3.5.3) the first + /* Due to a chip bug (errata 50.2.6.3 & 50.3.5.3) the first * mailbox is disabled. The next 11 mailboxes are used as a * reception FIFO. The last mailbox is configured with * overwrite option. The overwrite flag indicates a FIFO @@ -368,7 +368,7 @@ static int at91_set_bittiming(struct net_device *dev) } static int at91_get_berr_counter(const struct net_device *dev, - struct can_berr_counter *bec) + struct can_berr_counter *bec) { const struct at91_priv *priv = netdev_priv(dev); u32 reg_ecr = at91_read(priv, AT91_ECR); @@ -423,8 +423,7 @@ static void at91_chip_stop(struct net_device *dev, enum can_state state) priv->can.state = state; } -/* - * theory of operation: +/* theory of operation: * * According to the datasheet priority 0 is the highest priority, 15 * is the lowest. If two mailboxes have the same priority level the @@ -486,8 +485,7 @@ static netdev_tx_t at91_start_xmit(struct sk_buff *skb, struct net_device *dev) /* _NOTE_: subtract AT91_MB_TX_FIRST offset from mb! */ can_put_echo_skb(skb, dev, mb - get_mb_tx_first(priv), 0); - /* - * we have to stop the queue and deliver all messages in case + /* we have to stop the queue and deliver all messages in case * of a prio+mb counter wrap around. This is the case if * tx_next buffer prio and mailbox equals 0. 
* @@ -515,6 +513,7 @@ static netdev_tx_t at91_start_xmit(struct sk_buff *skb, struct net_device *dev) static inline void at91_activate_rx_low(const struct at91_priv *priv) { u32 mask = get_mb_rx_low_mask(priv); + at91_write(priv, AT91_TCR, mask); } @@ -526,9 +525,10 @@ static inline void at91_activate_rx_low(const struct at91_priv *priv) * Reenables given mailbox for reception of new CAN messages */ static inline void at91_activate_rx_mb(const struct at91_priv *priv, - unsigned int mb) + unsigned int mb) { u32 mask = 1 << mb; + at91_write(priv, AT91_TCR, mask); } @@ -568,7 +568,7 @@ static void at91_rx_overflow_err(struct net_device *dev) * given can frame. "mb" and "cf" must be valid. */ static void at91_read_mb(struct net_device *dev, unsigned int mb, - struct can_frame *cf) + struct can_frame *cf) { const struct at91_priv *priv = netdev_priv(dev); u32 reg_msr, reg_mid; @@ -582,9 +582,9 @@ static void at91_read_mb(struct net_device *dev, unsigned int mb, reg_msr = at91_read(priv, AT91_MSR(mb)); cf->len = can_cc_dlc2len((reg_msr >> 16) & 0xf); - if (reg_msr & AT91_MSR_MRTR) + if (reg_msr & AT91_MSR_MRTR) { cf->can_id |= CAN_RTR_FLAG; - else { + } else { *(u32 *)(cf->data + 0) = at91_read(priv, AT91_MDL(mb)); *(u32 *)(cf->data + 4) = at91_read(priv, AT91_MDH(mb)); } @@ -685,7 +685,7 @@ static int at91_poll_rx(struct net_device *dev, int quota) if (priv->rx_next > get_mb_rx_low_last(priv) && reg_sr & get_mb_rx_low_mask(priv)) netdev_info(dev, - "order of incoming frames cannot be guaranteed\n"); + "order of incoming frames cannot be guaranteed\n"); again: for (mb = find_next_bit(addr, get_mb_tx_first(priv), priv->rx_next); @@ -718,7 +718,7 @@ static int at91_poll_rx(struct net_device *dev, int quota) } static void at91_poll_err_frame(struct net_device *dev, - struct can_frame *cf, u32 reg_sr) + struct can_frame *cf, u32 reg_sr) { struct at91_priv *priv = netdev_priv(dev); @@ -796,8 +796,7 @@ static int at91_poll(struct napi_struct *napi, int quota) if (reg_sr & 
get_irq_mb_rx(priv)) work_done += at91_poll_rx(dev, quota - work_done); - /* - * The error bits are clear on read, + /* The error bits are clear on read, * so use saved value from irq handler. */ reg_sr |= priv->reg_sr; @@ -807,6 +806,7 @@ static int at91_poll(struct napi_struct *napi, int quota) if (work_done < quota) { /* enable IRQs for frame errors and all mailboxes >= rx_next */ u32 reg_ier = AT91_IRQ_ERR_FRAME; + reg_ier |= get_irq_mb_rx(priv) & ~AT91_MB_MASK(priv->rx_next); napi_complete_done(napi, work_done); @@ -816,8 +816,7 @@ static int at91_poll(struct napi_struct *napi, int quota) return work_done; } -/* - * theory of operation: +/* theory of operation: * * priv->tx_echo holds the number of the oldest can_frame put for * transmission into the hardware, but not yet ACKed by the CAN tx @@ -846,8 +845,7 @@ static void at91_irq_tx(struct net_device *dev, u32 reg_sr) /* Disable irq for this TX mailbox */ at91_write(priv, AT91_IDR, 1 << mb); - /* - * only echo if mailbox signals us a transfer + /* only echo if mailbox signals us a transfer * complete (MSR_MRDY). Otherwise it's a tansfer * abort. "can_bus_off()" takes care about the skbs * parked in the echo queue. @@ -862,8 +860,7 @@ static void at91_irq_tx(struct net_device *dev, u32 reg_sr) } } - /* - * restart queue if we don't have a wrap around but restart if + /* restart queue if we don't have a wrap around but restart if * we get a TX int for the last can frame directly before a * wrap around. 
*/ @@ -873,7 +870,7 @@ static void at91_irq_tx(struct net_device *dev, u32 reg_sr) } static void at91_irq_err_state(struct net_device *dev, - struct can_frame *cf, enum can_state new_state) + struct can_frame *cf, enum can_state new_state) { struct at91_priv *priv = netdev_priv(dev); u32 reg_idr = 0, reg_ier = 0; @@ -883,8 +880,7 @@ static void at91_irq_err_state(struct net_device *dev, switch (priv->can.state) { case CAN_STATE_ERROR_ACTIVE: - /* - * from: ERROR_ACTIVE + /* from: ERROR_ACTIVE * to : ERROR_WARNING, ERROR_PASSIVE, BUS_OFF * => : there was a warning int */ @@ -900,8 +896,7 @@ static void at91_irq_err_state(struct net_device *dev, } fallthrough; case CAN_STATE_ERROR_WARNING: - /* - * from: ERROR_ACTIVE, ERROR_WARNING + /* from: ERROR_ACTIVE, ERROR_WARNING * to : ERROR_PASSIVE, BUS_OFF * => : error passive int */ @@ -917,8 +912,7 @@ static void at91_irq_err_state(struct net_device *dev, } break; case CAN_STATE_BUS_OFF: - /* - * from: BUS_OFF + /* from: BUS_OFF * to : ERROR_ACTIVE, ERROR_WARNING, ERROR_PASSIVE */ if (new_state <= CAN_STATE_ERROR_PASSIVE) { @@ -935,12 +929,10 @@ static void at91_irq_err_state(struct net_device *dev, break; } - /* process state changes depending on the new state */ switch (new_state) { case CAN_STATE_ERROR_ACTIVE: - /* - * actually we want to enable AT91_IRQ_WARN here, but + /* actually we want to enable AT91_IRQ_WARN here, but * it screws up the system under certain * circumstances. 
so just enable AT91_IRQ_ERRP, thus * the "fallthrough" @@ -983,7 +975,7 @@ static void at91_irq_err_state(struct net_device *dev, } static int at91_get_state_by_bec(const struct net_device *dev, - enum can_state *state) + enum can_state *state) { struct can_berr_counter bec; int err; @@ -1004,7 +996,6 @@ static int at91_get_state_by_bec(const struct net_device *dev, return 0; } - static void at91_irq_err(struct net_device *dev) { struct at91_priv *priv = netdev_priv(dev); @@ -1018,15 +1009,15 @@ static void at91_irq_err(struct net_device *dev) reg_sr = at91_read(priv, AT91_SR); /* we need to look at the unmasked reg_sr */ - if (unlikely(reg_sr & AT91_IRQ_BOFF)) + if (unlikely(reg_sr & AT91_IRQ_BOFF)) { new_state = CAN_STATE_BUS_OFF; - else if (unlikely(reg_sr & AT91_IRQ_ERRP)) + } else if (unlikely(reg_sr & AT91_IRQ_ERRP)) { new_state = CAN_STATE_ERROR_PASSIVE; - else if (unlikely(reg_sr & AT91_IRQ_WARN)) + } else if (unlikely(reg_sr & AT91_IRQ_WARN)) { new_state = CAN_STATE_ERROR_WARNING; - else if (likely(reg_sr & AT91_IRQ_ERRA)) + } else if (likely(reg_sr & AT91_IRQ_ERRA)) { new_state = CAN_STATE_ERROR_ACTIVE; - else { + } else { netdev_err(dev, "BUG! hardware in undefined state\n"); return; } @@ -1053,8 +1044,7 @@ static void at91_irq_err(struct net_device *dev) priv->can.state = new_state; } -/* - * interrupt handler +/* interrupt handler */ static irqreturn_t at91_irq(int irq, void *dev_id) { @@ -1075,8 +1065,7 @@ static irqreturn_t at91_irq(int irq, void *dev_id) /* Receive or error interrupt? -> napi */ if (reg_sr & (get_irq_mb_rx(priv) | AT91_IRQ_ERR_FRAME)) { - /* - * The error bits are clear on read, + /* The error bits are clear on read, * save for later use. 
*/ priv->reg_sr = reg_sr; @@ -1133,8 +1122,7 @@ static int at91_open(struct net_device *dev) return err; } -/* - * stop CAN bus activity +/* stop CAN bus activity */ static int at91_close(struct net_device *dev) { @@ -1176,8 +1164,8 @@ static const struct net_device_ops at91_netdev_ops = { .ndo_change_mtu = can_change_mtu, }; -static ssize_t at91_sysfs_show_mb0_id(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t mb0_id_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct at91_priv *priv = netdev_priv(to_net_dev(dev)); @@ -1187,8 +1175,9 @@ static ssize_t at91_sysfs_show_mb0_id(struct device *dev, return snprintf(buf, PAGE_SIZE, "0x%03x\n", priv->mb0_id); } -static ssize_t at91_sysfs_set_mb0_id(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) +static ssize_t mb0_id_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { struct net_device *ndev = to_net_dev(dev); struct at91_priv *priv = netdev_priv(ndev); @@ -1222,7 +1211,7 @@ static ssize_t at91_sysfs_set_mb0_id(struct device *dev, return ret; } -static DEVICE_ATTR(mb0_id, 0644, at91_sysfs_show_mb0_id, at91_sysfs_set_mb0_id); +static DEVICE_ATTR_RW(mb0_id); static struct attribute *at91_sysfs_attrs[] = { &dev_attr_mb0_id.attr, diff --git a/drivers/net/can/dev/netlink.c b/drivers/net/can/dev/netlink.c index e38c2566aff4..147c23d7dab7 100644 --- a/drivers/net/can/dev/netlink.c +++ b/drivers/net/can/dev/netlink.c @@ -47,7 +47,7 @@ static int can_validate(struct nlattr *tb[], struct nlattr *data[], } if (data[IFLA_CAN_DATA_BITTIMING]) { - if (!is_can_fd || !data[IFLA_CAN_BITTIMING]) + if (!is_can_fd) return -EOPNOTSUPP; } @@ -132,10 +132,13 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[], priv->ctrlmode |= maskedflags; /* CAN_CTRLMODE_FD can only be set when driver supports FD */ - if (priv->ctrlmode & CAN_CTRLMODE_FD) + if (priv->ctrlmode & CAN_CTRLMODE_FD) { 
dev->mtu = CANFD_MTU; - else + } else { dev->mtu = CAN_MTU; + memset(&priv->data_bittiming, 0, + sizeof(priv->data_bittiming)); + } } if (data[IFLA_CAN_RESTART_MS]) { diff --git a/drivers/net/can/dev/rx-offload.c b/drivers/net/can/dev/rx-offload.c index ab2c1543786c..37b0cc65237b 100644 --- a/drivers/net/can/dev/rx-offload.c +++ b/drivers/net/can/dev/rx-offload.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2014 Protonic Holland, * David Jander - * Copyright (C) 2014-2017 Pengutronix, + * Copyright (C) 2014-2021 Pengutronix, * Marc Kleine-Budde <kernel@pengutronix.de> */ @@ -174,10 +174,8 @@ can_rx_offload_offload_one(struct can_rx_offload *offload, unsigned int n) int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload, u64 pending) { - struct sk_buff_head skb_queue; unsigned int i; - - __skb_queue_head_init(&skb_queue); + int received = 0; for (i = offload->mb_first; can_rx_offload_le(offload, i, offload->mb_last); @@ -191,26 +189,12 @@ int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload, if (IS_ERR_OR_NULL(skb)) continue; - __skb_queue_add_sort(&skb_queue, skb, can_rx_offload_compare); - } - - if (!skb_queue_empty(&skb_queue)) { - unsigned long flags; - u32 queue_len; - - spin_lock_irqsave(&offload->skb_queue.lock, flags); - skb_queue_splice_tail(&skb_queue, &offload->skb_queue); - spin_unlock_irqrestore(&offload->skb_queue.lock, flags); - - queue_len = skb_queue_len(&offload->skb_queue); - if (queue_len > offload->skb_queue_len_max / 8) - netdev_dbg(offload->dev, "%s: queue_len=%d\n", - __func__, queue_len); - - can_rx_offload_schedule(offload); + __skb_queue_add_sort(&offload->skb_irq_queue, skb, + can_rx_offload_compare); + received++; } - return skb_queue_len(&skb_queue); + return received; } EXPORT_SYMBOL_GPL(can_rx_offload_irq_offload_timestamp); @@ -226,13 +210,10 @@ int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload) if (!skb) break; - skb_queue_tail(&offload->skb_queue, 
skb); + __skb_queue_tail(&offload->skb_irq_queue, skb); received++; } - if (received) - can_rx_offload_schedule(offload); - return received; } EXPORT_SYMBOL_GPL(can_rx_offload_irq_offload_fifo); @@ -241,7 +222,6 @@ int can_rx_offload_queue_sorted(struct can_rx_offload *offload, struct sk_buff *skb, u32 timestamp) { struct can_rx_offload_cb *cb; - unsigned long flags; if (skb_queue_len(&offload->skb_queue) > offload->skb_queue_len_max) { @@ -252,11 +232,8 @@ int can_rx_offload_queue_sorted(struct can_rx_offload *offload, cb = can_rx_offload_get_cb(skb); cb->timestamp = timestamp; - spin_lock_irqsave(&offload->skb_queue.lock, flags); - __skb_queue_add_sort(&offload->skb_queue, skb, can_rx_offload_compare); - spin_unlock_irqrestore(&offload->skb_queue.lock, flags); - - can_rx_offload_schedule(offload); + __skb_queue_add_sort(&offload->skb_irq_queue, skb, + can_rx_offload_compare); return 0; } @@ -295,13 +272,56 @@ int can_rx_offload_queue_tail(struct can_rx_offload *offload, return -ENOBUFS; } - skb_queue_tail(&offload->skb_queue, skb); - can_rx_offload_schedule(offload); + __skb_queue_tail(&offload->skb_irq_queue, skb); return 0; } EXPORT_SYMBOL_GPL(can_rx_offload_queue_tail); +void can_rx_offload_irq_finish(struct can_rx_offload *offload) +{ + unsigned long flags; + int queue_len; + + if (skb_queue_empty_lockless(&offload->skb_irq_queue)) + return; + + spin_lock_irqsave(&offload->skb_queue.lock, flags); + skb_queue_splice_tail_init(&offload->skb_irq_queue, &offload->skb_queue); + spin_unlock_irqrestore(&offload->skb_queue.lock, flags); + + queue_len = skb_queue_len(&offload->skb_queue); + if (queue_len > offload->skb_queue_len_max / 8) + netdev_dbg(offload->dev, "%s: queue_len=%d\n", + __func__, queue_len); + + napi_schedule(&offload->napi); +} +EXPORT_SYMBOL_GPL(can_rx_offload_irq_finish); + +void can_rx_offload_threaded_irq_finish(struct can_rx_offload *offload) +{ + unsigned long flags; + int queue_len; + + if (skb_queue_empty_lockless(&offload->skb_irq_queue)) + 
return; + + spin_lock_irqsave(&offload->skb_queue.lock, flags); + skb_queue_splice_tail_init(&offload->skb_irq_queue, &offload->skb_queue); + spin_unlock_irqrestore(&offload->skb_queue.lock, flags); + + queue_len = skb_queue_len(&offload->skb_queue); + if (queue_len > offload->skb_queue_len_max / 8) + netdev_dbg(offload->dev, "%s: queue_len=%d\n", + __func__, queue_len); + + local_bh_disable(); + napi_schedule(&offload->napi); + local_bh_enable(); +} +EXPORT_SYMBOL_GPL(can_rx_offload_threaded_irq_finish); + static int can_rx_offload_init_queue(struct net_device *dev, struct can_rx_offload *offload, unsigned int weight) @@ -312,6 +332,7 @@ static int can_rx_offload_init_queue(struct net_device *dev, offload->skb_queue_len_max = 2 << fls(weight); offload->skb_queue_len_max *= 4; skb_queue_head_init(&offload->skb_queue); + __skb_queue_head_init(&offload->skb_irq_queue); netif_napi_add(dev, &offload->napi, can_rx_offload_napi_poll, weight); @@ -373,5 +394,6 @@ void can_rx_offload_del(struct can_rx_offload *offload) { netif_napi_del(&offload->napi); skb_queue_purge(&offload->skb_queue); + __skb_queue_purge(&offload->skb_irq_queue); } EXPORT_SYMBOL_GPL(can_rx_offload_del); diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 57f3635ad8d7..54ffb796a320 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -28,6 +28,7 @@ #include <linux/of_device.h> #include <linux/pinctrl/consumer.h> #include <linux/platform_device.h> +#include <linux/can/platform/flexcan.h> #include <linux/pm_runtime.h> #include <linux/regmap.h> #include <linux/regulator/consumer.h> @@ -208,18 +209,19 @@ /* FLEXCAN hardware feature flags * * Below is some version info we got: - * SOC Version IP-Version Glitch- [TR]WRN_INT IRQ Err Memory err RTR rece- FD Mode + * SOC Version IP-Version Glitch- [TR]WRN_INT IRQ Err Memory err RTR rece- FD Mode MB * Filter? connected? Passive detection ption in MB Supported? 
- * MX25 FlexCAN2 03.00.00.00 no no no no no no - * MX28 FlexCAN2 03.00.04.00 yes yes no no no no - * MX35 FlexCAN2 03.00.00.00 no no no no no no - * MX53 FlexCAN2 03.00.00.00 yes no no no no no - * MX6s FlexCAN3 10.00.12.00 yes yes no no yes no - * MX8QM FlexCAN3 03.00.23.00 yes yes no no yes yes - * MX8MP FlexCAN3 03.00.17.01 yes yes no yes yes yes - * VF610 FlexCAN3 ? no yes no yes yes? no - * LS1021A FlexCAN2 03.00.04.00 no yes no no yes no - * LX2160A FlexCAN3 03.00.23.00 no yes no yes yes yes + * MCF5441X FlexCAN2 ? no yes no no yes no 16 + * MX25 FlexCAN2 03.00.00.00 no no no no no no 64 + * MX28 FlexCAN2 03.00.04.00 yes yes no no no no 64 + * MX35 FlexCAN2 03.00.00.00 no no no no no no 64 + * MX53 FlexCAN2 03.00.00.00 yes no no no no no 64 + * MX6s FlexCAN3 10.00.12.00 yes yes no no yes no 64 + * MX8QM FlexCAN3 03.00.23.00 yes yes no no yes yes 64 + * MX8MP FlexCAN3 03.00.17.01 yes yes no yes yes yes 64 + * VF610 FlexCAN3 ? no yes no yes yes? no 64 + * LS1021A FlexCAN2 03.00.04.00 no yes no no yes no 64 + * LX2160A FlexCAN3 03.00.23.00 no yes no yes yes yes 64 * * Some SOCs do not have the RX_WARN & TX_WARN interrupt line connected. 
*/ @@ -246,6 +248,10 @@ #define FLEXCAN_QUIRK_SUPPORT_ECC BIT(10) /* Setup stop mode with SCU firmware to support wakeup */ #define FLEXCAN_QUIRK_SETUP_STOP_MODE_SCFW BIT(11) +/* Setup 3 separate interrupts, main, boff and err */ +#define FLEXCAN_QUIRK_NR_IRQ_3 BIT(12) +/* Setup 16 mailboxes */ +#define FLEXCAN_QUIRK_NR_MB_16 BIT(13) /* Structure of the message buffer */ struct flexcan_mb { @@ -363,6 +369,9 @@ struct flexcan_priv { struct regulator *reg_xceiver; struct flexcan_stop_mode stm; + int irq_boff; + int irq_err; + /* IPC handle when setup stop mode by System Controller firmware(scfw) */ struct imx_sc_ipc *sc_ipc_handle; @@ -371,6 +380,11 @@ struct flexcan_priv { void (*write)(u32 val, void __iomem *addr); }; +static const struct flexcan_devtype_data fsl_mcf5441x_devtype_data = { + .quirks = FLEXCAN_QUIRK_BROKEN_PERR_STATE | + FLEXCAN_QUIRK_NR_IRQ_3 | FLEXCAN_QUIRK_NR_MB_16, +}; + static const struct flexcan_devtype_data fsl_p1010_devtype_data = { .quirks = FLEXCAN_QUIRK_BROKEN_WERR_STATE | FLEXCAN_QUIRK_BROKEN_PERR_STATE | @@ -637,13 +651,17 @@ static int flexcan_clks_enable(const struct flexcan_priv *priv) { int err; - err = clk_prepare_enable(priv->clk_ipg); - if (err) - return err; + if (priv->clk_ipg) { + err = clk_prepare_enable(priv->clk_ipg); + if (err) + return err; + } - err = clk_prepare_enable(priv->clk_per); - if (err) - clk_disable_unprepare(priv->clk_ipg); + if (priv->clk_per) { + err = clk_prepare_enable(priv->clk_per); + if (err) + clk_disable_unprepare(priv->clk_ipg); + } return err; } @@ -1198,6 +1216,9 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id) } } + if (handled) + can_rx_offload_irq_finish(&priv->offload); + return handled; } @@ -1401,8 +1422,12 @@ static int flexcan_rx_offload_setup(struct net_device *dev) priv->mb_size = sizeof(struct flexcan_mb) + CANFD_MAX_DLEN; else priv->mb_size = sizeof(struct flexcan_mb) + CAN_MAX_DLEN; - priv->mb_count = (sizeof(priv->regs->mb[0]) / priv->mb_size) + - (sizeof(priv->regs->mb[1]) / 
priv->mb_size); + + if (priv->devtype_data->quirks & FLEXCAN_QUIRK_NR_MB_16) + priv->mb_count = 16; + else + priv->mb_count = (sizeof(priv->regs->mb[0]) / priv->mb_size) + + (sizeof(priv->regs->mb[1]) / priv->mb_size); if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) priv->tx_mb_reserved = @@ -1774,6 +1799,18 @@ static int flexcan_open(struct net_device *dev) if (err) goto out_can_rx_offload_disable; + if (priv->devtype_data->quirks & FLEXCAN_QUIRK_NR_IRQ_3) { + err = request_irq(priv->irq_boff, + flexcan_irq, IRQF_SHARED, dev->name, dev); + if (err) + goto out_free_irq; + + err = request_irq(priv->irq_err, + flexcan_irq, IRQF_SHARED, dev->name, dev); + if (err) + goto out_free_irq_boff; + } + flexcan_chip_interrupts_enable(dev); can_led_event(dev, CAN_LED_EVENT_OPEN); @@ -1782,6 +1819,10 @@ static int flexcan_open(struct net_device *dev) return 0; + out_free_irq_boff: + free_irq(priv->irq_boff, dev); + out_free_irq: + free_irq(dev->irq, dev); out_can_rx_offload_disable: can_rx_offload_disable(&priv->offload); flexcan_chip_stop(dev); @@ -1803,6 +1844,12 @@ static int flexcan_close(struct net_device *dev) netif_stop_queue(dev); flexcan_chip_interrupts_disable(dev); + + if (priv->devtype_data->quirks & FLEXCAN_QUIRK_NR_IRQ_3) { + free_irq(priv->irq_err, dev); + free_irq(priv->irq_boff, dev); + } + free_irq(dev->irq, dev); can_rx_offload_disable(&priv->offload); flexcan_chip_stop_disable_on_error(dev); @@ -2039,14 +2086,26 @@ static const struct of_device_id flexcan_of_match[] = { }; MODULE_DEVICE_TABLE(of, flexcan_of_match); +static const struct platform_device_id flexcan_id_table[] = { + { + .name = "flexcan-mcf5441x", + .driver_data = (kernel_ulong_t)&fsl_mcf5441x_devtype_data, + }, { + /* sentinel */ + }, +}; +MODULE_DEVICE_TABLE(platform, flexcan_id_table); + static int flexcan_probe(struct platform_device *pdev) { + const struct of_device_id *of_id; const struct flexcan_devtype_data *devtype_data; struct net_device *dev; struct flexcan_priv 
*priv; struct regulator *reg_xceiver; struct clk *clk_ipg = NULL, *clk_per = NULL; struct flexcan_regs __iomem *regs; + struct flexcan_platform_data *pdata; int err, irq; u8 clk_src = 1; u32 clock_freq = 0; @@ -2064,6 +2123,12 @@ static int flexcan_probe(struct platform_device *pdev) "clock-frequency", &clock_freq); of_property_read_u8(pdev->dev.of_node, "fsl,clk-source", &clk_src); + } else { + pdata = dev_get_platdata(&pdev->dev); + if (pdata) { + clock_freq = pdata->clock_frequency; + clk_src = pdata->clk_src; + } } if (!clock_freq) { @@ -2089,7 +2154,14 @@ static int flexcan_probe(struct platform_device *pdev) if (IS_ERR(regs)) return PTR_ERR(regs); - devtype_data = of_device_get_match_data(&pdev->dev); + of_id = of_match_device(flexcan_of_match, &pdev->dev); + if (of_id) + devtype_data = of_id->data; + else if (platform_get_device_id(pdev)->driver_data) + devtype_data = (struct flexcan_devtype_data *) + platform_get_device_id(pdev)->driver_data; + else + return -ENODEV; if ((devtype_data->quirks & FLEXCAN_QUIRK_SUPPORT_FD) && !(devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP)) { @@ -2133,6 +2205,19 @@ static int flexcan_probe(struct platform_device *pdev) priv->devtype_data = devtype_data; priv->reg_xceiver = reg_xceiver; + if (devtype_data->quirks & FLEXCAN_QUIRK_NR_IRQ_3) { + priv->irq_boff = platform_get_irq(pdev, 1); + if (priv->irq_boff <= 0) { + err = -ENODEV; + goto failed_platform_get_irq; + } + priv->irq_err = platform_get_irq(pdev, 2); + if (priv->irq_err <= 0) { + err = -ENODEV; + goto failed_platform_get_irq; + } + } + if (priv->devtype_data->quirks & FLEXCAN_QUIRK_SUPPORT_FD) { priv->can.ctrlmode_supported |= CAN_CTRLMODE_FD | CAN_CTRLMODE_FD_NON_ISO; @@ -2170,6 +2255,7 @@ static int flexcan_probe(struct platform_device *pdev) failed_register: pm_runtime_put_noidle(&pdev->dev); pm_runtime_disable(&pdev->dev); + failed_platform_get_irq: free_candev(dev); return err; } @@ -2322,6 +2408,7 @@ static struct platform_driver flexcan_driver = { }, 
.probe = flexcan_probe, .remove = flexcan_remove, + .id_table = flexcan_id_table, }; module_platform_driver(flexcan_driver); diff --git a/drivers/net/can/janz-ican3.c b/drivers/net/can/janz-ican3.c index 2a6c918186c0..c68ad56628bd 100644 --- a/drivers/net/can/janz-ican3.c +++ b/drivers/net/can/janz-ican3.c @@ -1815,9 +1815,9 @@ static int ican3_get_berr_counter(const struct net_device *ndev, * Sysfs Attributes */ -static ssize_t ican3_sysfs_show_term(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t termination_show(struct device *dev, + struct device_attribute *attr, + char *buf) { struct ican3_dev *mod = netdev_priv(to_net_dev(dev)); int ret; @@ -1834,9 +1834,9 @@ static ssize_t ican3_sysfs_show_term(struct device *dev, return snprintf(buf, PAGE_SIZE, "%u\n", mod->termination_enabled); } -static ssize_t ican3_sysfs_set_term(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t termination_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { struct ican3_dev *mod = netdev_priv(to_net_dev(dev)); unsigned long enable; @@ -1852,18 +1852,17 @@ static ssize_t ican3_sysfs_set_term(struct device *dev, return count; } -static ssize_t ican3_sysfs_show_fwinfo(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t fwinfo_show(struct device *dev, + struct device_attribute *attr, + char *buf) { struct ican3_dev *mod = netdev_priv(to_net_dev(dev)); return scnprintf(buf, PAGE_SIZE, "%s\n", mod->fwinfo); } -static DEVICE_ATTR(termination, 0644, ican3_sysfs_show_term, - ican3_sysfs_set_term); -static DEVICE_ATTR(fwinfo, 0444, ican3_sysfs_show_fwinfo, NULL); +static DEVICE_ATTR_RW(termination); +static DEVICE_ATTR_RO(fwinfo); static struct attribute *ican3_sysfs_attrs[] = { &dev_attr_termination.attr, diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index bba2a449ac70..0cffaad905c2 100644 --- 
a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -21,6 +21,7 @@ #include <linux/iopoll.h> #include <linux/can/dev.h> #include <linux/pinctrl/consumer.h> +#include <linux/phy/phy.h> #include "m_can.h" @@ -1058,6 +1059,9 @@ static irqreturn_t m_can_isr(int irq, void *dev_id) } } + if (cdev->is_peripheral) + can_rx_offload_threaded_irq_finish(&cdev->offload); + return IRQ_HANDLED; } @@ -1436,32 +1440,20 @@ static int m_can_dev_setup(struct m_can_classdev *cdev) case 30: /* CAN_CTRLMODE_FD_NON_ISO is fixed with M_CAN IP v3.0.x */ can_set_static_ctrlmode(dev, CAN_CTRLMODE_FD_NON_ISO); - cdev->can.bittiming_const = cdev->bit_timing ? - cdev->bit_timing : &m_can_bittiming_const_30X; - - cdev->can.data_bittiming_const = cdev->data_timing ? - cdev->data_timing : - &m_can_data_bittiming_const_30X; + cdev->can.bittiming_const = &m_can_bittiming_const_30X; + cdev->can.data_bittiming_const = &m_can_data_bittiming_const_30X; break; case 31: /* CAN_CTRLMODE_FD_NON_ISO is fixed with M_CAN IP v3.1.x */ can_set_static_ctrlmode(dev, CAN_CTRLMODE_FD_NON_ISO); - cdev->can.bittiming_const = cdev->bit_timing ? - cdev->bit_timing : &m_can_bittiming_const_31X; - - cdev->can.data_bittiming_const = cdev->data_timing ? - cdev->data_timing : - &m_can_data_bittiming_const_31X; + cdev->can.bittiming_const = &m_can_bittiming_const_31X; + cdev->can.data_bittiming_const = &m_can_data_bittiming_const_31X; break; case 32: case 33: /* Support both MCAN version v3.2.x and v3.3.0 */ - cdev->can.bittiming_const = cdev->bit_timing ? - cdev->bit_timing : &m_can_bittiming_const_31X; - - cdev->can.data_bittiming_const = cdev->data_timing ? - cdev->data_timing : - &m_can_data_bittiming_const_31X; + cdev->can.bittiming_const = &m_can_bittiming_const_31X; + cdev->can.data_bittiming_const = &m_can_data_bittiming_const_31X; cdev->can.ctrlmode_supported |= (m_can_niso_supported(cdev) ? 
@@ -1518,6 +1510,8 @@ static int m_can_close(struct net_device *dev) close_candev(dev); can_led_event(dev, CAN_LED_EVENT_STOP); + phy_power_off(cdev->transceiver); + return 0; } @@ -1703,10 +1697,14 @@ static int m_can_open(struct net_device *dev) struct m_can_classdev *cdev = netdev_priv(dev); int err; - err = m_can_clk_start(cdev); + err = phy_power_on(cdev->transceiver); if (err) return err; + err = m_can_clk_start(cdev); + if (err) + goto out_phy_power_off; + /* open the can device */ err = open_candev(dev); if (err) { @@ -1763,6 +1761,8 @@ out_wq_fail: close_candev(dev); exit_disable_clks: m_can_clk_stop(cdev); +out_phy_power_off: + phy_power_off(cdev->transceiver); return err; } diff --git a/drivers/net/can/m_can/m_can.h b/drivers/net/can/m_can/m_can.h index ace071c3e58c..56e994376a7b 100644 --- a/drivers/net/can/m_can/m_can.h +++ b/drivers/net/can/m_can/m_can.h @@ -28,6 +28,7 @@ #include <linux/iopoll.h> #include <linux/can/dev.h> #include <linux/pinctrl/consumer.h> +#include <linux/phy/phy.h> /* m_can lec values */ enum m_can_lec_type { @@ -82,9 +83,7 @@ struct m_can_classdev { struct workqueue_struct *tx_wq; struct work_struct tx_work; struct sk_buff *tx_skb; - - struct can_bittiming_const *bit_timing; - struct can_bittiming_const *data_timing; + struct phy *transceiver; struct m_can_ops *ops; diff --git a/drivers/net/can/m_can/m_can_platform.c b/drivers/net/can/m_can/m_can_platform.c index 599de0e08cd7..a28c84aa8fa8 100644 --- a/drivers/net/can/m_can/m_can_platform.c +++ b/drivers/net/can/m_can/m_can_platform.c @@ -6,6 +6,7 @@ // Copyright (C) 2018-19 Texas Instruments Incorporated - http://www.ti.com/ #include <linux/platform_device.h> +#include <linux/phy/phy.h> #include "m_can.h" @@ -67,6 +68,7 @@ static int m_can_plat_probe(struct platform_device *pdev) struct resource *res; void __iomem *addr; void __iomem *mram_addr; + struct phy *transceiver; int irq, ret = 0; mcan_class = m_can_class_allocate_dev(&pdev->dev, @@ -80,8 +82,7 @@ static int 
m_can_plat_probe(struct platform_device *pdev) if (ret) goto probe_fail; - res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "m_can"); - addr = devm_ioremap_resource(&pdev->dev, res); + addr = devm_platform_ioremap_resource_byname(pdev, "m_can"); irq = platform_get_irq_byname(pdev, "int0"); if (IS_ERR(addr) || irq < 0) { ret = -EINVAL; @@ -101,6 +102,16 @@ static int m_can_plat_probe(struct platform_device *pdev) goto probe_fail; } + transceiver = devm_phy_optional_get(&pdev->dev, NULL); + if (IS_ERR(transceiver)) { + ret = PTR_ERR(transceiver); + dev_err_probe(&pdev->dev, ret, "failed to get phy\n"); + goto probe_fail; + } + + if (transceiver) + mcan_class->can.bitrate_max = transceiver->attrs.max_link_rate; + priv->base = addr; priv->mram_base = mram_addr; @@ -108,6 +119,7 @@ static int m_can_plat_probe(struct platform_device *pdev) mcan_class->pm_clock_support = 1; mcan_class->can.clock.freq = clk_get_rate(mcan_class->cclk); mcan_class->dev = &pdev->dev; + mcan_class->transceiver = transceiver; mcan_class->ops = &m_can_plat_ops; diff --git a/drivers/net/can/sja1000/peak_pci.c b/drivers/net/can/sja1000/peak_pci.c index 84eac8cb8686..6db90dc4bc9d 100644 --- a/drivers/net/can/sja1000/peak_pci.c +++ b/drivers/net/can/sja1000/peak_pci.c @@ -28,6 +28,10 @@ MODULE_LICENSE("GPL v2"); #define DRV_NAME "peak_pci" +/* FPGA cards FW version registers */ +#define PEAK_VER_REG1 0x40 +#define PEAK_VER_REG2 0x44 + struct peak_pciec_card; struct peak_pci_chan { void __iomem *cfg_base; /* Common for all channels */ @@ -41,9 +45,7 @@ struct peak_pci_chan { #define PEAK_PCI_CDR (CDR_CBP | CDR_CLKOUT_MASK) #define PEAK_PCI_OCR OCR_TX0_PUSHPULL -/* - * Important PITA registers - */ +/* Important PITA registers */ #define PITA_ICR 0x00 /* Interrupt control register */ #define PITA_GPIOICR 0x18 /* GPIO interface control register */ #define PITA_MISC 0x1C /* Miscellaneous register */ @@ -70,27 +72,47 @@ static const u16 peak_pci_icr_masks[PEAK_PCI_CHAN_MAX] = { }; static const 
struct pci_device_id peak_pci_tbl[] = { - {PEAK_PCI_VENDOR_ID, PEAK_PCI_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, - {PEAK_PCI_VENDOR_ID, PEAK_PCIE_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, - {PEAK_PCI_VENDOR_ID, PEAK_MPCI_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, - {PEAK_PCI_VENDOR_ID, PEAK_MPCIE_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, - {PEAK_PCI_VENDOR_ID, PEAK_PC_104P_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, - {PEAK_PCI_VENDOR_ID, PEAK_PCI_104E_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, - {PEAK_PCI_VENDOR_ID, PEAK_CPCI_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, - {PEAK_PCI_VENDOR_ID, PEAK_PCIE_OEM_ID, PCI_ANY_ID, PCI_ANY_ID,}, + { + PEAK_PCI_VENDOR_ID, PEAK_PCI_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID, + .driver_data = (kernel_ulong_t)"PCAN-PCI", + }, { + PEAK_PCI_VENDOR_ID, PEAK_PCIE_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID, + .driver_data = (kernel_ulong_t)"PCAN-PCI Express", + }, { + PEAK_PCI_VENDOR_ID, PEAK_MPCI_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID, + .driver_data = (kernel_ulong_t)"PCAN-miniPCI", + }, { + PEAK_PCI_VENDOR_ID, PEAK_MPCIE_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID, + .driver_data = (kernel_ulong_t)"PCAN-miniPCIe", + }, { + PEAK_PCI_VENDOR_ID, PEAK_PC_104P_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID, + .driver_data = (kernel_ulong_t)"PCAN-PC/104-Plus Quad", + }, { + PEAK_PCI_VENDOR_ID, PEAK_PCI_104E_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID, + .driver_data = (kernel_ulong_t)"PCAN-PCI/104-Express", + }, { + PEAK_PCI_VENDOR_ID, PEAK_CPCI_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID, + .driver_data = (kernel_ulong_t)"PCAN-cPCI", + }, { + PEAK_PCI_VENDOR_ID, PEAK_PCIE_OEM_ID, PCI_ANY_ID, PCI_ANY_ID, + .driver_data = (kernel_ulong_t)"PCAN-Chip PCIe", + }, #ifdef CONFIG_CAN_PEAK_PCIEC - {PEAK_PCI_VENDOR_ID, PEAK_PCIEC_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, - {PEAK_PCI_VENDOR_ID, PEAK_PCIEC34_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, + { + PEAK_PCI_VENDOR_ID, PEAK_PCIEC_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID, + .driver_data = (kernel_ulong_t)"PCAN-ExpressCard", + }, { + PEAK_PCI_VENDOR_ID, PEAK_PCIEC34_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID, + 
.driver_data = (kernel_ulong_t)"PCAN-ExpressCard 34", + }, #endif - {0,} + { /* sentinel */ } }; MODULE_DEVICE_TABLE(pci, peak_pci_tbl); #ifdef CONFIG_CAN_PEAK_PCIEC -/* - * PCAN-ExpressCard needs I2C bit-banging configuration option. - */ +/* PCAN-ExpressCard needs I2C bit-banging configuration option. */ /* GPIOICR byte access offsets */ #define PITA_GPOUT 0x18 /* GPx output value */ @@ -156,12 +178,14 @@ static void peak_pci_write_reg(const struct sja1000_priv *priv, static inline void pita_set_scl_highz(struct peak_pciec_card *card) { u8 gp_outen = readb(card->cfg_base + PITA_GPOEN) & ~PITA_GPIN_SCL; + writeb(gp_outen, card->cfg_base + PITA_GPOEN); } static inline void pita_set_sda_highz(struct peak_pciec_card *card) { u8 gp_outen = readb(card->cfg_base + PITA_GPOEN) & ~PITA_GPIN_SDA; + writeb(gp_outen, card->cfg_base + PITA_GPOEN); } @@ -230,9 +254,7 @@ static int pita_getscl(void *data) return (readb(card->cfg_base + PITA_GPIN) & PITA_GPIN_SCL) ? 1 : 0; } -/* - * write commands to the LED chip though the I2C-bus of the PCAN-PCIeC - */ +/* write commands to the LED chip though the I2C-bus of the PCAN-PCIeC */ static int peak_pciec_write_pca9553(struct peak_pciec_card *card, u8 offset, u8 data) { @@ -248,7 +270,7 @@ static int peak_pciec_write_pca9553(struct peak_pciec_card *card, int ret; /* cache led mask */ - if ((offset == 5) && (data == card->led_cache)) + if (offset == 5 && data == card->led_cache) return 0; ret = i2c_transfer(&card->led_chip, &msg, 1); @@ -261,9 +283,7 @@ static int peak_pciec_write_pca9553(struct peak_pciec_card *card, return 0; } -/* - * delayed work callback used to control the LEDs - */ +/* delayed work callback used to control the LEDs */ static void peak_pciec_led_work(struct work_struct *work) { struct peak_pciec_card *card = @@ -309,9 +329,7 @@ static void peak_pciec_led_work(struct work_struct *work) schedule_delayed_work(&card->led_work, HZ); } -/* - * set LEDs blinking state - */ +/* set LEDs blinking state */ static void 
peak_pciec_set_leds(struct peak_pciec_card *card, u8 led_mask, u8 s) { u8 new_led = card->led_cache; @@ -328,25 +346,19 @@ static void peak_pciec_set_leds(struct peak_pciec_card *card, u8 led_mask, u8 s) peak_pciec_write_pca9553(card, 5, new_led); } -/* - * start one second delayed work to control LEDs - */ +/* start one second delayed work to control LEDs */ static void peak_pciec_start_led_work(struct peak_pciec_card *card) { schedule_delayed_work(&card->led_work, HZ); } -/* - * stop LEDs delayed work - */ +/* stop LEDs delayed work */ static void peak_pciec_stop_led_work(struct peak_pciec_card *card) { cancel_delayed_work_sync(&card->led_work); } -/* - * initialize the PCA9553 4-bit I2C-bus LED chip - */ +/* initialize the PCA9553 4-bit I2C-bus LED chip */ static int peak_pciec_init_leds(struct peak_pciec_card *card) { int err; @@ -375,17 +387,14 @@ static int peak_pciec_init_leds(struct peak_pciec_card *card) return peak_pciec_write_pca9553(card, 5, PCA9553_LS0_INIT); } -/* - * restore LEDs state to off peak_pciec_leds_exit - */ +/* restore LEDs state to off peak_pciec_leds_exit */ static void peak_pciec_leds_exit(struct peak_pciec_card *card) { /* switch LEDs to off */ peak_pciec_write_pca9553(card, 5, PCA9553_LED_OFF_ALL); } -/* - * normal write sja1000 register method overloaded to catch when controller +/* normal write sja1000 register method overloaded to catch when controller * is started or stopped, to control leds */ static void peak_pciec_write_reg(const struct sja1000_priv *priv, @@ -443,7 +452,7 @@ static int peak_pciec_probe(struct pci_dev *pdev, struct net_device *dev) /* channel is the first one: do the init part */ } else { /* create the bit banging I2C adapter structure */ - card = kzalloc(sizeof(struct peak_pciec_card), GFP_KERNEL); + card = kzalloc(sizeof(*card), GFP_KERNEL); if (!card) return -ENOMEM; @@ -506,9 +515,7 @@ static void peak_pciec_remove(struct peak_pciec_card *card) #else /* CONFIG_CAN_PEAK_PCIEC */ -/* - * Placebo functions 
when PCAN-ExpressCard support is not selected - */ +/* Placebo functions when PCAN-ExpressCard support is not selected */ static inline int peak_pciec_probe(struct pci_dev *pdev, struct net_device *dev) { return -ENODEV; @@ -549,6 +556,7 @@ static int peak_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) void __iomem *cfg_base, *reg_base; u16 sub_sys_id, icr; int i, err, channels; + char fw_str[14] = ""; err = pci_enable_device(pdev); if (err) @@ -602,6 +610,21 @@ static int peak_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Leave parport mux mode */ writeb(0x04, cfg_base + PITA_MISC + 3); + /* FPGA equipped card if not 0 */ + if (readl(cfg_base + PEAK_VER_REG1)) { + /* FPGA card: display version of the running firmware */ + u32 fw_ver = readl(cfg_base + PEAK_VER_REG2); + + snprintf(fw_str, sizeof(fw_str), " FW v%u.%u.%u", + (fw_ver >> 12) & 0xf, + (fw_ver >> 8) & 0xf, + (fw_ver >> 4) & 0xf); + } + + /* Display commercial name (and, eventually, FW version) of the card */ + dev_info(&pdev->dev, "%ux CAN %s%s\n", + channels, (const char *)ent->driver_data, fw_str); + icr = readw(cfg_base + PITA_ICR + 2); for (i = 0; i < channels; i++) { @@ -642,8 +665,7 @@ static int peak_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) chan->prev_dev = pci_get_drvdata(pdev); pci_set_drvdata(pdev, dev); - /* - * PCAN-ExpressCard needs some additional i2c init. + /* PCAN-ExpressCard needs some additional i2c init. 
* This must be done *before* register_sja1000dev() but * *after* devices linkage */ @@ -709,7 +731,8 @@ failure_disable_pci: /* pci_xxx_config_word() return positive PCIBIOS_xxx error codes while * the probe() function must return a negative errno in case of failure - * (err is unchanged if negative) */ + * (err is unchanged if negative) + */ return pcibios_err_to_errno(err); } diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 9ae48072b6c6..6c369a399c45 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -15,10 +15,10 @@ #include <linux/bitfield.h> #include <linux/clk.h> #include <linux/device.h> +#include <linux/mod_devicetable.h> #include <linux/module.h> -#include <linux/of.h> -#include <linux/of_device.h> #include <linux/pm_runtime.h> +#include <linux/property.h> #include <asm/unaligned.h> @@ -2195,8 +2195,10 @@ static irqreturn_t mcp251xfd_irq(int irq, void *dev_id) FIELD_GET(MCP251XFD_REG_INT_IE_MASK, priv->regs_status.intf); - if (!(intf_pending)) + if (!(intf_pending)) { + can_rx_offload_threaded_irq_finish(&priv->offload); return handled; + } /* Some interrupts must be ACKed in the * MCP251XFD_REG_INT register. 
@@ -2296,6 +2298,8 @@ static irqreturn_t mcp251xfd_irq(int irq, void *dev_id) } while (1); out_fail: + can_rx_offload_threaded_irq_finish(&priv->offload); + netdev_err(priv->ndev, "IRQ handler returned %d (intf=0x%08x).\n", err, priv->regs_status.intf); mcp251xfd_dump(priv); @@ -2524,8 +2528,8 @@ static int mcp251xfd_open(struct net_device *ndev) can_rx_offload_enable(&priv->offload); err = request_threaded_irq(spi->irq, NULL, mcp251xfd_irq, - IRQF_ONESHOT, dev_name(&spi->dev), - priv); + IRQF_SHARED | IRQF_ONESHOT, + dev_name(&spi->dev), priv); if (err) goto out_can_rx_offload_disable; @@ -2857,7 +2861,7 @@ static int mcp251xfd_probe(struct spi_device *spi) struct gpio_desc *rx_int; struct regulator *reg_vdd, *reg_xceiver; struct clk *clk; - u32 freq; + u32 freq = 0; int err; if (!spi->irq) @@ -2884,11 +2888,19 @@ static int mcp251xfd_probe(struct spi_device *spi) return dev_err_probe(&spi->dev, PTR_ERR(reg_xceiver), "Failed to get Transceiver regulator!\n"); - clk = devm_clk_get(&spi->dev, NULL); + clk = devm_clk_get_optional(&spi->dev, NULL); if (IS_ERR(clk)) return dev_err_probe(&spi->dev, PTR_ERR(clk), "Failed to get Oscillator (clock)!\n"); - freq = clk_get_rate(clk); + if (clk) { + freq = clk_get_rate(clk); + } else { + err = device_property_read_u32(&spi->dev, "clock-frequency", + &freq); + if (err) + return dev_err_probe(&spi->dev, err, + "Failed to get clock-frequency!\n"); + } /* Sanity check */ if (freq < MCP251XFD_SYSCLOCK_HZ_MIN || diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index 73245d8836a9..353062ead98f 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -786,6 +786,8 @@ static irqreturn_t ti_hecc_interrupt(int irq, void *dev_id) int_status = hecc_read(priv, HECC_CANGIF0); } + can_rx_offload_irq_finish(&priv->offload); + return IRQ_HANDLED; } diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c index 65b58f8fc328..60f3e0ca080a 100644 --- a/drivers/net/can/usb/esd_usb2.c +++ 
b/drivers/net/can/usb/esd_usb2.c @@ -474,7 +474,7 @@ static void esd_usb2_write_bulk_callback(struct urb *urb) netif_trans_update(netdev); } -static ssize_t show_firmware(struct device *d, +static ssize_t firmware_show(struct device *d, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(d); @@ -485,9 +485,9 @@ static ssize_t show_firmware(struct device *d, (dev->version >> 8) & 0xf, dev->version & 0xff); } -static DEVICE_ATTR(firmware, 0444, show_firmware, NULL); +static DEVICE_ATTR_RO(firmware); -static ssize_t show_hardware(struct device *d, +static ssize_t hardware_show(struct device *d, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(d); @@ -498,9 +498,9 @@ static ssize_t show_hardware(struct device *d, (dev->version >> 24) & 0xf, (dev->version >> 16) & 0xff); } -static DEVICE_ATTR(hardware, 0444, show_hardware, NULL); +static DEVICE_ATTR_RO(hardware); -static ssize_t show_nets(struct device *d, +static ssize_t nets_show(struct device *d, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(d); @@ -508,7 +508,7 @@ static ssize_t show_nets(struct device *d, return sprintf(buf, "%d", dev->net_count); } -static DEVICE_ATTR(nets, 0444, show_nets, NULL); +static DEVICE_ATTR_RO(nets); static int esd_usb2_send_msg(struct esd_usb2 *dev, struct esd_usb2_msg *msg) { diff --git a/drivers/net/can/usb/etas_es58x/es581_4.c b/drivers/net/can/usb/etas_es58x/es581_4.c index 1985f772fc3c..14e360c9f2c9 100644 --- a/drivers/net/can/usb/etas_es58x/es581_4.c +++ b/drivers/net/can/usb/etas_es58x/es581_4.c @@ -355,7 +355,7 @@ static int es581_4_tx_can_msg(struct es58x_priv *priv, return -EMSGSIZE; if (priv->tx_can_msg_cnt == 0) { - msg_len = 1; /* struct es581_4_bulk_tx_can_msg:num_can_msg */ + msg_len = sizeof(es581_4_urb_cmd->bulk_tx_can_msg.num_can_msg); es581_4_fill_urb_header(urb_cmd, ES581_4_CAN_COMMAND_TYPE, ES581_4_CMD_ID_TX_MSG, priv->channel_idx, msg_len); @@ 
-371,8 +371,7 @@ static int es581_4_tx_can_msg(struct es58x_priv *priv, return ret; /* Fill message contents. */ - tx_can_msg = (struct es581_4_tx_can_msg *) - &es581_4_urb_cmd->bulk_tx_can_msg.tx_can_msg_buf[msg_len - 1]; + tx_can_msg = (typeof(tx_can_msg))&es581_4_urb_cmd->raw_msg[msg_len]; put_unaligned_le32(es58x_get_raw_can_id(cf), &tx_can_msg->can_id); put_unaligned_le32(priv->tx_head, &tx_can_msg->packet_idx); put_unaligned_le16((u16)es58x_get_flags(skb), &tx_can_msg->flags); diff --git a/drivers/net/can/usb/etas_es58x/es58x_core.c b/drivers/net/can/usb/etas_es58x/es58x_core.c index 8e9102482c52..96a13c770e4a 100644 --- a/drivers/net/can/usb/etas_es58x/es58x_core.c +++ b/drivers/net/can/usb/etas_es58x/es58x_core.c @@ -19,7 +19,7 @@ #include "es58x_core.h" #define DRV_VERSION "1.00" -MODULE_AUTHOR("Mailhol Vincent <mailhol.vincent@wanadoo.fr>"); +MODULE_AUTHOR("Vincent Mailhol <mailhol.vincent@wanadoo.fr>"); MODULE_AUTHOR("Arunachalam Santhanam <arunachalam.santhanam@in.bosch.com>"); MODULE_DESCRIPTION("Socket CAN driver for ETAS ES58X USB adapters"); MODULE_VERSION(DRV_VERSION); @@ -70,7 +70,7 @@ MODULE_DEVICE_TABLE(usb, es58x_id_table); * bytes (the start of frame) are skipped and the CRC calculation * starts on the third byte. */ -#define ES58X_CRC_CALC_OFFSET 2 +#define ES58X_CRC_CALC_OFFSET sizeof_field(union es58x_urb_cmd, sof) /** * es58x_calculate_crc() - Compute the crc16 of a given URB. @@ -2108,6 +2108,25 @@ static int es58x_init_netdev(struct es58x_device *es58x_dev, int channel_idx) } /** + * es58x_free_netdevs() - Release all network resources of the device. + * @es58x_dev: ES58X device. 
+ */ +static void es58x_free_netdevs(struct es58x_device *es58x_dev) +{ + int i; + + for (i = 0; i < es58x_dev->num_can_ch; i++) { + struct net_device *netdev = es58x_dev->netdev[i]; + + if (!netdev) + continue; + unregister_candev(netdev); + es58x_dev->netdev[i] = NULL; + free_candev(netdev); + } +} + +/** * es58x_get_product_info() - Get the product information and print them. * @es58x_dev: ES58X device. * @@ -2152,14 +2171,13 @@ static int es58x_get_product_info(struct es58x_device *es58x_dev) /** * es58x_init_es58x_dev() - Initialize the ES58X device. * @intf: USB interface. - * @p_es58x_dev: pointer to the address of the ES58X device. * @driver_info: Quirks of the device. * - * Return: zero on success, errno when any error occurs. + * Return: pointer to an ES58X device on success, error pointer when + * any error occurs. */ -static int es58x_init_es58x_dev(struct usb_interface *intf, - struct es58x_device **p_es58x_dev, - kernel_ulong_t driver_info) +static struct es58x_device *es58x_init_es58x_dev(struct usb_interface *intf, + kernel_ulong_t driver_info) { struct device *dev = &intf->dev; struct es58x_device *es58x_dev; @@ -2176,7 +2194,7 @@ static int es58x_init_es58x_dev(struct usb_interface *intf, ret = usb_find_common_endpoints(intf->cur_altsetting, &ep_in, &ep_out, NULL, NULL); if (ret) - return ret; + return ERR_PTR(ret); if (driver_info & ES58X_FD_FAMILY) { param = &es58x_fd_param; @@ -2186,9 +2204,10 @@ static int es58x_init_es58x_dev(struct usb_interface *intf, ops = &es581_4_ops; } - es58x_dev = kzalloc(es58x_sizeof_es58x_device(param), GFP_KERNEL); + es58x_dev = devm_kzalloc(dev, es58x_sizeof_es58x_device(param), + GFP_KERNEL); if (!es58x_dev) - return -ENOMEM; + return ERR_PTR(-ENOMEM); es58x_dev->param = param; es58x_dev->ops = ops; @@ -2213,9 +2232,7 @@ static int es58x_init_es58x_dev(struct usb_interface *intf, ep_out->bEndpointAddress); es58x_dev->rx_max_packet_size = le16_to_cpu(ep_in->wMaxPacketSize); - *p_es58x_dev = es58x_dev; - - return 
0; + return es58x_dev; } /** @@ -2232,30 +2249,21 @@ static int es58x_probe(struct usb_interface *intf, struct es58x_device *es58x_dev; int ch_idx, ret; - ret = es58x_init_es58x_dev(intf, &es58x_dev, id->driver_info); - if (ret) - return ret; + es58x_dev = es58x_init_es58x_dev(intf, id->driver_info); + if (IS_ERR(es58x_dev)) + return PTR_ERR(es58x_dev); ret = es58x_get_product_info(es58x_dev); if (ret) - goto cleanup_es58x_dev; + return ret; for (ch_idx = 0; ch_idx < es58x_dev->num_can_ch; ch_idx++) { ret = es58x_init_netdev(es58x_dev, ch_idx); - if (ret) - goto cleanup_candev; - } - - return ret; - - cleanup_candev: - for (ch_idx = 0; ch_idx < es58x_dev->num_can_ch; ch_idx++) - if (es58x_dev->netdev[ch_idx]) { - unregister_candev(es58x_dev->netdev[ch_idx]); - free_candev(es58x_dev->netdev[ch_idx]); + if (ret) { + es58x_free_netdevs(es58x_dev); + return ret; } - cleanup_es58x_dev: - kfree(es58x_dev); + } return ret; } @@ -2270,24 +2278,12 @@ static int es58x_probe(struct usb_interface *intf, static void es58x_disconnect(struct usb_interface *intf) { struct es58x_device *es58x_dev = usb_get_intfdata(intf); - struct net_device *netdev; - int i; dev_info(&intf->dev, "Disconnecting %s %s\n", es58x_dev->udev->manufacturer, es58x_dev->udev->product); - for (i = 0; i < es58x_dev->num_can_ch; i++) { - netdev = es58x_dev->netdev[i]; - if (!netdev) - continue; - unregister_candev(netdev); - es58x_dev->netdev[i] = NULL; - free_candev(netdev); - } - + es58x_free_netdevs(es58x_dev); es58x_free_urbs(es58x_dev); - - kfree(es58x_dev); usb_set_intfdata(intf, NULL); } diff --git a/drivers/net/can/usb/etas_es58x/es58x_core.h b/drivers/net/can/usb/etas_es58x/es58x_core.h index fcf219e727bf..826a15871573 100644 --- a/drivers/net/can/usb/etas_es58x/es58x_core.h +++ b/drivers/net/can/usb/etas_es58x/es58x_core.h @@ -287,7 +287,7 @@ struct es58x_priv { * @rx_urb_cmd_max_len: Maximum length of a RX URB command. 
* @fifo_mask: Bit mask to quickly convert the tx_tail and tx_head * field of the struct es58x_priv into echo_skb - * indexes. Properties: @fifo_mask = echos_skb_max - 1 where + * indexes. Properties: @fifo_mask = echo_skb_max - 1 where * echo_skb_max must be a power of two. Also, echo_skb_max must * not exceed the maximum size of the device internal TX FIFO * length. This parameter is used to control the network queue diff --git a/drivers/net/can/usb/etas_es58x/es58x_fd.c b/drivers/net/can/usb/etas_es58x/es58x_fd.c index 1a2779d383a4..af042aa55f59 100644 --- a/drivers/net/can/usb/etas_es58x/es58x_fd.c +++ b/drivers/net/can/usb/etas_es58x/es58x_fd.c @@ -357,8 +357,7 @@ static int es58x_fd_tx_can_msg(struct es58x_priv *priv, return ret; /* Fill message contents. */ - tx_can_msg = (struct es58x_fd_tx_can_msg *) - &es58x_fd_urb_cmd->tx_can_msg_buf[msg_len]; + tx_can_msg = (typeof(tx_can_msg))&es58x_fd_urb_cmd->raw_msg[msg_len]; tx_can_msg->packet_idx = (u8)priv->tx_head; put_unaligned_le32(es58x_get_raw_can_id(cf), &tx_can_msg->can_id); tx_can_msg->flags = (u8)es58x_get_flags(skb); @@ -463,9 +462,9 @@ static int es58x_fd_get_timestamp(struct es58x_device *es58x_dev) } /* Nominal bittiming constants for ES582.1 and ES584.1 as specified in - * the microcontroller datasheet: "SAM E701/S70/V70/V71 Family" - * section 49.6.8 "MCAN Nominal Bit Timing and Prescaler Register" - * from Microchip. + * the microcontroller datasheet: "SAM E70/S70/V70/V71 Family" section + * 49.6.8 "MCAN Nominal Bit Timing and Prescaler Register" from + * Microchip. * * The values from the specification are the hardware register * values. 
To convert them to the functional values, all ranges were @@ -484,8 +483,8 @@ static const struct can_bittiming_const es58x_fd_nom_bittiming_const = { }; /* Data bittiming constants for ES582.1 and ES584.1 as specified in - * the microcontroller datasheet: "SAM E701/S70/V70/V71 Family" - * section 49.6.4 "MCAN Data Bit Timing and Prescaler Register" from + * the microcontroller datasheet: "SAM E70/S70/V70/V71 Family" section + * 49.6.4 "MCAN Data Bit Timing and Prescaler Register" from * Microchip. */ static const struct can_bittiming_const es58x_fd_data_bittiming_const = { @@ -501,9 +500,9 @@ static const struct can_bittiming_const es58x_fd_data_bittiming_const = { }; /* Transmission Delay Compensation constants for ES582.1 and ES584.1 - * as specified in the microcontroller datasheet: "SAM - * E701/S70/V70/V71 Family" section 49.6.15 "MCAN Transmitter Delay - * Compensation Register" from Microchip. + * as specified in the microcontroller datasheet: "SAM E70/S70/V70/V71 + * Family" section 49.6.15 "MCAN Transmitter Delay Compensation + * Register" from Microchip. */ static const struct can_tdc_const es58x_tdc_const = { .tdcv_max = 0, /* Manual mode not supported. 
*/ diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c index 899a3d21b77f..837b3fecd71e 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb.c @@ -63,6 +63,8 @@ #define PCAN_USB_MSG_HEADER_LEN 2 +#define PCAN_USB_MSG_TX_CAN 2 /* Tx msg is a CAN frame */ + /* PCAN-USB adapter internal clock (MHz) */ #define PCAN_USB_CRYSTAL_HZ 16000000 @@ -73,6 +75,10 @@ #define PCAN_USB_STATUSLEN_RTR (1 << 4) #define PCAN_USB_STATUSLEN_DLC (0xf) +/* PCAN-USB 4.1 CAN Id tx extended flags */ +#define PCAN_USB_TX_SRR 0x01 /* SJA1000 SRR command */ +#define PCAN_USB_TX_AT 0x02 /* SJA1000 AT command */ + /* PCAN-USB error flags */ #define PCAN_USB_ERROR_TXFULL 0x01 #define PCAN_USB_ERROR_RXQOVR 0x02 @@ -385,7 +391,8 @@ static int pcan_usb_get_device_id(struct peak_usb_device *dev, u32 *device_id) if (err) netdev_err(dev->netdev, "getting device id failure: %d\n", err); - *device_id = args[0]; + else + *device_id = args[0]; return err; } @@ -446,145 +453,65 @@ static int pcan_usb_decode_error(struct pcan_usb_msg_context *mc, u8 n, { struct sk_buff *skb; struct can_frame *cf; - enum can_state new_state; + enum can_state new_state = CAN_STATE_ERROR_ACTIVE; /* ignore this error until 1st ts received */ if (n == PCAN_USB_ERROR_QOVR) if (!mc->pdev->time_ref.tick_count) return 0; - new_state = mc->pdev->dev.can.state; - - switch (mc->pdev->dev.can.state) { - case CAN_STATE_ERROR_ACTIVE: - if (n & PCAN_USB_ERROR_BUS_LIGHT) { - new_state = CAN_STATE_ERROR_WARNING; - break; - } - fallthrough; - - case CAN_STATE_ERROR_WARNING: - if (n & PCAN_USB_ERROR_BUS_HEAVY) { - new_state = CAN_STATE_ERROR_PASSIVE; - break; - } - if (n & PCAN_USB_ERROR_BUS_OFF) { - new_state = CAN_STATE_BUS_OFF; - break; - } - if (n & ~PCAN_USB_ERROR_BUS) { - /* - * trick to bypass next comparison and process other - * errors - */ - new_state = CAN_STATE_MAX; - break; - } - if ((n & PCAN_USB_ERROR_BUS_LIGHT) == 0) { - /* no error (back to 
active state) */ - new_state = CAN_STATE_ERROR_ACTIVE; - break; - } - break; - - case CAN_STATE_ERROR_PASSIVE: - if (n & PCAN_USB_ERROR_BUS_OFF) { - new_state = CAN_STATE_BUS_OFF; - break; - } - if (n & PCAN_USB_ERROR_BUS_LIGHT) { - new_state = CAN_STATE_ERROR_WARNING; - break; - } - if (n & ~PCAN_USB_ERROR_BUS) { - /* - * trick to bypass next comparison and process other - * errors - */ - new_state = CAN_STATE_MAX; - break; - } - - if ((n & PCAN_USB_ERROR_BUS_HEAVY) == 0) { - /* no error (back to warning state) */ - new_state = CAN_STATE_ERROR_WARNING; - break; - } - break; - - default: - /* do nothing waiting for restart */ - return 0; - } - - /* donot post any error if current state didn't change */ - if (mc->pdev->dev.can.state == new_state) - return 0; - /* allocate an skb to store the error frame */ skb = alloc_can_err_skb(mc->netdev, &cf); - if (!skb) - return -ENOMEM; - - switch (new_state) { - case CAN_STATE_BUS_OFF: - cf->can_id |= CAN_ERR_BUSOFF; - mc->pdev->dev.can.can_stats.bus_off++; - can_bus_off(mc->netdev); - break; - - case CAN_STATE_ERROR_PASSIVE: - cf->can_id |= CAN_ERR_CRTL; - cf->data[1] = (mc->pdev->bec.txerr > mc->pdev->bec.rxerr) ? - CAN_ERR_CRTL_TX_PASSIVE : - CAN_ERR_CRTL_RX_PASSIVE; - cf->data[6] = mc->pdev->bec.txerr; - cf->data[7] = mc->pdev->bec.rxerr; - - mc->pdev->dev.can.can_stats.error_passive++; - break; - - case CAN_STATE_ERROR_WARNING: - cf->can_id |= CAN_ERR_CRTL; - cf->data[1] = (mc->pdev->bec.txerr > mc->pdev->bec.rxerr) ? 
- CAN_ERR_CRTL_TX_WARNING : - CAN_ERR_CRTL_RX_WARNING; - cf->data[6] = mc->pdev->bec.txerr; - cf->data[7] = mc->pdev->bec.rxerr; - - mc->pdev->dev.can.can_stats.error_warning++; - break; - case CAN_STATE_ERROR_ACTIVE: - cf->can_id |= CAN_ERR_CRTL; - cf->data[1] = CAN_ERR_CRTL_ACTIVE; - - /* sync local copies of rxerr/txerr counters */ - mc->pdev->bec.txerr = 0; - mc->pdev->bec.rxerr = 0; - break; - - default: - /* CAN_STATE_MAX (trick to handle other errors) */ - if (n & PCAN_USB_ERROR_TXQFULL) - netdev_dbg(mc->netdev, "device Tx queue full)\n"); - - if (n & PCAN_USB_ERROR_RXQOVR) { - netdev_dbg(mc->netdev, "data overrun interrupt\n"); + if (n & PCAN_USB_ERROR_RXQOVR) { + /* data overrun interrupt */ + netdev_dbg(mc->netdev, "data overrun interrupt\n"); + mc->netdev->stats.rx_over_errors++; + mc->netdev->stats.rx_errors++; + if (cf) { cf->can_id |= CAN_ERR_CRTL; cf->data[1] |= CAN_ERR_CRTL_RX_OVERFLOW; - mc->netdev->stats.rx_over_errors++; - mc->netdev->stats.rx_errors++; } + } - cf->data[6] = mc->pdev->bec.txerr; - cf->data[7] = mc->pdev->bec.rxerr; + if (n & PCAN_USB_ERROR_TXQFULL) + netdev_dbg(mc->netdev, "device Tx queue full)\n"); - new_state = mc->pdev->dev.can.state; - break; + if (n & PCAN_USB_ERROR_BUS_OFF) { + new_state = CAN_STATE_BUS_OFF; + } else if (n & PCAN_USB_ERROR_BUS_HEAVY) { + new_state = ((mc->pdev->bec.txerr >= 128) || + (mc->pdev->bec.rxerr >= 128)) ? + CAN_STATE_ERROR_PASSIVE : + CAN_STATE_ERROR_WARNING; + } else { + new_state = CAN_STATE_ERROR_ACTIVE; } - mc->pdev->dev.can.state = new_state; + /* handle change of state */ + if (new_state != mc->pdev->dev.can.state) { + enum can_state tx_state = + (mc->pdev->bec.txerr >= mc->pdev->bec.rxerr) ? + new_state : 0; + enum can_state rx_state = + (mc->pdev->bec.txerr <= mc->pdev->bec.rxerr) ? 
+ new_state : 0; + + can_change_state(mc->netdev, cf, tx_state, rx_state); + + if (new_state == CAN_STATE_BUS_OFF) { + can_bus_off(mc->netdev); + } else if (cf && (cf->can_id & CAN_ERR_CRTL)) { + /* Supply TX/RX error counters in case of + * controller error. + */ + cf->data[6] = mc->pdev->bec.txerr; + cf->data[7] = mc->pdev->bec.rxerr; + } + } + + if (!skb) + return -ENOMEM; if (status_len & PCAN_USB_STATUSLEN_TIMESTAMP) { struct skb_shared_hwtstamps *hwts = skb_hwtstamps(skb); @@ -706,6 +633,7 @@ static int pcan_usb_decode_data(struct pcan_usb_msg_context *mc, u8 status_len) struct sk_buff *skb; struct can_frame *cf; struct skb_shared_hwtstamps *hwts; + u32 can_id_flags; skb = alloc_can_skb(mc->netdev, &cf); if (!skb) @@ -715,13 +643,15 @@ static int pcan_usb_decode_data(struct pcan_usb_msg_context *mc, u8 status_len) if ((mc->ptr + 4) > mc->end) goto decode_failed; - cf->can_id = get_unaligned_le32(mc->ptr) >> 3 | CAN_EFF_FLAG; + can_id_flags = get_unaligned_le32(mc->ptr); + cf->can_id = can_id_flags >> 3 | CAN_EFF_FLAG; mc->ptr += 4; } else { if ((mc->ptr + 2) > mc->end) goto decode_failed; - cf->can_id = get_unaligned_le16(mc->ptr) >> 5; + can_id_flags = get_unaligned_le16(mc->ptr); + cf->can_id = can_id_flags >> 5; mc->ptr += 2; } @@ -744,6 +674,10 @@ static int pcan_usb_decode_data(struct pcan_usb_msg_context *mc, u8 status_len) memcpy(cf->data, mc->ptr, cf->len); mc->ptr += rec_len; + + /* Ignore next byte (client private id) if SRR bit is set */ + if (can_id_flags & PCAN_USB_TX_SRR) + mc->ptr++; } /* convert timestamp into kernel time */ @@ -821,10 +755,11 @@ static int pcan_usb_encode_msg(struct peak_usb_device *dev, struct sk_buff *skb, struct net_device *netdev = dev->netdev; struct net_device_stats *stats = &netdev->stats; struct can_frame *cf = (struct can_frame *)skb->data; + u32 can_id_flags = cf->can_id & CAN_ERR_MASK; u8 *pc; - obuf[0] = 2; - obuf[1] = 1; + obuf[0] = PCAN_USB_MSG_TX_CAN; + obuf[1] = 1; /* only one CAN frame is stored in the packet 
*/ pc = obuf + PCAN_USB_MSG_HEADER_LEN; @@ -839,12 +774,28 @@ static int pcan_usb_encode_msg(struct peak_usb_device *dev, struct sk_buff *skb, *pc |= PCAN_USB_STATUSLEN_EXT_ID; pc++; - put_unaligned_le32((cf->can_id & CAN_ERR_MASK) << 3, pc); + can_id_flags <<= 3; + + if (dev->can.ctrlmode & CAN_CTRLMODE_LOOPBACK) + can_id_flags |= PCAN_USB_TX_SRR; + + if (dev->can.ctrlmode & CAN_CTRLMODE_ONE_SHOT) + can_id_flags |= PCAN_USB_TX_AT; + + put_unaligned_le32(can_id_flags, pc); pc += 4; } else { pc++; - put_unaligned_le16((cf->can_id & CAN_ERR_MASK) << 5, pc); + can_id_flags <<= 5; + + if (dev->can.ctrlmode & CAN_CTRLMODE_LOOPBACK) + can_id_flags |= PCAN_USB_TX_SRR; + + if (dev->can.ctrlmode & CAN_CTRLMODE_ONE_SHOT) + can_id_flags |= PCAN_USB_TX_AT; + + put_unaligned_le16(can_id_flags, pc); pc += 2; } @@ -854,6 +805,10 @@ static int pcan_usb_encode_msg(struct peak_usb_device *dev, struct sk_buff *skb, pc += cf->len; } + /* SRR bit needs a writer id (useless here) */ + if (can_id_flags & PCAN_USB_TX_SRR) + *pc++ = 0x80; + obuf[(*size)-1] = (u8)(stats->tx_packets & 0xff); return 0; @@ -928,6 +883,19 @@ static int pcan_usb_init(struct peak_usb_device *dev) return err; } + /* Since rev 4.1, PCAN-USB is able to make single-shot as well as + * looped back frames. + */ + if (dev->device_rev >= 41) { + struct can_priv *priv = netdev_priv(dev->netdev); + + priv->ctrlmode_supported |= CAN_CTRLMODE_ONE_SHOT | + CAN_CTRLMODE_LOOPBACK; + } else { + dev_info(dev->netdev->dev.parent, + "Firmware update available. 
Please contact support@peak-system.com\n"); + } + dev_info(dev->netdev->dev.parent, "PEAK-System %s adapter hwrev %u serial %08X (%u channel)\n", pcan_usb.name, dev->device_rev, serial_number, diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 272b0535d946..ddb51dd132ef 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1221,14 +1221,36 @@ static u16 mv88e6xxx_port_vlan(struct mv88e6xxx_chip *chip, int dev, int port) bool found = false; u16 pvlan; - list_for_each_entry(dp, &dst->ports, list) { - if (dp->ds->index == dev && dp->index == port) { + /* dev is a physical switch */ + if (dev <= dst->last_switch) { + list_for_each_entry(dp, &dst->ports, list) { + if (dp->ds->index == dev && dp->index == port) { + /* dp might be a DSA link or a user port, so it + * might or might not have a bridge_dev + * pointer. Use the "found" variable for both + * cases. + */ + br = dp->bridge_dev; + found = true; + break; + } + } + /* dev is a virtual bridge */ + } else { + list_for_each_entry(dp, &dst->ports, list) { + if (dp->bridge_num < 0) + continue; + + if (dp->bridge_num + 1 + dst->last_switch != dev) + continue; + + br = dp->bridge_dev; found = true; break; } } - /* Prevent frames from unknown switch or port */ + /* Prevent frames from unknown switch or virtual bridge */ if (!found) return 0; @@ -1236,7 +1258,6 @@ static u16 mv88e6xxx_port_vlan(struct mv88e6xxx_chip *chip, int dev, int port) if (dp->type == DSA_PORT_TYPE_CPU || dp->type == DSA_PORT_TYPE_DSA) return mv88e6xxx_port_mask(chip); - br = dp->bridge_dev; pvlan = 0; /* Frames from user ports can egress any local DSA links and CPU ports, @@ -2422,6 +2443,44 @@ static void mv88e6xxx_crosschip_bridge_leave(struct dsa_switch *ds, mv88e6xxx_reg_unlock(chip); } +/* Treat the software bridge as a virtual single-port switch behind the + * CPU and map in the PVT. 
First dst->last_switch elements are taken by + * physical switches, so start from beyond that range. + */ +static int mv88e6xxx_map_virtual_bridge_to_pvt(struct dsa_switch *ds, + int bridge_num) +{ + u8 dev = bridge_num + ds->dst->last_switch + 1; + struct mv88e6xxx_chip *chip = ds->priv; + int err; + + mv88e6xxx_reg_lock(chip); + err = mv88e6xxx_pvt_map(chip, dev, 0); + mv88e6xxx_reg_unlock(chip); + + return err; +} + +static int mv88e6xxx_bridge_tx_fwd_offload(struct dsa_switch *ds, int port, + struct net_device *br, + int bridge_num) +{ + return mv88e6xxx_map_virtual_bridge_to_pvt(ds, bridge_num); +} + +static void mv88e6xxx_bridge_tx_fwd_unoffload(struct dsa_switch *ds, int port, + struct net_device *br, + int bridge_num) +{ + int err; + + err = mv88e6xxx_map_virtual_bridge_to_pvt(ds, bridge_num); + if (err) { + dev_err(ds->dev, "failed to remap cross-chip Port VLAN: %pe\n", + ERR_PTR(err)); + } +} + static int mv88e6xxx_software_reset(struct mv88e6xxx_chip *chip) { if (chip->info->ops->reset) @@ -3025,6 +3084,15 @@ static int mv88e6xxx_setup(struct dsa_switch *ds) chip->ds = ds; ds->slave_mii_bus = mv88e6xxx_default_mdio_bus(chip); + /* Since virtual bridges are mapped in the PVT, the number we support + * depends on the physical switch topology. We need to let DSA figure + * that out and therefore we cannot set this at dsa_register_switch() + * time. 
+ */ + if (mv88e6xxx_has_pvt(chip)) + ds->num_fwd_offloading_bridges = MV88E6XXX_MAX_PVT_SWITCHES - + ds->dst->last_switch - 1; + mv88e6xxx_reg_lock(chip); if (chip->info->ops->setup_errata) { @@ -6128,6 +6196,8 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = { .crosschip_lag_change = mv88e6xxx_crosschip_lag_change, .crosschip_lag_join = mv88e6xxx_crosschip_lag_join, .crosschip_lag_leave = mv88e6xxx_crosschip_lag_leave, + .port_bridge_tx_fwd_offload = mv88e6xxx_bridge_tx_fwd_offload, + .port_bridge_tx_fwd_unoffload = mv88e6xxx_bridge_tx_fwd_unoffload, }; static int mv88e6xxx_register_switch(struct mv88e6xxx_chip *chip) diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index a2a15919b960..583a22d901b3 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -231,11 +231,6 @@ static int felix_tag_8021q_vlan_del(struct dsa_switch *ds, int port, u16 vid) return 0; } -static const struct dsa_8021q_ops felix_tag_8021q_ops = { - .vlan_add = felix_tag_8021q_vlan_add, - .vlan_del = felix_tag_8021q_vlan_del, -}; - /* Alternatively to using the NPI functionality, that same hardware MAC * connected internally to the enetc or fman DSA master can be configured to * use the software-defined tag_8021q frame format. 
As far as the hardware is @@ -425,29 +420,18 @@ static int felix_setup_tag_8021q(struct dsa_switch *ds, int cpu) ocelot_rmw_rix(ocelot, 0, cpu_flood, ANA_PGID_PGID, PGID_MC); ocelot_rmw_rix(ocelot, 0, cpu_flood, ANA_PGID_PGID, PGID_BC); - felix->dsa_8021q_ctx = kzalloc(sizeof(*felix->dsa_8021q_ctx), - GFP_KERNEL); - if (!felix->dsa_8021q_ctx) - return -ENOMEM; - - felix->dsa_8021q_ctx->ops = &felix_tag_8021q_ops; - felix->dsa_8021q_ctx->proto = htons(ETH_P_8021AD); - felix->dsa_8021q_ctx->ds = ds; - - err = dsa_8021q_setup(felix->dsa_8021q_ctx, true); + err = dsa_tag_8021q_register(ds, htons(ETH_P_8021AD)); if (err) - goto out_free_dsa_8021_ctx; + return err; err = felix_setup_mmio_filtering(felix); if (err) - goto out_teardown_dsa_8021q; + goto out_tag_8021q_unregister; return 0; -out_teardown_dsa_8021q: - dsa_8021q_setup(felix->dsa_8021q_ctx, false); -out_free_dsa_8021_ctx: - kfree(felix->dsa_8021q_ctx); +out_tag_8021q_unregister: + dsa_tag_8021q_unregister(ds); return err; } @@ -462,11 +446,7 @@ static void felix_teardown_tag_8021q(struct dsa_switch *ds, int cpu) dev_err(ds->dev, "felix_teardown_mmio_filtering returned %d", err); - err = dsa_8021q_setup(felix->dsa_8021q_ctx, false); - if (err) - dev_err(ds->dev, "dsa_8021q_setup returned %d", err); - - kfree(felix->dsa_8021q_ctx); + dsa_tag_8021q_unregister(ds); for (port = 0; port < ds->num_ports; port++) { if (dsa_is_unused_port(ds, port)) @@ -1679,6 +1659,8 @@ const struct dsa_switch_ops felix_switch_ops = { .port_mrp_del = felix_mrp_del, .port_mrp_add_ring_role = felix_mrp_add_ring_role, .port_mrp_del_ring_role = felix_mrp_del_ring_role, + .tag_8021q_vlan_add = felix_tag_8021q_vlan_add, + .tag_8021q_vlan_del = felix_tag_8021q_vlan_del, }; struct net_device *felix_port_to_netdev(struct ocelot *ocelot, int port) diff --git a/drivers/net/dsa/ocelot/felix.h b/drivers/net/dsa/ocelot/felix.h index 4d96cad815d5..9da3c6a94c6e 100644 --- a/drivers/net/dsa/ocelot/felix.h +++ b/drivers/net/dsa/ocelot/felix.h @@ -60,7 
+60,6 @@ struct felix { struct lynx_pcs **pcs; resource_size_t switch_base; resource_size_t imdio_base; - struct dsa_8021q_context *dsa_8021q_ctx; enum dsa_tag_protocol tag_proto; }; diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h index 221c7abdef0e..9cd7dbdd7db9 100644 --- a/drivers/net/dsa/sja1105/sja1105.h +++ b/drivers/net/dsa/sja1105/sja1105.h @@ -226,27 +226,13 @@ struct sja1105_flow_block { int num_virtual_links; }; -struct sja1105_bridge_vlan { - struct list_head list; - int port; - u16 vid; - bool pvid; - bool untagged; -}; - -enum sja1105_vlan_state { - SJA1105_VLAN_UNAWARE, - SJA1105_VLAN_BEST_EFFORT, - SJA1105_VLAN_FILTERING_FULL, -}; - struct sja1105_private { struct sja1105_static_config static_config; bool rgmii_rx_delay[SJA1105_MAX_NUM_PORTS]; bool rgmii_tx_delay[SJA1105_MAX_NUM_PORTS]; phy_interface_t phy_mode[SJA1105_MAX_NUM_PORTS]; bool fixed_link[SJA1105_MAX_NUM_PORTS]; - bool best_effort_vlan_filtering; + bool vlan_aware; unsigned long learn_ena; unsigned long ucast_egress_floods; unsigned long bcast_egress_floods; @@ -255,16 +241,14 @@ struct sja1105_private { struct gpio_desc *reset_gpio; struct spi_device *spidev; struct dsa_switch *ds; - struct list_head dsa_8021q_vlans; - struct list_head bridge_vlans; + u16 bridge_pvid[SJA1105_MAX_NUM_PORTS]; + u16 tag_8021q_pvid[SJA1105_MAX_NUM_PORTS]; struct sja1105_flow_block flow_block; struct sja1105_port ports[SJA1105_MAX_NUM_PORTS]; /* Serializes transmission of management frames so that * the switch doesn't confuse them with one another. 
*/ struct mutex mgmt_lock; - struct dsa_8021q_context *dsa_8021q_ctx; - enum sja1105_vlan_state vlan_state; struct devlink_region **regions; struct sja1105_cbs_entry *cbs; struct mii_bus *mdio_base_t1; @@ -311,10 +295,6 @@ int sja1110_pcs_mdio_write(struct mii_bus *bus, int phy, int reg, u16 val); /* From sja1105_devlink.c */ int sja1105_devlink_setup(struct dsa_switch *ds); void sja1105_devlink_teardown(struct dsa_switch *ds); -int sja1105_devlink_param_get(struct dsa_switch *ds, u32 id, - struct devlink_param_gset_ctx *ctx); -int sja1105_devlink_param_set(struct dsa_switch *ds, u32 id, - struct devlink_param_gset_ctx *ctx); int sja1105_devlink_info_get(struct dsa_switch *ds, struct devlink_info_req *req, struct netlink_ext_ack *extack); diff --git a/drivers/net/dsa/sja1105/sja1105_devlink.c b/drivers/net/dsa/sja1105/sja1105_devlink.c index b6a4a16b8c7e..05c7f4ca3b1a 100644 --- a/drivers/net/dsa/sja1105/sja1105_devlink.c +++ b/drivers/net/dsa/sja1105/sja1105_devlink.c @@ -115,105 +115,6 @@ static void sja1105_teardown_devlink_regions(struct dsa_switch *ds) kfree(priv->regions); } -static int sja1105_best_effort_vlan_filtering_get(struct sja1105_private *priv, - bool *be_vlan) -{ - *be_vlan = priv->best_effort_vlan_filtering; - - return 0; -} - -static int sja1105_best_effort_vlan_filtering_set(struct sja1105_private *priv, - bool be_vlan) -{ - struct dsa_switch *ds = priv->ds; - bool vlan_filtering; - int port; - int rc; - - priv->best_effort_vlan_filtering = be_vlan; - - rtnl_lock(); - for (port = 0; port < ds->num_ports; port++) { - struct dsa_port *dp; - - if (!dsa_is_user_port(ds, port)) - continue; - - dp = dsa_to_port(ds, port); - vlan_filtering = dsa_port_is_vlan_filtering(dp); - - rc = sja1105_vlan_filtering(ds, port, vlan_filtering, NULL); - if (rc) - break; - } - rtnl_unlock(); - - return rc; -} - -enum sja1105_devlink_param_id { - SJA1105_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, - SJA1105_DEVLINK_PARAM_ID_BEST_EFFORT_VLAN_FILTERING, -}; - 
-int sja1105_devlink_param_get(struct dsa_switch *ds, u32 id, - struct devlink_param_gset_ctx *ctx) -{ - struct sja1105_private *priv = ds->priv; - int err; - - switch (id) { - case SJA1105_DEVLINK_PARAM_ID_BEST_EFFORT_VLAN_FILTERING: - err = sja1105_best_effort_vlan_filtering_get(priv, - &ctx->val.vbool); - break; - default: - err = -EOPNOTSUPP; - break; - } - - return err; -} - -int sja1105_devlink_param_set(struct dsa_switch *ds, u32 id, - struct devlink_param_gset_ctx *ctx) -{ - struct sja1105_private *priv = ds->priv; - int err; - - switch (id) { - case SJA1105_DEVLINK_PARAM_ID_BEST_EFFORT_VLAN_FILTERING: - err = sja1105_best_effort_vlan_filtering_set(priv, - ctx->val.vbool); - break; - default: - err = -EOPNOTSUPP; - break; - } - - return err; -} - -static const struct devlink_param sja1105_devlink_params[] = { - DSA_DEVLINK_PARAM_DRIVER(SJA1105_DEVLINK_PARAM_ID_BEST_EFFORT_VLAN_FILTERING, - "best_effort_vlan_filtering", - DEVLINK_PARAM_TYPE_BOOL, - BIT(DEVLINK_PARAM_CMODE_RUNTIME)), -}; - -static int sja1105_setup_devlink_params(struct dsa_switch *ds) -{ - return dsa_devlink_params_register(ds, sja1105_devlink_params, - ARRAY_SIZE(sja1105_devlink_params)); -} - -static void sja1105_teardown_devlink_params(struct dsa_switch *ds) -{ - dsa_devlink_params_unregister(ds, sja1105_devlink_params, - ARRAY_SIZE(sja1105_devlink_params)); -} - int sja1105_devlink_info_get(struct dsa_switch *ds, struct devlink_info_req *req, struct netlink_ext_ack *extack) @@ -233,23 +134,10 @@ int sja1105_devlink_info_get(struct dsa_switch *ds, int sja1105_devlink_setup(struct dsa_switch *ds) { - int rc; - - rc = sja1105_setup_devlink_params(ds); - if (rc) - return rc; - - rc = sja1105_setup_devlink_regions(ds); - if (rc < 0) { - sja1105_teardown_devlink_params(ds); - return rc; - } - - return 0; + return sja1105_setup_devlink_regions(ds); } void sja1105_devlink_teardown(struct dsa_switch *ds) { - sja1105_teardown_devlink_params(ds); sja1105_teardown_devlink_regions(ds); } diff --git 
a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index e2dc997580a8..da042e211dda 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -378,8 +378,6 @@ static int sja1105_init_static_vlan(struct sja1105_private *priv) table->entry_count = 1; for (port = 0; port < ds->num_ports; port++) { - struct sja1105_bridge_vlan *v; - if (dsa_is_unused_port(ds, port)) continue; @@ -387,22 +385,10 @@ static int sja1105_init_static_vlan(struct sja1105_private *priv) pvid.vlan_bc |= BIT(port); pvid.tag_port &= ~BIT(port); - v = kzalloc(sizeof(*v), GFP_KERNEL); - if (!v) - return -ENOMEM; - - v->port = port; - v->vid = SJA1105_DEFAULT_VLAN; - v->untagged = true; - if (dsa_is_cpu_port(ds, port)) - v->pvid = true; - list_add(&v->list, &priv->dsa_8021q_vlans); - - v = kmemdup(v, sizeof(*v), GFP_KERNEL); - if (!v) - return -ENOMEM; - - list_add(&v->list, &priv->bridge_vlans); + if (dsa_is_cpu_port(ds, port)) { + priv->tag_8021q_pvid[port] = SJA1105_DEFAULT_VLAN; + priv->bridge_pvid[port] = SJA1105_DEFAULT_VLAN; + } } ((struct sja1105_vlan_lookup_entry *)table->entries)[0] = pvid; @@ -551,18 +537,11 @@ void sja1105_frame_memory_partitioning(struct sja1105_private *priv) { struct sja1105_l2_forwarding_params_entry *l2_fwd_params; struct sja1105_vl_forwarding_params_entry *vl_fwd_params; - int max_mem = priv->info->max_frame_mem; struct sja1105_table *table; - /* VLAN retagging is implemented using a loopback port that consumes - * frame buffers. That leaves less for us. - */ - if (priv->vlan_state == SJA1105_VLAN_BEST_EFFORT) - max_mem -= SJA1105_FRAME_MEMORY_RETAGGING_OVERHEAD; - table = &priv->static_config.tables[BLK_IDX_L2_FORWARDING_PARAMS]; l2_fwd_params = table->entries; - l2_fwd_params->part_spc[0] = max_mem; + l2_fwd_params->part_spc[0] = SJA1105_MAX_FRAME_MEMORY; /* If we have any critical-traffic virtual links, we need to reserve * some frame buffer memory for them. 
At the moment, hardcode the value @@ -1422,7 +1401,7 @@ int sja1105pqrs_fdb_add(struct dsa_switch *ds, int port, l2_lookup.vlanid = vid; l2_lookup.iotag = SJA1105_S_TAG; l2_lookup.mask_macaddr = GENMASK_ULL(ETH_ALEN * 8 - 1, 0); - if (priv->vlan_state != SJA1105_VLAN_UNAWARE) { + if (priv->vlan_aware) { l2_lookup.mask_vlanid = VLAN_VID_MASK; l2_lookup.mask_iotag = BIT(0); } else { @@ -1485,7 +1464,7 @@ int sja1105pqrs_fdb_del(struct dsa_switch *ds, int port, l2_lookup.vlanid = vid; l2_lookup.iotag = SJA1105_S_TAG; l2_lookup.mask_macaddr = GENMASK_ULL(ETH_ALEN * 8 - 1, 0); - if (priv->vlan_state != SJA1105_VLAN_UNAWARE) { + if (priv->vlan_aware) { l2_lookup.mask_vlanid = VLAN_VID_MASK; l2_lookup.mask_iotag = BIT(0); } else { @@ -1531,7 +1510,7 @@ static int sja1105_fdb_add(struct dsa_switch *ds, int port, * for what gets printed in 'bridge fdb show'. In the case of zero, * no VID gets printed at all. */ - if (priv->vlan_state != SJA1105_VLAN_FILTERING_FULL) + if (!priv->vlan_aware) vid = 0; return priv->info->fdb_add_cmd(ds, port, addr, vid); @@ -1542,7 +1521,7 @@ static int sja1105_fdb_del(struct dsa_switch *ds, int port, { struct sja1105_private *priv = ds->priv; - if (priv->vlan_state != SJA1105_VLAN_FILTERING_FULL) + if (!priv->vlan_aware) vid = 0; return priv->info->fdb_del_cmd(ds, port, addr, vid); @@ -1587,7 +1566,7 @@ static int sja1105_fdb_dump(struct dsa_switch *ds, int port, u64_to_ether_addr(l2_lookup.macaddr, macaddr); /* We need to hide the dsa_8021q VLANs from the user. 
*/ - if (priv->vlan_state == SJA1105_VLAN_UNAWARE) + if (!priv->vlan_aware) l2_lookup.vlanid = 0; cb(macaddr, l2_lookup.vlanid, l2_lookup.lockeds, data); } @@ -1997,89 +1976,27 @@ static int sja1105_pvid_apply(struct sja1105_private *priv, int port, u16 pvid) mac = priv->static_config.tables[BLK_IDX_MAC_CONFIG].entries; + if (mac[port].vlanid == pvid) + return 0; + mac[port].vlanid = pvid; return sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG, port, &mac[port], true); } -static int sja1105_crosschip_bridge_join(struct dsa_switch *ds, - int tree_index, int sw_index, - int other_port, struct net_device *br) -{ - struct dsa_switch *other_ds = dsa_switch_find(tree_index, sw_index); - struct sja1105_private *other_priv = other_ds->priv; - struct sja1105_private *priv = ds->priv; - int port, rc; - - if (other_ds->ops != &sja1105_switch_ops) - return 0; - - for (port = 0; port < ds->num_ports; port++) { - if (!dsa_is_user_port(ds, port)) - continue; - if (dsa_to_port(ds, port)->bridge_dev != br) - continue; - - rc = dsa_8021q_crosschip_bridge_join(priv->dsa_8021q_ctx, - port, - other_priv->dsa_8021q_ctx, - other_port); - if (rc) - return rc; - - rc = dsa_8021q_crosschip_bridge_join(other_priv->dsa_8021q_ctx, - other_port, - priv->dsa_8021q_ctx, - port); - if (rc) - return rc; - } - - return 0; -} - -static void sja1105_crosschip_bridge_leave(struct dsa_switch *ds, - int tree_index, int sw_index, - int other_port, - struct net_device *br) -{ - struct dsa_switch *other_ds = dsa_switch_find(tree_index, sw_index); - struct sja1105_private *other_priv = other_ds->priv; - struct sja1105_private *priv = ds->priv; - int port; - - if (other_ds->ops != &sja1105_switch_ops) - return; - - for (port = 0; port < ds->num_ports; port++) { - if (!dsa_is_user_port(ds, port)) - continue; - if (dsa_to_port(ds, port)->bridge_dev != br) - continue; - - dsa_8021q_crosschip_bridge_leave(priv->dsa_8021q_ctx, port, - other_priv->dsa_8021q_ctx, - other_port); - - 
dsa_8021q_crosschip_bridge_leave(other_priv->dsa_8021q_ctx, - other_port, - priv->dsa_8021q_ctx, port); - } -} - -static int sja1105_setup_8021q_tagging(struct dsa_switch *ds, bool enabled) +static int sja1105_commit_pvid(struct dsa_switch *ds, int port) { + struct dsa_port *dp = dsa_to_port(ds, port); struct sja1105_private *priv = ds->priv; - int rc; + u16 pvid; - rc = dsa_8021q_setup(priv->dsa_8021q_ctx, enabled); - if (rc) - return rc; + if (dp->bridge_dev && br_vlan_enabled(dp->bridge_dev)) + pvid = priv->bridge_pvid[port]; + else + pvid = priv->tag_8021q_pvid[port]; - dev_info(ds->dev, "%s switch tagging\n", - enabled ? "Enabled" : "Disabled"); - return 0; + return sja1105_pvid_apply(priv, port, pvid); } static enum dsa_tag_protocol @@ -2091,57 +2008,6 @@ sja1105_get_tag_protocol(struct dsa_switch *ds, int port, return priv->info->tag_proto; } -static int sja1105_find_free_subvlan(u16 *subvlan_map, bool pvid) -{ - int subvlan; - - if (pvid) - return 0; - - for (subvlan = 1; subvlan < DSA_8021Q_N_SUBVLAN; subvlan++) - if (subvlan_map[subvlan] == VLAN_N_VID) - return subvlan; - - return -1; -} - -static int sja1105_find_subvlan(u16 *subvlan_map, u16 vid) -{ - int subvlan; - - for (subvlan = 0; subvlan < DSA_8021Q_N_SUBVLAN; subvlan++) - if (subvlan_map[subvlan] == vid) - return subvlan; - - return -1; -} - -static int sja1105_find_committed_subvlan(struct sja1105_private *priv, - int port, u16 vid) -{ - struct sja1105_port *sp = &priv->ports[port]; - - return sja1105_find_subvlan(sp->subvlan_map, vid); -} - -static void sja1105_init_subvlan_map(u16 *subvlan_map) -{ - int subvlan; - - for (subvlan = 0; subvlan < DSA_8021Q_N_SUBVLAN; subvlan++) - subvlan_map[subvlan] = VLAN_N_VID; -} - -static void sja1105_commit_subvlan_map(struct sja1105_private *priv, int port, - u16 *subvlan_map) -{ - struct sja1105_port *sp = &priv->ports[port]; - int subvlan; - - for (subvlan = 0; subvlan < DSA_8021Q_N_SUBVLAN; subvlan++) - sp->subvlan_map[subvlan] = subvlan_map[subvlan]; 
-} - static int sja1105_is_vlan_configured(struct sja1105_private *priv, u16 vid) { struct sja1105_vlan_lookup_entry *vlan; @@ -2158,602 +2024,6 @@ static int sja1105_is_vlan_configured(struct sja1105_private *priv, u16 vid) return -1; } -static int -sja1105_find_retagging_entry(struct sja1105_retagging_entry *retagging, - int count, int from_port, u16 from_vid, - u16 to_vid) -{ - int i; - - for (i = 0; i < count; i++) - if (retagging[i].ing_port == BIT(from_port) && - retagging[i].vlan_ing == from_vid && - retagging[i].vlan_egr == to_vid) - return i; - - /* Return an invalid entry index if not found */ - return -1; -} - -static int sja1105_commit_vlans(struct sja1105_private *priv, - struct sja1105_vlan_lookup_entry *new_vlan, - struct sja1105_retagging_entry *new_retagging, - int num_retagging) -{ - struct sja1105_retagging_entry *retagging; - struct sja1105_vlan_lookup_entry *vlan; - struct sja1105_table *table; - int num_vlans = 0; - int rc, i, k = 0; - - /* VLAN table */ - table = &priv->static_config.tables[BLK_IDX_VLAN_LOOKUP]; - vlan = table->entries; - - for (i = 0; i < VLAN_N_VID; i++) { - int match = sja1105_is_vlan_configured(priv, i); - - if (new_vlan[i].vlanid != VLAN_N_VID) - num_vlans++; - - if (new_vlan[i].vlanid == VLAN_N_VID && match >= 0) { - /* Was there before, no longer is. 
Delete */ - dev_dbg(priv->ds->dev, "Deleting VLAN %d\n", i); - rc = sja1105_dynamic_config_write(priv, - BLK_IDX_VLAN_LOOKUP, - i, &vlan[match], false); - if (rc < 0) - return rc; - } else if (new_vlan[i].vlanid != VLAN_N_VID) { - /* Nothing changed, don't do anything */ - if (match >= 0 && - vlan[match].vlanid == new_vlan[i].vlanid && - vlan[match].tag_port == new_vlan[i].tag_port && - vlan[match].vlan_bc == new_vlan[i].vlan_bc && - vlan[match].vmemb_port == new_vlan[i].vmemb_port) - continue; - /* Update entry */ - dev_dbg(priv->ds->dev, "Updating VLAN %d\n", i); - rc = sja1105_dynamic_config_write(priv, - BLK_IDX_VLAN_LOOKUP, - i, &new_vlan[i], - true); - if (rc < 0) - return rc; - } - } - - if (table->entry_count) - kfree(table->entries); - - table->entries = kcalloc(num_vlans, table->ops->unpacked_entry_size, - GFP_KERNEL); - if (!table->entries) - return -ENOMEM; - - table->entry_count = num_vlans; - vlan = table->entries; - - for (i = 0; i < VLAN_N_VID; i++) { - if (new_vlan[i].vlanid == VLAN_N_VID) - continue; - vlan[k++] = new_vlan[i]; - } - - /* VLAN Retagging Table */ - table = &priv->static_config.tables[BLK_IDX_RETAGGING]; - retagging = table->entries; - - for (i = 0; i < table->entry_count; i++) { - rc = sja1105_dynamic_config_write(priv, BLK_IDX_RETAGGING, - i, &retagging[i], false); - if (rc) - return rc; - } - - if (table->entry_count) - kfree(table->entries); - - table->entries = kcalloc(num_retagging, table->ops->unpacked_entry_size, - GFP_KERNEL); - if (!table->entries) - return -ENOMEM; - - table->entry_count = num_retagging; - retagging = table->entries; - - for (i = 0; i < num_retagging; i++) { - retagging[i] = new_retagging[i]; - - /* Update entry */ - rc = sja1105_dynamic_config_write(priv, BLK_IDX_RETAGGING, - i, &retagging[i], true); - if (rc < 0) - return rc; - } - - return 0; -} - -struct sja1105_crosschip_vlan { - struct list_head list; - u16 vid; - bool untagged; - int port; - int other_port; - struct dsa_8021q_context *other_ctx; -}; 
- -struct sja1105_crosschip_switch { - struct list_head list; - struct dsa_8021q_context *other_ctx; -}; - -static int sja1105_commit_pvid(struct sja1105_private *priv) -{ - struct sja1105_bridge_vlan *v; - struct list_head *vlan_list; - int rc = 0; - - if (priv->vlan_state == SJA1105_VLAN_FILTERING_FULL) - vlan_list = &priv->bridge_vlans; - else - vlan_list = &priv->dsa_8021q_vlans; - - list_for_each_entry(v, vlan_list, list) { - if (v->pvid) { - rc = sja1105_pvid_apply(priv, v->port, v->vid); - if (rc) - break; - } - } - - return rc; -} - -static int -sja1105_build_bridge_vlans(struct sja1105_private *priv, - struct sja1105_vlan_lookup_entry *new_vlan) -{ - struct sja1105_bridge_vlan *v; - - if (priv->vlan_state == SJA1105_VLAN_UNAWARE) - return 0; - - list_for_each_entry(v, &priv->bridge_vlans, list) { - int match = v->vid; - - new_vlan[match].vlanid = v->vid; - new_vlan[match].vmemb_port |= BIT(v->port); - new_vlan[match].vlan_bc |= BIT(v->port); - if (!v->untagged) - new_vlan[match].tag_port |= BIT(v->port); - new_vlan[match].type_entry = SJA1110_VLAN_D_TAG; - } - - return 0; -} - -static int -sja1105_build_dsa_8021q_vlans(struct sja1105_private *priv, - struct sja1105_vlan_lookup_entry *new_vlan) -{ - struct sja1105_bridge_vlan *v; - - if (priv->vlan_state == SJA1105_VLAN_FILTERING_FULL) - return 0; - - list_for_each_entry(v, &priv->dsa_8021q_vlans, list) { - int match = v->vid; - - new_vlan[match].vlanid = v->vid; - new_vlan[match].vmemb_port |= BIT(v->port); - new_vlan[match].vlan_bc |= BIT(v->port); - if (!v->untagged) - new_vlan[match].tag_port |= BIT(v->port); - new_vlan[match].type_entry = SJA1110_VLAN_D_TAG; - } - - return 0; -} - -static int sja1105_build_subvlans(struct sja1105_private *priv, - u16 subvlan_map[][DSA_8021Q_N_SUBVLAN], - struct sja1105_vlan_lookup_entry *new_vlan, - struct sja1105_retagging_entry *new_retagging, - int *num_retagging) -{ - struct sja1105_bridge_vlan *v; - int k = *num_retagging; - - if (priv->vlan_state != 
SJA1105_VLAN_BEST_EFFORT) - return 0; - - list_for_each_entry(v, &priv->bridge_vlans, list) { - int upstream = dsa_upstream_port(priv->ds, v->port); - int match, subvlan; - u16 rx_vid; - - /* Only sub-VLANs on user ports need to be applied. - * Bridge VLANs also include VLANs added automatically - * by DSA on the CPU port. - */ - if (!dsa_is_user_port(priv->ds, v->port)) - continue; - - subvlan = sja1105_find_subvlan(subvlan_map[v->port], - v->vid); - if (subvlan < 0) { - subvlan = sja1105_find_free_subvlan(subvlan_map[v->port], - v->pvid); - if (subvlan < 0) { - dev_err(priv->ds->dev, "No more free subvlans\n"); - return -ENOSPC; - } - } - - rx_vid = dsa_8021q_rx_vid_subvlan(priv->ds, v->port, subvlan); - - /* @v->vid on @v->port needs to be retagged to @rx_vid - * on @upstream. Assume @v->vid on @v->port and on - * @upstream was already configured by the previous - * iteration over bridge_vlans. - */ - match = rx_vid; - new_vlan[match].vlanid = rx_vid; - new_vlan[match].vmemb_port |= BIT(v->port); - new_vlan[match].vmemb_port |= BIT(upstream); - new_vlan[match].vlan_bc |= BIT(v->port); - new_vlan[match].vlan_bc |= BIT(upstream); - /* The "untagged" flag is set the same as for the - * original VLAN - */ - if (!v->untagged) - new_vlan[match].tag_port |= BIT(v->port); - /* But it's always tagged towards the CPU */ - new_vlan[match].tag_port |= BIT(upstream); - new_vlan[match].type_entry = SJA1110_VLAN_D_TAG; - - /* The Retagging Table generates packet *clones* with - * the new VLAN. This is a very odd hardware quirk - * which we need to suppress by dropping the original - * packet. - * Deny egress of the original VLAN towards the CPU - * port. This will force the switch to drop it, and - * we'll see only the retagged packets. 
- */ - match = v->vid; - new_vlan[match].vlan_bc &= ~BIT(upstream); - - /* And the retagging itself */ - new_retagging[k].vlan_ing = v->vid; - new_retagging[k].vlan_egr = rx_vid; - new_retagging[k].ing_port = BIT(v->port); - new_retagging[k].egr_port = BIT(upstream); - if (k++ == SJA1105_MAX_RETAGGING_COUNT) { - dev_err(priv->ds->dev, "No more retagging rules\n"); - return -ENOSPC; - } - - subvlan_map[v->port][subvlan] = v->vid; - } - - *num_retagging = k; - - return 0; -} - -/* Sadly, in crosschip scenarios where the CPU port is also the link to another - * switch, we should retag backwards (the dsa_8021q vid to the original vid) on - * the CPU port of neighbour switches. - */ -static int -sja1105_build_crosschip_subvlans(struct sja1105_private *priv, - struct sja1105_vlan_lookup_entry *new_vlan, - struct sja1105_retagging_entry *new_retagging, - int *num_retagging) -{ - struct sja1105_crosschip_vlan *tmp, *pos; - struct dsa_8021q_crosschip_link *c; - struct sja1105_bridge_vlan *v, *w; - struct list_head crosschip_vlans; - int k = *num_retagging; - int rc = 0; - - if (priv->vlan_state != SJA1105_VLAN_BEST_EFFORT) - return 0; - - INIT_LIST_HEAD(&crosschip_vlans); - - list_for_each_entry(c, &priv->dsa_8021q_ctx->crosschip_links, list) { - struct sja1105_private *other_priv = c->other_ctx->ds->priv; - - if (other_priv->vlan_state == SJA1105_VLAN_FILTERING_FULL) - continue; - - /* Crosschip links are also added to the CPU ports. - * Ignore those. - */ - if (!dsa_is_user_port(priv->ds, c->port)) - continue; - if (!dsa_is_user_port(c->other_ctx->ds, c->other_port)) - continue; - - /* Search for VLANs on the remote port */ - list_for_each_entry(v, &other_priv->bridge_vlans, list) { - bool already_added = false; - bool we_have_it = false; - - if (v->port != c->other_port) - continue; - - /* If @v is a pvid on @other_ds, it does not need - * re-retagging, because its SVL field is 0 and we - * already allow that, via the dsa_8021q crosschip - * links. 
- */ - if (v->pvid) - continue; - - /* Search for the VLAN on our local port */ - list_for_each_entry(w, &priv->bridge_vlans, list) { - if (w->port == c->port && w->vid == v->vid) { - we_have_it = true; - break; - } - } - - if (!we_have_it) - continue; - - list_for_each_entry(tmp, &crosschip_vlans, list) { - if (tmp->vid == v->vid && - tmp->untagged == v->untagged && - tmp->port == c->port && - tmp->other_port == v->port && - tmp->other_ctx == c->other_ctx) { - already_added = true; - break; - } - } - - if (already_added) - continue; - - tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); - if (!tmp) { - dev_err(priv->ds->dev, "Failed to allocate memory\n"); - rc = -ENOMEM; - goto out; - } - tmp->vid = v->vid; - tmp->port = c->port; - tmp->other_port = v->port; - tmp->other_ctx = c->other_ctx; - tmp->untagged = v->untagged; - list_add(&tmp->list, &crosschip_vlans); - } - } - - list_for_each_entry(tmp, &crosschip_vlans, list) { - struct sja1105_private *other_priv = tmp->other_ctx->ds->priv; - int upstream = dsa_upstream_port(priv->ds, tmp->port); - int match, subvlan; - u16 rx_vid; - - subvlan = sja1105_find_committed_subvlan(other_priv, - tmp->other_port, - tmp->vid); - /* If this happens, it's a bug. The neighbour switch does not - * have a subvlan for tmp->vid on tmp->other_port, but it - * should, since we already checked for its vlan_state. - */ - if (WARN_ON(subvlan < 0)) { - rc = -EINVAL; - goto out; - } - - rx_vid = dsa_8021q_rx_vid_subvlan(tmp->other_ctx->ds, - tmp->other_port, - subvlan); - - /* The @rx_vid retagged from @tmp->vid on - * {@tmp->other_ds, @tmp->other_port} needs to be - * re-retagged to @tmp->vid on the way back to us. - * - * Assume the original @tmp->vid is already configured - * on this local switch, otherwise we wouldn't be - * retagging its subvlan on the other switch in the - * first place. We just need to add a reverse retagging - * rule for @rx_vid and install @rx_vid on our ports. 
- */ - match = rx_vid; - new_vlan[match].vlanid = rx_vid; - new_vlan[match].vmemb_port |= BIT(tmp->port); - new_vlan[match].vmemb_port |= BIT(upstream); - /* The "untagged" flag is set the same as for the - * original VLAN. And towards the CPU, it doesn't - * really matter, because @rx_vid will only receive - * traffic on that port. For consistency with other dsa_8021q - * VLANs, we'll keep the CPU port tagged. - */ - if (!tmp->untagged) - new_vlan[match].tag_port |= BIT(tmp->port); - new_vlan[match].tag_port |= BIT(upstream); - new_vlan[match].type_entry = SJA1110_VLAN_D_TAG; - /* Deny egress of @rx_vid towards our front-panel port. - * This will force the switch to drop it, and we'll see - * only the re-retagged packets (having the original, - * pre-initial-retagging, VLAN @tmp->vid). - */ - new_vlan[match].vlan_bc &= ~BIT(tmp->port); - - /* On reverse retagging, the same ingress VLAN goes to multiple - * ports. So we have an opportunity to create composite rules - * to not waste the limited space in the retagging table. 
- */ - k = sja1105_find_retagging_entry(new_retagging, *num_retagging, - upstream, rx_vid, tmp->vid); - if (k < 0) { - if (*num_retagging == SJA1105_MAX_RETAGGING_COUNT) { - dev_err(priv->ds->dev, "No more retagging rules\n"); - rc = -ENOSPC; - goto out; - } - k = (*num_retagging)++; - } - /* And the retagging itself */ - new_retagging[k].vlan_ing = rx_vid; - new_retagging[k].vlan_egr = tmp->vid; - new_retagging[k].ing_port = BIT(upstream); - new_retagging[k].egr_port |= BIT(tmp->port); - } - -out: - list_for_each_entry_safe(tmp, pos, &crosschip_vlans, list) { - list_del(&tmp->list); - kfree(tmp); - } - - return rc; -} - -static int sja1105_build_vlan_table(struct sja1105_private *priv, bool notify); - -static int sja1105_notify_crosschip_switches(struct sja1105_private *priv) -{ - struct sja1105_crosschip_switch *s, *pos; - struct list_head crosschip_switches; - struct dsa_8021q_crosschip_link *c; - int rc = 0; - - INIT_LIST_HEAD(&crosschip_switches); - - list_for_each_entry(c, &priv->dsa_8021q_ctx->crosschip_links, list) { - bool already_added = false; - - list_for_each_entry(s, &crosschip_switches, list) { - if (s->other_ctx == c->other_ctx) { - already_added = true; - break; - } - } - - if (already_added) - continue; - - s = kzalloc(sizeof(*s), GFP_KERNEL); - if (!s) { - dev_err(priv->ds->dev, "Failed to allocate memory\n"); - rc = -ENOMEM; - goto out; - } - s->other_ctx = c->other_ctx; - list_add(&s->list, &crosschip_switches); - } - - list_for_each_entry(s, &crosschip_switches, list) { - struct sja1105_private *other_priv = s->other_ctx->ds->priv; - - rc = sja1105_build_vlan_table(other_priv, false); - if (rc) - goto out; - } - -out: - list_for_each_entry_safe(s, pos, &crosschip_switches, list) { - list_del(&s->list); - kfree(s); - } - - return rc; -} - -static int sja1105_build_vlan_table(struct sja1105_private *priv, bool notify) -{ - u16 subvlan_map[SJA1105_MAX_NUM_PORTS][DSA_8021Q_N_SUBVLAN]; - struct sja1105_retagging_entry *new_retagging; - struct 
sja1105_vlan_lookup_entry *new_vlan; - struct sja1105_table *table; - int i, num_retagging = 0; - int rc; - - table = &priv->static_config.tables[BLK_IDX_VLAN_LOOKUP]; - new_vlan = kcalloc(VLAN_N_VID, - table->ops->unpacked_entry_size, GFP_KERNEL); - if (!new_vlan) - return -ENOMEM; - - table = &priv->static_config.tables[BLK_IDX_VLAN_LOOKUP]; - new_retagging = kcalloc(SJA1105_MAX_RETAGGING_COUNT, - table->ops->unpacked_entry_size, GFP_KERNEL); - if (!new_retagging) { - kfree(new_vlan); - return -ENOMEM; - } - - for (i = 0; i < VLAN_N_VID; i++) - new_vlan[i].vlanid = VLAN_N_VID; - - for (i = 0; i < SJA1105_MAX_RETAGGING_COUNT; i++) - new_retagging[i].vlan_ing = VLAN_N_VID; - - for (i = 0; i < priv->ds->num_ports; i++) - sja1105_init_subvlan_map(subvlan_map[i]); - - /* Bridge VLANs */ - rc = sja1105_build_bridge_vlans(priv, new_vlan); - if (rc) - goto out; - - /* VLANs necessary for dsa_8021q operation, given to us by tag_8021q.c: - * - RX VLANs - * - TX VLANs - * - Crosschip links - */ - rc = sja1105_build_dsa_8021q_vlans(priv, new_vlan); - if (rc) - goto out; - - /* Private VLANs necessary for dsa_8021q operation, which we need to - * determine on our own: - * - Sub-VLANs - * - Sub-VLANs of crosschip switches - */ - rc = sja1105_build_subvlans(priv, subvlan_map, new_vlan, new_retagging, - &num_retagging); - if (rc) - goto out; - - rc = sja1105_build_crosschip_subvlans(priv, new_vlan, new_retagging, - &num_retagging); - if (rc) - goto out; - - rc = sja1105_commit_vlans(priv, new_vlan, new_retagging, num_retagging); - if (rc) - goto out; - - rc = sja1105_commit_pvid(priv); - if (rc) - goto out; - - for (i = 0; i < priv->ds->num_ports; i++) - sja1105_commit_subvlan_map(priv, i, subvlan_map[i]); - - if (notify) { - rc = sja1105_notify_crosschip_switches(priv); - if (rc) - goto out; - } - -out: - kfree(new_vlan); - kfree(new_retagging); - - return rc; -} - /* The TPID setting belongs to the General Parameters table, * which can only be partially reconfigured at runtime 
(and not the TPID). * So a switch reset is required. @@ -2764,10 +2034,8 @@ int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled, struct sja1105_l2_lookup_params_entry *l2_lookup_params; struct sja1105_general_params_entry *general_params; struct sja1105_private *priv = ds->priv; - enum sja1105_vlan_state state; struct sja1105_table *table; struct sja1105_rule *rule; - bool want_tagging; u16 tpid, tpid2; int rc; @@ -2798,19 +2066,10 @@ int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled, sp->xmit_tpid = ETH_P_SJA1105; } - if (!enabled) - state = SJA1105_VLAN_UNAWARE; - else if (priv->best_effort_vlan_filtering) - state = SJA1105_VLAN_BEST_EFFORT; - else - state = SJA1105_VLAN_FILTERING_FULL; - - if (priv->vlan_state == state) + if (priv->vlan_aware == enabled) return 0; - priv->vlan_state = state; - want_tagging = (state == SJA1105_VLAN_UNAWARE || - state == SJA1105_VLAN_BEST_EFFORT); + priv->vlan_aware = enabled; table = &priv->static_config.tables[BLK_IDX_GENERAL_PARAMS]; general_params = table->entries; @@ -2824,8 +2083,6 @@ int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled, general_params->incl_srcpt1 = enabled; general_params->incl_srcpt0 = enabled; - want_tagging = priv->best_effort_vlan_filtering || !enabled; - /* VLAN filtering => independent VLAN learning. * No VLAN filtering (or best effort) => shared VLAN learning. 
* @@ -2846,132 +2103,135 @@ int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled, */ table = &priv->static_config.tables[BLK_IDX_L2_LOOKUP_PARAMS]; l2_lookup_params = table->entries; - l2_lookup_params->shared_learn = want_tagging; + l2_lookup_params->shared_learn = !priv->vlan_aware; - sja1105_frame_memory_partitioning(priv); + for (port = 0; port < ds->num_ports; port++) { + if (dsa_is_unused_port(ds, port)) + continue; - rc = sja1105_build_vlan_table(priv, false); - if (rc) - return rc; + rc = sja1105_commit_pvid(ds, port); + if (rc) + return rc; + } rc = sja1105_static_config_reload(priv, SJA1105_VLAN_FILTERING); if (rc) NL_SET_ERR_MSG_MOD(extack, "Failed to change VLAN Ethertype"); - /* Switch port identification based on 802.1Q is only passable - * if we are not under a vlan_filtering bridge. So make sure - * the two configurations are mutually exclusive (of course, the - * user may know better, i.e. best_effort_vlan_filtering). - */ - return sja1105_setup_8021q_tagging(ds, want_tagging); + return rc; } -/* Returns number of VLANs added (0 or 1) on success, - * or a negative error code. - */ -static int sja1105_vlan_add_one(struct dsa_switch *ds, int port, u16 vid, - u16 flags, struct list_head *vlan_list) -{ - bool untagged = flags & BRIDGE_VLAN_INFO_UNTAGGED; - bool pvid = flags & BRIDGE_VLAN_INFO_PVID; - struct sja1105_bridge_vlan *v; - - list_for_each_entry(v, vlan_list, list) { - if (v->port == port && v->vid == vid) { - /* Already added */ - if (v->untagged == untagged && v->pvid == pvid) - /* Nothing changed */ - return 0; - - /* It's the same VLAN, but some of the flags changed - * and the user did not bother to delete it first. - * Update it and trigger sja1105_build_vlan_table. 
- */ - v->untagged = untagged; - v->pvid = pvid; - return 1; - } - } +static int sja1105_vlan_add(struct sja1105_private *priv, int port, u16 vid, + u16 flags) +{ + struct sja1105_vlan_lookup_entry *vlan; + struct sja1105_table *table; + int match, rc; - v = kzalloc(sizeof(*v), GFP_KERNEL); - if (!v) { - dev_err(ds->dev, "Out of memory while storing VLAN\n"); - return -ENOMEM; + table = &priv->static_config.tables[BLK_IDX_VLAN_LOOKUP]; + + match = sja1105_is_vlan_configured(priv, vid); + if (match < 0) { + rc = sja1105_table_resize(table, table->entry_count + 1); + if (rc) + return rc; + match = table->entry_count - 1; } - v->port = port; - v->vid = vid; - v->untagged = untagged; - v->pvid = pvid; - list_add(&v->list, vlan_list); + /* Assign pointer after the resize (it's new memory) */ + vlan = table->entries; - return 1; + vlan[match].type_entry = SJA1110_VLAN_D_TAG; + vlan[match].vlanid = vid; + vlan[match].vlan_bc |= BIT(port); + vlan[match].vmemb_port |= BIT(port); + if (flags & BRIDGE_VLAN_INFO_UNTAGGED) + vlan[match].tag_port &= ~BIT(port); + else + vlan[match].tag_port |= BIT(port); + + return sja1105_dynamic_config_write(priv, BLK_IDX_VLAN_LOOKUP, vid, + &vlan[match], true); } -/* Returns number of VLANs deleted (0 or 1) */ -static int sja1105_vlan_del_one(struct dsa_switch *ds, int port, u16 vid, - struct list_head *vlan_list) +static int sja1105_vlan_del(struct sja1105_private *priv, int port, u16 vid) { - struct sja1105_bridge_vlan *v, *n; + struct sja1105_vlan_lookup_entry *vlan; + struct sja1105_table *table; + bool keep = true; + int match, rc; - list_for_each_entry_safe(v, n, vlan_list, list) { - if (v->port == port && v->vid == vid) { - list_del(&v->list); - kfree(v); - return 1; - } - } + table = &priv->static_config.tables[BLK_IDX_VLAN_LOOKUP]; + + match = sja1105_is_vlan_configured(priv, vid); + /* Can't delete a missing entry. 
*/ + if (match < 0) + return 0; + + /* Assign pointer after the resize (it's new memory) */ + vlan = table->entries; + + vlan[match].vlanid = vid; + vlan[match].vlan_bc &= ~BIT(port); + vlan[match].vmemb_port &= ~BIT(port); + /* Also unset tag_port, just so we don't have a confusing bitmap + * (no practical purpose). + */ + vlan[match].tag_port &= ~BIT(port); + + /* If there's no port left as member of this VLAN, + * it's time for it to go. + */ + if (!vlan[match].vmemb_port) + keep = false; + + rc = sja1105_dynamic_config_write(priv, BLK_IDX_VLAN_LOOKUP, vid, + &vlan[match], keep); + if (rc < 0) + return rc; + + if (!keep) + return sja1105_table_delete_entry(table, match); return 0; } -static int sja1105_vlan_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan, - struct netlink_ext_ack *extack) +static int sja1105_bridge_vlan_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan, + struct netlink_ext_ack *extack) { struct sja1105_private *priv = ds->priv; - bool vlan_table_changed = false; + u16 flags = vlan->flags; int rc; - /* If the user wants best-effort VLAN filtering (aka vlan_filtering - * bridge plus tagging), be sure to at least deny alterations to the - * configuration done by dsa_8021q. + /* Be sure to deny alterations to the configuration done by tag_8021q. */ - if (priv->vlan_state != SJA1105_VLAN_FILTERING_FULL && - vid_is_dsa_8021q(vlan->vid)) { + if (vid_is_dsa_8021q(vlan->vid)) { NL_SET_ERR_MSG_MOD(extack, "Range 1024-3071 reserved for dsa_8021q operation"); return -EBUSY; } - rc = sja1105_vlan_add_one(ds, port, vlan->vid, vlan->flags, - &priv->bridge_vlans); - if (rc < 0) + /* Always install bridge VLANs as egress-tagged on the CPU port. 
*/ + if (dsa_is_cpu_port(ds, port)) + flags = 0; + + rc = sja1105_vlan_add(priv, port, vlan->vid, flags); + if (rc) return rc; - if (rc > 0) - vlan_table_changed = true; - if (!vlan_table_changed) - return 0; + if (vlan->flags & BRIDGE_VLAN_INFO_PVID) + priv->bridge_pvid[port] = vlan->vid; - return sja1105_build_vlan_table(priv, true); + return sja1105_commit_pvid(ds, port); } -static int sja1105_vlan_del(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan) +static int sja1105_bridge_vlan_del(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan) { struct sja1105_private *priv = ds->priv; - bool vlan_table_changed = false; - int rc; - rc = sja1105_vlan_del_one(ds, port, vlan->vid, &priv->bridge_vlans); - if (rc > 0) - vlan_table_changed = true; - - if (!vlan_table_changed) - return 0; - - return sja1105_build_vlan_table(priv, true); + return sja1105_vlan_del(priv, port, vlan->vid); } static int sja1105_dsa_8021q_vlan_add(struct dsa_switch *ds, int port, u16 vid, @@ -2980,29 +2240,49 @@ static int sja1105_dsa_8021q_vlan_add(struct dsa_switch *ds, int port, u16 vid, struct sja1105_private *priv = ds->priv; int rc; - rc = sja1105_vlan_add_one(ds, port, vid, flags, &priv->dsa_8021q_vlans); - if (rc <= 0) + rc = sja1105_vlan_add(priv, port, vid, flags); + if (rc) return rc; - return sja1105_build_vlan_table(priv, true); + if (flags & BRIDGE_VLAN_INFO_PVID) + priv->tag_8021q_pvid[port] = vid; + + return sja1105_commit_pvid(ds, port); } static int sja1105_dsa_8021q_vlan_del(struct dsa_switch *ds, int port, u16 vid) { struct sja1105_private *priv = ds->priv; - int rc; - rc = sja1105_vlan_del_one(ds, port, vid, &priv->dsa_8021q_vlans); - if (!rc) - return 0; - - return sja1105_build_vlan_table(priv, true); + return sja1105_vlan_del(priv, port, vid); } -static const struct dsa_8021q_ops sja1105_dsa_8021q_ops = { - .vlan_add = sja1105_dsa_8021q_vlan_add, - .vlan_del = sja1105_dsa_8021q_vlan_del, -}; +static int 
sja1105_prechangeupper(struct dsa_switch *ds, int port, + struct netdev_notifier_changeupper_info *info) +{ + struct netlink_ext_ack *extack = info->info.extack; + struct net_device *upper = info->upper_dev; + struct dsa_switch_tree *dst = ds->dst; + struct dsa_port *dp; + + if (is_vlan_dev(upper)) { + NL_SET_ERR_MSG_MOD(extack, "8021q uppers are not supported"); + return -EBUSY; + } + + if (netif_is_bridge_master(upper)) { + list_for_each_entry(dp, &dst->ports, list) { + if (dp->bridge_dev && dp->bridge_dev != upper && + br_vlan_enabled(dp->bridge_dev)) { + NL_SET_ERR_MSG_MOD(extack, + "Only one VLAN-aware bridge is supported"); + return -EBUSY; + } + } + } + + return 0; +} /* The programming model for the SJA1105 switch is "all-at-once" via static * configuration tables. Some of these can be dynamically modified at runtime, @@ -3086,24 +2366,21 @@ static int sja1105_setup(struct dsa_switch *ds) * TPID is ETH_P_SJA1105, and the VLAN ID is the port pvid. */ ds->vlan_filtering_is_global = true; + ds->untag_bridge_pvid = true; + /* tag_8021q has 3 bits for the VBID, and the value 0 is reserved */ + ds->num_fwd_offloading_bridges = 7; /* Advertise the 8 egress queues */ ds->num_tx_queues = SJA1105_NUM_TC; ds->mtu_enforcement_ingress = true; - priv->best_effort_vlan_filtering = true; - rc = sja1105_devlink_setup(ds); if (rc < 0) goto out_static_config_free; - /* The DSA/switchdev model brings up switch ports in standalone mode by - * default, and that means vlan_filtering is 0 since they're not under - * a bridge, so it's safe to set up switch tagging at this time. 
- */ rtnl_lock(); - rc = sja1105_setup_8021q_tagging(ds, true); + rc = dsa_tag_8021q_register(ds, htons(ETH_P_8021Q)); rtnl_unlock(); if (rc) goto out_devlink_teardown; @@ -3125,9 +2402,12 @@ out_static_config_free: static void sja1105_teardown(struct dsa_switch *ds) { struct sja1105_private *priv = ds->priv; - struct sja1105_bridge_vlan *v, *n; int port; + rtnl_lock(); + dsa_tag_8021q_unregister(ds); + rtnl_unlock(); + for (port = 0; port < ds->num_ports; port++) { struct sja1105_port *sp = &priv->ports[port]; @@ -3143,16 +2423,6 @@ static void sja1105_teardown(struct dsa_switch *ds) sja1105_tas_teardown(ds); sja1105_ptp_clock_unregister(ds); sja1105_static_config_free(&priv->static_config); - - list_for_each_entry_safe(v, n, &priv->dsa_8021q_vlans, list) { - list_del(&v->list); - kfree(v); - } - - list_for_each_entry_safe(v, n, &priv->bridge_vlans, list) { - list_del(&v->list); - kfree(v); - } } static void sja1105_port_disable(struct dsa_switch *ds, int port) @@ -3592,8 +2862,8 @@ static const struct dsa_switch_ops sja1105_switch_ops = { .port_bridge_flags = sja1105_port_bridge_flags, .port_stp_state_set = sja1105_bridge_stp_state_set, .port_vlan_filtering = sja1105_vlan_filtering, - .port_vlan_add = sja1105_vlan_add, - .port_vlan_del = sja1105_vlan_del, + .port_vlan_add = sja1105_bridge_vlan_add, + .port_vlan_del = sja1105_bridge_vlan_del, .port_mdb_add = sja1105_mdb_add, .port_mdb_del = sja1105_mdb_del, .port_hwtstamp_get = sja1105_hwtstamp_get, @@ -3608,11 +2878,12 @@ static const struct dsa_switch_ops sja1105_switch_ops = { .cls_flower_add = sja1105_cls_flower_add, .cls_flower_del = sja1105_cls_flower_del, .cls_flower_stats = sja1105_cls_flower_stats, - .crosschip_bridge_join = sja1105_crosschip_bridge_join, - .crosschip_bridge_leave = sja1105_crosschip_bridge_leave, - .devlink_param_get = sja1105_devlink_param_get, - .devlink_param_set = sja1105_devlink_param_set, .devlink_info_get = sja1105_devlink_info_get, + .tag_8021q_vlan_add = 
sja1105_dsa_8021q_vlan_add, + .tag_8021q_vlan_del = sja1105_dsa_8021q_vlan_del, + .port_prechangeupper = sja1105_prechangeupper, + .port_bridge_tx_fwd_offload = dsa_tag_8021q_bridge_tx_fwd_offload, + .port_bridge_tx_fwd_unoffload = dsa_tag_8021q_bridge_tx_fwd_unoffload, }; static const struct of_device_id sja1105_dt_ids[]; @@ -3756,19 +3027,6 @@ static int sja1105_probe(struct spi_device *spi) mutex_init(&priv->ptp_data.lock); mutex_init(&priv->mgmt_lock); - priv->dsa_8021q_ctx = devm_kzalloc(dev, sizeof(*priv->dsa_8021q_ctx), - GFP_KERNEL); - if (!priv->dsa_8021q_ctx) - return -ENOMEM; - - priv->dsa_8021q_ctx->ops = &sja1105_dsa_8021q_ops; - priv->dsa_8021q_ctx->proto = htons(ETH_P_8021Q); - priv->dsa_8021q_ctx->ds = ds; - - INIT_LIST_HEAD(&priv->dsa_8021q_ctx->crosschip_links); - INIT_LIST_HEAD(&priv->bridge_vlans); - INIT_LIST_HEAD(&priv->dsa_8021q_vlans); - sja1105_tas_setup(ds); sja1105_flower_setup(ds); @@ -3791,7 +3049,6 @@ static int sja1105_probe(struct spi_device *spi) struct sja1105_port *sp = &priv->ports[port]; struct dsa_port *dp = dsa_to_port(ds, port); struct net_device *slave; - int subvlan; if (!dsa_is_user_port(ds, port)) continue; @@ -3812,9 +3069,6 @@ static int sja1105_probe(struct spi_device *spi) } skb_queue_head_init(&sp->xmit_queue); sp->xmit_tpid = ETH_P_SJA1105; - - for (subvlan = 0; subvlan < DSA_8021Q_N_SUBVLAN; subvlan++) - sp->subvlan_map[subvlan] = VLAN_N_VID; } return 0; @@ -3838,8 +3092,10 @@ out_unregister_switch: static int sja1105_remove(struct spi_device *spi) { struct sja1105_private *priv = spi_get_drvdata(spi); + struct dsa_switch *ds = priv->ds; + + dsa_unregister_switch(ds); - dsa_unregister_switch(priv->ds); return 0; } diff --git a/drivers/net/dsa/sja1105/sja1105_vl.c b/drivers/net/dsa/sja1105/sja1105_vl.c index f6e13e6c6a18..ec7b65daec20 100644 --- a/drivers/net/dsa/sja1105/sja1105_vl.c +++ b/drivers/net/dsa/sja1105/sja1105_vl.c @@ -496,14 +496,11 @@ int sja1105_vl_redirect(struct sja1105_private *priv, int port, 
struct sja1105_rule *rule = sja1105_rule_find(priv, cookie); int rc; - if (priv->vlan_state == SJA1105_VLAN_UNAWARE && - key->type != SJA1105_KEY_VLAN_UNAWARE_VL) { + if (!priv->vlan_aware && key->type != SJA1105_KEY_VLAN_UNAWARE_VL) { NL_SET_ERR_MSG_MOD(extack, "Can only redirect based on DMAC"); return -EOPNOTSUPP; - } else if ((priv->vlan_state == SJA1105_VLAN_BEST_EFFORT || - priv->vlan_state == SJA1105_VLAN_FILTERING_FULL) && - key->type != SJA1105_KEY_VLAN_AWARE_VL) { + } else if (priv->vlan_aware && key->type != SJA1105_KEY_VLAN_AWARE_VL) { NL_SET_ERR_MSG_MOD(extack, "Can only redirect based on {DMAC, VID, PCP}"); return -EOPNOTSUPP; @@ -595,14 +592,11 @@ int sja1105_vl_gate(struct sja1105_private *priv, int port, return -ERANGE; } - if (priv->vlan_state == SJA1105_VLAN_UNAWARE && - key->type != SJA1105_KEY_VLAN_UNAWARE_VL) { + if (!priv->vlan_aware && key->type != SJA1105_KEY_VLAN_UNAWARE_VL) { NL_SET_ERR_MSG_MOD(extack, "Can only gate based on DMAC"); return -EOPNOTSUPP; - } else if ((priv->vlan_state == SJA1105_VLAN_BEST_EFFORT || - priv->vlan_state == SJA1105_VLAN_FILTERING_FULL) && - key->type != SJA1105_KEY_VLAN_AWARE_VL) { + } else if (priv->vlan_aware && key->type != SJA1105_KEY_VLAN_AWARE_VL) { NL_SET_ERR_MSG_MOD(extack, "Can only gate based on {DMAC, VID, PCP}"); return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 27943b0446c2..f255fd0b16db 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -1858,7 +1858,6 @@ void bnx2x_iov_adjust_stats_req(struct bnx2x *bp) { int i; int first_queue_query_index, num_queues_req; - dma_addr_t cur_data_offset; struct stats_query_entry *cur_query_entry; u8 stats_count = 0; bool is_fcoe = false; @@ -1879,10 +1878,6 @@ void bnx2x_iov_adjust_stats_req(struct bnx2x *bp) BNX2X_NUM_ETH_QUEUES(bp), is_fcoe, first_queue_query_index, first_queue_query_index + 
num_queues_req); - cur_data_offset = bp->fw_stats_data_mapping + - offsetof(struct bnx2x_fw_stats_data, queue_stats) + - num_queues_req * sizeof(struct per_queue_stats); - cur_query_entry = &bp->fw_stats_req-> query[first_queue_query_index + num_queues_req]; @@ -1933,7 +1928,6 @@ void bnx2x_iov_adjust_stats_req(struct bnx2x *bp) cur_query_entry->funcID, j, cur_query_entry->index); cur_query_entry++; - cur_data_offset += sizeof(struct per_queue_stats); stats_count++; /* all stats are coalesced to the leading queue */ diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c index 68b78642c045..c233e8786e19 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c @@ -1889,8 +1889,12 @@ static int dpaa2_switch_port_attr_set_event(struct net_device *netdev, return notifier_from_errno(err); } +static struct notifier_block dpaa2_switch_port_switchdev_nb; +static struct notifier_block dpaa2_switch_port_switchdev_blocking_nb; + static int dpaa2_switch_port_bridge_join(struct net_device *netdev, - struct net_device *upper_dev) + struct net_device *upper_dev, + struct netlink_ext_ack *extack) { struct ethsw_port_priv *port_priv = netdev_priv(netdev); struct ethsw_core *ethsw = port_priv->ethsw_data; @@ -1906,8 +1910,8 @@ static int dpaa2_switch_port_bridge_join(struct net_device *netdev, other_port_priv = netdev_priv(other_dev); if (other_port_priv->ethsw_data != port_priv->ethsw_data) { - netdev_err(netdev, - "Interface from a different DPSW is in the bridge already!\n"); + NL_SET_ERR_MSG_MOD(extack, + "Interface from a different DPSW is in the bridge already"); return -EINVAL; } } @@ -1929,8 +1933,16 @@ static int dpaa2_switch_port_bridge_join(struct net_device *netdev, if (err) goto err_egress_flood; + err = switchdev_bridge_port_offload(netdev, netdev, NULL, + &dpaa2_switch_port_switchdev_nb, + &dpaa2_switch_port_switchdev_blocking_nb, + false, 
extack); + if (err) + goto err_switchdev_offload; + return 0; +err_switchdev_offload: err_egress_flood: dpaa2_switch_port_set_fdb(port_priv, NULL); return err; @@ -1956,6 +1968,13 @@ static int dpaa2_switch_port_restore_rxvlan(struct net_device *vdev, int vid, vo return dpaa2_switch_port_vlan_add(arg, vlan_proto, vid); } +static void dpaa2_switch_port_pre_bridge_leave(struct net_device *netdev) +{ + switchdev_bridge_port_unoffload(netdev, NULL, + &dpaa2_switch_port_switchdev_nb, + &dpaa2_switch_port_switchdev_blocking_nb); +} + static int dpaa2_switch_port_bridge_leave(struct net_device *netdev) { struct ethsw_port_priv *port_priv = netdev_priv(netdev); @@ -2029,6 +2048,28 @@ static int dpaa2_switch_prevent_bridging_with_8021q_upper(struct net_device *net return 0; } +static int +dpaa2_switch_prechangeupper_sanity_checks(struct net_device *netdev, + struct net_device *upper_dev, + struct netlink_ext_ack *extack) +{ + int err; + + if (!br_vlan_enabled(upper_dev)) { + NL_SET_ERR_MSG_MOD(extack, "Cannot join a VLAN-unaware bridge"); + return -EOPNOTSUPP; + } + + err = dpaa2_switch_prevent_bridging_with_8021q_upper(netdev); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot join a bridge while VLAN uppers are present"); + return 0; + } + + return 0; +} + static int dpaa2_switch_port_netdevice_event(struct notifier_block *nb, unsigned long event, void *ptr) { @@ -2049,25 +2090,23 @@ static int dpaa2_switch_port_netdevice_event(struct notifier_block *nb, if (!netif_is_bridge_master(upper_dev)) break; - if (!br_vlan_enabled(upper_dev)) { - NL_SET_ERR_MSG_MOD(extack, "Cannot join a VLAN-unaware bridge"); - err = -EOPNOTSUPP; + err = dpaa2_switch_prechangeupper_sanity_checks(netdev, + upper_dev, + extack); + if (err) goto out; - } - err = dpaa2_switch_prevent_bridging_with_8021q_upper(netdev); - if (err) { - NL_SET_ERR_MSG_MOD(extack, - "Cannot join a bridge while VLAN uppers are present"); - goto out; - } + if (!info->linking) + dpaa2_switch_port_pre_bridge_leave(netdev); 
break; case NETDEV_CHANGEUPPER: upper_dev = info->upper_dev; if (netif_is_bridge_master(upper_dev)) { if (info->linking) - err = dpaa2_switch_port_bridge_join(netdev, upper_dev); + err = dpaa2_switch_port_bridge_join(netdev, + upper_dev, + extack); else err = dpaa2_switch_port_bridge_leave(netdev); } diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c index 5bb56b454541..f089d33dd48e 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.c +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -322,7 +322,8 @@ static int gve_adminq_issue_cmd(struct gve_priv *priv, tail = ioread32be(&priv->reg_bar0->adminq_event_counter); // Check if next command will overflow the buffer. - if (((priv->adminq_prod_cnt + 1) & priv->adminq_mask) == tail) { + if (((priv->adminq_prod_cnt + 1) & priv->adminq_mask) == + (tail & priv->adminq_mask)) { int err; // Flush existing commands to make room. @@ -332,7 +333,8 @@ static int gve_adminq_issue_cmd(struct gve_priv *priv, // Retry. tail = ioread32be(&priv->reg_bar0->adminq_event_counter); - if (((priv->adminq_prod_cnt + 1) & priv->adminq_mask) == tail) { + if (((priv->adminq_prod_cnt + 1) & priv->adminq_mask) == + (tail & priv->adminq_mask)) { // This should never happen. We just flushed the // command queue so there should be enough space. return -ENOMEM; diff --git a/drivers/net/ethernet/hisilicon/Kconfig b/drivers/net/ethernet/hisilicon/Kconfig index bb062b02fb85..094e4a37a295 100644 --- a/drivers/net/ethernet/hisilicon/Kconfig +++ b/drivers/net/ethernet/hisilicon/Kconfig @@ -90,6 +90,7 @@ config HNS_ENET config HNS3 tristate "Hisilicon Network Subsystem Support HNS3 (Framework)" depends on PCI + select NET_DEVLINK help This selects the framework support for Hisilicon Network Subsystem 3. 
This layer facilitates clients like ENET, RoCE and user-space ethernet diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile index a685392dbfe9..d1bf5c4c0abb 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile @@ -7,6 +7,6 @@ ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3 ccflags-y += -I $(srctree)/$(src) obj-$(CONFIG_HNS3_HCLGE) += hclge.o -hclge-objs = hclge_main.o hclge_cmd.o hclge_mdio.o hclge_tm.o hclge_mbx.o hclge_err.o hclge_debugfs.o hclge_ptp.o +hclge-objs = hclge_main.o hclge_cmd.o hclge_mdio.o hclge_tm.o hclge_mbx.o hclge_err.o hclge_debugfs.o hclge_ptp.o hclge_devlink.o hclge-$(CONFIG_HNS3_DCB) += hclge_dcb.o diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c new file mode 100644 index 000000000000..06d29945d4e1 --- /dev/null +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (c) 2021 Hisilicon Limited. 
*/ + +#include <net/devlink.h> + +#include "hclge_devlink.h" + +static int hclge_devlink_info_get(struct devlink *devlink, + struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ +#define HCLGE_DEVLINK_FW_STRING_LEN 32 + struct hclge_devlink_priv *priv = devlink_priv(devlink); + char version_str[HCLGE_DEVLINK_FW_STRING_LEN]; + struct hclge_dev *hdev = priv->hdev; + int ret; + + ret = devlink_info_driver_name_put(req, KBUILD_MODNAME); + if (ret) + return ret; + + snprintf(version_str, sizeof(version_str), "%lu.%lu.%lu.%lu", + hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE3_MASK, + HNAE3_FW_VERSION_BYTE3_SHIFT), + hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE2_MASK, + HNAE3_FW_VERSION_BYTE2_SHIFT), + hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE1_MASK, + HNAE3_FW_VERSION_BYTE1_SHIFT), + hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE0_MASK, + HNAE3_FW_VERSION_BYTE0_SHIFT)); + + return devlink_info_version_running_put(req, + DEVLINK_INFO_VERSION_GENERIC_FW, + version_str); +} + +static int hclge_devlink_reload_down(struct devlink *devlink, bool netns_change, + enum devlink_reload_action action, + enum devlink_reload_limit limit, + struct netlink_ext_ack *extack) +{ + struct hclge_devlink_priv *priv = devlink_priv(devlink); + struct hclge_dev *hdev = priv->hdev; + struct hnae3_handle *h = &hdev->vport->nic; + struct pci_dev *pdev = hdev->pdev; + int ret; + + if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) { + dev_err(&pdev->dev, "reset is handling\n"); + return -EBUSY; + } + + switch (action) { + case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: + rtnl_lock(); + ret = hdev->nic_client->ops->reset_notify(h, HNAE3_DOWN_CLIENT); + if (ret) { + rtnl_unlock(); + return ret; + } + + ret = hdev->nic_client->ops->reset_notify(h, + HNAE3_UNINIT_CLIENT); + rtnl_unlock(); + return ret; + default: + return -EOPNOTSUPP; + } +} + +static int hclge_devlink_reload_up(struct devlink *devlink, + enum devlink_reload_action action, + enum 
devlink_reload_limit limit, + u32 *actions_performed, + struct netlink_ext_ack *extack) +{ + struct hclge_devlink_priv *priv = devlink_priv(devlink); + struct hclge_dev *hdev = priv->hdev; + struct hnae3_handle *h = &hdev->vport->nic; + int ret; + + *actions_performed = BIT(action); + switch (action) { + case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: + rtnl_lock(); + ret = hdev->nic_client->ops->reset_notify(h, HNAE3_INIT_CLIENT); + if (ret) { + rtnl_unlock(); + return ret; + } + + ret = hdev->nic_client->ops->reset_notify(h, HNAE3_UP_CLIENT); + rtnl_unlock(); + return ret; + default: + return -EOPNOTSUPP; + } +} + +static const struct devlink_ops hclge_devlink_ops = { + .info_get = hclge_devlink_info_get, + .reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT), + .reload_down = hclge_devlink_reload_down, + .reload_up = hclge_devlink_reload_up, +}; + +int hclge_devlink_init(struct hclge_dev *hdev) +{ + struct pci_dev *pdev = hdev->pdev; + struct hclge_devlink_priv *priv; + struct devlink *devlink; + int ret; + + devlink = devlink_alloc(&hclge_devlink_ops, + sizeof(struct hclge_devlink_priv)); + if (!devlink) + return -ENOMEM; + + priv = devlink_priv(devlink); + priv->hdev = hdev; + + ret = devlink_register(devlink, &pdev->dev); + if (ret) { + dev_err(&pdev->dev, "failed to register devlink, ret = %d\n", + ret); + goto out_reg_fail; + } + + hdev->devlink = devlink; + + devlink_reload_enable(devlink); + + return 0; + +out_reg_fail: + devlink_free(devlink); + return ret; +} + +void hclge_devlink_uninit(struct hclge_dev *hdev) +{ + struct devlink *devlink = hdev->devlink; + + if (!devlink) + return; + + devlink_reload_disable(devlink); + + devlink_unregister(devlink); + + devlink_free(devlink); + + hdev->devlink = NULL; +} diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.h new file mode 100644 index 000000000000..918be04507a5 --- /dev/null +++ 
b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* Copyright (c) 2021 Hisilicon Limited. */ + +#ifndef __HCLGE_DEVLINK_H +#define __HCLGE_DEVLINK_H + +#include "hclge_main.h" + +struct hclge_devlink_priv { + struct hclge_dev *hdev; +}; + +int hclge_devlink_init(struct hclge_dev *hdev); +void hclge_devlink_uninit(struct hclge_dev *hdev); +#endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index ebeaf12e409b..f15d76ec0068 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -23,6 +23,7 @@ #include "hclge_tm.h" #include "hclge_err.h" #include "hnae3.h" +#include "hclge_devlink.h" #define HCLGE_NAME "hclge" #define HCLGE_STATS_READ(p, offset) (*(u64 *)((u8 *)(p) + (offset))) @@ -11482,10 +11483,14 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) if (ret) goto out; + ret = hclge_devlink_init(hdev); + if (ret) + goto err_pci_uninit; + /* Firmware command queue initialize */ ret = hclge_cmd_queue_init(hdev); if (ret) - goto err_pci_uninit; + goto err_devlink_uninit; /* Firmware command initialize */ ret = hclge_cmd_init(hdev); @@ -11658,6 +11663,8 @@ err_msi_uninit: pci_free_irq_vectors(pdev); err_cmd_uninit: hclge_cmd_uninit(hdev); +err_devlink_uninit: + hclge_devlink_uninit(hdev); err_pci_uninit: pcim_iounmap(pdev, hdev->hw.io_base); pci_clear_master(pdev); @@ -12048,6 +12055,7 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev) hclge_cmd_uninit(hdev); hclge_misc_irq_uninit(hdev); + hclge_devlink_uninit(hdev); hclge_pci_uninit(hdev); mutex_destroy(&hdev->vport_lock); hclge_uninit_vport_vlan_table(hdev); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 3d3352491dba..cc31b12904ad 100644 --- 
a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -8,6 +8,7 @@ #include <linux/phy.h> #include <linux/if_vlan.h> #include <linux/kfifo.h> +#include <net/devlink.h> #include "hclge_cmd.h" #include "hclge_ptp.h" @@ -943,6 +944,7 @@ struct hclge_dev { cpumask_t affinity_mask; struct irq_affinity_notify affinity_notify; struct hclge_ptp *ptp; + struct devlink *devlink; }; /* VPort level vlan tag configuration for TX direction */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile index 2c26ea607a53..51ff7d86ee90 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile @@ -7,4 +7,4 @@ ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3 ccflags-y += -I $(srctree)/$(src) obj-$(CONFIG_HNS3_HCLGEVF) += hclgevf.o -hclgevf-objs = hclgevf_main.o hclgevf_cmd.o hclgevf_mbx.o +hclgevf-objs = hclgevf_main.o hclgevf_cmd.o hclgevf_mbx.o hclgevf_devlink.o diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c new file mode 100644 index 000000000000..21a45279fd99 --- /dev/null +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c @@ -0,0 +1,155 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (c) 2021 Hisilicon Limited. 
*/ + +#include <net/devlink.h> + +#include "hclgevf_devlink.h" + +static int hclgevf_devlink_info_get(struct devlink *devlink, + struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ +#define HCLGEVF_DEVLINK_FW_STRING_LEN 32 + struct hclgevf_devlink_priv *priv = devlink_priv(devlink); + char version_str[HCLGEVF_DEVLINK_FW_STRING_LEN]; + struct hclgevf_dev *hdev = priv->hdev; + int ret; + + ret = devlink_info_driver_name_put(req, KBUILD_MODNAME); + if (ret) + return ret; + + snprintf(version_str, sizeof(version_str), "%lu.%lu.%lu.%lu", + hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE3_MASK, + HNAE3_FW_VERSION_BYTE3_SHIFT), + hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE2_MASK, + HNAE3_FW_VERSION_BYTE2_SHIFT), + hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE1_MASK, + HNAE3_FW_VERSION_BYTE1_SHIFT), + hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE0_MASK, + HNAE3_FW_VERSION_BYTE0_SHIFT)); + + return devlink_info_version_running_put(req, + DEVLINK_INFO_VERSION_GENERIC_FW, + version_str); +} + +static int hclgevf_devlink_reload_down(struct devlink *devlink, + bool netns_change, + enum devlink_reload_action action, + enum devlink_reload_limit limit, + struct netlink_ext_ack *extack) +{ + struct hclgevf_devlink_priv *priv = devlink_priv(devlink); + struct hclgevf_dev *hdev = priv->hdev; + struct hnae3_handle *h = &hdev->nic; + struct pci_dev *pdev = hdev->pdev; + int ret; + + if (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) { + dev_err(&pdev->dev, "reset is handling\n"); + return -EBUSY; + } + + switch (action) { + case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: + rtnl_lock(); + ret = hdev->nic_client->ops->reset_notify(h, HNAE3_DOWN_CLIENT); + if (ret) { + rtnl_unlock(); + return ret; + } + + ret = hdev->nic_client->ops->reset_notify(h, + HNAE3_UNINIT_CLIENT); + rtnl_unlock(); + return ret; + default: + return -EOPNOTSUPP; + } +} + +static int hclgevf_devlink_reload_up(struct devlink *devlink, + enum devlink_reload_action 
action, + enum devlink_reload_limit limit, + u32 *actions_performed, + struct netlink_ext_ack *extack) +{ + struct hclgevf_devlink_priv *priv = devlink_priv(devlink); + struct hclgevf_dev *hdev = priv->hdev; + struct hnae3_handle *h = &hdev->nic; + int ret; + + *actions_performed = BIT(action); + switch (action) { + case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: + rtnl_lock(); + ret = hdev->nic_client->ops->reset_notify(h, HNAE3_INIT_CLIENT); + if (ret) { + rtnl_unlock(); + return ret; + } + + ret = hdev->nic_client->ops->reset_notify(h, HNAE3_UP_CLIENT); + rtnl_unlock(); + return ret; + default: + return -EOPNOTSUPP; + } +} + +static const struct devlink_ops hclgevf_devlink_ops = { + .info_get = hclgevf_devlink_info_get, + .reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT), + .reload_down = hclgevf_devlink_reload_down, + .reload_up = hclgevf_devlink_reload_up, +}; + +int hclgevf_devlink_init(struct hclgevf_dev *hdev) +{ + struct pci_dev *pdev = hdev->pdev; + struct hclgevf_devlink_priv *priv; + struct devlink *devlink; + int ret; + + devlink = devlink_alloc(&hclgevf_devlink_ops, + sizeof(struct hclgevf_devlink_priv)); + if (!devlink) + return -ENOMEM; + + priv = devlink_priv(devlink); + priv->hdev = hdev; + + ret = devlink_register(devlink, &pdev->dev); + if (ret) { + dev_err(&pdev->dev, "failed to register devlink, ret = %d\n", + ret); + goto out_reg_fail; + } + + hdev->devlink = devlink; + + devlink_reload_enable(devlink); + + return 0; + +out_reg_fail: + devlink_free(devlink); + return ret; +} + +void hclgevf_devlink_uninit(struct hclgevf_dev *hdev) +{ + struct devlink *devlink = hdev->devlink; + + if (!devlink) + return; + + devlink_reload_disable(devlink); + + devlink_unregister(devlink); + + devlink_free(devlink); + + hdev->devlink = NULL; +} diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.h new file mode 100644 index 000000000000..e09ea3d8a963 --- /dev/null +++ 
b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* Copyright (c) 2021 Hisilicon Limited. */ + +#ifndef __HCLGEVF_DEVLINK_H +#define __HCLGEVF_DEVLINK_H + +#include "hclgevf_main.h" + +struct hclgevf_devlink_priv { + struct hclgevf_dev *hdev; +}; + +int hclgevf_devlink_init(struct hclgevf_dev *hdev); +void hclgevf_devlink_uninit(struct hclgevf_dev *hdev); +#endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 8784d61e833f..3a19f08bfff3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -8,6 +8,7 @@ #include "hclgevf_main.h" #include "hclge_mbx.h" #include "hnae3.h" +#include "hclgevf_devlink.h" #define HCLGEVF_NAME "hclgevf" @@ -3337,6 +3338,10 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev) if (ret) return ret; + ret = hclgevf_devlink_init(hdev); + if (ret) + goto err_devlink_init; + ret = hclgevf_cmd_queue_init(hdev); if (ret) goto err_cmd_queue_init; @@ -3441,6 +3446,8 @@ err_misc_irq_init: err_cmd_init: hclgevf_cmd_uninit(hdev); err_cmd_queue_init: + hclgevf_devlink_uninit(hdev); +err_devlink_init: hclgevf_pci_uninit(hdev); clear_bit(HCLGEVF_STATE_IRQ_INITED, &hdev->state); return ret; @@ -3462,6 +3469,7 @@ static void hclgevf_uninit_hdev(struct hclgevf_dev *hdev) } hclgevf_cmd_uninit(hdev); + hclgevf_devlink_uninit(hdev); hclgevf_pci_uninit(hdev); hclgevf_uninit_mac_list(hdev); } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index d7d02848d674..6f222a3a0bf2 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -6,6 +6,7 @@ #include <linux/fs.h> #include <linux/if_vlan.h> #include <linux/types.h> +#include <net/devlink.h> #include 
"hclge_mbx.h" #include "hclgevf_cmd.h" #include "hnae3.h" @@ -330,6 +331,8 @@ struct hclgevf_dev { u32 flag; unsigned long serv_processed_cnt; unsigned long last_serv_processed; + + struct devlink *devlink; }; static inline bool hclgevf_is_reset_pending(struct hclgevf_dev *hdev) diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index 1b0958bd24f6..1ec924c556c5 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -2715,10 +2715,10 @@ static void e100_get_strings(struct net_device *netdev, u32 stringset, u8 *data) { switch (stringset) { case ETH_SS_TEST: - memcpy(data, *e100_gstrings_test, sizeof(e100_gstrings_test)); + memcpy(data, e100_gstrings_test, sizeof(e100_gstrings_test)); break; case ETH_SS_STATS: - memcpy(data, *e100_gstrings_stats, sizeof(e100_gstrings_stats)); + memcpy(data, e100_gstrings_stats, sizeof(e100_gstrings_stats)); break; } } diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c index 06442e6bef73..7256b43b7a65 100644 --- a/drivers/net/ethernet/intel/e1000e/ethtool.c +++ b/drivers/net/ethernet/intel/e1000e/ethtool.c @@ -903,6 +903,7 @@ static int e1000_reg_test(struct e1000_adapter *adapter, u64 *data) case e1000_pch_tgp: case e1000_pch_adp: case e1000_pch_mtp: + case e1000_pch_lnp: mask |= BIT(18); break; default: @@ -1569,6 +1570,7 @@ static void e1000_loopback_cleanup(struct e1000_adapter *adapter) case e1000_pch_tgp: case e1000_pch_adp: case e1000_pch_mtp: + case e1000_pch_lnp: fext_nvm11 = er32(FEXTNVM11); fext_nvm11 &= ~E1000_FEXTNVM11_DISABLE_MULR_FIX; ew32(FEXTNVM11, fext_nvm11); diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h index db79c4e6413e..bcf680e83811 100644 --- a/drivers/net/ethernet/intel/e1000e/hw.h +++ b/drivers/net/ethernet/intel/e1000e/hw.h @@ -98,14 +98,22 @@ struct e1000_hw; #define E1000_DEV_ID_PCH_TGP_I219_V14 0x15FA #define E1000_DEV_ID_PCH_TGP_I219_LM15 
0x15F4 #define E1000_DEV_ID_PCH_TGP_I219_V15 0x15F5 +#define E1000_DEV_ID_PCH_RPL_I219_LM23 0x0DC5 +#define E1000_DEV_ID_PCH_RPL_I219_V23 0x0DC6 #define E1000_DEV_ID_PCH_ADP_I219_LM16 0x1A1E #define E1000_DEV_ID_PCH_ADP_I219_V16 0x1A1F #define E1000_DEV_ID_PCH_ADP_I219_LM17 0x1A1C #define E1000_DEV_ID_PCH_ADP_I219_V17 0x1A1D +#define E1000_DEV_ID_PCH_RPL_I219_LM22 0x0DC7 +#define E1000_DEV_ID_PCH_RPL_I219_V22 0x0DC8 #define E1000_DEV_ID_PCH_MTP_I219_LM18 0x550A #define E1000_DEV_ID_PCH_MTP_I219_V18 0x550B #define E1000_DEV_ID_PCH_MTP_I219_LM19 0x550C #define E1000_DEV_ID_PCH_MTP_I219_V19 0x550D +#define E1000_DEV_ID_PCH_LNP_I219_LM20 0x550E +#define E1000_DEV_ID_PCH_LNP_I219_V20 0x550F +#define E1000_DEV_ID_PCH_LNP_I219_LM21 0x5510 +#define E1000_DEV_ID_PCH_LNP_I219_V21 0x5511 #define E1000_REVISION_4 4 @@ -132,6 +140,7 @@ enum e1000_mac_type { e1000_pch_tgp, e1000_pch_adp, e1000_pch_mtp, + e1000_pch_lnp, }; enum e1000_media_type { diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index cf7b3887da1d..2f97c9f5611d 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -321,6 +321,7 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw) case e1000_pch_tgp: case e1000_pch_adp: case e1000_pch_mtp: + case e1000_pch_lnp: if (e1000_phy_is_accessible_pchlan(hw)) break; @@ -466,6 +467,7 @@ static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw) case e1000_pch_tgp: case e1000_pch_adp: case e1000_pch_mtp: + case e1000_pch_lnp: /* In case the PHY needs to be in mdio slow mode, * set slow mode and try to get the PHY id again. 
*/ @@ -711,6 +713,7 @@ static s32 e1000_init_mac_params_ich8lan(struct e1000_hw *hw) case e1000_pch_tgp: case e1000_pch_adp: case e1000_pch_mtp: + case e1000_pch_lnp: case e1000_pchlan: /* check management mode */ mac->ops.check_mng_mode = e1000_check_mng_mode_pchlan; @@ -1266,9 +1269,11 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force) usleep_range(10000, 11000); } if (firmware_bug) - e_warn("ULP_CONFIG_DONE took %dmsec. This is a firmware bug\n", i * 10); + e_warn("ULP_CONFIG_DONE took %d msec. This is a firmware bug\n", + i * 10); else - e_dbg("ULP_CONFIG_DONE cleared after %dmsec\n", i * 10); + e_dbg("ULP_CONFIG_DONE cleared after %d msec\n", + i * 10); if (force) { mac_reg = er32(H2ME); @@ -1663,6 +1668,7 @@ static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter) case e1000_pch_tgp: case e1000_pch_adp: case e1000_pch_mtp: + case e1000_pch_lnp: rc = e1000_init_phy_params_pchlan(hw); break; default: @@ -2118,6 +2124,7 @@ static s32 e1000_sw_lcd_config_ich8lan(struct e1000_hw *hw) case e1000_pch_tgp: case e1000_pch_adp: case e1000_pch_mtp: + case e1000_pch_lnp: sw_cfg_mask = E1000_FEXTNVM_SW_CONFIG_ICH8M; break; default: @@ -3162,6 +3169,7 @@ static s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank) case e1000_pch_tgp: case e1000_pch_adp: case e1000_pch_mtp: + case e1000_pch_lnp: bank1_offset = nvm->flash_bank_size; act_offset = E1000_ICH_NVM_SIG_WORD; @@ -4101,6 +4109,7 @@ static s32 e1000_validate_nvm_checksum_ich8lan(struct e1000_hw *hw) case e1000_pch_tgp: case e1000_pch_adp: case e1000_pch_mtp: + case e1000_pch_lnp: word = NVM_COMPAT; valid_csum_mask = NVM_COMPAT_VALID_CSUM; break; diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.h b/drivers/net/ethernet/intel/e1000e/ich8lan.h index 1502895eb45d..9b145f6248a8 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.h +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.h @@ -41,12 +41,15 @@ #define E1000_FWSM_WLOCK_MAC_MASK 0x0380 #define 
E1000_FWSM_WLOCK_MAC_SHIFT 7 #define E1000_FWSM_ULP_CFG_DONE 0x00000400 /* Low power cfg done */ +#define E1000_EXFWSM_DPG_EXIT_DONE 0x00000001 /* Shared Receive Address Registers */ #define E1000_SHRAL_PCH_LPT(_i) (0x05408 + ((_i) * 8)) #define E1000_SHRAH_PCH_LPT(_i) (0x0540C + ((_i) * 8)) #define E1000_H2ME 0x05B50 /* Host to ME */ +#define E1000_H2ME_START_DPG 0x00000001 /* indicate the ME of DPG */ +#define E1000_H2ME_EXIT_DPG 0x00000002 /* indicate the ME exit DPG */ #define E1000_H2ME_ULP 0x00000800 /* ULP Indication Bit */ #define E1000_H2ME_ENFORCE_SETTINGS 0x00001000 /* Enforce Settings */ diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 757a54c39eef..3c22b509fa79 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -3550,6 +3550,7 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca) case e1000_pch_tgp: case e1000_pch_adp: case e1000_pch_mtp: + case e1000_pch_lnp: if (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI) { /* Stable 24MHz frequency */ incperiod = INCPERIOD_24MHZ; @@ -4068,6 +4069,7 @@ void e1000e_reset(struct e1000_adapter *adapter) case e1000_pch_tgp: case e1000_pch_adp: case e1000_pch_mtp: + case e1000_pch_lnp: fc->refresh_time = 0xFFFF; fc->pause_time = 0xFFFF; @@ -6343,42 +6345,110 @@ static void e1000e_s0ix_entry_flow(struct e1000_adapter *adapter) u32 mac_data; u16 phy_data; - /* Disable the periodic inband message, - * don't request PCIe clock in K1 page770_17[10:9] = 10b - */ - e1e_rphy(hw, HV_PM_CTRL, &phy_data); - phy_data &= ~HV_PM_CTRL_K1_CLK_REQ; - phy_data |= BIT(10); - e1e_wphy(hw, HV_PM_CTRL, phy_data); + if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID) { + /* Request ME configure the device for S0ix */ + mac_data = er32(H2ME); + mac_data |= E1000_H2ME_START_DPG; + mac_data &= ~E1000_H2ME_EXIT_DPG; + ew32(H2ME, mac_data); + } else { + /* Request driver configure the device to S0ix */ + /* Disable the 
periodic inband message, + * don't request PCIe clock in K1 page770_17[10:9] = 10b + */ + e1e_rphy(hw, HV_PM_CTRL, &phy_data); + phy_data &= ~HV_PM_CTRL_K1_CLK_REQ; + phy_data |= BIT(10); + e1e_wphy(hw, HV_PM_CTRL, phy_data); - /* Make sure we don't exit K1 every time a new packet arrives - * 772_29[5] = 1 CS_Mode_Stay_In_K1 - */ - e1e_rphy(hw, I217_CGFREG, &phy_data); - phy_data |= BIT(5); - e1e_wphy(hw, I217_CGFREG, phy_data); + /* Make sure we don't exit K1 every time a new packet arrives + * 772_29[5] = 1 CS_Mode_Stay_In_K1 + */ + e1e_rphy(hw, I217_CGFREG, &phy_data); + phy_data |= BIT(5); + e1e_wphy(hw, I217_CGFREG, phy_data); - /* Change the MAC/PHY interface to SMBus - * Force the SMBus in PHY page769_23[0] = 1 - * Force the SMBus in MAC CTRL_EXT[11] = 1 - */ - e1e_rphy(hw, CV_SMB_CTRL, &phy_data); - phy_data |= CV_SMB_CTRL_FORCE_SMBUS; - e1e_wphy(hw, CV_SMB_CTRL, phy_data); - mac_data = er32(CTRL_EXT); - mac_data |= E1000_CTRL_EXT_FORCE_SMBUS; - ew32(CTRL_EXT, mac_data); + /* Change the MAC/PHY interface to SMBus + * Force the SMBus in PHY page769_23[0] = 1 + * Force the SMBus in MAC CTRL_EXT[11] = 1 + */ + e1e_rphy(hw, CV_SMB_CTRL, &phy_data); + phy_data |= CV_SMB_CTRL_FORCE_SMBUS; + e1e_wphy(hw, CV_SMB_CTRL, phy_data); + mac_data = er32(CTRL_EXT); + mac_data |= E1000_CTRL_EXT_FORCE_SMBUS; + ew32(CTRL_EXT, mac_data); + + /* DFT control: PHY bit: page769_20[0] = 1 + * page769_20[7] - PHY PLL stop + * page769_20[8] - PHY go to the electrical idle + * page769_20[9] - PHY serdes disable + * Gate PPW via EXTCNF_CTRL - set 0x0F00[7] = 1 + */ + e1e_rphy(hw, I82579_DFT_CTRL, &phy_data); + phy_data |= BIT(0); + phy_data |= BIT(7); + phy_data |= BIT(8); + phy_data |= BIT(9); + e1e_wphy(hw, I82579_DFT_CTRL, phy_data); + + mac_data = er32(EXTCNF_CTRL); + mac_data |= E1000_EXTCNF_CTRL_GATE_PHY_CFG; + ew32(EXTCNF_CTRL, mac_data); + + /* Enable the Dynamic Power Gating in the MAC */ + mac_data = er32(FEXTNVM7); + mac_data |= BIT(22); + ew32(FEXTNVM7, mac_data); + + /* 
Disable disconnected cable conditioning for Power Gating */ + mac_data = er32(DPGFR); + mac_data |= BIT(2); + ew32(DPGFR, mac_data); + + /* Don't wake from dynamic Power Gating with clock request */ + mac_data = er32(FEXTNVM12); + mac_data |= BIT(12); + ew32(FEXTNVM12, mac_data); + + /* Ungate PGCB clock */ + mac_data = er32(FEXTNVM9); + mac_data &= ~BIT(28); + ew32(FEXTNVM9, mac_data); + + /* Enable K1 off to enable mPHY Power Gating */ + mac_data = er32(FEXTNVM6); + mac_data |= BIT(31); + ew32(FEXTNVM6, mac_data); + + /* Enable mPHY power gating for any link and speed */ + mac_data = er32(FEXTNVM8); + mac_data |= BIT(9); + ew32(FEXTNVM8, mac_data); + + /* Enable the Dynamic Clock Gating in the DMA and MAC */ + mac_data = er32(CTRL_EXT); + mac_data |= E1000_CTRL_EXT_DMA_DYN_CLK_EN; + ew32(CTRL_EXT, mac_data); + + /* No MAC DPG gating SLP_S0 in modern standby + * Switch the logic of the lanphypc to use PMC counter + */ + mac_data = er32(FEXTNVM5); + mac_data |= BIT(7); + ew32(FEXTNVM5, mac_data); + } - /* DFT control: PHY bit: page769_20[0] = 1 - * Gate PPW via EXTCNF_CTRL - set 0x0F00[7] = 1 - */ - e1e_rphy(hw, I82579_DFT_CTRL, &phy_data); - phy_data |= BIT(0); - e1e_wphy(hw, I82579_DFT_CTRL, phy_data); + /* Disable the time synchronization clock */ + mac_data = er32(FEXTNVM7); + mac_data |= BIT(31); + mac_data &= ~BIT(0); + ew32(FEXTNVM7, mac_data); - mac_data = er32(EXTCNF_CTRL); - mac_data |= E1000_EXTCNF_CTRL_GATE_PHY_CFG; - ew32(EXTCNF_CTRL, mac_data); + /* Dynamic Power Gating Enable */ + mac_data = er32(CTRL_EXT); + mac_data |= BIT(3); + ew32(CTRL_EXT, mac_data); /* Check MAC Tx/Rx packet buffer pointers. 
* Reset MAC Tx/Rx packet buffer pointers to suppress any @@ -6414,148 +6484,130 @@ static void e1000e_s0ix_entry_flow(struct e1000_adapter *adapter) mac_data = er32(RDFPC); if (mac_data) ew32(RDFPC, 0); - - /* Enable the Dynamic Power Gating in the MAC */ - mac_data = er32(FEXTNVM7); - mac_data |= BIT(22); - ew32(FEXTNVM7, mac_data); - - /* Disable the time synchronization clock */ - mac_data = er32(FEXTNVM7); - mac_data |= BIT(31); - mac_data &= ~BIT(0); - ew32(FEXTNVM7, mac_data); - - /* Dynamic Power Gating Enable */ - mac_data = er32(CTRL_EXT); - mac_data |= BIT(3); - ew32(CTRL_EXT, mac_data); - - /* Disable disconnected cable conditioning for Power Gating */ - mac_data = er32(DPGFR); - mac_data |= BIT(2); - ew32(DPGFR, mac_data); - - /* Don't wake from dynamic Power Gating with clock request */ - mac_data = er32(FEXTNVM12); - mac_data |= BIT(12); - ew32(FEXTNVM12, mac_data); - - /* Ungate PGCB clock */ - mac_data = er32(FEXTNVM9); - mac_data &= ~BIT(28); - ew32(FEXTNVM9, mac_data); - - /* Enable K1 off to enable mPHY Power Gating */ - mac_data = er32(FEXTNVM6); - mac_data |= BIT(31); - ew32(FEXTNVM6, mac_data); - - /* Enable mPHY power gating for any link and speed */ - mac_data = er32(FEXTNVM8); - mac_data |= BIT(9); - ew32(FEXTNVM8, mac_data); - - /* Enable the Dynamic Clock Gating in the DMA and MAC */ - mac_data = er32(CTRL_EXT); - mac_data |= E1000_CTRL_EXT_DMA_DYN_CLK_EN; - ew32(CTRL_EXT, mac_data); - - /* No MAC DPG gating SLP_S0 in modern standby - * Switch the logic of the lanphypc to use PMC counter - */ - mac_data = er32(FEXTNVM5); - mac_data |= BIT(7); - ew32(FEXTNVM5, mac_data); } static void e1000e_s0ix_exit_flow(struct e1000_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; + bool firmware_bug = false; u32 mac_data; u16 phy_data; + u32 i = 0; + + if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID) { + /* Request ME unconfigure the device from S0ix */ + mac_data = er32(H2ME); + mac_data &= ~E1000_H2ME_START_DPG; + mac_data |= E1000_H2ME_EXIT_DPG; + 
ew32(H2ME, mac_data); + + /* Poll up to 2.5 seconds for ME to unconfigure DPG. + * If this takes more than 1 second, show a warning indicating a + * firmware bug + */ + while (!(er32(EXFWSM) & E1000_EXFWSM_DPG_EXIT_DONE)) { + if (i > 100 && !firmware_bug) + firmware_bug = true; - /* Disable the Dynamic Power Gating in the MAC */ - mac_data = er32(FEXTNVM7); - mac_data &= 0xFFBFFFFF; - ew32(FEXTNVM7, mac_data); + if (i++ == 250) { + e_dbg("Timeout (firmware bug): %d msec\n", + i * 10); + break; + } - /* Enable the time synchronization clock */ - mac_data = er32(FEXTNVM7); - mac_data |= BIT(0); - ew32(FEXTNVM7, mac_data); + usleep_range(10000, 11000); + } + if (firmware_bug) + e_warn("DPG_EXIT_DONE took %d msec. This is a firmware bug\n", + i * 10); + else + e_dbg("DPG_EXIT_DONE cleared after %d msec\n", i * 10); + } else { + /* Request driver unconfigure the device from S0ix */ + + /* Disable the Dynamic Power Gating in the MAC */ + mac_data = er32(FEXTNVM7); + mac_data &= 0xFFBFFFFF; + ew32(FEXTNVM7, mac_data); + + /* Disable mPHY power gating for any link and speed */ + mac_data = er32(FEXTNVM8); + mac_data &= ~BIT(9); + ew32(FEXTNVM8, mac_data); + + /* Disable K1 off */ + mac_data = er32(FEXTNVM6); + mac_data &= ~BIT(31); + ew32(FEXTNVM6, mac_data); + + /* Disable Ungate PGCB clock */ + mac_data = er32(FEXTNVM9); + mac_data |= BIT(28); + ew32(FEXTNVM9, mac_data); + + /* Cancel not waking from dynamic + * Power Gating with clock request + */ + mac_data = er32(FEXTNVM12); + mac_data &= ~BIT(12); + ew32(FEXTNVM12, mac_data); - /* Disable mPHY power gating for any link and speed */ - mac_data = er32(FEXTNVM8); - mac_data &= ~BIT(9); - ew32(FEXTNVM8, mac_data); + /* Cancel disable disconnected cable conditioning + * for Power Gating + */ + mac_data = er32(DPGFR); + mac_data &= ~BIT(2); + ew32(DPGFR, mac_data); - /* Disable K1 off */ - mac_data = er32(FEXTNVM6); - mac_data &= ~BIT(31); - ew32(FEXTNVM6, mac_data); + /* Disable the Dynamic Clock Gating in the DMA and MAC 
*/ + mac_data = er32(CTRL_EXT); + mac_data &= 0xFFF7FFFF; + ew32(CTRL_EXT, mac_data); - /* Disable Ungate PGCB clock */ - mac_data = er32(FEXTNVM9); - mac_data |= BIT(28); - ew32(FEXTNVM9, mac_data); + /* Revert the lanphypc logic to use the internal Gbe counter + * and not the PMC counter + */ + mac_data = er32(FEXTNVM5); + mac_data &= 0xFFFFFF7F; + ew32(FEXTNVM5, mac_data); - /* Cancel not waking from dynamic - * Power Gating with clock request - */ - mac_data = er32(FEXTNVM12); - mac_data &= ~BIT(12); - ew32(FEXTNVM12, mac_data); + /* Enable the periodic inband message, + * Request PCIe clock in K1 page770_17[10:9] =01b + */ + e1e_rphy(hw, HV_PM_CTRL, &phy_data); + phy_data &= 0xFBFF; + phy_data |= HV_PM_CTRL_K1_CLK_REQ; + e1e_wphy(hw, HV_PM_CTRL, phy_data); - /* Cancel disable disconnected cable conditioning - * for Power Gating - */ - mac_data = er32(DPGFR); - mac_data &= ~BIT(2); - ew32(DPGFR, mac_data); + /* Return back configuration + * 772_29[5] = 0 CS_Mode_Stay_In_K1 + */ + e1e_rphy(hw, I217_CGFREG, &phy_data); + phy_data &= 0xFFDF; + e1e_wphy(hw, I217_CGFREG, phy_data); + + /* Change the MAC/PHY interface to Kumeran + * Unforce the SMBus in PHY page769_23[0] = 0 + * Unforce the SMBus in MAC CTRL_EXT[11] = 0 + */ + e1e_rphy(hw, CV_SMB_CTRL, &phy_data); + phy_data &= ~CV_SMB_CTRL_FORCE_SMBUS; + e1e_wphy(hw, CV_SMB_CTRL, phy_data); + mac_data = er32(CTRL_EXT); + mac_data &= ~E1000_CTRL_EXT_FORCE_SMBUS; + ew32(CTRL_EXT, mac_data); + } /* Disable Dynamic Power Gating */ mac_data = er32(CTRL_EXT); mac_data &= 0xFFFFFFF7; ew32(CTRL_EXT, mac_data); - /* Disable the Dynamic Clock Gating in the DMA and MAC */ - mac_data = er32(CTRL_EXT); - mac_data &= 0xFFF7FFFF; - ew32(CTRL_EXT, mac_data); - - /* Revert the lanphypc logic to use the internal Gbe counter - * and not the PMC counter - */ - mac_data = er32(FEXTNVM5); - mac_data &= 0xFFFFFF7F; - ew32(FEXTNVM5, mac_data); - - /* Enable the periodic inband message, - * Request PCIe clock in K1 page770_17[10:9] =01b - 
*/ - e1e_rphy(hw, HV_PM_CTRL, &phy_data); - phy_data &= 0xFBFF; - phy_data |= HV_PM_CTRL_K1_CLK_REQ; - e1e_wphy(hw, HV_PM_CTRL, phy_data); - - /* Return back configuration - * 772_29[5] = 0 CS_Mode_Stay_In_K1 - */ - e1e_rphy(hw, I217_CGFREG, &phy_data); - phy_data &= 0xFFDF; - e1e_wphy(hw, I217_CGFREG, phy_data); - - /* Change the MAC/PHY interface to Kumeran - * Unforce the SMBus in PHY page769_23[0] = 0 - * Unforce the SMBus in MAC CTRL_EXT[11] = 0 - */ - e1e_rphy(hw, CV_SMB_CTRL, &phy_data); - phy_data &= ~CV_SMB_CTRL_FORCE_SMBUS; - e1e_wphy(hw, CV_SMB_CTRL, phy_data); - mac_data = er32(CTRL_EXT); - mac_data &= ~E1000_CTRL_EXT_FORCE_SMBUS; - ew32(CTRL_EXT, mac_data); + /* Enable the time synchronization clock */ + mac_data = er32(FEXTNVM7); + mac_data &= ~BIT(31); + mac_data |= BIT(0); + ew32(FEXTNVM7, mac_data); } static int e1000e_pm_freeze(struct device *dev) @@ -7677,7 +7729,7 @@ err_dma: * @pdev: PCI device information struct * * e1000_remove is called by the PCI subsystem to alert the driver - * that it should release a PCI device. The could be caused by a + * that it should release a PCI device. This could be caused by a * Hot-Plug event, or because the driver is going to be removed from * memory. 
**/ @@ -7850,14 +7902,22 @@ static const struct pci_device_id e1000_pci_tbl[] = { { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V14), board_pch_cnp }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_LM15), board_pch_cnp }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V15), board_pch_cnp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_LM23), board_pch_cnp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_V23), board_pch_cnp }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM16), board_pch_cnp }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V16), board_pch_cnp }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM17), board_pch_cnp }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V17), board_pch_cnp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_LM22), board_pch_cnp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_V22), board_pch_cnp }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM18), board_pch_cnp }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V18), board_pch_cnp }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM19), board_pch_cnp }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V19), board_pch_cnp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_LM20), board_pch_cnp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_V20), board_pch_cnp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_LM21), board_pch_cnp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_V21), board_pch_cnp }, { 0, 0, 0, 0, 0, 0, 0 } /* terminate list */ }; diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c index 9e79d672f4f1..eb5c014c02fb 100644 --- a/drivers/net/ethernet/intel/e1000e/ptp.c +++ b/drivers/net/ethernet/intel/e1000e/ptp.c @@ -298,6 +298,7 @@ void e1000e_ptp_init(struct e1000_adapter *adapter) case e1000_pch_tgp: case e1000_pch_adp: case e1000_pch_mtp: + case e1000_pch_lnp: if ((hw->mac.type < e1000_pch_lpt) || (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI)) { adapter->ptp_clock_info.max_adj = 24000000 - 1; 
diff --git a/drivers/net/ethernet/intel/e1000e/regs.h b/drivers/net/ethernet/intel/e1000e/regs.h index 8165ba2619a4..6c0cd8cab3ef 100644 --- a/drivers/net/ethernet/intel/e1000e/regs.h +++ b/drivers/net/ethernet/intel/e1000e/regs.h @@ -213,6 +213,7 @@ #define E1000_FACTPS 0x05B30 /* Function Active and Power State to MNG */ #define E1000_SWSM 0x05B50 /* SW Semaphore */ #define E1000_FWSM 0x05B54 /* FW Semaphore */ +#define E1000_EXFWSM 0x05B58 /* Extended FW Semaphore */ /* Driver-only SW semaphore (not used by BOOT agents) */ #define E1000_SWSM2 0x05B58 #define E1000_FFLT_DBG 0x05F04 /* Debug Register */ diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index b9417dc0007c..39fb3d57c057 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -428,6 +428,8 @@ struct i40e_channel { struct i40e_vsi *parent_vsi; }; +struct i40e_ptp_pins_settings; + static inline bool i40e_is_channel_macvlan(struct i40e_channel *ch) { return !!ch->fwd; @@ -644,12 +646,83 @@ struct i40e_pf { struct i40e_rx_pb_config pb_cfg; /* Current Rx packet buffer config */ struct i40e_dcbx_config tmp_cfg; +/* GPIO defines used by PTP */ +#define I40E_SDP3_2 18 +#define I40E_SDP3_3 19 +#define I40E_GPIO_4 20 +#define I40E_LED2_0 26 +#define I40E_LED2_1 27 +#define I40E_LED3_0 28 +#define I40E_LED3_1 29 +#define I40E_GLGEN_GPIO_SET_SDP_DATA_HI \ + (1 << I40E_GLGEN_GPIO_SET_SDP_DATA_SHIFT) +#define I40E_GLGEN_GPIO_SET_DRV_SDP_DATA \ + (1 << I40E_GLGEN_GPIO_SET_DRIVE_SDP_SHIFT) +#define I40E_GLGEN_GPIO_CTL_PRT_NUM_0 \ + (0 << I40E_GLGEN_GPIO_CTL_PRT_NUM_SHIFT) +#define I40E_GLGEN_GPIO_CTL_PRT_NUM_1 \ + (1 << I40E_GLGEN_GPIO_CTL_PRT_NUM_SHIFT) +#define I40E_GLGEN_GPIO_CTL_RESERVED BIT(2) +#define I40E_GLGEN_GPIO_CTL_PRT_NUM_NA_Z \ + (1 << I40E_GLGEN_GPIO_CTL_PRT_NUM_NA_SHIFT) +#define I40E_GLGEN_GPIO_CTL_DIR_OUT \ + (1 << I40E_GLGEN_GPIO_CTL_PIN_DIR_SHIFT) +#define I40E_GLGEN_GPIO_CTL_TRI_DRV_HI \ + (1 << 
I40E_GLGEN_GPIO_CTL_TRI_CTL_SHIFT) +#define I40E_GLGEN_GPIO_CTL_OUT_HI_RST \ + (1 << I40E_GLGEN_GPIO_CTL_OUT_CTL_SHIFT) +#define I40E_GLGEN_GPIO_CTL_TIMESYNC_0 \ + (3 << I40E_GLGEN_GPIO_CTL_PIN_FUNC_SHIFT) +#define I40E_GLGEN_GPIO_CTL_TIMESYNC_1 \ + (4 << I40E_GLGEN_GPIO_CTL_PIN_FUNC_SHIFT) +#define I40E_GLGEN_GPIO_CTL_NOT_FOR_PHY_CONN \ + (0x3F << I40E_GLGEN_GPIO_CTL_PHY_PIN_NAME_SHIFT) +#define I40E_GLGEN_GPIO_CTL_OUT_DEFAULT \ + (1 << I40E_GLGEN_GPIO_CTL_OUT_DEFAULT_SHIFT) +#define I40E_GLGEN_GPIO_CTL_PORT_0_IN_TIMESYNC_0 \ + (I40E_GLGEN_GPIO_CTL_NOT_FOR_PHY_CONN | \ + I40E_GLGEN_GPIO_CTL_TIMESYNC_0 | \ + I40E_GLGEN_GPIO_CTL_RESERVED | I40E_GLGEN_GPIO_CTL_PRT_NUM_0) +#define I40E_GLGEN_GPIO_CTL_PORT_1_IN_TIMESYNC_0 \ + (I40E_GLGEN_GPIO_CTL_NOT_FOR_PHY_CONN | \ + I40E_GLGEN_GPIO_CTL_TIMESYNC_0 | \ + I40E_GLGEN_GPIO_CTL_RESERVED | I40E_GLGEN_GPIO_CTL_PRT_NUM_1) +#define I40E_GLGEN_GPIO_CTL_PORT_0_OUT_TIMESYNC_1 \ + (I40E_GLGEN_GPIO_CTL_NOT_FOR_PHY_CONN | \ + I40E_GLGEN_GPIO_CTL_TIMESYNC_1 | I40E_GLGEN_GPIO_CTL_OUT_HI_RST | \ + I40E_GLGEN_GPIO_CTL_TRI_DRV_HI | I40E_GLGEN_GPIO_CTL_DIR_OUT | \ + I40E_GLGEN_GPIO_CTL_RESERVED | I40E_GLGEN_GPIO_CTL_PRT_NUM_0) +#define I40E_GLGEN_GPIO_CTL_PORT_1_OUT_TIMESYNC_1 \ + (I40E_GLGEN_GPIO_CTL_NOT_FOR_PHY_CONN | \ + I40E_GLGEN_GPIO_CTL_TIMESYNC_1 | I40E_GLGEN_GPIO_CTL_OUT_HI_RST | \ + I40E_GLGEN_GPIO_CTL_TRI_DRV_HI | I40E_GLGEN_GPIO_CTL_DIR_OUT | \ + I40E_GLGEN_GPIO_CTL_RESERVED | I40E_GLGEN_GPIO_CTL_PRT_NUM_1) +#define I40E_GLGEN_GPIO_CTL_LED_INIT \ + (I40E_GLGEN_GPIO_CTL_PRT_NUM_NA_Z | \ + I40E_GLGEN_GPIO_CTL_DIR_OUT | \ + I40E_GLGEN_GPIO_CTL_TRI_DRV_HI | \ + I40E_GLGEN_GPIO_CTL_OUT_HI_RST | \ + I40E_GLGEN_GPIO_CTL_OUT_DEFAULT | \ + I40E_GLGEN_GPIO_CTL_NOT_FOR_PHY_CONN) +#define I40E_PRTTSYN_AUX_1_INSTNT \ + (1 << I40E_PRTTSYN_AUX_1_INSTNT_SHIFT) +#define I40E_PRTTSYN_AUX_0_OUT_ENABLE \ + (1 << I40E_PRTTSYN_AUX_0_OUT_ENA_SHIFT) +#define I40E_PRTTSYN_AUX_0_OUT_CLK_MOD (3 << I40E_PRTTSYN_AUX_0_OUTMOD_SHIFT) +#define 
I40E_PRTTSYN_AUX_0_OUT_ENABLE_CLK_MOD \ + (I40E_PRTTSYN_AUX_0_OUT_ENABLE | I40E_PRTTSYN_AUX_0_OUT_CLK_MOD) +#define I40E_PTP_HALF_SECOND 500000000LL /* nano seconds */ +#define I40E_PTP_2_SEC_DELAY 2 + struct ptp_clock *ptp_clock; struct ptp_clock_info ptp_caps; struct sk_buff *ptp_tx_skb; unsigned long ptp_tx_start; struct hwtstamp_config tstamp_config; struct timespec64 ptp_prev_hw_time; + struct work_struct ptp_pps_work; + struct work_struct ptp_extts0_work; + struct work_struct ptp_extts1_work; ktime_t ptp_reset_start; struct mutex tmreg_lock; /* Used to protect the SYSTIME registers. */ u32 ptp_adj_mult; @@ -657,10 +730,14 @@ struct i40e_pf { u32 tx_hwtstamp_skipped; u32 rx_hwtstamp_cleared; u32 latch_event_flags; + u64 ptp_pps_start; + u32 pps_delay; spinlock_t ptp_rx_lock; /* Used to protect Rx timestamp registers. */ + struct ptp_pin_desc ptp_pin[3]; unsigned long latch_events[4]; bool ptp_tx; bool ptp_rx; + struct i40e_ptp_pins_settings *ptp_pins; u16 rss_table_size; /* HW RSS table size */ u32 max_bw; u32 min_bw; @@ -1169,6 +1246,7 @@ void i40e_ptp_save_hw_time(struct i40e_pf *pf); void i40e_ptp_restore_hw_time(struct i40e_pf *pf); void i40e_ptp_init(struct i40e_pf *pf); void i40e_ptp_stop(struct i40e_pf *pf); +int i40e_ptp_alloc_pins(struct i40e_pf *pf); int i40e_is_vsi_uplink_mode_veb(struct i40e_vsi *vsi); i40e_status i40e_get_partition_bw_setting(struct i40e_pf *pf); i40e_status i40e_set_partition_bw_setting(struct i40e_pf *pf); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 1d1f52756a93..b4a57251256a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -4079,10 +4079,13 @@ static irqreturn_t i40e_intr(int irq, void *data) if (icr0 & I40E_PFINT_ICR0_TIMESYNC_MASK) { u32 prttsyn_stat = rd32(hw, I40E_PRTTSYN_STAT_0); - if (prttsyn_stat & I40E_PRTTSYN_STAT_0_TXTIME_MASK) { - icr0 &= ~I40E_PFINT_ICR0_ENA_TIMESYNC_MASK; + if (prttsyn_stat 
& I40E_PRTTSYN_STAT_0_EVENT0_MASK) + schedule_work(&pf->ptp_extts0_work); + + if (prttsyn_stat & I40E_PRTTSYN_STAT_0_TXTIME_MASK) i40e_ptp_tx_hwtstamp(pf); - } + + icr0 &= ~I40E_PFINT_ICR0_ENA_TIMESYNC_MASK; } /* If a critical error is pending we have no choice but to reset the @@ -15181,6 +15184,22 @@ err_switch_setup: } /** + * i40e_set_subsystem_device_id - set subsystem device id + * @hw: pointer to the hardware info + * + * Set PCI subsystem device id either from a pci_dev structure or + * a specific FW register. + **/ +static inline void i40e_set_subsystem_device_id(struct i40e_hw *hw) +{ + struct pci_dev *pdev = ((struct i40e_pf *)hw->back)->pdev; + + hw->subsystem_device_id = pdev->subsystem_device ? + pdev->subsystem_device : + (ushort)(rd32(hw, I40E_PFPCI_SUBSYSID) & USHRT_MAX); +} + +/** * i40e_probe - Device initialization routine * @pdev: PCI device information struct * @ent: entry in i40e_pci_tbl @@ -15275,7 +15294,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw->device_id = pdev->device; pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id); hw->subsystem_vendor_id = pdev->subsystem_vendor; - hw->subsystem_device_id = pdev->subsystem_device; + i40e_set_subsystem_device_id(hw); hw->bus.device = PCI_SLOT(pdev->devfn); hw->bus.func = PCI_FUNC(pdev->devfn); hw->bus.bus_id = pdev->bus->number; @@ -15455,6 +15474,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (is_valid_ether_addr(hw->mac.port_addr)) pf->hw_features |= I40E_HW_PORT_ID_VALID; + i40e_ptp_alloc_pins(pf); pci_set_drvdata(pdev, pf); pci_save_state(pdev); diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index 7b971b205d36..09b1d5aed1c9 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -3,6 +3,7 @@ #include "i40e.h" #include <linux/ptp_classify.h> +#include <linux/posix-clock.h> /* The XL710 timesync is very 
much like Intel's 82599 design when it comes to * the fundamental clock design. However, the clock operations are much simpler @@ -20,10 +21,252 @@ #define I40E_PTP_10GB_INCVAL_MULT 2 #define I40E_PTP_5GB_INCVAL_MULT 2 #define I40E_PTP_1GB_INCVAL_MULT 20 +#define I40E_ISGN 0x80000000 #define I40E_PRTTSYN_CTL1_TSYNTYPE_V1 BIT(I40E_PRTTSYN_CTL1_TSYNTYPE_SHIFT) #define I40E_PRTTSYN_CTL1_TSYNTYPE_V2 (2 << \ I40E_PRTTSYN_CTL1_TSYNTYPE_SHIFT) +#define I40E_SUBDEV_ID_25G_PTP_PIN 0xB +#define to_dev(obj) container_of(obj, struct device, kobj) + +enum i40e_ptp_pin { + SDP3_2 = 0, + SDP3_3, + GPIO_4 +}; + +enum i40e_can_set_pins_t { + CANT_DO_PINS = -1, + CAN_SET_PINS, + CAN_DO_PINS +}; + +static struct ptp_pin_desc sdp_desc[] = { + /* name idx func chan */ + {"SDP3_2", SDP3_2, PTP_PF_NONE, 0}, + {"SDP3_3", SDP3_3, PTP_PF_NONE, 1}, + {"GPIO_4", GPIO_4, PTP_PF_NONE, 1}, +}; + +enum i40e_ptp_gpio_pin_state { + end = -2, + invalid, + off, + in_A, + in_B, + out_A, + out_B, +}; + +static const char * const i40e_ptp_gpio_pin_state2str[] = { + "off", "in_A", "in_B", "out_A", "out_B" +}; + +enum i40e_ptp_led_pin_state { + led_end = -2, + low = 0, + high, +}; + +struct i40e_ptp_pins_settings { + enum i40e_ptp_gpio_pin_state sdp3_2; + enum i40e_ptp_gpio_pin_state sdp3_3; + enum i40e_ptp_gpio_pin_state gpio_4; + enum i40e_ptp_led_pin_state led2_0; + enum i40e_ptp_led_pin_state led2_1; + enum i40e_ptp_led_pin_state led3_0; + enum i40e_ptp_led_pin_state led3_1; +}; + +static const struct i40e_ptp_pins_settings + i40e_ptp_pin_led_allowed_states[] = { + {off, off, off, high, high, high, high}, + {off, in_A, off, high, high, high, low}, + {off, out_A, off, high, low, high, high}, + {off, in_B, off, high, high, high, low}, + {off, out_B, off, high, low, high, high}, + {in_A, off, off, high, high, high, low}, + {in_A, in_B, off, high, high, high, low}, + {in_A, out_B, off, high, low, high, high}, + {out_A, off, off, high, low, high, high}, + {out_A, in_B, off, high, low, high, high}, + {in_B, 
off, off, high, high, high, low}, + {in_B, in_A, off, high, high, high, low}, + {in_B, out_A, off, high, low, high, high}, + {out_B, off, off, high, low, high, high}, + {out_B, in_A, off, high, low, high, high}, + {off, off, in_A, high, high, low, high}, + {off, out_A, in_A, high, low, low, high}, + {off, in_B, in_A, high, high, low, low}, + {off, out_B, in_A, high, low, low, high}, + {out_A, off, in_A, high, low, low, high}, + {out_A, in_B, in_A, high, low, low, high}, + {in_B, off, in_A, high, high, low, low}, + {in_B, out_A, in_A, high, low, low, high}, + {out_B, off, in_A, high, low, low, high}, + {off, off, out_A, low, high, high, high}, + {off, in_A, out_A, low, high, high, low}, + {off, in_B, out_A, low, high, high, low}, + {off, out_B, out_A, low, low, high, high}, + {in_A, off, out_A, low, high, high, low}, + {in_A, in_B, out_A, low, high, high, low}, + {in_A, out_B, out_A, low, low, high, high}, + {in_B, off, out_A, low, high, high, low}, + {in_B, in_A, out_A, low, high, high, low}, + {out_B, off, out_A, low, low, high, high}, + {out_B, in_A, out_A, low, low, high, high}, + {off, off, in_B, high, high, low, high}, + {off, in_A, in_B, high, high, low, low}, + {off, out_A, in_B, high, low, low, high}, + {off, out_B, in_B, high, low, low, high}, + {in_A, off, in_B, high, high, low, low}, + {in_A, out_B, in_B, high, low, low, high}, + {out_A, off, in_B, high, low, low, high}, + {out_B, off, in_B, high, low, low, high}, + {out_B, in_A, in_B, high, low, low, high}, + {off, off, out_B, low, high, high, high}, + {off, in_A, out_B, low, high, high, low}, + {off, out_A, out_B, low, low, high, high}, + {off, in_B, out_B, low, high, high, low}, + {in_A, off, out_B, low, high, high, low}, + {in_A, in_B, out_B, low, high, high, low}, + {out_A, off, out_B, low, low, high, high}, + {out_A, in_B, out_B, low, low, high, high}, + {in_B, off, out_B, low, high, high, low}, + {in_B, in_A, out_B, low, high, high, low}, + {in_B, out_A, out_B, low, low, high, high}, + {end, end, 
end, led_end, led_end, led_end, led_end} +}; + +static int i40e_ptp_set_pins(struct i40e_pf *pf, + struct i40e_ptp_pins_settings *pins); + +/** + * i40e_ptp_extts0_work - workqueue task function + * @work: workqueue task structure + * + * Service for PTP external clock event + **/ +static void i40e_ptp_extts0_work(struct work_struct *work) +{ + struct i40e_pf *pf = container_of(work, struct i40e_pf, + ptp_extts0_work); + struct i40e_hw *hw = &pf->hw; + struct ptp_clock_event event; + u32 hi, lo; + + /* Event time is captured by one of the two matched registers + * PRTTSYN_EVNT_L: 32 LSB of sampled time event + * PRTTSYN_EVNT_H: 32 MSB of sampled time event + * Event is defined in PRTTSYN_EVNT_0 register + */ + lo = rd32(hw, I40E_PRTTSYN_EVNT_L(0)); + hi = rd32(hw, I40E_PRTTSYN_EVNT_H(0)); + + event.timestamp = (((u64)hi) << 32) | lo; + + event.type = PTP_CLOCK_EXTTS; + event.index = hw->pf_id; + + /* fire event */ + ptp_clock_event(pf->ptp_clock, &event); +} + +/** + * i40e_is_ptp_pin_dev - check if device supports PTP pins + * @hw: pointer to the hardware structure + * + * Return true if device supports PTP pins, false otherwise. + **/ +static bool i40e_is_ptp_pin_dev(struct i40e_hw *hw) +{ + return hw->device_id == I40E_DEV_ID_25G_SFP28 && + hw->subsystem_device_id == I40E_SUBDEV_ID_25G_PTP_PIN; +} + +/** + * i40e_can_set_pins - check possibility of manipulating the pins + * @pf: board private structure + * + * Check if all conditions are satisfied to manipulate PTP pins. + * Return CAN_SET_PINS if pins can be set on a specific PF or + * return CAN_DO_PINS if pins can be manipulated within a NIC or + * return CANT_DO_PINS otherwise. 
+ **/ +static enum i40e_can_set_pins_t i40e_can_set_pins(struct i40e_pf *pf) +{ + if (!i40e_is_ptp_pin_dev(&pf->hw)) { + dev_warn(&pf->pdev->dev, + "PTP external clock not supported.\n"); + return CANT_DO_PINS; + } + + if (!pf->ptp_pins) { + dev_warn(&pf->pdev->dev, + "PTP PIN manipulation not allowed.\n"); + return CANT_DO_PINS; + } + + if (pf->hw.pf_id) { + dev_warn(&pf->pdev->dev, + "PTP PINs should be accessed via PF0.\n"); + return CAN_DO_PINS; + } + + return CAN_SET_PINS; +} + +/** + * i40_ptp_reset_timing_events - Reset PTP timing events + * @pf: Board private structure + * + * This function resets timing events for pf. + **/ +static void i40_ptp_reset_timing_events(struct i40e_pf *pf) +{ + u32 i; + + spin_lock_bh(&pf->ptp_rx_lock); + for (i = 0; i <= I40E_PRTTSYN_RXTIME_L_MAX_INDEX; i++) { + /* reading and automatically clearing timing events registers */ + rd32(&pf->hw, I40E_PRTTSYN_RXTIME_L(i)); + rd32(&pf->hw, I40E_PRTTSYN_RXTIME_H(i)); + pf->latch_events[i] = 0; + } + /* reading and automatically clearing timing events registers */ + rd32(&pf->hw, I40E_PRTTSYN_TXTIME_L); + rd32(&pf->hw, I40E_PRTTSYN_TXTIME_H); + + pf->tx_hwtstamp_timeouts = 0; + pf->tx_hwtstamp_skipped = 0; + pf->rx_hwtstamp_cleared = 0; + pf->latch_event_flags = 0; + spin_unlock_bh(&pf->ptp_rx_lock); +} + +/** + * i40e_ptp_verify - check pins + * @ptp: ptp clock + * @pin: pin index + * @func: assigned function + * @chan: channel + * + * Check pins consistency. + * Return 0 on success or error on failure. 
+ **/ +static int i40e_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin, + enum ptp_pin_function func, unsigned int chan) +{ + switch (func) { + case PTP_PF_NONE: + case PTP_PF_EXTTS: + case PTP_PF_PEROUT: + break; + case PTP_PF_PHYSYNC: + return -EOPNOTSUPP; + } + return 0; +} /** * i40e_ptp_read - Read the PHC time from the device @@ -137,6 +380,37 @@ static int i40e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) } /** + * i40e_ptp_set_1pps_signal_hw - configure 1PPS PTP signal for pins + * @pf: the PF private data structure + * + * Configure 1PPS signal used for PTP pins + **/ +static void i40e_ptp_set_1pps_signal_hw(struct i40e_pf *pf) +{ + struct i40e_hw *hw = &pf->hw; + struct timespec64 now; + u64 ns; + + wr32(hw, I40E_PRTTSYN_AUX_0(1), 0); + wr32(hw, I40E_PRTTSYN_AUX_1(1), I40E_PRTTSYN_AUX_1_INSTNT); + wr32(hw, I40E_PRTTSYN_AUX_0(1), I40E_PRTTSYN_AUX_0_OUT_ENABLE); + + i40e_ptp_read(pf, &now, NULL); + now.tv_sec += I40E_PTP_2_SEC_DELAY; + now.tv_nsec = 0; + ns = timespec64_to_ns(&now); + + /* I40E_PRTTSYN_TGT_L(1) */ + wr32(hw, I40E_PRTTSYN_TGT_L(1), ns & 0xFFFFFFFF); + /* I40E_PRTTSYN_TGT_H(1) */ + wr32(hw, I40E_PRTTSYN_TGT_H(1), ns >> 32); + wr32(hw, I40E_PRTTSYN_CLKO(1), I40E_PTP_HALF_SECOND); + wr32(hw, I40E_PRTTSYN_AUX_1(1), I40E_PRTTSYN_AUX_1_INSTNT); + wr32(hw, I40E_PRTTSYN_AUX_0(1), + I40E_PRTTSYN_AUX_0_OUT_ENABLE_CLK_MOD); +} + +/** * i40e_ptp_adjtime - Adjust the PHC time * @ptp: The PTP clock structure * @delta: Offset in nanoseconds to adjust the PHC time by @@ -146,14 +420,35 @@ static int i40e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) static int i40e_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) { struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps); - struct timespec64 now, then; + struct i40e_hw *hw = &pf->hw; - then = ns_to_timespec64(delta); mutex_lock(&pf->tmreg_lock); - i40e_ptp_read(pf, &now, NULL); - now = timespec64_add(now, then); - i40e_ptp_write(pf, (const struct timespec64 *)&now); + if (delta > 
-999999900LL && delta < 999999900LL) { + int neg_adj = 0; + u32 timadj; + u64 tohw; + + if (delta < 0) { + neg_adj = 1; + tohw = -delta; + } else { + tohw = delta; + } + + timadj = tohw & 0x3FFFFFFF; + if (neg_adj) + timadj |= I40E_ISGN; + wr32(hw, I40E_PRTTSYN_ADJ, timadj); + } else { + struct timespec64 then, now; + + then = ns_to_timespec64(delta); + i40e_ptp_read(pf, &now, NULL); + now = timespec64_add(now, then); + i40e_ptp_write(pf, (const struct timespec64 *)&now); + i40e_ptp_set_1pps_signal_hw(pf); + } mutex_unlock(&pf->tmreg_lock); @@ -184,7 +479,7 @@ static int i40e_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts, /** * i40e_ptp_settime - Set the time of the PHC * @ptp: The PTP clock structure - * @ts: timespec structure that holds the new time value + * @ts: timespec64 structure that holds the new time value * * Set the device clock to the user input value. The conversion from timespec * to ns happens in the write function. @@ -202,18 +497,145 @@ static int i40e_ptp_settime(struct ptp_clock_info *ptp, } /** - * i40e_ptp_feature_enable - Enable/disable ancillary features of the PHC subsystem + * i40e_pps_configure - configure PPS events + * @ptp: ptp clock + * @rq: clock request + * @on: status + * + * Configure PPS events for external clock source. + * Return 0 on success or error on failure. + **/ +static int i40e_pps_configure(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, + int on) +{ + struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps); + + if (!!on) + i40e_ptp_set_1pps_signal_hw(pf); + + return 0; +} + +/** + * i40e_pin_state - determine PIN state + * @index: PIN index + * @func: function assigned to PIN + * + * Determine PIN state based on PIN index and function assigned. + * Return PIN state. 
+ **/ +static enum i40e_ptp_gpio_pin_state i40e_pin_state(int index, int func) +{ + enum i40e_ptp_gpio_pin_state state = off; + + if (index == 0 && func == PTP_PF_EXTTS) + state = in_A; + if (index == 1 && func == PTP_PF_EXTTS) + state = in_B; + if (index == 0 && func == PTP_PF_PEROUT) + state = out_A; + if (index == 1 && func == PTP_PF_PEROUT) + state = out_B; + + return state; +} + +/** + * i40e_ptp_enable_pin - enable PINs. + * @pf: private board structure + * @chan: channel + * @func: PIN function + * @on: state + * + * Enable PTP pins for external clock source. + * Return 0 on success or error code on failure. + **/ +static int i40e_ptp_enable_pin(struct i40e_pf *pf, unsigned int chan, + enum ptp_pin_function func, int on) +{ + enum i40e_ptp_gpio_pin_state *pin = NULL; + struct i40e_ptp_pins_settings pins; + int pin_index; + + /* Use PF0 to set pins. Return success for user space tools */ + if (pf->hw.pf_id) + return 0; + + /* Preserve previous state of pins that we don't touch */ + pins.sdp3_2 = pf->ptp_pins->sdp3_2; + pins.sdp3_3 = pf->ptp_pins->sdp3_3; + pins.gpio_4 = pf->ptp_pins->gpio_4; + + /* To turn on the pin - find the corresponding one based on + * the given index. To to turn the function off - find + * which pin had it assigned. Don't use ptp_find_pin here + * because it tries to lock the pincfg_mux which is locked by + * ptp_pin_store() that calls here. + */ + if (on) { + pin_index = ptp_find_pin(pf->ptp_clock, func, chan); + if (pin_index < 0) + return -EBUSY; + + switch (pin_index) { + case SDP3_2: + pin = &pins.sdp3_2; + break; + case SDP3_3: + pin = &pins.sdp3_3; + break; + case GPIO_4: + pin = &pins.gpio_4; + break; + default: + return -EINVAL; + } + + *pin = i40e_pin_state(chan, func); + } else { + pins.sdp3_2 = off; + pins.sdp3_3 = off; + pins.gpio_4 = off; + } + + return i40e_ptp_set_pins(pf, &pins) ? 
-EINVAL : 0; +} + +/** + * i40e_ptp_feature_enable - Enable external clock pins * @ptp: The PTP clock structure - * @rq: The requested feature to change - * @on: Enable/disable flag + * @rq: The PTP clock request structure + * @on: To turn feature on/off * - * The XL710 does not support any of the ancillary features of the PHC - * subsystem, so this function may just return. + * Setting on/off PTP PPS feature for pin. **/ static int i40e_ptp_feature_enable(struct ptp_clock_info *ptp, - struct ptp_clock_request *rq, int on) + struct ptp_clock_request *rq, + int on) { - return -EOPNOTSUPP; + struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps); + + enum ptp_pin_function func; + unsigned int chan; + + /* TODO: Implement flags handling for EXTTS and PEROUT */ + switch (rq->type) { + case PTP_CLK_REQ_EXTTS: + func = PTP_PF_EXTTS; + chan = rq->extts.index; + break; + case PTP_CLK_REQ_PEROUT: + func = PTP_PF_PEROUT; + chan = rq->perout.index; + break; + case PTP_CLK_REQ_PPS: + return i40e_pps_configure(ptp, rq, on); + default: + return -EOPNOTSUPP; + } + + return i40e_ptp_enable_pin(pf, chan, func, on); } /** @@ -528,6 +950,229 @@ int i40e_ptp_get_ts_config(struct i40e_pf *pf, struct ifreq *ifr) } /** + * i40e_ptp_free_pins - free memory used by PTP pins + * @pf: Board private structure + * + * Release memory allocated for PTP pins. + **/ +static void i40e_ptp_free_pins(struct i40e_pf *pf) +{ + if (i40e_is_ptp_pin_dev(&pf->hw)) { + kfree(pf->ptp_pins); + kfree(pf->ptp_caps.pin_config); + pf->ptp_pins = NULL; + } +} + +/** + * i40e_ptp_set_pin_hw - Set HW GPIO pin + * @hw: pointer to the hardware structure + * @pin: pin index + * @state: pin state + * + * Set status of GPIO pin for external clock handling. 
+ **/ +static void i40e_ptp_set_pin_hw(struct i40e_hw *hw, + unsigned int pin, + enum i40e_ptp_gpio_pin_state state) +{ + switch (state) { + case off: + wr32(hw, I40E_GLGEN_GPIO_CTL(pin), 0); + break; + case in_A: + wr32(hw, I40E_GLGEN_GPIO_CTL(pin), + I40E_GLGEN_GPIO_CTL_PORT_0_IN_TIMESYNC_0); + break; + case in_B: + wr32(hw, I40E_GLGEN_GPIO_CTL(pin), + I40E_GLGEN_GPIO_CTL_PORT_1_IN_TIMESYNC_0); + break; + case out_A: + wr32(hw, I40E_GLGEN_GPIO_CTL(pin), + I40E_GLGEN_GPIO_CTL_PORT_0_OUT_TIMESYNC_1); + break; + case out_B: + wr32(hw, I40E_GLGEN_GPIO_CTL(pin), + I40E_GLGEN_GPIO_CTL_PORT_1_OUT_TIMESYNC_1); + break; + default: + break; + } +} + +/** + * i40e_ptp_set_led_hw - Set HW GPIO led + * @hw: pointer to the hardware structure + * @led: led index + * @state: led state + * + * Set status of GPIO led for external clock handling. + **/ +static void i40e_ptp_set_led_hw(struct i40e_hw *hw, + unsigned int led, + enum i40e_ptp_led_pin_state state) +{ + switch (state) { + case low: + wr32(hw, I40E_GLGEN_GPIO_SET, + I40E_GLGEN_GPIO_SET_DRV_SDP_DATA | led); + break; + case high: + wr32(hw, I40E_GLGEN_GPIO_SET, + I40E_GLGEN_GPIO_SET_DRV_SDP_DATA | + I40E_GLGEN_GPIO_SET_SDP_DATA_HI | led); + break; + default: + break; + } +} + +/** + * i40e_ptp_init_leds_hw - init LEDs + * @hw: pointer to a hardware structure + * + * Set initial state of LEDs + **/ +static void i40e_ptp_init_leds_hw(struct i40e_hw *hw) +{ + wr32(hw, I40E_GLGEN_GPIO_CTL(I40E_LED2_0), + I40E_GLGEN_GPIO_CTL_LED_INIT); + wr32(hw, I40E_GLGEN_GPIO_CTL(I40E_LED2_1), + I40E_GLGEN_GPIO_CTL_LED_INIT); + wr32(hw, I40E_GLGEN_GPIO_CTL(I40E_LED3_0), + I40E_GLGEN_GPIO_CTL_LED_INIT); + wr32(hw, I40E_GLGEN_GPIO_CTL(I40E_LED3_1), + I40E_GLGEN_GPIO_CTL_LED_INIT); +} + +/** + * i40e_ptp_set_pins_hw - Set HW GPIO pins + * @pf: Board private structure + * + * This function sets GPIO pins for PTP + **/ +static void i40e_ptp_set_pins_hw(struct i40e_pf *pf) +{ + const struct i40e_ptp_pins_settings *pins = pf->ptp_pins; + struct 
i40e_hw *hw = &pf->hw; + + /* pin must be disabled before it may be used */ + i40e_ptp_set_pin_hw(hw, I40E_SDP3_2, off); + i40e_ptp_set_pin_hw(hw, I40E_SDP3_3, off); + i40e_ptp_set_pin_hw(hw, I40E_GPIO_4, off); + + i40e_ptp_set_pin_hw(hw, I40E_SDP3_2, pins->sdp3_2); + i40e_ptp_set_pin_hw(hw, I40E_SDP3_3, pins->sdp3_3); + i40e_ptp_set_pin_hw(hw, I40E_GPIO_4, pins->gpio_4); + + i40e_ptp_set_led_hw(hw, I40E_LED2_0, pins->led2_0); + i40e_ptp_set_led_hw(hw, I40E_LED2_1, pins->led2_1); + i40e_ptp_set_led_hw(hw, I40E_LED3_0, pins->led3_0); + i40e_ptp_set_led_hw(hw, I40E_LED3_1, pins->led3_1); + + dev_info(&pf->pdev->dev, + "PTP configuration set to: SDP3_2: %s, SDP3_3: %s, GPIO_4: %s.\n", + i40e_ptp_gpio_pin_state2str[pins->sdp3_2], + i40e_ptp_gpio_pin_state2str[pins->sdp3_3], + i40e_ptp_gpio_pin_state2str[pins->gpio_4]); +} + +/** + * i40e_ptp_set_pins - set PTP pins in HW + * @pf: Board private structure + * @pins: PTP pins to be applied + * + * Validate and set PTP pins in HW for specific PF. + * Return 0 on success or negative value on error. 
+ **/ +static int i40e_ptp_set_pins(struct i40e_pf *pf, + struct i40e_ptp_pins_settings *pins) +{ + enum i40e_can_set_pins_t pin_caps = i40e_can_set_pins(pf); + int i = 0; + + if (pin_caps == CANT_DO_PINS) + return -EOPNOTSUPP; + else if (pin_caps == CAN_DO_PINS) + return 0; + + if (pins->sdp3_2 == invalid) + pins->sdp3_2 = pf->ptp_pins->sdp3_2; + if (pins->sdp3_3 == invalid) + pins->sdp3_3 = pf->ptp_pins->sdp3_3; + if (pins->gpio_4 == invalid) + pins->gpio_4 = pf->ptp_pins->gpio_4; + while (i40e_ptp_pin_led_allowed_states[i].sdp3_2 != end) { + if (pins->sdp3_2 == i40e_ptp_pin_led_allowed_states[i].sdp3_2 && + pins->sdp3_3 == i40e_ptp_pin_led_allowed_states[i].sdp3_3 && + pins->gpio_4 == i40e_ptp_pin_led_allowed_states[i].gpio_4) { + pins->led2_0 = + i40e_ptp_pin_led_allowed_states[i].led2_0; + pins->led2_1 = + i40e_ptp_pin_led_allowed_states[i].led2_1; + pins->led3_0 = + i40e_ptp_pin_led_allowed_states[i].led3_0; + pins->led3_1 = + i40e_ptp_pin_led_allowed_states[i].led3_1; + break; + } + i++; + } + if (i40e_ptp_pin_led_allowed_states[i].sdp3_2 == end) { + dev_warn(&pf->pdev->dev, + "Unsupported PTP pin configuration: SDP3_2: %s, SDP3_3: %s, GPIO_4: %s.\n", + i40e_ptp_gpio_pin_state2str[pins->sdp3_2], + i40e_ptp_gpio_pin_state2str[pins->sdp3_3], + i40e_ptp_gpio_pin_state2str[pins->gpio_4]); + + return -EPERM; + } + memcpy(pf->ptp_pins, pins, sizeof(*pins)); + i40e_ptp_set_pins_hw(pf); + i40_ptp_reset_timing_events(pf); + + return 0; +} + +/** + * i40e_ptp_alloc_pins - allocate PTP pins structure + * @pf: Board private structure + * + * allocate PTP pins structure + **/ +int i40e_ptp_alloc_pins(struct i40e_pf *pf) +{ + if (!i40e_is_ptp_pin_dev(&pf->hw)) + return 0; + + pf->ptp_pins = + kzalloc(sizeof(struct i40e_ptp_pins_settings), GFP_KERNEL); + + if (!pf->ptp_pins) { + dev_warn(&pf->pdev->dev, "Cannot allocate memory for PTP pins structure.\n"); + return -I40E_ERR_NO_MEMORY; + } + + pf->ptp_pins->sdp3_2 = off; + pf->ptp_pins->sdp3_3 = off; + pf->ptp_pins->gpio_4 
= off; + pf->ptp_pins->led2_0 = high; + pf->ptp_pins->led2_1 = high; + pf->ptp_pins->led3_0 = high; + pf->ptp_pins->led3_1 = high; + + /* Use PF0 to set pins in HW. Return success for user space tools */ + if (pf->hw.pf_id) + return 0; + + i40e_ptp_init_leds_hw(&pf->hw); + i40e_ptp_set_pins_hw(pf); + + return 0; +} + +/** * i40e_ptp_set_timestamp_mode - setup hardware for requested timestamp mode * @pf: Board private structure * @config: hwtstamp settings requested or saved @@ -545,6 +1190,21 @@ static int i40e_ptp_set_timestamp_mode(struct i40e_pf *pf, struct i40e_hw *hw = &pf->hw; u32 tsyntype, regval; + /* Selects external trigger to cause event */ + regval = rd32(hw, I40E_PRTTSYN_AUX_0(0)); + /* Bit 17:16 is EVNTLVL, 01B rising edge */ + regval &= 0; + regval |= (1 << I40E_PRTTSYN_AUX_0_EVNTLVL_SHIFT); + /* regval: 0001 0000 0000 0000 0000 */ + wr32(hw, I40E_PRTTSYN_AUX_0(0), regval); + + /* Enabel interrupts */ + regval = rd32(hw, I40E_PRTTSYN_CTL0); + regval |= 1 << I40E_PRTTSYN_CTL0_EVENT_INT_ENA_SHIFT; + wr32(hw, I40E_PRTTSYN_CTL0, regval); + + INIT_WORK(&pf->ptp_extts0_work, i40e_ptp_extts0_work); + /* Reserved for future extensions. */ if (config->flags) return -EINVAL; @@ -688,6 +1348,45 @@ int i40e_ptp_set_ts_config(struct i40e_pf *pf, struct ifreq *ifr) } /** + * i40e_init_pin_config - initialize pins. + * @pf: private board structure + * + * Initialize pins for external clock source. + * Return 0 on success or error code on failure. 
+ **/ +static int i40e_init_pin_config(struct i40e_pf *pf) +{ + int i; + + pf->ptp_caps.n_pins = 3; + pf->ptp_caps.n_ext_ts = 2; + pf->ptp_caps.pps = 1; + pf->ptp_caps.n_per_out = 2; + + pf->ptp_caps.pin_config = kcalloc(pf->ptp_caps.n_pins, + sizeof(*pf->ptp_caps.pin_config), + GFP_KERNEL); + if (!pf->ptp_caps.pin_config) + return -ENOMEM; + + for (i = 0; i < pf->ptp_caps.n_pins; i++) { + snprintf(pf->ptp_caps.pin_config[i].name, + sizeof(pf->ptp_caps.pin_config[i].name), + "%s", sdp_desc[i].name); + pf->ptp_caps.pin_config[i].index = sdp_desc[i].index; + pf->ptp_caps.pin_config[i].func = PTP_PF_NONE; + pf->ptp_caps.pin_config[i].chan = sdp_desc[i].chan; + } + + pf->ptp_caps.verify = i40e_ptp_verify; + pf->ptp_caps.enable = i40e_ptp_feature_enable; + + pf->ptp_caps.pps = 1; + + return 0; +} + +/** * i40e_ptp_create_clock - Create PTP clock device for userspace * @pf: Board private structure * @@ -707,13 +1406,16 @@ static long i40e_ptp_create_clock(struct i40e_pf *pf) sizeof(pf->ptp_caps.name) - 1); pf->ptp_caps.owner = THIS_MODULE; pf->ptp_caps.max_adj = 999999999; - pf->ptp_caps.n_ext_ts = 0; - pf->ptp_caps.pps = 0; pf->ptp_caps.adjfreq = i40e_ptp_adjfreq; pf->ptp_caps.adjtime = i40e_ptp_adjtime; pf->ptp_caps.gettimex64 = i40e_ptp_gettimex; pf->ptp_caps.settime64 = i40e_ptp_settime; - pf->ptp_caps.enable = i40e_ptp_feature_enable; + if (i40e_is_ptp_pin_dev(&pf->hw)) { + int err = i40e_init_pin_config(pf); + + if (err) + return err; + } /* Attempt to register the clock before enabling the hardware. 
*/ pf->ptp_clock = ptp_clock_register(&pf->ptp_caps, &pf->pdev->dev); @@ -843,6 +1545,8 @@ void i40e_ptp_init(struct i40e_pf *pf) /* Restore the clock time based on last known value */ i40e_ptp_restore_hw_time(pf); } + + i40e_ptp_set_1pps_signal_hw(pf); } /** @@ -854,6 +1558,9 @@ void i40e_ptp_init(struct i40e_pf *pf) **/ void i40e_ptp_stop(struct i40e_pf *pf) { + struct i40e_hw *hw = &pf->hw; + u32 regval; + pf->flags &= ~I40E_FLAG_PTP; pf->ptp_tx = false; pf->ptp_rx = false; @@ -872,4 +1579,21 @@ void i40e_ptp_stop(struct i40e_pf *pf) dev_info(&pf->pdev->dev, "%s: removed PHC on %s\n", __func__, pf->vsi[pf->lan_vsi]->netdev->name); } + + if (i40e_is_ptp_pin_dev(&pf->hw)) { + i40e_ptp_set_pin_hw(hw, I40E_SDP3_2, off); + i40e_ptp_set_pin_hw(hw, I40E_SDP3_3, off); + i40e_ptp_set_pin_hw(hw, I40E_GPIO_4, off); + } + + regval = rd32(hw, I40E_PRTTSYN_AUX_0(0)); + regval &= ~I40E_PRTTSYN_AUX_0_PTPFLAG_MASK; + wr32(hw, I40E_PRTTSYN_AUX_0(0), regval); + + /* Disable interrupts */ + regval = rd32(hw, I40E_PRTTSYN_CTL0); + regval &= ~I40E_PRTTSYN_CTL0_EVENT_INT_ENA_MASK; + wr32(hw, I40E_PRTTSYN_CTL0, regval); + + i40e_ptp_free_pins(pf); } diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h index 36f7b27a04ae..8d0588a27a05 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_register.h +++ b/drivers/net/ethernet/intel/i40e/i40e_register.h @@ -182,11 +182,20 @@ #define I40E_GLGEN_GPIO_CTL_PRT_NUM_MASK I40E_MASK(0x3, I40E_GLGEN_GPIO_CTL_PRT_NUM_SHIFT) #define I40E_GLGEN_GPIO_CTL_PRT_NUM_NA_SHIFT 3 #define I40E_GLGEN_GPIO_CTL_PRT_NUM_NA_MASK I40E_MASK(0x1, I40E_GLGEN_GPIO_CTL_PRT_NUM_NA_SHIFT) +#define I40E_GLGEN_GPIO_CTL_PIN_DIR_SHIFT 4 +#define I40E_GLGEN_GPIO_CTL_TRI_CTL_SHIFT 5 +#define I40E_GLGEN_GPIO_CTL_OUT_CTL_SHIFT 6 #define I40E_GLGEN_GPIO_CTL_PIN_FUNC_SHIFT 7 #define I40E_GLGEN_GPIO_CTL_PIN_FUNC_MASK I40E_MASK(0x7, I40E_GLGEN_GPIO_CTL_PIN_FUNC_SHIFT) #define I40E_GLGEN_GPIO_CTL_LED_BLINK_SHIFT 11 #define 
I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT 12 #define I40E_GLGEN_GPIO_CTL_LED_MODE_MASK I40E_MASK(0x1F, I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT) +#define I40E_GLGEN_GPIO_CTL_OUT_DEFAULT_SHIFT 19 +#define I40E_GLGEN_GPIO_CTL_OUT_DEFAULT_MASK I40E_MASK(0x1, I40E_GLGEN_GPIO_CTL_OUT_DEFAULT_SHIFT) +#define I40E_GLGEN_GPIO_CTL_PHY_PIN_NAME_SHIFT 20 +#define I40E_GLGEN_GPIO_SET 0x00088184 /* Reset: POR */ +#define I40E_GLGEN_GPIO_SET_SDP_DATA_SHIFT 5 +#define I40E_GLGEN_GPIO_SET_DRIVE_SDP_SHIFT 6 #define I40E_GLGEN_MDIO_I2C_SEL(_i) (0x000881C0 + ((_i) * 4)) /* _i=0...3 */ /* Reset: POR */ #define I40E_GLGEN_MSCA(_i) (0x0008818C + ((_i) * 4)) /* _i=0...3 */ /* Reset: POR */ #define I40E_GLGEN_MSCA_MDIADD_SHIFT 0 @@ -540,6 +549,7 @@ #define I40E_PF_PCI_CIAA_VF_NUM_SHIFT 12 #define I40E_PF_PCI_CIAD 0x0009C100 /* Reset: FLR */ #define I40E_PRTPM_EEE_STAT 0x001E4320 /* Reset: GLOBR */ +#define I40E_PFPCI_SUBSYSID 0x000BE100 /* Reset: PCIR */ #define I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_SHIFT 30 #define I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_MASK I40E_MASK(0x1, I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_SHIFT) #define I40E_PRTPM_EEE_STAT_TX_LPI_STATUS_SHIFT 31 @@ -742,6 +752,8 @@ #define I40E_PRTTSYN_CTL0 0x001E4200 /* Reset: GLOBR */ #define I40E_PRTTSYN_CTL0_TXTIME_INT_ENA_SHIFT 1 #define I40E_PRTTSYN_CTL0_TXTIME_INT_ENA_MASK I40E_MASK(0x1, I40E_PRTTSYN_CTL0_TXTIME_INT_ENA_SHIFT) +#define I40E_PRTTSYN_CTL0_EVENT_INT_ENA_SHIFT 2 +#define I40E_PRTTSYN_CTL0_EVENT_INT_ENA_MASK I40E_MASK(0x1, I40E_PRTTSYN_CTL0_EVENT_INT_ENA_SHIFT) #define I40E_PRTTSYN_CTL0_PF_ID_SHIFT 8 #define I40E_PRTTSYN_CTL0_PF_ID_MASK I40E_MASK(0xF, I40E_PRTTSYN_CTL0_PF_ID_SHIFT) #define I40E_PRTTSYN_CTL0_TSYNENA_SHIFT 31 @@ -760,7 +772,10 @@ #define I40E_PRTTSYN_INC_L 0x001E4040 /* Reset: GLOBR */ #define I40E_PRTTSYN_RXTIME_H(_i) (0x00085040 + ((_i) * 32)) /* _i=0...3 */ /* Reset: CORER */ #define I40E_PRTTSYN_RXTIME_L(_i) (0x000850C0 + ((_i) * 32)) /* _i=0...3 */ /* Reset: CORER */ +#define I40E_PRTTSYN_RXTIME_L_MAX_INDEX 3 #define 
I40E_PRTTSYN_STAT_0 0x001E4220 /* Reset: GLOBR */ +#define I40E_PRTTSYN_STAT_0_EVENT0_SHIFT 0 +#define I40E_PRTTSYN_STAT_0_EVENT0_MASK I40E_MASK(0x1, I40E_PRTTSYN_STAT_0_EVENT0_SHIFT) #define I40E_PRTTSYN_STAT_0_TXTIME_SHIFT 4 #define I40E_PRTTSYN_STAT_0_TXTIME_MASK I40E_MASK(0x1, I40E_PRTTSYN_STAT_0_TXTIME_SHIFT) #define I40E_PRTTSYN_STAT_1 0x00085140 /* Reset: CORER */ @@ -768,6 +783,20 @@ #define I40E_PRTTSYN_TIME_L 0x001E4100 /* Reset: GLOBR */ #define I40E_PRTTSYN_TXTIME_H 0x001E41E0 /* Reset: GLOBR */ #define I40E_PRTTSYN_TXTIME_L 0x001E41C0 /* Reset: GLOBR */ +#define I40E_PRTTSYN_EVNT_H(_i) (0x001E40C0 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */ +#define I40E_PRTTSYN_EVNT_L(_i) (0x001E4080 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */ +#define I40E_PRTTSYN_AUX_0(_i) (0x001E42A0 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */ +#define I40E_PRTTSYN_AUX_0_OUT_ENA_SHIFT 0 +#define I40E_PRTTSYN_AUX_0_OUTMOD_SHIFT 1 +#define I40E_PRTTSYN_AUX_0_EVNTLVL_SHIFT 16 +#define I40E_PRTTSYN_AUX_0_PTPFLAG_SHIFT 17 +#define I40E_PRTTSYN_AUX_0_PTPFLAG_MASK I40E_MASK(0x1, I40E_PRTTSYN_AUX_0_PTPFLAG_SHIFT) +#define I40E_PRTTSYN_AUX_1(_i) (0x001E42E0 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */ +#define I40E_PRTTSYN_AUX_1_INSTNT_SHIFT 0 +#define I40E_PRTTSYN_TGT_H(_i) (0x001E4180 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */ +#define I40E_PRTTSYN_TGT_L(_i) (0x001E4140 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */ +#define I40E_PRTTSYN_CLKO(_i) (0x001E4240 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */ +#define I40E_PRTTSYN_ADJ 0x001E4280 /* Reset: GLOBR */ #define I40E_GL_MDET_RX 0x0012A510 /* Reset: CORER */ #define I40E_GL_MDET_RX_FUNCTION_SHIFT 0 #define I40E_GL_MDET_RX_FUNCTION_MASK I40E_MASK(0xFF, I40E_GL_MDET_RX_FUNCTION_SHIFT) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index eff0a30790dd..472f56b360b8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ 
b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1160,12 +1160,12 @@ static int i40e_quiesce_vf_pci(struct i40e_vf *vf) } /** - * i40e_getnum_vf_vsi_vlan_filters + * __i40e_getnum_vf_vsi_vlan_filters * @vsi: pointer to the vsi * * called to get the number of VLANs offloaded on this VF **/ -static int i40e_getnum_vf_vsi_vlan_filters(struct i40e_vsi *vsi) +static int __i40e_getnum_vf_vsi_vlan_filters(struct i40e_vsi *vsi) { struct i40e_mac_filter *f; u16 num_vlans = 0, bkt; @@ -1179,6 +1179,23 @@ static int i40e_getnum_vf_vsi_vlan_filters(struct i40e_vsi *vsi) } /** + * i40e_getnum_vf_vsi_vlan_filters + * @vsi: pointer to the vsi + * + * wrapper for __i40e_getnum_vf_vsi_vlan_filters() with spinlock held + **/ +static int i40e_getnum_vf_vsi_vlan_filters(struct i40e_vsi *vsi) +{ + int num_vlans; + + spin_lock_bh(&vsi->mac_filter_hash_lock); + num_vlans = __i40e_getnum_vf_vsi_vlan_filters(vsi); + spin_unlock_bh(&vsi->mac_filter_hash_lock); + + return num_vlans; +} + +/** * i40e_get_vlan_list_sync * @vsi: pointer to the VSI * @num_vlans: number of VLANs in mac_filter_hash, returned to caller @@ -1195,7 +1212,7 @@ static void i40e_get_vlan_list_sync(struct i40e_vsi *vsi, u16 *num_vlans, int bkt; spin_lock_bh(&vsi->mac_filter_hash_lock); - *num_vlans = i40e_getnum_vf_vsi_vlan_filters(vsi); + *num_vlans = __i40e_getnum_vf_vsi_vlan_filters(vsi); *vlan_list = kcalloc(*num_vlans, sizeof(**vlan_list), GFP_ATOMIC); if (!(*vlan_list)) goto err; diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 44bafedd09f2..fa6cf20da911 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -132,6 +132,30 @@ enum iavf_status iavf_free_virt_mem_d(struct iavf_hw *hw, } /** + * iavf_lock_timeout - try to set bit but give up after timeout + * @adapter: board private structure + * @bit: bit to set + * @msecs: timeout in msecs + * + * Returns 0 on success, negative on failure + **/ 
+static int iavf_lock_timeout(struct iavf_adapter *adapter, + enum iavf_critical_section_t bit, + unsigned int msecs) +{ + unsigned int wait, delay = 10; + + for (wait = 0; wait < msecs; wait += delay) { + if (!test_and_set_bit(bit, &adapter->crit_section)) + return 0; + + msleep(delay); + } + + return -1; +} + +/** * iavf_schedule_reset - Set the flags and schedule a reset event * @adapter: board private structure **/ @@ -1988,7 +2012,6 @@ static void iavf_watchdog_task(struct work_struct *work) /* check for hw reset */ reg_val = rd32(hw, IAVF_VF_ARQLEN1) & IAVF_VF_ARQLEN1_ARQENABLE_MASK; if (!reg_val) { - adapter->state = __IAVF_RESETTING; adapter->flags |= IAVF_FLAG_RESET_PENDING; adapter->aq_required = 0; adapter->current_op = VIRTCHNL_OP_UNKNOWN; @@ -2102,6 +2125,10 @@ static void iavf_reset_task(struct work_struct *work) if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) return; + if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 200)) { + schedule_work(&adapter->reset_task); + return; + } while (test_and_set_bit(__IAVF_IN_CLIENT_TASK, &adapter->crit_section)) usleep_range(500, 1000); @@ -2308,6 +2335,8 @@ static void iavf_adminq_task(struct work_struct *work) if (!event.msg_buf) goto out; + if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 200)) + goto freedom; do { ret = iavf_clean_arq_element(hw, &event, &pending); v_op = (enum virtchnl_ops)le32_to_cpu(event.desc.cookie_high); @@ -2321,6 +2350,7 @@ static void iavf_adminq_task(struct work_struct *work) if (pending != 0) memset(event.msg_buf, 0, IAVF_MAX_AQ_BUF_SIZE); } while (pending); + clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); if ((adapter->flags & (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED)) || @@ -3625,6 +3655,10 @@ static void iavf_init_task(struct work_struct *work) init_task.work); struct iavf_hw *hw = &adapter->hw; + if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 5000)) { + dev_warn(&adapter->pdev->dev, "failed to set __IAVF_IN_CRITICAL_TASK 
in %s\n", __FUNCTION__); + return; + } switch (adapter->state) { case __IAVF_STARTUP: if (iavf_startup(adapter) < 0) @@ -3637,14 +3671,14 @@ static void iavf_init_task(struct work_struct *work) case __IAVF_INIT_GET_RESOURCES: if (iavf_init_get_resources(adapter) < 0) goto init_failed; - return; + goto out; default: goto init_failed; } queue_delayed_work(iavf_wq, &adapter->init_task, msecs_to_jiffies(30)); - return; + goto out; init_failed: if (++adapter->aq_wait_count > IAVF_AQ_MAX_ERR) { dev_err(&adapter->pdev->dev, @@ -3653,9 +3687,11 @@ init_failed: iavf_shutdown_adminq(hw); adapter->state = __IAVF_STARTUP; queue_delayed_work(iavf_wq, &adapter->init_task, HZ * 5); - return; + goto out; } queue_delayed_work(iavf_wq, &adapter->init_task, HZ); +out: + clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); } /** @@ -3672,9 +3708,12 @@ static void iavf_shutdown(struct pci_dev *pdev) if (netif_running(netdev)) iavf_close(netdev); + if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 5000)) + dev_warn(&adapter->pdev->dev, "failed to set __IAVF_IN_CRITICAL_TASK in %s\n", __FUNCTION__); /* Prevent the watchdog from running. */ adapter->state = __IAVF_REMOVE; adapter->aq_required = 0; + clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); #ifdef CONFIG_PM pci_save_state(pdev); @@ -3908,10 +3947,6 @@ static void iavf_remove(struct pci_dev *pdev) err); } - /* Shut down all the garbage mashers on the detention level */ - adapter->state = __IAVF_REMOVE; - adapter->aq_required = 0; - adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED; iavf_request_reset(adapter); msleep(50); /* If the FW isn't responding, kick it once, but only once. 
*/ @@ -3919,6 +3954,13 @@ static void iavf_remove(struct pci_dev *pdev) iavf_request_reset(adapter); msleep(50); } + if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 5000)) + dev_warn(&adapter->pdev->dev, "failed to set __IAVF_IN_CRITICAL_TASK in %s\n", __FUNCTION__); + + /* Shut down all the garbage mashers on the detention level */ + adapter->state = __IAVF_REMOVE; + adapter->aq_required = 0; + adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED; iavf_free_all_tx_resources(adapter); iavf_free_all_rx_resources(adapter); iavf_misc_irq_disable(adapter); diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.c b/drivers/net/ethernet/intel/igb/e1000_mac.c index e63ee3cca5ea..1277c5c7d099 100644 --- a/drivers/net/ethernet/intel/igb/e1000_mac.c +++ b/drivers/net/ethernet/intel/igb/e1000_mac.c @@ -492,6 +492,7 @@ static u32 igb_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr) **/ static void igb_i21x_hw_doublecheck(struct e1000_hw *hw) { + int failed_cnt = 3; bool is_failed; int i; @@ -502,9 +503,12 @@ static void igb_i21x_hw_doublecheck(struct e1000_hw *hw) is_failed = true; array_wr32(E1000_MTA, i, hw->mac.mta_shadow[i]); wrfl(); - break; } } + if (is_failed && --failed_cnt <= 0) { + hw_dbg("Failed to update MTA_REGISTER, too many retries"); + break; + } } while (is_failed); } diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 636a1b1fb7e1..17f5c003c3df 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2343,8 +2343,7 @@ static void igb_get_strings(struct net_device *netdev, u32 stringset, u8 *data) switch (stringset) { case ETH_SS_TEST: - memcpy(data, *igb_gstrings_test, - IGB_TEST_LEN*ETH_GSTRING_LEN); + memcpy(data, igb_gstrings_test, sizeof(igb_gstrings_test)); break; case ETH_SS_STATS: for (i = 0; i < IGB_GLOBAL_STATS_LEN; i++) diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 
5901ed9fb545..a0ecfe5a4078 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -33,6 +33,8 @@ void igc_ethtool_set_ops(struct net_device *); #define IGC_N_PEROUT 2 #define IGC_N_SDP 4 +#define MAX_FLEX_FILTER 32 + enum igc_mac_filter_type { IGC_MAC_FILTER_TYPE_DST = 0, IGC_MAC_FILTER_TYPE_SRC @@ -476,18 +478,28 @@ struct igc_q_vector { }; enum igc_filter_match_flags { - IGC_FILTER_FLAG_ETHER_TYPE = 0x1, - IGC_FILTER_FLAG_VLAN_TCI = 0x2, - IGC_FILTER_FLAG_SRC_MAC_ADDR = 0x4, - IGC_FILTER_FLAG_DST_MAC_ADDR = 0x8, + IGC_FILTER_FLAG_ETHER_TYPE = BIT(0), + IGC_FILTER_FLAG_VLAN_TCI = BIT(1), + IGC_FILTER_FLAG_SRC_MAC_ADDR = BIT(2), + IGC_FILTER_FLAG_DST_MAC_ADDR = BIT(3), + IGC_FILTER_FLAG_USER_DATA = BIT(4), + IGC_FILTER_FLAG_VLAN_ETYPE = BIT(5), }; struct igc_nfc_filter { u8 match_flags; u16 etype; + __be16 vlan_etype; u16 vlan_tci; u8 src_addr[ETH_ALEN]; u8 dst_addr[ETH_ALEN]; + u8 user_data[8]; + u8 user_mask[8]; + u8 flex_index; + u8 rx_queue; + u8 prio; + u8 immediate_irq; + u8 drop; }; struct igc_nfc_rule { @@ -495,12 +507,24 @@ struct igc_nfc_rule { struct igc_nfc_filter filter; u32 location; u16 action; + bool flex; }; -/* IGC supports a total of 32 NFC rules: 16 MAC address based,, 8 VLAN priority - * based, and 8 ethertype based. +/* IGC supports a total of 32 NFC rules: 16 MAC address based, 8 VLAN priority + * based, 8 ethertype based and 32 Flex filter based rules. 
*/ -#define IGC_MAX_RXNFC_RULES 32 +#define IGC_MAX_RXNFC_RULES 64 + +struct igc_flex_filter { + u8 index; + u8 data[128]; + u8 mask[16]; + u8 length; + u8 rx_queue; + u8 prio; + u8 immediate_irq; + u8 drop; +}; /* igc_desc_unused - calculate if we have unused descriptors */ static inline u16 igc_desc_unused(const struct igc_ring *ring) diff --git a/drivers/net/ethernet/intel/igc/igc_base.c b/drivers/net/ethernet/intel/igc/igc_base.c index d0700d48ecf9..84f142f5e472 100644 --- a/drivers/net/ethernet/intel/igc/igc_base.c +++ b/drivers/net/ethernet/intel/igc/igc_base.c @@ -187,15 +187,7 @@ static s32 igc_init_phy_params_base(struct igc_hw *hw) igc_check_for_copper_link(hw); - /* Verify phy id and set remaining function pointers */ - switch (phy->id) { - case I225_I_PHY_ID: - phy->type = igc_phy_i225; - break; - default: - ret_val = -IGC_ERR_PHY; - goto out; - } + phy->type = igc_phy_i225; out: return ret_val; diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index c3a5a5518790..c6315690e20f 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -17,11 +17,22 @@ #define IGC_WUC_PME_EN 0x00000002 /* PME Enable */ /* Wake Up Filter Control */ -#define IGC_WUFC_LNKC 0x00000001 /* Link Status Change Wakeup Enable */ -#define IGC_WUFC_MAG 0x00000002 /* Magic Packet Wakeup Enable */ -#define IGC_WUFC_EX 0x00000004 /* Directed Exact Wakeup Enable */ -#define IGC_WUFC_MC 0x00000008 /* Directed Multicast Wakeup Enable */ -#define IGC_WUFC_BC 0x00000010 /* Broadcast Wakeup Enable */ +#define IGC_WUFC_LNKC 0x00000001 /* Link Status Change Wakeup Enable */ +#define IGC_WUFC_MAG 0x00000002 /* Magic Packet Wakeup Enable */ +#define IGC_WUFC_EX 0x00000004 /* Directed Exact Wakeup Enable */ +#define IGC_WUFC_MC 0x00000008 /* Directed Multicast Wakeup Enable */ +#define IGC_WUFC_BC 0x00000010 /* Broadcast Wakeup Enable */ +#define IGC_WUFC_FLEX_HQ BIT(14) /* Flex Filters 
Host Queuing */ +#define IGC_WUFC_FLX0 BIT(16) /* Flexible Filter 0 Enable */ +#define IGC_WUFC_FLX1 BIT(17) /* Flexible Filter 1 Enable */ +#define IGC_WUFC_FLX2 BIT(18) /* Flexible Filter 2 Enable */ +#define IGC_WUFC_FLX3 BIT(19) /* Flexible Filter 3 Enable */ +#define IGC_WUFC_FLX4 BIT(20) /* Flexible Filter 4 Enable */ +#define IGC_WUFC_FLX5 BIT(21) /* Flexible Filter 5 Enable */ +#define IGC_WUFC_FLX6 BIT(22) /* Flexible Filter 6 Enable */ +#define IGC_WUFC_FLX7 BIT(23) /* Flexible Filter 7 Enable */ + +#define IGC_WUFC_FILTER_MASK GENMASK(23, 14) #define IGC_CTRL_ADVD3WUC 0x00100000 /* D3 WUC */ @@ -46,6 +57,37 @@ /* Wake Up Packet Memory stores the first 128 bytes of the wake up packet */ #define IGC_WUPM_BYTES 128 +/* Wakeup Filter Control Extended */ +#define IGC_WUFC_EXT_FLX8 BIT(8) /* Flexible Filter 8 Enable */ +#define IGC_WUFC_EXT_FLX9 BIT(9) /* Flexible Filter 9 Enable */ +#define IGC_WUFC_EXT_FLX10 BIT(10) /* Flexible Filter 10 Enable */ +#define IGC_WUFC_EXT_FLX11 BIT(11) /* Flexible Filter 11 Enable */ +#define IGC_WUFC_EXT_FLX12 BIT(12) /* Flexible Filter 12 Enable */ +#define IGC_WUFC_EXT_FLX13 BIT(13) /* Flexible Filter 13 Enable */ +#define IGC_WUFC_EXT_FLX14 BIT(14) /* Flexible Filter 14 Enable */ +#define IGC_WUFC_EXT_FLX15 BIT(15) /* Flexible Filter 15 Enable */ +#define IGC_WUFC_EXT_FLX16 BIT(16) /* Flexible Filter 16 Enable */ +#define IGC_WUFC_EXT_FLX17 BIT(17) /* Flexible Filter 17 Enable */ +#define IGC_WUFC_EXT_FLX18 BIT(18) /* Flexible Filter 18 Enable */ +#define IGC_WUFC_EXT_FLX19 BIT(19) /* Flexible Filter 19 Enable */ +#define IGC_WUFC_EXT_FLX20 BIT(20) /* Flexible Filter 20 Enable */ +#define IGC_WUFC_EXT_FLX21 BIT(21) /* Flexible Filter 21 Enable */ +#define IGC_WUFC_EXT_FLX22 BIT(22) /* Flexible Filter 22 Enable */ +#define IGC_WUFC_EXT_FLX23 BIT(23) /* Flexible Filter 23 Enable */ +#define IGC_WUFC_EXT_FLX24 BIT(24) /* Flexible Filter 24 Enable */ +#define IGC_WUFC_EXT_FLX25 BIT(25) /* Flexible Filter 25 Enable */ +#define 
IGC_WUFC_EXT_FLX26 BIT(26) /* Flexible Filter 26 Enable */ +#define IGC_WUFC_EXT_FLX27 BIT(27) /* Flexible Filter 27 Enable */ +#define IGC_WUFC_EXT_FLX28 BIT(28) /* Flexible Filter 28 Enable */ +#define IGC_WUFC_EXT_FLX29 BIT(29) /* Flexible Filter 29 Enable */ +#define IGC_WUFC_EXT_FLX30 BIT(30) /* Flexible Filter 30 Enable */ +#define IGC_WUFC_EXT_FLX31 BIT(31) /* Flexible Filter 31 Enable */ + +#define IGC_WUFC_EXT_FILTER_MASK GENMASK(31, 8) + +/* Physical Func Reset Done Indication */ +#define IGC_CTRL_EXT_LINK_MODE_MASK 0x00C00000 + /* Loop limit on how long we wait for auto-negotiation to complete */ #define COPPER_LINK_UP_LIMIT 10 #define PHY_AUTO_NEG_LIMIT 45 diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index fa4171860623..d3e84416248e 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -979,6 +979,12 @@ static int igc_ethtool_get_nfc_rule(struct igc_adapter *adapter, eth_broadcast_addr(fsp->m_u.ether_spec.h_source); } + if (rule->filter.match_flags & IGC_FILTER_FLAG_USER_DATA) { + fsp->flow_type |= FLOW_EXT; + memcpy(fsp->h_ext.data, rule->filter.user_data, sizeof(fsp->h_ext.data)); + memcpy(fsp->m_ext.data, rule->filter.user_mask, sizeof(fsp->m_ext.data)); + } + mutex_unlock(&adapter->nfc_rule_lock); return 0; @@ -1215,6 +1221,30 @@ static void igc_ethtool_init_nfc_rule(struct igc_nfc_rule *rule, ether_addr_copy(rule->filter.dst_addr, fsp->h_u.ether_spec.h_dest); } + + /* VLAN etype matching */ + if ((fsp->flow_type & FLOW_EXT) && fsp->h_ext.vlan_etype) { + rule->filter.vlan_etype = fsp->h_ext.vlan_etype; + rule->filter.match_flags |= IGC_FILTER_FLAG_VLAN_ETYPE; + } + + /* Check for user defined data */ + if ((fsp->flow_type & FLOW_EXT) && + (fsp->h_ext.data[0] || fsp->h_ext.data[1])) { + rule->filter.match_flags |= IGC_FILTER_FLAG_USER_DATA; + memcpy(rule->filter.user_data, fsp->h_ext.data, sizeof(fsp->h_ext.data)); + 
memcpy(rule->filter.user_mask, fsp->m_ext.data, sizeof(fsp->m_ext.data)); + } + + /* When multiple filter options or user data or vlan etype is set, use a + * flex filter. + */ + if ((rule->filter.match_flags & IGC_FILTER_FLAG_USER_DATA) || + (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_ETYPE) || + (rule->filter.match_flags & (rule->filter.match_flags - 1))) + rule->flex = true; + else + rule->flex = false; } /** @@ -1244,11 +1274,6 @@ static int igc_ethtool_check_nfc_rule(struct igc_adapter *adapter, return -EINVAL; } - if (flags & (flags - 1)) { - netdev_dbg(dev, "Rule with multiple matches not supported\n"); - return -EOPNOTSUPP; - } - list_for_each_entry(tmp, &adapter->nfc_rule_list, list) { if (!memcmp(&rule->filter, &tmp->filter, sizeof(rule->filter)) && @@ -1280,12 +1305,6 @@ static int igc_ethtool_add_nfc_rule(struct igc_adapter *adapter, return -EOPNOTSUPP; } - if ((fsp->flow_type & FLOW_EXT) && - fsp->m_ext.vlan_tci != htons(VLAN_PRIO_MASK)) { - netdev_dbg(netdev, "VLAN mask not supported\n"); - return -EOPNOTSUPP; - } - if (fsp->ring_cookie >= adapter->num_rx_queues) { netdev_dbg(netdev, "Invalid action\n"); return -EINVAL; diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index e29aadbc6744..5c95bf82eaf7 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -3075,11 +3075,320 @@ static void igc_del_etype_filter(struct igc_adapter *adapter, u16 etype) etype); } +static int igc_flex_filter_select(struct igc_adapter *adapter, + struct igc_flex_filter *input, + u32 *fhft) +{ + struct igc_hw *hw = &adapter->hw; + u8 fhft_index; + u32 fhftsl; + + if (input->index >= MAX_FLEX_FILTER) { + dev_err(&adapter->pdev->dev, "Wrong Flex Filter index selected!\n"); + return -EINVAL; + } + + /* Indirect table select register */ + fhftsl = rd32(IGC_FHFTSL); + fhftsl &= ~IGC_FHFTSL_FTSL_MASK; + switch (input->index) { + case 0 ... 
7: + fhftsl |= 0x00; + break; + case 8 ... 15: + fhftsl |= 0x01; + break; + case 16 ... 23: + fhftsl |= 0x02; + break; + case 24 ... 31: + fhftsl |= 0x03; + break; + } + wr32(IGC_FHFTSL, fhftsl); + + /* Normalize index down to host table register */ + fhft_index = input->index % 8; + + *fhft = (fhft_index < 4) ? IGC_FHFT(fhft_index) : + IGC_FHFT_EXT(fhft_index - 4); + + return 0; +} + +static int igc_write_flex_filter_ll(struct igc_adapter *adapter, + struct igc_flex_filter *input) +{ + struct device *dev = &adapter->pdev->dev; + struct igc_hw *hw = &adapter->hw; + u8 *data = input->data; + u8 *mask = input->mask; + u32 queuing; + u32 fhft; + u32 wufc; + int ret; + int i; + + /* Length has to be aligned to 8. Otherwise the filter will fail. Bail + * out early to avoid surprises later. + */ + if (input->length % 8 != 0) { + dev_err(dev, "The length of a flex filter has to be 8 byte aligned!\n"); + return -EINVAL; + } + + /* Select corresponding flex filter register and get base for host table. */ + ret = igc_flex_filter_select(adapter, input, &fhft); + if (ret) + return ret; + + /* When adding a filter globally disable flex filter feature. That is + * recommended within the datasheet. 
+ */ + wufc = rd32(IGC_WUFC); + wufc &= ~IGC_WUFC_FLEX_HQ; + wr32(IGC_WUFC, wufc); + + /* Configure filter */ + queuing = input->length & IGC_FHFT_LENGTH_MASK; + queuing |= (input->rx_queue << IGC_FHFT_QUEUE_SHIFT) & IGC_FHFT_QUEUE_MASK; + queuing |= (input->prio << IGC_FHFT_PRIO_SHIFT) & IGC_FHFT_PRIO_MASK; + + if (input->immediate_irq) + queuing |= IGC_FHFT_IMM_INT; + + if (input->drop) + queuing |= IGC_FHFT_DROP; + + wr32(fhft + 0xFC, queuing); + + /* Write data (128 byte) and mask (128 bit) */ + for (i = 0; i < 16; ++i) { + const size_t data_idx = i * 8; + const size_t row_idx = i * 16; + u32 dw0 = + (data[data_idx + 0] << 0) | + (data[data_idx + 1] << 8) | + (data[data_idx + 2] << 16) | + (data[data_idx + 3] << 24); + u32 dw1 = + (data[data_idx + 4] << 0) | + (data[data_idx + 5] << 8) | + (data[data_idx + 6] << 16) | + (data[data_idx + 7] << 24); + u32 tmp; + + /* Write row: dw0, dw1 and mask */ + wr32(fhft + row_idx, dw0); + wr32(fhft + row_idx + 4, dw1); + + /* mask is only valid for MASK(7, 0) */ + tmp = rd32(fhft + row_idx + 8); + tmp &= ~GENMASK(7, 0); + tmp |= mask[i]; + wr32(fhft + row_idx + 8, tmp); + } + + /* Enable filter. */ + wufc |= IGC_WUFC_FLEX_HQ; + if (input->index > 8) { + /* Filter 0-7 are enabled via WUFC. The other 24 filters are not. 
*/ + u32 wufc_ext = rd32(IGC_WUFC_EXT); + + wufc_ext |= (IGC_WUFC_EXT_FLX8 << (input->index - 8)); + + wr32(IGC_WUFC_EXT, wufc_ext); + } else { + wufc |= (IGC_WUFC_FLX0 << input->index); + } + wr32(IGC_WUFC, wufc); + + dev_dbg(&adapter->pdev->dev, "Added flex filter %u to HW.\n", + input->index); + + return 0; +} + +static void igc_flex_filter_add_field(struct igc_flex_filter *flex, + const void *src, unsigned int offset, + size_t len, const void *mask) +{ + int i; + + /* data */ + memcpy(&flex->data[offset], src, len); + + /* mask */ + for (i = 0; i < len; ++i) { + const unsigned int idx = i + offset; + const u8 *ptr = mask; + + if (mask) { + if (ptr[i] & 0xff) + flex->mask[idx / 8] |= BIT(idx % 8); + + continue; + } + + flex->mask[idx / 8] |= BIT(idx % 8); + } +} + +static int igc_find_avail_flex_filter_slot(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + u32 wufc, wufc_ext; + int i; + + wufc = rd32(IGC_WUFC); + wufc_ext = rd32(IGC_WUFC_EXT); + + for (i = 0; i < MAX_FLEX_FILTER; i++) { + if (i < 8) { + if (!(wufc & (IGC_WUFC_FLX0 << i))) + return i; + } else { + if (!(wufc_ext & (IGC_WUFC_EXT_FLX8 << (i - 8)))) + return i; + } + } + + return -ENOSPC; +} + +static bool igc_flex_filter_in_use(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + u32 wufc, wufc_ext; + + wufc = rd32(IGC_WUFC); + wufc_ext = rd32(IGC_WUFC_EXT); + + if (wufc & IGC_WUFC_FILTER_MASK) + return true; + + if (wufc_ext & IGC_WUFC_EXT_FILTER_MASK) + return true; + + return false; +} + +static int igc_add_flex_filter(struct igc_adapter *adapter, + struct igc_nfc_rule *rule) +{ + struct igc_flex_filter flex = { }; + struct igc_nfc_filter *filter = &rule->filter; + unsigned int eth_offset, user_offset; + int ret, index; + bool vlan; + + index = igc_find_avail_flex_filter_slot(adapter); + if (index < 0) + return -ENOSPC; + + /* Construct the flex filter: + * -> dest_mac [6] + * -> src_mac [6] + * -> tpid [2] + * -> vlan tci [2] + * -> ether type [2] + * -> 
user data [8] + * -> = 26 bytes => 32 length + */ + flex.index = index; + flex.length = 32; + flex.rx_queue = rule->action; + + vlan = rule->filter.vlan_tci || rule->filter.vlan_etype; + eth_offset = vlan ? 16 : 12; + user_offset = vlan ? 18 : 14; + + /* Add destination MAC */ + if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) + igc_flex_filter_add_field(&flex, &filter->dst_addr, 0, + ETH_ALEN, NULL); + + /* Add source MAC */ + if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) + igc_flex_filter_add_field(&flex, &filter->src_addr, 6, + ETH_ALEN, NULL); + + /* Add VLAN etype */ + if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_ETYPE) + igc_flex_filter_add_field(&flex, &filter->vlan_etype, 12, + sizeof(filter->vlan_etype), + NULL); + + /* Add VLAN TCI */ + if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) + igc_flex_filter_add_field(&flex, &filter->vlan_tci, 14, + sizeof(filter->vlan_tci), NULL); + + /* Add Ether type */ + if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) { + __be16 etype = cpu_to_be16(filter->etype); + + igc_flex_filter_add_field(&flex, &etype, eth_offset, + sizeof(etype), NULL); + } + + /* Add user data */ + if (rule->filter.match_flags & IGC_FILTER_FLAG_USER_DATA) + igc_flex_filter_add_field(&flex, &filter->user_data, + user_offset, + sizeof(filter->user_data), + filter->user_mask); + + /* Add it down to the hardware and enable it. */ + ret = igc_write_flex_filter_ll(adapter, &flex); + if (ret) + return ret; + + filter->flex_index = index; + + return 0; +} + +static void igc_del_flex_filter(struct igc_adapter *adapter, + u16 reg_index) +{ + struct igc_hw *hw = &adapter->hw; + u32 wufc; + + /* Just disable the filter. The filter table itself is kept + * intact. Another flex_filter_add() should override the "old" data + * then. 
+ */ + if (reg_index > 8) { + u32 wufc_ext = rd32(IGC_WUFC_EXT); + + wufc_ext &= ~(IGC_WUFC_EXT_FLX8 << (reg_index - 8)); + wr32(IGC_WUFC_EXT, wufc_ext); + } else { + wufc = rd32(IGC_WUFC); + + wufc &= ~(IGC_WUFC_FLX0 << reg_index); + wr32(IGC_WUFC, wufc); + } + + if (igc_flex_filter_in_use(adapter)) + return; + + /* No filters are in use, we may disable flex filters */ + wufc = rd32(IGC_WUFC); + wufc &= ~IGC_WUFC_FLEX_HQ; + wr32(IGC_WUFC, wufc); +} + static int igc_enable_nfc_rule(struct igc_adapter *adapter, - const struct igc_nfc_rule *rule) + struct igc_nfc_rule *rule) { int err; + if (rule->flex) { + return igc_add_flex_filter(adapter, rule); + } + if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) { err = igc_add_etype_filter(adapter, rule->filter.etype, rule->action); @@ -3116,6 +3425,11 @@ static int igc_enable_nfc_rule(struct igc_adapter *adapter, static void igc_disable_nfc_rule(struct igc_adapter *adapter, const struct igc_nfc_rule *rule) { + if (rule->flex) { + igc_del_flex_filter(adapter, rule->filter.flex_index); + return; + } + if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) igc_del_etype_filter(adapter, rule->filter.etype); @@ -4811,6 +5125,7 @@ static irqreturn_t igc_msix_ring(int irq, void *data) */ static int igc_request_msix(struct igc_adapter *adapter) { + unsigned int num_q_vectors = adapter->num_q_vectors; int i = 0, err = 0, vector = 0, free_vector = 0; struct net_device *netdev = adapter->netdev; @@ -4819,7 +5134,13 @@ static int igc_request_msix(struct igc_adapter *adapter) if (err) goto err_out; - for (i = 0; i < adapter->num_q_vectors; i++) { + if (num_q_vectors > MAX_Q_VECTORS) { + num_q_vectors = MAX_Q_VECTORS; + dev_warn(&adapter->pdev->dev, + "The number of queue vectors (%d) is higher than max allowed (%d)\n", + adapter->num_q_vectors, MAX_Q_VECTORS); + } + for (i = 0; i < num_q_vectors; i++) { struct igc_q_vector *q_vector = adapter->q_vector[i]; vector++; @@ -4898,20 +5219,12 @@ bool igc_has_link(struct 
igc_adapter *adapter) * false until the igc_check_for_link establishes link * for copper adapters ONLY */ - switch (hw->phy.media_type) { - case igc_media_type_copper: - if (!hw->mac.get_link_status) - return true; - hw->mac.ops.check_for_link(hw); - link_active = !hw->mac.get_link_status; - break; - default: - case igc_media_type_unknown: - break; - } + if (!hw->mac.get_link_status) + return true; + hw->mac.ops.check_for_link(hw); + link_active = !hw->mac.get_link_status; - if (hw->mac.type == igc_i225 && - hw->phy.id == I225_I_PHY_ID) { + if (hw->mac.type == igc_i225) { if (!netif_carrier_ok(adapter->netdev)) { adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE; } else if (!(adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)) { @@ -4999,7 +5312,9 @@ static void igc_watchdog_task(struct work_struct *work) adapter->tx_timeout_factor = 14; break; case SPEED_100: - /* maybe add some timeout factor ? */ + case SPEED_1000: + case SPEED_2500: + adapter->tx_timeout_factor = 7; break; } diff --git a/drivers/net/ethernet/intel/igc/igc_phy.c b/drivers/net/ethernet/intel/igc/igc_phy.c index 83aeb5e7076f..5cad31c3c7b0 100644 --- a/drivers/net/ethernet/intel/igc/igc_phy.c +++ b/drivers/net/ethernet/intel/igc/igc_phy.c @@ -249,8 +249,7 @@ static s32 igc_phy_setup_autoneg(struct igc_hw *hw) return ret_val; } - if ((phy->autoneg_mask & ADVERTISE_2500_FULL) && - hw->phy.id == I225_I_PHY_ID) { + if (phy->autoneg_mask & ADVERTISE_2500_FULL) { /* Read the MULTI GBT AN Control Register - reg 7.32 */ ret_val = phy->ops.read_reg(hw, (STANDARD_AN_REG_MASK << MMD_DEVADDR_SHIFT) | @@ -390,8 +389,7 @@ static s32 igc_phy_setup_autoneg(struct igc_hw *hw) ret_val = phy->ops.write_reg(hw, PHY_1000T_CTRL, mii_1000t_ctrl_reg); - if ((phy->autoneg_mask & ADVERTISE_2500_FULL) && - hw->phy.id == I225_I_PHY_ID) + if (phy->autoneg_mask & ADVERTISE_2500_FULL) ret_val = phy->ops.write_reg(hw, (STANDARD_AN_REG_MASK << MMD_DEVADDR_SHIFT) | diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h 
b/drivers/net/ethernet/intel/igc/igc_regs.h index 0f82990567d9..828c3501c448 100644 --- a/drivers/net/ethernet/intel/igc/igc_regs.h +++ b/drivers/net/ethernet/intel/igc/igc_regs.h @@ -67,6 +67,9 @@ /* Filtering Registers */ #define IGC_ETQF(_n) (0x05CB0 + (4 * (_n))) /* EType Queue Fltr */ +#define IGC_FHFT(_n) (0x09000 + (256 * (_n))) /* Flexible Host Filter */ +#define IGC_FHFT_EXT(_n) (0x09A00 + (256 * (_n))) /* Flexible Host Filter Extended */ +#define IGC_FHFTSL 0x05804 /* Flex Filter indirect table select */ /* ETQF register bit definitions */ #define IGC_ETQF_FILTER_ENABLE BIT(26) @@ -75,6 +78,19 @@ #define IGC_ETQF_QUEUE_MASK 0x00070000 #define IGC_ETQF_ETYPE_MASK 0x0000FFFF +/* FHFT register bit definitions */ +#define IGC_FHFT_LENGTH_MASK GENMASK(7, 0) +#define IGC_FHFT_QUEUE_SHIFT 8 +#define IGC_FHFT_QUEUE_MASK GENMASK(10, 8) +#define IGC_FHFT_PRIO_SHIFT 16 +#define IGC_FHFT_PRIO_MASK GENMASK(18, 16) +#define IGC_FHFT_IMM_INT BIT(24) +#define IGC_FHFT_DROP BIT(25) + +/* FHFTSL register bit definitions */ +#define IGC_FHFTSL_FTSL_SHIFT 0 +#define IGC_FHFTSL_FTSL_MASK GENMASK(1, 0) + /* Redirection Table - RW Array */ #define IGC_RETA(_i) (0x05C00 + ((_i) * 4)) /* RSS Random Key - RW Array */ @@ -240,6 +256,7 @@ #define IGC_WUFC 0x05808 /* Wakeup Filter Control - RW */ #define IGC_WUS 0x05810 /* Wakeup Status - R/W1C */ #define IGC_WUPL 0x05900 /* Wakeup Packet Length - RW */ +#define IGC_WUFC_EXT 0x0580C /* Wakeup Filter Control Register Extended - RW */ /* Wake Up packet memory */ #define IGC_WUPM_REG(_i) (0x05A00 + ((_i) * 4)) diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c index 174103c4bea6..4dbbb8a32ce9 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.c +++ b/drivers/net/ethernet/intel/igc/igc_tsn.c @@ -52,7 +52,7 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter) wr32(IGC_ENDQT(i), NSEC_PER_SEC); } - wr32(IGC_QBVCYCLET_S, NSEC_PER_SEC); + wr32(IGC_QBVCYCLET_S, 0); 
wr32(IGC_QBVCYCLET, NSEC_PER_SEC); adapter->flags &= ~IGC_FLAG_TSN_QBV_ENABLED; diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 76a7777c746d..975a1a77d445 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2666,7 +2666,7 @@ static int mvneta_tx_tso(struct sk_buff *skb, struct net_device *dev, return 0; if (skb_headlen(skb) < (skb_transport_offset(skb) + tcp_hdrlen(skb))) { - pr_info("*** Is this even possible???!?!?\n"); + pr_info("*** Is this even possible?\n"); return 0; } @@ -3832,12 +3832,20 @@ static void mvneta_validate(struct phylink_config *config, struct mvneta_port *pp = netdev_priv(ndev); __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; - /* We only support QSGMII, SGMII, 802.3z and RGMII modes */ - if (state->interface != PHY_INTERFACE_MODE_NA && - state->interface != PHY_INTERFACE_MODE_QSGMII && - state->interface != PHY_INTERFACE_MODE_SGMII && - !phy_interface_mode_is_8023z(state->interface) && - !phy_interface_mode_is_rgmii(state->interface)) { + /* We only support QSGMII, SGMII, 802.3z and RGMII modes. + * When in 802.3z mode, we must have AN enabled: + * "Bit 2 Field InBandAnEn In-band Auto-Negotiation enable. ... + * When <PortType> = 1 (1000BASE-X) this field must be set to 1." 
+ */ + if (phy_interface_mode_is_8023z(state->interface)) { + if (!phylink_test(state->advertising, Autoneg)) { + bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS); + return; + } + } else if (state->interface != PHY_INTERFACE_MODE_NA && + state->interface != PHY_INTERFACE_MODE_QSGMII && + state->interface != PHY_INTERFACE_MODE_SGMII && + !phy_interface_mode_is_rgmii(state->interface)) { bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS); return; } diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 3229bafa2a2c..878fb17dea41 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -6269,6 +6269,15 @@ static void mvpp2_phylink_validate(struct phylink_config *config, if (!mvpp2_port_supports_rgmii(port)) goto empty_set; break; + case PHY_INTERFACE_MODE_1000BASEX: + case PHY_INTERFACE_MODE_2500BASEX: + /* When in 802.3z mode, we must have AN enabled: + * Bit 2 Field InBandAnEn In-band Auto-Negotiation enable. ... + * When <PortType> = 1 (1000BASE-X) this field must be set to 1. 
+ */ + if (!phylink_test(state->advertising, Autoneg)) + goto empty_set; + break; default: break; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.h index 64aa7d350df1..6af97ce69443 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.h @@ -14,6 +14,8 @@ #include <linux/tracepoint.h> #include <linux/pci.h> +#include "mbox.h" + TRACE_EVENT(otx2_msg_alloc, TP_PROTO(const struct pci_dev *pdev, u16 id, u64 size), TP_ARGS(pdev, id, size), @@ -25,8 +27,8 @@ TRACE_EVENT(otx2_msg_alloc, __entry->id = id; __entry->size = size; ), - TP_printk("[%s] msg:(0x%x) size:%lld\n", __get_str(dev), - __entry->id, __entry->size) + TP_printk("[%s] msg:(%s) size:%lld\n", __get_str(dev), + otx2_mbox_id2name(__entry->id), __entry->size) ); TRACE_EVENT(otx2_msg_send, @@ -88,8 +90,8 @@ TRACE_EVENT(otx2_msg_process, __entry->id = id; __entry->err = err; ), - TP_printk("[%s] msg:(0x%x) error:%d\n", __get_str(dev), - __entry->id, __entry->err) + TP_printk("[%s] msg:(%s) error:%d\n", __get_str(dev), + otx2_mbox_id2name(__entry->id), __entry->err) ); #endif /* __RVU_TRACE_H */ diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c index 226f4ff29f6e..7c569c1abefc 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_main.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c @@ -746,7 +746,8 @@ static int prestera_netdev_port_event(struct net_device *lower, case NETDEV_CHANGEUPPER: if (netif_is_bridge_master(upper)) { if (info->linking) - return prestera_bridge_port_join(upper, port); + return prestera_bridge_port_join(upper, port, + extack); else prestera_bridge_port_leave(upper, port); } else if (netif_is_lag_master(upper)) { diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c index 
0b3e8f2db294..be01ec8284e6 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c @@ -480,7 +480,8 @@ err_port_flood_set: } int prestera_bridge_port_join(struct net_device *br_dev, - struct prestera_port *port) + struct prestera_port *port, + struct netlink_ext_ack *extack) { struct prestera_switchdev *swdev = port->sw->swdev; struct prestera_bridge_port *br_port; @@ -500,6 +501,11 @@ int prestera_bridge_port_join(struct net_device *br_dev, goto err_brport_create; } + err = switchdev_bridge_port_offload(br_port->dev, port->dev, NULL, + NULL, NULL, false, extack); + if (err) + goto err_switchdev_offload; + if (bridge->vlan_enabled) return 0; @@ -510,6 +516,8 @@ int prestera_bridge_port_join(struct net_device *br_dev, return 0; err_port_join: + switchdev_bridge_port_unoffload(br_port->dev, NULL, NULL, NULL); +err_switchdev_offload: prestera_bridge_port_put(br_port); err_brport_create: prestera_bridge_put(bridge); @@ -584,6 +592,8 @@ void prestera_bridge_port_leave(struct net_device *br_dev, else prestera_bridge_1d_port_leave(br_port); + switchdev_bridge_port_unoffload(br_port->dev, NULL, NULL, NULL); + prestera_hw_port_learning_set(port, false); prestera_hw_port_flood_set(port, BR_FLOOD | BR_MCAST_FLOOD, 0); prestera_port_vid_stp_set(port, PRESTERA_VID_ALL, BR_STATE_FORWARDING); diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.h b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.h index a91bc35d235f..0e93fda3d9a5 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.h +++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.h @@ -8,7 +8,8 @@ int prestera_switchdev_init(struct prestera_switch *sw); void prestera_switchdev_fini(struct prestera_switch *sw); int prestera_bridge_port_join(struct net_device *br_dev, - struct prestera_port *port); + struct prestera_port *port, + struct netlink_ext_ack *extack); void 
prestera_bridge_port_leave(struct net_device *br_dev, struct prestera_port *port); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index b5072a3a2585..6378dc815df7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -27,7 +27,8 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ en_selftest.o en/port.o en/monitor_stats.o en/health.o \ en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \ en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o \ - en/qos.o en/trap.o en/fs_tt_redirect.o + en/qos.o en/trap.o en/fs_tt_redirect.o en/rqt.o en/tir.o \ + en/rx_res.o # # Netdev extra diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index b1b51bbba054..35668986878a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -58,6 +58,7 @@ #include "en/qos.h" #include "lib/hv_vhca.h" #include "lib/clock.h" +#include "en/rx_res.h" extern const struct net_device_ops mlx5e_netdev_ops; struct page_pool; @@ -126,7 +127,6 @@ struct page_pool; #define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x2 -#define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024) #define MLX5E_DEFAULT_LRO_TIMEOUT 32 #define MLX5E_LRO_TIMEOUT_ARR_SIZE 4 @@ -139,10 +139,7 @@ struct page_pool; #define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES 0x80 #define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW 0x2 -#define MLX5E_LOG_INDIR_RQT_SIZE 0x8 -#define MLX5E_INDIR_RQT_SIZE BIT(MLX5E_LOG_INDIR_RQT_SIZE) #define MLX5E_MIN_NUM_CHANNELS 0x1 -#define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE / 2) #define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC) #define MLX5E_TX_CQ_POLL_BUDGET 128 #define MLX5E_TX_XSK_POLL_BUDGET 64 @@ -745,29 +742,11 @@ enum { MLX5E_STATE_XDP_ACTIVE, }; -struct mlx5e_rqt { - u32 rqtn; - bool enabled; -}; - -struct mlx5e_tir { 
- u32 tirn; - struct mlx5e_rqt rqt; - struct list_head list; -}; - enum { MLX5E_TC_PRIO = 0, MLX5E_NIC_PRIO }; -struct mlx5e_rss_params { - u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE]; - u32 rx_hash_fields[MLX5E_NUM_INDIR_TIRS]; - u8 toeplitz_hash_key[40]; - u8 hfunc; -}; - struct mlx5e_modify_sq_param { int curr_state; int next_state; @@ -837,13 +816,7 @@ struct mlx5e_priv { struct mlx5e_channels channels; u32 tisn[MLX5_MAX_PORTS][MLX5E_MAX_NUM_TC]; - struct mlx5e_rqt indir_rqt; - struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; - struct mlx5e_tir inner_indir_tir[MLX5E_NUM_INDIR_TIRS]; - struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS]; - struct mlx5e_tir xsk_tir[MLX5E_MAX_NUM_CHANNELS]; - struct mlx5e_tir ptp_tir; - struct mlx5e_rss_params rss_params; + struct mlx5e_rx_res *rx_res; u32 tx_rates[MLX5E_MAX_NUM_SQS]; struct mlx5e_flow_steering fs; @@ -948,24 +921,7 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, u16 vid); void mlx5e_timestamp_init(struct mlx5e_priv *priv); -struct mlx5e_redirect_rqt_param { - bool is_rss; - union { - u32 rqn; /* Direct RQN (Non-RSS) */ - struct { - u8 hfunc; - struct mlx5e_channels *channels; - } rss; /* RSS data */ - }; -}; - -int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, - struct mlx5e_redirect_rqt_param rrp); -void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_rss_params *rss_params, - const struct mlx5e_tirc_config *ttconfig, - void *tirc, bool inner); -void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in); -struct mlx5e_tirc_config mlx5e_tirc_get_default_config(enum mlx5e_traffic_types tt); +int mlx5e_modify_tirs_hash(struct mlx5e_priv *priv); struct mlx5e_xsk_param; @@ -1065,10 +1021,6 @@ static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev) extern const struct ethtool_ops mlx5e_ethtool_ops; -int mlx5e_create_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir, - u32 *in); -void mlx5e_destroy_tir(struct mlx5_core_dev *mdev, - struct mlx5e_tir 
*tir); int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev); void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev); int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb, @@ -1089,11 +1041,10 @@ int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv); int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc); void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv); -int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n); -void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n); -int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n); -void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n); -void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt); +int mlx5e_create_direct_rqts(struct mlx5e_priv *priv); +void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv); +int mlx5e_create_direct_tirs(struct mlx5e_priv *priv); +void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv); int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn); void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn); @@ -1106,7 +1057,6 @@ int mlx5e_close(struct net_device *netdev); int mlx5e_open(struct net_device *netdev); void mlx5e_queue_update_stats(struct mlx5e_priv *priv); -int mlx5e_bits_invert(unsigned long a, int size); int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv); int mlx5e_set_dev_port_mtu_ctx(struct mlx5e_priv *priv, void *context); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index 1d5ce07b83f4..0e053aab12b5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -83,12 +83,6 @@ enum mlx5e_traffic_types { MLX5E_NUM_INDIR_TIRS = MLX5E_TT_ANY, }; -struct mlx5e_tirc_config { - u8 l3_prot_type; - u8 l4_prot_type; - u32 rx_hash_fields; -}; - #define MLX5_HASH_IP 
(MLX5_HASH_FIELD_SEL_SRC_IP |\ MLX5_HASH_FIELD_SEL_DST_IP) #define MLX5_HASH_IP_L4PORTS (MLX5_HASH_FIELD_SEL_SRC_IP |\ @@ -160,6 +154,8 @@ enum { MLX5E_INNER_TTC_GROUP2_SIZE +\ MLX5E_INNER_TTC_GROUP3_SIZE) +struct mlx5e_priv; + #ifdef CONFIG_MLX5_EN_RXNFC struct mlx5e_ethtool_table { @@ -248,18 +244,12 @@ struct ttc_params { void mlx5e_set_ttc_basic_params(struct mlx5e_priv *priv, struct ttc_params *ttc_params); void mlx5e_set_ttc_ft_params(struct ttc_params *ttc_params); -void mlx5e_set_inner_ttc_ft_params(struct ttc_params *ttc_params); int mlx5e_create_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params, struct mlx5e_ttc_table *ttc); void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv, struct mlx5e_ttc_table *ttc); -int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params, - struct mlx5e_ttc_table *ttc); -void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv, - struct mlx5e_ttc_table *ttc); - void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft); int mlx5e_ttc_fwd_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type, struct mlx5_flow_destination *new_dest); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 150c8e82c738..fc602d85ca48 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -167,6 +167,18 @@ u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, return is_linear_skb ? 
mlx5e_get_linear_rq_headroom(params, xsk) : 0; } +struct mlx5e_lro_param mlx5e_get_lro_param(struct mlx5e_params *params) +{ + struct mlx5e_lro_param lro_param; + + lro_param = (struct mlx5e_lro_param) { + .enabled = params->lro_en, + .timeout = params->lro_timeout, + }; + + return lro_param; +} + u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { bool is_mpwqe = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index e9593f5f0661..879ad46d754e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -11,6 +11,11 @@ struct mlx5e_xsk_param { u16 chunk_size; }; +struct mlx5e_lro_param { + bool enabled; + u32 timeout; +}; + struct mlx5e_cq_param { u32 cqc[MLX5_ST_SZ_DW(cqc)]; struct mlx5_wq_param wq; @@ -120,6 +125,7 @@ u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev, u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); +struct mlx5e_lro_param mlx5e_get_lro_param(struct mlx5e_params *params); /* Build queue parameters */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index 778e229310a9..849ee3e147c4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -603,8 +603,8 @@ static void mlx5e_ptp_rx_unset_fs(struct mlx5e_priv *priv) static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv) { struct mlx5e_ptp_fs *ptp_fs = priv->fs.ptp_fs; + u32 tirn = priv->rx_res->ptp.tir.tirn; struct mlx5_flow_handle *rule; - u32 tirn = priv->ptp_tir.tirn; int err; if (ptp_fs->valid) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c new file mode 100644 index 000000000000..38d0e9dbd6bd --- /dev/null +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */ + +#include "rqt.h" +#include <linux/mlx5/transobj.h> + +static int mlx5e_rqt_init(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev, + u16 max_size, u32 *init_rqns, u16 init_size) +{ + void *rqtc; + int inlen; + int err; + u32 *in; + int i; + + rqt->mdev = mdev; + rqt->size = max_size; + + inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * init_size; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); + + MLX5_SET(rqtc, rqtc, rqt_max_size, rqt->size); + + MLX5_SET(rqtc, rqtc, rqt_actual_size, init_size); + for (i = 0; i < init_size; i++) + MLX5_SET(rqtc, rqtc, rq_num[i], init_rqns[i]); + + err = mlx5_core_create_rqt(rqt->mdev, in, inlen, &rqt->rqtn); + + kvfree(in); + return err; +} + +int mlx5e_rqt_init_direct(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev, + bool indir_enabled, u32 init_rqn) +{ + u16 max_size = indir_enabled ? MLX5E_INDIR_RQT_SIZE : 1; + + return mlx5e_rqt_init(rqt, mdev, max_size, &init_rqn, 1); +} + +static int mlx5e_bits_invert(unsigned long a, int size) +{ + int inv = 0; + int i; + + for (i = 0; i < size; i++) + inv |= (test_bit(size - i - 1, &a) ? 1 : 0) << i; + + return inv; +} + +static int mlx5e_calc_indir_rqns(u32 *rss_rqns, u32 *rqns, unsigned int num_rqns, + u8 hfunc, struct mlx5e_rss_params_indir *indir) +{ + unsigned int i; + + for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) { + unsigned int ix = i; + + if (hfunc == ETH_RSS_HASH_XOR) + ix = mlx5e_bits_invert(ix, ilog2(MLX5E_INDIR_RQT_SIZE)); + + ix = indir->table[ix]; + + if (WARN_ON(ix >= num_rqns)) + /* Could be a bug in the driver or in the kernel part of + * ethtool: indir table refers to non-existent RQs. 
+ */ + return -EINVAL; + rss_rqns[i] = rqns[ix]; + } + + return 0; +} + +int mlx5e_rqt_init_indir(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev, + u32 *rqns, unsigned int num_rqns, + u8 hfunc, struct mlx5e_rss_params_indir *indir) +{ + u32 *rss_rqns; + int err; + + rss_rqns = kvmalloc_array(MLX5E_INDIR_RQT_SIZE, sizeof(*rss_rqns), GFP_KERNEL); + if (!rss_rqns) + return -ENOMEM; + + err = mlx5e_calc_indir_rqns(rss_rqns, rqns, num_rqns, hfunc, indir); + if (err) + goto out; + + err = mlx5e_rqt_init(rqt, mdev, MLX5E_INDIR_RQT_SIZE, rss_rqns, MLX5E_INDIR_RQT_SIZE); + +out: + kvfree(rss_rqns); + return err; +} + +void mlx5e_rqt_destroy(struct mlx5e_rqt *rqt) +{ + mlx5_core_destroy_rqt(rqt->mdev, rqt->rqtn); +} + +static int mlx5e_rqt_redirect(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int size) +{ + unsigned int i; + void *rqtc; + int inlen; + u32 *in; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + sizeof(u32) * size; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx); + + MLX5_SET(modify_rqt_in, in, bitmask.rqn_list, 1); + MLX5_SET(rqtc, rqtc, rqt_actual_size, size); + for (i = 0; i < size; i++) + MLX5_SET(rqtc, rqtc, rq_num[i], rqns[i]); + + err = mlx5_core_modify_rqt(rqt->mdev, rqt->rqtn, in, inlen); + + kvfree(in); + return err; +} + +int mlx5e_rqt_redirect_direct(struct mlx5e_rqt *rqt, u32 rqn) +{ + return mlx5e_rqt_redirect(rqt, &rqn, 1); +} + +int mlx5e_rqt_redirect_indir(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int num_rqns, + u8 hfunc, struct mlx5e_rss_params_indir *indir) +{ + u32 *rss_rqns; + int err; + + if (WARN_ON(rqt->size != MLX5E_INDIR_RQT_SIZE)) + return -EINVAL; + + rss_rqns = kvmalloc_array(MLX5E_INDIR_RQT_SIZE, sizeof(*rss_rqns), GFP_KERNEL); + if (!rss_rqns) + return -ENOMEM; + + err = mlx5e_calc_indir_rqns(rss_rqns, rqns, num_rqns, hfunc, indir); + if (err) + goto out; + + err = mlx5e_rqt_redirect(rqt, rss_rqns, MLX5E_INDIR_RQT_SIZE); + +out: + kvfree(rss_rqns); + return 
err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h new file mode 100644 index 000000000000..d2c76649efb0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */ + +#ifndef __MLX5_EN_RQT_H__ +#define __MLX5_EN_RQT_H__ + +#include <linux/kernel.h> + +#define MLX5E_INDIR_RQT_SIZE (1 << 8) + +struct mlx5_core_dev; + +struct mlx5e_rss_params_indir { + u32 table[MLX5E_INDIR_RQT_SIZE]; +}; + +struct mlx5e_rqt { + struct mlx5_core_dev *mdev; + u32 rqtn; + u16 size; +}; + +int mlx5e_rqt_init_direct(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev, + bool indir_enabled, u32 init_rqn); +int mlx5e_rqt_init_indir(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev, + u32 *rqns, unsigned int num_rqns, + u8 hfunc, struct mlx5e_rss_params_indir *indir); +void mlx5e_rqt_destroy(struct mlx5e_rqt *rqt); + +static inline u32 mlx5e_rqt_get_rqtn(struct mlx5e_rqt *rqt) +{ + return rqt->rqtn; +} + +int mlx5e_rqt_redirect_direct(struct mlx5e_rqt *rqt, u32 rqn); +int mlx5e_rqt_redirect_indir(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int num_rqns, + u8 hfunc, struct mlx5e_rss_params_indir *indir); + +#endif /* __MLX5_EN_RQT_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c new file mode 100644 index 000000000000..8fc1dfc4e830 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. 
*/ + +#include "rx_res.h" + +static const struct mlx5e_rss_params_traffic_type rss_default_config[MLX5E_NUM_INDIR_TIRS] = { + [MLX5E_TT_IPV4_TCP] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = MLX5_L4_PROT_TYPE_TCP, + .rx_hash_fields = MLX5_HASH_IP_L4PORTS, + }, + [MLX5E_TT_IPV6_TCP] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = MLX5_L4_PROT_TYPE_TCP, + .rx_hash_fields = MLX5_HASH_IP_L4PORTS, + }, + [MLX5E_TT_IPV4_UDP] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = MLX5_L4_PROT_TYPE_UDP, + .rx_hash_fields = MLX5_HASH_IP_L4PORTS, + }, + [MLX5E_TT_IPV6_UDP] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = MLX5_L4_PROT_TYPE_UDP, + .rx_hash_fields = MLX5_HASH_IP_L4PORTS, + }, + [MLX5E_TT_IPV4_IPSEC_AH] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, + }, + [MLX5E_TT_IPV6_IPSEC_AH] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, + }, + [MLX5E_TT_IPV4_IPSEC_ESP] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, + }, + [MLX5E_TT_IPV6_IPSEC_ESP] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, + }, + [MLX5E_TT_IPV4] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP, + }, + [MLX5E_TT_IPV6] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP, + }, +}; + +struct mlx5e_rss_params_traffic_type +mlx5e_rss_get_default_tt_config(enum mlx5e_traffic_types tt) +{ + return rss_default_config[tt]; +} + +struct mlx5e_rss_params_traffic_type +mlx5e_rx_res_rss_get_current_tt_config(struct mlx5e_rx_res *res, enum mlx5e_traffic_types tt) +{ + struct mlx5e_rss_params_traffic_type rss_tt; + + rss_tt = mlx5e_rss_get_default_tt_config(tt); + rss_tt.rx_hash_fields = 
res->rss_params.rx_hash_fields[tt]; + return rss_tt; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h new file mode 100644 index 000000000000..068e48140a6f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */ + +#ifndef __MLX5_EN_RX_RES_H__ +#define __MLX5_EN_RX_RES_H__ + +#include <linux/kernel.h> +#include "rqt.h" +#include "tir.h" +#include "fs.h" + +#define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE / 2) + +struct mlx5e_rss_params { + struct mlx5e_rss_params_hash hash; + struct mlx5e_rss_params_indir indir; + u32 rx_hash_fields[MLX5E_NUM_INDIR_TIRS]; +}; + +struct mlx5e_rx_res { + struct mlx5e_rss_params rss_params; + + struct mlx5e_rqt indir_rqt; + struct { + struct mlx5e_tir indir_tir; + struct mlx5e_tir inner_indir_tir; + } rss[MLX5E_NUM_INDIR_TIRS]; + + struct { + struct mlx5e_rqt direct_rqt; + struct mlx5e_tir direct_tir; + struct mlx5e_rqt xsk_rqt; + struct mlx5e_tir xsk_tir; + } channels[MLX5E_MAX_NUM_CHANNELS]; + + struct { + struct mlx5e_rqt rqt; + struct mlx5e_tir tir; + } ptp; +}; + +struct mlx5e_rss_params_traffic_type +mlx5e_rss_get_default_tt_config(enum mlx5e_traffic_types tt); +struct mlx5e_rss_params_traffic_type +mlx5e_rx_res_rss_get_current_tt_config(struct mlx5e_rx_res *res, enum mlx5e_traffic_types tt); + +#endif /* __MLX5_EN_RX_RES_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c new file mode 100644 index 000000000000..de936dc4bc48 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. 
*/ + +#include "tir.h" +#include "params.h" +#include <linux/mlx5/transobj.h> + +#define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024) + +/* max() doesn't work inside square brackets. */ +#define MLX5E_TIR_CMD_IN_SZ_DW ( \ + MLX5_ST_SZ_DW(create_tir_in) > MLX5_ST_SZ_DW(modify_tir_in) ? \ + MLX5_ST_SZ_DW(create_tir_in) : MLX5_ST_SZ_DW(modify_tir_in) \ +) + +struct mlx5e_tir_builder { + u32 in[MLX5E_TIR_CMD_IN_SZ_DW]; + bool modify; +}; + +struct mlx5e_tir_builder *mlx5e_tir_builder_alloc(bool modify) +{ + struct mlx5e_tir_builder *builder; + + builder = kvzalloc(sizeof(*builder), GFP_KERNEL); + builder->modify = modify; + + return builder; +} + +void mlx5e_tir_builder_free(struct mlx5e_tir_builder *builder) +{ + kvfree(builder); +} + +void mlx5e_tir_builder_clear(struct mlx5e_tir_builder *builder) +{ + memset(builder->in, 0, sizeof(builder->in)); +} + +static void *mlx5e_tir_builder_get_tirc(struct mlx5e_tir_builder *builder) +{ + if (builder->modify) + return MLX5_ADDR_OF(modify_tir_in, builder->in, ctx); + return MLX5_ADDR_OF(create_tir_in, builder->in, ctx); +} + +void mlx5e_tir_builder_build_inline(struct mlx5e_tir_builder *builder, u32 tdn, u32 rqn) +{ + void *tirc = mlx5e_tir_builder_get_tirc(builder); + + WARN_ON(builder->modify); + + MLX5_SET(tirc, tirc, transport_domain, tdn); + MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT); + MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_NONE); + MLX5_SET(tirc, tirc, inline_rqn, rqn); +} + +void mlx5e_tir_builder_build_rqt(struct mlx5e_tir_builder *builder, u32 tdn, + u32 rqtn, bool inner_ft_support) +{ + void *tirc = mlx5e_tir_builder_get_tirc(builder); + + WARN_ON(builder->modify); + + MLX5_SET(tirc, tirc, transport_domain, tdn); + MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); + MLX5_SET(tirc, tirc, indirect_table, rqtn); + MLX5_SET(tirc, tirc, tunneled_offload_en, inner_ft_support); +} + +void mlx5e_tir_builder_build_lro(struct mlx5e_tir_builder *builder, + const struct mlx5e_lro_param 
*lro_param) +{ + void *tirc = mlx5e_tir_builder_get_tirc(builder); + const unsigned int rough_max_l2_l3_hdr_sz = 256; + + if (builder->modify) + MLX5_SET(modify_tir_in, builder->in, bitmask.lro, 1); + + if (!lro_param->enabled) + return; + + MLX5_SET(tirc, tirc, lro_enable_mask, + MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO | + MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO); + MLX5_SET(tirc, tirc, lro_max_ip_payload_size, + (MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - rough_max_l2_l3_hdr_sz) >> 8); + MLX5_SET(tirc, tirc, lro_timeout_period_usecs, lro_param->timeout); +} + +static int mlx5e_hfunc_to_hw(u8 hfunc) +{ + switch (hfunc) { + case ETH_RSS_HASH_TOP: + return MLX5_RX_HASH_FN_TOEPLITZ; + case ETH_RSS_HASH_XOR: + return MLX5_RX_HASH_FN_INVERTED_XOR8; + default: + return MLX5_RX_HASH_FN_NONE; + } +} + +void mlx5e_tir_builder_build_rss(struct mlx5e_tir_builder *builder, + const struct mlx5e_rss_params_hash *rss_hash, + const struct mlx5e_rss_params_traffic_type *rss_tt, + bool inner) +{ + void *tirc = mlx5e_tir_builder_get_tirc(builder); + void *hfso; + + if (builder->modify) + MLX5_SET(modify_tir_in, builder->in, bitmask.hash, 1); + + MLX5_SET(tirc, tirc, rx_hash_fn, mlx5e_hfunc_to_hw(rss_hash->hfunc)); + if (rss_hash->hfunc == ETH_RSS_HASH_TOP) { + const size_t len = MLX5_FLD_SZ_BYTES(tirc, rx_hash_toeplitz_key); + void *rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key); + + MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); + memcpy(rss_key, rss_hash->toeplitz_hash_key, len); + } + + if (inner) + hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner); + else + hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, rss_tt->l3_prot_type); + MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, rss_tt->l4_prot_type); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, rss_tt->rx_hash_fields); +} + +void mlx5e_tir_builder_build_direct(struct mlx5e_tir_builder *builder) +{ + void *tirc = mlx5e_tir_builder_get_tirc(builder); + 
+ WARN_ON(builder->modify); + + MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8); +} + +void mlx5e_tir_builder_build_tls(struct mlx5e_tir_builder *builder) +{ + void *tirc = mlx5e_tir_builder_get_tirc(builder); + + WARN_ON(builder->modify); + + MLX5_SET(tirc, tirc, tls_en, 1); + MLX5_SET(tirc, tirc, self_lb_block, + MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST | + MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST); +} + +int mlx5e_tir_init(struct mlx5e_tir *tir, struct mlx5e_tir_builder *builder, + struct mlx5_core_dev *mdev, bool reg) +{ + int err; + + tir->mdev = mdev; + + err = mlx5_core_create_tir(tir->mdev, builder->in, &tir->tirn); + if (err) + return err; + + if (reg) { + struct mlx5e_hw_objs *res = &tir->mdev->mlx5e_res.hw_objs; + + mutex_lock(&res->td.list_lock); + list_add(&tir->list, &res->td.tirs_list); + mutex_unlock(&res->td.list_lock); + } else { + INIT_LIST_HEAD(&tir->list); + } + + return 0; +} + +void mlx5e_tir_destroy(struct mlx5e_tir *tir) +{ + struct mlx5e_hw_objs *res = &tir->mdev->mlx5e_res.hw_objs; + + /* Skip mutex if list_del is no-op (the TIR wasn't registered in the + * list). list_empty will never return true for an item of tirs_list, + * and READ_ONCE/WRITE_ONCE in list_empty/list_del guarantee consistency + * of the list->next value. + */ + if (!list_empty(&tir->list)) { + mutex_lock(&res->td.list_lock); + list_del(&tir->list); + mutex_unlock(&res->td.list_lock); + } + + mlx5_core_destroy_tir(tir->mdev, tir->tirn); +} + +int mlx5e_tir_modify(struct mlx5e_tir *tir, struct mlx5e_tir_builder *builder) +{ + return mlx5_core_modify_tir(tir->mdev, tir->tirn, builder->in); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h new file mode 100644 index 000000000000..e45149a78ed9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, Mellanox Technologies inc. 
All rights reserved. */ + +#ifndef __MLX5_EN_TIR_H__ +#define __MLX5_EN_TIR_H__ + +#include <linux/kernel.h> + +struct mlx5e_rss_params_hash { + u8 hfunc; + u8 toeplitz_hash_key[40]; +}; + +struct mlx5e_rss_params_traffic_type { + u8 l3_prot_type; + u8 l4_prot_type; + u32 rx_hash_fields; +}; + +struct mlx5e_tir_builder; +struct mlx5e_lro_param; + +struct mlx5e_tir_builder *mlx5e_tir_builder_alloc(bool modify); +void mlx5e_tir_builder_free(struct mlx5e_tir_builder *builder); +void mlx5e_tir_builder_clear(struct mlx5e_tir_builder *builder); + +void mlx5e_tir_builder_build_inline(struct mlx5e_tir_builder *builder, u32 tdn, u32 rqn); +void mlx5e_tir_builder_build_rqt(struct mlx5e_tir_builder *builder, u32 tdn, + u32 rqtn, bool inner_ft_support); +void mlx5e_tir_builder_build_lro(struct mlx5e_tir_builder *builder, + const struct mlx5e_lro_param *lro_param); +void mlx5e_tir_builder_build_rss(struct mlx5e_tir_builder *builder, + const struct mlx5e_rss_params_hash *rss_hash, + const struct mlx5e_rss_params_traffic_type *rss_tt, + bool inner); +void mlx5e_tir_builder_build_direct(struct mlx5e_tir_builder *builder); +void mlx5e_tir_builder_build_tls(struct mlx5e_tir_builder *builder); + +struct mlx5_core_dev; + +struct mlx5e_tir { + struct mlx5_core_dev *mdev; + u32 tirn; + struct list_head list; +}; + +int mlx5e_tir_init(struct mlx5e_tir *tir, struct mlx5e_tir_builder *builder, + struct mlx5_core_dev *mdev, bool reg); +void mlx5e_tir_destroy(struct mlx5e_tir *tir); + +static inline u32 mlx5e_tir_get_tirn(struct mlx5e_tir *tir) +{ + return tir->tirn; +} + +int mlx5e_tir_modify(struct mlx5e_tir *tir, struct mlx5e_tir_builder *builder); + +#endif /* __MLX5_EN_TIR_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c index 86ab4e864fe6..afaf5b413066 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c @@ -92,30 +92,19 @@ static void 
mlx5e_close_trap_rq(struct mlx5e_rq *rq) static int mlx5e_create_trap_direct_rq_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir, u32 rqn) { - void *tirc; - int inlen; - u32 *in; + struct mlx5e_tir_builder *builder; int err; - inlen = MLX5_ST_SZ_BYTES(create_tir_in); - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) + builder = mlx5e_tir_builder_alloc(false); + if (!builder) return -ENOMEM; - tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); - MLX5_SET(tirc, tirc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn); - MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_NONE); - MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT); - MLX5_SET(tirc, tirc, inline_rqn, rqn); - err = mlx5e_create_tir(mdev, tir, in); - kvfree(in); + mlx5e_tir_builder_build_inline(builder, mdev->mlx5e_res.hw_objs.td.tdn, rqn); + err = mlx5e_tir_init(tir, builder, mdev, true); - return err; -} + mlx5e_tir_builder_free(builder); -static void mlx5e_destroy_trap_direct_rq_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir) -{ - mlx5e_destroy_tir(mdev, tir); + return err; } static void mlx5e_build_trap_params(struct mlx5_core_dev *mdev, @@ -173,7 +162,7 @@ err_napi_del: void mlx5e_close_trap(struct mlx5e_trap *trap) { - mlx5e_destroy_trap_direct_rq_tir(trap->mdev, &trap->tir); + mlx5e_tir_destroy(&trap->tir); mlx5e_close_trap_rq(&trap->rq); netif_napi_del(&trap->napi); kvfree(trap); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index a8315f166696..ab485d082729 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -184,28 +184,14 @@ void mlx5e_deactivate_xsk(struct mlx5e_channel *c) /* TX queue is disabled on close. 
*/ } -static int mlx5e_redirect_xsk_rqt(struct mlx5e_priv *priv, u16 ix, u32 rqn) -{ - struct mlx5e_redirect_rqt_param direct_rrp = { - .is_rss = false, - { - .rqn = rqn, - }, - }; - - u32 rqtn = priv->xsk_tir[ix].rqt.rqtn; - - return mlx5e_redirect_rqt(priv, rqtn, 1, direct_rrp); -} - int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c) { - return mlx5e_redirect_xsk_rqt(priv, c->ix, c->xskrq.rqn); + return mlx5e_rqt_redirect_direct(&priv->rx_res->channels[c->ix].xsk_rqt, c->xskrq.rqn); } int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix) { - return mlx5e_redirect_xsk_rqt(priv, ix, priv->drop_rq.rqn); + return mlx5e_rqt_redirect_direct(&priv->rx_res->channels[ix].xsk_rqt, priv->drop_rq.rqn); } int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index 4e58fade7a60..bfdbc3060755 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -49,7 +49,7 @@ struct mlx5e_ktls_offload_context_rx { struct mlx5e_rq_stats *rq_stats; struct mlx5e_tls_sw_stats *sw_stats; struct completion add_ctx; - u32 tirn; + struct mlx5e_tir tir; u32 key_id; u32 rxq; DECLARE_BITMAP(flags, MLX5E_NUM_PRIV_RX_FLAGS); @@ -99,31 +99,22 @@ mlx5e_ktls_rx_resync_create_resp_list(void) return resp_list; } -static int mlx5e_ktls_create_tir(struct mlx5_core_dev *mdev, u32 *tirn, u32 rqtn) +static int mlx5e_ktls_create_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir, u32 rqtn) { - int err, inlen; - void *tirc; - u32 *in; + struct mlx5e_tir_builder *builder; + int err; - inlen = MLX5_ST_SZ_BYTES(create_tir_in); - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) + builder = mlx5e_tir_builder_alloc(false); + if (!builder) return -ENOMEM; - tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); - - MLX5_SET(tirc, 
tirc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn); - MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); - MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8); - MLX5_SET(tirc, tirc, indirect_table, rqtn); - MLX5_SET(tirc, tirc, tls_en, 1); - MLX5_SET(tirc, tirc, self_lb_block, - MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST | - MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST); + mlx5e_tir_builder_build_rqt(builder, mdev->mlx5e_res.hw_objs.td.tdn, rqtn, false); + mlx5e_tir_builder_build_direct(builder); + mlx5e_tir_builder_build_tls(builder); + err = mlx5e_tir_init(tir, builder, mdev, false); - err = mlx5_core_create_tir(mdev, in, tirn); + mlx5e_tir_builder_free(builder); - kvfree(in); return err; } @@ -139,7 +130,8 @@ static void accel_rule_handle_work(struct work_struct *work) goto out; rule = mlx5e_accel_fs_add_sk(accel_rule->priv, priv_rx->sk, - priv_rx->tirn, MLX5_FS_DEFAULT_FLOW_TAG); + mlx5e_tir_get_tirn(&priv_rx->tir), + MLX5_FS_DEFAULT_FLOW_TAG); if (!IS_ERR_OR_NULL(rule)) accel_rule->rule = rule; out: @@ -173,8 +165,8 @@ post_static_params(struct mlx5e_icosq *sq, pi = mlx5e_icosq_get_next_pi(sq, num_wqebbs); wqe = MLX5E_TLS_FETCH_SET_STATIC_PARAMS_WQE(sq, pi); mlx5e_ktls_build_static_params(wqe, sq->pc, sq->sqn, &priv_rx->crypto_info, - priv_rx->tirn, priv_rx->key_id, - priv_rx->resync.seq, false, + mlx5e_tir_get_tirn(&priv_rx->tir), + priv_rx->key_id, priv_rx->resync.seq, false, TLS_OFFLOAD_CTX_DIR_RX); wi = (struct mlx5e_icosq_wqe_info) { .wqe_type = MLX5E_ICOSQ_WQE_UMR_TLS, @@ -202,8 +194,9 @@ post_progress_params(struct mlx5e_icosq *sq, pi = mlx5e_icosq_get_next_pi(sq, num_wqebbs); wqe = MLX5E_TLS_FETCH_SET_PROGRESS_PARAMS_WQE(sq, pi); - mlx5e_ktls_build_progress_params(wqe, sq->pc, sq->sqn, priv_rx->tirn, false, - next_record_tcp_sn, + mlx5e_ktls_build_progress_params(wqe, sq->pc, sq->sqn, + mlx5e_tir_get_tirn(&priv_rx->tir), + false, next_record_tcp_sn, TLS_OFFLOAD_CTX_DIR_RX); wi = (struct mlx5e_icosq_wqe_info) { .wqe_type = 
MLX5E_ICOSQ_WQE_SET_PSV_TLS, @@ -325,7 +318,7 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq, psv = &wqe->psv; psv->num_psv = 1 << 4; psv->l_key = sq->channel->mkey_be; - psv->psv_index[0] = cpu_to_be32(priv_rx->tirn); + psv->psv_index[0] = cpu_to_be32(mlx5e_tir_get_tirn(&priv_rx->tir)); psv->va = cpu_to_be64(buf->dma_addr); wi = (struct mlx5e_icosq_wqe_info) { @@ -635,9 +628,9 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk, priv_rx->sw_stats = &priv->tls->sw_stats; mlx5e_set_ktls_rx_priv_ctx(tls_ctx, priv_rx); - rqtn = priv->direct_tir[rxq].rqt.rqtn; + rqtn = mlx5e_rqt_get_rqtn(&priv->rx_res->channels[rxq].direct_rqt); - err = mlx5e_ktls_create_tir(mdev, &priv_rx->tirn, rqtn); + err = mlx5e_ktls_create_tir(mdev, &priv_rx->tir, rqtn); if (err) goto err_create_tir; @@ -658,7 +651,7 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk, return 0; err_post_wqes: - mlx5_core_destroy_tir(mdev, priv_rx->tirn); + mlx5e_tir_destroy(&priv_rx->tir); err_create_tir: mlx5_ktls_destroy_key(mdev, priv_rx->key_id); err_create_key: @@ -693,7 +686,7 @@ void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx) if (priv_rx->rule.rule) mlx5e_accel_fs_del_sk(priv_rx->rule.rule); - mlx5_core_destroy_tir(mdev, priv_rx->tirn); + mlx5e_tir_destroy(&priv_rx->tir); mlx5_ktls_destroy_key(mdev, priv_rx->key_id); /* priv_rx should normally be freed here, but if there is an outstanding * GET_PSV, deallocation will be delayed until the CQE for GET_PSV is diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index 25403af32859..db6c6a96a6c9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -192,7 +192,6 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, enum arfs_type type) { struct arfs_table *arfs_t = &priv->fs.arfs->arfs_tables[type]; - struct mlx5e_tir *tir = priv->indir_tir; struct 
mlx5_flow_destination dest = {}; MLX5_DECLARE_FLOW_ACT(flow_act); enum mlx5e_traffic_types tt; @@ -209,7 +208,7 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, /* FIXME: Must use mlx5e_ttc_get_default_dest(), * but can't since TTC default is not setup yet ! */ - dest.tir_num = tir[tt].tirn; + dest.tir_num = priv->rx_res->rss[tt].indir_tir.tirn; arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, NULL, &flow_act, &dest, 1); @@ -553,7 +552,7 @@ static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv, 16); } dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; - dest.tir_num = priv->direct_tir[arfs_rule->rxq].tirn; + dest.tir_num = priv->rx_res->channels[arfs_rule->rxq].direct_tir.tirn; rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -576,7 +575,7 @@ static void arfs_modify_rule_rq(struct mlx5e_priv *priv, int err = 0; dst.type = MLX5_FLOW_DESTINATION_TYPE_TIR; - dst.tir_num = priv->direct_tir[rxq].tirn; + dst.tir_num = priv->rx_res->channels[rxq].direct_tir.tirn; err = mlx5_modify_rule_destination(rule, &dst, NULL); if (err) netdev_warn(priv->netdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index 8c166ee56d8b..c4db367d4baf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -36,33 +36,6 @@ * Global resources are common to all the netdevices crated on the same nic. 
*/ -int mlx5e_create_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir, u32 *in) -{ - struct mlx5e_hw_objs *res = &mdev->mlx5e_res.hw_objs; - int err; - - err = mlx5_core_create_tir(mdev, in, &tir->tirn); - if (err) - return err; - - mutex_lock(&res->td.list_lock); - list_add(&tir->list, &res->td.tirs_list); - mutex_unlock(&res->td.list_lock); - - return 0; -} - -void mlx5e_destroy_tir(struct mlx5_core_dev *mdev, - struct mlx5e_tir *tir) -{ - struct mlx5e_hw_objs *res = &mdev->mlx5e_res.hw_objs; - - mutex_lock(&res->td.list_lock); - mlx5_core_destroy_tir(mdev, tir->tirn); - list_del(&tir->list); - mutex_unlock(&res->td.list_lock); -} - void mlx5e_mkey_set_relaxed_ordering(struct mlx5_core_dev *mdev, void *mkc) { bool ro_pci_enable = pcie_relaxed_ordering_enabled(mdev->pdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index bd72572e03d1..9264d18b0964 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1172,7 +1172,7 @@ static int mlx5e_set_link_ksettings(struct net_device *netdev, u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv) { - return sizeof(priv->rss_params.toeplitz_hash_key); + return sizeof(priv->rx_res->rss_params.hash.toeplitz_hash_key); } static u32 mlx5e_get_rxfh_key_size(struct net_device *netdev) @@ -1198,18 +1198,18 @@ int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc) { struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5e_rss_params *rss = &priv->rss_params; + struct mlx5e_rss_params *rss; + + rss = &priv->rx_res->rss_params; if (indir) - memcpy(indir, rss->indirection_rqt, - sizeof(rss->indirection_rqt)); + memcpy(indir, rss->indir.table, sizeof(rss->indir.table)); if (key) - memcpy(key, rss->toeplitz_hash_key, - sizeof(rss->toeplitz_hash_key)); + memcpy(key, rss->hash.toeplitz_hash_key, sizeof(rss->hash.toeplitz_hash_key)); if (hfunc) - *hfunc = 
rss->hfunc; + *hfunc = rss->hash.hfunc; return 0; } @@ -1218,63 +1218,57 @@ int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, const u8 *key, const u8 hfunc) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5e_rss_params *rss = &priv->rss_params; - int inlen = MLX5_ST_SZ_BYTES(modify_tir_in); + struct mlx5e_rss_params *rss; bool refresh_tirs = false; bool refresh_rqt = false; - void *in; if ((hfunc != ETH_RSS_HASH_NO_CHANGE) && (hfunc != ETH_RSS_HASH_XOR) && (hfunc != ETH_RSS_HASH_TOP)) return -EINVAL; - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) - return -ENOMEM; - mutex_lock(&priv->state_lock); - if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != rss->hfunc) { - rss->hfunc = hfunc; + rss = &priv->rx_res->rss_params; + + if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != rss->hash.hfunc) { + rss->hash.hfunc = hfunc; refresh_rqt = true; refresh_tirs = true; } if (indir) { - memcpy(rss->indirection_rqt, indir, - sizeof(rss->indirection_rqt)); + memcpy(rss->indir.table, indir, sizeof(rss->indir.table)); refresh_rqt = true; } if (key) { - memcpy(rss->toeplitz_hash_key, key, - sizeof(rss->toeplitz_hash_key)); - refresh_tirs = refresh_tirs || rss->hfunc == ETH_RSS_HASH_TOP; + memcpy(rss->hash.toeplitz_hash_key, key, sizeof(rss->hash.toeplitz_hash_key)); + refresh_tirs = refresh_tirs || rss->hash.hfunc == ETH_RSS_HASH_TOP; } if (refresh_rqt && test_bit(MLX5E_STATE_OPENED, &priv->state)) { - struct mlx5e_redirect_rqt_param rrp = { - .is_rss = true, - { - .rss = { - .hfunc = rss->hfunc, - .channels = &priv->channels, - }, - }, - }; - u32 rqtn = priv->indir_rqt.rqtn; - - mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, rrp); + u32 *rqns; + + rqns = kvmalloc_array(priv->channels.num, sizeof(*rqns), GFP_KERNEL); + if (rqns) { + unsigned int ix; + + for (ix = 0; ix < priv->channels.num; ix++) + rqns[ix] = priv->channels.c[ix]->rq.rqn; + + mlx5e_rqt_redirect_indir(&priv->rx_res->indir_rqt, rqns, + priv->channels.num, + rss->hash.hfunc, &rss->indir); + 
kvfree(rqns); + } } if (refresh_tirs) - mlx5e_modify_tirs_hash(priv, in); + mlx5e_modify_tirs_hash(priv); mutex_unlock(&priv->state_lock); - kvfree(in); - return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 0b75fab41ae8..e79815763edf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -1320,11 +1320,11 @@ err: void mlx5e_set_ttc_basic_params(struct mlx5e_priv *priv, struct ttc_params *ttc_params) { - ttc_params->any_tt_tirn = priv->direct_tir[0].tirn; + ttc_params->any_tt_tirn = priv->rx_res->channels[0].direct_tir.tirn; ttc_params->inner_ttc = &priv->fs.inner_ttc; } -void mlx5e_set_inner_ttc_ft_params(struct ttc_params *ttc_params) +static void mlx5e_set_inner_ttc_ft_params(struct ttc_params *ttc_params) { struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr; @@ -1343,15 +1343,12 @@ void mlx5e_set_ttc_ft_params(struct ttc_params *ttc_params) ft_attr->prio = MLX5E_NIC_PRIO; } -int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params, - struct mlx5e_ttc_table *ttc) +static int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params, + struct mlx5e_ttc_table *ttc) { struct mlx5e_flow_table *ft = &ttc->ft; int err; - if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) - return 0; - ft->t = mlx5_create_flow_table(priv->fs.ns, ¶ms->ft_attr); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); @@ -1374,12 +1371,9 @@ err: return err; } -void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv, - struct mlx5e_ttc_table *ttc) +static void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv, + struct mlx5e_ttc_table *ttc) { - if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) - return; - mlx5e_cleanup_ttc_rules(ttc); mlx5e_destroy_flow_table(&ttc->ft); } @@ -1788,20 +1782,23 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv) } mlx5e_set_ttc_basic_params(priv, &ttc_params); - 
mlx5e_set_inner_ttc_ft_params(&ttc_params); - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) - ttc_params.indir_tirn[tt] = priv->inner_indir_tir[tt].tirn; - err = mlx5e_create_inner_ttc_table(priv, &ttc_params, &priv->fs.inner_ttc); - if (err) { - netdev_err(priv->netdev, "Failed to create inner ttc table, err=%d\n", - err); - goto err_destroy_arfs_tables; + if (mlx5e_tunnel_inner_ft_supported(priv->mdev)) { + mlx5e_set_inner_ttc_ft_params(&ttc_params); + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + ttc_params.indir_tirn[tt] = priv->rx_res->rss[tt].inner_indir_tir.tirn; + + err = mlx5e_create_inner_ttc_table(priv, &ttc_params, &priv->fs.inner_ttc); + if (err) { + netdev_err(priv->netdev, "Failed to create inner ttc table, err=%d\n", + err); + goto err_destroy_arfs_tables; + } } mlx5e_set_ttc_ft_params(&ttc_params); for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) - ttc_params.indir_tirn[tt] = priv->indir_tir[tt].tirn; + ttc_params.indir_tirn[tt] = priv->rx_res->rss[tt].indir_tir.tirn; err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc); if (err) { @@ -1839,7 +1836,8 @@ err_destroy_l2_table: err_destroy_ttc_table: mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); err_destroy_inner_ttc_table: - mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc); + if (mlx5e_tunnel_inner_ft_supported(priv->mdev)) + mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc); err_destroy_arfs_tables: mlx5e_arfs_destroy_tables(priv); @@ -1852,7 +1850,8 @@ void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv) mlx5e_destroy_vlan_table(priv); mlx5e_destroy_l2_table(priv); mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); - mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc); + if (mlx5e_tunnel_inner_ft_supported(priv->mdev)) + mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc); mlx5e_arfs_destroy_tables(priv); mlx5e_ethtool_cleanup_steering(priv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index b416a8ee2eed..494f6f832407 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -421,11 +421,9 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv, } else { struct mlx5e_params *params = &priv->channels.params; enum mlx5e_rq_group group; - struct mlx5e_tir *tir; u16 ix; mlx5e_qid_get_ch_and_group(params, fs->ring_cookie, &ix, &group); - tir = group == MLX5E_RQ_GROUP_XSK ? priv->xsk_tir : priv->direct_tir; dst = kzalloc(sizeof(*dst), GFP_KERNEL); if (!dst) { @@ -434,7 +432,10 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv, } dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; - dst->tir_num = tir[ix].tirn; + if (group == MLX5E_RQ_GROUP_XSK) + dst->tir_num = priv->rx_res->channels[ix].xsk_tir.tirn; + else + dst->tir_num = priv->rx_res->channels[ix].direct_tir.tirn; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } @@ -816,10 +817,8 @@ static enum mlx5e_traffic_types flow_type_to_traffic_type(u32 flow_type) static int mlx5e_set_rss_hash_opt(struct mlx5e_priv *priv, struct ethtool_rxnfc *nfc) { - int inlen = MLX5_ST_SZ_BYTES(modify_tir_in); enum mlx5e_traffic_types tt; u8 rx_hash_field = 0; - void *in; tt = flow_type_to_traffic_type(nfc->flow_type); if (tt == MLX5E_NUM_INDIR_TIRS) @@ -848,21 +847,16 @@ static int mlx5e_set_rss_hash_opt(struct mlx5e_priv *priv, if (nfc->data & RXH_L4_B_2_3) rx_hash_field |= MLX5_HASH_FIELD_SEL_L4_DPORT; - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) - return -ENOMEM; - mutex_lock(&priv->state_lock); - if (rx_hash_field == priv->rss_params.rx_hash_fields[tt]) + if (rx_hash_field == priv->rx_res->rss_params.rx_hash_fields[tt]) goto out; - priv->rss_params.rx_hash_fields[tt] = rx_hash_field; - mlx5e_modify_tirs_hash(priv, in); + priv->rx_res->rss_params.rx_hash_fields[tt] = rx_hash_field; + mlx5e_modify_tirs_hash(priv); out: mutex_unlock(&priv->state_lock); - kvfree(in); return 0; } @@ -876,7 +870,7 
@@ static int mlx5e_get_rss_hash_opt(struct mlx5e_priv *priv, if (tt == MLX5E_NUM_INDIR_TIRS) return -EINVAL; - hash_field = priv->rss_params.rx_hash_fields[tt]; + hash_field = priv->rx_res->rss_params.rx_hash_fields[tt]; nfc->data = 0; if (hash_field & MLX5_HASH_FIELD_SEL_SRC_IP) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index d09e65557e75..b9a0459b58f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2194,61 +2194,25 @@ void mlx5e_close_channels(struct mlx5e_channels *chs) chs->num = 0; } -static int -mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, struct mlx5e_rqt *rqt) -{ - struct mlx5_core_dev *mdev = priv->mdev; - void *rqtc; - int inlen; - int err; - u32 *in; - int i; - - inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz; - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) - return -ENOMEM; - - rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); - - MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); - MLX5_SET(rqtc, rqtc, rqt_max_size, sz); - - for (i = 0; i < sz; i++) - MLX5_SET(rqtc, rqtc, rq_num[i], priv->drop_rq.rqn); - - err = mlx5_core_create_rqt(mdev, in, inlen, &rqt->rqtn); - if (!err) - rqt->enabled = true; - - kvfree(in); - return err; -} - -void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt) -{ - rqt->enabled = false; - mlx5_core_destroy_rqt(priv->mdev, rqt->rqtn); -} - int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv) { - struct mlx5e_rqt *rqt = &priv->indir_rqt; int err; - err = mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, rqt); + err = mlx5e_rqt_init_direct(&priv->rx_res->indir_rqt, priv->mdev, true, + priv->drop_rq.rqn); if (err) mlx5_core_warn(priv->mdev, "create indirect rqts failed, %d\n", err); return err; } -int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n) +int mlx5e_create_direct_rqts(struct mlx5e_priv *priv) { int err; int ix; - for 
(ix = 0; ix < n; ix++) { - err = mlx5e_create_rqt(priv, 1 /*size */, &tirs[ix].rqt); + for (ix = 0; ix < priv->max_nch; ix++) { + err = mlx5e_rqt_init_direct(&priv->rx_res->channels[ix].direct_rqt, + priv->mdev, false, priv->drop_rq.rqn); if (unlikely(err)) goto err_destroy_rqts; } @@ -2256,337 +2220,175 @@ int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, in return 0; err_destroy_rqts: - mlx5_core_warn(priv->mdev, "create rqts failed, %d\n", err); - for (ix--; ix >= 0; ix--) - mlx5e_destroy_rqt(priv, &tirs[ix].rqt); + mlx5_core_warn(priv->mdev, "create direct rqts failed, %d\n", err); + while (--ix >= 0) + mlx5e_rqt_destroy(&priv->rx_res->channels[ix].direct_rqt); return err; } -void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n) +static int mlx5e_create_xsk_rqts(struct mlx5e_priv *priv) { - int i; - - for (i = 0; i < n; i++) - mlx5e_destroy_rqt(priv, &tirs[i].rqt); -} + int err; + int ix; -static int mlx5e_rx_hash_fn(int hfunc) -{ - return (hfunc == ETH_RSS_HASH_TOP) ? - MLX5_RX_HASH_FN_TOEPLITZ : - MLX5_RX_HASH_FN_INVERTED_XOR8; -} + for (ix = 0; ix < priv->max_nch; ix++) { + err = mlx5e_rqt_init_direct(&priv->rx_res->channels[ix].xsk_rqt, + priv->mdev, false, priv->drop_rq.rqn); + if (unlikely(err)) + goto err_destroy_rqts; + } -int mlx5e_bits_invert(unsigned long a, int size) -{ - int inv = 0; - int i; + return 0; - for (i = 0; i < size; i++) - inv |= (test_bit(size - i - 1, &a) ? 
1 : 0) << i; +err_destroy_rqts: + mlx5_core_warn(priv->mdev, "create xsk rqts failed, %d\n", err); + while (--ix >= 0) + mlx5e_rqt_destroy(&priv->rx_res->channels[ix].xsk_rqt); - return inv; + return err; } -static void mlx5e_fill_rqt_rqns(struct mlx5e_priv *priv, int sz, - struct mlx5e_redirect_rqt_param rrp, void *rqtc) +void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv) { - int i; - - for (i = 0; i < sz; i++) { - u32 rqn; - - if (rrp.is_rss) { - int ix = i; - - if (rrp.rss.hfunc == ETH_RSS_HASH_XOR) - ix = mlx5e_bits_invert(i, ilog2(sz)); + unsigned int ix; - ix = priv->rss_params.indirection_rqt[ix]; - rqn = rrp.rss.channels->c[ix]->rq.rqn; - } else { - rqn = rrp.rqn; - } - MLX5_SET(rqtc, rqtc, rq_num[i], rqn); - } + for (ix = 0; ix < priv->max_nch; ix++) + mlx5e_rqt_destroy(&priv->rx_res->channels[ix].direct_rqt); } -int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, - struct mlx5e_redirect_rqt_param rrp) +static void mlx5e_destroy_xsk_rqts(struct mlx5e_priv *priv) { - struct mlx5_core_dev *mdev = priv->mdev; - void *rqtc; - int inlen; - u32 *in; - int err; - - inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + sizeof(u32) * sz; - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) - return -ENOMEM; - - rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx); - - MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); - MLX5_SET(modify_rqt_in, in, bitmask.rqn_list, 1); - mlx5e_fill_rqt_rqns(priv, sz, rrp, rqtc); - err = mlx5_core_modify_rqt(mdev, rqtn, in, inlen); + unsigned int ix; - kvfree(in); - return err; + for (ix = 0; ix < priv->max_nch; ix++) + mlx5e_rqt_destroy(&priv->rx_res->channels[ix].xsk_rqt); } -static u32 mlx5e_get_direct_rqn(struct mlx5e_priv *priv, int ix, - struct mlx5e_redirect_rqt_param rrp) +static void mlx5e_redirect_rqts_to_channels(struct mlx5e_priv *priv, + struct mlx5e_channels *chs) { - if (!rrp.is_rss) - return rrp.rqn; - - if (ix >= rrp.rss.channels->num) - return priv->drop_rq.rqn; - - return rrp.rss.channels->c[ix]->rq.rqn; -} + struct mlx5e_rx_res 
*res = priv->rx_res; + unsigned int ix; + u32 *rqns; -static void mlx5e_redirect_rqts(struct mlx5e_priv *priv, - struct mlx5e_redirect_rqt_param rrp, - struct mlx5e_redirect_rqt_param *ptp_rrp) -{ - u32 rqtn; - int ix; + rqns = kvmalloc_array(chs->num, sizeof(*rqns), GFP_KERNEL); + if (rqns) { + for (ix = 0; ix < chs->num; ix++) + rqns[ix] = chs->c[ix]->rq.rqn; - if (priv->indir_rqt.enabled) { - /* RSS RQ table */ - rqtn = priv->indir_rqt.rqtn; - mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, rrp); + mlx5e_rqt_redirect_indir(&res->indir_rqt, rqns, chs->num, + res->rss_params.hash.hfunc, + &res->rss_params.indir); + kvfree(rqns); } for (ix = 0; ix < priv->max_nch; ix++) { - struct mlx5e_redirect_rqt_param direct_rrp = { - .is_rss = false, - { - .rqn = mlx5e_get_direct_rqn(priv, ix, rrp) - }, - }; + u32 rqn = priv->drop_rq.rqn; - /* Direct RQ Tables */ - if (!priv->direct_tir[ix].rqt.enabled) - continue; + if (ix < chs->num) + rqn = chs->c[ix]->rq.rqn; - rqtn = priv->direct_tir[ix].rqt.rqtn; - mlx5e_redirect_rqt(priv, rqtn, 1, direct_rrp); - } - if (ptp_rrp) { - rqtn = priv->ptp_tir.rqt.rqtn; - mlx5e_redirect_rqt(priv, rqtn, 1, *ptp_rrp); + mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, rqn); } -} -static void mlx5e_redirect_rqts_to_channels(struct mlx5e_priv *priv, - struct mlx5e_channels *chs) -{ - bool rx_ptp_support = priv->profile->rx_ptp_support; - struct mlx5e_redirect_rqt_param *ptp_rrp_p = NULL; - struct mlx5e_redirect_rqt_param rrp = { - .is_rss = true, - { - .rss = { - .channels = chs, - .hfunc = priv->rss_params.hfunc, - } - }, - }; - struct mlx5e_redirect_rqt_param ptp_rrp; + if (priv->profile->rx_ptp_support) { + u32 rqn; - if (rx_ptp_support) { - u32 ptp_rqn; + if (mlx5e_ptp_get_rqn(priv->channels.ptp, &rqn)) + rqn = priv->drop_rq.rqn; - ptp_rrp.is_rss = false; - ptp_rrp.rqn = mlx5e_ptp_get_rqn(priv->channels.ptp, &ptp_rqn) ? 
- priv->drop_rq.rqn : ptp_rqn; - ptp_rrp_p = &ptp_rrp; + mlx5e_rqt_redirect_direct(&res->ptp.rqt, rqn); } - mlx5e_redirect_rqts(priv, rrp, ptp_rrp_p); } static void mlx5e_redirect_rqts_to_drop(struct mlx5e_priv *priv) { - bool rx_ptp_support = priv->profile->rx_ptp_support; - struct mlx5e_redirect_rqt_param drop_rrp = { - .is_rss = false, - { - .rqn = priv->drop_rq.rqn, - }, - }; - - mlx5e_redirect_rqts(priv, drop_rrp, rx_ptp_support ? &drop_rrp : NULL); -} - -static const struct mlx5e_tirc_config tirc_default_config[MLX5E_NUM_INDIR_TIRS] = { - [MLX5E_TT_IPV4_TCP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, - .l4_prot_type = MLX5_L4_PROT_TYPE_TCP, - .rx_hash_fields = MLX5_HASH_IP_L4PORTS, - }, - [MLX5E_TT_IPV6_TCP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, - .l4_prot_type = MLX5_L4_PROT_TYPE_TCP, - .rx_hash_fields = MLX5_HASH_IP_L4PORTS, - }, - [MLX5E_TT_IPV4_UDP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, - .l4_prot_type = MLX5_L4_PROT_TYPE_UDP, - .rx_hash_fields = MLX5_HASH_IP_L4PORTS, - }, - [MLX5E_TT_IPV6_UDP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, - .l4_prot_type = MLX5_L4_PROT_TYPE_UDP, - .rx_hash_fields = MLX5_HASH_IP_L4PORTS, - }, - [MLX5E_TT_IPV4_IPSEC_AH] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, - .l4_prot_type = 0, - .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, - }, - [MLX5E_TT_IPV6_IPSEC_AH] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, - .l4_prot_type = 0, - .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, - }, - [MLX5E_TT_IPV4_IPSEC_ESP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, - .l4_prot_type = 0, - .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, - }, - [MLX5E_TT_IPV6_IPSEC_ESP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, - .l4_prot_type = 0, - .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, - }, - [MLX5E_TT_IPV4] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, - .l4_prot_type = 0, - .rx_hash_fields = MLX5_HASH_IP, - }, - [MLX5E_TT_IPV6] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, - .l4_prot_type = 0, - .rx_hash_fields = MLX5_HASH_IP, - }, -}; + struct 
mlx5e_rx_res *res = priv->rx_res; + unsigned int ix; -struct mlx5e_tirc_config mlx5e_tirc_get_default_config(enum mlx5e_traffic_types tt) -{ - return tirc_default_config[tt]; -} + mlx5e_rqt_redirect_direct(&res->indir_rqt, priv->drop_rq.rqn); -static void mlx5e_build_tir_ctx_lro(struct mlx5e_params *params, void *tirc) -{ - if (!params->lro_en) - return; + for (ix = 0; ix < priv->max_nch; ix++) + mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, priv->drop_rq.rqn); -#define ROUGH_MAX_L2_L3_HDR_SZ 256 - - MLX5_SET(tirc, tirc, lro_enable_mask, - MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO | - MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO); - MLX5_SET(tirc, tirc, lro_max_ip_payload_size, - (MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - ROUGH_MAX_L2_L3_HDR_SZ) >> 8); - MLX5_SET(tirc, tirc, lro_timeout_period_usecs, params->lro_timeout); + if (priv->profile->rx_ptp_support) + mlx5e_rqt_redirect_direct(&res->ptp.rqt, priv->drop_rq.rqn); } -void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_rss_params *rss_params, - const struct mlx5e_tirc_config *ttconfig, - void *tirc, bool inner) +int mlx5e_modify_tirs_hash(struct mlx5e_priv *priv) { - void *hfso = inner ? 
MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner) : - MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); - - MLX5_SET(tirc, tirc, rx_hash_fn, mlx5e_rx_hash_fn(rss_params->hfunc)); - if (rss_params->hfunc == ETH_RSS_HASH_TOP) { - void *rss_key = MLX5_ADDR_OF(tirc, tirc, - rx_hash_toeplitz_key); - size_t len = MLX5_FLD_SZ_BYTES(tirc, - rx_hash_toeplitz_key); - - MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); - memcpy(rss_key, rss_params->toeplitz_hash_key, len); - } - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - ttconfig->l3_prot_type); - MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, - ttconfig->l4_prot_type); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - ttconfig->rx_hash_fields); -} - -static void mlx5e_update_rx_hash_fields(struct mlx5e_tirc_config *ttconfig, - enum mlx5e_traffic_types tt, - u32 rx_hash_fields) -{ - *ttconfig = tirc_default_config[tt]; - ttconfig->rx_hash_fields = rx_hash_fields; -} - -void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in) -{ - void *tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx); - struct mlx5e_rss_params *rss = &priv->rss_params; - struct mlx5_core_dev *mdev = priv->mdev; - int ctxlen = MLX5_ST_SZ_BYTES(tirc); - struct mlx5e_tirc_config ttconfig; - int tt; + struct mlx5e_rss_params_hash *rss_hash = &priv->rx_res->rss_params.hash; + struct mlx5e_rss_params_traffic_type rss_tt; + struct mlx5e_rx_res *res = priv->rx_res; + struct mlx5e_tir_builder *builder; + enum mlx5e_traffic_types tt; - MLX5_SET(modify_tir_in, in, bitmask.hash, 1); + builder = mlx5e_tir_builder_alloc(true); + if (!builder) + return -ENOMEM; for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { - memset(tirc, 0, ctxlen); - mlx5e_update_rx_hash_fields(&ttconfig, tt, - rss->rx_hash_fields[tt]); - mlx5e_build_indir_tir_ctx_hash(rss, &ttconfig, tirc, false); - mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in); + rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt); + mlx5e_tir_builder_build_rss(builder, rss_hash, &rss_tt, 
false); + mlx5e_tir_modify(&res->rss[tt].indir_tir, builder); + mlx5e_tir_builder_clear(builder); } /* Verify inner tirs resources allocated */ - if (!priv->inner_indir_tir[0].tirn) - return; + if (!res->rss[0].inner_indir_tir.tirn) + goto out; for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { - memset(tirc, 0, ctxlen); - mlx5e_update_rx_hash_fields(&ttconfig, tt, - rss->rx_hash_fields[tt]); - mlx5e_build_indir_tir_ctx_hash(rss, &ttconfig, tirc, true); - mlx5_core_modify_tir(mdev, priv->inner_indir_tir[tt].tirn, in); + rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt); + mlx5e_tir_builder_build_rss(builder, rss_hash, &rss_tt, true); + mlx5e_tir_modify(&res->rss[tt].indir_tir, builder); + mlx5e_tir_builder_clear(builder); } + +out: + mlx5e_tir_builder_free(builder); + return 0; } static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv) { - struct mlx5_core_dev *mdev = priv->mdev; - - void *in; - void *tirc; - int inlen; + struct mlx5e_rx_res *res = priv->rx_res; + struct mlx5e_tir_builder *builder; + struct mlx5e_lro_param lro_param; + enum mlx5e_traffic_types tt; int err; - int tt; int ix; - inlen = MLX5_ST_SZ_BYTES(modify_tir_in); - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) + builder = mlx5e_tir_builder_alloc(true); + if (!builder) return -ENOMEM; - MLX5_SET(modify_tir_in, in, bitmask.lro, 1); - tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx); - - mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc); + lro_param = mlx5e_get_lro_param(&priv->channels.params); + mlx5e_tir_builder_build_lro(builder, &lro_param); for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { - err = mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in); + err = mlx5e_tir_modify(&res->rss[tt].indir_tir, builder); if (err) - goto free_in; + goto err_free_builder; + + /* Verify inner tirs resources allocated */ + if (!res->rss[0].inner_indir_tir.tirn) + continue; + + err = mlx5e_tir_modify(&res->rss[tt].inner_indir_tir, builder); + if (err) + goto err_free_builder; } for (ix = 0; ix < 
priv->max_nch; ix++) { - err = mlx5_core_modify_tir(mdev, priv->direct_tir[ix].tirn, in); + err = mlx5e_tir_modify(&res->channels[ix].direct_tir, builder); if (err) - goto free_in; + goto err_free_builder; } -free_in: - kvfree(in); - +err_free_builder: + mlx5e_tir_builder_free(builder); return err; } @@ -2768,8 +2570,9 @@ int mlx5e_num_channels_changed(struct mlx5e_priv *priv) mlx5e_set_default_xps_cpumasks(priv, &priv->channels.params); - if (!netif_is_rxfh_configured(priv->netdev)) - mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt, + /* This function may be called on attach, before priv->rx_res is created. */ + if (!netif_is_rxfh_configured(priv->netdev) && priv->rx_res) + mlx5e_build_default_indir_rqt(priv->rx_res->rss_params.indir.table, MLX5E_INDIR_RQT_SIZE, count); return 0; @@ -2829,16 +2632,19 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) mlx5e_add_sqs_fwd_rules(priv); mlx5e_wait_channels_min_rx_wqes(&priv->channels); - mlx5e_redirect_rqts_to_channels(priv, &priv->channels); - mlx5e_xsk_redirect_rqts_to_channels(priv, &priv->channels); + if (priv->rx_res) { + mlx5e_redirect_rqts_to_channels(priv, &priv->channels); + mlx5e_xsk_redirect_rqts_to_channels(priv, &priv->channels); + } } void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) { - mlx5e_xsk_redirect_rqts_to_drop(priv, &priv->channels); - - mlx5e_redirect_rqts_to_drop(priv); + if (priv->rx_res) { + mlx5e_xsk_redirect_rqts_to_drop(priv, &priv->channels); + mlx5e_redirect_rqts_to_drop(priv); + } if (mlx5e_is_vport_rep(priv)) mlx5e_remove_sqs_fwd_rules(priv); @@ -3213,159 +3019,192 @@ static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) mlx5e_destroy_tises(priv); } -static void mlx5e_build_indir_tir_ctx_common(struct mlx5e_priv *priv, - u32 rqtn, u32 *tirc) -{ - MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.hw_objs.td.tdn); - MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); - MLX5_SET(tirc, tirc, indirect_table, rqtn); - 
MLX5_SET(tirc, tirc, tunneled_offload_en, - priv->channels.params.tunneled_offload_en); - - mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc); -} - -static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, - enum mlx5e_traffic_types tt, - u32 *tirc) -{ - mlx5e_build_indir_tir_ctx_common(priv, priv->indir_rqt.rqtn, tirc); - mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, - &tirc_default_config[tt], tirc, false); -} - -static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 rqtn, u32 *tirc) -{ - mlx5e_build_indir_tir_ctx_common(priv, rqtn, tirc); - MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8); -} - -static void mlx5e_build_inner_indir_tir_ctx(struct mlx5e_priv *priv, - enum mlx5e_traffic_types tt, - u32 *tirc) -{ - mlx5e_build_indir_tir_ctx_common(priv, priv->indir_rqt.rqtn, tirc); - mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, - &tirc_default_config[tt], tirc, true); -} - int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc) { - struct mlx5e_tir *tir; - void *tirc; - int inlen; - int i = 0; - int err; - u32 *in; - int tt; + struct mlx5e_rss_params_hash *rss_hash = &priv->rx_res->rss_params.hash; + bool inner_ft_support = priv->channels.params.tunneled_offload_en; + struct mlx5e_rss_params_traffic_type rss_tt; + struct mlx5e_rx_res *res = priv->rx_res; + enum mlx5e_traffic_types tt, max_tt; + struct mlx5e_tir_builder *builder; + struct mlx5e_lro_param lro_param; + u32 indir_rqtn; + int err = 0; - inlen = MLX5_ST_SZ_BYTES(create_tir_in); - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) + builder = mlx5e_tir_builder_alloc(false); + if (!builder) return -ENOMEM; + lro_param = mlx5e_get_lro_param(&priv->channels.params); + indir_rqtn = mlx5e_rqt_get_rqtn(&res->indir_rqt); + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { - memset(in, 0, inlen); - tir = &priv->indir_tir[tt]; - tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); - mlx5e_build_indir_tir_ctx(priv, tt, tirc); - err = mlx5e_create_tir(priv->mdev, tir, 
in); + mlx5e_tir_builder_build_rqt(builder, priv->mdev->mlx5e_res.hw_objs.td.tdn, + indir_rqtn, inner_ft_support); + mlx5e_tir_builder_build_lro(builder, &lro_param); + rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt); + mlx5e_tir_builder_build_rss(builder, rss_hash, &rss_tt, false); + + err = mlx5e_tir_init(&res->rss[tt].indir_tir, builder, priv->mdev, true); if (err) { mlx5_core_warn(priv->mdev, "create indirect tirs failed, %d\n", err); - goto err_destroy_inner_tirs; + goto err_destroy_tirs; } + + mlx5e_tir_builder_clear(builder); } if (!inner_ttc || !mlx5e_tunnel_inner_ft_supported(priv->mdev)) goto out; - for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) { - memset(in, 0, inlen); - tir = &priv->inner_indir_tir[i]; - tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); - mlx5e_build_inner_indir_tir_ctx(priv, i, tirc); - err = mlx5e_create_tir(priv->mdev, tir, in); + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + mlx5e_tir_builder_build_rqt(builder, priv->mdev->mlx5e_res.hw_objs.td.tdn, + indir_rqtn, inner_ft_support); + mlx5e_tir_builder_build_lro(builder, &lro_param); + rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt); + mlx5e_tir_builder_build_rss(builder, rss_hash, &rss_tt, true); + + err = mlx5e_tir_init(&res->rss[tt].inner_indir_tir, builder, priv->mdev, true); if (err) { mlx5_core_warn(priv->mdev, "create inner indirect tirs failed, %d\n", err); goto err_destroy_inner_tirs; } + + mlx5e_tir_builder_clear(builder); } + goto out; + +err_destroy_inner_tirs: + max_tt = tt; + for (tt = 0; tt < max_tt; tt++) + mlx5e_tir_destroy(&res->rss[tt].inner_indir_tir); + + tt = MLX5E_NUM_INDIR_TIRS; +err_destroy_tirs: + max_tt = tt; + for (tt = 0; tt < max_tt; tt++) + mlx5e_tir_destroy(&res->rss[tt].indir_tir); + out: - kvfree(in); + mlx5e_tir_builder_free(builder); - return 0; + return err; +} -err_destroy_inner_tirs: - for (i--; i >= 0; i--) - mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]); +static int mlx5e_create_direct_tir(struct mlx5e_priv *priv, 
struct mlx5e_tir *tir, + struct mlx5e_tir_builder *builder, struct mlx5e_rqt *rqt) +{ + bool inner_ft_support = priv->channels.params.tunneled_offload_en; + struct mlx5e_lro_param lro_param; + int err = 0; - for (tt--; tt >= 0; tt--) - mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[tt]); + lro_param = mlx5e_get_lro_param(&priv->channels.params); - kvfree(in); + mlx5e_tir_builder_build_rqt(builder, priv->mdev->mlx5e_res.hw_objs.td.tdn, + mlx5e_rqt_get_rqtn(rqt), inner_ft_support); + mlx5e_tir_builder_build_lro(builder, &lro_param); + mlx5e_tir_builder_build_direct(builder); + + err = mlx5e_tir_init(tir, builder, priv->mdev, true); + if (unlikely(err)) + mlx5_core_warn(priv->mdev, "create tirs failed, %d\n", err); + + mlx5e_tir_builder_clear(builder); return err; } -int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n) +int mlx5e_create_direct_tirs(struct mlx5e_priv *priv) { - struct mlx5e_tir *tir; - void *tirc; - int inlen; + struct mlx5e_rx_res *res = priv->rx_res; + struct mlx5e_tir_builder *builder; int err = 0; - u32 *in; int ix; - inlen = MLX5_ST_SZ_BYTES(create_tir_in); - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) + builder = mlx5e_tir_builder_alloc(false); + if (!builder) return -ENOMEM; - for (ix = 0; ix < n; ix++) { - memset(in, 0, inlen); - tir = &tirs[ix]; - tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); - mlx5e_build_direct_tir_ctx(priv, tir->rqt.rqtn, tirc); - err = mlx5e_create_tir(priv->mdev, tir, in); - if (unlikely(err)) - goto err_destroy_ch_tirs; + for (ix = 0; ix < priv->max_nch; ix++) { + err = mlx5e_create_direct_tir(priv, &res->channels[ix].direct_tir, + builder, &res->channels[ix].direct_rqt); + if (err) + goto err_destroy_tirs; } goto out; -err_destroy_ch_tirs: - mlx5_core_warn(priv->mdev, "create tirs failed, %d\n", err); - for (ix--; ix >= 0; ix--) - mlx5e_destroy_tir(priv->mdev, &tirs[ix]); +err_destroy_tirs: + while (--ix >= 0) + mlx5e_tir_destroy(&res->channels[ix].direct_tir); out: - kvfree(in); + 
mlx5e_tir_builder_free(builder); + + return err; +} + +static int mlx5e_create_xsk_tirs(struct mlx5e_priv *priv) +{ + struct mlx5e_rx_res *res = priv->rx_res; + struct mlx5e_tir_builder *builder; + int err; + int ix; + + builder = mlx5e_tir_builder_alloc(false); + if (!builder) + return -ENOMEM; + + for (ix = 0; ix < priv->max_nch; ix++) { + err = mlx5e_create_direct_tir(priv, &res->channels[ix].xsk_tir, + builder, &res->channels[ix].xsk_rqt); + if (err) + goto err_destroy_tirs; + } + + goto out; + +err_destroy_tirs: + while (--ix >= 0) + mlx5e_tir_destroy(&res->channels[ix].xsk_tir); + +out: + mlx5e_tir_builder_free(builder); return err; } void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv) { - int i; + struct mlx5e_rx_res *res = priv->rx_res; + enum mlx5e_traffic_types tt; - for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) - mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[i]); + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + mlx5e_tir_destroy(&res->rss[tt].indir_tir); /* Verify inner tirs resources allocated */ - if (!priv->inner_indir_tir[0].tirn) + if (!res->rss[0].inner_indir_tir.tirn) return; - for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) - mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]); + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + mlx5e_tir_destroy(&res->rss[tt].inner_indir_tir); } -void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n) +void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv) { - int i; + unsigned int ix; + + for (ix = 0; ix < priv->max_nch; ix++) + mlx5e_tir_destroy(&priv->rx_res->channels[ix].direct_tir); +} - for (i = 0; i < n; i++) - mlx5e_destroy_tir(priv->mdev, &tirs[i]); +static void mlx5e_destroy_xsk_tirs(struct mlx5e_priv *priv) +{ + unsigned int ix; + + for (ix = 0; ix < priv->max_nch; ix++) + mlx5e_tir_destroy(&priv->rx_res->channels[ix].xsk_tir); } static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool enable) @@ -4633,19 +4472,18 @@ void 
mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, { enum mlx5e_traffic_types tt; - rss_params->hfunc = ETH_RSS_HASH_TOP; - netdev_rss_key_fill(rss_params->toeplitz_hash_key, - sizeof(rss_params->toeplitz_hash_key)); - mlx5e_build_default_indir_rqt(rss_params->indirection_rqt, + rss_params->hash.hfunc = ETH_RSS_HASH_TOP; + netdev_rss_key_fill(rss_params->hash.toeplitz_hash_key, + sizeof(rss_params->hash.toeplitz_hash_key)); + mlx5e_build_default_indir_rqt(rss_params->indir.table, MLX5E_INDIR_RQT_SIZE, num_channels); for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) rss_params->rx_hash_fields[tt] = - tirc_default_config[tt].rx_hash_fields; + mlx5e_rss_get_default_tt_config(tt).rx_hash_fields; } void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu) { - struct mlx5e_rss_params *rss_params = &priv->rss_params; struct mlx5e_params *params = &priv->channels.params; struct mlx5_core_dev *mdev = priv->mdev; u8 rx_cq_period_mode; @@ -4705,10 +4543,7 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 /* TX inline */ mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode); - /* RSS */ - mlx5e_build_rss_params(rss_params, params->num_channels); - params->tunneled_offload_en = - mlx5e_tunnel_inner_ft_supported(mdev); + params->tunneled_offload_en = mlx5e_tunnel_inner_ft_supported(mdev); /* AF_XDP */ params->xsk = xsk; @@ -4808,7 +4643,14 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_TX; netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_RX; + /* Tunneled LRO is not supported in the driver, and the same RQs are + * shared between inner and outer TIRs, so the driver can't disable LRO + * for inner TIRs while having it enabled for outer TIRs. Due to this, + * block LRO altogether if the firmware declares tunneled LRO support. 
+ */ if (!!MLX5_CAP_ETH(mdev, lro_cap) && + !MLX5_CAP_ETH(mdev, tunnel_lro_vxlan) && + !MLX5_CAP_ETH(mdev, tunnel_lro_gre) && mlx5e_check_fragmented_striding_rq_cap(mdev)) netdev->vlan_features |= NETIF_F_LRO; @@ -4970,9 +4812,15 @@ static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; - u16 max_nch = priv->max_nch; + struct mlx5e_tir_builder *tir_builder; int err; + priv->rx_res = kvzalloc(sizeof(*priv->rx_res), GFP_KERNEL); + if (!priv->rx_res) + return -ENOMEM; + + mlx5e_build_rss_params(&priv->rx_res->rss_params, priv->channels.params.num_channels); + mlx5e_create_q_counters(priv); err = mlx5e_open_drop_rq(priv, &priv->drop_rq); @@ -4985,7 +4833,7 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) if (err) goto err_close_drop_rq; - err = mlx5e_create_direct_rqts(priv, priv->direct_tir, max_nch); + err = mlx5e_create_direct_rqts(priv); if (err) goto err_destroy_indirect_rqts; @@ -4993,23 +4841,31 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) if (err) goto err_destroy_direct_rqts; - err = mlx5e_create_direct_tirs(priv, priv->direct_tir, max_nch); + err = mlx5e_create_direct_tirs(priv); if (err) goto err_destroy_indirect_tirs; - err = mlx5e_create_direct_rqts(priv, priv->xsk_tir, max_nch); + err = mlx5e_create_xsk_rqts(priv); if (unlikely(err)) goto err_destroy_direct_tirs; - err = mlx5e_create_direct_tirs(priv, priv->xsk_tir, max_nch); + err = mlx5e_create_xsk_tirs(priv); if (unlikely(err)) goto err_destroy_xsk_rqts; - err = mlx5e_create_direct_rqts(priv, &priv->ptp_tir, 1); + err = mlx5e_rqt_init_direct(&priv->rx_res->ptp.rqt, priv->mdev, false, + priv->drop_rq.rqn); if (err) goto err_destroy_xsk_tirs; - err = mlx5e_create_direct_tirs(priv, &priv->ptp_tir, 1); + tir_builder = mlx5e_tir_builder_alloc(false); + if (!tir_builder) { + err = -ENOMEM; + goto err_destroy_ptp_rqt; + } + err = mlx5e_create_direct_tir(priv, &priv->rx_res->ptp.tir, 
tir_builder, + &priv->rx_res->ptp.rqt); + mlx5e_tir_builder_free(tir_builder); if (err) goto err_destroy_ptp_rqt; @@ -5038,45 +4894,47 @@ err_tc_nic_cleanup: err_destroy_flow_steering: mlx5e_destroy_flow_steering(priv); err_destroy_ptp_direct_tir: - mlx5e_destroy_direct_tirs(priv, &priv->ptp_tir, 1); + mlx5e_tir_destroy(&priv->rx_res->ptp.tir); err_destroy_ptp_rqt: - mlx5e_destroy_direct_rqts(priv, &priv->ptp_tir, 1); + mlx5e_rqt_destroy(&priv->rx_res->ptp.rqt); err_destroy_xsk_tirs: - mlx5e_destroy_direct_tirs(priv, priv->xsk_tir, max_nch); + mlx5e_destroy_xsk_tirs(priv); err_destroy_xsk_rqts: - mlx5e_destroy_direct_rqts(priv, priv->xsk_tir, max_nch); + mlx5e_destroy_xsk_rqts(priv); err_destroy_direct_tirs: - mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch); + mlx5e_destroy_direct_tirs(priv); err_destroy_indirect_tirs: mlx5e_destroy_indirect_tirs(priv); err_destroy_direct_rqts: - mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch); + mlx5e_destroy_direct_rqts(priv); err_destroy_indirect_rqts: - mlx5e_destroy_rqt(priv, &priv->indir_rqt); + mlx5e_rqt_destroy(&priv->rx_res->indir_rqt); err_close_drop_rq: mlx5e_close_drop_rq(&priv->drop_rq); err_destroy_q_counters: mlx5e_destroy_q_counters(priv); + kvfree(priv->rx_res); + priv->rx_res = NULL; return err; } static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv) { - u16 max_nch = priv->max_nch; - mlx5e_accel_cleanup_rx(priv); mlx5e_tc_nic_cleanup(priv); mlx5e_destroy_flow_steering(priv); - mlx5e_destroy_direct_tirs(priv, &priv->ptp_tir, 1); - mlx5e_destroy_direct_rqts(priv, &priv->ptp_tir, 1); - mlx5e_destroy_direct_tirs(priv, priv->xsk_tir, max_nch); - mlx5e_destroy_direct_rqts(priv, priv->xsk_tir, max_nch); - mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch); + mlx5e_tir_destroy(&priv->rx_res->ptp.tir); + mlx5e_rqt_destroy(&priv->rx_res->ptp.rqt); + mlx5e_destroy_xsk_tirs(priv); + mlx5e_destroy_xsk_rqts(priv); + mlx5e_destroy_direct_tirs(priv); mlx5e_destroy_indirect_tirs(priv); - 
mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch); - mlx5e_destroy_rqt(priv, &priv->indir_rqt); + mlx5e_destroy_direct_rqts(priv); + mlx5e_rqt_destroy(&priv->rx_res->indir_rqt); mlx5e_close_drop_rq(&priv->drop_rq); mlx5e_destroy_q_counters(priv); + kvfree(priv->rx_res); + priv->rx_res = NULL; } static int mlx5e_init_nic_tx(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index bf94bcb6fa5d..2c54951c240d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -585,9 +585,6 @@ static void mlx5e_build_rep_params(struct net_device *netdev) params->tunneled_offload_en = false; mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode); - - /* RSS */ - mlx5e_build_rss_params(&priv->rss_params, params->num_channels); } static void mlx5e_build_rep_netdev(struct net_device *netdev, @@ -650,6 +647,7 @@ static int mlx5e_create_rep_ttc_table(struct mlx5e_priv *priv) { struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep = rpriv->rep; + struct mlx5e_rx_res *res = priv->rx_res; struct ttc_params ttc_params = {}; int tt, err; @@ -657,7 +655,7 @@ static int mlx5e_create_rep_ttc_table(struct mlx5e_priv *priv) MLX5_FLOW_NAMESPACE_KERNEL); /* The inner_ttc in the ttc params is intentionally not set */ - ttc_params.any_tt_tirn = priv->direct_tir[0].tirn; + ttc_params.any_tt_tirn = res->channels[0].direct_tir.tirn; mlx5e_set_ttc_ft_params(&ttc_params); if (rep->vport != MLX5_VPORT_UPLINK) @@ -665,7 +663,7 @@ static int mlx5e_create_rep_ttc_table(struct mlx5e_priv *priv) ttc_params.ft_attr.level = MLX5E_TTC_FT_LEVEL + 1; for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) - ttc_params.indir_tirn[tt] = priv->indir_tir[tt].tirn; + ttc_params.indir_tirn[tt] = res->rss[tt].indir_tir.tirn; err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc); if (err) { @@ -760,9 +758,14 @@ int mlx5e_rep_bond_update(struct 
mlx5e_priv *priv, bool cleanup) static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; - u16 max_nch = priv->max_nch; int err; + priv->rx_res = kvzalloc(sizeof(*priv->rx_res), GFP_KERNEL); + if (!priv->rx_res) + return -ENOMEM; + + mlx5e_build_rss_params(&priv->rx_res->rss_params, priv->channels.params.num_channels); + mlx5e_init_l2_addr(priv); err = mlx5e_open_drop_rq(priv, &priv->drop_rq); @@ -775,7 +778,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) if (err) goto err_close_drop_rq; - err = mlx5e_create_direct_rqts(priv, priv->direct_tir, max_nch); + err = mlx5e_create_direct_rqts(priv); if (err) goto err_destroy_indirect_rqts; @@ -783,7 +786,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) if (err) goto err_destroy_direct_rqts; - err = mlx5e_create_direct_tirs(priv, priv->direct_tir, max_nch); + err = mlx5e_create_direct_tirs(priv); if (err) goto err_destroy_indirect_tirs; @@ -808,31 +811,33 @@ err_destroy_root_ft: err_destroy_ttc_table: mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); err_destroy_direct_tirs: - mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch); + mlx5e_destroy_direct_tirs(priv); err_destroy_indirect_tirs: mlx5e_destroy_indirect_tirs(priv); err_destroy_direct_rqts: - mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch); + mlx5e_destroy_direct_rqts(priv); err_destroy_indirect_rqts: - mlx5e_destroy_rqt(priv, &priv->indir_rqt); + mlx5e_rqt_destroy(&priv->rx_res->indir_rqt); err_close_drop_rq: mlx5e_close_drop_rq(&priv->drop_rq); + kvfree(priv->rx_res); + priv->rx_res = NULL; return err; } static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) { - u16 max_nch = priv->max_nch; - mlx5e_ethtool_cleanup_steering(priv); rep_vport_rx_rule_destroy(priv); mlx5e_destroy_rep_root_ft(priv); mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); - mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch); + mlx5e_destroy_direct_tirs(priv); mlx5e_destroy_indirect_tirs(priv); - 
mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch); - mlx5e_destroy_rqt(priv, &priv->indir_rqt); + mlx5e_destroy_direct_rqts(priv); + mlx5e_rqt_destroy(&priv->rx_res->indir_rqt); mlx5e_close_drop_rq(&priv->drop_rq); + kvfree(priv->rx_res); + priv->rx_res = NULL; } static int mlx5e_init_ul_rep_rx(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 629a61e8022f..0cee2fa76d65 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -340,11 +340,11 @@ struct mlx5e_hairpin { struct mlx5_core_dev *func_mdev; struct mlx5e_priv *func_priv; u32 tdn; - u32 tirn; + struct mlx5e_tir direct_tir; int num_channels; struct mlx5e_rqt indir_rqt; - u32 indir_tirn[MLX5E_NUM_INDIR_TIRS]; + struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; struct mlx5e_ttc_table ttc; }; @@ -462,126 +462,100 @@ struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex) static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp) { - u32 in[MLX5_ST_SZ_DW(create_tir_in)] = {}; - void *tirc; + struct mlx5e_tir_builder *builder; int err; + builder = mlx5e_tir_builder_alloc(false); + if (!builder) + return -ENOMEM; + err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn); if (err) - goto alloc_tdn_err; - - tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); - - MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT); - MLX5_SET(tirc, tirc, inline_rqn, hp->pair->rqn[0]); - MLX5_SET(tirc, tirc, transport_domain, hp->tdn); + goto out; - err = mlx5_core_create_tir(hp->func_mdev, in, &hp->tirn); + mlx5e_tir_builder_build_inline(builder, hp->tdn, hp->pair->rqn[0]); + err = mlx5e_tir_init(&hp->direct_tir, builder, hp->func_mdev, false); if (err) goto create_tir_err; - return 0; +out: + mlx5e_tir_builder_free(builder); + return err; create_tir_err: mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn); -alloc_tdn_err: - return 
err; + + goto out; } static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp) { - mlx5_core_destroy_tir(hp->func_mdev, hp->tirn); + mlx5e_tir_destroy(&hp->direct_tir); mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn); } -static int mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc) -{ - struct mlx5e_priv *priv = hp->func_priv; - int i, ix, sz = MLX5E_INDIR_RQT_SIZE; - u32 *indirection_rqt, rqn; - - indirection_rqt = kcalloc(sz, sizeof(*indirection_rqt), GFP_KERNEL); - if (!indirection_rqt) - return -ENOMEM; - - mlx5e_build_default_indir_rqt(indirection_rqt, sz, - hp->num_channels); - - for (i = 0; i < sz; i++) { - ix = i; - if (priv->rss_params.hfunc == ETH_RSS_HASH_XOR) - ix = mlx5e_bits_invert(i, ilog2(sz)); - ix = indirection_rqt[ix]; - rqn = hp->pair->rqn[ix]; - MLX5_SET(rqtc, rqtc, rq_num[i], rqn); - } - - kfree(indirection_rqt); - return 0; -} - static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp) { - int inlen, err, sz = MLX5E_INDIR_RQT_SIZE; struct mlx5e_priv *priv = hp->func_priv; struct mlx5_core_dev *mdev = priv->mdev; - void *rqtc; - u32 *in; + struct mlx5e_rss_params_indir *indir; + int err; - inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz; - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) + indir = kvmalloc(sizeof(*indir), GFP_KERNEL); + if (!indir) return -ENOMEM; - rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); - - MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); - MLX5_SET(rqtc, rqtc, rqt_max_size, sz); - - err = mlx5e_hairpin_fill_rqt_rqns(hp, rqtc); - if (err) - goto out; - - err = mlx5_core_create_rqt(mdev, in, inlen, &hp->indir_rqt.rqtn); - if (!err) - hp->indir_rqt.enabled = true; + mlx5e_build_default_indir_rqt(indir->table, MLX5E_INDIR_RQT_SIZE, hp->num_channels); + err = mlx5e_rqt_init_indir(&hp->indir_rqt, mdev, hp->pair->rqn, hp->num_channels, + priv->rx_res->rss_params.hash.hfunc, indir); -out: - kvfree(in); + kvfree(indir); return err; } static int 
mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp) { struct mlx5e_priv *priv = hp->func_priv; - u32 in[MLX5_ST_SZ_DW(create_tir_in)]; - int tt, i, err; - void *tirc; + struct mlx5e_rss_params_hash *rss_hash; + enum mlx5e_traffic_types tt, max_tt; + struct mlx5e_tir_builder *builder; + int err = 0; + + builder = mlx5e_tir_builder_alloc(false); + if (!builder) + return -ENOMEM; + + rss_hash = &priv->rx_res->rss_params.hash; for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { - struct mlx5e_tirc_config ttconfig = mlx5e_tirc_get_default_config(tt); + struct mlx5e_rss_params_traffic_type rss_tt; - memset(in, 0, MLX5_ST_SZ_BYTES(create_tir_in)); - tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); + rss_tt = mlx5e_rss_get_default_tt_config(tt); - MLX5_SET(tirc, tirc, transport_domain, hp->tdn); - MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); - MLX5_SET(tirc, tirc, indirect_table, hp->indir_rqt.rqtn); - mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, &ttconfig, tirc, false); + mlx5e_tir_builder_build_rqt(builder, hp->tdn, + mlx5e_rqt_get_rqtn(&hp->indir_rqt), + false); + mlx5e_tir_builder_build_rss(builder, rss_hash, &rss_tt, false); - err = mlx5_core_create_tir(hp->func_mdev, in, - &hp->indir_tirn[tt]); + err = mlx5e_tir_init(&hp->indir_tir[tt], builder, hp->func_mdev, false); if (err) { mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err); goto err_destroy_tirs; } + + mlx5e_tir_builder_clear(builder); } - return 0; -err_destroy_tirs: - for (i = 0; i < tt; i++) - mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[i]); +out: + mlx5e_tir_builder_free(builder); return err; + +err_destroy_tirs: + max_tt = tt; + for (tt = 0; tt < max_tt; tt++) + mlx5e_tir_destroy(&hp->indir_tir[tt]); + + goto out; } static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp) @@ -589,7 +563,7 @@ static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp) int tt; for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) - 
mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[tt]); + mlx5e_tir_destroy(&hp->indir_tir[tt]); } static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp, @@ -600,10 +574,10 @@ static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp, memset(ttc_params, 0, sizeof(*ttc_params)); - ttc_params->any_tt_tirn = hp->tirn; + ttc_params->any_tt_tirn = mlx5e_tir_get_tirn(&hp->direct_tir); for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) - ttc_params->indir_tirn[tt] = hp->indir_tirn[tt]; + ttc_params->indir_tirn[tt] = mlx5e_tir_get_tirn(&hp->indir_tir[tt]); ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE; ft_attr->level = MLX5E_TC_TTC_FT_LEVEL; @@ -637,7 +611,7 @@ static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp) err_create_ttc_table: mlx5e_hairpin_destroy_indirect_tirs(hp); err_create_indirect_tirs: - mlx5e_destroy_rqt(priv, &hp->indir_rqt); + mlx5e_rqt_destroy(&hp->indir_rqt); return err; } @@ -648,7 +622,7 @@ static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp) mlx5e_destroy_ttc_table(priv, &hp->ttc); mlx5e_hairpin_destroy_indirect_tirs(hp); - mlx5e_destroy_rqt(priv, &hp->indir_rqt); + mlx5e_rqt_destroy(&hp->indir_rqt); } static struct mlx5e_hairpin * @@ -874,7 +848,7 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, } netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n", - hp->tirn, hp->pair->rqn[0], + mlx5e_tir_get_tirn(&hp->direct_tir), hp->pair->rqn[0], dev_name(hp->pair->peer_mdev->device), hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets); @@ -883,7 +857,7 @@ attach_flow: flow_flag_set(flow, HAIRPIN_RSS); flow->attr->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t; } else { - flow->attr->nic_attr->hairpin_tirn = hpe->hp->tirn; + flow->attr->nic_attr->hairpin_tirn = mlx5e_tir_get_tirn(&hpe->hp->direct_tir); } flow->hpe = hpe; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 
7d7ed025db0d..6535c636ae22 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -331,32 +331,19 @@ static int mlx5i_create_flow_steering(struct mlx5e_priv *priv) } mlx5e_set_ttc_basic_params(priv, &ttc_params); - mlx5e_set_inner_ttc_ft_params(&ttc_params); - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) - ttc_params.indir_tirn[tt] = priv->inner_indir_tir[tt].tirn; - - err = mlx5e_create_inner_ttc_table(priv, &ttc_params, &priv->fs.inner_ttc); - if (err) { - netdev_err(priv->netdev, "Failed to create inner ttc table, err=%d\n", - err); - goto err_destroy_arfs_tables; - } - mlx5e_set_ttc_ft_params(&ttc_params); for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) - ttc_params.indir_tirn[tt] = priv->indir_tir[tt].tirn; + ttc_params.indir_tirn[tt] = priv->rx_res->rss[tt].indir_tir.tirn; err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc); if (err) { netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n", err); - goto err_destroy_inner_ttc_table; + goto err_destroy_arfs_tables; } return 0; -err_destroy_inner_ttc_table: - mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc); err_destroy_arfs_tables: mlx5e_arfs_destroy_tables(priv); @@ -366,16 +353,20 @@ err_destroy_arfs_tables: static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv) { mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); - mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc); mlx5e_arfs_destroy_tables(priv); } static int mlx5i_init_rx(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; - u16 max_nch = priv->max_nch; int err; + priv->rx_res = kvzalloc(sizeof(*priv->rx_res), GFP_KERNEL); + if (!priv->rx_res) + return -ENOMEM; + + mlx5e_build_rss_params(&priv->rx_res->rss_params, priv->channels.params.num_channels); + mlx5e_create_q_counters(priv); err = mlx5e_open_drop_rq(priv, &priv->drop_rq); @@ -388,15 +379,15 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) if (err) goto 
err_close_drop_rq; - err = mlx5e_create_direct_rqts(priv, priv->direct_tir, max_nch); + err = mlx5e_create_direct_rqts(priv); if (err) goto err_destroy_indirect_rqts; - err = mlx5e_create_indirect_tirs(priv, true); + err = mlx5e_create_indirect_tirs(priv, false); if (err) goto err_destroy_direct_rqts; - err = mlx5e_create_direct_tirs(priv, priv->direct_tir, max_nch); + err = mlx5e_create_direct_tirs(priv); if (err) goto err_destroy_indirect_tirs; @@ -407,31 +398,33 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) return 0; err_destroy_direct_tirs: - mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch); + mlx5e_destroy_direct_tirs(priv); err_destroy_indirect_tirs: mlx5e_destroy_indirect_tirs(priv); err_destroy_direct_rqts: - mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch); + mlx5e_destroy_direct_rqts(priv); err_destroy_indirect_rqts: - mlx5e_destroy_rqt(priv, &priv->indir_rqt); + mlx5e_rqt_destroy(&priv->rx_res->indir_rqt); err_close_drop_rq: mlx5e_close_drop_rq(&priv->drop_rq); err_destroy_q_counters: mlx5e_destroy_q_counters(priv); + kvfree(priv->rx_res); + priv->rx_res = NULL; return err; } static void mlx5i_cleanup_rx(struct mlx5e_priv *priv) { - u16 max_nch = priv->max_nch; - mlx5i_destroy_flow_steering(priv); - mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch); + mlx5e_destroy_direct_tirs(priv); mlx5e_destroy_indirect_tirs(priv); - mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch); - mlx5e_destroy_rqt(priv, &priv->indir_rqt); + mlx5e_destroy_direct_rqts(priv); + mlx5e_rqt_destroy(&priv->rx_res->indir_rqt); mlx5e_close_drop_rq(&priv->drop_rq); mlx5e_destroy_q_counters(priv); + kvfree(priv->rx_res); + priv->rx_res = NULL; } /* The stats groups order is opposite to the update_stats() order calls */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index c5ef9aa64efe..f5d0d392efbf 100644 --- 
a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -335,14 +335,16 @@ mlxsw_sp_bridge_port_find(struct mlxsw_sp_bridge *bridge, static struct mlxsw_sp_bridge_port * mlxsw_sp_bridge_port_create(struct mlxsw_sp_bridge_device *bridge_device, - struct net_device *brport_dev) + struct net_device *brport_dev, + struct netlink_ext_ack *extack) { struct mlxsw_sp_bridge_port *bridge_port; struct mlxsw_sp_port *mlxsw_sp_port; + int err; bridge_port = kzalloc(sizeof(*bridge_port), GFP_KERNEL); if (!bridge_port) - return NULL; + return ERR_PTR(-ENOMEM); mlxsw_sp_port = mlxsw_sp_port_dev_lower_find(brport_dev); bridge_port->lagged = mlxsw_sp_port->lagged; @@ -359,12 +361,23 @@ mlxsw_sp_bridge_port_create(struct mlxsw_sp_bridge_device *bridge_device, list_add(&bridge_port->list, &bridge_device->ports_list); bridge_port->ref_count = 1; + err = switchdev_bridge_port_offload(brport_dev, mlxsw_sp_port->dev, + NULL, NULL, NULL, false, extack); + if (err) + goto err_switchdev_offload; + return bridge_port; + +err_switchdev_offload: + list_del(&bridge_port->list); + kfree(bridge_port); + return ERR_PTR(err); } static void mlxsw_sp_bridge_port_destroy(struct mlxsw_sp_bridge_port *bridge_port) { + switchdev_bridge_port_unoffload(bridge_port->dev, NULL, NULL, NULL); list_del(&bridge_port->list); WARN_ON(!list_empty(&bridge_port->vlans_list)); kfree(bridge_port); @@ -390,9 +403,10 @@ mlxsw_sp_bridge_port_get(struct mlxsw_sp_bridge *bridge, if (IS_ERR(bridge_device)) return ERR_CAST(bridge_device); - bridge_port = mlxsw_sp_bridge_port_create(bridge_device, brport_dev); - if (!bridge_port) { - err = -ENOMEM; + bridge_port = mlxsw_sp_bridge_port_create(bridge_device, brport_dev, + extack); + if (IS_ERR(bridge_port)) { + err = PTR_ERR(bridge_port); goto err_bridge_port_create; } @@ -1569,7 +1583,6 @@ mlxsw_sp_mc_write_mdb_entry(struct mlxsw_sp *mlxsw_sp, { long *flood_bitmap; int num_of_ports; - int alloc_size; u16 
mid_idx; int err; @@ -1579,18 +1592,17 @@ mlxsw_sp_mc_write_mdb_entry(struct mlxsw_sp *mlxsw_sp, return false; num_of_ports = mlxsw_core_max_ports(mlxsw_sp->core); - alloc_size = sizeof(long) * BITS_TO_LONGS(num_of_ports); - flood_bitmap = kzalloc(alloc_size, GFP_KERNEL); + flood_bitmap = bitmap_alloc(num_of_ports, GFP_KERNEL); if (!flood_bitmap) return false; - bitmap_copy(flood_bitmap, mid->ports_in_mid, num_of_ports); + bitmap_copy(flood_bitmap, mid->ports_in_mid, num_of_ports); mlxsw_sp_mc_get_mrouters_bitmap(flood_bitmap, bridge_device, mlxsw_sp); mid->mid = mid_idx; err = mlxsw_sp_port_smid_full_entry(mlxsw_sp, mid_idx, flood_bitmap, bridge_device->mrouter); - kfree(flood_bitmap); + bitmap_free(flood_bitmap); if (err) return false; diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c b/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c index a72e3b3b596e..649ca609884a 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c @@ -93,9 +93,12 @@ static int sparx5_port_attr_set(struct net_device *dev, const void *ctx, } static int sparx5_port_bridge_join(struct sparx5_port *port, - struct net_device *bridge) + struct net_device *bridge, + struct netlink_ext_ack *extack) { struct sparx5 *sparx5 = port->sparx5; + struct net_device *ndev = port->ndev; + int err; if (bitmap_empty(sparx5->bridge_mask, SPX5_PORTS)) /* First bridged port */ @@ -109,12 +112,21 @@ static int sparx5_port_bridge_join(struct sparx5_port *port, set_bit(port->portno, sparx5->bridge_mask); + err = switchdev_bridge_port_offload(ndev, ndev, NULL, NULL, NULL, + false, extack); + if (err) + goto err_switchdev_offload; + /* Port enters in bridge mode therefor don't need to copy to CPU * frames for multicast in case the bridge is not requesting them */ - __dev_mc_unsync(port->ndev, sparx5_mc_unsync); + __dev_mc_unsync(ndev, sparx5_mc_unsync); return 0; + +err_switchdev_offload: + clear_bit(port->portno, 
sparx5->bridge_mask); + return err; } static void sparx5_port_bridge_leave(struct sparx5_port *port, @@ -122,6 +134,8 @@ static void sparx5_port_bridge_leave(struct sparx5_port *port, { struct sparx5 *sparx5 = port->sparx5; + switchdev_bridge_port_unoffload(port->ndev, NULL, NULL, NULL); + clear_bit(port->portno, sparx5->bridge_mask); if (bitmap_empty(sparx5->bridge_mask, SPX5_PORTS)) sparx5->hw_bridge_dev = NULL; @@ -139,11 +153,15 @@ static int sparx5_port_changeupper(struct net_device *dev, struct netdev_notifier_changeupper_info *info) { struct sparx5_port *port = netdev_priv(dev); + struct netlink_ext_ack *extack; int err = 0; + extack = netdev_notifier_info_to_extack(&info->info); + if (netif_is_bridge_master(info->upper_dev)) { if (info->linking) - err = sparx5_port_bridge_join(port, info->upper_dev); + err = sparx5_port_bridge_join(port, info->upper_dev, + extack); else sparx5_port_bridge_leave(port, info->upper_dev); diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index e9d260d84bf3..c52f175df389 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -1154,38 +1154,19 @@ static int ocelot_switchdev_sync(struct ocelot *ocelot, int port, struct net_device *bridge_dev, struct netlink_ext_ack *extack) { - struct ocelot_port *ocelot_port = ocelot->ports[port]; - struct ocelot_port_private *priv; clock_t ageing_time; u8 stp_state; - int err; - - priv = container_of(ocelot_port, struct ocelot_port_private, port); ocelot_inherit_brport_flags(ocelot, port, brport_dev); stp_state = br_port_get_stp_state(brport_dev); ocelot_bridge_stp_state_set(ocelot, port, stp_state); - err = ocelot_port_vlan_filtering(ocelot, port, - br_vlan_enabled(bridge_dev)); - if (err) - return err; - ageing_time = br_get_ageing_time(bridge_dev); ocelot_port_attr_ageing_set(ocelot, port, ageing_time); - err = br_mdb_replay(bridge_dev, brport_dev, priv, true, - &ocelot_switchdev_blocking_nb, extack); - if 
(err && err != -EOPNOTSUPP) - return err; - - err = br_vlan_replay(bridge_dev, brport_dev, priv, true, - &ocelot_switchdev_blocking_nb, extack); - if (err && err != -EOPNOTSUPP) - return err; - - return 0; + return ocelot_port_vlan_filtering(ocelot, port, + br_vlan_enabled(bridge_dev)); } static int ocelot_switchdev_unsync(struct ocelot *ocelot, int port) @@ -1216,6 +1197,13 @@ static int ocelot_netdevice_bridge_join(struct net_device *dev, ocelot_port_bridge_join(ocelot, port, bridge); + err = switchdev_bridge_port_offload(brport_dev, dev, priv, + &ocelot_netdevice_nb, + &ocelot_switchdev_blocking_nb, + false, extack); + if (err) + goto err_switchdev_offload; + err = ocelot_switchdev_sync(ocelot, port, brport_dev, bridge, extack); if (err) goto err_switchdev_sync; @@ -1223,10 +1211,24 @@ static int ocelot_netdevice_bridge_join(struct net_device *dev, return 0; err_switchdev_sync: + switchdev_bridge_port_unoffload(brport_dev, priv, + &ocelot_netdevice_nb, + &ocelot_switchdev_blocking_nb); +err_switchdev_offload: ocelot_port_bridge_leave(ocelot, port, bridge); return err; } +static void ocelot_netdevice_pre_bridge_leave(struct net_device *dev, + struct net_device *brport_dev) +{ + struct ocelot_port_private *priv = netdev_priv(dev); + + switchdev_bridge_port_unoffload(brport_dev, priv, + &ocelot_netdevice_nb, + &ocelot_switchdev_blocking_nb); +} + static int ocelot_netdevice_bridge_leave(struct net_device *dev, struct net_device *brport_dev, struct net_device *bridge) @@ -1279,6 +1281,18 @@ err_bridge_join: return err; } +static void ocelot_netdevice_pre_lag_leave(struct net_device *dev, + struct net_device *bond) +{ + struct net_device *bridge_dev; + + bridge_dev = netdev_master_upper_dev_get(bond); + if (!bridge_dev || !netif_is_bridge_master(bridge_dev)) + return; + + ocelot_netdevice_pre_bridge_leave(dev, bond); +} + static int ocelot_netdevice_lag_leave(struct net_device *dev, struct net_device *bond) { @@ -1356,6 +1370,43 @@ 
ocelot_netdevice_lag_changeupper(struct net_device *dev, } static int +ocelot_netdevice_prechangeupper(struct net_device *dev, + struct net_device *brport_dev, + struct netdev_notifier_changeupper_info *info) +{ + if (netif_is_bridge_master(info->upper_dev) && !info->linking) + ocelot_netdevice_pre_bridge_leave(dev, brport_dev); + + if (netif_is_lag_master(info->upper_dev) && !info->linking) + ocelot_netdevice_pre_lag_leave(dev, info->upper_dev); + + return NOTIFY_DONE; +} + +static int +ocelot_netdevice_lag_prechangeupper(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) +{ + struct net_device *lower; + struct list_head *iter; + int err = NOTIFY_DONE; + + netdev_for_each_lower_dev(dev, lower, iter) { + struct ocelot_port_private *priv = netdev_priv(lower); + struct ocelot_port *ocelot_port = &priv->port; + + if (ocelot_port->bond != dev) + return NOTIFY_OK; + + err = ocelot_netdevice_prechangeupper(dev, lower, info); + if (err) + return err; + } + + return NOTIFY_DONE; +} + +static int ocelot_netdevice_changelowerstate(struct net_device *dev, struct netdev_lag_lower_state_info *info) { @@ -1382,6 +1433,17 @@ static int ocelot_netdevice_event(struct notifier_block *unused, struct net_device *dev = netdev_notifier_info_to_dev(ptr); switch (event) { + case NETDEV_PRECHANGEUPPER: { + struct netdev_notifier_changeupper_info *info = ptr; + + if (ocelot_netdevice_dev_check(dev)) + return ocelot_netdevice_prechangeupper(dev, dev, info); + + if (netif_is_lag_master(dev)) + return ocelot_netdevice_lag_prechangeupper(dev, info); + + break; + } case NETDEV_CHANGEUPPER: { struct netdev_notifier_changeupper_info *info = ptr; diff --git a/drivers/net/ethernet/netronome/Kconfig b/drivers/net/ethernet/netronome/Kconfig index b82758d5beed..8844d1ac053a 100644 --- a/drivers/net/ethernet/netronome/Kconfig +++ b/drivers/net/ethernet/netronome/Kconfig @@ -23,6 +23,7 @@ config NFP depends on TLS && TLS_DEVICE || TLS_DEVICE=n select NET_DEVLINK select CRC32 + 
select DIMLIB help This driver supports the Netronome(R) NFP4000/NFP6000 based cards working as a advanced Ethernet NIC. It works with both diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 1cbe2c9f3959..2a432de11858 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -262,10 +262,10 @@ nfp_fl_output(struct nfp_app *app, struct nfp_fl_output *output, } static bool -nfp_flower_tun_is_gre(struct flow_cls_offload *flow, int start_idx) +nfp_flower_tun_is_gre(struct flow_rule *rule, int start_idx) { - struct flow_action_entry *act = flow->rule->action.entries; - int num_act = flow->rule->action.num_entries; + struct flow_action_entry *act = rule->action.entries; + int num_act = rule->action.num_entries; int act_idx; /* Preparse action list for next mirred or redirect action */ @@ -279,7 +279,7 @@ nfp_flower_tun_is_gre(struct flow_cls_offload *flow, int start_idx) static enum nfp_flower_tun_type nfp_fl_get_tun_from_act(struct nfp_app *app, - struct flow_cls_offload *flow, + struct flow_rule *rule, const struct flow_action_entry *act, int act_idx) { const struct ip_tunnel_info *tun = act->tunnel; @@ -288,7 +288,7 @@ nfp_fl_get_tun_from_act(struct nfp_app *app, /* Determine the tunnel type based on the egress netdev * in the mirred action for tunnels without l4. 
*/ - if (nfp_flower_tun_is_gre(flow, act_idx)) + if (nfp_flower_tun_is_gre(rule, act_idx)) return NFP_FL_TUNNEL_GRE; switch (tun->key.tp_dst) { @@ -788,11 +788,10 @@ struct nfp_flower_pedit_acts { }; static int -nfp_fl_commit_mangle(struct flow_cls_offload *flow, char *nfp_action, +nfp_fl_commit_mangle(struct flow_rule *rule, char *nfp_action, int *a_len, struct nfp_flower_pedit_acts *set_act, u32 *csum_updated) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); size_t act_size = 0; u8 ip_proto = 0; @@ -890,7 +889,7 @@ nfp_fl_commit_mangle(struct flow_cls_offload *flow, char *nfp_action, static int nfp_fl_pedit(const struct flow_action_entry *act, - struct flow_cls_offload *flow, char *nfp_action, int *a_len, + char *nfp_action, int *a_len, u32 *csum_updated, struct nfp_flower_pedit_acts *set_act, struct netlink_ext_ack *extack) { @@ -977,7 +976,7 @@ nfp_flower_output_action(struct nfp_app *app, static int nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act, - struct flow_cls_offload *flow, + struct flow_rule *rule, struct nfp_fl_payload *nfp_fl, int *a_len, struct net_device *netdev, enum nfp_flower_tun_type *tun_type, int *tun_out_cnt, @@ -1045,7 +1044,7 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act, case FLOW_ACTION_TUNNEL_ENCAP: { const struct ip_tunnel_info *ip_tun = act->tunnel; - *tun_type = nfp_fl_get_tun_from_act(app, flow, act, act_idx); + *tun_type = nfp_fl_get_tun_from_act(app, rule, act, act_idx); if (*tun_type == NFP_FL_TUNNEL_NONE) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: unsupported tunnel type in action list"); return -EOPNOTSUPP; @@ -1086,7 +1085,7 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act, /* Tunnel decap is handled by default so accept action. 
*/ return 0; case FLOW_ACTION_MANGLE: - if (nfp_fl_pedit(act, flow, &nfp_fl->action_data[*a_len], + if (nfp_fl_pedit(act, &nfp_fl->action_data[*a_len], a_len, csum_updated, set_act, extack)) return -EOPNOTSUPP; break; @@ -1195,7 +1194,7 @@ static bool nfp_fl_check_mangle_end(struct flow_action *flow_act, } int nfp_flower_compile_action(struct nfp_app *app, - struct flow_cls_offload *flow, + struct flow_rule *rule, struct net_device *netdev, struct nfp_fl_payload *nfp_flow, struct netlink_ext_ack *extack) @@ -1207,7 +1206,7 @@ int nfp_flower_compile_action(struct nfp_app *app, bool pkt_host = false; u32 csum_updated = 0; - if (!flow_action_hw_stats_check(&flow->rule->action, extack, + if (!flow_action_hw_stats_check(&rule->action, extack, FLOW_ACTION_HW_STATS_DELAYED_BIT)) return -EOPNOTSUPP; @@ -1219,18 +1218,18 @@ int nfp_flower_compile_action(struct nfp_app *app, tun_out_cnt = 0; out_cnt = 0; - flow_action_for_each(i, act, &flow->rule->action) { - if (nfp_fl_check_mangle_start(&flow->rule->action, i)) + flow_action_for_each(i, act, &rule->action) { + if (nfp_fl_check_mangle_start(&rule->action, i)) memset(&set_act, 0, sizeof(set_act)); - err = nfp_flower_loop_action(app, act, flow, nfp_flow, &act_len, + err = nfp_flower_loop_action(app, act, rule, nfp_flow, &act_len, netdev, &tun_type, &tun_out_cnt, &out_cnt, &csum_updated, &set_act, &pkt_host, extack, i); if (err) return err; act_cnt++; - if (nfp_fl_check_mangle_end(&flow->rule->action, i)) - nfp_fl_commit_mangle(flow, + if (nfp_fl_check_mangle_end(&rule->action, i)) + nfp_fl_commit_mangle(rule, &nfp_flow->action_data[act_len], &act_len, &set_act, &csum_updated); } diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c index 062bb2db68bf..1ac3b65df600 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c @@ -2,6 +2,7 @@ /* Copyright (C) 2021 Corigine, Inc. 
*/ #include "conntrack.h" +#include "../nfp_port.h" const struct rhashtable_params nfp_tc_ct_merge_params = { .head_offset = offsetof(struct nfp_fl_ct_tc_merge, @@ -407,15 +408,487 @@ static int nfp_ct_check_meta(struct nfp_fl_ct_flow_entry *post_ct_entry, return -EINVAL; } +static int +nfp_fl_calc_key_layers_sz(struct nfp_fl_key_ls in_key_ls, uint16_t *map) +{ + int key_size; + + /* This field must always be present */ + key_size = sizeof(struct nfp_flower_meta_tci); + map[FLOW_PAY_META_TCI] = 0; + + if (in_key_ls.key_layer & NFP_FLOWER_LAYER_EXT_META) { + map[FLOW_PAY_EXT_META] = key_size; + key_size += sizeof(struct nfp_flower_ext_meta); + } + if (in_key_ls.key_layer & NFP_FLOWER_LAYER_PORT) { + map[FLOW_PAY_INPORT] = key_size; + key_size += sizeof(struct nfp_flower_in_port); + } + if (in_key_ls.key_layer & NFP_FLOWER_LAYER_MAC) { + map[FLOW_PAY_MAC_MPLS] = key_size; + key_size += sizeof(struct nfp_flower_mac_mpls); + } + if (in_key_ls.key_layer & NFP_FLOWER_LAYER_TP) { + map[FLOW_PAY_L4] = key_size; + key_size += sizeof(struct nfp_flower_tp_ports); + } + if (in_key_ls.key_layer & NFP_FLOWER_LAYER_IPV4) { + map[FLOW_PAY_IPV4] = key_size; + key_size += sizeof(struct nfp_flower_ipv4); + } + if (in_key_ls.key_layer & NFP_FLOWER_LAYER_IPV6) { + map[FLOW_PAY_IPV6] = key_size; + key_size += sizeof(struct nfp_flower_ipv6); + } + + if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_GRE) { + map[FLOW_PAY_GRE] = key_size; + if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) + key_size += sizeof(struct nfp_flower_ipv6_gre_tun); + else + key_size += sizeof(struct nfp_flower_ipv4_gre_tun); + } + + if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_QINQ) { + map[FLOW_PAY_QINQ] = key_size; + key_size += sizeof(struct nfp_flower_vlan); + } + + if ((in_key_ls.key_layer & NFP_FLOWER_LAYER_VXLAN) || + (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_GENEVE)) { + map[FLOW_PAY_UDP_TUN] = key_size; + if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) + key_size += sizeof(struct 
nfp_flower_ipv6_udp_tun); + else + key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + } + + if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_GENEVE_OP) { + map[FLOW_PAY_GENEVE_OPT] = key_size; + key_size += sizeof(struct nfp_flower_geneve_options); + } + + return key_size; +} + +static int nfp_fl_merge_actions_offload(struct flow_rule **rules, + struct nfp_flower_priv *priv, + struct net_device *netdev, + struct nfp_fl_payload *flow_pay) +{ + struct flow_action_entry *a_in; + int i, j, num_actions, id; + struct flow_rule *a_rule; + int err = 0, offset = 0; + + num_actions = rules[CT_TYPE_PRE_CT]->action.num_entries + + rules[CT_TYPE_NFT]->action.num_entries + + rules[CT_TYPE_POST_CT]->action.num_entries; + + a_rule = flow_rule_alloc(num_actions); + if (!a_rule) + return -ENOMEM; + + /* Actions need a BASIC dissector. */ + a_rule->match = rules[CT_TYPE_PRE_CT]->match; + + /* Copy actions */ + for (j = 0; j < _CT_TYPE_MAX; j++) { + if (flow_rule_match_key(rules[j], FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + /* ip_proto is the only field that needed in later compile_action, + * needed to set the correct checksum flags. It doesn't really matter + * which input rule's ip_proto field we take as the earlier merge checks + * would have made sure that they don't conflict. We do not know which + * of the subflows would have the ip_proto filled in, so we need to iterate + * through the subflows and assign the proper subflow to a_rule + */ + flow_rule_match_basic(rules[j], &match); + if (match.mask->ip_proto) + a_rule->match = rules[j]->match; + } + + for (i = 0; i < rules[j]->action.num_entries; i++) { + a_in = &rules[j]->action.entries[i]; + id = a_in->id; + + /* Ignore CT related actions as these would already have + * been taken care of by previous checks, and we do not send + * any CT actions to the firmware. 
+ */ + switch (id) { + case FLOW_ACTION_CT: + case FLOW_ACTION_GOTO: + case FLOW_ACTION_CT_METADATA: + continue; + default: + memcpy(&a_rule->action.entries[offset++], + a_in, sizeof(struct flow_action_entry)); + break; + } + } + } + + /* Some actions would have been ignored, so update the num_entries field */ + a_rule->action.num_entries = offset; + err = nfp_flower_compile_action(priv->app, a_rule, netdev, flow_pay, NULL); + kfree(a_rule); + + return err; +} + static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry) { - return 0; + enum nfp_flower_tun_type tun_type = NFP_FL_TUNNEL_NONE; + struct nfp_fl_ct_zone_entry *zt = m_entry->zt; + struct nfp_fl_key_ls key_layer, tmp_layer; + struct nfp_flower_priv *priv = zt->priv; + u16 key_map[_FLOW_PAY_LAYERS_MAX]; + struct nfp_fl_payload *flow_pay; + + struct flow_rule *rules[_CT_TYPE_MAX]; + u8 *key, *msk, *kdata, *mdata; + struct nfp_port *port = NULL; + struct net_device *netdev; + bool qinq_sup; + u32 port_id; + u16 offset; + int i, err; + + netdev = m_entry->netdev; + qinq_sup = !!(priv->flower_ext_feats & NFP_FL_FEATS_VLAN_QINQ); + + rules[CT_TYPE_PRE_CT] = m_entry->tc_m_parent->pre_ct_parent->rule; + rules[CT_TYPE_NFT] = m_entry->nft_parent->rule; + rules[CT_TYPE_POST_CT] = m_entry->tc_m_parent->post_ct_parent->rule; + + memset(&key_layer, 0, sizeof(struct nfp_fl_key_ls)); + memset(&key_map, 0, sizeof(key_map)); + + /* Calculate the resultant key layer and size for offload */ + for (i = 0; i < _CT_TYPE_MAX; i++) { + err = nfp_flower_calculate_key_layers(priv->app, + m_entry->netdev, + &tmp_layer, rules[i], + &tun_type, NULL); + if (err) + return err; + + key_layer.key_layer |= tmp_layer.key_layer; + key_layer.key_layer_two |= tmp_layer.key_layer_two; + } + key_layer.key_size = nfp_fl_calc_key_layers_sz(key_layer, key_map); + + flow_pay = nfp_flower_allocate_new(&key_layer); + if (!flow_pay) + return -ENOMEM; + + memset(flow_pay->unmasked_data, 0, key_layer.key_size); + memset(flow_pay->mask_data, 0, 
key_layer.key_size); + + kdata = flow_pay->unmasked_data; + mdata = flow_pay->mask_data; + + offset = key_map[FLOW_PAY_META_TCI]; + key = kdata + offset; + msk = mdata + offset; + nfp_flower_compile_meta((struct nfp_flower_meta_tci *)key, + (struct nfp_flower_meta_tci *)msk, + key_layer.key_layer); + + if (NFP_FLOWER_LAYER_EXT_META & key_layer.key_layer) { + offset = key_map[FLOW_PAY_EXT_META]; + key = kdata + offset; + msk = mdata + offset; + nfp_flower_compile_ext_meta((struct nfp_flower_ext_meta *)key, + key_layer.key_layer_two); + nfp_flower_compile_ext_meta((struct nfp_flower_ext_meta *)msk, + key_layer.key_layer_two); + } + + /* Using in_port from the -trk rule. The tc merge checks should already + * be checking that the ingress netdevs are the same + */ + port_id = nfp_flower_get_port_id_from_netdev(priv->app, netdev); + offset = key_map[FLOW_PAY_INPORT]; + key = kdata + offset; + msk = mdata + offset; + err = nfp_flower_compile_port((struct nfp_flower_in_port *)key, + port_id, false, tun_type, NULL); + if (err) + goto ct_offload_err; + err = nfp_flower_compile_port((struct nfp_flower_in_port *)msk, + port_id, true, tun_type, NULL); + if (err) + goto ct_offload_err; + + /* This following part works on the assumption that previous checks has + * already filtered out flows that has different values for the different + * layers. Here we iterate through all three rules and merge their respective + * masked value(cared bits), basic method is: + * final_key = (r1_key & r1_mask) | (r2_key & r2_mask) | (r3_key & r3_mask) + * final_mask = r1_mask | r2_mask | r3_mask + * If none of the rules contains a match that is also fine, that simply means + * that the layer is not present. 
+ */ + if (!qinq_sup) { + for (i = 0; i < _CT_TYPE_MAX; i++) { + offset = key_map[FLOW_PAY_META_TCI]; + key = kdata + offset; + msk = mdata + offset; + nfp_flower_compile_tci((struct nfp_flower_meta_tci *)key, + (struct nfp_flower_meta_tci *)msk, + rules[i]); + } + } + + if (NFP_FLOWER_LAYER_MAC & key_layer.key_layer) { + offset = key_map[FLOW_PAY_MAC_MPLS]; + key = kdata + offset; + msk = mdata + offset; + for (i = 0; i < _CT_TYPE_MAX; i++) { + nfp_flower_compile_mac((struct nfp_flower_mac_mpls *)key, + (struct nfp_flower_mac_mpls *)msk, + rules[i]); + err = nfp_flower_compile_mpls((struct nfp_flower_mac_mpls *)key, + (struct nfp_flower_mac_mpls *)msk, + rules[i], NULL); + if (err) + goto ct_offload_err; + } + } + + if (NFP_FLOWER_LAYER_IPV4 & key_layer.key_layer) { + offset = key_map[FLOW_PAY_IPV4]; + key = kdata + offset; + msk = mdata + offset; + for (i = 0; i < _CT_TYPE_MAX; i++) { + nfp_flower_compile_ipv4((struct nfp_flower_ipv4 *)key, + (struct nfp_flower_ipv4 *)msk, + rules[i]); + } + } + + if (NFP_FLOWER_LAYER_IPV6 & key_layer.key_layer) { + offset = key_map[FLOW_PAY_IPV6]; + key = kdata + offset; + msk = mdata + offset; + for (i = 0; i < _CT_TYPE_MAX; i++) { + nfp_flower_compile_ipv6((struct nfp_flower_ipv6 *)key, + (struct nfp_flower_ipv6 *)msk, + rules[i]); + } + } + + if (NFP_FLOWER_LAYER_TP & key_layer.key_layer) { + offset = key_map[FLOW_PAY_L4]; + key = kdata + offset; + msk = mdata + offset; + for (i = 0; i < _CT_TYPE_MAX; i++) { + nfp_flower_compile_tport((struct nfp_flower_tp_ports *)key, + (struct nfp_flower_tp_ports *)msk, + rules[i]); + } + } + + if (key_layer.key_layer_two & NFP_FLOWER_LAYER2_GRE) { + offset = key_map[FLOW_PAY_GRE]; + key = kdata + offset; + msk = mdata + offset; + if (key_layer.key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) { + struct nfp_flower_ipv6_gre_tun *gre_match; + struct nfp_ipv6_addr_entry *entry; + struct in6_addr *dst; + + for (i = 0; i < _CT_TYPE_MAX; i++) { + nfp_flower_compile_ipv6_gre_tun((void *)key, + (void 
*)msk, rules[i]); + } + gre_match = (struct nfp_flower_ipv6_gre_tun *)key; + dst = &gre_match->ipv6.dst; + + entry = nfp_tunnel_add_ipv6_off(priv->app, dst); + if (!entry) + goto ct_offload_err; + + flow_pay->nfp_tun_ipv6 = entry; + } else { + __be32 dst; + + for (i = 0; i < _CT_TYPE_MAX; i++) { + nfp_flower_compile_ipv4_gre_tun((void *)key, + (void *)msk, rules[i]); + } + dst = ((struct nfp_flower_ipv4_gre_tun *)key)->ipv4.dst; + + /* Store the tunnel destination in the rule data. + * This must be present and be an exact match. + */ + flow_pay->nfp_tun_ipv4_addr = dst; + nfp_tunnel_add_ipv4_off(priv->app, dst); + } + } + + if (NFP_FLOWER_LAYER2_QINQ & key_layer.key_layer_two) { + offset = key_map[FLOW_PAY_QINQ]; + key = kdata + offset; + msk = mdata + offset; + for (i = 0; i < _CT_TYPE_MAX; i++) { + nfp_flower_compile_vlan((struct nfp_flower_vlan *)key, + (struct nfp_flower_vlan *)msk, + rules[i]); + } + } + + if (key_layer.key_layer & NFP_FLOWER_LAYER_VXLAN || + key_layer.key_layer_two & NFP_FLOWER_LAYER2_GENEVE) { + offset = key_map[FLOW_PAY_UDP_TUN]; + key = kdata + offset; + msk = mdata + offset; + if (key_layer.key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) { + struct nfp_flower_ipv6_udp_tun *udp_match; + struct nfp_ipv6_addr_entry *entry; + struct in6_addr *dst; + + for (i = 0; i < _CT_TYPE_MAX; i++) { + nfp_flower_compile_ipv6_udp_tun((void *)key, + (void *)msk, rules[i]); + } + udp_match = (struct nfp_flower_ipv6_udp_tun *)key; + dst = &udp_match->ipv6.dst; + + entry = nfp_tunnel_add_ipv6_off(priv->app, dst); + if (!entry) + goto ct_offload_err; + + flow_pay->nfp_tun_ipv6 = entry; + } else { + __be32 dst; + + for (i = 0; i < _CT_TYPE_MAX; i++) { + nfp_flower_compile_ipv4_udp_tun((void *)key, + (void *)msk, rules[i]); + } + dst = ((struct nfp_flower_ipv4_udp_tun *)key)->ipv4.dst; + + /* Store the tunnel destination in the rule data. + * This must be present and be an exact match. 
+ */ + flow_pay->nfp_tun_ipv4_addr = dst; + nfp_tunnel_add_ipv4_off(priv->app, dst); + } + + if (key_layer.key_layer_two & NFP_FLOWER_LAYER2_GENEVE_OP) { + offset = key_map[FLOW_PAY_GENEVE_OPT]; + key = kdata + offset; + msk = mdata + offset; + for (i = 0; i < _CT_TYPE_MAX; i++) + nfp_flower_compile_geneve_opt(key, msk, rules[i]); + } + } + + /* Merge actions into flow_pay */ + err = nfp_fl_merge_actions_offload(rules, priv, netdev, flow_pay); + if (err) + goto ct_offload_err; + + /* Use the pointer address as the cookie, but set the last bit to 1. + * This is to avoid the 'is_merge_flow' check from detecting this as + * an already merged flow. This works since address alignment means + * that the last bit for pointer addresses will be 0. + */ + flow_pay->tc_flower_cookie = ((unsigned long)flow_pay) | 0x1; + err = nfp_compile_flow_metadata(priv->app, flow_pay->tc_flower_cookie, + flow_pay, netdev, NULL); + if (err) + goto ct_offload_err; + + if (nfp_netdev_is_nfp_repr(netdev)) + port = nfp_port_from_netdev(netdev); + + err = rhashtable_insert_fast(&priv->flow_table, &flow_pay->fl_node, + nfp_flower_table_params); + if (err) + goto ct_release_offload_meta_err; + + err = nfp_flower_xmit_flow(priv->app, flow_pay, + NFP_FLOWER_CMSG_TYPE_FLOW_ADD); + if (err) + goto ct_remove_rhash_err; + + m_entry->tc_flower_cookie = flow_pay->tc_flower_cookie; + m_entry->flow_pay = flow_pay; + + if (port) + port->tc_offload_cnt++; + + return err; + +ct_remove_rhash_err: + WARN_ON_ONCE(rhashtable_remove_fast(&priv->flow_table, + &flow_pay->fl_node, + nfp_flower_table_params)); +ct_release_offload_meta_err: + nfp_modify_flow_metadata(priv->app, flow_pay); +ct_offload_err: + if (flow_pay->nfp_tun_ipv4_addr) + nfp_tunnel_del_ipv4_off(priv->app, flow_pay->nfp_tun_ipv4_addr); + if (flow_pay->nfp_tun_ipv6) + nfp_tunnel_put_ipv6_off(priv->app, flow_pay->nfp_tun_ipv6); + kfree(flow_pay->action_data); + kfree(flow_pay->mask_data); + kfree(flow_pay->unmasked_data); + kfree(flow_pay); + return 
err; } static int nfp_fl_ct_del_offload(struct nfp_app *app, unsigned long cookie, struct net_device *netdev) { - return 0; + struct nfp_flower_priv *priv = app->priv; + struct nfp_fl_payload *flow_pay; + struct nfp_port *port = NULL; + int err = 0; + + if (nfp_netdev_is_nfp_repr(netdev)) + port = nfp_port_from_netdev(netdev); + + flow_pay = nfp_flower_search_fl_table(app, cookie, netdev); + if (!flow_pay) + return -ENOENT; + + err = nfp_modify_flow_metadata(app, flow_pay); + if (err) + goto err_free_merge_flow; + + if (flow_pay->nfp_tun_ipv4_addr) + nfp_tunnel_del_ipv4_off(app, flow_pay->nfp_tun_ipv4_addr); + + if (flow_pay->nfp_tun_ipv6) + nfp_tunnel_put_ipv6_off(app, flow_pay->nfp_tun_ipv6); + + if (!flow_pay->in_hw) { + err = 0; + goto err_free_merge_flow; + } + + err = nfp_flower_xmit_flow(app, flow_pay, + NFP_FLOWER_CMSG_TYPE_FLOW_DEL); + +err_free_merge_flow: + nfp_flower_del_linked_merge_flows(app, flow_pay); + if (port) + port->tc_offload_cnt--; + kfree(flow_pay->action_data); + kfree(flow_pay->mask_data); + kfree(flow_pay->unmasked_data); + WARN_ON_ONCE(rhashtable_remove_fast(&priv->flow_table, + &flow_pay->fl_node, + nfp_flower_table_params)); + kfree_rcu(flow_pay, rcu); + return err; } static int nfp_ct_do_nft_merge(struct nfp_fl_ct_zone_entry *zt, @@ -1048,6 +1521,139 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv, return 0; } +static void +nfp_fl_ct_sub_stats(struct nfp_fl_nft_tc_merge *nft_merge, + enum ct_entry_type type, u64 *m_pkts, + u64 *m_bytes, u64 *m_used) +{ + struct nfp_flower_priv *priv = nft_merge->zt->priv; + struct nfp_fl_payload *nfp_flow; + u32 ctx_id; + + nfp_flow = nft_merge->flow_pay; + if (!nfp_flow) + return; + + ctx_id = be32_to_cpu(nfp_flow->meta.host_ctx_id); + *m_pkts += priv->stats[ctx_id].pkts; + *m_bytes += priv->stats[ctx_id].bytes; + *m_used = max_t(u64, *m_used, priv->stats[ctx_id].used); + + /* If request is for a sub_flow which is part of a tunnel merged + * flow then update stats from tunnel merged 
flows first. + */ + if (!list_empty(&nfp_flow->linked_flows)) + nfp_flower_update_merge_stats(priv->app, nfp_flow); + + if (type != CT_TYPE_NFT) { + /* Update nft cached stats */ + flow_stats_update(&nft_merge->nft_parent->stats, + priv->stats[ctx_id].bytes, + priv->stats[ctx_id].pkts, + 0, priv->stats[ctx_id].used, + FLOW_ACTION_HW_STATS_DELAYED); + } else { + /* Update pre_ct cached stats */ + flow_stats_update(&nft_merge->tc_m_parent->pre_ct_parent->stats, + priv->stats[ctx_id].bytes, + priv->stats[ctx_id].pkts, + 0, priv->stats[ctx_id].used, + FLOW_ACTION_HW_STATS_DELAYED); + /* Update post_ct cached stats */ + flow_stats_update(&nft_merge->tc_m_parent->post_ct_parent->stats, + priv->stats[ctx_id].bytes, + priv->stats[ctx_id].pkts, + 0, priv->stats[ctx_id].used, + FLOW_ACTION_HW_STATS_DELAYED); + } + /* Reset stats from the nfp */ + priv->stats[ctx_id].pkts = 0; + priv->stats[ctx_id].bytes = 0; +} + +int nfp_fl_ct_stats(struct flow_cls_offload *flow, + struct nfp_fl_ct_map_entry *ct_map_ent) +{ + struct nfp_fl_ct_flow_entry *ct_entry = ct_map_ent->ct_entry; + struct nfp_fl_nft_tc_merge *nft_merge, *nft_m_tmp; + struct nfp_fl_ct_tc_merge *tc_merge, *tc_m_tmp; + + u64 pkts = 0, bytes = 0, used = 0; + u64 m_pkts, m_bytes, m_used; + + spin_lock_bh(&ct_entry->zt->priv->stats_lock); + + if (ct_entry->type == CT_TYPE_PRE_CT) { + /* Iterate tc_merge entries associated with this flow */ + list_for_each_entry_safe(tc_merge, tc_m_tmp, &ct_entry->children, + pre_ct_list) { + m_pkts = 0; + m_bytes = 0; + m_used = 0; + /* Iterate nft_merge entries associated with this tc_merge flow */ + list_for_each_entry_safe(nft_merge, nft_m_tmp, &tc_merge->children, + tc_merge_list) { + nfp_fl_ct_sub_stats(nft_merge, CT_TYPE_PRE_CT, + &m_pkts, &m_bytes, &m_used); + } + pkts += m_pkts; + bytes += m_bytes; + used = max_t(u64, used, m_used); + /* Update post_ct partner */ + flow_stats_update(&tc_merge->post_ct_parent->stats, + m_bytes, m_pkts, 0, m_used, + FLOW_ACTION_HW_STATS_DELAYED); + } 
+ } else if (ct_entry->type == CT_TYPE_POST_CT) { + /* Iterate tc_merge entries associated with this flow */ + list_for_each_entry_safe(tc_merge, tc_m_tmp, &ct_entry->children, + post_ct_list) { + m_pkts = 0; + m_bytes = 0; + m_used = 0; + /* Iterate nft_merge entries associated with this tc_merge flow */ + list_for_each_entry_safe(nft_merge, nft_m_tmp, &tc_merge->children, + tc_merge_list) { + nfp_fl_ct_sub_stats(nft_merge, CT_TYPE_POST_CT, + &m_pkts, &m_bytes, &m_used); + } + pkts += m_pkts; + bytes += m_bytes; + used = max_t(u64, used, m_used); + /* Update pre_ct partner */ + flow_stats_update(&tc_merge->pre_ct_parent->stats, + m_bytes, m_pkts, 0, m_used, + FLOW_ACTION_HW_STATS_DELAYED); + } + } else { + /* Iterate nft_merge entries associated with this nft flow */ + list_for_each_entry_safe(nft_merge, nft_m_tmp, &ct_entry->children, + nft_flow_list) { + nfp_fl_ct_sub_stats(nft_merge, CT_TYPE_NFT, + &pkts, &bytes, &used); + } + } + + /* Add stats from this request to stats potentially cached by + * previous requests. + */ + flow_stats_update(&ct_entry->stats, bytes, pkts, 0, used, + FLOW_ACTION_HW_STATS_DELAYED); + /* Finally update the flow stats from the original stats request */ + flow_stats_update(&flow->stats, ct_entry->stats.bytes, + ct_entry->stats.pkts, 0, + ct_entry->stats.lastused, + FLOW_ACTION_HW_STATS_DELAYED); + /* Stats has been synced to original flow, can now clear + * the cache. 
+ */ + ct_entry->stats.pkts = 0; + ct_entry->stats.bytes = 0; + spin_unlock_bh(&ct_entry->zt->priv->stats_lock); + + return 0; +} + static int nfp_fl_ct_offload_nft_flow(struct nfp_fl_ct_zone_entry *zt, struct flow_cls_offload *flow) { @@ -1080,7 +1686,11 @@ nfp_fl_ct_offload_nft_flow(struct nfp_fl_ct_zone_entry *zt, struct flow_cls_offl nfp_ct_map_params); return nfp_fl_ct_del_flow(ct_map_ent); case FLOW_CLS_STATS: - return 0; + ct_map_ent = rhashtable_lookup_fast(&zt->priv->ct_map_table, &flow->cookie, + nfp_ct_map_params); + if (ct_map_ent) + return nfp_fl_ct_stats(flow, ct_map_ent); + break; default: break; } diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h index 170b6cdb8cd0..beb6cceff9d8 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h @@ -83,6 +83,24 @@ enum ct_entry_type { CT_TYPE_PRE_CT, CT_TYPE_NFT, CT_TYPE_POST_CT, + _CT_TYPE_MAX, +}; + +enum nfp_nfp_layer_name { + FLOW_PAY_META_TCI = 0, + FLOW_PAY_INPORT, + FLOW_PAY_EXT_META, + FLOW_PAY_MAC_MPLS, + FLOW_PAY_L4, + FLOW_PAY_IPV4, + FLOW_PAY_IPV6, + FLOW_PAY_CT, + FLOW_PAY_GRE, + FLOW_PAY_QINQ, + FLOW_PAY_UDP_TUN, + FLOW_PAY_GENEVE_OPT, + + _FLOW_PAY_LAYERS_MAX }; /** @@ -228,4 +246,12 @@ int nfp_fl_ct_del_flow(struct nfp_fl_ct_map_entry *ct_map_ent); */ int nfp_fl_ct_handle_nft_flow(enum tc_setup_type type, void *type_data, void *cb_priv); + +/** + * nfp_fl_ct_stats() - Handle flower stats callbacks for ct flows + * @flow: TC flower classifier offload structure. 
+ * @ct_map_ent: ct map entry for the flow that needs deleting + */ +int nfp_fl_ct_stats(struct flow_cls_offload *flow, + struct nfp_fl_ct_map_entry *ct_map_ent); #endif diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index 0fbd682ccf72..917c450a7aad 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -413,20 +413,73 @@ int nfp_flower_setup_tc(struct nfp_app *app, struct net_device *netdev, int nfp_flower_merge_offloaded_flows(struct nfp_app *app, struct nfp_fl_payload *sub_flow1, struct nfp_fl_payload *sub_flow2); +void +nfp_flower_compile_meta(struct nfp_flower_meta_tci *ext, + struct nfp_flower_meta_tci *msk, u8 key_type); +void +nfp_flower_compile_tci(struct nfp_flower_meta_tci *ext, + struct nfp_flower_meta_tci *msk, + struct flow_rule *rule); +void +nfp_flower_compile_ext_meta(struct nfp_flower_ext_meta *frame, u32 key_ext); +int +nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port, + bool mask_version, enum nfp_flower_tun_type tun_type, + struct netlink_ext_ack *extack); +void +nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext, + struct nfp_flower_mac_mpls *msk, + struct flow_rule *rule); +int +nfp_flower_compile_mpls(struct nfp_flower_mac_mpls *ext, + struct nfp_flower_mac_mpls *msk, + struct flow_rule *rule, + struct netlink_ext_ack *extack); +void +nfp_flower_compile_tport(struct nfp_flower_tp_ports *ext, + struct nfp_flower_tp_ports *msk, + struct flow_rule *rule); +void +nfp_flower_compile_vlan(struct nfp_flower_vlan *ext, + struct nfp_flower_vlan *msk, + struct flow_rule *rule); +void +nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *ext, + struct nfp_flower_ipv4 *msk, struct flow_rule *rule); +void +nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *ext, + struct nfp_flower_ipv6 *msk, struct flow_rule *rule); +void +nfp_flower_compile_geneve_opt(u8 *ext, u8 *msk, struct flow_rule *rule); +void 
+nfp_flower_compile_ipv4_gre_tun(struct nfp_flower_ipv4_gre_tun *ext, + struct nfp_flower_ipv4_gre_tun *msk, + struct flow_rule *rule); +void +nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *ext, + struct nfp_flower_ipv4_udp_tun *msk, + struct flow_rule *rule); +void +nfp_flower_compile_ipv6_udp_tun(struct nfp_flower_ipv6_udp_tun *ext, + struct nfp_flower_ipv6_udp_tun *msk, + struct flow_rule *rule); +void +nfp_flower_compile_ipv6_gre_tun(struct nfp_flower_ipv6_gre_tun *ext, + struct nfp_flower_ipv6_gre_tun *msk, + struct flow_rule *rule); int nfp_flower_compile_flow_match(struct nfp_app *app, - struct flow_cls_offload *flow, + struct flow_rule *rule, struct nfp_fl_key_ls *key_ls, struct net_device *netdev, struct nfp_fl_payload *nfp_flow, enum nfp_flower_tun_type tun_type, struct netlink_ext_ack *extack); int nfp_flower_compile_action(struct nfp_app *app, - struct flow_cls_offload *flow, + struct flow_rule *rule, struct net_device *netdev, struct nfp_fl_payload *nfp_flow, struct netlink_ext_ack *extack); -int nfp_compile_flow_metadata(struct nfp_app *app, - struct flow_cls_offload *flow, +int nfp_compile_flow_metadata(struct nfp_app *app, u32 cookie, struct nfp_fl_payload *nfp_flow, struct net_device *netdev, struct netlink_ext_ack *extack); @@ -498,4 +551,22 @@ int nfp_flower_xmit_pre_tun_flow(struct nfp_app *app, struct nfp_fl_payload *flow); int nfp_flower_xmit_pre_tun_del_flow(struct nfp_app *app, struct nfp_fl_payload *flow); + +struct nfp_fl_payload * +nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer); +int nfp_flower_calculate_key_layers(struct nfp_app *app, + struct net_device *netdev, + struct nfp_fl_key_ls *ret_key_ls, + struct flow_rule *flow, + enum nfp_flower_tun_type *tun_type, + struct netlink_ext_ack *extack); +void +nfp_flower_del_linked_merge_flows(struct nfp_app *app, + struct nfp_fl_payload *sub_flow); +int +nfp_flower_xmit_flow(struct nfp_app *app, struct nfp_fl_payload *nfp_flow, + u8 mtype); +void 
+nfp_flower_update_merge_stats(struct nfp_app *app, + struct nfp_fl_payload *sub_flow); #endif diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c index 255a4dff6288..9d86eea4dc16 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/match.c +++ b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -7,51 +7,68 @@ #include "cmsg.h" #include "main.h" -static void -nfp_flower_compile_meta_tci(struct nfp_flower_meta_tci *ext, - struct nfp_flower_meta_tci *msk, - struct flow_rule *rule, u8 key_type, bool qinq_sup) +void +nfp_flower_compile_meta(struct nfp_flower_meta_tci *ext, + struct nfp_flower_meta_tci *msk, u8 key_type) { - u16 tmp_tci; - - memset(ext, 0, sizeof(struct nfp_flower_meta_tci)); - memset(msk, 0, sizeof(struct nfp_flower_meta_tci)); - /* Populate the metadata frame. */ ext->nfp_flow_key_layer = key_type; ext->mask_id = ~0; msk->nfp_flow_key_layer = key_type; msk->mask_id = ~0; +} - if (!qinq_sup && flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { +void +nfp_flower_compile_tci(struct nfp_flower_meta_tci *ext, + struct nfp_flower_meta_tci *msk, + struct flow_rule *rule) +{ + u16 msk_tci, key_tci; + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { struct flow_match_vlan match; flow_rule_match_vlan(rule, &match); /* Populate the tci field. 
*/ - tmp_tci = NFP_FLOWER_MASK_VLAN_PRESENT; - tmp_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, + key_tci = NFP_FLOWER_MASK_VLAN_PRESENT; + key_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, match.key->vlan_priority) | FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, match.key->vlan_id); - ext->tci = cpu_to_be16(tmp_tci); - tmp_tci = NFP_FLOWER_MASK_VLAN_PRESENT; - tmp_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, + msk_tci = NFP_FLOWER_MASK_VLAN_PRESENT; + msk_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, match.mask->vlan_priority) | FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, match.mask->vlan_id); - msk->tci = cpu_to_be16(tmp_tci); + + ext->tci |= cpu_to_be16((key_tci & msk_tci)); + msk->tci |= cpu_to_be16(msk_tci); } } static void +nfp_flower_compile_meta_tci(struct nfp_flower_meta_tci *ext, + struct nfp_flower_meta_tci *msk, + struct flow_rule *rule, u8 key_type, bool qinq_sup) +{ + memset(ext, 0, sizeof(struct nfp_flower_meta_tci)); + memset(msk, 0, sizeof(struct nfp_flower_meta_tci)); + + nfp_flower_compile_meta(ext, msk, key_type); + + if (!qinq_sup) + nfp_flower_compile_tci(ext, msk, rule); +} + +void nfp_flower_compile_ext_meta(struct nfp_flower_ext_meta *frame, u32 key_ext) { frame->nfp_flow_key_layer2 = cpu_to_be32(key_ext); } -static int +int nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port, bool mask_version, enum nfp_flower_tun_type tun_type, struct netlink_ext_ack *extack) @@ -74,28 +91,37 @@ nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port, return 0; } -static int +void nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext, - struct nfp_flower_mac_mpls *msk, struct flow_rule *rule, - struct netlink_ext_ack *extack) + struct nfp_flower_mac_mpls *msk, + struct flow_rule *rule) { - memset(ext, 0, sizeof(struct nfp_flower_mac_mpls)); - memset(msk, 0, sizeof(struct nfp_flower_mac_mpls)); - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { struct flow_match_eth_addrs match; + int i; flow_rule_match_eth_addrs(rule, 
&match); /* Populate mac frame. */ - ether_addr_copy(ext->mac_dst, &match.key->dst[0]); - ether_addr_copy(ext->mac_src, &match.key->src[0]); - ether_addr_copy(msk->mac_dst, &match.mask->dst[0]); - ether_addr_copy(msk->mac_src, &match.mask->src[0]); + for (i = 0; i < ETH_ALEN; i++) { + ext->mac_dst[i] |= match.key->dst[i] & + match.mask->dst[i]; + msk->mac_dst[i] |= match.mask->dst[i]; + ext->mac_src[i] |= match.key->src[i] & + match.mask->src[i]; + msk->mac_src[i] |= match.mask->src[i]; + } } +} +int +nfp_flower_compile_mpls(struct nfp_flower_mac_mpls *ext, + struct nfp_flower_mac_mpls *msk, + struct flow_rule *rule, + struct netlink_ext_ack *extack) +{ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS)) { struct flow_match_mpls match; - u32 t_mpls; + u32 key_mpls, msk_mpls; flow_rule_match_mpls(rule, &match); @@ -106,22 +132,24 @@ nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext, return -EOPNOTSUPP; } - t_mpls = FIELD_PREP(NFP_FLOWER_MASK_MPLS_LB, - match.key->ls[0].mpls_label) | - FIELD_PREP(NFP_FLOWER_MASK_MPLS_TC, - match.key->ls[0].mpls_tc) | - FIELD_PREP(NFP_FLOWER_MASK_MPLS_BOS, - match.key->ls[0].mpls_bos) | - NFP_FLOWER_MASK_MPLS_Q; - ext->mpls_lse = cpu_to_be32(t_mpls); - t_mpls = FIELD_PREP(NFP_FLOWER_MASK_MPLS_LB, - match.mask->ls[0].mpls_label) | - FIELD_PREP(NFP_FLOWER_MASK_MPLS_TC, - match.mask->ls[0].mpls_tc) | - FIELD_PREP(NFP_FLOWER_MASK_MPLS_BOS, - match.mask->ls[0].mpls_bos) | - NFP_FLOWER_MASK_MPLS_Q; - msk->mpls_lse = cpu_to_be32(t_mpls); + key_mpls = FIELD_PREP(NFP_FLOWER_MASK_MPLS_LB, + match.key->ls[0].mpls_label) | + FIELD_PREP(NFP_FLOWER_MASK_MPLS_TC, + match.key->ls[0].mpls_tc) | + FIELD_PREP(NFP_FLOWER_MASK_MPLS_BOS, + match.key->ls[0].mpls_bos) | + NFP_FLOWER_MASK_MPLS_Q; + + msk_mpls = FIELD_PREP(NFP_FLOWER_MASK_MPLS_LB, + match.mask->ls[0].mpls_label) | + FIELD_PREP(NFP_FLOWER_MASK_MPLS_TC, + match.mask->ls[0].mpls_tc) | + FIELD_PREP(NFP_FLOWER_MASK_MPLS_BOS, + match.mask->ls[0].mpls_bos) | + NFP_FLOWER_MASK_MPLS_Q; + + 
ext->mpls_lse |= cpu_to_be32((key_mpls & msk_mpls)); + msk->mpls_lse |= cpu_to_be32(msk_mpls); } else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { /* Check for mpls ether type and set NFP_FLOWER_MASK_MPLS_Q * bit, which indicates an mpls ether type but without any @@ -132,30 +160,41 @@ nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext, flow_rule_match_basic(rule, &match); if (match.key->n_proto == cpu_to_be16(ETH_P_MPLS_UC) || match.key->n_proto == cpu_to_be16(ETH_P_MPLS_MC)) { - ext->mpls_lse = cpu_to_be32(NFP_FLOWER_MASK_MPLS_Q); - msk->mpls_lse = cpu_to_be32(NFP_FLOWER_MASK_MPLS_Q); + ext->mpls_lse |= cpu_to_be32(NFP_FLOWER_MASK_MPLS_Q); + msk->mpls_lse |= cpu_to_be32(NFP_FLOWER_MASK_MPLS_Q); } } return 0; } -static void +static int +nfp_flower_compile_mac_mpls(struct nfp_flower_mac_mpls *ext, + struct nfp_flower_mac_mpls *msk, + struct flow_rule *rule, + struct netlink_ext_ack *extack) +{ + memset(ext, 0, sizeof(struct nfp_flower_mac_mpls)); + memset(msk, 0, sizeof(struct nfp_flower_mac_mpls)); + + nfp_flower_compile_mac(ext, msk, rule); + + return nfp_flower_compile_mpls(ext, msk, rule, extack); +} + +void nfp_flower_compile_tport(struct nfp_flower_tp_ports *ext, struct nfp_flower_tp_ports *msk, struct flow_rule *rule) { - memset(ext, 0, sizeof(struct nfp_flower_tp_ports)); - memset(msk, 0, sizeof(struct nfp_flower_tp_ports)); - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { struct flow_match_ports match; flow_rule_match_ports(rule, &match); - ext->port_src = match.key->src; - ext->port_dst = match.key->dst; - msk->port_src = match.mask->src; - msk->port_dst = match.mask->dst; + ext->port_src |= match.key->src & match.mask->src; + ext->port_dst |= match.key->dst & match.mask->dst; + msk->port_src |= match.mask->src; + msk->port_dst |= match.mask->dst; } } @@ -167,18 +206,18 @@ nfp_flower_compile_ip_ext(struct nfp_flower_ip_ext *ext, struct flow_match_basic match; flow_rule_match_basic(rule, &match); - ext->proto = 
match.key->ip_proto; - msk->proto = match.mask->ip_proto; + ext->proto |= match.key->ip_proto & match.mask->ip_proto; + msk->proto |= match.mask->ip_proto; } if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) { struct flow_match_ip match; flow_rule_match_ip(rule, &match); - ext->tos = match.key->tos; - ext->ttl = match.key->ttl; - msk->tos = match.mask->tos; - msk->ttl = match.mask->ttl; + ext->tos |= match.key->tos & match.mask->tos; + ext->ttl |= match.key->ttl & match.mask->ttl; + msk->tos |= match.mask->tos; + msk->ttl |= match.mask->ttl; } if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) { @@ -231,99 +270,108 @@ nfp_flower_compile_ip_ext(struct nfp_flower_ip_ext *ext, } static void -nfp_flower_fill_vlan(struct flow_dissector_key_vlan *key, - struct nfp_flower_vlan *frame, - bool outer_vlan) +nfp_flower_fill_vlan(struct flow_match_vlan *match, + struct nfp_flower_vlan *ext, + struct nfp_flower_vlan *msk, bool outer_vlan) { - u16 tci; - - tci = NFP_FLOWER_MASK_VLAN_PRESENT; - tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, - key->vlan_priority) | - FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, - key->vlan_id); + struct flow_dissector_key_vlan *mask = match->mask; + struct flow_dissector_key_vlan *key = match->key; + u16 msk_tci, key_tci; + + key_tci = NFP_FLOWER_MASK_VLAN_PRESENT; + key_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, + key->vlan_priority) | + FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, + key->vlan_id); + msk_tci = NFP_FLOWER_MASK_VLAN_PRESENT; + msk_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, + mask->vlan_priority) | + FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, + mask->vlan_id); if (outer_vlan) { - frame->outer_tci = cpu_to_be16(tci); - frame->outer_tpid = key->vlan_tpid; + ext->outer_tci |= cpu_to_be16((key_tci & msk_tci)); + ext->outer_tpid |= key->vlan_tpid & mask->vlan_tpid; + msk->outer_tci |= cpu_to_be16(msk_tci); + msk->outer_tpid |= mask->vlan_tpid; } else { - frame->inner_tci = cpu_to_be16(tci); - frame->inner_tpid = key->vlan_tpid; + ext->inner_tci |= 
cpu_to_be16((key_tci & msk_tci)); + ext->inner_tpid |= key->vlan_tpid & mask->vlan_tpid; + msk->inner_tci |= cpu_to_be16(msk_tci); + msk->inner_tpid |= mask->vlan_tpid; } } -static void +void nfp_flower_compile_vlan(struct nfp_flower_vlan *ext, struct nfp_flower_vlan *msk, struct flow_rule *rule) { struct flow_match_vlan match; - memset(ext, 0, sizeof(struct nfp_flower_vlan)); - memset(msk, 0, sizeof(struct nfp_flower_vlan)); - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { flow_rule_match_vlan(rule, &match); - nfp_flower_fill_vlan(match.key, ext, true); - nfp_flower_fill_vlan(match.mask, msk, true); + nfp_flower_fill_vlan(&match, ext, msk, true); } if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) { flow_rule_match_cvlan(rule, &match); - nfp_flower_fill_vlan(match.key, ext, false); - nfp_flower_fill_vlan(match.mask, msk, false); + nfp_flower_fill_vlan(&match, ext, msk, false); } } -static void +void nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *ext, struct nfp_flower_ipv4 *msk, struct flow_rule *rule) { - struct flow_match_ipv4_addrs match; - - memset(ext, 0, sizeof(struct nfp_flower_ipv4)); - memset(msk, 0, sizeof(struct nfp_flower_ipv4)); - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) { + struct flow_match_ipv4_addrs match; + flow_rule_match_ipv4_addrs(rule, &match); - ext->ipv4_src = match.key->src; - ext->ipv4_dst = match.key->dst; - msk->ipv4_src = match.mask->src; - msk->ipv4_dst = match.mask->dst; + ext->ipv4_src |= match.key->src & match.mask->src; + ext->ipv4_dst |= match.key->dst & match.mask->dst; + msk->ipv4_src |= match.mask->src; + msk->ipv4_dst |= match.mask->dst; } nfp_flower_compile_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); } -static void +void nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *ext, struct nfp_flower_ipv6 *msk, struct flow_rule *rule) { - memset(ext, 0, sizeof(struct nfp_flower_ipv6)); - memset(msk, 0, sizeof(struct nfp_flower_ipv6)); - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) 
{ struct flow_match_ipv6_addrs match; + int i; flow_rule_match_ipv6_addrs(rule, &match); - ext->ipv6_src = match.key->src; - ext->ipv6_dst = match.key->dst; - msk->ipv6_src = match.mask->src; - msk->ipv6_dst = match.mask->dst; + for (i = 0; i < sizeof(ext->ipv6_src); i++) { + ext->ipv6_src.s6_addr[i] |= match.key->src.s6_addr[i] & + match.mask->src.s6_addr[i]; + ext->ipv6_dst.s6_addr[i] |= match.key->dst.s6_addr[i] & + match.mask->dst.s6_addr[i]; + msk->ipv6_src.s6_addr[i] |= match.mask->src.s6_addr[i]; + msk->ipv6_dst.s6_addr[i] |= match.mask->dst.s6_addr[i]; + } } nfp_flower_compile_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); } -static int -nfp_flower_compile_geneve_opt(void *ext, void *msk, struct flow_rule *rule) +void +nfp_flower_compile_geneve_opt(u8 *ext, u8 *msk, struct flow_rule *rule) { struct flow_match_enc_opts match; + int i; - flow_rule_match_enc_opts(rule, &match); - memcpy(ext, match.key->data, match.key->len); - memcpy(msk, match.mask->data, match.mask->len); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS)) { + flow_rule_match_enc_opts(rule, &match); - return 0; + for (i = 0; i < match.mask->len; i++) { + ext[i] |= match.key->data[i] & match.mask->data[i]; + msk[i] |= match.mask->data[i]; + } + } } static void @@ -335,10 +383,10 @@ nfp_flower_compile_tun_ipv4_addrs(struct nfp_flower_tun_ipv4 *ext, struct flow_match_ipv4_addrs match; flow_rule_match_enc_ipv4_addrs(rule, &match); - ext->src = match.key->src; - ext->dst = match.key->dst; - msk->src = match.mask->src; - msk->dst = match.mask->dst; + ext->src |= match.key->src & match.mask->src; + ext->dst |= match.key->dst & match.mask->dst; + msk->src |= match.mask->src; + msk->dst |= match.mask->dst; } } @@ -349,12 +397,17 @@ nfp_flower_compile_tun_ipv6_addrs(struct nfp_flower_tun_ipv6 *ext, { if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) { struct flow_match_ipv6_addrs match; + int i; flow_rule_match_enc_ipv6_addrs(rule, &match); - ext->src = match.key->src; - ext->dst 
= match.key->dst; - msk->src = match.mask->src; - msk->dst = match.mask->dst; + for (i = 0; i < sizeof(ext->src); i++) { + ext->src.s6_addr[i] |= match.key->src.s6_addr[i] & + match.mask->src.s6_addr[i]; + ext->dst.s6_addr[i] |= match.key->dst.s6_addr[i] & + match.mask->dst.s6_addr[i]; + msk->src.s6_addr[i] |= match.mask->src.s6_addr[i]; + msk->dst.s6_addr[i] |= match.mask->dst.s6_addr[i]; + } } } @@ -367,10 +420,10 @@ nfp_flower_compile_tun_ip_ext(struct nfp_flower_tun_ip_ext *ext, struct flow_match_ip match; flow_rule_match_enc_ip(rule, &match); - ext->tos = match.key->tos; - ext->ttl = match.key->ttl; - msk->tos = match.mask->tos; - msk->ttl = match.mask->ttl; + ext->tos |= match.key->tos & match.mask->tos; + ext->ttl |= match.key->ttl & match.mask->ttl; + msk->tos |= match.mask->tos; + msk->ttl |= match.mask->ttl; } } @@ -383,10 +436,11 @@ nfp_flower_compile_tun_udp_key(__be32 *key, __be32 *key_msk, u32 vni; flow_rule_match_enc_keyid(rule, &match); - vni = be32_to_cpu(match.key->keyid) << NFP_FL_TUN_VNI_OFFSET; - *key = cpu_to_be32(vni); + vni = be32_to_cpu((match.key->keyid & match.mask->keyid)) << + NFP_FL_TUN_VNI_OFFSET; + *key |= cpu_to_be32(vni); vni = be32_to_cpu(match.mask->keyid) << NFP_FL_TUN_VNI_OFFSET; - *key_msk = cpu_to_be32(vni); + *key_msk |= cpu_to_be32(vni); } } @@ -398,22 +452,19 @@ nfp_flower_compile_tun_gre_key(__be32 *key, __be32 *key_msk, __be16 *flags, struct flow_match_enc_keyid match; flow_rule_match_enc_keyid(rule, &match); - *key = match.key->keyid; - *key_msk = match.mask->keyid; + *key |= match.key->keyid & match.mask->keyid; + *key_msk |= match.mask->keyid; *flags = cpu_to_be16(NFP_FL_GRE_FLAG_KEY); *flags_msk = cpu_to_be16(NFP_FL_GRE_FLAG_KEY); } } -static void +void nfp_flower_compile_ipv4_gre_tun(struct nfp_flower_ipv4_gre_tun *ext, struct nfp_flower_ipv4_gre_tun *msk, struct flow_rule *rule) { - memset(ext, 0, sizeof(struct nfp_flower_ipv4_gre_tun)); - memset(msk, 0, sizeof(struct nfp_flower_ipv4_gre_tun)); - /* NVGRE is the 
only supported GRE tunnel type */ ext->ethertype = cpu_to_be16(ETH_P_TEB); msk->ethertype = cpu_to_be16(~0); @@ -424,40 +475,31 @@ nfp_flower_compile_ipv4_gre_tun(struct nfp_flower_ipv4_gre_tun *ext, &ext->tun_flags, &msk->tun_flags, rule); } -static void +void nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *ext, struct nfp_flower_ipv4_udp_tun *msk, struct flow_rule *rule) { - memset(ext, 0, sizeof(struct nfp_flower_ipv4_udp_tun)); - memset(msk, 0, sizeof(struct nfp_flower_ipv4_udp_tun)); - nfp_flower_compile_tun_ipv4_addrs(&ext->ipv4, &msk->ipv4, rule); nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); nfp_flower_compile_tun_udp_key(&ext->tun_id, &msk->tun_id, rule); } -static void +void nfp_flower_compile_ipv6_udp_tun(struct nfp_flower_ipv6_udp_tun *ext, struct nfp_flower_ipv6_udp_tun *msk, struct flow_rule *rule) { - memset(ext, 0, sizeof(struct nfp_flower_ipv6_udp_tun)); - memset(msk, 0, sizeof(struct nfp_flower_ipv6_udp_tun)); - nfp_flower_compile_tun_ipv6_addrs(&ext->ipv6, &msk->ipv6, rule); nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); nfp_flower_compile_tun_udp_key(&ext->tun_id, &msk->tun_id, rule); } -static void +void nfp_flower_compile_ipv6_gre_tun(struct nfp_flower_ipv6_gre_tun *ext, struct nfp_flower_ipv6_gre_tun *msk, struct flow_rule *rule) { - memset(ext, 0, sizeof(struct nfp_flower_ipv6_gre_tun)); - memset(msk, 0, sizeof(struct nfp_flower_ipv6_gre_tun)); - /* NVGRE is the only supported GRE tunnel type */ ext->ethertype = cpu_to_be16(ETH_P_TEB); msk->ethertype = cpu_to_be16(~0); @@ -469,14 +511,13 @@ nfp_flower_compile_ipv6_gre_tun(struct nfp_flower_ipv6_gre_tun *ext, } int nfp_flower_compile_flow_match(struct nfp_app *app, - struct flow_cls_offload *flow, + struct flow_rule *rule, struct nfp_fl_key_ls *key_ls, struct net_device *netdev, struct nfp_fl_payload *nfp_flow, enum nfp_flower_tun_type tun_type, struct netlink_ext_ack *extack) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); 
struct nfp_flower_priv *priv = app->priv; bool qinq_sup; u32 port_id; @@ -527,9 +568,9 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, msk += sizeof(struct nfp_flower_in_port); if (NFP_FLOWER_LAYER_MAC & key_ls->key_layer) { - err = nfp_flower_compile_mac((struct nfp_flower_mac_mpls *)ext, - (struct nfp_flower_mac_mpls *)msk, - rule, extack); + err = nfp_flower_compile_mac_mpls((struct nfp_flower_mac_mpls *)ext, + (struct nfp_flower_mac_mpls *)msk, + rule, extack); if (err) return err; @@ -640,9 +681,7 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, } if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE_OP) { - err = nfp_flower_compile_geneve_opt(ext, msk, rule); - if (err) - return err; + nfp_flower_compile_geneve_opt(ext, msk, rule); } } diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c index 621113650a9b..2af9faee96c5 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c +++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c @@ -290,8 +290,7 @@ nfp_check_mask_remove(struct nfp_app *app, char *mask_data, u32 mask_len, return true; } -int nfp_compile_flow_metadata(struct nfp_app *app, - struct flow_cls_offload *flow, +int nfp_compile_flow_metadata(struct nfp_app *app, u32 cookie, struct nfp_fl_payload *nfp_flow, struct net_device *netdev, struct netlink_ext_ack *extack) @@ -310,7 +309,7 @@ int nfp_compile_flow_metadata(struct nfp_app *app, } nfp_flow->meta.host_ctx_id = cpu_to_be32(stats_cxt); - nfp_flow->meta.host_cookie = cpu_to_be64(flow->cookie); + nfp_flow->meta.host_cookie = cpu_to_be64(cookie); nfp_flow->ingress_dev = netdev; ctx_entry = kzalloc(sizeof(*ctx_entry), GFP_KERNEL); @@ -357,7 +356,7 @@ int nfp_compile_flow_metadata(struct nfp_app *app, priv->stats[stats_cxt].bytes = 0; priv->stats[stats_cxt].used = jiffies; - check_entry = nfp_flower_search_fl_table(app, flow->cookie, netdev); + check_entry = nfp_flower_search_fl_table(app, cookie, netdev); 
if (check_entry) { NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot offload duplicate flow entry"); if (nfp_release_stats_entry(app, stats_cxt)) { diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 2406d33356ad..556c3495211d 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -41,6 +41,8 @@ BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) | \ BIT(FLOW_DISSECTOR_KEY_ENC_IP) | \ BIT(FLOW_DISSECTOR_KEY_MPLS) | \ + BIT(FLOW_DISSECTOR_KEY_CT) | \ + BIT(FLOW_DISSECTOR_KEY_META) | \ BIT(FLOW_DISSECTOR_KEY_IP)) #define NFP_FLOWER_WHITELIST_TUN_DISSECTOR \ @@ -89,7 +91,7 @@ struct nfp_flower_merge_check { }; }; -static int +int nfp_flower_xmit_flow(struct nfp_app *app, struct nfp_fl_payload *nfp_flow, u8 mtype) { @@ -134,20 +136,16 @@ nfp_flower_xmit_flow(struct nfp_app *app, struct nfp_fl_payload *nfp_flow, return 0; } -static bool nfp_flower_check_higher_than_mac(struct flow_cls_offload *f) +static bool nfp_flower_check_higher_than_mac(struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(f); - return flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS) || flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS) || flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS) || flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP); } -static bool nfp_flower_check_higher_than_l3(struct flow_cls_offload *f) +static bool nfp_flower_check_higher_than_l3(struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(f); - return flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS) || flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP); } @@ -236,15 +234,14 @@ nfp_flower_calc_udp_tun_layer(struct flow_dissector_key_ports *enc_ports, return 0; } -static int +int nfp_flower_calculate_key_layers(struct nfp_app *app, struct net_device *netdev, struct nfp_fl_key_ls *ret_key_ls, - struct flow_cls_offload *flow, + struct 
flow_rule *rule, enum nfp_flower_tun_type *tun_type, struct netlink_ext_ack *extack) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); struct flow_dissector *dissector = rule->match.dissector; struct flow_match_basic basic = { NULL, NULL}; struct nfp_flower_priv *priv = app->priv; @@ -452,7 +449,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, NL_SET_ERR_MSG_MOD(extack, "unsupported offload: match on given EtherType is not supported"); return -EOPNOTSUPP; } - } else if (nfp_flower_check_higher_than_mac(flow)) { + } else if (nfp_flower_check_higher_than_mac(rule)) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: cannot match above L2 without specified EtherType"); return -EOPNOTSUPP; } @@ -471,7 +468,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, } if (!(key_layer & NFP_FLOWER_LAYER_TP) && - nfp_flower_check_higher_than_l3(flow)) { + nfp_flower_check_higher_than_l3(rule)) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: cannot match on L4 information without specified IP protocol type"); return -EOPNOTSUPP; } @@ -543,7 +540,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, return 0; } -static struct nfp_fl_payload * +struct nfp_fl_payload * nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer) { struct nfp_fl_payload *flow_pay; @@ -1005,9 +1002,7 @@ int nfp_flower_merge_offloaded_flows(struct nfp_app *app, struct nfp_fl_payload *sub_flow1, struct nfp_fl_payload *sub_flow2) { - struct flow_cls_offload merge_tc_off; struct nfp_flower_priv *priv = app->priv; - struct netlink_ext_ack *extack = NULL; struct nfp_fl_payload *merge_flow; struct nfp_fl_key_ls merge_key_ls; struct nfp_merge_info *merge_info; @@ -1016,7 +1011,6 @@ int nfp_flower_merge_offloaded_flows(struct nfp_app *app, ASSERT_RTNL(); - extack = merge_tc_off.common.extack; if (sub_flow1 == sub_flow2 || nfp_flower_is_merge_flow(sub_flow1) || nfp_flower_is_merge_flow(sub_flow2)) @@ -1061,9 +1055,8 @@ int nfp_flower_merge_offloaded_flows(struct nfp_app *app, if 
(err) goto err_unlink_sub_flow1; - merge_tc_off.cookie = merge_flow->tc_flower_cookie; - err = nfp_compile_flow_metadata(app, &merge_tc_off, merge_flow, - merge_flow->ingress_dev, extack); + err = nfp_compile_flow_metadata(app, merge_flow->tc_flower_cookie, merge_flow, + merge_flow->ingress_dev, NULL); if (err) goto err_unlink_sub_flow2; @@ -1305,6 +1298,7 @@ static int nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev, struct flow_cls_offload *flow) { + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); enum nfp_flower_tun_type tun_type = NFP_FL_TUNNEL_NONE; struct nfp_flower_priv *priv = app->priv; struct netlink_ext_ack *extack = NULL; @@ -1330,7 +1324,7 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev, if (!key_layer) return -ENOMEM; - err = nfp_flower_calculate_key_layers(app, netdev, key_layer, flow, + err = nfp_flower_calculate_key_layers(app, netdev, key_layer, rule, &tun_type, extack); if (err) goto err_free_key_ls; @@ -1341,12 +1335,12 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev, goto err_free_key_ls; } - err = nfp_flower_compile_flow_match(app, flow, key_layer, netdev, + err = nfp_flower_compile_flow_match(app, rule, key_layer, netdev, flow_pay, tun_type, extack); if (err) goto err_destroy_flow; - err = nfp_flower_compile_action(app, flow, netdev, flow_pay, extack); + err = nfp_flower_compile_action(app, rule, netdev, flow_pay, extack); if (err) goto err_destroy_flow; @@ -1356,7 +1350,7 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev, goto err_destroy_flow; } - err = nfp_compile_flow_metadata(app, flow, flow_pay, netdev, extack); + err = nfp_compile_flow_metadata(app, flow->cookie, flow_pay, netdev, extack); if (err) goto err_destroy_flow; @@ -1476,7 +1470,7 @@ err_free_links: kfree_rcu(merge_flow, rcu); } -static void +void nfp_flower_del_linked_merge_flows(struct nfp_app *app, struct nfp_fl_payload *sub_flow) { @@ -1601,7 +1595,7 @@ 
__nfp_flower_update_merge_stats(struct nfp_app *app, } } -static void +void nfp_flower_update_merge_stats(struct nfp_app *app, struct nfp_fl_payload *sub_flow) { @@ -1628,10 +1622,17 @@ nfp_flower_get_stats(struct nfp_app *app, struct net_device *netdev, struct flow_cls_offload *flow) { struct nfp_flower_priv *priv = app->priv; + struct nfp_fl_ct_map_entry *ct_map_ent; struct netlink_ext_ack *extack = NULL; struct nfp_fl_payload *nfp_flow; u32 ctx_id; + /* Check ct_map table first */ + ct_map_ent = rhashtable_lookup_fast(&priv->ct_map_table, &flow->cookie, + nfp_ct_map_params); + if (ct_map_ent) + return nfp_fl_ct_stats(flow, ct_map_ent); + extack = flow->common.extack; nfp_flow = nfp_flower_search_fl_table(app, flow->cookie, netdev); if (!nfp_flow) { diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index df5b748be068..df203738511b 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -17,6 +17,7 @@ #include <linux/list.h> #include <linux/netdevice.h> #include <linux/pci.h> +#include <linux/dim.h> #include <linux/io-64-nonatomic-hi-lo.h> #include <linux/semaphore.h> #include <linux/workqueue.h> @@ -360,6 +361,9 @@ struct nfp_net_rx_ring { * @rx_ring: Pointer to RX ring * @xdp_ring: Pointer to an extra TX ring for XDP * @irq_entry: MSI-X table entry (use for talking to the device) + * @event_ctr: Number of interrupt + * @rx_dim: Dynamic interrupt moderation structure for RX + * @tx_dim: Dynamic interrupt moderation structure for TX * @rx_sync: Seqlock for atomic updates of RX stats * @rx_pkts: Number of received packets * @rx_bytes: Number of received bytes @@ -410,6 +414,10 @@ struct nfp_net_r_vector { u16 irq_entry; + u16 event_ctr; + struct dim rx_dim; + struct dim tx_dim; + struct u64_stats_sync rx_sync; u64 rx_pkts; u64 rx_bytes; @@ -571,6 +579,8 @@ struct nfp_net_dp { * mailbox area, crypto TLV * @link_up: Is the link up? 
* @link_status_lock: Protects @link_* and ensures atomicity with BAR reading + * @rx_coalesce_adapt_on: Is RX interrupt moderation adaptive? + * @tx_coalesce_adapt_on: Is TX interrupt moderation adaptive? * @rx_coalesce_usecs: RX interrupt moderation usecs delay parameter * @rx_coalesce_max_frames: RX interrupt moderation frame count parameter * @tx_coalesce_usecs: TX interrupt moderation usecs delay parameter @@ -654,6 +664,8 @@ struct nfp_net { struct semaphore bar_lock; + bool rx_coalesce_adapt_on; + bool tx_coalesce_adapt_on; u32 rx_coalesce_usecs; u32 rx_coalesce_max_frames; u32 tx_coalesce_usecs; @@ -919,6 +931,14 @@ static inline bool nfp_netdev_is_nfp_net(struct net_device *netdev) return netdev->netdev_ops == &nfp_net_netdev_ops; } +static inline int nfp_net_coalesce_para_check(u32 usecs, u32 pkts) +{ + if ((usecs >= ((1 << 16) - 1)) || (pkts >= ((1 << 16) - 1))) + return -EINVAL; + + return 0; +} + /* Prototypes */ void nfp_net_get_fw_version(struct nfp_net_fw_version *fw_ver, void __iomem *ctrl_bar); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 5dfa4799c34f..15078f9dc9f1 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -474,6 +474,12 @@ static irqreturn_t nfp_net_irq_rxtx(int irq, void *data) { struct nfp_net_r_vector *r_vec = data; + /* Currently we cannot tell if it's a rx or tx interrupt, + * since dim does not need accurate event_ctr to calculate, + * we just use this counter for both rx and tx dim. 
+ */ + r_vec->event_ctr++; + napi_schedule_irqoff(&r_vec->napi); /* The FW auto-masks any interrupt, either via the MASK bit in @@ -1697,7 +1703,7 @@ nfp_net_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta, case NFP_NET_META_RESYNC_INFO: if (nfp_net_tls_rx_resync_req(netdev, data, pkt, pkt_len)) - return NULL; + return false; data += sizeof(struct nfp_net_tls_resync_req); break; default: @@ -2061,6 +2067,36 @@ static int nfp_net_poll(struct napi_struct *napi, int budget) if (napi_complete_done(napi, pkts_polled)) nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry); + if (r_vec->nfp_net->rx_coalesce_adapt_on) { + struct dim_sample dim_sample = {}; + unsigned int start; + u64 pkts, bytes; + + do { + start = u64_stats_fetch_begin(&r_vec->rx_sync); + pkts = r_vec->rx_pkts; + bytes = r_vec->rx_bytes; + } while (u64_stats_fetch_retry(&r_vec->rx_sync, start)); + + dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample); + net_dim(&r_vec->rx_dim, dim_sample); + } + + if (r_vec->nfp_net->tx_coalesce_adapt_on) { + struct dim_sample dim_sample = {}; + unsigned int start; + u64 pkts, bytes; + + do { + start = u64_stats_fetch_begin(&r_vec->tx_sync); + pkts = r_vec->tx_pkts; + bytes = r_vec->tx_bytes; + } while (u64_stats_fetch_retry(&r_vec->tx_sync, start)); + + dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample); + net_dim(&r_vec->tx_dim, dim_sample); + } + return pkts_polled; } @@ -2873,6 +2909,7 @@ static int nfp_net_set_config_and_enable(struct nfp_net *nn) */ static void nfp_net_close_stack(struct nfp_net *nn) { + struct nfp_net_r_vector *r_vec; unsigned int r; disable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector); @@ -2880,8 +2917,16 @@ static void nfp_net_close_stack(struct nfp_net *nn) nn->link_up = false; for (r = 0; r < nn->dp.num_r_vecs; r++) { - disable_irq(nn->r_vecs[r].irq_vector); - napi_disable(&nn->r_vecs[r].napi); + r_vec = &nn->r_vecs[r]; + + disable_irq(r_vec->irq_vector); + napi_disable(&r_vec->napi); + + if 
(r_vec->rx_ring) + cancel_work_sync(&r_vec->rx_dim.work); + + if (r_vec->tx_ring) + cancel_work_sync(&r_vec->tx_dim.work); } netif_tx_disable(nn->dp.netdev); @@ -2948,17 +2993,92 @@ void nfp_ctrl_close(struct nfp_net *nn) rtnl_unlock(); } +static void nfp_net_rx_dim_work(struct work_struct *work) +{ + struct nfp_net_r_vector *r_vec; + unsigned int factor, value; + struct dim_cq_moder moder; + struct nfp_net *nn; + struct dim *dim; + + dim = container_of(work, struct dim, work); + moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix); + r_vec = container_of(dim, struct nfp_net_r_vector, rx_dim); + nn = r_vec->nfp_net; + + /* Compute factor used to convert coalesce '_usecs' parameters to + * ME timestamp ticks. There are 16 ME clock cycles for each timestamp + * count. + */ + factor = nn->tlv_caps.me_freq_mhz / 16; + if (nfp_net_coalesce_para_check(factor * moder.usec, moder.pkts)) + return; + + /* copy RX interrupt coalesce parameters */ + value = (moder.pkts << 16) | (factor * moder.usec); + rtnl_lock(); + nn_writel(nn, NFP_NET_CFG_RXR_IRQ_MOD(r_vec->rx_ring->idx), value); + (void)nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_IRQMOD); + rtnl_unlock(); + + dim->state = DIM_START_MEASURE; +} + +static void nfp_net_tx_dim_work(struct work_struct *work) +{ + struct nfp_net_r_vector *r_vec; + unsigned int factor, value; + struct dim_cq_moder moder; + struct nfp_net *nn; + struct dim *dim; + + dim = container_of(work, struct dim, work); + moder = net_dim_get_tx_moderation(dim->mode, dim->profile_ix); + r_vec = container_of(dim, struct nfp_net_r_vector, tx_dim); + nn = r_vec->nfp_net; + + /* Compute factor used to convert coalesce '_usecs' parameters to + * ME timestamp ticks. There are 16 ME clock cycles for each timestamp + * count. 
+ */ + factor = nn->tlv_caps.me_freq_mhz / 16; + if (nfp_net_coalesce_para_check(factor * moder.usec, moder.pkts)) + return; + + /* copy TX interrupt coalesce parameters */ + value = (moder.pkts << 16) | (factor * moder.usec); + rtnl_lock(); + nn_writel(nn, NFP_NET_CFG_TXR_IRQ_MOD(r_vec->tx_ring->idx), value); + (void)nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_IRQMOD); + rtnl_unlock(); + + dim->state = DIM_START_MEASURE; +} + /** * nfp_net_open_stack() - Start the device from stack's perspective * @nn: NFP Net device to reconfigure */ static void nfp_net_open_stack(struct nfp_net *nn) { + struct nfp_net_r_vector *r_vec; unsigned int r; for (r = 0; r < nn->dp.num_r_vecs; r++) { - napi_enable(&nn->r_vecs[r].napi); - enable_irq(nn->r_vecs[r].irq_vector); + r_vec = &nn->r_vecs[r]; + + if (r_vec->rx_ring) { + INIT_WORK(&r_vec->rx_dim.work, nfp_net_rx_dim_work); + r_vec->rx_dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + } + + if (r_vec->tx_ring) { + INIT_WORK(&r_vec->tx_dim.work, nfp_net_tx_dim_work); + r_vec->tx_dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + } + + napi_enable(&r_vec->napi); + enable_irq(r_vec->irq_vector); } netif_tx_wake_all_queues(nn->dp.netdev); @@ -3893,6 +4013,9 @@ static void nfp_net_irqmod_init(struct nfp_net *nn) nn->rx_coalesce_max_frames = 64; nn->tx_coalesce_usecs = 50; nn->tx_coalesce_max_frames = 64; + + nn->rx_coalesce_adapt_on = true; + nn->tx_coalesce_adapt_on = true; } static void nfp_net_netdev_init(struct nfp_net *nn) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 1b482446536d..a213784ffa54 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -1083,6 +1083,9 @@ static int nfp_net_get_coalesce(struct net_device *netdev, if (!(nn->cap & NFP_NET_CFG_CTRL_IRQMOD)) return -EINVAL; + ec->use_adaptive_rx_coalesce = nn->rx_coalesce_adapt_on; + ec->use_adaptive_tx_coalesce = 
nn->tx_coalesce_adapt_on; + ec->rx_coalesce_usecs = nn->rx_coalesce_usecs; ec->rx_max_coalesced_frames = nn->rx_coalesce_max_frames; ec->tx_coalesce_usecs = nn->tx_coalesce_usecs; @@ -1359,19 +1362,18 @@ static int nfp_net_set_coalesce(struct net_device *netdev, if (!ec->tx_coalesce_usecs && !ec->tx_max_coalesced_frames) return -EINVAL; - if (ec->rx_coalesce_usecs * factor >= ((1 << 16) - 1)) - return -EINVAL; - - if (ec->tx_coalesce_usecs * factor >= ((1 << 16) - 1)) + if (nfp_net_coalesce_para_check(ec->rx_coalesce_usecs * factor, + ec->rx_max_coalesced_frames)) return -EINVAL; - if (ec->rx_max_coalesced_frames >= ((1 << 16) - 1)) - return -EINVAL; - - if (ec->tx_max_coalesced_frames >= ((1 << 16) - 1)) + if (nfp_net_coalesce_para_check(ec->tx_coalesce_usecs * factor, + ec->tx_max_coalesced_frames)) return -EINVAL; /* configuration is valid */ + nn->rx_coalesce_adapt_on = !!ec->use_adaptive_rx_coalesce; + nn->tx_coalesce_adapt_on = !!ec->use_adaptive_tx_coalesce; + nn->rx_coalesce_usecs = ec->rx_coalesce_usecs; nn->rx_coalesce_max_frames = ec->rx_max_coalesced_frames; nn->tx_coalesce_usecs = ec->tx_coalesce_usecs; @@ -1443,7 +1445,8 @@ static int nfp_net_set_channels(struct net_device *netdev, static const struct ethtool_ops nfp_net_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | - ETHTOOL_COALESCE_MAX_FRAMES, + ETHTOOL_COALESCE_MAX_FRAMES | + ETHTOOL_COALESCE_USE_ADAPTIVE, .get_drvinfo = nfp_net_get_drvinfo, .get_link = ethtool_op_get_link, .get_ringparam = nfp_net_get_ringparam, diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c index e4a5416adc80..505f605fa40b 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c @@ -373,9 +373,6 @@ static void ionic_remove(struct pci_dev *pdev) { struct ionic *ionic = pci_get_drvdata(pdev); - if (!ionic) - return; - del_timer_sync(&ionic->watchdog_timer); if 
(ionic->lif) { diff --git a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c index b41301a5b0df..cd520e4c5522 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c @@ -91,20 +91,20 @@ int ionic_devlink_register(struct ionic *ionic) attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; devlink_port_attrs_set(&ionic->dl_port, &attrs); err = devlink_port_register(dl, &ionic->dl_port, 0); - if (err) + if (err) { dev_err(ionic->dev, "devlink_port_register failed: %d\n", err); - else - devlink_port_type_eth_set(&ionic->dl_port, - ionic->lif->netdev); + devlink_unregister(dl); + return err; + } - return err; + devlink_port_type_eth_set(&ionic->dl_port, ionic->lif->netdev); + return 0; } void ionic_devlink_unregister(struct ionic *ionic) { struct devlink *dl = priv_to_devlink(ionic); - if (ionic->dl_port.registered) - devlink_port_unregister(&ionic->dl_port); + devlink_port_unregister(&ionic->dl_port); devlink_unregister(dl); } diff --git a/drivers/net/ethernet/rocker/rocker.h b/drivers/net/ethernet/rocker/rocker.h index 315a6e5c0f59..e75814a4654f 100644 --- a/drivers/net/ethernet/rocker/rocker.h +++ b/drivers/net/ethernet/rocker/rocker.h @@ -119,7 +119,8 @@ struct rocker_world_ops { int (*port_obj_fdb_del)(struct rocker_port *rocker_port, u16 vid, const unsigned char *addr); int (*port_master_linked)(struct rocker_port *rocker_port, - struct net_device *master); + struct net_device *master, + struct netlink_ext_ack *extack); int (*port_master_unlinked)(struct rocker_port *rocker_port, struct net_device *master); int (*port_neigh_update)(struct rocker_port *rocker_port, diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index a46633606cae..53d407a5dbf7 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -1670,13 +1670,14 @@ rocker_world_port_fdb_del(struct 
rocker_port *rocker_port, } static int rocker_world_port_master_linked(struct rocker_port *rocker_port, - struct net_device *master) + struct net_device *master, + struct netlink_ext_ack *extack) { struct rocker_world_ops *wops = rocker_port->rocker->wops; if (!wops->port_master_linked) return -EOPNOTSUPP; - return wops->port_master_linked(rocker_port, master); + return wops->port_master_linked(rocker_port, master, extack); } static int rocker_world_port_master_unlinked(struct rocker_port *rocker_port, @@ -3107,6 +3108,7 @@ struct rocker_port *rocker_port_dev_lower_find(struct net_device *dev, static int rocker_netdevice_event(struct notifier_block *unused, unsigned long event, void *ptr) { + struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr); struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct netdev_notifier_changeupper_info *info; struct rocker_port *rocker_port; @@ -3123,7 +3125,8 @@ static int rocker_netdevice_event(struct notifier_block *unused, rocker_port = netdev_priv(dev); if (info->linking) { err = rocker_world_port_master_linked(rocker_port, - info->upper_dev); + info->upper_dev, + extack); if (err) netdev_warn(dev, "failed to reflect master linked (err %d)\n", err); diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index 967a634ee9ac..b82e169b7836 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -2571,8 +2571,10 @@ static int ofdpa_port_obj_fdb_del(struct rocker_port *rocker_port, } static int ofdpa_port_bridge_join(struct ofdpa_port *ofdpa_port, - struct net_device *bridge) + struct net_device *bridge, + struct netlink_ext_ack *extack) { + struct net_device *dev = ofdpa_port->dev; int err; /* Port is joining bridge, so the internal VLAN for the @@ -2592,13 +2594,21 @@ static int ofdpa_port_bridge_join(struct ofdpa_port *ofdpa_port, ofdpa_port->bridge_dev = bridge; - return ofdpa_port_vlan_add(ofdpa_port, 
OFDPA_UNTAGGED_VID, 0); + err = ofdpa_port_vlan_add(ofdpa_port, OFDPA_UNTAGGED_VID, 0); + if (err) + return err; + + return switchdev_bridge_port_offload(dev, dev, NULL, NULL, NULL, + false, extack); } static int ofdpa_port_bridge_leave(struct ofdpa_port *ofdpa_port) { + struct net_device *dev = ofdpa_port->dev; int err; + switchdev_bridge_port_unoffload(dev, NULL, NULL, NULL); + err = ofdpa_port_vlan_del(ofdpa_port, OFDPA_UNTAGGED_VID, 0); if (err) return err; @@ -2637,13 +2647,14 @@ static int ofdpa_port_ovs_changed(struct ofdpa_port *ofdpa_port, } static int ofdpa_port_master_linked(struct rocker_port *rocker_port, - struct net_device *master) + struct net_device *master, + struct netlink_ext_ack *extack) { struct ofdpa_port *ofdpa_port = rocker_port->wpriv; int err = 0; if (netif_is_bridge_master(master)) - err = ofdpa_port_bridge_join(ofdpa_port, master); + err = ofdpa_port_bridge_join(ofdpa_port, master, extack); else if (netif_is_ovs_master(master)) err = ofdpa_port_ovs_changed(ofdpa_port, master); return err; diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 718539cdd2f2..229e2f09d605 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -7,6 +7,7 @@ #include <linux/clk.h> #include <linux/etherdevice.h> +#include <linux/if_bridge.h> #include <linux/if_vlan.h> #include <linux/interrupt.h> #include <linux/kernel.h> @@ -2077,10 +2078,13 @@ bool am65_cpsw_port_dev_check(const struct net_device *ndev) return false; } -static int am65_cpsw_netdevice_port_link(struct net_device *ndev, struct net_device *br_ndev) +static int am65_cpsw_netdevice_port_link(struct net_device *ndev, + struct net_device *br_ndev, + struct netlink_ext_ack *extack) { struct am65_cpsw_common *common = am65_ndev_to_common(ndev); struct am65_cpsw_ndev_priv *priv = am65_ndev_to_priv(ndev); + int err; if (!common->br_members) { common->hw_bridge_dev = br_ndev; @@ -2092,6 +2096,11 @@ static 
int am65_cpsw_netdevice_port_link(struct net_device *ndev, struct net_dev return -EOPNOTSUPP; } + err = switchdev_bridge_port_offload(ndev, ndev, NULL, NULL, NULL, + false, extack); + if (err) + return err; + common->br_members |= BIT(priv->port->port_id); am65_cpsw_port_offload_fwd_mark_update(common); @@ -2104,6 +2113,8 @@ static void am65_cpsw_netdevice_port_unlink(struct net_device *ndev) struct am65_cpsw_common *common = am65_ndev_to_common(ndev); struct am65_cpsw_ndev_priv *priv = am65_ndev_to_priv(ndev); + switchdev_bridge_port_unoffload(ndev, NULL, NULL, NULL); + common->br_members &= ~BIT(priv->port->port_id); am65_cpsw_port_offload_fwd_mark_update(common); @@ -2116,6 +2127,7 @@ static void am65_cpsw_netdevice_port_unlink(struct net_device *ndev) static int am65_cpsw_netdevice_event(struct notifier_block *unused, unsigned long event, void *ptr) { + struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr); struct net_device *ndev = netdev_notifier_info_to_dev(ptr); struct netdev_notifier_changeupper_info *info; int ret = NOTIFY_DONE; @@ -2129,7 +2141,9 @@ static int am65_cpsw_netdevice_event(struct notifier_block *unused, if (netif_is_bridge_master(info->upper_dev)) { if (info->linking) - ret = am65_cpsw_netdevice_port_link(ndev, info->upper_dev); + ret = am65_cpsw_netdevice_port_link(ndev, + info->upper_dev, + extack); else am65_cpsw_netdevice_port_unlink(ndev); } diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index 57d279fdcc9f..4448a91cce54 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -11,6 +11,7 @@ #include <linux/module.h> #include <linux/irqreturn.h> #include <linux/interrupt.h> +#include <linux/if_bridge.h> #include <linux/if_ether.h> #include <linux/etherdevice.h> #include <linux/net_tstamp.h> @@ -1499,10 +1500,12 @@ static void cpsw_port_offload_fwd_mark_update(struct cpsw_common *cpsw) } static int cpsw_netdevice_port_link(struct net_device *ndev, - 
struct net_device *br_ndev) + struct net_device *br_ndev, + struct netlink_ext_ack *extack) { struct cpsw_priv *priv = netdev_priv(ndev); struct cpsw_common *cpsw = priv->cpsw; + int err; if (!cpsw->br_members) { cpsw->hw_bridge_dev = br_ndev; @@ -1514,6 +1517,11 @@ static int cpsw_netdevice_port_link(struct net_device *ndev, return -EOPNOTSUPP; } + err = switchdev_bridge_port_offload(ndev, ndev, NULL, NULL, NULL, + false, extack); + if (err) + return err; + cpsw->br_members |= BIT(priv->emac_port); cpsw_port_offload_fwd_mark_update(cpsw); @@ -1526,6 +1534,8 @@ static void cpsw_netdevice_port_unlink(struct net_device *ndev) struct cpsw_priv *priv = netdev_priv(ndev); struct cpsw_common *cpsw = priv->cpsw; + switchdev_bridge_port_unoffload(ndev, NULL, NULL, NULL); + cpsw->br_members &= ~BIT(priv->emac_port); cpsw_port_offload_fwd_mark_update(cpsw); @@ -1538,6 +1548,7 @@ static void cpsw_netdevice_port_unlink(struct net_device *ndev) static int cpsw_netdevice_event(struct notifier_block *unused, unsigned long event, void *ptr) { + struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr); struct net_device *ndev = netdev_notifier_info_to_dev(ptr); struct netdev_notifier_changeupper_info *info; int ret = NOTIFY_DONE; @@ -1552,7 +1563,8 @@ static int cpsw_netdevice_event(struct notifier_block *unused, if (netif_is_bridge_master(info->upper_dev)) { if (info->linking) ret = cpsw_netdevice_port_link(ndev, - info->upper_dev); + info->upper_dev, + extack); else cpsw_netdevice_port_unlink(ndev); } diff --git a/drivers/net/ipa/Makefile b/drivers/net/ipa/Makefile index 506f8d5cd4ee..75435d40b920 100644 --- a/drivers/net/ipa/Makefile +++ b/drivers/net/ipa/Makefile @@ -1,6 +1,3 @@ -# Un-comment the next line if you want to validate configuration data -#ccflags-y += -DIPA_VALIDATE - obj-$(CONFIG_QCOM_IPA) += ipa.o ipa-y := ipa_main.o ipa_clock.o ipa_reg.o ipa_mem.o \ diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 427c68b2ad8f..3de67ba066a6 100644 --- 
a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -1964,7 +1964,6 @@ static void gsi_evt_ring_init(struct gsi *gsi) static bool gsi_channel_data_valid(struct gsi *gsi, const struct ipa_gsi_endpoint_data *data) { -#ifdef IPA_VALIDATION u32 channel_id = data->channel_id; struct device *dev = gsi->dev; @@ -2010,7 +2009,6 @@ static bool gsi_channel_data_valid(struct gsi *gsi, channel_id, data->channel.event_count); return false; } -#endif /* IPA_VALIDATION */ return true; } diff --git a/drivers/net/ipa/gsi_trans.c b/drivers/net/ipa/gsi_trans.c index 8c795a6a8598..1544564bc283 100644 --- a/drivers/net/ipa/gsi_trans.c +++ b/drivers/net/ipa/gsi_trans.c @@ -90,14 +90,12 @@ int gsi_trans_pool_init(struct gsi_trans_pool *pool, size_t size, u32 count, { void *virt; -#ifdef IPA_VALIDATE if (!size) return -EINVAL; if (count < max_alloc) return -EINVAL; if (!max_alloc) return -EINVAL; -#endif /* IPA_VALIDATE */ /* By allocating a few extra entries in our pool (one less * than the maximum number that will be requested in a @@ -140,14 +138,12 @@ int gsi_trans_pool_init_dma(struct device *dev, struct gsi_trans_pool *pool, dma_addr_t addr; void *virt; -#ifdef IPA_VALIDATE if (!size) return -EINVAL; if (count < max_alloc) return -EINVAL; if (!max_alloc) return -EINVAL; -#endif /* IPA_VALIDATE */ /* Don't let allocations cross a power-of-two boundary */ size = __roundup_pow_of_two(size); @@ -188,8 +184,8 @@ static u32 gsi_trans_pool_alloc_common(struct gsi_trans_pool *pool, u32 count) { u32 offset; - /* assert(count > 0); */ - /* assert(count <= pool->max_alloc); */ + WARN_ON(!count); + WARN_ON(count > pool->max_alloc); /* Allocate from beginning if wrap would occur */ if (count > pool->count - pool->free) @@ -225,9 +221,10 @@ void *gsi_trans_pool_next(struct gsi_trans_pool *pool, void *element) { void *end = pool->base + pool->count * pool->size; - /* assert(element >= pool->base); */ - /* assert(element < end); */ - /* assert(pool->max_alloc == 1); */ + WARN_ON(element < 
pool->base); + WARN_ON(element >= end); + WARN_ON(pool->max_alloc != 1); + element += pool->size; return element < end ? element : pool->base; @@ -332,7 +329,8 @@ struct gsi_trans *gsi_channel_trans_alloc(struct gsi *gsi, u32 channel_id, struct gsi_trans_info *trans_info; struct gsi_trans *trans; - /* assert(tre_count <= gsi_channel_trans_tre_max(gsi, channel_id)); */ + if (WARN_ON(tre_count > gsi_channel_trans_tre_max(gsi, channel_id))) + return NULL; trans_info = &channel->trans_info; @@ -408,7 +406,7 @@ void gsi_trans_cmd_add(struct gsi_trans *trans, void *buf, u32 size, u32 which = trans->used++; struct scatterlist *sg; - /* assert(which < trans->tre_count); */ + WARN_ON(which >= trans->tre_count); /* Commands are quite different from data transfer requests. * Their payloads come from a pool whose memory is allocated @@ -441,8 +439,10 @@ int gsi_trans_page_add(struct gsi_trans *trans, struct page *page, u32 size, struct scatterlist *sg = &trans->sgl[0]; int ret; - /* assert(trans->tre_count == 1); */ - /* assert(!trans->used); */ + if (WARN_ON(trans->tre_count != 1)) + return -EINVAL; + if (WARN_ON(trans->used)) + return -EINVAL; sg_set_page(sg, page, size, offset); ret = dma_map_sg(trans->gsi->dev, sg, 1, trans->direction); @@ -461,8 +461,10 @@ int gsi_trans_skb_add(struct gsi_trans *trans, struct sk_buff *skb) u32 used; int ret; - /* assert(trans->tre_count == 1); */ - /* assert(!trans->used); */ + if (WARN_ON(trans->tre_count != 1)) + return -EINVAL; + if (WARN_ON(trans->used)) + return -EINVAL; /* skb->len will not be 0 (checked early) */ ret = skb_to_sgvec(skb, sg, 0, skb->len); @@ -550,7 +552,7 @@ static void __gsi_trans_commit(struct gsi_trans *trans, bool ring_db) u32 avail; u32 i; - /* assert(trans->used > 0); */ + WARN_ON(!trans->used); /* Consume the entries. If we cross the end of the ring while * filling them we'll switch to the beginning to finish. 
diff --git a/drivers/net/ipa/ipa.h b/drivers/net/ipa/ipa.h index 744406832a77..71ba996096bb 100644 --- a/drivers/net/ipa/ipa.h +++ b/drivers/net/ipa/ipa.h @@ -51,6 +51,7 @@ enum ipa_flag { * @table_addr: DMA address of filter/route table content * @table_virt: Virtual address of filter/route table content * @interrupt: IPA Interrupt information + * @uc_clocked: true if clock is active by proxy for microcontroller * @uc_loaded: true after microcontroller has reported it's ready * @reg_addr: DMA address used for IPA register access * @reg_virt: Virtual address used for IPA register access @@ -95,6 +96,7 @@ struct ipa { __le64 *table_virt; struct ipa_interrupt *interrupt; + bool uc_clocked; bool uc_loaded; dma_addr_t reg_addr; diff --git a/drivers/net/ipa/ipa_cmd.c b/drivers/net/ipa/ipa_cmd.c index af44ca41189e..cff51731195a 100644 --- a/drivers/net/ipa/ipa_cmd.c +++ b/drivers/net/ipa/ipa_cmd.c @@ -159,35 +159,49 @@ static void ipa_cmd_validate_build(void) BUILD_BUG_ON(TABLE_SIZE > field_max(IP_FLTRT_FLAGS_NHASH_SIZE_FMASK)); #undef TABLE_COUNT_MAX #undef TABLE_SIZE -} -#ifdef IPA_VALIDATE + /* Hashed and non-hashed fields are assumed to be the same size */ + BUILD_BUG_ON(field_max(IP_FLTRT_FLAGS_HASH_SIZE_FMASK) != + field_max(IP_FLTRT_FLAGS_NHASH_SIZE_FMASK)); + BUILD_BUG_ON(field_max(IP_FLTRT_FLAGS_HASH_ADDR_FMASK) != + field_max(IP_FLTRT_FLAGS_NHASH_ADDR_FMASK)); + + /* Valid endpoint numbers must fit in the IP packet init command */ + BUILD_BUG_ON(field_max(IPA_PACKET_INIT_DEST_ENDPOINT_FMASK) < + IPA_ENDPOINT_MAX - 1); +} /* Validate a memory region holding a table */ -bool ipa_cmd_table_valid(struct ipa *ipa, const struct ipa_mem *mem, - bool route, bool ipv6, bool hashed) +bool ipa_cmd_table_valid(struct ipa *ipa, const struct ipa_mem *mem, bool route) { + u32 offset_max = field_max(IP_FLTRT_FLAGS_NHASH_ADDR_FMASK); + u32 size_max = field_max(IP_FLTRT_FLAGS_NHASH_SIZE_FMASK); + const char *table = route ? 
"route" : "filter"; struct device *dev = &ipa->pdev->dev; - u32 offset_max; - offset_max = hashed ? field_max(IP_FLTRT_FLAGS_HASH_ADDR_FMASK) - : field_max(IP_FLTRT_FLAGS_NHASH_ADDR_FMASK); + /* Size must fit in the immediate command field that holds it */ + if (mem->size > size_max) { + dev_err(dev, "%s table region size too large\n", table); + dev_err(dev, " (0x%04x > 0x%04x)\n", + mem->size, size_max); + + return false; + } + + /* Offset must fit in the immediate command field that holds it */ if (mem->offset > offset_max || ipa->mem_offset > offset_max - mem->offset) { - dev_err(dev, "IPv%c %s%s table region offset too large\n", - ipv6 ? '6' : '4', hashed ? "hashed " : "", - route ? "route" : "filter"); + dev_err(dev, "%s table region offset too large\n", table); dev_err(dev, " (0x%04x + 0x%04x > 0x%04x)\n", ipa->mem_offset, mem->offset, offset_max); return false; } + /* Entire memory range must fit within IPA-local memory */ if (mem->offset > ipa->mem_size || mem->size > ipa->mem_size - mem->offset) { - dev_err(dev, "IPv%c %s%s table region out of range\n", - ipv6 ? '6' : '4', hashed ? "hashed " : "", - route ? 
"route" : "filter"); + dev_err(dev, "%s table region out of range\n", table); dev_err(dev, " (0x%04x + 0x%04x > 0x%04x)\n", mem->offset, mem->size, ipa->mem_size); @@ -331,7 +345,6 @@ bool ipa_cmd_data_valid(struct ipa *ipa) return true; } -#endif /* IPA_VALIDATE */ int ipa_cmd_pool_init(struct gsi_channel *channel, u32 tre_max) { @@ -522,9 +535,6 @@ static void ipa_cmd_ip_packet_init_add(struct gsi_trans *trans, u8 endpoint_id) union ipa_cmd_payload *cmd_payload; dma_addr_t payload_addr; - /* assert(endpoint_id < - field_max(IPA_PACKET_INIT_DEST_ENDPOINT_FMASK)); */ - cmd_payload = ipa_cmd_payload_alloc(ipa, &payload_addr); payload = &cmd_payload->ip_packet_init; @@ -548,8 +558,9 @@ void ipa_cmd_dma_shared_mem_add(struct gsi_trans *trans, u32 offset, u16 size, u16 flags; /* size and offset must fit in 16 bit fields */ - /* assert(size > 0 && size <= U16_MAX); */ - /* assert(offset <= U16_MAX && ipa->mem_offset <= U16_MAX - offset); */ + WARN_ON(!size); + WARN_ON(size > U16_MAX); + WARN_ON(offset > U16_MAX || ipa->mem_offset > U16_MAX - offset); offset += ipa->mem_offset; @@ -588,8 +599,6 @@ static void ipa_cmd_ip_tag_status_add(struct gsi_trans *trans) union ipa_cmd_payload *cmd_payload; dma_addr_t payload_addr; - /* assert(tag <= field_max(IP_PACKET_TAG_STATUS_TAG_FMASK)); */ - cmd_payload = ipa_cmd_payload_alloc(ipa, &payload_addr); payload = &cmd_payload->ip_packet_tag_status; diff --git a/drivers/net/ipa/ipa_cmd.h b/drivers/net/ipa/ipa_cmd.h index b99262281f41..69cd085d427d 100644 --- a/drivers/net/ipa/ipa_cmd.h +++ b/drivers/net/ipa/ipa_cmd.h @@ -57,20 +57,16 @@ struct ipa_cmd_info { enum dma_data_direction direction; }; -#ifdef IPA_VALIDATE - /** * ipa_cmd_table_valid() - Validate a memory region holding a table * @ipa: - IPA pointer * @mem: - IPA memory region descriptor * @route: - Whether the region holds a route or filter table - * @ipv6: - Whether the table is for IPv6 or IPv4 - * @hashed: - Whether the table is hashed or non-hashed * * Return: true if 
region is valid, false otherwise */ bool ipa_cmd_table_valid(struct ipa *ipa, const struct ipa_mem *mem, - bool route, bool ipv6, bool hashed); + bool route); /** * ipa_cmd_data_valid() - Validate command-realted configuration is valid @@ -80,22 +76,6 @@ bool ipa_cmd_table_valid(struct ipa *ipa, const struct ipa_mem *mem, */ bool ipa_cmd_data_valid(struct ipa *ipa); -#else /* !IPA_VALIDATE */ - -static inline bool ipa_cmd_table_valid(struct ipa *ipa, - const struct ipa_mem *mem, bool route, - bool ipv6, bool hashed) -{ - return true; -} - -static inline bool ipa_cmd_data_valid(struct ipa *ipa) -{ - return true; -} - -#endif /* !IPA_VALIDATE */ - /** * ipa_cmd_pool_init() - initialize command channel pools * @channel: AP->IPA command TX GSI channel pointer diff --git a/drivers/net/ipa/ipa_data-v4.11.c b/drivers/net/ipa/ipa_data-v4.11.c index 9353efbd504f..782f67e3e079 100644 --- a/drivers/net/ipa/ipa_data-v4.11.c +++ b/drivers/net/ipa/ipa_data-v4.11.c @@ -105,6 +105,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .filter_support = true, .config = { .resource_group = IPA_RSRC_GROUP_SRC_UL_DL, + .checksum = true, .qmap = true, .status_enable = true, .tx = { @@ -128,6 +129,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .endpoint = { .config = { .resource_group = IPA_RSRC_GROUP_DST_UL_DL_DPL, + .checksum = true, .qmap = true, .aggregation = true, .rx = { @@ -368,18 +370,13 @@ static const struct ipa_mem_data ipa_mem_data = { static const struct ipa_interconnect_data ipa_interconnect_data[] = { { .name = "memory", - .peak_bandwidth = 465000, /* 465 MBps */ - .average_bandwidth = 80000, /* 80 MBps */ - }, - /* Average rate is unused for the next two interconnects */ - { - .name = "imem", - .peak_bandwidth = 68570, /* 68.57 MBps */ - .average_bandwidth = 80000, /* 80 MBps (unused?) 
*/ + .peak_bandwidth = 600000, /* 600 MBps */ + .average_bandwidth = 150000, /* 150 MBps */ }, + /* Average rate is unused for the next interconnect */ { .name = "config", - .peak_bandwidth = 30000, /* 30 MBps */ + .peak_bandwidth = 74000, /* 74 MBps */ .average_bandwidth = 0, /* unused */ }, }; diff --git a/drivers/net/ipa/ipa_data-v4.5.c b/drivers/net/ipa/ipa_data-v4.5.c index a99b6478fa3a..db6fda2fe43d 100644 --- a/drivers/net/ipa/ipa_data-v4.5.c +++ b/drivers/net/ipa/ipa_data-v4.5.c @@ -114,6 +114,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .filter_support = true, .config = { .resource_group = IPA_RSRC_GROUP_SRC_UL_DL, + .checksum = true, .qmap = true, .status_enable = true, .tx = { @@ -137,6 +138,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .endpoint = { .config = { .resource_group = IPA_RSRC_GROUP_DST_UL_DL_DPL, + .checksum = true, .qmap = true, .aggregation = true, .rx = { diff --git a/drivers/net/ipa/ipa_data-v4.9.c b/drivers/net/ipa/ipa_data-v4.9.c index 798d43e1eb13..6ab928266b5c 100644 --- a/drivers/net/ipa/ipa_data-v4.9.c +++ b/drivers/net/ipa/ipa_data-v4.9.c @@ -106,6 +106,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .filter_support = true, .config = { .resource_group = IPA_RSRC_GROUP_SRC_UL_DL, + .checksum = true, .qmap = true, .status_enable = true, .tx = { @@ -129,6 +130,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .endpoint = { .config = { .resource_group = IPA_RSRC_GROUP_DST_UL_DL_DPL, + .checksum = true, .qmap = true, .aggregation = true, .rx = { diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index ab02669bae4e..8070d1a1d5df 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -250,17 +250,18 @@ ipa_endpoint_init_ctrl(struct ipa_endpoint *endpoint, bool suspend_delay) /* Suspend is not supported for IPA v4.0+. Delay doesn't work * correctly on IPA v4.2. 
- * - * if (endpoint->toward_ipa) - * assert(ipa->version != IPA_VERSION_4.2); - * else - * assert(ipa->version < IPA_VERSION_4_0); */ + if (endpoint->toward_ipa) + WARN_ON(ipa->version == IPA_VERSION_4_2); + else + WARN_ON(ipa->version >= IPA_VERSION_4_0); + mask = endpoint->toward_ipa ? ENDP_DELAY_FMASK : ENDP_SUSPEND_FMASK; val = ioread32(ipa->reg_virt + offset); - /* Don't bother if it's already in the requested state */ state = !!(val & mask); + + /* Don't bother if it's already in the requested state */ if (suspend_delay != state) { val ^= mask; iowrite32(val, ipa->reg_virt + offset); @@ -273,7 +274,7 @@ ipa_endpoint_init_ctrl(struct ipa_endpoint *endpoint, bool suspend_delay) static void ipa_endpoint_program_delay(struct ipa_endpoint *endpoint, bool enable) { - /* assert(endpoint->toward_ipa); */ + WARN_ON(!endpoint->toward_ipa); /* Delay mode doesn't work properly for IPA v4.2 */ if (endpoint->ipa->version != IPA_VERSION_4_2) @@ -287,7 +288,8 @@ static bool ipa_endpoint_aggr_active(struct ipa_endpoint *endpoint) u32 offset; u32 val; - /* assert(mask & ipa->available); */ + WARN_ON(!(mask & ipa->available)); + offset = ipa_reg_state_aggr_active_offset(ipa->version); val = ioread32(ipa->reg_virt + offset); @@ -299,7 +301,8 @@ static void ipa_endpoint_force_close(struct ipa_endpoint *endpoint) u32 mask = BIT(endpoint->endpoint_id); struct ipa *ipa = endpoint->ipa; - /* assert(mask & ipa->available); */ + WARN_ON(!(mask & ipa->available)); + iowrite32(mask, ipa->reg_virt + IPA_REG_AGGR_FORCE_CLOSE_OFFSET); } @@ -338,7 +341,7 @@ ipa_endpoint_program_suspend(struct ipa_endpoint *endpoint, bool enable) if (endpoint->ipa->version >= IPA_VERSION_4_0) return enable; /* For IPA v4.0+, no change made */ - /* assert(!endpoint->toward_ipa); */ + WARN_ON(endpoint->toward_ipa); suspended = ipa_endpoint_init_ctrl(endpoint, enable); @@ -1156,7 +1159,8 @@ static bool ipa_endpoint_skb_build(struct ipa_endpoint *endpoint, if (!endpoint->netdev) return false; - /* assert(len <= 
SKB_WITH_OVERHEAD(IPA_RX_BUFFER_SIZE-NET_SKB_PAD)); */ + WARN_ON(len > SKB_WITH_OVERHEAD(IPA_RX_BUFFER_SIZE - NET_SKB_PAD)); + skb = build_skb(page_address(page), IPA_RX_BUFFER_SIZE); if (skb) { /* Reserve the headroom and account for the data */ diff --git a/drivers/net/ipa/ipa_interrupt.c b/drivers/net/ipa/ipa_interrupt.c index c46df0b7c4e5..9fd158dd9047 100644 --- a/drivers/net/ipa/ipa_interrupt.c +++ b/drivers/net/ipa/ipa_interrupt.c @@ -146,7 +146,7 @@ static void ipa_interrupt_suspend_control(struct ipa_interrupt *interrupt, u32 offset; u32 val; - /* assert(mask & ipa->available); */ + WARN_ON(!(mask & ipa->available)); /* IPA version 3.0 does not support TX_SUSPEND interrupt control */ if (ipa->version == IPA_VERSION_3_0) @@ -206,7 +206,8 @@ void ipa_interrupt_add(struct ipa_interrupt *interrupt, struct ipa *ipa = interrupt->ipa; u32 offset; - /* assert(ipa_irq < IPA_IRQ_COUNT); */ + WARN_ON(ipa_irq >= IPA_IRQ_COUNT); + interrupt->handler[ipa_irq] = handler; /* Update the IPA interrupt mask to enable it */ @@ -222,7 +223,8 @@ ipa_interrupt_remove(struct ipa_interrupt *interrupt, enum ipa_irq_id ipa_irq) struct ipa *ipa = interrupt->ipa; u32 offset; - /* assert(ipa_irq < IPA_IRQ_COUNT); */ + WARN_ON(ipa_irq >= IPA_IRQ_COUNT); + /* Update the IPA interrupt mask to disable it */ interrupt->enabled &= ~BIT(ipa_irq); offset = ipa_reg_irq_en_offset(ipa->version); @@ -231,8 +233,8 @@ ipa_interrupt_remove(struct ipa_interrupt *interrupt, enum ipa_irq_id ipa_irq) interrupt->handler[ipa_irq] = NULL; } -/* Set up the IPA interrupt framework */ -struct ipa_interrupt *ipa_interrupt_setup(struct ipa *ipa) +/* Configure the IPA interrupt framework */ +struct ipa_interrupt *ipa_interrupt_config(struct ipa *ipa) { struct device *dev = &ipa->pdev->dev; struct ipa_interrupt *interrupt; @@ -281,8 +283,8 @@ err_kfree: return ERR_PTR(ret); } -/* Tear down the IPA interrupt framework */ -void ipa_interrupt_teardown(struct ipa_interrupt *interrupt) +/* Inverse of 
ipa_interrupt_config() */ +void ipa_interrupt_deconfig(struct ipa_interrupt *interrupt) { struct device *dev = &interrupt->ipa->pdev->dev; int ret; diff --git a/drivers/net/ipa/ipa_interrupt.h b/drivers/net/ipa/ipa_interrupt.h index d5c486a6800d..231390cea52a 100644 --- a/drivers/net/ipa/ipa_interrupt.h +++ b/drivers/net/ipa/ipa_interrupt.h @@ -86,17 +86,17 @@ void ipa_interrupt_suspend_clear_all(struct ipa_interrupt *interrupt); void ipa_interrupt_simulate_suspend(struct ipa_interrupt *interrupt); /** - * ipa_interrupt_setup() - Set up the IPA interrupt framework + * ipa_interrupt_config() - Configure the IPA interrupt framework * @ipa: IPA pointer * * Return: Pointer to IPA SMP2P info, or a pointer-coded error */ -struct ipa_interrupt *ipa_interrupt_setup(struct ipa *ipa); +struct ipa_interrupt *ipa_interrupt_config(struct ipa *ipa); /** - * ipa_interrupt_teardown() - Tear down the IPA interrupt framework + * ipa_interrupt_deconfig() - Inverse of ipa_interrupt_config() * @interrupt: IPA interrupt structure */ -void ipa_interrupt_teardown(struct ipa_interrupt *interrupt); +void ipa_interrupt_deconfig(struct ipa_interrupt *interrupt); #endif /* _IPA_INTERRUPT_H_ */ diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c index 9810c61a0320..5bcc6cd13a9a 100644 --- a/drivers/net/ipa/ipa_main.c +++ b/drivers/net/ipa/ipa_main.c @@ -124,19 +124,12 @@ int ipa_setup(struct ipa *ipa) if (ret) return ret; - ipa->interrupt = ipa_interrupt_setup(ipa); - if (IS_ERR(ipa->interrupt)) { - ret = PTR_ERR(ipa->interrupt); - goto err_gsi_teardown; - } ipa_interrupt_add(ipa->interrupt, IPA_IRQ_TX_SUSPEND, ipa_suspend_handler); - ipa_uc_setup(ipa); - ret = device_init_wakeup(dev, true); if (ret) - goto err_uc_teardown; + goto err_interrupt_remove; ipa_endpoint_setup(ipa); @@ -167,7 +160,7 @@ int ipa_setup(struct ipa *ipa) ipa_endpoint_default_route_set(ipa, exception_endpoint->endpoint_id); /* We're all set. 
Now prepare for communication with the modem */ - ret = ipa_modem_setup(ipa); + ret = ipa_qmi_setup(ipa); if (ret) goto err_default_route_clear; @@ -185,11 +178,8 @@ err_command_disable: err_endpoint_teardown: ipa_endpoint_teardown(ipa); (void)device_init_wakeup(dev, false); -err_uc_teardown: - ipa_uc_teardown(ipa); +err_interrupt_remove: ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_TX_SUSPEND); - ipa_interrupt_teardown(ipa->interrupt); -err_gsi_teardown: gsi_teardown(&ipa->gsi); return ret; @@ -204,7 +194,7 @@ static void ipa_teardown(struct ipa *ipa) struct ipa_endpoint *exception_endpoint; struct ipa_endpoint *command_endpoint; - ipa_modem_teardown(ipa); + ipa_qmi_teardown(ipa); ipa_endpoint_default_route_clear(ipa); exception_endpoint = ipa->name_map[IPA_ENDPOINT_AP_LAN_RX]; ipa_endpoint_disable_one(exception_endpoint); @@ -212,9 +202,7 @@ static void ipa_teardown(struct ipa *ipa) ipa_endpoint_disable_one(command_endpoint); ipa_endpoint_teardown(ipa); (void)device_init_wakeup(&ipa->pdev->dev, false); - ipa_uc_teardown(ipa); ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_TX_SUSPEND); - ipa_interrupt_teardown(ipa->interrupt); gsi_teardown(&ipa->gsi); } @@ -253,9 +241,6 @@ ipa_hardware_config_qsb(struct ipa *ipa, const struct ipa_data *data) const struct ipa_qsb_data *data1; u32 val; - /* assert(data->qsb_count > 0); */ - /* assert(data->qsb_count < 3); */ - /* QMB 0 represents DDR; QMB 1 (if present) represents PCIe */ data0 = &data->qsb_data[IPA_QSB_MASTER_DDR]; if (data->qsb_count > 1) @@ -293,7 +278,7 @@ ipa_hardware_config_qsb(struct ipa *ipa, const struct ipa_data *data) */ static u32 ipa_aggr_granularity_val(u32 usec) { - /* assert(usec != 0); */ + WARN_ON(!usec); return DIV_ROUND_CLOSEST(usec * TIMER_FREQUENCY, USEC_PER_SEC) - 1; } @@ -471,31 +456,44 @@ static int ipa_config(struct ipa *ipa, const struct ipa_data *data) ipa_hardware_config(ipa, data); - ret = ipa_endpoint_config(ipa); + ret = ipa_mem_config(ipa); if (ret) goto err_hardware_deconfig; - ret = 
ipa_mem_config(ipa); + ipa->interrupt = ipa_interrupt_config(ipa); + if (IS_ERR(ipa->interrupt)) { + ret = PTR_ERR(ipa->interrupt); + ipa->interrupt = NULL; + goto err_mem_deconfig; + } + + ipa_uc_config(ipa); + + ret = ipa_endpoint_config(ipa); if (ret) - goto err_endpoint_deconfig; + goto err_interrupt_deconfig; ipa_table_config(ipa); /* No deconfig required */ /* Assign resource limitation to each group; no deconfig required */ ret = ipa_resource_config(ipa, data->resource_data); if (ret) - goto err_mem_deconfig; + goto err_endpoint_deconfig; ret = ipa_modem_config(ipa); if (ret) - goto err_mem_deconfig; + goto err_endpoint_deconfig; return 0; -err_mem_deconfig: - ipa_mem_deconfig(ipa); err_endpoint_deconfig: ipa_endpoint_deconfig(ipa); +err_interrupt_deconfig: + ipa_uc_deconfig(ipa); + ipa_interrupt_deconfig(ipa->interrupt); + ipa->interrupt = NULL; +err_mem_deconfig: + ipa_mem_deconfig(ipa); err_hardware_deconfig: ipa_hardware_deconfig(ipa); ipa_clock_put(ipa); @@ -510,8 +508,11 @@ err_hardware_deconfig: static void ipa_deconfig(struct ipa *ipa) { ipa_modem_deconfig(ipa); - ipa_mem_deconfig(ipa); ipa_endpoint_deconfig(ipa); + ipa_uc_deconfig(ipa); + ipa_interrupt_deconfig(ipa->interrupt); + ipa->interrupt = NULL; + ipa_mem_deconfig(ipa); ipa_hardware_deconfig(ipa); ipa_clock_put(ipa); } @@ -612,7 +613,6 @@ MODULE_DEVICE_TABLE(of, ipa_match); * */ static void ipa_validate_build(void) { -#ifdef IPA_VALIDATE /* At one time we assumed a 64-bit build, allowing some do_div() * calls to be replaced by simple division or modulo operations. 
* We currently only perform divide and modulo operations on u32, @@ -646,7 +646,6 @@ static void ipa_validate_build(void) BUILD_BUG_ON(!ipa_aggr_granularity_val(IPA_AGGR_GRANULARITY)); BUILD_BUG_ON(ipa_aggr_granularity_val(IPA_AGGR_GRANULARITY) > field_max(AGGR_GRANULARITY_FMASK)); -#endif /* IPA_VALIDATE */ } static bool ipa_version_valid(enum ipa_version version) diff --git a/drivers/net/ipa/ipa_modem.c b/drivers/net/ipa/ipa_modem.c index af9aedbde717..c851e2cf1255 100644 --- a/drivers/net/ipa/ipa_modem.c +++ b/drivers/net/ipa/ipa_modem.c @@ -19,6 +19,7 @@ #include "ipa_modem.h" #include "ipa_smp2p.h" #include "ipa_qmi.h" +#include "ipa_uc.h" #define IPA_NETDEV_NAME "rmnet_ipa%d" #define IPA_NETDEV_TAILROOM 0 /* for padding by mux layer */ @@ -314,6 +315,7 @@ static int ipa_modem_notify(struct notifier_block *nb, unsigned long action, switch (action) { case QCOM_SSR_BEFORE_POWERUP: dev_info(dev, "received modem starting event\n"); + ipa_uc_clock(ipa); ipa_smp2p_notify_reset(ipa); break; @@ -377,13 +379,3 @@ void ipa_modem_deconfig(struct ipa *ipa) ipa->notifier = NULL; memset(&ipa->nb, 0, sizeof(ipa->nb)); } - -int ipa_modem_setup(struct ipa *ipa) -{ - return ipa_qmi_setup(ipa); -} - -void ipa_modem_teardown(struct ipa *ipa) -{ - ipa_qmi_teardown(ipa); -} diff --git a/drivers/net/ipa/ipa_modem.h b/drivers/net/ipa/ipa_modem.h index 2de3e216d1d4..5e6e3d234454 100644 --- a/drivers/net/ipa/ipa_modem.h +++ b/drivers/net/ipa/ipa_modem.h @@ -7,7 +7,6 @@ #define _IPA_MODEM_H_ struct ipa; -struct ipa_endpoint; struct net_device; struct sk_buff; @@ -25,7 +24,4 @@ void ipa_modem_exit(struct ipa *ipa); int ipa_modem_config(struct ipa *ipa); void ipa_modem_deconfig(struct ipa *ipa); -int ipa_modem_setup(struct ipa *ipa); -void ipa_modem_teardown(struct ipa *ipa); - #endif /* _IPA_MODEM_H_ */ diff --git a/drivers/net/ipa/ipa_qmi.c b/drivers/net/ipa/ipa_qmi.c index 4661105ce7ab..90f3aec55b36 100644 --- a/drivers/net/ipa/ipa_qmi.c +++ b/drivers/net/ipa/ipa_qmi.c @@ -467,10 
+467,7 @@ static const struct qmi_ops ipa_client_ops = { .new_server = ipa_client_new_server, }; -/* This is called by ipa_setup(). We can be informed via remoteproc that - * the modem has shut down, in which case this function will be called - * again to prepare for it coming back up again. - */ +/* Set up for QMI message exchange */ int ipa_qmi_setup(struct ipa *ipa) { struct ipa_qmi *ipa_qmi = &ipa->qmi; @@ -526,6 +523,7 @@ err_server_handle_release: return ret; } +/* Tear down IPA QMI handles */ void ipa_qmi_teardown(struct ipa *ipa) { cancel_work_sync(&ipa->qmi.init_driver_work); diff --git a/drivers/net/ipa/ipa_qmi.h b/drivers/net/ipa/ipa_qmi.h index b6f2055d35a6..856ef629ccc8 100644 --- a/drivers/net/ipa/ipa_qmi.h +++ b/drivers/net/ipa/ipa_qmi.h @@ -39,7 +39,26 @@ struct ipa_qmi { bool indication_sent; }; +/** + * ipa_qmi_setup() - Set up for QMI message exchange + * @ipa: IPA pointer + * + * This is called at the end of ipa_setup(), to prepare for the exchange + * of QMI messages that perform a "handshake" between the AP and modem. + * When the modem QMI server announces its presence, an AP request message + * supplies operating parameters to be used to the modem, and the modem + * acknowledges receipt of those parameters. The modem will not touch the + * IPA hardware until this handshake is complete. + * + * If the modem crashes (or shuts down) a new handshake begins when the + * modem's QMI server is started again. 
+ */ int ipa_qmi_setup(struct ipa *ipa); + +/** + * ipa_qmi_teardown() - Tear down IPA QMI handles + * @ipa: IPA pointer + */ void ipa_qmi_teardown(struct ipa *ipa); #endif /* !_IPA_QMI_H_ */ diff --git a/drivers/net/ipa/ipa_reg.h b/drivers/net/ipa/ipa_reg.h index b89dec5865a5..a5b355384d4a 100644 --- a/drivers/net/ipa/ipa_reg.h +++ b/drivers/net/ipa/ipa_reg.h @@ -99,7 +99,7 @@ struct ipa; static inline u32 arbitration_lock_disable_encoded(enum ipa_version version, u32 mask) { - /* assert(version >= IPA_VERSION_4_0); */ + WARN_ON(version < IPA_VERSION_4_0); if (version < IPA_VERSION_4_9) return u32_encode_bits(mask, GENMASK(20, 17)); @@ -116,7 +116,7 @@ static inline u32 full_flush_rsc_closure_en_encoded(enum ipa_version version, { u32 val = enable ? 1 : 0; - /* assert(version >= IPA_VERSION_4_5); */ + WARN_ON(version < IPA_VERSION_4_5); if (version == IPA_VERSION_4_5 || version == IPA_VERSION_4_7) return u32_encode_bits(val, GENMASK(21, 21)); @@ -409,7 +409,7 @@ static inline u32 ipa_header_size_encoded(enum ipa_version version, val = u32_encode_bits(size, HDR_LEN_FMASK); if (version < IPA_VERSION_4_5) { - /* ipa_assert(header_size == size); */ + WARN_ON(header_size != size); return val; } @@ -429,7 +429,7 @@ static inline u32 ipa_metadata_offset_encoded(enum ipa_version version, val = u32_encode_bits(off, HDR_OFST_METADATA_FMASK); if (version < IPA_VERSION_4_5) { - /* ipa_assert(offset == off); */ + WARN_ON(offset != off); return val; } @@ -812,7 +812,7 @@ ipa_reg_irq_suspend_info_offset(enum ipa_version version) static inline u32 ipa_reg_irq_suspend_en_ee_n_offset(enum ipa_version version, u32 ee) { - /* assert(version != IPA_VERSION_3_0); */ + WARN_ON(version == IPA_VERSION_3_0); if (version < IPA_VERSION_4_9) return 0x00003034 + 0x1000 * ee; @@ -830,7 +830,7 @@ ipa_reg_irq_suspend_en_offset(enum ipa_version version) static inline u32 ipa_reg_irq_suspend_clr_ee_n_offset(enum ipa_version version, u32 ee) { - /* assert(version != IPA_VERSION_3_0); */ + 
WARN_ON(version == IPA_VERSION_3_0); if (version < IPA_VERSION_4_9) return 0x00003038 + 0x1000 * ee; diff --git a/drivers/net/ipa/ipa_resource.c b/drivers/net/ipa/ipa_resource.c index 3b2dc216d3a6..e3da95d69409 100644 --- a/drivers/net/ipa/ipa_resource.c +++ b/drivers/net/ipa/ipa_resource.c @@ -29,7 +29,6 @@ static bool ipa_resource_limits_valid(struct ipa *ipa, const struct ipa_resource_data *data) { -#ifdef IPA_VALIDATION u32 group_count; u32 i; u32 j; @@ -65,7 +64,7 @@ static bool ipa_resource_limits_valid(struct ipa *ipa, if (resource->limits[j].min || resource->limits[j].max) return false; } -#endif /* !IPA_VALIDATION */ + return true; } diff --git a/drivers/net/ipa/ipa_table.c b/drivers/net/ipa/ipa_table.c index c617a9156f26..2324e1b93e37 100644 --- a/drivers/net/ipa/ipa_table.c +++ b/drivers/net/ipa/ipa_table.c @@ -120,8 +120,6 @@ */ #define IPA_ZERO_RULE_SIZE (2 * sizeof(__le32)) -#ifdef IPA_VALIDATE - /* Check things that can be validated at build time. */ static void ipa_table_validate_build(void) { @@ -161,7 +159,7 @@ ipa_table_valid_one(struct ipa *ipa, enum ipa_mem_id mem_id, bool route) else size = (1 + IPA_FILTER_COUNT_MAX) * sizeof(__le64); - if (!ipa_cmd_table_valid(ipa, mem, route, ipv6, hashed)) + if (!ipa_cmd_table_valid(ipa, mem, route)) return false; /* mem->size >= size is sufficient, but we'll demand more */ @@ -169,7 +167,7 @@ ipa_table_valid_one(struct ipa *ipa, enum ipa_mem_id mem_id, bool route) return true; /* Hashed table regions can be zero size if hashing is not supported */ - if (hashed && !mem->size) + if (ipa_table_hash_support(ipa) && !mem->size) return true; dev_err(dev, "%s table region %u size 0x%02x, expected 0x%02x\n", @@ -183,14 +181,22 @@ bool ipa_table_valid(struct ipa *ipa) { bool valid; - valid = ipa_table_valid_one(IPA_MEM_V4_FILTER, false); - valid = valid && ipa_table_valid_one(IPA_MEM_V4_FILTER_HASHED, false); - valid = valid && ipa_table_valid_one(IPA_MEM_V6_FILTER, false); - valid = valid && 
ipa_table_valid_one(IPA_MEM_V6_FILTER_HASHED, false); - valid = valid && ipa_table_valid_one(IPA_MEM_V4_ROUTE, true); - valid = valid && ipa_table_valid_one(IPA_MEM_V4_ROUTE_HASHED, true); - valid = valid && ipa_table_valid_one(IPA_MEM_V6_ROUTE, true); - valid = valid && ipa_table_valid_one(IPA_MEM_V6_ROUTE_HASHED, true); + valid = ipa_table_valid_one(ipa, IPA_MEM_V4_FILTER, false); + valid = valid && ipa_table_valid_one(ipa, IPA_MEM_V6_FILTER, false); + valid = valid && ipa_table_valid_one(ipa, IPA_MEM_V4_ROUTE, true); + valid = valid && ipa_table_valid_one(ipa, IPA_MEM_V6_ROUTE, true); + + if (!ipa_table_hash_support(ipa)) + return valid; + + valid = valid && ipa_table_valid_one(ipa, IPA_MEM_V4_FILTER_HASHED, + false); + valid = valid && ipa_table_valid_one(ipa, IPA_MEM_V6_FILTER_HASHED, + false); + valid = valid && ipa_table_valid_one(ipa, IPA_MEM_V4_ROUTE_HASHED, + true); + valid = valid && ipa_table_valid_one(ipa, IPA_MEM_V6_ROUTE_HASHED, + true); return valid; } @@ -217,14 +223,6 @@ bool ipa_filter_map_valid(struct ipa *ipa, u32 filter_map) return true; } -#else /* !IPA_VALIDATE */ -static void ipa_table_validate_build(void) - -{ -} - -#endif /* !IPA_VALIDATE */ - /* Zero entry count means no table, so just return a 0 address */ static dma_addr_t ipa_table_addr(struct ipa *ipa, bool filter_mask, u16 count) { @@ -233,7 +231,7 @@ static dma_addr_t ipa_table_addr(struct ipa *ipa, bool filter_mask, u16 count) if (!count) return 0; -/* assert(count <= max_t(u32, IPA_FILTER_COUNT_MAX, IPA_ROUTE_COUNT_MAX)); */ + WARN_ON(count > max_t(u32, IPA_FILTER_COUNT_MAX, IPA_ROUTE_COUNT_MAX)); /* Skip over the zero rule and possibly the filter mask */ skip = filter_mask ? 
1 : 2; diff --git a/drivers/net/ipa/ipa_table.h b/drivers/net/ipa/ipa_table.h index 1e2be9fce2f8..b6a9a0d79d68 100644 --- a/drivers/net/ipa/ipa_table.h +++ b/drivers/net/ipa/ipa_table.h @@ -16,8 +16,6 @@ struct ipa; /* The maximum number of route table entries (IPv4, IPv6; hashed or not) */ #define IPA_ROUTE_COUNT_MAX 15 -#ifdef IPA_VALIDATE - /** * ipa_table_valid() - Validate route and filter table memory regions * @ipa: IPA pointer @@ -35,20 +33,6 @@ bool ipa_table_valid(struct ipa *ipa); */ bool ipa_filter_map_valid(struct ipa *ipa, u32 filter_mask); -#else /* !IPA_VALIDATE */ - -static inline bool ipa_table_valid(struct ipa *ipa) -{ - return true; -} - -static inline bool ipa_filter_map_valid(struct ipa *ipa, u32 filter_mask) -{ - return true; -} - -#endif /* !IPA_VALIDATE */ - /** * ipa_table_hash_support() - Return true if hashed tables are supported * @ipa: IPA pointer diff --git a/drivers/net/ipa/ipa_uc.c b/drivers/net/ipa/ipa_uc.c index fd9219863234..f88ee02457d4 100644 --- a/drivers/net/ipa/ipa_uc.c +++ b/drivers/net/ipa/ipa_uc.c @@ -131,7 +131,7 @@ static void ipa_uc_event_handler(struct ipa *ipa, enum ipa_irq_id irq_id) if (shared->event == IPA_UC_EVENT_ERROR) dev_err(dev, "microcontroller error event\n"); else if (shared->event != IPA_UC_EVENT_LOG_INFO) - dev_err(dev, "unsupported microcontroller event %hhu\n", + dev_err(dev, "unsupported microcontroller event %u\n", shared->event); /* The LOG_INFO event can be safely ignored */ } @@ -140,55 +140,65 @@ static void ipa_uc_event_handler(struct ipa *ipa, enum ipa_irq_id irq_id) static void ipa_uc_response_hdlr(struct ipa *ipa, enum ipa_irq_id irq_id) { struct ipa_uc_mem_area *shared = ipa_uc_shared(ipa); + struct device *dev = &ipa->pdev->dev; /* An INIT_COMPLETED response message is sent to the AP by the * microcontroller when it is operational. Other than this, the AP * should only receive responses from the microcontroller when it has * sent it a request message. 
* - * We can drop the clock reference taken in ipa_uc_setup() once we + * We can drop the clock reference taken in ipa_uc_clock() once we * know the microcontroller has finished its initialization. */ switch (shared->response) { case IPA_UC_RESPONSE_INIT_COMPLETED: - ipa->uc_loaded = true; - ipa_clock_put(ipa); + if (ipa->uc_clocked) { + ipa->uc_loaded = true; + ipa_clock_put(ipa); + ipa->uc_clocked = false; + } else { + dev_warn(dev, "unexpected init_completed response\n"); + } break; default: - dev_warn(&ipa->pdev->dev, - "unsupported microcontroller response %hhu\n", + dev_warn(dev, "unsupported microcontroller response %u\n", shared->response); break; } } -/* ipa_uc_setup() - Set up the microcontroller */ -void ipa_uc_setup(struct ipa *ipa) +/* Configure the IPA microcontroller subsystem */ +void ipa_uc_config(struct ipa *ipa) { - /* The microcontroller needs the IPA clock running until it has - * completed its initialization. It signals this by sending an - * INIT_COMPLETED response message to the AP. This could occur after - * we have finished doing the rest of the IPA initialization, so we - * need to take an extra "proxy" reference, and hold it until we've - * received that signal. (This reference is dropped in - * ipa_uc_response_hdlr(), above.) 
- */ - ipa_clock_get(ipa); - + ipa->uc_clocked = false; ipa->uc_loaded = false; ipa_interrupt_add(ipa->interrupt, IPA_IRQ_UC_0, ipa_uc_event_handler); ipa_interrupt_add(ipa->interrupt, IPA_IRQ_UC_1, ipa_uc_response_hdlr); } -/* Inverse of ipa_uc_setup() */ -void ipa_uc_teardown(struct ipa *ipa) +/* Inverse of ipa_uc_config() */ +void ipa_uc_deconfig(struct ipa *ipa) { ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_UC_1); ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_UC_0); - if (!ipa->uc_loaded) + if (ipa->uc_clocked) ipa_clock_put(ipa); } +/* Take a proxy clock reference for the microcontroller */ +void ipa_uc_clock(struct ipa *ipa) +{ + static bool already; + + if (already) + return; + already = true; /* Only do this on first boot */ + + /* This clock reference dropped in ipa_uc_response_hdlr() above */ + ipa_clock_get(ipa); + ipa->uc_clocked = true; +} + /* Send a command to the microcontroller */ static void send_uc_command(struct ipa *ipa, u32 command, u32 command_param) { diff --git a/drivers/net/ipa/ipa_uc.h b/drivers/net/ipa/ipa_uc.h index e8510899a3f0..14e4e1115aa7 100644 --- a/drivers/net/ipa/ipa_uc.h +++ b/drivers/net/ipa/ipa_uc.h @@ -9,16 +9,30 @@ struct ipa; /** - * ipa_uc_setup() - set up the IPA microcontroller subsystem + * ipa_uc_config() - Configure the IPA microcontroller subsystem * @ipa: IPA pointer */ -void ipa_uc_setup(struct ipa *ipa); +void ipa_uc_config(struct ipa *ipa); /** - * ipa_uc_teardown() - inverse of ipa_uc_setup() + * ipa_uc_deconfig() - Inverse of ipa_uc_config() * @ipa: IPA pointer */ -void ipa_uc_teardown(struct ipa *ipa); +void ipa_uc_deconfig(struct ipa *ipa); + +/** + * ipa_uc_clock() - Take a proxy clock reference for the microcontroller + * @ipa: IPA pointer + * + * The first time the modem boots, it loads firmware for and starts the + * IPA-resident microcontroller. The microcontroller signals that it + * has completed its initialization by sending an INIT_COMPLETED response + * message to the AP. 
The AP must ensure the IPA core clock is operating + * until it receives this message, and to do so we take a "proxy" clock + * reference on its behalf here. Once we receive the INIT_COMPLETED + * message (in ipa_uc_response_hdlr()) we drop this clock reference. + */ +void ipa_uc_clock(struct ipa *ipa); /** * ipa_uc_panic_notifier() diff --git a/drivers/net/mhi/net.c b/drivers/net/mhi/net.c index e60e38c1f09d..0cc7dcd0ff96 100644 --- a/drivers/net/mhi/net.c +++ b/drivers/net/mhi/net.c @@ -205,11 +205,6 @@ static void mhi_net_dl_callback(struct mhi_device *mhi_dev, mhi_netdev->skbagg_head = NULL; } - u64_stats_update_begin(&mhi_netdev->stats.rx_syncp); - u64_stats_inc(&mhi_netdev->stats.rx_packets); - u64_stats_add(&mhi_netdev->stats.rx_bytes, skb->len); - u64_stats_update_end(&mhi_netdev->stats.rx_syncp); - switch (skb->data[0] & 0xf0) { case 0x40: skb->protocol = htons(ETH_P_IP); @@ -222,10 +217,15 @@ static void mhi_net_dl_callback(struct mhi_device *mhi_dev, break; } - if (proto && proto->rx) + if (proto && proto->rx) { proto->rx(mhi_netdev, skb); - else + } else { + u64_stats_update_begin(&mhi_netdev->stats.rx_syncp); + u64_stats_inc(&mhi_netdev->stats.rx_packets); + u64_stats_add(&mhi_netdev->stats.rx_bytes, skb->len); + u64_stats_update_end(&mhi_netdev->stats.rx_syncp); netif_rx(skb); + } } /* Refill if RX buffers queue becomes low */ @@ -329,6 +329,7 @@ static int mhi_net_newlink(void *ctxt, struct net_device *ndev, u32 if_id, mhi_netdev->mdev = mhi_dev; mhi_netdev->skbagg_head = NULL; mhi_netdev->proto = info->proto; + mhi_netdev->mru = mhi_dev->mhi_cntrl->mru; INIT_DELAYED_WORK(&mhi_netdev->rx_refill, mhi_net_rx_refill_work); u64_stats_init(&mhi_netdev->stats.rx_syncp); diff --git a/drivers/net/mhi/proto_mbim.c b/drivers/net/mhi/proto_mbim.c index bf1ad863237d..761d90b28ee6 100644 --- a/drivers/net/mhi/proto_mbim.c +++ b/drivers/net/mhi/proto_mbim.c @@ -211,6 +211,10 @@ static void mbim_rx(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb) continue; } + 
u64_stats_update_begin(&mhi_netdev->stats.rx_syncp); + u64_stats_inc(&mhi_netdev->stats.rx_packets); + u64_stats_add(&mhi_netdev->stats.rx_bytes, skbn->len); + u64_stats_update_end(&mhi_netdev->stats.rx_syncp); netif_rx(skbn); } next_ndp: @@ -292,7 +296,9 @@ static int mbim_init(struct mhi_net_dev *mhi_netdev) ndev->needed_headroom = sizeof(struct mbim_tx_hdr); ndev->mtu = MHI_MBIM_DEFAULT_MTU; - mhi_netdev->mru = MHI_MBIM_DEFAULT_MRU; + + if (!mhi_netdev->mru) + mhi_netdev->mru = MHI_MBIM_DEFAULT_MRU; return 0; } diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c index ccec29970d5b..ff01e5bdc72e 100644 --- a/drivers/net/netdevsim/bus.c +++ b/drivers/net/netdevsim/bus.c @@ -262,29 +262,31 @@ static struct device_type nsim_bus_dev_type = { }; static struct nsim_bus_dev * -nsim_bus_dev_new(unsigned int id, unsigned int port_count); +nsim_bus_dev_new(unsigned int id, unsigned int port_count, unsigned int num_queues); static ssize_t new_device_store(struct bus_type *bus, const char *buf, size_t count) { + unsigned int id, port_count, num_queues; struct nsim_bus_dev *nsim_bus_dev; - unsigned int port_count; - unsigned int id; int err; - err = sscanf(buf, "%u %u", &id, &port_count); + err = sscanf(buf, "%u %u %u", &id, &port_count, &num_queues); switch (err) { case 1: port_count = 1; fallthrough; case 2: + num_queues = 1; + fallthrough; + case 3: if (id > INT_MAX) { pr_err("Value of \"id\" is too big.\n"); return -EINVAL; } break; default: - pr_err("Format for adding new device is \"id port_count\" (uint uint).\n"); + pr_err("Format for adding new device is \"id port_count num_queues\" (uint uint unit).\n"); return -EINVAL; } @@ -295,7 +297,7 @@ new_device_store(struct bus_type *bus, const char *buf, size_t count) goto err; } - nsim_bus_dev = nsim_bus_dev_new(id, port_count); + nsim_bus_dev = nsim_bus_dev_new(id, port_count, num_queues); if (IS_ERR(nsim_bus_dev)) { err = PTR_ERR(nsim_bus_dev); goto err; @@ -397,7 +399,7 @@ static struct bus_type 
nsim_bus = { #define NSIM_BUS_DEV_MAX_VFS 4 static struct nsim_bus_dev * -nsim_bus_dev_new(unsigned int id, unsigned int port_count) +nsim_bus_dev_new(unsigned int id, unsigned int port_count, unsigned int num_queues) { struct nsim_bus_dev *nsim_bus_dev; int err; @@ -413,6 +415,7 @@ nsim_bus_dev_new(unsigned int id, unsigned int port_count) nsim_bus_dev->dev.bus = &nsim_bus; nsim_bus_dev->dev.type = &nsim_bus_dev_type; nsim_bus_dev->port_count = port_count; + nsim_bus_dev->num_queues = num_queues; nsim_bus_dev->initial_net = current->nsproxy->net_ns; nsim_bus_dev->max_vfs = NSIM_BUS_DEV_MAX_VFS; mutex_init(&nsim_bus_dev->nsim_bus_reload_lock); diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index c3aeb15843e2..50572e0f1f52 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -347,7 +347,8 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) struct netdevsim *ns; int err; - dev = alloc_netdev(sizeof(*ns), "eth%d", NET_NAME_UNKNOWN, nsim_setup); + dev = alloc_netdev_mq(sizeof(*ns), "eth%d", NET_NAME_UNKNOWN, nsim_setup, + nsim_dev->nsim_bus_dev->num_queues); if (!dev) return ERR_PTR(-ENOMEM); @@ -392,7 +393,8 @@ void nsim_destroy(struct netdevsim *ns) static int nsim_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { - NL_SET_ERR_MSG_MOD(extack, "Please use: echo \"[ID] [PORT_COUNT]\" > /sys/bus/netdevsim/new_device"); + NL_SET_ERR_MSG_MOD(extack, + "Please use: echo \"[ID] [PORT_COUNT] [NUM_QUEUES]\" > /sys/bus/netdevsim/new_device"); return -EOPNOTSUPP; } diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index ae462957dcee..1c20bcbd9d91 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -352,6 +352,7 @@ struct nsim_bus_dev { struct device dev; struct list_head list; unsigned int port_count; + unsigned int num_queues; /* Number of queues for each port on this bus */ struct 
net *initial_net; /* Purpose of this is to carry net pointer * during the probe time only. */ diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index c56f703ae998..7564ae0c1997 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -207,6 +207,12 @@ config MARVELL_88X2222_PHY Support for the Marvell 88X2222 Dual-port Multi-speed Ethernet Transceiver. +config MAXLINEAR_GPHY + tristate "Maxlinear Ethernet PHYs" + help + Support for the Maxlinear GPY115, GPY211, GPY212, GPY215, + GPY241, GPY245 PHYs. + config MEDIATEK_GE_PHY tristate "MediaTek Gigabit Ethernet PHYs" help diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index 172bb193ae6a..b2728d00fc9a 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -64,6 +64,7 @@ obj-$(CONFIG_LXT_PHY) += lxt.o obj-$(CONFIG_MARVELL_10G_PHY) += marvell10g.o obj-$(CONFIG_MARVELL_PHY) += marvell.o obj-$(CONFIG_MARVELL_88X2222_PHY) += marvell-88x2222.o +obj-$(CONFIG_MAXLINEAR_GPHY) += mxl-gpy.o obj-$(CONFIG_MEDIATEK_GE_PHY) += mediatek-ge.o obj-$(CONFIG_MESON_GXL_PHY) += meson-gxl.o obj-$(CONFIG_MICREL_KS8995MA) += spi_ks8995.o diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index 5d62b85a4024..bdac087058b2 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -532,12 +532,6 @@ static int at8031_register_regulators(struct phy_device *phydev) return 0; } -static bool at803x_match_phy_id(struct phy_device *phydev, u32 phy_id) -{ - return (phydev->phy_id & phydev->drv->phy_id_mask) - == (phy_id & phydev->drv->phy_id_mask); -} - static int at803x_parse_dt(struct phy_device *phydev) { struct device_node *node = phydev->mdio.dev.of_node; @@ -602,8 +596,8 @@ static int at803x_parse_dt(struct phy_device *phydev) * to the AR8030 so there might be a good chance it works on * the AR8030 too. 
*/ - if (at803x_match_phy_id(phydev, ATH8030_PHY_ID) || - at803x_match_phy_id(phydev, ATH8035_PHY_ID)) { + if (phydev->drv->phy_id == ATH8030_PHY_ID || + phydev->drv->phy_id == ATH8035_PHY_ID) { priv->clk_25m_reg &= AT8035_CLK_OUT_MASK; priv->clk_25m_mask &= AT8035_CLK_OUT_MASK; } @@ -631,7 +625,7 @@ static int at803x_parse_dt(struct phy_device *phydev) /* Only supported on AR8031/AR8033, the AR8030/AR8035 use strapping * options. */ - if (at803x_match_phy_id(phydev, ATH8031_PHY_ID)) { + if (phydev->drv->phy_id == ATH8031_PHY_ID) { if (of_property_read_bool(node, "qca,keep-pll-enabled")) priv->flags |= AT803X_KEEP_PLL_ENABLED; @@ -676,7 +670,7 @@ static int at803x_probe(struct phy_device *phydev) * Switch to the copper page, as otherwise we read * the PHY capabilities from the fiber side. */ - if (at803x_match_phy_id(phydev, ATH8031_PHY_ID)) { + if (phydev->drv->phy_id == ATH8031_PHY_ID) { phy_lock_mdio_bus(phydev); ret = at803x_write_page(phydev, AT803X_PAGE_COPPER); phy_unlock_mdio_bus(phydev); @@ -709,7 +703,7 @@ static int at803x_get_features(struct phy_device *phydev) if (err) return err; - if (!at803x_match_phy_id(phydev, ATH8031_PHY_ID)) + if (phydev->drv->phy_id != ATH8031_PHY_ID) return 0; /* AR8031/AR8033 have different status registers @@ -820,7 +814,7 @@ static int at803x_config_init(struct phy_device *phydev) if (ret < 0) return ret; - if (at803x_match_phy_id(phydev, ATH8031_PHY_ID)) { + if (phydev->drv->phy_id == ATH8031_PHY_ID) { ret = at8031_pll_config(phydev); if (ret < 0) return ret; diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index f7a2ec150e54..211b5476a6f5 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -326,11 +326,9 @@ static irqreturn_t dp83822_handle_interrupt(struct phy_device *phydev) static int dp8382x_disable_wol(struct phy_device *phydev) { - int value = DP83822_WOL_EN | DP83822_WOL_MAGIC_EN | - DP83822_WOL_SECURE_ON; - - return phy_clear_bits_mmd(phydev, DP83822_DEVADDR, - 
MII_DP83822_WOL_CFG, value); + return phy_clear_bits_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG, + DP83822_WOL_EN | DP83822_WOL_MAGIC_EN | + DP83822_WOL_SECURE_ON); } static int dp83822_read_status(struct phy_device *phydev) diff --git a/drivers/net/phy/intel-xway.c b/drivers/net/phy/intel-xway.c index d453ec016168..3c032868ef04 100644 --- a/drivers/net/phy/intel-xway.c +++ b/drivers/net/phy/intel-xway.c @@ -8,11 +8,16 @@ #include <linux/module.h> #include <linux/phy.h> #include <linux/of.h> +#include <linux/bitfield.h> +#define XWAY_MDIO_MIICTRL 0x17 /* mii control */ #define XWAY_MDIO_IMASK 0x19 /* interrupt mask */ #define XWAY_MDIO_ISTAT 0x1A /* interrupt status */ #define XWAY_MDIO_LED 0x1B /* led control */ +#define XWAY_MDIO_MIICTRL_RXSKEW_MASK GENMASK(14, 12) +#define XWAY_MDIO_MIICTRL_TXSKEW_MASK GENMASK(10, 8) + /* bit 15:12 are reserved */ #define XWAY_MDIO_LED_LED3_EN BIT(11) /* Enable the integrated function of LED3 */ #define XWAY_MDIO_LED_LED2_EN BIT(10) /* Enable the integrated function of LED2 */ @@ -157,6 +162,73 @@ #define PHY_ID_PHY11G_VR9_1_2 0xD565A409 #define PHY_ID_PHY22F_VR9_1_2 0xD565A419 +static const int xway_internal_delay[] = {0, 500, 1000, 1500, 2000, 2500, + 3000, 3500}; + +static int xway_gphy_rgmii_init(struct phy_device *phydev) +{ + struct device *dev = &phydev->mdio.dev; + unsigned int delay_size = ARRAY_SIZE(xway_internal_delay); + s32 int_delay; + int val = 0; + + if (!phy_interface_is_rgmii(phydev)) + return 0; + + /* Existing behavior was to use default pin strapping delay in rgmii + * mode, but rgmii should have meant no delay. Warn existing users, + * but do not change anything at the moment. 
+ */ + if (phydev->interface == PHY_INTERFACE_MODE_RGMII) { + u16 txskew, rxskew; + + val = phy_read(phydev, XWAY_MDIO_MIICTRL); + if (val < 0) + return val; + + txskew = FIELD_GET(XWAY_MDIO_MIICTRL_TXSKEW_MASK, val); + rxskew = FIELD_GET(XWAY_MDIO_MIICTRL_RXSKEW_MASK, val); + + if (txskew > 0 || rxskew > 0) + phydev_warn(phydev, + "PHY has delays (e.g. via pin strapping), but phy-mode = 'rgmii'\n" + "Should be 'rgmii-id' to use internal delays txskew:%d ps rxskew:%d ps\n", + xway_internal_delay[txskew], + xway_internal_delay[rxskew]); + return 0; + } + + if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID || + phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) { + int_delay = phy_get_internal_delay(phydev, dev, + xway_internal_delay, + delay_size, true); + + /* if rx-internal-delay-ps is missing, use default of 2.0 ns */ + if (int_delay < 0) + int_delay = 4; /* 2000 ps */ + + val |= FIELD_PREP(XWAY_MDIO_MIICTRL_RXSKEW_MASK, int_delay); + } + + if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID || + phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) { + int_delay = phy_get_internal_delay(phydev, dev, + xway_internal_delay, + delay_size, false); + + /* if tx-internal-delay-ps is missing, use default of 2.0 ns */ + if (int_delay < 0) + int_delay = 4; /* 2000 ps */ + + val |= FIELD_PREP(XWAY_MDIO_MIICTRL_TXSKEW_MASK, int_delay); + } + + return phy_modify(phydev, XWAY_MDIO_MIICTRL, + XWAY_MDIO_MIICTRL_RXSKEW_MASK | + XWAY_MDIO_MIICTRL_TXSKEW_MASK, val); +} + static int xway_gphy_config_init(struct phy_device *phydev) { int err; @@ -204,6 +276,10 @@ static int xway_gphy_config_init(struct phy_device *phydev) phy_write_mmd(phydev, MDIO_MMD_VEND2, XWAY_MMD_LED2H, ledxh); phy_write_mmd(phydev, MDIO_MMD_VEND2, XWAY_MMD_LED2L, ledxl); + err = xway_gphy_rgmii_init(phydev); + if (err) + return err; + return 0; } diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index 53a433442803..0b7cae118ad7 100644 --- a/drivers/net/phy/marvell10g.c +++ 
b/drivers/net/phy/marvell10g.c @@ -28,6 +28,7 @@ #include <linux/marvell_phy.h> #include <linux/phy.h> #include <linux/sfp.h> +#include <linux/netdevice.h> #define MV_PHY_ALASKA_NBT_QUIRK_MASK 0xfffffffe #define MV_PHY_ALASKA_NBT_QUIRK_REV (MARVELL_PHY_ID_88X3310 | 0xa) @@ -104,6 +105,16 @@ enum { MV_V2_33X0_PORT_CTRL_MACTYPE_10GBASER_NO_SGMII_AN = 0x5, MV_V2_33X0_PORT_CTRL_MACTYPE_10GBASER_RATE_MATCH = 0x6, MV_V2_33X0_PORT_CTRL_MACTYPE_USXGMII = 0x7, + MV_V2_PORT_INTR_STS = 0xf040, + MV_V2_PORT_INTR_MASK = 0xf043, + MV_V2_PORT_INTR_STS_WOL_EN = BIT(8), + MV_V2_MAGIC_PKT_WORD0 = 0xf06b, + MV_V2_MAGIC_PKT_WORD1 = 0xf06c, + MV_V2_MAGIC_PKT_WORD2 = 0xf06d, + /* Wake on LAN registers */ + MV_V2_WOL_CTRL = 0xf06e, + MV_V2_WOL_CTRL_CLEAR_STS = BIT(15), + MV_V2_WOL_CTRL_MAGIC_PKT_EN = BIT(0), /* Temperature control/read registers (88X3310 only) */ MV_V2_TEMP_CTRL = 0xf08a, MV_V2_TEMP_CTRL_MASK = 0xc000, @@ -1020,6 +1031,80 @@ static int mv2111_match_phy_device(struct phy_device *phydev) return mv211x_match_phy_device(phydev, false); } +static void mv3110_get_wol(struct phy_device *phydev, + struct ethtool_wolinfo *wol) +{ + int ret; + + wol->supported = WAKE_MAGIC; + wol->wolopts = 0; + + ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, MV_V2_WOL_CTRL); + if (ret < 0) + return; + + if (ret & MV_V2_WOL_CTRL_MAGIC_PKT_EN) + wol->wolopts |= WAKE_MAGIC; +} + +static int mv3110_set_wol(struct phy_device *phydev, + struct ethtool_wolinfo *wol) +{ + int ret; + + if (wol->wolopts & WAKE_MAGIC) { + /* Enable the WOL interrupt */ + ret = phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, + MV_V2_PORT_INTR_MASK, + MV_V2_PORT_INTR_STS_WOL_EN); + if (ret < 0) + return ret; + + /* Store the device address for the magic packet */ + ret = phy_write_mmd(phydev, MDIO_MMD_VEND2, + MV_V2_MAGIC_PKT_WORD2, + ((phydev->attached_dev->dev_addr[5] << 8) | + phydev->attached_dev->dev_addr[4])); + if (ret < 0) + return ret; + + ret = phy_write_mmd(phydev, MDIO_MMD_VEND2, + MV_V2_MAGIC_PKT_WORD1, + 
((phydev->attached_dev->dev_addr[3] << 8) | + phydev->attached_dev->dev_addr[2])); + if (ret < 0) + return ret; + + ret = phy_write_mmd(phydev, MDIO_MMD_VEND2, + MV_V2_MAGIC_PKT_WORD0, + ((phydev->attached_dev->dev_addr[1] << 8) | + phydev->attached_dev->dev_addr[0])); + if (ret < 0) + return ret; + + /* Clear WOL status and enable magic packet matching */ + ret = phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, + MV_V2_WOL_CTRL, + MV_V2_WOL_CTRL_MAGIC_PKT_EN | + MV_V2_WOL_CTRL_CLEAR_STS); + if (ret < 0) + return ret; + } else { + /* Disable magic packet matching & reset WOL status bit */ + ret = phy_modify_mmd(phydev, MDIO_MMD_VEND2, + MV_V2_WOL_CTRL, + MV_V2_WOL_CTRL_MAGIC_PKT_EN, + MV_V2_WOL_CTRL_CLEAR_STS); + if (ret < 0) + return ret; + } + + /* Reset the clear WOL status bit as it does not self-clear */ + return phy_clear_bits_mmd(phydev, MDIO_MMD_VEND2, + MV_V2_WOL_CTRL, + MV_V2_WOL_CTRL_CLEAR_STS); +} + static struct phy_driver mv3310_drivers[] = { { .phy_id = MARVELL_PHY_ID_88X3310, @@ -1039,6 +1124,8 @@ static struct phy_driver mv3310_drivers[] = { .set_tunable = mv3310_set_tunable, .remove = mv3310_remove, .set_loopback = genphy_c45_loopback, + .get_wol = mv3110_get_wol, + .set_wol = mv3110_set_wol, }, { .phy_id = MARVELL_PHY_ID_88X3310, @@ -1076,6 +1163,8 @@ static struct phy_driver mv3310_drivers[] = { .set_tunable = mv3310_set_tunable, .remove = mv3310_remove, .set_loopback = genphy_c45_loopback, + .get_wol = mv3110_get_wol, + .set_wol = mv3110_set_wol, }, { .phy_id = MARVELL_PHY_ID_88E2110, diff --git a/drivers/net/phy/mxl-gpy.c b/drivers/net/phy/mxl-gpy.c new file mode 100644 index 000000000000..2d5d5081c3b6 --- /dev/null +++ b/drivers/net/phy/mxl-gpy.c @@ -0,0 +1,727 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2021 Maxlinear Corporation + * Copyright (C) 2020 Intel Corporation + * + * Drivers for Maxlinear Ethernet GPY + * + */ + +#include <linux/module.h> +#include <linux/bitfield.h> +#include <linux/phy.h> +#include <linux/netdevice.h> + 
+/* PHY ID */ +#define PHY_ID_GPYx15B_MASK 0xFFFFFFFC +#define PHY_ID_GPY21xB_MASK 0xFFFFFFF9 +#define PHY_ID_GPY2xx 0x67C9DC00 +#define PHY_ID_GPY115B 0x67C9DF00 +#define PHY_ID_GPY115C 0x67C9DF10 +#define PHY_ID_GPY211B 0x67C9DE08 +#define PHY_ID_GPY211C 0x67C9DE10 +#define PHY_ID_GPY212B 0x67C9DE09 +#define PHY_ID_GPY212C 0x67C9DE20 +#define PHY_ID_GPY215B 0x67C9DF04 +#define PHY_ID_GPY215C 0x67C9DF20 +#define PHY_ID_GPY241B 0x67C9DE40 +#define PHY_ID_GPY241BM 0x67C9DE80 +#define PHY_ID_GPY245B 0x67C9DEC0 + +#define PHY_MIISTAT 0x18 /* MII state */ +#define PHY_IMASK 0x19 /* interrupt mask */ +#define PHY_ISTAT 0x1A /* interrupt status */ +#define PHY_FWV 0x1E /* firmware version */ + +#define PHY_MIISTAT_SPD_MASK GENMASK(2, 0) +#define PHY_MIISTAT_DPX BIT(3) +#define PHY_MIISTAT_LS BIT(10) + +#define PHY_MIISTAT_SPD_10 0 +#define PHY_MIISTAT_SPD_100 1 +#define PHY_MIISTAT_SPD_1000 2 +#define PHY_MIISTAT_SPD_2500 4 + +#define PHY_IMASK_WOL BIT(15) /* Wake-on-LAN */ +#define PHY_IMASK_ANC BIT(10) /* Auto-Neg complete */ +#define PHY_IMASK_ADSC BIT(5) /* Link auto-downspeed detect */ +#define PHY_IMASK_DXMC BIT(2) /* Duplex mode change */ +#define PHY_IMASK_LSPC BIT(1) /* Link speed change */ +#define PHY_IMASK_LSTC BIT(0) /* Link state change */ +#define PHY_IMASK_MASK (PHY_IMASK_LSTC | \ + PHY_IMASK_LSPC | \ + PHY_IMASK_DXMC | \ + PHY_IMASK_ADSC | \ + PHY_IMASK_ANC) + +#define PHY_FWV_REL_MASK BIT(15) +#define PHY_FWV_TYPE_MASK GENMASK(11, 8) +#define PHY_FWV_MINOR_MASK GENMASK(7, 0) + +/* SGMII */ +#define VSPEC1_SGMII_CTRL 0x08 +#define VSPEC1_SGMII_CTRL_ANEN BIT(12) /* Aneg enable */ +#define VSPEC1_SGMII_CTRL_ANRS BIT(9) /* Restart Aneg */ +#define VSPEC1_SGMII_ANEN_ANRS (VSPEC1_SGMII_CTRL_ANEN | \ + VSPEC1_SGMII_CTRL_ANRS) + +/* WoL */ +#define VPSPEC2_WOL_CTL 0x0E06 +#define VPSPEC2_WOL_AD01 0x0E08 +#define VPSPEC2_WOL_AD23 0x0E09 +#define VPSPEC2_WOL_AD45 0x0E0A +#define WOL_EN BIT(0) + +static const struct { + int type; + int minor; +} 
ver_need_sgmii_reaneg[] = { + {7, 0x6D}, + {8, 0x6D}, + {9, 0x73}, +}; + +static int gpy_config_init(struct phy_device *phydev) +{ + int ret; + + /* Mask all interrupts */ + ret = phy_write(phydev, PHY_IMASK, 0); + if (ret) + return ret; + + /* Clear all pending interrupts */ + ret = phy_read(phydev, PHY_ISTAT); + return ret < 0 ? ret : 0; +} + +static int gpy_probe(struct phy_device *phydev) +{ + int ret; + + if (!phydev->is_c45) { + ret = phy_get_c45_ids(phydev); + if (ret < 0) + return ret; + } + + /* Show GPY PHY FW version in dmesg */ + ret = phy_read(phydev, PHY_FWV); + if (ret < 0) + return ret; + + phydev_info(phydev, "Firmware Version: 0x%04X (%s)\n", ret, + (ret & PHY_FWV_REL_MASK) ? "release" : "test"); + + return 0; +} + +static bool gpy_sgmii_need_reaneg(struct phy_device *phydev) +{ + int fw_ver, fw_type, fw_minor; + size_t i; + + fw_ver = phy_read(phydev, PHY_FWV); + if (fw_ver < 0) + return true; + + fw_type = FIELD_GET(PHY_FWV_TYPE_MASK, fw_ver); + fw_minor = FIELD_GET(PHY_FWV_MINOR_MASK, fw_ver); + + for (i = 0; i < ARRAY_SIZE(ver_need_sgmii_reaneg); i++) { + if (fw_type != ver_need_sgmii_reaneg[i].type) + continue; + if (fw_minor < ver_need_sgmii_reaneg[i].minor) + return true; + break; + } + + return false; +} + +static bool gpy_2500basex_chk(struct phy_device *phydev) +{ + int ret; + + ret = phy_read(phydev, PHY_MIISTAT); + if (ret < 0) { + phydev_err(phydev, "Error: MDIO register access failed: %d\n", + ret); + return false; + } + + if (!(ret & PHY_MIISTAT_LS) || + FIELD_GET(PHY_MIISTAT_SPD_MASK, ret) != PHY_MIISTAT_SPD_2500) + return false; + + phydev->speed = SPEED_2500; + phydev->interface = PHY_INTERFACE_MODE_2500BASEX; + phy_modify_mmd(phydev, MDIO_MMD_VEND1, VSPEC1_SGMII_CTRL, + VSPEC1_SGMII_CTRL_ANEN, 0); + return true; +} + +static bool gpy_sgmii_aneg_en(struct phy_device *phydev) +{ + int ret; + + ret = phy_read_mmd(phydev, MDIO_MMD_VEND1, VSPEC1_SGMII_CTRL); + if (ret < 0) { + phydev_err(phydev, "Error: MMD register access failed: 
%d\n", + ret); + return true; + } + + return (ret & VSPEC1_SGMII_CTRL_ANEN) ? true : false; +} + +static int gpy_config_aneg(struct phy_device *phydev) +{ + bool changed = false; + u32 adv; + int ret; + + if (phydev->autoneg == AUTONEG_DISABLE) { + /* Configure half duplex with genphy_setup_forced, + * because genphy_c45_pma_setup_forced does not support it. + */ + return phydev->duplex != DUPLEX_FULL + ? genphy_setup_forced(phydev) + : genphy_c45_pma_setup_forced(phydev); + } + + ret = genphy_c45_an_config_aneg(phydev); + if (ret < 0) + return ret; + if (ret > 0) + changed = true; + + adv = linkmode_adv_to_mii_ctrl1000_t(phydev->advertising); + ret = phy_modify_changed(phydev, MII_CTRL1000, + ADVERTISE_1000FULL | ADVERTISE_1000HALF, + adv); + if (ret < 0) + return ret; + if (ret > 0) + changed = true; + + ret = genphy_c45_check_and_restart_aneg(phydev, changed); + if (ret < 0) + return ret; + + if (phydev->interface == PHY_INTERFACE_MODE_USXGMII || + phydev->interface == PHY_INTERFACE_MODE_INTERNAL) + return 0; + + /* No need to trigger re-ANEG if link speed is 2.5G or SGMII ANEG is + * disabled. + */ + if (!gpy_sgmii_need_reaneg(phydev) || gpy_2500basex_chk(phydev) || + !gpy_sgmii_aneg_en(phydev)) + return 0; + + /* There is a design constraint in GPY2xx device where SGMII AN is + * only triggered when there is change of speed. If, PHY link + * partner's speed is still same even after PHY TPI is down and up + * again, SGMII AN is not triggered and hence no new in-band message + * from GPY to MAC side SGMII. + * This could cause an issue during power up, when PHY is up prior to + * MAC. At this condition, once MAC side SGMII is up, MAC side SGMII + * wouldn't receive new in-band message from GPY with correct link + * status, speed and duplex info. + * + * 1) If PHY is already up and TPI link status is still down (such as + * hard reboot), TPI link status is polled for 4 seconds before + * retriggering SGMII AN. 
+ * 2) If PHY is already up and TPI link status is also up (such as soft + * reboot), polling of TPI link status is not needed and SGMII AN is + * immediately retriggered. + * 3) Other conditions such as PHY is down, speed change etc, skip + * retriggering SGMII AN. Note: in case of speed change, GPY FW will + * initiate SGMII AN. + */ + + if (phydev->state != PHY_UP) + return 0; + + ret = phy_read_poll_timeout(phydev, MII_BMSR, ret, ret & BMSR_LSTATUS, + 20000, 4000000, false); + if (ret == -ETIMEDOUT) + return 0; + else if (ret < 0) + return ret; + + /* Trigger SGMII AN. */ + return phy_modify_mmd(phydev, MDIO_MMD_VEND1, VSPEC1_SGMII_CTRL, + VSPEC1_SGMII_CTRL_ANRS, VSPEC1_SGMII_CTRL_ANRS); +} + +static void gpy_update_interface(struct phy_device *phydev) +{ + int ret; + + /* Interface mode is fixed for USXGMII and integrated PHY */ + if (phydev->interface == PHY_INTERFACE_MODE_USXGMII || + phydev->interface == PHY_INTERFACE_MODE_INTERNAL) + return; + + /* Automatically switch SERDES interface between SGMII and 2500-BaseX + * according to speed. Disable ANEG in 2500-BaseX mode. + */ + switch (phydev->speed) { + case SPEED_2500: + phydev->interface = PHY_INTERFACE_MODE_2500BASEX; + ret = phy_modify_mmd(phydev, MDIO_MMD_VEND1, VSPEC1_SGMII_CTRL, + VSPEC1_SGMII_CTRL_ANEN, 0); + if (ret < 0) + phydev_err(phydev, + "Error: Disable of SGMII ANEG failed: %d\n", + ret); + break; + case SPEED_1000: + case SPEED_100: + case SPEED_10: + phydev->interface = PHY_INTERFACE_MODE_SGMII; + if (gpy_sgmii_aneg_en(phydev)) + break; + /* Enable and restart SGMII ANEG for 10/100/1000Mbps link speed + * if ANEG is disabled (in 2500-BaseX mode). 
+ */ + ret = phy_modify_mmd(phydev, MDIO_MMD_VEND1, VSPEC1_SGMII_CTRL, + VSPEC1_SGMII_ANEN_ANRS, + VSPEC1_SGMII_ANEN_ANRS); + if (ret < 0) + phydev_err(phydev, + "Error: Enable of SGMII ANEG failed: %d\n", + ret); + break; + } +} + +static int gpy_read_status(struct phy_device *phydev) +{ + int ret; + + ret = genphy_update_link(phydev); + if (ret) + return ret; + + phydev->speed = SPEED_UNKNOWN; + phydev->duplex = DUPLEX_UNKNOWN; + phydev->pause = 0; + phydev->asym_pause = 0; + + if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) { + ret = genphy_c45_read_lpa(phydev); + if (ret < 0) + return ret; + + /* Read the link partner's 1G advertisement */ + ret = phy_read(phydev, MII_STAT1000); + if (ret < 0) + return ret; + mii_stat1000_mod_linkmode_lpa_t(phydev->lp_advertising, ret); + } else if (phydev->autoneg == AUTONEG_DISABLE) { + linkmode_zero(phydev->lp_advertising); + } + + ret = phy_read(phydev, PHY_MIISTAT); + if (ret < 0) + return ret; + + phydev->link = (ret & PHY_MIISTAT_LS) ? 1 : 0; + phydev->duplex = (ret & PHY_MIISTAT_DPX) ? 
DUPLEX_FULL : DUPLEX_HALF; + switch (FIELD_GET(PHY_MIISTAT_SPD_MASK, ret)) { + case PHY_MIISTAT_SPD_10: + phydev->speed = SPEED_10; + break; + case PHY_MIISTAT_SPD_100: + phydev->speed = SPEED_100; + break; + case PHY_MIISTAT_SPD_1000: + phydev->speed = SPEED_1000; + break; + case PHY_MIISTAT_SPD_2500: + phydev->speed = SPEED_2500; + break; + } + + if (phydev->link) + gpy_update_interface(phydev); + + return 0; +} + +static int gpy_config_intr(struct phy_device *phydev) +{ + u16 mask = 0; + + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) + mask = PHY_IMASK_MASK; + + return phy_write(phydev, PHY_IMASK, mask); +} + +static irqreturn_t gpy_handle_interrupt(struct phy_device *phydev) +{ + int reg; + + reg = phy_read(phydev, PHY_ISTAT); + if (reg < 0) { + phy_error(phydev); + return IRQ_NONE; + } + + if (!(reg & PHY_IMASK_MASK)) + return IRQ_NONE; + + phy_trigger_machine(phydev); + + return IRQ_HANDLED; +} + +static int gpy_set_wol(struct phy_device *phydev, + struct ethtool_wolinfo *wol) +{ + struct net_device *attach_dev = phydev->attached_dev; + int ret; + + if (wol->wolopts & WAKE_MAGIC) { + /* MAC address - Byte0:Byte1:Byte2:Byte3:Byte4:Byte5 + * VPSPEC2_WOL_AD45 = Byte0:Byte1 + * VPSPEC2_WOL_AD23 = Byte2:Byte3 + * VPSPEC2_WOL_AD01 = Byte4:Byte5 + */ + ret = phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, + VPSPEC2_WOL_AD45, + ((attach_dev->dev_addr[0] << 8) | + attach_dev->dev_addr[1])); + if (ret < 0) + return ret; + + ret = phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, + VPSPEC2_WOL_AD23, + ((attach_dev->dev_addr[2] << 8) | + attach_dev->dev_addr[3])); + if (ret < 0) + return ret; + + ret = phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, + VPSPEC2_WOL_AD01, + ((attach_dev->dev_addr[4] << 8) | + attach_dev->dev_addr[5])); + if (ret < 0) + return ret; + + /* Enable the WOL interrupt */ + ret = phy_write(phydev, PHY_IMASK, PHY_IMASK_WOL); + if (ret < 0) + return ret; + + /* Enable magic packet matching */ + ret = phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, + VPSPEC2_WOL_CTL, + 
WOL_EN); + if (ret < 0) + return ret; + + /* Clear the interrupt status register. + * Only WoL is enabled so clear all. + */ + ret = phy_read(phydev, PHY_ISTAT); + if (ret < 0) + return ret; + } else { + /* Disable magic packet matching */ + ret = phy_clear_bits_mmd(phydev, MDIO_MMD_VEND2, + VPSPEC2_WOL_CTL, + WOL_EN); + if (ret < 0) + return ret; + } + + if (wol->wolopts & WAKE_PHY) { + /* Enable the link state change interrupt */ + ret = phy_set_bits(phydev, PHY_IMASK, PHY_IMASK_LSTC); + if (ret < 0) + return ret; + + /* Clear the interrupt status register */ + ret = phy_read(phydev, PHY_ISTAT); + if (ret < 0) + return ret; + + if (ret & (PHY_IMASK_MASK & ~PHY_IMASK_LSTC)) + phy_trigger_machine(phydev); + + return 0; + } + + /* Disable the link state change interrupt */ + return phy_clear_bits(phydev, PHY_IMASK, PHY_IMASK_LSTC); +} + +static void gpy_get_wol(struct phy_device *phydev, + struct ethtool_wolinfo *wol) +{ + int ret; + + wol->supported = WAKE_MAGIC | WAKE_PHY; + wol->wolopts = 0; + + ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, VPSPEC2_WOL_CTL); + if (ret & WOL_EN) + wol->wolopts |= WAKE_MAGIC; + + ret = phy_read(phydev, PHY_IMASK); + if (ret & PHY_IMASK_LSTC) + wol->wolopts |= WAKE_PHY; +} + +static int gpy_loopback(struct phy_device *phydev, bool enable) +{ + int ret; + + ret = phy_modify(phydev, MII_BMCR, BMCR_LOOPBACK, + enable ? BMCR_LOOPBACK : 0); + if (!ret) { + /* It takes some time for PHY device to switch + * into/out-of loopback mode. 
+ */ + msleep(100); + } + + return ret; +} + +static struct phy_driver gpy_drivers[] = { + { + PHY_ID_MATCH_MODEL(PHY_ID_GPY2xx), + .name = "Maxlinear Ethernet GPY2xx", + .get_features = genphy_c45_pma_read_abilities, + .config_init = gpy_config_init, + .probe = gpy_probe, + .suspend = genphy_suspend, + .resume = genphy_resume, + .config_aneg = gpy_config_aneg, + .aneg_done = genphy_c45_aneg_done, + .read_status = gpy_read_status, + .config_intr = gpy_config_intr, + .handle_interrupt = gpy_handle_interrupt, + .set_wol = gpy_set_wol, + .get_wol = gpy_get_wol, + .set_loopback = gpy_loopback, + }, + { + .phy_id = PHY_ID_GPY115B, + .phy_id_mask = PHY_ID_GPYx15B_MASK, + .name = "Maxlinear Ethernet GPY115B", + .get_features = genphy_c45_pma_read_abilities, + .config_init = gpy_config_init, + .probe = gpy_probe, + .suspend = genphy_suspend, + .resume = genphy_resume, + .config_aneg = gpy_config_aneg, + .aneg_done = genphy_c45_aneg_done, + .read_status = gpy_read_status, + .config_intr = gpy_config_intr, + .handle_interrupt = gpy_handle_interrupt, + .set_wol = gpy_set_wol, + .get_wol = gpy_get_wol, + .set_loopback = gpy_loopback, + }, + { + PHY_ID_MATCH_MODEL(PHY_ID_GPY115C), + .name = "Maxlinear Ethernet GPY115C", + .get_features = genphy_c45_pma_read_abilities, + .config_init = gpy_config_init, + .probe = gpy_probe, + .suspend = genphy_suspend, + .resume = genphy_resume, + .config_aneg = gpy_config_aneg, + .aneg_done = genphy_c45_aneg_done, + .read_status = gpy_read_status, + .config_intr = gpy_config_intr, + .handle_interrupt = gpy_handle_interrupt, + .set_wol = gpy_set_wol, + .get_wol = gpy_get_wol, + .set_loopback = gpy_loopback, + }, + { + .phy_id = PHY_ID_GPY211B, + .phy_id_mask = PHY_ID_GPY21xB_MASK, + .name = "Maxlinear Ethernet GPY211B", + .get_features = genphy_c45_pma_read_abilities, + .config_init = gpy_config_init, + .probe = gpy_probe, + .suspend = genphy_suspend, + .resume = genphy_resume, + .config_aneg = gpy_config_aneg, + .aneg_done = 
genphy_c45_aneg_done, + .read_status = gpy_read_status, + .config_intr = gpy_config_intr, + .handle_interrupt = gpy_handle_interrupt, + .set_wol = gpy_set_wol, + .get_wol = gpy_get_wol, + .set_loopback = gpy_loopback, + }, + { + PHY_ID_MATCH_MODEL(PHY_ID_GPY211C), + .name = "Maxlinear Ethernet GPY211C", + .get_features = genphy_c45_pma_read_abilities, + .config_init = gpy_config_init, + .probe = gpy_probe, + .suspend = genphy_suspend, + .resume = genphy_resume, + .config_aneg = gpy_config_aneg, + .aneg_done = genphy_c45_aneg_done, + .read_status = gpy_read_status, + .config_intr = gpy_config_intr, + .handle_interrupt = gpy_handle_interrupt, + .set_wol = gpy_set_wol, + .get_wol = gpy_get_wol, + .set_loopback = gpy_loopback, + }, + { + .phy_id = PHY_ID_GPY212B, + .phy_id_mask = PHY_ID_GPY21xB_MASK, + .name = "Maxlinear Ethernet GPY212B", + .get_features = genphy_c45_pma_read_abilities, + .config_init = gpy_config_init, + .probe = gpy_probe, + .suspend = genphy_suspend, + .resume = genphy_resume, + .config_aneg = gpy_config_aneg, + .aneg_done = genphy_c45_aneg_done, + .read_status = gpy_read_status, + .config_intr = gpy_config_intr, + .handle_interrupt = gpy_handle_interrupt, + .set_wol = gpy_set_wol, + .get_wol = gpy_get_wol, + .set_loopback = gpy_loopback, + }, + { + PHY_ID_MATCH_MODEL(PHY_ID_GPY212C), + .name = "Maxlinear Ethernet GPY212C", + .get_features = genphy_c45_pma_read_abilities, + .config_init = gpy_config_init, + .probe = gpy_probe, + .suspend = genphy_suspend, + .resume = genphy_resume, + .config_aneg = gpy_config_aneg, + .aneg_done = genphy_c45_aneg_done, + .read_status = gpy_read_status, + .config_intr = gpy_config_intr, + .handle_interrupt = gpy_handle_interrupt, + .set_wol = gpy_set_wol, + .get_wol = gpy_get_wol, + .set_loopback = gpy_loopback, + }, + { + .phy_id = PHY_ID_GPY215B, + .phy_id_mask = PHY_ID_GPYx15B_MASK, + .name = "Maxlinear Ethernet GPY215B", + .get_features = genphy_c45_pma_read_abilities, + .config_init = gpy_config_init, + .probe = 
gpy_probe, + .suspend = genphy_suspend, + .resume = genphy_resume, + .config_aneg = gpy_config_aneg, + .aneg_done = genphy_c45_aneg_done, + .read_status = gpy_read_status, + .config_intr = gpy_config_intr, + .handle_interrupt = gpy_handle_interrupt, + .set_wol = gpy_set_wol, + .get_wol = gpy_get_wol, + .set_loopback = gpy_loopback, + }, + { + PHY_ID_MATCH_MODEL(PHY_ID_GPY215C), + .name = "Maxlinear Ethernet GPY215C", + .get_features = genphy_c45_pma_read_abilities, + .config_init = gpy_config_init, + .probe = gpy_probe, + .suspend = genphy_suspend, + .resume = genphy_resume, + .config_aneg = gpy_config_aneg, + .aneg_done = genphy_c45_aneg_done, + .read_status = gpy_read_status, + .config_intr = gpy_config_intr, + .handle_interrupt = gpy_handle_interrupt, + .set_wol = gpy_set_wol, + .get_wol = gpy_get_wol, + .set_loopback = gpy_loopback, + }, + { + PHY_ID_MATCH_MODEL(PHY_ID_GPY241B), + .name = "Maxlinear Ethernet GPY241B", + .get_features = genphy_c45_pma_read_abilities, + .config_init = gpy_config_init, + .probe = gpy_probe, + .suspend = genphy_suspend, + .resume = genphy_resume, + .config_aneg = gpy_config_aneg, + .aneg_done = genphy_c45_aneg_done, + .read_status = gpy_read_status, + .config_intr = gpy_config_intr, + .handle_interrupt = gpy_handle_interrupt, + .set_wol = gpy_set_wol, + .get_wol = gpy_get_wol, + .set_loopback = gpy_loopback, + }, + { + PHY_ID_MATCH_MODEL(PHY_ID_GPY241BM), + .name = "Maxlinear Ethernet GPY241BM", + .get_features = genphy_c45_pma_read_abilities, + .config_init = gpy_config_init, + .probe = gpy_probe, + .suspend = genphy_suspend, + .resume = genphy_resume, + .config_aneg = gpy_config_aneg, + .aneg_done = genphy_c45_aneg_done, + .read_status = gpy_read_status, + .config_intr = gpy_config_intr, + .handle_interrupt = gpy_handle_interrupt, + .set_wol = gpy_set_wol, + .get_wol = gpy_get_wol, + .set_loopback = gpy_loopback, + }, + { + PHY_ID_MATCH_MODEL(PHY_ID_GPY245B), + .name = "Maxlinear Ethernet GPY245B", + .get_features = 
genphy_c45_pma_read_abilities, + .config_init = gpy_config_init, + .probe = gpy_probe, + .suspend = genphy_suspend, + .resume = genphy_resume, + .config_aneg = gpy_config_aneg, + .aneg_done = genphy_c45_aneg_done, + .read_status = gpy_read_status, + .config_intr = gpy_config_intr, + .handle_interrupt = gpy_handle_interrupt, + .set_wol = gpy_set_wol, + .get_wol = gpy_get_wol, + .set_loopback = gpy_loopback, + }, +}; +module_phy_driver(gpy_drivers); + +static struct mdio_device_id __maybe_unused gpy_tbl[] = { + {PHY_ID_MATCH_MODEL(PHY_ID_GPY2xx)}, + {PHY_ID_GPY115B, PHY_ID_GPYx15B_MASK}, + {PHY_ID_MATCH_MODEL(PHY_ID_GPY115C)}, + {PHY_ID_GPY211B, PHY_ID_GPY21xB_MASK}, + {PHY_ID_MATCH_MODEL(PHY_ID_GPY211C)}, + {PHY_ID_GPY212B, PHY_ID_GPY21xB_MASK}, + {PHY_ID_MATCH_MODEL(PHY_ID_GPY212C)}, + {PHY_ID_GPY215B, PHY_ID_GPYx15B_MASK}, + {PHY_ID_MATCH_MODEL(PHY_ID_GPY215C)}, + {PHY_ID_MATCH_MODEL(PHY_ID_GPY241B)}, + {PHY_ID_MATCH_MODEL(PHY_ID_GPY241BM)}, + {PHY_ID_MATCH_MODEL(PHY_ID_GPY245B)}, + { } +}; +MODULE_DEVICE_TABLE(mdio, gpy_tbl); + +MODULE_DESCRIPTION("Maxlinear Ethernet GPY Driver"); +MODULE_AUTHOR("Xu Liang"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 5d5f9a9ee768..107aa6d7bc6b 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -969,6 +969,20 @@ void phy_device_remove(struct phy_device *phydev) EXPORT_SYMBOL(phy_device_remove); /** + * phy_get_c45_ids - Read 802.3-c45 IDs for phy device. + * @phydev: phy_device structure to read 802.3-c45 IDs + * + * Returns zero on success, %-EIO on bus access error, or %-ENODEV if + * the "devices in package" is invalid. 
+ */ +int phy_get_c45_ids(struct phy_device *phydev) +{ + return get_phy_c45_ids(phydev->mdio.bus, phydev->mdio.addr, + &phydev->c45_ids); +} +EXPORT_SYMBOL(phy_get_c45_ids); + +/** * phy_find_first - finds the first PHY device on the bus * @bus: the target MII bus */ diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index eb29ef53d971..2cdf9f989dec 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -942,10 +942,11 @@ static void phylink_phy_change(struct phy_device *phydev, bool up) phylink_run_resolve(pl); - phylink_dbg(pl, "phy link %s %s/%s/%s\n", up ? "up" : "down", + phylink_dbg(pl, "phy link %s %s/%s/%s/%s\n", up ? "up" : "down", phy_modes(phydev->interface), phy_speed_to_str(phydev->speed), - phy_duplex_to_str(phydev->duplex)); + phy_duplex_to_str(phydev->duplex), + phylink_pause_to_str(pl->phy_state.pause)); } static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy, @@ -1457,15 +1458,11 @@ int phylink_ethtool_ksettings_set(struct phylink *pl, return phy_ethtool_ksettings_set(pl->phydev, kset); } - linkmode_copy(support, pl->supported); config = pl->link_config; - config.an_enabled = kset->base.autoneg == AUTONEG_ENABLE; - /* Mask out unsupported advertisements, and force the autoneg bit */ + /* Mask out unsupported advertisements */ linkmode_and(config.advertising, kset->link_modes.advertising, - support); - linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, config.advertising, - config.an_enabled); + pl->supported); /* FIXME: should we reject autoneg if phy/mac does not support it? */ switch (kset->base.autoneg) { @@ -1474,7 +1471,7 @@ int phylink_ethtool_ksettings_set(struct phylink *pl, * duplex. */ s = phy_lookup_setting(kset->base.speed, kset->base.duplex, - support, false); + pl->supported, false); if (!s) return -EINVAL; @@ -1515,6 +1512,12 @@ int phylink_ethtool_ksettings_set(struct phylink *pl, /* We have ruled out the case with a PHY attached, and the * fixed-link cases. 
All that is left are in-band links. */ + config.an_enabled = kset->base.autoneg == AUTONEG_ENABLE; + linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, config.advertising, + config.an_enabled); + + /* Validate without changing the current supported mask. */ + linkmode_copy(support, pl->supported); if (phylink_validate(pl, support, &config)) return -EINVAL; diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 2c115216420a..9b914765c2de 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -714,7 +714,6 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf) u8 buf[ETH_ALEN] = {0}, chipcode = 0; struct asix_common_private *priv; int ret, i; - u32 phyid; usbnet_get_endpoints(dev, intf); @@ -762,10 +761,6 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf) return ret; } - /* Read PHYID register *AFTER* the PHY was reset properly */ - phyid = asix_get_phyid(dev); - netdev_dbg(dev->net, "PHYID=0x%08x\n", phyid); - /* Asix framing packs multiple eth frames into a 2K usb bulk transfer */ if (dev->driver_info->flags & FLAG_FRAMING_AX) { /* hard_mtu is still the default - the device does not support @@ -1215,6 +1210,7 @@ static const struct driver_info ax88772b_info = { .unbind = ax88772_unbind, .status = asix_status, .reset = ax88772_reset, + .stop = ax88772_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_LINK_INTR | FLAG_MULTI_PACKET, .rx_fixup = asix_rx_fixup_common, diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index dec96e8ab567..827d574f764a 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -2353,7 +2353,7 @@ static int remove_net_device(struct hso_device *hso_dev) } /* Frees our network device */ -static void hso_free_net_device(struct hso_device *hso_dev, bool bailout) +static void hso_free_net_device(struct hso_device *hso_dev) { int i; struct hso_net *hso_net = dev2net(hso_dev); @@ -2376,7 +2376,7 @@ static void 
hso_free_net_device(struct hso_device *hso_dev, bool bailout) kfree(hso_net->mux_bulk_tx_buf); hso_net->mux_bulk_tx_buf = NULL; - if (hso_net->net && !bailout) + if (hso_net->net) free_netdev(hso_net->net); kfree(hso_dev); @@ -3133,7 +3133,7 @@ static void hso_free_interface(struct usb_interface *interface) rfkill_unregister(rfk); rfkill_destroy(rfk); } - hso_free_net_device(network_table[i], false); + hso_free_net_device(network_table[i]); } } } diff --git a/drivers/net/veth.c b/drivers/net/veth.c index bdb7ce3cb054..381670c08ba7 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -224,12 +224,13 @@ static void veth_get_channels(struct net_device *dev, { channels->tx_count = dev->real_num_tx_queues; channels->rx_count = dev->real_num_rx_queues; - channels->max_tx = dev->real_num_tx_queues; - channels->max_rx = dev->real_num_rx_queues; - channels->combined_count = min(dev->real_num_rx_queues, dev->real_num_tx_queues); - channels->max_combined = min(dev->real_num_rx_queues, dev->real_num_tx_queues); + channels->max_tx = dev->num_tx_queues; + channels->max_rx = dev->num_rx_queues; } +static int veth_set_channels(struct net_device *dev, + struct ethtool_channels *ch); + static const struct ethtool_ops veth_ethtool_ops = { .get_drvinfo = veth_get_drvinfo, .get_link = ethtool_op_get_link, @@ -239,6 +240,7 @@ static const struct ethtool_ops veth_ethtool_ops = { .get_link_ksettings = veth_get_link_ksettings, .get_ts_info = ethtool_op_get_ts_info, .get_channels = veth_get_channels, + .set_channels = veth_set_channels, }; /* general routines */ @@ -928,12 +930,12 @@ static int veth_poll(struct napi_struct *napi, int budget) return done; } -static int __veth_napi_enable(struct net_device *dev) +static int __veth_napi_enable_range(struct net_device *dev, int start, int end) { struct veth_priv *priv = netdev_priv(dev); int err, i; - for (i = 0; i < dev->real_num_rx_queues; i++) { + for (i = start; i < end; i++) { struct veth_rq *rq = &priv->rq[i]; err = 
ptr_ring_init(&rq->xdp_ring, VETH_RING_SIZE, GFP_KERNEL); @@ -941,7 +943,7 @@ static int __veth_napi_enable(struct net_device *dev) goto err_xdp_ring; } - for (i = 0; i < dev->real_num_rx_queues; i++) { + for (i = start; i < end; i++) { struct veth_rq *rq = &priv->rq[i]; napi_enable(&rq->xdp_napi); @@ -949,19 +951,25 @@ static int __veth_napi_enable(struct net_device *dev) } return 0; + err_xdp_ring: - for (i--; i >= 0; i--) + for (i--; i >= start; i--) ptr_ring_cleanup(&priv->rq[i].xdp_ring, veth_ptr_free); return err; } -static void veth_napi_del(struct net_device *dev) +static int __veth_napi_enable(struct net_device *dev) +{ + return __veth_napi_enable_range(dev, 0, dev->real_num_rx_queues); +} + +static void veth_napi_del_range(struct net_device *dev, int start, int end) { struct veth_priv *priv = netdev_priv(dev); int i; - for (i = 0; i < dev->real_num_rx_queues; i++) { + for (i = start; i < end; i++) { struct veth_rq *rq = &priv->rq[i]; rcu_assign_pointer(priv->rq[i].napi, NULL); @@ -970,7 +978,7 @@ static void veth_napi_del(struct net_device *dev) } synchronize_net(); - for (i = 0; i < dev->real_num_rx_queues; i++) { + for (i = start; i < end; i++) { struct veth_rq *rq = &priv->rq[i]; rq->rx_notify_masked = false; @@ -978,41 +986,90 @@ static void veth_napi_del(struct net_device *dev) } } +static void veth_napi_del(struct net_device *dev) +{ + veth_napi_del_range(dev, 0, dev->real_num_rx_queues); +} + static bool veth_gro_requested(const struct net_device *dev) { return !!(dev->wanted_features & NETIF_F_GRO); } -static int veth_enable_xdp(struct net_device *dev) +static int veth_enable_xdp_range(struct net_device *dev, int start, int end, + bool napi_already_on) { - bool napi_already_on = veth_gro_requested(dev) && (dev->flags & IFF_UP); struct veth_priv *priv = netdev_priv(dev); int err, i; - if (!xdp_rxq_info_is_reg(&priv->rq[0].xdp_rxq)) { - for (i = 0; i < dev->real_num_rx_queues; i++) { - struct veth_rq *rq = &priv->rq[i]; + for (i = start; i < end; 
i++) { + struct veth_rq *rq = &priv->rq[i]; - if (!napi_already_on) - netif_napi_add(dev, &rq->xdp_napi, veth_poll, NAPI_POLL_WEIGHT); - err = xdp_rxq_info_reg(&rq->xdp_rxq, dev, i, rq->xdp_napi.napi_id); - if (err < 0) - goto err_rxq_reg; + if (!napi_already_on) + netif_napi_add(dev, &rq->xdp_napi, veth_poll, NAPI_POLL_WEIGHT); + err = xdp_rxq_info_reg(&rq->xdp_rxq, dev, i, rq->xdp_napi.napi_id); + if (err < 0) + goto err_rxq_reg; - err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, - MEM_TYPE_PAGE_SHARED, - NULL); - if (err < 0) - goto err_reg_mem; + err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, + MEM_TYPE_PAGE_SHARED, + NULL); + if (err < 0) + goto err_reg_mem; - /* Save original mem info as it can be overwritten */ - rq->xdp_mem = rq->xdp_rxq.mem; - } + /* Save original mem info as it can be overwritten */ + rq->xdp_mem = rq->xdp_rxq.mem; + } + return 0; + +err_reg_mem: + xdp_rxq_info_unreg(&priv->rq[i].xdp_rxq); +err_rxq_reg: + for (i--; i >= start; i--) { + struct veth_rq *rq = &priv->rq[i]; + + xdp_rxq_info_unreg(&rq->xdp_rxq); + if (!napi_already_on) + netif_napi_del(&rq->xdp_napi); + } + + return err; +} + +static void veth_disable_xdp_range(struct net_device *dev, int start, int end, + bool delete_napi) +{ + struct veth_priv *priv = netdev_priv(dev); + int i; + + for (i = start; i < end; i++) { + struct veth_rq *rq = &priv->rq[i]; + + rq->xdp_rxq.mem = rq->xdp_mem; + xdp_rxq_info_unreg(&rq->xdp_rxq); + + if (delete_napi) + netif_napi_del(&rq->xdp_napi); + } +} + +static int veth_enable_xdp(struct net_device *dev) +{ + bool napi_already_on = veth_gro_requested(dev) && (dev->flags & IFF_UP); + struct veth_priv *priv = netdev_priv(dev); + int err, i; + + if (!xdp_rxq_info_is_reg(&priv->rq[0].xdp_rxq)) { + err = veth_enable_xdp_range(dev, 0, dev->real_num_rx_queues, napi_already_on); + if (err) + return err; if (!napi_already_on) { err = __veth_napi_enable(dev); - if (err) - goto err_rxq_reg; + if (err) { + veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, 
true); + return err; + } if (!veth_gro_requested(dev)) { /* user-space did not require GRO, but adding XDP @@ -1030,18 +1087,6 @@ static int veth_enable_xdp(struct net_device *dev) } return 0; -err_reg_mem: - xdp_rxq_info_unreg(&priv->rq[i].xdp_rxq); -err_rxq_reg: - for (i--; i >= 0; i--) { - struct veth_rq *rq = &priv->rq[i]; - - xdp_rxq_info_unreg(&rq->xdp_rxq); - if (!napi_already_on) - netif_napi_del(&rq->xdp_napi); - } - - return err; } static void veth_disable_xdp(struct net_device *dev) @@ -1064,28 +1109,23 @@ static void veth_disable_xdp(struct net_device *dev) } } - for (i = 0; i < dev->real_num_rx_queues; i++) { - struct veth_rq *rq = &priv->rq[i]; - - rq->xdp_rxq.mem = rq->xdp_mem; - xdp_rxq_info_unreg(&rq->xdp_rxq); - } + veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, false); } -static int veth_napi_enable(struct net_device *dev) +static int veth_napi_enable_range(struct net_device *dev, int start, int end) { struct veth_priv *priv = netdev_priv(dev); int err, i; - for (i = 0; i < dev->real_num_rx_queues; i++) { + for (i = start; i < end; i++) { struct veth_rq *rq = &priv->rq[i]; netif_napi_add(dev, &rq->xdp_napi, veth_poll, NAPI_POLL_WEIGHT); } - err = __veth_napi_enable(dev); + err = __veth_napi_enable_range(dev, start, end); if (err) { - for (i = 0; i < dev->real_num_rx_queues; i++) { + for (i = start; i < end; i++) { struct veth_rq *rq = &priv->rq[i]; netif_napi_del(&rq->xdp_napi); @@ -1095,6 +1135,128 @@ static int veth_napi_enable(struct net_device *dev) return err; } +static int veth_napi_enable(struct net_device *dev) +{ + return veth_napi_enable_range(dev, 0, dev->real_num_rx_queues); +} + +static void veth_disable_range_safe(struct net_device *dev, int start, int end) +{ + struct veth_priv *priv = netdev_priv(dev); + + if (start >= end) + return; + + if (priv->_xdp_prog) { + veth_napi_del_range(dev, start, end); + veth_disable_xdp_range(dev, start, end, false); + } else if (veth_gro_requested(dev)) { + veth_napi_del_range(dev, start, 
end); + } +} + +static int veth_enable_range_safe(struct net_device *dev, int start, int end) +{ + struct veth_priv *priv = netdev_priv(dev); + int err; + + if (start >= end) + return 0; + + if (priv->_xdp_prog) { + /* these channels are freshly initialized, napi is not on there even + * when GRO is requested + */ + err = veth_enable_xdp_range(dev, start, end, false); + if (err) + return err; + + err = __veth_napi_enable_range(dev, start, end); + if (err) { + /* on error always delete the newly added napis */ + veth_disable_xdp_range(dev, start, end, true); + return err; + } + } else if (veth_gro_requested(dev)) { + return veth_napi_enable_range(dev, start, end); + } + return 0; +} + +static int veth_set_channels(struct net_device *dev, + struct ethtool_channels *ch) +{ + struct veth_priv *priv = netdev_priv(dev); + unsigned int old_rx_count, new_rx_count; + struct veth_priv *peer_priv; + struct net_device *peer; + int err; + + /* sanity check. Upper bounds are already enforced by the caller */ + if (!ch->rx_count || !ch->tx_count) + return -EINVAL; + + /* avoid breaking XDP, if that is enabled */ + peer = rtnl_dereference(priv->peer); + peer_priv = peer ? 
netdev_priv(peer) : NULL; + if (priv->_xdp_prog && peer && ch->rx_count < peer->real_num_tx_queues) + return -EINVAL; + + if (peer && peer_priv && peer_priv->_xdp_prog && ch->tx_count > peer->real_num_rx_queues) + return -EINVAL; + + old_rx_count = dev->real_num_rx_queues; + new_rx_count = ch->rx_count; + if (netif_running(dev)) { + /* turn device off */ + netif_carrier_off(dev); + if (peer) + netif_carrier_off(peer); + + /* try to allocate new resurces, as needed*/ + err = veth_enable_range_safe(dev, old_rx_count, new_rx_count); + if (err) + goto out; + } + + err = netif_set_real_num_rx_queues(dev, ch->rx_count); + if (err) + goto revert; + + err = netif_set_real_num_tx_queues(dev, ch->tx_count); + if (err) { + int err2 = netif_set_real_num_rx_queues(dev, old_rx_count); + + /* this error condition could happen only if rx and tx change + * in opposite directions (e.g. tx nr raises, rx nr decreases) + * and we can't do anything to fully restore the original + * status + */ + if (err2) + pr_warn("Can't restore rx queues config %d -> %d %d", + new_rx_count, old_rx_count, err2); + else + goto revert; + } + +out: + if (netif_running(dev)) { + /* note that we need to swap the arguments WRT the enable part + * to identify the range we have to disable + */ + veth_disable_range_safe(dev, new_rx_count, old_rx_count); + netif_carrier_on(dev); + if (peer) + netif_carrier_on(peer); + } + return err; + +revert: + new_rx_count = old_rx_count; + old_rx_count = ch->rx_count; + goto out; +} + static int veth_open(struct net_device *dev) { struct veth_priv *priv = netdev_priv(dev); @@ -1447,6 +1609,23 @@ static void veth_disable_gro(struct net_device *dev) netdev_update_features(dev); } +static int veth_init_queues(struct net_device *dev, struct nlattr *tb[]) +{ + int err; + + if (!tb[IFLA_NUM_TX_QUEUES] && dev->num_tx_queues > 1) { + err = netif_set_real_num_tx_queues(dev, 1); + if (err) + return err; + } + if (!tb[IFLA_NUM_RX_QUEUES] && dev->num_rx_queues > 1) { + err = 
netif_set_real_num_rx_queues(dev, 1); + if (err) + return err; + } + return 0; +} + static int veth_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) @@ -1556,13 +1735,21 @@ static int veth_newlink(struct net *src_net, struct net_device *dev, priv = netdev_priv(dev); rcu_assign_pointer(priv->peer, peer); + err = veth_init_queues(dev, tb); + if (err) + goto err_queues; priv = netdev_priv(peer); rcu_assign_pointer(priv->peer, dev); + err = veth_init_queues(peer, tb); + if (err) + goto err_queues; veth_disable_gro(dev); return 0; +err_queues: + unregister_netdevice(dev); err_register_dev: /* nothing to do */ err_configure_peer: @@ -1608,6 +1795,16 @@ static struct net *veth_get_link_net(const struct net_device *dev) return peer ? dev_net(peer) : dev_net(dev); } +static unsigned int veth_get_num_queues(void) +{ + /* enforce the same queue limit as rtnl_create_link */ + int queues = num_possible_cpus(); + + if (queues > 4096) + queues = 4096; + return queues; +} + static struct rtnl_link_ops veth_link_ops = { .kind = DRV_NAME, .priv_size = sizeof(struct veth_priv), @@ -1618,6 +1815,8 @@ static struct rtnl_link_ops veth_link_ops = { .policy = veth_policy, .maxtype = VETH_INFO_MAX, .get_link_net = veth_get_link_net, + .get_num_tx_queues = veth_get_num_queues, + .get_num_rx_queues = veth_get_num_queues, }; /* diff --git a/drivers/net/vmxnet3/Makefile b/drivers/net/vmxnet3/Makefile index c5a167a1c85c..7a38925f4165 100644 --- a/drivers/net/vmxnet3/Makefile +++ b/drivers/net/vmxnet3/Makefile @@ -2,7 +2,7 @@ # # Linux driver for VMware's vmxnet3 ethernet NIC. # -# Copyright (C) 2007-2020, VMware, Inc. All Rights Reserved. +# Copyright (C) 2007-2021, VMware, Inc. All Rights Reserved. 
# # This program is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the diff --git a/drivers/net/vmxnet3/upt1_defs.h b/drivers/net/vmxnet3/upt1_defs.h index 8c014c98471c..f9f3a23d1698 100644 --- a/drivers/net/vmxnet3/upt1_defs.h +++ b/drivers/net/vmxnet3/upt1_defs.h @@ -1,7 +1,7 @@ /* * Linux driver for VMware's vmxnet3 ethernet NIC. * - * Copyright (C) 2008-2020, VMware, Inc. All Rights Reserved. + * Copyright (C) 2008-2021, VMware, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the diff --git a/drivers/net/vmxnet3/vmxnet3_defs.h b/drivers/net/vmxnet3/vmxnet3_defs.h index a8d5ebd47c71..74d4e8bc4abc 100644 --- a/drivers/net/vmxnet3/vmxnet3_defs.h +++ b/drivers/net/vmxnet3/vmxnet3_defs.h @@ -1,7 +1,7 @@ /* * Linux driver for VMware's vmxnet3 ethernet NIC. * - * Copyright (C) 2008-2020, VMware, Inc. All Rights Reserved. + * Copyright (C) 2008-2021, VMware, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -98,6 +98,9 @@ enum { VMXNET3_CMD_GET_TXDATA_DESC_SIZE, VMXNET3_CMD_GET_COALESCE, VMXNET3_CMD_GET_RSS_FIELDS, + VMXNET3_CMD_GET_RESERVED2, + VMXNET3_CMD_GET_RESERVED3, + VMXNET3_CMD_GET_MAX_QUEUES_CONF, }; /* @@ -341,13 +344,15 @@ struct Vmxnet3_RxCompDescExt { #define VMXNET3_TXD_EOP_SIZE 1 /* value of RxCompDesc.rssType */ -enum { - VMXNET3_RCD_RSS_TYPE_NONE = 0, - VMXNET3_RCD_RSS_TYPE_IPV4 = 1, - VMXNET3_RCD_RSS_TYPE_TCPIPV4 = 2, - VMXNET3_RCD_RSS_TYPE_IPV6 = 3, - VMXNET3_RCD_RSS_TYPE_TCPIPV6 = 4, -}; +#define VMXNET3_RCD_RSS_TYPE_NONE 0 +#define VMXNET3_RCD_RSS_TYPE_IPV4 1 +#define VMXNET3_RCD_RSS_TYPE_TCPIPV4 2 +#define VMXNET3_RCD_RSS_TYPE_IPV6 3 +#define VMXNET3_RCD_RSS_TYPE_TCPIPV6 4 +#define VMXNET3_RCD_RSS_TYPE_UDPIPV4 5 +#define VMXNET3_RCD_RSS_TYPE_UDPIPV6 6 +#define VMXNET3_RCD_RSS_TYPE_ESPIPV4 7 +#define VMXNET3_RCD_RSS_TYPE_ESPIPV6 8 /* a union for accessing all cmd/completion descriptors */ @@ -533,6 +538,13 @@ enum vmxnet3_intr_type { /* addition 1 for events */ #define VMXNET3_MAX_INTRS 25 +/* Version 6 and later will use below macros */ +#define VMXNET3_EXT_MAX_TX_QUEUES 32 +#define VMXNET3_EXT_MAX_RX_QUEUES 32 +/* addition 1 for events */ +#define VMXNET3_EXT_MAX_INTRS 65 +#define VMXNET3_FIRST_SET_INTRS 64 + /* value of intrCtrl */ #define VMXNET3_IC_DISABLE_ALL 0x1 /* bit 0 */ @@ -547,6 +559,19 @@ struct Vmxnet3_IntrConf { __le32 reserved[2]; }; +struct Vmxnet3_IntrConfExt { + u8 autoMask; + u8 numIntrs; /* # of interrupts */ + u8 eventIntrIdx; + u8 reserved; + __le32 intrCtrl; + __le32 reserved1; + u8 modLevels[VMXNET3_EXT_MAX_INTRS]; /* moderation level for + * each intr + */ + u8 reserved2[3]; +}; + /* one bit per VLAN ID, the size is in the units of u32 */ #define VMXNET3_VFT_SIZE (4096 / (sizeof(u32) * 8)) @@ -719,11 +744,16 @@ struct Vmxnet3_DSDevRead { struct Vmxnet3_VariableLenConfDesc 
pluginConfDesc; }; +struct Vmxnet3_DSDevReadExt { + /* read-only region for device, read by dev in response to a SET cmd */ + struct Vmxnet3_IntrConfExt intrConfExt; +}; + /* All structures in DriverShared are padded to multiples of 8 bytes */ struct Vmxnet3_DriverShared { __le32 magic; /* make devRead start at 64bit boundaries */ - __le32 pad; + __le32 size; /* size of DriverShared */ struct Vmxnet3_DSDevRead devRead; __le32 ecr; __le32 reserved; @@ -734,6 +764,7 @@ struct Vmxnet3_DriverShared { * command */ } cu; + struct Vmxnet3_DSDevReadExt devReadExt; }; @@ -764,6 +795,7 @@ struct Vmxnet3_DriverShared { ((vfTable[vid >> 5] & (1 << (vid & 31))) != 0) #define VMXNET3_MAX_MTU 9000 +#define VMXNET3_V6_MAX_MTU 9190 #define VMXNET3_MIN_MTU 60 #define VMXNET3_LINK_UP (10000 << 16 | 1) /* 10 Gbps, up */ diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 6e87f1fc4874..e3c6b7e3bfdd 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1,7 +1,7 @@ /* * Linux driver for VMware's vmxnet3 ethernet NIC. * - * Copyright (C) 2008-2020, VMware, Inc. All Rights Reserved. + * Copyright (C) 2008-2021, VMware, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -1478,10 +1478,28 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, #ifdef VMXNET3_RSS if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE && - (adapter->netdev->features & NETIF_F_RXHASH)) + (adapter->netdev->features & NETIF_F_RXHASH)) { + enum pkt_hash_types hash_type; + + switch (rcd->rssType) { + case VMXNET3_RCD_RSS_TYPE_IPV4: + case VMXNET3_RCD_RSS_TYPE_IPV6: + hash_type = PKT_HASH_TYPE_L3; + break; + case VMXNET3_RCD_RSS_TYPE_TCPIPV4: + case VMXNET3_RCD_RSS_TYPE_TCPIPV6: + case VMXNET3_RCD_RSS_TYPE_UDPIPV4: + case VMXNET3_RCD_RSS_TYPE_UDPIPV6: + hash_type = PKT_HASH_TYPE_L4; + break; + default: + hash_type = PKT_HASH_TYPE_L3; + break; + } skb_set_hash(ctx->skb, le32_to_cpu(rcd->rssHash), - PKT_HASH_TYPE_L3); + hash_type); + } #endif skb_put(ctx->skb, rcd->len); @@ -2460,6 +2478,7 @@ vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter) { struct Vmxnet3_DriverShared *shared = adapter->shared; struct Vmxnet3_DSDevRead *devRead = &shared->devRead; + struct Vmxnet3_DSDevReadExt *devReadExt = &shared->devReadExt; struct Vmxnet3_TxQueueConf *tqc; struct Vmxnet3_RxQueueConf *rqc; int i; @@ -2572,14 +2591,26 @@ vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter) #endif /* VMXNET3_RSS */ /* intr settings */ - devRead->intrConf.autoMask = adapter->intr.mask_mode == - VMXNET3_IMM_AUTO; - devRead->intrConf.numIntrs = adapter->intr.num_intrs; - for (i = 0; i < adapter->intr.num_intrs; i++) - devRead->intrConf.modLevels[i] = adapter->intr.mod_levels[i]; + if (!VMXNET3_VERSION_GE_6(adapter) || + !adapter->queuesExtEnabled) { + devRead->intrConf.autoMask = adapter->intr.mask_mode == + VMXNET3_IMM_AUTO; + devRead->intrConf.numIntrs = adapter->intr.num_intrs; + for (i = 0; i < adapter->intr.num_intrs; i++) + devRead->intrConf.modLevels[i] = adapter->intr.mod_levels[i]; + + devRead->intrConf.eventIntrIdx = 
adapter->intr.event_intr_idx; + devRead->intrConf.intrCtrl |= cpu_to_le32(VMXNET3_IC_DISABLE_ALL); + } else { + devReadExt->intrConfExt.autoMask = adapter->intr.mask_mode == + VMXNET3_IMM_AUTO; + devReadExt->intrConfExt.numIntrs = adapter->intr.num_intrs; + for (i = 0; i < adapter->intr.num_intrs; i++) + devReadExt->intrConfExt.modLevels[i] = adapter->intr.mod_levels[i]; - devRead->intrConf.eventIntrIdx = adapter->intr.event_intr_idx; - devRead->intrConf.intrCtrl |= cpu_to_le32(VMXNET3_IC_DISABLE_ALL); + devReadExt->intrConfExt.eventIntrIdx = adapter->intr.event_intr_idx; + devReadExt->intrConfExt.intrCtrl |= cpu_to_le32(VMXNET3_IC_DISABLE_ALL); + } /* rx filter settings */ devRead->rxFilterConf.rxMode = 0; @@ -2717,6 +2748,7 @@ vmxnet3_activate_dev(struct vmxnet3_adapter *adapter) * tx queue if the link is up. */ vmxnet3_check_link(adapter, true); + netif_tx_wake_all_queues(adapter->netdev); for (i = 0; i < adapter->num_rx_queues; i++) napi_enable(&adapter->rx_queue[i].napi); vmxnet3_enable_all_intrs(adapter); @@ -3372,6 +3404,8 @@ vmxnet3_probe_device(struct pci_dev *pdev, int size; int num_tx_queues; int num_rx_queues; + int queues; + unsigned long flags; if (!pci_msi_enabled()) enable_mq = 0; @@ -3383,7 +3417,6 @@ vmxnet3_probe_device(struct pci_dev *pdev, else #endif num_rx_queues = 1; - num_rx_queues = rounddown_pow_of_two(num_rx_queues); if (enable_mq) num_tx_queues = min(VMXNET3_DEVICE_MAX_TX_QUEUES, @@ -3391,13 +3424,8 @@ vmxnet3_probe_device(struct pci_dev *pdev, else num_tx_queues = 1; - num_tx_queues = rounddown_pow_of_two(num_tx_queues); netdev = alloc_etherdev_mq(sizeof(struct vmxnet3_adapter), max(num_tx_queues, num_rx_queues)); - dev_info(&pdev->dev, - "# of Tx queues : %d, # of Rx queues : %d\n", - num_tx_queues, num_rx_queues); - if (!netdev) return -ENOMEM; @@ -3447,51 +3475,22 @@ vmxnet3_probe_device(struct pci_dev *pdev, goto err_alloc_shared; } - adapter->num_rx_queues = num_rx_queues; - adapter->num_tx_queues = num_tx_queues; - 
adapter->rx_buf_per_pkt = 1; - - size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues; - size += sizeof(struct Vmxnet3_RxQueueDesc) * adapter->num_rx_queues; - adapter->tqd_start = dma_alloc_coherent(&adapter->pdev->dev, size, - &adapter->queue_desc_pa, - GFP_KERNEL); - - if (!adapter->tqd_start) { - dev_err(&pdev->dev, "Failed to allocate memory\n"); - err = -ENOMEM; - goto err_alloc_queue_desc; - } - adapter->rqd_start = (struct Vmxnet3_RxQueueDesc *)(adapter->tqd_start + - adapter->num_tx_queues); - - adapter->pm_conf = dma_alloc_coherent(&adapter->pdev->dev, - sizeof(struct Vmxnet3_PMConf), - &adapter->pm_conf_pa, - GFP_KERNEL); - if (adapter->pm_conf == NULL) { - err = -ENOMEM; - goto err_alloc_pm; - } - -#ifdef VMXNET3_RSS - - adapter->rss_conf = dma_alloc_coherent(&adapter->pdev->dev, - sizeof(struct UPT1_RSSConf), - &adapter->rss_conf_pa, - GFP_KERNEL); - if (adapter->rss_conf == NULL) { - err = -ENOMEM; - goto err_alloc_rss; - } -#endif /* VMXNET3_RSS */ - err = vmxnet3_alloc_pci_resources(adapter); if (err < 0) goto err_alloc_pci; ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_VRRS); - if (ver & (1 << VMXNET3_REV_4)) { + if (ver & (1 << VMXNET3_REV_6)) { + VMXNET3_WRITE_BAR1_REG(adapter, + VMXNET3_REG_VRRS, + 1 << VMXNET3_REV_6); + adapter->version = VMXNET3_REV_6 + 1; + } else if (ver & (1 << VMXNET3_REV_5)) { + VMXNET3_WRITE_BAR1_REG(adapter, + VMXNET3_REG_VRRS, + 1 << VMXNET3_REV_5); + adapter->version = VMXNET3_REV_5 + 1; + } else if (ver & (1 << VMXNET3_REV_4)) { VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_VRRS, 1 << VMXNET3_REV_4); @@ -3529,6 +3528,77 @@ vmxnet3_probe_device(struct pci_dev *pdev, goto err_ver; } + if (VMXNET3_VERSION_GE_6(adapter)) { + spin_lock_irqsave(&adapter->cmd_lock, flags); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_GET_MAX_QUEUES_CONF); + queues = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); + if (queues > 0) { + 
adapter->num_rx_queues = min(num_rx_queues, ((queues >> 8) & 0xff)); + adapter->num_tx_queues = min(num_tx_queues, (queues & 0xff)); + } else { + adapter->num_rx_queues = min(num_rx_queues, + VMXNET3_DEVICE_DEFAULT_RX_QUEUES); + adapter->num_tx_queues = min(num_tx_queues, + VMXNET3_DEVICE_DEFAULT_TX_QUEUES); + } + if (adapter->num_rx_queues > VMXNET3_MAX_RX_QUEUES || + adapter->num_tx_queues > VMXNET3_MAX_TX_QUEUES) { + adapter->queuesExtEnabled = true; + } else { + adapter->queuesExtEnabled = false; + } + } else { + adapter->queuesExtEnabled = false; + num_rx_queues = rounddown_pow_of_two(num_rx_queues); + num_tx_queues = rounddown_pow_of_two(num_tx_queues); + adapter->num_rx_queues = min(num_rx_queues, + VMXNET3_DEVICE_DEFAULT_RX_QUEUES); + adapter->num_tx_queues = min(num_tx_queues, + VMXNET3_DEVICE_DEFAULT_TX_QUEUES); + } + dev_info(&pdev->dev, + "# of Tx queues : %d, # of Rx queues : %d\n", + adapter->num_tx_queues, adapter->num_rx_queues); + + adapter->rx_buf_per_pkt = 1; + + size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues; + size += sizeof(struct Vmxnet3_RxQueueDesc) * adapter->num_rx_queues; + adapter->tqd_start = dma_alloc_coherent(&adapter->pdev->dev, size, + &adapter->queue_desc_pa, + GFP_KERNEL); + + if (!adapter->tqd_start) { + dev_err(&pdev->dev, "Failed to allocate memory\n"); + err = -ENOMEM; + goto err_ver; + } + adapter->rqd_start = (struct Vmxnet3_RxQueueDesc *)(adapter->tqd_start + + adapter->num_tx_queues); + + adapter->pm_conf = dma_alloc_coherent(&adapter->pdev->dev, + sizeof(struct Vmxnet3_PMConf), + &adapter->pm_conf_pa, + GFP_KERNEL); + if (adapter->pm_conf == NULL) { + err = -ENOMEM; + goto err_alloc_pm; + } + +#ifdef VMXNET3_RSS + + adapter->rss_conf = dma_alloc_coherent(&adapter->pdev->dev, + sizeof(struct UPT1_RSSConf), + &adapter->rss_conf_pa, + GFP_KERNEL); + if (adapter->rss_conf == NULL) { + err = -ENOMEM; + goto err_alloc_rss; + } +#endif /* VMXNET3_RSS */ + if (VMXNET3_VERSION_GE_3(adapter)) { 
adapter->coal_conf = dma_alloc_coherent(&adapter->pdev->dev, @@ -3538,7 +3608,7 @@ vmxnet3_probe_device(struct pci_dev *pdev, GFP_KERNEL); if (!adapter->coal_conf) { err = -ENOMEM; - goto err_ver; + goto err_coal_conf; } adapter->coal_conf->coalMode = VMXNET3_COALESCE_DISABLED; adapter->default_coal_mode = true; @@ -3581,9 +3651,12 @@ vmxnet3_probe_device(struct pci_dev *pdev, vmxnet3_set_ethtool_ops(netdev); netdev->watchdog_timeo = 5 * HZ; - /* MTU range: 60 - 9000 */ + /* MTU range: 60 - 9190 */ netdev->min_mtu = VMXNET3_MIN_MTU; - netdev->max_mtu = VMXNET3_MAX_MTU; + if (VMXNET3_VERSION_GE_6(adapter)) + netdev->max_mtu = VMXNET3_V6_MAX_MTU; + else + netdev->max_mtu = VMXNET3_MAX_MTU; INIT_WORK(&adapter->work, vmxnet3_reset_work); set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state); @@ -3621,9 +3694,7 @@ err_register: adapter->coal_conf, adapter->coal_conf_pa); } vmxnet3_free_intr_resources(adapter); -err_ver: - vmxnet3_free_pci_resources(adapter); -err_alloc_pci: +err_coal_conf: #ifdef VMXNET3_RSS dma_free_coherent(&adapter->pdev->dev, sizeof(struct UPT1_RSSConf), adapter->rss_conf, adapter->rss_conf_pa); @@ -3634,7 +3705,9 @@ err_alloc_rss: err_alloc_pm: dma_free_coherent(&adapter->pdev->dev, size, adapter->tqd_start, adapter->queue_desc_pa); -err_alloc_queue_desc: +err_ver: + vmxnet3_free_pci_resources(adapter); +err_alloc_pci: dma_free_coherent(&adapter->pdev->dev, sizeof(struct Vmxnet3_DriverShared), adapter->shared, adapter->shared_pa); @@ -3653,7 +3726,8 @@ vmxnet3_remove_device(struct pci_dev *pdev) struct net_device *netdev = pci_get_drvdata(pdev); struct vmxnet3_adapter *adapter = netdev_priv(netdev); int size = 0; - int num_rx_queues; + int num_rx_queues, rx_queues; + unsigned long flags; #ifdef VMXNET3_RSS if (enable_mq) @@ -3662,7 +3736,24 @@ vmxnet3_remove_device(struct pci_dev *pdev) else #endif num_rx_queues = 1; - num_rx_queues = rounddown_pow_of_two(num_rx_queues); + if (!VMXNET3_VERSION_GE_6(adapter)) { + num_rx_queues = 
rounddown_pow_of_two(num_rx_queues); + } + if (VMXNET3_VERSION_GE_6(adapter)) { + spin_lock_irqsave(&adapter->cmd_lock, flags); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_GET_MAX_QUEUES_CONF); + rx_queues = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); + if (rx_queues > 0) + rx_queues = (rx_queues >> 8) & 0xff; + else + rx_queues = min(num_rx_queues, VMXNET3_DEVICE_DEFAULT_RX_QUEUES); + num_rx_queues = min(num_rx_queues, rx_queues); + } else { + num_rx_queues = min(num_rx_queues, + VMXNET3_DEVICE_DEFAULT_RX_QUEUES); + } cancel_work_sync(&adapter->work); diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c index 1b483cf2b1ca..a3e2f2ba68b5 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethtool.c +++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c @@ -787,6 +787,10 @@ vmxnet3_get_rss_hash_opts(struct vmxnet3_adapter *adapter, case AH_ESP_V6_FLOW: case AH_V6_FLOW: case ESP_V6_FLOW: + if (VMXNET3_VERSION_GE_6(adapter) && + (rss_fields & VMXNET3_RSS_FIELDS_ESPIP6)) + info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + fallthrough; case SCTP_V6_FLOW: case IPV6_FLOW: info->data |= RXH_IP_SRC | RXH_IP_DST; @@ -871,6 +875,22 @@ vmxnet3_set_rss_hash_opt(struct net_device *netdev, case ESP_V6_FLOW: case AH_V6_FLOW: case AH_ESP_V6_FLOW: + if (!VMXNET3_VERSION_GE_6(adapter)) + return -EOPNOTSUPP; + if (!(nfc->data & RXH_IP_SRC) || + !(nfc->data & RXH_IP_DST)) + return -EINVAL; + switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { + case 0: + rss_fields &= ~VMXNET3_RSS_FIELDS_ESPIP6; + break; + case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + rss_fields |= VMXNET3_RSS_FIELDS_ESPIP6; + break; + default: + return -EINVAL; + } + break; case SCTP_V4_FLOW: case SCTP_V6_FLOW: if (!(nfc->data & RXH_IP_SRC) || diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h index e910596b79cf..7027ff483fa5 100644 --- a/drivers/net/vmxnet3/vmxnet3_int.h +++ 
b/drivers/net/vmxnet3/vmxnet3_int.h @@ -1,7 +1,7 @@ /* * Linux driver for VMware's vmxnet3 ethernet NIC. * - * Copyright (C) 2008-2020, VMware, Inc. All Rights Reserved. + * Copyright (C) 2008-2021, VMware, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -69,18 +69,20 @@ /* * Version numbers */ -#define VMXNET3_DRIVER_VERSION_STRING "1.5.0.0-k" +#define VMXNET3_DRIVER_VERSION_STRING "1.6.0.0-k" /* Each byte of this 32-bit integer encodes a version number in * VMXNET3_DRIVER_VERSION_STRING. */ -#define VMXNET3_DRIVER_VERSION_NUM 0x01050000 +#define VMXNET3_DRIVER_VERSION_NUM 0x01060000 #if defined(CONFIG_PCI_MSI) /* RSS only makes sense if MSI-X is supported. */ #define VMXNET3_RSS #endif +#define VMXNET3_REV_6 5 /* Vmxnet3 Rev. 6 */ +#define VMXNET3_REV_5 4 /* Vmxnet3 Rev. 5 */ #define VMXNET3_REV_4 3 /* Vmxnet3 Rev. 4 */ #define VMXNET3_REV_3 2 /* Vmxnet3 Rev. 3 */ #define VMXNET3_REV_2 1 /* Vmxnet3 Rev. 
2 */ @@ -301,15 +303,18 @@ struct vmxnet3_rx_queue { struct vmxnet3_rq_driver_stats stats; } __attribute__((__aligned__(SMP_CACHE_BYTES))); -#define VMXNET3_DEVICE_MAX_TX_QUEUES 8 -#define VMXNET3_DEVICE_MAX_RX_QUEUES 8 /* Keep this value as a power of 2 */ +#define VMXNET3_DEVICE_MAX_TX_QUEUES 32 +#define VMXNET3_DEVICE_MAX_RX_QUEUES 32 /* Keep this value as a power of 2 */ + +#define VMXNET3_DEVICE_DEFAULT_TX_QUEUES 8 +#define VMXNET3_DEVICE_DEFAULT_RX_QUEUES 8 /* Keep this value as a power of 2 */ /* Should be less than UPT1_RSS_MAX_IND_TABLE_SIZE */ #define VMXNET3_RSS_IND_TABLE_SIZE (VMXNET3_DEVICE_MAX_RX_QUEUES * 4) #define VMXNET3_LINUX_MAX_MSIX_VECT (VMXNET3_DEVICE_MAX_TX_QUEUES + \ VMXNET3_DEVICE_MAX_RX_QUEUES + 1) -#define VMXNET3_LINUX_MIN_MSIX_VECT 2 /* 1 for tx-rx pair and 1 for event */ +#define VMXNET3_LINUX_MIN_MSIX_VECT 3 /* 1 for tx, 1 for rx pair and 1 for event */ struct vmxnet3_intr { @@ -396,6 +401,7 @@ struct vmxnet3_adapter { dma_addr_t adapter_pa; dma_addr_t pm_conf_pa; dma_addr_t rss_conf_pa; + bool queuesExtEnabled; }; #define VMXNET3_WRITE_BAR0_REG(adapter, reg, val) \ @@ -421,6 +427,10 @@ struct vmxnet3_adapter { (adapter->version >= VMXNET3_REV_3 + 1) #define VMXNET3_VERSION_GE_4(adapter) \ (adapter->version >= VMXNET3_REV_4 + 1) +#define VMXNET3_VERSION_GE_5(adapter) \ + (adapter->version >= VMXNET3_REV_5 + 1) +#define VMXNET3_VERSION_GE_6(adapter) \ + (adapter->version >= VMXNET3_REV_6 + 1) /* must be a multiple of VMXNET3_RING_SIZE_ALIGN */ #define VMXNET3_DEF_TX_RING_SIZE 512 diff --git a/drivers/net/wan/ixp4xx_hss.c b/drivers/net/wan/ixp4xx_hss.c index 3c51ab239fb2..2cebbfca0bd1 100644 --- a/drivers/net/wan/ixp4xx_hss.c +++ b/drivers/net/wan/ixp4xx_hss.c @@ -975,11 +975,10 @@ static int init_hdlc_queues(struct port *port) return -ENOMEM; } - port->desc_tab = dma_pool_alloc(dma_pool, GFP_KERNEL, + port->desc_tab = dma_pool_zalloc(dma_pool, GFP_KERNEL, &port->desc_tab_phys); if (!port->desc_tab) return -ENOMEM; - 
memset(port->desc_tab, 0, POOL_ALLOC_SIZE); memset(port->rx_buff_tab, 0, sizeof(port->rx_buff_tab)); /* tables */ memset(port->tx_buff_tab, 0, sizeof(port->tx_buff_tab)); diff --git a/drivers/net/wwan/iosm/iosm_ipc_pcie.c b/drivers/net/wwan/iosm/iosm_ipc_pcie.c index 7f7d364d3a51..2fe88b8be348 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_pcie.c +++ b/drivers/net/wwan/iosm/iosm_ipc_pcie.c @@ -479,6 +479,7 @@ static struct pci_driver iosm_ipc_driver = { }, .id_table = iosm_ipc_ids, }; +module_pci_driver(iosm_ipc_driver); int ipc_pcie_addr_map(struct iosm_pcie *ipc_pcie, unsigned char *data, size_t size, dma_addr_t *mapping, int direction) @@ -560,21 +561,3 @@ void ipc_pcie_kfree_skb(struct iosm_pcie *ipc_pcie, struct sk_buff *skb) IPC_CB(skb)->mapping = 0; dev_kfree_skb(skb); } - -static int __init iosm_ipc_driver_init(void) -{ - if (pci_register_driver(&iosm_ipc_driver)) { - pr_err("registering of IOSM PCIe driver failed"); - return -1; - } - - return 0; -} - -static void __exit iosm_ipc_driver_exit(void) -{ - pci_unregister_driver(&iosm_ipc_driver); -} - -module_init(iosm_ipc_driver_init); -module_exit(iosm_ipc_driver_exit); diff --git a/drivers/nfc/fdp/fdp.c b/drivers/nfc/fdp/fdp.c index 528745862738..3e542b7389cb 100644 --- a/drivers/nfc/fdp/fdp.c +++ b/drivers/nfc/fdp/fdp.c @@ -38,7 +38,7 @@ #define NCI_OP_PROP_SET_PDATA_OID 0x23 struct fdp_nci_info { - struct nfc_phy_ops *phy_ops; + const struct nfc_phy_ops *phy_ops; struct fdp_i2c_phy *phy; struct nci_dev *ndev; @@ -651,7 +651,7 @@ static int fdp_nci_core_get_config_rsp_packet(struct nci_dev *ndev, return 0; } -static struct nci_driver_ops fdp_core_ops[] = { +static const struct nci_driver_ops fdp_core_ops[] = { { .opcode = NCI_OP_CORE_GET_CONFIG_RSP, .rsp = fdp_nci_core_get_config_rsp_packet, @@ -662,7 +662,7 @@ static struct nci_driver_ops fdp_core_ops[] = { }, }; -static struct nci_driver_ops fdp_prop_ops[] = { +static const struct nci_driver_ops fdp_prop_ops[] = { { .opcode = nci_opcode_pack(NCI_GID_PROP, 
NCI_OP_PROP_PATCH_OID), .rsp = fdp_nci_prop_patch_rsp_packet, @@ -675,7 +675,7 @@ static struct nci_driver_ops fdp_prop_ops[] = { }, }; -static struct nci_ops nci_ops = { +static const struct nci_ops nci_ops = { .open = fdp_nci_open, .close = fdp_nci_close, .send = fdp_nci_send, @@ -687,7 +687,7 @@ static struct nci_ops nci_ops = { .n_core_ops = ARRAY_SIZE(fdp_core_ops), }; -int fdp_nci_probe(struct fdp_i2c_phy *phy, struct nfc_phy_ops *phy_ops, +int fdp_nci_probe(struct fdp_i2c_phy *phy, const struct nfc_phy_ops *phy_ops, struct nci_dev **ndevp, int tx_headroom, int tx_tailroom, u8 clock_type, u32 clock_freq, u8 *fw_vsc_cfg) @@ -718,6 +718,7 @@ int fdp_nci_probe(struct fdp_i2c_phy *phy, struct nfc_phy_ops *phy_ops, NFC_PROTO_NFC_DEP_MASK | NFC_PROTO_ISO15693_MASK; + BUILD_BUG_ON(ARRAY_SIZE(fdp_prop_ops) > NCI_MAX_PROPRIETARY_CMD); ndev = nci_allocate_device(&nci_ops, protocols, tx_headroom, tx_tailroom); if (!ndev) { diff --git a/drivers/nfc/fdp/fdp.h b/drivers/nfc/fdp/fdp.h index ead3b21ccae6..dc048d4b977e 100644 --- a/drivers/nfc/fdp/fdp.h +++ b/drivers/nfc/fdp/fdp.h @@ -21,7 +21,7 @@ struct fdp_i2c_phy { uint16_t next_read_size; }; -int fdp_nci_probe(struct fdp_i2c_phy *phy, struct nfc_phy_ops *phy_ops, +int fdp_nci_probe(struct fdp_i2c_phy *phy, const struct nfc_phy_ops *phy_ops, struct nci_dev **ndev, int tx_headroom, int tx_tailroom, u8 clock_type, u32 clock_freq, u8 *fw_vsc_cfg); void fdp_nci_remove(struct nci_dev *ndev); diff --git a/drivers/nfc/fdp/i2c.c b/drivers/nfc/fdp/i2c.c index c5596e514648..98e1876c9468 100644 --- a/drivers/nfc/fdp/i2c.c +++ b/drivers/nfc/fdp/i2c.c @@ -120,7 +120,7 @@ static int fdp_nci_i2c_write(void *phy_id, struct sk_buff *skb) return r; } -static struct nfc_phy_ops i2c_phy_ops = { +static const struct nfc_phy_ops i2c_phy_ops = { .write = fdp_nci_i2c_write, .enable = fdp_nci_i2c_enable, .disable = fdp_nci_i2c_disable, diff --git a/drivers/nfc/mei_phy.c b/drivers/nfc/mei_phy.c index e56cea716cd2..41146bb99474 100644 --- 
a/drivers/nfc/mei_phy.c +++ b/drivers/nfc/mei_phy.c @@ -362,7 +362,7 @@ static void nfc_mei_phy_disable(void *phy_id) phy->powered = 0; } -struct nfc_phy_ops mei_phy_ops = { +const struct nfc_phy_ops mei_phy_ops = { .write = nfc_mei_phy_write, .enable = nfc_mei_phy_enable, .disable = nfc_mei_phy_disable, diff --git a/drivers/nfc/mei_phy.h b/drivers/nfc/mei_phy.h index 51bd44f5f3b8..2b1edb3eba15 100644 --- a/drivers/nfc/mei_phy.h +++ b/drivers/nfc/mei_phy.h @@ -45,7 +45,7 @@ struct nfc_mei_phy { int hard_fault; }; -extern struct nfc_phy_ops mei_phy_ops; +extern const struct nfc_phy_ops mei_phy_ops; struct nfc_mei_phy *nfc_mei_phy_alloc(struct mei_cl_device *device); void nfc_mei_phy_free(struct nfc_mei_phy *phy); diff --git a/drivers/nfc/microread/i2c.c b/drivers/nfc/microread/i2c.c index dd78d987e6c9..f91760c78455 100644 --- a/drivers/nfc/microread/i2c.c +++ b/drivers/nfc/microread/i2c.c @@ -225,7 +225,7 @@ static irqreturn_t microread_i2c_irq_thread_fn(int irq, void *phy_id) return IRQ_HANDLED; } -static struct nfc_phy_ops i2c_phy_ops = { +static const struct nfc_phy_ops i2c_phy_ops = { .write = microread_i2c_write, .enable = microread_i2c_enable, .disable = microread_i2c_disable, diff --git a/drivers/nfc/microread/microread.c b/drivers/nfc/microread/microread.c index b1d3975e8a81..8e847524937c 100644 --- a/drivers/nfc/microread/microread.c +++ b/drivers/nfc/microread/microread.c @@ -131,7 +131,7 @@ #define MICROREAD_ELT_ID_SE2 0x04 #define MICROREAD_ELT_ID_SE3 0x05 -static struct nfc_hci_gate microread_gates[] = { +static const struct nfc_hci_gate microread_gates[] = { {MICROREAD_GATE_ID_ADM, MICROREAD_PIPE_ID_ADMIN}, {MICROREAD_GATE_ID_LOOPBACK, MICROREAD_PIPE_ID_HDS_LOOPBACK}, {MICROREAD_GATE_ID_IDT, MICROREAD_PIPE_ID_HDS_IDT}, @@ -152,7 +152,7 @@ static struct nfc_hci_gate microread_gates[] = { #define MICROREAD_CMD_TAILROOM 2 struct microread_info { - struct nfc_phy_ops *phy_ops; + const struct nfc_phy_ops *phy_ops; void *phy_id; struct nfc_hci_dev *hdev; @@ 
-625,7 +625,7 @@ static int microread_event_received(struct nfc_hci_dev *hdev, u8 pipe, return r; } -static struct nfc_hci_ops microread_hci_ops = { +static const struct nfc_hci_ops microread_hci_ops = { .open = microread_open, .close = microread_close, .hci_ready = microread_hci_ready, @@ -641,9 +641,9 @@ static struct nfc_hci_ops microread_hci_ops = { .event_received = microread_event_received, }; -int microread_probe(void *phy_id, struct nfc_phy_ops *phy_ops, char *llc_name, - int phy_headroom, int phy_tailroom, int phy_payload, - struct nfc_hci_dev **hdev) +int microread_probe(void *phy_id, const struct nfc_phy_ops *phy_ops, + char *llc_name, int phy_headroom, int phy_tailroom, + int phy_payload, struct nfc_hci_dev **hdev) { struct microread_info *info; unsigned long quirks = 0; diff --git a/drivers/nfc/microread/microread.h b/drivers/nfc/microread/microread.h index 044f5e456375..76152d7aa53c 100644 --- a/drivers/nfc/microread/microread.h +++ b/drivers/nfc/microread/microread.h @@ -10,9 +10,9 @@ #define DRIVER_DESC "NFC driver for microread" -int microread_probe(void *phy_id, struct nfc_phy_ops *phy_ops, char *llc_name, - int phy_headroom, int phy_tailroom, int phy_payload, - struct nfc_hci_dev **hdev); +int microread_probe(void *phy_id, const struct nfc_phy_ops *phy_ops, + char *llc_name, int phy_headroom, int phy_tailroom, + int phy_payload, struct nfc_hci_dev **hdev); void microread_remove(struct nfc_hci_dev *hdev); diff --git a/drivers/nfc/nfcmrvl/main.c b/drivers/nfc/nfcmrvl/main.c index a4620b480c4f..6e9e7ce8792c 100644 --- a/drivers/nfc/nfcmrvl/main.c +++ b/drivers/nfc/nfcmrvl/main.c @@ -81,7 +81,7 @@ static int nfcmrvl_nci_fw_download(struct nci_dev *ndev, return nfcmrvl_fw_dnld_start(ndev, firmware_name); } -static struct nci_ops nfcmrvl_nci_ops = { +static const struct nci_ops nfcmrvl_nci_ops = { .open = nfcmrvl_nci_open, .close = nfcmrvl_nci_close, .send = nfcmrvl_nci_send, diff --git a/drivers/nfc/nfcsim.c b/drivers/nfc/nfcsim.c index 
a9864fcdfba6..143dc49b815b 100644 --- a/drivers/nfc/nfcsim.c +++ b/drivers/nfc/nfcsim.c @@ -320,7 +320,7 @@ static int nfcsim_tg_listen(struct nfc_digital_dev *ddev, u16 timeout, return nfcsim_send(ddev, NULL, timeout, cb, arg); } -static struct nfc_digital_ops nfcsim_digital_ops = { +static const struct nfc_digital_ops nfcsim_digital_ops = { .in_configure_hw = nfcsim_in_configure_hw, .in_send_cmd = nfcsim_in_send_cmd, diff --git a/drivers/nfc/nxp-nci/core.c b/drivers/nfc/nxp-nci/core.c index 2b0c7232e91f..518e2afb43a8 100644 --- a/drivers/nfc/nxp-nci/core.c +++ b/drivers/nfc/nxp-nci/core.c @@ -83,7 +83,7 @@ static int nxp_nci_send(struct nci_dev *ndev, struct sk_buff *skb) return r; } -static struct nci_ops nxp_nci_ops = { +static const struct nci_ops nxp_nci_ops = { .open = nxp_nci_open, .close = nxp_nci_close, .send = nxp_nci_send, diff --git a/drivers/nfc/pn533/pn533.c b/drivers/nfc/pn533/pn533.c index cd64bfe20402..2f3f3fe9a0ba 100644 --- a/drivers/nfc/pn533/pn533.c +++ b/drivers/nfc/pn533/pn533.c @@ -2623,7 +2623,7 @@ static int pn533_dev_down(struct nfc_dev *nfc_dev) return ret; } -static struct nfc_ops pn533_nfc_ops = { +static const struct nfc_ops pn533_nfc_ops = { .dev_up = pn533_dev_up, .dev_down = pn533_dev_down, .dep_link_up = pn533_dep_link_up, diff --git a/drivers/nfc/pn544/i2c.c b/drivers/nfc/pn544/i2c.c index de59e439c369..37d26f01986b 100644 --- a/drivers/nfc/pn544/i2c.c +++ b/drivers/nfc/pn544/i2c.c @@ -515,7 +515,7 @@ static irqreturn_t pn544_hci_i2c_irq_thread_fn(int irq, void *phy_id) return IRQ_HANDLED; } -static struct nfc_phy_ops i2c_phy_ops = { +static const struct nfc_phy_ops i2c_phy_ops = { .write = pn544_hci_i2c_write, .enable = pn544_hci_i2c_enable, .disable = pn544_hci_i2c_disable, diff --git a/drivers/nfc/pn544/pn544.c b/drivers/nfc/pn544/pn544.c index b788870473e8..c2b4555ab4b7 100644 --- a/drivers/nfc/pn544/pn544.c +++ b/drivers/nfc/pn544/pn544.c @@ -86,7 +86,7 @@ enum pn544_state { #define PN544_HCI_CMD_ATTREQUEST 0x12 #define 
PN544_HCI_CMD_CONTINUE_ACTIVATION 0x13 -static struct nfc_hci_gate pn544_gates[] = { +static const struct nfc_hci_gate pn544_gates[] = { {NFC_HCI_ADMIN_GATE, NFC_HCI_INVALID_PIPE}, {NFC_HCI_LOOPBACK_GATE, NFC_HCI_INVALID_PIPE}, {NFC_HCI_ID_MGMT_GATE, NFC_HCI_INVALID_PIPE}, @@ -108,7 +108,7 @@ static struct nfc_hci_gate pn544_gates[] = { #define PN544_CMDS_HEADROOM 2 struct pn544_hci_info { - struct nfc_phy_ops *phy_ops; + const struct nfc_phy_ops *phy_ops; void *phy_id; struct nfc_hci_dev *hdev; @@ -881,7 +881,7 @@ static int pn544_hci_disable_se(struct nfc_hci_dev *hdev, u32 se_idx) } } -static struct nfc_hci_ops pn544_hci_ops = { +static const struct nfc_hci_ops pn544_hci_ops = { .open = pn544_hci_open, .close = pn544_hci_close, .hci_ready = pn544_hci_ready, @@ -901,9 +901,10 @@ static struct nfc_hci_ops pn544_hci_ops = { .disable_se = pn544_hci_disable_se, }; -int pn544_hci_probe(void *phy_id, struct nfc_phy_ops *phy_ops, char *llc_name, - int phy_headroom, int phy_tailroom, int phy_payload, - fw_download_t fw_download, struct nfc_hci_dev **hdev) +int pn544_hci_probe(void *phy_id, const struct nfc_phy_ops *phy_ops, + char *llc_name, int phy_headroom, int phy_tailroom, + int phy_payload, fw_download_t fw_download, + struct nfc_hci_dev **hdev) { struct pn544_hci_info *info; u32 protocols; diff --git a/drivers/nfc/pn544/pn544.h b/drivers/nfc/pn544/pn544.h index 5634ba215ead..c6fe3e11e0c8 100644 --- a/drivers/nfc/pn544/pn544.h +++ b/drivers/nfc/pn544/pn544.h @@ -16,9 +16,10 @@ typedef int (*fw_download_t)(void *context, const char *firmware_name, u8 hw_variant); -int pn544_hci_probe(void *phy_id, struct nfc_phy_ops *phy_ops, char *llc_name, - int phy_headroom, int phy_tailroom, int phy_payload, - fw_download_t fw_download, struct nfc_hci_dev **hdev); +int pn544_hci_probe(void *phy_id, const struct nfc_phy_ops *phy_ops, + char *llc_name, int phy_headroom, int phy_tailroom, + int phy_payload, fw_download_t fw_download, + struct nfc_hci_dev **hdev); void 
pn544_hci_remove(struct nfc_hci_dev *hdev); #endif /* __LOCAL_PN544_H_ */ diff --git a/drivers/nfc/port100.c b/drivers/nfc/port100.c index 4df926cc37d0..ccb5c5fab905 100644 --- a/drivers/nfc/port100.c +++ b/drivers/nfc/port100.c @@ -217,7 +217,7 @@ struct port100_protocol { u8 value; } __packed; -static struct port100_protocol +static const struct port100_protocol in_protocols[][PORT100_IN_MAX_NUM_PROTOCOLS + 1] = { [NFC_DIGITAL_FRAMING_NFCA_SHORT] = { { PORT100_IN_PROT_INITIAL_GUARD_TIME, 6 }, @@ -391,7 +391,7 @@ in_protocols[][PORT100_IN_MAX_NUM_PROTOCOLS + 1] = { }, }; -static struct port100_protocol +static const struct port100_protocol tg_protocols[][PORT100_TG_MAX_NUM_PROTOCOLS + 1] = { [NFC_DIGITAL_FRAMING_NFCA_SHORT] = { { PORT100_TG_PROT_END, 0 }, @@ -1098,7 +1098,7 @@ static int port100_in_set_rf(struct nfc_digital_dev *ddev, u8 rf) static int port100_in_set_framing(struct nfc_digital_dev *ddev, int param) { struct port100 *dev = nfc_digital_get_drvdata(ddev); - struct port100_protocol *protocols; + const struct port100_protocol *protocols; struct sk_buff *skb; struct sk_buff *resp; int num_protocols; @@ -1255,7 +1255,7 @@ static int port100_tg_set_rf(struct nfc_digital_dev *ddev, u8 rf) static int port100_tg_set_framing(struct nfc_digital_dev *ddev, int param) { struct port100 *dev = nfc_digital_get_drvdata(ddev); - struct port100_protocol *protocols; + const struct port100_protocol *protocols; struct sk_buff *skb; struct sk_buff *resp; int rc; @@ -1463,7 +1463,7 @@ static int port100_listen(struct nfc_digital_dev *ddev, u16 timeout, return port100_tg_send_cmd(ddev, skb, timeout, cb, arg); } -static struct nfc_digital_ops port100_digital_ops = { +static const struct nfc_digital_ops port100_digital_ops = { .in_configure_hw = port100_in_configure_hw, .in_send_cmd = port100_in_send_cmd, diff --git a/drivers/nfc/s3fwrn5/core.c b/drivers/nfc/s3fwrn5/core.c index 865d3e3d1528..1c412007fabb 100644 --- a/drivers/nfc/s3fwrn5/core.c +++ 
b/drivers/nfc/s3fwrn5/core.c @@ -143,11 +143,13 @@ static int s3fwrn5_nci_post_setup(struct nci_dev *ndev) return nci_core_init(info->ndev); } -static struct nci_ops s3fwrn5_nci_ops = { +static const struct nci_ops s3fwrn5_nci_ops = { .open = s3fwrn5_nci_open, .close = s3fwrn5_nci_close, .send = s3fwrn5_nci_send, .post_setup = s3fwrn5_nci_post_setup, + .prop_ops = s3fwrn5_nci_prop_ops, + .n_prop_ops = ARRAY_SIZE(s3fwrn5_nci_prop_ops), }; int s3fwrn5_probe(struct nci_dev **ndev, void *phy_id, struct device *pdev, @@ -167,9 +169,6 @@ int s3fwrn5_probe(struct nci_dev **ndev, void *phy_id, struct device *pdev, s3fwrn5_set_mode(info, S3FWRN5_MODE_COLD); - s3fwrn5_nci_get_prop_ops(&s3fwrn5_nci_ops.prop_ops, - &s3fwrn5_nci_ops.n_prop_ops); - info->ndev = nci_allocate_device(&s3fwrn5_nci_ops, S3FWRN5_NFC_PROTOCOLS, 0, 0); if (!info->ndev) diff --git a/drivers/nfc/s3fwrn5/firmware.c b/drivers/nfc/s3fwrn5/firmware.c index eb5d7a5beac7..1421ffd46d9a 100644 --- a/drivers/nfc/s3fwrn5/firmware.c +++ b/drivers/nfc/s3fwrn5/firmware.c @@ -421,10 +421,9 @@ int s3fwrn5_fw_download(struct s3fwrn5_fw_info *fw_info) tfm = crypto_alloc_shash("sha1", 0, 0); if (IS_ERR(tfm)) { - ret = PTR_ERR(tfm); dev_err(&fw_info->ndev->nfc_dev->dev, "Cannot allocate shash (code=%d)\n", ret); - goto out; + return PTR_ERR(tfm); } ret = crypto_shash_tfm_digest(tfm, fw->image, image_size, hash_data); @@ -433,7 +432,7 @@ int s3fwrn5_fw_download(struct s3fwrn5_fw_info *fw_info) if (ret) { dev_err(&fw_info->ndev->nfc_dev->dev, "Cannot compute hash (code=%d)\n", ret); - goto out; + return ret; } /* Firmware update process */ @@ -446,7 +445,7 @@ int s3fwrn5_fw_download(struct s3fwrn5_fw_info *fw_info) if (ret < 0) { dev_err(&fw_info->ndev->nfc_dev->dev, "Unable to enter update mode\n"); - goto out; + return ret; } for (off = 0; off < image_size; off += fw_info->sector_size) { @@ -455,7 +454,7 @@ int s3fwrn5_fw_download(struct s3fwrn5_fw_info *fw_info) if (ret < 0) { dev_err(&fw_info->ndev->nfc_dev->dev, 
"Firmware update error (code=%d)\n", ret); - goto out; + return ret; } } @@ -463,13 +462,12 @@ int s3fwrn5_fw_download(struct s3fwrn5_fw_info *fw_info) if (ret < 0) { dev_err(&fw_info->ndev->nfc_dev->dev, "Unable to complete update mode\n"); - goto out; + return ret; } dev_info(&fw_info->ndev->nfc_dev->dev, "Firmware update: success\n"); -out: return ret; } diff --git a/drivers/nfc/s3fwrn5/nci.c b/drivers/nfc/s3fwrn5/nci.c index f042d3eaf8f6..e374e670b36b 100644 --- a/drivers/nfc/s3fwrn5/nci.c +++ b/drivers/nfc/s3fwrn5/nci.c @@ -20,7 +20,7 @@ static int s3fwrn5_nci_prop_rsp(struct nci_dev *ndev, struct sk_buff *skb) return 0; } -static struct nci_driver_ops s3fwrn5_nci_prop_ops[] = { +const struct nci_driver_ops s3fwrn5_nci_prop_ops[4] = { { .opcode = nci_opcode_pack(NCI_GID_PROPRIETARY, NCI_PROP_SET_RFREG), @@ -43,12 +43,6 @@ static struct nci_driver_ops s3fwrn5_nci_prop_ops[] = { }, }; -void s3fwrn5_nci_get_prop_ops(struct nci_driver_ops **ops, size_t *n) -{ - *ops = s3fwrn5_nci_prop_ops; - *n = ARRAY_SIZE(s3fwrn5_nci_prop_ops); -} - #define S3FWRN5_RFREG_SECTION_SIZE 252 int s3fwrn5_nci_rf_configure(struct s3fwrn5_info *info, const char *fw_name) diff --git a/drivers/nfc/s3fwrn5/nci.h b/drivers/nfc/s3fwrn5/nci.h index a80f0fb082a8..c2d906591e9e 100644 --- a/drivers/nfc/s3fwrn5/nci.h +++ b/drivers/nfc/s3fwrn5/nci.h @@ -50,7 +50,7 @@ struct nci_prop_fw_cfg_rsp { __u8 status; }; -void s3fwrn5_nci_get_prop_ops(struct nci_driver_ops **ops, size_t *n); +extern const struct nci_driver_ops s3fwrn5_nci_prop_ops[4]; int s3fwrn5_nci_rf_configure(struct s3fwrn5_info *info, const char *fw_name); #endif /* __LOCAL_S3FWRN5_NCI_H_ */ diff --git a/drivers/nfc/st-nci/core.c b/drivers/nfc/st-nci/core.c index 110ff1281e5f..72bb51efdf9c 100644 --- a/drivers/nfc/st-nci/core.c +++ b/drivers/nfc/st-nci/core.c @@ -86,7 +86,7 @@ static int st_nci_prop_rsp_packet(struct nci_dev *ndev, return 0; } -static struct nci_driver_ops st_nci_prop_ops[] = { +static const struct nci_driver_ops 
st_nci_prop_ops[] = { { .opcode = nci_opcode_pack(NCI_GID_PROPRIETARY, ST_NCI_CORE_PROP), @@ -94,7 +94,7 @@ static struct nci_driver_ops st_nci_prop_ops[] = { }, }; -static struct nci_ops st_nci_ops = { +static const struct nci_ops st_nci_ops = { .init = st_nci_init, .open = st_nci_open, .close = st_nci_close, @@ -131,6 +131,7 @@ int st_nci_probe(struct llt_ndlc *ndlc, int phy_headroom, | NFC_PROTO_ISO15693_MASK | NFC_PROTO_NFC_DEP_MASK; + BUILD_BUG_ON(ARRAY_SIZE(st_nci_prop_ops) > NCI_MAX_PROPRIETARY_CMD); ndlc->ndev = nci_allocate_device(&st_nci_ops, protocols, phy_headroom, phy_tailroom); if (!ndlc->ndev) { diff --git a/drivers/nfc/st-nci/i2c.c b/drivers/nfc/st-nci/i2c.c index 46981405e8b1..ccf6152ebb9f 100644 --- a/drivers/nfc/st-nci/i2c.c +++ b/drivers/nfc/st-nci/i2c.c @@ -186,7 +186,7 @@ static irqreturn_t st_nci_irq_thread_fn(int irq, void *phy_id) return IRQ_HANDLED; } -static struct nfc_phy_ops i2c_phy_ops = { +static const struct nfc_phy_ops i2c_phy_ops = { .write = st_nci_i2c_write, .enable = st_nci_i2c_enable, .disable = st_nci_i2c_disable, diff --git a/drivers/nfc/st-nci/ndlc.c b/drivers/nfc/st-nci/ndlc.c index 5d74c674368a..e9dc313b333e 100644 --- a/drivers/nfc/st-nci/ndlc.c +++ b/drivers/nfc/st-nci/ndlc.c @@ -253,9 +253,9 @@ static void ndlc_t2_timeout(struct timer_list *t) schedule_work(&ndlc->sm_work); } -int ndlc_probe(void *phy_id, struct nfc_phy_ops *phy_ops, struct device *dev, - int phy_headroom, int phy_tailroom, struct llt_ndlc **ndlc_id, - struct st_nci_se_status *se_status) +int ndlc_probe(void *phy_id, const struct nfc_phy_ops *phy_ops, + struct device *dev, int phy_headroom, int phy_tailroom, + struct llt_ndlc **ndlc_id, struct st_nci_se_status *se_status) { struct llt_ndlc *ndlc; diff --git a/drivers/nfc/st-nci/ndlc.h b/drivers/nfc/st-nci/ndlc.h index 066e2fd75238..c24ce9b0df52 100644 --- a/drivers/nfc/st-nci/ndlc.h +++ b/drivers/nfc/st-nci/ndlc.h @@ -16,7 +16,7 @@ struct st_nci_se_status; /* Low Level Transport description */ struct 
llt_ndlc { struct nci_dev *ndev; - struct nfc_phy_ops *ops; + const struct nfc_phy_ops *ops; void *phy_id; struct timer_list t1_timer; @@ -45,8 +45,8 @@ int ndlc_open(struct llt_ndlc *ndlc); void ndlc_close(struct llt_ndlc *ndlc); int ndlc_send(struct llt_ndlc *ndlc, struct sk_buff *skb); void ndlc_recv(struct llt_ndlc *ndlc, struct sk_buff *skb); -int ndlc_probe(void *phy_id, struct nfc_phy_ops *phy_ops, struct device *dev, - int phy_headroom, int phy_tailroom, struct llt_ndlc **ndlc_id, - struct st_nci_se_status *se_status); +int ndlc_probe(void *phy_id, const struct nfc_phy_ops *phy_ops, + struct device *dev, int phy_headroom, int phy_tailroom, + struct llt_ndlc **ndlc_id, struct st_nci_se_status *se_status); void ndlc_remove(struct llt_ndlc *ndlc); #endif /* __LOCAL_NDLC_H__ */ diff --git a/drivers/nfc/st-nci/spi.c b/drivers/nfc/st-nci/spi.c index 250d56f204c3..a620c34790e6 100644 --- a/drivers/nfc/st-nci/spi.c +++ b/drivers/nfc/st-nci/spi.c @@ -198,7 +198,7 @@ static irqreturn_t st_nci_irq_thread_fn(int irq, void *phy_id) return IRQ_HANDLED; } -static struct nfc_phy_ops spi_phy_ops = { +static const struct nfc_phy_ops spi_phy_ops = { .write = st_nci_spi_write, .enable = st_nci_spi_enable, .disable = st_nci_spi_disable, diff --git a/drivers/nfc/st-nci/vendor_cmds.c b/drivers/nfc/st-nci/vendor_cmds.c index 94b600029a2a..30d2912d1a05 100644 --- a/drivers/nfc/st-nci/vendor_cmds.c +++ b/drivers/nfc/st-nci/vendor_cmds.c @@ -371,7 +371,7 @@ static int st_nci_manufacturer_specific(struct nfc_dev *dev, void *data, return nfc_vendor_cmd_reply(msg); } -static struct nfc_vendor_cmd st_nci_vendor_cmds[] = { +static const struct nfc_vendor_cmd st_nci_vendor_cmds[] = { { .vendor_id = ST_NCI_VENDOR_OUI, .subcmd = FACTORY_MODE, diff --git a/drivers/nfc/st21nfca/core.c b/drivers/nfc/st21nfca/core.c index 6ca0d2f56b18..5e6c99fcfd27 100644 --- a/drivers/nfc/st21nfca/core.c +++ b/drivers/nfc/st21nfca/core.c @@ -72,7 +72,7 @@ static DECLARE_BITMAP(dev_mask, ST21NFCA_NUM_DEVICES); 
-static struct nfc_hci_gate st21nfca_gates[] = { +static const struct nfc_hci_gate st21nfca_gates[] = { {NFC_HCI_ADMIN_GATE, NFC_HCI_ADMIN_PIPE}, {NFC_HCI_LINK_MGMT_GATE, NFC_HCI_LINK_MGMT_PIPE}, {ST21NFCA_DEVICE_MGNT_GATE, ST21NFCA_DEVICE_MGNT_PIPE}, @@ -912,7 +912,7 @@ static int st21nfca_hci_event_received(struct nfc_hci_dev *hdev, u8 pipe, } } -static struct nfc_hci_ops st21nfca_hci_ops = { +static const struct nfc_hci_ops st21nfca_hci_ops = { .open = st21nfca_hci_open, .close = st21nfca_hci_close, .load_session = st21nfca_hci_load_session, @@ -935,7 +935,7 @@ static struct nfc_hci_ops st21nfca_hci_ops = { .se_io = st21nfca_hci_se_io, }; -int st21nfca_hci_probe(void *phy_id, struct nfc_phy_ops *phy_ops, +int st21nfca_hci_probe(void *phy_id, const struct nfc_phy_ops *phy_ops, char *llc_name, int phy_headroom, int phy_tailroom, int phy_payload, struct nfc_hci_dev **hdev, struct st21nfca_se_status *se_status) diff --git a/drivers/nfc/st21nfca/i2c.c b/drivers/nfc/st21nfca/i2c.c index 7a9f4d71707e..1b44a37a71aa 100644 --- a/drivers/nfc/st21nfca/i2c.c +++ b/drivers/nfc/st21nfca/i2c.c @@ -76,8 +76,8 @@ struct st21nfca_i2c_phy { struct mutex phy_lock; }; -static u8 len_seq[] = { 16, 24, 12, 29 }; -static u16 wait_tab[] = { 2, 3, 5, 15, 20, 40}; +static const u8 len_seq[] = { 16, 24, 12, 29 }; +static const u16 wait_tab[] = { 2, 3, 5, 15, 20, 40}; #define I2C_DUMP_SKB(info, skb) \ do { \ @@ -482,7 +482,7 @@ static irqreturn_t st21nfca_hci_irq_thread_fn(int irq, void *phy_id) return IRQ_HANDLED; } -static struct nfc_phy_ops i2c_phy_ops = { +static const struct nfc_phy_ops i2c_phy_ops = { .write = st21nfca_hci_i2c_write, .enable = st21nfca_hci_i2c_enable, .disable = st21nfca_hci_i2c_disable, diff --git a/drivers/nfc/st21nfca/st21nfca.h b/drivers/nfc/st21nfca/st21nfca.h index 5e0de0fef1d4..cb6ad916be91 100644 --- a/drivers/nfc/st21nfca/st21nfca.h +++ b/drivers/nfc/st21nfca/st21nfca.h @@ -144,7 +144,7 @@ struct st21nfca_se_info { }; struct st21nfca_hci_info { - struct 
nfc_phy_ops *phy_ops; + const struct nfc_phy_ops *phy_ops; void *phy_id; struct nfc_hci_dev *hdev; @@ -163,7 +163,7 @@ struct st21nfca_hci_info { struct st21nfca_vendor_info vendor_info; }; -int st21nfca_hci_probe(void *phy_id, struct nfc_phy_ops *phy_ops, +int st21nfca_hci_probe(void *phy_id, const struct nfc_phy_ops *phy_ops, char *llc_name, int phy_headroom, int phy_tailroom, int phy_payload, struct nfc_hci_dev **hdev, struct st21nfca_se_status *se_status); diff --git a/drivers/nfc/st21nfca/vendor_cmds.c b/drivers/nfc/st21nfca/vendor_cmds.c index 62332ca91554..74882866dbaf 100644 --- a/drivers/nfc/st21nfca/vendor_cmds.c +++ b/drivers/nfc/st21nfca/vendor_cmds.c @@ -295,7 +295,7 @@ exit: return r; } -static struct nfc_vendor_cmd st21nfca_vendor_cmds[] = { +static const struct nfc_vendor_cmd st21nfca_vendor_cmds[] = { { .vendor_id = ST21NFCA_VENDOR_OUI, .subcmd = FACTORY_MODE, diff --git a/drivers/nfc/st95hf/core.c b/drivers/nfc/st95hf/core.c index 2dc788c363fd..993818742570 100644 --- a/drivers/nfc/st95hf/core.c +++ b/drivers/nfc/st95hf/core.c @@ -1037,7 +1037,7 @@ static void st95hf_abort_cmd(struct nfc_digital_dev *ddev) { } -static struct nfc_digital_ops st95hf_nfc_digital_ops = { +static const struct nfc_digital_ops st95hf_nfc_digital_ops = { .in_configure_hw = st95hf_in_configure_hw, .in_send_cmd = st95hf_in_send_cmd, diff --git a/drivers/nfc/trf7970a.c b/drivers/nfc/trf7970a.c index 33978022ae47..1aed44629aaa 100644 --- a/drivers/nfc/trf7970a.c +++ b/drivers/nfc/trf7970a.c @@ -1861,7 +1861,7 @@ static void trf7970a_abort_cmd(struct nfc_digital_dev *ddev) mutex_unlock(&trf->lock); } -static struct nfc_digital_ops trf7970a_nfc_ops = { +static const struct nfc_digital_ops trf7970a_nfc_ops = { .in_configure_hw = trf7970a_in_configure_hw, .in_send_cmd = trf7970a_send_cmd, .tg_configure_hw = trf7970a_tg_configure_hw, diff --git a/drivers/nfc/virtual_ncidev.c b/drivers/nfc/virtual_ncidev.c index f73ee0bf3593..b914ab2c2109 100644 --- a/drivers/nfc/virtual_ncidev.c 
+++ b/drivers/nfc/virtual_ncidev.c @@ -65,7 +65,7 @@ static int virtual_nci_send(struct nci_dev *ndev, struct sk_buff *skb) return 0; } -static struct nci_ops virtual_nci_ops = { +static const struct nci_ops virtual_nci_ops = { .open = virtual_nci_open, .close = virtual_nci_close, .send = virtual_nci_send diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c index 9748165e08e9..acbe76a76fb2 100644 --- a/drivers/s390/cio/ccwgroup.c +++ b/drivers/s390/cio/ccwgroup.c @@ -504,28 +504,6 @@ void ccwgroup_driver_unregister(struct ccwgroup_driver *cdriver) EXPORT_SYMBOL(ccwgroup_driver_unregister); /** - * get_ccwgroupdev_by_busid() - obtain device from a bus id - * @gdrv: driver the device is owned by - * @bus_id: bus id of the device to be searched - * - * This function searches all devices owned by @gdrv for a device with a bus - * id matching @bus_id. - * Returns: - * If a match is found, its reference count of the found device is increased - * and it is returned; else %NULL is returned. - */ -struct ccwgroup_device *get_ccwgroupdev_by_busid(struct ccwgroup_driver *gdrv, - char *bus_id) -{ - struct device *dev; - - dev = driver_find_device_by_name(&gdrv->driver, bus_id); - - return dev ? to_ccwgroupdev(dev) : NULL; -} -EXPORT_SYMBOL_GPL(get_ccwgroupdev_by_busid); - -/** * ccwgroup_probe_ccwdev() - probe function for slave devices * @cdev: ccw device to be probed * diff --git a/drivers/s390/net/Kconfig b/drivers/s390/net/Kconfig index bf236d474538..cff91b4f1a76 100644 --- a/drivers/s390/net/Kconfig +++ b/drivers/s390/net/Kconfig @@ -88,15 +88,6 @@ config QETH_L3 To compile as a module choose M. The module name is qeth_l3. If unsure, choose Y. -config QETH_OSN - def_bool !HAVE_MARCH_Z14_FEATURES - prompt "qeth OSN device support" - depends on QETH - help - This enables the qeth driver to support devices in OSN mode. - This feature will be removed in 2021. - If unsure, choose N. 
- config QETH_OSX def_bool !HAVE_MARCH_Z15_FEATURES prompt "qeth OSX device support" diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index f4d554ea0c93..c17031519900 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -259,22 +259,10 @@ struct qeth_hdr_layer2 { __u8 reserved2[16]; } __attribute__ ((packed)); -struct qeth_hdr_osn { - __u8 id; - __u8 reserved; - __u16 seq_no; - __u16 reserved2; - __u16 control_flags; - __u16 pdu_length; - __u8 reserved3[18]; - __u32 ccid; -} __attribute__ ((packed)); - struct qeth_hdr { union { struct qeth_hdr_layer2 l2; struct qeth_hdr_layer3 l3; - struct qeth_hdr_osn osn; } hdr; } __attribute__ ((packed)); @@ -341,7 +329,6 @@ enum qeth_header_ids { QETH_HEADER_TYPE_LAYER3 = 0x01, QETH_HEADER_TYPE_LAYER2 = 0x02, QETH_HEADER_TYPE_L3_TSO = 0x03, - QETH_HEADER_TYPE_OSN = 0x04, QETH_HEADER_TYPE_L2_TSO = 0x06, QETH_HEADER_MASK_INVAL = 0x80, }; @@ -779,13 +766,7 @@ enum qeth_threads { QETH_RECOVER_THREAD = 1, }; -struct qeth_osn_info { - int (*assist_cb)(struct net_device *dev, void *data); - int (*data_cb)(struct sk_buff *skb); -}; - struct qeth_discipline { - const struct device_type *devtype; int (*setup) (struct ccwgroup_device *); void (*remove) (struct ccwgroup_device *); int (*set_online)(struct qeth_card *card, bool carrier_ok); @@ -865,7 +846,6 @@ struct qeth_card { /* QDIO buffer handling */ struct qeth_qdio_info qdio; int read_or_write_problem; - struct qeth_osn_info osn_info; const struct qeth_discipline *discipline; atomic_t force_alloc_skb; struct service_level qeth_service_level; @@ -1058,10 +1038,7 @@ int qeth_get_priority_queue(struct qeth_card *card, struct sk_buff *skb); extern const struct qeth_discipline qeth_l2_discipline; extern const struct qeth_discipline qeth_l3_discipline; extern const struct ethtool_ops qeth_ethtool_ops; -extern const struct ethtool_ops qeth_osn_ethtool_ops; extern const struct attribute_group *qeth_dev_groups[]; -extern const struct 
attribute_group *qeth_osn_dev_groups[]; -extern const struct device_type qeth_generic_devtype; const char *qeth_get_cardname_short(struct qeth_card *); int qeth_resize_buffer_pool(struct qeth_card *card, unsigned int count); @@ -1069,11 +1046,9 @@ int qeth_setup_discipline(struct qeth_card *card, enum qeth_discipline_id disc); void qeth_remove_discipline(struct qeth_card *card); /* exports for qeth discipline device drivers */ -extern struct kmem_cache *qeth_core_header_cache; extern struct qeth_dbf_info qeth_dbf[QETH_DBF_INFOS]; struct net_device *qeth_clone_netdev(struct net_device *orig); -struct qeth_card *qeth_get_card_by_busid(char *bus_id); void qeth_set_allowed_threads(struct qeth_card *card, unsigned long threads, int clear_start_mask); int qeth_threads_running(struct qeth_card *, unsigned long); @@ -1088,9 +1063,6 @@ struct qeth_cmd_buffer *qeth_ipa_alloc_cmd(struct qeth_card *card, enum qeth_ipa_cmds cmd_code, enum qeth_prot_versions prot, unsigned int data_length); -struct qeth_cmd_buffer *qeth_alloc_cmd(struct qeth_channel *channel, - unsigned int length, unsigned int ccws, - long timeout); struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *card, enum qeth_ipa_funcs ipa_func, u16 cmd_code, @@ -1099,18 +1071,12 @@ struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *card, struct qeth_cmd_buffer *qeth_get_diag_cmd(struct qeth_card *card, enum qeth_diags_cmds sub_cmd, unsigned int data_length); -void qeth_notify_cmd(struct qeth_cmd_buffer *iob, int reason); -void qeth_put_cmd(struct qeth_cmd_buffer *iob); int qeth_schedule_recovery(struct qeth_card *card); int qeth_poll(struct napi_struct *napi, int budget); void qeth_setadp_promisc_mode(struct qeth_card *card, bool enable); int qeth_setadpparms_change_macaddr(struct qeth_card *); void qeth_tx_timeout(struct net_device *, unsigned int txqueue); -void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob, - u16 cmd_length, - bool (*match)(struct 
qeth_cmd_buffer *iob, - struct qeth_cmd_buffer *reply)); int qeth_query_switch_attributes(struct qeth_card *card, struct qeth_switch_info *sw_info); int qeth_query_card_info(struct qeth_card *card, @@ -1118,11 +1084,6 @@ int qeth_query_card_info(struct qeth_card *card, int qeth_setadpparms_set_access_ctrl(struct qeth_card *card, enum qeth_ipa_isolation_modes mode); -unsigned int qeth_count_elements(struct sk_buff *skb, unsigned int data_offset); -int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue, - struct sk_buff *skb, struct qeth_hdr *hdr, - unsigned int offset, unsigned int hd_len, - int elements_needed); int qeth_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); void qeth_dbf_longtext(debug_info_t *id, int level, char *text, ...); int qeth_configure_cq(struct qeth_card *, enum qeth_cq); @@ -1148,11 +1109,4 @@ int qeth_xmit(struct qeth_card *card, struct sk_buff *skb, struct qeth_hdr *hdr, struct sk_buff *skb, __be16 proto, unsigned int data_len)); -/* exports for OSN */ -int qeth_osn_assist(struct net_device *, void *, int); -int qeth_osn_register(unsigned char *read_dev_no, struct net_device **, - int (*assist_cb)(struct net_device *, void *), - int (*data_cb)(struct sk_buff *)); -void qeth_osn_deregister(struct net_device *); - #endif /* __QETH_CORE_H__ */ diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 62f88ccbd03f..7f486212c6aa 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -57,8 +57,7 @@ struct qeth_dbf_info qeth_dbf[QETH_DBF_INFOS] = { }; EXPORT_SYMBOL_GPL(qeth_dbf); -struct kmem_cache *qeth_core_header_cache; -EXPORT_SYMBOL_GPL(qeth_core_header_cache); +static struct kmem_cache *qeth_core_header_cache; static struct kmem_cache *qeth_qdio_outbuf_cache; static struct device *qeth_core_root_dev; @@ -101,8 +100,6 @@ static const char *qeth_get_cardname(struct qeth_card *card) return " OSD Express"; case QETH_CARD_TYPE_IQD: return " 
HiperSockets"; - case QETH_CARD_TYPE_OSN: - return " OSN QDIO"; case QETH_CARD_TYPE_OSM: return " OSM QDIO"; case QETH_CARD_TYPE_OSX: @@ -157,8 +154,6 @@ const char *qeth_get_cardname_short(struct qeth_card *card) } case QETH_CARD_TYPE_IQD: return "HiperSockets"; - case QETH_CARD_TYPE_OSN: - return "OSN"; case QETH_CARD_TYPE_OSM: return "OSM_1000"; case QETH_CARD_TYPE_OSX: @@ -431,6 +426,13 @@ static enum iucv_tx_notify qeth_compute_cq_notification(int sbalf15, return n; } +static void qeth_put_cmd(struct qeth_cmd_buffer *iob) +{ + if (refcount_dec_and_test(&iob->ref_count)) { + kfree(iob->data); + kfree(iob); + } +} static void qeth_setup_ccw(struct ccw1 *ccw, u8 cmd_code, u8 flags, u32 len, void *data) { @@ -499,12 +501,11 @@ static void qeth_dequeue_cmd(struct qeth_card *card, spin_unlock_irq(&card->lock); } -void qeth_notify_cmd(struct qeth_cmd_buffer *iob, int reason) +static void qeth_notify_cmd(struct qeth_cmd_buffer *iob, int reason) { iob->rc = reason; complete(&iob->done); } -EXPORT_SYMBOL_GPL(qeth_notify_cmd); static void qeth_flush_local_addrs4(struct qeth_card *card) { @@ -781,10 +782,7 @@ static struct qeth_ipa_cmd *qeth_check_ipa_data(struct qeth_card *card, QETH_CARD_TEXT(card, 5, "chkipad"); if (IS_IPA_REPLY(cmd)) { - if (cmd->hdr.command != IPA_CMD_SETCCID && - cmd->hdr.command != IPA_CMD_DELCCID && - cmd->hdr.command != IPA_CMD_MODCCID && - cmd->hdr.command != IPA_CMD_SET_DIAG_ASS) + if (cmd->hdr.command != IPA_CMD_SET_DIAG_ASS) qeth_issue_ipa_msg(cmd, cmd->hdr.return_code, card); return cmd; } @@ -819,8 +817,6 @@ static struct qeth_ipa_cmd *qeth_check_ipa_data(struct qeth_card *card, if (card->discipline->control_event_handler(card, cmd)) return cmd; return NULL; - case IPA_CMD_MODCCID: - return cmd; case IPA_CMD_REGISTER_LOCAL_ADDR: if (cmd->hdr.prot_version == QETH_PROT_IPV4) qeth_add_local_addrs4(card, &cmd->data.local_addrs4); @@ -877,15 +873,6 @@ static int qeth_check_idx_response(struct qeth_card *card, return 0; } -void 
qeth_put_cmd(struct qeth_cmd_buffer *iob) -{ - if (refcount_dec_and_test(&iob->ref_count)) { - kfree(iob->data); - kfree(iob); - } -} -EXPORT_SYMBOL_GPL(qeth_put_cmd); - static void qeth_release_buffer_cb(struct qeth_card *card, struct qeth_cmd_buffer *iob, unsigned int data_length) @@ -899,9 +886,9 @@ static void qeth_cancel_cmd(struct qeth_cmd_buffer *iob, int rc) qeth_put_cmd(iob); } -struct qeth_cmd_buffer *qeth_alloc_cmd(struct qeth_channel *channel, - unsigned int length, unsigned int ccws, - long timeout) +static struct qeth_cmd_buffer *qeth_alloc_cmd(struct qeth_channel *channel, + unsigned int length, + unsigned int ccws, long timeout) { struct qeth_cmd_buffer *iob; @@ -927,7 +914,6 @@ struct qeth_cmd_buffer *qeth_alloc_cmd(struct qeth_channel *channel, iob->length = length; return iob; } -EXPORT_SYMBOL_GPL(qeth_alloc_cmd); static void qeth_issue_next_read_cb(struct qeth_card *card, struct qeth_cmd_buffer *iob, @@ -958,11 +944,6 @@ static void qeth_issue_next_read_cb(struct qeth_card *card, cmd = qeth_check_ipa_data(card, cmd); if (!cmd) goto out; - if (IS_OSN(card) && card->osn_info.assist_cb && - cmd->hdr.command != IPA_CMD_STARTLAN) { - card->osn_info.assist_cb(card->dev, cmd); - goto out; - } } /* match against pending cmd requests */ @@ -1835,7 +1816,7 @@ static enum qeth_discipline_id qeth_enforce_discipline(struct qeth_card *card) { enum qeth_discipline_id disc = QETH_DISCIPLINE_UNDETERMINED; - if (IS_OSM(card) || IS_OSN(card)) + if (IS_OSM(card)) disc = QETH_DISCIPLINE_LAYER2; else if (IS_VM_NIC(card)) disc = IS_IQD(card) ? 
QETH_DISCIPLINE_LAYER3 : @@ -1885,7 +1866,6 @@ static void qeth_idx_init(struct qeth_card *card) card->info.func_level = QETH_IDX_FUNC_LEVEL_IQD; break; case QETH_CARD_TYPE_OSD: - case QETH_CARD_TYPE_OSN: card->info.func_level = QETH_IDX_FUNC_LEVEL_OSD; break; default: @@ -2442,9 +2422,7 @@ static int qeth_ulp_enable_cb(struct qeth_card *card, struct qeth_reply *reply, static u8 qeth_mpc_select_prot_type(struct qeth_card *card) { - if (IS_OSN(card)) - return QETH_PROT_OSN2; - return IS_LAYER2(card) ? QETH_PROT_LAYER2 : QETH_PROT_TCPIP; + return IS_LAYER2(card) ? QETH_MPC_PROT_L2 : QETH_MPC_PROT_L3; } static int qeth_ulp_enable(struct qeth_card *card) @@ -3000,10 +2978,8 @@ static void qeth_ipa_finalize_cmd(struct qeth_card *card, __ipa_cmd(iob)->hdr.seqno = card->seqno.ipa++; } -void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob, - u16 cmd_length, - bool (*match)(struct qeth_cmd_buffer *iob, - struct qeth_cmd_buffer *reply)) +static void qeth_prepare_ipa_cmd(struct qeth_card *card, + struct qeth_cmd_buffer *iob, u16 cmd_length) { u8 prot_type = qeth_mpc_select_prot_type(card); u16 total_length = iob->length; @@ -3011,7 +2987,6 @@ void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob, qeth_setup_ccw(__ccw_from_cmd(iob), CCW_CMD_WRITE, 0, total_length, iob->data); iob->finalize = qeth_ipa_finalize_cmd; - iob->match = match; memcpy(iob->data, IPA_PDU_HEADER, IPA_PDU_HEADER_SIZE); memcpy(QETH_IPA_PDU_LEN_TOTAL(iob->data), &total_length, 2); @@ -3022,7 +2997,6 @@ void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob, &card->token.ulp_connection_r, QETH_MPC_TOKEN_LENGTH); memcpy(QETH_IPA_PDU_LEN_PDU3(iob->data), &cmd_length, 2); } -EXPORT_SYMBOL_GPL(qeth_prepare_ipa_cmd); static bool qeth_ipa_match_reply(struct qeth_cmd_buffer *iob, struct qeth_cmd_buffer *reply) @@ -3046,7 +3020,8 @@ struct qeth_cmd_buffer *qeth_ipa_alloc_cmd(struct qeth_card *card, if (!iob) return NULL; - 
qeth_prepare_ipa_cmd(card, iob, data_length, qeth_ipa_match_reply); + qeth_prepare_ipa_cmd(card, iob, data_length); + iob->match = qeth_ipa_match_reply; hdr = &__ipa_cmd(iob)->hdr; hdr->command = cmd_code; @@ -3894,7 +3869,8 @@ static int qeth_get_elements_for_frags(struct sk_buff *skb) * Returns the number of pages, and thus QDIO buffer elements, needed to map the * skb's data (both its linear part and paged fragments). */ -unsigned int qeth_count_elements(struct sk_buff *skb, unsigned int data_offset) +static unsigned int qeth_count_elements(struct sk_buff *skb, + unsigned int data_offset) { unsigned int elements = qeth_get_elements_for_frags(skb); addr_t end = (addr_t)skb->data + skb_headlen(skb); @@ -3904,7 +3880,6 @@ unsigned int qeth_count_elements(struct sk_buff *skb, unsigned int data_offset) elements += qeth_get_elements_for_range(start, end); return elements; } -EXPORT_SYMBOL_GPL(qeth_count_elements); #define QETH_HDR_CACHE_OBJ_SIZE (sizeof(struct qeth_hdr_tso) + \ MAX_TCP_HEADER) @@ -4192,10 +4167,11 @@ static int __qeth_xmit(struct qeth_card *card, struct qeth_qdio_out_q *queue, return 0; } -int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue, - struct sk_buff *skb, struct qeth_hdr *hdr, - unsigned int offset, unsigned int hd_len, - int elements_needed) +static int qeth_do_send_packet(struct qeth_card *card, + struct qeth_qdio_out_q *queue, + struct sk_buff *skb, struct qeth_hdr *hdr, + unsigned int offset, unsigned int hd_len, + unsigned int elements_needed) { unsigned int start_index = queue->next_buf_to_fill; struct qeth_qdio_out_buffer *buffer; @@ -4275,7 +4251,6 @@ out: netif_tx_start_queue(txq); return rc; } -EXPORT_SYMBOL_GPL(qeth_do_send_packet); static void qeth_fill_tso_ext(struct qeth_hdr_tso *hdr, unsigned int payload_len, struct sk_buff *skb, @@ -4554,7 +4529,6 @@ static int qeth_mdio_read(struct net_device *dev, int phy_id, int regnum) case MII_BMCR: /* Basic mode control register */ rc = BMCR_FULLDPLX; if 
((card->info.link_type != QETH_LINK_TYPE_GBIT_ETH) && - (card->info.link_type != QETH_LINK_TYPE_OSN) && (card->info.link_type != QETH_LINK_TYPE_10GBIT_ETH) && (card->info.link_type != QETH_LINK_TYPE_25GBIT_ETH)) rc |= BMCR_SPEED100; @@ -5266,10 +5240,6 @@ static struct ccw_device_id qeth_ids[] = { .driver_info = QETH_CARD_TYPE_OSD}, {CCW_DEVICE_DEVTYPE(0x1731, 0x05, 0x1732, 0x05), .driver_info = QETH_CARD_TYPE_IQD}, -#ifdef CONFIG_QETH_OSN - {CCW_DEVICE_DEVTYPE(0x1731, 0x06, 0x1732, 0x06), - .driver_info = QETH_CARD_TYPE_OSN}, -#endif {CCW_DEVICE_DEVTYPE(0x1731, 0x02, 0x1732, 0x03), .driver_info = QETH_CARD_TYPE_OSM}, #ifdef CONFIG_QETH_OSX @@ -5628,14 +5598,6 @@ static void qeth_receive_skb(struct qeth_card *card, struct sk_buff *skb, bool is_cso; switch (hdr->hdr.l2.id) { - case QETH_HEADER_TYPE_OSN: - skb_push(skb, sizeof(*hdr)); - skb_copy_to_linear_data(skb, hdr, sizeof(*hdr)); - QETH_CARD_STAT_ADD(card, rx_bytes, skb->len); - QETH_CARD_STAT_INC(card, rx_packets); - - card->osn_info.data_cb(skb); - return; #if IS_ENABLED(CONFIG_QETH_L3) case QETH_HEADER_TYPE_LAYER3: qeth_l3_rebuild_skb(card, skb, hdr); @@ -5750,16 +5712,6 @@ next_packet: linear_len = sizeof(struct iphdr); headroom = ETH_HLEN; break; - case QETH_HEADER_TYPE_OSN: - skb_len = hdr->hdr.osn.pdu_length; - if (!IS_OSN(card)) { - QETH_CARD_STAT_INC(card, rx_dropped_notsupp); - goto walk_packet; - } - - linear_len = skb_len; - headroom = sizeof(struct qeth_hdr); - break; default: if (hdr->hdr.l2.id & QETH_HEADER_MASK_INVAL) QETH_CARD_STAT_INC(card, rx_frame_errors); @@ -5777,8 +5729,7 @@ next_packet: use_rx_sg = (card->options.cq == QETH_CQ_ENABLED) || (skb_len > READ_ONCE(priv->rx_copybreak) && - !atomic_read(&card->force_alloc_skb) && - !IS_OSN(card)); + !atomic_read(&card->force_alloc_skb)); if (use_rx_sg) { /* QETH_CQ_ENABLED only: */ @@ -6335,14 +6286,9 @@ void qeth_remove_discipline(struct qeth_card *card) card->discipline = NULL; } -const struct device_type qeth_generic_devtype = { +static const 
struct device_type qeth_generic_devtype = { .name = "qeth_generic", }; -EXPORT_SYMBOL_GPL(qeth_generic_devtype); - -static const struct device_type qeth_osn_devtype = { - .name = "qeth_osn", -}; #define DBF_NAME_LEN 20 @@ -6425,10 +6371,6 @@ static struct net_device *qeth_alloc_netdev(struct qeth_card *card) case QETH_CARD_TYPE_OSM: dev = alloc_etherdev(sizeof(*priv)); break; - case QETH_CARD_TYPE_OSN: - dev = alloc_netdev(sizeof(*priv), "osn%d", NET_NAME_UNKNOWN, - ether_setup); - break; default: dev = alloc_etherdev_mqs(sizeof(*priv), QETH_MAX_OUT_QUEUES, 1); } @@ -6442,23 +6384,19 @@ static struct net_device *qeth_alloc_netdev(struct qeth_card *card) dev->ml_priv = card; dev->watchdog_timeo = QETH_TX_TIMEOUT; - dev->min_mtu = IS_OSN(card) ? 64 : 576; + dev->min_mtu = 576; /* initialized when device first goes online: */ dev->max_mtu = 0; dev->mtu = 0; SET_NETDEV_DEV(dev, &card->gdev->dev); netif_carrier_off(dev); - if (IS_OSN(card)) { - dev->ethtool_ops = &qeth_osn_ethtool_ops; - } else { - dev->ethtool_ops = &qeth_ethtool_ops; - dev->priv_flags &= ~IFF_TX_SKB_SHARING; - dev->hw_features |= NETIF_F_SG; - dev->vlan_features |= NETIF_F_SG; - if (IS_IQD(card)) - dev->features |= NETIF_F_SG; - } + dev->ethtool_ops = &qeth_ethtool_ops; + dev->priv_flags &= ~IFF_TX_SKB_SHARING; + dev->hw_features |= NETIF_F_SG; + dev->vlan_features |= NETIF_F_SG; + if (IS_IQD(card)) + dev->features |= NETIF_F_SG; return dev; } @@ -6521,10 +6459,7 @@ static int qeth_core_probe_device(struct ccwgroup_device *gdev) if (rc) goto err_chp_desc; - if (IS_OSN(card)) - gdev->dev.groups = qeth_osn_dev_groups; - else - gdev->dev.groups = qeth_dev_groups; + gdev->dev.groups = qeth_dev_groups; enforced_disc = qeth_enforce_discipline(card); switch (enforced_disc) { @@ -6538,8 +6473,6 @@ static int qeth_core_probe_device(struct ccwgroup_device *gdev) if (rc) goto err_setup_disc; - gdev->dev.type = IS_OSN(card) ? 
&qeth_osn_devtype : - card->discipline->devtype; break; } @@ -6657,21 +6590,6 @@ static struct ccwgroup_driver qeth_core_ccwgroup_driver = { .shutdown = qeth_core_shutdown, }; -struct qeth_card *qeth_get_card_by_busid(char *bus_id) -{ - struct ccwgroup_device *gdev; - struct qeth_card *card; - - gdev = get_ccwgroupdev_by_busid(&qeth_core_ccwgroup_driver, bus_id); - if (!gdev) - return NULL; - - card = dev_get_drvdata(&gdev->dev); - put_device(&gdev->dev); - return card; -} -EXPORT_SYMBOL_GPL(qeth_get_card_by_busid); - int qeth_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { struct qeth_card *card = dev->ml_priv; diff --git a/drivers/s390/net/qeth_core_mpc.c b/drivers/s390/net/qeth_core_mpc.c index 68c2588b9dcc..d9266f7d8187 100644 --- a/drivers/s390/net/qeth_core_mpc.c +++ b/drivers/s390/net/qeth_core_mpc.c @@ -232,9 +232,6 @@ static const struct ipa_cmd_names qeth_ipa_cmd_names[] = { {IPA_CMD_DELVLAN, "delvlan"}, {IPA_CMD_VNICC, "vnic_characteristics"}, {IPA_CMD_SETBRIDGEPORT_OSA, "set_bridge_port(osa)"}, - {IPA_CMD_SETCCID, "setccid"}, - {IPA_CMD_DELCCID, "delccid"}, - {IPA_CMD_MODCCID, "modccid"}, {IPA_CMD_SETIP, "setip"}, {IPA_CMD_QIPASSIST, "qipassist"}, {IPA_CMD_SETASSPARMS, "setassparms"}, diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h index e4bde7daf083..6257f00786b3 100644 --- a/drivers/s390/net/qeth_core_mpc.h +++ b/drivers/s390/net/qeth_core_mpc.h @@ -34,8 +34,6 @@ extern const unsigned char IPA_PDU_HEADER[]; /*****************************************************************************/ #define IPA_CMD_INITIATOR_HOST 0x00 #define IPA_CMD_INITIATOR_OSA 0x01 -#define IPA_CMD_INITIATOR_HOST_REPLY 0x80 -#define IPA_CMD_INITIATOR_OSA_REPLY 0x81 #define IPA_CMD_PRIM_VERSION_NO 0x01 struct qeth_ipa_caps { @@ -66,7 +64,6 @@ static inline bool qeth_ipa_caps_enabled(struct qeth_ipa_caps *caps, u32 mask) enum qeth_card_types { QETH_CARD_TYPE_OSD = 1, QETH_CARD_TYPE_IQD = 5, - QETH_CARD_TYPE_OSN = 6, 
QETH_CARD_TYPE_OSM = 3, QETH_CARD_TYPE_OSX = 2, }; @@ -75,12 +72,6 @@ enum qeth_card_types { #define IS_OSD(card) ((card)->info.type == QETH_CARD_TYPE_OSD) #define IS_OSM(card) ((card)->info.type == QETH_CARD_TYPE_OSM) -#ifdef CONFIG_QETH_OSN -#define IS_OSN(card) ((card)->info.type == QETH_CARD_TYPE_OSN) -#else -#define IS_OSN(card) false -#endif - #ifdef CONFIG_QETH_OSX #define IS_OSX(card) ((card)->info.type == QETH_CARD_TYPE_OSX) #else @@ -95,7 +86,6 @@ enum qeth_link_types { QETH_LINK_TYPE_FAST_ETH = 0x01, QETH_LINK_TYPE_HSTR = 0x02, QETH_LINK_TYPE_GBIT_ETH = 0x03, - QETH_LINK_TYPE_OSN = 0x04, QETH_LINK_TYPE_10GBIT_ETH = 0x10, QETH_LINK_TYPE_25GBIT_ETH = 0x12, QETH_LINK_TYPE_LANE_ETH100 = 0x81, @@ -126,9 +116,6 @@ enum qeth_ipa_cmds { IPA_CMD_DELVLAN = 0x26, IPA_CMD_VNICC = 0x2a, IPA_CMD_SETBRIDGEPORT_OSA = 0x2b, - IPA_CMD_SETCCID = 0x41, - IPA_CMD_DELCCID = 0x42, - IPA_CMD_MODCCID = 0x43, IPA_CMD_SETIP = 0xb1, IPA_CMD_QIPASSIST = 0xb2, IPA_CMD_SETASSPARMS = 0xb3, @@ -879,8 +866,7 @@ extern const char *qeth_get_ipa_msg(enum qeth_ipa_return_codes rc); extern const char *qeth_get_ipa_cmd_name(enum qeth_ipa_cmds cmd); /* Helper functions */ -#define IS_IPA_REPLY(cmd) ((cmd->hdr.initiator == IPA_CMD_INITIATOR_HOST) || \ - (cmd->hdr.initiator == IPA_CMD_INITIATOR_OSA_REPLY)) +#define IS_IPA_REPLY(cmd) ((cmd)->hdr.initiator == IPA_CMD_INITIATOR_HOST) /*****************************************************************************/ /* END OF IP Assist related definitions */ @@ -919,10 +905,9 @@ extern const unsigned char ULP_ENABLE[]; (PDU_ENCAPSULATION(buffer) + 0x17) #define QETH_ULP_ENABLE_RESP_LINK_TYPE(buffer) \ (PDU_ENCAPSULATION(buffer) + 0x2b) -/* Layer 2 definitions */ -#define QETH_PROT_LAYER2 0x08 -#define QETH_PROT_TCPIP 0x03 -#define QETH_PROT_OSN2 0x0a + +#define QETH_MPC_PROT_L2 0x08 +#define QETH_MPC_PROT_L3 0x03 #define QETH_ULP_ENABLE_PROT_TYPE(buffer) (buffer + 0x50) #define QETH_IPA_CMD_PROT_TYPE(buffer) (buffer + 0x19) diff --git 
a/drivers/s390/net/qeth_core_sys.c b/drivers/s390/net/qeth_core_sys.c index 5815114da468..406be169173c 100644 --- a/drivers/s390/net/qeth_core_sys.c +++ b/drivers/s390/net/qeth_core_sys.c @@ -671,11 +671,6 @@ static const struct attribute_group qeth_dev_group = { .attrs = qeth_dev_attrs, }; -const struct attribute_group *qeth_osn_dev_groups[] = { - &qeth_dev_group, - NULL, -}; - const struct attribute_group *qeth_dev_groups[] = { &qeth_dev_group, &qeth_dev_extended_group, diff --git a/drivers/s390/net/qeth_ethtool.c b/drivers/s390/net/qeth_ethtool.c index 2c4cb300a8fc..3937986f159a 100644 --- a/drivers/s390/net/qeth_ethtool.c +++ b/drivers/s390/net/qeth_ethtool.c @@ -469,10 +469,3 @@ const struct ethtool_ops qeth_ethtool_ops = { .set_per_queue_coalesce = qeth_set_per_queue_coalesce, .get_link_ksettings = qeth_get_link_ksettings, }; - -const struct ethtool_ops qeth_osn_ethtool_ops = { - .get_strings = qeth_get_strings, - .get_ethtool_stats = qeth_get_ethtool_stats, - .get_sset_count = qeth_get_sset_count, - .get_drvinfo = qeth_get_drvinfo, -}; diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 2abf86c104d5..7fe0f1aea3cb 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -309,17 +309,16 @@ static int qeth_l2_request_initial_mac(struct qeth_card *card) /* fall back to alternative mechanism: */ } - if (!IS_OSN(card)) { - rc = qeth_setadpparms_change_macaddr(card); - if (!rc) - goto out; - QETH_DBF_MESSAGE(2, "READ_MAC Assist failed on device %x: %#x\n", - CARD_DEVID(card), rc); - QETH_CARD_TEXT_(card, 2, "1err%04x", rc); - /* fall back once more: */ - } + rc = qeth_setadpparms_change_macaddr(card); + if (!rc) + goto out; + QETH_DBF_MESSAGE(2, "READ_MAC Assist failed on device %x: %#x\n", + CARD_DEVID(card), rc); + QETH_CARD_TEXT_(card, 2, "1err%04x", rc); - /* some devices don't support a custom MAC address: */ + /* Fall back once more, but some devices don't support a custom MAC + * address: + */ if 
(IS_OSM(card) || IS_OSX(card)) return (rc) ? rc : -EADDRNOTAVAIL; eth_hw_addr_random(card->dev); @@ -334,7 +333,7 @@ static void qeth_l2_register_dev_addr(struct qeth_card *card) if (!is_valid_ether_addr(card->dev->dev_addr)) qeth_l2_request_initial_mac(card); - if (!IS_OSN(card) && !qeth_l2_send_setmac(card, card->dev->dev_addr)) + if (!qeth_l2_send_setmac(card, card->dev->dev_addr)) card->info.dev_addr_is_registered = 1; else card->info.dev_addr_is_registered = 0; @@ -496,44 +495,6 @@ static void qeth_l2_rx_mode_work(struct work_struct *work) qeth_l2_set_promisc_mode(card); } -static int qeth_l2_xmit_osn(struct qeth_card *card, struct sk_buff *skb, - struct qeth_qdio_out_q *queue) -{ - gfp_t gfp = GFP_ATOMIC | (skb_pfmemalloc(skb) ? __GFP_MEMALLOC : 0); - struct qeth_hdr *hdr = (struct qeth_hdr *)skb->data; - addr_t end = (addr_t)(skb->data + sizeof(*hdr)); - addr_t start = (addr_t)skb->data; - unsigned int elements = 0; - unsigned int hd_len = 0; - int rc; - - if (skb->protocol == htons(ETH_P_IPV6)) - return -EPROTONOSUPPORT; - - if (qeth_get_elements_for_range(start, end) > 1) { - /* Misaligned HW header, move it to its own buffer element. 
*/ - hdr = kmem_cache_alloc(qeth_core_header_cache, gfp); - if (!hdr) - return -ENOMEM; - hd_len = sizeof(*hdr); - skb_copy_from_linear_data(skb, (char *)hdr, hd_len); - elements++; - } - - elements += qeth_count_elements(skb, hd_len); - if (elements > queue->max_elements) { - rc = -E2BIG; - goto out; - } - - rc = qeth_do_send_packet(card, queue, skb, hdr, hd_len, hd_len, - elements); -out: - if (rc && hd_len) - kmem_cache_free(qeth_core_header_cache, hdr); - return rc; -} - static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -548,12 +509,8 @@ static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb, txq = qeth_iqd_translate_txq(dev, txq); queue = card->qdio.out_qs[txq]; - if (IS_OSN(card)) - rc = qeth_l2_xmit_osn(card, skb, queue); - else - rc = qeth_xmit(card, skb, queue, vlan_get_protocol(skb), - qeth_l2_fill_header); - + rc = qeth_xmit(card, skb, queue, vlan_get_protocol(skb), + qeth_l2_fill_header); if (!rc) return NETDEV_TX_OK; @@ -890,23 +847,8 @@ static const struct net_device_ops qeth_l2_netdev_ops = { .ndo_bridge_setlink = qeth_l2_bridge_setlink, }; -static const struct net_device_ops qeth_osn_netdev_ops = { - .ndo_open = qeth_open, - .ndo_stop = qeth_stop, - .ndo_get_stats64 = qeth_get_stats64, - .ndo_start_xmit = qeth_l2_hard_start_xmit, - .ndo_validate_addr = eth_validate_addr, - .ndo_tx_timeout = qeth_tx_timeout, -}; - static int qeth_l2_setup_netdev(struct qeth_card *card) { - if (IS_OSN(card)) { - card->dev->netdev_ops = &qeth_osn_netdev_ops; - card->dev->flags |= IFF_NOARP; - goto add_napi; - } - card->dev->needed_headroom = sizeof(struct qeth_hdr); card->dev->netdev_ops = &qeth_l2_netdev_ops; card->dev->priv_flags |= IFF_UNICAST_FLT; @@ -952,7 +894,6 @@ static int qeth_l2_setup_netdev(struct qeth_card *card) PAGE_SIZE * (QDIO_MAX_ELEMENTS_PER_BUFFER - 1)); } -add_napi: netif_napi_add(card->dev, &card->napi, qeth_poll, QETH_NAPI_WEIGHT); return register_netdev(card->dev); } @@ -1044,84 +985,6 @@ static 
void qeth_l2_enable_brport_features(struct qeth_card *card) } } -#ifdef CONFIG_QETH_OSN -static void qeth_osn_assist_cb(struct qeth_card *card, - struct qeth_cmd_buffer *iob, - unsigned int data_length) -{ - qeth_notify_cmd(iob, 0); - qeth_put_cmd(iob); -} - -int qeth_osn_assist(struct net_device *dev, void *data, int data_len) -{ - struct qeth_cmd_buffer *iob; - struct qeth_card *card; - - if (data_len < 0) - return -EINVAL; - if (!dev) - return -ENODEV; - card = dev->ml_priv; - if (!card) - return -ENODEV; - QETH_CARD_TEXT(card, 2, "osnsdmc"); - if (!qeth_card_hw_is_reachable(card)) - return -ENODEV; - - iob = qeth_alloc_cmd(&card->write, IPA_PDU_HEADER_SIZE + data_len, 1, - QETH_IPA_TIMEOUT); - if (!iob) - return -ENOMEM; - - qeth_prepare_ipa_cmd(card, iob, (u16) data_len, NULL); - - memcpy(__ipa_cmd(iob), data, data_len); - iob->callback = qeth_osn_assist_cb; - return qeth_send_ipa_cmd(card, iob, NULL, NULL); -} -EXPORT_SYMBOL(qeth_osn_assist); - -int qeth_osn_register(unsigned char *read_dev_no, struct net_device **dev, - int (*assist_cb)(struct net_device *, void *), - int (*data_cb)(struct sk_buff *)) -{ - struct qeth_card *card; - char bus_id[16]; - u16 devno; - - memcpy(&devno, read_dev_no, 2); - sprintf(bus_id, "0.0.%04x", devno); - card = qeth_get_card_by_busid(bus_id); - if (!card || !IS_OSN(card)) - return -ENODEV; - *dev = card->dev; - - QETH_CARD_TEXT(card, 2, "osnreg"); - if ((assist_cb == NULL) || (data_cb == NULL)) - return -EINVAL; - card->osn_info.assist_cb = assist_cb; - card->osn_info.data_cb = data_cb; - return 0; -} -EXPORT_SYMBOL(qeth_osn_register); - -void qeth_osn_deregister(struct net_device *dev) -{ - struct qeth_card *card; - - if (!dev) - return; - card = dev->ml_priv; - if (!card) - return; - QETH_CARD_TEXT(card, 2, "osndereg"); - card->osn_info.assist_cb = NULL; - card->osn_info.data_cb = NULL; -} -EXPORT_SYMBOL(qeth_osn_deregister); -#endif - /* SETBRIDGEPORT support, async notifications */ enum qeth_an_event_type {anev_reg_unreg, 
anev_abort, anev_reset}; @@ -2190,16 +2053,15 @@ static int qeth_l2_probe_device(struct ccwgroup_device *gdev) struct qeth_card *card = dev_get_drvdata(&gdev->dev); int rc; - if (IS_OSN(card)) - dev_notice(&gdev->dev, "OSN support will be dropped in 2021\n"); - qeth_l2_vnicc_set_defaults(card); mutex_init(&card->sbp_lock); - if (gdev->dev.type == &qeth_generic_devtype) { + if (gdev->dev.type) { rc = device_add_groups(&gdev->dev, qeth_l2_attr_groups); if (rc) return rc; + } else { + gdev->dev.type = &qeth_l2_devtype; } INIT_WORK(&card->rx_mode_work, qeth_l2_rx_mode_work); @@ -2210,8 +2072,9 @@ static void qeth_l2_remove_device(struct ccwgroup_device *gdev) { struct qeth_card *card = dev_get_drvdata(&gdev->dev); - if (gdev->dev.type == &qeth_generic_devtype) + if (gdev->dev.type != &qeth_l2_devtype) device_remove_groups(&gdev->dev, qeth_l2_attr_groups); + qeth_set_allowed_threads(card, 0, 1); wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0); @@ -2331,7 +2194,6 @@ static int qeth_l2_control_event(struct qeth_card *card, } const struct qeth_discipline qeth_l2_discipline = { - .devtype = &qeth_l2_devtype, .setup = qeth_l2_probe_device, .remove = qeth_l2_remove_device, .set_online = qeth_l2_set_online, diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index f0d6f205c53c..7cc59f4f046c 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1940,12 +1940,14 @@ static int qeth_l3_probe_device(struct ccwgroup_device *gdev) if (!card->cmd_wq) return -ENOMEM; - if (gdev->dev.type == &qeth_generic_devtype) { + if (gdev->dev.type) { rc = device_add_groups(&gdev->dev, qeth_l3_attr_groups); if (rc) { destroy_workqueue(card->cmd_wq); return rc; } + } else { + gdev->dev.type = &qeth_l3_devtype; } INIT_WORK(&card->rx_mode_work, qeth_l3_rx_mode_work); @@ -1956,7 +1958,7 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev) { struct qeth_card *card = dev_get_drvdata(&cgdev->dev); - if 
(cgdev->dev.type == &qeth_generic_devtype) + if (cgdev->dev.type != &qeth_l3_devtype) device_remove_groups(&cgdev->dev, qeth_l3_attr_groups); qeth_set_allowed_threads(card, 0, 1); @@ -2065,7 +2067,6 @@ static int qeth_l3_control_event(struct qeth_card *card, } const struct qeth_discipline qeth_l3_discipline = { - .devtype = &qeth_l3_devtype, .setup = qeth_l3_probe_device, .remove = qeth_l3_remove_device, .set_online = qeth_l3_set_online, diff --git a/include/asm-generic/compat.h b/include/asm-generic/compat.h index 30f7b18a36f9..d46c0201cc34 100644 --- a/include/asm-generic/compat.h +++ b/include/asm-generic/compat.h @@ -20,7 +20,18 @@ typedef u16 compat_ushort_t; typedef u32 compat_uint_t; typedef u32 compat_ulong_t; typedef u32 compat_uptr_t; +typedef u32 compat_caddr_t; typedef u32 compat_aio_context_t; +typedef u32 compat_old_sigset_t; + +#ifndef __compat_uid32_t +typedef u32 __compat_uid32_t; +typedef u32 __compat_gid32_t; +#endif + +#ifndef compat_mode_t +typedef u32 compat_mode_t; +#endif #ifdef CONFIG_COMPAT_FOR_U64_ALIGNMENT typedef s64 __attribute__((aligned(4))) compat_s64; @@ -30,4 +41,10 @@ typedef s64 compat_s64; typedef u64 compat_u64; #endif +#ifndef _COMPAT_NSIG +typedef u32 compat_sigset_word; +#define _COMPAT_NSIG _NSIG +#define _COMPAT_NSIG_BPW 32 +#endif + #endif diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 26bf15e6cd35..5e62e2383b7f 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -4,6 +4,7 @@ #include <asm/types.h> #include <linux/bits.h> +#include <linux/typecheck.h> #include <uapi/linux/kernel.h> @@ -253,6 +254,55 @@ static __always_inline void __assign_bit(long nr, volatile unsigned long *addr, __clear_bit(nr, addr); } +/** + * __ptr_set_bit - Set bit in a pointer's value + * @nr: the bit to set + * @addr: the address of the pointer variable + * + * Example: + * void *p = foo(); + * __ptr_set_bit(bit, &p); + */ +#define __ptr_set_bit(nr, addr) \ + ({ \ + typecheck_pointer(*(addr)); \ + __set_bit(nr, 
(unsigned long *)(addr)); \ + }) + +/** + * __ptr_clear_bit - Clear bit in a pointer's value + * @nr: the bit to clear + * @addr: the address of the pointer variable + * + * Example: + * void *p = foo(); + * __ptr_clear_bit(bit, &p); + */ +#define __ptr_clear_bit(nr, addr) \ + ({ \ + typecheck_pointer(*(addr)); \ + __clear_bit(nr, (unsigned long *)(addr)); \ + }) + +/** + * __ptr_test_bit - Test bit in a pointer's value + * @nr: the bit to test + * @addr: the address of the pointer variable + * + * Example: + * void *p = foo(); + * if (__ptr_test_bit(bit, &p)) { + * ... + * } else { + * ... + * } + */ +#define __ptr_test_bit(nr, addr) \ + ({ \ + typecheck_pointer(*(addr)); \ + test_bit(nr, (unsigned long *)(addr)); \ + }) + #ifdef __KERNEL__ #ifndef set_mask_bits diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e8e2b0393ca9..0edff8f5177e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -168,6 +168,7 @@ struct bpf_map { u32 max_entries; u32 map_flags; int spin_lock_off; /* >=0 valid offset, <0 error */ + int timer_off; /* >=0 valid offset, <0 error */ u32 id; int numa_node; u32 btf_key_type_id; @@ -197,30 +198,53 @@ static inline bool map_value_has_spin_lock(const struct bpf_map *map) return map->spin_lock_off >= 0; } -static inline void check_and_init_map_lock(struct bpf_map *map, void *dst) +static inline bool map_value_has_timer(const struct bpf_map *map) { - if (likely(!map_value_has_spin_lock(map))) - return; - *(struct bpf_spin_lock *)(dst + map->spin_lock_off) = - (struct bpf_spin_lock){}; + return map->timer_off >= 0; } -/* copy everything but bpf_spin_lock */ +static inline void check_and_init_map_value(struct bpf_map *map, void *dst) +{ + if (unlikely(map_value_has_spin_lock(map))) + *(struct bpf_spin_lock *)(dst + map->spin_lock_off) = + (struct bpf_spin_lock){}; + if (unlikely(map_value_has_timer(map))) + *(struct bpf_timer *)(dst + map->timer_off) = + (struct bpf_timer){}; +} + +/* copy everything but bpf_spin_lock and 
bpf_timer. There could be one of each. */ static inline void copy_map_value(struct bpf_map *map, void *dst, void *src) { + u32 s_off = 0, s_sz = 0, t_off = 0, t_sz = 0; + if (unlikely(map_value_has_spin_lock(map))) { - u32 off = map->spin_lock_off; + s_off = map->spin_lock_off; + s_sz = sizeof(struct bpf_spin_lock); + } else if (unlikely(map_value_has_timer(map))) { + t_off = map->timer_off; + t_sz = sizeof(struct bpf_timer); + } - memcpy(dst, src, off); - memcpy(dst + off + sizeof(struct bpf_spin_lock), - src + off + sizeof(struct bpf_spin_lock), - map->value_size - off - sizeof(struct bpf_spin_lock)); + if (unlikely(s_sz || t_sz)) { + if (s_off < t_off || !s_sz) { + swap(s_off, t_off); + swap(s_sz, t_sz); + } + memcpy(dst, src, t_off); + memcpy(dst + t_off + t_sz, + src + t_off + t_sz, + s_off - t_off - t_sz); + memcpy(dst + s_off + s_sz, + src + s_off + s_sz, + map->value_size - s_off - s_sz); } else { memcpy(dst, src, map->value_size); } } void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, bool lock_src); +void bpf_timer_cancel_and_free(void *timer); int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size); struct bpf_offload_dev; @@ -314,6 +338,7 @@ enum bpf_arg_type { ARG_PTR_TO_FUNC, /* pointer to a bpf program function */ ARG_PTR_TO_STACK_OR_NULL, /* pointer to stack or NULL */ ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */ + ARG_PTR_TO_TIMER, /* pointer to bpf_timer */ __BPF_ARG_TYPE_MAX, }; @@ -554,6 +579,11 @@ struct btf_func_model { */ #define BPF_TRAMP_F_SKIP_FRAME BIT(2) +/* Store IP address of the caller on the trampoline stack, + * so it's available for trampoline's programs. + */ +#define BPF_TRAMP_F_IP_ARG BIT(3) + /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 * bytes on x86. 
Pick a number to fit into BPF_IMAGE_SIZE / 2 */ @@ -1509,12 +1539,12 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, struct bpf_prog *xdp_prog, struct bpf_map *map, bool exclude_ingress); -bool dev_map_can_have_prog(struct bpf_map *map); void __cpu_map_flush(void); int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, struct net_device *dev_rx); -bool cpu_map_prog_allowed(struct bpf_map *map); +int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu, + struct sk_buff *skb); /* Return map's numa specified by userspace */ static inline int bpf_map_attr_numa_node(const union bpf_attr *attr) @@ -1711,6 +1741,12 @@ static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, return 0; } +static inline int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu, + struct sk_buff *skb) +{ + return -EOPNOTSUPP; +} + static inline bool cpu_map_prog_allowed(struct bpf_map *map) { return false; @@ -1852,6 +1888,12 @@ void bpf_map_offload_map_free(struct bpf_map *map); int bpf_prog_test_run_syscall(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); + +int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog); +int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); +int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags); +void sock_map_unhash(struct sock *sk); +void sock_map_close(struct sock *sk, long timeout); #else static inline int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr) @@ -1884,24 +1926,6 @@ static inline int bpf_prog_test_run_syscall(struct bpf_prog *prog, { return -ENOTSUPP; } -#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ - -#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) -int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog); -int sock_map_prog_detach(const union bpf_attr 
*attr, enum bpf_prog_type ptype); -int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags); -void sock_map_unhash(struct sock *sk); -void sock_map_close(struct sock *sk, long timeout); - -void bpf_sk_reuseport_detach(struct sock *sk); -int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, - void *value); -int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key, - void *value, u64 map_flags); -#else -static inline void bpf_sk_reuseport_detach(struct sock *sk) -{ -} #ifdef CONFIG_BPF_SYSCALL static inline int sock_map_get_from_fd(const union bpf_attr *attr, @@ -1921,7 +1945,21 @@ static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void { return -EOPNOTSUPP; } +#endif /* CONFIG_BPF_SYSCALL */ +#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ + +#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) +void bpf_sk_reuseport_detach(struct sock *sk); +int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, + void *value); +int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags); +#else +static inline void bpf_sk_reuseport_detach(struct sock *sk) +{ +} +#ifdef CONFIG_BPF_SYSCALL static inline int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, void *value) { diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 7ba7e800d472..2338cb113d62 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -53,7 +53,14 @@ struct bpf_reg_state { /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE | * PTR_TO_MAP_VALUE_OR_NULL */ - struct bpf_map *map_ptr; + struct { + struct bpf_map *map_ptr; + /* To distinguish map lookups from outer map + * the map_uid is non-zero for registers + * pointing to inner maps. 
+ */ + u32 map_uid; + }; /* for PTR_TO_BTF_ID */ struct { @@ -201,12 +208,19 @@ struct bpf_func_state { * zero == main subprog */ u32 subprogno; + /* Every bpf_timer_start will increment async_entry_cnt. + * It's used to distinguish: + * void foo(void) { for(;;); } + * void foo(void) { bpf_timer_set_callback(,foo); } + */ + u32 async_entry_cnt; + bool in_callback_fn; + bool in_async_callback_fn; /* The following fields should be last. See copy_func_state() */ int acquired_refs; struct bpf_reference_state *refs; int allocated_stack; - bool in_callback_fn; struct bpf_stack_state *stack; }; @@ -392,6 +406,7 @@ struct bpf_subprog_info { bool has_tail_call; bool tail_call_reachable; bool has_ld_abs; + bool is_async_cb; }; /* single container for all structs diff --git a/include/linux/btf.h b/include/linux/btf.h index 94a0c976c90f..214fde93214b 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -99,6 +99,7 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, const struct btf_member *m, u32 expected_offset, u32 expected_size); int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t); +int btf_find_timer(const struct btf *btf, const struct btf_type *t); bool btf_type_is_void(const struct btf_type *t); s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind); const struct btf_type *btf_type_skip_modifiers(const struct btf *btf, diff --git a/include/linux/can/bittiming.h b/include/linux/can/bittiming.h index ae7a3411167c..9de6e9053e34 100644 --- a/include/linux/can/bittiming.h +++ b/include/linux/can/bittiming.h @@ -37,7 +37,7 @@ * quanta, from when the bit is sent on the TX pin to when it is * received on the RX pin of the transmitter. Possible options: * - * O: automatic mode. The controller dynamically measure @tdcv + * 0: automatic mode. The controller dynamically measures @tdcv * for each transmitted CAN FD frame. * * Other values: manual mode. Use the fixed provided value. 
@@ -45,7 +45,7 @@ * @tdco: Transmitter Delay Compensation Offset. Offset value, in time * quanta, defining the distance between the start of the bit * reception on the RX pin of the transceiver and the SSP - * position such as SSP = @tdcv + @tdco. + * position such that SSP = @tdcv + @tdco. * * If @tdco is zero, then TDC is disabled and both @tdcv and * @tdcf should be ignored. diff --git a/include/linux/can/platform/flexcan.h b/include/linux/can/platform/flexcan.h new file mode 100644 index 000000000000..1b536fb999de --- /dev/null +++ b/include/linux/can/platform/flexcan.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2021 Angelo Dureghello <angelo@kernel-space.org> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#ifndef _CAN_PLATFORM_FLEXCAN_H +#define _CAN_PLATFORM_FLEXCAN_H + +struct flexcan_platform_data { + u32 clock_frequency; + u8 clk_src; +}; + +#endif /* _CAN_PLATFORM_FLEXCAN_H */ diff --git a/include/linux/can/rx-offload.h b/include/linux/can/rx-offload.h index 40882df7105e..c11477620403 100644 --- a/include/linux/can/rx-offload.h +++ b/include/linux/can/rx-offload.h @@ -20,6 +20,7 @@ struct can_rx_offload { bool drop); struct sk_buff_head skb_queue; + struct sk_buff_head skb_irq_queue; u32 skb_queue_len_max; unsigned int mb_first; @@ -48,14 +49,11 @@ unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload, unsigned int *frame_len_ptr); int can_rx_offload_queue_tail(struct can_rx_offload *offload, struct sk_buff *skb); +void can_rx_offload_irq_finish(struct can_rx_offload *offload); +void can_rx_offload_threaded_irq_finish(struct can_rx_offload *offload); void can_rx_offload_del(struct can_rx_offload *offload); void can_rx_offload_enable(struct can_rx_offload *offload); -static inline void can_rx_offload_schedule(struct can_rx_offload *offload) -{ - napi_schedule(&offload->napi); -} - static inline void can_rx_offload_disable(struct can_rx_offload *offload) { napi_disable(&offload->napi); diff --git a/include/linux/compat.h b/include/linux/compat.h index c270124e4402..8e0598c7d1d1 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -20,11 +20,8 @@ #include <linux/unistd.h> #include <asm/compat.h> - -#ifdef CONFIG_COMPAT #include <asm/siginfo.h> #include <asm/signal.h> -#endif #ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER /* @@ -95,8 +92,6 @@ struct compat_iovec { compat_size_t iov_len; }; -#ifdef CONFIG_COMPAT - #ifndef compat_user_stack_pointer #define compat_user_stack_pointer() current_user_stack_pointer() #endif @@ -131,9 +126,11 @@ struct compat_tms { #define _COMPAT_NSIG_WORDS (_COMPAT_NSIG / _COMPAT_NSIG_BPW) +#ifndef compat_sigset_t typedef struct { compat_sigset_word sig[_COMPAT_NSIG_WORDS]; } compat_sigset_t; +#endif int 
set_compat_user_sigmask(const compat_sigset_t __user *umask, size_t sigsetsize); @@ -384,6 +381,7 @@ struct compat_keyctl_kdf_params { __u32 __spare[8]; }; +struct compat_stat; struct compat_statfs; struct compat_statfs64; struct compat_old_linux_dirent; @@ -428,7 +426,7 @@ put_compat_sigset(compat_sigset_t __user *compat, const sigset_t *set, unsigned int size) { /* size <= sizeof(compat_sigset_t) <= sizeof(sigset_t) */ -#ifdef __BIG_ENDIAN +#if defined(__BIG_ENDIAN) && defined(CONFIG_64BIT) compat_sigset_t v; switch (_NSIG_WORDS) { case 4: v.sig[7] = (set->sig[3] >> 32); v.sig[6] = set->sig[3]; @@ -929,17 +927,6 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args); #endif /* CONFIG_ARCH_HAS_SYSCALL_WRAPPER */ - -/* - * For most but not all architectures, "am I in a compat syscall?" and - * "am I a compat task?" are the same question. For architectures on which - * they aren't the same question, arch code can override in_compat_syscall. - */ - -#ifndef in_compat_syscall -static inline bool in_compat_syscall(void) { return is_compat_task(); } -#endif - /** * ns_to_old_timeval32 - Compat version of ns_to_timeval * @nsec: the nanoseconds value to be converted @@ -969,6 +956,17 @@ int kcompat_sys_statfs64(const char __user * pathname, compat_size_t sz, int kcompat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user * buf); +#ifdef CONFIG_COMPAT + +/* + * For most but not all architectures, "am I in a compat syscall?" and + * "am I a compat task?" are the same question. For architectures on which + * they aren't the same question, arch code can override in_compat_syscall. 
+ */ +#ifndef in_compat_syscall +static inline bool in_compat_syscall(void) { return is_compat_task(); } +#endif + #else /* !CONFIG_COMPAT */ #define is_compat_task() (0) diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h index 1587961f1a7b..c7fa4a3498fe 100644 --- a/include/linux/dsa/8021q.h +++ b/include/linux/dsa/8021q.h @@ -11,60 +11,48 @@ struct dsa_switch; struct sk_buff; struct net_device; -struct packet_type; -struct dsa_8021q_context; -struct dsa_8021q_crosschip_link { +struct dsa_tag_8021q_vlan { struct list_head list; int port; - struct dsa_8021q_context *other_ctx; - int other_port; + u16 vid; refcount_t refcount; }; -struct dsa_8021q_ops { - int (*vlan_add)(struct dsa_switch *ds, int port, u16 vid, u16 flags); - int (*vlan_del)(struct dsa_switch *ds, int port, u16 vid); -}; - struct dsa_8021q_context { - const struct dsa_8021q_ops *ops; struct dsa_switch *ds; - struct list_head crosschip_links; + struct list_head vlans; /* EtherType of RX VID, used for filtering on master interface */ __be16 proto; }; -#define DSA_8021Q_N_SUBVLAN 8 - -int dsa_8021q_setup(struct dsa_8021q_context *ctx, bool enabled); +int dsa_tag_8021q_register(struct dsa_switch *ds, __be16 proto); -int dsa_8021q_crosschip_bridge_join(struct dsa_8021q_context *ctx, int port, - struct dsa_8021q_context *other_ctx, - int other_port); - -int dsa_8021q_crosschip_bridge_leave(struct dsa_8021q_context *ctx, int port, - struct dsa_8021q_context *other_ctx, - int other_port); +void dsa_tag_8021q_unregister(struct dsa_switch *ds); struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, u16 tpid, u16 tci); -void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id, - int *subvlan); +void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id); + +int dsa_tag_8021q_bridge_tx_fwd_offload(struct dsa_switch *ds, int port, + struct net_device *br, + int bridge_num); + +void dsa_tag_8021q_bridge_tx_fwd_unoffload(struct dsa_switch *ds, 
int port, + struct net_device *br, + int bridge_num); + +u16 dsa_8021q_bridge_tx_fwd_offload_vid(int bridge_num); u16 dsa_8021q_tx_vid(struct dsa_switch *ds, int port); u16 dsa_8021q_rx_vid(struct dsa_switch *ds, int port); -u16 dsa_8021q_rx_vid_subvlan(struct dsa_switch *ds, int port, u16 subvlan); - int dsa_8021q_rx_switch_id(u16 vid); int dsa_8021q_rx_source_port(u16 vid); -u16 dsa_8021q_rx_subvlan(u16 vid); - bool vid_is_dsa_8021q_rxvlan(u16 vid); bool vid_is_dsa_8021q_txvlan(u16 vid); diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h index b6089b88314c..0eadc7ac44ec 100644 --- a/include/linux/dsa/sja1105.h +++ b/include/linux/dsa/sja1105.h @@ -59,7 +59,6 @@ struct sja1105_skb_cb { ((struct sja1105_skb_cb *)((skb)->cb)) struct sja1105_port { - u16 subvlan_map[DSA_8021Q_N_SUBVLAN]; struct kthread_worker *xmit_worker; struct kthread_work xmit_work; struct sk_buff_head xmit_queue; diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 232daaec56e4..4711b96dae0c 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -17,8 +17,6 @@ #include <linux/compat.h> #include <uapi/linux/ethtool.h> -#ifdef CONFIG_COMPAT - struct compat_ethtool_rx_flow_spec { u32 flow_type; union ethtool_flow_union h_u; @@ -38,8 +36,6 @@ struct compat_ethtool_rxnfc { u32 rule_locs[]; }; -#endif /* CONFIG_COMPAT */ - #include <linux/rculist.h> /** diff --git a/include/linux/filter.h b/include/linux/filter.h index 472f97074da0..ba36989f711a 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -559,7 +559,8 @@ struct bpf_prog { kprobe_override:1, /* Do we override a kprobe? */ has_callchain_buf:1, /* callchain buffer allocated? 
*/ enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */ - call_get_stack:1; /* Do we call bpf_get_stack() or bpf_get_stackid() */ + call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */ + call_get_func_ip:1; /* Do we call get_func_ip() */ enum bpf_prog_type type; /* Type of BPF program */ enum bpf_attach_type expected_attach_type; /* For some prog types */ u32 len; /* Number of filter blocks */ diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index b651c5e32a28..b73b4ff749e1 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -57,6 +57,7 @@ struct br_ip_list { #define BR_MRP_AWARE BIT(17) #define BR_MRP_LOST_CONT BIT(18) #define BR_MRP_LOST_IN_CONT BIT(19) +#define BR_TX_FWD_OFFLOAD BIT(20) #define BR_DEFAULT_AGEING_TIME (300 * HZ) @@ -70,9 +71,6 @@ bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto); bool br_multicast_has_router_adjacent(struct net_device *dev, int proto); bool br_multicast_enabled(const struct net_device *dev); bool br_multicast_router(const struct net_device *dev); -int br_mdb_replay(struct net_device *br_dev, struct net_device *dev, - const void *ctx, bool adding, struct notifier_block *nb, - struct netlink_ext_ack *extack); #else static inline int br_multicast_list_adjacent(struct net_device *dev, struct list_head *br_ip_list) @@ -104,13 +102,6 @@ static inline bool br_multicast_router(const struct net_device *dev) { return false; } -static inline int br_mdb_replay(const struct net_device *br_dev, - const struct net_device *dev, const void *ctx, - bool adding, struct notifier_block *nb, - struct netlink_ext_ack *extack) -{ - return -EOPNOTSUPP; -} #endif #if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_VLAN_FILTERING) @@ -120,9 +111,8 @@ int br_vlan_get_pvid_rcu(const struct net_device *dev, u16 *p_pvid); int br_vlan_get_proto(const struct net_device *dev, u16 *p_proto); int br_vlan_get_info(const struct net_device 
*dev, u16 vid, struct bridge_vlan_info *p_vinfo); -int br_vlan_replay(struct net_device *br_dev, struct net_device *dev, - const void *ctx, bool adding, struct notifier_block *nb, - struct netlink_ext_ack *extack); +int br_vlan_get_info_rcu(const struct net_device *dev, u16 vid, + struct bridge_vlan_info *p_vinfo); #else static inline bool br_vlan_enabled(const struct net_device *dev) { @@ -150,12 +140,10 @@ static inline int br_vlan_get_info(const struct net_device *dev, u16 vid, return -EINVAL; } -static inline int br_vlan_replay(struct net_device *br_dev, - struct net_device *dev, const void *ctx, - bool adding, struct notifier_block *nb, - struct netlink_ext_ack *extack) +static inline int br_vlan_get_info_rcu(const struct net_device *dev, u16 vid, + struct bridge_vlan_info *p_vinfo) { - return -EOPNOTSUPP; + return -EINVAL; } #endif @@ -167,8 +155,6 @@ void br_fdb_clear_offload(const struct net_device *dev, u16 vid); bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag); u8 br_port_get_stp_state(const struct net_device *dev); clock_t br_get_ageing_time(const struct net_device *br_dev); -int br_fdb_replay(const struct net_device *br_dev, const struct net_device *dev, - const void *ctx, bool adding, struct notifier_block *nb); #else static inline struct net_device * br_fdb_find_port(const struct net_device *br_dev, @@ -197,12 +183,40 @@ static inline clock_t br_get_ageing_time(const struct net_device *br_dev) { return 0; } +#endif + +#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_NET_SWITCHDEV) + +int switchdev_bridge_port_offload(struct net_device *brport_dev, + struct net_device *dev, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb, + bool tx_fwd_offload, + struct netlink_ext_ack *extack); +void switchdev_bridge_port_unoffload(struct net_device *brport_dev, + const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb); + +#else + +static inline int 
+switchdev_bridge_port_offload(struct net_device *brport_dev, + struct net_device *dev, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb, + bool tx_fwd_offload, + struct netlink_ext_ack *extack) +{ + return -EINVAL; +} -static inline int br_fdb_replay(const struct net_device *br_dev, - const struct net_device *dev, const void *ctx, - bool adding, struct notifier_block *nb) +static inline void +switchdev_bridge_port_unoffload(struct net_device *brport_dev, + const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb) { - return -EOPNOTSUPP; } #endif diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 53aa0343bf69..67e042932681 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -178,6 +178,15 @@ static inline struct net_device *ip_dev_find(struct net *net, __be32 addr) int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b); int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *); +#ifdef CONFIG_INET +int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size); +#else +static inline int inet_gifconf(struct net_device *dev, char __user *buf, + int len, int size) +{ + return 0; +} +#endif void devinet_init(void); struct in_device *inetdev_by_index(struct net *, int); __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope); diff --git a/include/linux/ioam6.h b/include/linux/ioam6.h new file mode 100644 index 000000000000..94a24b36998f --- /dev/null +++ b/include/linux/ioam6.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * IPv6 IOAM + * + * Author: + * Justin Iurman <justin.iurman@uliege.be> + */ +#ifndef _LINUX_IOAM6_H +#define _LINUX_IOAM6_H + +#include <uapi/linux/ioam6.h> + +#endif /* _LINUX_IOAM6_H */ diff --git a/include/linux/ioam6_genl.h b/include/linux/ioam6_genl.h new file mode 100644 index 000000000000..176e67919de3 --- /dev/null +++ b/include/linux/ioam6_genl.h 
@@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * IPv6 IOAM Generic Netlink API + * + * Author: + * Justin Iurman <justin.iurman@uliege.be> + */ +#ifndef _LINUX_IOAM6_GENL_H +#define _LINUX_IOAM6_GENL_H + +#include <uapi/linux/ioam6_genl.h> + +#endif /* _LINUX_IOAM6_GENL_H */ diff --git a/include/linux/ioam6_iptunnel.h b/include/linux/ioam6_iptunnel.h new file mode 100644 index 000000000000..07d9dfedd29d --- /dev/null +++ b/include/linux/ioam6_iptunnel.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * IPv6 IOAM Lightweight Tunnel API + * + * Author: + * Justin Iurman <justin.iurman@uliege.be> + */ +#ifndef _LINUX_IOAM6_IPTUNNEL_H +#define _LINUX_IOAM6_IPTUNNEL_H + +#include <uapi/linux/ioam6_iptunnel.h> + +#endif /* _LINUX_IOAM6_IPTUNNEL_H */ diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 70b2ad3b9884..ef4a69865737 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -76,6 +76,9 @@ struct ipv6_devconf { __s32 disable_policy; __s32 ndisc_tclass; __s32 rpl_seg_enabled; + __u32 ioam6_id; + __u32 ioam6_id_wide; + __u8 ioam6_enabled; struct ctl_table_header *sysctl_header; }; diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 944aa3aa3035..beb918328eef 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -356,6 +356,7 @@ struct mhi_controller_config { * @fbc_download: MHI host needs to do complete image transfer (optional) * @wake_set: Device wakeup set flag * @irq_flags: irq flags passed to request_irq (optional) + * @mru: the default MRU for the MHI device * * Fields marked as (required) need to be populated by the controller driver * before calling mhi_register_controller(). 
For the fields marked as (optional) @@ -448,6 +449,7 @@ struct mhi_controller { bool fbc_download; bool wake_set; unsigned long irq_flags; + u32 mru; }; /** diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 3dd6641e942c..b358fc160cfc 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -921,7 +921,8 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 scatter_fcs[0x1]; u8 enhanced_multi_pkt_send_wqe[0x1]; u8 tunnel_lso_const_out_ip_id[0x1]; - u8 reserved_at_1c[0x2]; + u8 tunnel_lro_gre[0x1]; + u8 tunnel_lro_vxlan[0x1]; u8 tunnel_stateless_gre[0x1]; u8 tunnel_stateless_vxlan[0x1]; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index eaf5bb008aa9..c871dc223dfa 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3289,14 +3289,6 @@ static inline bool dev_has_header(const struct net_device *dev) return dev->header_ops && dev->header_ops->create; } -typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr, - int len, int size); -int register_gifconf(unsigned int family, gifconf_func_t *gifconf); -static inline int unregister_gifconf(unsigned int family) -{ - return register_gifconf(family, NULL); -} - #ifdef CONFIG_NET_FLOW_LIMIT #define FLOW_LIMIT_HISTORY (1 << 7) /* must be ^2 and !overflow buckets */ struct sd_flow_limit { @@ -3984,6 +3976,8 @@ static inline void dev_consume_skb_any(struct sk_buff *skb) __dev_kfree_skb_any(skb, SKB_REASON_CONSUMED); } +u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp, + struct bpf_prog *xdp_prog); void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog); int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb); int netif_rx(struct sk_buff *skb); @@ -4012,9 +4006,11 @@ int netdev_rx_handler_register(struct net_device *dev, void netdev_rx_handler_unregister(struct net_device *dev); bool dev_valid_name(const char *name); +int get_user_ifreq(struct ifreq *ifr, void 
__user **ifrdata, void __user *arg); +int put_user_ifreq(struct ifreq *ifr, void __user *arg); int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_copyout); -int dev_ifconf(struct net *net, struct ifconf *, int); +int dev_ifconf(struct net *net, struct ifconf __user *ifc); int dev_ethtool(struct net *net, struct ifreq *); unsigned int dev_get_flags(const struct net_device *); int __dev_change_flags(struct net_device *dev, unsigned int flags, diff --git a/include/linux/phy.h b/include/linux/phy.h index 3b80dc3ed68b..736e1d1a47c4 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1431,6 +1431,7 @@ static inline int phy_device_register(struct phy_device *phy) static inline void phy_device_free(struct phy_device *phydev) { } #endif /* CONFIG_PHYLIB */ void phy_device_remove(struct phy_device *phydev); +int phy_get_c45_ids(struct phy_device *phydev); int phy_init_hw(struct phy_device *phydev); int phy_suspend(struct phy_device *phydev); int phy_resume(struct phy_device *phydev); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b2db9cd9a73f..f19190820e63 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -863,8 +863,8 @@ struct sk_buff { __u8 tc_skip_classify:1; __u8 tc_at_ingress:1; #endif -#ifdef CONFIG_NET_REDIRECT __u8 redirected:1; +#ifdef CONFIG_NET_REDIRECT __u8 from_ingress:1; #endif #ifdef CONFIG_TLS_DEVICE @@ -4664,17 +4664,13 @@ static inline __wsum lco_csum(struct sk_buff *skb) static inline bool skb_is_redirected(const struct sk_buff *skb) { -#ifdef CONFIG_NET_REDIRECT return skb->redirected; -#else - return false; -#endif } static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress) { -#ifdef CONFIG_NET_REDIRECT skb->redirected = 1; +#ifdef CONFIG_NET_REDIRECT skb->from_ingress = from_ingress; if (skb->from_ingress) skb->tstamp = 0; @@ -4683,9 +4679,7 @@ static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress) static inline void 
skb_reset_redirect(struct sk_buff *skb) { -#ifdef CONFIG_NET_REDIRECT skb->redirected = 0; -#endif } static inline bool skb_csum_is_sctp(struct sk_buff *skb) diff --git a/include/linux/typecheck.h b/include/linux/typecheck.h index 20d310331eb5..46b15e2aaefb 100644 --- a/include/linux/typecheck.h +++ b/include/linux/typecheck.h @@ -22,4 +22,13 @@ (void)__tmp; \ }) +/* + * Check at compile time that something is a pointer type. + */ +#define typecheck_pointer(x) \ +({ typeof(x) __dummy; \ + (void)sizeof(*__dummy); \ + 1; \ +}) + #endif /* TYPECHECK_H_INCLUDED */ diff --git a/include/net/af_unix.h b/include/net/af_unix.h index f42fdddecd41..435a2c3d5a6f 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -82,6 +82,8 @@ static inline struct unix_sock *unix_sk(const struct sock *sk) long unix_inq_len(struct sock *sk); long unix_outq_len(struct sock *sk); +int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size, + int flags); #ifdef CONFIG_SYSCTL int unix_sysctl_register(struct net *net); void unix_sysctl_unregister(struct net *net); @@ -89,4 +91,14 @@ void unix_sysctl_unregister(struct net *net); static inline int unix_sysctl_register(struct net *net) { return 0; } static inline void unix_sysctl_unregister(struct net *net) {} #endif + +#ifdef CONFIG_BPF_SYSCALL +extern struct proto unix_proto; + +int unix_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore); +void __init unix_bpf_build_proto(void); +#else +static inline void __init unix_bpf_build_proto(void) +{} +#endif #endif diff --git a/include/net/dsa.h b/include/net/dsa.h index 33f40c1ec379..55fcac854058 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -88,11 +88,6 @@ struct dsa_device_ops { struct packet_type *pt); void (*flow_dissect)(const struct sk_buff *skb, __be16 *proto, int *offset); - /* Used to determine which traffic should match the DSA filter in - * eth_type_trans, and which, if any, should bypass it and be processed - * as regular on the 
master net device. - */ - bool (*filter)(const struct sk_buff *skb, struct net_device *dev); unsigned int needed_headroom; unsigned int needed_tailroom; const char *name; @@ -159,6 +154,12 @@ struct dsa_switch_tree { */ struct net_device **lags; unsigned int lags_len; + + /* Track the largest switch index within a tree */ + unsigned int last_switch; + + /* Track the bridges with forwarding offload enabled */ + unsigned long fwd_offloading_bridges; }; #define dsa_lags_foreach_id(_id, _dst) \ @@ -240,7 +241,6 @@ struct dsa_port { struct dsa_switch_tree *dst; struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt); - bool (*filter)(const struct sk_buff *skb, struct net_device *dev); enum { DSA_PORT_TYPE_UNUSED = 0, @@ -259,6 +259,7 @@ struct dsa_port { bool vlan_filtering; u8 stp_state; struct net_device *bridge_dev; + int bridge_num; struct devlink_port devlink_port; bool devlink_port_setup; struct phylink *pl; @@ -352,6 +353,9 @@ struct dsa_switch { unsigned int ageing_time_min; unsigned int ageing_time_max; + /* Storage for drivers using tag_8021q */ + struct dsa_8021q_context *tag_8021q_ctx; + /* devlink used to represent this switch device */ struct devlink *devlink; @@ -407,6 +411,12 @@ struct dsa_switch { */ unsigned int num_lag_ids; + /* Drivers that support bridge forwarding offload should set this to + * the maximum number of bridges spanning the same switch tree that can + * be offloaded. 
+ */ + unsigned int num_fwd_offloading_bridges; + size_t num_ports; }; @@ -690,6 +700,14 @@ struct dsa_switch_ops { struct net_device *bridge); void (*port_bridge_leave)(struct dsa_switch *ds, int port, struct net_device *bridge); + /* Called right after .port_bridge_join() */ + int (*port_bridge_tx_fwd_offload)(struct dsa_switch *ds, int port, + struct net_device *bridge, + int bridge_num); + /* Called right before .port_bridge_leave() */ + void (*port_bridge_tx_fwd_unoffload)(struct dsa_switch *ds, int port, + struct net_device *bridge, + int bridge_num); void (*port_stp_state_set)(struct dsa_switch *ds, int port, u8 state); void (*port_fast_age)(struct dsa_switch *ds, int port); @@ -869,6 +887,13 @@ struct dsa_switch_ops { const struct switchdev_obj_ring_role_mrp *mrp); int (*port_mrp_del_ring_role)(struct dsa_switch *ds, int port, const struct switchdev_obj_ring_role_mrp *mrp); + + /* + * tag_8021q operations + */ + int (*tag_8021q_vlan_add)(struct dsa_switch *ds, int port, u16 vid, + u16 flags); + int (*tag_8021q_vlan_del)(struct dsa_switch *ds, int port, u16 vid); }; #define DSA_DEVLINK_PARAM_DRIVER(_id, _name, _type, _cmodes) \ @@ -954,15 +979,6 @@ static inline bool netdev_uses_dsa(const struct net_device *dev) return false; } -static inline bool dsa_can_decode(const struct sk_buff *skb, - struct net_device *dev) -{ -#if IS_ENABLED(CONFIG_NET_DSA) - return !dev->dsa_ptr->filter || dev->dsa_ptr->filter(skb, dev); -#endif - return false; -} - /* All DSA tags that push the EtherType to the right (basically all except tail * tags, which don't break dissection) can be treated the same from the * perspective of the flow dissector. 
diff --git a/include/net/ioam6.h b/include/net/ioam6.h new file mode 100644 index 000000000000..3c2993bc48c8 --- /dev/null +++ b/include/net/ioam6.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * IPv6 IOAM implementation + * + * Author: + * Justin Iurman <justin.iurman@uliege.be> + */ + +#ifndef _NET_IOAM6_H +#define _NET_IOAM6_H + +#include <linux/net.h> +#include <linux/ipv6.h> +#include <linux/ioam6.h> +#include <linux/rhashtable-types.h> + +struct ioam6_namespace { + struct rhash_head head; + struct rcu_head rcu; + + struct ioam6_schema __rcu *schema; + + __be16 id; + __be32 data; + __be64 data_wide; +}; + +struct ioam6_schema { + struct rhash_head head; + struct rcu_head rcu; + + struct ioam6_namespace __rcu *ns; + + u32 id; + int len; + __be32 hdr; + + u8 data[0]; +}; + +struct ioam6_pernet_data { + struct mutex lock; + struct rhashtable namespaces; + struct rhashtable schemas; +}; + +static inline struct ioam6_pernet_data *ioam6_pernet(struct net *net) +{ +#if IS_ENABLED(CONFIG_IPV6) + return net->ipv6.ioam6_data; +#else + return NULL; +#endif +} + +struct ioam6_namespace *ioam6_namespace(struct net *net, __be16 id); +void ioam6_fill_trace_data(struct sk_buff *skb, + struct ioam6_namespace *ns, + struct ioam6_trace_hdr *trace); + +int ioam6_init(void); +void ioam6_exit(void); + +int ioam6_iptunnel_init(void); +void ioam6_iptunnel_exit(void); + +#endif /* _NET_IOAM6_H */ diff --git a/include/net/ip.h b/include/net/ip.h index d9683bef8684..9192444f2964 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -436,18 +436,32 @@ static inline bool ip_sk_ignore_df(const struct sock *sk) static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, bool forwarding) { + const struct rtable *rt = container_of(dst, struct rtable, dst); struct net *net = dev_net(dst->dev); unsigned int mtu; if (net->ipv4.sysctl_ip_fwd_use_pmtu || ip_mtu_locked(dst) || - !forwarding) - return dst_mtu(dst); + !forwarding) { + mtu = rt->rt_pmtu; + if 
(mtu && time_before(jiffies, rt->dst.expires)) + goto out; + } /* 'forwarding = true' case should always honour route mtu */ mtu = dst_metric_raw(dst, RTAX_MTU); - if (!mtu) - mtu = min(READ_ONCE(dst->dev->mtu), IP_MAX_MTU); + if (mtu) + goto out; + + mtu = READ_ONCE(dst->dev->mtu); + + if (unlikely(ip_mtu_locked(dst))) { + if (rt->rt_uses_gateway && mtu > 576) + mtu = 576; + } + +out: + mtu = min_t(unsigned int, mtu, IP_MAX_MTU); return mtu - lwtunnel_headroom(dst->lwtstate, mtu); } diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 625a38ccb5d9..820eae3ea95f 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -316,12 +316,13 @@ static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info * !lwtunnel_cmp_encap(nha->fib_nh_lws, nhb->fib_nh_lws); } -static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) +static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst, + bool forwarding) { struct inet6_dev *idev; unsigned int mtu; - if (dst_metric_locked(dst, RTAX_MTU)) { + if (!forwarding || dst_metric_locked(dst, RTAX_MTU)) { mtu = dst_metric_raw(dst, RTAX_MTU); if (mtu) goto out; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index b8620519eace..2f65701a43c9 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -174,7 +174,6 @@ struct netns_ipv4 { int sysctl_tcp_fastopen; const struct tcp_congestion_ops __rcu *tcp_congestion_control; struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; - spinlock_t tcp_fastopen_ctx_lock; unsigned int sysctl_tcp_fastopen_blackhole_timeout; atomic_t tfo_active_disable_times; unsigned long tfo_active_disable_stamp; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index bde0b7adb4a3..a4b550380316 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -51,6 +51,8 @@ struct netns_sysctl_ipv6 { int max_dst_opts_len; int max_hbh_opts_len; int seg6_flowlabel; + u32 ioam6_id; + u64 
ioam6_id_wide; bool skip_notify_on_dev_down; u8 fib_notify_on_flag_change; }; @@ -110,6 +112,7 @@ struct netns_ipv6 { spinlock_t lock; u32 seq; } ip6addrlbl_table; + struct ioam6_pernet_data *ioam6_data; }; #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) diff --git a/include/net/nfc/digital.h b/include/net/nfc/digital.h index 963db96bcbbb..bb3e8fdc0692 100644 --- a/include/net/nfc/digital.h +++ b/include/net/nfc/digital.h @@ -191,7 +191,7 @@ struct digital_poll_tech { struct nfc_digital_dev { struct nfc_dev *nfc_dev; - struct nfc_digital_ops *ops; + const struct nfc_digital_ops *ops; u32 protocols; @@ -236,7 +236,7 @@ struct nfc_digital_dev { void (*skb_add_crc)(struct sk_buff *skb); }; -struct nfc_digital_dev *nfc_digital_allocate_device(struct nfc_digital_ops *ops, +struct nfc_digital_dev *nfc_digital_allocate_device(const struct nfc_digital_ops *ops, __u32 supported_protocols, __u32 driver_capabilities, int tx_headroom, diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h index b35f37a57686..756c11084f65 100644 --- a/include/net/nfc/hci.h +++ b/include/net/nfc/hci.h @@ -118,7 +118,7 @@ struct nfc_hci_dev { struct sk_buff_head msg_rx_queue; - struct nfc_hci_ops *ops; + const struct nfc_hci_ops *ops; struct nfc_llc *llc; @@ -151,7 +151,7 @@ struct nfc_hci_dev { }; /* hci device allocation */ -struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, +struct nfc_hci_dev *nfc_hci_allocate_device(const struct nfc_hci_ops *ops, struct nfc_hci_init_data *init_data, unsigned long quirks, u32 protocols, @@ -168,7 +168,7 @@ void nfc_hci_set_clientdata(struct nfc_hci_dev *hdev, void *clientdata); void *nfc_hci_get_clientdata(struct nfc_hci_dev *hdev); static inline int nfc_hci_set_vendor_cmds(struct nfc_hci_dev *hdev, - struct nfc_vendor_cmd *cmds, + const struct nfc_vendor_cmd *cmds, int n_cmds) { return nfc_set_vendor_cmds(hdev->ndev, cmds, n_cmds); diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 1df0f8074c9d..00f2c60971d7 100644 --- 
a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -82,10 +82,10 @@ struct nci_ops { void (*hci_cmd_received)(struct nci_dev *ndev, u8 pipe, u8 cmd, struct sk_buff *skb); - struct nci_driver_ops *prop_ops; + const struct nci_driver_ops *prop_ops; size_t n_prop_ops; - struct nci_driver_ops *core_ops; + const struct nci_driver_ops *core_ops; size_t n_core_ops; }; @@ -194,7 +194,7 @@ struct nci_hci_dev { /* NCI Core structures */ struct nci_dev { struct nfc_dev *nfc_dev; - struct nci_ops *ops; + const struct nci_ops *ops; struct nci_hci_dev *hci_dev; int tx_headroom; @@ -267,7 +267,7 @@ struct nci_dev { }; /* ----- NCI Devices ----- */ -struct nci_dev *nci_allocate_device(struct nci_ops *ops, +struct nci_dev *nci_allocate_device(const struct nci_ops *ops, __u32 supported_protocols, int tx_headroom, int tx_tailroom); @@ -343,7 +343,7 @@ static inline void *nci_get_drvdata(struct nci_dev *ndev) } static inline int nci_set_vendor_cmds(struct nci_dev *ndev, - struct nfc_vendor_cmd *cmds, + const struct nfc_vendor_cmd *cmds, int n_cmds) { return nfc_set_vendor_cmds(ndev->nfc_dev, cmds, n_cmds); @@ -360,7 +360,7 @@ int nci_core_rsp_packet(struct nci_dev *ndev, __u16 opcode, int nci_core_ntf_packet(struct nci_dev *ndev, __u16 opcode, struct sk_buff *skb); void nci_rx_data_packet(struct nci_dev *ndev, struct sk_buff *skb); -int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload); +int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, const void *payload); int nci_send_data(struct nci_dev *ndev, __u8 conn_id, struct sk_buff *skb); int nci_conn_max_data_pkt_payload_size(struct nci_dev *ndev, __u8 conn_id); void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb, diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 2cd3a261bcbc..85b698794b14 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -188,17 +188,17 @@ struct nfc_dev { struct rfkill *rfkill; - struct nfc_vendor_cmd *vendor_cmds; + 
const struct nfc_vendor_cmd *vendor_cmds; int n_vendor_cmds; - struct nfc_ops *ops; + const struct nfc_ops *ops; struct genl_info *cur_cmd_info; }; #define to_nfc_dev(_dev) container_of(_dev, struct nfc_dev, dev) extern struct class nfc_class; -struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, +struct nfc_dev *nfc_allocate_device(const struct nfc_ops *ops, u32 supported_protocols, int tx_headroom, int tx_tailroom); @@ -297,7 +297,7 @@ void nfc_send_to_raw_sock(struct nfc_dev *dev, struct sk_buff *skb, u8 payload_type, u8 direction); static inline int nfc_set_vendor_cmds(struct nfc_dev *dev, - struct nfc_vendor_cmd *cmds, + const struct nfc_vendor_cmd *cmds, int n_cmds) { if (dev->vendor_cmds || dev->n_vendor_cmds) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index e4cac9218ce1..66468ff8cc0a 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -238,6 +238,12 @@ switchdev_notifier_info_to_extack(const struct switchdev_notifier_info *info) return info->extack; } +static inline bool +switchdev_fdb_is_dynamically_learned(const struct switchdev_notifier_fdb_info *fdb_info) +{ + return !fdb_info->added_by_user && !fdb_info->is_local; +} + #ifdef CONFIG_NET_SWITCHDEV void switchdev_deferred_process(void); @@ -266,6 +272,30 @@ void switchdev_port_fwd_mark_set(struct net_device *dev, struct net_device *group_dev, bool joining); +int switchdev_handle_fdb_add_to_device(struct net_device *dev, + const struct switchdev_notifier_fdb_info *fdb_info, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev), + int (*add_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info), + int (*lag_add_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info)); + +int 
switchdev_handle_fdb_del_to_device(struct net_device *dev, + const struct switchdev_notifier_fdb_info *fdb_info, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev), + int (*del_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info), + int (*lag_del_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info)); + int switchdev_handle_port_obj_add(struct net_device *dev, struct switchdev_notifier_port_obj_info *port_obj_info, bool (*check_cb)(const struct net_device *dev), @@ -350,6 +380,38 @@ call_switchdev_blocking_notifiers(unsigned long val, } static inline int +switchdev_handle_fdb_add_to_device(struct net_device *dev, + const struct switchdev_notifier_fdb_info *fdb_info, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev), + int (*add_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info), + int (*lag_add_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info)) +{ + return 0; +} + +static inline int +switchdev_handle_fdb_del_to_device(struct net_device *dev, + const struct switchdev_notifier_fdb_info *fdb_info, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev), + int (*del_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info), + int (*lag_del_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info 
*fdb_info)) +{ + return 0; +} + +static inline int switchdev_handle_port_obj_add(struct net_device *dev, struct switchdev_notifier_port_obj_info *port_obj_info, bool (*check_cb)(const struct net_device *dev), diff --git a/include/net/xdp.h b/include/net/xdp.h index 5533f0ab2afc..ad5b02dcb6f4 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -276,6 +276,11 @@ xdp_data_meta_unsupported(const struct xdp_buff *xdp) return unlikely(xdp->data_meta > xdp->data); } +static inline bool xdp_metalen_invalid(unsigned long metalen) +{ + return (metalen & (sizeof(__u32) - 1)) || (metalen > 32); +} + struct xdp_attachment_info { struct bpf_prog *prog; u32 flags; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index bf9252c7381e..2db6925e04f4 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -324,9 +324,6 @@ union bpf_iter_link_info { * **BPF_PROG_TYPE_SK_LOOKUP** * *data_in* and *data_out* must be NULL. * - * **BPF_PROG_TYPE_XDP** - * *ctx_in* and *ctx_out* must be NULL. - * * **BPF_PROG_TYPE_RAW_TRACEPOINT**, * **BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE** * @@ -3249,7 +3246,7 @@ union bpf_attr { * long bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) * Description * Select a **SO_REUSEPORT** socket from a - * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*. + * **BPF_MAP_TYPE_REUSEPORT_SOCKARRAY** *map*. * It checks the selected socket is matching the incoming * request in the socket buffer. * Return @@ -4780,6 +4777,76 @@ union bpf_attr { * Execute close syscall for given FD. * Return * A syscall result. + * + * long bpf_timer_init(struct bpf_timer *timer, struct bpf_map *map, u64 flags) + * Description + * Initialize the timer. + * First 4 bits of *flags* specify clockid. + * Only CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_BOOTTIME are allowed. + * All other bits of *flags* are reserved. + * The verifier will reject the program if *timer* is not from + * the same *map*. + * Return + * 0 on success. 
+ * **-EBUSY** if *timer* is already initialized. + * **-EINVAL** if invalid *flags* are passed. + * **-EPERM** if *timer* is in a map that doesn't have any user references. + * The user space should either hold a file descriptor to a map with timers + * or pin such map in bpffs. When map is unpinned or file descriptor is + * closed all timers in the map will be cancelled and freed. + * + * long bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn) + * Description + * Configure the timer to call *callback_fn* static function. + * Return + * 0 on success. + * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier. + * **-EPERM** if *timer* is in a map that doesn't have any user references. + * The user space should either hold a file descriptor to a map with timers + * or pin such map in bpffs. When map is unpinned or file descriptor is + * closed all timers in the map will be cancelled and freed. + * + * long bpf_timer_start(struct bpf_timer *timer, u64 nsecs, u64 flags) + * Description + * Set timer expiration N nanoseconds from the current time. The + * configured callback will be invoked in soft irq context on some cpu + * and will not repeat unless another bpf_timer_start() is made. + * In such case the next invocation can migrate to a different cpu. + * Since struct bpf_timer is a field inside map element the map + * owns the timer. The bpf_timer_set_callback() will increment refcnt + * of BPF program to make sure that callback_fn code stays valid. + * When user space reference to a map reaches zero all timers + * in a map are cancelled and corresponding program's refcnts are + * decremented. This is done to make sure that Ctrl-C of a user + * process doesn't leave any timers running. If map is pinned in + * bpffs the callback_fn can re-arm itself indefinitely. + * bpf_map_update/delete_elem() helpers and user space sys_bpf commands + * cancel and free the timer in the given map element. 
+ * The map can contain timers that invoke callback_fn-s from different + * programs. The same callback_fn can serve different timers from + * different maps if key/value layout matches across maps. + * Every bpf_timer_set_callback() can have different callback_fn. + * + * Return + * 0 on success. + * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier + * or invalid *flags* are passed. + * + * long bpf_timer_cancel(struct bpf_timer *timer) + * Description + * Cancel the timer and wait for callback_fn to finish if it was running. + * Return + * 0 if the timer was not active. + * 1 if the timer was active. + * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier. + * **-EDEADLK** if callback_fn tried to call bpf_timer_cancel() on its + * own timer which would have led to a deadlock otherwise. + * + * u64 bpf_get_func_ip(void *ctx) + * Description + * Get address of the traced function (for tracing and kprobe programs). + * Return + * Address of the traced function. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4951,6 +5018,11 @@ union bpf_attr { FN(sys_bpf), \ FN(btf_find_by_name_kind), \ FN(sys_close), \ + FN(timer_init), \ + FN(timer_set_callback), \ + FN(timer_start), \ + FN(timer_cancel), \ + FN(get_func_ip), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -6077,6 +6149,11 @@ struct bpf_spin_lock { __u32 val; }; +struct bpf_timer { + __u64 :64; + __u64 :64; +} __attribute__((aligned(8))); + struct bpf_sysctl { __u32 write; /* Sysctl is being read (= 0) or written (= 1). * Allows 1,2,4-byte read, but no write. 
diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 6b56a7549531..5aca85874447 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -479,16 +479,22 @@ enum { /* flags used in BRIDGE_VLANDB_DUMP_FLAGS attribute to affect dumps */ #define BRIDGE_VLANDB_DUMPF_STATS (1 << 0) /* Include stats in the dump */ +#define BRIDGE_VLANDB_DUMPF_GLOBAL (1 << 1) /* Dump global vlan options only */ /* Bridge vlan RTM attributes * [BRIDGE_VLANDB_ENTRY] = { * [BRIDGE_VLANDB_ENTRY_INFO] * ... * } + * [BRIDGE_VLANDB_GLOBAL_OPTIONS] = { + * [BRIDGE_VLANDB_GOPTS_ID] + * ... + * } */ enum { BRIDGE_VLANDB_UNSPEC, BRIDGE_VLANDB_ENTRY, + BRIDGE_VLANDB_GLOBAL_OPTIONS, __BRIDGE_VLANDB_MAX, }; #define BRIDGE_VLANDB_MAX (__BRIDGE_VLANDB_MAX - 1) @@ -538,6 +544,15 @@ enum { }; #define BRIDGE_VLANDB_STATS_MAX (__BRIDGE_VLANDB_STATS_MAX - 1) +enum { + BRIDGE_VLANDB_GOPTS_UNSPEC, + BRIDGE_VLANDB_GOPTS_ID, + BRIDGE_VLANDB_GOPTS_RANGE, + BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING, + __BRIDGE_VLANDB_GOPTS_MAX +}; +#define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) + /* Bridge multicast database attributes * [MDBA_MDB] = { * [MDBA_MDB_ENTRY] = { @@ -629,6 +644,7 @@ enum { MDBA_ROUTER_PATTR_TYPE, MDBA_ROUTER_PATTR_INET_TIMER, MDBA_ROUTER_PATTR_INET6_TIMER, + MDBA_ROUTER_PATTR_VID, __MDBA_ROUTER_PATTR_MAX }; #define MDBA_ROUTER_PATTR_MAX (__MDBA_ROUTER_PATTR_MAX - 1) @@ -720,12 +736,14 @@ struct br_mcast_stats { /* bridge boolean options * BR_BOOLOPT_NO_LL_LEARN - disable learning from link-local packets + * BR_BOOLOPT_MCAST_VLAN_SNOOPING - control vlan multicast snooping * * IMPORTANT: if adding a new option do not forget to handle * it in br_boolopt_toggle/get and bridge sysfs */ enum br_boolopt_id { BR_BOOLOPT_NO_LL_LEARN, + BR_BOOLOPT_MCAST_VLAN_SNOOPING, BR_BOOLOPT_MAX }; diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index 5ad396a57eb3..c4c53a9ab959 100644 --- a/include/uapi/linux/in6.h +++ 
b/include/uapi/linux/in6.h @@ -145,6 +145,7 @@ struct in6_flowlabel_req { #define IPV6_TLV_PADN 1 #define IPV6_TLV_ROUTERALERT 5 #define IPV6_TLV_CALIPSO 7 /* RFC 5570 */ +#define IPV6_TLV_IOAM 49 /* TEMPORARY IANA allocation for IOAM */ #define IPV6_TLV_JUMBO 194 #define IPV6_TLV_HAO 201 /* home address option */ diff --git a/include/uapi/linux/ioam6.h b/include/uapi/linux/ioam6.h new file mode 100644 index 000000000000..ac4de376f0ce --- /dev/null +++ b/include/uapi/linux/ioam6.h @@ -0,0 +1,133 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * IPv6 IOAM implementation + * + * Author: + * Justin Iurman <justin.iurman@uliege.be> + */ + +#ifndef _UAPI_LINUX_IOAM6_H +#define _UAPI_LINUX_IOAM6_H + +#include <asm/byteorder.h> +#include <linux/types.h> + +#define IOAM6_U16_UNAVAILABLE U16_MAX +#define IOAM6_U32_UNAVAILABLE U32_MAX +#define IOAM6_U64_UNAVAILABLE U64_MAX + +#define IOAM6_DEFAULT_ID (IOAM6_U32_UNAVAILABLE >> 8) +#define IOAM6_DEFAULT_ID_WIDE (IOAM6_U64_UNAVAILABLE >> 8) +#define IOAM6_DEFAULT_IF_ID IOAM6_U16_UNAVAILABLE +#define IOAM6_DEFAULT_IF_ID_WIDE IOAM6_U32_UNAVAILABLE + +/* + * IPv6 IOAM Option Header + */ +struct ioam6_hdr { + __u8 opt_type; + __u8 opt_len; + __u8 :8; /* reserved */ +#define IOAM6_TYPE_PREALLOC 0 + __u8 type; +} __attribute__((packed)); + +/* + * IOAM Trace Header + */ +struct ioam6_trace_hdr { + __be16 namespace_id; + +#if defined(__LITTLE_ENDIAN_BITFIELD) + + __u8 :1, /* unused */ + :1, /* unused */ + overflow:1, + nodelen:5; + + __u8 remlen:7, + :1; /* unused */ + + union { + __be32 type_be32; + + struct { + __u32 bit7:1, + bit6:1, + bit5:1, + bit4:1, + bit3:1, + bit2:1, + bit1:1, + bit0:1, + bit15:1, /* unused */ + bit14:1, /* unused */ + bit13:1, /* unused */ + bit12:1, /* unused */ + bit11:1, + bit10:1, + bit9:1, + bit8:1, + bit23:1, /* reserved */ + bit22:1, + bit21:1, /* unused */ + bit20:1, /* unused */ + bit19:1, /* unused */ + bit18:1, /* unused */ + bit17:1, /* unused */ + bit16:1, /* unused */ 
+ :8; /* reserved */ + } type; + }; + +#elif defined(__BIG_ENDIAN_BITFIELD) + + __u8 nodelen:5, + overflow:1, + :1, /* unused */ + :1; /* unused */ + + __u8 :1, /* unused */ + remlen:7; + + union { + __be32 type_be32; + + struct { + __u32 bit0:1, + bit1:1, + bit2:1, + bit3:1, + bit4:1, + bit5:1, + bit6:1, + bit7:1, + bit8:1, + bit9:1, + bit10:1, + bit11:1, + bit12:1, /* unused */ + bit13:1, /* unused */ + bit14:1, /* unused */ + bit15:1, /* unused */ + bit16:1, /* unused */ + bit17:1, /* unused */ + bit18:1, /* unused */ + bit19:1, /* unused */ + bit20:1, /* unused */ + bit21:1, /* unused */ + bit22:1, + bit23:1, /* reserved */ + :8; /* reserved */ + } type; + }; + +#else +#error "Please fix <asm/byteorder.h>" +#endif + +#define IOAM6_TRACE_DATA_SIZE_MAX 244 + __u8 data[0]; +} __attribute__((packed)); + +#endif /* _UAPI_LINUX_IOAM6_H */ diff --git a/include/uapi/linux/ioam6_genl.h b/include/uapi/linux/ioam6_genl.h new file mode 100644 index 000000000000..ca4b22833754 --- /dev/null +++ b/include/uapi/linux/ioam6_genl.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * IPv6 IOAM Generic Netlink API + * + * Author: + * Justin Iurman <justin.iurman@uliege.be> + */ + +#ifndef _UAPI_LINUX_IOAM6_GENL_H +#define _UAPI_LINUX_IOAM6_GENL_H + +#define IOAM6_GENL_NAME "IOAM6" +#define IOAM6_GENL_VERSION 0x1 + +enum { + IOAM6_ATTR_UNSPEC, + + IOAM6_ATTR_NS_ID, /* u16 */ + IOAM6_ATTR_NS_DATA, /* u32 */ + IOAM6_ATTR_NS_DATA_WIDE,/* u64 */ + +#define IOAM6_MAX_SCHEMA_DATA_LEN (255 * 4) + IOAM6_ATTR_SC_ID, /* u32 */ + IOAM6_ATTR_SC_DATA, /* Binary */ + IOAM6_ATTR_SC_NONE, /* Flag */ + + IOAM6_ATTR_PAD, + + __IOAM6_ATTR_MAX, +}; + +#define IOAM6_ATTR_MAX (__IOAM6_ATTR_MAX - 1) + +enum { + IOAM6_CMD_UNSPEC, + + IOAM6_CMD_ADD_NAMESPACE, + IOAM6_CMD_DEL_NAMESPACE, + IOAM6_CMD_DUMP_NAMESPACES, + + IOAM6_CMD_ADD_SCHEMA, + IOAM6_CMD_DEL_SCHEMA, + IOAM6_CMD_DUMP_SCHEMAS, + + IOAM6_CMD_NS_SET_SCHEMA, + + __IOAM6_CMD_MAX, +}; + +#define IOAM6_CMD_MAX 
(__IOAM6_CMD_MAX - 1) + +#endif /* _UAPI_LINUX_IOAM6_GENL_H */ diff --git a/include/uapi/linux/ioam6_iptunnel.h b/include/uapi/linux/ioam6_iptunnel.h new file mode 100644 index 000000000000..bae14636a8c8 --- /dev/null +++ b/include/uapi/linux/ioam6_iptunnel.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * IPv6 IOAM Lightweight Tunnel API + * + * Author: + * Justin Iurman <justin.iurman@uliege.be> + */ + +#ifndef _UAPI_LINUX_IOAM6_IPTUNNEL_H +#define _UAPI_LINUX_IOAM6_IPTUNNEL_H + +enum { + IOAM6_IPTUNNEL_UNSPEC, + IOAM6_IPTUNNEL_TRACE, /* struct ioam6_trace_hdr */ + __IOAM6_IPTUNNEL_MAX, +}; + +#define IOAM6_IPTUNNEL_MAX (__IOAM6_IPTUNNEL_MAX - 1) + +#endif /* _UAPI_LINUX_IOAM6_IPTUNNEL_H */ diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 70603775fe91..b243a53fa985 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -190,6 +190,9 @@ enum { DEVCONF_NDISC_TCLASS, DEVCONF_RPL_SEG_ENABLED, DEVCONF_RA_DEFRTR_METRIC, + DEVCONF_IOAM6_ENABLED, + DEVCONF_IOAM6_ID, + DEVCONF_IOAM6_ID_WIDE, DEVCONF_MAX }; diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h index 568a4303ccce..2e206919125c 100644 --- a/include/uapi/linux/lwtunnel.h +++ b/include/uapi/linux/lwtunnel.h @@ -14,6 +14,7 @@ enum lwtunnel_encap_types { LWTUNNEL_ENCAP_BPF, LWTUNNEL_ENCAP_SEG6_LOCAL, LWTUNNEL_ENCAP_RPL, + LWTUNNEL_ENCAP_IOAM6, __LWTUNNEL_ENCAP_MAX, }; diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 8d16744edc31..6571b57b2268 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -70,6 +70,8 @@ enum ovs_datapath_cmd { * set on the datapath port (for OVS_ACTION_ATTR_MISS). Only valid on * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should * not be sent. + * OVS_DP_ATTR_PER_CPU_PIDS: Per-cpu array of PIDs for upcalls when + * OVS_DP_F_DISPATCH_UPCALL_PER_CPU feature is set. 
* @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the * datapath. Always present in notifications. * @OVS_DP_ATTR_MEGAFLOW_STATS: Statistics about mega flow masks usage for the @@ -87,6 +89,9 @@ enum ovs_datapath_attr { OVS_DP_ATTR_USER_FEATURES, /* OVS_DP_F_* */ OVS_DP_ATTR_PAD, OVS_DP_ATTR_MASKS_CACHE_SIZE, + OVS_DP_ATTR_PER_CPU_PIDS, /* Netlink PIDS to receive upcalls in per-cpu + * dispatch mode + */ __OVS_DP_ATTR_MAX }; @@ -127,6 +132,9 @@ struct ovs_vport_stats { /* Allow tc offload recirc sharing */ #define OVS_DP_F_TC_RECIRC_SHARING (1 << 2) +/* Allow per-cpu dispatch of upcalls */ +#define OVS_DP_F_DISPATCH_UPCALL_PER_CPU (1 << 3) + /* Fixed logical ports. */ #define OVSP_LOCAL ((__u32)0) diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig index bd04f4a44c01..a82d6de86522 100644 --- a/kernel/bpf/Kconfig +++ b/kernel/bpf/Kconfig @@ -29,7 +29,7 @@ config BPF_SYSCALL select IRQ_WORK select TASKS_TRACE_RCU select BINARY_PRINTF - select NET_SOCK_MSG if INET + select NET_SOCK_MSG if NET default n help Enable the bpf() system call that allows to manipulate BPF programs diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 3c4105603f9d..cebd4fb06d19 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -287,6 +287,12 @@ static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key return 0; } +static void check_and_free_timer_in_array(struct bpf_array *arr, void *val) +{ + if (unlikely(map_value_has_timer(&arr->map))) + bpf_timer_cancel_and_free(val + arr->map.timer_off); +} + /* Called from syscall or from eBPF program */ static int array_map_update_elem(struct bpf_map *map, void *key, void *value, u64 map_flags) @@ -321,6 +327,7 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value, copy_map_value_locked(map, val, value, false); else copy_map_value(map, val, value); + check_and_free_timer_in_array(array, val); } return 0; } @@ -374,6 +381,19 @@ static void 
*array_map_vmalloc_addr(struct bpf_array *array) return (void *)round_down((unsigned long)array, PAGE_SIZE); } +static void array_map_free_timers(struct bpf_map *map) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + int i; + + if (likely(!map_value_has_timer(map))) + return; + + for (i = 0; i < array->map.max_entries; i++) + bpf_timer_cancel_and_free(array->value + array->elem_size * i + + map->timer_off); +} + /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ static void array_map_free(struct bpf_map *map) { @@ -668,6 +688,7 @@ const struct bpf_map_ops array_map_ops = { .map_alloc = array_map_alloc, .map_free = array_map_free, .map_get_next_key = array_map_get_next_key, + .map_release_uref = array_map_free_timers, .map_lookup_elem = array_map_lookup_elem, .map_update_elem = array_map_update_elem, .map_delete_elem = array_map_delete_elem, diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index cb4b72997d9b..7780131f710e 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3046,43 +3046,92 @@ static void btf_struct_log(struct btf_verifier_env *env, btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t)); } -/* find 'struct bpf_spin_lock' in map value. 
- * return >= 0 offset if found - * and < 0 in case of error - */ -int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t) +static int btf_find_struct_field(const struct btf *btf, const struct btf_type *t, + const char *name, int sz, int align) { const struct btf_member *member; u32 i, off = -ENOENT; - if (!__btf_type_is_struct(t)) - return -EINVAL; - for_each_member(i, t, member) { const struct btf_type *member_type = btf_type_by_id(btf, member->type); if (!__btf_type_is_struct(member_type)) continue; - if (member_type->size != sizeof(struct bpf_spin_lock)) + if (member_type->size != sz) continue; - if (strcmp(__btf_name_by_offset(btf, member_type->name_off), - "bpf_spin_lock")) + if (strcmp(__btf_name_by_offset(btf, member_type->name_off), name)) continue; if (off != -ENOENT) - /* only one 'struct bpf_spin_lock' is allowed */ + /* only one such field is allowed */ return -E2BIG; off = btf_member_bit_offset(t, member); if (off % 8) /* valid C code cannot generate such BTF */ return -EINVAL; off /= 8; - if (off % __alignof__(struct bpf_spin_lock)) - /* valid struct bpf_spin_lock will be 4 byte aligned */ + if (off % align) + return -EINVAL; + } + return off; +} + +static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t, + const char *name, int sz, int align) +{ + const struct btf_var_secinfo *vsi; + u32 i, off = -ENOENT; + + for_each_vsi(i, t, vsi) { + const struct btf_type *var = btf_type_by_id(btf, vsi->type); + const struct btf_type *var_type = btf_type_by_id(btf, var->type); + + if (!__btf_type_is_struct(var_type)) + continue; + if (var_type->size != sz) + continue; + if (vsi->size != sz) + continue; + if (strcmp(__btf_name_by_offset(btf, var_type->name_off), name)) + continue; + if (off != -ENOENT) + /* only one such field is allowed */ + return -E2BIG; + off = vsi->offset; + if (off % align) return -EINVAL; } return off; } +static int btf_find_field(const struct btf *btf, const struct btf_type *t, + const char *name, 
int sz, int align) +{ + + if (__btf_type_is_struct(t)) + return btf_find_struct_field(btf, t, name, sz, align); + else if (btf_type_is_datasec(t)) + return btf_find_datasec_var(btf, t, name, sz, align); + return -EINVAL; +} + +/* find 'struct bpf_spin_lock' in map value. + * return >= 0 offset if found + * and < 0 in case of error + */ +int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t) +{ + return btf_find_field(btf, t, "bpf_spin_lock", + sizeof(struct bpf_spin_lock), + __alignof__(struct bpf_spin_lock)); +} + +int btf_find_timer(const struct btf *btf, const struct btf_type *t) +{ + return btf_find_field(btf, t, "bpf_timer", + sizeof(struct bpf_timer), + __alignof__(struct bpf_timer)); +} + static void __btf_struct_show(const struct btf *btf, const struct btf_type *t, u32 type_id, void *data, u8 bits_offset, struct btf_show *show) diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 480e936c54d0..585b2b77ccc4 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -16,6 +16,7 @@ * netstack, and assigning dedicated CPUs for this stage. This * basically allows for 10G wirespeed pre-filtering via bpf. 
*/ +#include <linux/bitops.h> #include <linux/bpf.h> #include <linux/filter.h> #include <linux/ptr_ring.h> @@ -168,6 +169,46 @@ static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu) } } +static void cpu_map_bpf_prog_run_skb(struct bpf_cpu_map_entry *rcpu, + struct list_head *listp, + struct xdp_cpumap_stats *stats) +{ + struct sk_buff *skb, *tmp; + struct xdp_buff xdp; + u32 act; + int err; + + list_for_each_entry_safe(skb, tmp, listp, list) { + act = bpf_prog_run_generic_xdp(skb, &xdp, rcpu->prog); + switch (act) { + case XDP_PASS: + break; + case XDP_REDIRECT: + skb_list_del_init(skb); + err = xdp_do_generic_redirect(skb->dev, skb, &xdp, + rcpu->prog); + if (unlikely(err)) { + kfree_skb(skb); + stats->drop++; + } else { + stats->redirect++; + } + return; + default: + bpf_warn_invalid_xdp_action(act); + fallthrough; + case XDP_ABORTED: + trace_xdp_exception(skb->dev, rcpu->prog, act); + fallthrough; + case XDP_DROP: + skb_list_del_init(skb); + kfree_skb(skb); + stats->drop++; + return; + } + } +} + static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu, void **frames, int n, struct xdp_cpumap_stats *stats) @@ -176,11 +217,6 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu, struct xdp_buff xdp; int i, nframes = 0; - if (!rcpu->prog) - return n; - - rcu_read_lock_bh(); - xdp_set_return_frame_no_direct(); xdp.rxq = &rxq; @@ -227,17 +263,37 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu, } } + xdp_clear_return_frame_no_direct(); + + return nframes; +} + +#define CPUMAP_BATCH 8 + +static int cpu_map_bpf_prog_run(struct bpf_cpu_map_entry *rcpu, void **frames, + int xdp_n, struct xdp_cpumap_stats *stats, + struct list_head *list) +{ + int nframes; + + if (!rcpu->prog) + return xdp_n; + + rcu_read_lock_bh(); + + nframes = cpu_map_bpf_prog_run_xdp(rcpu, frames, xdp_n, stats); + if (stats->redirect) - xdp_do_flush_map(); + xdp_do_flush(); - xdp_clear_return_frame_no_direct(); + if (unlikely(!list_empty(list))) 
+ cpu_map_bpf_prog_run_skb(rcpu, list, stats); rcu_read_unlock_bh(); /* resched point, may call do_softirq() */ return nframes; } -#define CPUMAP_BATCH 8 static int cpu_map_kthread_run(void *data) { @@ -254,9 +310,9 @@ static int cpu_map_kthread_run(void *data) struct xdp_cpumap_stats stats = {}; /* zero stats */ unsigned int kmem_alloc_drops = 0, sched = 0; gfp_t gfp = __GFP_ZERO | GFP_ATOMIC; + int i, n, m, nframes, xdp_n; void *frames[CPUMAP_BATCH]; void *skbs[CPUMAP_BATCH]; - int i, n, m, nframes; LIST_HEAD(list); /* Release CPU reschedule checks */ @@ -280,9 +336,20 @@ static int cpu_map_kthread_run(void *data) */ n = __ptr_ring_consume_batched(rcpu->queue, frames, CPUMAP_BATCH); - for (i = 0; i < n; i++) { + for (i = 0, xdp_n = 0; i < n; i++) { void *f = frames[i]; - struct page *page = virt_to_page(f); + struct page *page; + + if (unlikely(__ptr_test_bit(0, &f))) { + struct sk_buff *skb = f; + + __ptr_clear_bit(0, &skb); + list_add_tail(&skb->list, &list); + continue; + } + + frames[xdp_n++] = f; + page = virt_to_page(f); /* Bring struct page memory area to curr CPU. 
Read by * build_skb_around via page_is_pfmemalloc(), and when @@ -292,7 +359,7 @@ static int cpu_map_kthread_run(void *data) } /* Support running another XDP prog on this CPU */ - nframes = cpu_map_bpf_prog_run_xdp(rcpu, frames, n, &stats); + nframes = cpu_map_bpf_prog_run(rcpu, frames, xdp_n, &stats, &list); if (nframes) { m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, nframes, skbs); if (unlikely(m == 0)) { @@ -330,12 +397,6 @@ static int cpu_map_kthread_run(void *data) return 0; } -bool cpu_map_prog_allowed(struct bpf_map *map) -{ - return map->map_type == BPF_MAP_TYPE_CPUMAP && - map->value_size != offsetofend(struct bpf_cpumap_val, qsize); -} - static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, int fd) { struct bpf_prog *prog; @@ -701,6 +762,25 @@ int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, return 0; } +int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu, + struct sk_buff *skb) +{ + int ret; + + __skb_pull(skb, skb->mac_len); + skb_set_redirected(skb, false); + __ptr_set_bit(0, &skb); + + ret = ptr_ring_produce(rcpu->queue, skb); + if (ret < 0) + goto trace; + + wake_up_process(rcpu->kthread); +trace: + trace_xdp_cpumap_enqueue(rcpu->map_id, !ret, !!ret, rcpu->cpu); + return ret; +} + void __cpu_map_flush(void) { struct list_head *flush_list = this_cpu_ptr(&cpu_map_flush_list); diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index fdc20892837c..542e94fa30b4 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -322,16 +322,6 @@ static int dev_map_hash_get_next_key(struct bpf_map *map, void *key, return -ENOENT; } -bool dev_map_can_have_prog(struct bpf_map *map) -{ - if ((map->map_type == BPF_MAP_TYPE_DEVMAP || - map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) && - map->value_size != offsetofend(struct bpf_devmap_val, ifindex)) - return true; - - return false; -} - static int dev_map_bpf_prog_run(struct bpf_prog *xdp_prog, struct xdp_frame **frames, int n, struct net_device *dev) @@ -499,6 
+489,37 @@ static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, return 0; } +static u32 dev_map_bpf_prog_run_skb(struct sk_buff *skb, struct bpf_dtab_netdev *dst) +{ + struct xdp_txq_info txq = { .dev = dst->dev }; + struct xdp_buff xdp; + u32 act; + + if (!dst->xdp_prog) + return XDP_PASS; + + __skb_pull(skb, skb->mac_len); + xdp.txq = &txq; + + act = bpf_prog_run_generic_xdp(skb, &xdp, dst->xdp_prog); + switch (act) { + case XDP_PASS: + __skb_push(skb, skb->mac_len); + break; + default: + bpf_warn_invalid_xdp_action(act); + fallthrough; + case XDP_ABORTED: + trace_xdp_exception(dst->dev, dst->xdp_prog, act); + fallthrough; + case XDP_DROP: + kfree_skb(skb); + break; + } + + return act; +} + int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, struct net_device *dev_rx) { @@ -615,6 +636,14 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, err = xdp_ok_fwd_dev(dst->dev, skb->len); if (unlikely(err)) return err; + + /* Redirect has already succeeded semantically at this point, so we just + * return 0 even if packet is dropped. Helper below takes care of + * freeing skb. 
+ */ + if (dev_map_bpf_prog_run_skb(skb, dst) != XDP_PASS) + return 0; + skb->dev = dst->dev; generic_xdp_tx(skb, xdp_prog); diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 72c58cc516a3..6dc3fae46a56 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -228,6 +228,32 @@ static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i) return (struct htab_elem *) (htab->elems + i * (u64)htab->elem_size); } +static bool htab_has_extra_elems(struct bpf_htab *htab) +{ + return !htab_is_percpu(htab) && !htab_is_lru(htab); +} + +static void htab_free_prealloced_timers(struct bpf_htab *htab) +{ + u32 num_entries = htab->map.max_entries; + int i; + + if (likely(!map_value_has_timer(&htab->map))) + return; + if (htab_has_extra_elems(htab)) + num_entries += num_possible_cpus(); + + for (i = 0; i < num_entries; i++) { + struct htab_elem *elem; + + elem = get_htab_elem(htab, i); + bpf_timer_cancel_and_free(elem->key + + round_up(htab->map.key_size, 8) + + htab->map.timer_off); + cond_resched(); + } +} + static void htab_free_elems(struct bpf_htab *htab) { int i; @@ -265,8 +291,12 @@ static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key, struct htab_elem *l; if (node) { + u32 key_size = htab->map.key_size; + l = container_of(node, struct htab_elem, lru_node); - memcpy(l->key, key, htab->map.key_size); + memcpy(l->key, key, key_size); + check_and_init_map_value(&htab->map, + l->key + round_up(key_size, 8)); return l; } @@ -278,7 +308,7 @@ static int prealloc_init(struct bpf_htab *htab) u32 num_entries = htab->map.max_entries; int err = -ENOMEM, i; - if (!htab_is_percpu(htab) && !htab_is_lru(htab)) + if (htab_has_extra_elems(htab)) num_entries += num_possible_cpus(); htab->elems = bpf_map_area_alloc((u64)htab->elem_size * num_entries, @@ -695,6 +725,14 @@ static int htab_lru_map_gen_lookup(struct bpf_map *map, return insn - insn_buf; } +static void check_and_free_timer(struct bpf_htab *htab, struct htab_elem *elem) +{ + if 
(unlikely(map_value_has_timer(&htab->map))) + bpf_timer_cancel_and_free(elem->key + + round_up(htab->map.key_size, 8) + + htab->map.timer_off); +} + /* It is called from the bpf_lru_list when the LRU needs to delete * older elements from the htab. */ @@ -719,6 +757,7 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node) hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) if (l == tgt_l) { hlist_nulls_del_rcu(&l->hash_node); + check_and_free_timer(htab, l); break; } @@ -790,6 +829,7 @@ static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l) { if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH) free_percpu(htab_elem_get_ptr(l, htab->map.key_size)); + check_and_free_timer(htab, l); kfree(l); } @@ -817,6 +857,7 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) htab_put_fd_value(htab, l); if (htab_is_prealloc(htab)) { + check_and_free_timer(htab, l); __pcpu_freelist_push(&htab->freelist, &l->fnode); } else { atomic_dec(&htab->count); @@ -920,8 +961,8 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, l_new = ERR_PTR(-ENOMEM); goto dec_count; } - check_and_init_map_lock(&htab->map, - l_new->key + round_up(key_size, 8)); + check_and_init_map_value(&htab->map, + l_new->key + round_up(key_size, 8)); } memcpy(l_new->key, key, key_size); @@ -1062,6 +1103,8 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, hlist_nulls_del_rcu(&l_old->hash_node); if (!htab_is_prealloc(htab)) free_htab_elem(htab, l_old); + else + check_and_free_timer(htab, l_old); } ret = 0; err: @@ -1069,6 +1112,12 @@ err: return ret; } +static void htab_lru_push_free(struct bpf_htab *htab, struct htab_elem *elem) +{ + check_and_free_timer(htab, elem); + bpf_lru_push_free(&htab->lru, &elem->lru_node); +} + static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value, u64 map_flags) { @@ -1102,7 +1151,8 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, 
void *value, l_new = prealloc_lru_pop(htab, key, hash); if (!l_new) return -ENOMEM; - memcpy(l_new->key + round_up(map->key_size, 8), value, map->value_size); + copy_map_value(&htab->map, + l_new->key + round_up(map->key_size, 8), value); ret = htab_lock_bucket(htab, b, hash, &flags); if (ret) @@ -1128,9 +1178,9 @@ err: htab_unlock_bucket(htab, b, hash, flags); if (ret) - bpf_lru_push_free(&htab->lru, &l_new->lru_node); + htab_lru_push_free(htab, l_new); else if (l_old) - bpf_lru_push_free(&htab->lru, &l_old->lru_node); + htab_lru_push_free(htab, l_old); return ret; } @@ -1339,7 +1389,7 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key) htab_unlock_bucket(htab, b, hash, flags); if (l) - bpf_lru_push_free(&htab->lru, &l->lru_node); + htab_lru_push_free(htab, l); return ret; } @@ -1359,6 +1409,35 @@ static void delete_all_elements(struct bpf_htab *htab) } } +static void htab_free_malloced_timers(struct bpf_htab *htab) +{ + int i; + + rcu_read_lock(); + for (i = 0; i < htab->n_buckets; i++) { + struct hlist_nulls_head *head = select_bucket(htab, i); + struct hlist_nulls_node *n; + struct htab_elem *l; + + hlist_nulls_for_each_entry(l, n, head, hash_node) + check_and_free_timer(htab, l); + cond_resched_rcu(); + } + rcu_read_unlock(); +} + +static void htab_map_free_timers(struct bpf_map *map) +{ + struct bpf_htab *htab = container_of(map, struct bpf_htab, map); + + if (likely(!map_value_has_timer(&htab->map))) + return; + if (!htab_is_prealloc(htab)) + htab_free_malloced_timers(htab); + else + htab_free_prealloced_timers(htab); +} + /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ static void htab_map_free(struct bpf_map *map) { @@ -1456,7 +1535,7 @@ static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key, else copy_map_value(map, value, l->key + roundup_key_size); - check_and_init_map_lock(map, value); + check_and_init_map_value(map, value); } hlist_nulls_del_rcu(&l->hash_node); @@ -1467,7 
+1546,7 @@ static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key, htab_unlock_bucket(htab, b, hash, bflags); if (is_lru_map && l) - bpf_lru_push_free(&htab->lru, &l->lru_node); + htab_lru_push_free(htab, l); return ret; } @@ -1645,7 +1724,7 @@ again_nocopy: true); else copy_map_value(map, dst_val, value); - check_and_init_map_lock(map, dst_val); + check_and_init_map_value(map, dst_val); } if (do_delete) { hlist_nulls_del_rcu(&l->hash_node); @@ -1672,7 +1751,7 @@ again_nocopy: while (node_to_free) { l = node_to_free; node_to_free = node_to_free->batch_flink; - bpf_lru_push_free(&htab->lru, &l->lru_node); + htab_lru_push_free(htab, l); } next_batch: @@ -2034,6 +2113,7 @@ const struct bpf_map_ops htab_map_ops = { .map_alloc = htab_map_alloc, .map_free = htab_map_free, .map_get_next_key = htab_map_get_next_key, + .map_release_uref = htab_map_free_timers, .map_lookup_elem = htab_map_lookup_elem, .map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem, .map_update_elem = htab_map_update_elem, @@ -2055,6 +2135,7 @@ const struct bpf_map_ops htab_lru_map_ops = { .map_alloc = htab_map_alloc, .map_free = htab_map_free, .map_get_next_key = htab_map_get_next_key, + .map_release_uref = htab_map_free_timers, .map_lookup_elem = htab_lru_map_lookup_elem, .map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem, .map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys, diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 62cf00383910..9fe846ec6bd1 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -289,13 +289,18 @@ static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock) static DEFINE_PER_CPU(unsigned long, irqsave_flags); -notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock) +static inline void __bpf_spin_lock_irqsave(struct bpf_spin_lock *lock) { unsigned long flags; local_irq_save(flags); __bpf_spin_lock(lock); __this_cpu_write(irqsave_flags, flags); +} + +notrace BPF_CALL_1(bpf_spin_lock, struct 
bpf_spin_lock *, lock) +{ + __bpf_spin_lock_irqsave(lock); return 0; } @@ -306,13 +311,18 @@ const struct bpf_func_proto bpf_spin_lock_proto = { .arg1_type = ARG_PTR_TO_SPIN_LOCK, }; -notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock) +static inline void __bpf_spin_unlock_irqrestore(struct bpf_spin_lock *lock) { unsigned long flags; flags = __this_cpu_read(irqsave_flags); __bpf_spin_unlock(lock); local_irq_restore(flags); +} + +notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock) +{ + __bpf_spin_unlock_irqrestore(lock); return 0; } @@ -333,9 +343,9 @@ void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, else lock = dst + map->spin_lock_off; preempt_disable(); - ____bpf_spin_lock(lock); + __bpf_spin_lock_irqsave(lock); copy_map_value(map, dst, src); - ____bpf_spin_unlock(lock); + __bpf_spin_unlock_irqrestore(lock); preempt_enable(); } @@ -989,6 +999,320 @@ const struct bpf_func_proto bpf_snprintf_proto = { .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; +/* BPF map elements can contain 'struct bpf_timer'. + * Such map owns all of its BPF timers. + * 'struct bpf_timer' is allocated as part of map element allocation + * and it's zero initialized. + * That space is used to keep 'struct bpf_timer_kern'. + * bpf_timer_init() allocates 'struct bpf_hrtimer', inits hrtimer, and + * remembers 'struct bpf_map *' pointer it's part of. + * bpf_timer_set_callback() increments prog refcnt and assign bpf callback_fn. + * bpf_timer_start() arms the timer. + * If user space reference to a map goes to zero at this point + * ops->map_release_uref callback is responsible for cancelling the timers, + * freeing their memory, and decrementing prog's refcnts. + * bpf_timer_cancel() cancels the timer and decrements prog's refcnt. + * Inner maps can contain bpf timers as well. ops->map_release_uref is + * freeing the timers when inner map is replaced or deleted by user space. 
+ */ +struct bpf_hrtimer { + struct hrtimer timer; + struct bpf_map *map; + struct bpf_prog *prog; + void __rcu *callback_fn; + void *value; +}; + +/* the actual struct hidden inside uapi struct bpf_timer */ +struct bpf_timer_kern { + struct bpf_hrtimer *timer; + /* bpf_spin_lock is used here instead of spinlock_t to make + * sure that it always fits into space reserved by struct bpf_timer + * regardless of LOCKDEP and spinlock debug flags. + */ + struct bpf_spin_lock lock; +} __attribute__((aligned(8))); + +static DEFINE_PER_CPU(struct bpf_hrtimer *, hrtimer_running); + +static enum hrtimer_restart bpf_timer_cb(struct hrtimer *hrtimer) +{ + struct bpf_hrtimer *t = container_of(hrtimer, struct bpf_hrtimer, timer); + struct bpf_map *map = t->map; + void *value = t->value; + void *callback_fn; + void *key; + u32 idx; + + callback_fn = rcu_dereference_check(t->callback_fn, rcu_read_lock_bh_held()); + if (!callback_fn) + goto out; + + /* bpf_timer_cb() runs in hrtimer_run_softirq. It doesn't migrate and + * cannot be preempted by another bpf_timer_cb() on the same cpu. + * Remember the timer this callback is servicing to prevent + * deadlock if callback_fn() calls bpf_timer_cancel() or + * bpf_map_delete_elem() on the same timer. + */ + this_cpu_write(hrtimer_running, t); + if (map->map_type == BPF_MAP_TYPE_ARRAY) { + struct bpf_array *array = container_of(map, struct bpf_array, map); + + /* compute the key */ + idx = ((char *)value - array->value) / array->elem_size; + key = &idx; + } else { /* hash or lru */ + key = value - round_up(map->key_size, 8); + } + + BPF_CAST_CALL(callback_fn)((u64)(long)map, (u64)(long)key, + (u64)(long)value, 0, 0); + /* The verifier checked that return value is zero. 
*/ + + this_cpu_write(hrtimer_running, NULL); +out: + return HRTIMER_NORESTART; +} + +BPF_CALL_3(bpf_timer_init, struct bpf_timer_kern *, timer, struct bpf_map *, map, + u64, flags) +{ + clockid_t clockid = flags & (MAX_CLOCKS - 1); + struct bpf_hrtimer *t; + int ret = 0; + + BUILD_BUG_ON(MAX_CLOCKS != 16); + BUILD_BUG_ON(sizeof(struct bpf_timer_kern) > sizeof(struct bpf_timer)); + BUILD_BUG_ON(__alignof__(struct bpf_timer_kern) != __alignof__(struct bpf_timer)); + + if (in_nmi()) + return -EOPNOTSUPP; + + if (flags >= MAX_CLOCKS || + /* similar to timerfd except _ALARM variants are not supported */ + (clockid != CLOCK_MONOTONIC && + clockid != CLOCK_REALTIME && + clockid != CLOCK_BOOTTIME)) + return -EINVAL; + __bpf_spin_lock_irqsave(&timer->lock); + t = timer->timer; + if (t) { + ret = -EBUSY; + goto out; + } + if (!atomic64_read(&map->usercnt)) { + /* maps with timers must be either held by user space + * or pinned in bpffs. + */ + ret = -EPERM; + goto out; + } + /* allocate hrtimer via map_kmalloc to use memcg accounting */ + t = bpf_map_kmalloc_node(map, sizeof(*t), GFP_ATOMIC, map->numa_node); + if (!t) { + ret = -ENOMEM; + goto out; + } + t->value = (void *)timer - map->timer_off; + t->map = map; + t->prog = NULL; + rcu_assign_pointer(t->callback_fn, NULL); + hrtimer_init(&t->timer, clockid, HRTIMER_MODE_REL_SOFT); + t->timer.function = bpf_timer_cb; + timer->timer = t; +out: + __bpf_spin_unlock_irqrestore(&timer->lock); + return ret; +} + +static const struct bpf_func_proto bpf_timer_init_proto = { + .func = bpf_timer_init, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_TIMER, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, +}; + +BPF_CALL_3(bpf_timer_set_callback, struct bpf_timer_kern *, timer, void *, callback_fn, + struct bpf_prog_aux *, aux) +{ + struct bpf_prog *prev, *prog = aux->prog; + struct bpf_hrtimer *t; + int ret = 0; + + if (in_nmi()) + return -EOPNOTSUPP; + __bpf_spin_lock_irqsave(&timer->lock); + t = 
timer->timer; + if (!t) { + ret = -EINVAL; + goto out; + } + if (!atomic64_read(&t->map->usercnt)) { + /* maps with timers must be either held by user space + * or pinned in bpffs. Otherwise timer might still be + * running even when bpf prog is detached and user space + * is gone, since map_release_uref won't ever be called. + */ + ret = -EPERM; + goto out; + } + prev = t->prog; + if (prev != prog) { + /* Bump prog refcnt once. Every bpf_timer_set_callback() + * can pick different callback_fn-s within the same prog. + */ + prog = bpf_prog_inc_not_zero(prog); + if (IS_ERR(prog)) { + ret = PTR_ERR(prog); + goto out; + } + if (prev) + /* Drop prev prog refcnt when swapping with new prog */ + bpf_prog_put(prev); + t->prog = prog; + } + rcu_assign_pointer(t->callback_fn, callback_fn); +out: + __bpf_spin_unlock_irqrestore(&timer->lock); + return ret; +} + +static const struct bpf_func_proto bpf_timer_set_callback_proto = { + .func = bpf_timer_set_callback, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_TIMER, + .arg2_type = ARG_PTR_TO_FUNC, +}; + +BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, flags) +{ + struct bpf_hrtimer *t; + int ret = 0; + + if (in_nmi()) + return -EOPNOTSUPP; + if (flags) + return -EINVAL; + __bpf_spin_lock_irqsave(&timer->lock); + t = timer->timer; + if (!t || !t->prog) { + ret = -EINVAL; + goto out; + } + hrtimer_start(&t->timer, ns_to_ktime(nsecs), HRTIMER_MODE_REL_SOFT); +out: + __bpf_spin_unlock_irqrestore(&timer->lock); + return ret; +} + +static const struct bpf_func_proto bpf_timer_start_proto = { + .func = bpf_timer_start, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_TIMER, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, +}; + +static void drop_prog_refcnt(struct bpf_hrtimer *t) +{ + struct bpf_prog *prog = t->prog; + + if (prog) { + bpf_prog_put(prog); + t->prog = NULL; + rcu_assign_pointer(t->callback_fn, NULL); + } +} + 
+BPF_CALL_1(bpf_timer_cancel, struct bpf_timer_kern *, timer) +{ + struct bpf_hrtimer *t; + int ret = 0; + + if (in_nmi()) + return -EOPNOTSUPP; + __bpf_spin_lock_irqsave(&timer->lock); + t = timer->timer; + if (!t) { + ret = -EINVAL; + goto out; + } + if (this_cpu_read(hrtimer_running) == t) { + /* If bpf callback_fn is trying to bpf_timer_cancel() + * its own timer the hrtimer_cancel() will deadlock + * since it waits for callback_fn to finish + */ + ret = -EDEADLK; + goto out; + } + drop_prog_refcnt(t); +out: + __bpf_spin_unlock_irqrestore(&timer->lock); + /* Cancel the timer and wait for associated callback to finish + * if it was running. + */ + ret = ret ?: hrtimer_cancel(&t->timer); + return ret; +} + +static const struct bpf_func_proto bpf_timer_cancel_proto = { + .func = bpf_timer_cancel, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_TIMER, +}; + +/* This function is called by map_delete/update_elem for individual element and + * by ops->map_release_uref when the user space reference to a map reaches zero. + */ +void bpf_timer_cancel_and_free(void *val) +{ + struct bpf_timer_kern *timer = val; + struct bpf_hrtimer *t; + + /* Performance optimization: read timer->timer without lock first. */ + if (!READ_ONCE(timer->timer)) + return; + + __bpf_spin_lock_irqsave(&timer->lock); + /* re-read it under lock */ + t = timer->timer; + if (!t) + goto out; + drop_prog_refcnt(t); + /* The subsequent bpf_timer_start/cancel() helpers won't be able to use + * this timer, since it won't be initialized. + */ + timer->timer = NULL; +out: + __bpf_spin_unlock_irqrestore(&timer->lock); + if (!t) + return; + /* Cancel the timer and wait for callback to complete if it was running. + * If hrtimer_cancel() can be safely called it's safe to call kfree(t) + * right after for both preallocated and non-preallocated maps. + * The timer->timer = NULL was already done and no code path can + * see address 't' anymore. 
+ * + * Check that bpf_map_delete/update_elem() wasn't called from timer + * callback_fn. In such case don't call hrtimer_cancel() (since it will + * deadlock) and don't call hrtimer_try_to_cancel() (since it will just + * return -1). Though callback_fn is still running on this cpu it's + * safe to do kfree(t) because bpf_timer_cb() read everything it needed + * from 't'. The bpf subprog callback_fn won't be able to access 't', + * since timer->timer = NULL was already done. The timer will be + * effectively cancelled because bpf_timer_cb() will return + * HRTIMER_NORESTART. + */ + if (this_cpu_read(hrtimer_running) != t) + hrtimer_cancel(&t->timer); + kfree(t); +} + const struct bpf_func_proto bpf_get_current_task_proto __weak; const struct bpf_func_proto bpf_probe_read_user_proto __weak; const struct bpf_func_proto bpf_probe_read_user_str_proto __weak; @@ -1055,6 +1379,14 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_per_cpu_ptr_proto; case BPF_FUNC_this_cpu_ptr: return &bpf_this_cpu_ptr_proto; + case BPF_FUNC_timer_init: + return &bpf_timer_init_proto; + case BPF_FUNC_timer_set_callback: + return &bpf_timer_set_callback_proto; + case BPF_FUNC_timer_start: + return &bpf_timer_start_proto; + case BPF_FUNC_timer_cancel: + return &bpf_timer_cancel_proto; default: break; } diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c index bd11db9774c3..95d70a08325d 100644 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c @@ -173,7 +173,7 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *key, return -ENOMEM; memcpy(&new->data[0], value, map->value_size); - check_and_init_map_lock(map, new->data); + check_and_init_map_value(map, new->data); new = xchg(&storage->buf, new); kfree_rcu(new, rcu); @@ -509,7 +509,7 @@ struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog, map->numa_node); if (!storage->buf) goto enomem; - check_and_init_map_lock(map, storage->buf->data); + 
check_and_init_map_value(map, storage->buf->data); } else { storage->percpu_buf = bpf_map_alloc_percpu(map, size, 8, gfp); if (!storage->percpu_buf) diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c index 39ab0b68cade..5cd8f5277279 100644 --- a/kernel/bpf/map_in_map.c +++ b/kernel/bpf/map_in_map.c @@ -3,6 +3,7 @@ */ #include <linux/slab.h> #include <linux/bpf.h> +#include <linux/btf.h> #include "map_in_map.h" @@ -50,6 +51,11 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) inner_map_meta->map_flags = inner_map->map_flags; inner_map_meta->max_entries = inner_map->max_entries; inner_map_meta->spin_lock_off = inner_map->spin_lock_off; + inner_map_meta->timer_off = inner_map->timer_off; + if (inner_map->btf) { + btf_get(inner_map->btf); + inner_map_meta->btf = inner_map->btf; + } /* Misc members not needed in bpf_map_meta_equal() check. */ inner_map_meta->ops = inner_map->ops; @@ -65,6 +71,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) void bpf_map_meta_free(struct bpf_map *map_meta) { + btf_put(map_meta->btf); kfree(map_meta); } @@ -75,6 +82,7 @@ bool bpf_map_meta_equal(const struct bpf_map *meta0, return meta0->map_type == meta1->map_type && meta0->key_size == meta1->key_size && meta0->value_size == meta1->value_size && + meta0->timer_off == meta1->timer_off && meta0->map_flags == meta1->map_flags; } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index e343f158e556..9a2068e39d23 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -260,8 +260,8 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value, copy_map_value_locked(map, value, ptr, true); else copy_map_value(map, value, ptr); - /* mask lock, since value wasn't zero inited */ - check_and_init_map_lock(map, value); + /* mask lock and timer, since value wasn't zero inited */ + check_and_init_map_value(map, value); } rcu_read_unlock(); } @@ -623,7 +623,8 @@ static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma) struct 
bpf_map *map = filp->private_data; int err; - if (!map->ops->map_mmap || map_value_has_spin_lock(map)) + if (!map->ops->map_mmap || map_value_has_spin_lock(map) || + map_value_has_timer(map)) return -ENOTSUPP; if (!(vma->vm_flags & VM_SHARED)) @@ -793,6 +794,16 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, } } + map->timer_off = btf_find_timer(btf, value_type); + if (map_value_has_timer(map)) { + if (map->map_flags & BPF_F_RDONLY_PROG) + return -EACCES; + if (map->map_type != BPF_MAP_TYPE_HASH && + map->map_type != BPF_MAP_TYPE_LRU_HASH && + map->map_type != BPF_MAP_TYPE_ARRAY) + return -EOPNOTSUPP; + } + if (map->ops->map_check_btf) ret = map->ops->map_check_btf(map, btf, key_type, value_type); @@ -844,6 +855,7 @@ static int map_create(union bpf_attr *attr) mutex_init(&map->freeze_mutex); map->spin_lock_off = -EINVAL; + map->timer_off = -EINVAL; if (attr->btf_key_type_id || attr->btf_value_type_id || /* Even the map's value is a kernel's struct, * the bpf_prog.o must have BTF to begin with @@ -1591,7 +1603,8 @@ static int map_freeze(const union bpf_attr *attr) if (IS_ERR(map)) return PTR_ERR(map); - if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { + if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS || + map_value_has_timer(map)) { fdput(f); return -ENOTSUPP; } @@ -1699,6 +1712,8 @@ static int bpf_prog_alloc_id(struct bpf_prog *prog) void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock) { + unsigned long flags; + /* cBPF to eBPF migrations are currently not in the idr store. 
* Offloaded programs are removed from the store when their device * disappears - even if someone grabs an fd to them they are unusable, @@ -1708,7 +1723,7 @@ void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock) return; if (do_idr_lock) - spin_lock_bh(&prog_idr_lock); + spin_lock_irqsave(&prog_idr_lock, flags); else __acquire(&prog_idr_lock); @@ -1716,7 +1731,7 @@ void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock) prog->aux->id = 0; if (do_idr_lock) - spin_unlock_bh(&prog_idr_lock); + spin_unlock_irqrestore(&prog_idr_lock, flags); else __release(&prog_idr_lock); } @@ -1752,14 +1767,32 @@ static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred) } } +static void bpf_prog_put_deferred(struct work_struct *work) +{ + struct bpf_prog_aux *aux; + struct bpf_prog *prog; + + aux = container_of(work, struct bpf_prog_aux, work); + prog = aux->prog; + perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0); + bpf_audit_prog(prog, BPF_AUDIT_UNLOAD); + __bpf_prog_put_noref(prog, true); +} + static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock) { - if (atomic64_dec_and_test(&prog->aux->refcnt)) { - perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0); - bpf_audit_prog(prog, BPF_AUDIT_UNLOAD); + struct bpf_prog_aux *aux = prog->aux; + + if (atomic64_dec_and_test(&aux->refcnt)) { /* bpf_prog_free_id() must be called first */ bpf_prog_free_id(prog, do_idr_lock); - __bpf_prog_put_noref(prog, true); + + if (in_irq() || irqs_disabled()) { + INIT_WORK(&aux->work, bpf_prog_put_deferred); + schedule_work(&aux->work); + } else { + bpf_prog_put_deferred(&aux->work); + } } } diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 28a3630c48ee..b2535acfe9db 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -172,7 +172,7 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr) } static struct bpf_tramp_progs * -bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total) 
+bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total, bool *ip_arg) { const struct bpf_prog_aux *aux; struct bpf_tramp_progs *tprogs; @@ -189,8 +189,10 @@ bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total) *total += tr->progs_cnt[kind]; progs = tprogs[kind].progs; - hlist_for_each_entry(aux, &tr->progs_hlist[kind], tramp_hlist) + hlist_for_each_entry(aux, &tr->progs_hlist[kind], tramp_hlist) { + *ip_arg |= aux->prog->call_get_func_ip; *progs++ = aux->prog; + } } return tprogs; } @@ -333,9 +335,10 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr) struct bpf_tramp_image *im; struct bpf_tramp_progs *tprogs; u32 flags = BPF_TRAMP_F_RESTORE_REGS; + bool ip_arg = false; int err, total; - tprogs = bpf_trampoline_get_progs(tr, &total); + tprogs = bpf_trampoline_get_progs(tr, &total, &ip_arg); if (IS_ERR(tprogs)) return PTR_ERR(tprogs); @@ -357,6 +360,9 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr) tprogs[BPF_TRAMP_MODIFY_RETURN].nr_progs) flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME; + if (ip_arg) + flags |= BPF_TRAMP_F_IP_ARG; + err = arch_prepare_bpf_trampoline(im, im->image, im->image + PAGE_SIZE, &tr->func.model, flags, tprogs, tr->func.addr); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 657062cb4d85..8e707da8f643 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -255,6 +255,7 @@ struct bpf_call_arg_meta { int mem_size; u64 msize_max_value; int ref_obj_id; + int map_uid; int func_id; struct btf *btf; u32 btf_id; @@ -734,6 +735,10 @@ static void print_verifier_state(struct bpf_verifier_env *env, if (state->refs[i].id) verbose(env, ",%d", state->refs[i].id); } + if (state->in_callback_fn) + verbose(env, " cb"); + if (state->in_async_callback_fn) + verbose(env, " async_cb"); verbose(env, "\n"); } @@ -1135,6 +1140,10 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg) if (map->inner_map_meta) { reg->type = CONST_PTR_TO_MAP; reg->map_ptr = 
map->inner_map_meta; + /* transfer reg's id which is unique for every map_lookup_elem + * as UID of the inner map. + */ + reg->map_uid = reg->id; } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) { reg->type = PTR_TO_XDP_SOCK; } else if (map->map_type == BPF_MAP_TYPE_SOCKMAP || @@ -1522,6 +1531,54 @@ static void init_func_state(struct bpf_verifier_env *env, init_reg_state(env, state); } +/* Similar to push_stack(), but for async callbacks */ +static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env, + int insn_idx, int prev_insn_idx, + int subprog) +{ + struct bpf_verifier_stack_elem *elem; + struct bpf_func_state *frame; + + elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL); + if (!elem) + goto err; + + elem->insn_idx = insn_idx; + elem->prev_insn_idx = prev_insn_idx; + elem->next = env->head; + elem->log_pos = env->log.len_used; + env->head = elem; + env->stack_size++; + if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) { + verbose(env, + "The sequence of %d jumps is too complex for async cb.\n", + env->stack_size); + goto err; + } + /* Unlike push_stack() do not copy_verifier_state(). + * The caller state doesn't matter. + * This is async callback. It starts in a fresh stack. + * Initialize it similar to do_check_common(). 
+ */ + elem->st.branches = 1; + frame = kzalloc(sizeof(*frame), GFP_KERNEL); + if (!frame) + goto err; + init_func_state(env, frame, + BPF_MAIN_FUNC /* callsite */, + 0 /* frameno within this callchain */, + subprog /* subprog number within this prog */); + elem->st.frame[0] = frame; + return &elem->st; +err: + free_verifier_state(env->cur_state, true); + env->cur_state = NULL; + /* pop all elements and return */ + while (!pop_stack(env, NULL, NULL, false)); + return NULL; +} + + enum reg_arg_type { SRC_OP, /* register is used as source operand */ DST_OP, /* register is used as destination operand */ @@ -3241,6 +3298,15 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno, return -EACCES; } } + if (map_value_has_timer(map)) { + u32 t = map->timer_off; + + if (reg->smin_value + off < t + sizeof(struct bpf_timer) && + t < reg->umax_value + off + size) { + verbose(env, "bpf_timer cannot be accessed directly by load/store\n"); + return -EACCES; + } + } return err; } @@ -3643,6 +3709,8 @@ process_func: continue_func: subprog_end = subprog[idx + 1].start; for (; i < subprog_end; i++) { + int next_insn; + if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i)) continue; /* remember insn and function to return to */ @@ -3650,13 +3718,22 @@ continue_func: ret_prog[frame] = idx; /* find the callee */ - i = i + insn[i].imm + 1; - idx = find_subprog(env, i); + next_insn = i + insn[i].imm + 1; + idx = find_subprog(env, next_insn); if (idx < 0) { WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", - i); + next_insn); return -EFAULT; } + if (subprog[idx].is_async_cb) { + if (subprog[idx].has_tail_call) { + verbose(env, "verifier bug. 
subprog has tail_call and async cb\n"); + return -EFAULT; + } + /* async callbacks don't increase bpf prog stack size */ + continue; + } + i = next_insn; if (subprog[idx].has_tail_call) tail_call_reachable = true; @@ -4658,6 +4735,54 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno, return 0; } +static int process_timer_func(struct bpf_verifier_env *env, int regno, + struct bpf_call_arg_meta *meta) +{ + struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno]; + bool is_const = tnum_is_const(reg->var_off); + struct bpf_map *map = reg->map_ptr; + u64 val = reg->var_off.value; + + if (!is_const) { + verbose(env, + "R%d doesn't have constant offset. bpf_timer has to be at the constant offset\n", + regno); + return -EINVAL; + } + if (!map->btf) { + verbose(env, "map '%s' has to have BTF in order to use bpf_timer\n", + map->name); + return -EINVAL; + } + if (!map_value_has_timer(map)) { + if (map->timer_off == -E2BIG) + verbose(env, + "map '%s' has more than one 'struct bpf_timer'\n", + map->name); + else if (map->timer_off == -ENOENT) + verbose(env, + "map '%s' doesn't have 'struct bpf_timer'\n", + map->name); + else + verbose(env, + "map '%s' is not a struct type or bpf_timer is mangled\n", + map->name); + return -EINVAL; + } + if (map->timer_off != val + reg->off) { + verbose(env, "off %lld doesn't point to 'struct bpf_timer' that is at %d\n", + val + reg->off, map->timer_off); + return -EINVAL; + } + if (meta->map_ptr) { + verbose(env, "verifier bug. 
Two map pointers in a timer helper\n"); + return -EFAULT; + } + meta->map_uid = reg->map_uid; + meta->map_ptr = map; + return 0; +} + static bool arg_type_is_mem_ptr(enum bpf_arg_type type) { return type == ARG_PTR_TO_MEM || @@ -4790,6 +4915,7 @@ static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PER static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } }; static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } }; static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } }; +static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } }; static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_MAP_KEY] = &map_key_value_types, @@ -4821,6 +4947,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_FUNC] = &func_ptr_types, [ARG_PTR_TO_STACK_OR_NULL] = &stack_ptr_types, [ARG_PTR_TO_CONST_STR] = &const_str_ptr_types, + [ARG_PTR_TO_TIMER] = &timer_types, }; static int check_reg_type(struct bpf_verifier_env *env, u32 regno, @@ -4950,7 +5077,29 @@ skip_type_check: if (arg_type == ARG_CONST_MAP_PTR) { /* bpf_map_xxx(map_ptr) call: remember that map_ptr */ + if (meta->map_ptr) { + /* Use map_uid (which is unique id of inner map) to reject: + * inner_map1 = bpf_map_lookup_elem(outer_map, key1) + * inner_map2 = bpf_map_lookup_elem(outer_map, key2) + * if (inner_map1 && inner_map2) { + * timer = bpf_map_lookup_elem(inner_map1); + * if (timer) + * // mismatch would have been allowed + * bpf_timer_init(timer, inner_map2); + * } + * + * Comparing map_ptr is enough to distinguish normal and outer maps. 
+ */ + if (meta->map_ptr != reg->map_ptr || + meta->map_uid != reg->map_uid) { + verbose(env, + "timer pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n", + meta->map_uid, reg->map_uid); + return -EINVAL; + } + } meta->map_ptr = reg->map_ptr; + meta->map_uid = reg->map_uid; } else if (arg_type == ARG_PTR_TO_MAP_KEY) { /* bpf_map_xxx(..., map_ptr, ..., key) call: * check that [key, key + map->key_size) are within @@ -5002,6 +5151,9 @@ skip_type_check: verbose(env, "verifier internal error\n"); return -EFAULT; } + } else if (arg_type == ARG_PTR_TO_TIMER) { + if (process_timer_func(env, regno, meta)) + return -EACCES; } else if (arg_type == ARG_PTR_TO_FUNC) { meta->subprogno = reg->subprogno; } else if (arg_type_is_mem_ptr(arg_type)) { @@ -5617,6 +5769,31 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn } } + if (insn->code == (BPF_JMP | BPF_CALL) && + insn->imm == BPF_FUNC_timer_set_callback) { + struct bpf_verifier_state *async_cb; + + /* there is no real recursion here. 
timer callbacks are async */ + env->subprog_info[subprog].is_async_cb = true; + async_cb = push_async_cb(env, env->subprog_info[subprog].start, + *insn_idx, subprog); + if (!async_cb) + return -EFAULT; + callee = async_cb->frame[0]; + callee->async_entry_cnt = caller->async_entry_cnt + 1; + + /* Convert bpf_timer_set_callback() args into timer callback args */ + err = set_callee_state_cb(env, caller, callee, *insn_idx); + if (err) + return err; + + clear_caller_saved_regs(env, caller->regs); + mark_reg_unknown(env, caller->regs, BPF_REG_0); + caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG; + /* continue with next insn after call */ + return 0; + } + callee = kzalloc(sizeof(*callee), GFP_KERNEL); if (!callee) return -ENOMEM; @@ -5744,6 +5921,35 @@ static int set_map_elem_callback_state(struct bpf_verifier_env *env, return 0; } +static int set_timer_callback_state(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee, + int insn_idx) +{ + struct bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr; + + /* bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn); + * callback_fn(struct bpf_map *map, void *key, void *value); + */ + callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP; + __mark_reg_known_zero(&callee->regs[BPF_REG_1]); + callee->regs[BPF_REG_1].map_ptr = map_ptr; + + callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY; + __mark_reg_known_zero(&callee->regs[BPF_REG_2]); + callee->regs[BPF_REG_2].map_ptr = map_ptr; + + callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE; + __mark_reg_known_zero(&callee->regs[BPF_REG_3]); + callee->regs[BPF_REG_3].map_ptr = map_ptr; + + /* unused */ + __mark_reg_not_init(env, &callee->regs[BPF_REG_4]); + __mark_reg_not_init(env, &callee->regs[BPF_REG_5]); + callee->in_async_callback_fn = true; + return 0; +} + static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) { struct bpf_verifier_state *state = env->cur_state; @@ -5957,6 +6163,29 @@ static int 
check_bpf_snprintf_call(struct bpf_verifier_env *env, return err; } +static int check_get_func_ip(struct bpf_verifier_env *env) +{ + enum bpf_attach_type eatype = env->prog->expected_attach_type; + enum bpf_prog_type type = resolve_prog_type(env->prog); + int func_id = BPF_FUNC_get_func_ip; + + if (type == BPF_PROG_TYPE_TRACING) { + if (eatype != BPF_TRACE_FENTRY && eatype != BPF_TRACE_FEXIT && + eatype != BPF_MODIFY_RETURN) { + verbose(env, "func %s#%d supported only for fentry/fexit/fmod_ret programs\n", + func_id_name(func_id), func_id); + return -ENOTSUPP; + } + return 0; + } else if (type == BPF_PROG_TYPE_KPROBE) { + return 0; + } + + verbose(env, "func %s#%d not supported for program type %d\n", + func_id_name(func_id), func_id, type); + return -ENOTSUPP; +} + static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx_p) { @@ -6071,6 +6300,13 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn return -EINVAL; } + if (func_id == BPF_FUNC_timer_set_callback) { + err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, + set_timer_callback_state); + if (err < 0) + return -EINVAL; + } + if (func_id == BPF_FUNC_snprintf) { err = check_bpf_snprintf_call(env, regs); if (err < 0) @@ -6106,6 +6342,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn return -EINVAL; } regs[BPF_REG_0].map_ptr = meta.map_ptr; + regs[BPF_REG_0].map_uid = meta.map_uid; if (fn->ret_type == RET_PTR_TO_MAP_VALUE) { regs[BPF_REG_0].type = PTR_TO_MAP_VALUE; if (map_value_has_spin_lock(meta.map_ptr)) @@ -6227,6 +6464,12 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack) env->prog->call_get_stack = true; + if (func_id == BPF_FUNC_get_func_ip) { + if (check_get_func_ip(env)) + return -ENOTSUPP; + env->prog->call_get_func_ip = true; + } + if (changes_data) clear_all_pkt_pointers(env); return 
0; @@ -9107,7 +9350,8 @@ static int check_return_code(struct bpf_verifier_env *env) struct tnum range = tnum_range(0, 1); enum bpf_prog_type prog_type = resolve_prog_type(env->prog); int err; - const bool is_subprog = env->cur_state->frame[0]->subprogno; + struct bpf_func_state *frame = env->cur_state->frame[0]; + const bool is_subprog = frame->subprogno; /* LSM and struct_ops func-ptr's return type could be "void" */ if (!is_subprog && @@ -9132,6 +9376,22 @@ static int check_return_code(struct bpf_verifier_env *env) } reg = cur_regs(env) + BPF_REG_0; + + if (frame->in_async_callback_fn) { + /* enforce return zero from async callbacks like timer */ + if (reg->type != SCALAR_VALUE) { + verbose(env, "In async callback the register R0 is not a known value (%s)\n", + reg_type_str[reg->type]); + return -EINVAL; + } + + if (!tnum_in(tnum_const(0), reg->var_off)) { + verbose_invalid_scalar(env, reg, &range, "async callback", "R0"); + return -EINVAL; + } + return 0; + } + if (is_subprog) { if (reg->type != SCALAR_VALUE) { verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n", @@ -9346,8 +9606,12 @@ static int visit_func_call_insn(int t, int insn_cnt, init_explored_state(env, t + 1); if (visit_callee) { init_explored_state(env, t); - ret = push_insn(t, t + insns[t].imm + 1, BRANCH, - env, false); + ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env, + /* It's ok to allow recursion from CFG point of + * view. __check_func_call() will do the actual + * check. + */ + bpf_pseudo_func(insns + t)); } return ret; } @@ -9375,6 +9639,13 @@ static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env) return DONE_EXPLORING; case BPF_CALL: + if (insns[t].imm == BPF_FUNC_timer_set_callback) + /* Mark this call insn to trigger is_state_visited() check + * before call itself is processed by __check_func_call(). + * Otherwise new async state will be pushed for further + * exploration. 
+ */ + init_explored_state(env, t); return visit_func_call_insn(t, insn_cnt, insns, env, insns[t].src_reg == BPF_PSEUDO_CALL); @@ -10383,9 +10654,25 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) states_cnt++; if (sl->state.insn_idx != insn_idx) goto next; + if (sl->state.branches) { - if (states_maybe_looping(&sl->state, cur) && - states_equal(env, &sl->state, cur)) { + struct bpf_func_state *frame = sl->state.frame[sl->state.curframe]; + + if (frame->in_async_callback_fn && + frame->async_entry_cnt != cur->frame[cur->curframe]->async_entry_cnt) { + /* Different async_entry_cnt means that the verifier is + * processing another entry into async callback. + * Seeing the same state is not an indication of infinite + * loop or infinite recursion. + * But finding the same state doesn't mean that it's safe + * to stop processing the current state. The previous state + * hasn't yet reached bpf_exit, since state.branches > 0. + * Checking in_async_callback_fn alone is not enough either. + * Since the verifier still needs to catch infinite loops + * inside async callbacks. + */ + } else if (states_maybe_looping(&sl->state, cur) && + states_equal(env, &sl->state, cur)) { verbose_linfo(env, insn_idx, "; "); verbose(env, "infinite loop detected at insn %d\n", insn_idx); return -EINVAL; @@ -11434,10 +11721,11 @@ static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env) * insni[off, off + cnt). 
Adjust corresponding insn_aux_data by copying * [0, off) and [off, end) to new locations, so the patched range stays zero */ -static int adjust_insn_aux_data(struct bpf_verifier_env *env, - struct bpf_prog *new_prog, u32 off, u32 cnt) +static void adjust_insn_aux_data(struct bpf_verifier_env *env, + struct bpf_insn_aux_data *new_data, + struct bpf_prog *new_prog, u32 off, u32 cnt) { - struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data; + struct bpf_insn_aux_data *old_data = env->insn_aux_data; struct bpf_insn *insn = new_prog->insnsi; u32 old_seen = old_data[off].seen; u32 prog_len; @@ -11450,12 +11738,9 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env, old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1); if (cnt == 1) - return 0; + return; prog_len = new_prog->len; - new_data = vzalloc(array_size(prog_len, - sizeof(struct bpf_insn_aux_data))); - if (!new_data) - return -ENOMEM; + memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off); memcpy(new_data + off + cnt - 1, old_data + off, sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1)); @@ -11466,7 +11751,6 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env, } env->insn_aux_data = new_data; vfree(old_data); - return 0; } static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len) @@ -11501,6 +11785,14 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of const struct bpf_insn *patch, u32 len) { struct bpf_prog *new_prog; + struct bpf_insn_aux_data *new_data = NULL; + + if (len > 1) { + new_data = vzalloc(array_size(env->prog->len + len - 1, + sizeof(struct bpf_insn_aux_data))); + if (!new_data) + return NULL; + } new_prog = bpf_patch_insn_single(env->prog, off, patch, len); if (IS_ERR(new_prog)) { @@ -11508,10 +11800,10 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of verbose(env, "insn %d cannot be patched due to 16-bit range\n", 
env->insn_aux_data[off].orig_idx); + vfree(new_data); return NULL; } - if (adjust_insn_aux_data(env, new_prog, off, len)) - return NULL; + adjust_insn_aux_data(env, new_data, new_prog, off, len); adjust_subprog_starts(env, off, len); adjust_poke_descs(new_prog, off, len); return new_prog; @@ -12360,6 +12652,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env) { struct bpf_prog *prog = env->prog; bool expect_blinding = bpf_jit_blinding_enabled(prog); + enum bpf_prog_type prog_type = resolve_prog_type(prog); struct bpf_insn *insn = prog->insnsi; const struct bpf_func_proto *fn; const int insn_cnt = prog->len; @@ -12577,6 +12870,39 @@ static int do_misc_fixups(struct bpf_verifier_env *env) continue; } + if (insn->imm == BPF_FUNC_timer_set_callback) { + /* The verifier will process callback_fn as many times as necessary + * with different maps and the register states prepared by + * set_timer_callback_state will be accurate. + * + * The following use case is valid: + * map1 is shared by prog1, prog2, prog3. + * prog1 calls bpf_timer_init for some map1 elements + * prog2 calls bpf_timer_set_callback for some map1 elements. + * Those that were not bpf_timer_init-ed will return -EINVAL. + * prog3 calls bpf_timer_start for some map1 elements. + * Those that were not both bpf_timer_init-ed and + * bpf_timer_set_callback-ed will return -EINVAL. + */ + struct bpf_insn ld_addrs[2] = { + BPF_LD_IMM64(BPF_REG_3, (long)prog->aux), + }; + + insn_buf[0] = ld_addrs[0]; + insn_buf[1] = ld_addrs[1]; + insn_buf[2] = *insn; + cnt = 3; + + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + goto patch_call_imm; + } + /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup * and other inlining handlers are currently limited to 64 bit * only. @@ -12693,6 +13019,21 @@ patch_map_ops_generic: continue; } + /* Implement bpf_get_func_ip inline. 
*/ + if (prog_type == BPF_PROG_TYPE_TRACING && + insn->imm == BPF_FUNC_get_func_ip) { + /* Load IP address from ctx - 8 */ + insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); + + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1); + if (!new_prog) + return -ENOMEM; + + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + continue; + } + patch_call_imm: fn = env->ops->get_func_proto(insn->imm, env->prog); /* all functions that have prototype and verifier allowed diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index b4916ef388ad..08906007306d 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -948,6 +948,33 @@ const struct bpf_func_proto bpf_snprintf_btf_proto = { .arg5_type = ARG_ANYTHING, }; +BPF_CALL_1(bpf_get_func_ip_tracing, void *, ctx) +{ + /* This helper call is inlined by verifier. */ + return ((u64 *)ctx)[-1]; +} + +static const struct bpf_func_proto bpf_get_func_ip_proto_tracing = { + .func = bpf_get_func_ip_tracing, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + +BPF_CALL_1(bpf_get_func_ip_kprobe, struct pt_regs *, regs) +{ + struct kprobe *kp = kprobe_running(); + + return kp ? 
(u64) kp->addr : 0; +} + +static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe = { + .func = bpf_get_func_ip_kprobe, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + const struct bpf_func_proto * bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -1058,8 +1085,10 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_for_each_map_elem_proto; case BPF_FUNC_snprintf: return &bpf_snprintf_proto; + case BPF_FUNC_get_func_ip: + return &bpf_get_func_ip_proto_tracing; default: - return NULL; + return bpf_base_func_proto(func_id); } } @@ -1077,6 +1106,8 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_override_return: return &bpf_override_return_proto; #endif + case BPF_FUNC_get_func_ip: + return &bpf_get_func_ip_proto_kprobe; default: return bpf_tracing_func_proto(func_id, prog); } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ae1f5d0cb581..1bbf239b06f2 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -968,7 +968,7 @@ static __always_inline bool memcg_kmem_bypass(void) return false; /* Memcg to charge can't be determined. 
*/ - if (in_interrupt() || !current->mm || (current->flags & PF_KTHREAD)) + if (!in_task() || !current->mm || (current->flags & PF_KTHREAD)) return true; return false; diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 4cdf8416869d..55275ef9a31a 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -67,7 +67,7 @@ static int vlan_group_prealloc_vid(struct vlan_group *vg, return 0; size = sizeof(struct net_device *) * VLAN_GROUP_ARRAY_PART_LEN; - array = kzalloc(size, GFP_KERNEL); + array = kzalloc(size, GFP_KERNEL_ACCOUNT); if (array == NULL) return -ENOBUFS; diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 8ade5a4ceaf5..bf5736c1d458 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -666,7 +666,7 @@ static int atif_ioctl(int cmd, void __user *arg) struct rtentry rtdef; int add_route; - if (copy_from_user(&atreq, arg, sizeof(atreq))) + if (get_user_ifreq(&atreq, NULL, arg)) return -EFAULT; dev = __dev_get_by_name(&init_net, atreq.ifr_name); @@ -865,7 +865,7 @@ static int atif_ioctl(int cmd, void __user *arg) return 0; } - return copy_to_user(arg, &atreq, sizeof(atreq)) ? 
-EFAULT : 0; + return put_user_ifreq(&atreq, arg); } static int atrtr_ioctl_addrt(struct rtentry *rt) diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 1cc75c811e24..b488e2779718 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -15,6 +15,7 @@ #include <linux/error-injection.h> #include <linux/smp.h> #include <linux/sock_diag.h> +#include <net/xdp.h> #define CREATE_TRACE_POINTS #include <trace/events/bpf_test_run.h> @@ -687,6 +688,64 @@ out: return ret; } +static int xdp_convert_md_to_buff(struct xdp_md *xdp_md, struct xdp_buff *xdp) +{ + unsigned int ingress_ifindex, rx_queue_index; + struct netdev_rx_queue *rxqueue; + struct net_device *device; + + if (!xdp_md) + return 0; + + if (xdp_md->egress_ifindex != 0) + return -EINVAL; + + ingress_ifindex = xdp_md->ingress_ifindex; + rx_queue_index = xdp_md->rx_queue_index; + + if (!ingress_ifindex && rx_queue_index) + return -EINVAL; + + if (ingress_ifindex) { + device = dev_get_by_index(current->nsproxy->net_ns, + ingress_ifindex); + if (!device) + return -ENODEV; + + if (rx_queue_index >= device->real_num_rx_queues) + goto free_dev; + + rxqueue = __netif_get_rx_queue(device, rx_queue_index); + + if (!xdp_rxq_info_is_reg(&rxqueue->xdp_rxq)) + goto free_dev; + + xdp->rxq = &rxqueue->xdp_rxq; + /* The device is now tracked in the xdp->rxq for later + * dev_put() + */ + } + + xdp->data = xdp->data_meta + xdp_md->data; + return 0; + +free_dev: + dev_put(device); + return -EINVAL; +} + +static void xdp_convert_buff_to_md(struct xdp_buff *xdp, struct xdp_md *xdp_md) +{ + if (!xdp_md) + return; + + xdp_md->data = xdp->data - xdp->data_meta; + xdp_md->data_end = xdp->data_end - xdp->data_meta; + + if (xdp_md->ingress_ifindex) + dev_put(xdp->rxq->dev); +} + int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { @@ -697,38 +756,74 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, struct netdev_rx_queue *rxqueue; struct 
xdp_buff xdp = {}; u32 retval, duration; + struct xdp_md *ctx; u32 max_data_sz; void *data; - int ret; + int ret = -EINVAL; if (prog->expected_attach_type == BPF_XDP_DEVMAP || prog->expected_attach_type == BPF_XDP_CPUMAP) return -EINVAL; if (kattr->test.ctx_in || kattr->test.ctx_out) return -EINVAL; + ctx = bpf_ctx_init(kattr, sizeof(struct xdp_md)); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + if (ctx) { + /* There can't be user provided data before the meta data */ + if (ctx->data_meta || ctx->data_end != size || + ctx->data > ctx->data_end || + unlikely(xdp_metalen_invalid(ctx->data))) + goto free_ctx; + /* Meta data is allocated from the headroom */ + headroom -= ctx->data; + } /* XDP have extra tailroom as (most) drivers use full page */ max_data_sz = 4096 - headroom - tailroom; data = bpf_test_init(kattr, max_data_sz, headroom, tailroom); - if (IS_ERR(data)) - return PTR_ERR(data); + if (IS_ERR(data)) { + ret = PTR_ERR(data); + goto free_ctx; + } rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0); xdp_init_buff(&xdp, headroom + max_data_sz + tailroom, &rxqueue->xdp_rxq); xdp_prepare_buff(&xdp, data, headroom, size, true); + ret = xdp_convert_md_to_buff(ctx, &xdp); + if (ret) + goto free_data; + bpf_prog_change_xdp(NULL, prog); ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true); + /* We convert the xdp_buff back to an xdp_md before checking the return + * code so the reference count of any held netdevice will be decremented + * even if the test run failed. 
+ */ + xdp_convert_buff_to_md(&xdp, ctx); if (ret) goto out; - if (xdp.data != data + headroom || xdp.data_end != xdp.data + size) - size = xdp.data_end - xdp.data; - ret = bpf_test_finish(kattr, uattr, xdp.data, size, retval, duration); + + if (xdp.data_meta != data + headroom || + xdp.data_end != xdp.data_meta + size) + size = xdp.data_end - xdp.data_meta; + + ret = bpf_test_finish(kattr, uattr, xdp.data_meta, size, retval, + duration); + if (!ret) + ret = bpf_ctx_finish(kattr, uattr, ctx, + sizeof(struct xdp_md)); + out: bpf_prog_change_xdp(prog, NULL); +free_data: kfree(data); +free_ctx: + kfree(ctx); return ret; } diff --git a/net/bridge/br.c b/net/bridge/br.c index ef743f94254d..51f2e25c4cd6 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -214,17 +214,22 @@ static struct notifier_block br_switchdev_notifier = { int br_boolopt_toggle(struct net_bridge *br, enum br_boolopt_id opt, bool on, struct netlink_ext_ack *extack) { + int err = 0; + switch (opt) { case BR_BOOLOPT_NO_LL_LEARN: br_opt_toggle(br, BROPT_NO_LL_LEARN, on); break; + case BR_BOOLOPT_MCAST_VLAN_SNOOPING: + err = br_multicast_toggle_vlan_snooping(br, on, extack); + break; default: /* shouldn't be called with unsupported options */ WARN_ON(1); break; } - return 0; + return err; } int br_boolopt_get(const struct net_bridge *br, enum br_boolopt_id opt) @@ -232,6 +237,8 @@ int br_boolopt_get(const struct net_bridge *br, enum br_boolopt_id opt) switch (opt) { case BR_BOOLOPT_NO_LL_LEARN: return br_opt_get(br, BROPT_NO_LL_LEARN); + case BR_BOOLOPT_MCAST_VLAN_SNOOPING: + return br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED); default: /* shouldn't be called with unsupported options */ WARN_ON(1); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index e8b626cc6bfd..00daf35f54d5 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -27,11 +27,14 @@ EXPORT_SYMBOL_GPL(nf_br_ops); /* net device transmit always called with BH disabled */ netdev_tx_t br_dev_xmit(struct sk_buff 
*skb, struct net_device *dev) { + struct net_bridge_mcast_port *pmctx_null = NULL; struct net_bridge *br = netdev_priv(dev); + struct net_bridge_mcast *brmctx = &br->multicast_ctx; struct net_bridge_fdb_entry *dst; struct net_bridge_mdb_entry *mdst; const struct nf_br_ops *nf_ops; u8 state = BR_STATE_FORWARDING; + struct net_bridge_vlan *vlan; const unsigned char *dest; u16 vid = 0; @@ -53,7 +56,8 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) skb_reset_mac_header(skb); skb_pull(skb, ETH_HLEN); - if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid, &state)) + if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid, + &state, &vlan)) goto out; if (IS_ENABLED(CONFIG_INET) && @@ -82,15 +86,15 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) br_flood(br, skb, BR_PKT_MULTICAST, false, true); goto out; } - if (br_multicast_rcv(br, NULL, skb, vid)) { + if (br_multicast_rcv(&brmctx, &pmctx_null, vlan, skb, vid)) { kfree_skb(skb); goto out; } - mdst = br_mdb_get(br, skb, vid); + mdst = br_mdb_get(brmctx, skb, vid); if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) && - br_multicast_querier_exists(br, eth_hdr(skb), mdst)) - br_multicast_flood(mdst, skb, false, true); + br_multicast_querier_exists(brmctx, eth_hdr(skb), mdst)) + br_multicast_flood(mdst, skb, brmctx, false, true); else br_flood(br, skb, BR_PKT_MULTICAST, false, true); } else if ((dst = br_fdb_find_rcu(br, dest, vid)) != NULL) { diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index a16191dcaed1..5b345bb72078 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -760,6 +760,9 @@ int br_fdb_replay(const struct net_device *br_dev, const struct net_device *dev, unsigned long action; int err = 0; + if (!nb) + return 0; + if (!netif_is_bridge_master(br_dev)) return -EINVAL; @@ -792,7 +795,6 @@ int br_fdb_replay(const struct net_device *br_dev, const struct net_device *dev, return err; } -EXPORT_SYMBOL_GPL(br_fdb_replay); static void 
fdb_notify(struct net_bridge *br, const struct net_bridge_fdb_entry *fdb, int type, diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 07856362538f..ec646656dbf1 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -48,6 +48,8 @@ int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb skb_set_network_header(skb, depth); } + br_switchdev_frame_set_offload_fwd_mark(skb); + dev_queue_xmit(skb); return 0; @@ -76,6 +78,11 @@ static void __br_forward(const struct net_bridge_port *to, struct net *net; int br_hook; + /* Mark the skb for forwarding offload early so that br_handle_vlan() + * can know whether to pop the VLAN header on egress or keep it. + */ + nbp_switchdev_frame_mark_tx_fwd_offload(to, skb); + vg = nbp_vlan_group_rcu(to); skb = br_handle_vlan(to->br, to, vg, skb); if (!skb) @@ -174,6 +181,8 @@ static struct net_bridge_port *maybe_deliver( if (!should_deliver(p, skb)) return prev; + nbp_switchdev_frame_mark_tx_fwd_to_hwdom(p, skb); + if (!prev) goto out; @@ -267,20 +276,19 @@ static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb, /* called with rcu_read_lock */ void br_multicast_flood(struct net_bridge_mdb_entry *mdst, struct sk_buff *skb, + struct net_bridge_mcast *brmctx, bool local_rcv, bool local_orig) { - struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev; - struct net_bridge *br = netdev_priv(dev); struct net_bridge_port *prev = NULL; struct net_bridge_port_group *p; bool allow_mode_include = true; struct hlist_node *rp; - rp = br_multicast_get_first_rport_node(br, skb); + rp = br_multicast_get_first_rport_node(brmctx, skb); if (mdst) { p = rcu_dereference(mdst->ports); - if (br_multicast_should_handle_mode(br, mdst->addr.proto) && + if (br_multicast_should_handle_mode(brmctx, mdst->addr.proto) && br_multicast_is_star_g(&mdst->addr)) allow_mode_include = false; } else { diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 6e4a32354a13..86f6d7e93ea8 100644 --- 
a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -643,10 +643,6 @@ int br_add_if(struct net_bridge *br, struct net_device *dev, if (err) goto err5; - err = nbp_switchdev_mark_set(p); - if (err) - goto err6; - dev_disable_lro(dev); list_add_rcu(&p->list, &br->port_list); @@ -684,13 +680,13 @@ int br_add_if(struct net_bridge *br, struct net_device *dev, */ err = dev_pre_changeaddr_notify(br->dev, dev->dev_addr, extack); if (err) - goto err7; + goto err6; } err = nbp_vlan_init(p, extack); if (err) { netdev_err(dev, "failed to initialize vlan filtering on this port\n"); - goto err7; + goto err6; } spin_lock_bh(&br->lock); @@ -713,13 +709,12 @@ int br_add_if(struct net_bridge *br, struct net_device *dev, return 0; -err7: +err6: if (fdb_synced) br_fdb_unsync_static(br, p); list_del_rcu(&p->list); br_fdb_delete_by_port(br, p, 0, 1); nbp_update_port_count(br); -err6: netdev_upper_dev_unlink(dev, br->dev); err5: dev->priv_flags &= ~IFF_BRIDGE_PORT; diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 1f506309efa8..8a0c0cc55cb4 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -69,8 +69,11 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb struct net_bridge_port *p = br_port_get_rcu(skb->dev); enum br_pkt_type pkt_type = BR_PKT_UNICAST; struct net_bridge_fdb_entry *dst = NULL; + struct net_bridge_mcast_port *pmctx; struct net_bridge_mdb_entry *mdst; bool local_rcv, mcast_hit = false; + struct net_bridge_mcast *brmctx; + struct net_bridge_vlan *vlan; struct net_bridge *br; u16 vid = 0; u8 state; @@ -78,9 +81,11 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb if (!p || p->state == BR_STATE_DISABLED) goto drop; + brmctx = &p->br->multicast_ctx; + pmctx = &p->multicast_ctx; state = p->state; if (!br_allowed_ingress(p->br, nbp_vlan_group_rcu(p), skb, &vid, - &state)) + &state, &vlan)) goto out; nbp_switchdev_frame_mark(p, skb); @@ -98,7 +103,7 @@ int br_handle_frame_finish(struct net 
*net, struct sock *sk, struct sk_buff *skb local_rcv = true; } else { pkt_type = BR_PKT_MULTICAST; - if (br_multicast_rcv(br, p, skb, vid)) + if (br_multicast_rcv(&brmctx, &pmctx, vlan, skb, vid)) goto drop; } } @@ -128,11 +133,11 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb switch (pkt_type) { case BR_PKT_MULTICAST: - mdst = br_mdb_get(br, skb, vid); + mdst = br_mdb_get(brmctx, skb, vid); if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) && - br_multicast_querier_exists(br, eth_hdr(skb), mdst)) { + br_multicast_querier_exists(brmctx, eth_hdr(skb), mdst)) { if ((mdst && mdst->host_joined) || - br_multicast_is_router(br, skb)) { + br_multicast_is_router(brmctx, skb)) { local_rcv = true; br->dev->stats.multicast++; } @@ -162,7 +167,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb if (!mcast_hit) br_flood(br, skb, pkt_type, local_rcv, false); else - br_multicast_flood(mdst, skb, local_rcv, false); + br_multicast_flood(mdst, skb, brmctx, local_rcv, false); } if (local_rcv) diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 17a720b4473f..73a8915b0148 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -16,29 +16,29 @@ #include "br_private.h" -static bool br_rports_have_mc_router(struct net_bridge *br) +static bool br_rports_have_mc_router(struct net_bridge_mcast *brmctx) { #if IS_ENABLED(CONFIG_IPV6) - return !hlist_empty(&br->ip4_mc_router_list) || - !hlist_empty(&br->ip6_mc_router_list); + return !hlist_empty(&brmctx->ip4_mc_router_list) || + !hlist_empty(&brmctx->ip6_mc_router_list); #else - return !hlist_empty(&br->ip4_mc_router_list); + return !hlist_empty(&brmctx->ip4_mc_router_list); #endif } static bool br_ip4_rports_get_timer(struct net_bridge_port *port, unsigned long *timer) { - *timer = br_timer_value(&port->ip4_mc_router_timer); - return !hlist_unhashed(&port->ip4_rlist); + *timer = br_timer_value(&port->multicast_ctx.ip4_mc_router_timer); + return 
!hlist_unhashed(&port->multicast_ctx.ip4_rlist); } static bool br_ip6_rports_get_timer(struct net_bridge_port *port, unsigned long *timer) { #if IS_ENABLED(CONFIG_IPV6) - *timer = br_timer_value(&port->ip6_mc_router_timer); - return !hlist_unhashed(&port->ip6_rlist); + *timer = br_timer_value(&port->multicast_ctx.ip6_mc_router_timer); + return !hlist_unhashed(&port->multicast_ctx.ip6_rlist); #else *timer = 0; return false; @@ -54,10 +54,10 @@ static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb, struct nlattr *nest, *port_nest; struct net_bridge_port *p; - if (!br->multicast_router) + if (!br->multicast_ctx.multicast_router) return 0; - if (!br_rports_have_mc_router(br)) + if (!br_rports_have_mc_router(&br->multicast_ctx)) return 0; nest = nla_nest_start_noflag(skb, MDBA_ROUTER); @@ -79,7 +79,7 @@ static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb, nla_put_u32(skb, MDBA_ROUTER_PATTR_TIMER, max(ip4_timer, ip6_timer)) || nla_put_u8(skb, MDBA_ROUTER_PATTR_TYPE, - p->multicast_router) || + p->multicast_ctx.multicast_router) || (have_ip4_mc_rtr && nla_put_u32(skb, MDBA_ROUTER_PATTR_INET_TIMER, ip4_timer)) || @@ -240,7 +240,7 @@ static int __mdb_fill_info(struct sk_buff *skb, switch (mp->addr.proto) { case htons(ETH_P_IP): - dump_srcs_mode = !!(mp->br->multicast_igmp_version == 3); + dump_srcs_mode = !!(mp->br->multicast_ctx.multicast_igmp_version == 3); if (mp->addr.src.ip4) { if (nla_put_in_addr(skb, MDBA_MDB_EATTR_SOURCE, mp->addr.src.ip4)) @@ -250,7 +250,7 @@ static int __mdb_fill_info(struct sk_buff *skb, break; #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): - dump_srcs_mode = !!(mp->br->multicast_mld_version == 2); + dump_srcs_mode = !!(mp->br->multicast_ctx.multicast_mld_version == 2); if (!ipv6_addr_any(&mp->addr.src.ip6)) { if (nla_put_in6_addr(skb, MDBA_MDB_EATTR_SOURCE, &mp->addr.src.ip6)) @@ -483,7 +483,7 @@ static size_t rtnl_mdb_nlmsg_size(struct net_bridge_port_group *pg) /* MDBA_MDB_EATTR_SOURCE 
*/ if (pg->key.addr.src.ip4) nlmsg_size += nla_total_size(sizeof(__be32)); - if (pg->key.port->br->multicast_igmp_version == 2) + if (pg->key.port->br->multicast_ctx.multicast_igmp_version == 2) goto out; addr_size = sizeof(__be32); break; @@ -492,7 +492,7 @@ static size_t rtnl_mdb_nlmsg_size(struct net_bridge_port_group *pg) /* MDBA_MDB_EATTR_SOURCE */ if (!ipv6_addr_any(&pg->key.addr.src.ip6)) nlmsg_size += nla_total_size(sizeof(struct in6_addr)); - if (pg->key.port->br->multicast_mld_version == 1) + if (pg->key.port->br->multicast_ctx.multicast_mld_version == 1) goto out; addr_size = sizeof(struct in6_addr); break; @@ -617,6 +617,9 @@ int br_mdb_replay(struct net_device *br_dev, struct net_device *dev, ASSERT_RTNL(); + if (!nb) + return 0; + if (!netif_is_bridge_master(br_dev) || !netif_is_bridge_port(dev)) return -EINVAL; @@ -686,7 +689,6 @@ out_free_mdb: return err; } -EXPORT_SYMBOL_GPL(br_mdb_replay); static void br_mdb_switchdev_host_port(struct net_device *dev, struct net_device *lower_dev, @@ -781,12 +783,12 @@ errout: static int nlmsg_populate_rtr_fill(struct sk_buff *skb, struct net_device *dev, - int ifindex, u32 pid, + int ifindex, u16 vid, u32 pid, u32 seq, int type, unsigned int flags) { + struct nlattr *nest, *port_nest; struct br_port_msg *bpm; struct nlmsghdr *nlh; - struct nlattr *nest; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), 0); if (!nlh) @@ -800,8 +802,18 @@ static int nlmsg_populate_rtr_fill(struct sk_buff *skb, if (!nest) goto cancel; - if (nla_put_u32(skb, MDBA_ROUTER_PORT, ifindex)) + port_nest = nla_nest_start_noflag(skb, MDBA_ROUTER_PORT); + if (!port_nest) + goto end; + if (nla_put_nohdr(skb, sizeof(u32), &ifindex)) { + nla_nest_cancel(skb, port_nest); goto end; + } + if (vid && nla_put_u16(skb, MDBA_ROUTER_PATTR_VID, vid)) { + nla_nest_cancel(skb, port_nest); + goto end; + } + nla_nest_end(skb, port_nest); nla_nest_end(skb, nest); nlmsg_end(skb, nlh); @@ -817,23 +829,28 @@ cancel: static inline size_t 
rtnl_rtr_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct br_port_msg)) - + nla_total_size(sizeof(__u32)); + + nla_total_size(sizeof(__u32)) + + nla_total_size(sizeof(u16)); } -void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port, +void br_rtr_notify(struct net_device *dev, struct net_bridge_mcast_port *pmctx, int type) { struct net *net = dev_net(dev); struct sk_buff *skb; int err = -ENOBUFS; int ifindex; + u16 vid; - ifindex = port ? port->dev->ifindex : 0; + ifindex = pmctx ? pmctx->port->dev->ifindex : 0; + vid = pmctx && br_multicast_port_ctx_is_vlan(pmctx) ? pmctx->vlan->vid : + 0; skb = nlmsg_new(rtnl_rtr_nlmsg_size(), GFP_ATOMIC); if (!skb) goto errout; - err = nlmsg_populate_rtr_fill(skb, dev, ifindex, 0, 0, type, NTF_SELF); + err = nlmsg_populate_rtr_fill(skb, dev, ifindex, vid, 0, 0, type, + NTF_SELF); if (err < 0) { kfree_skb(skb); goto errout; @@ -1004,14 +1021,47 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh, return 0; } +static struct net_bridge_mcast * +__br_mdb_choose_context(struct net_bridge *br, + const struct br_mdb_entry *entry, + struct netlink_ext_ack *extack) +{ + struct net_bridge_mcast *brmctx = NULL; + struct net_bridge_vlan *v; + + if (!br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) { + brmctx = &br->multicast_ctx; + goto out; + } + + if (!entry->vid) { + NL_SET_ERR_MSG_MOD(extack, "Cannot add an entry without a vlan when vlan snooping is enabled"); + goto out; + } + + v = br_vlan_find(br_vlan_group(br), entry->vid); + if (!v) { + NL_SET_ERR_MSG_MOD(extack, "Vlan is not configured"); + goto out; + } + if (br_multicast_ctx_vlan_global_disabled(&v->br_mcast_ctx)) { + NL_SET_ERR_MSG_MOD(extack, "Vlan's multicast processing is disabled"); + goto out; + } + brmctx = &v->br_mcast_ctx; +out: + return brmctx; +} + static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, struct br_mdb_entry *entry, struct nlattr **mdb_attrs, struct netlink_ext_ack *extack) { struct 
net_bridge_mdb_entry *mp, *star_mp; - struct net_bridge_port_group *p; struct net_bridge_port_group __rcu **pp; + struct net_bridge_port_group *p; + struct net_bridge_mcast *brmctx; struct br_ip group, star_group; unsigned long now = jiffies; unsigned char flags = 0; @@ -1020,6 +1070,10 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, __mdb_entry_to_br_ip(entry, &group, mdb_attrs); + brmctx = __br_mdb_choose_context(br, entry, extack); + if (!brmctx) + return -EINVAL; + /* host join errors which can happen before creating the group */ if (!port) { /* don't allow any flags for host-joined groups */ @@ -1053,7 +1107,7 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, return -EEXIST; } - br_multicast_host_join(mp, false); + br_multicast_host_join(brmctx, mp, false); br_mdb_notify(br->dev, mp, NULL, RTM_NEWMDB); return 0; @@ -1084,14 +1138,15 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, } rcu_assign_pointer(*pp, p); if (entry->state == MDB_TEMPORARY) - mod_timer(&p->timer, now + br->multicast_membership_interval); + mod_timer(&p->timer, + now + brmctx->multicast_membership_interval); br_mdb_notify(br->dev, mp, p, RTM_NEWMDB); /* if we are adding a new EXCLUDE port group (*,G) it needs to be also * added to all S,G entries for proper replication, if we are adding * a new INCLUDE port (S,G) then all of *,G EXCLUDE ports need to be * added to it for proper replication */ - if (br_multicast_should_handle_mode(br, group.proto)) { + if (br_multicast_should_handle_mode(brmctx, group.proto)) { switch (filter_mode) { case MCAST_EXCLUDE: br_multicast_star_g_handle_mode(p, MCAST_EXCLUDE); diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index d0434dc8c03b..470f1ec3b579 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -49,30 +49,30 @@ static const struct rhashtable_params br_sg_port_rht_params = { .automatic_shrinking = true, }; 
-static void br_multicast_start_querier(struct net_bridge *br, +static void br_multicast_start_querier(struct net_bridge_mcast *brmctx, struct bridge_mcast_own_query *query); -static void br_ip4_multicast_add_router(struct net_bridge *br, - struct net_bridge_port *port); -static void br_ip4_multicast_leave_group(struct net_bridge *br, - struct net_bridge_port *port, +static void br_ip4_multicast_add_router(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx); +static void br_ip4_multicast_leave_group(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, __be32 group, __u16 vid, const unsigned char *src); static void br_multicast_port_group_rexmit(struct timer_list *t); static void -br_multicast_rport_del_notify(struct net_bridge_port *p, bool deleted); -static void br_ip6_multicast_add_router(struct net_bridge *br, - struct net_bridge_port *port); +br_multicast_rport_del_notify(struct net_bridge_mcast_port *pmctx, bool deleted); +static void br_ip6_multicast_add_router(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx); #if IS_ENABLED(CONFIG_IPV6) -static void br_ip6_multicast_leave_group(struct net_bridge *br, - struct net_bridge_port *port, +static void br_ip6_multicast_leave_group(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, const struct in6_addr *group, __u16 vid, const unsigned char *src); #endif static struct net_bridge_port_group * -__br_multicast_add_group(struct net_bridge *br, - struct net_bridge_port *port, +__br_multicast_add_group(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct br_ip *group, const unsigned char *src, u8 filter_mode, @@ -80,6 +80,7 @@ __br_multicast_add_group(struct net_bridge *br, bool blocked); static void br_multicast_find_del_pg(struct net_bridge *br, struct net_bridge_port_group *pg); +static void __br_multicast_stop(struct net_bridge_mcast *brmctx); static struct net_bridge_port_group * br_sg_port_find(struct 
net_bridge *br, @@ -140,12 +141,14 @@ static struct net_bridge_mdb_entry *br_mdb_ip6_get(struct net_bridge *br, } #endif -struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, +struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge_mcast *brmctx, struct sk_buff *skb, u16 vid) { + struct net_bridge *br = brmctx->br; struct br_ip ip; - if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) + if (!br_opt_get(br, BROPT_MULTICAST_ENABLED) || + br_multicast_ctx_vlan_global_disabled(brmctx)) return NULL; if (BR_INPUT_SKB_CB(skb)->igmp) @@ -158,7 +161,7 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, switch (skb->protocol) { case htons(ETH_P_IP): ip.dst.ip4 = ip_hdr(skb)->daddr; - if (br->multicast_igmp_version == 3) { + if (brmctx->multicast_igmp_version == 3) { struct net_bridge_mdb_entry *mdb; ip.src.ip4 = ip_hdr(skb)->saddr; @@ -171,7 +174,7 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): ip.dst.ip6 = ipv6_hdr(skb)->daddr; - if (br->multicast_mld_version == 2) { + if (brmctx->multicast_mld_version == 2) { struct net_bridge_mdb_entry *mdb; ip.src.ip6 = ipv6_hdr(skb)->saddr; @@ -190,6 +193,62 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, return br_mdb_ip_get_rcu(br, &ip); } +/* IMPORTANT: this function must be used only when the contexts cannot be + * passed down (e.g. timer) and must be used for read-only purposes because + * the vlan snooping option can change, so it can return any context + * (non-vlan or vlan). Its initial intended purpose is to read timer values + * from the *current* context based on the option. At worst that could lead + * to inconsistent timers when the contexts are changed, i.e. 
src timer + * which needs to re-arm with a specific delay taken from the old context + */ +static struct net_bridge_mcast_port * +br_multicast_pg_to_port_ctx(const struct net_bridge_port_group *pg) +{ + struct net_bridge_mcast_port *pmctx = &pg->key.port->multicast_ctx; + struct net_bridge_vlan *vlan; + + lockdep_assert_held_once(&pg->key.port->br->multicast_lock); + + /* if vlan snooping is disabled use the port's multicast context */ + if (!pg->key.addr.vid || + !br_opt_get(pg->key.port->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) + goto out; + + /* locking is tricky here, due to different rules for multicast and + * vlans we need to take rcu to find the vlan and make sure it has + * the BR_VLFLAG_MCAST_ENABLED flag set, it can only change under + * multicast_lock which must be already held here, so the vlan's pmctx + * can safely be used on return + */ + rcu_read_lock(); + vlan = br_vlan_find(nbp_vlan_group(pg->key.port), pg->key.addr.vid); + if (vlan && !br_multicast_port_ctx_vlan_disabled(&vlan->port_mcast_ctx)) + pmctx = &vlan->port_mcast_ctx; + else + pmctx = NULL; + rcu_read_unlock(); +out: + return pmctx; +} + +/* when snooping we need to check if the contexts should be used + * in the following order: + * - if pmctx is non-NULL (port), check if it should be used + * - if pmctx is NULL (bridge), check if brmctx should be used + */ +static bool +br_multicast_ctx_should_use(const struct net_bridge_mcast *brmctx, + const struct net_bridge_mcast_port *pmctx) +{ + if (!netif_running(brmctx->br->dev)) + return false; + + if (pmctx) + return !br_multicast_port_ctx_state_disabled(pmctx); + else + return !br_multicast_ctx_vlan_disabled(brmctx); +} + static bool br_port_group_equal(struct net_bridge_port_group *p, struct net_bridge_port *port, const unsigned char *src) @@ -203,20 +262,23 @@ static bool br_port_group_equal(struct net_bridge_port_group *p, return ether_addr_equal(src, p->eth_addr); } -static void __fwd_add_star_excl(struct net_bridge_port_group *pg, 
+static void __fwd_add_star_excl(struct net_bridge_mcast_port *pmctx, + struct net_bridge_port_group *pg, struct br_ip *sg_ip) { struct net_bridge_port_group_sg_key sg_key; - struct net_bridge *br = pg->key.port->br; struct net_bridge_port_group *src_pg; + struct net_bridge_mcast *brmctx; memset(&sg_key, 0, sizeof(sg_key)); + brmctx = br_multicast_port_ctx_get_global(pmctx); sg_key.port = pg->key.port; sg_key.addr = *sg_ip; - if (br_sg_port_find(br, &sg_key)) + if (br_sg_port_find(brmctx->br, &sg_key)) return; - src_pg = __br_multicast_add_group(br, pg->key.port, sg_ip, pg->eth_addr, + src_pg = __br_multicast_add_group(brmctx, pmctx, + sg_ip, pg->eth_addr, MCAST_INCLUDE, false, false); if (IS_ERR_OR_NULL(src_pg) || src_pg->rt_protocol != RTPROT_KERNEL) @@ -256,6 +318,7 @@ void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg, { struct net_bridge *br = pg->key.port->br; struct net_bridge_port_group *pg_lst; + struct net_bridge_mcast_port *pmctx; struct net_bridge_mdb_entry *mp; struct br_ip sg_ip; @@ -265,9 +328,13 @@ void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg, mp = br_mdb_ip_get(br, &pg->key.addr); if (!mp) return; + pmctx = br_multicast_pg_to_port_ctx(pg); + if (!pmctx) + return; memset(&sg_ip, 0, sizeof(sg_ip)); sg_ip = pg->key.addr; + for (pg_lst = mlock_dereference(mp->ports, br); pg_lst; pg_lst = mlock_dereference(pg_lst->next, br)) { @@ -284,7 +351,7 @@ void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg, __fwd_del_star_excl(pg, &sg_ip); break; case MCAST_EXCLUDE: - __fwd_add_star_excl(pg, &sg_ip); + __fwd_add_star_excl(pmctx, pg, &sg_ip); break; } } @@ -377,7 +444,9 @@ void br_multicast_sg_add_exclude_ports(struct net_bridge_mdb_entry *star_mp, { struct net_bridge_port_group_sg_key sg_key; struct net_bridge *br = star_mp->br; + struct net_bridge_mcast_port *pmctx; struct net_bridge_port_group *pg; + struct net_bridge_mcast *brmctx; if (WARN_ON(br_multicast_is_star_g(&sg->key.addr))) return; @@ 
-400,7 +469,12 @@ void br_multicast_sg_add_exclude_ports(struct net_bridge_mdb_entry *star_mp, if (br_sg_port_find(br, &sg_key)) continue; - src_pg = __br_multicast_add_group(br, pg->key.port, + pmctx = br_multicast_pg_to_port_ctx(pg); + if (!pmctx) + continue; + brmctx = br_multicast_port_ctx_get_global(pmctx); + + src_pg = __br_multicast_add_group(brmctx, pmctx, &sg->key.addr, sg->eth_addr, MCAST_INCLUDE, false, false); @@ -414,16 +488,23 @@ void br_multicast_sg_add_exclude_ports(struct net_bridge_mdb_entry *star_mp, static void br_multicast_fwd_src_add(struct net_bridge_group_src *src) { struct net_bridge_mdb_entry *star_mp; + struct net_bridge_mcast_port *pmctx; struct net_bridge_port_group *sg; + struct net_bridge_mcast *brmctx; struct br_ip sg_ip; if (src->flags & BR_SGRP_F_INSTALLED) return; memset(&sg_ip, 0, sizeof(sg_ip)); + pmctx = br_multicast_pg_to_port_ctx(src->pg); + if (!pmctx) + return; + brmctx = br_multicast_port_ctx_get_global(pmctx); sg_ip = src->pg->key.addr; sg_ip.src = src->addr.src; - sg = __br_multicast_add_group(src->br, src->pg->key.port, &sg_ip, + + sg = __br_multicast_add_group(brmctx, pmctx, &sg_ip, src->pg->eth_addr, MCAST_INCLUDE, false, !timer_pending(&src->timer)); if (IS_ERR_OR_NULL(sg)) @@ -692,7 +773,28 @@ static void br_multicast_gc(struct hlist_head *head) } } -static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br, +static void __br_multicast_query_handle_vlan(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, + struct sk_buff *skb) +{ + struct net_bridge_vlan *vlan = NULL; + + if (pmctx && br_multicast_port_ctx_is_vlan(pmctx)) + vlan = pmctx->vlan; + else if (br_multicast_ctx_is_vlan(brmctx)) + vlan = brmctx->vlan; + + if (vlan && !(vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED)) { + u16 vlan_proto; + + if (br_vlan_get_proto(brmctx->br->dev, &vlan_proto) != 0) + return; + __vlan_hwaccel_put_tag(skb, htons(vlan_proto), vlan->vid); + } +} + +static struct sk_buff 
*br_ip4_multicast_alloc_query(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct net_bridge_port_group *pg, __be32 ip_dst, __be32 group, bool with_srcs, bool over_lmqt, @@ -714,11 +816,11 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br, u16 lmqt_srcs = 0; igmp_hdr_size = sizeof(*ih); - if (br->multicast_igmp_version == 3) { + if (brmctx->multicast_igmp_version == 3) { igmp_hdr_size = sizeof(*ihv3); if (pg && with_srcs) { - lmqt = now + (br->multicast_last_member_interval * - br->multicast_last_member_count); + lmqt = now + (brmctx->multicast_last_member_interval * + brmctx->multicast_last_member_count); hlist_for_each_entry(ent, &pg->src_list, node) { if (over_lmqt == time_after(ent->timer.expires, lmqt) && @@ -734,19 +836,20 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br, pkt_size = sizeof(*eth) + sizeof(*iph) + 4 + igmp_hdr_size; if ((p && pkt_size > p->dev->mtu) || - pkt_size > br->dev->mtu) + pkt_size > brmctx->br->dev->mtu) return NULL; - skb = netdev_alloc_skb_ip_align(br->dev, pkt_size); + skb = netdev_alloc_skb_ip_align(brmctx->br->dev, pkt_size); if (!skb) goto out; + __br_multicast_query_handle_vlan(brmctx, pmctx, skb); skb->protocol = htons(ETH_P_IP); skb_reset_mac_header(skb); eth = eth_hdr(skb); - ether_addr_copy(eth->h_source, br->dev->dev_addr); + ether_addr_copy(eth->h_source, brmctx->br->dev->dev_addr); ip_eth_mc_map(ip_dst, eth->h_dest); eth->h_proto = htons(ETH_P_IP); skb_put(skb, sizeof(*eth)); @@ -762,8 +865,8 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br, iph->frag_off = htons(IP_DF); iph->ttl = 1; iph->protocol = IPPROTO_IGMP; - iph->saddr = br_opt_get(br, BROPT_MULTICAST_QUERY_USE_IFADDR) ? - inet_select_addr(br->dev, 0, RT_SCOPE_LINK) : 0; + iph->saddr = br_opt_get(brmctx->br, BROPT_MULTICAST_QUERY_USE_IFADDR) ? 
+ inet_select_addr(brmctx->br->dev, 0, RT_SCOPE_LINK) : 0; iph->daddr = ip_dst; ((u8 *)&iph[1])[0] = IPOPT_RA; ((u8 *)&iph[1])[1] = 4; @@ -775,12 +878,12 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br, skb_set_transport_header(skb, skb->len); *igmp_type = IGMP_HOST_MEMBERSHIP_QUERY; - switch (br->multicast_igmp_version) { + switch (brmctx->multicast_igmp_version) { case 2: ih = igmp_hdr(skb); ih->type = IGMP_HOST_MEMBERSHIP_QUERY; - ih->code = (group ? br->multicast_last_member_interval : - br->multicast_query_response_interval) / + ih->code = (group ? brmctx->multicast_last_member_interval : + brmctx->multicast_query_response_interval) / (HZ / IGMP_TIMER_SCALE); ih->group = group; ih->csum = 0; @@ -790,11 +893,11 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br, case 3: ihv3 = igmpv3_query_hdr(skb); ihv3->type = IGMP_HOST_MEMBERSHIP_QUERY; - ihv3->code = (group ? br->multicast_last_member_interval : - br->multicast_query_response_interval) / + ihv3->code = (group ? 
brmctx->multicast_last_member_interval : + brmctx->multicast_query_response_interval) / (HZ / IGMP_TIMER_SCALE); ihv3->group = group; - ihv3->qqic = br->multicast_query_interval / HZ; + ihv3->qqic = brmctx->multicast_query_interval / HZ; ihv3->nsrcs = htons(lmqt_srcs); ihv3->resv = 0; ihv3->suppress = sflag; @@ -837,7 +940,8 @@ out: } #if IS_ENABLED(CONFIG_IPV6) -static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, +static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct net_bridge_port_group *pg, const struct in6_addr *ip6_dst, const struct in6_addr *group, @@ -862,11 +966,11 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, u8 *hopopt; mld_hdr_size = sizeof(*mldq); - if (br->multicast_mld_version == 2) { + if (brmctx->multicast_mld_version == 2) { mld_hdr_size = sizeof(*mld2q); if (pg && with_srcs) { - llqt = now + (br->multicast_last_member_interval * - br->multicast_last_member_count); + llqt = now + (brmctx->multicast_last_member_interval * + brmctx->multicast_last_member_count); hlist_for_each_entry(ent, &pg->src_list, node) { if (over_llqt == time_after(ent->timer.expires, llqt) && @@ -882,20 +986,21 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, pkt_size = sizeof(*eth) + sizeof(*ip6h) + 8 + mld_hdr_size; if ((p && pkt_size > p->dev->mtu) || - pkt_size > br->dev->mtu) + pkt_size > brmctx->br->dev->mtu) return NULL; - skb = netdev_alloc_skb_ip_align(br->dev, pkt_size); + skb = netdev_alloc_skb_ip_align(brmctx->br->dev, pkt_size); if (!skb) goto out; + __br_multicast_query_handle_vlan(brmctx, pmctx, skb); skb->protocol = htons(ETH_P_IPV6); /* Ethernet header */ skb_reset_mac_header(skb); eth = eth_hdr(skb); - ether_addr_copy(eth->h_source, br->dev->dev_addr); + ether_addr_copy(eth->h_source, brmctx->br->dev->dev_addr); eth->h_proto = htons(ETH_P_IPV6); skb_put(skb, sizeof(*eth)); @@ -908,14 +1013,14 @@ 
static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, ip6h->nexthdr = IPPROTO_HOPOPTS; ip6h->hop_limit = 1; ip6h->daddr = *ip6_dst; - if (ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0, - &ip6h->saddr)) { + if (ipv6_dev_get_saddr(dev_net(brmctx->br->dev), brmctx->br->dev, + &ip6h->daddr, 0, &ip6h->saddr)) { kfree_skb(skb); - br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, false); + br_opt_toggle(brmctx->br, BROPT_HAS_IPV6_ADDR, false); return NULL; } - br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, true); + br_opt_toggle(brmctx->br, BROPT_HAS_IPV6_ADDR, true); ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest); hopopt = (u8 *)(ip6h + 1); @@ -933,10 +1038,10 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, /* ICMPv6 */ skb_set_transport_header(skb, skb->len); interval = ipv6_addr_any(group) ? - br->multicast_query_response_interval : - br->multicast_last_member_interval; + brmctx->multicast_query_response_interval : + brmctx->multicast_last_member_interval; *igmp_type = ICMPV6_MGM_QUERY; - switch (br->multicast_mld_version) { + switch (brmctx->multicast_mld_version) { case 1: mldq = (struct mld_msg *)icmp6_hdr(skb); mldq->mld_type = ICMPV6_MGM_QUERY; @@ -959,7 +1064,7 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, mld2q->mld2q_suppress = sflag; mld2q->mld2q_qrv = 2; mld2q->mld2q_nsrcs = htons(llqt_srcs); - mld2q->mld2q_qqic = br->multicast_query_interval / HZ; + mld2q->mld2q_qqic = brmctx->multicast_query_interval / HZ; mld2q->mld2q_mca = *group; csum = &mld2q->mld2q_cksum; csum_start = (void *)mld2q; @@ -1000,7 +1105,8 @@ out: } #endif -static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br, +static struct sk_buff *br_multicast_alloc_query(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct net_bridge_port_group *pg, struct br_ip *ip_dst, struct br_ip *group, @@ -1013,7 +1119,7 @@ static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br, 
switch (group->proto) { case htons(ETH_P_IP): ip4_dst = ip_dst ? ip_dst->dst.ip4 : htonl(INADDR_ALLHOSTS_GROUP); - return br_ip4_multicast_alloc_query(br, pg, + return br_ip4_multicast_alloc_query(brmctx, pmctx, pg, ip4_dst, group->dst.ip4, with_srcs, over_lmqt, sflag, igmp_type, @@ -1028,7 +1134,7 @@ static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br, ipv6_addr_set(&ip6_dst, htonl(0xff020000), 0, 0, htonl(1)); - return br_ip6_multicast_alloc_query(br, pg, + return br_ip6_multicast_alloc_query(brmctx, pmctx, pg, &ip6_dst, &group->dst.ip6, with_srcs, over_lmqt, sflag, igmp_type, @@ -1206,7 +1312,8 @@ struct net_bridge_port_group *br_multicast_new_port_group( return p; } -void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify) +void br_multicast_host_join(const struct net_bridge_mcast *brmctx, + struct net_bridge_mdb_entry *mp, bool notify) { if (!mp->host_joined) { mp->host_joined = true; @@ -1219,7 +1326,7 @@ void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify) if (br_group_is_l2(&mp->addr)) return; - mod_timer(&mp->timer, jiffies + mp->br->multicast_membership_interval); + mod_timer(&mp->timer, jiffies + brmctx->multicast_membership_interval); } void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify) @@ -1235,8 +1342,8 @@ void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify) } static struct net_bridge_port_group * -__br_multicast_add_group(struct net_bridge *br, - struct net_bridge_port *port, +__br_multicast_add_group(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct br_ip *group, const unsigned char *src, u8 filter_mode, @@ -1248,29 +1355,28 @@ __br_multicast_add_group(struct net_bridge *br, struct net_bridge_mdb_entry *mp; unsigned long now = jiffies; - if (!netif_running(br->dev) || - (port && port->state == BR_STATE_DISABLED)) + if (!br_multicast_ctx_should_use(brmctx, pmctx)) goto out; - mp = br_multicast_new_group(br, group); + mp = 
br_multicast_new_group(brmctx->br, group); if (IS_ERR(mp)) return ERR_CAST(mp); - if (!port) { - br_multicast_host_join(mp, true); + if (!pmctx) { + br_multicast_host_join(brmctx, mp, true); goto out; } for (pp = &mp->ports; - (p = mlock_dereference(*pp, br)) != NULL; + (p = mlock_dereference(*pp, brmctx->br)) != NULL; pp = &p->next) { - if (br_port_group_equal(p, port, src)) + if (br_port_group_equal(p, pmctx->port, src)) goto found; - if ((unsigned long)p->key.port < (unsigned long)port) + if ((unsigned long)p->key.port < (unsigned long)pmctx->port) break; } - p = br_multicast_new_port_group(port, group, *pp, 0, src, + p = br_multicast_new_port_group(pmctx->port, group, *pp, 0, src, filter_mode, RTPROT_KERNEL); if (unlikely(!p)) { p = ERR_PTR(-ENOMEM); @@ -1279,18 +1385,19 @@ __br_multicast_add_group(struct net_bridge *br, rcu_assign_pointer(*pp, p); if (blocked) p->flags |= MDB_PG_FLAGS_BLOCKED; - br_mdb_notify(br->dev, mp, p, RTM_NEWMDB); + br_mdb_notify(brmctx->br->dev, mp, p, RTM_NEWMDB); found: if (igmpv2_mldv1) - mod_timer(&p->timer, now + br->multicast_membership_interval); + mod_timer(&p->timer, + now + brmctx->multicast_membership_interval); out: return p; } -static int br_multicast_add_group(struct net_bridge *br, - struct net_bridge_port *port, +static int br_multicast_add_group(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct br_ip *group, const unsigned char *src, u8 filter_mode, @@ -1299,18 +1406,18 @@ static int br_multicast_add_group(struct net_bridge *br, struct net_bridge_port_group *pg; int err; - spin_lock(&br->multicast_lock); - pg = __br_multicast_add_group(br, port, group, src, filter_mode, + spin_lock(&brmctx->br->multicast_lock); + pg = __br_multicast_add_group(brmctx, pmctx, group, src, filter_mode, igmpv2_mldv1, false); /* NULL is considered valid for host joined groups */ err = PTR_ERR_OR_ZERO(pg); - spin_unlock(&br->multicast_lock); + spin_unlock(&brmctx->br->multicast_lock); return err; } -static int 
br_ip4_multicast_add_group(struct net_bridge *br, - struct net_bridge_port *port, +static int br_ip4_multicast_add_group(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, __be32 group, __u16 vid, const unsigned char *src, @@ -1328,13 +1435,13 @@ static int br_ip4_multicast_add_group(struct net_bridge *br, br_group.vid = vid; filter_mode = igmpv2 ? MCAST_EXCLUDE : MCAST_INCLUDE; - return br_multicast_add_group(br, port, &br_group, src, filter_mode, - igmpv2); + return br_multicast_add_group(brmctx, pmctx, &br_group, src, + filter_mode, igmpv2); } #if IS_ENABLED(CONFIG_IPV6) -static int br_ip6_multicast_add_group(struct net_bridge *br, - struct net_bridge_port *port, +static int br_ip6_multicast_add_group(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, const struct in6_addr *group, __u16 vid, const unsigned char *src, @@ -1352,8 +1459,8 @@ static int br_ip6_multicast_add_group(struct net_bridge *br, br_group.vid = vid; filter_mode = mldv1 ? MCAST_EXCLUDE : MCAST_INCLUDE; - return br_multicast_add_group(br, port, &br_group, src, filter_mode, - mldv1); + return br_multicast_add_group(brmctx, pmctx, &br_group, src, + filter_mode, mldv1); } #endif @@ -1366,52 +1473,54 @@ static bool br_multicast_rport_del(struct hlist_node *rlist) return true; } -static bool br_ip4_multicast_rport_del(struct net_bridge_port *p) +static bool br_ip4_multicast_rport_del(struct net_bridge_mcast_port *pmctx) { - return br_multicast_rport_del(&p->ip4_rlist); + return br_multicast_rport_del(&pmctx->ip4_rlist); } -static bool br_ip6_multicast_rport_del(struct net_bridge_port *p) +static bool br_ip6_multicast_rport_del(struct net_bridge_mcast_port *pmctx) { #if IS_ENABLED(CONFIG_IPV6) - return br_multicast_rport_del(&p->ip6_rlist); + return br_multicast_rport_del(&pmctx->ip6_rlist); #else return false; #endif } -static void br_multicast_router_expired(struct net_bridge_port *port, +static void br_multicast_router_expired(struct net_bridge_mcast_port 
*pmctx, struct timer_list *t, struct hlist_node *rlist) { - struct net_bridge *br = port->br; + struct net_bridge *br = pmctx->port->br; bool del; spin_lock(&br->multicast_lock); - if (port->multicast_router == MDB_RTR_TYPE_DISABLED || - port->multicast_router == MDB_RTR_TYPE_PERM || + if (pmctx->multicast_router == MDB_RTR_TYPE_DISABLED || + pmctx->multicast_router == MDB_RTR_TYPE_PERM || timer_pending(t)) goto out; del = br_multicast_rport_del(rlist); - br_multicast_rport_del_notify(port, del); + br_multicast_rport_del_notify(pmctx, del); out: spin_unlock(&br->multicast_lock); } static void br_ip4_multicast_router_expired(struct timer_list *t) { - struct net_bridge_port *port = from_timer(port, t, ip4_mc_router_timer); + struct net_bridge_mcast_port *pmctx = from_timer(pmctx, t, + ip4_mc_router_timer); - br_multicast_router_expired(port, t, &port->ip4_rlist); + br_multicast_router_expired(pmctx, t, &pmctx->ip4_rlist); } #if IS_ENABLED(CONFIG_IPV6) static void br_ip6_multicast_router_expired(struct timer_list *t) { - struct net_bridge_port *port = from_timer(port, t, ip6_mc_router_timer); + struct net_bridge_mcast_port *pmctx = from_timer(pmctx, t, + ip6_mc_router_timer); - br_multicast_router_expired(port, t, &port->ip6_rlist); + br_multicast_router_expired(pmctx, t, &pmctx->ip6_rlist); } #endif @@ -1428,80 +1537,86 @@ static void br_mc_router_state_change(struct net_bridge *p, switchdev_port_attr_set(p->dev, &attr, NULL); } -static void br_multicast_local_router_expired(struct net_bridge *br, +static void br_multicast_local_router_expired(struct net_bridge_mcast *brmctx, struct timer_list *timer) { - spin_lock(&br->multicast_lock); - if (br->multicast_router == MDB_RTR_TYPE_DISABLED || - br->multicast_router == MDB_RTR_TYPE_PERM || - br_ip4_multicast_is_router(br) || - br_ip6_multicast_is_router(br)) + spin_lock(&brmctx->br->multicast_lock); + if (brmctx->multicast_router == MDB_RTR_TYPE_DISABLED || + brmctx->multicast_router == MDB_RTR_TYPE_PERM || + 
br_ip4_multicast_is_router(brmctx) || + br_ip6_multicast_is_router(brmctx)) goto out; - br_mc_router_state_change(br, false); + br_mc_router_state_change(brmctx->br, false); out: - spin_unlock(&br->multicast_lock); + spin_unlock(&brmctx->br->multicast_lock); } static void br_ip4_multicast_local_router_expired(struct timer_list *t) { - struct net_bridge *br = from_timer(br, t, ip4_mc_router_timer); + struct net_bridge_mcast *brmctx = from_timer(brmctx, t, + ip4_mc_router_timer); - br_multicast_local_router_expired(br, t); + br_multicast_local_router_expired(brmctx, t); } #if IS_ENABLED(CONFIG_IPV6) static void br_ip6_multicast_local_router_expired(struct timer_list *t) { - struct net_bridge *br = from_timer(br, t, ip6_mc_router_timer); + struct net_bridge_mcast *brmctx = from_timer(brmctx, t, + ip6_mc_router_timer); - br_multicast_local_router_expired(br, t); + br_multicast_local_router_expired(brmctx, t); } #endif -static void br_multicast_querier_expired(struct net_bridge *br, +static void br_multicast_querier_expired(struct net_bridge_mcast *brmctx, struct bridge_mcast_own_query *query) { - spin_lock(&br->multicast_lock); - if (!netif_running(br->dev) || !br_opt_get(br, BROPT_MULTICAST_ENABLED)) + spin_lock(&brmctx->br->multicast_lock); + if (!netif_running(brmctx->br->dev) || + br_multicast_ctx_vlan_global_disabled(brmctx) || + !br_opt_get(brmctx->br, BROPT_MULTICAST_ENABLED)) goto out; - br_multicast_start_querier(br, query); + br_multicast_start_querier(brmctx, query); out: - spin_unlock(&br->multicast_lock); + spin_unlock(&brmctx->br->multicast_lock); } static void br_ip4_multicast_querier_expired(struct timer_list *t) { - struct net_bridge *br = from_timer(br, t, ip4_other_query.timer); + struct net_bridge_mcast *brmctx = from_timer(brmctx, t, + ip4_other_query.timer); - br_multicast_querier_expired(br, &br->ip4_own_query); + br_multicast_querier_expired(brmctx, &brmctx->ip4_own_query); } #if IS_ENABLED(CONFIG_IPV6) static void 
br_ip6_multicast_querier_expired(struct timer_list *t) { - struct net_bridge *br = from_timer(br, t, ip6_other_query.timer); + struct net_bridge_mcast *brmctx = from_timer(brmctx, t, + ip6_other_query.timer); - br_multicast_querier_expired(br, &br->ip6_own_query); + br_multicast_querier_expired(brmctx, &brmctx->ip6_own_query); } #endif -static void br_multicast_select_own_querier(struct net_bridge *br, +static void br_multicast_select_own_querier(struct net_bridge_mcast *brmctx, struct br_ip *ip, struct sk_buff *skb) { if (ip->proto == htons(ETH_P_IP)) - br->ip4_querier.addr.src.ip4 = ip_hdr(skb)->saddr; + brmctx->ip4_querier.addr.src.ip4 = ip_hdr(skb)->saddr; #if IS_ENABLED(CONFIG_IPV6) else - br->ip6_querier.addr.src.ip6 = ipv6_hdr(skb)->saddr; + brmctx->ip6_querier.addr.src.ip6 = ipv6_hdr(skb)->saddr; #endif } -static void __br_multicast_send_query(struct net_bridge *br, - struct net_bridge_port *port, +static void __br_multicast_send_query(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct net_bridge_port_group *pg, struct br_ip *ip_dst, struct br_ip *group, @@ -1513,19 +1628,22 @@ static void __br_multicast_send_query(struct net_bridge *br, struct sk_buff *skb; u8 igmp_type; + if (!br_multicast_ctx_should_use(brmctx, pmctx)) + return; + again_under_lmqt: - skb = br_multicast_alloc_query(br, pg, ip_dst, group, with_srcs, - over_lmqt, sflag, &igmp_type, + skb = br_multicast_alloc_query(brmctx, pmctx, pg, ip_dst, group, + with_srcs, over_lmqt, sflag, &igmp_type, need_rexmit); if (!skb) return; - if (port) { - skb->dev = port->dev; - br_multicast_count(br, port, skb, igmp_type, + if (pmctx) { + skb->dev = pmctx->port->dev; + br_multicast_count(brmctx->br, pmctx->port, skb, igmp_type, BR_MCAST_DIR_TX); NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, - dev_net(port->dev), NULL, skb, NULL, skb->dev, + dev_net(pmctx->port->dev), NULL, skb, NULL, skb->dev, br_dev_queue_push_xmit); if (over_lmqt && with_srcs && sflag) { @@ -1533,35 +1651,35 @@ 
again_under_lmqt: goto again_under_lmqt; } } else { - br_multicast_select_own_querier(br, group, skb); - br_multicast_count(br, port, skb, igmp_type, + br_multicast_select_own_querier(brmctx, group, skb); + br_multicast_count(brmctx->br, NULL, skb, igmp_type, BR_MCAST_DIR_RX); netif_rx(skb); } } -static void br_multicast_send_query(struct net_bridge *br, - struct net_bridge_port *port, +static void br_multicast_send_query(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct bridge_mcast_own_query *own_query) { struct bridge_mcast_other_query *other_query = NULL; struct br_ip br_group; unsigned long time; - if (!netif_running(br->dev) || - !br_opt_get(br, BROPT_MULTICAST_ENABLED) || - !br_opt_get(br, BROPT_MULTICAST_QUERIER)) + if (!br_multicast_ctx_should_use(brmctx, pmctx) || + !br_opt_get(brmctx->br, BROPT_MULTICAST_ENABLED) || + !br_opt_get(brmctx->br, BROPT_MULTICAST_QUERIER)) return; memset(&br_group.dst, 0, sizeof(br_group.dst)); - if (port ? (own_query == &port->ip4_own_query) : - (own_query == &br->ip4_own_query)) { - other_query = &br->ip4_other_query; + if (pmctx ? (own_query == &pmctx->ip4_own_query) : + (own_query == &brmctx->ip4_own_query)) { + other_query = &brmctx->ip4_other_query; br_group.proto = htons(ETH_P_IP); #if IS_ENABLED(CONFIG_IPV6) } else { - other_query = &br->ip6_other_query; + other_query = &brmctx->ip6_other_query; br_group.proto = htons(ETH_P_IPV6); #endif } @@ -1569,31 +1687,32 @@ static void br_multicast_send_query(struct net_bridge *br, if (!other_query || timer_pending(&other_query->timer)) return; - __br_multicast_send_query(br, port, NULL, NULL, &br_group, false, 0, - NULL); + __br_multicast_send_query(brmctx, pmctx, NULL, NULL, &br_group, false, + 0, NULL); time = jiffies; - time += own_query->startup_sent < br->multicast_startup_query_count ? - br->multicast_startup_query_interval : - br->multicast_query_interval; + time += own_query->startup_sent < brmctx->multicast_startup_query_count ? 
+ brmctx->multicast_startup_query_interval : + brmctx->multicast_query_interval; mod_timer(&own_query->timer, time); } static void -br_multicast_port_query_expired(struct net_bridge_port *port, +br_multicast_port_query_expired(struct net_bridge_mcast_port *pmctx, struct bridge_mcast_own_query *query) { - struct net_bridge *br = port->br; + struct net_bridge *br = pmctx->port->br; + struct net_bridge_mcast *brmctx; spin_lock(&br->multicast_lock); - if (port->state == BR_STATE_DISABLED || - port->state == BR_STATE_BLOCKING) + if (br_multicast_port_ctx_state_stopped(pmctx)) goto out; - if (query->startup_sent < br->multicast_startup_query_count) + brmctx = br_multicast_port_ctx_get_global(pmctx); + if (query->startup_sent < brmctx->multicast_startup_query_count) query->startup_sent++; - br_multicast_send_query(port->br, port, query); + br_multicast_send_query(brmctx, pmctx, query); out: spin_unlock(&br->multicast_lock); @@ -1601,17 +1720,19 @@ out: static void br_ip4_multicast_port_query_expired(struct timer_list *t) { - struct net_bridge_port *port = from_timer(port, t, ip4_own_query.timer); + struct net_bridge_mcast_port *pmctx = from_timer(pmctx, t, + ip4_own_query.timer); - br_multicast_port_query_expired(port, &port->ip4_own_query); + br_multicast_port_query_expired(pmctx, &pmctx->ip4_own_query); } #if IS_ENABLED(CONFIG_IPV6) static void br_ip6_multicast_port_query_expired(struct timer_list *t) { - struct net_bridge_port *port = from_timer(port, t, ip6_own_query.timer); + struct net_bridge_mcast_port *pmctx = from_timer(pmctx, t, + ip6_own_query.timer); - br_multicast_port_query_expired(port, &port->ip6_own_query); + br_multicast_port_query_expired(pmctx, &pmctx->ip6_own_query); } #endif @@ -1620,6 +1741,8 @@ static void br_multicast_port_group_rexmit(struct timer_list *t) struct net_bridge_port_group *pg = from_timer(pg, t, rexmit_timer); struct bridge_mcast_other_query *other_query = NULL; struct net_bridge *br = pg->key.port->br; + struct net_bridge_mcast_port 
*pmctx; + struct net_bridge_mcast *brmctx; bool need_rexmit = false; spin_lock(&br->multicast_lock); @@ -1628,11 +1751,15 @@ static void br_multicast_port_group_rexmit(struct timer_list *t) !br_opt_get(br, BROPT_MULTICAST_QUERIER)) goto out; + pmctx = br_multicast_pg_to_port_ctx(pg); + if (!pmctx) + goto out; + brmctx = br_multicast_port_ctx_get_global(pmctx); if (pg->key.addr.proto == htons(ETH_P_IP)) - other_query = &br->ip4_other_query; + other_query = &brmctx->ip4_other_query; #if IS_ENABLED(CONFIG_IPV6) else - other_query = &br->ip6_other_query; + other_query = &brmctx->ip6_other_query; #endif if (!other_query || timer_pending(&other_query->timer)) @@ -1640,15 +1767,15 @@ static void br_multicast_port_group_rexmit(struct timer_list *t) if (pg->grp_query_rexmit_cnt) { pg->grp_query_rexmit_cnt--; - __br_multicast_send_query(br, pg->key.port, pg, &pg->key.addr, + __br_multicast_send_query(brmctx, pmctx, pg, &pg->key.addr, &pg->key.addr, false, 1, NULL); } - __br_multicast_send_query(br, pg->key.port, pg, &pg->key.addr, + __br_multicast_send_query(brmctx, pmctx, pg, &pg->key.addr, &pg->key.addr, true, 0, &need_rexmit); if (pg->grp_query_rexmit_cnt || need_rexmit) mod_timer(&pg->rexmit_timer, jiffies + - br->multicast_last_member_interval); + brmctx->multicast_last_member_interval); out: spin_unlock(&br->multicast_lock); } @@ -1666,23 +1793,40 @@ static int br_mc_disabled_update(struct net_device *dev, bool value, return switchdev_port_attr_set(dev, &attr, extack); } -int br_multicast_add_port(struct net_bridge_port *port) +void br_multicast_port_ctx_init(struct net_bridge_port *port, + struct net_bridge_vlan *vlan, + struct net_bridge_mcast_port *pmctx) { - int err; - - port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; - port->multicast_eht_hosts_limit = BR_MCAST_DEFAULT_EHT_HOSTS_LIMIT; - - timer_setup(&port->ip4_mc_router_timer, + pmctx->port = port; + pmctx->vlan = vlan; + pmctx->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; + 
timer_setup(&pmctx->ip4_mc_router_timer, br_ip4_multicast_router_expired, 0); - timer_setup(&port->ip4_own_query.timer, + timer_setup(&pmctx->ip4_own_query.timer, br_ip4_multicast_port_query_expired, 0); #if IS_ENABLED(CONFIG_IPV6) - timer_setup(&port->ip6_mc_router_timer, + timer_setup(&pmctx->ip6_mc_router_timer, br_ip6_multicast_router_expired, 0); - timer_setup(&port->ip6_own_query.timer, + timer_setup(&pmctx->ip6_own_query.timer, br_ip6_multicast_port_query_expired, 0); #endif +} + +void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pmctx) +{ +#if IS_ENABLED(CONFIG_IPV6) + del_timer_sync(&pmctx->ip6_mc_router_timer); +#endif + del_timer_sync(&pmctx->ip4_mc_router_timer); +} + +int br_multicast_add_port(struct net_bridge_port *port) +{ + int err; + + port->multicast_eht_hosts_limit = BR_MCAST_DEFAULT_EHT_HOSTS_LIMIT; + br_multicast_port_ctx_init(port, NULL, &port->multicast_ctx); + err = br_mc_disabled_update(port->dev, br_opt_get(port->br, BROPT_MULTICAST_ENABLED), @@ -1711,10 +1855,7 @@ void br_multicast_del_port(struct net_bridge_port *port) hlist_move_list(&br->mcast_gc_list, &deleted_head); spin_unlock_bh(&br->multicast_lock); br_multicast_gc(&deleted_head); - del_timer_sync(&port->ip4_mc_router_timer); -#if IS_ENABLED(CONFIG_IPV6) - del_timer_sync(&port->ip6_mc_router_timer); -#endif + br_multicast_port_ctx_deinit(&port->multicast_ctx); free_percpu(port->mcast_stats); } @@ -1727,20 +1868,23 @@ static void br_multicast_enable(struct bridge_mcast_own_query *query) mod_timer(&query->timer, jiffies); } -static void __br_multicast_enable_port(struct net_bridge_port *port) +static void __br_multicast_enable_port_ctx(struct net_bridge_mcast_port *pmctx) { - struct net_bridge *br = port->br; + struct net_bridge *br = pmctx->port->br; + struct net_bridge_mcast *brmctx; - if (!br_opt_get(br, BROPT_MULTICAST_ENABLED) || !netif_running(br->dev)) + brmctx = br_multicast_port_ctx_get_global(pmctx); + if (!br_opt_get(br, BROPT_MULTICAST_ENABLED) || + 
!netif_running(br->dev)) return; - br_multicast_enable(&port->ip4_own_query); + br_multicast_enable(&pmctx->ip4_own_query); #if IS_ENABLED(CONFIG_IPV6) - br_multicast_enable(&port->ip6_own_query); + br_multicast_enable(&pmctx->ip6_own_query); #endif - if (port->multicast_router == MDB_RTR_TYPE_PERM) { - br_ip4_multicast_add_router(br, port); - br_ip6_multicast_add_router(br, port); + if (pmctx->multicast_router == MDB_RTR_TYPE_PERM) { + br_ip4_multicast_add_router(brmctx, pmctx); + br_ip6_multicast_add_router(brmctx, pmctx); } } @@ -1748,33 +1892,39 @@ void br_multicast_enable_port(struct net_bridge_port *port) { struct net_bridge *br = port->br; - spin_lock(&br->multicast_lock); - __br_multicast_enable_port(port); - spin_unlock(&br->multicast_lock); + spin_lock_bh(&br->multicast_lock); + __br_multicast_enable_port_ctx(&port->multicast_ctx); + spin_unlock_bh(&br->multicast_lock); } -void br_multicast_disable_port(struct net_bridge_port *port) +static void __br_multicast_disable_port_ctx(struct net_bridge_mcast_port *pmctx) { - struct net_bridge *br = port->br; struct net_bridge_port_group *pg; struct hlist_node *n; bool del = false; - spin_lock(&br->multicast_lock); - hlist_for_each_entry_safe(pg, n, &port->mglist, mglist) - if (!(pg->flags & MDB_PG_FLAGS_PERMANENT)) - br_multicast_find_del_pg(br, pg); + hlist_for_each_entry_safe(pg, n, &pmctx->port->mglist, mglist) + if (!(pg->flags & MDB_PG_FLAGS_PERMANENT) && + (!br_multicast_port_ctx_is_vlan(pmctx) || + pg->key.addr.vid == pmctx->vlan->vid)) + br_multicast_find_del_pg(pmctx->port->br, pg); - del |= br_ip4_multicast_rport_del(port); - del_timer(&port->ip4_mc_router_timer); - del_timer(&port->ip4_own_query.timer); - del |= br_ip6_multicast_rport_del(port); + del |= br_ip4_multicast_rport_del(pmctx); + del_timer(&pmctx->ip4_mc_router_timer); + del_timer(&pmctx->ip4_own_query.timer); + del |= br_ip6_multicast_rport_del(pmctx); #if IS_ENABLED(CONFIG_IPV6) - del_timer(&port->ip6_mc_router_timer); - 
del_timer(&port->ip6_own_query.timer); + del_timer(&pmctx->ip6_mc_router_timer); + del_timer(&pmctx->ip6_own_query.timer); #endif - br_multicast_rport_del_notify(port, del); - spin_unlock(&br->multicast_lock); + br_multicast_rport_del_notify(pmctx, del); +} + +void br_multicast_disable_port(struct net_bridge_port *port) +{ + spin_lock_bh(&port->br->multicast_lock); + __br_multicast_disable_port_ctx(&port->multicast_ctx); + spin_unlock_bh(&port->br->multicast_lock); } static int __grp_src_delete_marked(struct net_bridge_port_group *pg) @@ -1799,31 +1949,33 @@ static void __grp_src_mod_timer(struct net_bridge_group_src *src, br_multicast_fwd_src_handle(src); } -static void __grp_src_query_marked_and_rexmit(struct net_bridge_port_group *pg) +static void __grp_src_query_marked_and_rexmit(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, + struct net_bridge_port_group *pg) { struct bridge_mcast_other_query *other_query = NULL; - struct net_bridge *br = pg->key.port->br; - u32 lmqc = br->multicast_last_member_count; + u32 lmqc = brmctx->multicast_last_member_count; unsigned long lmqt, lmi, now = jiffies; struct net_bridge_group_src *ent; - if (!netif_running(br->dev) || - !br_opt_get(br, BROPT_MULTICAST_ENABLED)) + if (!netif_running(brmctx->br->dev) || + !br_opt_get(brmctx->br, BROPT_MULTICAST_ENABLED)) return; if (pg->key.addr.proto == htons(ETH_P_IP)) - other_query = &br->ip4_other_query; + other_query = &brmctx->ip4_other_query; #if IS_ENABLED(CONFIG_IPV6) else - other_query = &br->ip6_other_query; + other_query = &brmctx->ip6_other_query; #endif - lmqt = now + br_multicast_lmqt(br); + lmqt = now + br_multicast_lmqt(brmctx); hlist_for_each_entry(ent, &pg->src_list, node) { if (ent->flags & BR_SGRP_F_SEND) { ent->flags &= ~BR_SGRP_F_SEND; if (ent->timer.expires > lmqt) { - if (br_opt_get(br, BROPT_MULTICAST_QUERIER) && + if (br_opt_get(brmctx->br, + BROPT_MULTICAST_QUERIER) && other_query && !timer_pending(&other_query->timer)) 
ent->src_query_rexmit_cnt = lmqc; @@ -1832,41 +1984,42 @@ static void __grp_src_query_marked_and_rexmit(struct net_bridge_port_group *pg) } } - if (!br_opt_get(br, BROPT_MULTICAST_QUERIER) || + if (!br_opt_get(brmctx->br, BROPT_MULTICAST_QUERIER) || !other_query || timer_pending(&other_query->timer)) return; - __br_multicast_send_query(br, pg->key.port, pg, &pg->key.addr, + __br_multicast_send_query(brmctx, pmctx, pg, &pg->key.addr, &pg->key.addr, true, 1, NULL); - lmi = now + br->multicast_last_member_interval; + lmi = now + brmctx->multicast_last_member_interval; if (!timer_pending(&pg->rexmit_timer) || time_after(pg->rexmit_timer.expires, lmi)) mod_timer(&pg->rexmit_timer, lmi); } -static void __grp_send_query_and_rexmit(struct net_bridge_port_group *pg) +static void __grp_send_query_and_rexmit(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, + struct net_bridge_port_group *pg) { struct bridge_mcast_other_query *other_query = NULL; - struct net_bridge *br = pg->key.port->br; unsigned long now = jiffies, lmi; - if (!netif_running(br->dev) || - !br_opt_get(br, BROPT_MULTICAST_ENABLED)) + if (!netif_running(brmctx->br->dev) || + !br_opt_get(brmctx->br, BROPT_MULTICAST_ENABLED)) return; if (pg->key.addr.proto == htons(ETH_P_IP)) - other_query = &br->ip4_other_query; + other_query = &brmctx->ip4_other_query; #if IS_ENABLED(CONFIG_IPV6) else - other_query = &br->ip6_other_query; + other_query = &brmctx->ip6_other_query; #endif - if (br_opt_get(br, BROPT_MULTICAST_QUERIER) && + if (br_opt_get(brmctx->br, BROPT_MULTICAST_QUERIER) && other_query && !timer_pending(&other_query->timer)) { - lmi = now + br->multicast_last_member_interval; - pg->grp_query_rexmit_cnt = br->multicast_last_member_count - 1; - __br_multicast_send_query(br, pg->key.port, pg, &pg->key.addr, + lmi = now + brmctx->multicast_last_member_interval; + pg->grp_query_rexmit_cnt = brmctx->multicast_last_member_count - 1; + __br_multicast_send_query(brmctx, pmctx, pg, &pg->key.addr, 
&pg->key.addr, false, 0, NULL); if (!timer_pending(&pg->rexmit_timer) || time_after(pg->rexmit_timer.expires, lmi)) @@ -1875,8 +2028,8 @@ static void __grp_send_query_and_rexmit(struct net_bridge_port_group *pg) if (pg->filter_mode == MCAST_EXCLUDE && (!timer_pending(&pg->timer) || - time_after(pg->timer.expires, now + br_multicast_lmqt(br)))) - mod_timer(&pg->timer, now + br_multicast_lmqt(br)); + time_after(pg->timer.expires, now + br_multicast_lmqt(brmctx)))) + mod_timer(&pg->timer, now + br_multicast_lmqt(brmctx)); } /* State Msg type New state Actions @@ -1884,11 +2037,11 @@ static void __grp_send_query_and_rexmit(struct net_bridge_port_group *pg) * INCLUDE (A) ALLOW (B) INCLUDE (A+B) (B)=GMI * EXCLUDE (X,Y) ALLOW (A) EXCLUDE (X+A,Y-A) (A)=GMI */ -static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, void *h_addr, +static bool br_multicast_isinc_allow(const struct net_bridge_mcast *brmctx, + struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { - struct net_bridge *br = pg->key.port->br; struct net_bridge_group_src *ent; unsigned long now = jiffies; bool changed = false; @@ -1907,10 +2060,11 @@ static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, void *h_a } if (ent) - __grp_src_mod_timer(ent, now + br_multicast_gmi(br)); + __grp_src_mod_timer(ent, now + br_multicast_gmi(brmctx)); } - if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type)) + if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size, + grec_type)) changed = true; return changed; @@ -1921,7 +2075,8 @@ static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, void *h_a * Delete (A-B) * Group Timer=GMI */ -static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, void *h_addr, +static void __grp_src_isexc_incl(const struct net_bridge_mcast *brmctx, + struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { @@ 
-1945,7 +2100,8 @@ static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, void *h_addr, br_multicast_fwd_src_handle(ent); } - br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type); + br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size, + grec_type); __grp_src_delete_marked(pg); } @@ -1956,11 +2112,11 @@ static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, void *h_addr, * Delete (Y-A) * Group Timer=GMI */ -static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, void *h_addr, +static bool __grp_src_isexc_excl(const struct net_bridge_mcast *brmctx, + struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { - struct net_bridge *br = pg->key.port->br; struct net_bridge_group_src *ent; unsigned long now = jiffies; bool changed = false; @@ -1981,13 +2137,14 @@ static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, void *h_addr, ent = br_multicast_new_group_src(pg, &src_ip); if (ent) { __grp_src_mod_timer(ent, - now + br_multicast_gmi(br)); + now + br_multicast_gmi(brmctx)); changed = true; } } } - if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type)) + if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size, + grec_type)) changed = true; if (__grp_src_delete_marked(pg)) @@ -1996,28 +2153,28 @@ static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, void *h_addr, return changed; } -static bool br_multicast_isexc(struct net_bridge_port_group *pg, void *h_addr, +static bool br_multicast_isexc(const struct net_bridge_mcast *brmctx, + struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { - struct net_bridge *br = pg->key.port->br; bool changed = false; switch (pg->filter_mode) { case MCAST_INCLUDE: - __grp_src_isexc_incl(pg, h_addr, srcs, nsrcs, addr_size, + __grp_src_isexc_incl(brmctx, pg, h_addr, srcs, nsrcs, addr_size, grec_type); 
br_multicast_star_g_handle_mode(pg, MCAST_EXCLUDE); changed = true; break; case MCAST_EXCLUDE: - changed = __grp_src_isexc_excl(pg, h_addr, srcs, nsrcs, addr_size, - grec_type); + changed = __grp_src_isexc_excl(brmctx, pg, h_addr, srcs, nsrcs, + addr_size, grec_type); break; } pg->filter_mode = MCAST_EXCLUDE; - mod_timer(&pg->timer, jiffies + br_multicast_gmi(br)); + mod_timer(&pg->timer, jiffies + br_multicast_gmi(brmctx)); return changed; } @@ -2026,11 +2183,12 @@ static bool br_multicast_isexc(struct net_bridge_port_group *pg, void *h_addr, * INCLUDE (A) TO_IN (B) INCLUDE (A+B) (B)=GMI * Send Q(G,A-B) */ -static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, void *h_addr, +static bool __grp_src_toin_incl(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, + struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { - struct net_bridge *br = pg->key.port->br; u32 src_idx, to_send = pg->src_ents; struct net_bridge_group_src *ent; unsigned long now = jiffies; @@ -2054,14 +2212,15 @@ static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, void *h_addr, changed = true; } if (ent) - __grp_src_mod_timer(ent, now + br_multicast_gmi(br)); + __grp_src_mod_timer(ent, now + br_multicast_gmi(brmctx)); } - if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type)) + if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size, + grec_type)) changed = true; if (to_send) - __grp_src_query_marked_and_rexmit(pg); + __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg); return changed; } @@ -2071,11 +2230,12 @@ static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, void *h_addr, * Send Q(G,X-A) * Send Q(G) */ -static bool __grp_src_toin_excl(struct net_bridge_port_group *pg, void *h_addr, +static bool __grp_src_toin_excl(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, + struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 
nsrcs, size_t addr_size, int grec_type) { - struct net_bridge *br = pg->key.port->br; u32 src_idx, to_send = pg->src_ents; struct net_bridge_group_src *ent; unsigned long now = jiffies; @@ -2102,21 +2262,24 @@ static bool __grp_src_toin_excl(struct net_bridge_port_group *pg, void *h_addr, changed = true; } if (ent) - __grp_src_mod_timer(ent, now + br_multicast_gmi(br)); + __grp_src_mod_timer(ent, now + br_multicast_gmi(brmctx)); } - if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type)) + if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size, + grec_type)) changed = true; if (to_send) - __grp_src_query_marked_and_rexmit(pg); + __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg); - __grp_send_query_and_rexmit(pg); + __grp_send_query_and_rexmit(brmctx, pmctx, pg); return changed; } -static bool br_multicast_toin(struct net_bridge_port_group *pg, void *h_addr, +static bool br_multicast_toin(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, + struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { @@ -2124,12 +2287,12 @@ static bool br_multicast_toin(struct net_bridge_port_group *pg, void *h_addr, switch (pg->filter_mode) { case MCAST_INCLUDE: - changed = __grp_src_toin_incl(pg, h_addr, srcs, nsrcs, addr_size, - grec_type); + changed = __grp_src_toin_incl(brmctx, pmctx, pg, h_addr, srcs, + nsrcs, addr_size, grec_type); break; case MCAST_EXCLUDE: - changed = __grp_src_toin_excl(pg, h_addr, srcs, nsrcs, addr_size, - grec_type); + changed = __grp_src_toin_excl(brmctx, pmctx, pg, h_addr, srcs, + nsrcs, addr_size, grec_type); break; } @@ -2151,7 +2314,9 @@ static bool br_multicast_toin(struct net_bridge_port_group *pg, void *h_addr, * Send Q(G,A*B) * Group Timer=GMI */ -static void __grp_src_toex_incl(struct net_bridge_port_group *pg, void *h_addr, +static void __grp_src_toex_incl(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, + struct 
net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { @@ -2178,11 +2343,12 @@ static void __grp_src_toex_incl(struct net_bridge_port_group *pg, void *h_addr, br_multicast_fwd_src_handle(ent); } - br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type); + br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size, + grec_type); __grp_src_delete_marked(pg); if (to_send) - __grp_src_query_marked_and_rexmit(pg); + __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg); } /* State Msg type New state Actions @@ -2192,7 +2358,9 @@ static void __grp_src_toex_incl(struct net_bridge_port_group *pg, void *h_addr, * Send Q(G,A-Y) * Group Timer=GMI */ -static bool __grp_src_toex_excl(struct net_bridge_port_group *pg, void *h_addr, +static bool __grp_src_toex_excl(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, + struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { @@ -2224,39 +2392,41 @@ static bool __grp_src_toex_excl(struct net_bridge_port_group *pg, void *h_addr, } } - if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type)) + if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size, + grec_type)) changed = true; if (__grp_src_delete_marked(pg)) changed = true; if (to_send) - __grp_src_query_marked_and_rexmit(pg); + __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg); return changed; } -static bool br_multicast_toex(struct net_bridge_port_group *pg, void *h_addr, +static bool br_multicast_toex(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, + struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { - struct net_bridge *br = pg->key.port->br; bool changed = false; switch (pg->filter_mode) { case MCAST_INCLUDE: - __grp_src_toex_incl(pg, h_addr, srcs, nsrcs, addr_size, - grec_type); + __grp_src_toex_incl(brmctx, pmctx, pg, h_addr, 
srcs, nsrcs, + addr_size, grec_type); br_multicast_star_g_handle_mode(pg, MCAST_EXCLUDE); changed = true; break; case MCAST_EXCLUDE: - changed = __grp_src_toex_excl(pg, h_addr, srcs, nsrcs, addr_size, - grec_type); + changed = __grp_src_toex_excl(brmctx, pmctx, pg, h_addr, srcs, + nsrcs, addr_size, grec_type); break; } pg->filter_mode = MCAST_EXCLUDE; - mod_timer(&pg->timer, jiffies + br_multicast_gmi(br)); + mod_timer(&pg->timer, jiffies + br_multicast_gmi(brmctx)); return changed; } @@ -2264,7 +2434,9 @@ static bool br_multicast_toex(struct net_bridge_port_group *pg, void *h_addr, /* State Msg type New state Actions * INCLUDE (A) BLOCK (B) INCLUDE (A) Send Q(G,A*B) */ -static bool __grp_src_block_incl(struct net_bridge_port_group *pg, void *h_addr, +static bool __grp_src_block_incl(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, + struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { struct net_bridge_group_src *ent; @@ -2286,11 +2458,12 @@ static bool __grp_src_block_incl(struct net_bridge_port_group *pg, void *h_addr, } } - if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type)) + if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size, + grec_type)) changed = true; if (to_send) - __grp_src_query_marked_and_rexmit(pg); + __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg); return changed; } @@ -2299,7 +2472,9 @@ static bool __grp_src_block_incl(struct net_bridge_port_group *pg, void *h_addr, * EXCLUDE (X,Y) BLOCK (A) EXCLUDE (X+(A-Y),Y) (A-X-Y)=Group Timer * Send Q(G,A-Y) */ -static bool __grp_src_block_excl(struct net_bridge_port_group *pg, void *h_addr, +static bool __grp_src_block_excl(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, + struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { struct net_bridge_group_src *ent; @@ -2328,28 +2503,31 @@ static bool 
__grp_src_block_excl(struct net_bridge_port_group *pg, void *h_addr, } } - if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type)) + if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size, + grec_type)) changed = true; if (to_send) - __grp_src_query_marked_and_rexmit(pg); + __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg); return changed; } -static bool br_multicast_block(struct net_bridge_port_group *pg, void *h_addr, +static bool br_multicast_block(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, + struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { bool changed = false; switch (pg->filter_mode) { case MCAST_INCLUDE: - changed = __grp_src_block_incl(pg, h_addr, srcs, nsrcs, addr_size, - grec_type); + changed = __grp_src_block_incl(brmctx, pmctx, pg, h_addr, srcs, + nsrcs, addr_size, grec_type); break; case MCAST_EXCLUDE: - changed = __grp_src_block_excl(pg, h_addr, srcs, nsrcs, addr_size, - grec_type); + changed = __grp_src_block_excl(brmctx, pmctx, pg, h_addr, srcs, + nsrcs, addr_size, grec_type); break; } @@ -2384,12 +2562,12 @@ br_multicast_find_port(struct net_bridge_mdb_entry *mp, return NULL; } -static int br_ip4_multicast_igmp3_report(struct net_bridge *br, - struct net_bridge_port *port, +static int br_ip4_multicast_igmp3_report(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct sk_buff *skb, u16 vid) { - bool igmpv2 = br->multicast_igmp_version == 2; + bool igmpv2 = brmctx->multicast_igmp_version == 2; struct net_bridge_mdb_entry *mdst; struct net_bridge_port_group *pg; const unsigned char *src; @@ -2436,25 +2614,29 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, if (nsrcs == 0 && (type == IGMPV3_CHANGE_TO_INCLUDE || type == IGMPV3_MODE_IS_INCLUDE)) { - if (!port || igmpv2) { - br_ip4_multicast_leave_group(br, port, group, vid, src); + if (!pmctx || igmpv2) { + 
br_ip4_multicast_leave_group(brmctx, pmctx, + group, vid, src); continue; } } else { - err = br_ip4_multicast_add_group(br, port, group, vid, - src, igmpv2); + err = br_ip4_multicast_add_group(brmctx, pmctx, group, + vid, src, igmpv2); if (err) break; } - if (!port || igmpv2) + if (!pmctx || igmpv2) continue; - spin_lock_bh(&br->multicast_lock); - mdst = br_mdb_ip4_get(br, group, vid); + spin_lock_bh(&brmctx->br->multicast_lock); + if (!br_multicast_ctx_should_use(brmctx, pmctx)) + goto unlock_continue; + + mdst = br_mdb_ip4_get(brmctx->br, group, vid); if (!mdst) goto unlock_continue; - pg = br_multicast_find_port(mdst, port, src); + pg = br_multicast_find_port(mdst, pmctx->port, src); if (!pg || (pg->flags & MDB_PG_FLAGS_PERMANENT)) goto unlock_continue; /* reload grec and host addr */ @@ -2462,46 +2644,52 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, h_addr = &ip_hdr(skb)->saddr; switch (type) { case IGMPV3_ALLOW_NEW_SOURCES: - changed = br_multicast_isinc_allow(pg, h_addr, grec->grec_src, + changed = br_multicast_isinc_allow(brmctx, pg, h_addr, + grec->grec_src, nsrcs, sizeof(__be32), type); break; case IGMPV3_MODE_IS_INCLUDE: - changed = br_multicast_isinc_allow(pg, h_addr, grec->grec_src, + changed = br_multicast_isinc_allow(brmctx, pg, h_addr, + grec->grec_src, nsrcs, sizeof(__be32), type); break; case IGMPV3_MODE_IS_EXCLUDE: - changed = br_multicast_isexc(pg, h_addr, grec->grec_src, + changed = br_multicast_isexc(brmctx, pg, h_addr, + grec->grec_src, nsrcs, sizeof(__be32), type); break; case IGMPV3_CHANGE_TO_INCLUDE: - changed = br_multicast_toin(pg, h_addr, grec->grec_src, + changed = br_multicast_toin(brmctx, pmctx, pg, h_addr, + grec->grec_src, nsrcs, sizeof(__be32), type); break; case IGMPV3_CHANGE_TO_EXCLUDE: - changed = br_multicast_toex(pg, h_addr, grec->grec_src, + changed = br_multicast_toex(brmctx, pmctx, pg, h_addr, + grec->grec_src, nsrcs, sizeof(__be32), type); break; case IGMPV3_BLOCK_OLD_SOURCES: - changed = 
br_multicast_block(pg, h_addr, grec->grec_src, + changed = br_multicast_block(brmctx, pmctx, pg, h_addr, + grec->grec_src, nsrcs, sizeof(__be32), type); break; } if (changed) - br_mdb_notify(br->dev, mdst, pg, RTM_NEWMDB); + br_mdb_notify(brmctx->br->dev, mdst, pg, RTM_NEWMDB); unlock_continue: - spin_unlock_bh(&br->multicast_lock); + spin_unlock_bh(&brmctx->br->multicast_lock); } return err; } #if IS_ENABLED(CONFIG_IPV6) -static int br_ip6_multicast_mld2_report(struct net_bridge *br, - struct net_bridge_port *port, +static int br_ip6_multicast_mld2_report(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct sk_buff *skb, u16 vid) { - bool mldv1 = br->multicast_mld_version == 1; + bool mldv1 = brmctx->multicast_mld_version == 1; struct net_bridge_mdb_entry *mdst; struct net_bridge_port_group *pg; unsigned int nsrcs_offset; @@ -2562,137 +2750,144 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, if ((grec->grec_type == MLD2_CHANGE_TO_INCLUDE || grec->grec_type == MLD2_MODE_IS_INCLUDE) && nsrcs == 0) { - if (!port || mldv1) { - br_ip6_multicast_leave_group(br, port, + if (!pmctx || mldv1) { + br_ip6_multicast_leave_group(brmctx, pmctx, &grec->grec_mca, vid, src); continue; } } else { - err = br_ip6_multicast_add_group(br, port, + err = br_ip6_multicast_add_group(brmctx, pmctx, &grec->grec_mca, vid, src, mldv1); if (err) break; } - if (!port || mldv1) + if (!pmctx || mldv1) continue; - spin_lock_bh(&br->multicast_lock); - mdst = br_mdb_ip6_get(br, &grec->grec_mca, vid); + spin_lock_bh(&brmctx->br->multicast_lock); + if (!br_multicast_ctx_should_use(brmctx, pmctx)) + goto unlock_continue; + + mdst = br_mdb_ip6_get(brmctx->br, &grec->grec_mca, vid); if (!mdst) goto unlock_continue; - pg = br_multicast_find_port(mdst, port, src); + pg = br_multicast_find_port(mdst, pmctx->port, src); if (!pg || (pg->flags & MDB_PG_FLAGS_PERMANENT)) goto unlock_continue; h_addr = &ipv6_hdr(skb)->saddr; switch (grec->grec_type) { case 
MLD2_ALLOW_NEW_SOURCES: - changed = br_multicast_isinc_allow(pg, h_addr, + changed = br_multicast_isinc_allow(brmctx, pg, h_addr, grec->grec_src, nsrcs, sizeof(struct in6_addr), grec->grec_type); break; case MLD2_MODE_IS_INCLUDE: - changed = br_multicast_isinc_allow(pg, h_addr, + changed = br_multicast_isinc_allow(brmctx, pg, h_addr, grec->grec_src, nsrcs, sizeof(struct in6_addr), grec->grec_type); break; case MLD2_MODE_IS_EXCLUDE: - changed = br_multicast_isexc(pg, h_addr, + changed = br_multicast_isexc(brmctx, pg, h_addr, grec->grec_src, nsrcs, sizeof(struct in6_addr), grec->grec_type); break; case MLD2_CHANGE_TO_INCLUDE: - changed = br_multicast_toin(pg, h_addr, + changed = br_multicast_toin(brmctx, pmctx, pg, h_addr, grec->grec_src, nsrcs, sizeof(struct in6_addr), grec->grec_type); break; case MLD2_CHANGE_TO_EXCLUDE: - changed = br_multicast_toex(pg, h_addr, + changed = br_multicast_toex(brmctx, pmctx, pg, h_addr, grec->grec_src, nsrcs, sizeof(struct in6_addr), grec->grec_type); break; case MLD2_BLOCK_OLD_SOURCES: - changed = br_multicast_block(pg, h_addr, + changed = br_multicast_block(brmctx, pmctx, pg, h_addr, grec->grec_src, nsrcs, sizeof(struct in6_addr), grec->grec_type); break; } if (changed) - br_mdb_notify(br->dev, mdst, pg, RTM_NEWMDB); + br_mdb_notify(brmctx->br->dev, mdst, pg, RTM_NEWMDB); unlock_continue: - spin_unlock_bh(&br->multicast_lock); + spin_unlock_bh(&brmctx->br->multicast_lock); } return err; } #endif -static bool br_ip4_multicast_select_querier(struct net_bridge *br, - struct net_bridge_port *port, +static bool br_ip4_multicast_select_querier(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, __be32 saddr) { - if (!timer_pending(&br->ip4_own_query.timer) && - !timer_pending(&br->ip4_other_query.timer)) + struct net_bridge_port *port = pmctx ? 
pmctx->port : NULL; + + if (!timer_pending(&brmctx->ip4_own_query.timer) && + !timer_pending(&brmctx->ip4_other_query.timer)) goto update; - if (!br->ip4_querier.addr.src.ip4) + if (!brmctx->ip4_querier.addr.src.ip4) goto update; - if (ntohl(saddr) <= ntohl(br->ip4_querier.addr.src.ip4)) + if (ntohl(saddr) <= ntohl(brmctx->ip4_querier.addr.src.ip4)) goto update; return false; update: - br->ip4_querier.addr.src.ip4 = saddr; + brmctx->ip4_querier.addr.src.ip4 = saddr; /* update protected by general multicast_lock by caller */ - rcu_assign_pointer(br->ip4_querier.port, port); + rcu_assign_pointer(brmctx->ip4_querier.port, port); return true; } #if IS_ENABLED(CONFIG_IPV6) -static bool br_ip6_multicast_select_querier(struct net_bridge *br, - struct net_bridge_port *port, +static bool br_ip6_multicast_select_querier(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct in6_addr *saddr) { - if (!timer_pending(&br->ip6_own_query.timer) && - !timer_pending(&br->ip6_other_query.timer)) + struct net_bridge_port *port = pmctx ? 
pmctx->port : NULL; + + if (!timer_pending(&brmctx->ip6_own_query.timer) && + !timer_pending(&brmctx->ip6_other_query.timer)) goto update; - if (ipv6_addr_cmp(saddr, &br->ip6_querier.addr.src.ip6) <= 0) + if (ipv6_addr_cmp(saddr, &brmctx->ip6_querier.addr.src.ip6) <= 0) goto update; return false; update: - br->ip6_querier.addr.src.ip6 = *saddr; + brmctx->ip6_querier.addr.src.ip6 = *saddr; /* update protected by general multicast_lock by caller */ - rcu_assign_pointer(br->ip6_querier.port, port); + rcu_assign_pointer(brmctx->ip6_querier.port, port); return true; } #endif static void -br_multicast_update_query_timer(struct net_bridge *br, +br_multicast_update_query_timer(struct net_bridge_mcast *brmctx, struct bridge_mcast_other_query *query, unsigned long max_delay) { if (!timer_pending(&query->timer)) query->delay_time = jiffies + max_delay; - mod_timer(&query->timer, jiffies + br->multicast_querier_interval); + mod_timer(&query->timer, jiffies + brmctx->multicast_querier_interval); } static void br_port_mc_router_state_change(struct net_bridge_port *p, @@ -2709,19 +2904,26 @@ static void br_port_mc_router_state_change(struct net_bridge_port *p, } static struct net_bridge_port * -br_multicast_rport_from_node(struct net_bridge *br, +br_multicast_rport_from_node(struct net_bridge_mcast *brmctx, struct hlist_head *mc_router_list, struct hlist_node *rlist) { + struct net_bridge_mcast_port *pmctx; + #if IS_ENABLED(CONFIG_IPV6) - if (mc_router_list == &br->ip6_mc_router_list) - return hlist_entry(rlist, struct net_bridge_port, ip6_rlist); + if (mc_router_list == &brmctx->ip6_mc_router_list) + pmctx = hlist_entry(rlist, struct net_bridge_mcast_port, + ip6_rlist); + else #endif - return hlist_entry(rlist, struct net_bridge_port, ip4_rlist); + pmctx = hlist_entry(rlist, struct net_bridge_mcast_port, + ip4_rlist); + + return pmctx->port; } static struct hlist_node * -br_multicast_get_rport_slot(struct net_bridge *br, +br_multicast_get_rport_slot(struct net_bridge_mcast 
*brmctx, struct net_bridge_port *port, struct hlist_head *mc_router_list) @@ -2731,7 +2933,7 @@ br_multicast_get_rport_slot(struct net_bridge *br, struct hlist_node *rlist; hlist_for_each(rlist, mc_router_list) { - p = br_multicast_rport_from_node(br, mc_router_list, rlist); + p = br_multicast_rport_from_node(brmctx, mc_router_list, rlist); if ((unsigned long)port >= (unsigned long)p) break; @@ -2742,14 +2944,14 @@ br_multicast_get_rport_slot(struct net_bridge *br, return slot; } -static bool br_multicast_no_router_otherpf(struct net_bridge_port *port, +static bool br_multicast_no_router_otherpf(struct net_bridge_mcast_port *pmctx, struct hlist_node *rnode) { #if IS_ENABLED(CONFIG_IPV6) - if (rnode != &port->ip6_rlist) - return hlist_unhashed(&port->ip6_rlist); + if (rnode != &pmctx->ip6_rlist) + return hlist_unhashed(&pmctx->ip6_rlist); else - return hlist_unhashed(&port->ip4_rlist); + return hlist_unhashed(&pmctx->ip4_rlist); #else return true; #endif @@ -2759,8 +2961,8 @@ static bool br_multicast_no_router_otherpf(struct net_bridge_port *port, * list is maintained ordered by pointer value * and locked by br->multicast_lock and RCU */ -static void br_multicast_add_router(struct net_bridge *br, - struct net_bridge_port *port, +static void br_multicast_add_router(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct hlist_node *rlist, struct hlist_head *mc_router_list) { @@ -2769,7 +2971,7 @@ static void br_multicast_add_router(struct net_bridge *br, if (!hlist_unhashed(rlist)) return; - slot = br_multicast_get_rport_slot(br, port, mc_router_list); + slot = br_multicast_get_rport_slot(brmctx, pmctx->port, mc_router_list); if (slot) hlist_add_behind_rcu(rlist, slot); @@ -2780,9 +2982,9 @@ static void br_multicast_add_router(struct net_bridge *br, * switched from no IPv4/IPv6 multicast router to a new * IPv4 or IPv6 multicast router. 
*/ - if (br_multicast_no_router_otherpf(port, rlist)) { - br_rtr_notify(br->dev, port, RTM_NEWMDB); - br_port_mc_router_state_change(port, true); + if (br_multicast_no_router_otherpf(pmctx, rlist)) { + br_rtr_notify(pmctx->port->br->dev, pmctx, RTM_NEWMDB); + br_port_mc_router_state_change(pmctx->port, true); } } @@ -2790,116 +2992,119 @@ static void br_multicast_add_router(struct net_bridge *br, * list is maintained ordered by pointer value * and locked by br->multicast_lock and RCU */ -static void br_ip4_multicast_add_router(struct net_bridge *br, - struct net_bridge_port *port) +static void br_ip4_multicast_add_router(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx) { - br_multicast_add_router(br, port, &port->ip4_rlist, - &br->ip4_mc_router_list); + br_multicast_add_router(brmctx, pmctx, &pmctx->ip4_rlist, + &brmctx->ip4_mc_router_list); } /* Add port to router_list * list is maintained ordered by pointer value * and locked by br->multicast_lock and RCU */ -static void br_ip6_multicast_add_router(struct net_bridge *br, - struct net_bridge_port *port) +static void br_ip6_multicast_add_router(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx) { #if IS_ENABLED(CONFIG_IPV6) - br_multicast_add_router(br, port, &port->ip6_rlist, - &br->ip6_mc_router_list); + br_multicast_add_router(brmctx, pmctx, &pmctx->ip6_rlist, + &brmctx->ip6_mc_router_list); #endif } -static void br_multicast_mark_router(struct net_bridge *br, - struct net_bridge_port *port, +static void br_multicast_mark_router(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct timer_list *timer, struct hlist_node *rlist, struct hlist_head *mc_router_list) { unsigned long now = jiffies; - if (!port) { - if (br->multicast_router == MDB_RTR_TYPE_TEMP_QUERY) { - if (!br_ip4_multicast_is_router(br) && - !br_ip6_multicast_is_router(br)) - br_mc_router_state_change(br, true); - mod_timer(timer, now + br->multicast_querier_interval); + if 
(!br_multicast_ctx_should_use(brmctx, pmctx)) + return; + + if (!pmctx) { + if (brmctx->multicast_router == MDB_RTR_TYPE_TEMP_QUERY) { + if (!br_ip4_multicast_is_router(brmctx) && + !br_ip6_multicast_is_router(brmctx)) + br_mc_router_state_change(brmctx->br, true); + mod_timer(timer, now + brmctx->multicast_querier_interval); } return; } - if (port->multicast_router == MDB_RTR_TYPE_DISABLED || - port->multicast_router == MDB_RTR_TYPE_PERM) + if (pmctx->multicast_router == MDB_RTR_TYPE_DISABLED || + pmctx->multicast_router == MDB_RTR_TYPE_PERM) return; - br_multicast_add_router(br, port, rlist, mc_router_list); - mod_timer(timer, now + br->multicast_querier_interval); + br_multicast_add_router(brmctx, pmctx, rlist, mc_router_list); + mod_timer(timer, now + brmctx->multicast_querier_interval); } -static void br_ip4_multicast_mark_router(struct net_bridge *br, - struct net_bridge_port *port) +static void br_ip4_multicast_mark_router(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx) { - struct timer_list *timer = &br->ip4_mc_router_timer; + struct timer_list *timer = &brmctx->ip4_mc_router_timer; struct hlist_node *rlist = NULL; - if (port) { - timer = &port->ip4_mc_router_timer; - rlist = &port->ip4_rlist; + if (pmctx) { + timer = &pmctx->ip4_mc_router_timer; + rlist = &pmctx->ip4_rlist; } - br_multicast_mark_router(br, port, timer, rlist, - &br->ip4_mc_router_list); + br_multicast_mark_router(brmctx, pmctx, timer, rlist, + &brmctx->ip4_mc_router_list); } -static void br_ip6_multicast_mark_router(struct net_bridge *br, - struct net_bridge_port *port) +static void br_ip6_multicast_mark_router(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx) { #if IS_ENABLED(CONFIG_IPV6) - struct timer_list *timer = &br->ip6_mc_router_timer; + struct timer_list *timer = &brmctx->ip6_mc_router_timer; struct hlist_node *rlist = NULL; - if (port) { - timer = &port->ip6_mc_router_timer; - rlist = &port->ip6_rlist; + if (pmctx) { + timer = 
&pmctx->ip6_mc_router_timer; + rlist = &pmctx->ip6_rlist; } - br_multicast_mark_router(br, port, timer, rlist, - &br->ip6_mc_router_list); + br_multicast_mark_router(brmctx, pmctx, timer, rlist, + &brmctx->ip6_mc_router_list); #endif } static void -br_ip4_multicast_query_received(struct net_bridge *br, - struct net_bridge_port *port, +br_ip4_multicast_query_received(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct bridge_mcast_other_query *query, struct br_ip *saddr, unsigned long max_delay) { - if (!br_ip4_multicast_select_querier(br, port, saddr->src.ip4)) + if (!br_ip4_multicast_select_querier(brmctx, pmctx, saddr->src.ip4)) return; - br_multicast_update_query_timer(br, query, max_delay); - br_ip4_multicast_mark_router(br, port); + br_multicast_update_query_timer(brmctx, query, max_delay); + br_ip4_multicast_mark_router(brmctx, pmctx); } #if IS_ENABLED(CONFIG_IPV6) static void -br_ip6_multicast_query_received(struct net_bridge *br, - struct net_bridge_port *port, +br_ip6_multicast_query_received(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct bridge_mcast_other_query *query, struct br_ip *saddr, unsigned long max_delay) { - if (!br_ip6_multicast_select_querier(br, port, &saddr->src.ip6)) + if (!br_ip6_multicast_select_querier(brmctx, pmctx, &saddr->src.ip6)) return; - br_multicast_update_query_timer(br, query, max_delay); - br_ip6_multicast_mark_router(br, port); + br_multicast_update_query_timer(brmctx, query, max_delay); + br_ip6_multicast_mark_router(brmctx, pmctx); } #endif -static void br_ip4_multicast_query(struct net_bridge *br, - struct net_bridge_port *port, +static void br_ip4_multicast_query(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct sk_buff *skb, u16 vid) { @@ -2915,9 +3120,8 @@ static void br_ip4_multicast_query(struct net_bridge *br, unsigned long now = jiffies; __be32 group; - spin_lock(&br->multicast_lock); - if (!netif_running(br->dev) || - (port && 
port->state == BR_STATE_DISABLED)) + spin_lock(&brmctx->br->multicast_lock); + if (!br_multicast_ctx_should_use(brmctx, pmctx)) goto out; group = ih->group; @@ -2932,7 +3136,8 @@ static void br_ip4_multicast_query(struct net_bridge *br, } else if (transport_len >= sizeof(*ih3)) { ih3 = igmpv3_query_hdr(skb); if (ih3->nsrcs || - (br->multicast_igmp_version == 3 && group && ih3->suppress)) + (brmctx->multicast_igmp_version == 3 && group && + ih3->suppress)) goto out; max_delay = ih3->code ? @@ -2945,16 +3150,17 @@ static void br_ip4_multicast_query(struct net_bridge *br, saddr.proto = htons(ETH_P_IP); saddr.src.ip4 = iph->saddr; - br_ip4_multicast_query_received(br, port, &br->ip4_other_query, + br_ip4_multicast_query_received(brmctx, pmctx, + &brmctx->ip4_other_query, &saddr, max_delay); goto out; } - mp = br_mdb_ip4_get(br, group, vid); + mp = br_mdb_ip4_get(brmctx->br, group, vid); if (!mp) goto out; - max_delay *= br->multicast_last_member_count; + max_delay *= brmctx->multicast_last_member_count; if (mp->host_joined && (timer_pending(&mp->timer) ? @@ -2963,23 +3169,23 @@ static void br_ip4_multicast_query(struct net_bridge *br, mod_timer(&mp->timer, now + max_delay); for (pp = &mp->ports; - (p = mlock_dereference(*pp, br)) != NULL; + (p = mlock_dereference(*pp, brmctx->br)) != NULL; pp = &p->next) { if (timer_pending(&p->timer) ? 
time_after(p->timer.expires, now + max_delay) : try_to_del_timer_sync(&p->timer) >= 0 && - (br->multicast_igmp_version == 2 || + (brmctx->multicast_igmp_version == 2 || p->filter_mode == MCAST_EXCLUDE)) mod_timer(&p->timer, now + max_delay); } out: - spin_unlock(&br->multicast_lock); + spin_unlock(&brmctx->br->multicast_lock); } #if IS_ENABLED(CONFIG_IPV6) -static int br_ip6_multicast_query(struct net_bridge *br, - struct net_bridge_port *port, +static int br_ip6_multicast_query(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct sk_buff *skb, u16 vid) { @@ -2997,9 +3203,8 @@ static int br_ip6_multicast_query(struct net_bridge *br, bool is_general_query; int err = 0; - spin_lock(&br->multicast_lock); - if (!netif_running(br->dev) || - (port && port->state == BR_STATE_DISABLED)) + spin_lock(&brmctx->br->multicast_lock); + if (!br_multicast_ctx_should_use(brmctx, pmctx)) goto out; if (transport_len == sizeof(*mld)) { @@ -3019,7 +3224,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, mld2q = (struct mld2_query *)icmp6_hdr(skb); if (!mld2q->mld2q_nsrcs) group = &mld2q->mld2q_mca; - if (br->multicast_mld_version == 2 && + if (brmctx->multicast_mld_version == 2 && !ipv6_addr_any(&mld2q->mld2q_mca) && mld2q->mld2q_suppress) goto out; @@ -3033,18 +3238,19 @@ static int br_ip6_multicast_query(struct net_bridge *br, saddr.proto = htons(ETH_P_IPV6); saddr.src.ip6 = ipv6_hdr(skb)->saddr; - br_ip6_multicast_query_received(br, port, &br->ip6_other_query, + br_ip6_multicast_query_received(brmctx, pmctx, + &brmctx->ip6_other_query, &saddr, max_delay); goto out; } else if (!group) { goto out; } - mp = br_mdb_ip6_get(br, group, vid); + mp = br_mdb_ip6_get(brmctx->br, group, vid); if (!mp) goto out; - max_delay *= br->multicast_last_member_count; + max_delay *= brmctx->multicast_last_member_count; if (mp->host_joined && (timer_pending(&mp->timer) ? 
time_after(mp->timer.expires, now + max_delay) : @@ -3052,25 +3258,25 @@ static int br_ip6_multicast_query(struct net_bridge *br, mod_timer(&mp->timer, now + max_delay); for (pp = &mp->ports; - (p = mlock_dereference(*pp, br)) != NULL; + (p = mlock_dereference(*pp, brmctx->br)) != NULL; pp = &p->next) { if (timer_pending(&p->timer) ? time_after(p->timer.expires, now + max_delay) : try_to_del_timer_sync(&p->timer) >= 0 && - (br->multicast_mld_version == 1 || + (brmctx->multicast_mld_version == 1 || p->filter_mode == MCAST_EXCLUDE)) mod_timer(&p->timer, now + max_delay); } out: - spin_unlock(&br->multicast_lock); + spin_unlock(&brmctx->br->multicast_lock); return err; } #endif static void -br_multicast_leave_group(struct net_bridge *br, - struct net_bridge_port *port, +br_multicast_leave_group(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct br_ip *group, struct bridge_mcast_other_query *other_query, struct bridge_mcast_own_query *own_query, @@ -3081,22 +3287,21 @@ br_multicast_leave_group(struct net_bridge *br, unsigned long now; unsigned long time; - spin_lock(&br->multicast_lock); - if (!netif_running(br->dev) || - (port && port->state == BR_STATE_DISABLED)) + spin_lock(&brmctx->br->multicast_lock); + if (!br_multicast_ctx_should_use(brmctx, pmctx)) goto out; - mp = br_mdb_ip_get(br, group); + mp = br_mdb_ip_get(brmctx->br, group); if (!mp) goto out; - if (port && (port->flags & BR_MULTICAST_FAST_LEAVE)) { + if (pmctx && (pmctx->port->flags & BR_MULTICAST_FAST_LEAVE)) { struct net_bridge_port_group __rcu **pp; for (pp = &mp->ports; - (p = mlock_dereference(*pp, br)) != NULL; + (p = mlock_dereference(*pp, brmctx->br)) != NULL; pp = &p->next) { - if (!br_port_group_equal(p, port, src)) + if (!br_port_group_equal(p, pmctx->port, src)) continue; if (p->flags & MDB_PG_FLAGS_PERMANENT) @@ -3111,19 +3316,19 @@ br_multicast_leave_group(struct net_bridge *br, if (timer_pending(&other_query->timer)) goto out; - if (br_opt_get(br, 
BROPT_MULTICAST_QUERIER)) { - __br_multicast_send_query(br, port, NULL, NULL, &mp->addr, + if (br_opt_get(brmctx->br, BROPT_MULTICAST_QUERIER)) { + __br_multicast_send_query(brmctx, pmctx, NULL, NULL, &mp->addr, false, 0, NULL); - time = jiffies + br->multicast_last_member_count * - br->multicast_last_member_interval; + time = jiffies + brmctx->multicast_last_member_count * + brmctx->multicast_last_member_interval; mod_timer(&own_query->timer, time); - for (p = mlock_dereference(mp->ports, br); - p != NULL; - p = mlock_dereference(p->next, br)) { - if (!br_port_group_equal(p, port, src)) + for (p = mlock_dereference(mp->ports, brmctx->br); + p != NULL && pmctx != NULL; + p = mlock_dereference(p->next, brmctx->br)) { + if (!br_port_group_equal(p, pmctx->port, src)) continue; if (!hlist_unhashed(&p->mglist) && @@ -3138,10 +3343,10 @@ br_multicast_leave_group(struct net_bridge *br, } now = jiffies; - time = now + br->multicast_last_member_count * - br->multicast_last_member_interval; + time = now + brmctx->multicast_last_member_count * + brmctx->multicast_last_member_interval; - if (!port) { + if (!pmctx) { if (mp->host_joined && (timer_pending(&mp->timer) ? 
time_after(mp->timer.expires, time) : @@ -3152,10 +3357,10 @@ br_multicast_leave_group(struct net_bridge *br, goto out; } - for (p = mlock_dereference(mp->ports, br); + for (p = mlock_dereference(mp->ports, brmctx->br); p != NULL; - p = mlock_dereference(p->next, br)) { - if (p->key.port != port) + p = mlock_dereference(p->next, brmctx->br)) { + if (p->key.port != pmctx->port) continue; if (!hlist_unhashed(&p->mglist) && @@ -3168,11 +3373,11 @@ br_multicast_leave_group(struct net_bridge *br, break; } out: - spin_unlock(&br->multicast_lock); + spin_unlock(&brmctx->br->multicast_lock); } -static void br_ip4_multicast_leave_group(struct net_bridge *br, - struct net_bridge_port *port, +static void br_ip4_multicast_leave_group(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, __be32 group, __u16 vid, const unsigned char *src) @@ -3183,20 +3388,21 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br, if (ipv4_is_local_multicast(group)) return; - own_query = port ? &port->ip4_own_query : &br->ip4_own_query; + own_query = pmctx ? &pmctx->ip4_own_query : &brmctx->ip4_own_query; memset(&br_group, 0, sizeof(br_group)); br_group.dst.ip4 = group; br_group.proto = htons(ETH_P_IP); br_group.vid = vid; - br_multicast_leave_group(br, port, &br_group, &br->ip4_other_query, + br_multicast_leave_group(brmctx, pmctx, &br_group, + &brmctx->ip4_other_query, own_query, src); } #if IS_ENABLED(CONFIG_IPV6) -static void br_ip6_multicast_leave_group(struct net_bridge *br, - struct net_bridge_port *port, +static void br_ip6_multicast_leave_group(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, const struct in6_addr *group, __u16 vid, const unsigned char *src) @@ -3207,14 +3413,15 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br, if (ipv6_addr_is_ll_all_nodes(group)) return; - own_query = port ? &port->ip6_own_query : &br->ip6_own_query; + own_query = pmctx ? 
&pmctx->ip6_own_query : &brmctx->ip6_own_query; memset(&br_group, 0, sizeof(br_group)); br_group.dst.ip6 = *group; br_group.proto = htons(ETH_P_IPV6); br_group.vid = vid; - br_multicast_leave_group(br, port, &br_group, &br->ip6_other_query, + br_multicast_leave_group(brmctx, pmctx, &br_group, + &brmctx->ip6_other_query, own_query, src); } #endif @@ -3252,8 +3459,8 @@ static void br_multicast_err_count(const struct net_bridge *br, u64_stats_update_end(&pstats->syncp); } -static void br_multicast_pim(struct net_bridge *br, - struct net_bridge_port *port, +static void br_multicast_pim(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, const struct sk_buff *skb) { unsigned int offset = skb_transport_offset(skb); @@ -3264,31 +3471,32 @@ static void br_multicast_pim(struct net_bridge *br, pim_hdr_type(pimhdr) != PIM_TYPE_HELLO) return; - spin_lock(&br->multicast_lock); - br_ip4_multicast_mark_router(br, port); - spin_unlock(&br->multicast_lock); + spin_lock(&brmctx->br->multicast_lock); + br_ip4_multicast_mark_router(brmctx, pmctx); + spin_unlock(&brmctx->br->multicast_lock); } -static int br_ip4_multicast_mrd_rcv(struct net_bridge *br, - struct net_bridge_port *port, +static int br_ip4_multicast_mrd_rcv(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct sk_buff *skb) { if (ip_hdr(skb)->protocol != IPPROTO_IGMP || igmp_hdr(skb)->type != IGMP_MRDISC_ADV) return -ENOMSG; - spin_lock(&br->multicast_lock); - br_ip4_multicast_mark_router(br, port); - spin_unlock(&br->multicast_lock); + spin_lock(&brmctx->br->multicast_lock); + br_ip4_multicast_mark_router(brmctx, pmctx); + spin_unlock(&brmctx->br->multicast_lock); return 0; } -static int br_multicast_ipv4_rcv(struct net_bridge *br, - struct net_bridge_port *port, +static int br_multicast_ipv4_rcv(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct sk_buff *skb, u16 vid) { + struct net_bridge_port *p = pmctx ? 
pmctx->port : NULL; const unsigned char *src; struct igmphdr *ih; int err; @@ -3300,14 +3508,14 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, BR_INPUT_SKB_CB(skb)->mrouters_only = 1; } else if (pim_ipv4_all_pim_routers(ip_hdr(skb)->daddr)) { if (ip_hdr(skb)->protocol == IPPROTO_PIM) - br_multicast_pim(br, port, skb); + br_multicast_pim(brmctx, pmctx, skb); } else if (ipv4_is_all_snoopers(ip_hdr(skb)->daddr)) { - br_ip4_multicast_mrd_rcv(br, port, skb); + br_ip4_multicast_mrd_rcv(brmctx, pmctx, skb); } return 0; } else if (err < 0) { - br_multicast_err_count(br, port, skb->protocol); + br_multicast_err_count(brmctx->br, p, skb->protocol); return err; } @@ -3319,44 +3527,45 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, case IGMP_HOST_MEMBERSHIP_REPORT: case IGMPV2_HOST_MEMBERSHIP_REPORT: BR_INPUT_SKB_CB(skb)->mrouters_only = 1; - err = br_ip4_multicast_add_group(br, port, ih->group, vid, src, - true); + err = br_ip4_multicast_add_group(brmctx, pmctx, ih->group, vid, + src, true); break; case IGMPV3_HOST_MEMBERSHIP_REPORT: - err = br_ip4_multicast_igmp3_report(br, port, skb, vid); + err = br_ip4_multicast_igmp3_report(brmctx, pmctx, skb, vid); break; case IGMP_HOST_MEMBERSHIP_QUERY: - br_ip4_multicast_query(br, port, skb, vid); + br_ip4_multicast_query(brmctx, pmctx, skb, vid); break; case IGMP_HOST_LEAVE_MESSAGE: - br_ip4_multicast_leave_group(br, port, ih->group, vid, src); + br_ip4_multicast_leave_group(brmctx, pmctx, ih->group, vid, src); break; } - br_multicast_count(br, port, skb, BR_INPUT_SKB_CB(skb)->igmp, + br_multicast_count(brmctx->br, p, skb, BR_INPUT_SKB_CB(skb)->igmp, BR_MCAST_DIR_RX); return err; } #if IS_ENABLED(CONFIG_IPV6) -static void br_ip6_multicast_mrd_rcv(struct net_bridge *br, - struct net_bridge_port *port, +static void br_ip6_multicast_mrd_rcv(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct sk_buff *skb) { if (icmp6_hdr(skb)->icmp6_type != ICMPV6_MRDISC_ADV) return; - 
spin_lock(&br->multicast_lock); - br_ip6_multicast_mark_router(br, port); - spin_unlock(&br->multicast_lock); + spin_lock(&brmctx->br->multicast_lock); + br_ip6_multicast_mark_router(brmctx, pmctx); + spin_unlock(&brmctx->br->multicast_lock); } -static int br_multicast_ipv6_rcv(struct net_bridge *br, - struct net_bridge_port *port, +static int br_multicast_ipv6_rcv(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, struct sk_buff *skb, u16 vid) { + struct net_bridge_port *p = pmctx ? pmctx->port : NULL; const unsigned char *src; struct mld_msg *mld; int err; @@ -3368,11 +3577,11 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, BR_INPUT_SKB_CB(skb)->mrouters_only = 1; if (err == -ENODATA && ipv6_addr_is_all_snoopers(&ipv6_hdr(skb)->daddr)) - br_ip6_multicast_mrd_rcv(br, port, skb); + br_ip6_multicast_mrd_rcv(brmctx, pmctx, skb); return 0; } else if (err < 0) { - br_multicast_err_count(br, port, skb->protocol); + br_multicast_err_count(brmctx->br, p, skb->protocol); return err; } @@ -3383,29 +3592,32 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, case ICMPV6_MGM_REPORT: src = eth_hdr(skb)->h_source; BR_INPUT_SKB_CB(skb)->mrouters_only = 1; - err = br_ip6_multicast_add_group(br, port, &mld->mld_mca, vid, - src, true); + err = br_ip6_multicast_add_group(brmctx, pmctx, &mld->mld_mca, + vid, src, true); break; case ICMPV6_MLD2_REPORT: - err = br_ip6_multicast_mld2_report(br, port, skb, vid); + err = br_ip6_multicast_mld2_report(brmctx, pmctx, skb, vid); break; case ICMPV6_MGM_QUERY: - err = br_ip6_multicast_query(br, port, skb, vid); + err = br_ip6_multicast_query(brmctx, pmctx, skb, vid); break; case ICMPV6_MGM_REDUCTION: src = eth_hdr(skb)->h_source; - br_ip6_multicast_leave_group(br, port, &mld->mld_mca, vid, src); + br_ip6_multicast_leave_group(brmctx, pmctx, &mld->mld_mca, vid, + src); break; } - br_multicast_count(br, port, skb, BR_INPUT_SKB_CB(skb)->igmp, + br_multicast_count(brmctx->br, p, skb, 
BR_INPUT_SKB_CB(skb)->igmp, BR_MCAST_DIR_RX); return err; } #endif -int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, +int br_multicast_rcv(struct net_bridge_mcast **brmctx, + struct net_bridge_mcast_port **pmctx, + struct net_bridge_vlan *vlan, struct sk_buff *skb, u16 vid) { int ret = 0; @@ -3413,16 +3625,36 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, BR_INPUT_SKB_CB(skb)->igmp = 0; BR_INPUT_SKB_CB(skb)->mrouters_only = 0; - if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) + if (!br_opt_get((*brmctx)->br, BROPT_MULTICAST_ENABLED)) return 0; + if (br_opt_get((*brmctx)->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) && vlan) { + const struct net_bridge_vlan *masterv; + + /* the vlan has the master flag set only when transmitting + * through the bridge device + */ + if (br_vlan_is_master(vlan)) { + masterv = vlan; + *brmctx = &vlan->br_mcast_ctx; + *pmctx = NULL; + } else { + masterv = vlan->brvlan; + *brmctx = &vlan->brvlan->br_mcast_ctx; + *pmctx = &vlan->port_mcast_ctx; + } + + if (!(masterv->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED)) + return 0; + } + switch (skb->protocol) { case htons(ETH_P_IP): - ret = br_multicast_ipv4_rcv(br, port, skb, vid); + ret = br_multicast_ipv4_rcv(*brmctx, *pmctx, skb, vid); break; #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): - ret = br_multicast_ipv6_rcv(br, port, skb, vid); + ret = br_multicast_ipv6_rcv(*brmctx, *pmctx, skb, vid); break; #endif } @@ -3430,32 +3662,40 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, return ret; } -static void br_multicast_query_expired(struct net_bridge *br, +static void br_multicast_query_expired(struct net_bridge_mcast *brmctx, struct bridge_mcast_own_query *query, struct bridge_mcast_querier *querier) { - spin_lock(&br->multicast_lock); - if (query->startup_sent < br->multicast_startup_query_count) + spin_lock(&brmctx->br->multicast_lock); + if (br_multicast_ctx_vlan_disabled(brmctx)) + goto out; + + if 
(query->startup_sent < brmctx->multicast_startup_query_count) query->startup_sent++; RCU_INIT_POINTER(querier->port, NULL); - br_multicast_send_query(br, NULL, query); - spin_unlock(&br->multicast_lock); + br_multicast_send_query(brmctx, NULL, query); +out: + spin_unlock(&brmctx->br->multicast_lock); } static void br_ip4_multicast_query_expired(struct timer_list *t) { - struct net_bridge *br = from_timer(br, t, ip4_own_query.timer); + struct net_bridge_mcast *brmctx = from_timer(brmctx, t, + ip4_own_query.timer); - br_multicast_query_expired(br, &br->ip4_own_query, &br->ip4_querier); + br_multicast_query_expired(brmctx, &brmctx->ip4_own_query, + &brmctx->ip4_querier); } #if IS_ENABLED(CONFIG_IPV6) static void br_ip6_multicast_query_expired(struct timer_list *t) { - struct net_bridge *br = from_timer(br, t, ip6_own_query.timer); + struct net_bridge_mcast *brmctx = from_timer(brmctx, t, + ip6_own_query.timer); - br_multicast_query_expired(br, &br->ip6_own_query, &br->ip6_querier); + br_multicast_query_expired(brmctx, &brmctx->ip6_own_query, + &brmctx->ip6_querier); } #endif @@ -3472,47 +3712,63 @@ static void br_multicast_gc_work(struct work_struct *work) br_multicast_gc(&deleted_head); } -void br_multicast_init(struct net_bridge *br) +void br_multicast_ctx_init(struct net_bridge *br, + struct net_bridge_vlan *vlan, + struct net_bridge_mcast *brmctx) { - br->hash_max = BR_MULTICAST_DEFAULT_HASH_MAX; - - br->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; - br->multicast_last_member_count = 2; - br->multicast_startup_query_count = 2; + brmctx->br = br; + brmctx->vlan = vlan; + brmctx->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; + brmctx->multicast_last_member_count = 2; + brmctx->multicast_startup_query_count = 2; - br->multicast_last_member_interval = HZ; - br->multicast_query_response_interval = 10 * HZ; - br->multicast_startup_query_interval = 125 * HZ / 4; - br->multicast_query_interval = 125 * HZ; - br->multicast_querier_interval = 255 * HZ; - 
br->multicast_membership_interval = 260 * HZ; + brmctx->multicast_last_member_interval = HZ; + brmctx->multicast_query_response_interval = 10 * HZ; + brmctx->multicast_startup_query_interval = 125 * HZ / 4; + brmctx->multicast_query_interval = 125 * HZ; + brmctx->multicast_querier_interval = 255 * HZ; + brmctx->multicast_membership_interval = 260 * HZ; - br->ip4_other_query.delay_time = 0; - br->ip4_querier.port = NULL; - br->multicast_igmp_version = 2; + brmctx->ip4_other_query.delay_time = 0; + brmctx->ip4_querier.port = NULL; + brmctx->multicast_igmp_version = 2; #if IS_ENABLED(CONFIG_IPV6) - br->multicast_mld_version = 1; - br->ip6_other_query.delay_time = 0; - br->ip6_querier.port = NULL; + brmctx->multicast_mld_version = 1; + brmctx->ip6_other_query.delay_time = 0; + brmctx->ip6_querier.port = NULL; #endif - br_opt_toggle(br, BROPT_MULTICAST_ENABLED, true); - br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, true); - spin_lock_init(&br->multicast_lock); - timer_setup(&br->ip4_mc_router_timer, + timer_setup(&brmctx->ip4_mc_router_timer, br_ip4_multicast_local_router_expired, 0); - timer_setup(&br->ip4_other_query.timer, + timer_setup(&brmctx->ip4_other_query.timer, br_ip4_multicast_querier_expired, 0); - timer_setup(&br->ip4_own_query.timer, + timer_setup(&brmctx->ip4_own_query.timer, br_ip4_multicast_query_expired, 0); #if IS_ENABLED(CONFIG_IPV6) - timer_setup(&br->ip6_mc_router_timer, + timer_setup(&brmctx->ip6_mc_router_timer, br_ip6_multicast_local_router_expired, 0); - timer_setup(&br->ip6_other_query.timer, + timer_setup(&brmctx->ip6_other_query.timer, br_ip6_multicast_querier_expired, 0); - timer_setup(&br->ip6_own_query.timer, + timer_setup(&brmctx->ip6_own_query.timer, br_ip6_multicast_query_expired, 0); #endif +} + +void br_multicast_ctx_deinit(struct net_bridge_mcast *brmctx) +{ + __br_multicast_stop(brmctx); +} + +void br_multicast_init(struct net_bridge *br) +{ + br->hash_max = BR_MULTICAST_DEFAULT_HASH_MAX; + + br_multicast_ctx_init(br, NULL, 
&br->multicast_ctx); + + br_opt_toggle(br, BROPT_MULTICAST_ENABLED, true); + br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, true); + + spin_lock_init(&br->multicast_lock); INIT_HLIST_HEAD(&br->mdb_list); INIT_HLIST_HEAD(&br->mcast_gc_list); INIT_WORK(&br->mcast_gc_work, br_multicast_gc_work); @@ -3580,8 +3836,8 @@ void br_multicast_leave_snoopers(struct net_bridge *br) br_ip6_multicast_leave_snoopers(br); } -static void __br_multicast_open(struct net_bridge *br, - struct bridge_mcast_own_query *query) +static void __br_multicast_open_query(struct net_bridge *br, + struct bridge_mcast_own_query *query) { query->startup_sent = 0; @@ -3591,26 +3847,191 @@ static void __br_multicast_open(struct net_bridge *br, mod_timer(&query->timer, jiffies); } -void br_multicast_open(struct net_bridge *br) +static void __br_multicast_open(struct net_bridge_mcast *brmctx) { - __br_multicast_open(br, &br->ip4_own_query); + __br_multicast_open_query(brmctx->br, &brmctx->ip4_own_query); #if IS_ENABLED(CONFIG_IPV6) - __br_multicast_open(br, &br->ip6_own_query); + __br_multicast_open_query(brmctx->br, &brmctx->ip6_own_query); #endif } -void br_multicast_stop(struct net_bridge *br) +void br_multicast_open(struct net_bridge *br) +{ + ASSERT_RTNL(); + + if (br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) { + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *vlan; + + vg = br_vlan_group(br); + if (vg) { + list_for_each_entry(vlan, &vg->vlan_list, vlist) { + struct net_bridge_mcast *brmctx; + + brmctx = &vlan->br_mcast_ctx; + if (br_vlan_is_brentry(vlan) && + !br_multicast_ctx_vlan_disabled(brmctx)) + __br_multicast_open(&vlan->br_mcast_ctx); + } + } + } + + __br_multicast_open(&br->multicast_ctx); +} + +static void __br_multicast_stop(struct net_bridge_mcast *brmctx) { - del_timer_sync(&br->ip4_mc_router_timer); - del_timer_sync(&br->ip4_other_query.timer); - del_timer_sync(&br->ip4_own_query.timer); + del_timer_sync(&brmctx->ip4_mc_router_timer); + 
del_timer_sync(&brmctx->ip4_other_query.timer); + del_timer_sync(&brmctx->ip4_own_query.timer); #if IS_ENABLED(CONFIG_IPV6) - del_timer_sync(&br->ip6_mc_router_timer); - del_timer_sync(&br->ip6_other_query.timer); - del_timer_sync(&br->ip6_own_query.timer); + del_timer_sync(&brmctx->ip6_mc_router_timer); + del_timer_sync(&brmctx->ip6_other_query.timer); + del_timer_sync(&brmctx->ip6_own_query.timer); #endif } +void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan, bool on) +{ + struct net_bridge *br; + + /* it's okay to check for the flag without the multicast lock because it + * can only change under RTNL -> multicast_lock, we need the latter to + * sync with timers and packets + */ + if (on == !!(vlan->priv_flags & BR_VLFLAG_MCAST_ENABLED)) + return; + + if (br_vlan_is_master(vlan)) { + br = vlan->br; + + if (!br_vlan_is_brentry(vlan) || + (on && + br_multicast_ctx_vlan_global_disabled(&vlan->br_mcast_ctx))) + return; + + spin_lock_bh(&br->multicast_lock); + vlan->priv_flags ^= BR_VLFLAG_MCAST_ENABLED; + spin_unlock_bh(&br->multicast_lock); + + if (on) + __br_multicast_open(&vlan->br_mcast_ctx); + else + __br_multicast_stop(&vlan->br_mcast_ctx); + } else { + struct net_bridge_mcast *brmctx; + + brmctx = br_multicast_port_ctx_get_global(&vlan->port_mcast_ctx); + if (on && br_multicast_ctx_vlan_global_disabled(brmctx)) + return; + + br = vlan->port->br; + spin_lock_bh(&br->multicast_lock); + vlan->priv_flags ^= BR_VLFLAG_MCAST_ENABLED; + if (on) + __br_multicast_enable_port_ctx(&vlan->port_mcast_ctx); + else + __br_multicast_disable_port_ctx(&vlan->port_mcast_ctx); + spin_unlock_bh(&br->multicast_lock); + } +} + +void br_multicast_toggle_vlan(struct net_bridge_vlan *vlan, bool on) +{ + struct net_bridge_port *p; + + if (WARN_ON_ONCE(!br_vlan_is_master(vlan))) + return; + + list_for_each_entry(p, &vlan->br->port_list, list) { + struct net_bridge_vlan *vport; + + vport = br_vlan_find(nbp_vlan_group(p), vlan->vid); + if (!vport) + continue; + 
br_multicast_toggle_one_vlan(vport, on); + } +} + +int br_multicast_toggle_vlan_snooping(struct net_bridge *br, bool on, + struct netlink_ext_ack *extack) +{ + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *vlan; + struct net_bridge_port *p; + + if (br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) == on) + return 0; + + if (on && !br_opt_get(br, BROPT_VLAN_ENABLED)) { + NL_SET_ERR_MSG_MOD(extack, "Cannot enable multicast vlan snooping with vlan filtering disabled"); + return -EINVAL; + } + + vg = br_vlan_group(br); + if (!vg) + return 0; + + br_opt_toggle(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED, on); + + /* disable/enable non-vlan mcast contexts based on vlan snooping */ + if (on) + __br_multicast_stop(&br->multicast_ctx); + else + __br_multicast_open(&br->multicast_ctx); + list_for_each_entry(p, &br->port_list, list) { + if (on) + br_multicast_disable_port(p); + else + br_multicast_enable_port(p); + } + + list_for_each_entry(vlan, &vg->vlan_list, vlist) + br_multicast_toggle_vlan(vlan, on); + + return 0; +} + +bool br_multicast_toggle_global_vlan(struct net_bridge_vlan *vlan, bool on) +{ + ASSERT_RTNL(); + + /* BR_VLFLAG_GLOBAL_MCAST_ENABLED relies on eventual consistency and + * requires only RTNL to change + */ + if (on == !!(vlan->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED)) + return false; + + vlan->priv_flags ^= BR_VLFLAG_GLOBAL_MCAST_ENABLED; + br_multicast_toggle_vlan(vlan, on); + + return true; +} + +void br_multicast_stop(struct net_bridge *br) +{ + ASSERT_RTNL(); + + if (br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) { + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *vlan; + + vg = br_vlan_group(br); + if (vg) { + list_for_each_entry(vlan, &vg->vlan_list, vlist) { + struct net_bridge_mcast *brmctx; + + brmctx = &vlan->br_mcast_ctx; + if (br_vlan_is_brentry(vlan) && + !br_multicast_ctx_vlan_disabled(brmctx)) + __br_multicast_stop(&vlan->br_mcast_ctx); + } + } + } + + __br_multicast_stop(&br->multicast_ctx); +} + void 
br_multicast_dev_del(struct net_bridge *br) { struct net_bridge_mdb_entry *mp; @@ -3623,6 +4044,7 @@ void br_multicast_dev_del(struct net_bridge *br) hlist_move_list(&br->mcast_gc_list, &deleted_head); spin_unlock_bh(&br->multicast_lock); + br_multicast_ctx_deinit(&br->multicast_ctx); br_multicast_gc(&deleted_head); cancel_work_sync(&br->mcast_gc_work); @@ -3631,6 +4053,7 @@ void br_multicast_dev_del(struct net_bridge *br) int br_multicast_set_router(struct net_bridge *br, unsigned long val) { + struct net_bridge_mcast *brmctx = &br->multicast_ctx; int err = -EINVAL; spin_lock_bh(&br->multicast_lock); @@ -3639,17 +4062,17 @@ int br_multicast_set_router(struct net_bridge *br, unsigned long val) case MDB_RTR_TYPE_DISABLED: case MDB_RTR_TYPE_PERM: br_mc_router_state_change(br, val == MDB_RTR_TYPE_PERM); - del_timer(&br->ip4_mc_router_timer); + del_timer(&brmctx->ip4_mc_router_timer); #if IS_ENABLED(CONFIG_IPV6) - del_timer(&br->ip6_mc_router_timer); + del_timer(&brmctx->ip6_mc_router_timer); #endif - br->multicast_router = val; + brmctx->multicast_router = val; err = 0; break; case MDB_RTR_TYPE_TEMP_QUERY: - if (br->multicast_router != MDB_RTR_TYPE_TEMP_QUERY) + if (brmctx->multicast_router != MDB_RTR_TYPE_TEMP_QUERY) br_mc_router_state_change(br, false); - br->multicast_router = val; + brmctx->multicast_router = val; err = 0; break; } @@ -3660,7 +4083,7 @@ int br_multicast_set_router(struct net_bridge *br, unsigned long val) } static void -br_multicast_rport_del_notify(struct net_bridge_port *p, bool deleted) +br_multicast_rport_del_notify(struct net_bridge_mcast_port *pmctx, bool deleted) { if (!deleted) return; @@ -3668,37 +4091,38 @@ br_multicast_rport_del_notify(struct net_bridge_port *p, bool deleted) /* For backwards compatibility for now, only notify if there is * no multicast router anymore for both IPv4 and IPv6. 
*/ - if (!hlist_unhashed(&p->ip4_rlist)) + if (!hlist_unhashed(&pmctx->ip4_rlist)) return; #if IS_ENABLED(CONFIG_IPV6) - if (!hlist_unhashed(&p->ip6_rlist)) + if (!hlist_unhashed(&pmctx->ip6_rlist)) return; #endif - br_rtr_notify(p->br->dev, p, RTM_DELMDB); - br_port_mc_router_state_change(p, false); + br_rtr_notify(pmctx->port->br->dev, pmctx, RTM_DELMDB); + br_port_mc_router_state_change(pmctx->port, false); /* don't allow timer refresh */ - if (p->multicast_router == MDB_RTR_TYPE_TEMP) - p->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; + if (pmctx->multicast_router == MDB_RTR_TYPE_TEMP) + pmctx->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; } int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val) { - struct net_bridge *br = p->br; + struct net_bridge_mcast *brmctx = &p->br->multicast_ctx; + struct net_bridge_mcast_port *pmctx = &p->multicast_ctx; unsigned long now = jiffies; int err = -EINVAL; bool del = false; - spin_lock(&br->multicast_lock); - if (p->multicast_router == val) { + spin_lock(&p->br->multicast_lock); + if (pmctx->multicast_router == val) { /* Refresh the temp router port timer */ - if (p->multicast_router == MDB_RTR_TYPE_TEMP) { - mod_timer(&p->ip4_mc_router_timer, - now + br->multicast_querier_interval); + if (pmctx->multicast_router == MDB_RTR_TYPE_TEMP) { + mod_timer(&pmctx->ip4_mc_router_timer, + now + brmctx->multicast_querier_interval); #if IS_ENABLED(CONFIG_IPV6) - mod_timer(&p->ip6_mc_router_timer, - now + br->multicast_querier_interval); + mod_timer(&pmctx->ip6_mc_router_timer, + now + brmctx->multicast_querier_interval); #endif } err = 0; @@ -3706,63 +4130,86 @@ int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val) } switch (val) { case MDB_RTR_TYPE_DISABLED: - p->multicast_router = MDB_RTR_TYPE_DISABLED; - del |= br_ip4_multicast_rport_del(p); - del_timer(&p->ip4_mc_router_timer); - del |= br_ip6_multicast_rport_del(p); + pmctx->multicast_router = MDB_RTR_TYPE_DISABLED; + del |= 
br_ip4_multicast_rport_del(pmctx); + del_timer(&pmctx->ip4_mc_router_timer); + del |= br_ip6_multicast_rport_del(pmctx); #if IS_ENABLED(CONFIG_IPV6) - del_timer(&p->ip6_mc_router_timer); + del_timer(&pmctx->ip6_mc_router_timer); #endif - br_multicast_rport_del_notify(p, del); + br_multicast_rport_del_notify(pmctx, del); break; case MDB_RTR_TYPE_TEMP_QUERY: - p->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; - del |= br_ip4_multicast_rport_del(p); - del |= br_ip6_multicast_rport_del(p); - br_multicast_rport_del_notify(p, del); + pmctx->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; + del |= br_ip4_multicast_rport_del(pmctx); + del |= br_ip6_multicast_rport_del(pmctx); + br_multicast_rport_del_notify(pmctx, del); break; case MDB_RTR_TYPE_PERM: - p->multicast_router = MDB_RTR_TYPE_PERM; - del_timer(&p->ip4_mc_router_timer); - br_ip4_multicast_add_router(br, p); + pmctx->multicast_router = MDB_RTR_TYPE_PERM; + del_timer(&pmctx->ip4_mc_router_timer); + br_ip4_multicast_add_router(brmctx, pmctx); #if IS_ENABLED(CONFIG_IPV6) - del_timer(&p->ip6_mc_router_timer); + del_timer(&pmctx->ip6_mc_router_timer); #endif - br_ip6_multicast_add_router(br, p); + br_ip6_multicast_add_router(brmctx, pmctx); break; case MDB_RTR_TYPE_TEMP: - p->multicast_router = MDB_RTR_TYPE_TEMP; - br_ip4_multicast_mark_router(br, p); - br_ip6_multicast_mark_router(br, p); + pmctx->multicast_router = MDB_RTR_TYPE_TEMP; + br_ip4_multicast_mark_router(brmctx, pmctx); + br_ip6_multicast_mark_router(brmctx, pmctx); break; default: goto unlock; } err = 0; unlock: - spin_unlock(&br->multicast_lock); + spin_unlock(&p->br->multicast_lock); return err; } -static void br_multicast_start_querier(struct net_bridge *br, +static void br_multicast_start_querier(struct net_bridge_mcast *brmctx, struct bridge_mcast_own_query *query) { struct net_bridge_port *port; - __br_multicast_open(br, query); + __br_multicast_open_query(brmctx->br, query); rcu_read_lock(); - list_for_each_entry_rcu(port, &br->port_list, list) { - if 
(port->state == BR_STATE_DISABLED || - port->state == BR_STATE_BLOCKING) + list_for_each_entry_rcu(port, &brmctx->br->port_list, list) { + struct bridge_mcast_own_query *ip4_own_query; +#if IS_ENABLED(CONFIG_IPV6) + struct bridge_mcast_own_query *ip6_own_query; +#endif + + if (br_multicast_port_ctx_state_stopped(&port->multicast_ctx)) continue; - if (query == &br->ip4_own_query) - br_multicast_enable(&port->ip4_own_query); + if (br_multicast_ctx_is_vlan(brmctx)) { + struct net_bridge_vlan *vlan; + + vlan = br_vlan_find(nbp_vlan_group(port), brmctx->vlan->vid); + if (!vlan || + br_multicast_port_ctx_state_stopped(&vlan->port_mcast_ctx)) + continue; + + ip4_own_query = &vlan->port_mcast_ctx.ip4_own_query; +#if IS_ENABLED(CONFIG_IPV6) + ip6_own_query = &vlan->port_mcast_ctx.ip6_own_query; +#endif + } else { + ip4_own_query = &port->multicast_ctx.ip4_own_query; +#if IS_ENABLED(CONFIG_IPV6) + ip6_own_query = &port->multicast_ctx.ip6_own_query; +#endif + } + + if (query == &brmctx->ip4_own_query) + br_multicast_enable(ip4_own_query); #if IS_ENABLED(CONFIG_IPV6) else - br_multicast_enable(&port->ip6_own_query); + br_multicast_enable(ip6_own_query); #endif } rcu_read_unlock(); @@ -3796,7 +4243,7 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val, br_multicast_open(br); list_for_each_entry(port, &br->port_list, list) - __br_multicast_enable_port(port); + __br_multicast_enable_port_ctx(&port->multicast_ctx); change_snoopers = true; @@ -3839,7 +4286,7 @@ bool br_multicast_router(const struct net_device *dev) bool is_router; spin_lock_bh(&br->multicast_lock); - is_router = br_multicast_is_router(br, NULL); + is_router = br_multicast_is_router(&br->multicast_ctx, NULL); spin_unlock_bh(&br->multicast_lock); return is_router; } @@ -3847,6 +4294,7 @@ EXPORT_SYMBOL_GPL(br_multicast_router); int br_multicast_set_querier(struct net_bridge *br, unsigned long val) { + struct net_bridge_mcast *brmctx = &br->multicast_ctx; unsigned long max_delay; val = !!val; @@ 
-3859,18 +4307,18 @@ int br_multicast_set_querier(struct net_bridge *br, unsigned long val) if (!val) goto unlock; - max_delay = br->multicast_query_response_interval; + max_delay = brmctx->multicast_query_response_interval; - if (!timer_pending(&br->ip4_other_query.timer)) - br->ip4_other_query.delay_time = jiffies + max_delay; + if (!timer_pending(&brmctx->ip4_other_query.timer)) + brmctx->ip4_other_query.delay_time = jiffies + max_delay; - br_multicast_start_querier(br, &br->ip4_own_query); + br_multicast_start_querier(brmctx, &brmctx->ip4_own_query); #if IS_ENABLED(CONFIG_IPV6) - if (!timer_pending(&br->ip6_other_query.timer)) - br->ip6_other_query.delay_time = jiffies + max_delay; + if (!timer_pending(&brmctx->ip6_other_query.timer)) + brmctx->ip6_other_query.delay_time = jiffies + max_delay; - br_multicast_start_querier(br, &br->ip6_own_query); + br_multicast_start_querier(brmctx, &brmctx->ip6_own_query); #endif unlock: @@ -3891,7 +4339,7 @@ int br_multicast_set_igmp_version(struct net_bridge *br, unsigned long val) } spin_lock_bh(&br->multicast_lock); - br->multicast_igmp_version = val; + br->multicast_ctx.multicast_igmp_version = val; spin_unlock_bh(&br->multicast_lock); return 0; @@ -3910,7 +4358,7 @@ int br_multicast_set_mld_version(struct net_bridge *br, unsigned long val) } spin_lock_bh(&br->multicast_lock); - br->multicast_mld_version = val; + br->multicast_ctx.multicast_mld_version = val; spin_unlock_bh(&br->multicast_lock); return 0; @@ -4003,7 +4451,7 @@ bool br_multicast_has_querier_anywhere(struct net_device *dev, int proto) memset(&eth, 0, sizeof(eth)); eth.h_proto = htons(proto); - ret = br_multicast_querier_exists(br, &eth, NULL); + ret = br_multicast_querier_exists(&br->multicast_ctx, &eth, NULL); unlock: rcu_read_unlock(); @@ -4022,6 +4470,7 @@ EXPORT_SYMBOL_GPL(br_multicast_has_querier_anywhere); */ bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto) { + struct net_bridge_mcast *brmctx; struct net_bridge *br; struct 
net_bridge_port *port; bool ret = false; @@ -4035,17 +4484,18 @@ bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto) goto unlock; br = port->br; + brmctx = &br->multicast_ctx; switch (proto) { case ETH_P_IP: - if (!timer_pending(&br->ip4_other_query.timer) || - rcu_dereference(br->ip4_querier.port) == port) + if (!timer_pending(&brmctx->ip4_other_query.timer) || + rcu_dereference(brmctx->ip4_querier.port) == port) goto unlock; break; #if IS_ENABLED(CONFIG_IPV6) case ETH_P_IPV6: - if (!timer_pending(&br->ip6_other_query.timer) || - rcu_dereference(br->ip6_querier.port) == port) + if (!timer_pending(&brmctx->ip6_other_query.timer) || + rcu_dereference(brmctx->ip6_querier.port) == port) goto unlock; break; #endif @@ -4071,7 +4521,9 @@ EXPORT_SYMBOL_GPL(br_multicast_has_querier_adjacent); */ bool br_multicast_has_router_adjacent(struct net_device *dev, int proto) { - struct net_bridge_port *port, *p; + struct net_bridge_mcast_port *pmctx; + struct net_bridge_mcast *brmctx; + struct net_bridge_port *port; bool ret = false; rcu_read_lock(); @@ -4079,11 +4531,12 @@ bool br_multicast_has_router_adjacent(struct net_device *dev, int proto) if (!port) goto unlock; + brmctx = &port->br->multicast_ctx; switch (proto) { case ETH_P_IP: - hlist_for_each_entry_rcu(p, &port->br->ip4_mc_router_list, + hlist_for_each_entry_rcu(pmctx, &brmctx->ip4_mc_router_list, ip4_rlist) { - if (p == port) + if (pmctx->port == port) continue; ret = true; @@ -4092,9 +4545,9 @@ bool br_multicast_has_router_adjacent(struct net_device *dev, int proto) break; #if IS_ENABLED(CONFIG_IPV6) case ETH_P_IPV6: - hlist_for_each_entry_rcu(p, &port->br->ip6_mc_router_list, + hlist_for_each_entry_rcu(pmctx, &brmctx->ip6_mc_router_list, ip6_rlist) { - if (p == port) + if (pmctx->port == port) continue; ret = true; @@ -4186,7 +4639,8 @@ static void br_mcast_stats_add(struct bridge_mcast_stats __percpu *stats, u64_stats_update_end(&pstats->syncp); } -void br_multicast_count(struct net_bridge *br, 
const struct net_bridge_port *p, +void br_multicast_count(struct net_bridge *br, + const struct net_bridge_port *p, const struct sk_buff *skb, u8 type, u8 dir) { struct bridge_mcast_stats __percpu *stats; diff --git a/net/bridge/br_multicast_eht.c b/net/bridge/br_multicast_eht.c index 13290a749d09..f91c071d1608 100644 --- a/net/bridge/br_multicast_eht.c +++ b/net/bridge/br_multicast_eht.c @@ -33,7 +33,8 @@ static bool br_multicast_del_eht_set_entry(struct net_bridge_port_group *pg, union net_bridge_eht_addr *src_addr, union net_bridge_eht_addr *h_addr); -static void br_multicast_create_eht_set_entry(struct net_bridge_port_group *pg, +static void br_multicast_create_eht_set_entry(const struct net_bridge_mcast *brmctx, + struct net_bridge_port_group *pg, union net_bridge_eht_addr *src_addr, union net_bridge_eht_addr *h_addr, int filter_mode, @@ -388,7 +389,8 @@ static void br_multicast_ip_src_to_eht_addr(const struct br_ip *src, } } -static void br_eht_convert_host_filter_mode(struct net_bridge_port_group *pg, +static void br_eht_convert_host_filter_mode(const struct net_bridge_mcast *brmctx, + struct net_bridge_port_group *pg, union net_bridge_eht_addr *h_addr, int filter_mode) { @@ -405,14 +407,15 @@ static void br_eht_convert_host_filter_mode(struct net_bridge_port_group *pg, br_multicast_del_eht_set_entry(pg, &zero_addr, h_addr); break; case MCAST_EXCLUDE: - br_multicast_create_eht_set_entry(pg, &zero_addr, h_addr, - MCAST_EXCLUDE, + br_multicast_create_eht_set_entry(brmctx, pg, &zero_addr, + h_addr, MCAST_EXCLUDE, true); break; } } -static void br_multicast_create_eht_set_entry(struct net_bridge_port_group *pg, +static void br_multicast_create_eht_set_entry(const struct net_bridge_mcast *brmctx, + struct net_bridge_port_group *pg, union net_bridge_eht_addr *src_addr, union net_bridge_eht_addr *h_addr, int filter_mode, @@ -441,8 +444,8 @@ static void br_multicast_create_eht_set_entry(struct net_bridge_port_group *pg, if (!set_h) goto fail_set_entry; - 
mod_timer(&set_h->timer, jiffies + br_multicast_gmi(br)); - mod_timer(&eht_set->timer, jiffies + br_multicast_gmi(br)); + mod_timer(&set_h->timer, jiffies + br_multicast_gmi(brmctx)); + mod_timer(&eht_set->timer, jiffies + br_multicast_gmi(brmctx)); return; @@ -499,7 +502,8 @@ static void br_multicast_del_eht_host(struct net_bridge_port_group *pg, } /* create new set entries from reports */ -static void __eht_create_set_entries(struct net_bridge_port_group *pg, +static void __eht_create_set_entries(const struct net_bridge_mcast *brmctx, + struct net_bridge_port_group *pg, union net_bridge_eht_addr *h_addr, void *srcs, u32 nsrcs, @@ -512,8 +516,8 @@ static void __eht_create_set_entries(struct net_bridge_port_group *pg, memset(&eht_src_addr, 0, sizeof(eht_src_addr)); for (src_idx = 0; src_idx < nsrcs; src_idx++) { memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size); - br_multicast_create_eht_set_entry(pg, &eht_src_addr, h_addr, - filter_mode, + br_multicast_create_eht_set_entry(brmctx, pg, &eht_src_addr, + h_addr, filter_mode, false); } } @@ -549,7 +553,8 @@ static bool __eht_del_set_entries(struct net_bridge_port_group *pg, return changed; } -static bool br_multicast_eht_allow(struct net_bridge_port_group *pg, +static bool br_multicast_eht_allow(const struct net_bridge_mcast *brmctx, + struct net_bridge_port_group *pg, union net_bridge_eht_addr *h_addr, void *srcs, u32 nsrcs, @@ -559,8 +564,8 @@ static bool br_multicast_eht_allow(struct net_bridge_port_group *pg, switch (br_multicast_eht_host_filter_mode(pg, h_addr)) { case MCAST_INCLUDE: - __eht_create_set_entries(pg, h_addr, srcs, nsrcs, addr_size, - MCAST_INCLUDE); + __eht_create_set_entries(brmctx, pg, h_addr, srcs, nsrcs, + addr_size, MCAST_INCLUDE); break; case MCAST_EXCLUDE: changed = __eht_del_set_entries(pg, h_addr, srcs, nsrcs, @@ -571,7 +576,8 @@ static bool br_multicast_eht_allow(struct net_bridge_port_group *pg, return changed; } -static bool br_multicast_eht_block(struct 
net_bridge_port_group *pg, +static bool br_multicast_eht_block(const struct net_bridge_mcast *brmctx, + struct net_bridge_port_group *pg, union net_bridge_eht_addr *h_addr, void *srcs, u32 nsrcs, @@ -585,7 +591,7 @@ static bool br_multicast_eht_block(struct net_bridge_port_group *pg, addr_size); break; case MCAST_EXCLUDE: - __eht_create_set_entries(pg, h_addr, srcs, nsrcs, addr_size, + __eht_create_set_entries(brmctx, pg, h_addr, srcs, nsrcs, addr_size, MCAST_EXCLUDE); break; } @@ -594,7 +600,8 @@ static bool br_multicast_eht_block(struct net_bridge_port_group *pg, } /* flush_entries is true when changing mode */ -static bool __eht_inc_exc(struct net_bridge_port_group *pg, +static bool __eht_inc_exc(const struct net_bridge_mcast *brmctx, + struct net_bridge_port_group *pg, union net_bridge_eht_addr *h_addr, void *srcs, u32 nsrcs, @@ -612,11 +619,10 @@ static bool __eht_inc_exc(struct net_bridge_port_group *pg, /* if we're changing mode del host and its entries */ if (flush_entries) br_multicast_del_eht_host(pg, h_addr); - __eht_create_set_entries(pg, h_addr, srcs, nsrcs, addr_size, + __eht_create_set_entries(brmctx, pg, h_addr, srcs, nsrcs, addr_size, filter_mode); /* we can be missing sets only if we've deleted some entries */ if (flush_entries) { - struct net_bridge *br = pg->key.port->br; struct net_bridge_group_eht_set *eht_set; struct net_bridge_group_src *src_ent; struct hlist_node *tmp; @@ -647,14 +653,15 @@ static bool __eht_inc_exc(struct net_bridge_port_group *pg, &eht_src_addr); if (!eht_set) continue; - mod_timer(&eht_set->timer, jiffies + br_multicast_lmqt(br)); + mod_timer(&eht_set->timer, jiffies + br_multicast_lmqt(brmctx)); } } return changed; } -static bool br_multicast_eht_inc(struct net_bridge_port_group *pg, +static bool br_multicast_eht_inc(const struct net_bridge_mcast *brmctx, + struct net_bridge_port_group *pg, union net_bridge_eht_addr *h_addr, void *srcs, u32 nsrcs, @@ -663,14 +670,15 @@ static bool br_multicast_eht_inc(struct 
net_bridge_port_group *pg, { bool changed; - changed = __eht_inc_exc(pg, h_addr, srcs, nsrcs, addr_size, + changed = __eht_inc_exc(brmctx, pg, h_addr, srcs, nsrcs, addr_size, MCAST_INCLUDE, to_report); - br_eht_convert_host_filter_mode(pg, h_addr, MCAST_INCLUDE); + br_eht_convert_host_filter_mode(brmctx, pg, h_addr, MCAST_INCLUDE); return changed; } -static bool br_multicast_eht_exc(struct net_bridge_port_group *pg, +static bool br_multicast_eht_exc(const struct net_bridge_mcast *brmctx, + struct net_bridge_port_group *pg, union net_bridge_eht_addr *h_addr, void *srcs, u32 nsrcs, @@ -679,14 +687,15 @@ static bool br_multicast_eht_exc(struct net_bridge_port_group *pg, { bool changed; - changed = __eht_inc_exc(pg, h_addr, srcs, nsrcs, addr_size, + changed = __eht_inc_exc(brmctx, pg, h_addr, srcs, nsrcs, addr_size, MCAST_EXCLUDE, to_report); - br_eht_convert_host_filter_mode(pg, h_addr, MCAST_EXCLUDE); + br_eht_convert_host_filter_mode(brmctx, pg, h_addr, MCAST_EXCLUDE); return changed; } -static bool __eht_ip4_handle(struct net_bridge_port_group *pg, +static bool __eht_ip4_handle(const struct net_bridge_mcast *brmctx, + struct net_bridge_port_group *pg, union net_bridge_eht_addr *h_addr, void *srcs, u32 nsrcs, @@ -696,24 +705,25 @@ static bool __eht_ip4_handle(struct net_bridge_port_group *pg, switch (grec_type) { case IGMPV3_ALLOW_NEW_SOURCES: - br_multicast_eht_allow(pg, h_addr, srcs, nsrcs, sizeof(__be32)); + br_multicast_eht_allow(brmctx, pg, h_addr, srcs, nsrcs, + sizeof(__be32)); break; case IGMPV3_BLOCK_OLD_SOURCES: - changed = br_multicast_eht_block(pg, h_addr, srcs, nsrcs, + changed = br_multicast_eht_block(brmctx, pg, h_addr, srcs, nsrcs, sizeof(__be32)); break; case IGMPV3_CHANGE_TO_INCLUDE: to_report = true; fallthrough; case IGMPV3_MODE_IS_INCLUDE: - changed = br_multicast_eht_inc(pg, h_addr, srcs, nsrcs, + changed = br_multicast_eht_inc(brmctx, pg, h_addr, srcs, nsrcs, sizeof(__be32), to_report); break; case IGMPV3_CHANGE_TO_EXCLUDE: to_report = true; 
fallthrough; case IGMPV3_MODE_IS_EXCLUDE: - changed = br_multicast_eht_exc(pg, h_addr, srcs, nsrcs, + changed = br_multicast_eht_exc(brmctx, pg, h_addr, srcs, nsrcs, sizeof(__be32), to_report); break; } @@ -722,7 +732,8 @@ static bool __eht_ip4_handle(struct net_bridge_port_group *pg, } #if IS_ENABLED(CONFIG_IPV6) -static bool __eht_ip6_handle(struct net_bridge_port_group *pg, +static bool __eht_ip6_handle(const struct net_bridge_mcast *brmctx, + struct net_bridge_port_group *pg, union net_bridge_eht_addr *h_addr, void *srcs, u32 nsrcs, @@ -732,18 +743,18 @@ static bool __eht_ip6_handle(struct net_bridge_port_group *pg, switch (grec_type) { case MLD2_ALLOW_NEW_SOURCES: - br_multicast_eht_allow(pg, h_addr, srcs, nsrcs, + br_multicast_eht_allow(brmctx, pg, h_addr, srcs, nsrcs, sizeof(struct in6_addr)); break; case MLD2_BLOCK_OLD_SOURCES: - changed = br_multicast_eht_block(pg, h_addr, srcs, nsrcs, + changed = br_multicast_eht_block(brmctx, pg, h_addr, srcs, nsrcs, sizeof(struct in6_addr)); break; case MLD2_CHANGE_TO_INCLUDE: to_report = true; fallthrough; case MLD2_MODE_IS_INCLUDE: - changed = br_multicast_eht_inc(pg, h_addr, srcs, nsrcs, + changed = br_multicast_eht_inc(brmctx, pg, h_addr, srcs, nsrcs, sizeof(struct in6_addr), to_report); break; @@ -751,7 +762,7 @@ static bool __eht_ip6_handle(struct net_bridge_port_group *pg, to_report = true; fallthrough; case MLD2_MODE_IS_EXCLUDE: - changed = br_multicast_eht_exc(pg, h_addr, srcs, nsrcs, + changed = br_multicast_eht_exc(brmctx, pg, h_addr, srcs, nsrcs, sizeof(struct in6_addr), to_report); break; @@ -762,7 +773,8 @@ static bool __eht_ip6_handle(struct net_bridge_port_group *pg, #endif /* true means an entry was deleted */ -bool br_multicast_eht_handle(struct net_bridge_port_group *pg, +bool br_multicast_eht_handle(const struct net_bridge_mcast *brmctx, + struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, @@ -779,12 +791,12 @@ bool br_multicast_eht_handle(struct net_bridge_port_group *pg, 
memset(&eht_host_addr, 0, sizeof(eht_host_addr)); memcpy(&eht_host_addr, h_addr, addr_size); if (addr_size == sizeof(__be32)) - changed = __eht_ip4_handle(pg, &eht_host_addr, srcs, nsrcs, - grec_type); + changed = __eht_ip4_handle(brmctx, pg, &eht_host_addr, srcs, + nsrcs, grec_type); #if IS_ENABLED(CONFIG_IPV6) else - changed = __eht_ip6_handle(pg, &eht_host_addr, srcs, nsrcs, - grec_type); + changed = __eht_ip6_handle(brmctx, pg, &eht_host_addr, srcs, + nsrcs, grec_type); #endif out: diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 8642e56059fb..616a1b6dec3c 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -287,7 +287,7 @@ static int br_port_fill_attrs(struct sk_buff *skb, #ifdef CONFIG_BRIDGE_IGMP_SNOOPING if (nla_put_u8(skb, IFLA_BRPORT_MULTICAST_ROUTER, - p->multicast_router) || + p->multicast_ctx.multicast_router) || nla_put_u32(skb, IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT, p->multicast_eht_hosts_limit) || nla_put_u32(skb, IFLA_BRPORT_MCAST_EHT_HOSTS_CNT, @@ -1324,49 +1324,49 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], if (data[IFLA_BR_MCAST_LAST_MEMBER_CNT]) { u32 val = nla_get_u32(data[IFLA_BR_MCAST_LAST_MEMBER_CNT]); - br->multicast_last_member_count = val; + br->multicast_ctx.multicast_last_member_count = val; } if (data[IFLA_BR_MCAST_STARTUP_QUERY_CNT]) { u32 val = nla_get_u32(data[IFLA_BR_MCAST_STARTUP_QUERY_CNT]); - br->multicast_startup_query_count = val; + br->multicast_ctx.multicast_startup_query_count = val; } if (data[IFLA_BR_MCAST_LAST_MEMBER_INTVL]) { u64 val = nla_get_u64(data[IFLA_BR_MCAST_LAST_MEMBER_INTVL]); - br->multicast_last_member_interval = clock_t_to_jiffies(val); + br->multicast_ctx.multicast_last_member_interval = clock_t_to_jiffies(val); } if (data[IFLA_BR_MCAST_MEMBERSHIP_INTVL]) { u64 val = nla_get_u64(data[IFLA_BR_MCAST_MEMBERSHIP_INTVL]); - br->multicast_membership_interval = clock_t_to_jiffies(val); + br->multicast_ctx.multicast_membership_interval = 
clock_t_to_jiffies(val); } if (data[IFLA_BR_MCAST_QUERIER_INTVL]) { u64 val = nla_get_u64(data[IFLA_BR_MCAST_QUERIER_INTVL]); - br->multicast_querier_interval = clock_t_to_jiffies(val); + br->multicast_ctx.multicast_querier_interval = clock_t_to_jiffies(val); } if (data[IFLA_BR_MCAST_QUERY_INTVL]) { u64 val = nla_get_u64(data[IFLA_BR_MCAST_QUERY_INTVL]); - br->multicast_query_interval = clock_t_to_jiffies(val); + br->multicast_ctx.multicast_query_interval = clock_t_to_jiffies(val); } if (data[IFLA_BR_MCAST_QUERY_RESPONSE_INTVL]) { u64 val = nla_get_u64(data[IFLA_BR_MCAST_QUERY_RESPONSE_INTVL]); - br->multicast_query_response_interval = clock_t_to_jiffies(val); + br->multicast_ctx.multicast_query_response_interval = clock_t_to_jiffies(val); } if (data[IFLA_BR_MCAST_STARTUP_QUERY_INTVL]) { u64 val = nla_get_u64(data[IFLA_BR_MCAST_STARTUP_QUERY_INTVL]); - br->multicast_startup_query_interval = clock_t_to_jiffies(val); + br->multicast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val); } if (data[IFLA_BR_MCAST_STATS_ENABLED]) { @@ -1566,7 +1566,8 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) return -EMSGSIZE; #endif #ifdef CONFIG_BRIDGE_IGMP_SNOOPING - if (nla_put_u8(skb, IFLA_BR_MCAST_ROUTER, br->multicast_router) || + if (nla_put_u8(skb, IFLA_BR_MCAST_ROUTER, + br->multicast_ctx.multicast_router) || nla_put_u8(skb, IFLA_BR_MCAST_SNOOPING, br_opt_get(br, BROPT_MULTICAST_ENABLED)) || nla_put_u8(skb, IFLA_BR_MCAST_QUERY_USE_IFADDR, @@ -1578,38 +1579,38 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) nla_put_u32(skb, IFLA_BR_MCAST_HASH_ELASTICITY, RHT_ELASTICITY) || nla_put_u32(skb, IFLA_BR_MCAST_HASH_MAX, br->hash_max) || nla_put_u32(skb, IFLA_BR_MCAST_LAST_MEMBER_CNT, - br->multicast_last_member_count) || + br->multicast_ctx.multicast_last_member_count) || nla_put_u32(skb, IFLA_BR_MCAST_STARTUP_QUERY_CNT, - br->multicast_startup_query_count) || + 
br->multicast_ctx.multicast_startup_query_count) || nla_put_u8(skb, IFLA_BR_MCAST_IGMP_VERSION, - br->multicast_igmp_version)) + br->multicast_ctx.multicast_igmp_version)) return -EMSGSIZE; #if IS_ENABLED(CONFIG_IPV6) if (nla_put_u8(skb, IFLA_BR_MCAST_MLD_VERSION, - br->multicast_mld_version)) + br->multicast_ctx.multicast_mld_version)) return -EMSGSIZE; #endif - clockval = jiffies_to_clock_t(br->multicast_last_member_interval); + clockval = jiffies_to_clock_t(br->multicast_ctx.multicast_last_member_interval); if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_LAST_MEMBER_INTVL, clockval, IFLA_BR_PAD)) return -EMSGSIZE; - clockval = jiffies_to_clock_t(br->multicast_membership_interval); + clockval = jiffies_to_clock_t(br->multicast_ctx.multicast_membership_interval); if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_MEMBERSHIP_INTVL, clockval, IFLA_BR_PAD)) return -EMSGSIZE; - clockval = jiffies_to_clock_t(br->multicast_querier_interval); + clockval = jiffies_to_clock_t(br->multicast_ctx.multicast_querier_interval); if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_QUERIER_INTVL, clockval, IFLA_BR_PAD)) return -EMSGSIZE; - clockval = jiffies_to_clock_t(br->multicast_query_interval); + clockval = jiffies_to_clock_t(br->multicast_ctx.multicast_query_interval); if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_QUERY_INTVL, clockval, IFLA_BR_PAD)) return -EMSGSIZE; - clockval = jiffies_to_clock_t(br->multicast_query_response_interval); + clockval = jiffies_to_clock_t(br->multicast_ctx.multicast_query_response_interval); if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_QUERY_RESPONSE_INTVL, clockval, IFLA_BR_PAD)) return -EMSGSIZE; - clockval = jiffies_to_clock_t(br->multicast_startup_query_interval); + clockval = jiffies_to_clock_t(br->multicast_ctx.multicast_startup_query_interval); if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_STARTUP_QUERY_INTVL, clockval, IFLA_BR_PAD)) return -EMSGSIZE; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 2b48b204205e..1c57877270f7 100644 --- 
a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -29,6 +29,8 @@ #define BR_MULTICAST_DEFAULT_HASH_MAX 4096 +#define BR_HWDOM_MAX BITS_PER_LONG + #define BR_VERSION "2.3" /* Control of forwarding link local multicast */ @@ -89,6 +91,59 @@ struct bridge_mcast_stats { }; #endif +/* net_bridge_mcast_port must be always defined due to forwarding stubs */ +struct net_bridge_mcast_port { +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + struct net_bridge_port *port; + struct net_bridge_vlan *vlan; + + struct bridge_mcast_own_query ip4_own_query; + struct timer_list ip4_mc_router_timer; + struct hlist_node ip4_rlist; +#if IS_ENABLED(CONFIG_IPV6) + struct bridge_mcast_own_query ip6_own_query; + struct timer_list ip6_mc_router_timer; + struct hlist_node ip6_rlist; +#endif /* IS_ENABLED(CONFIG_IPV6) */ + unsigned char multicast_router; +#endif /* CONFIG_BRIDGE_IGMP_SNOOPING */ +}; + +/* net_bridge_mcast must be always defined due to forwarding stubs */ +struct net_bridge_mcast { +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + struct net_bridge *br; + struct net_bridge_vlan *vlan; + + u32 multicast_last_member_count; + u32 multicast_startup_query_count; + + u8 multicast_igmp_version; + u8 multicast_router; +#if IS_ENABLED(CONFIG_IPV6) + u8 multicast_mld_version; +#endif + unsigned long multicast_last_member_interval; + unsigned long multicast_membership_interval; + unsigned long multicast_querier_interval; + unsigned long multicast_query_interval; + unsigned long multicast_query_response_interval; + unsigned long multicast_startup_query_interval; + struct hlist_head ip4_mc_router_list; + struct timer_list ip4_mc_router_timer; + struct bridge_mcast_other_query ip4_other_query; + struct bridge_mcast_own_query ip4_own_query; + struct bridge_mcast_querier ip4_querier; +#if IS_ENABLED(CONFIG_IPV6) + struct hlist_head ip6_mc_router_list; + struct timer_list ip6_mc_router_timer; + struct bridge_mcast_other_query ip6_other_query; + struct bridge_mcast_own_query ip6_own_query; + struct 
bridge_mcast_querier ip6_querier; +#endif /* IS_ENABLED(CONFIG_IPV6) */ +#endif /* CONFIG_BRIDGE_IGMP_SNOOPING */ +}; + struct br_tunnel_info { __be64 tunnel_id; struct metadata_dst __rcu *tunnel_dst; @@ -98,6 +153,8 @@ struct br_tunnel_info { enum { BR_VLFLAG_PER_PORT_STATS = BIT(0), BR_VLFLAG_ADDED_BY_SWITCHDEV = BIT(1), + BR_VLFLAG_MCAST_ENABLED = BIT(2), + BR_VLFLAG_GLOBAL_MCAST_ENABLED = BIT(3), }; /** @@ -114,6 +171,9 @@ enum { * @refcnt: if MASTER flag set, this is bumped for each port referencing it * @brvlan: if MASTER flag unset, this points to the global per-VLAN context * for this VLAN entry + * @br_mcast_ctx: if MASTER flag set, this is the global vlan multicast context + * @port_mcast_ctx: if MASTER flag unset, this is the per-port/vlan multicast + * context * @vlist: sorted list of VLAN entries * @rcu: used for entry destruction * @@ -141,6 +201,11 @@ struct net_bridge_vlan { struct br_tunnel_info tinfo; + union { + struct net_bridge_mcast br_mcast_ctx; + struct net_bridge_mcast_port port_mcast_ctx; + }; + struct list_head vlist; struct rcu_head rcu; @@ -305,19 +370,13 @@ struct net_bridge_port { struct kobject kobj; struct rcu_head rcu; + struct net_bridge_mcast_port multicast_ctx; + #ifdef CONFIG_BRIDGE_IGMP_SNOOPING - struct bridge_mcast_own_query ip4_own_query; - struct timer_list ip4_mc_router_timer; - struct hlist_node ip4_rlist; -#if IS_ENABLED(CONFIG_IPV6) - struct bridge_mcast_own_query ip6_own_query; - struct timer_list ip6_mc_router_timer; - struct hlist_node ip6_rlist; -#endif /* IS_ENABLED(CONFIG_IPV6) */ + struct bridge_mcast_stats __percpu *mcast_stats; + u32 multicast_eht_hosts_limit; u32 multicast_eht_hosts_cnt; - unsigned char multicast_router; - struct bridge_mcast_stats __percpu *mcast_stats; struct hlist_head mglist; #endif @@ -329,7 +388,12 @@ struct net_bridge_port { struct netpoll *np; #endif #ifdef CONFIG_NET_SWITCHDEV - int offload_fwd_mark; + /* Identifier used to group ports that share the same switchdev + * hardware 
domain. + */ + int hwdom; + int offload_count; + struct netdev_phys_item_id ppid; #endif u16 group_fwd_mask; u16 backup_redirected_cnt; @@ -376,6 +440,7 @@ enum net_bridge_opts { BROPT_VLAN_STATS_PER_PORT, BROPT_NO_LL_LEARN, BROPT_VLAN_BRIDGE_BINDING, + BROPT_MCAST_VLAN_SNOOPING_ENABLED, }; struct net_bridge { @@ -426,25 +491,14 @@ struct net_bridge { BR_USER_STP, /* new RSTP in userspace */ } stp_enabled; + struct net_bridge_mcast multicast_ctx; + #ifdef CONFIG_BRIDGE_IGMP_SNOOPING + struct bridge_mcast_stats __percpu *mcast_stats; u32 hash_max; - u32 multicast_last_member_count; - u32 multicast_startup_query_count; - - u8 multicast_igmp_version; - u8 multicast_router; -#if IS_ENABLED(CONFIG_IPV6) - u8 multicast_mld_version; -#endif spinlock_t multicast_lock; - unsigned long multicast_last_member_interval; - unsigned long multicast_membership_interval; - unsigned long multicast_querier_interval; - unsigned long multicast_query_interval; - unsigned long multicast_query_response_interval; - unsigned long multicast_startup_query_interval; struct rhashtable mdb_hash_tbl; struct rhashtable sg_port_tbl; @@ -452,19 +506,6 @@ struct net_bridge { struct hlist_head mcast_gc_list; struct hlist_head mdb_list; - struct hlist_head ip4_mc_router_list; - struct timer_list ip4_mc_router_timer; - struct bridge_mcast_other_query ip4_other_query; - struct bridge_mcast_own_query ip4_own_query; - struct bridge_mcast_querier ip4_querier; - struct bridge_mcast_stats __percpu *mcast_stats; -#if IS_ENABLED(CONFIG_IPV6) - struct hlist_head ip6_mc_router_list; - struct timer_list ip6_mc_router_timer; - struct bridge_mcast_other_query ip6_other_query; - struct bridge_mcast_own_query ip6_own_query; - struct bridge_mcast_querier ip6_querier; -#endif /* IS_ENABLED(CONFIG_IPV6) */ struct work_struct mcast_gc_work; #endif @@ -476,7 +517,12 @@ struct net_bridge { u32 auto_cnt; #ifdef CONFIG_NET_SWITCHDEV - int offload_fwd_mark; + /* Counter used to make sure that hardware domains get unique + * 
identifiers in case a bridge spans multiple switchdev instances. + */ + int last_hwdom; + /* Bit mask of hardware domain numbers in use */ + unsigned long busy_hwdoms; #endif struct hlist_head fdb_list; @@ -506,7 +552,20 @@ struct br_input_skb_cb { #endif #ifdef CONFIG_NET_SWITCHDEV - int offload_fwd_mark; + /* Set if TX data plane offloading is used towards at least one + * hardware domain. + */ + u8 tx_fwd_offload:1; + /* The switchdev hardware domain from which this packet was received. + * If skb->offload_fwd_mark was set, then this packet was already + * forwarded by hardware to the other ports in the source hardware + * domain, otherwise it wasn't. + */ + int src_hwdom; + /* Bit mask of hardware domains towards this packet has already been + * transmitted using the TX data plane offload. + */ + unsigned long fwd_hwdoms; #endif }; @@ -718,6 +777,8 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, bool swdev_notify); void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid, bool offloaded); +int br_fdb_replay(const struct net_device *br_dev, const struct net_device *dev, + const void *ctx, bool adding, struct notifier_block *nb); /* br_forward.c */ enum br_pkt_type { @@ -796,9 +857,11 @@ int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, /* br_multicast.c */ #ifdef CONFIG_BRIDGE_IGMP_SNOOPING -int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, +int br_multicast_rcv(struct net_bridge_mcast **brmctx, + struct net_bridge_mcast_port **pmctx, + struct net_bridge_vlan *vlan, struct sk_buff *skb, u16 vid); -struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, +struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge_mcast *brmctx, struct sk_buff *skb, u16 vid); int br_multicast_add_port(struct net_bridge_port *port); void br_multicast_del_port(struct net_bridge_port *port); @@ -810,8 +873,9 @@ void br_multicast_leave_snoopers(struct 
net_bridge *br); void br_multicast_open(struct net_bridge *br); void br_multicast_stop(struct net_bridge *br); void br_multicast_dev_del(struct net_bridge *br); -void br_multicast_flood(struct net_bridge_mdb_entry *mdst, - struct sk_buff *skb, bool local_rcv, bool local_orig); +void br_multicast_flood(struct net_bridge_mdb_entry *mdst, struct sk_buff *skb, + struct net_bridge_mcast *brmctx, + bool local_rcv, bool local_orig); int br_multicast_set_router(struct net_bridge *br, unsigned long val); int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val); int br_multicast_toggle(struct net_bridge *br, unsigned long val, @@ -835,12 +899,13 @@ int br_mdb_hash_init(struct net_bridge *br); void br_mdb_hash_fini(struct net_bridge *br); void br_mdb_notify(struct net_device *dev, struct net_bridge_mdb_entry *mp, struct net_bridge_port_group *pg, int type); -void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port, +void br_rtr_notify(struct net_device *dev, struct net_bridge_mcast_port *pmctx, int type); void br_multicast_del_pg(struct net_bridge_mdb_entry *mp, struct net_bridge_port_group *pg, struct net_bridge_port_group __rcu **pp); -void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p, +void br_multicast_count(struct net_bridge *br, + const struct net_bridge_port *p, const struct sk_buff *skb, u8 type, u8 dir); int br_multicast_init_stats(struct net_bridge *br); void br_multicast_uninit_stats(struct net_bridge *br); @@ -849,7 +914,8 @@ void br_multicast_get_stats(const struct net_bridge *br, struct br_mcast_stats *dest); void br_mdb_init(void); void br_mdb_uninit(void); -void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify); +void br_multicast_host_join(const struct net_bridge_mcast *brmctx, + struct net_bridge_mdb_entry *mp, bool notify); void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify); void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg, u8 
filter_mode); @@ -859,6 +925,23 @@ struct net_bridge_group_src * br_multicast_find_group_src(struct net_bridge_port_group *pg, struct br_ip *ip); void br_multicast_del_group_src(struct net_bridge_group_src *src, bool fastleave); +void br_multicast_ctx_init(struct net_bridge *br, + struct net_bridge_vlan *vlan, + struct net_bridge_mcast *brmctx); +void br_multicast_ctx_deinit(struct net_bridge_mcast *brmctx); +void br_multicast_port_ctx_init(struct net_bridge_port *port, + struct net_bridge_vlan *vlan, + struct net_bridge_mcast_port *pmctx); +void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pmctx); +void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan, bool on); +void br_multicast_toggle_vlan(struct net_bridge_vlan *vlan, bool on); +int br_multicast_toggle_vlan_snooping(struct net_bridge *br, bool on, + struct netlink_ext_ack *extack); +bool br_multicast_toggle_global_vlan(struct net_bridge_vlan *vlan, bool on); + +int br_mdb_replay(struct net_device *br_dev, struct net_device *dev, + const void *ctx, bool adding, struct notifier_block *nb, + struct netlink_ext_ack *extack); static inline bool br_group_is_l2(const struct br_ip *group) { @@ -869,52 +952,65 @@ static inline bool br_group_is_l2(const struct br_ip *group) rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock)) static inline struct hlist_node * -br_multicast_get_first_rport_node(struct net_bridge *b, struct sk_buff *skb) { +br_multicast_get_first_rport_node(struct net_bridge_mcast *brmctx, + struct sk_buff *skb) +{ #if IS_ENABLED(CONFIG_IPV6) if (skb->protocol == htons(ETH_P_IPV6)) - return rcu_dereference(hlist_first_rcu(&b->ip6_mc_router_list)); + return rcu_dereference(hlist_first_rcu(&brmctx->ip6_mc_router_list)); #endif - return rcu_dereference(hlist_first_rcu(&b->ip4_mc_router_list)); + return rcu_dereference(hlist_first_rcu(&brmctx->ip4_mc_router_list)); } static inline struct net_bridge_port * -br_multicast_rport_from_node_skb(struct hlist_node *rp, struct 
sk_buff *skb) { +br_multicast_rport_from_node_skb(struct hlist_node *rp, struct sk_buff *skb) +{ + struct net_bridge_mcast_port *mctx; + #if IS_ENABLED(CONFIG_IPV6) if (skb->protocol == htons(ETH_P_IPV6)) - return hlist_entry_safe(rp, struct net_bridge_port, ip6_rlist); + mctx = hlist_entry_safe(rp, struct net_bridge_mcast_port, + ip6_rlist); + else #endif - return hlist_entry_safe(rp, struct net_bridge_port, ip4_rlist); + mctx = hlist_entry_safe(rp, struct net_bridge_mcast_port, + ip4_rlist); + + if (mctx) + return mctx->port; + else + return NULL; } -static inline bool br_ip4_multicast_is_router(struct net_bridge *br) +static inline bool br_ip4_multicast_is_router(struct net_bridge_mcast *brmctx) { - return timer_pending(&br->ip4_mc_router_timer); + return timer_pending(&brmctx->ip4_mc_router_timer); } -static inline bool br_ip6_multicast_is_router(struct net_bridge *br) +static inline bool br_ip6_multicast_is_router(struct net_bridge_mcast *brmctx) { #if IS_ENABLED(CONFIG_IPV6) - return timer_pending(&br->ip6_mc_router_timer); + return timer_pending(&brmctx->ip6_mc_router_timer); #else return false; #endif } static inline bool -br_multicast_is_router(struct net_bridge *br, struct sk_buff *skb) +br_multicast_is_router(struct net_bridge_mcast *brmctx, struct sk_buff *skb) { - switch (br->multicast_router) { + switch (brmctx->multicast_router) { case MDB_RTR_TYPE_PERM: return true; case MDB_RTR_TYPE_TEMP_QUERY: if (skb) { if (skb->protocol == htons(ETH_P_IP)) - return br_ip4_multicast_is_router(br); + return br_ip4_multicast_is_router(brmctx); else if (skb->protocol == htons(ETH_P_IPV6)) - return br_ip6_multicast_is_router(br); + return br_ip6_multicast_is_router(brmctx); } else { - return br_ip4_multicast_is_router(br) || - br_ip6_multicast_is_router(br); + return br_ip4_multicast_is_router(brmctx) || + br_ip6_multicast_is_router(brmctx); } fallthrough; default: @@ -923,14 +1019,14 @@ br_multicast_is_router(struct net_bridge *br, struct sk_buff *skb) } static 
inline bool -__br_multicast_querier_exists(struct net_bridge *br, - struct bridge_mcast_other_query *querier, - const bool is_ipv6) +__br_multicast_querier_exists(struct net_bridge_mcast *brmctx, + struct bridge_mcast_other_query *querier, + const bool is_ipv6) { bool own_querier_enabled; - if (br_opt_get(br, BROPT_MULTICAST_QUERIER)) { - if (is_ipv6 && !br_opt_get(br, BROPT_HAS_IPV6_ADDR)) + if (br_opt_get(brmctx->br, BROPT_MULTICAST_QUERIER)) { + if (is_ipv6 && !br_opt_get(brmctx->br, BROPT_HAS_IPV6_ADDR)) own_querier_enabled = false; else own_querier_enabled = true; @@ -942,18 +1038,18 @@ __br_multicast_querier_exists(struct net_bridge *br, (own_querier_enabled || timer_pending(&querier->timer)); } -static inline bool br_multicast_querier_exists(struct net_bridge *br, +static inline bool br_multicast_querier_exists(struct net_bridge_mcast *brmctx, struct ethhdr *eth, const struct net_bridge_mdb_entry *mdb) { switch (eth->h_proto) { case (htons(ETH_P_IP)): - return __br_multicast_querier_exists(br, - &br->ip4_other_query, false); + return __br_multicast_querier_exists(brmctx, + &brmctx->ip4_other_query, false); #if IS_ENABLED(CONFIG_IPV6) case (htons(ETH_P_IPV6)): - return __br_multicast_querier_exists(br, - &br->ip6_other_query, true); + return __br_multicast_querier_exists(brmctx, + &brmctx->ip6_other_query, true); #endif default: return !!mdb && br_group_is_l2(&mdb->addr); @@ -974,15 +1070,16 @@ static inline bool br_multicast_is_star_g(const struct br_ip *ip) } } -static inline bool br_multicast_should_handle_mode(const struct net_bridge *br, - __be16 proto) +static inline bool +br_multicast_should_handle_mode(const struct net_bridge_mcast *brmctx, + __be16 proto) { switch (proto) { case htons(ETH_P_IP): - return !!(br->multicast_igmp_version == 3); + return !!(brmctx->multicast_igmp_version == 3); #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): - return !!(br->multicast_mld_version == 2); + return !!(brmctx->multicast_mld_version == 2); #endif default: 
return false; @@ -994,28 +1091,90 @@ static inline int br_multicast_igmp_type(const struct sk_buff *skb) return BR_INPUT_SKB_CB(skb)->igmp; } -static inline unsigned long br_multicast_lmqt(const struct net_bridge *br) +static inline unsigned long br_multicast_lmqt(const struct net_bridge_mcast *brmctx) { - return br->multicast_last_member_interval * - br->multicast_last_member_count; + return brmctx->multicast_last_member_interval * + brmctx->multicast_last_member_count; } -static inline unsigned long br_multicast_gmi(const struct net_bridge *br) +static inline unsigned long br_multicast_gmi(const struct net_bridge_mcast *brmctx) { /* use the RFC default of 2 for QRV */ - return 2 * br->multicast_query_interval + - br->multicast_query_response_interval; + return 2 * brmctx->multicast_query_interval + + brmctx->multicast_query_response_interval; +} + +static inline bool +br_multicast_ctx_is_vlan(const struct net_bridge_mcast *brmctx) +{ + return !!brmctx->vlan; +} + +static inline bool +br_multicast_port_ctx_is_vlan(const struct net_bridge_mcast_port *pmctx) +{ + return !!pmctx->vlan; +} + +static inline struct net_bridge_mcast * +br_multicast_port_ctx_get_global(const struct net_bridge_mcast_port *pmctx) +{ + if (!br_multicast_port_ctx_is_vlan(pmctx)) + return &pmctx->port->br->multicast_ctx; + else + return &pmctx->vlan->brvlan->br_mcast_ctx; +} + +static inline bool +br_multicast_ctx_vlan_global_disabled(const struct net_bridge_mcast *brmctx) +{ + return br_opt_get(brmctx->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) && + br_multicast_ctx_is_vlan(brmctx) && + !(brmctx->vlan->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED); +} + +static inline bool +br_multicast_ctx_vlan_disabled(const struct net_bridge_mcast *brmctx) +{ + return br_multicast_ctx_is_vlan(brmctx) && + !(brmctx->vlan->priv_flags & BR_VLFLAG_MCAST_ENABLED); +} + +static inline bool +br_multicast_port_ctx_vlan_disabled(const struct net_bridge_mcast_port *pmctx) +{ + return br_multicast_port_ctx_is_vlan(pmctx) 
&& + !(pmctx->vlan->priv_flags & BR_VLFLAG_MCAST_ENABLED); +} + +static inline bool +br_multicast_port_ctx_state_disabled(const struct net_bridge_mcast_port *pmctx) +{ + return pmctx->port->state == BR_STATE_DISABLED || + (br_multicast_port_ctx_is_vlan(pmctx) && + (br_multicast_port_ctx_vlan_disabled(pmctx) || + pmctx->vlan->state == BR_STATE_DISABLED)); +} + +static inline bool +br_multicast_port_ctx_state_stopped(const struct net_bridge_mcast_port *pmctx) +{ + return br_multicast_port_ctx_state_disabled(pmctx) || + pmctx->port->state == BR_STATE_BLOCKING || + (br_multicast_port_ctx_is_vlan(pmctx) && + pmctx->vlan->state == BR_STATE_BLOCKING); } #else -static inline int br_multicast_rcv(struct net_bridge *br, - struct net_bridge_port *port, +static inline int br_multicast_rcv(struct net_bridge_mcast **brmctx, + struct net_bridge_mcast_port **pmctx, + struct net_bridge_vlan *vlan, struct sk_buff *skb, u16 vid) { return 0; } -static inline struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, +static inline struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge_mcast *brmctx, struct sk_buff *skb, u16 vid) { return NULL; @@ -1064,17 +1223,18 @@ static inline void br_multicast_dev_del(struct net_bridge *br) static inline void br_multicast_flood(struct net_bridge_mdb_entry *mdst, struct sk_buff *skb, + struct net_bridge_mcast *brmctx, bool local_rcv, bool local_orig) { } -static inline bool br_multicast_is_router(struct net_bridge *br, +static inline bool br_multicast_is_router(struct net_bridge_mcast *brmctx, struct sk_buff *skb) { return false; } -static inline bool br_multicast_querier_exists(struct net_bridge *br, +static inline bool br_multicast_querier_exists(struct net_bridge_mcast *brmctx, struct ethhdr *eth, const struct net_bridge_mdb_entry *mdb) { @@ -1118,13 +1278,65 @@ static inline int br_multicast_igmp_type(const struct sk_buff *skb) { return 0; } + +static inline void br_multicast_ctx_init(struct net_bridge *br, + struct net_bridge_vlan 
*vlan, + struct net_bridge_mcast *brmctx) +{ +} + +static inline void br_multicast_ctx_deinit(struct net_bridge_mcast *brmctx) +{ +} + +static inline void br_multicast_port_ctx_init(struct net_bridge_port *port, + struct net_bridge_vlan *vlan, + struct net_bridge_mcast_port *pmctx) +{ +} + +static inline void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pmctx) +{ +} + +static inline void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan, + bool on) +{ +} + +static inline void br_multicast_toggle_vlan(struct net_bridge_vlan *vlan, + bool on) +{ +} + +static inline int br_multicast_toggle_vlan_snooping(struct net_bridge *br, + bool on, + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} + +static inline bool br_multicast_toggle_global_vlan(struct net_bridge_vlan *vlan, + bool on) +{ + return false; +} + +static inline int br_mdb_replay(struct net_device *br_dev, + struct net_device *dev, const void *ctx, + bool adding, struct notifier_block *nb, + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} #endif /* br_vlan.c */ #ifdef CONFIG_BRIDGE_VLAN_FILTERING bool br_allowed_ingress(const struct net_bridge *br, struct net_bridge_vlan_group *vg, struct sk_buff *skb, - u16 *vid, u8 *state); + u16 *vid, u8 *state, + struct net_bridge_vlan **vlan); bool br_allowed_egress(struct net_bridge_vlan_group *vg, const struct sk_buff *skb); bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid); @@ -1168,6 +1380,9 @@ void br_vlan_notify(const struct net_bridge *br, const struct net_bridge_port *p, u16 vid, u16 vid_range, int cmd); +int br_vlan_replay(struct net_device *br_dev, struct net_device *dev, + const void *ctx, bool adding, struct notifier_block *nb, + struct netlink_ext_ack *extack); bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr, const struct net_bridge_vlan *range_end); @@ -1236,8 +1451,11 @@ static inline u16 br_vlan_flags(const struct net_bridge_vlan *v, u16 pvid) static inline bool 
br_allowed_ingress(const struct net_bridge *br, struct net_bridge_vlan_group *vg, struct sk_buff *skb, - u16 *vid, u8 *state) + u16 *vid, u8 *state, + struct net_bridge_vlan **vlan) + { + *vlan = NULL; return true; } @@ -1410,6 +1628,14 @@ static inline bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr, { return true; } + +static inline int br_vlan_replay(struct net_device *br_dev, + struct net_device *dev, const void *ctx, + bool adding, struct notifier_block *nb, + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} #endif /* br_vlan_options.c */ @@ -1424,6 +1650,14 @@ int br_vlan_process_options(const struct net_bridge *br, struct net_bridge_vlan *range_end, struct nlattr **tb, struct netlink_ext_ack *extack); +int br_vlan_rtm_process_global_options(struct net_device *dev, + const struct nlattr *attr, + int cmd, + struct netlink_ext_ack *extack); +bool br_vlan_global_opts_can_enter_range(const struct net_bridge_vlan *v_curr, + const struct net_bridge_vlan *r_end); +bool br_vlan_global_opts_fill(struct sk_buff *skb, u16 vid, u16 vid_range, + const struct net_bridge_vlan *v_opts); /* vlan state manipulation helpers using *_ONCE to annotate lock-free access */ static inline u8 br_vlan_get_state(const struct net_bridge_vlan *v) @@ -1645,7 +1879,14 @@ static inline void br_sysfs_delbr(struct net_device *dev) { return; } /* br_switchdev.c */ #ifdef CONFIG_NET_SWITCHDEV -int nbp_switchdev_mark_set(struct net_bridge_port *p); +bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb); + +void br_switchdev_frame_set_offload_fwd_mark(struct sk_buff *skb); + +void nbp_switchdev_frame_mark_tx_fwd_offload(const struct net_bridge_port *p, + struct sk_buff *skb); +void nbp_switchdev_frame_mark_tx_fwd_to_hwdom(const struct net_bridge_port *p, + struct sk_buff *skb); void nbp_switchdev_frame_mark(const struct net_bridge_port *p, struct sk_buff *skb); bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p, @@ -1659,15 +1900,32 @@ void 
br_switchdev_fdb_notify(struct net_bridge *br, int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags, struct netlink_ext_ack *extack); int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid); +void br_switchdev_init(struct net_bridge *br); static inline void br_switchdev_frame_unmark(struct sk_buff *skb) { skb->offload_fwd_mark = 0; } #else -static inline int nbp_switchdev_mark_set(struct net_bridge_port *p) +static inline bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb) +{ + return false; +} + +static inline void br_switchdev_frame_set_offload_fwd_mark(struct sk_buff *skb) +{ +} + +static inline void +nbp_switchdev_frame_mark_tx_fwd_offload(const struct net_bridge_port *p, + struct sk_buff *skb) +{ +} + +static inline void +nbp_switchdev_frame_mark_tx_fwd_to_hwdom(const struct net_bridge_port *p, + struct sk_buff *skb) { - return 0; } static inline void nbp_switchdev_frame_mark(const struct net_bridge_port *p, @@ -1710,6 +1968,11 @@ br_switchdev_fdb_notify(struct net_bridge *br, static inline void br_switchdev_frame_unmark(struct sk_buff *skb) { } + +static inline void br_switchdev_init(struct net_bridge *br) +{ +} + #endif /* CONFIG_NET_SWITCHDEV */ /* br_arp_nd_proxy.c */ diff --git a/net/bridge/br_private_mcast_eht.h b/net/bridge/br_private_mcast_eht.h index f89049f4892c..adf82a05515a 100644 --- a/net/bridge/br_private_mcast_eht.h +++ b/net/bridge/br_private_mcast_eht.h @@ -51,7 +51,8 @@ struct net_bridge_group_eht_set { #ifdef CONFIG_BRIDGE_IGMP_SNOOPING void br_multicast_eht_clean_sets(struct net_bridge_port_group *pg); -bool br_multicast_eht_handle(struct net_bridge_port_group *pg, +bool br_multicast_eht_handle(const struct net_bridge_mcast *brmctx, + struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index d3adee0f91f9..9cf9ab320c48 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -8,50 +8,65 @@ #include 
"br_private.h" -static int br_switchdev_mark_get(struct net_bridge *br, struct net_device *dev) -{ - struct net_bridge_port *p; +static struct static_key_false br_switchdev_tx_fwd_offload; - /* dev is yet to be added to the port list. */ - list_for_each_entry(p, &br->port_list, list) { - if (netdev_port_same_parent_id(dev, p->dev)) - return p->offload_fwd_mark; - } +static bool nbp_switchdev_can_offload_tx_fwd(const struct net_bridge_port *p, + const struct sk_buff *skb) +{ + if (!static_branch_unlikely(&br_switchdev_tx_fwd_offload)) + return false; - return ++br->offload_fwd_mark; + return (p->flags & BR_TX_FWD_OFFLOAD) && + (p->hwdom != BR_INPUT_SKB_CB(skb)->src_hwdom); } -int nbp_switchdev_mark_set(struct net_bridge_port *p) +bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb) { - struct netdev_phys_item_id ppid = { }; - int err; + if (!static_branch_unlikely(&br_switchdev_tx_fwd_offload)) + return false; - ASSERT_RTNL(); + return BR_INPUT_SKB_CB(skb)->tx_fwd_offload; +} - err = dev_get_port_parent_id(p->dev, &ppid, true); - if (err) { - if (err == -EOPNOTSUPP) - return 0; - return err; - } +void br_switchdev_frame_set_offload_fwd_mark(struct sk_buff *skb) +{ + skb->offload_fwd_mark = br_switchdev_frame_uses_tx_fwd_offload(skb); +} - p->offload_fwd_mark = br_switchdev_mark_get(p->br, p->dev); +/* Mark the frame for TX forwarding offload if this egress port supports it */ +void nbp_switchdev_frame_mark_tx_fwd_offload(const struct net_bridge_port *p, + struct sk_buff *skb) +{ + if (nbp_switchdev_can_offload_tx_fwd(p, skb)) + BR_INPUT_SKB_CB(skb)->tx_fwd_offload = true; +} - return 0; +/* Lazily adds the hwdom of the egress bridge port to the bit mask of hwdoms + * that the skb has been already forwarded to, to avoid further cloning to + * other ports in the same hwdom by making nbp_switchdev_allowed_egress() + * return false. 
+ */ +void nbp_switchdev_frame_mark_tx_fwd_to_hwdom(const struct net_bridge_port *p, + struct sk_buff *skb) +{ + if (nbp_switchdev_can_offload_tx_fwd(p, skb)) + set_bit(p->hwdom, &BR_INPUT_SKB_CB(skb)->fwd_hwdoms); } void nbp_switchdev_frame_mark(const struct net_bridge_port *p, struct sk_buff *skb) { - if (skb->offload_fwd_mark && !WARN_ON_ONCE(!p->offload_fwd_mark)) - BR_INPUT_SKB_CB(skb)->offload_fwd_mark = p->offload_fwd_mark; + if (p->hwdom) + BR_INPUT_SKB_CB(skb)->src_hwdom = p->hwdom; } bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p, const struct sk_buff *skb) { - return !skb->offload_fwd_mark || - BR_INPUT_SKB_CB(skb)->offload_fwd_mark != p->offload_fwd_mark; + struct br_input_skb_cb *cb = BR_INPUT_SKB_CB(skb); + + return !test_bit(p->hwdom, &cb->fwd_hwdoms) && + (!skb->offload_fwd_mark || cb->src_hwdom != p->hwdom); } /* Flags that can be offloaded to hardware */ @@ -156,3 +171,210 @@ int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid) return switchdev_port_obj_del(dev, &v.obj); } + +static int nbp_switchdev_hwdom_set(struct net_bridge_port *joining) +{ + struct net_bridge *br = joining->br; + struct net_bridge_port *p; + int hwdom; + + /* joining is yet to be added to the port list. */ + list_for_each_entry(p, &br->port_list, list) { + if (netdev_phys_item_id_same(&joining->ppid, &p->ppid)) { + joining->hwdom = p->hwdom; + return 0; + } + } + + hwdom = find_next_zero_bit(&br->busy_hwdoms, BR_HWDOM_MAX, 1); + if (hwdom >= BR_HWDOM_MAX) + return -EBUSY; + + set_bit(hwdom, &br->busy_hwdoms); + joining->hwdom = hwdom; + return 0; +} + +static void nbp_switchdev_hwdom_put(struct net_bridge_port *leaving) +{ + struct net_bridge *br = leaving->br; + struct net_bridge_port *p; + + /* leaving is no longer in the port list. 
*/ + list_for_each_entry(p, &br->port_list, list) { + if (p->hwdom == leaving->hwdom) + return; + } + + clear_bit(leaving->hwdom, &br->busy_hwdoms); +} + +static int nbp_switchdev_add(struct net_bridge_port *p, + struct netdev_phys_item_id ppid, + bool tx_fwd_offload, + struct netlink_ext_ack *extack) +{ + int err; + + if (p->offload_count) { + /* Prevent unsupported configurations such as a bridge port + * which is a bonding interface, and the member ports are from + * different hardware switches. + */ + if (!netdev_phys_item_id_same(&p->ppid, &ppid)) { + NL_SET_ERR_MSG_MOD(extack, + "Same bridge port cannot be offloaded by two physical switches"); + return -EBUSY; + } + + /* Tolerate drivers that call switchdev_bridge_port_offload() + * more than once for the same bridge port, such as when the + * bridge port is an offloaded bonding/team interface. + */ + p->offload_count++; + + return 0; + } + + p->ppid = ppid; + p->offload_count = 1; + + err = nbp_switchdev_hwdom_set(p); + if (err) + return err; + + if (tx_fwd_offload) { + p->flags |= BR_TX_FWD_OFFLOAD; + static_branch_inc(&br_switchdev_tx_fwd_offload); + } + + return 0; +} + +static void nbp_switchdev_del(struct net_bridge_port *p) +{ + if (WARN_ON(!p->offload_count)) + return; + + p->offload_count--; + + if (p->offload_count) + return; + + if (p->hwdom) + nbp_switchdev_hwdom_put(p); + + if (p->flags & BR_TX_FWD_OFFLOAD) { + p->flags &= ~BR_TX_FWD_OFFLOAD; + static_branch_dec(&br_switchdev_tx_fwd_offload); + } +} + +static int nbp_switchdev_sync_objs(struct net_bridge_port *p, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb, + struct netlink_ext_ack *extack) +{ + struct net_device *br_dev = p->br->dev; + struct net_device *dev = p->dev; + int err; + + err = br_vlan_replay(br_dev, dev, ctx, true, blocking_nb, extack); + if (err && err != -EOPNOTSUPP) + return err; + + err = br_mdb_replay(br_dev, dev, ctx, true, blocking_nb, extack); + if (err && err != -EOPNOTSUPP) + 
return err; + + /* Forwarding and termination FDB entries on the port */ + err = br_fdb_replay(br_dev, dev, ctx, true, atomic_nb); + if (err && err != -EOPNOTSUPP) + return err; + + /* Termination FDB entries on the bridge itself */ + err = br_fdb_replay(br_dev, br_dev, ctx, true, atomic_nb); + if (err && err != -EOPNOTSUPP) + return err; + + return 0; +} + +static void nbp_switchdev_unsync_objs(struct net_bridge_port *p, + const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb) +{ + struct net_device *br_dev = p->br->dev; + struct net_device *dev = p->dev; + + br_vlan_replay(br_dev, dev, ctx, false, blocking_nb, NULL); + + br_mdb_replay(br_dev, dev, ctx, false, blocking_nb, NULL); + + /* Forwarding and termination FDB entries on the port */ + br_fdb_replay(br_dev, dev, ctx, false, atomic_nb); + + /* Termination FDB entries on the bridge itself */ + br_fdb_replay(br_dev, br_dev, ctx, false, atomic_nb); +} + +/* Let the bridge know that this port is offloaded, so that it can assign a + * switchdev hardware domain to it. 
+ */ +int switchdev_bridge_port_offload(struct net_device *brport_dev, + struct net_device *dev, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb, + bool tx_fwd_offload, + struct netlink_ext_ack *extack) +{ + struct netdev_phys_item_id ppid; + struct net_bridge_port *p; + int err; + + ASSERT_RTNL(); + + p = br_port_get_rtnl(brport_dev); + if (!p) + return -ENODEV; + + err = dev_get_port_parent_id(dev, &ppid, false); + if (err) + return err; + + err = nbp_switchdev_add(p, ppid, tx_fwd_offload, extack); + if (err) + return err; + + err = nbp_switchdev_sync_objs(p, ctx, atomic_nb, blocking_nb, extack); + if (err) + goto out_switchdev_del; + + return 0; + +out_switchdev_del: + nbp_switchdev_del(p); + + return err; +} +EXPORT_SYMBOL_GPL(switchdev_bridge_port_offload); + +void switchdev_bridge_port_unoffload(struct net_device *brport_dev, + const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb) +{ + struct net_bridge_port *p; + + ASSERT_RTNL(); + + p = br_port_get_rtnl(brport_dev); + if (!p) + return; + + nbp_switchdev_unsync_objs(p, ctx, atomic_nb, blocking_nb); + + nbp_switchdev_del(p); +} +EXPORT_SYMBOL_GPL(switchdev_bridge_port_unoffload); diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 381467b691d5..953d544663d5 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -384,7 +384,7 @@ static ssize_t multicast_router_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); - return sprintf(buf, "%d\n", br->multicast_router); + return sprintf(buf, "%d\n", br->multicast_ctx.multicast_router); } static int set_multicast_router(struct net_bridge *br, unsigned long val, @@ -514,7 +514,7 @@ static ssize_t multicast_igmp_version_show(struct device *d, { struct net_bridge *br = to_bridge(d); - return sprintf(buf, "%u\n", br->multicast_igmp_version); + return sprintf(buf, "%u\n", 
br->multicast_ctx.multicast_igmp_version); } static int set_multicast_igmp_version(struct net_bridge *br, unsigned long val, @@ -536,13 +536,13 @@ static ssize_t multicast_last_member_count_show(struct device *d, char *buf) { struct net_bridge *br = to_bridge(d); - return sprintf(buf, "%u\n", br->multicast_last_member_count); + return sprintf(buf, "%u\n", br->multicast_ctx.multicast_last_member_count); } static int set_last_member_count(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { - br->multicast_last_member_count = val; + br->multicast_ctx.multicast_last_member_count = val; return 0; } @@ -558,13 +558,13 @@ static ssize_t multicast_startup_query_count_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); - return sprintf(buf, "%u\n", br->multicast_startup_query_count); + return sprintf(buf, "%u\n", br->multicast_ctx.multicast_startup_query_count); } static int set_startup_query_count(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { - br->multicast_startup_query_count = val; + br->multicast_ctx.multicast_startup_query_count = val; return 0; } @@ -581,13 +581,13 @@ static ssize_t multicast_last_member_interval_show( { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%lu\n", - jiffies_to_clock_t(br->multicast_last_member_interval)); + jiffies_to_clock_t(br->multicast_ctx.multicast_last_member_interval)); } static int set_last_member_interval(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { - br->multicast_last_member_interval = clock_t_to_jiffies(val); + br->multicast_ctx.multicast_last_member_interval = clock_t_to_jiffies(val); return 0; } @@ -604,13 +604,13 @@ static ssize_t multicast_membership_interval_show( { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%lu\n", - jiffies_to_clock_t(br->multicast_membership_interval)); + jiffies_to_clock_t(br->multicast_ctx.multicast_membership_interval)); } static int 
set_membership_interval(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { - br->multicast_membership_interval = clock_t_to_jiffies(val); + br->multicast_ctx.multicast_membership_interval = clock_t_to_jiffies(val); return 0; } @@ -628,13 +628,13 @@ static ssize_t multicast_querier_interval_show(struct device *d, { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%lu\n", - jiffies_to_clock_t(br->multicast_querier_interval)); + jiffies_to_clock_t(br->multicast_ctx.multicast_querier_interval)); } static int set_querier_interval(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { - br->multicast_querier_interval = clock_t_to_jiffies(val); + br->multicast_ctx.multicast_querier_interval = clock_t_to_jiffies(val); return 0; } @@ -652,13 +652,13 @@ static ssize_t multicast_query_interval_show(struct device *d, { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%lu\n", - jiffies_to_clock_t(br->multicast_query_interval)); + jiffies_to_clock_t(br->multicast_ctx.multicast_query_interval)); } static int set_query_interval(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { - br->multicast_query_interval = clock_t_to_jiffies(val); + br->multicast_ctx.multicast_query_interval = clock_t_to_jiffies(val); return 0; } @@ -676,13 +676,13 @@ static ssize_t multicast_query_response_interval_show( struct net_bridge *br = to_bridge(d); return sprintf( buf, "%lu\n", - jiffies_to_clock_t(br->multicast_query_response_interval)); + jiffies_to_clock_t(br->multicast_ctx.multicast_query_response_interval)); } static int set_query_response_interval(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { - br->multicast_query_response_interval = clock_t_to_jiffies(val); + br->multicast_ctx.multicast_query_response_interval = clock_t_to_jiffies(val); return 0; } @@ -700,13 +700,13 @@ static ssize_t multicast_startup_query_interval_show( struct net_bridge *br = to_bridge(d); return 
sprintf( buf, "%lu\n", - jiffies_to_clock_t(br->multicast_startup_query_interval)); + jiffies_to_clock_t(br->multicast_ctx.multicast_startup_query_interval)); } static int set_startup_query_interval(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { - br->multicast_startup_query_interval = clock_t_to_jiffies(val); + br->multicast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val); return 0; } @@ -751,7 +751,7 @@ static ssize_t multicast_mld_version_show(struct device *d, { struct net_bridge *br = to_bridge(d); - return sprintf(buf, "%u\n", br->multicast_mld_version); + return sprintf(buf, "%u\n", br->multicast_ctx.multicast_mld_version); } static int set_multicast_mld_version(struct net_bridge *br, unsigned long val, diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 72e92376eef1..e9e3aedd3178 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -244,7 +244,7 @@ BRPORT_ATTR_FLAG(isolated, BR_ISOLATED); #ifdef CONFIG_BRIDGE_IGMP_SNOOPING static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) { - return sprintf(buf, "%d\n", p->multicast_router); + return sprintf(buf, "%d\n", p->multicast_ctx.multicast_router); } static int store_multicast_router(struct net_bridge_port *p, diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index a08e9f193009..8cfd035bbaf9 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -190,6 +190,8 @@ static void br_vlan_put_master(struct net_bridge_vlan *masterv) rhashtable_remove_fast(&vg->vlan_hash, &masterv->vnode, br_vlan_rht_params); __vlan_del_list(masterv); + br_multicast_toggle_one_vlan(masterv, false); + br_multicast_ctx_deinit(&masterv->br_mcast_ctx); call_rcu(&masterv->rcu, br_master_vlan_rcu_free); } } @@ -280,10 +282,13 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags, } else { v->stats = masterv->stats; } + br_multicast_port_ctx_init(p, v, &v->port_mcast_ctx); } else { err = br_switchdev_port_vlan_add(dev, 
v->vid, flags, extack); if (err && err != -EOPNOTSUPP) goto out; + br_multicast_ctx_init(br, v, &v->br_mcast_ctx); + v->priv_flags |= BR_VLFLAG_GLOBAL_MCAST_ENABLED; } /* Add the dev mac and count the vlan only if it's usable */ @@ -306,6 +311,7 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags, __vlan_add_list(v); __vlan_add_flags(v, flags); + br_multicast_toggle_one_vlan(v, true); if (p) nbp_vlan_set_vlan_dev_state(p, v->vid); @@ -374,6 +380,8 @@ static int __vlan_del(struct net_bridge_vlan *v) br_vlan_rht_params); __vlan_del_list(v); nbp_vlan_set_vlan_dev_state(p, v->vid); + br_multicast_toggle_one_vlan(v, false); + br_multicast_port_ctx_deinit(&v->port_mcast_ctx); call_rcu(&v->rcu, nbp_vlan_rcu_free); } @@ -457,7 +465,15 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br, u64_stats_update_end(&stats->syncp); } - if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED) + /* If the skb will be sent using forwarding offload, the assumption is + * that the switchdev will inject the packet into hardware together + * with the bridge VLAN, so that it can be forwarded according to that + * VLAN. The switchdev should deal with popping the VLAN header in + * hardware on each egress port as appropriate. So only strip the VLAN + * header if forwarding offload is not being used. 
+ */ + if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED && + !br_switchdev_frame_uses_tx_fwd_offload(skb)) __vlan_hwaccel_clear_tag(skb); if (p && (p->flags & BR_VLAN_TUNNEL) && @@ -473,7 +489,8 @@ out: static bool __allowed_ingress(const struct net_bridge *br, struct net_bridge_vlan_group *vg, struct sk_buff *skb, u16 *vid, - u8 *state) + u8 *state, + struct net_bridge_vlan **vlan) { struct pcpu_sw_netstats *stats; struct net_bridge_vlan *v; @@ -538,8 +555,9 @@ static bool __allowed_ingress(const struct net_bridge *br, */ skb->vlan_tci |= pvid; - /* if stats are disabled we can avoid the lookup */ - if (!br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) { + /* if snooping and stats are disabled we can avoid the lookup */ + if (!br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) && + !br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) { if (*state == BR_STATE_FORWARDING) { *state = br_vlan_get_pvid_state(vg); return br_vlan_state_allowed(*state, true); @@ -566,6 +584,8 @@ static bool __allowed_ingress(const struct net_bridge *br, u64_stats_update_end(&stats->syncp); } + *vlan = v; + return true; drop: @@ -575,17 +595,19 @@ drop: bool br_allowed_ingress(const struct net_bridge *br, struct net_bridge_vlan_group *vg, struct sk_buff *skb, - u16 *vid, u8 *state) + u16 *vid, u8 *state, + struct net_bridge_vlan **vlan) { /* If VLAN filtering is disabled on the bridge, all packets are * permitted. */ + *vlan = NULL; if (!br_opt_get(br, BROPT_VLAN_ENABLED)) { BR_INPUT_SKB_CB(skb)->vlan_filtered = false; return true; } - return __allowed_ingress(br, vg, skb, vid, state); + return __allowed_ingress(br, vg, skb, vid, state, vlan); } /* Called under RCU. 
*/ @@ -818,14 +840,21 @@ int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val, if (br_opt_get(br, BROPT_VLAN_ENABLED) == !!val) return 0; + br_opt_toggle(br, BROPT_VLAN_ENABLED, !!val); + err = switchdev_port_attr_set(br->dev, &attr, extack); - if (err && err != -EOPNOTSUPP) + if (err && err != -EOPNOTSUPP) { + br_opt_toggle(br, BROPT_VLAN_ENABLED, !val); return err; + } - br_opt_toggle(br, BROPT_VLAN_ENABLED, !!val); br_manage_promisc(br); recalculate_group_addr(br); br_recalculate_fwd_mask(br); + if (!val && br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) { + br_info(br, "vlan filtering disabled, automatically disabling multicast vlan snooping\n"); + br_multicast_toggle_vlan_snooping(br, false, NULL); + } return 0; } @@ -1420,6 +1449,33 @@ int br_vlan_get_info(const struct net_device *dev, u16 vid, } EXPORT_SYMBOL_GPL(br_vlan_get_info); +int br_vlan_get_info_rcu(const struct net_device *dev, u16 vid, + struct bridge_vlan_info *p_vinfo) +{ + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *v; + struct net_bridge_port *p; + + p = br_port_get_check_rcu(dev); + if (p) + vg = nbp_vlan_group_rcu(p); + else if (netif_is_bridge_master(dev)) + vg = br_vlan_group_rcu(netdev_priv(dev)); + else + return -EINVAL; + + v = br_vlan_find(vg, vid); + if (!v) + return -ENOENT; + + p_vinfo->vid = vid; + p_vinfo->flags = v->flags; + if (vid == br_get_pvid(vg)) + p_vinfo->flags |= BRIDGE_VLAN_INFO_PVID; + return 0; +} +EXPORT_SYMBOL_GPL(br_vlan_get_info_rcu); + static int br_vlan_is_bind_vlan_dev(const struct net_device *dev) { return is_vlan_dev(dev) && @@ -1838,6 +1894,9 @@ int br_vlan_replay(struct net_device *br_dev, struct net_device *dev, ASSERT_RTNL(); + if (!nb) + return 0; + if (!netif_is_bridge_master(br_dev)) return -EINVAL; @@ -1884,7 +1943,6 @@ int br_vlan_replay(struct net_device *br_dev, struct net_device *dev, return err; } -EXPORT_SYMBOL_GPL(br_vlan_replay); /* check if v_curr can enter a range ending in range_end */ bool 
br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr, @@ -1901,6 +1959,7 @@ static int br_vlan_dump_dev(const struct net_device *dev, u32 dump_flags) { struct net_bridge_vlan *v, *range_start = NULL, *range_end = NULL; + bool dump_global = !!(dump_flags & BRIDGE_VLANDB_DUMPF_GLOBAL); bool dump_stats = !!(dump_flags & BRIDGE_VLANDB_DUMPF_STATS); struct net_bridge_vlan_group *vg; int idx = 0, s_idx = cb->args[1]; @@ -1919,6 +1978,10 @@ static int br_vlan_dump_dev(const struct net_device *dev, vg = br_vlan_group_rcu(br); p = NULL; } else { + /* global options are dumped only for bridge devices */ + if (dump_global) + return 0; + p = br_port_get_rcu(dev); if (WARN_ON(!p)) return -EINVAL; @@ -1941,7 +2004,7 @@ static int br_vlan_dump_dev(const struct net_device *dev, /* idx must stay at range's beginning until it is filled in */ list_for_each_entry_rcu(v, &vg->vlan_list, vlist) { - if (!br_vlan_should_use(v)) + if (!dump_global && !br_vlan_should_use(v)) continue; if (idx < s_idx) { idx++; @@ -1954,8 +2017,21 @@ static int br_vlan_dump_dev(const struct net_device *dev, continue; } - if (dump_stats || v->vid == pvid || - !br_vlan_can_enter_range(v, range_end)) { + if (dump_global) { + if (br_vlan_global_opts_can_enter_range(v, range_end)) + continue; + if (!br_vlan_global_opts_fill(skb, range_start->vid, + range_end->vid, + range_start)) { + err = -EMSGSIZE; + break; + } + /* advance number of filled vlans */ + idx += range_end->vid - range_start->vid + 1; + + range_start = v; + } else if (dump_stats || v->vid == pvid || + !br_vlan_can_enter_range(v, range_end)) { u16 vlan_flags = br_vlan_flags(range_start, pvid); if (!br_vlan_fill_vids(skb, range_start->vid, @@ -1977,11 +2053,18 @@ static int br_vlan_dump_dev(const struct net_device *dev, * - last vlan (range_start == range_end, not in range) * - last vlan range (range_start != range_end, in range) */ - if (!err && range_start && - !br_vlan_fill_vids(skb, range_start->vid, range_end->vid, - range_start, 
br_vlan_flags(range_start, pvid), - dump_stats)) - err = -EMSGSIZE; + if (!err && range_start) { + if (dump_global && + !br_vlan_global_opts_fill(skb, range_start->vid, + range_end->vid, range_start)) + err = -EMSGSIZE; + else if (!dump_global && + !br_vlan_fill_vids(skb, range_start->vid, + range_end->vid, range_start, + br_vlan_flags(range_start, pvid), + dump_stats)) + err = -EMSGSIZE; + } cb->args[1] = err ? idx : 0; @@ -2185,12 +2268,22 @@ static int br_vlan_rtm_process(struct sk_buff *skb, struct nlmsghdr *nlh, } nlmsg_for_each_attr(attr, nlh, sizeof(*bvm), rem) { - if (nla_type(attr) != BRIDGE_VLANDB_ENTRY) + switch (nla_type(attr)) { + case BRIDGE_VLANDB_ENTRY: + err = br_vlan_rtm_process_one(dev, attr, + nlh->nlmsg_type, + extack); + break; + case BRIDGE_VLANDB_GLOBAL_OPTIONS: + err = br_vlan_rtm_process_global_options(dev, attr, + nlh->nlmsg_type, + extack); + break; + default: continue; + } vlans++; - err = br_vlan_rtm_process_one(dev, attr, nlh->nlmsg_type, - extack); if (err) break; } diff --git a/net/bridge/br_vlan_options.c b/net/bridge/br_vlan_options.c index b4add9ea8964..4ef975b20185 100644 --- a/net/bridge/br_vlan_options.c +++ b/net/bridge/br_vlan_options.c @@ -258,3 +258,219 @@ int br_vlan_process_options(const struct net_bridge *br, return err; } + +bool br_vlan_global_opts_can_enter_range(const struct net_bridge_vlan *v_curr, + const struct net_bridge_vlan *r_end) +{ + return v_curr->vid - r_end->vid == 1 && + ((v_curr->priv_flags ^ r_end->priv_flags) & + BR_VLFLAG_GLOBAL_MCAST_ENABLED) == 0; +} + +bool br_vlan_global_opts_fill(struct sk_buff *skb, u16 vid, u16 vid_range, + const struct net_bridge_vlan *v_opts) +{ + struct nlattr *nest; + + nest = nla_nest_start(skb, BRIDGE_VLANDB_GLOBAL_OPTIONS); + if (!nest) + return false; + + if (nla_put_u16(skb, BRIDGE_VLANDB_GOPTS_ID, vid)) + goto out_err; + + if (vid_range && vid < vid_range && + nla_put_u16(skb, BRIDGE_VLANDB_GOPTS_RANGE, vid_range)) + goto out_err; + +#ifdef 
CONFIG_BRIDGE_IGMP_SNOOPING + if (nla_put_u8(skb, BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING, + !!(v_opts->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED))) + goto out_err; +#endif + + nla_nest_end(skb, nest); + + return true; + +out_err: + nla_nest_cancel(skb, nest); + return false; +} + +static size_t rtnl_vlan_global_opts_nlmsg_size(void) +{ + return NLMSG_ALIGN(sizeof(struct br_vlan_msg)) + + nla_total_size(0) /* BRIDGE_VLANDB_GLOBAL_OPTIONS */ + + nla_total_size(sizeof(u16)) /* BRIDGE_VLANDB_GOPTS_ID */ +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + + nla_total_size(sizeof(u8)) /* BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING */ +#endif + + nla_total_size(sizeof(u16)); /* BRIDGE_VLANDB_GOPTS_RANGE */ +} + +static void br_vlan_global_opts_notify(const struct net_bridge *br, + u16 vid, u16 vid_range) +{ + struct net_bridge_vlan *v; + struct br_vlan_msg *bvm; + struct nlmsghdr *nlh; + struct sk_buff *skb; + int err = -ENOBUFS; + + /* right now notifications are done only with rtnl held */ + ASSERT_RTNL(); + + skb = nlmsg_new(rtnl_vlan_global_opts_nlmsg_size(), GFP_KERNEL); + if (!skb) + goto out_err; + + err = -EMSGSIZE; + nlh = nlmsg_put(skb, 0, 0, RTM_NEWVLAN, sizeof(*bvm), 0); + if (!nlh) + goto out_err; + bvm = nlmsg_data(nlh); + memset(bvm, 0, sizeof(*bvm)); + bvm->family = AF_BRIDGE; + bvm->ifindex = br->dev->ifindex; + + /* need to find the vlan due to flags/options */ + v = br_vlan_find(br_vlan_group(br), vid); + if (!v) + goto out_kfree; + + if (!br_vlan_global_opts_fill(skb, vid, vid_range, v)) + goto out_err; + + nlmsg_end(skb, nlh); + rtnl_notify(skb, dev_net(br->dev), 0, RTNLGRP_BRVLAN, NULL, GFP_KERNEL); + return; + +out_err: + rtnl_set_sk_err(dev_net(br->dev), RTNLGRP_BRVLAN, err); +out_kfree: + kfree_skb(skb); +} + +static int br_vlan_process_global_one_opts(const struct net_bridge *br, + struct net_bridge_vlan_group *vg, + struct net_bridge_vlan *v, + struct nlattr **tb, + bool *changed, + struct netlink_ext_ack *extack) +{ + *changed = false; +#ifdef 
CONFIG_BRIDGE_IGMP_SNOOPING + if (tb[BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING]) { + u8 mc_snooping; + + mc_snooping = nla_get_u8(tb[BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING]); + if (br_multicast_toggle_global_vlan(v, !!mc_snooping)) + *changed = true; + } +#endif + + return 0; +} + +static const struct nla_policy br_vlan_db_gpol[BRIDGE_VLANDB_GOPTS_MAX + 1] = { + [BRIDGE_VLANDB_GOPTS_ID] = { .type = NLA_U16 }, + [BRIDGE_VLANDB_GOPTS_RANGE] = { .type = NLA_U16 }, + [BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING] = { .type = NLA_U8 }, +}; + +int br_vlan_rtm_process_global_options(struct net_device *dev, + const struct nlattr *attr, + int cmd, + struct netlink_ext_ack *extack) +{ + struct net_bridge_vlan *v, *curr_start = NULL, *curr_end = NULL; + struct nlattr *tb[BRIDGE_VLANDB_GOPTS_MAX + 1]; + struct net_bridge_vlan_group *vg; + u16 vid, vid_range = 0; + struct net_bridge *br; + int err = 0; + + if (cmd != RTM_NEWVLAN) { + NL_SET_ERR_MSG_MOD(extack, "Global vlan options support only set operation"); + return -EINVAL; + } + if (!netif_is_bridge_master(dev)) { + NL_SET_ERR_MSG_MOD(extack, "Global vlan options can only be set on bridge device"); + return -EINVAL; + } + br = netdev_priv(dev); + vg = br_vlan_group(br); + if (WARN_ON(!vg)) + return -ENODEV; + + err = nla_parse_nested(tb, BRIDGE_VLANDB_GOPTS_MAX, attr, + br_vlan_db_gpol, extack); + if (err) + return err; + + if (!tb[BRIDGE_VLANDB_GOPTS_ID]) { + NL_SET_ERR_MSG_MOD(extack, "Missing vlan entry id"); + return -EINVAL; + } + vid = nla_get_u16(tb[BRIDGE_VLANDB_GOPTS_ID]); + if (!br_vlan_valid_id(vid, extack)) + return -EINVAL; + + if (tb[BRIDGE_VLANDB_GOPTS_RANGE]) { + vid_range = nla_get_u16(tb[BRIDGE_VLANDB_GOPTS_RANGE]); + if (!br_vlan_valid_id(vid_range, extack)) + return -EINVAL; + if (vid >= vid_range) { + NL_SET_ERR_MSG_MOD(extack, "End vlan id is less than or equal to start vlan id"); + return -EINVAL; + } + } else { + vid_range = vid; + } + + for (; vid <= vid_range; vid++) { + bool changed = false; + + v = br_vlan_find(vg, 
vid); + if (!v) { + NL_SET_ERR_MSG_MOD(extack, "Vlan in range doesn't exist, can't process global options"); + err = -ENOENT; + break; + } + + err = br_vlan_process_global_one_opts(br, vg, v, tb, &changed, + extack); + if (err) + break; + + if (changed) { + /* vlan options changed, check for range */ + if (!curr_start) { + curr_start = v; + curr_end = v; + continue; + } + + if (!br_vlan_global_opts_can_enter_range(v, curr_end)) { + br_vlan_global_opts_notify(br, curr_start->vid, + curr_end->vid); + curr_start = v; + } + curr_end = v; + } else { + /* nothing changed and nothing to notify yet */ + if (!curr_start) + continue; + + br_vlan_global_opts_notify(br, curr_start->vid, + curr_end->vid); + curr_start = NULL; + curr_end = NULL; + } + } + if (curr_start) + br_vlan_global_opts_notify(br, curr_start->vid, curr_end->vid); + + return err; +} diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index 54f6d521492f..b904c06ab0cf 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -352,7 +352,7 @@ static void j1939_sk_sock_destruct(struct sock *sk) { struct j1939_sock *jsk = j1939_sk(sk); - /* This function will be call by the generic networking code, when then + /* This function will be called by the generic networking code, when * the socket is ultimately closed (sk->sk_destruct). 
* * The race between diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index bdc95bd7a851..dac70cdd3f41 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -776,7 +776,7 @@ static int j1939_session_tx_dpo(struct j1939_session *session) static int j1939_session_tx_dat(struct j1939_session *session) { struct j1939_priv *priv = session->priv; - struct j1939_sk_buff_cb *skcb; + struct j1939_sk_buff_cb *se_skcb; int offset, pkt_done, pkt_end; unsigned int len, pdelay; struct sk_buff *se_skb; @@ -788,7 +788,7 @@ static int j1939_session_tx_dat(struct j1939_session *session) if (!se_skb) return -ENOBUFS; - skcb = j1939_skb_to_cb(se_skb); + se_skcb = j1939_skb_to_cb(se_skb); tpdat = se_skb->data; ret = 0; pkt_done = 0; @@ -800,7 +800,7 @@ static int j1939_session_tx_dat(struct j1939_session *session) while (session->pkt.tx < pkt_end) { dat[0] = session->pkt.tx - session->pkt.dpo + 1; - offset = (session->pkt.tx * 7) - skcb->offset; + offset = (session->pkt.tx * 7) - se_skcb->offset; len = se_skb->len - offset; if (len > 7) len = 7; @@ -808,7 +808,8 @@ static int j1939_session_tx_dat(struct j1939_session *session) if (offset + len > se_skb->len) { netdev_err_once(priv->ndev, "%s: 0x%p: requested data outside of queued buffer: offset %i, len %i, pkt.tx: %i\n", - __func__, session, skcb->offset, se_skb->len , session->pkt.tx); + __func__, session, se_skcb->offset, + se_skb->len , session->pkt.tx); ret = -EOVERFLOW; goto out_free; } @@ -1097,7 +1098,7 @@ j1939_session_deactivate_activate_next(struct j1939_session *session) } static void __j1939_session_cancel(struct j1939_session *session, - enum j1939_xtp_abort err) + enum j1939_xtp_abort err) { struct j1939_priv *priv = session->priv; @@ -1195,13 +1196,13 @@ static enum hrtimer_restart j1939_tp_txtimer(struct hrtimer *hrtimer) static void j1939_session_completed(struct j1939_session *session) { - struct sk_buff *skb; + struct sk_buff *se_skb; if (!session->transmission) { - skb = 
j1939_session_skb_get(session); + se_skb = j1939_session_skb_get(session); /* distribute among j1939 receivers */ - j1939_sk_recv(session->priv, skb); - consume_skb(skb); + j1939_sk_recv(session->priv, se_skb); + consume_skb(se_skb); } j1939_session_deactivate_activate_next(session); @@ -1268,12 +1269,14 @@ static bool j1939_xtp_rx_cmd_bad_pgn(struct j1939_session *session, break; case J1939_ETP_CMD_RTS: - case J1939_TP_CMD_RTS: /* fall through */ + fallthrough; + case J1939_TP_CMD_RTS: abort = J1939_XTP_ABORT_BUSY; break; case J1939_ETP_CMD_CTS: - case J1939_TP_CMD_CTS: /* fall through */ + fallthrough; + case J1939_TP_CMD_CTS: abort = J1939_XTP_ABORT_ECTS_UNXPECTED_PGN; break; @@ -1282,7 +1285,8 @@ static bool j1939_xtp_rx_cmd_bad_pgn(struct j1939_session *session, break; case J1939_ETP_CMD_EOMA: - case J1939_TP_CMD_EOMA: /* fall through */ + fallthrough; + case J1939_TP_CMD_EOMA: abort = J1939_XTP_ABORT_OTHER; break; @@ -1772,7 +1776,7 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session, struct sk_buff *skb) { struct j1939_priv *priv = session->priv; - struct j1939_sk_buff_cb *skcb; + struct j1939_sk_buff_cb *skcb, *se_skcb; struct sk_buff *se_skb = NULL; const u8 *dat; u8 *tpdat; @@ -1797,7 +1801,8 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session, break; fallthrough; case J1939_TP_CMD_BAM: - case J1939_TP_CMD_CTS: /* fall through */ + fallthrough; + case J1939_TP_CMD_CTS: if (skcb->addr.type != J1939_ETP) break; fallthrough; @@ -1822,8 +1827,8 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session, goto out_session_cancel; } - skcb = j1939_skb_to_cb(se_skb); - offset = packet * 7 - skcb->offset; + se_skcb = j1939_skb_to_cb(se_skb); + offset = packet * 7 - se_skcb->offset; nbytes = se_skb->len - offset; if (nbytes > 7) nbytes = 7; @@ -1851,7 +1856,7 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session, if (packet == session->pkt.rx) session->pkt.rx++; - if (skcb->addr.type != J1939_ETP && + if 
(se_skcb->addr.type != J1939_ETP && j1939_cb_is_broadcast(&session->skcb)) { if (session->pkt.rx >= session->pkt.total) final = true; @@ -2000,7 +2005,8 @@ static void j1939_tp_cmd_recv(struct j1939_priv *priv, struct sk_buff *skb) extd = J1939_ETP; fallthrough; case J1939_TP_CMD_BAM: - case J1939_TP_CMD_RTS: /* fall through */ + fallthrough; + case J1939_TP_CMD_RTS: if (skcb->addr.type != extd) return; diff --git a/net/core/Makefile b/net/core/Makefile index f7f16650fe9e..35ced6201814 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -33,8 +33,6 @@ obj-$(CONFIG_HWBM) += hwbm.o obj-$(CONFIG_NET_DEVLINK) += devlink.o obj-$(CONFIG_GRO_CELLS) += gro_cells.o obj-$(CONFIG_FAILOVER) += failover.o -ifeq ($(CONFIG_INET),y) obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o obj-$(CONFIG_BPF_SYSCALL) += sock_map.o -endif obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o diff --git a/net/core/dev.c b/net/core/dev.c index 8f1a47ad6781..fb5d12a3d52d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4756,45 +4756,18 @@ static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb) return rxqueue; } -static u32 netif_receive_generic_xdp(struct sk_buff *skb, - struct xdp_buff *xdp, - struct bpf_prog *xdp_prog) +u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp, + struct bpf_prog *xdp_prog) { void *orig_data, *orig_data_end, *hard_start; struct netdev_rx_queue *rxqueue; - u32 metalen, act = XDP_DROP; bool orig_bcast, orig_host; u32 mac_len, frame_sz; __be16 orig_eth_type; struct ethhdr *eth; + u32 metalen, act; int off; - /* Reinjected packets coming from act_mirred or similar should - * not get XDP generic processing. - */ - if (skb_is_redirected(skb)) - return XDP_PASS; - - /* XDP packets must be linear and must have sufficient headroom - * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also - * native XDP provides, thus we need to do it here as well. 
- */ - if (skb_cloned(skb) || skb_is_nonlinear(skb) || - skb_headroom(skb) < XDP_PACKET_HEADROOM) { - int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb); - int troom = skb->tail + skb->data_len - skb->end; - - /* In case we have to go down the path and also linearize, - * then lets do the pskb_expand_head() work just once here. - */ - if (pskb_expand_head(skb, - hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0, - troom > 0 ? troom + 128 : 0, GFP_ATOMIC)) - goto do_drop; - if (skb_linearize(skb)) - goto do_drop; - } - /* The XDP program wants to see the packet starting at the MAC * header. */ @@ -4849,6 +4822,13 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, skb->protocol = eth_type_trans(skb, skb->dev); } + /* Redirect/Tx gives L2 packet, code that will reuse skb must __skb_pull + * before calling us again on redirect path. We do not call do_redirect + * as we leave that up to the caller. + * + * Caller is responsible for managing lifetime of skb (i.e. calling + * kfree_skb in response to actions it cannot handle/XDP_DROP). + */ switch (act) { case XDP_REDIRECT: case XDP_TX: @@ -4859,6 +4839,49 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, if (metalen) skb_metadata_set(skb, metalen); break; + } + + return act; +} + +static u32 netif_receive_generic_xdp(struct sk_buff *skb, + struct xdp_buff *xdp, + struct bpf_prog *xdp_prog) +{ + u32 act = XDP_DROP; + + /* Reinjected packets coming from act_mirred or similar should + * not get XDP generic processing. + */ + if (skb_is_redirected(skb)) + return XDP_PASS; + + /* XDP packets must be linear and must have sufficient headroom + * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also + * native XDP provides, thus we need to do it here as well. 
+ */ + if (skb_cloned(skb) || skb_is_nonlinear(skb) || + skb_headroom(skb) < XDP_PACKET_HEADROOM) { + int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb); + int troom = skb->tail + skb->data_len - skb->end; + + /* In case we have to go down the path and also linearize, + * then lets do the pskb_expand_head() work just once here. + */ + if (pskb_expand_head(skb, + hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0, + troom > 0 ? troom + 128 : 0, GFP_ATOMIC)) + goto do_drop; + if (skb_linearize(skb)) + goto do_drop; + } + + act = bpf_prog_run_generic_xdp(skb, xdp, xdp_prog); + switch (act) { + case XDP_REDIRECT: + case XDP_TX: + case XDP_PASS: + break; default: bpf_warn_invalid_xdp_action(act); fallthrough; @@ -5324,7 +5347,6 @@ another_round: ret = NET_RX_DROP; goto out; } - skb_reset_mac_len(skb); } if (eth_type_vlan(skb->protocol)) { @@ -5650,25 +5672,6 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp) struct bpf_prog *new = xdp->prog; int ret = 0; - if (new) { - u32 i; - - mutex_lock(&new->aux->used_maps_mutex); - - /* generic XDP does not work with DEVMAPs that can - * have a bpf_prog installed on an entry - */ - for (i = 0; i < new->aux->used_map_cnt; i++) { - if (dev_map_can_have_prog(new->aux->used_maps[i]) || - cpu_map_prog_allowed(new->aux->used_maps[i])) { - mutex_unlock(&new->aux->used_maps_mutex); - return -EINVAL; - } - } - - mutex_unlock(&new->aux->used_maps_mutex); - } - switch (xdp->command) { case XDP_SETUP_PROG: rcu_assign_pointer(dev->xdp_prog, new); @@ -10134,7 +10137,7 @@ static int netif_alloc_rx_queues(struct net_device *dev) BUG_ON(count < 1); - rx = kvzalloc(sz, GFP_KERNEL | __GFP_RETRY_MAYFAIL); + rx = kvzalloc(sz, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL); if (!rx) return -ENOMEM; @@ -10201,7 +10204,7 @@ static int netif_alloc_netdev_queues(struct net_device *dev) if (count < 1 || count > 0xffff) return -EINVAL; - tx = kvzalloc(sz, GFP_KERNEL | __GFP_RETRY_MAYFAIL); + tx = kvzalloc(sz, GFP_KERNEL_ACCOUNT | 
__GFP_RETRY_MAYFAIL); if (!tx) return -ENOMEM; @@ -10841,7 +10844,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, /* ensure 32-byte alignment of whole construct */ alloc_size += NETDEV_ALIGN - 1; - p = kvzalloc(alloc_size, GFP_KERNEL | __GFP_RETRY_MAYFAIL); + p = kvzalloc(alloc_size, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL); if (!p) return NULL; diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 478d032f34ac..950e2fe5d56a 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/kmod.h> #include <linux/netdevice.h> +#include <linux/inetdevice.h> #include <linux/etherdevice.h> #include <linux/rtnetlink.h> #include <linux/net_tstamp.h> @@ -25,79 +26,108 @@ static int dev_ifname(struct net *net, struct ifreq *ifr) return netdev_get_name(net, ifr->ifr_name, ifr->ifr_ifindex); } -static gifconf_func_t *gifconf_list[NPROTO]; - -/** - * register_gifconf - register a SIOCGIF handler - * @family: Address family - * @gifconf: Function handler - * - * Register protocol dependent address dumping routines. The handler - * that is passed must not be freed or reused until it has been replaced - * by another handler. - */ -int register_gifconf(unsigned int family, gifconf_func_t *gifconf) -{ - if (family >= NPROTO) - return -EINVAL; - gifconf_list[family] = gifconf; - return 0; -} -EXPORT_SYMBOL(register_gifconf); - /* * Perform a SIOCGIFCONF call. This structure will change * size eventually, and there is nothing I can do about it. * Thus we will need a 'compatibility mode'. */ - -int dev_ifconf(struct net *net, struct ifconf *ifc, int size) +int dev_ifconf(struct net *net, struct ifconf __user *uifc) { struct net_device *dev; - char __user *pos; - int len; - int total; - int i; + void __user *pos; + size_t size; + int len, total = 0, done; - /* - * Fetch the caller's info block. 
- */ + /* both the ifconf and the ifreq structures are slightly different */ + if (in_compat_syscall()) { + struct compat_ifconf ifc32; - pos = ifc->ifc_buf; - len = ifc->ifc_len; + if (copy_from_user(&ifc32, uifc, sizeof(struct compat_ifconf))) + return -EFAULT; - /* - * Loop over the interfaces, and write an info block for each. - */ + pos = compat_ptr(ifc32.ifcbuf); + len = ifc32.ifc_len; + size = sizeof(struct compat_ifreq); + } else { + struct ifconf ifc; + + if (copy_from_user(&ifc, uifc, sizeof(struct ifconf))) + return -EFAULT; + + pos = ifc.ifc_buf; + len = ifc.ifc_len; + size = sizeof(struct ifreq); + } - total = 0; + /* Loop over the interfaces, and write an info block for each. */ + rtnl_lock(); for_each_netdev(net, dev) { - for (i = 0; i < NPROTO; i++) { - if (gifconf_list[i]) { - int done; - if (!pos) - done = gifconf_list[i](dev, NULL, 0, size); - else - done = gifconf_list[i](dev, pos + total, - len - total, size); - if (done < 0) - return -EFAULT; - total += done; - } + if (!pos) + done = inet_gifconf(dev, NULL, 0, size); + else + done = inet_gifconf(dev, pos + total, + len - total, size); + if (done < 0) { + rtnl_unlock(); + return -EFAULT; } + total += done; } + rtnl_unlock(); - /* - * All done. Write the updated control block back to the caller. 
- */ - ifc->ifc_len = total; + return put_user(total, &uifc->ifc_len); +} + +static int dev_getifmap(struct net_device *dev, struct ifreq *ifr) +{ + struct ifmap *ifmap = &ifr->ifr_map; + + if (in_compat_syscall()) { + struct compat_ifmap *cifmap = (struct compat_ifmap *)ifmap; + + cifmap->mem_start = dev->mem_start; + cifmap->mem_end = dev->mem_end; + cifmap->base_addr = dev->base_addr; + cifmap->irq = dev->irq; + cifmap->dma = dev->dma; + cifmap->port = dev->if_port; + + return 0; + } + + ifmap->mem_start = dev->mem_start; + ifmap->mem_end = dev->mem_end; + ifmap->base_addr = dev->base_addr; + ifmap->irq = dev->irq; + ifmap->dma = dev->dma; + ifmap->port = dev->if_port; - /* - * Both BSD and Solaris return 0 here, so we do too. - */ return 0; } +static int dev_setifmap(struct net_device *dev, struct ifreq *ifr) +{ + struct compat_ifmap *cifmap = (struct compat_ifmap *)&ifr->ifr_map; + + if (!dev->netdev_ops->ndo_set_config) + return -EOPNOTSUPP; + + if (in_compat_syscall()) { + struct ifmap ifmap = { + .mem_start = cifmap->mem_start, + .mem_end = cifmap->mem_end, + .base_addr = cifmap->base_addr, + .irq = cifmap->irq, + .dma = cifmap->dma, + .port = cifmap->port, + }; + + return dev->netdev_ops->ndo_set_config(dev, &ifmap); + } + + return dev->netdev_ops->ndo_set_config(dev, &ifr->ifr_map); +} + /* * Perform the SIOCxIFxxx calls, inside rcu_read_lock() */ @@ -128,13 +158,7 @@ static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cm break; case SIOCGIFMAP: - ifr->ifr_map.mem_start = dev->mem_start; - ifr->ifr_map.mem_end = dev->mem_end; - ifr->ifr_map.base_addr = dev->base_addr; - ifr->ifr_map.irq = dev->irq; - ifr->ifr_map.dma = dev->dma; - ifr->ifr_map.port = dev->if_port; - return 0; + return dev_getifmap(dev, ifr); case SIOCGIFINDEX: ifr->ifr_ifindex = dev->ifindex; @@ -275,12 +299,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) return 0; case SIOCSIFMAP: - if (ops->ndo_set_config) { - if 
(!netif_device_present(dev)) - return -ENODEV; - return ops->ndo_set_config(dev, &ifr->ifr_map); - } - return -EOPNOTSUPP; + return dev_setifmap(dev, ifr); case SIOCADDMULTI: if (!ops->ndo_set_rx_mode || diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index a9f937975080..79df7cd9dbc1 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -57,7 +57,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops, { struct fib_rule *r; - r = kzalloc(ops->rule_size, GFP_KERNEL); + r = kzalloc(ops->rule_size, GFP_KERNEL_ACCOUNT); if (r == NULL) return -ENOMEM; @@ -541,7 +541,7 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh, goto errout; } - nlrule = kzalloc(ops->rule_size, GFP_KERNEL); + nlrule = kzalloc(ops->rule_size, GFP_KERNEL_ACCOUNT); if (!nlrule) { err = -ENOMEM; goto errout; diff --git a/net/core/filter.c b/net/core/filter.c index d70187ce851b..3b4986e96e9c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -77,6 +77,7 @@ #include <net/transp_v6.h> #include <linux/btf_ids.h> #include <net/tls.h> +#include <net/xdp.h> static const struct bpf_func_proto * bpf_sk_base_func_proto(enum bpf_func_id func_id); @@ -3880,8 +3881,7 @@ BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset) if (unlikely(meta < xdp_frame_end || meta > xdp->data)) return -EINVAL; - if (unlikely((metalen & (sizeof(__u32) - 1)) || - (metalen > 32))) + if (unlikely(xdp_metalen_invalid(metalen))) return -EACCES; xdp->data_meta = meta; @@ -4040,8 +4040,12 @@ static int xdp_do_generic_redirect_map(struct net_device *dev, goto err; consume_skb(skb); break; + case BPF_MAP_TYPE_CPUMAP: + err = cpu_map_generic_redirect(fwd, skb); + if (unlikely(err)) + goto err; + break; default: - /* TODO: Handle BPF_MAP_TYPE_CPUMAP */ err = -EBADRQC; goto err; } diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 2aadbfc5193b..39d7be03e568 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1056,8 +1056,10 @@ proto_again: 
FLOW_DISSECTOR_KEY_IPV4_ADDRS, target_container); - memcpy(&key_addrs->v4addrs, &iph->saddr, - sizeof(key_addrs->v4addrs)); + memcpy(&key_addrs->v4addrs.src, &iph->saddr, + sizeof(key_addrs->v4addrs.src)); + memcpy(&key_addrs->v4addrs.dst, &iph->daddr, + sizeof(key_addrs->v4addrs.dst)); key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; } @@ -1101,8 +1103,10 @@ proto_again: FLOW_DISSECTOR_KEY_IPV6_ADDRS, target_container); - memcpy(&key_addrs->v6addrs, &iph->saddr, - sizeof(key_addrs->v6addrs)); + memcpy(&key_addrs->v6addrs.src, &iph->saddr, + sizeof(key_addrs->v6addrs.src)); + memcpy(&key_addrs->v6addrs.dst, &iph->daddr, + sizeof(key_addrs->v6addrs.dst)); key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; } diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index 8ec7d13d2860..d0ae987d2de9 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -43,6 +43,8 @@ static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type) return "SEG6LOCAL"; case LWTUNNEL_ENCAP_RPL: return "RPL"; + case LWTUNNEL_ENCAP_IOAM6: + return "IOAM6"; case LWTUNNEL_ENCAP_IP6: case LWTUNNEL_ENCAP_IP: case LWTUNNEL_ENCAP_NONE: diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index f6af3e74fc44..670d74ab91ae 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -710,15 +710,8 @@ out: int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned int group, int echo) { struct sock *rtnl = net->rtnl; - int err = 0; - NETLINK_CB(skb).dst_group = group; - if (echo) - refcount_inc(&skb->users); - netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL); - if (echo) - err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); - return err; + return nlmsg_notify(rtnl, skb, pid, group, echo, GFP_KERNEL); } int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid) diff --git a/net/core/scm.c b/net/core/scm.c index ae3085d9aae8..5c356f0dee30 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -79,7 +79,7 @@ static int scm_fp_copy(struct cmsghdr 
*cmsg, struct scm_fp_list **fplp) if (!fpl) { - fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL); + fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL_ACCOUNT); if (!fpl) return -ENOMEM; *fplp = fpl; @@ -355,7 +355,7 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl) return NULL; new_fpl = kmemdup(fpl, offsetof(struct scm_fp_list, fp[fpl->count]), - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (new_fpl) { for (i = 0; i < fpl->count; i++) get_file(fpl->fp[i]); diff --git a/net/core/selftests.c b/net/core/selftests.c index ba7b0171974c..9077fa969892 100644 --- a/net/core/selftests.c +++ b/net/core/selftests.c @@ -318,6 +318,15 @@ static int net_test_phy_loopback_udp(struct net_device *ndev) return __net_test_loopback(ndev, &attr); } +static int net_test_phy_loopback_udp_mtu(struct net_device *ndev) +{ + struct net_packet_attrs attr = { }; + + attr.dst = ndev->dev_addr; + attr.max_size = ndev->mtu; + return __net_test_loopback(ndev, &attr); +} + static int net_test_phy_loopback_tcp(struct net_device *ndev) { struct net_packet_attrs attr = { }; @@ -345,6 +354,9 @@ static const struct net_test { .name = "PHY internal loopback, UDP ", .fn = net_test_phy_loopback_udp, }, { + .name = "PHY internal loopback, MTU ", + .fn = net_test_phy_loopback_udp_mtu, + }, { .name = "PHY internal loopback, TCP ", .fn = net_test_phy_loopback_tcp, }, { diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 60decd6420ca..ae5fa4338d9c 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -211,8 +211,6 @@ out: return psock; } -static bool sock_map_redirect_allowed(const struct sock *sk); - static int sock_map_link(struct bpf_map *map, struct sock *sk) { struct sk_psock_progs *progs = sock_map_progs(map); @@ -223,13 +221,6 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk) struct sk_psock *psock; int ret; - /* Only sockets we can redirect into/from in BPF need to hold - * refs to parser/verdict progs and have their sk_data_ready - * and sk_write_space 
callbacks overridden. - */ - if (!sock_map_redirect_allowed(sk)) - goto no_progs; - stream_verdict = READ_ONCE(progs->stream_verdict); if (stream_verdict) { stream_verdict = bpf_prog_inc_not_zero(stream_verdict); @@ -264,7 +255,6 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk) } } -no_progs: psock = sock_map_psock_get_checked(sk); if (IS_ERR(psock)) { ret = PTR_ERR(psock); @@ -527,12 +517,6 @@ static bool sk_is_tcp(const struct sock *sk) sk->sk_protocol == IPPROTO_TCP; } -static bool sk_is_udp(const struct sock *sk) -{ - return sk->sk_type == SOCK_DGRAM && - sk->sk_protocol == IPPROTO_UDP; -} - static bool sock_map_redirect_allowed(const struct sock *sk) { if (sk_is_tcp(sk)) @@ -550,10 +534,7 @@ static bool sock_map_sk_state_allowed(const struct sock *sk) { if (sk_is_tcp(sk)) return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN); - else if (sk_is_udp(sk)) - return sk_hashed(sk); - - return false; + return true; } static int sock_hash_update_common(struct bpf_map *map, void *key, @@ -1536,6 +1517,7 @@ void sock_map_close(struct sock *sk, long timeout) release_sock(sk); saved_close(sk, timeout); } +EXPORT_SYMBOL_GPL(sock_map_close); static int sock_map_iter_attach_target(struct bpf_prog *prog, union bpf_iter_link_info *linfo, diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 7eb0fb231940..abb5c596a817 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1126,7 +1126,7 @@ static int __init dccp_init(void) dccp_hashinfo.bind_bucket_cachep = kmem_cache_create("dccp_bind_bucket", sizeof(struct inet_bind_bucket), 0, - SLAB_HWCACHE_ALIGN, NULL); + SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL); if (!dccp_hashinfo.bind_bucket_cachep) goto out_free_hashinfo2; diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 00bb89b2d86f..bca1b5d66df2 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -18,16 +18,6 @@ if NET_DSA # Drivers must select the appropriate tagging format(s) -config NET_DSA_TAG_8021Q - tristate - select VLAN_8021Q - help - Unlike 
the other tagging protocols, the 802.1Q config option simply - provides helpers for other tagging implementations that might rely on - VLAN in one way or another. It is not a complete solution. - - Drivers which use these helpers should select this as dependency. - config NET_DSA_TAG_AR9331 tristate "Tag driver for Atheros AR9331 SoC with built-in switch" help @@ -126,7 +116,6 @@ config NET_DSA_TAG_OCELOT_8021Q tristate "Tag driver for Ocelot family of switches, using VLAN" depends on MSCC_OCELOT_SWITCH_LIB || \ (MSCC_OCELOT_SWITCH_LIB=n && COMPILE_TEST) - select NET_DSA_TAG_8021Q help Say Y or M if you want to enable support for tagging frames with a custom VLAN-based header. Frames that require timestamping, such as @@ -149,7 +138,6 @@ config NET_DSA_TAG_LAN9303 config NET_DSA_TAG_SJA1105 tristate "Tag driver for NXP SJA1105 switches" - select NET_DSA_TAG_8021Q select PACKING help Say Y or M if you want to enable support for tagging frames with the diff --git a/net/dsa/Makefile b/net/dsa/Makefile index 44bc79952b8b..67ea009f242c 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -1,10 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 # the core obj-$(CONFIG_NET_DSA) += dsa_core.o -dsa_core-y += dsa.o dsa2.o master.o port.o slave.o switch.o +dsa_core-y += dsa.o dsa2.o master.o port.o slave.o switch.o tag_8021q.o # tagging formats -obj-$(CONFIG_NET_DSA_TAG_8021Q) += tag_8021q.o obj-$(CONFIG_NET_DSA_TAG_AR9331) += tag_ar9331.o obj-$(CONFIG_NET_DSA_TAG_BRCM_COMMON) += tag_brcm.o obj-$(CONFIG_NET_DSA_TAG_DSA_COMMON) += tag_dsa.o diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 185629f27f80..c7fa85fb3086 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -1044,6 +1044,7 @@ static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index) dp->ds = ds; dp->index = index; + dp->bridge_num = -1; INIT_LIST_HEAD(&dp->list); list_add_tail(&dp->list, &dst->ports); @@ -1265,6 +1266,9 @@ static int dsa_switch_parse_member_of(struct dsa_switch *ds, return -EEXIST; } + if 
(ds->dst->last_switch < ds->index) + ds->dst->last_switch = ds->index; + return 0; } diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index f201c33980bf..da3ad02d6ceb 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -14,6 +14,8 @@ #include <net/dsa.h> #include <net/gro_cells.h> +#define DSA_MAX_NUM_OFFLOADING_BRIDGES BITS_PER_LONG + enum { DSA_NOTIFIER_AGEING_TIME, DSA_NOTIFIER_BRIDGE_JOIN, @@ -39,6 +41,8 @@ enum { DSA_NOTIFIER_MRP_DEL, DSA_NOTIFIER_MRP_ADD_RING_ROLE, DSA_NOTIFIER_MRP_DEL_RING_ROLE, + DSA_NOTIFIER_TAG_8021Q_VLAN_ADD, + DSA_NOTIFIER_TAG_8021Q_VLAN_DEL, }; /* DSA_NOTIFIER_AGEING_TIME */ @@ -113,6 +117,14 @@ struct dsa_notifier_mrp_ring_role_info { int port; }; +/* DSA_NOTIFIER_TAG_8021Q_VLAN_* */ +struct dsa_notifier_tag_8021q_vlan_info { + int tree_index; + int sw_index; + int port; + u16 vid; +}; + struct dsa_switchdev_event_work { struct dsa_switch *ds; int port; @@ -194,16 +206,14 @@ void dsa_port_disable_rt(struct dsa_port *dp); void dsa_port_disable(struct dsa_port *dp); int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br, struct netlink_ext_ack *extack); -int dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br, - struct netlink_ext_ack *extack); +void dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br); void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br); int dsa_port_lag_change(struct dsa_port *dp, struct netdev_lag_lower_state_info *linfo); int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag_dev, struct netdev_lag_upper_info *uinfo, struct netlink_ext_ack *extack); -int dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag_dev, - struct netlink_ext_ack *extack); +void dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag_dev); void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag_dev); int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, struct netlink_ext_ack *extack); @@ -253,16 +263,18 
@@ int dsa_port_link_register_of(struct dsa_port *dp); void dsa_port_link_unregister_of(struct dsa_port *dp); int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr); void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr); +int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid); +void dsa_port_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid); extern const struct phylink_mac_ops dsa_port_phylink_mac_ops; static inline bool dsa_port_offloads_bridge_port(struct dsa_port *dp, - struct net_device *dev) + const struct net_device *dev) { return dsa_port_to_bridge_port(dp) == dev; } static inline bool dsa_port_offloads_bridge(struct dsa_port *dp, - struct net_device *bridge_dev) + const struct net_device *bridge_dev) { /* DSA ports connected to a bridge, and event was emitted * for the bridge. @@ -272,7 +284,7 @@ static inline bool dsa_port_offloads_bridge(struct dsa_port *dp, /* Returns true if any port of this tree offloads the given net_device */ static inline bool dsa_tree_offloads_bridge_port(struct dsa_switch_tree *dst, - struct net_device *dev) + const struct net_device *dev) { struct dsa_port *dp; @@ -283,6 +295,19 @@ static inline bool dsa_tree_offloads_bridge_port(struct dsa_switch_tree *dst, return false; } +/* Returns true if any port of this tree offloads the given bridge */ +static inline bool dsa_tree_offloads_bridge(struct dsa_switch_tree *dst, + const struct net_device *bridge_dev) +{ + struct dsa_port *dp; + + list_for_each_entry(dp, &dst->ports, list) + if (dsa_port_offloads_bridge(dp, bridge_dev)) + return true; + + return false; +} + /* slave.c */ extern const struct dsa_device_ops notag_netdev_ops; extern struct notifier_block dsa_slave_switchdev_notifier; @@ -372,6 +397,49 @@ static inline struct sk_buff *dsa_untag_bridge_pvid(struct sk_buff *skb) return skb; } +/* For switches without hardware support for DSA tagging to be able + * to support termination through the bridge. 
+ */ +static inline struct net_device * +dsa_find_designated_bridge_port_by_vid(struct net_device *master, u16 vid) +{ + struct dsa_port *cpu_dp = master->dsa_ptr; + struct dsa_switch_tree *dst = cpu_dp->dst; + struct bridge_vlan_info vinfo; + struct net_device *slave; + struct dsa_port *dp; + int err; + + list_for_each_entry(dp, &dst->ports, list) { + if (dp->type != DSA_PORT_TYPE_USER) + continue; + + if (!dp->bridge_dev) + continue; + + if (dp->stp_state != BR_STATE_LEARNING && + dp->stp_state != BR_STATE_FORWARDING) + continue; + + /* Since the bridge might learn this packet, keep the CPU port + * affinity with the port that will be used for the reply on + * xmit. + */ + if (dp->cpu_dp != cpu_dp) + continue; + + slave = dp->slave; + + err = br_vlan_get_info_rcu(slave, vid, &vinfo); + if (err) + continue; + + return slave; + } + + return NULL; +} + /* switch.c */ int dsa_switch_register_notifier(struct dsa_switch *ds); void dsa_switch_unregister_notifier(struct dsa_switch *ds); @@ -386,6 +454,16 @@ int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst, const struct dsa_device_ops *tag_ops, const struct dsa_device_ops *old_tag_ops); +/* tag_8021q.c */ +int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, + struct dsa_notifier_bridge_info *info); +int dsa_tag_8021q_bridge_leave(struct dsa_switch *ds, + struct dsa_notifier_bridge_info *info); +int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds, + struct dsa_notifier_tag_8021q_vlan_info *info); +int dsa_switch_tag_8021q_vlan_del(struct dsa_switch *ds, + struct dsa_notifier_tag_8021q_vlan_info *info); + extern struct list_head dsa_tree_list; #endif diff --git a/net/dsa/port.c b/net/dsa/port.c index 28b45b7e66df..b927d94b6934 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -167,8 +167,8 @@ static void dsa_port_clear_brport_flags(struct dsa_port *dp) } } -static int dsa_port_switchdev_sync(struct dsa_port *dp, - struct netlink_ext_ack *extack) +static int dsa_port_switchdev_sync_attrs(struct dsa_port *dp, 
+ struct netlink_ext_ack *extack) { struct net_device *brport_dev = dsa_port_to_bridge_port(dp); struct net_device *br = dp->bridge_dev; @@ -194,59 +194,6 @@ static int dsa_port_switchdev_sync(struct dsa_port *dp, if (err && err != -EOPNOTSUPP) return err; - err = br_mdb_replay(br, brport_dev, dp, true, - &dsa_slave_switchdev_blocking_notifier, extack); - if (err && err != -EOPNOTSUPP) - return err; - - /* Forwarding and termination FDB entries on the port */ - err = br_fdb_replay(br, brport_dev, dp, true, - &dsa_slave_switchdev_notifier); - if (err && err != -EOPNOTSUPP) - return err; - - /* Termination FDB entries on the bridge itself */ - err = br_fdb_replay(br, br, dp, true, &dsa_slave_switchdev_notifier); - if (err && err != -EOPNOTSUPP) - return err; - - err = br_vlan_replay(br, brport_dev, dp, true, - &dsa_slave_switchdev_blocking_notifier, extack); - if (err && err != -EOPNOTSUPP) - return err; - - return 0; -} - -static int dsa_port_switchdev_unsync_objs(struct dsa_port *dp, - struct net_device *br, - struct netlink_ext_ack *extack) -{ - struct net_device *brport_dev = dsa_port_to_bridge_port(dp); - int err; - - /* Delete the switchdev objects left on this port */ - err = br_mdb_replay(br, brport_dev, dp, false, - &dsa_slave_switchdev_blocking_notifier, extack); - if (err && err != -EOPNOTSUPP) - return err; - - /* Forwarding and termination FDB entries on the port */ - err = br_fdb_replay(br, brport_dev, dp, false, - &dsa_slave_switchdev_notifier); - if (err && err != -EOPNOTSUPP) - return err; - - /* Termination FDB entries on the bridge itself */ - err = br_fdb_replay(br, br, dp, false, &dsa_slave_switchdev_notifier); - if (err && err != -EOPNOTSUPP) - return err; - - err = br_vlan_replay(br, brport_dev, dp, false, - &dsa_slave_switchdev_blocking_notifier, extack); - if (err && err != -EOPNOTSUPP) - return err; - return 0; } @@ -283,6 +230,83 @@ static void dsa_port_switchdev_unsync_attrs(struct dsa_port *dp) */ } +static int 
dsa_tree_find_bridge_num(struct dsa_switch_tree *dst, + struct net_device *bridge_dev) +{ + struct dsa_port *dp; + + /* When preparing the offload for a port, it will have a valid + * dp->bridge_dev pointer but a not yet valid dp->bridge_num. + * However there might be other ports having the same dp->bridge_dev + * and a valid dp->bridge_num, so just ignore this port. + */ + list_for_each_entry(dp, &dst->ports, list) + if (dp->bridge_dev == bridge_dev && dp->bridge_num != -1) + return dp->bridge_num; + + return -1; +} + +static void dsa_port_bridge_tx_fwd_unoffload(struct dsa_port *dp, + struct net_device *bridge_dev) +{ + struct dsa_switch_tree *dst = dp->ds->dst; + int bridge_num = dp->bridge_num; + struct dsa_switch *ds = dp->ds; + + /* No bridge TX forwarding offload => do nothing */ + if (!ds->ops->port_bridge_tx_fwd_unoffload || dp->bridge_num == -1) + return; + + dp->bridge_num = -1; + + /* Check if the bridge is still in use, otherwise it is time + * to clean it up so we can reuse this bridge_num later. + */ + if (!dsa_tree_find_bridge_num(dst, bridge_dev)) + clear_bit(bridge_num, &dst->fwd_offloading_bridges); + + /* Notify the chips only once the offload has been deactivated, so + * that they can update their configuration accordingly. 
+ */ + ds->ops->port_bridge_tx_fwd_unoffload(ds, dp->index, bridge_dev, + bridge_num); +} + +static bool dsa_port_bridge_tx_fwd_offload(struct dsa_port *dp, + struct net_device *bridge_dev) +{ + struct dsa_switch_tree *dst = dp->ds->dst; + struct dsa_switch *ds = dp->ds; + int bridge_num, err; + + if (!ds->ops->port_bridge_tx_fwd_offload) + return false; + + bridge_num = dsa_tree_find_bridge_num(dst, bridge_dev); + if (bridge_num < 0) { + /* First port that offloads TX forwarding for this bridge */ + bridge_num = find_first_zero_bit(&dst->fwd_offloading_bridges, + DSA_MAX_NUM_OFFLOADING_BRIDGES); + if (bridge_num >= ds->num_fwd_offloading_bridges) + return false; + + set_bit(bridge_num, &dst->fwd_offloading_bridges); + } + + dp->bridge_num = bridge_num; + + /* Notify the driver */ + err = ds->ops->port_bridge_tx_fwd_offload(ds, dp->index, bridge_dev, + bridge_num); + if (err) { + dsa_port_bridge_tx_fwd_unoffload(dp, bridge_dev); + return false; + } + + return true; +} + int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br, struct netlink_ext_ack *extack) { @@ -292,6 +316,9 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br, .port = dp->index, .br = br, }; + struct net_device *dev = dp->slave; + struct net_device *brport_dev; + bool tx_fwd_offload; int err; /* Here the interface is already bridged. 
Reflect the current @@ -299,16 +326,31 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br, */ dp->bridge_dev = br; + brport_dev = dsa_port_to_bridge_port(dp); + err = dsa_broadcast(DSA_NOTIFIER_BRIDGE_JOIN, &info); if (err) goto out_rollback; - err = dsa_port_switchdev_sync(dp, extack); + tx_fwd_offload = dsa_port_bridge_tx_fwd_offload(dp, br); + + err = switchdev_bridge_port_offload(brport_dev, dev, dp, + &dsa_slave_switchdev_notifier, + &dsa_slave_switchdev_blocking_notifier, + tx_fwd_offload, extack); if (err) goto out_rollback_unbridge; + err = dsa_port_switchdev_sync_attrs(dp, extack); + if (err) + goto out_rollback_unoffload; + return 0; +out_rollback_unoffload: + switchdev_bridge_port_unoffload(brport_dev, dp, + &dsa_slave_switchdev_notifier, + &dsa_slave_switchdev_blocking_notifier); out_rollback_unbridge: dsa_broadcast(DSA_NOTIFIER_BRIDGE_LEAVE, &info); out_rollback: @@ -316,10 +358,13 @@ out_rollback: return err; } -int dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br, - struct netlink_ext_ack *extack) +void dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br) { - return dsa_port_switchdev_unsync_objs(dp, br, extack); + struct net_device *brport_dev = dsa_port_to_bridge_port(dp); + + switchdev_bridge_port_unoffload(brport_dev, dp, + &dsa_slave_switchdev_notifier, + &dsa_slave_switchdev_blocking_notifier); } void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br) @@ -337,6 +382,8 @@ void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br) */ dp->bridge_dev = NULL; + dsa_port_bridge_tx_fwd_unoffload(dp, br); + err = dsa_broadcast(DSA_NOTIFIER_BRIDGE_LEAVE, &info); if (err) pr_err("DSA: failed to notify DSA_NOTIFIER_BRIDGE_LEAVE\n"); @@ -409,13 +456,10 @@ err_lag_join: return err; } -int dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag, - struct netlink_ext_ack *extack) +void dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag) { if 
(dp->bridge_dev) - return dsa_port_pre_bridge_leave(dp, dp->bridge_dev, extack); - - return 0; + dsa_port_pre_bridge_leave(dp, dp->bridge_dev); } void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag) @@ -844,7 +888,6 @@ int dsa_port_mrp_del_ring_role(const struct dsa_port *dp, void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp, const struct dsa_device_ops *tag_ops) { - cpu_dp->filter = tag_ops->filter; cpu_dp->rcv = tag_ops->rcv; cpu_dp->tag_ops = tag_ops; } @@ -1217,3 +1260,31 @@ void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr) if (err) pr_err("DSA: failed to notify DSA_NOTIFIER_HSR_LEAVE\n"); } + +int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid) +{ + struct dsa_notifier_tag_8021q_vlan_info info = { + .tree_index = dp->ds->dst->index, + .sw_index = dp->ds->index, + .port = dp->index, + .vid = vid, + }; + + return dsa_broadcast(DSA_NOTIFIER_TAG_8021Q_VLAN_ADD, &info); +} + +void dsa_port_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid) +{ + struct dsa_notifier_tag_8021q_vlan_info info = { + .tree_index = dp->ds->dst->index, + .sw_index = dp->ds->index, + .port = dp->index, + .vid = vid, + }; + int err; + + err = dsa_broadcast(DSA_NOTIFIER_TAG_8021Q_VLAN_DEL, &info); + if (err) + pr_err("DSA: failed to notify tag_8021q VLAN deletion: %pe\n", + ERR_PTR(err)); +} diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 532085da8d8f..8c112d7d5b0a 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -2056,20 +2056,16 @@ static int dsa_slave_prechangeupper(struct net_device *dev, struct netdev_notifier_changeupper_info *info) { struct dsa_port *dp = dsa_slave_to_port(dev); - struct netlink_ext_ack *extack; - int err = 0; - - extack = netdev_notifier_info_to_extack(&info->info); if (netif_is_bridge_master(info->upper_dev) && !info->linking) - err = dsa_port_pre_bridge_leave(dp, info->upper_dev, extack); + dsa_port_pre_bridge_leave(dp, info->upper_dev); else if (netif_is_lag_master(info->upper_dev) && !info->linking) - err = 
dsa_port_pre_lag_leave(dp, info->upper_dev, extack); + dsa_port_pre_lag_leave(dp, info->upper_dev); /* dsa_port_pre_hsr_leave is not yet necessary since hsr cannot be * meaningfully enslaved to a bridge yet */ - return notifier_from_errno(err); + return NOTIFY_DONE; } static int @@ -2357,26 +2353,98 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work) kfree(switchdev_work); } -static int dsa_lower_dev_walk(struct net_device *lower_dev, - struct netdev_nested_priv *priv) +static bool dsa_foreign_dev_check(const struct net_device *dev, + const struct net_device *foreign_dev) { - if (dsa_slave_dev_check(lower_dev)) { - priv->data = (void *)netdev_priv(lower_dev); - return 1; - } + const struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch_tree *dst = dp->ds->dst; - return 0; + if (netif_is_bridge_master(foreign_dev)) + return !dsa_tree_offloads_bridge(dst, foreign_dev); + + if (netif_is_bridge_port(foreign_dev)) + return !dsa_tree_offloads_bridge_port(dst, foreign_dev); + + /* Everything else is foreign */ + return true; } -static struct dsa_slave_priv *dsa_slave_dev_lower_find(struct net_device *dev) +static int dsa_slave_fdb_event(struct net_device *dev, + const struct net_device *orig_dev, + const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info, + unsigned long event) { - struct netdev_nested_priv priv = { - .data = NULL, - }; + struct dsa_switchdev_event_work *switchdev_work; + struct dsa_port *dp = dsa_slave_to_port(dev); + bool host_addr = fdb_info->is_local; + struct dsa_switch *ds = dp->ds; - netdev_walk_all_lower_dev_rcu(dev, dsa_lower_dev_walk, &priv); + if (ctx && ctx != dp) + return 0; + + if (!ds->ops->port_fdb_add || !ds->ops->port_fdb_del) + return -EOPNOTSUPP; + + if (dsa_slave_dev_check(orig_dev) && + switchdev_fdb_is_dynamically_learned(fdb_info)) + return 0; + + /* FDB entries learned by the software bridge should be installed as + * host addresses only if the driver requests assisted learning. 
+ */ + if (switchdev_fdb_is_dynamically_learned(fdb_info) && + !ds->assisted_learning_on_cpu_port) + return 0; + + /* Also treat FDB entries on foreign interfaces bridged with us as host + * addresses. + */ + if (dsa_foreign_dev_check(dev, orig_dev)) + host_addr = true; + + switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC); + if (!switchdev_work) + return -ENOMEM; - return (struct dsa_slave_priv *)priv.data; + netdev_dbg(dev, "%s FDB entry towards %s, addr %pM vid %d%s\n", + event == SWITCHDEV_FDB_ADD_TO_DEVICE ? "Adding" : "Deleting", + orig_dev->name, fdb_info->addr, fdb_info->vid, + host_addr ? " as host address" : ""); + + INIT_WORK(&switchdev_work->work, dsa_slave_switchdev_event_work); + switchdev_work->ds = ds; + switchdev_work->port = dp->index; + switchdev_work->event = event; + switchdev_work->dev = dev; + + ether_addr_copy(switchdev_work->addr, fdb_info->addr); + switchdev_work->vid = fdb_info->vid; + switchdev_work->host_addr = host_addr; + + /* Hold a reference for dsa_fdb_offload_notify */ + dev_hold(dev); + dsa_schedule_work(&switchdev_work->work); + + return 0; +} + +static int +dsa_slave_fdb_add_to_device(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info) +{ + return dsa_slave_fdb_event(dev, orig_dev, ctx, fdb_info, + SWITCHDEV_FDB_ADD_TO_DEVICE); +} + +static int +dsa_slave_fdb_del_to_device(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info) +{ + return dsa_slave_fdb_event(dev, orig_dev, ctx, fdb_info, + SWITCHDEV_FDB_DEL_TO_DEVICE); } /* Called under rcu_read_lock() */ @@ -2384,10 +2452,6 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = switchdev_notifier_info_to_dev(ptr); - const struct switchdev_notifier_fdb_info *fdb_info; - struct dsa_switchdev_event_work *switchdev_work; - bool host_addr = 
false; - struct dsa_port *dp; int err; switch (event) { @@ -2397,92 +2461,19 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused, dsa_slave_port_attr_set); return notifier_from_errno(err); case SWITCHDEV_FDB_ADD_TO_DEVICE: + err = switchdev_handle_fdb_add_to_device(dev, ptr, + dsa_slave_dev_check, + dsa_foreign_dev_check, + dsa_slave_fdb_add_to_device, + NULL); + return notifier_from_errno(err); case SWITCHDEV_FDB_DEL_TO_DEVICE: - fdb_info = ptr; - - if (dsa_slave_dev_check(dev)) { - dp = dsa_slave_to_port(dev); - - if (fdb_info->is_local) - host_addr = true; - else if (!fdb_info->added_by_user) - return NOTIFY_OK; - } else { - /* Snoop addresses added to foreign interfaces - * bridged with us, or the bridge - * itself. Dynamically learned addresses can - * also be added for switches that don't - * automatically learn SA from CPU-injected - * traffic. - */ - struct net_device *br_dev; - struct dsa_slave_priv *p; - - if (netif_is_bridge_master(dev)) - br_dev = dev; - else - br_dev = netdev_master_upper_dev_get_rcu(dev); - - if (!br_dev) - return NOTIFY_DONE; - - if (!netif_is_bridge_master(br_dev)) - return NOTIFY_DONE; - - p = dsa_slave_dev_lower_find(br_dev); - if (!p) - return NOTIFY_DONE; - - dp = p->dp; - host_addr = fdb_info->is_local; - - /* FDB entries learned by the software bridge should - * be installed as host addresses only if the driver - * requests assisted learning. - * On the other hand, FDB entries for local termination - * should always be installed. - */ - if (!fdb_info->added_by_user && !fdb_info->is_local && - !dp->ds->assisted_learning_on_cpu_port) - return NOTIFY_DONE; - - /* When the bridge learns an address on an offloaded - * LAG we don't want to send traffic to the CPU, the - * other ports bridged with the LAG should be able to - * autonomously forward towards it. 
- * On the other hand, if the address is local - * (therefore not learned) then we want to trap it to - * the CPU regardless of whether the interface it - * belongs to is offloaded or not. - */ - if (dsa_tree_offloads_bridge_port(dp->ds->dst, dev) && - !fdb_info->is_local) - return NOTIFY_DONE; - } - - if (!dp->ds->ops->port_fdb_add || !dp->ds->ops->port_fdb_del) - return NOTIFY_DONE; - - switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC); - if (!switchdev_work) - return NOTIFY_BAD; - - INIT_WORK(&switchdev_work->work, - dsa_slave_switchdev_event_work); - switchdev_work->ds = dp->ds; - switchdev_work->port = dp->index; - switchdev_work->event = event; - switchdev_work->dev = dev; - - ether_addr_copy(switchdev_work->addr, - fdb_info->addr); - switchdev_work->vid = fdb_info->vid; - switchdev_work->host_addr = host_addr; - - /* Hold a reference for dsa_fdb_offload_notify */ - dev_hold(dev); - dsa_schedule_work(&switchdev_work->work); - break; + err = switchdev_handle_fdb_del_to_device(dev, ptr, + dsa_slave_dev_check, + dsa_foreign_dev_check, + dsa_slave_fdb_del_to_device, + NULL); + return notifier_from_errno(err); default: return NOTIFY_DONE; } diff --git a/net/dsa/switch.c b/net/dsa/switch.c index 5ece05dfd8f2..fd1a1c6bf9cf 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -90,18 +90,25 @@ static int dsa_switch_bridge_join(struct dsa_switch *ds, struct dsa_notifier_bridge_info *info) { struct dsa_switch_tree *dst = ds->dst; + int err; if (dst->index == info->tree_index && ds->index == info->sw_index && - ds->ops->port_bridge_join) - return ds->ops->port_bridge_join(ds, info->port, info->br); + ds->ops->port_bridge_join) { + err = ds->ops->port_bridge_join(ds, info->port, info->br); + if (err) + return err; + } if ((dst->index != info->tree_index || ds->index != info->sw_index) && - ds->ops->crosschip_bridge_join) - return ds->ops->crosschip_bridge_join(ds, info->tree_index, - info->sw_index, - info->port, info->br); + ds->ops->crosschip_bridge_join) 
{ + err = ds->ops->crosschip_bridge_join(ds, info->tree_index, + info->sw_index, + info->port, info->br); + if (err) + return err; + } - return 0; + return dsa_tag_8021q_bridge_join(ds, info); } static int dsa_switch_bridge_leave(struct dsa_switch *ds, @@ -151,7 +158,8 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds, if (err && err != EOPNOTSUPP) return err; } - return 0; + + return dsa_tag_8021q_bridge_leave(ds, info); } /* Matches for all upstream-facing ports (the CPU port and all upstream-facing @@ -726,6 +734,12 @@ static int dsa_switch_event(struct notifier_block *nb, case DSA_NOTIFIER_MRP_DEL_RING_ROLE: err = dsa_switch_mrp_del_ring_role(ds, info); break; + case DSA_NOTIFIER_TAG_8021Q_VLAN_ADD: + err = dsa_switch_tag_8021q_vlan_add(ds, info); + break; + case DSA_NOTIFIER_TAG_8021Q_VLAN_DEL: + err = dsa_switch_tag_8021q_vlan_del(ds, info); + break; default: err = -EOPNOTSUPP; break; diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index 4aa29f90ecea..654697ebb6f3 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -17,7 +17,7 @@ * * | 11 | 10 | 9 | 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | * +-----------+-----+-----------------+-----------+-----------------------+ - * | DIR | SVL | SWITCH_ID | SUBVLAN | PORT | + * | DIR | VBID| SWITCH_ID | VBID | PORT | * +-----------+-----+-----------------+-----------+-----------------------+ * * DIR - VID[11:10]: @@ -27,24 +27,14 @@ * These values make the special VIDs of 0, 1 and 4095 to be left * unused by this coding scheme. * - * SVL/SUBVLAN - { VID[9], VID[5:4] }: - * Sub-VLAN encoding. Valid only when DIR indicates an RX VLAN. - * * 0 (0b000): Field does not encode a sub-VLAN, either because - * received traffic is untagged, PVID-tagged or because a second - * VLAN tag is present after this tag and not inside of it. - * * 1 (0b001): Received traffic is tagged with a VID value private - * to the host. 
This field encodes the index in the host's lookup - * table through which the value of the ingress VLAN ID can be - * recovered. - * * 2 (0b010): Field encodes a sub-VLAN. - * ... - * * 7 (0b111): Field encodes a sub-VLAN. - * When DIR indicates a TX VLAN, SUBVLAN must be transmitted as zero - * (by the host) and ignored on receive (by the switch). - * * SWITCH_ID - VID[8:6]: * Index of switch within DSA tree. Must be between 0 and 7. * + * VBID - { VID[9], VID[5:4] }: + * Virtual bridge ID. If between 1 and 7, packet targets the broadcast + * domain of a bridge. If transmitted as zero, packet targets a single + * port. Field only valid on transmit, must be ignored on receive. + * * PORT - VID[3:0]: * Index of switch port. Must be between 0 and 15. */ @@ -61,23 +51,30 @@ #define DSA_8021Q_SWITCH_ID(x) (((x) << DSA_8021Q_SWITCH_ID_SHIFT) & \ DSA_8021Q_SWITCH_ID_MASK) -#define DSA_8021Q_SUBVLAN_HI_SHIFT 9 -#define DSA_8021Q_SUBVLAN_HI_MASK GENMASK(9, 9) -#define DSA_8021Q_SUBVLAN_LO_SHIFT 4 -#define DSA_8021Q_SUBVLAN_LO_MASK GENMASK(5, 4) -#define DSA_8021Q_SUBVLAN_HI(x) (((x) & GENMASK(2, 2)) >> 2) -#define DSA_8021Q_SUBVLAN_LO(x) ((x) & GENMASK(1, 0)) -#define DSA_8021Q_SUBVLAN(x) \ - (((DSA_8021Q_SUBVLAN_LO(x) << DSA_8021Q_SUBVLAN_LO_SHIFT) & \ - DSA_8021Q_SUBVLAN_LO_MASK) | \ - ((DSA_8021Q_SUBVLAN_HI(x) << DSA_8021Q_SUBVLAN_HI_SHIFT) & \ - DSA_8021Q_SUBVLAN_HI_MASK)) +#define DSA_8021Q_VBID_HI_SHIFT 9 +#define DSA_8021Q_VBID_HI_MASK GENMASK(9, 9) +#define DSA_8021Q_VBID_LO_SHIFT 4 +#define DSA_8021Q_VBID_LO_MASK GENMASK(5, 4) +#define DSA_8021Q_VBID_HI(x) (((x) & GENMASK(2, 2)) >> 2) +#define DSA_8021Q_VBID_LO(x) ((x) & GENMASK(1, 0)) +#define DSA_8021Q_VBID(x) \ + (((DSA_8021Q_VBID_LO(x) << DSA_8021Q_VBID_LO_SHIFT) & \ + DSA_8021Q_VBID_LO_MASK) | \ + ((DSA_8021Q_VBID_HI(x) << DSA_8021Q_VBID_HI_SHIFT) & \ + DSA_8021Q_VBID_HI_MASK)) #define DSA_8021Q_PORT_SHIFT 0 #define DSA_8021Q_PORT_MASK GENMASK(3, 0) #define DSA_8021Q_PORT(x) (((x) << DSA_8021Q_PORT_SHIFT) & \ 
DSA_8021Q_PORT_MASK) +u16 dsa_8021q_bridge_tx_fwd_offload_vid(int bridge_num) +{ + /* The VBID value of 0 is reserved for precise TX */ + return DSA_8021Q_DIR_TX | DSA_8021Q_VBID(bridge_num + 1); +} +EXPORT_SYMBOL_GPL(dsa_8021q_bridge_tx_fwd_offload_vid); + /* Returns the VID to be inserted into the frame from xmit for switch steering * instructions on egress. Encodes switch ID and port ID. */ @@ -98,13 +95,6 @@ u16 dsa_8021q_rx_vid(struct dsa_switch *ds, int port) } EXPORT_SYMBOL_GPL(dsa_8021q_rx_vid); -u16 dsa_8021q_rx_vid_subvlan(struct dsa_switch *ds, int port, u16 subvlan) -{ - return DSA_8021Q_DIR_RX | DSA_8021Q_SWITCH_ID(ds->index) | - DSA_8021Q_PORT(port) | DSA_8021Q_SUBVLAN(subvlan); -} -EXPORT_SYMBOL_GPL(dsa_8021q_rx_vid_subvlan); - /* Returns the decoded switch ID from the RX VID. */ int dsa_8021q_rx_switch_id(u16 vid) { @@ -119,20 +109,6 @@ int dsa_8021q_rx_source_port(u16 vid) } EXPORT_SYMBOL_GPL(dsa_8021q_rx_source_port); -/* Returns the decoded subvlan from the RX VID. */ -u16 dsa_8021q_rx_subvlan(u16 vid) -{ - u16 svl_hi, svl_lo; - - svl_hi = (vid & DSA_8021Q_SUBVLAN_HI_MASK) >> - DSA_8021Q_SUBVLAN_HI_SHIFT; - svl_lo = (vid & DSA_8021Q_SUBVLAN_LO_MASK) >> - DSA_8021Q_SUBVLAN_LO_SHIFT; - - return (svl_hi << 2) | svl_lo; -} -EXPORT_SYMBOL_GPL(dsa_8021q_rx_subvlan); - bool vid_is_dsa_8021q_rxvlan(u16 vid) { return (vid & DSA_8021Q_DIR_MASK) == DSA_8021Q_DIR_RX; @@ -151,21 +127,152 @@ bool vid_is_dsa_8021q(u16 vid) } EXPORT_SYMBOL_GPL(vid_is_dsa_8021q); -/* If @enabled is true, installs @vid with @flags into the switch port's HW - * filter. - * If @enabled is false, deletes @vid (ignores @flags) from the port. Had the - * user explicitly configured this @vid through the bridge core, then the @vid - * is installed again, but this time with the flags from the bridge layer. 
- */ -static int dsa_8021q_vid_apply(struct dsa_8021q_context *ctx, int port, u16 vid, - u16 flags, bool enabled) +static struct dsa_tag_8021q_vlan * +dsa_tag_8021q_vlan_find(struct dsa_8021q_context *ctx, int port, u16 vid) { - struct dsa_port *dp = dsa_to_port(ctx->ds, port); + struct dsa_tag_8021q_vlan *v; - if (enabled) - return ctx->ops->vlan_add(ctx->ds, dp->index, vid, flags); + list_for_each_entry(v, &ctx->vlans, list) + if (v->vid == vid && v->port == port) + return v; - return ctx->ops->vlan_del(ctx->ds, dp->index, vid); + return NULL; +} + +static int dsa_switch_do_tag_8021q_vlan_add(struct dsa_switch *ds, int port, + u16 vid, u16 flags) +{ + struct dsa_8021q_context *ctx = ds->tag_8021q_ctx; + struct dsa_port *dp = dsa_to_port(ds, port); + struct dsa_tag_8021q_vlan *v; + int err; + + /* No need to bother with refcounting for user ports */ + if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) + return ds->ops->tag_8021q_vlan_add(ds, port, vid, flags); + + v = dsa_tag_8021q_vlan_find(ctx, port, vid); + if (v) { + refcount_inc(&v->refcount); + return 0; + } + + v = kzalloc(sizeof(*v), GFP_KERNEL); + if (!v) + return -ENOMEM; + + err = ds->ops->tag_8021q_vlan_add(ds, port, vid, flags); + if (err) { + kfree(v); + return err; + } + + v->vid = vid; + v->port = port; + refcount_set(&v->refcount, 1); + list_add_tail(&v->list, &ctx->vlans); + + return 0; +} + +static int dsa_switch_do_tag_8021q_vlan_del(struct dsa_switch *ds, int port, + u16 vid) +{ + struct dsa_8021q_context *ctx = ds->tag_8021q_ctx; + struct dsa_port *dp = dsa_to_port(ds, port); + struct dsa_tag_8021q_vlan *v; + int err; + + /* No need to bother with refcounting for user ports */ + if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))) + return ds->ops->tag_8021q_vlan_del(ds, port, vid); + + v = dsa_tag_8021q_vlan_find(ctx, port, vid); + if (!v) + return -ENOENT; + + if (!refcount_dec_and_test(&v->refcount)) + return 0; + + err = ds->ops->tag_8021q_vlan_del(ds, port, vid); + if (err) { + 
refcount_inc(&v->refcount); + return err; + } + + list_del(&v->list); + kfree(v); + + return 0; +} + +static bool +dsa_switch_tag_8021q_vlan_match(struct dsa_switch *ds, int port, + struct dsa_notifier_tag_8021q_vlan_info *info) +{ + if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port)) + return true; + + if (ds->dst->index == info->tree_index && ds->index == info->sw_index) + return port == info->port; + + return false; +} + +int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds, + struct dsa_notifier_tag_8021q_vlan_info *info) +{ + int port, err; + + /* Since we use dsa_broadcast(), there might be other switches in other + * trees which don't support tag_8021q, so don't return an error. + * Or they might even support tag_8021q but have not registered yet to + * use it (maybe they use another tagger currently). + */ + if (!ds->ops->tag_8021q_vlan_add || !ds->tag_8021q_ctx) + return 0; + + for (port = 0; port < ds->num_ports; port++) { + if (dsa_switch_tag_8021q_vlan_match(ds, port, info)) { + u16 flags = 0; + + if (dsa_is_user_port(ds, port)) + flags |= BRIDGE_VLAN_INFO_UNTAGGED; + + if (vid_is_dsa_8021q_rxvlan(info->vid) && + dsa_8021q_rx_switch_id(info->vid) == ds->index && + dsa_8021q_rx_source_port(info->vid) == port) + flags |= BRIDGE_VLAN_INFO_PVID; + + err = dsa_switch_do_tag_8021q_vlan_add(ds, port, + info->vid, + flags); + if (err) + return err; + } + } + + return 0; +} + +int dsa_switch_tag_8021q_vlan_del(struct dsa_switch *ds, + struct dsa_notifier_tag_8021q_vlan_info *info) +{ + int port, err; + + if (!ds->ops->tag_8021q_vlan_del || !ds->tag_8021q_ctx) + return 0; + + for (port = 0; port < ds->num_ports; port++) { + if (dsa_switch_tag_8021q_vlan_match(ds, port, info)) { + err = dsa_switch_do_tag_8021q_vlan_del(ds, port, + info->vid); + if (err) + return err; + } + } + + return 0; } /* RX VLAN tagging (left) and TX VLAN tagging (right) setup shown for a single @@ -181,12 +288,6 @@ static int dsa_8021q_vid_apply(struct dsa_8021q_context *ctx, int 
port, u16 vid, * force all switched traffic to pass through the CPU. So we must also make * the other front-panel ports members of this VID we're adding, albeit * we're not making it their PVID (they'll still have their own). - * By the way - just because we're installing the same VID in multiple - * switch ports doesn't mean that they'll start to talk to one another, even - * while not bridged: the final forwarding decision is still an AND between - * the L2 forwarding information (which is limiting forwarding in this case) - * and the VLAN-based restrictions (of which there are none in this case, - * since all ports are members). * - On TX (ingress from CPU and towards network) we are faced with a problem. * If we were to tag traffic (from within DSA) with the port's pvid, all * would be well, assuming the switch ports were standalone. Frames would @@ -200,9 +301,10 @@ static int dsa_8021q_vid_apply(struct dsa_8021q_context *ctx, int port, u16 vid, * a member of the VID we're tagging the traffic with - the desired one. * * So at the end, each front-panel port will have one RX VID (also the PVID), - * the RX VID of all other front-panel ports, and one TX VID. Whereas the CPU - * port will have the RX and TX VIDs of all front-panel ports, and on top of - * that, is also tagged-input and tagged-output (VLAN trunk). + * the RX VID of all other front-panel ports that are in the same bridge, and + * one TX VID. Whereas the CPU port will have the RX and TX VIDs of all + * front-panel ports, and on top of that, is also tagged-input and + * tagged-output (VLAN trunk). 
* * CPU port CPU port * +-------------+-----+-------------+ +-------------+-----+-------------+ @@ -220,246 +322,245 @@ static int dsa_8021q_vid_apply(struct dsa_8021q_context *ctx, int port, u16 vid, * +-+-----+-+-----+-+-----+-+-----+-+ +-+-----+-+-----+-+-----+-+-----+-+ * swp0 swp1 swp2 swp3 swp0 swp1 swp2 swp3 */ -static int dsa_8021q_setup_port(struct dsa_8021q_context *ctx, int port, - bool enabled) +static bool dsa_tag_8021q_bridge_match(struct dsa_switch *ds, int port, + struct dsa_notifier_bridge_info *info) +{ + struct dsa_port *dp = dsa_to_port(ds, port); + + /* Don't match on self */ + if (ds->dst->index == info->tree_index && + ds->index == info->sw_index && + port == info->port) + return false; + + if (dsa_port_is_user(dp)) + return dp->bridge_dev == info->br; + + return false; +} + +int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, + struct dsa_notifier_bridge_info *info) +{ + struct dsa_switch *targeted_ds; + struct dsa_port *targeted_dp; + u16 targeted_rx_vid; + int err, port; + + if (!ds->tag_8021q_ctx) + return 0; + + targeted_ds = dsa_switch_find(info->tree_index, info->sw_index); + targeted_dp = dsa_to_port(targeted_ds, info->port); + targeted_rx_vid = dsa_8021q_rx_vid(targeted_ds, info->port); + + for (port = 0; port < ds->num_ports; port++) { + struct dsa_port *dp = dsa_to_port(ds, port); + u16 rx_vid = dsa_8021q_rx_vid(ds, port); + + if (!dsa_tag_8021q_bridge_match(ds, port, info)) + continue; + + /* Install the RX VID of the targeted port in our VLAN table */ + err = dsa_port_tag_8021q_vlan_add(dp, targeted_rx_vid); + if (err) + return err; + + /* Install our RX VID into the targeted port's VLAN table */ + err = dsa_port_tag_8021q_vlan_add(targeted_dp, rx_vid); + if (err) + return err; + } + + return 0; +} + +int dsa_tag_8021q_bridge_leave(struct dsa_switch *ds, + struct dsa_notifier_bridge_info *info) { - int upstream = dsa_upstream_port(ctx->ds, port); - u16 rx_vid = dsa_8021q_rx_vid(ctx->ds, port); - u16 tx_vid = 
dsa_8021q_tx_vid(ctx->ds, port); + struct dsa_switch *targeted_ds; + struct dsa_port *targeted_dp; + u16 targeted_rx_vid; + int port; + + if (!ds->tag_8021q_ctx) + return 0; + + targeted_ds = dsa_switch_find(info->tree_index, info->sw_index); + targeted_dp = dsa_to_port(targeted_ds, info->port); + targeted_rx_vid = dsa_8021q_rx_vid(targeted_ds, info->port); + + for (port = 0; port < ds->num_ports; port++) { + struct dsa_port *dp = dsa_to_port(ds, port); + u16 rx_vid = dsa_8021q_rx_vid(ds, port); + + if (!dsa_tag_8021q_bridge_match(ds, port, info)) + continue; + + /* Remove the RX VID of the targeted port from our VLAN table */ + dsa_port_tag_8021q_vlan_del(dp, targeted_rx_vid); + + /* Remove our RX VID from the targeted port's VLAN table */ + dsa_port_tag_8021q_vlan_del(targeted_dp, rx_vid); + } + + return 0; +} + +int dsa_tag_8021q_bridge_tx_fwd_offload(struct dsa_switch *ds, int port, + struct net_device *br, + int bridge_num) +{ + u16 tx_vid = dsa_8021q_bridge_tx_fwd_offload_vid(bridge_num); + + return dsa_port_tag_8021q_vlan_add(dsa_to_port(ds, port), tx_vid); +} +EXPORT_SYMBOL_GPL(dsa_tag_8021q_bridge_tx_fwd_offload); + +void dsa_tag_8021q_bridge_tx_fwd_unoffload(struct dsa_switch *ds, int port, + struct net_device *br, + int bridge_num) +{ + u16 tx_vid = dsa_8021q_bridge_tx_fwd_offload_vid(bridge_num); + + dsa_port_tag_8021q_vlan_del(dsa_to_port(ds, port), tx_vid); +} +EXPORT_SYMBOL_GPL(dsa_tag_8021q_bridge_tx_fwd_unoffload); + +/* Set up a port's tag_8021q RX and TX VLAN for standalone mode operation */ +static int dsa_tag_8021q_port_setup(struct dsa_switch *ds, int port) +{ + struct dsa_8021q_context *ctx = ds->tag_8021q_ctx; + struct dsa_port *dp = dsa_to_port(ds, port); + u16 rx_vid = dsa_8021q_rx_vid(ds, port); + u16 tx_vid = dsa_8021q_tx_vid(ds, port); struct net_device *master; - int i, err, subvlan; + int err; /* The CPU port is implicitly configured by * configuring the front-panel ports */ - if (!dsa_is_user_port(ctx->ds, port)) + if 
(!dsa_port_is_user(dp)) return 0; - master = dsa_to_port(ctx->ds, port)->cpu_dp->master; + master = dp->cpu_dp->master; /* Add this user port's RX VID to the membership list of all others * (including itself). This is so that bridging will not be hindered. * L2 forwarding rules still take precedence when there are no VLAN * restrictions, so there are no concerns about leaking traffic. */ - for (i = 0; i < ctx->ds->num_ports; i++) { - u16 flags; - - if (i == upstream) - continue; - else if (i == port) - /* The RX VID is pvid on this port */ - flags = BRIDGE_VLAN_INFO_UNTAGGED | - BRIDGE_VLAN_INFO_PVID; - else - /* The RX VID is a regular VLAN on all others */ - flags = BRIDGE_VLAN_INFO_UNTAGGED; - - err = dsa_8021q_vid_apply(ctx, i, rx_vid, flags, enabled); - if (err) { - dev_err(ctx->ds->dev, - "Failed to apply RX VID %d to port %d: %d\n", - rx_vid, port, err); - return err; - } - } - - /* CPU port needs to see this port's RX VID - * as tagged egress. - */ - err = dsa_8021q_vid_apply(ctx, upstream, rx_vid, 0, enabled); + err = dsa_port_tag_8021q_vlan_add(dp, rx_vid); if (err) { - dev_err(ctx->ds->dev, - "Failed to apply RX VID %d to port %d: %d\n", - rx_vid, port, err); + dev_err(ds->dev, + "Failed to apply RX VID %d to port %d: %pe\n", + rx_vid, port, ERR_PTR(err)); return err; } - /* Add to the master's RX filter not only @rx_vid, but in fact - * the entire subvlan range, just in case this DSA switch might - * want to use sub-VLANs. - */ - for (subvlan = 0; subvlan < DSA_8021Q_N_SUBVLAN; subvlan++) { - u16 vid = dsa_8021q_rx_vid_subvlan(ctx->ds, port, subvlan); - - if (enabled) - vlan_vid_add(master, ctx->proto, vid); - else - vlan_vid_del(master, ctx->proto, vid); - } + /* Add @rx_vid to the master's RX filter. 
*/ + vlan_vid_add(master, ctx->proto, rx_vid); /* Finally apply the TX VID on this port and on the CPU port */ - err = dsa_8021q_vid_apply(ctx, port, tx_vid, BRIDGE_VLAN_INFO_UNTAGGED, - enabled); - if (err) { - dev_err(ctx->ds->dev, - "Failed to apply TX VID %d on port %d: %d\n", - tx_vid, port, err); - return err; - } - err = dsa_8021q_vid_apply(ctx, upstream, tx_vid, 0, enabled); + err = dsa_port_tag_8021q_vlan_add(dp, tx_vid); if (err) { - dev_err(ctx->ds->dev, - "Failed to apply TX VID %d on port %d: %d\n", - tx_vid, upstream, err); + dev_err(ds->dev, + "Failed to apply TX VID %d on port %d: %pe\n", + tx_vid, port, ERR_PTR(err)); return err; } return err; } -int dsa_8021q_setup(struct dsa_8021q_context *ctx, bool enabled) +static void dsa_tag_8021q_port_teardown(struct dsa_switch *ds, int port) { - int rc, port; + struct dsa_8021q_context *ctx = ds->tag_8021q_ctx; + struct dsa_port *dp = dsa_to_port(ds, port); + u16 rx_vid = dsa_8021q_rx_vid(ds, port); + u16 tx_vid = dsa_8021q_tx_vid(ds, port); + struct net_device *master; - ASSERT_RTNL(); + /* The CPU port is implicitly configured by + * configuring the front-panel ports + */ + if (!dsa_port_is_user(dp)) + return; - for (port = 0; port < ctx->ds->num_ports; port++) { - rc = dsa_8021q_setup_port(ctx, port, enabled); - if (rc < 0) { - dev_err(ctx->ds->dev, - "Failed to setup VLAN tagging for port %d: %d\n", - port, rc); - return rc; - } - } + master = dp->cpu_dp->master; - return 0; -} -EXPORT_SYMBOL_GPL(dsa_8021q_setup); + dsa_port_tag_8021q_vlan_del(dp, rx_vid); -static int dsa_8021q_crosschip_link_apply(struct dsa_8021q_context *ctx, - int port, - struct dsa_8021q_context *other_ctx, - int other_port, bool enabled) -{ - u16 rx_vid = dsa_8021q_rx_vid(ctx->ds, port); + vlan_vid_del(master, ctx->proto, rx_vid); - /* @rx_vid of local @ds port @port goes to @other_port of - * @other_ds - */ - return dsa_8021q_vid_apply(other_ctx, other_port, rx_vid, - BRIDGE_VLAN_INFO_UNTAGGED, enabled); + 
dsa_port_tag_8021q_vlan_del(dp, tx_vid); } -static int dsa_8021q_crosschip_link_add(struct dsa_8021q_context *ctx, int port, - struct dsa_8021q_context *other_ctx, - int other_port) +static int dsa_tag_8021q_setup(struct dsa_switch *ds) { - struct dsa_8021q_crosschip_link *c; + int err, port; + + ASSERT_RTNL(); - list_for_each_entry(c, &ctx->crosschip_links, list) { - if (c->port == port && c->other_ctx == other_ctx && - c->other_port == other_port) { - refcount_inc(&c->refcount); - return 0; + for (port = 0; port < ds->num_ports; port++) { + err = dsa_tag_8021q_port_setup(ds, port); + if (err < 0) { + dev_err(ds->dev, + "Failed to setup VLAN tagging for port %d: %pe\n", + port, ERR_PTR(err)); + return err; } } - dev_dbg(ctx->ds->dev, - "adding crosschip link from port %d to %s port %d\n", - port, dev_name(other_ctx->ds->dev), other_port); - - c = kzalloc(sizeof(*c), GFP_KERNEL); - if (!c) - return -ENOMEM; - - c->port = port; - c->other_ctx = other_ctx; - c->other_port = other_port; - refcount_set(&c->refcount, 1); - - list_add(&c->list, &ctx->crosschip_links); - return 0; } -static void dsa_8021q_crosschip_link_del(struct dsa_8021q_context *ctx, - struct dsa_8021q_crosschip_link *c, - bool *keep) +static void dsa_tag_8021q_teardown(struct dsa_switch *ds) { - *keep = !refcount_dec_and_test(&c->refcount); + int port; - if (*keep) - return; - - dev_dbg(ctx->ds->dev, - "deleting crosschip link from port %d to %s port %d\n", - c->port, dev_name(c->other_ctx->ds->dev), c->other_port); + ASSERT_RTNL(); - list_del(&c->list); - kfree(c); + for (port = 0; port < ds->num_ports; port++) + dsa_tag_8021q_port_teardown(ds, port); } -/* Make traffic from local port @port be received by remote port @other_port. - * This means that our @rx_vid needs to be installed on @other_ds's upstream - * and user ports. The user ports should be egress-untagged so that they can - * pop the dsa_8021q VLAN. 
But the @other_upstream can be either egress-tagged - * or untagged: it doesn't matter, since it should never egress a frame having - * our @rx_vid. - */ -int dsa_8021q_crosschip_bridge_join(struct dsa_8021q_context *ctx, int port, - struct dsa_8021q_context *other_ctx, - int other_port) +int dsa_tag_8021q_register(struct dsa_switch *ds, __be16 proto) { - /* @other_upstream is how @other_ds reaches us. If we are part - * of disjoint trees, then we are probably connected through - * our CPU ports. If we're part of the same tree though, we should - * probably use dsa_towards_port. - */ - int other_upstream = dsa_upstream_port(other_ctx->ds, other_port); - int rc; + struct dsa_8021q_context *ctx; - rc = dsa_8021q_crosschip_link_add(ctx, port, other_ctx, other_port); - if (rc) - return rc; + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; - rc = dsa_8021q_crosschip_link_apply(ctx, port, other_ctx, - other_port, true); - if (rc) - return rc; + ctx->proto = proto; + ctx->ds = ds; - rc = dsa_8021q_crosschip_link_add(ctx, port, other_ctx, other_upstream); - if (rc) - return rc; + INIT_LIST_HEAD(&ctx->vlans); - return dsa_8021q_crosschip_link_apply(ctx, port, other_ctx, - other_upstream, true); + ds->tag_8021q_ctx = ctx; + + return dsa_tag_8021q_setup(ds); } -EXPORT_SYMBOL_GPL(dsa_8021q_crosschip_bridge_join); +EXPORT_SYMBOL_GPL(dsa_tag_8021q_register); -int dsa_8021q_crosschip_bridge_leave(struct dsa_8021q_context *ctx, int port, - struct dsa_8021q_context *other_ctx, - int other_port) +void dsa_tag_8021q_unregister(struct dsa_switch *ds) { - int other_upstream = dsa_upstream_port(other_ctx->ds, other_port); - struct dsa_8021q_crosschip_link *c, *n; - - list_for_each_entry_safe(c, n, &ctx->crosschip_links, list) { - if (c->port == port && c->other_ctx == other_ctx && - (c->other_port == other_port || - c->other_port == other_upstream)) { - struct dsa_8021q_context *other_ctx = c->other_ctx; - int other_port = c->other_port; - bool keep; - int rc; - - 
dsa_8021q_crosschip_link_del(ctx, c, &keep); - if (keep) - continue; - - rc = dsa_8021q_crosschip_link_apply(ctx, port, - other_ctx, - other_port, - false); - if (rc) - return rc; - } + struct dsa_8021q_context *ctx = ds->tag_8021q_ctx; + struct dsa_tag_8021q_vlan *v, *n; + + dsa_tag_8021q_teardown(ds); + + list_for_each_entry_safe(v, n, &ctx->vlans, list) { + list_del(&v->list); + kfree(v); } - return 0; + ds->tag_8021q_ctx = NULL; + + kfree(ctx); } -EXPORT_SYMBOL_GPL(dsa_8021q_crosschip_bridge_leave); +EXPORT_SYMBOL_GPL(dsa_tag_8021q_unregister); struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, u16 tpid, u16 tci) @@ -471,8 +572,7 @@ struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, } EXPORT_SYMBOL_GPL(dsa_8021q_xmit); -void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id, - int *subvlan) +void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id) { u16 vid, tci; @@ -489,9 +589,6 @@ void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id, *source_port = dsa_8021q_rx_source_port(vid); *switch_id = dsa_8021q_rx_switch_id(vid); - *subvlan = dsa_8021q_rx_subvlan(vid); skb->priority = (tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; } EXPORT_SYMBOL_GPL(dsa_8021q_rcv); - -MODULE_LICENSE("GPL v2"); diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index a822355afc90..0f258218c8cf 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -126,7 +126,42 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev, u8 extra) { struct dsa_port *dp = dsa_slave_to_port(dev); + u8 tag_dev, tag_port; + enum dsa_cmd cmd; u8 *dsa_header; + u16 pvid = 0; + int err; + + if (skb->offload_fwd_mark) { + struct dsa_switch_tree *dst = dp->ds->dst; + struct net_device *br = dp->bridge_dev; + + cmd = DSA_CMD_FORWARD; + + /* When offloading forwarding for a bridge, inject FORWARD + * packets on behalf of a virtual switch device with an index + * past the physical 
switches. + */ + tag_dev = dst->last_switch + 1 + dp->bridge_num; + tag_port = 0; + + /* If we are offloading forwarding for a VLAN-unaware bridge, + * inject packets to hardware using the bridge's pvid, since + * that's where the packets ingressed from. + */ + if (!br_vlan_enabled(br)) { + /* Safe because __dev_queue_xmit() runs under + * rcu_read_lock_bh() + */ + err = br_vlan_get_pvid_rcu(br, &pvid); + if (err) + return NULL; + } + } else { + cmd = DSA_CMD_FROM_CPU; + tag_dev = dp->ds->index; + tag_port = dp->index; + } if (skb->protocol == htons(ETH_P_8021Q)) { if (extra) { @@ -134,10 +169,10 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev, memmove(skb->data, skb->data + extra, 2 * ETH_ALEN); } - /* Construct tagged FROM_CPU DSA tag from 802.1Q tag. */ + /* Construct tagged DSA tag from 802.1Q tag. */ dsa_header = skb->data + 2 * ETH_ALEN + extra; - dsa_header[0] = (DSA_CMD_FROM_CPU << 6) | 0x20 | dp->ds->index; - dsa_header[1] = dp->index << 3; + dsa_header[0] = (cmd << 6) | 0x20 | tag_dev; + dsa_header[1] = tag_port << 3; /* Move CFI field from byte 2 to byte 1. */ if (dsa_header[2] & 0x10) { @@ -148,12 +183,13 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev, skb_push(skb, DSA_HLEN + extra); memmove(skb->data, skb->data + DSA_HLEN + extra, 2 * ETH_ALEN); - /* Construct untagged FROM_CPU DSA tag. */ + /* Construct untagged DSA tag. 
*/ dsa_header = skb->data + 2 * ETH_ALEN + extra; - dsa_header[0] = (DSA_CMD_FROM_CPU << 6) | dp->ds->index; - dsa_header[1] = dp->index << 3; - dsa_header[2] = 0x00; - dsa_header[3] = 0x00; + + dsa_header[0] = (cmd << 6) | tag_dev; + dsa_header[1] = tag_port << 3; + dsa_header[2] = pvid >> 8; + dsa_header[3] = pvid & 0xff; } return skb; diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c index 85ac85c3af8c..d0781b058610 100644 --- a/net/dsa/tag_ocelot_8021q.c +++ b/net/dsa/tag_ocelot_8021q.c @@ -41,9 +41,9 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb, struct net_device *netdev, struct packet_type *pt) { - int src_port, switch_id, subvlan; + int src_port, switch_id; - dsa_8021q_rcv(skb, &src_port, &switch_id, &subvlan); + dsa_8021q_rcv(skb, &src_port, &switch_id); skb->dev = dsa_master_find_slave(netdev, switch_id, src_port); if (!skb->dev) diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index 9c2df9ece01b..cddee4b499d8 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -115,40 +115,6 @@ static inline bool sja1105_is_meta_frame(const struct sk_buff *skb) return true; } -static bool sja1105_can_use_vlan_as_tags(const struct sk_buff *skb) -{ - struct vlan_ethhdr *hdr = vlan_eth_hdr(skb); - u16 vlan_tci; - - if (hdr->h_vlan_proto == htons(ETH_P_SJA1105)) - return true; - - if (hdr->h_vlan_proto != htons(ETH_P_8021Q) && - !skb_vlan_tag_present(skb)) - return false; - - if (skb_vlan_tag_present(skb)) - vlan_tci = skb_vlan_tag_get(skb); - else - vlan_tci = ntohs(hdr->h_vlan_TCI); - - return vid_is_dsa_8021q(vlan_tci & VLAN_VID_MASK); -} - -/* This is the first time the tagger sees the frame on RX. - * Figure out if we can decode it. 
- */ -static bool sja1105_filter(const struct sk_buff *skb, struct net_device *dev) -{ - if (sja1105_can_use_vlan_as_tags(skb)) - return true; - if (sja1105_is_link_local(skb)) - return true; - if (sja1105_is_meta_frame(skb)) - return true; - return false; -} - /* Calls sja1105_port_deferred_xmit in sja1105_main.c */ static struct sk_buff *sja1105_defer_xmit(struct sja1105_port *sp, struct sk_buff *skb) @@ -167,6 +133,31 @@ static u16 sja1105_xmit_tpid(struct sja1105_port *sp) return sp->xmit_tpid; } +static struct sk_buff *sja1105_imprecise_xmit(struct sk_buff *skb, + struct net_device *netdev) +{ + struct dsa_port *dp = dsa_slave_to_port(netdev); + struct net_device *br = dp->bridge_dev; + u16 tx_vid; + + /* If the port is under a VLAN-aware bridge, just slide the + * VLAN-tagged packet into the FDB and hope for the best. + * This works because we support a single VLAN-aware bridge + * across the entire dst, and its VLANs cannot be shared with + * any standalone port. + */ + if (br_vlan_enabled(br)) + return skb; + + /* If the port is under a VLAN-unaware bridge, use an imprecise + * TX VLAN that targets the bridge's entire broadcast domain, + * instead of just the specific port. + */ + tx_vid = dsa_8021q_bridge_tx_fwd_offload_vid(dp->bridge_num); + + return dsa_8021q_xmit(skb, netdev, sja1105_xmit_tpid(dp->priv), tx_vid); +} + static struct sk_buff *sja1105_xmit(struct sk_buff *skb, struct net_device *netdev) { @@ -175,6 +166,9 @@ static struct sk_buff *sja1105_xmit(struct sk_buff *skb, u16 queue_mapping = skb_get_queue_mapping(skb); u8 pcp = netdev_txq_to_tc(netdev, queue_mapping); + if (skb->offload_fwd_mark) + return sja1105_imprecise_xmit(skb, netdev); + /* Transmitting management traffic does not rely upon switch tagging, * but instead SPI-installed management routes. Part 2 of this * is the .port_deferred_xmit driver callback. 
@@ -199,6 +193,9 @@ static struct sk_buff *sja1110_xmit(struct sk_buff *skb, __be16 *tx_header; int trailer_pos; + if (skb->offload_fwd_mark) + return sja1105_imprecise_xmit(skb, netdev); + /* Transmitting control packets is done using in-band control * extensions, while data packets are transmitted using * tag_8021q TX VLANs. @@ -358,20 +355,6 @@ static struct sk_buff return skb; } -static void sja1105_decode_subvlan(struct sk_buff *skb, u16 subvlan) -{ - struct dsa_port *dp = dsa_slave_to_port(skb->dev); - struct sja1105_port *sp = dp->priv; - u16 vid = sp->subvlan_map[subvlan]; - u16 vlan_tci; - - if (vid == VLAN_N_VID) - return; - - vlan_tci = (skb->priority << VLAN_PRIO_SHIFT) | vid; - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci); -} - static bool sja1105_skb_has_tag_8021q(const struct sk_buff *skb) { u16 tpid = ntohs(eth_hdr(skb)->h_proto); @@ -385,15 +368,42 @@ static bool sja1110_skb_has_inband_control_extension(const struct sk_buff *skb) return ntohs(eth_hdr(skb)->h_proto) == ETH_P_SJA1110; } +/* Returns true for imprecise RX and sets the @vid. + * Returns false for precise RX and sets @source_port and @switch_id. 
+ */ +static bool sja1105_vlan_rcv(struct sk_buff *skb, int *source_port, + int *switch_id, u16 *vid) +{ + struct vlan_ethhdr *hdr = (struct vlan_ethhdr *)skb_mac_header(skb); + u16 vlan_tci; + + if (skb_vlan_tag_present(skb)) + vlan_tci = skb_vlan_tag_get(skb); + else + vlan_tci = ntohs(hdr->h_vlan_TCI); + + if (vid_is_dsa_8021q_rxvlan(vlan_tci & VLAN_VID_MASK)) { + dsa_8021q_rcv(skb, source_port, switch_id); + return false; + } + + /* Try our best with imprecise RX */ + *vid = vlan_tci & VLAN_VID_MASK; + + return true; +} + static struct sk_buff *sja1105_rcv(struct sk_buff *skb, struct net_device *netdev, struct packet_type *pt) { - int source_port, switch_id, subvlan = 0; + int source_port = -1, switch_id = -1; struct sja1105_meta meta = {0}; + bool imprecise_rx = false; struct ethhdr *hdr; bool is_link_local; bool is_meta; + u16 vid; hdr = eth_hdr(skb); is_link_local = sja1105_is_link_local(skb); @@ -403,7 +413,8 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb, if (sja1105_skb_has_tag_8021q(skb)) { /* Normal traffic path. */ - dsa_8021q_rcv(skb, &source_port, &switch_id, &subvlan); + imprecise_rx = sja1105_vlan_rcv(skb, &source_port, &switch_id, + &vid); } else if (is_link_local) { /* Management traffic path. 
Switch embeds the switch ID and * port ID into bytes of the destination MAC, courtesy of @@ -422,15 +433,15 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb, return NULL; } - skb->dev = dsa_master_find_slave(netdev, switch_id, source_port); + if (imprecise_rx) + skb->dev = dsa_find_designated_bridge_port_by_vid(netdev, vid); + else + skb->dev = dsa_master_find_slave(netdev, switch_id, source_port); if (!skb->dev) { netdev_warn(netdev, "Couldn't decode source port\n"); return NULL; } - if (subvlan) - sja1105_decode_subvlan(skb, subvlan); - return sja1105_rcv_meta_state_machine(skb, &meta, is_link_local, is_meta); } @@ -538,7 +549,9 @@ static struct sk_buff *sja1110_rcv(struct sk_buff *skb, struct net_device *netdev, struct packet_type *pt) { - int source_port = -1, switch_id = -1, subvlan = 0; + int source_port = -1, switch_id = -1; + bool imprecise_rx = false; + u16 vid; skb->offload_fwd_mark = 1; @@ -551,19 +564,18 @@ static struct sk_buff *sja1110_rcv(struct sk_buff *skb, /* Packets with in-band control extensions might still have RX VLANs */ if (likely(sja1105_skb_has_tag_8021q(skb))) - dsa_8021q_rcv(skb, &source_port, &switch_id, &subvlan); + imprecise_rx = sja1105_vlan_rcv(skb, &source_port, &switch_id, + &vid); - skb->dev = dsa_master_find_slave(netdev, switch_id, source_port); + if (imprecise_rx) + skb->dev = dsa_find_designated_bridge_port_by_vid(netdev, vid); + else + skb->dev = dsa_master_find_slave(netdev, switch_id, source_port); if (!skb->dev) { - netdev_warn(netdev, - "Couldn't decode source port %d and switch id %d\n", - source_port, switch_id); + netdev_warn(netdev, "Couldn't decode source port\n"); return NULL; } - if (subvlan) - sja1105_decode_subvlan(skb, subvlan); - return skb; } @@ -596,7 +608,6 @@ static const struct dsa_device_ops sja1105_netdev_ops = { .proto = DSA_TAG_PROTO_SJA1105, .xmit = sja1105_xmit, .rcv = sja1105_rcv, - .filter = sja1105_filter, .needed_headroom = VLAN_HLEN, .flow_dissect = sja1105_flow_dissect, 
.promisc_on_master = true, @@ -610,7 +621,6 @@ static const struct dsa_device_ops sja1110_netdev_ops = { .proto = DSA_TAG_PROTO_SJA1110, .xmit = sja1110_xmit, .rcv = sja1110_rcv, - .filter = sja1105_filter, .flow_dissect = sja1110_flow_dissect, .needed_headroom = SJA1110_HEADER_LEN + VLAN_HLEN, .needed_tailroom = SJA1110_RX_TRAILER_LEN + SJA1110_MAX_PADDING_LEN, diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 9cce612e8976..171ba75b74c9 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -182,12 +182,8 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) * at all, so we check here whether one of those tagging * variants has been configured on the receiving interface, * and if so, set skb->protocol without looking at the packet. - * The DSA tagging protocol may be able to decode some but not all - * traffic (for example only for management). In that case give it the - * option to filter the packets from which it can decode source port - * information. */ - if (unlikely(netdev_uses_dsa(dev)) && dsa_can_decode(skb, dev)) + if (unlikely(netdev_uses_dsa(dev))) return htons(ETH_P_XDSA); if (likely(eth_proto_is_802_3(eth->h_proto))) diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index baa5d10043cb..6134b180f59f 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -7,6 +7,7 @@ * the information ethtool needs. */ +#include <linux/compat.h> #include <linux/module.h> #include <linux/types.h> #include <linux/capability.h> @@ -807,6 +808,120 @@ out: return ret; } +static noinline_for_stack int +ethtool_rxnfc_copy_from_compat(struct ethtool_rxnfc *rxnfc, + const struct compat_ethtool_rxnfc __user *useraddr, + size_t size) +{ + struct compat_ethtool_rxnfc crxnfc = {}; + + /* We expect there to be holes between fs.m_ext and + * fs.ring_cookie and at the end of fs, but nowhere else. + * On non-x86, no conversion should be needed. 
+ */ + BUILD_BUG_ON(!IS_ENABLED(CONFIG_X86_64) && + sizeof(struct compat_ethtool_rxnfc) != + sizeof(struct ethtool_rxnfc)); + BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) + + sizeof(useraddr->fs.m_ext) != + offsetof(struct ethtool_rxnfc, fs.m_ext) + + sizeof(rxnfc->fs.m_ext)); + BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.location) - + offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) != + offsetof(struct ethtool_rxnfc, fs.location) - + offsetof(struct ethtool_rxnfc, fs.ring_cookie)); + + if (copy_from_user(&crxnfc, useraddr, min(size, sizeof(crxnfc)))) + return -EFAULT; + + *rxnfc = (struct ethtool_rxnfc) { + .cmd = crxnfc.cmd, + .flow_type = crxnfc.flow_type, + .data = crxnfc.data, + .fs = { + .flow_type = crxnfc.fs.flow_type, + .h_u = crxnfc.fs.h_u, + .h_ext = crxnfc.fs.h_ext, + .m_u = crxnfc.fs.m_u, + .m_ext = crxnfc.fs.m_ext, + .ring_cookie = crxnfc.fs.ring_cookie, + .location = crxnfc.fs.location, + }, + .rule_cnt = crxnfc.rule_cnt, + }; + + return 0; +} + +static int ethtool_rxnfc_copy_from_user(struct ethtool_rxnfc *rxnfc, + const void __user *useraddr, + size_t size) +{ + if (compat_need_64bit_alignment_fixup()) + return ethtool_rxnfc_copy_from_compat(rxnfc, useraddr, size); + + if (copy_from_user(rxnfc, useraddr, size)) + return -EFAULT; + + return 0; +} + +static int ethtool_rxnfc_copy_to_compat(void __user *useraddr, + const struct ethtool_rxnfc *rxnfc, + size_t size, const u32 *rule_buf) +{ + struct compat_ethtool_rxnfc crxnfc; + + memset(&crxnfc, 0, sizeof(crxnfc)); + crxnfc = (struct compat_ethtool_rxnfc) { + .cmd = rxnfc->cmd, + .flow_type = rxnfc->flow_type, + .data = rxnfc->data, + .fs = { + .flow_type = rxnfc->fs.flow_type, + .h_u = rxnfc->fs.h_u, + .h_ext = rxnfc->fs.h_ext, + .m_u = rxnfc->fs.m_u, + .m_ext = rxnfc->fs.m_ext, + .ring_cookie = rxnfc->fs.ring_cookie, + .location = rxnfc->fs.location, + }, + .rule_cnt = rxnfc->rule_cnt, + }; + + if (copy_to_user(useraddr, &crxnfc, min(size, sizeof(crxnfc)))) + return 
-EFAULT; + + return 0; +} + +static int ethtool_rxnfc_copy_to_user(void __user *useraddr, + const struct ethtool_rxnfc *rxnfc, + size_t size, const u32 *rule_buf) +{ + int ret; + + if (compat_need_64bit_alignment_fixup()) { + ret = ethtool_rxnfc_copy_to_compat(useraddr, rxnfc, size, + rule_buf); + useraddr += offsetof(struct compat_ethtool_rxnfc, rule_locs); + } else { + ret = copy_to_user(useraddr, &rxnfc, size); + useraddr += offsetof(struct ethtool_rxnfc, rule_locs); + } + + if (ret) + return -EFAULT; + + if (rule_buf) { + if (copy_to_user(useraddr, rule_buf, + rxnfc->rule_cnt * sizeof(u32))) + return -EFAULT; + } + + return 0; +} + static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, u32 cmd, void __user *useraddr) { @@ -825,7 +940,7 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, info_size = (offsetof(struct ethtool_rxnfc, data) + sizeof(info.data)); - if (copy_from_user(&info, useraddr, info_size)) + if (ethtool_rxnfc_copy_from_user(&info, useraddr, info_size)) return -EFAULT; rc = dev->ethtool_ops->set_rxnfc(dev, &info); @@ -833,7 +948,7 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, return rc; if (cmd == ETHTOOL_SRXCLSRLINS && - copy_to_user(useraddr, &info, info_size)) + ethtool_rxnfc_copy_to_user(useraddr, &info, info_size, NULL)) return -EFAULT; return 0; @@ -859,7 +974,7 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, info_size = (offsetof(struct ethtool_rxnfc, data) + sizeof(info.data)); - if (copy_from_user(&info, useraddr, info_size)) + if (ethtool_rxnfc_copy_from_user(&info, useraddr, info_size)) return -EFAULT; /* If FLOW_RSS was requested then user-space must be using the @@ -867,7 +982,7 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, */ if (cmd == ETHTOOL_GRXFH && info.flow_type & FLOW_RSS) { info_size = sizeof(info); - if (copy_from_user(&info, useraddr, info_size)) + if (ethtool_rxnfc_copy_from_user(&info, useraddr, 
info_size)) return -EFAULT; /* Since malicious users may modify the original data, * we need to check whether FLOW_RSS is still requested. @@ -893,18 +1008,7 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, if (ret < 0) goto err_out; - ret = -EFAULT; - if (copy_to_user(useraddr, &info, info_size)) - goto err_out; - - if (rule_buf) { - useraddr += offsetof(struct ethtool_rxnfc, rule_locs); - if (copy_to_user(useraddr, rule_buf, - info.rule_cnt * sizeof(u32))) - goto err_out; - } - ret = 0; - + ret = ethtool_rxnfc_copy_to_user(useraddr, &info, info_size, rule_buf); err_out: kfree(rule_buf); diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index a45a0401adc5..f5077de3619e 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -129,7 +129,7 @@ static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg, int ret = -ENOIOCTLCMD; struct net_device *dev; - if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) + if (get_user_ifreq(&ifr, NULL, arg)) return -EFAULT; ifr.ifr_name[IFNAMSIZ-1] = 0; @@ -143,7 +143,7 @@ static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg, if (dev->type == ARPHRD_IEEE802154 && dev->netdev_ops->ndo_do_ioctl) ret = dev->netdev_ops->ndo_do_ioctl(dev, &ifr, cmd); - if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq))) + if (!ret && put_user_ifreq(&ifr, arg)) ret = -EFAULT; dev_put(dev); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 54648181dd56..0e4d758c2585 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -953,10 +953,10 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCGIFNETMASK: case SIOCGIFDSTADDR: case SIOCGIFPFLAGS: - if (copy_from_user(&ifr, p, sizeof(struct ifreq))) + if (get_user_ifreq(&ifr, NULL, p)) return -EFAULT; err = devinet_ioctl(net, cmd, &ifr); - if (!err && copy_to_user(p, &ifr, sizeof(struct ifreq))) + if (!err && put_user_ifreq(&ifr, p)) err = -EFAULT; break; @@ -966,7 +966,7 @@ 
int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCSIFDSTADDR: case SIOCSIFPFLAGS: case SIOCSIFFLAGS: - if (copy_from_user(&ifr, p, sizeof(struct ifreq))) + if (get_user_ifreq(&ifr, NULL, p)) return -EFAULT; err = devinet_ioctl(net, cmd, &ifr); break; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 73721a4448bd..c82aded8da7d 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -215,7 +215,7 @@ static void devinet_sysctl_unregister(struct in_device *idev) static struct in_ifaddr *inet_alloc_ifa(void) { - return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL); + return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT); } static void inet_rcu_free_ifa(struct rcu_head *head) @@ -1243,7 +1243,7 @@ out: return ret; } -static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size) +int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size) { struct in_device *in_dev = __in_dev_get_rtnl(dev); const struct in_ifaddr *ifa; @@ -2424,11 +2424,15 @@ static int devinet_sysctl_forward(struct ctl_table *ctl, int write, int *valp = ctl->data; int val = *valp; loff_t pos = *ppos; - int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); + struct net *net = ctl->extra2; + int ret; - if (write && *valp != val) { - struct net *net = ctl->extra2; + if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + ret = proc_dointvec(ctl, write, buffer, lenp, ppos); + if (write && *valp != val) { if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) { if (!rtnl_trylock()) { /* Restore the original values before restarting */ @@ -2762,8 +2766,6 @@ void __init devinet_init(void) INIT_HLIST_HEAD(&inet_addr_lst[i]); register_pernet_subsys(&devinet_ops); - - register_gifconf(PF_INET, inet_gifconf); register_netdevice_notifier(&ip_netdev_notifier); queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0); diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 
25cf387cca5b..8060524f4256 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2380,11 +2380,11 @@ void __init fib_trie_init(void) { fn_alias_kmem = kmem_cache_create("ip_fib_alias", sizeof(struct fib_alias), - 0, SLAB_PANIC, NULL); + 0, SLAB_PANIC | SLAB_ACCOUNT, NULL); trie_leaf_kmem = kmem_cache_create("ip_fib_trie", LEAF_SIZE, - 0, SLAB_PANIC, NULL); + 0, SLAB_PANIC | SLAB_ACCOUNT, NULL); } struct fib_table *fib_trie_table(u32 id, struct fib_table *alias) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 6b3c558a4f23..03589a04f9aa 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2713,6 +2713,7 @@ int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u rv = 1; } else if (im) { if (src_addr) { + spin_lock_bh(&im->lock); for (psf = im->sources; psf; psf = psf->sf_next) { if (psf->sf_inaddr == src_addr) break; @@ -2723,6 +2724,7 @@ int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u im->sfcount[MCAST_EXCLUDE]; else rv = im->sfcount[MCAST_EXCLUDE] != 0; + spin_unlock_bh(&im->lock); } else rv = 1; /* unspecified source; tentatively allow */ } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 8d8a8da3ae7e..a202dcec0dc2 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -446,8 +446,9 @@ static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4) { BUILD_BUG_ON(offsetof(typeof(*fl4), daddr) != offsetof(typeof(*fl4), saddr) + sizeof(fl4->saddr)); - memcpy(&iph->saddr, &fl4->saddr, - sizeof(fl4->saddr) + sizeof(fl4->daddr)); + + iph->saddr = fl4->saddr; + iph->daddr = fl4->daddr; } /* Note: skb->sk can be different from sk, in case of tunnels */ diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 99c06944501a..04754d55b3c1 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1299,26 +1299,7 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst) INDIRECT_CALLABLE_SCOPE unsigned int ipv4_mtu(const struct dst_entry *dst) { - const struct 
rtable *rt = (const struct rtable *)dst; - unsigned int mtu = rt->rt_pmtu; - - if (!mtu || time_after_eq(jiffies, rt->dst.expires)) - mtu = dst_metric_raw(dst, RTAX_MTU); - - if (mtu) - goto out; - - mtu = READ_ONCE(dst->dev->mtu); - - if (unlikely(ip_mtu_locked(dst))) { - if (rt->rt_uses_gateway && mtu > 576) - mtu = 576; - } - -out: - mtu = min_t(unsigned int, mtu, IP_MAX_MTU); - - return mtu - lwtunnel_headroom(dst->lwtstate, mtu); + return ip_dst_mtu_maybe_forward(dst, false); } EXPORT_INDIRECT_CALLABLE(ipv4_mtu); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8cb44040ec68..f931def6302e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4512,7 +4512,9 @@ void __init tcp_init(void) tcp_hashinfo.bind_bucket_cachep = kmem_cache_create("tcp_bind_bucket", sizeof(struct inet_bind_bucket), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + SLAB_HWCACHE_ALIGN | SLAB_PANIC | + SLAB_ACCOUNT, + NULL); /* Size and allocate the main established and bind bucket * hash tables. diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 25fa4c01a17f..62ba8d0f2c60 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -55,12 +55,7 @@ void tcp_fastopen_ctx_destroy(struct net *net) { struct tcp_fastopen_context *ctxt; - spin_lock(&net->ipv4.tcp_fastopen_ctx_lock); - - ctxt = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx, - lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock)); - rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, NULL); - spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock); + ctxt = xchg((__force struct tcp_fastopen_context **)&net->ipv4.tcp_fastopen_ctx, NULL); if (ctxt) call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free); @@ -89,18 +84,12 @@ int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk, ctx->num = 1; } - spin_lock(&net->ipv4.tcp_fastopen_ctx_lock); if (sk) { q = &inet_csk(sk)->icsk_accept_queue.fastopenq; - octx = rcu_dereference_protected(q->ctx, - lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock)); - rcu_assign_pointer(q->ctx, ctx); 
+ octx = xchg((__force struct tcp_fastopen_context **)&q->ctx, ctx); } else { - octx = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx, - lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock)); - rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, ctx); + octx = xchg((__force struct tcp_fastopen_context **)&net->ipv4.tcp_fastopen_ctx, ctx); } - spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock); if (octx) call_rcu(&octx->rcu, tcp_fastopen_ctx_free); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 149ceb5c94ff..501d8d4d4ba4 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -454,11 +454,12 @@ static void tcp_sndbuf_expand(struct sock *sk) */ /* Slow part of check#2. */ -static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb) +static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb, + unsigned int skbtruesize) { struct tcp_sock *tp = tcp_sk(sk); /* Optimize this! */ - int truesize = tcp_win_from_space(sk, skb->truesize) >> 1; + int truesize = tcp_win_from_space(sk, skbtruesize) >> 1; int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1; while (tp->rcv_ssthresh <= window) { @@ -471,7 +472,27 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb) return 0; } -static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) +/* Even if skb appears to have a bad len/truesize ratio, TCP coalescing + * can play nice with us, as sk_buff and skb->head might be either + * freed or shared with up to MAX_SKB_FRAGS segments. + * Only give a boost to drivers using page frag(s) to hold the frame(s), + * and if no payload was pulled in skb->head before reaching us. 
+ */ +static u32 truesize_adjust(bool adjust, const struct sk_buff *skb) +{ + u32 truesize = skb->truesize; + + if (adjust && !skb_headlen(skb)) { + truesize -= SKB_TRUESIZE(skb_end_offset(skb)); + /* paranoid check, some drivers might be buggy */ + if (unlikely((int)truesize < (int)skb->len)) + truesize = skb->truesize; + } + return truesize; +} + +static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb, + bool adjust) { struct tcp_sock *tp = tcp_sk(sk); int room; @@ -480,15 +501,16 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) /* Check #1 */ if (room > 0 && !tcp_under_memory_pressure(sk)) { + unsigned int truesize = truesize_adjust(adjust, skb); int incr; /* Check #2. Increase window, if skb with such overhead * will fit to rcvbuf in future. */ - if (tcp_win_from_space(sk, skb->truesize) <= skb->len) + if (tcp_win_from_space(sk, truesize) <= skb->len) incr = 2 * tp->advmss; else - incr = __tcp_grow_window(sk, skb); + incr = __tcp_grow_window(sk, skb, truesize); if (incr) { incr = max_t(int, incr, 2 * skb->len); @@ -782,7 +804,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb) tcp_ecn_check_ce(sk, skb); if (skb->len >= 128) - tcp_grow_window(sk, skb); + tcp_grow_window(sk, skb, true); } /* Called to compute a smoothed rtt estimate. The data fed to this @@ -4769,7 +4791,7 @@ coalesce_done: * and trigger fast retransmit. */ if (tcp_is_sack(tp)) - tcp_grow_window(sk, skb); + tcp_grow_window(sk, skb, true); kfree_skb_partial(skb, fragstolen); skb = NULL; goto add_sack; @@ -4857,7 +4879,7 @@ end: * and trigger fast retransmit. 
*/ if (tcp_is_sack(tp)) - tcp_grow_window(sk, skb); + tcp_grow_window(sk, skb, false); skb_condense(skb); skb_set_owner_r(skb, sk); } @@ -5383,7 +5405,7 @@ static void tcp_new_space(struct sock *sk) tp->snd_cwnd_stamp = tcp_jiffies32; } - sk->sk_write_space(sk); + INDIRECT_CALL_1(sk->sk_write_space, sk_stream_write_space, sk); } static void tcp_check_space(struct sock *sk) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a692626c19e4..84db1c9ee92a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2964,7 +2964,6 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_comp_sack_slack_ns = 100 * NSEC_PER_USEC; net->ipv4.sysctl_tcp_comp_sack_nr = 44; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; - spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 0; atomic_set(&net->ipv4.tfo_active_disable_times, 0); diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c index 9f5a5cdc38e6..7a1d5f473878 100644 --- a/net/ipv4/udp_bpf.c +++ b/net/ipv4/udp_bpf.c @@ -112,7 +112,6 @@ static struct proto udp_bpf_prots[UDP_BPF_NUM_PROTS]; static void udp_bpf_rebuild_protos(struct proto *prot, const struct proto *base) { *prot = *base; - prot->unhash = sock_map_unhash; prot->close = sock_map_close; prot->recvmsg = udp_bpf_recvmsg; } diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 747f56e0c636..e504204bca92 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -328,4 +328,15 @@ config IPV6_RPL_LWTUNNEL If unsure, say N. +config IPV6_IOAM6_LWTUNNEL + bool "IPv6: IOAM Pre-allocated Trace insertion support" + depends on IPV6 + select LWTUNNEL + help + Support for the inline insertion of IOAM Pre-allocated + Trace Header (only on locally generated packets), using + the lightweight tunnels mechanism. + + If unsure, say N. 
+ endif # IPV6 diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index cf7b47bdb9b3..1bc7e143217b 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -10,7 +10,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \ - udp_offload.o seg6.o fib6_notifier.o rpl.o + udp_offload.o seg6.o fib6_notifier.o rpl.o ioam6.o ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o @@ -27,6 +27,7 @@ ipv6-$(CONFIG_NETLABEL) += calipso.o ipv6-$(CONFIG_IPV6_SEG6_LWTUNNEL) += seg6_iptunnel.o seg6_local.o ipv6-$(CONFIG_IPV6_SEG6_HMAC) += seg6_hmac.o ipv6-$(CONFIG_IPV6_RPL_LWTUNNEL) += rpl_iptunnel.o +ipv6-$(CONFIG_IPV6_IOAM6_LWTUNNEL) += ioam6_iptunnel.o ipv6-objs += $(ipv6-y) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3bf685fe64b9..db0a89810f28 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -89,6 +89,7 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/export.h> +#include <linux/ioam6.h> #define INFINITY_LIFE_TIME 0xFFFFFFFF @@ -237,6 +238,9 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64, .disable_policy = 0, .rpl_seg_enabled = 0, + .ioam6_enabled = 0, + .ioam6_id = IOAM6_DEFAULT_IF_ID, + .ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -293,6 +297,9 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64, .disable_policy = 0, .rpl_seg_enabled = 0, + .ioam6_enabled = 0, + .ioam6_id = IOAM6_DEFAULT_IF_ID, + .ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE, }; /* Check if link is ready: is it up and is a valid qdisc available */ @@ -1080,7 +1087,7 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg, goto out; } - ifa = kzalloc(sizeof(*ifa), 
gfp_flags); + ifa = kzalloc(sizeof(*ifa), gfp_flags | __GFP_ACCOUNT); if (!ifa) { err = -ENOBUFS; goto out; @@ -5211,8 +5218,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, .netnsid = -1, .type = type, }; - struct net *net = sock_net(skb->sk); - struct net *tgt_net = net; + struct net *tgt_net = sock_net(skb->sk); int idx, s_idx, s_ip_idx; int h, s_h; struct net_device *dev; @@ -5351,7 +5357,7 @@ static int inet6_rtm_valid_getaddr_req(struct sk_buff *skb, static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { - struct net *net = sock_net(in_skb->sk); + struct net *tgt_net = sock_net(in_skb->sk); struct inet6_fill_args fillargs = { .portid = NETLINK_CB(in_skb).portid, .seq = nlh->nlmsg_seq, @@ -5359,7 +5365,6 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, .flags = 0, .netnsid = -1, }; - struct net *tgt_net = net; struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; struct in6_addr *addr = NULL, *peer; @@ -5526,6 +5531,9 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_DISABLE_POLICY] = cnf->disable_policy; array[DEVCONF_NDISC_TCLASS] = cnf->ndisc_tclass; array[DEVCONF_RPL_SEG_ENABLED] = cnf->rpl_seg_enabled; + array[DEVCONF_IOAM6_ENABLED] = cnf->ioam6_enabled; + array[DEVCONF_IOAM6_ID] = cnf->ioam6_id; + array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide; } static inline size_t inet6_ifla6_size(void) @@ -6540,6 +6548,7 @@ static int addrconf_sysctl_disable_policy(struct ctl_table *ctl, int write, static int minus_one = -1; static const int two_five_five = 255; +static u32 ioam6_if_id_max = U16_MAX; static const struct ctl_table addrconf_sysctl[] = { { @@ -6933,6 +6942,31 @@ static const struct ctl_table addrconf_sysctl[] = { .proc_handler = proc_dointvec, }, { + .procname = "ioam6_enabled", + .data = &ipv6_devconf.ioam6_enabled, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = 
(void *)SYSCTL_ZERO, + .extra2 = (void *)SYSCTL_ONE, + }, + { + .procname = "ioam6_id", + .data = &ipv6_devconf.ioam6_id, + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + .extra1 = (void *)SYSCTL_ZERO, + .extra2 = (void *)&ioam6_if_id_max, + }, + { + .procname = "ioam6_id_wide", + .data = &ipv6_devconf.ioam6_id_wide, + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = proc_douintvec, + }, + { /* sentinel */ } }; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 2389ff702f51..d92c90d97763 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -62,6 +62,7 @@ #include <net/rpl.h> #include <net/compat.h> #include <net/xfrm.h> +#include <net/ioam6.h> #include <linux/uaccess.h> #include <linux/mroute6.h> @@ -961,6 +962,9 @@ static int __net_init inet6_net_init(struct net *net) net->ipv6.sysctl.fib_notify_on_flag_change = 0; atomic_set(&net->ipv6.fib6_sernum, 1); + net->ipv6.sysctl.ioam6_id = IOAM6_DEFAULT_ID; + net->ipv6.sysctl.ioam6_id_wide = IOAM6_DEFAULT_ID_WIDE; + err = ipv6_init_mibs(net); if (err) return err; @@ -1191,6 +1195,10 @@ static int __init inet6_init(void) if (err) goto rpl_fail; + err = ioam6_init(); + if (err) + goto ioam6_fail; + err = igmp6_late_init(); if (err) goto igmp6_late_err; @@ -1213,6 +1221,8 @@ sysctl_fail: igmp6_late_cleanup(); #endif igmp6_late_err: + ioam6_exit(); +ioam6_fail: rpl_exit(); rpl_fail: seg6_exit(); diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 26882e165c9e..d897faa4e9e6 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -49,6 +49,9 @@ #include <net/seg6_hmac.h> #endif #include <net/rpl.h> +#include <linux/ioam6.h> +#include <net/ioam6.h> +#include <net/dst_metadata.h> #include <linux/uaccess.h> @@ -928,6 +931,60 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff) return false; } +/* IOAM */ + +static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff) +{ + struct ioam6_trace_hdr *trace; + struct ioam6_namespace *ns; + struct ioam6_hdr 
*hdr; + + /* Bad alignment (must be 4n-aligned) */ + if (optoff & 3) + goto drop; + + /* Ignore if IOAM is not enabled on ingress */ + if (!__in6_dev_get(skb->dev)->cnf.ioam6_enabled) + goto ignore; + + /* Truncated Option header */ + hdr = (struct ioam6_hdr *)(skb_network_header(skb) + optoff); + if (hdr->opt_len < 2) + goto drop; + + switch (hdr->type) { + case IOAM6_TYPE_PREALLOC: + /* Truncated Pre-allocated Trace header */ + if (hdr->opt_len < 2 + sizeof(*trace)) + goto drop; + + /* Malformed Pre-allocated Trace header */ + trace = (struct ioam6_trace_hdr *)((u8 *)hdr + sizeof(*hdr)); + if (hdr->opt_len < 2 + sizeof(*trace) + trace->remlen * 4) + goto drop; + + /* Ignore if the IOAM namespace is unknown */ + ns = ioam6_namespace(ipv6_skb_net(skb), trace->namespace_id); + if (!ns) + goto ignore; + + if (!skb_valid_dst(skb)) + ip6_route_input(skb); + + ioam6_fill_trace_data(skb, ns, trace); + break; + default: + break; + } + +ignore: + return true; + +drop: + kfree_skb(skb); + return false; +} + /* Jumbo payload */ static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff) @@ -1000,6 +1057,10 @@ static const struct tlvtype_proc tlvprochopopt_lst[] = { .func = ipv6_hop_ra, }, { + .type = IPV6_TLV_IOAM, + .func = ipv6_hop_ioam, + }, + { .type = IPV6_TLV_JUMBO, .func = ipv6_hop_jumbo, }, diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c new file mode 100644 index 000000000000..5e8961004832 --- /dev/null +++ b/net/ipv6/ioam6.c @@ -0,0 +1,910 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * IPv6 IOAM implementation + * + * Author: + * Justin Iurman <justin.iurman@uliege.be> + */ + +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/net.h> +#include <linux/ioam6.h> +#include <linux/ioam6_genl.h> +#include <linux/rhashtable.h> + +#include <net/addrconf.h> +#include <net/genetlink.h> +#include <net/ioam6.h> + +static void ioam6_ns_release(struct ioam6_namespace *ns) +{ + kfree_rcu(ns, rcu); +} + +static void 
ioam6_sc_release(struct ioam6_schema *sc) +{ + kfree_rcu(sc, rcu); +} + +static void ioam6_free_ns(void *ptr, void *arg) +{ + struct ioam6_namespace *ns = (struct ioam6_namespace *)ptr; + + if (ns) + ioam6_ns_release(ns); +} + +static void ioam6_free_sc(void *ptr, void *arg) +{ + struct ioam6_schema *sc = (struct ioam6_schema *)ptr; + + if (sc) + ioam6_sc_release(sc); +} + +static int ioam6_ns_cmpfn(struct rhashtable_compare_arg *arg, const void *obj) +{ + const struct ioam6_namespace *ns = obj; + + return (ns->id != *(__be16 *)arg->key); +} + +static int ioam6_sc_cmpfn(struct rhashtable_compare_arg *arg, const void *obj) +{ + const struct ioam6_schema *sc = obj; + + return (sc->id != *(u32 *)arg->key); +} + +static const struct rhashtable_params rht_ns_params = { + .key_len = sizeof(__be16), + .key_offset = offsetof(struct ioam6_namespace, id), + .head_offset = offsetof(struct ioam6_namespace, head), + .automatic_shrinking = true, + .obj_cmpfn = ioam6_ns_cmpfn, +}; + +static const struct rhashtable_params rht_sc_params = { + .key_len = sizeof(u32), + .key_offset = offsetof(struct ioam6_schema, id), + .head_offset = offsetof(struct ioam6_schema, head), + .automatic_shrinking = true, + .obj_cmpfn = ioam6_sc_cmpfn, +}; + +static struct genl_family ioam6_genl_family; + +static const struct nla_policy ioam6_genl_policy_addns[] = { + [IOAM6_ATTR_NS_ID] = { .type = NLA_U16 }, + [IOAM6_ATTR_NS_DATA] = { .type = NLA_U32 }, + [IOAM6_ATTR_NS_DATA_WIDE] = { .type = NLA_U64 }, +}; + +static const struct nla_policy ioam6_genl_policy_delns[] = { + [IOAM6_ATTR_NS_ID] = { .type = NLA_U16 }, +}; + +static const struct nla_policy ioam6_genl_policy_addsc[] = { + [IOAM6_ATTR_SC_ID] = { .type = NLA_U32 }, + [IOAM6_ATTR_SC_DATA] = { .type = NLA_BINARY, + .len = IOAM6_MAX_SCHEMA_DATA_LEN }, +}; + +static const struct nla_policy ioam6_genl_policy_delsc[] = { + [IOAM6_ATTR_SC_ID] = { .type = NLA_U32 }, +}; + +static const struct nla_policy ioam6_genl_policy_ns_sc[] = { + [IOAM6_ATTR_NS_ID] 
= { .type = NLA_U16 }, + [IOAM6_ATTR_SC_ID] = { .type = NLA_U32 }, + [IOAM6_ATTR_SC_NONE] = { .type = NLA_FLAG }, +}; + +static int ioam6_genl_addns(struct sk_buff *skb, struct genl_info *info) +{ + struct ioam6_pernet_data *nsdata; + struct ioam6_namespace *ns; + u64 data64; + u32 data32; + __be16 id; + int err; + + if (!info->attrs[IOAM6_ATTR_NS_ID]) + return -EINVAL; + + id = cpu_to_be16(nla_get_u16(info->attrs[IOAM6_ATTR_NS_ID])); + nsdata = ioam6_pernet(genl_info_net(info)); + + mutex_lock(&nsdata->lock); + + ns = rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params); + if (ns) { + err = -EEXIST; + goto out_unlock; + } + + ns = kzalloc(sizeof(*ns), GFP_KERNEL); + if (!ns) { + err = -ENOMEM; + goto out_unlock; + } + + ns->id = id; + + if (!info->attrs[IOAM6_ATTR_NS_DATA]) + data32 = IOAM6_U32_UNAVAILABLE; + else + data32 = nla_get_u32(info->attrs[IOAM6_ATTR_NS_DATA]); + + if (!info->attrs[IOAM6_ATTR_NS_DATA_WIDE]) + data64 = IOAM6_U64_UNAVAILABLE; + else + data64 = nla_get_u64(info->attrs[IOAM6_ATTR_NS_DATA_WIDE]); + + ns->data = cpu_to_be32(data32); + ns->data_wide = cpu_to_be64(data64); + + err = rhashtable_lookup_insert_fast(&nsdata->namespaces, &ns->head, + rht_ns_params); + if (err) + kfree(ns); + +out_unlock: + mutex_unlock(&nsdata->lock); + return err; +} + +static int ioam6_genl_delns(struct sk_buff *skb, struct genl_info *info) +{ + struct ioam6_pernet_data *nsdata; + struct ioam6_namespace *ns; + struct ioam6_schema *sc; + __be16 id; + int err; + + if (!info->attrs[IOAM6_ATTR_NS_ID]) + return -EINVAL; + + id = cpu_to_be16(nla_get_u16(info->attrs[IOAM6_ATTR_NS_ID])); + nsdata = ioam6_pernet(genl_info_net(info)); + + mutex_lock(&nsdata->lock); + + ns = rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params); + if (!ns) { + err = -ENOENT; + goto out_unlock; + } + + sc = rcu_dereference_protected(ns->schema, + lockdep_is_held(&nsdata->lock)); + + err = rhashtable_remove_fast(&nsdata->namespaces, &ns->head, + rht_ns_params); + if (err) + 
goto out_unlock; + + if (sc) + rcu_assign_pointer(sc->ns, NULL); + + ioam6_ns_release(ns); + +out_unlock: + mutex_unlock(&nsdata->lock); + return err; +} + +static int __ioam6_genl_dumpns_element(struct ioam6_namespace *ns, + u32 portid, + u32 seq, + u32 flags, + struct sk_buff *skb, + u8 cmd) +{ + struct ioam6_schema *sc; + u64 data64; + u32 data32; + void *hdr; + + hdr = genlmsg_put(skb, portid, seq, &ioam6_genl_family, flags, cmd); + if (!hdr) + return -ENOMEM; + + data32 = be32_to_cpu(ns->data); + data64 = be64_to_cpu(ns->data_wide); + + if (nla_put_u16(skb, IOAM6_ATTR_NS_ID, be16_to_cpu(ns->id)) || + (data32 != IOAM6_U32_UNAVAILABLE && + nla_put_u32(skb, IOAM6_ATTR_NS_DATA, data32)) || + (data64 != IOAM6_U64_UNAVAILABLE && + nla_put_u64_64bit(skb, IOAM6_ATTR_NS_DATA_WIDE, + data64, IOAM6_ATTR_PAD))) + goto nla_put_failure; + + rcu_read_lock(); + + sc = rcu_dereference(ns->schema); + if (sc && nla_put_u32(skb, IOAM6_ATTR_SC_ID, sc->id)) { + rcu_read_unlock(); + goto nla_put_failure; + } + + rcu_read_unlock(); + + genlmsg_end(skb, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; +} + +static int ioam6_genl_dumpns_start(struct netlink_callback *cb) +{ + struct ioam6_pernet_data *nsdata = ioam6_pernet(sock_net(cb->skb->sk)); + struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0]; + + if (!iter) { + iter = kmalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return -ENOMEM; + + cb->args[0] = (long)iter; + } + + rhashtable_walk_enter(&nsdata->namespaces, iter); + + return 0; +} + +static int ioam6_genl_dumpns_done(struct netlink_callback *cb) +{ + struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0]; + + rhashtable_walk_exit(iter); + kfree(iter); + + return 0; +} + +static int ioam6_genl_dumpns(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct rhashtable_iter *iter; + struct ioam6_namespace *ns; + int err; + + iter = (struct rhashtable_iter *)cb->args[0]; + rhashtable_walk_start(iter); + 
+ for (;;) { + ns = rhashtable_walk_next(iter); + + if (IS_ERR(ns)) { + if (PTR_ERR(ns) == -EAGAIN) + continue; + err = PTR_ERR(ns); + goto done; + } else if (!ns) { + break; + } + + err = __ioam6_genl_dumpns_element(ns, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, + skb, + IOAM6_CMD_DUMP_NAMESPACES); + if (err) + goto done; + } + + err = skb->len; + +done: + rhashtable_walk_stop(iter); + return err; +} + +static int ioam6_genl_addsc(struct sk_buff *skb, struct genl_info *info) +{ + struct ioam6_pernet_data *nsdata; + int len, len_aligned, err; + struct ioam6_schema *sc; + u32 id; + + if (!info->attrs[IOAM6_ATTR_SC_ID] || !info->attrs[IOAM6_ATTR_SC_DATA]) + return -EINVAL; + + id = nla_get_u32(info->attrs[IOAM6_ATTR_SC_ID]); + nsdata = ioam6_pernet(genl_info_net(info)); + + mutex_lock(&nsdata->lock); + + sc = rhashtable_lookup_fast(&nsdata->schemas, &id, rht_sc_params); + if (sc) { + err = -EEXIST; + goto out_unlock; + } + + len = nla_len(info->attrs[IOAM6_ATTR_SC_DATA]); + len_aligned = ALIGN(len, 4); + + sc = kzalloc(sizeof(*sc) + len_aligned, GFP_KERNEL); + if (!sc) { + err = -ENOMEM; + goto out_unlock; + } + + sc->id = id; + sc->len = len_aligned; + sc->hdr = cpu_to_be32(sc->id | ((u8)(sc->len / 4) << 24)); + nla_memcpy(sc->data, info->attrs[IOAM6_ATTR_SC_DATA], len); + + err = rhashtable_lookup_insert_fast(&nsdata->schemas, &sc->head, + rht_sc_params); + if (err) + goto free_sc; + +out_unlock: + mutex_unlock(&nsdata->lock); + return err; +free_sc: + kfree(sc); + goto out_unlock; +} + +static int ioam6_genl_delsc(struct sk_buff *skb, struct genl_info *info) +{ + struct ioam6_pernet_data *nsdata; + struct ioam6_namespace *ns; + struct ioam6_schema *sc; + int err; + u32 id; + + if (!info->attrs[IOAM6_ATTR_SC_ID]) + return -EINVAL; + + id = nla_get_u32(info->attrs[IOAM6_ATTR_SC_ID]); + nsdata = ioam6_pernet(genl_info_net(info)); + + mutex_lock(&nsdata->lock); + + sc = rhashtable_lookup_fast(&nsdata->schemas, &id, rht_sc_params); + if (!sc) { 
+ err = -ENOENT; + goto out_unlock; + } + + ns = rcu_dereference_protected(sc->ns, lockdep_is_held(&nsdata->lock)); + + err = rhashtable_remove_fast(&nsdata->schemas, &sc->head, + rht_sc_params); + if (err) + goto out_unlock; + + if (ns) + rcu_assign_pointer(ns->schema, NULL); + + ioam6_sc_release(sc); + +out_unlock: + mutex_unlock(&nsdata->lock); + return err; +} + +static int __ioam6_genl_dumpsc_element(struct ioam6_schema *sc, + u32 portid, u32 seq, u32 flags, + struct sk_buff *skb, u8 cmd) +{ + struct ioam6_namespace *ns; + void *hdr; + + hdr = genlmsg_put(skb, portid, seq, &ioam6_genl_family, flags, cmd); + if (!hdr) + return -ENOMEM; + + if (nla_put_u32(skb, IOAM6_ATTR_SC_ID, sc->id) || + nla_put(skb, IOAM6_ATTR_SC_DATA, sc->len, sc->data)) + goto nla_put_failure; + + rcu_read_lock(); + + ns = rcu_dereference(sc->ns); + if (ns && nla_put_u16(skb, IOAM6_ATTR_NS_ID, be16_to_cpu(ns->id))) { + rcu_read_unlock(); + goto nla_put_failure; + } + + rcu_read_unlock(); + + genlmsg_end(skb, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; +} + +static int ioam6_genl_dumpsc_start(struct netlink_callback *cb) +{ + struct ioam6_pernet_data *nsdata = ioam6_pernet(sock_net(cb->skb->sk)); + struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0]; + + if (!iter) { + iter = kmalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return -ENOMEM; + + cb->args[0] = (long)iter; + } + + rhashtable_walk_enter(&nsdata->schemas, iter); + + return 0; +} + +static int ioam6_genl_dumpsc_done(struct netlink_callback *cb) +{ + struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0]; + + rhashtable_walk_exit(iter); + kfree(iter); + + return 0; +} + +static int ioam6_genl_dumpsc(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct rhashtable_iter *iter; + struct ioam6_schema *sc; + int err; + + iter = (struct rhashtable_iter *)cb->args[0]; + rhashtable_walk_start(iter); + + for (;;) { + sc = rhashtable_walk_next(iter); + + if 
(IS_ERR(sc)) { + if (PTR_ERR(sc) == -EAGAIN) + continue; + err = PTR_ERR(sc); + goto done; + } else if (!sc) { + break; + } + + err = __ioam6_genl_dumpsc_element(sc, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, + skb, + IOAM6_CMD_DUMP_SCHEMAS); + if (err) + goto done; + } + + err = skb->len; + +done: + rhashtable_walk_stop(iter); + return err; +} + +static int ioam6_genl_ns_set_schema(struct sk_buff *skb, struct genl_info *info) +{ + struct ioam6_namespace *ns, *ns_ref; + struct ioam6_schema *sc, *sc_ref; + struct ioam6_pernet_data *nsdata; + __be16 ns_id; + u32 sc_id; + int err; + + if (!info->attrs[IOAM6_ATTR_NS_ID] || + (!info->attrs[IOAM6_ATTR_SC_ID] && + !info->attrs[IOAM6_ATTR_SC_NONE])) + return -EINVAL; + + ns_id = cpu_to_be16(nla_get_u16(info->attrs[IOAM6_ATTR_NS_ID])); + nsdata = ioam6_pernet(genl_info_net(info)); + + mutex_lock(&nsdata->lock); + + ns = rhashtable_lookup_fast(&nsdata->namespaces, &ns_id, rht_ns_params); + if (!ns) { + err = -ENOENT; + goto out_unlock; + } + + if (info->attrs[IOAM6_ATTR_SC_NONE]) { + sc = NULL; + } else { + sc_id = nla_get_u32(info->attrs[IOAM6_ATTR_SC_ID]); + sc = rhashtable_lookup_fast(&nsdata->schemas, &sc_id, + rht_sc_params); + if (!sc) { + err = -ENOENT; + goto out_unlock; + } + } + + sc_ref = rcu_dereference_protected(ns->schema, + lockdep_is_held(&nsdata->lock)); + if (sc_ref) + rcu_assign_pointer(sc_ref->ns, NULL); + rcu_assign_pointer(ns->schema, sc); + + if (sc) { + ns_ref = rcu_dereference_protected(sc->ns, + lockdep_is_held(&nsdata->lock)); + if (ns_ref) + rcu_assign_pointer(ns_ref->schema, NULL); + rcu_assign_pointer(sc->ns, ns); + } + + err = 0; + +out_unlock: + mutex_unlock(&nsdata->lock); + return err; +} + +static const struct genl_ops ioam6_genl_ops[] = { + { + .cmd = IOAM6_CMD_ADD_NAMESPACE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = ioam6_genl_addns, + .flags = GENL_ADMIN_PERM, + .policy = ioam6_genl_policy_addns, + .maxattr = 
ARRAY_SIZE(ioam6_genl_policy_addns) - 1, + }, + { + .cmd = IOAM6_CMD_DEL_NAMESPACE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = ioam6_genl_delns, + .flags = GENL_ADMIN_PERM, + .policy = ioam6_genl_policy_delns, + .maxattr = ARRAY_SIZE(ioam6_genl_policy_delns) - 1, + }, + { + .cmd = IOAM6_CMD_DUMP_NAMESPACES, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .start = ioam6_genl_dumpns_start, + .dumpit = ioam6_genl_dumpns, + .done = ioam6_genl_dumpns_done, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = IOAM6_CMD_ADD_SCHEMA, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = ioam6_genl_addsc, + .flags = GENL_ADMIN_PERM, + .policy = ioam6_genl_policy_addsc, + .maxattr = ARRAY_SIZE(ioam6_genl_policy_addsc) - 1, + }, + { + .cmd = IOAM6_CMD_DEL_SCHEMA, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = ioam6_genl_delsc, + .flags = GENL_ADMIN_PERM, + .policy = ioam6_genl_policy_delsc, + .maxattr = ARRAY_SIZE(ioam6_genl_policy_delsc) - 1, + }, + { + .cmd = IOAM6_CMD_DUMP_SCHEMAS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .start = ioam6_genl_dumpsc_start, + .dumpit = ioam6_genl_dumpsc, + .done = ioam6_genl_dumpsc_done, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = IOAM6_CMD_NS_SET_SCHEMA, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = ioam6_genl_ns_set_schema, + .flags = GENL_ADMIN_PERM, + .policy = ioam6_genl_policy_ns_sc, + .maxattr = ARRAY_SIZE(ioam6_genl_policy_ns_sc) - 1, + }, +}; + +static struct genl_family ioam6_genl_family __ro_after_init = { + .name = IOAM6_GENL_NAME, + .version = IOAM6_GENL_VERSION, + .netnsok = true, + .parallel_ops = true, + .ops = ioam6_genl_ops, + .n_ops = ARRAY_SIZE(ioam6_genl_ops), + .module = THIS_MODULE, +}; + +struct ioam6_namespace *ioam6_namespace(struct net *net, __be16 id) +{ + struct ioam6_pernet_data *nsdata = ioam6_pernet(net); + + return 
rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params); +} + +static void __ioam6_fill_trace_data(struct sk_buff *skb, + struct ioam6_namespace *ns, + struct ioam6_trace_hdr *trace, + struct ioam6_schema *sc, + u8 sclen) +{ + struct __kernel_sock_timeval ts; + u64 raw64; + u32 raw32; + u16 raw16; + u8 *data; + u8 byte; + + data = trace->data + trace->remlen * 4 - trace->nodelen * 4 - sclen * 4; + + /* hop_lim and node_id */ + if (trace->type.bit0) { + byte = ipv6_hdr(skb)->hop_limit; + if (skb->dev) + byte--; + + raw32 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id; + + *(__be32 *)data = cpu_to_be32((byte << 24) | raw32); + data += sizeof(__be32); + } + + /* ingress_if_id and egress_if_id */ + if (trace->type.bit1) { + if (!skb->dev) + raw16 = IOAM6_U16_UNAVAILABLE; + else + raw16 = (__force u16)__in6_dev_get(skb->dev)->cnf.ioam6_id; + + *(__be16 *)data = cpu_to_be16(raw16); + data += sizeof(__be16); + + if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) + raw16 = IOAM6_U16_UNAVAILABLE; + else + raw16 = (__force u16)__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id; + + *(__be16 *)data = cpu_to_be16(raw16); + data += sizeof(__be16); + } + + /* timestamp seconds */ + if (trace->type.bit2) { + if (!skb->dev) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + } else { + if (!skb->tstamp) + __net_timestamp(skb); + + skb_get_new_timestamp(skb, &ts); + *(__be32 *)data = cpu_to_be32((u32)ts.tv_sec); + } + data += sizeof(__be32); + } + + /* timestamp subseconds */ + if (trace->type.bit3) { + if (!skb->dev) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + } else { + if (!skb->tstamp) + __net_timestamp(skb); + + if (!trace->type.bit2) + skb_get_new_timestamp(skb, &ts); + + *(__be32 *)data = cpu_to_be32((u32)ts.tv_usec); + } + data += sizeof(__be32); + } + + /* transit delay */ + if (trace->type.bit4) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + + /* namespace data */ + if (trace->type.bit5) { + *(__be32 
*)data = ns->data; + data += sizeof(__be32); + } + + /* queue depth */ + if (trace->type.bit6) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + + /* checksum complement */ + if (trace->type.bit7) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + + /* hop_lim and node_id (wide) */ + if (trace->type.bit8) { + byte = ipv6_hdr(skb)->hop_limit; + if (skb->dev) + byte--; + + raw64 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id_wide; + + *(__be64 *)data = cpu_to_be64(((u64)byte << 56) | raw64); + data += sizeof(__be64); + } + + /* ingress_if_id and egress_if_id (wide) */ + if (trace->type.bit9) { + if (!skb->dev) + raw32 = IOAM6_U32_UNAVAILABLE; + else + raw32 = __in6_dev_get(skb->dev)->cnf.ioam6_id_wide; + + *(__be32 *)data = cpu_to_be32(raw32); + data += sizeof(__be32); + + if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) + raw32 = IOAM6_U32_UNAVAILABLE; + else + raw32 = __in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id_wide; + + *(__be32 *)data = cpu_to_be32(raw32); + data += sizeof(__be32); + } + + /* namespace data (wide) */ + if (trace->type.bit10) { + *(__be64 *)data = ns->data_wide; + data += sizeof(__be64); + } + + /* buffer occupancy */ + if (trace->type.bit11) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + + /* opaque state snapshot */ + if (trace->type.bit22) { + if (!sc) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE >> 8); + } else { + *(__be32 *)data = sc->hdr; + data += sizeof(__be32); + + memcpy(data, sc->data, sc->len); + } + } +} + +/* called with rcu_read_lock() */ +void ioam6_fill_trace_data(struct sk_buff *skb, + struct ioam6_namespace *ns, + struct ioam6_trace_hdr *trace) +{ + struct ioam6_schema *sc; + u8 sclen = 0; + + /* Skip if Overflow flag is set OR + * if an unknown type (bit 12-21) is set + */ + if (trace->overflow || + trace->type.bit12 | trace->type.bit13 | trace->type.bit14 | + trace->type.bit15 | 
trace->type.bit16 | trace->type.bit17 | + trace->type.bit18 | trace->type.bit19 | trace->type.bit20 | + trace->type.bit21) { + return; + } + + /* NodeLen does not include Opaque State Snapshot length. We need to + * take it into account if the corresponding bit is set (bit 22) and + * if the current IOAM namespace has an active schema attached to it + */ + sc = rcu_dereference(ns->schema); + if (trace->type.bit22) { + sclen = sizeof_field(struct ioam6_schema, hdr) / 4; + + if (sc) + sclen += sc->len / 4; + } + + /* If there is no space remaining, we set the Overflow flag and we + * skip without filling the trace + */ + if (!trace->remlen || trace->remlen < trace->nodelen + sclen) { + trace->overflow = 1; + return; + } + + __ioam6_fill_trace_data(skb, ns, trace, sc, sclen); + trace->remlen -= trace->nodelen + sclen; +} + +static int __net_init ioam6_net_init(struct net *net) +{ + struct ioam6_pernet_data *nsdata; + int err = -ENOMEM; + + nsdata = kzalloc(sizeof(*nsdata), GFP_KERNEL); + if (!nsdata) + goto out; + + mutex_init(&nsdata->lock); + net->ipv6.ioam6_data = nsdata; + + err = rhashtable_init(&nsdata->namespaces, &rht_ns_params); + if (err) + goto free_nsdata; + + err = rhashtable_init(&nsdata->schemas, &rht_sc_params); + if (err) + goto free_rht_ns; + +out: + return err; +free_rht_ns: + rhashtable_destroy(&nsdata->namespaces); +free_nsdata: + kfree(nsdata); + net->ipv6.ioam6_data = NULL; + goto out; +} + +static void __net_exit ioam6_net_exit(struct net *net) +{ + struct ioam6_pernet_data *nsdata = ioam6_pernet(net); + + rhashtable_free_and_destroy(&nsdata->namespaces, ioam6_free_ns, NULL); + rhashtable_free_and_destroy(&nsdata->schemas, ioam6_free_sc, NULL); + + kfree(nsdata); +} + +static struct pernet_operations ioam6_net_ops = { + .init = ioam6_net_init, + .exit = ioam6_net_exit, +}; + +int __init ioam6_init(void) +{ + int err = register_pernet_subsys(&ioam6_net_ops); + if (err) + goto out; + + err = genl_register_family(&ioam6_genl_family); + if (err) + 
goto out_unregister_pernet_subsys; + +#ifdef CONFIG_IPV6_IOAM6_LWTUNNEL + err = ioam6_iptunnel_init(); + if (err) + goto out_unregister_genl; +#endif + + pr_info("In-situ OAM (IOAM) with IPv6\n"); + +out: + return err; +#ifdef CONFIG_IPV6_IOAM6_LWTUNNEL +out_unregister_genl: + genl_unregister_family(&ioam6_genl_family); +#endif +out_unregister_pernet_subsys: + unregister_pernet_subsys(&ioam6_net_ops); + goto out; +} + +void ioam6_exit(void) +{ +#ifdef CONFIG_IPV6_IOAM6_LWTUNNEL + ioam6_iptunnel_exit(); +#endif + genl_unregister_family(&ioam6_genl_family); + unregister_pernet_subsys(&ioam6_net_ops); +} diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c new file mode 100644 index 000000000000..f9ee04541c17 --- /dev/null +++ b/net/ipv6/ioam6_iptunnel.c @@ -0,0 +1,274 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * IPv6 IOAM Lightweight Tunnel implementation + * + * Author: + * Justin Iurman <justin.iurman@uliege.be> + */ + +#include <linux/kernel.h> +#include <linux/skbuff.h> +#include <linux/net.h> +#include <linux/netlink.h> +#include <linux/in6.h> +#include <linux/ioam6.h> +#include <linux/ioam6_iptunnel.h> +#include <net/dst.h> +#include <net/sock.h> +#include <net/lwtunnel.h> +#include <net/ioam6.h> + +#define IOAM6_MASK_SHORT_FIELDS 0xff100000 +#define IOAM6_MASK_WIDE_FIELDS 0xe00000 + +struct ioam6_lwt_encap { + struct ipv6_hopopt_hdr eh; + u8 pad[2]; /* 2-octet padding for 4n-alignment */ + struct ioam6_hdr ioamh; + struct ioam6_trace_hdr traceh; +} __packed; + +struct ioam6_lwt { + struct ioam6_lwt_encap tuninfo; +}; + +static struct ioam6_lwt *ioam6_lwt_state(struct lwtunnel_state *lwt) +{ + return (struct ioam6_lwt *)lwt->data; +} + +static struct ioam6_lwt_encap *ioam6_lwt_info(struct lwtunnel_state *lwt) +{ + return &ioam6_lwt_state(lwt)->tuninfo; +} + +static struct ioam6_trace_hdr *ioam6_trace(struct lwtunnel_state *lwt) +{ + return &(ioam6_lwt_state(lwt)->tuninfo.traceh); +} + +static const struct nla_policy 
ioam6_iptunnel_policy[IOAM6_IPTUNNEL_MAX + 1] = { + [IOAM6_IPTUNNEL_TRACE] = NLA_POLICY_EXACT_LEN(sizeof(struct ioam6_trace_hdr)), +}; + +static int nla_put_ioam6_trace(struct sk_buff *skb, int attrtype, + struct ioam6_trace_hdr *trace) +{ + struct ioam6_trace_hdr *data; + struct nlattr *nla; + int len; + + len = sizeof(*trace); + + nla = nla_reserve(skb, attrtype, len); + if (!nla) + return -EMSGSIZE; + + data = nla_data(nla); + memcpy(data, trace, len); + + return 0; +} + +static bool ioam6_validate_trace_hdr(struct ioam6_trace_hdr *trace) +{ + u32 fields; + + if (!trace->type_be32 || !trace->remlen || + trace->remlen > IOAM6_TRACE_DATA_SIZE_MAX / 4) + return false; + + trace->nodelen = 0; + fields = be32_to_cpu(trace->type_be32); + + trace->nodelen += hweight32(fields & IOAM6_MASK_SHORT_FIELDS) + * (sizeof(__be32) / 4); + trace->nodelen += hweight32(fields & IOAM6_MASK_WIDE_FIELDS) + * (sizeof(__be64) / 4); + + return true; +} + +static int ioam6_build_state(struct net *net, struct nlattr *nla, + unsigned int family, const void *cfg, + struct lwtunnel_state **ts, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[IOAM6_IPTUNNEL_MAX + 1]; + struct ioam6_lwt_encap *tuninfo; + struct ioam6_trace_hdr *trace; + struct lwtunnel_state *s; + int len_aligned; + int len, err; + + if (family != AF_INET6) + return -EINVAL; + + err = nla_parse_nested(tb, IOAM6_IPTUNNEL_MAX, nla, + ioam6_iptunnel_policy, extack); + if (err < 0) + return err; + + if (!tb[IOAM6_IPTUNNEL_TRACE]) { + NL_SET_ERR_MSG(extack, "missing trace"); + return -EINVAL; + } + + trace = nla_data(tb[IOAM6_IPTUNNEL_TRACE]); + if (!ioam6_validate_trace_hdr(trace)) { + NL_SET_ERR_MSG_ATTR(extack, tb[IOAM6_IPTUNNEL_TRACE], + "invalid trace validation"); + return -EINVAL; + } + + len = sizeof(*tuninfo) + trace->remlen * 4; + len_aligned = ALIGN(len, 8); + + s = lwtunnel_state_alloc(len_aligned); + if (!s) + return -ENOMEM; + + tuninfo = ioam6_lwt_info(s); + tuninfo->eh.hdrlen = (len_aligned >> 3) - 1; + 
tuninfo->pad[0] = IPV6_TLV_PADN; + tuninfo->ioamh.type = IOAM6_TYPE_PREALLOC; + tuninfo->ioamh.opt_type = IPV6_TLV_IOAM; + tuninfo->ioamh.opt_len = sizeof(tuninfo->ioamh) - 2 + sizeof(*trace) + + trace->remlen * 4; + + memcpy(&tuninfo->traceh, trace, sizeof(*trace)); + + len = len_aligned - len; + if (len == 1) { + tuninfo->traceh.data[trace->remlen * 4] = IPV6_TLV_PAD1; + } else if (len > 0) { + tuninfo->traceh.data[trace->remlen * 4] = IPV6_TLV_PADN; + tuninfo->traceh.data[trace->remlen * 4 + 1] = len - 2; + } + + s->type = LWTUNNEL_ENCAP_IOAM6; + s->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT; + + *ts = s; + + return 0; +} + +static int ioam6_do_inline(struct sk_buff *skb, struct ioam6_lwt_encap *tuninfo) +{ + struct ioam6_trace_hdr *trace; + struct ipv6hdr *oldhdr, *hdr; + struct ioam6_namespace *ns; + int hdrlen, err; + + hdrlen = (tuninfo->eh.hdrlen + 1) << 3; + + err = skb_cow_head(skb, hdrlen + skb->mac_len); + if (unlikely(err)) + return err; + + oldhdr = ipv6_hdr(skb); + skb_pull(skb, sizeof(*oldhdr)); + skb_postpull_rcsum(skb, skb_network_header(skb), sizeof(*oldhdr)); + + skb_push(skb, sizeof(*oldhdr) + hdrlen); + skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + + hdr = ipv6_hdr(skb); + memmove(hdr, oldhdr, sizeof(*oldhdr)); + tuninfo->eh.nexthdr = hdr->nexthdr; + + skb_set_transport_header(skb, sizeof(*hdr)); + skb_postpush_rcsum(skb, hdr, sizeof(*hdr) + hdrlen); + + memcpy(skb_transport_header(skb), (u8 *)tuninfo, hdrlen); + + hdr->nexthdr = NEXTHDR_HOP; + hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr)); + + trace = (struct ioam6_trace_hdr *)(skb_transport_header(skb) + + sizeof(struct ipv6_hopopt_hdr) + 2 + + sizeof(struct ioam6_hdr)); + + ns = ioam6_namespace(dev_net(skb_dst(skb)->dev), trace->namespace_id); + if (ns) + ioam6_fill_trace_data(skb, ns, trace); + + return 0; +} + +static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct lwtunnel_state *lwt = skb_dst(skb)->lwtstate; + int err = 
-EINVAL; + + if (skb->protocol != htons(ETH_P_IPV6)) + goto drop; + + /* Only for packets we send and + * that do not contain a Hop-by-Hop yet + */ + if (skb->dev || ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP) + goto out; + + err = ioam6_do_inline(skb, ioam6_lwt_info(lwt)); + if (unlikely(err)) + goto drop; + + err = skb_cow_head(skb, LL_RESERVED_SPACE(skb_dst(skb)->dev)); + if (unlikely(err)) + goto drop; + +out: + return lwt->orig_output(net, sk, skb); + +drop: + kfree_skb(skb); + return err; +} + +static int ioam6_fill_encap_info(struct sk_buff *skb, + struct lwtunnel_state *lwtstate) +{ + struct ioam6_trace_hdr *trace = ioam6_trace(lwtstate); + + if (nla_put_ioam6_trace(skb, IOAM6_IPTUNNEL_TRACE, trace)) + return -EMSGSIZE; + + return 0; +} + +static int ioam6_encap_nlsize(struct lwtunnel_state *lwtstate) +{ + struct ioam6_trace_hdr *trace = ioam6_trace(lwtstate); + + return nla_total_size(sizeof(*trace)); +} + +static int ioam6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) +{ + struct ioam6_trace_hdr *a_hdr = ioam6_trace(a); + struct ioam6_trace_hdr *b_hdr = ioam6_trace(b); + + return (a_hdr->namespace_id != b_hdr->namespace_id); +} + +static const struct lwtunnel_encap_ops ioam6_iptun_ops = { + .build_state = ioam6_build_state, + .output = ioam6_output, + .fill_encap = ioam6_fill_encap_info, + .get_encap_size = ioam6_encap_nlsize, + .cmp_encap = ioam6_encap_cmp, + .owner = THIS_MODULE, +}; + +int __init ioam6_iptunnel_init(void) +{ + return lwtunnel_encap_add_ops(&ioam6_iptun_ops, LWTUNNEL_ENCAP_IOAM6); +} + +void ioam6_iptunnel_exit(void) +{ + lwtunnel_encap_del_ops(&ioam6_iptun_ops, LWTUNNEL_ENCAP_IOAM6); +} diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 2d650dc24349..a8f118e469b7 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -2449,8 +2449,8 @@ int __init fib6_init(void) int ret = -ENOMEM; fib6_node_kmem = kmem_cache_create("fib6_nodes", - sizeof(struct fib6_node), - 0, SLAB_HWCACHE_ALIGN, + sizeof(struct fib6_node), 0, 
+ SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL); if (!fib6_node_kmem) goto out; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 8e6ca9ad6812..d72347c75f8b 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -608,7 +608,7 @@ int ip6_forward(struct sk_buff *skb) } } - mtu = ip6_dst_mtu_forward(dst); + mtu = ip6_dst_mtu_maybe_forward(dst, true); if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b6ddf23d3833..6b8051106aba 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3201,25 +3201,7 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst) INDIRECT_CALLABLE_SCOPE unsigned int ip6_mtu(const struct dst_entry *dst) { - struct inet6_dev *idev; - unsigned int mtu; - - mtu = dst_metric_raw(dst, RTAX_MTU); - if (mtu) - goto out; - - mtu = IPV6_MIN_MTU; - - rcu_read_lock(); - idev = __in6_dev_get(dst->dev); - if (idev) - mtu = idev->cnf.mtu6; - rcu_read_unlock(); - -out: - mtu = min_t(unsigned int, mtu, IP6_MAX_MTU); - - return mtu - lwtunnel_headroom(dst->lwtstate, mtu); + return ip6_dst_mtu_maybe_forward(dst, false); } EXPORT_INDIRECT_CALLABLE(ip6_mtu); @@ -6638,7 +6620,7 @@ int __init ip6_route_init(void) ret = -ENOMEM; ip6_dst_ops_template.kmem_cachep = kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, - SLAB_HWCACHE_ALIGN, NULL); + SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL); if (!ip6_dst_ops_template.kmem_cachep) goto out; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index df5bea818410..33adc12b697d 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -321,7 +321,7 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr) * we try harder to allocate. */ kp = (cmax <= 1 || capable(CAP_NET_ADMIN)) ? 
- kcalloc(cmax, sizeof(*kp), GFP_KERNEL | __GFP_NOWARN) : + kcalloc(cmax, sizeof(*kp), GFP_KERNEL_ACCOUNT | __GFP_NOWARN) : NULL; rcu_read_lock(); @@ -334,7 +334,8 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr) * For root users, retry allocating enough memory for * the answer. */ - kp = kcalloc(ca, sizeof(*kp), GFP_ATOMIC); + kp = kcalloc(ca, sizeof(*kp), GFP_ATOMIC | __GFP_ACCOUNT | + __GFP_NOWARN); if (!kp) { ret = -ENOMEM; goto out; diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index d7cf26f730d7..d53dd142bf87 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -21,6 +21,7 @@ #ifdef CONFIG_NETLABEL #include <net/calipso.h> #endif +#include <linux/ioam6.h> static int two = 2; static int three = 3; @@ -28,6 +29,8 @@ static int flowlabel_reflect_max = 0x7; static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX; static u32 rt6_multipath_hash_fields_all_mask = FIB_MULTIPATH_HASH_FIELD_ALL_MASK; +static u32 ioam6_id_max = IOAM6_DEFAULT_ID; +static u64 ioam6_id_wide_max = IOAM6_DEFAULT_ID_WIDE; static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) @@ -196,6 +199,22 @@ static struct ctl_table ipv6_table_template[] = { .extra1 = SYSCTL_ZERO, .extra2 = &two, }, + { + .procname = "ioam6_id", + .data = &init_net.ipv6.sysctl.ioam6_id, + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + .extra2 = &ioam6_id_max, + }, + { + .procname = "ioam6_id_wide", + .data = &init_net.ipv6.sysctl.ioam6_id_wide, + .maxlen = sizeof(u64), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + .extra2 = &ioam6_id_wide_max, + }, { } }; diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 05a21dd072df..ffeb2df8be7a 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -407,7 +407,6 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, /* Verify ttl is valid */ if (dec.ttl <= 1) goto err; 
- dec.ttl -= 1; /* Find the output device */ out_dev = rcu_dereference(nh->nh_dev); @@ -431,6 +430,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, skb->dev = out_dev; skb->protocol = htons(ETH_P_MPLS_UC); + dec.ttl -= 1; if (unlikely(!new_header_size && dec.bos)) { /* Penultimate hop popping */ if (!mpls_egress(dev_net(out_dev), rt, skb, dec)) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 551976e4284c..8ecad71b3613 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -99,7 +99,7 @@ static int flow_offload_fill_route(struct flow_offload *flow, flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true); break; case NFPROTO_IPV6: - flow_tuple->mtu = ip6_dst_mtu_forward(dst); + flow_tuple->mtu = ip6_dst_mtu_maybe_forward(dst, true); break; } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 380f95aacdec..24b7cf447bc5 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2545,13 +2545,15 @@ int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid, /* errors reported via destination sk->sk_err, but propagate * delivery errors if NETLINK_BROADCAST_ERROR flag is set */ err = nlmsg_multicast(sk, skb, exclude_portid, group, flags); + if (err == -ESRCH) + err = 0; } if (report) { int err2; err2 = nlmsg_unicast(sk, skb, portid); - if (!err || err == -ESRCH) + if (!err) err = err2; } diff --git a/net/nfc/core.c b/net/nfc/core.c index 573c80c6ff7a..6ade54149b73 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -1048,7 +1048,7 @@ struct nfc_dev *nfc_get_device(unsigned int idx) * @tx_headroom: reserved space at beginning of skb * @tx_tailroom: reserved space at end of skb */ -struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, +struct nfc_dev *nfc_allocate_device(const struct nfc_ops *ops, u32 supported_protocols, int tx_headroom, int tx_tailroom) { diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c index 
5044c7db577e..fefc03674f4f 100644 --- a/net/nfc/digital_core.c +++ b/net/nfc/digital_core.c @@ -732,7 +732,7 @@ exit: return rc; } -static struct nfc_ops digital_nfc_ops = { +static const struct nfc_ops digital_nfc_ops = { .dev_up = digital_dev_up, .dev_down = digital_dev_down, .start_poll = digital_start_poll, @@ -745,7 +745,7 @@ static struct nfc_ops digital_nfc_ops = { .im_transceive = digital_in_send, }; -struct nfc_digital_dev *nfc_digital_allocate_device(struct nfc_digital_ops *ops, +struct nfc_digital_dev *nfc_digital_allocate_device(const struct nfc_digital_ops *ops, __u32 supported_protocols, __u32 driver_capabilities, int tx_headroom, int tx_tailroom) diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 3481941be70b..ff94ac774937 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -447,7 +447,7 @@ static void nfc_hci_cmd_timeout(struct timer_list *t) } static int hci_dev_connect_gates(struct nfc_hci_dev *hdev, u8 gate_count, - struct nfc_hci_gate *gates) + const struct nfc_hci_gate *gates) { int r; while (gate_count--) { @@ -928,7 +928,7 @@ static int hci_fw_download(struct nfc_dev *nfc_dev, const char *firmware_name) return hdev->ops->fw_download(hdev, firmware_name); } -static struct nfc_ops hci_nfc_ops = { +static const struct nfc_ops hci_nfc_ops = { .dev_up = hci_dev_up, .dev_down = hci_dev_down, .start_poll = hci_start_poll, @@ -947,7 +947,7 @@ static struct nfc_ops hci_nfc_ops = { .se_io = hci_se_io, }; -struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, +struct nfc_hci_dev *nfc_hci_allocate_device(const struct nfc_hci_ops *ops, struct nfc_hci_init_data *init_data, unsigned long quirks, u32 protocols, diff --git a/net/nfc/hci/llc.c b/net/nfc/hci/llc.c index 6ab40ea17662..fc6b63de3462 100644 --- a/net/nfc/hci/llc.c +++ b/net/nfc/hci/llc.c @@ -41,7 +41,7 @@ void nfc_llc_exit(void) } } -int nfc_llc_register(const char *name, struct nfc_llc_ops *ops) +int nfc_llc_register(const char *name, const struct nfc_llc_ops *ops) { 
struct nfc_llc_engine *llc_engine; diff --git a/net/nfc/hci/llc.h b/net/nfc/hci/llc.h index 823ddb621e5d..d66271d211a5 100644 --- a/net/nfc/hci/llc.h +++ b/net/nfc/hci/llc.h @@ -26,20 +26,20 @@ struct nfc_llc_ops { struct nfc_llc_engine { const char *name; - struct nfc_llc_ops *ops; + const struct nfc_llc_ops *ops; struct list_head entry; }; struct nfc_llc { void *data; - struct nfc_llc_ops *ops; + const struct nfc_llc_ops *ops; int rx_headroom; int rx_tailroom; }; void *nfc_llc_get_data(struct nfc_llc *llc); -int nfc_llc_register(const char *name, struct nfc_llc_ops *ops); +int nfc_llc_register(const char *name, const struct nfc_llc_ops *ops); void nfc_llc_unregister(const char *name); int nfc_llc_nop_register(void); diff --git a/net/nfc/hci/llc_nop.c b/net/nfc/hci/llc_nop.c index a42852f36f2e..a58716f16954 100644 --- a/net/nfc/hci/llc_nop.c +++ b/net/nfc/hci/llc_nop.c @@ -71,7 +71,7 @@ static int llc_nop_xmit_from_hci(struct nfc_llc *llc, struct sk_buff *skb) return llc_nop->xmit_to_drv(llc_nop->hdev, skb); } -static struct nfc_llc_ops llc_nop_ops = { +static const struct nfc_llc_ops llc_nop_ops = { .init = llc_nop_init, .deinit = llc_nop_deinit, .start = llc_nop_start, diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c index 1e3a90049da9..6b747856d095 100644 --- a/net/nfc/hci/llc_shdlc.c +++ b/net/nfc/hci/llc_shdlc.c @@ -820,7 +820,7 @@ static int llc_shdlc_xmit_from_hci(struct nfc_llc *llc, struct sk_buff *skb) return 0; } -static struct nfc_llc_ops llc_shdlc_ops = { +static const struct nfc_llc_ops llc_shdlc_ops = { .init = llc_shdlc_init, .deinit = llc_shdlc_deinit, .start = llc_shdlc_start, diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index da7fe9db1b00..400d66c4e210 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -1102,7 +1102,7 @@ static int nci_fw_download(struct nfc_dev *nfc_dev, const char *firmware_name) return ndev->ops->fw_download(ndev, firmware_name); } -static struct nfc_ops nci_nfc_ops = { +static const struct 
nfc_ops nci_nfc_ops = { .dev_up = nci_dev_up, .dev_down = nci_dev_down, .start_poll = nci_start_poll, @@ -1129,7 +1129,7 @@ static struct nfc_ops nci_nfc_ops = { * @tx_headroom: Reserved space at beginning of skb * @tx_tailroom: Reserved space at end of skb */ -struct nci_dev *nci_allocate_device(struct nci_ops *ops, +struct nci_dev *nci_allocate_device(const struct nci_ops *ops, __u32 supported_protocols, int tx_headroom, int tx_tailroom) { @@ -1152,8 +1152,7 @@ struct nci_dev *nci_allocate_device(struct nci_ops *ops, if (ops->n_prop_ops > NCI_MAX_PROPRIETARY_CMD) { pr_err("Too many proprietary commands: %zd\n", ops->n_prop_ops); - ops->prop_ops = NULL; - ops->n_prop_ops = 0; + goto free_nci; } ndev->tx_headroom = tx_headroom; @@ -1332,7 +1331,7 @@ int nci_send_frame(struct nci_dev *ndev, struct sk_buff *skb) EXPORT_SYMBOL(nci_send_frame); /* Send NCI command */ -int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload) +int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, const void *payload) { struct nci_ctrl_hdr *hdr; struct sk_buff *skb; @@ -1364,12 +1363,12 @@ int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload) EXPORT_SYMBOL(nci_send_cmd); /* Proprietary commands API */ -static struct nci_driver_ops *ops_cmd_lookup(struct nci_driver_ops *ops, - size_t n_ops, - __u16 opcode) +static const struct nci_driver_ops *ops_cmd_lookup(const struct nci_driver_ops *ops, + size_t n_ops, + __u16 opcode) { size_t i; - struct nci_driver_ops *op; + const struct nci_driver_ops *op; if (!ops || !n_ops) return NULL; @@ -1384,10 +1383,10 @@ static struct nci_driver_ops *ops_cmd_lookup(struct nci_driver_ops *ops, } static int nci_op_rsp_packet(struct nci_dev *ndev, __u16 rsp_opcode, - struct sk_buff *skb, struct nci_driver_ops *ops, + struct sk_buff *skb, const struct nci_driver_ops *ops, size_t n_ops) { - struct nci_driver_ops *op; + const struct nci_driver_ops *op; op = ops_cmd_lookup(ops, n_ops, rsp_opcode); if (!op || 
!op->rsp) @@ -1397,10 +1396,10 @@ static int nci_op_rsp_packet(struct nci_dev *ndev, __u16 rsp_opcode, } static int nci_op_ntf_packet(struct nci_dev *ndev, __u16 ntf_opcode, - struct sk_buff *skb, struct nci_driver_ops *ops, + struct sk_buff *skb, const struct nci_driver_ops *ops, size_t n_ops) { - struct nci_driver_ops *op; + const struct nci_driver_ops *op; op = ops_cmd_lookup(ops, n_ops, ntf_opcode); if (!op || !op->ntf) diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 722f7ef891e1..70467a82be8f 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1531,7 +1531,7 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; - struct nfc_vendor_cmd *cmd; + const struct nfc_vendor_cmd *cmd; u32 dev_idx, vid, subcmd; u8 *data; size_t data_len; diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index ef15d9eb4774..f79679746c62 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -924,7 +924,11 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, break; case OVS_USERSPACE_ATTR_PID: - upcall.portid = nla_get_u32(a); + if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU) + upcall.portid = + ovs_dp_get_upcall_portid(dp, smp_processor_id()); + else + upcall.portid = nla_get_u32(a); break; case OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: { diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index bc164b35e67d..7a4edafdc685 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -133,6 +133,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *, static void ovs_dp_masks_rebalance(struct work_struct *work); +static int ovs_dp_set_upcall_portids(struct datapath *, const struct nlattr *); + /* Must be called with rcu_read_lock or ovs_mutex. 
*/ const char *ovs_dp_name(const struct datapath *dp) { @@ -166,6 +168,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu) free_percpu(dp->stats_percpu); kfree(dp->ports); ovs_meters_exit(dp); + kfree(dp->upcall_portids); kfree(dp); } @@ -239,7 +242,12 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) memset(&upcall, 0, sizeof(upcall)); upcall.cmd = OVS_PACKET_CMD_MISS; - upcall.portid = ovs_vport_find_upcall_portid(p, skb); + + if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU) + upcall.portid = ovs_dp_get_upcall_portid(dp, smp_processor_id()); + else + upcall.portid = ovs_vport_find_upcall_portid(p, skb); + upcall.mru = OVS_CB(skb)->mru; error = ovs_dp_upcall(dp, skb, key, &upcall, 0); if (unlikely(error)) @@ -1594,16 +1602,67 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb, DEFINE_STATIC_KEY_FALSE(tc_recirc_sharing_support); +static int ovs_dp_set_upcall_portids(struct datapath *dp, + const struct nlattr *ids) +{ + struct dp_nlsk_pids *old, *dp_nlsk_pids; + + if (!nla_len(ids) || nla_len(ids) % sizeof(u32)) + return -EINVAL; + + old = ovsl_dereference(dp->upcall_portids); + + dp_nlsk_pids = kmalloc(sizeof(*dp_nlsk_pids) + nla_len(ids), + GFP_KERNEL); + if (!dp_nlsk_pids) + return -ENOMEM; + + dp_nlsk_pids->n_pids = nla_len(ids) / sizeof(u32); + nla_memcpy(dp_nlsk_pids->pids, ids, nla_len(ids)); + + rcu_assign_pointer(dp->upcall_portids, dp_nlsk_pids); + + kfree_rcu(old, rcu); + + return 0; +} + +u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id) +{ + struct dp_nlsk_pids *dp_nlsk_pids; + + dp_nlsk_pids = rcu_dereference(dp->upcall_portids); + + if (dp_nlsk_pids) { + if (cpu_id < dp_nlsk_pids->n_pids) { + return dp_nlsk_pids->pids[cpu_id]; + } else if (dp_nlsk_pids->n_pids > 0 && cpu_id >= dp_nlsk_pids->n_pids) { + /* If the number of netlink PIDs is mismatched with the number of + * CPUs as seen by the kernel, log this and send the upcall to an + * arbitrary socket (0) in order to not drop 
packets + */ + pr_info_ratelimited("cpu_id mismatch with handler threads"); + return dp_nlsk_pids->pids[cpu_id % dp_nlsk_pids->n_pids]; + } else { + return 0; + } + } else { + return 0; + } +} + static int ovs_dp_change(struct datapath *dp, struct nlattr *a[]) { u32 user_features = 0; + int err; if (a[OVS_DP_ATTR_USER_FEATURES]) { user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]); if (user_features & ~(OVS_DP_F_VPORT_PIDS | OVS_DP_F_UNALIGNED | - OVS_DP_F_TC_RECIRC_SHARING)) + OVS_DP_F_TC_RECIRC_SHARING | + OVS_DP_F_DISPATCH_UPCALL_PER_CPU)) return -EOPNOTSUPP; #if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) @@ -1624,6 +1683,15 @@ static int ovs_dp_change(struct datapath *dp, struct nlattr *a[]) dp->user_features = user_features; + if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU && + a[OVS_DP_ATTR_PER_CPU_PIDS]) { + /* Upcall Netlink Port IDs have been updated */ + err = ovs_dp_set_upcall_portids(dp, + a[OVS_DP_ATTR_PER_CPU_PIDS]); + if (err) + return err; + } + if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) static_branch_enable(&tc_recirc_sharing_support); else diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 38f7d3e66ca6..fcfe6cb46441 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -51,6 +51,21 @@ struct dp_stats_percpu { }; /** + * struct dp_nlsk_pids - array of netlink portids of for a datapath. + * This is used when OVS_DP_F_DISPATCH_UPCALL_PER_CPU + * is enabled and must be protected by rcu. + * @rcu: RCU callback head for deferred destruction. + * @n_pids: Size of @pids array. + * @pids: Array storing the Netlink socket PIDs indexed by CPU ID for packets + * that miss the flow table. + */ +struct dp_nlsk_pids { + struct rcu_head rcu; + u32 n_pids; + u32 pids[]; +}; + +/** * struct datapath - datapath for flow-based packet switching * @rcu: RCU callback head for deferred destruction. * @list_node: Element in global 'dps' list. 
@@ -61,6 +76,7 @@ struct dp_stats_percpu { * @net: Reference to net namespace. * @max_headroom: the maximum headroom of all vports in this datapath; it will * be used by all the internal vports in this dp. + * @upcall_portids: RCU protected 'struct dp_nlsk_pids'. * * Context: See the comment on locking at the top of datapath.c for additional * locking information. @@ -87,6 +103,8 @@ struct datapath { /* Switch meters. */ struct dp_meter_table meter_tbl; + + struct dp_nlsk_pids __rcu *upcall_portids; }; /** @@ -243,6 +261,8 @@ int ovs_dp_upcall(struct datapath *, struct sk_buff *, const struct sw_flow_key *, const struct dp_upcall_info *, uint32_t cutlen); +u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id); + const char *ovs_dp_name(const struct datapath *dp); struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net, u32 portid, u32 seq, u8 cmd); diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 171b7f3be6ef..6c61b7b1838f 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -1157,14 +1157,14 @@ static int qrtr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) rc = put_user(len, (int __user *)argp); break; case SIOCGIFADDR: - if (copy_from_user(&ifr, argp, sizeof(ifr))) { + if (get_user_ifreq(&ifr, NULL, argp)) { rc = -EFAULT; break; } sq = (struct sockaddr_qrtr *)&ifr.ifr_addr; *sq = ipc->us; - if (copy_to_user(argp, &ifr, sizeof(ifr))) { + if (put_user_ifreq(&ifr, argp)) { rc = -EFAULT; break; } diff --git a/net/sched/act_api.c b/net/sched/act_api.c index d17a66aab8ee..998a2374f7ae 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1351,8 +1351,6 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, module_put(ops->owner); err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); - if (err > 0) - return 0; if (err < 0) NL_SET_ERR_MSG(extack, "Failed to send TC action flush notification"); @@ -1423,8 +1421,6 @@ tcf_del_notify(struct net *net, struct 
nlmsghdr *n, struct tc_action *actions[], ret = rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); - if (ret > 0) - return 0; return ret; } @@ -1481,7 +1477,6 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[], u32 portid, size_t attr_size, struct netlink_ext_ack *extack) { struct sk_buff *skb; - int err = 0; skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size, GFP_KERNEL); @@ -1495,11 +1490,8 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[], return -EINVAL; } - err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, - n->nlmsg_flags & NLM_F_ECHO); - if (err > 0) - err = 0; - return err; + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); } static int tcf_action_add(struct net *net, struct nlattr *nla, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index e3e79e9bd706..1167cd0be179 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1870,13 +1870,10 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb, } if (unicast) - err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); + err = rtnl_unicast(skb, net, portid); else err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); - - if (err > 0) - err = 0; return err; } @@ -1909,15 +1906,13 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, } if (unicast) - err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); + err = rtnl_unicast(skb, net, portid); else err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); if (err < 0) NL_SET_ERR_MSG(extack, "Failed to send filter delete notification"); - if (err > 0) - err = 0; return err; } @@ -2711,13 +2706,11 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb, } if (unicast) - err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); + err = rtnl_unicast(skb, net, portid); else err = rtnetlink_send(skb, 
net, portid, RTNLGRP_TC, flags & NLM_F_ECHO); - if (err > 0) - err = 0; return err; } @@ -2741,7 +2734,7 @@ static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops, } if (unicast) - return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); + return rtnl_unicast(skb, net, portid); return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO); } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index f87d07736a14..5e90e9b160e3 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1845,7 +1845,6 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb, { struct sk_buff *skb; u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; - int err = 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) @@ -1856,11 +1855,8 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb, return -EINVAL; } - err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, - n->nlmsg_flags & NLM_F_ECHO); - if (err > 0) - err = 0; - return err; + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); } static int tclass_del_notify(struct net *net, @@ -1894,8 +1890,6 @@ static int tclass_del_notify(struct net *net, err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); - if (err > 0) - err = 0; return err; } diff --git a/net/socket.c b/net/socket.c index 0b2dad3bdf7f..84de89c1ee9d 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1088,6 +1088,8 @@ EXPORT_SYMBOL(vlan_ioctl_set); static long sock_do_ioctl(struct net *net, struct socket *sock, unsigned int cmd, unsigned long arg) { + struct ifreq ifr; + bool need_copyout; int err; void __user *argp = (void __user *)arg; @@ -1100,25 +1102,13 @@ static long sock_do_ioctl(struct net *net, struct socket *sock, if (err != -ENOIOCTLCMD) return err; - if (cmd == SIOCGIFCONF) { - struct ifconf ifc; - if (copy_from_user(&ifc, argp, sizeof(struct ifconf))) - return -EFAULT; - rtnl_lock(); - err = dev_ifconf(net, &ifc, sizeof(struct ifreq)); - 
rtnl_unlock(); - if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf))) - err = -EFAULT; - } else { - struct ifreq ifr; - bool need_copyout; - if (copy_from_user(&ifr, argp, sizeof(struct ifreq))) + if (copy_from_user(&ifr, argp, sizeof(struct ifreq))) + return -EFAULT; + err = dev_ioctl(net, cmd, &ifr, &need_copyout); + if (!err && need_copyout) + if (copy_to_user(argp, &ifr, sizeof(struct ifreq))) return -EFAULT; - err = dev_ioctl(net, cmd, &ifr, &need_copyout); - if (!err && need_copyout) - if (copy_to_user(argp, &ifr, sizeof(struct ifreq))) - return -EFAULT; - } + return err; } @@ -1217,6 +1207,11 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) cmd == SIOCGSTAMP_NEW, false); break; + + case SIOCGIFCONF: + err = dev_ifconf(net, argp); + break; + default: err = sock_do_ioctl(net, sock, cmd, arg); break; @@ -3126,154 +3121,55 @@ void socket_seq_show(struct seq_file *seq) } #endif /* CONFIG_PROC_FS */ -#ifdef CONFIG_COMPAT -static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) +/* Handle the fact that while struct ifreq has the same *layout* on + * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data, + * which are handled elsewhere, it still has different *size* due to + * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit, + * resulting in struct ifreq being 32 and 40 bytes respectively). + * As a result, if the struct happens to be at the end of a page and + * the next page isn't readable/writable, we get a fault. To prevent + * that, copy back and forth to the full size. 
+ */ +int get_user_ifreq(struct ifreq *ifr, void __user **ifrdata, void __user *arg) { - struct compat_ifconf ifc32; - struct ifconf ifc; - int err; + if (in_compat_syscall()) { + struct compat_ifreq *ifr32 = (struct compat_ifreq *)ifr; - if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf))) - return -EFAULT; + memset(ifr, 0, sizeof(*ifr)); + if (copy_from_user(ifr32, arg, sizeof(*ifr32))) + return -EFAULT; - ifc.ifc_len = ifc32.ifc_len; - ifc.ifc_req = compat_ptr(ifc32.ifcbuf); + if (ifrdata) + *ifrdata = compat_ptr(ifr32->ifr_data); - rtnl_lock(); - err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq)); - rtnl_unlock(); - if (err) - return err; + return 0; + } - ifc32.ifc_len = ifc.ifc_len; - if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf))) + if (copy_from_user(ifr, arg, sizeof(*ifr))) return -EFAULT; + if (ifrdata) + *ifrdata = ifr->ifr_data; + return 0; } +EXPORT_SYMBOL(get_user_ifreq); -static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) +int put_user_ifreq(struct ifreq *ifr, void __user *arg) { - struct compat_ethtool_rxnfc __user *compat_rxnfc; - bool convert_in = false, convert_out = false; - size_t buf_size = 0; - struct ethtool_rxnfc __user *rxnfc = NULL; - struct ifreq ifr; - u32 rule_cnt = 0, actual_rule_cnt; - u32 ethcmd; - u32 data; - int ret; - - if (get_user(data, &ifr32->ifr_ifru.ifru_data)) - return -EFAULT; + size_t size = sizeof(*ifr); - compat_rxnfc = compat_ptr(data); + if (in_compat_syscall()) + size = sizeof(struct compat_ifreq); - if (get_user(ethcmd, &compat_rxnfc->cmd)) + if (copy_to_user(arg, ifr, size)) return -EFAULT; - /* Most ethtool structures are defined without padding. - * Unfortunately struct ethtool_rxnfc is an exception. 
- */ - switch (ethcmd) { - default: - break; - case ETHTOOL_GRXCLSRLALL: - /* Buffer size is variable */ - if (get_user(rule_cnt, &compat_rxnfc->rule_cnt)) - return -EFAULT; - if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32)) - return -ENOMEM; - buf_size += rule_cnt * sizeof(u32); - fallthrough; - case ETHTOOL_GRXRINGS: - case ETHTOOL_GRXCLSRLCNT: - case ETHTOOL_GRXCLSRULE: - case ETHTOOL_SRXCLSRLINS: - convert_out = true; - fallthrough; - case ETHTOOL_SRXCLSRLDEL: - buf_size += sizeof(struct ethtool_rxnfc); - convert_in = true; - rxnfc = compat_alloc_user_space(buf_size); - break; - } - - if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ)) - return -EFAULT; - - ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc; - - if (convert_in) { - /* We expect there to be holes between fs.m_ext and - * fs.ring_cookie and at the end of fs, but nowhere else. - */ - BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) + - sizeof(compat_rxnfc->fs.m_ext) != - offsetof(struct ethtool_rxnfc, fs.m_ext) + - sizeof(rxnfc->fs.m_ext)); - BUILD_BUG_ON( - offsetof(struct compat_ethtool_rxnfc, fs.location) - - offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) != - offsetof(struct ethtool_rxnfc, fs.location) - - offsetof(struct ethtool_rxnfc, fs.ring_cookie)); - - if (copy_in_user(rxnfc, compat_rxnfc, - (void __user *)(&rxnfc->fs.m_ext + 1) - - (void __user *)rxnfc) || - copy_in_user(&rxnfc->fs.ring_cookie, - &compat_rxnfc->fs.ring_cookie, - (void __user *)(&rxnfc->fs.location + 1) - - (void __user *)&rxnfc->fs.ring_cookie)) - return -EFAULT; - if (ethcmd == ETHTOOL_GRXCLSRLALL) { - if (put_user(rule_cnt, &rxnfc->rule_cnt)) - return -EFAULT; - } else if (copy_in_user(&rxnfc->rule_cnt, - &compat_rxnfc->rule_cnt, - sizeof(rxnfc->rule_cnt))) - return -EFAULT; - } - - ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL); - if (ret) - return ret; - - if (convert_out) { - if (copy_in_user(compat_rxnfc, rxnfc, - (const void __user *)(&rxnfc->fs.m_ext + 1) - - (const void 
__user *)rxnfc) || - copy_in_user(&compat_rxnfc->fs.ring_cookie, - &rxnfc->fs.ring_cookie, - (const void __user *)(&rxnfc->fs.location + 1) - - (const void __user *)&rxnfc->fs.ring_cookie) || - copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt, - sizeof(rxnfc->rule_cnt))) - return -EFAULT; - - if (ethcmd == ETHTOOL_GRXCLSRLALL) { - /* As an optimisation, we only copy the actual - * number of rules that the underlying - * function returned. Since Mallory might - * change the rule count in user memory, we - * check that it is less than the rule count - * originally given (as the user buffer size), - * which has been range-checked. - */ - if (get_user(actual_rule_cnt, &rxnfc->rule_cnt)) - return -EFAULT; - if (actual_rule_cnt < rule_cnt) - rule_cnt = actual_rule_cnt; - if (copy_in_user(&compat_rxnfc->rule_locs[0], - &rxnfc->rule_locs[0], - rule_cnt * sizeof(u32))) - return -EFAULT; - } - } - return 0; } +EXPORT_SYMBOL(put_user_ifreq); +#ifdef CONFIG_COMPAT static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32) { compat_uptr_t uptr32; @@ -3281,7 +3177,7 @@ static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32 void __user *saved; int err; - if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq))) + if (get_user_ifreq(&ifr, NULL, uifr32)) return -EFAULT; if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu)) @@ -3293,7 +3189,7 @@ static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL); if (!err) { ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved; - if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq))) + if (put_user_ifreq(&ifr, uifr32)) err = -EFAULT; } return err; @@ -3317,83 +3213,28 @@ static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd, static int compat_ifreq_ioctl(struct net *net, struct socket *sock, unsigned int cmd, + unsigned long arg, struct compat_ifreq __user *uifr32) { - struct ifreq __user *uifr; + struct 
ifreq ifr; + bool need_copyout; int err; - /* Handle the fact that while struct ifreq has the same *layout* on - * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data, - * which are handled elsewhere, it still has different *size* due to - * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit, - * resulting in struct ifreq being 32 and 40 bytes respectively). - * As a result, if the struct happens to be at the end of a page and - * the next page isn't readable/writable, we get a fault. To prevent - * that, copy back and forth to the full size. - */ - - uifr = compat_alloc_user_space(sizeof(*uifr)); - if (copy_in_user(uifr, uifr32, sizeof(*uifr32))) - return -EFAULT; - - err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr); - - if (!err) { - switch (cmd) { - case SIOCGIFFLAGS: - case SIOCGIFMETRIC: - case SIOCGIFMTU: - case SIOCGIFMEM: - case SIOCGIFHWADDR: - case SIOCGIFINDEX: - case SIOCGIFADDR: - case SIOCGIFBRDADDR: - case SIOCGIFDSTADDR: - case SIOCGIFNETMASK: - case SIOCGIFPFLAGS: - case SIOCGIFTXQLEN: - case SIOCGMIIPHY: - case SIOCGMIIREG: - case SIOCGIFNAME: - if (copy_in_user(uifr32, uifr, sizeof(*uifr32))) - err = -EFAULT; - break; - } - } - return err; -} + err = sock->ops->ioctl(sock, cmd, arg); -static int compat_sioc_ifmap(struct net *net, unsigned int cmd, - struct compat_ifreq __user *uifr32) -{ - struct ifreq ifr; - struct compat_ifmap __user *uifmap32; - int err; + /* If this ioctl is unknown try to hand it down + * to the NIC driver. 
+ */ + if (err != -ENOIOCTLCMD) + return err; - uifmap32 = &uifr32->ifr_ifru.ifru_map; - err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name)); - err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); - err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); - err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); - err |= get_user(ifr.ifr_map.irq, &uifmap32->irq); - err |= get_user(ifr.ifr_map.dma, &uifmap32->dma); - err |= get_user(ifr.ifr_map.port, &uifmap32->port); - if (err) + if (get_user_ifreq(&ifr, NULL, uifr32)) return -EFAULT; + err = dev_ioctl(net, cmd, &ifr, &need_copyout); + if (!err && need_copyout) + if (put_user_ifreq(&ifr, uifr32)) + return -EFAULT; - err = dev_ioctl(net, cmd, &ifr, NULL); - - if (cmd == SIOCGIFMAP && !err) { - err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name)); - err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); - err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); - err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); - err |= put_user(ifr.ifr_map.irq, &uifmap32->irq); - err |= put_user(ifr.ifr_map.dma, &uifmap32->dma); - err |= put_user(ifr.ifr_map.port, &uifmap32->port); - if (err) - err = -EFAULT; - } return err; } @@ -3426,15 +3267,8 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCSIFBR: case SIOCGIFBR: return old_bridge_ioctl(argp); - case SIOCGIFCONF: - return compat_dev_ifconf(net, argp); - case SIOCETHTOOL: - return ethtool_ioctl(net, argp); case SIOCWANDEV: return compat_siocwandev(net, argp); - case SIOCGIFMAP: - case SIOCSIFMAP: - return compat_sioc_ifmap(net, cmd, argp); case SIOCGSTAMP_OLD: case SIOCGSTAMPNS_OLD: if (!sock->ops->gettstamp) @@ -3442,6 +3276,7 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD, !COMPAT_USE_64BIT_TIME); + case SIOCETHTOOL: case SIOCBONDSLAVEINFOQUERY: case SIOCBONDINFOQUERY: case SIOCSHWTSTAMP: @@ -3459,10 
+3294,13 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCGSKNS: case SIOCGSTAMP_NEW: case SIOCGSTAMPNS_NEW: + case SIOCGIFCONF: return sock_ioctl(file, cmd, arg); case SIOCGIFFLAGS: case SIOCSIFFLAGS: + case SIOCGIFMAP: + case SIOCSIFMAP: case SIOCGIFMETRIC: case SIOCSIFMETRIC: case SIOCGIFMTU: @@ -3499,7 +3337,7 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCBONDRELEASE: case SIOCBONDSETHWADDR: case SIOCBONDCHANGEACTIVE: - return compat_ifreq_ioctl(net, sock, cmd, argp); + return compat_ifreq_ioctl(net, sock, cmd, arg, argp); case SIOCSARP: case SIOCGARP: diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 070698dd19bc..0ae3478561f4 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -378,6 +378,266 @@ int call_switchdev_blocking_notifiers(unsigned long val, struct net_device *dev, } EXPORT_SYMBOL_GPL(call_switchdev_blocking_notifiers); +struct switchdev_nested_priv { + bool (*check_cb)(const struct net_device *dev); + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev); + const struct net_device *dev; + struct net_device *lower_dev; +}; + +static int switchdev_lower_dev_walk(struct net_device *lower_dev, + struct netdev_nested_priv *priv) +{ + struct switchdev_nested_priv *switchdev_priv = priv->data; + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev); + bool (*check_cb)(const struct net_device *dev); + const struct net_device *dev; + + check_cb = switchdev_priv->check_cb; + foreign_dev_check_cb = switchdev_priv->foreign_dev_check_cb; + dev = switchdev_priv->dev; + + if (check_cb(lower_dev) && !foreign_dev_check_cb(lower_dev, dev)) { + switchdev_priv->lower_dev = lower_dev; + return 1; + } + + return 0; +} + +static struct net_device * +switchdev_lower_dev_find(struct net_device *dev, + bool (*check_cb)(const struct net_device *dev), + bool 
(*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev)) +{ + struct switchdev_nested_priv switchdev_priv = { + .check_cb = check_cb, + .foreign_dev_check_cb = foreign_dev_check_cb, + .dev = dev, + .lower_dev = NULL, + }; + struct netdev_nested_priv priv = { + .data = &switchdev_priv, + }; + + netdev_walk_all_lower_dev_rcu(dev, switchdev_lower_dev_walk, &priv); + + return switchdev_priv.lower_dev; +} + +static int __switchdev_handle_fdb_add_to_device(struct net_device *dev, + const struct net_device *orig_dev, + const struct switchdev_notifier_fdb_info *fdb_info, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev), + int (*add_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info), + int (*lag_add_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info)) +{ + const struct switchdev_notifier_info *info = &fdb_info->info; + struct net_device *br, *lower_dev; + struct list_head *iter; + int err = -EOPNOTSUPP; + + if (check_cb(dev)) + return add_cb(dev, orig_dev, info->ctx, fdb_info); + + if (netif_is_lag_master(dev)) { + if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb)) + goto maybe_bridged_with_us; + + /* This is a LAG interface that we offload */ + if (!lag_add_cb) + return -EOPNOTSUPP; + + return lag_add_cb(dev, orig_dev, info->ctx, fdb_info); + } + + /* Recurse through lower interfaces in case the FDB entry is pointing + * towards a bridge device. 
+ */ + if (netif_is_bridge_master(dev)) { + if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb)) + return 0; + + /* This is a bridge interface that we offload */ + netdev_for_each_lower_dev(dev, lower_dev, iter) { + /* Do not propagate FDB entries across bridges */ + if (netif_is_bridge_master(lower_dev)) + continue; + + /* Bridge ports might be either us, or LAG interfaces + * that we offload. + */ + if (!check_cb(lower_dev) && + !switchdev_lower_dev_find(lower_dev, check_cb, + foreign_dev_check_cb)) + continue; + + err = __switchdev_handle_fdb_add_to_device(lower_dev, orig_dev, + fdb_info, check_cb, + foreign_dev_check_cb, + add_cb, lag_add_cb); + if (err && err != -EOPNOTSUPP) + return err; + } + + return 0; + } + +maybe_bridged_with_us: + /* Event is neither on a bridge nor a LAG. Check whether it is on an + * interface that is in a bridge with us. + */ + br = netdev_master_upper_dev_get_rcu(dev); + if (!br || !netif_is_bridge_master(br)) + return 0; + + if (!switchdev_lower_dev_find(br, check_cb, foreign_dev_check_cb)) + return 0; + + return __switchdev_handle_fdb_add_to_device(br, orig_dev, fdb_info, + check_cb, foreign_dev_check_cb, + add_cb, lag_add_cb); +} + +int switchdev_handle_fdb_add_to_device(struct net_device *dev, + const struct switchdev_notifier_fdb_info *fdb_info, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev), + int (*add_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info), + int (*lag_add_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info)) +{ + int err; + + err = __switchdev_handle_fdb_add_to_device(dev, dev, fdb_info, + check_cb, + foreign_dev_check_cb, + add_cb, lag_add_cb); + if (err == -EOPNOTSUPP) + err = 0; + + return err; +} 
+EXPORT_SYMBOL_GPL(switchdev_handle_fdb_add_to_device); + +static int __switchdev_handle_fdb_del_to_device(struct net_device *dev, + const struct net_device *orig_dev, + const struct switchdev_notifier_fdb_info *fdb_info, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev), + int (*del_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info), + int (*lag_del_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info)) +{ + const struct switchdev_notifier_info *info = &fdb_info->info; + struct net_device *br, *lower_dev; + struct list_head *iter; + int err = -EOPNOTSUPP; + + if (check_cb(dev)) + return del_cb(dev, orig_dev, info->ctx, fdb_info); + + if (netif_is_lag_master(dev)) { + if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb)) + goto maybe_bridged_with_us; + + /* This is a LAG interface that we offload */ + if (!lag_del_cb) + return -EOPNOTSUPP; + + return lag_del_cb(dev, orig_dev, info->ctx, fdb_info); + } + + /* Recurse through lower interfaces in case the FDB entry is pointing + * towards a bridge device. + */ + if (netif_is_bridge_master(dev)) { + if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb)) + return 0; + + /* This is a bridge interface that we offload */ + netdev_for_each_lower_dev(dev, lower_dev, iter) { + /* Do not propagate FDB entries across bridges */ + if (netif_is_bridge_master(lower_dev)) + continue; + + /* Bridge ports might be either us, or LAG interfaces + * that we offload. 
+ */ + if (!check_cb(lower_dev) && + !switchdev_lower_dev_find(lower_dev, check_cb, + foreign_dev_check_cb)) + continue; + + err = __switchdev_handle_fdb_del_to_device(lower_dev, orig_dev, + fdb_info, check_cb, + foreign_dev_check_cb, + del_cb, lag_del_cb); + if (err && err != -EOPNOTSUPP) + return err; + } + + return 0; + } + +maybe_bridged_with_us: + /* Event is neither on a bridge nor a LAG. Check whether it is on an + * interface that is in a bridge with us. + */ + br = netdev_master_upper_dev_get_rcu(dev); + if (!br || !netif_is_bridge_master(br)) + return 0; + + if (!switchdev_lower_dev_find(br, check_cb, foreign_dev_check_cb)) + return 0; + + return __switchdev_handle_fdb_del_to_device(br, orig_dev, fdb_info, + check_cb, foreign_dev_check_cb, + del_cb, lag_del_cb); +} + +int switchdev_handle_fdb_del_to_device(struct net_device *dev, + const struct switchdev_notifier_fdb_info *fdb_info, + bool (*check_cb)(const struct net_device *dev), + bool (*foreign_dev_check_cb)(const struct net_device *dev, + const struct net_device *foreign_dev), + int (*del_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info), + int (*lag_del_cb)(struct net_device *dev, + const struct net_device *orig_dev, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info)) +{ + int err; + + err = __switchdev_handle_fdb_del_to_device(dev, dev, fdb_info, + check_cb, + foreign_dev_check_cb, + del_cb, lag_del_cb); + if (err == -EOPNOTSUPP) + err = 0; + + return err; +} +EXPORT_SYMBOL_GPL(switchdev_handle_fdb_del_to_device); + static int __switchdev_handle_port_obj_add(struct net_device *dev, struct switchdev_notifier_port_obj_info *port_obj_info, bool (*check_cb)(const struct net_device *dev), diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 75b99b7eda22..b15b2b1b2f38 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1886,6 +1886,7 @@ static int tipc_recvmsg(struct socket *sock, struct 
msghdr *m, bool connected = !tipc_sk_type_connectionless(sk); struct tipc_sock *tsk = tipc_sk(sk); int rc, err, hlen, dlen, copy; + struct tipc_skb_cb *skb_cb; struct sk_buff_head xmitq; struct tipc_msg *hdr; struct sk_buff *skb; @@ -1909,6 +1910,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, if (unlikely(rc)) goto exit; skb = skb_peek(&sk->sk_receive_queue); + skb_cb = TIPC_SKB_CB(skb); hdr = buf_msg(skb); dlen = msg_data_sz(hdr); hlen = msg_hdr_sz(hdr); @@ -1928,18 +1930,33 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, /* Capture data if non-error msg, otherwise just set return value */ if (likely(!err)) { - copy = min_t(int, dlen, buflen); - if (unlikely(copy != dlen)) - m->msg_flags |= MSG_TRUNC; - rc = skb_copy_datagram_msg(skb, hlen, m, copy); + int offset = skb_cb->bytes_read; + + copy = min_t(int, dlen - offset, buflen); + rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy); + if (unlikely(rc)) + goto exit; + if (unlikely(offset + copy < dlen)) { + if (flags & MSG_EOR) { + if (!(flags & MSG_PEEK)) + skb_cb->bytes_read = offset + copy; + } else { + m->msg_flags |= MSG_TRUNC; + skb_cb->bytes_read = 0; + } + } else { + if (flags & MSG_EOR) + m->msg_flags |= MSG_EOR; + skb_cb->bytes_read = 0; + } } else { copy = 0; rc = 0; - if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) + if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) { rc = -ECONNRESET; + goto exit; + } } - if (unlikely(rc)) - goto exit; /* Mark message as group event if applicable */ if (unlikely(grp_evt)) { @@ -1962,6 +1979,9 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, tipc_node_distr_xmit(sock_net(sk), &xmitq); } + if (skb_cb->bytes_read) + goto exit; + tsk_advance_rx_queue(sk); if (likely(!connected)) diff --git a/net/unix/Makefile b/net/unix/Makefile index 54e58cc4f945..20491825b4d0 100644 --- a/net/unix/Makefile +++ b/net/unix/Makefile @@ -7,6 +7,7 @@ obj-$(CONFIG_UNIX) += unix.o unix-y := af_unix.o 
garbage.o unix-$(CONFIG_SYSCTL) += sysctl_net_unix.o +unix-$(CONFIG_BPF_SYSCALL) += unix_bpf.o obj-$(CONFIG_UNIX_DIAG) += unix_diag.o unix_diag-y := diag.o diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 23c92ad15c61..89927678c0dc 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -494,6 +494,7 @@ static void unix_dgram_disconnected(struct sock *sk, struct sock *other) sk_error_report(other); } } + sk->sk_state = other->sk_state = TCP_CLOSE; } static void unix_sock_destructor(struct sock *sk) @@ -669,6 +670,8 @@ static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos, unsigned int flags); static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t); static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int); +static int unix_read_sock(struct sock *sk, read_descriptor_t *desc, + sk_read_actor_t recv_actor); static int unix_dgram_connect(struct socket *, struct sockaddr *, int, int); static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t); @@ -746,6 +749,7 @@ static const struct proto_ops unix_dgram_ops = { .listen = sock_no_listen, .shutdown = unix_shutdown, .sendmsg = unix_dgram_sendmsg, + .read_sock = unix_read_sock, .recvmsg = unix_dgram_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, @@ -777,10 +781,21 @@ static const struct proto_ops unix_seqpacket_ops = { .show_fdinfo = unix_show_fdinfo, }; -static struct proto unix_proto = { +static void unix_close(struct sock *sk, long timeout) +{ + /* Nothing to do here, unix socket does not need a ->close(). + * This is merely for sockmap. 
+ */ +} + +struct proto unix_proto = { .name = "UNIX", .owner = THIS_MODULE, .obj_size = sizeof(struct unix_sock), + .close = unix_close, +#ifdef CONFIG_BPF_SYSCALL + .psock_update_sk_prot = unix_bpf_update_proto, +#endif }; static struct sock *unix_create1(struct net *net, struct socket *sock, int kern) @@ -864,6 +879,7 @@ static int unix_release(struct socket *sock) if (!sk) return 0; + sk->sk_prot->close(sk, 0); unix_release_sock(sk, 0); sock->sk = NULL; @@ -1199,6 +1215,9 @@ restart: unix_peer(sk) = other; unix_state_double_unlock(sk, other); } + + if (unix_peer(sk)) + sk->sk_state = other->sk_state = TCP_ESTABLISHED; return 0; out_unlock: @@ -1431,12 +1450,10 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb) init_peercred(ska); init_peercred(skb); - if (ska->sk_type != SOCK_DGRAM) { - ska->sk_state = TCP_ESTABLISHED; - skb->sk_state = TCP_ESTABLISHED; - socka->state = SS_CONNECTED; - sockb->state = SS_CONNECTED; - } + ska->sk_state = TCP_ESTABLISHED; + skb->sk_state = TCP_ESTABLISHED; + socka->state = SS_CONNECTED; + sockb->state = SS_CONNECTED; return 0; } @@ -2081,11 +2098,11 @@ static void unix_copy_addr(struct msghdr *msg, struct sock *sk) } } -static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, - size_t size, int flags) +int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size, + int flags) { struct scm_cookie scm; - struct sock *sk = sock->sk; + struct socket *sock = sk->sk_socket; struct unix_sock *u = unix_sk(sk); struct sk_buff *skb, *last; long timeo; @@ -2188,6 +2205,53 @@ out: return err; } +static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) +{ + struct sock *sk = sock->sk; + +#ifdef CONFIG_BPF_SYSCALL + if (sk->sk_prot != &unix_proto) + return sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT, + flags & ~MSG_DONTWAIT, NULL); +#endif + return __unix_dgram_recvmsg(sk, msg, size, flags); +} + +static int unix_read_sock(struct sock *sk, 
read_descriptor_t *desc, + sk_read_actor_t recv_actor) +{ + int copied = 0; + + while (1) { + struct unix_sock *u = unix_sk(sk); + struct sk_buff *skb; + int used, err; + + mutex_lock(&u->iolock); + skb = skb_recv_datagram(sk, 0, 1, &err); + mutex_unlock(&u->iolock); + if (!skb) + return err; + + used = recv_actor(desc, skb, 0, skb->len); + if (used <= 0) { + if (!copied) + copied = used; + kfree_skb(skb); + break; + } else if (used <= skb->len) { + copied += used; + } + + kfree_skb(skb); + if (!desc->count) + break; + } + + return copied; +} + /* * Sleep until more data has arrived. But check for races.. */ @@ -2925,6 +2989,7 @@ static int __init af_unix_init(void) sock_register(&unix_family_ops); register_pernet_subsys(&unix_net_ops); + unix_bpf_build_proto(); out: return rc; } diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c new file mode 100644 index 000000000000..db0cda29fb2f --- /dev/null +++ b/net/unix/unix_bpf.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Cong Wang <cong.wang@bytedance.com> */ + +#include <linux/skmsg.h> +#include <linux/bpf.h> +#include <net/sock.h> +#include <net/af_unix.h> + +#define unix_sk_has_data(__sk, __psock) \ + ({ !skb_queue_empty(&__sk->sk_receive_queue) || \ + !skb_queue_empty(&__psock->ingress_skb) || \ + !list_empty(&__psock->ingress_msg); \ + }) + +static int unix_msg_wait_data(struct sock *sk, struct sk_psock *psock, + long timeo) +{ + DEFINE_WAIT_FUNC(wait, woken_wake_function); + struct unix_sock *u = unix_sk(sk); + int ret = 0; + + if (sk->sk_shutdown & RCV_SHUTDOWN) + return 1; + + if (!timeo) + return ret; + + add_wait_queue(sk_sleep(sk), &wait); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); + if (!unix_sk_has_data(sk, psock)) { + mutex_unlock(&u->iolock); + wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); + mutex_lock(&u->iolock); + ret = unix_sk_has_data(sk, psock); + } + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); + remove_wait_queue(sk_sleep(sk), &wait); + return ret; +} + +static int 
unix_dgram_bpf_recvmsg(struct sock *sk, struct msghdr *msg, + size_t len, int nonblock, int flags, + int *addr_len) +{ + struct unix_sock *u = unix_sk(sk); + struct sk_psock *psock; + int copied, ret; + + psock = sk_psock_get(sk); + if (unlikely(!psock)) + return __unix_dgram_recvmsg(sk, msg, len, flags); + + mutex_lock(&u->iolock); + if (!skb_queue_empty(&sk->sk_receive_queue) && + sk_psock_queue_empty(psock)) { + ret = __unix_dgram_recvmsg(sk, msg, len, flags); + goto out; + } + +msg_bytes_ready: + copied = sk_msg_recvmsg(sk, psock, msg, len, flags); + if (!copied) { + long timeo; + int data; + + timeo = sock_rcvtimeo(sk, nonblock); + data = unix_msg_wait_data(sk, psock, timeo); + if (data) { + if (!sk_psock_queue_empty(psock)) + goto msg_bytes_ready; + ret = __unix_dgram_recvmsg(sk, msg, len, flags); + goto out; + } + copied = -EAGAIN; + } + ret = copied; +out: + mutex_unlock(&u->iolock); + sk_psock_put(sk, psock); + return ret; +} + +static struct proto *unix_prot_saved __read_mostly; +static DEFINE_SPINLOCK(unix_prot_lock); +static struct proto unix_bpf_prot; + +static void unix_bpf_rebuild_protos(struct proto *prot, const struct proto *base) +{ + *prot = *base; + prot->close = sock_map_close; + prot->recvmsg = unix_dgram_bpf_recvmsg; +} + +static void unix_bpf_check_needs_rebuild(struct proto *ops) +{ + if (unlikely(ops != smp_load_acquire(&unix_prot_saved))) { + spin_lock_bh(&unix_prot_lock); + if (likely(ops != unix_prot_saved)) { + unix_bpf_rebuild_protos(&unix_bpf_prot, ops); + smp_store_release(&unix_prot_saved, ops); + } + spin_unlock_bh(&unix_prot_lock); + } +} + +int unix_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) +{ + if (restore) { + sk->sk_write_space = psock->saved_write_space; + WRITE_ONCE(sk->sk_prot, psock->sk_proto); + return 0; + } + + unix_bpf_check_needs_rebuild(psock->sk_proto); + WRITE_ONCE(sk->sk_prot, &unix_bpf_prot); + return 0; +} + +void __init unix_bpf_build_proto(void) +{ + 
unix_bpf_rebuild_protos(&unix_bpf_prot, &unix_proto); +} diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index 576411612523..d3ecdc18b9c1 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -792,13 +792,23 @@ int main(int argc, char **argv) n_cpus = get_nprocs_conf(); - /* Notice: choosing he queue size is very important with the - * ixgbe driver, because it's driver page recycling trick is - * dependend on pages being returned quickly. The number of - * out-standing packets in the system must be less-than 2x - * RX-ring size. + /* Notice: Choosing the queue size is very important when CPU is + * configured with power-saving states. + * + * If deepest state take 133 usec to wakeup from (133/10^6). When link + * speed is 10Gbit/s ((10*10^9/8) in bytes/sec). How many bytes can + * arrive with in 133 usec at this speed: (10*10^9/8)*(133/10^6) = + * 166250 bytes. With MTU size packets this is 110 packets, and with + * minimum Ethernet (MAC-preamble + intergap) 84 bytes is 1979 packets. + * + * Setting default cpumap queue to 2048 as worst-case (small packet) + * should be +64 packet due kthread wakeup call (due to xdp_do_flush) + * worst-case is 2043 packets. 
+ * + * Sysadm can configured system to avoid deep-sleep via: + * tuned-adm profile network-latency */ - qsize = 128+64; + qsize = 2048; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); prog_load_attr.file = filename; diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index 2d94025b38e9..00ac7b79cddb 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -547,6 +547,7 @@ class PrinterHelpers(Printer): 'struct inode', 'struct socket', 'struct file', + 'struct bpf_timer', ] known_types = { '...', @@ -594,6 +595,7 @@ class PrinterHelpers(Printer): 'struct inode', 'struct socket', 'struct file', + 'struct bpf_timer', } mapped_types = { 'u8': '__u8', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index bf9252c7381e..2db6925e04f4 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -324,9 +324,6 @@ union bpf_iter_link_info { * **BPF_PROG_TYPE_SK_LOOKUP** * *data_in* and *data_out* must be NULL. * - * **BPF_PROG_TYPE_XDP** - * *ctx_in* and *ctx_out* must be NULL. - * * **BPF_PROG_TYPE_RAW_TRACEPOINT**, * **BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE** * @@ -3249,7 +3246,7 @@ union bpf_attr { * long bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) * Description * Select a **SO_REUSEPORT** socket from a - * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*. + * **BPF_MAP_TYPE_REUSEPORT_SOCKARRAY** *map*. * It checks the selected socket is matching the incoming * request in the socket buffer. * Return @@ -4780,6 +4777,76 @@ union bpf_attr { * Execute close syscall for given FD. * Return * A syscall result. + * + * long bpf_timer_init(struct bpf_timer *timer, struct bpf_map *map, u64 flags) + * Description + * Initialize the timer. + * First 4 bits of *flags* specify clockid. + * Only CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_BOOTTIME are allowed. + * All other bits of *flags* are reserved. 
+ * The verifier will reject the program if *timer* is not from + * the same *map*. + * Return + * 0 on success. + * **-EBUSY** if *timer* is already initialized. + * **-EINVAL** if invalid *flags* are passed. + * **-EPERM** if *timer* is in a map that doesn't have any user references. + * The user space should either hold a file descriptor to a map with timers + * or pin such map in bpffs. When map is unpinned or file descriptor is + * closed all timers in the map will be cancelled and freed. + * + * long bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn) + * Description + * Configure the timer to call *callback_fn* static function. + * Return + * 0 on success. + * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier. + * **-EPERM** if *timer* is in a map that doesn't have any user references. + * The user space should either hold a file descriptor to a map with timers + * or pin such map in bpffs. When map is unpinned or file descriptor is + * closed all timers in the map will be cancelled and freed. + * + * long bpf_timer_start(struct bpf_timer *timer, u64 nsecs, u64 flags) + * Description + * Set timer expiration N nanoseconds from the current time. The + * configured callback will be invoked in soft irq context on some cpu + * and will not repeat unless another bpf_timer_start() is made. + * In such case the next invocation can migrate to a different cpu. + * Since struct bpf_timer is a field inside map element the map + * owns the timer. The bpf_timer_set_callback() will increment refcnt + * of BPF program to make sure that callback_fn code stays valid. + * When user space reference to a map reaches zero all timers + * in a map are cancelled and corresponding program's refcnts are + * decremented. This is done to make sure that Ctrl-C of a user + * process doesn't leave any timers running. If map is pinned in + * bpffs the callback_fn can re-arm itself indefinitely. 
+ * bpf_map_update/delete_elem() helpers and user space sys_bpf commands + * cancel and free the timer in the given map element. + * The map can contain timers that invoke callback_fn-s from different + * programs. The same callback_fn can serve different timers from + * different maps if key/value layout matches across maps. + * Every bpf_timer_set_callback() can have different callback_fn. + * + * Return + * 0 on success. + * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier + * or invalid *flags* are passed. + * + * long bpf_timer_cancel(struct bpf_timer *timer) + * Description + * Cancel the timer and wait for callback_fn to finish if it was running. + * Return + * 0 if the timer was not active. + * 1 if the timer was active. + * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier. + * **-EDEADLK** if callback_fn tried to call bpf_timer_cancel() on its + * own timer which would have led to a deadlock otherwise. + * + * u64 bpf_get_func_ip(void *ctx) + * Description + * Get address of the traced function (for tracing and kprobe programs). + * Return + * Address of the traced function. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4951,6 +5018,11 @@ union bpf_attr { FN(sys_bpf), \ FN(btf_find_by_name_kind), \ FN(sys_close), \ + FN(timer_init), \ + FN(timer_set_callback), \ + FN(timer_start), \ + FN(timer_cancel), \ + FN(get_func_ip), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -6077,6 +6149,11 @@ struct bpf_spin_lock { __u32 val; }; +struct bpf_timer { + __u64 :64; + __u64 :64; +} __attribute__((aligned(8))); + struct bpf_sysctl { __u32 write; /* Sysctl is being read (= 0) or written (= 1). * Allows 1,2,4-byte read, but no write. 
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 6f5e2757bb3c..4ccfae30e681 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -3894,6 +3894,42 @@ static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map) return 0; } +static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info) +{ + char file[PATH_MAX], buff[4096]; + FILE *fp; + __u32 val; + int err; + + snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd); + memset(info, 0, sizeof(*info)); + + fp = fopen(file, "r"); + if (!fp) { + err = -errno; + pr_warn("failed to open %s: %d. No procfs support?\n", file, + err); + return err; + } + + while (fgets(buff, sizeof(buff), fp)) { + if (sscanf(buff, "map_type:\t%u", &val) == 1) + info->type = val; + else if (sscanf(buff, "key_size:\t%u", &val) == 1) + info->key_size = val; + else if (sscanf(buff, "value_size:\t%u", &val) == 1) + info->value_size = val; + else if (sscanf(buff, "max_entries:\t%u", &val) == 1) + info->max_entries = val; + else if (sscanf(buff, "map_flags:\t%i", &val) == 1) + info->map_flags = val; + } + + fclose(fp); + + return 0; +} + int bpf_map__reuse_fd(struct bpf_map *map, int fd) { struct bpf_map_info info = {}; @@ -3902,6 +3938,8 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd) char *new_name; err = bpf_obj_get_info_by_fd(fd, &info, &len); + if (err && errno == EINVAL) + err = bpf_get_map_info_from_fdinfo(fd, &info); if (err) return libbpf_err(err); @@ -4381,12 +4419,16 @@ static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) struct bpf_map_info map_info = {}; char msg[STRERR_BUFSIZE]; __u32 map_info_len; + int err; map_info_len = sizeof(map_info); - if (bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len)) { - pr_warn("failed to get map info for map FD %d: %s\n", - map_fd, libbpf_strerror_r(errno, msg, sizeof(msg))); + err = bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len); + if (err && errno == EINVAL) + err = 
bpf_get_map_info_from_fdinfo(map_fd, &map_info); + if (err) { + pr_warn("failed to get map info for map FD %d: %s\n", map_fd, + libbpf_strerror_r(errno, msg, sizeof(msg))); return false; } @@ -10304,19 +10346,25 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, return pfd; } -struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog, - bool retprobe, - const char *func_name) +struct bpf_program_attach_kprobe_opts { + bool retprobe; + unsigned long offset; +}; + +static struct bpf_link* +bpf_program__attach_kprobe_opts(struct bpf_program *prog, + const char *func_name, + struct bpf_program_attach_kprobe_opts *opts) { char errmsg[STRERR_BUFSIZE]; struct bpf_link *link; int pfd, err; - pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name, - 0 /* offset */, -1 /* pid */); + pfd = perf_event_open_probe(false /* uprobe */, opts->retprobe, func_name, + opts->offset, -1 /* pid */); if (pfd < 0) { pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n", - prog->name, retprobe ? "kretprobe" : "kprobe", func_name, + prog->name, opts->retprobe ? "kretprobe" : "kprobe", func_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); return libbpf_err_ptr(pfd); } @@ -10325,23 +10373,53 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog, if (err) { close(pfd); pr_warn("prog '%s': failed to attach to %s '%s': %s\n", - prog->name, retprobe ? "kretprobe" : "kprobe", func_name, + prog->name, opts->retprobe ? 
"kretprobe" : "kprobe", func_name, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); return libbpf_err_ptr(err); } return link; } +struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog, + bool retprobe, + const char *func_name) +{ + struct bpf_program_attach_kprobe_opts opts = { + .retprobe = retprobe, + }; + + return bpf_program__attach_kprobe_opts(prog, func_name, &opts); +} + static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec, struct bpf_program *prog) { + struct bpf_program_attach_kprobe_opts opts; + unsigned long offset = 0; + struct bpf_link *link; const char *func_name; - bool retprobe; + char *func; + int n, err; func_name = prog->sec_name + sec->len; - retprobe = strcmp(sec->sec, "kretprobe/") == 0; + opts.retprobe = strcmp(sec->sec, "kretprobe/") == 0; + + n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%lx", &func, &offset); + if (n < 1) { + err = -EINVAL; + pr_warn("kprobe name is invalid: %s\n", func_name); + return libbpf_err_ptr(err); + } + if (opts.retprobe && offset != 0) { + err = -EINVAL; + pr_warn("kretprobes do not support offset specification\n"); + return libbpf_err_ptr(err); + } - return bpf_program__attach_kprobe(prog, retprobe, func_name); + opts.offset = offset; + link = bpf_program__attach_kprobe_opts(prog, func, &opts); + free(func); + return link; } struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog, diff --git a/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c new file mode 100644 index 000000000000..088b3653610d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include "get_func_ip_test.skel.h" + +void test_get_func_ip_test(void) +{ + struct get_func_ip_test *skel = NULL; + __u32 duration = 0, retval; + int err, prog_fd; + + skel = get_func_ip_test__open(); + if (!ASSERT_OK_PTR(skel, "get_func_ip_test__open")) + return; 
+ + /* test6 is x86_64 specifc because of the instruction + * offset, disabling it for all other archs + */ +#ifndef __x86_64__ + bpf_program__set_autoload(skel->progs.test6, false); +#endif + + err = get_func_ip_test__load(skel); + if (!ASSERT_OK(err, "get_func_ip_test__load")) + goto cleanup; + + err = get_func_ip_test__attach(skel); + if (!ASSERT_OK(err, "get_func_ip_test__attach")) + goto cleanup; + + prog_fd = bpf_program__fd(skel->progs.test1); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, + NULL, NULL, &retval, &duration); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(retval, 0, "test_run"); + + prog_fd = bpf_program__fd(skel->progs.test5); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, + NULL, NULL, &retval, &duration); + + ASSERT_OK(err, "test_run"); + + ASSERT_EQ(skel->bss->test1_result, 1, "test1_result"); + ASSERT_EQ(skel->bss->test2_result, 1, "test2_result"); + ASSERT_EQ(skel->bss->test3_result, 1, "test3_result"); + ASSERT_EQ(skel->bss->test4_result, 1, "test4_result"); + ASSERT_EQ(skel->bss->test5_result, 1, "test5_result"); +#ifdef __x86_64__ + ASSERT_EQ(skel->bss->test6_result, 1, "test6_result"); +#endif + +cleanup: + get_func_ip_test__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index 515229f24a93..a9f1bf9d5dff 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -351,9 +351,11 @@ static void test_insert_opened(int family, int sotype, int mapfd) errno = 0; value = s; err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); - if (!err || errno != EOPNOTSUPP) - FAIL_ERRNO("map_update: expected EOPNOTSUPP"); - + if (sotype == SOCK_STREAM) { + if (!err || errno != EOPNOTSUPP) + FAIL_ERRNO("map_update: expected EOPNOTSUPP"); + } else if (err) + FAIL_ERRNO("map_update: expected success"); xclose(s); } @@ -919,6 +921,23 @@ static const char *redir_mode_str(enum redir_mode 
mode) } } +static int add_to_sockmap(int sock_mapfd, int fd1, int fd2) +{ + u64 value; + u32 key; + int err; + + key = 0; + value = fd1; + err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); + if (err) + return err; + + key = 1; + value = fd2; + return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); +} + static void redir_to_connected(int family, int sotype, int sock_mapfd, int verd_mapfd, enum redir_mode mode) { @@ -928,7 +947,6 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd, unsigned int pass; socklen_t len; int err, n; - u64 value; u32 key; char b; @@ -965,15 +983,7 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd, if (p1 < 0) goto close_cli1; - key = 0; - value = p0; - err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); - if (err) - goto close_peer1; - - key = 1; - value = p1; - err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); + err = add_to_sockmap(sock_mapfd, p0, p1); if (err) goto close_peer1; @@ -1061,7 +1071,6 @@ static void redir_to_listening(int family, int sotype, int sock_mapfd, int s, c, p, err, n; unsigned int drop; socklen_t len; - u64 value; u32 key; zero_verdict_count(verd_mapfd); @@ -1086,15 +1095,7 @@ static void redir_to_listening(int family, int sotype, int sock_mapfd, if (p < 0) goto close_cli; - key = 0; - value = s; - err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); - if (err) - goto close_peer; - - key = 1; - value = p; - err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); + err = add_to_sockmap(sock_mapfd, s, p); if (err) goto close_peer; @@ -1346,7 +1347,6 @@ static void test_reuseport_mixed_groups(int family, int sotype, int sock_map, int s1, s2, c, err; unsigned int drop; socklen_t len; - u64 value; u32 key; zero_verdict_count(verd_map); @@ -1360,16 +1360,10 @@ static void test_reuseport_mixed_groups(int family, int sotype, int sock_map, if (s2 < 0) goto close_srv1; - key = 0; - value = s1; - 
err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST); + err = add_to_sockmap(sock_map, s1, s2); if (err) goto close_srv2; - key = 1; - value = s2; - err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST); - /* Connect to s2, reuseport BPF selects s1 via sock_map[0] */ len = sizeof(addr); err = xgetsockname(s2, sockaddr(&addr), &len); @@ -1441,6 +1435,8 @@ static const char *family_str(sa_family_t family) return "IPv4"; case AF_INET6: return "IPv6"; + case AF_UNIX: + return "Unix"; default: return "unknown"; } @@ -1563,6 +1559,99 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map, } } +static void unix_redir_to_connected(int sotype, int sock_mapfd, + int verd_mapfd, enum redir_mode mode) +{ + const char *log_prefix = redir_mode_str(mode); + int c0, c1, p0, p1; + unsigned int pass; + int retries = 100; + int err, n; + int sfd[2]; + u32 key; + char b; + + zero_verdict_count(verd_mapfd); + + if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd)) + return; + c0 = sfd[0], p0 = sfd[1]; + + if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd)) + goto close0; + c1 = sfd[0], p1 = sfd[1]; + + err = add_to_sockmap(sock_mapfd, p0, p1); + if (err) + goto close; + + n = write(c1, "a", 1); + if (n < 0) + FAIL_ERRNO("%s: write", log_prefix); + if (n == 0) + FAIL("%s: incomplete write", log_prefix); + if (n < 1) + goto close; + + key = SK_PASS; + err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass); + if (err) + goto close; + if (pass != 1) + FAIL("%s: want pass count 1, have %d", log_prefix, pass); + +again: + n = read(mode == REDIR_INGRESS ? 
p0 : c0, &b, 1); + if (n < 0) { + if (errno == EAGAIN && retries--) + goto again; + FAIL_ERRNO("%s: read", log_prefix); + } + if (n == 0) + FAIL("%s: incomplete read", log_prefix); + +close: + xclose(c1); + xclose(p1); +close0: + xclose(c0); + xclose(p0); +} + +static void unix_skb_redir_to_connected(struct test_sockmap_listen *skel, + struct bpf_map *inner_map, int sotype) +{ + int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); + int verdict_map = bpf_map__fd(skel->maps.verdict_map); + int sock_map = bpf_map__fd(inner_map); + int err; + + err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0); + if (err) + return; + + skel->bss->test_ingress = false; + unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_EGRESS); + skel->bss->test_ingress = true; + unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_INGRESS); + + xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); +} + +static void test_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map, + int sotype) +{ + const char *family_name, *map_name; + char s[MAX_TEST_NAME]; + + family_name = family_str(AF_UNIX); + map_name = map_type_str(map); + snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__); + if (!test__start_subtest(s)) + return; + unix_skb_redir_to_connected(skel, map, sotype); +} + static void test_reuseport(struct test_sockmap_listen *skel, struct bpf_map *map, int family, int sotype) { @@ -1603,33 +1692,27 @@ static void test_reuseport(struct test_sockmap_listen *skel, } } -static void udp_redir_to_connected(int family, int sotype, int sock_mapfd, - int verd_mapfd, enum redir_mode mode) +static int udp_socketpair(int family, int *s, int *c) { - const char *log_prefix = redir_mode_str(mode); struct sockaddr_storage addr; - int c0, c1, p0, p1; - unsigned int pass; - int retries = 100; socklen_t len; - int err, n; - u64 value; - u32 key; - char b; - - zero_verdict_count(verd_mapfd); + int p0, c0; + int err; - p0 = socket_loopback(family, 
sotype | SOCK_NONBLOCK); + p0 = socket_loopback(family, SOCK_DGRAM | SOCK_NONBLOCK); if (p0 < 0) - return; + return p0; + len = sizeof(addr); err = xgetsockname(p0, sockaddr(&addr), &len); if (err) goto close_peer0; - c0 = xsocket(family, sotype | SOCK_NONBLOCK, 0); - if (c0 < 0) + c0 = xsocket(family, SOCK_DGRAM | SOCK_NONBLOCK, 0); + if (c0 < 0) { + err = c0; goto close_peer0; + } err = xconnect(c0, sockaddr(&addr), len); if (err) goto close_cli0; @@ -1640,35 +1723,131 @@ static void udp_redir_to_connected(int family, int sotype, int sock_mapfd, if (err) goto close_cli0; - p1 = socket_loopback(family, sotype | SOCK_NONBLOCK); - if (p1 < 0) - goto close_cli0; - err = xgetsockname(p1, sockaddr(&addr), &len); + *s = p0; + *c = c0; + return 0; + +close_cli0: + xclose(c0); +close_peer0: + xclose(p0); + return err; +} + +static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd, + enum redir_mode mode) +{ + const char *log_prefix = redir_mode_str(mode); + int c0, c1, p0, p1; + unsigned int pass; + int retries = 100; + int err, n; + u32 key; + char b; + + zero_verdict_count(verd_mapfd); + + err = udp_socketpair(family, &p0, &c0); + if (err) + return; + err = udp_socketpair(family, &p1, &c1); if (err) goto close_cli0; - c1 = xsocket(family, sotype | SOCK_NONBLOCK, 0); - if (c1 < 0) - goto close_peer1; - err = xconnect(c1, sockaddr(&addr), len); + err = add_to_sockmap(sock_mapfd, p0, p1); if (err) goto close_cli1; - err = xgetsockname(c1, sockaddr(&addr), &len); - if (err) + + n = write(c1, "a", 1); + if (n < 0) + FAIL_ERRNO("%s: write", log_prefix); + if (n == 0) + FAIL("%s: incomplete write", log_prefix); + if (n < 1) goto close_cli1; - err = xconnect(p1, sockaddr(&addr), len); + + key = SK_PASS; + err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass); if (err) goto close_cli1; + if (pass != 1) + FAIL("%s: want pass count 1, have %d", log_prefix, pass); - key = 0; - value = p0; - err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); 
+again: + n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1); + if (n < 0) { + if (errno == EAGAIN && retries--) + goto again; + FAIL_ERRNO("%s: read", log_prefix); + } + if (n == 0) + FAIL("%s: incomplete read", log_prefix); + +close_cli1: + xclose(c1); + xclose(p1); +close_cli0: + xclose(c0); + xclose(p0); +} + +static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel, + struct bpf_map *inner_map, int family) +{ + int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); + int verdict_map = bpf_map__fd(skel->maps.verdict_map); + int sock_map = bpf_map__fd(inner_map); + int err; + + err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0); if (err) - goto close_cli1; + return; - key = 1; - value = p1; - err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); + skel->bss->test_ingress = false; + udp_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS); + skel->bss->test_ingress = true; + udp_redir_to_connected(family, sock_map, verdict_map, REDIR_INGRESS); + + xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); +} + +static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map, + int family) +{ + const char *family_name, *map_name; + char s[MAX_TEST_NAME]; + + family_name = family_str(family); + map_name = map_type_str(map); + snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__); + if (!test__start_subtest(s)) + return; + udp_skb_redir_to_connected(skel, map, family); +} + +static void udp_unix_redir_to_connected(int family, int sock_mapfd, + int verd_mapfd, enum redir_mode mode) +{ + const char *log_prefix = redir_mode_str(mode); + int c0, c1, p0, p1; + unsigned int pass; + int retries = 100; + int err, n; + int sfd[2]; + u32 key; + char b; + + zero_verdict_count(verd_mapfd); + + if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd)) + return; + c0 = sfd[0], p0 = sfd[1]; + + err = udp_socketpair(family, &p1, &c1); + if (err) + goto close; + + err = 
add_to_sockmap(sock_mapfd, p0, p1); if (err) goto close_cli1; @@ -1699,16 +1878,89 @@ again: close_cli1: xclose(c1); -close_peer1: + xclose(p1); +close: + xclose(c0); + xclose(p0); +} + +static void udp_unix_skb_redir_to_connected(struct test_sockmap_listen *skel, + struct bpf_map *inner_map, int family) +{ + int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); + int verdict_map = bpf_map__fd(skel->maps.verdict_map); + int sock_map = bpf_map__fd(inner_map); + int err; + + err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0); + if (err) + return; + + skel->bss->test_ingress = false; + udp_unix_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS); + skel->bss->test_ingress = true; + udp_unix_redir_to_connected(family, sock_map, verdict_map, REDIR_INGRESS); + + xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); +} + +static void unix_udp_redir_to_connected(int family, int sock_mapfd, + int verd_mapfd, enum redir_mode mode) +{ + const char *log_prefix = redir_mode_str(mode); + int c0, c1, p0, p1; + unsigned int pass; + int err, n; + int sfd[2]; + u32 key; + char b; + + zero_verdict_count(verd_mapfd); + + err = udp_socketpair(family, &p0, &c0); + if (err) + return; + + if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd)) + goto close_cli0; + c1 = sfd[0], p1 = sfd[1]; + + err = add_to_sockmap(sock_mapfd, p0, p1); + if (err) + goto close; + + n = write(c1, "a", 1); + if (n < 0) + FAIL_ERRNO("%s: write", log_prefix); + if (n == 0) + FAIL("%s: incomplete write", log_prefix); + if (n < 1) + goto close; + + key = SK_PASS; + err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass); + if (err) + goto close; + if (pass != 1) + FAIL("%s: want pass count 1, have %d", log_prefix, pass); + + n = read(mode == REDIR_INGRESS ? 
p0 : c0, &b, 1); + if (n < 0) + FAIL_ERRNO("%s: read", log_prefix); + if (n == 0) + FAIL("%s: incomplete read", log_prefix); + +close: + xclose(c1); xclose(p1); close_cli0: xclose(c0); -close_peer0: xclose(p0); + } -static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel, - struct bpf_map *inner_map, int family) +static void unix_udp_skb_redir_to_connected(struct test_sockmap_listen *skel, + struct bpf_map *inner_map, int family) { int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); int verdict_map = bpf_map__fd(skel->maps.verdict_map); @@ -1720,17 +1972,15 @@ static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel, return; skel->bss->test_ingress = false; - udp_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map, - REDIR_EGRESS); + unix_udp_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS); skel->bss->test_ingress = true; - udp_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map, - REDIR_INGRESS); + unix_udp_redir_to_connected(family, sock_map, verdict_map, REDIR_INGRESS); xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); } -static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map, - int family) +static void test_udp_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map, + int family) { const char *family_name, *map_name; char s[MAX_TEST_NAME]; @@ -1740,7 +1990,8 @@ static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__); if (!test__start_subtest(s)) return; - udp_skb_redir_to_connected(skel, map, family); + udp_unix_skb_redir_to_connected(skel, map, family); + unix_udp_skb_redir_to_connected(skel, map, family); } static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map, @@ -1752,6 +2003,7 @@ static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map, test_reuseport(skel, map, family, SOCK_STREAM); test_reuseport(skel, map, 
family, SOCK_DGRAM); test_udp_redir(skel, map, family); + test_udp_unix_redir(skel, map, family); } void test_sockmap_listen(void) @@ -1767,10 +2019,12 @@ void test_sockmap_listen(void) skel->bss->test_sockmap = true; run_tests(skel, skel->maps.sock_map, AF_INET); run_tests(skel, skel->maps.sock_map, AF_INET6); + test_unix_redir(skel, skel->maps.sock_map, SOCK_DGRAM); skel->bss->test_sockmap = false; run_tests(skel, skel->maps.sock_hash, AF_INET); run_tests(skel, skel->maps.sock_hash, AF_INET6); + test_unix_redir(skel, skel->maps.sock_hash, SOCK_DGRAM); test_sockmap_listen__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c new file mode 100644 index 000000000000..25f40e1b9967 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/timer.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include <test_progs.h> +#include "timer.skel.h" + +static int timer(struct timer *timer_skel) +{ + int err, prog_fd; + __u32 duration = 0, retval; + + err = timer__attach(timer_skel); + if (!ASSERT_OK(err, "timer_attach")) + return err; + + ASSERT_EQ(timer_skel->data->callback_check, 52, "callback_check1"); + ASSERT_EQ(timer_skel->data->callback2_check, 52, "callback2_check1"); + + prog_fd = bpf_program__fd(timer_skel->progs.test1); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, + NULL, NULL, &retval, &duration); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(retval, 0, "test_run"); + timer__detach(timer_skel); + + usleep(50); /* 10 usecs should be enough, but give it extra */ + /* check that timer_cb1() was executed 10+10 times */ + ASSERT_EQ(timer_skel->data->callback_check, 42, "callback_check2"); + ASSERT_EQ(timer_skel->data->callback2_check, 42, "callback2_check2"); + + /* check that timer_cb2() was executed twice */ + ASSERT_EQ(timer_skel->bss->bss_data, 10, "bss_data"); + + /* check that there were no errors in timer execution */ + 
ASSERT_EQ(timer_skel->bss->err, 0, "err"); + + /* check that code paths completed */ + ASSERT_EQ(timer_skel->bss->ok, 1 | 2 | 4, "ok"); + + return 0; +} + +void test_timer(void) +{ + struct timer *timer_skel = NULL; + int err; + + timer_skel = timer__open_and_load(); + if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load")) + goto cleanup; + + err = timer(timer_skel); + ASSERT_OK(err, "timer"); +cleanup: + timer__destroy(timer_skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/timer_mim.c b/tools/testing/selftests/bpf/prog_tests/timer_mim.c new file mode 100644 index 000000000000..f5acbcbe33a4 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/timer_mim.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include <test_progs.h> +#include "timer_mim.skel.h" +#include "timer_mim_reject.skel.h" + +static int timer_mim(struct timer_mim *timer_skel) +{ + __u32 duration = 0, retval; + __u64 cnt1, cnt2; + int err, prog_fd, key1 = 1; + + err = timer_mim__attach(timer_skel); + if (!ASSERT_OK(err, "timer_attach")) + return err; + + prog_fd = bpf_program__fd(timer_skel->progs.test1); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, + NULL, NULL, &retval, &duration); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(retval, 0, "test_run"); + timer_mim__detach(timer_skel); + + /* check that timer_cb[12] are incrementing 'cnt' */ + cnt1 = READ_ONCE(timer_skel->bss->cnt); + usleep(200); /* 100 times more than interval */ + cnt2 = READ_ONCE(timer_skel->bss->cnt); + ASSERT_GT(cnt2, cnt1, "cnt"); + + ASSERT_EQ(timer_skel->bss->err, 0, "err"); + /* check that code paths completed */ + ASSERT_EQ(timer_skel->bss->ok, 1 | 2, "ok"); + + close(bpf_map__fd(timer_skel->maps.inner_htab)); + err = bpf_map_delete_elem(bpf_map__fd(timer_skel->maps.outer_arr), &key1); + ASSERT_EQ(err, 0, "delete inner map"); + + /* check that timer_cb[12] are no longer running */ + cnt1 = READ_ONCE(timer_skel->bss->cnt); + usleep(200); + cnt2 = 
READ_ONCE(timer_skel->bss->cnt); + ASSERT_EQ(cnt2, cnt1, "cnt"); + + return 0; +} + +void test_timer_mim(void) +{ + struct timer_mim_reject *timer_reject_skel = NULL; + libbpf_print_fn_t old_print_fn = NULL; + struct timer_mim *timer_skel = NULL; + int err; + + old_print_fn = libbpf_set_print(NULL); + timer_reject_skel = timer_mim_reject__open_and_load(); + libbpf_set_print(old_print_fn); + if (!ASSERT_ERR_PTR(timer_reject_skel, "timer_reject_skel_load")) + goto cleanup; + + timer_skel = timer_mim__open_and_load(); + if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load")) + goto cleanup; + + err = timer_mim(timer_skel); + ASSERT_OK(err, "timer_mim"); +cleanup: + timer_mim__destroy(timer_skel); + timer_mim_reject__destroy(timer_reject_skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c new file mode 100644 index 000000000000..ab4952b9fb1d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <network_helpers.h> +#include "test_xdp_context_test_run.skel.h" + +void test_xdp_context_error(int prog_fd, struct bpf_test_run_opts opts, + __u32 data_meta, __u32 data, __u32 data_end, + __u32 ingress_ifindex, __u32 rx_queue_index, + __u32 egress_ifindex) +{ + struct xdp_md ctx = { + .data = data, + .data_end = data_end, + .data_meta = data_meta, + .ingress_ifindex = ingress_ifindex, + .rx_queue_index = rx_queue_index, + .egress_ifindex = egress_ifindex, + }; + int err; + + opts.ctx_in = &ctx; + opts.ctx_size_in = sizeof(ctx); + err = bpf_prog_test_run_opts(prog_fd, &opts); + ASSERT_EQ(errno, EINVAL, "errno-EINVAL"); + ASSERT_ERR(err, "bpf_prog_test_run"); +} + +void test_xdp_context_test_run(void) +{ + struct test_xdp_context_test_run *skel = NULL; + char data[sizeof(pkt_v4) + sizeof(__u32)]; + char bad_ctx[sizeof(struct xdp_md) + 1]; + struct xdp_md ctx_in, ctx_out; + 
DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &data, + .data_size_in = sizeof(data), + .ctx_out = &ctx_out, + .ctx_size_out = sizeof(ctx_out), + .repeat = 1, + ); + int err, prog_fd; + + skel = test_xdp_context_test_run__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel")) + return; + prog_fd = bpf_program__fd(skel->progs.xdp_context); + + /* Data past the end of the kernel's struct xdp_md must be 0 */ + bad_ctx[sizeof(bad_ctx) - 1] = 1; + opts.ctx_in = bad_ctx; + opts.ctx_size_in = sizeof(bad_ctx); + err = bpf_prog_test_run_opts(prog_fd, &opts); + ASSERT_EQ(errno, E2BIG, "extradata-errno"); + ASSERT_ERR(err, "bpf_prog_test_run(extradata)"); + + *(__u32 *)data = XDP_PASS; + *(struct ipv4_packet *)(data + sizeof(__u32)) = pkt_v4; + opts.ctx_in = &ctx_in; + opts.ctx_size_in = sizeof(ctx_in); + memset(&ctx_in, 0, sizeof(ctx_in)); + ctx_in.data_meta = 0; + ctx_in.data = sizeof(__u32); + ctx_in.data_end = ctx_in.data + sizeof(pkt_v4); + err = bpf_prog_test_run_opts(prog_fd, &opts); + ASSERT_OK(err, "bpf_prog_test_run(valid)"); + ASSERT_EQ(opts.retval, XDP_PASS, "valid-retval"); + ASSERT_EQ(opts.data_size_out, sizeof(pkt_v4), "valid-datasize"); + ASSERT_EQ(opts.ctx_size_out, opts.ctx_size_in, "valid-ctxsize"); + ASSERT_EQ(ctx_out.data_meta, 0, "valid-datameta"); + ASSERT_EQ(ctx_out.data, 0, "valid-data"); + ASSERT_EQ(ctx_out.data_end, sizeof(pkt_v4), "valid-dataend"); + + /* Meta data's size must be a multiple of 4 */ + test_xdp_context_error(prog_fd, opts, 0, 1, sizeof(data), 0, 0, 0); + + /* data_meta must reference the start of data */ + test_xdp_context_error(prog_fd, opts, 4, sizeof(__u32), sizeof(data), + 0, 0, 0); + + /* Meta data must be 32 bytes or smaller */ + test_xdp_context_error(prog_fd, opts, 0, 36, sizeof(data), 0, 0, 0); + + /* Total size of data must match data_end - data_meta */ + test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), + sizeof(data) - 1, 0, 0, 0); + test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), + sizeof(data) + 
1, 0, 0, 0); + + /* RX queue cannot be specified without specifying an ingress */ + test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), sizeof(data), + 0, 1, 0); + + /* Interface 1 is always the loopback interface which always has only + * one RX queue (index 0). This makes index 1 an invalid rx queue index + * for interface 1. + */ + test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), sizeof(data), + 1, 1, 0); + + /* The egress cannot be specified */ + test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), sizeof(data), + 0, 0, 1); + + test_xdp_context_test_run__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c index 0176573fe4e7..8755effd80b0 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c @@ -7,64 +7,53 @@ #define IFINDEX_LO 1 -void test_xdp_with_cpumap_helpers(void) +void test_xdp_cpumap_attach(void) { struct test_xdp_with_cpumap_helpers *skel; struct bpf_prog_info info = {}; + __u32 len = sizeof(info); struct bpf_cpumap_val val = { .qsize = 192, }; - __u32 duration = 0, idx = 0; - __u32 len = sizeof(info); int err, prog_fd, map_fd; + __u32 idx = 0; skel = test_xdp_with_cpumap_helpers__open_and_load(); - if (CHECK_FAIL(!skel)) { - perror("test_xdp_with_cpumap_helpers__open_and_load"); + if (!ASSERT_OK_PTR(skel, "test_xdp_with_cpumap_helpers__open_and_load")) return; - } - /* can not attach program with cpumaps that allow programs - * as xdp generic - */ prog_fd = bpf_program__fd(skel->progs.xdp_redir_prog); err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE); - CHECK(err == 0, "Generic attach of program with 8-byte CPUMAP", - "should have failed\n"); + if (!ASSERT_OK(err, "Generic attach of program with 8-byte CPUMAP")) + goto out_close; + + err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE); + ASSERT_OK(err, "XDP program detach"); prog_fd 
= bpf_program__fd(skel->progs.xdp_dummy_cm); map_fd = bpf_map__fd(skel->maps.cpu_map); err = bpf_obj_get_info_by_fd(prog_fd, &info, &len); - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd")) goto out_close; val.bpf_prog.fd = prog_fd; err = bpf_map_update_elem(map_fd, &idx, &val, 0); - CHECK(err, "Add program to cpumap entry", "err %d errno %d\n", - err, errno); + ASSERT_OK(err, "Add program to cpumap entry"); err = bpf_map_lookup_elem(map_fd, &idx, &val); - CHECK(err, "Read cpumap entry", "err %d errno %d\n", err, errno); - CHECK(info.id != val.bpf_prog.id, "Expected program id in cpumap entry", - "expected %u read %u\n", info.id, val.bpf_prog.id); + ASSERT_OK(err, "Read cpumap entry"); + ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to cpumap entry prog_id"); /* can not attach BPF_XDP_CPUMAP program to a device */ err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE); - CHECK(err == 0, "Attach of BPF_XDP_CPUMAP program", - "should have failed\n"); + if (!ASSERT_NEQ(err, 0, "Attach of BPF_XDP_CPUMAP program")) + bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE); val.qsize = 192; val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog); err = bpf_map_update_elem(map_fd, &idx, &val, 0); - CHECK(err == 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry", - "should have failed\n"); + ASSERT_NEQ(err, 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry"); out_close: test_xdp_with_cpumap_helpers__destroy(skel); } - -void test_xdp_cpumap_attach(void) -{ - if (test__start_subtest("cpumap_with_progs")) - test_xdp_with_cpumap_helpers(); -} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c index 88ef3ec8ac4c..c72af030ff10 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c @@ -16,50 +16,45 @@ void test_xdp_with_devmap_helpers(void) .ifindex = IFINDEX_LO, }; 
__u32 len = sizeof(info); - __u32 duration = 0, idx = 0; int err, dm_fd, map_fd; + __u32 idx = 0; skel = test_xdp_with_devmap_helpers__open_and_load(); - if (CHECK_FAIL(!skel)) { - perror("test_xdp_with_devmap_helpers__open_and_load"); + if (!ASSERT_OK_PTR(skel, "test_xdp_with_devmap_helpers__open_and_load")) return; - } - /* can not attach program with DEVMAPs that allow programs - * as xdp generic - */ dm_fd = bpf_program__fd(skel->progs.xdp_redir_prog); err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE); - CHECK(err == 0, "Generic attach of program with 8-byte devmap", - "should have failed\n"); + if (!ASSERT_OK(err, "Generic attach of program with 8-byte devmap")) + goto out_close; + + err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE); + ASSERT_OK(err, "XDP program detach"); dm_fd = bpf_program__fd(skel->progs.xdp_dummy_dm); map_fd = bpf_map__fd(skel->maps.dm_ports); err = bpf_obj_get_info_by_fd(dm_fd, &info, &len); - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd")) goto out_close; val.bpf_prog.fd = dm_fd; err = bpf_map_update_elem(map_fd, &idx, &val, 0); - CHECK(err, "Add program to devmap entry", - "err %d errno %d\n", err, errno); + ASSERT_OK(err, "Add program to devmap entry"); err = bpf_map_lookup_elem(map_fd, &idx, &val); - CHECK(err, "Read devmap entry", "err %d errno %d\n", err, errno); - CHECK(info.id != val.bpf_prog.id, "Expected program id in devmap entry", - "expected %u read %u\n", info.id, val.bpf_prog.id); + ASSERT_OK(err, "Read devmap entry"); + ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to devmap entry prog_id"); /* can not attach BPF_XDP_DEVMAP program to a device */ err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE); - CHECK(err == 0, "Attach of BPF_XDP_DEVMAP program", - "should have failed\n"); + if (!ASSERT_NEQ(err, 0, "Attach of BPF_XDP_DEVMAP program")) + bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE); val.ifindex = 1; val.bpf_prog.fd = 
bpf_program__fd(skel->progs.xdp_dummy_prog); err = bpf_map_update_elem(map_fd, &idx, &val, 0); - CHECK(err == 0, "Add non-BPF_XDP_DEVMAP program to devmap entry", - "should have failed\n"); + ASSERT_NEQ(err, 0, "Add non-BPF_XDP_DEVMAP program to devmap entry"); out_close: test_xdp_with_devmap_helpers__destroy(skel); @@ -68,12 +63,10 @@ out_close: void test_neg_xdp_devmap_helpers(void) { struct test_xdp_devmap_helpers *skel; - __u32 duration = 0; skel = test_xdp_devmap_helpers__open_and_load(); - if (CHECK(skel, - "Load of XDP program accessing egress ifindex without attach type", - "should have failed\n")) { + if (!ASSERT_EQ(skel, NULL, + "Load of XDP program accessing egress ifindex without attach type")) { test_xdp_devmap_helpers__destroy(skel); } } diff --git a/tools/testing/selftests/bpf/progs/get_func_ip_test.c b/tools/testing/selftests/bpf/progs/get_func_ip_test.c new file mode 100644 index 000000000000..acd587b6e859 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/get_func_ip_test.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +extern const void bpf_fentry_test1 __ksym; +extern const void bpf_fentry_test2 __ksym; +extern const void bpf_fentry_test3 __ksym; +extern const void bpf_fentry_test4 __ksym; +extern const void bpf_modify_return_test __ksym; +extern const void bpf_fentry_test6 __ksym; + +__u64 test1_result = 0; +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(test1, int a) +{ + __u64 addr = bpf_get_func_ip(ctx); + + test1_result = (const void *) addr == &bpf_fentry_test1; + return 0; +} + +__u64 test2_result = 0; +SEC("fexit/bpf_fentry_test2") +int BPF_PROG(test2, int a) +{ + __u64 addr = bpf_get_func_ip(ctx); + + test2_result = (const void *) addr == &bpf_fentry_test2; + return 0; +} + +__u64 test3_result = 0; +SEC("kprobe/bpf_fentry_test3") +int test3(struct pt_regs *ctx) +{ + __u64 addr = bpf_get_func_ip(ctx); 
+ + test3_result = (const void *) addr == &bpf_fentry_test3; + return 0; +} + +__u64 test4_result = 0; +SEC("kretprobe/bpf_fentry_test4") +int BPF_KRETPROBE(test4) +{ + __u64 addr = bpf_get_func_ip(ctx); + + test4_result = (const void *) addr == &bpf_fentry_test4; + return 0; +} + +__u64 test5_result = 0; +SEC("fmod_ret/bpf_modify_return_test") +int BPF_PROG(test5, int a, int *b, int ret) +{ + __u64 addr = bpf_get_func_ip(ctx); + + test5_result = (const void *) addr == &bpf_modify_return_test; + return ret; +} + +__u64 test6_result = 0; +SEC("kprobe/bpf_fentry_test6+0x5") +int test6(struct pt_regs *ctx) +{ + __u64 addr = bpf_get_func_ip(ctx); + + test6_result = (const void *) addr == &bpf_fentry_test6 + 5; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c index 84cd63259554..a0e7762b1e5a 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c +++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c @@ -528,7 +528,6 @@ int __encap_ip6vxlan_eth(struct __sk_buff *skb) static int decap_internal(struct __sk_buff *skb, int off, int len, char proto) { - char buf[sizeof(struct v6hdr)]; struct gre_hdr greh; struct udphdr udph; int olen = len; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_context_test_run.c b/tools/testing/selftests/bpf/progs/test_xdp_context_test_run.c new file mode 100644 index 000000000000..d7b88cd05afd --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_xdp_context_test_run.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +SEC("xdp") +int xdp_context(struct xdp_md *xdp) +{ + void *data = (void *)(long)xdp->data; + __u32 *metadata = (void *)(long)xdp->data_meta; + __u32 ret; + + if (metadata + 1 > data) + return XDP_ABORTED; + ret = *metadata; + if (bpf_xdp_adjust_meta(xdp, 4)) + return XDP_ABORTED; + return ret; +} + +char _license[] SEC("license") = "GPL"; diff --git 
a/tools/testing/selftests/bpf/progs/timer.c b/tools/testing/selftests/bpf/progs/timer.c new file mode 100644 index 000000000000..5f5309791649 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/timer.c @@ -0,0 +1,297 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include <linux/bpf.h> +#include <time.h> +#include <errno.h> +#include <bpf/bpf_helpers.h> +#include "bpf_tcp_helpers.h" + +char _license[] SEC("license") = "GPL"; +struct hmap_elem { + int counter; + struct bpf_timer timer; + struct bpf_spin_lock lock; /* unused */ +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1000); + __type(key, int); + __type(value, struct hmap_elem); +} hmap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(map_flags, BPF_F_NO_PREALLOC); + __uint(max_entries, 1000); + __type(key, int); + __type(value, struct hmap_elem); +} hmap_malloc SEC(".maps"); + +struct elem { + struct bpf_timer t; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 2); + __type(key, int); + __type(value, struct elem); +} array SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_LRU_HASH); + __uint(max_entries, 4); + __type(key, int); + __type(value, struct elem); +} lru SEC(".maps"); + +__u64 bss_data; +__u64 err; +__u64 ok; +__u64 callback_check = 52; +__u64 callback2_check = 52; + +#define ARRAY 1 +#define HTAB 2 +#define HTAB_MALLOC 3 +#define LRU 4 + +/* callback for array and lru timers */ +static int timer_cb1(void *map, int *key, struct bpf_timer *timer) +{ + /* increment bss variable twice. + * Once via array timer callback and once via lru timer callback + */ + bss_data += 5; + + /* *key == 0 - the callback was called for array timer. + * *key == 4 - the callback was called from lru timer. 
+ */ + if (*key == ARRAY) { + struct bpf_timer *lru_timer; + int lru_key = LRU; + + /* rearm array timer to be called again in ~35 seconds */ + if (bpf_timer_start(timer, 1ull << 35, 0) != 0) + err |= 1; + + lru_timer = bpf_map_lookup_elem(&lru, &lru_key); + if (!lru_timer) + return 0; + bpf_timer_set_callback(lru_timer, timer_cb1); + if (bpf_timer_start(lru_timer, 0, 0) != 0) + err |= 2; + } else if (*key == LRU) { + int lru_key, i; + + for (i = LRU + 1; + i <= 100 /* for current LRU eviction algorithm this number + * should be larger than ~ lru->max_entries * 2 + */; + i++) { + struct elem init = {}; + + /* lru_key cannot be used as loop induction variable + * otherwise the loop will be unbounded. + */ + lru_key = i; + + /* add more elements into lru map to push out current + * element and force deletion of this timer + */ + bpf_map_update_elem(map, &lru_key, &init, 0); + /* look it up to bump it into active list */ + bpf_map_lookup_elem(map, &lru_key); + + /* keep adding until *key changes underneath, + * which means that key/timer memory was reused + */ + if (*key != LRU) + break; + } + + /* check that the timer was removed */ + if (bpf_timer_cancel(timer) != -EINVAL) + err |= 4; + ok |= 1; + } + return 0; +} + +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(test1, int a) +{ + struct bpf_timer *arr_timer, *lru_timer; + struct elem init = {}; + int lru_key = LRU; + int array_key = ARRAY; + + arr_timer = bpf_map_lookup_elem(&array, &array_key); + if (!arr_timer) + return 0; + bpf_timer_init(arr_timer, &array, CLOCK_MONOTONIC); + + bpf_map_update_elem(&lru, &lru_key, &init, 0); + lru_timer = bpf_map_lookup_elem(&lru, &lru_key); + if (!lru_timer) + return 0; + bpf_timer_init(lru_timer, &lru, CLOCK_MONOTONIC); + + bpf_timer_set_callback(arr_timer, timer_cb1); + bpf_timer_start(arr_timer, 0 /* call timer_cb1 asap */, 0); + + /* init more timers to check that array destruction + * doesn't leak timer memory. 
+ */ + array_key = 0; + arr_timer = bpf_map_lookup_elem(&array, &array_key); + if (!arr_timer) + return 0; + bpf_timer_init(arr_timer, &array, CLOCK_MONOTONIC); + return 0; +} + +/* callback for prealloc and non-prealloca hashtab timers */ +static int timer_cb2(void *map, int *key, struct hmap_elem *val) +{ + if (*key == HTAB) + callback_check--; + else + callback2_check--; + if (val->counter > 0 && --val->counter) { + /* re-arm the timer again to execute after 1 usec */ + bpf_timer_start(&val->timer, 1000, 0); + } else if (*key == HTAB) { + struct bpf_timer *arr_timer; + int array_key = ARRAY; + + /* cancel arr_timer otherwise bpf_fentry_test1 prog + * will stay alive forever. + */ + arr_timer = bpf_map_lookup_elem(&array, &array_key); + if (!arr_timer) + return 0; + if (bpf_timer_cancel(arr_timer) != 1) + /* bpf_timer_cancel should return 1 to indicate + * that arr_timer was active at this time + */ + err |= 8; + + /* try to cancel ourself. It shouldn't deadlock. */ + if (bpf_timer_cancel(&val->timer) != -EDEADLK) + err |= 16; + + /* delete this key and this timer anyway. + * It shouldn't deadlock either. + */ + bpf_map_delete_elem(map, key); + + /* in preallocated hashmap both 'key' and 'val' could have been + * reused to store another map element (like in LRU above), + * but in controlled test environment the below test works. + * It's not a use-after-free. The memory is owned by the map. + */ + if (bpf_timer_start(&val->timer, 1000, 0) != -EINVAL) + err |= 32; + ok |= 2; + } else { + if (*key != HTAB_MALLOC) + err |= 64; + + /* try to cancel ourself. It shouldn't deadlock. */ + if (bpf_timer_cancel(&val->timer) != -EDEADLK) + err |= 128; + + /* delete this key and this timer anyway. + * It shouldn't deadlock either. + */ + bpf_map_delete_elem(map, key); + + /* in non-preallocated hashmap both 'key' and 'val' are RCU + * protected and still valid though this element was deleted + * from the map. Arm this timer for ~35 seconds. 
When callback + * finishes the call_rcu will invoke: + * htab_elem_free_rcu + * check_and_free_timer + * bpf_timer_cancel_and_free + * to cancel this 35 second sleep and delete the timer for real. + */ + if (bpf_timer_start(&val->timer, 1ull << 35, 0) != 0) + err |= 256; + ok |= 4; + } + return 0; +} + +int bpf_timer_test(void) +{ + struct hmap_elem *val; + int key = HTAB, key_malloc = HTAB_MALLOC; + + val = bpf_map_lookup_elem(&hmap, &key); + if (val) { + if (bpf_timer_init(&val->timer, &hmap, CLOCK_BOOTTIME) != 0) + err |= 512; + bpf_timer_set_callback(&val->timer, timer_cb2); + bpf_timer_start(&val->timer, 1000, 0); + } + val = bpf_map_lookup_elem(&hmap_malloc, &key_malloc); + if (val) { + if (bpf_timer_init(&val->timer, &hmap_malloc, CLOCK_BOOTTIME) != 0) + err |= 1024; + bpf_timer_set_callback(&val->timer, timer_cb2); + bpf_timer_start(&val->timer, 1000, 0); + } + return 0; +} + +SEC("fentry/bpf_fentry_test2") +int BPF_PROG(test2, int a, int b) +{ + struct hmap_elem init = {}, *val; + int key = HTAB, key_malloc = HTAB_MALLOC; + + init.counter = 10; /* number of times to trigger timer_cb2 */ + bpf_map_update_elem(&hmap, &key, &init, 0); + val = bpf_map_lookup_elem(&hmap, &key); + if (val) + bpf_timer_init(&val->timer, &hmap, CLOCK_BOOTTIME); + /* update the same key to free the timer */ + bpf_map_update_elem(&hmap, &key, &init, 0); + + bpf_map_update_elem(&hmap_malloc, &key_malloc, &init, 0); + val = bpf_map_lookup_elem(&hmap_malloc, &key_malloc); + if (val) + bpf_timer_init(&val->timer, &hmap_malloc, CLOCK_BOOTTIME); + /* update the same key to free the timer */ + bpf_map_update_elem(&hmap_malloc, &key_malloc, &init, 0); + + /* init more timers to check that htab operations + * don't leak timer memory. 
+ */ + key = 0; + bpf_map_update_elem(&hmap, &key, &init, 0); + val = bpf_map_lookup_elem(&hmap, &key); + if (val) + bpf_timer_init(&val->timer, &hmap, CLOCK_BOOTTIME); + bpf_map_delete_elem(&hmap, &key); + bpf_map_update_elem(&hmap, &key, &init, 0); + val = bpf_map_lookup_elem(&hmap, &key); + if (val) + bpf_timer_init(&val->timer, &hmap, CLOCK_BOOTTIME); + + /* and with non-prealloc htab */ + key_malloc = 0; + bpf_map_update_elem(&hmap_malloc, &key_malloc, &init, 0); + val = bpf_map_lookup_elem(&hmap_malloc, &key_malloc); + if (val) + bpf_timer_init(&val->timer, &hmap_malloc, CLOCK_BOOTTIME); + bpf_map_delete_elem(&hmap_malloc, &key_malloc); + bpf_map_update_elem(&hmap_malloc, &key_malloc, &init, 0); + val = bpf_map_lookup_elem(&hmap_malloc, &key_malloc); + if (val) + bpf_timer_init(&val->timer, &hmap_malloc, CLOCK_BOOTTIME); + + return bpf_timer_test(); +} diff --git a/tools/testing/selftests/bpf/progs/timer_mim.c b/tools/testing/selftests/bpf/progs/timer_mim.c new file mode 100644 index 000000000000..2fee7ab105ef --- /dev/null +++ b/tools/testing/selftests/bpf/progs/timer_mim.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include <linux/bpf.h> +#include <time.h> +#include <errno.h> +#include <bpf/bpf_helpers.h> +#include "bpf_tcp_helpers.h" + +char _license[] SEC("license") = "GPL"; +struct hmap_elem { + int pad; /* unused */ + struct bpf_timer timer; +}; + +struct inner_map { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1024); + __type(key, int); + __type(value, struct hmap_elem); +} inner_htab SEC(".maps"); + +#define ARRAY_KEY 1 +#define HASH_KEY 1234 + +struct outer_arr { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 2); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); + __array(values, struct inner_map); +} outer_arr SEC(".maps") = { + .values = { [ARRAY_KEY] = &inner_htab }, +}; + +__u64 err; +__u64 ok; +__u64 cnt; + +static int timer_cb1(void *map, int *key, 
struct hmap_elem *val); + +static int timer_cb2(void *map, int *key, struct hmap_elem *val) +{ + cnt++; + bpf_timer_set_callback(&val->timer, timer_cb1); + if (bpf_timer_start(&val->timer, 1000, 0)) + err |= 1; + ok |= 1; + return 0; +} + +/* callback for inner hash map */ +static int timer_cb1(void *map, int *key, struct hmap_elem *val) +{ + cnt++; + bpf_timer_set_callback(&val->timer, timer_cb2); + if (bpf_timer_start(&val->timer, 1000, 0)) + err |= 2; + /* Do a lookup to make sure 'map' and 'key' pointers are correct */ + bpf_map_lookup_elem(map, key); + ok |= 2; + return 0; +} + +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(test1, int a) +{ + struct hmap_elem init = {}; + struct bpf_map *inner_map; + struct hmap_elem *val; + int array_key = ARRAY_KEY; + int hash_key = HASH_KEY; + + inner_map = bpf_map_lookup_elem(&outer_arr, &array_key); + if (!inner_map) + return 0; + + bpf_map_update_elem(inner_map, &hash_key, &init, 0); + val = bpf_map_lookup_elem(inner_map, &hash_key); + if (!val) + return 0; + + bpf_timer_init(&val->timer, inner_map, CLOCK_MONOTONIC); + if (bpf_timer_set_callback(&val->timer, timer_cb1)) + err |= 4; + if (bpf_timer_start(&val->timer, 0, 0)) + err |= 8; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/timer_mim_reject.c b/tools/testing/selftests/bpf/progs/timer_mim_reject.c new file mode 100644 index 000000000000..5d648e3d8a41 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/timer_mim_reject.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include <linux/bpf.h> +#include <time.h> +#include <errno.h> +#include <bpf/bpf_helpers.h> +#include "bpf_tcp_helpers.h" + +char _license[] SEC("license") = "GPL"; +struct hmap_elem { + int pad; /* unused */ + struct bpf_timer timer; +}; + +struct inner_map { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1024); + __type(key, int); + __type(value, struct hmap_elem); +} inner_htab SEC(".maps"); + +#define ARRAY_KEY 1 +#define 
ARRAY_KEY2 2 +#define HASH_KEY 1234 + +struct outer_arr { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 2); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); + __array(values, struct inner_map); +} outer_arr SEC(".maps") = { + .values = { [ARRAY_KEY] = &inner_htab }, +}; + +__u64 err; +__u64 ok; +__u64 cnt; + +/* callback for inner hash map */ +static int timer_cb(void *map, int *key, struct hmap_elem *val) +{ + return 0; +} + +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(test1, int a) +{ + struct hmap_elem init = {}; + struct bpf_map *inner_map, *inner_map2; + struct hmap_elem *val; + int array_key = ARRAY_KEY; + int array_key2 = ARRAY_KEY2; + int hash_key = HASH_KEY; + + inner_map = bpf_map_lookup_elem(&outer_arr, &array_key); + if (!inner_map) + return 0; + + inner_map2 = bpf_map_lookup_elem(&outer_arr, &array_key2); + if (!inner_map2) + return 0; + bpf_map_update_elem(inner_map, &hash_key, &init, 0); + val = bpf_map_lookup_elem(inner_map, &hash_key); + if (!val) + return 0; + + bpf_timer_init(&val->timer, inner_map2, CLOCK_MONOTONIC); + if (bpf_timer_set_callback(&val->timer, timer_cb)) + err |= 4; + if (bpf_timer_start(&val->timer, 0, 0)) + err |= 8; + return 0; +} diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 79c9eb0034d5..5b169e915679 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -25,6 +25,7 @@ TEST_PROGS += bareudp.sh TEST_PROGS += unicast_extensions.sh TEST_PROGS += udpgro_fwd.sh TEST_PROGS += veth.sh +TEST_PROGS += ioam6.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any @@ -36,6 +37,7 @@ TEST_GEN_FILES += fin_ack_lat TEST_GEN_FILES += reuseaddr_ports_exhausted TEST_GEN_FILES += hwtstamp_config rxtimestamp timestamping txtimestamp TEST_GEN_FILES += ipsec +TEST_GEN_FILES += ioam6_parser TEST_GEN_PROGS = reuseport_bpf 
reuseport_bpf_cpu reuseport_bpf_numa TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 6f905b53904f..21b646d10b88 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -42,3 +42,4 @@ CONFIG_NET_CLS_FLOWER=m CONFIG_NET_ACT_TUNNEL_KEY=m CONFIG_NET_ACT_MIRRED=m CONFIG_BAREUDP=m +CONFIG_IPV6_IOAM6_LWTUNNEL=y diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh new file mode 100644 index 000000000000..bcf15487e584 --- /dev/null +++ b/tools/testing/selftests/net/ioam6.sh @@ -0,0 +1,297 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ +# +# Author: Justin Iurman <justin.iurman@uliege.be> +# +# This test evaluates the IOAM insertion for IPv6 by checking the IOAM data +# integrity on the receiver. +# +# The topology is formed by 3 nodes: Alpha (sender), Beta (router in-between) +# and Gamma (receiver). An IOAM domain is configured from Alpha to Gamma only, +# which means not on the reverse path. When Gamma is the destination, Alpha +# adds an IOAM option (Pre-allocated Trace) inside a Hop-by-hop and fills the +# trace with its own IOAM data. Beta and Gamma also fill the trace. The IOAM +# data integrity is checked on Gamma, by comparing with the pre-defined IOAM +# configuration (see below). +# +# +-------------------+ +-------------------+ +# | | | | +# | alpha netns | | gamma netns | +# | | | | +# | +-------------+ | | +-------------+ | +# | | veth0 | | | | veth0 | | +# | | db01::2/64 | | | | db02::2/64 | | +# | +-------------+ | | +-------------+ | +# | . | | . | +# +-------------------+ +-------------------+ +# . . +# . . +# . . +# +----------------------------------------------------+ +# | . . | +# | +-------------+ +-------------+ | +# | | veth0 | | veth1 | | +# | | db01::1/64 | ................ 
| db02::1/64 | | +# | +-------------+ +-------------+ | +# | | +# | beta netns | +# | | +# +--------------------------+-------------------------+ +# +# +# ~~~~~~~~~~~~~~~~~~~~~~ +# | IOAM configuration | +# ~~~~~~~~~~~~~~~~~~~~~~ +# +# Alpha +# +-----------------------------------------------------------+ +# | Type | Value | +# +-----------------------------------------------------------+ +# | Node ID | 1 | +# +-----------------------------------------------------------+ +# | Node Wide ID | 11111111 | +# +-----------------------------------------------------------+ +# | Ingress ID | 0xffff (default value) | +# +-----------------------------------------------------------+ +# | Ingress Wide ID | 0xffffffff (default value) | +# +-----------------------------------------------------------+ +# | Egress ID | 101 | +# +-----------------------------------------------------------+ +# | Egress Wide ID | 101101 | +# +-----------------------------------------------------------+ +# | Namespace Data | 0xdeadbee0 | +# +-----------------------------------------------------------+ +# | Namespace Wide Data | 0xcafec0caf00dc0de | +# +-----------------------------------------------------------+ +# | Schema ID | 777 | +# +-----------------------------------------------------------+ +# | Schema Data | something that will be 4n-aligned | +# +-----------------------------------------------------------+ +# +# Note: When Gamma is the destination, Alpha adds an IOAM Pre-allocated Trace +# option inside a Hop-by-hop, where 164 bytes are pre-allocated for the +# trace, with 123 as the IOAM-Namespace and with 0xfff00200 as the trace +# type (= all available options at this time). As a result, and based on +# the IOAM configurations here, only Alpha and Beta should be capable of +# inserting their IOAM data while Gamma won't have enough space and will +# set the overflow bit. 
+# +# Beta +# +-----------------------------------------------------------+ +# | Type | Value | +# +-----------------------------------------------------------+ +# | Node ID | 2 | +# +-----------------------------------------------------------+ +# | Node Wide ID | 22222222 | +# +-----------------------------------------------------------+ +# | Ingress ID | 201 | +# +-----------------------------------------------------------+ +# | Ingress Wide ID | 201201 | +# +-----------------------------------------------------------+ +# | Egress ID | 202 | +# +-----------------------------------------------------------+ +# | Egress Wide ID | 202202 | +# +-----------------------------------------------------------+ +# | Namespace Data | 0xdeadbee1 | +# +-----------------------------------------------------------+ +# | Namespace Wide Data | 0xcafec0caf11dc0de | +# +-----------------------------------------------------------+ +# | Schema ID | 0xffffff (= None) | +# +-----------------------------------------------------------+ +# | Schema Data | | +# +-----------------------------------------------------------+ +# +# Gamma +# +-----------------------------------------------------------+ +# | Type | Value | +# +-----------------------------------------------------------+ +# | Node ID | 3 | +# +-----------------------------------------------------------+ +# | Node Wide ID | 33333333 | +# +-----------------------------------------------------------+ +# | Ingress ID | 301 | +# +-----------------------------------------------------------+ +# | Ingress Wide ID | 301301 | +# +-----------------------------------------------------------+ +# | Egress ID | 0xffff (default value) | +# +-----------------------------------------------------------+ +# | Egress Wide ID | 0xffffffff (default value) | +# +-----------------------------------------------------------+ +# | Namespace Data | 0xdeadbee2 | +# +-----------------------------------------------------------+ +# | Namespace Wide Data | 
0xcafec0caf22dc0de | +# +-----------------------------------------------------------+ +# | Schema ID | 0xffffff (= None) | +# +-----------------------------------------------------------+ +# | Schema Data | | +# +-----------------------------------------------------------+ + +#=============================================================================== +# +# WARNING: +# Do NOT modify the following configuration unless you know what you're doing. +# +IOAM_NAMESPACE=123 +IOAM_TRACE_TYPE=0xfff00200 +IOAM_PREALLOC_DATA_SIZE=164 + +ALPHA=( + 1 # ID + 11111111 # Wide ID + 0xffff # Ingress ID + 0xffffffff # Ingress Wide ID + 101 # Egress ID + 101101 # Egress Wide ID + 0xdeadbee0 # Namespace Data + 0xcafec0caf00dc0de # Namespace Wide Data + 777 # Schema ID (0xffffff = None) + "something that will be 4n-aligned" # Schema Data +) + +BETA=( + 2 + 22222222 + 201 + 201201 + 202 + 202202 + 0xdeadbee1 + 0xcafec0caf11dc0de + 0xffffff + "" +) + +GAMMA=( + 3 + 33333333 + 301 + 301301 + 0xffff + 0xffffffff + 0xdeadbee2 + 0xcafec0caf22dc0de + 0xffffff + "" +) +#=============================================================================== + +if [ "$(id -u)" -ne 0 ]; then + echo "SKIP: Need root privileges" + exit 1 +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit 1 +fi + +ip ioam &>/dev/null +if [ $? = 1 ]; then + echo "SKIP: ip tool must include IOAM" + exit 1 +fi + +if [ ! 
-e /proc/sys/net/ipv6/ioam6_id ]; then + echo "SKIP: ioam6 sysctls do not exist" + exit 1 +fi + +cleanup() +{ + ip link del ioam-veth-alpha 2>/dev/null || true + ip link del ioam-veth-gamma 2>/dev/null || true + + ip netns del ioam-node-alpha || true + ip netns del ioam-node-beta || true + ip netns del ioam-node-gamma || true +} + +setup() +{ + ip netns add ioam-node-alpha + ip netns add ioam-node-beta + ip netns add ioam-node-gamma + + ip link add name ioam-veth-alpha type veth peer name ioam-veth-betaL + ip link add name ioam-veth-betaR type veth peer name ioam-veth-gamma + + ip link set ioam-veth-alpha netns ioam-node-alpha + ip link set ioam-veth-betaL netns ioam-node-beta + ip link set ioam-veth-betaR netns ioam-node-beta + ip link set ioam-veth-gamma netns ioam-node-gamma + + ip -netns ioam-node-alpha link set ioam-veth-alpha name veth0 + ip -netns ioam-node-beta link set ioam-veth-betaL name veth0 + ip -netns ioam-node-beta link set ioam-veth-betaR name veth1 + ip -netns ioam-node-gamma link set ioam-veth-gamma name veth0 + + ip -netns ioam-node-alpha addr add db01::2/64 dev veth0 + ip -netns ioam-node-alpha link set veth0 up + ip -netns ioam-node-alpha link set lo up + ip -netns ioam-node-alpha route add default via db01::1 + + ip -netns ioam-node-beta addr add db01::1/64 dev veth0 + ip -netns ioam-node-beta addr add db02::1/64 dev veth1 + ip -netns ioam-node-beta link set veth0 up + ip -netns ioam-node-beta link set veth1 up + ip -netns ioam-node-beta link set lo up + + ip -netns ioam-node-gamma addr add db02::2/64 dev veth0 + ip -netns ioam-node-gamma link set veth0 up + ip -netns ioam-node-gamma link set lo up + ip -netns ioam-node-gamma route add default via db02::1 + + # - IOAM config - + ip netns exec ioam-node-alpha sysctl -wq net.ipv6.ioam6_id=${ALPHA[0]} + ip netns exec ioam-node-alpha sysctl -wq net.ipv6.ioam6_id_wide=${ALPHA[1]} + ip netns exec ioam-node-alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id=${ALPHA[4]} + ip netns exec ioam-node-alpha 
sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${ALPHA[5]} + ip -netns ioam-node-alpha ioam namespace add ${IOAM_NAMESPACE} data ${ALPHA[6]} wide ${ALPHA[7]} + ip -netns ioam-node-alpha ioam schema add ${ALPHA[8]} "${ALPHA[9]}" + ip -netns ioam-node-alpha ioam namespace set ${IOAM_NAMESPACE} schema ${ALPHA[8]} + ip -netns ioam-node-alpha route add db02::/64 encap ioam6 trace type ${IOAM_TRACE_TYPE:0:-2} ns ${IOAM_NAMESPACE} size ${IOAM_PREALLOC_DATA_SIZE} via db01::1 dev veth0 + + ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.all.forwarding=1 + ip netns exec ioam-node-beta sysctl -wq net.ipv6.ioam6_id=${BETA[0]} + ip netns exec ioam-node-beta sysctl -wq net.ipv6.ioam6_id_wide=${BETA[1]} + ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 + ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_id=${BETA[2]} + ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${BETA[3]} + ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth1.ioam6_id=${BETA[4]} + ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth1.ioam6_id_wide=${BETA[5]} + ip -netns ioam-node-beta ioam namespace add ${IOAM_NAMESPACE} data ${BETA[6]} wide ${BETA[7]} + + ip netns exec ioam-node-gamma sysctl -wq net.ipv6.ioam6_id=${GAMMA[0]} + ip netns exec ioam-node-gamma sysctl -wq net.ipv6.ioam6_id_wide=${GAMMA[1]} + ip netns exec ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 + ip netns exec ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id=${GAMMA[2]} + ip netns exec ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${GAMMA[3]} + ip -netns ioam-node-gamma ioam namespace add ${IOAM_NAMESPACE} data ${GAMMA[6]} wide ${GAMMA[7]} +} + +run() +{ + echo -n "IOAM test... " + + ip netns exec ioam-node-alpha ping6 -c 5 -W 1 db02::2 &>/dev/null + if [ $? 
!= 0 ]; then + echo "FAILED" + cleanup &>/dev/null + exit 0 + fi + + ip netns exec ioam-node-gamma ./ioam6_parser veth0 2 ${IOAM_NAMESPACE} ${IOAM_TRACE_TYPE} 64 ${ALPHA[0]} ${ALPHA[1]} ${ALPHA[2]} ${ALPHA[3]} ${ALPHA[4]} ${ALPHA[5]} ${ALPHA[6]} ${ALPHA[7]} ${ALPHA[8]} "${ALPHA[9]}" 63 ${BETA[0]} ${BETA[1]} ${BETA[2]} ${BETA[3]} ${BETA[4]} ${BETA[5]} ${BETA[6]} ${BETA[7]} ${BETA[8]} & + + local spid=$! + sleep 0.1 + + ip netns exec ioam-node-alpha ping6 -c 5 -W 1 db02::2 &>/dev/null + + wait $spid + [ $? = 0 ] && echo "PASSED" || echo "FAILED" +} + +cleanup &>/dev/null +setup +run +cleanup &>/dev/null diff --git a/tools/testing/selftests/net/ioam6_parser.c b/tools/testing/selftests/net/ioam6_parser.c new file mode 100644 index 000000000000..2256cf5ad637 --- /dev/null +++ b/tools/testing/selftests/net/ioam6_parser.c @@ -0,0 +1,402 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Author: Justin Iurman (justin.iurman@uliege.be) + * + * IOAM parser for IPv6, see ioam6.sh for details. + */ +#include <asm/byteorder.h> +#include <linux/const.h> +#include <linux/if_ether.h> +#include <linux/ioam6.h> +#include <linux/ipv6.h> +#include <sys/socket.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +struct node_args { + __u32 id; + __u64 wide; + __u16 ingr_id; + __u16 egr_id; + __u32 ingr_wide; + __u32 egr_wide; + __u32 ns_data; + __u64 ns_wide; + __u32 sc_id; + __u8 hop_limit; + __u8 *sc_data; /* NULL when sc_id = 0xffffff (default empty value) */ +}; + +/* expected args per node, in that order */ +enum { + NODE_ARG_HOP_LIMIT, + NODE_ARG_ID, + NODE_ARG_WIDE, + NODE_ARG_INGR_ID, + NODE_ARG_INGR_WIDE, + NODE_ARG_EGR_ID, + NODE_ARG_EGR_WIDE, + NODE_ARG_NS_DATA, + NODE_ARG_NS_WIDE, + NODE_ARG_SC_ID, + __NODE_ARG_MAX, +}; + +#define NODE_ARGS_SIZE __NODE_ARG_MAX + +struct args { + __u16 ns_id; + __u32 trace_type; + __u8 n_node; + __u8 *ifname; + struct node_args node[0]; +}; + +/* expected args, in that order */ +enum { + ARG_IFNAME, + ARG_N_NODE, + ARG_NS_ID, + 
ARG_TRACE_TYPE, + __ARG_MAX, +}; + +#define ARGS_SIZE __ARG_MAX + +int check_ioam6_node_data(__u8 **p, struct ioam6_trace_hdr *trace, __u8 hlim, + __u32 id, __u64 wide, __u16 ingr_id, __u32 ingr_wide, + __u16 egr_id, __u32 egr_wide, __u32 ns_data, + __u64 ns_wide, __u32 sc_id, __u8 *sc_data) +{ + __u64 raw64; + __u32 raw32; + __u8 sc_len; + + if (trace->type.bit0) { + raw32 = __be32_to_cpu(*((__u32 *)*p)); + if (hlim != (raw32 >> 24) || id != (raw32 & 0xffffff)) + return 1; + *p += sizeof(__u32); + } + + if (trace->type.bit1) { + raw32 = __be32_to_cpu(*((__u32 *)*p)); + if (ingr_id != (raw32 >> 16) || egr_id != (raw32 & 0xffff)) + return 1; + *p += sizeof(__u32); + } + + if (trace->type.bit2) + *p += sizeof(__u32); + + if (trace->type.bit3) + *p += sizeof(__u32); + + if (trace->type.bit4) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (trace->type.bit5) { + if (__be32_to_cpu(*((__u32 *)*p)) != ns_data) + return 1; + *p += sizeof(__u32); + } + + if (trace->type.bit6) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (trace->type.bit7) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (trace->type.bit8) { + raw64 = __be64_to_cpu(*((__u64 *)*p)); + if (hlim != (raw64 >> 56) || wide != (raw64 & 0xffffffffffffff)) + return 1; + *p += sizeof(__u64); + } + + if (trace->type.bit9) { + if (__be32_to_cpu(*((__u32 *)*p)) != ingr_wide) + return 1; + *p += sizeof(__u32); + + if (__be32_to_cpu(*((__u32 *)*p)) != egr_wide) + return 1; + *p += sizeof(__u32); + } + + if (trace->type.bit10) { + if (__be64_to_cpu(*((__u64 *)*p)) != ns_wide) + return 1; + *p += sizeof(__u64); + } + + if (trace->type.bit11) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (trace->type.bit22) { + raw32 = __be32_to_cpu(*((__u32 *)*p)); + sc_len = sc_data ? 
__ALIGN_KERNEL(strlen(sc_data), 4) : 0; + if (sc_len != (raw32 >> 24) * 4 || sc_id != (raw32 & 0xffffff)) + return 1; + *p += sizeof(__u32); + + if (sc_data) { + if (strncmp(*p, sc_data, strlen(sc_data))) + return 1; + + *p += strlen(sc_data); + sc_len -= strlen(sc_data); + + while (sc_len--) { + if (**p != '\0') + return 1; + *p += sizeof(__u8); + } + } + } + + return 0; +} + +int check_ioam6_trace(struct ioam6_trace_hdr *trace, struct args *args) +{ + __u8 *p; + int i; + + if (__be16_to_cpu(trace->namespace_id) != args->ns_id || + __be32_to_cpu(trace->type_be32) != args->trace_type) + return 1; + + p = trace->data + trace->remlen * 4; + + for (i = args->n_node - 1; i >= 0; i--) { + if (check_ioam6_node_data(&p, trace, + args->node[i].hop_limit, + args->node[i].id, + args->node[i].wide, + args->node[i].ingr_id, + args->node[i].ingr_wide, + args->node[i].egr_id, + args->node[i].egr_wide, + args->node[i].ns_data, + args->node[i].ns_wide, + args->node[i].sc_id, + args->node[i].sc_data)) + return 1; + } + + return 0; +} + +int parse_node_args(int *argcp, char ***argvp, struct node_args *node) +{ + char **argv = *argvp; + + if (*argcp < NODE_ARGS_SIZE) + return 1; + + node->hop_limit = strtoul(argv[NODE_ARG_HOP_LIMIT], NULL, 10); + if (!node->hop_limit) { + node->hop_limit = strtoul(argv[NODE_ARG_HOP_LIMIT], NULL, 16); + if (!node->hop_limit) + return 1; + } + + node->id = strtoul(argv[NODE_ARG_ID], NULL, 10); + if (!node->id) { + node->id = strtoul(argv[NODE_ARG_ID], NULL, 16); + if (!node->id) + return 1; + } + + node->wide = strtoull(argv[NODE_ARG_WIDE], NULL, 10); + if (!node->wide) { + node->wide = strtoull(argv[NODE_ARG_WIDE], NULL, 16); + if (!node->wide) + return 1; + } + + node->ingr_id = strtoul(argv[NODE_ARG_INGR_ID], NULL, 10); + if (!node->ingr_id) { + node->ingr_id = strtoul(argv[NODE_ARG_INGR_ID], NULL, 16); + if (!node->ingr_id) + return 1; + } + + node->ingr_wide = strtoul(argv[NODE_ARG_INGR_WIDE], NULL, 10); + if (!node->ingr_wide) { + node->ingr_wide 
= strtoul(argv[NODE_ARG_INGR_WIDE], NULL, 16); + if (!node->ingr_wide) + return 1; + } + + node->egr_id = strtoul(argv[NODE_ARG_EGR_ID], NULL, 10); + if (!node->egr_id) { + node->egr_id = strtoul(argv[NODE_ARG_EGR_ID], NULL, 16); + if (!node->egr_id) + return 1; + } + + node->egr_wide = strtoul(argv[NODE_ARG_EGR_WIDE], NULL, 10); + if (!node->egr_wide) { + node->egr_wide = strtoul(argv[NODE_ARG_EGR_WIDE], NULL, 16); + if (!node->egr_wide) + return 1; + } + + node->ns_data = strtoul(argv[NODE_ARG_NS_DATA], NULL, 16); + if (!node->ns_data) + return 1; + + node->ns_wide = strtoull(argv[NODE_ARG_NS_WIDE], NULL, 16); + if (!node->ns_wide) + return 1; + + node->sc_id = strtoul(argv[NODE_ARG_SC_ID], NULL, 10); + if (!node->sc_id) { + node->sc_id = strtoul(argv[NODE_ARG_SC_ID], NULL, 16); + if (!node->sc_id) + return 1; + } + + *argcp -= NODE_ARGS_SIZE; + *argvp += NODE_ARGS_SIZE; + + if (node->sc_id != 0xffffff) { + if (!*argcp) + return 1; + + node->sc_data = argv[NODE_ARG_SC_ID + 1]; + + *argcp -= 1; + *argvp += 1; + } + + return 0; +} + +struct args *parse_args(int argc, char **argv) +{ + struct args *args; + int n_node, i; + + if (argc < ARGS_SIZE) + goto out; + + n_node = strtoul(argv[ARG_N_NODE], NULL, 10); + if (!n_node || n_node > 10) + goto out; + + args = calloc(1, sizeof(*args) + n_node * sizeof(struct node_args)); + if (!args) + goto out; + + args->ns_id = strtoul(argv[ARG_NS_ID], NULL, 10); + if (!args->ns_id) + goto free; + + args->trace_type = strtoul(argv[ARG_TRACE_TYPE], NULL, 16); + if (!args->trace_type) + goto free; + + args->n_node = n_node; + args->ifname = argv[ARG_IFNAME]; + + argv += ARGS_SIZE; + argc -= ARGS_SIZE; + + for (i = 0; i < n_node; i++) { + if (parse_node_args(&argc, &argv, &args->node[i])) + goto free; + } + + if (argc) + goto free; + + return args; +free: + free(args); +out: + return NULL; +} + +int main(int argc, char **argv) +{ + int ret, fd, pkts, size, hoplen, found; + struct ioam6_trace_hdr *ioam6h; + struct ioam6_hdr *opt; + 
struct ipv6hdr *ip6h; + __u8 buffer[400], *p; + struct args *args; + + args = parse_args(argc - 1, argv + 1); + if (!args) { + ret = 1; + goto out; + } + + fd = socket(AF_PACKET, SOCK_DGRAM, __cpu_to_be16(ETH_P_IPV6)); + if (!fd) { + ret = 1; + goto out; + } + + if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, + args->ifname, strlen(args->ifname))) { + ret = 1; + goto close; + } + + pkts = 0; + found = 0; + while (pkts < 3 && !found) { + size = recv(fd, buffer, sizeof(buffer), 0); + ip6h = (struct ipv6hdr *)buffer; + pkts++; + + if (ip6h->nexthdr == IPPROTO_HOPOPTS) { + p = buffer + sizeof(*ip6h); + hoplen = (p[1] + 1) << 3; + + p += sizeof(struct ipv6_hopopt_hdr); + while (hoplen > 0) { + opt = (struct ioam6_hdr *)p; + + if (opt->opt_type == IPV6_TLV_IOAM && + opt->type == IOAM6_TYPE_PREALLOC) { + found = 1; + + p += sizeof(*opt); + ioam6h = (struct ioam6_trace_hdr *)p; + + ret = check_ioam6_trace(ioam6h, args); + break; + } + + p += opt->opt_len + 2; + hoplen -= opt->opt_len + 2; + } + } + } + + if (!found) + ret = 1; +close: + close(fd); +out: + free(args); + return ret; +} diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh index 11d7cdb898c0..19eac3e44c06 100755 --- a/tools/testing/selftests/net/veth.sh +++ b/tools/testing/selftests/net/veth.sh @@ -13,7 +13,7 @@ readonly NS_DST=$BASE$DST readonly BM_NET_V4=192.168.1. 
readonly BM_NET_V6=2001:db8:: -readonly NPROCS=`nproc` +readonly CPUS=`nproc` ret=0 cleanup() { @@ -75,6 +75,29 @@ chk_tso_flag() { __chk_flag "$1" $2 $3 tcp-segmentation-offload } +chk_channels() { + local msg="$1" + local target=$2 + local rx=$3 + local tx=$4 + + local dev=veth$target + + local cur_rx=`ip netns exec $BASE$target ethtool -l $dev |\ + grep RX: | tail -n 1 | awk '{print $2}' ` + local cur_tx=`ip netns exec $BASE$target ethtool -l $dev |\ + grep TX: | tail -n 1 | awk '{print $2}'` + local cur_combined=`ip netns exec $BASE$target ethtool -l $dev |\ + grep Combined: | tail -n 1 | awk '{print $2}'` + + printf "%-60s" "$msg" + if [ "$cur_rx" = "$rx" -a "$cur_tx" = "$tx" -a "$cur_combined" = "n/a" ]; then + echo " ok " + else + echo " fail rx:$rx:$cur_rx tx:$tx:$cur_tx combined:n/a:$cur_combined" + fi +} + chk_gro() { local msg="$1" local expected=$2 @@ -107,11 +130,100 @@ chk_gro() { fi } +__change_channels() +{ + local cur_cpu + local end=$1 + local cur + local i + + while true; do + printf -v cur '%(%s)T' + [ $cur -le $end ] || break + + for i in `seq 1 $CPUS`; do + ip netns exec $NS_SRC ethtool -L veth$SRC rx $i tx $i + ip netns exec $NS_DST ethtool -L veth$DST rx $i tx $i + done + + for i in `seq 1 $((CPUS - 1))`; do + cur_cpu=$((CPUS - $i)) + ip netns exec $NS_SRC ethtool -L veth$SRC rx $cur_cpu tx $cur_cpu + ip netns exec $NS_DST ethtool -L veth$DST rx $cur_cpu tx $cur_cpu + done + done +} + +__send_data() { + local end=$1 + + while true; do + printf -v cur '%(%s)T' + [ $cur -le $end ] || break + + ip netns exec $NS_SRC ./udpgso_bench_tx -4 -s 1000 -M 300 -D $BM_NET_V4$DST + done +} + +do_stress() { + local end + printf -v end '%(%s)T' + end=$((end + $STRESS)) + + ip netns exec $NS_SRC ethtool -L veth$SRC rx 3 tx 3 + ip netns exec $NS_DST ethtool -L veth$DST rx 3 tx 3 + + ip netns exec $NS_DST ./udpgso_bench_rx & + local rx_pid=$! + + echo "Running stress test for $STRESS seconds..." + __change_channels $end & + local ch_pid=$! 
+ __send_data $end & + local data_pid_1=$! + __send_data $end & + local data_pid_2=$! + __send_data $end & + local data_pid_3=$! + __send_data $end & + local data_pid_4=$! + + wait $ch_pid $data_pid_1 $data_pid_2 $data_pid_3 $data_pid_4 + kill -9 $rx_pid + echo "done" + + # restore previous setting + ip netns exec $NS_SRC ethtool -L veth$SRC rx 2 tx 2 + ip netns exec $NS_DST ethtool -L veth$DST rx 2 tx 1 +} + +usage() { + echo "Usage: $0 [-h] [-s <seconds>]" + echo -e "\t-h: show this help" + echo -e "\t-s: run optional stress tests for the given amount of seconds" +} + +STRESS=0 +while getopts "hs:" option; do + case "$option" in + "h") + usage $0 + exit 0 + ;; + "s") + STRESS=$OPTARG + ;; + esac +done + if [ ! -f ../bpf/xdp_dummy.o ]; then echo "Missing xdp_dummy helper. Build bpf selftest first" exit 1 fi +[ $CPUS -lt 2 ] && echo "Only one CPU available, some tests will be skipped" +[ $STRESS -gt 0 -a $CPUS -lt 3 ] && echo " stress test will be skipped, too" + create_ns chk_gro_flag "default - gro flag" $SRC off chk_gro_flag " - peer gro flag" $DST off @@ -134,6 +246,8 @@ chk_gro " - aggregation with TSO off" 1 cleanup create_ns +chk_channels "default channels" $DST 1 1 + ip -n $NS_DST link set dev veth$DST down ip netns exec $NS_DST ethtool -K veth$DST gro on chk_gro_flag "with gro enabled on link down - gro flag" $DST on @@ -147,6 +261,56 @@ chk_gro " - aggregation with TSO off" 1 cleanup create_ns + +CUR_TX=1 +CUR_RX=1 +if [ $CPUS -gt 1 ]; then + ip netns exec $NS_DST ethtool -L veth$DST tx 2 + chk_channels "setting tx channels" $DST 1 2 + CUR_TX=2 +fi + +if [ $CPUS -gt 2 ]; then + ip netns exec $NS_DST ethtool -L veth$DST rx 3 tx 3 + chk_channels "setting both rx and tx channels" $DST 3 3 + CUR_RX=3 + CUR_TX=3 +fi + +ip netns exec $NS_DST ethtool -L veth$DST combined 2 2>/dev/null +chk_channels "bad setting: combined channels" $DST $CUR_RX $CUR_TX + +ip netns exec $NS_DST ethtool -L veth$DST tx $((CPUS + 1)) 2>/dev/null +chk_channels "setting invalid 
channels nr" $DST $CUR_RX $CUR_TX + +if [ $CPUS -gt 1 ]; then + # this also tests queues nr reduction + ip netns exec $NS_DST ethtool -L veth$DST rx 1 tx 2 2>/dev/null + ip netns exec $NS_SRC ethtool -L veth$SRC rx 1 tx 2 2>/dev/null + printf "%-60s" "bad setting: XDP with RX nr less than TX" + ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \ + section xdp_dummy 2>/dev/null &&\ + echo "fail - set operation successful ?!?" || echo " ok " + + # the following tests will run with multiple channels active + ip netns exec $NS_SRC ethtool -L veth$SRC rx 2 + ip netns exec $NS_DST ethtool -L veth$DST rx 2 + ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \ + section xdp_dummy 2>/dev/null + printf "%-60s" "bad setting: reducing RX nr below peer TX with XDP set" + ip netns exec $NS_DST ethtool -L veth$DST rx 1 2>/dev/null &&\ + echo "fail - set operation successful ?!?" || echo " ok " + CUR_RX=2 + CUR_TX=2 +fi + +if [ $CPUS -gt 2 ]; then + printf "%-60s" "bad setting: increasing peer TX nr above RX with XDP set" + ip netns exec $NS_SRC ethtool -L veth$SRC tx 3 2>/dev/null &&\ + echo "fail - set operation successful ?!?" 
|| echo " ok " + chk_channels "setting invalid channels nr" $DST 2 2 +fi + ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o section xdp_dummy 2>/dev/null chk_gro_flag "with xdp attached - gro flag" $DST on chk_gro_flag " - peer gro flag" $SRC off @@ -167,10 +331,27 @@ chk_gro_flag " - after gro on xdp off, gro flag" $DST on chk_gro_flag " - peer gro flag" $SRC off chk_tso_flag " - tso flag" $SRC on chk_tso_flag " - peer tso flag" $DST on + +if [ $CPUS -gt 1 ]; then + ip netns exec $NS_DST ethtool -L veth$DST tx 1 + chk_channels "decreasing tx channels with device down" $DST 2 1 +fi + ip -n $NS_DST link set dev veth$DST up ip -n $NS_SRC link set dev veth$SRC up chk_gro " - aggregation" 1 +if [ $CPUS -gt 1 ]; then + [ $STRESS -gt 0 -a $CPUS -gt 2 ] && do_stress + + ip -n $NS_DST link set dev veth$DST down + ip -n $NS_SRC link set dev veth$SRC down + ip netns exec $NS_DST ethtool -L veth$DST tx 2 + chk_channels "increasing tx channels with device down" $DST 2 2 + ip -n $NS_DST link set dev veth$DST up + ip -n $NS_SRC link set dev veth$SRC up +fi + ip netns exec $NS_DST ethtool -K veth$DST gro off ip netns exec $NS_SRC ethtool -K veth$SRC tx-udp-segmentation off chk_gro "aggregation again with default and TSO off" 10 |