diff options
534 files changed, 17557 insertions, 8447 deletions
diff --git a/Documentation/bpf/btf.rst b/Documentation/bpf/btf.rst index 7940da9bc6c1..f49aeef62d0c 100644 --- a/Documentation/bpf/btf.rst +++ b/Documentation/bpf/btf.rst @@ -74,7 +74,7 @@ sequentially and type id is assigned to each recognized type starting from id #define BTF_KIND_ARRAY 3 /* Array */ #define BTF_KIND_STRUCT 4 /* Struct */ #define BTF_KIND_UNION 5 /* Union */ - #define BTF_KIND_ENUM 6 /* Enumeration */ + #define BTF_KIND_ENUM 6 /* Enumeration up to 32-bit values */ #define BTF_KIND_FWD 7 /* Forward */ #define BTF_KIND_TYPEDEF 8 /* Typedef */ #define BTF_KIND_VOLATILE 9 /* Volatile */ @@ -87,6 +87,7 @@ sequentially and type id is assigned to each recognized type starting from id #define BTF_KIND_FLOAT 16 /* Floating point */ #define BTF_KIND_DECL_TAG 17 /* Decl Tag */ #define BTF_KIND_TYPE_TAG 18 /* Type Tag */ + #define BTF_KIND_ENUM64 19 /* Enumeration up to 64-bit values */ Note that the type section encodes debug info, not just pure types. ``BTF_KIND_FUNC`` is not a type, and it represents a defined subprogram. @@ -101,10 +102,10 @@ Each type contains the following common data:: * bits 24-28: kind (e.g. int, ptr, array...etc) * bits 29-30: unused * bit 31: kind_flag, currently used by - * struct, union and fwd + * struct, union, fwd, enum and enum64. */ __u32 info; - /* "size" is used by INT, ENUM, STRUCT and UNION. + /* "size" is used by INT, ENUM, STRUCT, UNION and ENUM64. * "size" tells the size of the type it is describing. 
* * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, @@ -281,10 +282,10 @@ modes exist: ``struct btf_type`` encoding requirement: * ``name_off``: 0 or offset to a valid C identifier - * ``info.kind_flag``: 0 + * ``info.kind_flag``: 0 for unsigned, 1 for signed * ``info.kind``: BTF_KIND_ENUM * ``info.vlen``: number of enum values - * ``size``: 4 + * ``size``: 1/2/4/8 ``btf_type`` is followed by ``info.vlen`` number of ``struct btf_enum``.:: @@ -297,6 +298,10 @@ The ``btf_enum`` encoding: * ``name_off``: offset to a valid C identifier * ``val``: any value +If the original enum value is signed and the size is less than 4, +that value will be sign extended into 4 bytes. If the size is 8, +the value will be truncated into 4 bytes. + 2.2.7 BTF_KIND_FWD ~~~~~~~~~~~~~~~~~~ @@ -493,7 +498,7 @@ the attribute is applied to a ``struct``/``union`` member or a ``func`` argument, and ``btf_decl_tag.component_idx`` should be a valid index (starting from 0) pointing to a member or an argument. -2.2.17 BTF_KIND_TYPE_TAG +2.2.18 BTF_KIND_TYPE_TAG ~~~~~~~~~~~~~~~~~~~~~~~~ ``struct btf_type`` encoding requirement: @@ -516,6 +521,32 @@ type_tag, then zero or more const/volatile/restrict/typedef and finally the base type. The base type is one of int, ptr, array, struct, union, enum, func_proto and float types. 
+2.2.19 BTF_KIND_ENUM64 +~~~~~~~~~~~~~~~~~~~~~~ + +``struct btf_type`` encoding requirement: + * ``name_off``: 0 or offset to a valid C identifier + * ``info.kind_flag``: 0 for unsigned, 1 for signed + * ``info.kind``: BTF_KIND_ENUM64 + * ``info.vlen``: number of enum values + * ``size``: 1/2/4/8 + +``btf_type`` is followed by ``info.vlen`` number of ``struct btf_enum64``.:: + + struct btf_enum64 { + __u32 name_off; + __u32 val_lo32; + __u32 val_hi32; + }; + +The ``btf_enum64`` encoding: + * ``name_off``: offset to a valid C identifier + * ``val_lo32``: lower 32-bit value for a 64-bit value + * ``val_hi32``: high 32-bit value for a 64-bit value + +If the original enum value is signed and the size is less than 8, +that value will be sign extended into 8 bytes. + 3. BTF Kernel API ================= diff --git a/Documentation/bpf/instruction-set.rst b/Documentation/bpf/instruction-set.rst index 1de6a57c7e1e..9e27fbdb2206 100644 --- a/Documentation/bpf/instruction-set.rst +++ b/Documentation/bpf/instruction-set.rst @@ -127,7 +127,7 @@ BPF_XOR | BPF_K | BPF_ALU64 means:: Byte swap instructions ---------------------- -The byte swap instructions use an instruction class of ``BFP_ALU`` and a 4-bit +The byte swap instructions use an instruction class of ``BPF_ALU`` and a 4-bit code field of ``BPF_END``. 
The byte swap instructions operate on the destination register diff --git a/Documentation/devicetree/bindings/net/can/microchip,mpfs-can.yaml b/Documentation/devicetree/bindings/net/can/microchip,mpfs-can.yaml new file mode 100644 index 000000000000..45aa3de7cf01 --- /dev/null +++ b/Documentation/devicetree/bindings/net/can/microchip,mpfs-can.yaml @@ -0,0 +1,45 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/can/microchip,mpfs-can.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: + Microchip PolarFire SoC (MPFS) can controller + +maintainers: + - Conor Dooley <conor.dooley@microchip.com> + +allOf: + - $ref: can-controller.yaml# + +properties: + compatible: + const: microchip,mpfs-can + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + maxItems: 1 + +required: + - compatible + - reg + - interrupts + - clocks + +additionalProperties: false + +examples: + - | + can@2010c000 { + compatible = "microchip,mpfs-can"; + reg = <0x2010c000 0x1000>; + clocks = <&clkcfg 17>; + interrupt-parent = <&plic>; + interrupts = <56>; + }; diff --git a/Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml b/Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml new file mode 100644 index 000000000000..a3bf432960d8 --- /dev/null +++ b/Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml @@ -0,0 +1,404 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/dsa/mediatek,mt7530.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Mediatek MT7530 Ethernet switch + +maintainers: + - Sean Wang <sean.wang@mediatek.com> + - Landen Chao <Landen.Chao@mediatek.com> + - DENG Qingfang <dqfext@gmail.com> + +description: | + Port 5 of mt7530 and mt7621 switch is muxed between: + 1. GMAC5: GMAC5 can interface with another external MAC or PHY. + 2. 
PHY of port 0 or port 4: PHY interfaces with an external MAC like 2nd GMAC + of the SOC. Used in many setups where port 0/4 becomes the WAN port. + Note: On a MT7621 SOC with integrated switch: 2nd GMAC can only be connected to + GMAC5 when the gpios for RGMII2 (GPIO 22-33) are not used and not + connected to external component! + + Port 5 modes/configurations: + 1. Port 5 is disabled and isolated: An external phy can interface to the 2nd + GMAC of the SOC. + In the case of a built-in MT7530 switch, port 5 shares the RGMII bus with 2nd + GMAC and an optional external phy. Mind the GPIO/pinctrl settings of the SOC! + 2. Port 5 is muxed to PHY of port 0/4: Port 0/4 interfaces with 2nd GMAC. + It is a simple MAC to PHY interface, port 5 needs to be set up for xMII mode + and RGMII delay. + 3. Port 5 is muxed to GMAC5 and can interface to an external phy. + Port 5 becomes an extra switch port. + Only works on platform where external phy TX<->RX lines are swapped. + Like in the Ubiquiti ER-X-SFP. + 4. Port 5 is muxed to GMAC5 and interfaces with the 2nd GMAC as 2nd CPU port. + Currently a 2nd CPU port is not supported by DSA code. + + Depending on how the external PHY is wired: + 1. normal: The PHY can only connect to 2nd GMAC but not to the switch + 2. swapped: RGMII TX, RX are swapped; external phy interfaces with the switch as + an ethernet port. But can't interface to the 2nd GMAC. + + Based on the DT the port 5 mode is configured. + + Driver tries to look up the phy-handle of the 2nd GMAC of the master device. + When phy-handle matches PHY of port 0 or 4 then port 5 is set up as mode 2. + phy-mode must be set, see also example 2 below! 
+ * mt7621: phy-mode = "rgmii-txid"; + * mt7623: phy-mode = "rgmii"; + + CPU-Ports need a phy-mode property: + Allowed values on mt7530 and mt7621: + - "rgmii" + - "trgmii" + On mt7531: + - "1000base-x" + - "2500base-x" + - "rgmii" + - "sgmii" + + +properties: + compatible: + enum: + - mediatek,mt7530 + - mediatek,mt7531 + - mediatek,mt7621 + + core-supply: + description: + Phandle to the regulator node necessary for the core power. + + "#gpio-cells": + const: 2 + + gpio-controller: + type: boolean + description: + if defined, MT7530's LED controller will run on GPIO mode. + + "#interrupt-cells": + const: 1 + + interrupt-controller: true + + interrupts: + maxItems: 1 + + io-supply: + description: + Phandle to the regulator node necessary for the I/O power. + See Documentation/devicetree/bindings/regulator/mt6323-regulator.txt + for details for the regulator setup on these boards. + + mediatek,mcm: + type: boolean + description: + if defined, indicates that either MT7530 is the part on multi-chip + module belong to MT7623A has or the remotely standalone chip as the + function MT7623N reference board provided for. + + reset-gpios: + maxItems: 1 + + reset-names: + const: mcm + + resets: + description: + Phandle pointing to the system reset controller with line index for + the ethsys. + maxItems: 1 + +patternProperties: + "^(ethernet-)?ports$": + type: object + + patternProperties: + "^(ethernet-)?port@[0-9]+$": + type: object + description: Ethernet switch ports + + unevaluatedProperties: false + + properties: + reg: + description: + Port address described must be 5 or 6 for CPU port and from 0 + to 5 for user ports. 
+ + allOf: + - $ref: dsa-port.yaml# + - if: + properties: + label: + items: + - const: cpu + then: + required: + - reg + - phy-mode + +required: + - compatible + - reg + +allOf: + - $ref: "dsa.yaml#" + - if: + required: + - mediatek,mcm + then: + required: + - resets + - reset-names + + - dependencies: + interrupt-controller: [ interrupts ] + + - if: + properties: + compatible: + items: + - const: mediatek,mt7530 + then: + required: + - core-supply + - io-supply + +unevaluatedProperties: false + +examples: + - | + #include <dt-bindings/gpio/gpio.h> + mdio { + #address-cells = <1>; + #size-cells = <0>; + switch@0 { + compatible = "mediatek,mt7530"; + reg = <0>; + + core-supply = <&mt6323_vpa_reg>; + io-supply = <&mt6323_vemc3v3_reg>; + reset-gpios = <&pio 33 GPIO_ACTIVE_HIGH>; + + ethernet-ports { + #address-cells = <1>; + #size-cells = <0>; + port@0 { + reg = <0>; + label = "lan0"; + }; + + port@1 { + reg = <1>; + label = "lan1"; + }; + + port@2 { + reg = <2>; + label = "lan2"; + }; + + port@3 { + reg = <3>; + label = "lan3"; + }; + + port@4 { + reg = <4>; + label = "wan"; + }; + + port@6 { + reg = <6>; + label = "cpu"; + ethernet = <&gmac0>; + phy-mode = "trgmii"; + fixed-link { + speed = <1000>; + full-duplex; + }; + }; + }; + }; + }; + + - | + //Example 2: MT7621: Port 4 is WAN port: 2nd GMAC -> Port 5 -> PHY port 4. 
+ + ethernet { + #address-cells = <1>; + #size-cells = <0>; + gmac0: mac@0 { + compatible = "mediatek,eth-mac"; + reg = <0>; + phy-mode = "rgmii"; + + fixed-link { + speed = <1000>; + full-duplex; + pause; + }; + }; + + gmac1: mac@1 { + compatible = "mediatek,eth-mac"; + reg = <1>; + phy-mode = "rgmii-txid"; + phy-handle = <&phy4>; + }; + + mdio: mdio-bus { + #address-cells = <1>; + #size-cells = <0>; + + /* Internal phy */ + phy4: ethernet-phy@4 { + reg = <4>; + }; + + mt7530: switch@1f { + compatible = "mediatek,mt7621"; + reg = <0x1f>; + mediatek,mcm; + + resets = <&rstctrl 2>; + reset-names = "mcm"; + + ethernet-ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + label = "lan0"; + }; + + port@1 { + reg = <1>; + label = "lan1"; + }; + + port@2 { + reg = <2>; + label = "lan2"; + }; + + port@3 { + reg = <3>; + label = "lan3"; + }; + + /* Commented out. Port 4 is handled by 2nd GMAC. + port@4 { + reg = <4>; + label = "lan4"; + }; + */ + + port@6 { + reg = <6>; + label = "cpu"; + ethernet = <&gmac0>; + phy-mode = "rgmii"; + + fixed-link { + speed = <1000>; + full-duplex; + pause; + }; + }; + }; + }; + }; + }; + + - | + //Example 3: MT7621: Port 5 is connected to external PHY: Port 5 -> external PHY. 
+ + ethernet { + #address-cells = <1>; + #size-cells = <0>; + gmac_0: mac@0 { + compatible = "mediatek,eth-mac"; + reg = <0>; + phy-mode = "rgmii"; + + fixed-link { + speed = <1000>; + full-duplex; + pause; + }; + }; + + mdio0: mdio-bus { + #address-cells = <1>; + #size-cells = <0>; + + /* External phy */ + ephy5: ethernet-phy@7 { + reg = <7>; + }; + + switch@1f { + compatible = "mediatek,mt7621"; + reg = <0x1f>; + mediatek,mcm; + + resets = <&rstctrl 2>; + reset-names = "mcm"; + + ethernet-ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + label = "lan0"; + }; + + port@1 { + reg = <1>; + label = "lan1"; + }; + + port@2 { + reg = <2>; + label = "lan2"; + }; + + port@3 { + reg = <3>; + label = "lan3"; + }; + + port@4 { + reg = <4>; + label = "lan4"; + }; + + port@5 { + reg = <5>; + label = "lan5"; + phy-mode = "rgmii"; + phy-handle = <&ephy5>; + }; + + cpu_port0: port@6 { + reg = <6>; + label = "cpu"; + ethernet = <&gmac_0>; + phy-mode = "rgmii"; + + fixed-link { + speed = <1000>; + full-duplex; + pause; + }; + }; + }; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/net/dsa/mt7530.txt b/Documentation/devicetree/bindings/net/dsa/mt7530.txt deleted file mode 100644 index 18247ebfc487..000000000000 --- a/Documentation/devicetree/bindings/net/dsa/mt7530.txt +++ /dev/null @@ -1,327 +0,0 @@ -Mediatek MT7530 Ethernet switch -================================ - -Required properties: - -- compatible: may be compatible = "mediatek,mt7530" - or compatible = "mediatek,mt7621" - or compatible = "mediatek,mt7531" -- #address-cells: Must be 1. -- #size-cells: Must be 0. -- mediatek,mcm: Boolean; if defined, indicates that either MT7530 is the part - on multi-chip module belong to MT7623A has or the remotely standalone - chip as the function MT7623N reference board provided for. - -If compatible mediatek,mt7530 is set then the following properties are required - -- core-supply: Phandle to the regulator node necessary for the core power. 
-- io-supply: Phandle to the regulator node necessary for the I/O power. - See Documentation/devicetree/bindings/regulator/mt6323-regulator.txt - for details for the regulator setup on these boards. - -If the property mediatek,mcm isn't defined, following property is required - -- reset-gpios: Should be a gpio specifier for a reset line. - -Else, following properties are required - -- resets : Phandle pointing to the system reset controller with - line index for the ethsys. -- reset-names : Should be set to "mcm". - -Required properties for the child nodes within ports container: - -- reg: Port address described must be 6 for CPU port and from 0 to 5 for - user ports. -- phy-mode: String, the following values are acceptable for port labeled - "cpu": - If compatible mediatek,mt7530 or mediatek,mt7621 is set, - must be either "trgmii" or "rgmii" - If compatible mediatek,mt7531 is set, - must be either "sgmii", "1000base-x" or "2500base-x" - -Port 5 of mt7530 and mt7621 switch is muxed between: -1. GMAC5: GMAC5 can interface with another external MAC or PHY. -2. PHY of port 0 or port 4: PHY interfaces with an external MAC like 2nd GMAC - of the SOC. Used in many setups where port 0/4 becomes the WAN port. - Note: On a MT7621 SOC with integrated switch: 2nd GMAC can only connected to - GMAC5 when the gpios for RGMII2 (GPIO 22-33) are not used and not - connected to external component! - -Port 5 modes/configurations: -1. Port 5 is disabled and isolated: An external phy can interface to the 2nd - GMAC of the SOC. - In the case of a build-in MT7530 switch, port 5 shares the RGMII bus with 2nd - GMAC and an optional external phy. Mind the GPIO/pinctl settings of the SOC! -2. Port 5 is muxed to PHY of port 0/4: Port 0/4 interfaces with 2nd GMAC. - It is a simple MAC to PHY interface, port 5 needs to be setup for xMII mode - and RGMII delay. -3. Port 5 is muxed to GMAC5 and can interface to an external phy. - Port 5 becomes an extra switch port. 
- Only works on platform where external phy TX<->RX lines are swapped. - Like in the Ubiquiti ER-X-SFP. -4. Port 5 is muxed to GMAC5 and interfaces with the 2nd GAMC as 2nd CPU port. - Currently a 2nd CPU port is not supported by DSA code. - -Depending on how the external PHY is wired: -1. normal: The PHY can only connect to 2nd GMAC but not to the switch -2. swapped: RGMII TX, RX are swapped; external phy interface with the switch as - a ethernet port. But can't interface to the 2nd GMAC. - -Based on the DT the port 5 mode is configured. - -Driver tries to lookup the phy-handle of the 2nd GMAC of the master device. -When phy-handle matches PHY of port 0 or 4 then port 5 set-up as mode 2. -phy-mode must be set, see also example 2 below! - * mt7621: phy-mode = "rgmii-txid"; - * mt7623: phy-mode = "rgmii"; - -Optional properties: - -- gpio-controller: Boolean; if defined, MT7530's LED controller will run on - GPIO mode. -- #gpio-cells: Must be 2 if gpio-controller is defined. -- interrupt-controller: Boolean; Enables the internal interrupt controller. - -If interrupt-controller is defined, the following properties are required. - -- #interrupt-cells: Must be 1. -- interrupts: Parent interrupt for the interrupt controller. - -See Documentation/devicetree/bindings/net/dsa/dsa.txt for a list of additional -required, optional properties and how the integrated switch subnodes must -be specified. 
- -Example: - - &mdio0 { - switch@0 { - compatible = "mediatek,mt7530"; - #address-cells = <1>; - #size-cells = <0>; - reg = <0>; - - core-supply = <&mt6323_vpa_reg>; - io-supply = <&mt6323_vemc3v3_reg>; - reset-gpios = <&pio 33 0>; - - ports { - #address-cells = <1>; - #size-cells = <0>; - reg = <0>; - port@0 { - reg = <0>; - label = "lan0"; - }; - - port@1 { - reg = <1>; - label = "lan1"; - }; - - port@2 { - reg = <2>; - label = "lan2"; - }; - - port@3 { - reg = <3>; - label = "lan3"; - }; - - port@4 { - reg = <4>; - label = "wan"; - }; - - port@6 { - reg = <6>; - label = "cpu"; - ethernet = <&gmac0>; - phy-mode = "trgmii"; - fixed-link { - speed = <1000>; - full-duplex; - }; - }; - }; - }; - }; - -Example 2: MT7621: Port 4 is WAN port: 2nd GMAC -> Port 5 -> PHY port 4. - -&eth { - gmac0: mac@0 { - compatible = "mediatek,eth-mac"; - reg = <0>; - phy-mode = "rgmii"; - - fixed-link { - speed = <1000>; - full-duplex; - pause; - }; - }; - - gmac1: mac@1 { - compatible = "mediatek,eth-mac"; - reg = <1>; - phy-mode = "rgmii-txid"; - phy-handle = <&phy4>; - }; - - mdio: mdio-bus { - #address-cells = <1>; - #size-cells = <0>; - - /* Internal phy */ - phy4: ethernet-phy@4 { - reg = <4>; - }; - - mt7530: switch@1f { - compatible = "mediatek,mt7621"; - #address-cells = <1>; - #size-cells = <0>; - reg = <0x1f>; - pinctrl-names = "default"; - mediatek,mcm; - - resets = <&rstctrl 2>; - reset-names = "mcm"; - - ports { - #address-cells = <1>; - #size-cells = <0>; - - port@0 { - reg = <0>; - label = "lan0"; - }; - - port@1 { - reg = <1>; - label = "lan1"; - }; - - port@2 { - reg = <2>; - label = "lan2"; - }; - - port@3 { - reg = <3>; - label = "lan3"; - }; - -/* Commented out. Port 4 is handled by 2nd GMAC. 
- port@4 { - reg = <4>; - label = "lan4"; - }; -*/ - - cpu_port0: port@6 { - reg = <6>; - label = "cpu"; - ethernet = <&gmac0>; - phy-mode = "rgmii"; - - fixed-link { - speed = <1000>; - full-duplex; - pause; - }; - }; - }; - }; - }; -}; - -Example 3: MT7621: Port 5 is connected to external PHY: Port 5 -> external PHY. - -&eth { - gmac0: mac@0 { - compatible = "mediatek,eth-mac"; - reg = <0>; - phy-mode = "rgmii"; - - fixed-link { - speed = <1000>; - full-duplex; - pause; - }; - }; - - mdio: mdio-bus { - #address-cells = <1>; - #size-cells = <0>; - - /* External phy */ - ephy5: ethernet-phy@7 { - reg = <7>; - }; - - mt7530: switch@1f { - compatible = "mediatek,mt7621"; - #address-cells = <1>; - #size-cells = <0>; - reg = <0x1f>; - pinctrl-names = "default"; - mediatek,mcm; - - resets = <&rstctrl 2>; - reset-names = "mcm"; - - ports { - #address-cells = <1>; - #size-cells = <0>; - - port@0 { - reg = <0>; - label = "lan0"; - }; - - port@1 { - reg = <1>; - label = "lan1"; - }; - - port@2 { - reg = <2>; - label = "lan2"; - }; - - port@3 { - reg = <3>; - label = "lan3"; - }; - - port@4 { - reg = <4>; - label = "lan4"; - }; - - port@5 { - reg = <5>; - label = "lan5"; - phy-mode = "rgmii"; - phy-handle = <&ephy5>; - }; - - cpu_port0: port@6 { - reg = <6>; - label = "cpu"; - ethernet = <&gmac0>; - phy-mode = "rgmii"; - - fixed-link { - speed = <1000>; - full-duplex; - pause; - }; - }; - }; - }; - }; -}; diff --git a/Documentation/devicetree/bindings/net/dsa/renesas,rzn1-a5psw.yaml b/Documentation/devicetree/bindings/net/dsa/renesas,rzn1-a5psw.yaml new file mode 100644 index 000000000000..103b1ef5af1b --- /dev/null +++ b/Documentation/devicetree/bindings/net/dsa/renesas,rzn1-a5psw.yaml @@ -0,0 +1,134 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/dsa/renesas,rzn1-a5psw.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Renesas RZ/N1 Advanced 5 ports ethernet switch + +maintainers: + - 
Clément Léger <clement.leger@bootlin.com> + +description: | + The advanced 5 ports switch is present on the Renesas RZ/N1 SoC family and + handles 4 ports + 1 CPU management port. + +allOf: + - $ref: dsa.yaml# + +properties: + compatible: + items: + - enum: + - renesas,r9a06g032-a5psw + - const: renesas,rzn1-a5psw + + reg: + maxItems: 1 + + power-domains: + maxItems: 1 + + mdio: + $ref: /schemas/net/mdio.yaml# + unevaluatedProperties: false + + clocks: + items: + - description: AHB clock used for the switch register interface + - description: Switch system clock + + clock-names: + items: + - const: hclk + - const: clk + + ethernet-ports: + type: object + properties: + '#address-cells': + const: 1 + '#size-cells': + const: 0 + + patternProperties: + "^(ethernet-)?port@[0-4]$": + type: object + description: Ethernet switch ports + + properties: + pcs-handle: + description: + phandle pointing to a PCS sub-node compatible with + renesas,rzn1-miic.yaml# + $ref: /schemas/types.yaml#/definitions/phandle + +unevaluatedProperties: false + +required: + - compatible + - reg + - clocks + - clock-names + - power-domains + +examples: + - | + #include <dt-bindings/gpio/gpio.h> + #include <dt-bindings/clock/r9a06g032-sysctrl.h> + + switch@44050000 { + compatible = "renesas,r9a06g032-a5psw", "renesas,rzn1-a5psw"; + reg = <0x44050000 0x10000>; + clocks = <&sysctrl R9A06G032_HCLK_SWITCH>, <&sysctrl R9A06G032_CLK_SWITCH>; + clock-names = "hclk", "clk"; + power-domains = <&sysctrl>; + + dsa,member = <0 0>; + + ethernet-ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + label = "lan0"; + phy-handle = <&switch0phy3>; + pcs-handle = <&mii_conv4>; + }; + + port@1 { + reg = <1>; + label = "lan1"; + phy-handle = <&switch0phy1>; + pcs-handle = <&mii_conv3>; + }; + + port@4 { + reg = <4>; + ethernet = <&gmac2>; + label = "cpu"; + fixed-link { + speed = <1000>; + full-duplex; + }; + }; + }; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + reset-gpios = 
<&gpio0a 2 GPIO_ACTIVE_HIGH>; + reset-delay-us = <15>; + clock-frequency = <2500000>; + + switch0phy1: ethernet-phy@1{ + reg = <1>; + }; + + switch0phy3: ethernet-phy@3{ + reg = <3>; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/net/pcs/renesas,rzn1-miic.yaml b/Documentation/devicetree/bindings/net/pcs/renesas,rzn1-miic.yaml new file mode 100644 index 000000000000..2d33bbab7163 --- /dev/null +++ b/Documentation/devicetree/bindings/net/pcs/renesas,rzn1-miic.yaml @@ -0,0 +1,171 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/pcs/renesas,rzn1-miic.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Renesas RZ/N1 MII converter + +maintainers: + - Clément Léger <clement.leger@bootlin.com> + +description: | + This MII converter is present on the Renesas RZ/N1 SoC family. It is + responsible to do MII passthrough or convert it to RMII/RGMII. + +properties: + '#address-cells': + const: 1 + + '#size-cells': + const: 0 + + compatible: + items: + - enum: + - renesas,r9a06g032-miic + - const: renesas,rzn1-miic + + reg: + maxItems: 1 + + clocks: + items: + - description: MII reference clock + - description: RGMII reference clock + - description: RMII reference clock + - description: AHB clock used for the MII converter register interface + + clock-names: + items: + - const: mii_ref + - const: rgmii_ref + - const: rmii_ref + - const: hclk + + renesas,miic-switch-portin: + description: MII Switch PORTIN configuration. This value should use one of + the values defined in dt-bindings/net/pcs-rzn1-miic.h. + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [1, 2] + + power-domains: + maxItems: 1 + +patternProperties: + "^mii-conv@[0-5]$": + type: object + description: MII converter port + + properties: + reg: + description: MII Converter port number. + enum: [1, 2, 3, 4, 5] + + renesas,miic-input: + description: Converter input port configuration. 
This value should use + one of the values defined in dt-bindings/net/pcs-rzn1-miic.h. + $ref: /schemas/types.yaml#/definitions/uint32 + + required: + - reg + - renesas,miic-input + + additionalProperties: false + + allOf: + - if: + properties: + reg: + const: 1 + then: + properties: + renesas,miic-input: + const: 0 + - if: + properties: + reg: + const: 2 + then: + properties: + renesas,miic-input: + enum: [1, 11] + - if: + properties: + reg: + const: 3 + then: + properties: + renesas,miic-input: + enum: [7, 10] + - if: + properties: + reg: + const: 4 + then: + properties: + renesas,miic-input: + enum: [4, 6, 9, 13] + - if: + properties: + reg: + const: 5 + then: + properties: + renesas,miic-input: + enum: [3, 5, 8, 12] + +required: + - '#address-cells' + - '#size-cells' + - compatible + - reg + - clocks + - clock-names + - power-domains + +additionalProperties: false + +examples: + - | + #include <dt-bindings/net/pcs-rzn1-miic.h> + #include <dt-bindings/clock/r9a06g032-sysctrl.h> + + eth-miic@44030000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "renesas,r9a06g032-miic", "renesas,rzn1-miic"; + reg = <0x44030000 0x10000>; + clocks = <&sysctrl R9A06G032_CLK_MII_REF>, + <&sysctrl R9A06G032_CLK_RGMII_REF>, + <&sysctrl R9A06G032_CLK_RMII_REF>, + <&sysctrl R9A06G032_HCLK_SWITCH_RG>; + clock-names = "mii_ref", "rgmii_ref", "rmii_ref", "hclk"; + renesas,miic-switch-portin = <MIIC_GMAC2_PORT>; + power-domains = <&sysctrl>; + + mii_conv1: mii-conv@1 { + renesas,miic-input = <MIIC_GMAC1_PORT>; + reg = <1>; + }; + + mii_conv2: mii-conv@2 { + renesas,miic-input = <MIIC_SWITCH_PORTD>; + reg = <2>; + }; + + mii_conv3: mii-conv@3 { + renesas,miic-input = <MIIC_SWITCH_PORTC>; + reg = <3>; + }; + + mii_conv4: mii-conv@4 { + renesas,miic-input = <MIIC_SWITCH_PORTB>; + reg = <4>; + }; + + mii_conv5: mii-conv@5 { + renesas,miic-input = <MIIC_SWITCH_PORTA>; + reg = <5>; + }; + }; diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml 
b/Documentation/devicetree/bindings/net/snps,dwmac.yaml index 36c85eb3dc0d..491597c02edf 100644 --- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml +++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml @@ -65,6 +65,8 @@ properties: - ingenic,x2000-mac - loongson,ls2k-dwmac - loongson,ls7a-dwmac + - renesas,r9a06g032-gmac + - renesas,rzn1-gmac - rockchip,px30-gmac - rockchip,rk3128-gmac - rockchip,rk3228-gmac @@ -135,6 +137,9 @@ properties: reset-names: const: stmmaceth + power-domains: + maxItems: 1 + mac-mode: $ref: ethernet-controller.yaml#/properties/phy-connection-type description: diff --git a/Documentation/devicetree/bindings/net/ti,dp83867.yaml b/Documentation/devicetree/bindings/net/ti,dp83867.yaml index 047d757e8d82..76ff08a477ba 100644 --- a/Documentation/devicetree/bindings/net/ti,dp83867.yaml +++ b/Documentation/devicetree/bindings/net/ti,dp83867.yaml @@ -31,6 +31,16 @@ properties: reg: maxItems: 1 + nvmem-cells: + maxItems: 1 + description: + Nvmem data cell containing the value to write to the + IO_IMPEDANCE_CTRL field of the IO_MUX_CFG register. + + nvmem-cell-names: + items: + - const: io_impedance_ctrl + ti,min-output-impedance: type: boolean description: | @@ -42,9 +52,11 @@ properties: description: | MAC Interface Impedance control to set the programmable output impedance to a maximum value (70 ohms). - Note: ti,min-output-impedance and ti,max-output-impedance are mutually - exclusive. When both properties are present ti,max-output-impedance - takes precedence. + Note: Specifying an io_impedance_ctrl nvmem cell or one of the + ti,min-output-impedance, ti,max-output-impedance properties + are mutually exclusive. If more than one is present, an nvmem + cell takes precedence over ti,max-output-impedance, which in + turn takes precedence over ti,min-output-impedance. 
tx-fifo-depth: $ref: /schemas/types.yaml#/definitions/uint32 diff --git a/Documentation/devicetree/bindings/net/xlnx,emaclite.yaml b/Documentation/devicetree/bindings/net/xlnx,emaclite.yaml new file mode 100644 index 000000000000..92d8ade988f6 --- /dev/null +++ b/Documentation/devicetree/bindings/net/xlnx,emaclite.yaml @@ -0,0 +1,63 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/xlnx,emaclite.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Xilinx Emaclite Ethernet controller + +maintainers: + - Radhey Shyam Pandey <radhey.shyam.pandey@amd.com> + - Harini Katakam <harini.katakam@amd.com> + +allOf: + - $ref: ethernet-controller.yaml# + +properties: + compatible: + enum: + - xlnx,opb-ethernetlite-1.01.a + - xlnx,opb-ethernetlite-1.01.b + - xlnx,xps-ethernetlite-1.00.a + - xlnx,xps-ethernetlite-2.00.a + - xlnx,xps-ethernetlite-2.01.a + - xlnx,xps-ethernetlite-3.00.a + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + phy-handle: true + + local-mac-address: true + + xlnx,tx-ping-pong: + type: boolean + description: hardware supports tx ping pong buffer. + + xlnx,rx-ping-pong: + type: boolean + description: hardware supports rx ping pong buffer. + +required: + - compatible + - reg + - interrupts + - phy-handle + +additionalProperties: false + +examples: + - | + axi_ethernetlite_1: ethernet@40e00000 { + compatible = "xlnx,xps-ethernetlite-3.00.a"; + reg = <0x40e00000 0x10000>; + interrupt-parent = <&axi_intc_1>; + interrupts = <1>; + local-mac-address = [00 00 00 00 00 00]; + phy-handle = <&phy0>; + xlnx,rx-ping-pong; + xlnx,tx-ping-pong; + }; diff --git a/Documentation/networking/bonding.rst b/Documentation/networking/bonding.rst index 43be3782e5df..53a18ff7cf23 100644 --- a/Documentation/networking/bonding.rst +++ b/Documentation/networking/bonding.rst @@ -780,6 +780,17 @@ peer_notif_delay value is 0 which means to match the value of the link monitor interval. 
+prio + Slave priority. A higher number means higher priority. + The primary slave has the highest priority. This option also + follows the primary_reselect rules. + + This option can only be configured via netlink, and is only valid + for active-backup (1), balance-tlb (5) and balance-alb (6) modes. + The valid value range is that of a signed 32-bit integer. + + The default value is 0. + primary A string (eth0, eth2, etc) specifying which slave is the diff --git a/Documentation/networking/can.rst b/Documentation/networking/can.rst index f34cb0e4460e..ebc822e605f5 100644 --- a/Documentation/networking/can.rst +++ b/Documentation/networking/can.rst @@ -168,7 +168,7 @@ reflect the correct [#f1]_ traffic on the node the loopback of the sent data has to be performed right after a successful transmission. If the CAN network interface is not capable of performing the loopback for some reason the SocketCAN core can do this task as a fallback solution. -See :ref:`socketcan-local-loopback1` for details (recommended). +See :ref:`socketcan-local-loopback2` for details (recommended). The loopback functionality is enabled by default to reflect standard networking behaviour for CAN applications. Due to some requests from diff --git a/Documentation/networking/tls.rst b/Documentation/networking/tls.rst index 8cb2cd4e2a80..be8e10c14b05 100644 --- a/Documentation/networking/tls.rst +++ b/Documentation/networking/tls.rst @@ -214,6 +214,31 @@ of calling send directly after a handshake using gnutls. Since it doesn't implement a full record layer, control messages are not supported. +Optional optimizations +---------------------- + +There are certain condition-specific optimizations the TLS ULP can make, +if requested. Those optimizations are either not universally beneficial +or may impact correctness, hence they require an opt-in. +All options are set per-socket using setsockopt(), and their +state can be checked using getsockopt() and via socket diag (``ss``). 
+ +TLS_TX_ZEROCOPY_RO +~~~~~~~~~~~~~~~~~~ + +For device offload only. Allow sendfile() data to be transmitted directly +to the NIC without making an in-kernel copy. This allows true zero-copy +behavior when device offload is enabled. + +The application must make sure that the data is not modified between being +submitted and transmission completing. In other words this is mostly +applicable if the data sent on a socket via sendfile() is read-only. + +Modifying the data may result in different versions of the data being used +for the original TCP transmission and TCP retransmissions. To the receiver +this will look like TLS records had been tampered with and will result +in record authentication failures. + Statistics ========== diff --git a/MAINTAINERS b/MAINTAINERS index 725fbdcf3558..e9e9e99a2296 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -171,7 +171,6 @@ F: drivers/scsi/53c700* 6LOWPAN GENERIC (BTLE/IEEE 802.15.4) M: Alexander Aring <alex.aring@gmail.com> -M: Jukka Rissanen <jukka.rissanen@linux.intel.com> L: linux-bluetooth@vger.kernel.org L: linux-wpan@vger.kernel.org S: Maintained @@ -7433,6 +7432,13 @@ S: Maintained F: include/linux/errseq.h F: lib/errseq.c +ESD CAN/USB DRIVERS +M: Frank Jungclaus <frank.jungclaus@esd.eu> +R: socketcan@esd.eu +L: linux-can@vger.kernel.org +S: Maintained +F: drivers/net/can/usb/esd_usb.c + ET131X NETWORK DRIVER M: Mark Einon <mark.einon@gmail.com> S: Odd Fixes @@ -17117,6 +17123,19 @@ S: Supported F: Documentation/devicetree/bindings/iio/adc/renesas,rzg2l-adc.yaml F: drivers/iio/adc/rzg2l_adc.c +RENESAS RZ/N1 A5PSW SWITCH DRIVER +M: Clément Léger <clement.leger@bootlin.com> +L: linux-renesas-soc@vger.kernel.org +L: netdev@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/net/dsa/renesas,rzn1-a5psw.yaml +F: Documentation/devicetree/bindings/net/pcs/renesas,rzn1-miic.yaml +F: drivers/net/dsa/rzn1_a5psw* +F: drivers/net/pcs/pcs-rzn1-miic.c +F: include/dt-bindings/net/pcs-rzn1-miic.h +F: 
include/linux/pcs-rzn1-miic.h +F: net/dsa/tag_rzn1_a5psw.c + RENESAS RZ/N1 RTC CONTROLLER DRIVER M: Miquel Raynal <miquel.raynal@bootlin.com> L: linux-rtc@vger.kernel.org diff --git a/arch/arm/boot/dts/r9a06g032-rzn1d400-db.dts b/arch/arm/boot/dts/r9a06g032-rzn1d400-db.dts index ca39e1d681c1..ac8b7be2f49e 100644 --- a/arch/arm/boot/dts/r9a06g032-rzn1d400-db.dts +++ b/arch/arm/boot/dts/r9a06g032-rzn1d400-db.dts @@ -8,6 +8,8 @@ /dts-v1/; +#include <dt-bindings/pinctrl/rzn1-pinctrl.h> +#include <dt-bindings/net/pcs-rzn1-miic.h> #include "r9a06g032.dtsi" / { @@ -35,3 +37,118 @@ timeout-sec = <60>; status = "okay"; }; + +&gmac2 { + status = "okay"; + phy-mode = "gmii"; + fixed-link { + speed = <1000>; + full-duplex; + }; +}; + +&switch { + status = "okay"; + #address-cells = <1>; + #size-cells = <0>; + + pinctrl-names = "default"; + pinctrl-0 = <&pins_mdio1>, <&pins_eth3>, <&pins_eth4>; + + dsa,member = <0 0>; + + mdio { + clock-frequency = <2500000>; + + #address-cells = <1>; + #size-cells = <0>; + + switch0phy4: ethernet-phy@4 { + reg = <4>; + micrel,led-mode = <1>; + }; + + switch0phy5: ethernet-phy@5 { + reg = <5>; + micrel,led-mode = <1>; + }; + }; +}; + +&switch_port0 { + label = "lan0"; + phy-mode = "mii"; + phy-handle = <&switch0phy5>; + status = "okay"; +}; + +&switch_port1 { + label = "lan1"; + phy-mode = "mii"; + phy-handle = <&switch0phy4>; + status = "okay"; +}; + +&switch_port4 { + status = "okay"; +}; + +&eth_miic { + status = "okay"; + renesas,miic-switch-portin = <MIIC_GMAC2_PORT>; +}; + +&mii_conv4 { + renesas,miic-input = <MIIC_SWITCH_PORTB>; + status = "okay"; +}; + +&mii_conv5 { + renesas,miic-input = <MIIC_SWITCH_PORTA>; + status = "okay"; +}; + +&pinctrl{ + pins_mdio1: pins_mdio1 { + pinmux = < + RZN1_PINMUX(152, RZN1_FUNC_MDIO1_SWITCH) + RZN1_PINMUX(153, RZN1_FUNC_MDIO1_SWITCH) + >; + }; + pins_eth3: pins_eth3 { + pinmux = < + RZN1_PINMUX(36, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII) + RZN1_PINMUX(37, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII) + RZN1_PINMUX(38,
RZN1_FUNC_CLK_ETH_MII_RGMII_RMII) + RZN1_PINMUX(39, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII) + RZN1_PINMUX(40, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII) + RZN1_PINMUX(41, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII) + RZN1_PINMUX(42, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII) + RZN1_PINMUX(43, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII) + RZN1_PINMUX(44, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII) + RZN1_PINMUX(45, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII) + RZN1_PINMUX(46, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII) + RZN1_PINMUX(47, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII) + >; + drive-strength = <6>; + bias-disable; + }; + pins_eth4: pins_eth4 { + pinmux = < + RZN1_PINMUX(48, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII) + RZN1_PINMUX(49, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII) + RZN1_PINMUX(50, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII) + RZN1_PINMUX(51, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII) + RZN1_PINMUX(52, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII) + RZN1_PINMUX(53, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII) + RZN1_PINMUX(54, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII) + RZN1_PINMUX(55, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII) + RZN1_PINMUX(56, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII) + RZN1_PINMUX(57, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII) + RZN1_PINMUX(58, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII) + RZN1_PINMUX(59, RZN1_FUNC_CLK_ETH_MII_RGMII_RMII) + >; + drive-strength = <6>; + bias-disable; + }; +}; diff --git a/arch/arm/boot/dts/r9a06g032.dtsi b/arch/arm/boot/dts/r9a06g032.dtsi index d3665910958b..5b97fa85474f 100644 --- a/arch/arm/boot/dts/r9a06g032.dtsi +++ b/arch/arm/boot/dts/r9a06g032.dtsi @@ -304,6 +304,114 @@ data-width = <8>; }; + gmac2: ethernet@44002000 { + compatible = "renesas,r9a06g032-gmac", "renesas,rzn1-gmac", "snps,dwmac"; + reg = <0x44002000 0x2000>; + interrupt-parent = <&gic>; + interrupts = <GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "macirq", "eth_wake_irq", "eth_lpi"; + clocks = <&sysctrl R9A06G032_HCLK_GMAC1>; + clock-names = "stmmaceth"; + power-domains = <&sysctrl>; + snps,multicast-filter-bins = <256>; + 
snps,perfect-filter-entries = <128>; + tx-fifo-depth = <2048>; + rx-fifo-depth = <4096>; + status = "disabled"; + }; + + eth_miic: eth-miic@44030000 { + compatible = "renesas,r9a06g032-miic", "renesas,rzn1-miic"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x44030000 0x10000>; + clocks = <&sysctrl R9A06G032_CLK_MII_REF>, + <&sysctrl R9A06G032_CLK_RGMII_REF>, + <&sysctrl R9A06G032_CLK_RMII_REF>, + <&sysctrl R9A06G032_HCLK_SWITCH_RG>; + clock-names = "mii_ref", "rgmii_ref", "rmii_ref", "hclk"; + power-domains = <&sysctrl>; + status = "disabled"; + + mii_conv1: mii-conv@1 { + reg = <1>; + status = "disabled"; + }; + + mii_conv2: mii-conv@2 { + reg = <2>; + status = "disabled"; + }; + + mii_conv3: mii-conv@3 { + reg = <3>; + status = "disabled"; + }; + + mii_conv4: mii-conv@4 { + reg = <4>; + status = "disabled"; + }; + + mii_conv5: mii-conv@5 { + reg = <5>; + status = "disabled"; + }; + }; + + switch: switch@44050000 { + compatible = "renesas,r9a06g032-a5psw", "renesas,rzn1-a5psw"; + reg = <0x44050000 0x10000>; + clocks = <&sysctrl R9A06G032_HCLK_SWITCH>, + <&sysctrl R9A06G032_CLK_SWITCH>; + clock-names = "hclk", "clk"; + power-domains = <&sysctrl>; + status = "disabled"; + + ethernet-ports { + #address-cells = <1>; + #size-cells = <0>; + + switch_port0: port@0 { + reg = <0>; + pcs-handle = <&mii_conv5>; + status = "disabled"; + }; + + switch_port1: port@1 { + reg = <1>; + pcs-handle = <&mii_conv4>; + status = "disabled"; + }; + + switch_port2: port@2 { + reg = <2>; + pcs-handle = <&mii_conv3>; + status = "disabled"; + }; + + switch_port3: port@3 { + reg = <3>; + pcs-handle = <&mii_conv2>; + status = "disabled"; + }; + + switch_port4: port@4 { + reg = <4>; + ethernet = <&gmac2>; + label = "cpu"; + phy-mode = "internal"; + status = "disabled"; + fixed-link { + speed = <1000>; + full-duplex; + }; + }; + }; + }; + gic: interrupt-controller@44101000 { compatible = "arm,gic-400", "arm,cortex-a7-gic"; interrupt-controller; diff --git a/arch/arm/net/bpf_jit_32.c 
b/arch/arm/net/bpf_jit_32.c index 9e457156ad4d..6a1c9fca5260 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -712,22 +712,6 @@ static inline void emit_alu_r(const u8 dst, const u8 src, const bool is64, } } -/* ALU operation (32 bit) - * dst = dst (op) src - */ -static inline void emit_a32_alu_r(const s8 dst, const s8 src, - struct jit_ctx *ctx, const bool is64, - const bool hi, const u8 op) { - const s8 *tmp = bpf2a32[TMP_REG_1]; - s8 rn, rd; - - rn = arm_bpf_get_reg32(src, tmp[1], ctx); - rd = arm_bpf_get_reg32(dst, tmp[0], ctx); - /* ALU operation */ - emit_alu_r(rd, rn, is64, hi, op, ctx); - arm_bpf_put_reg32(dst, rd, ctx); -} - /* ALU operation (64 bit) */ static inline void emit_a32_alu_r64(const bool is64, const s8 dst[], const s8 src[], struct jit_ctx *ctx, diff --git a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts index 1d3ffbf3cde8..5e34bd0b214d 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts @@ -458,6 +458,54 @@ status = "okay"; }; +&mdio0 { + #address-cells = <1>; + #size-cells = <0>; + + switch@0 { + compatible = "mediatek,mt7531"; + reg = <0>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@1 { + reg = <1>; + label = "lan0"; + }; + + port@2 { + reg = <2>; + label = "lan1"; + }; + + port@3 { + reg = <3>; + label = "lan2"; + }; + + port@4 { + reg = <4>; + label = "lan3"; + }; + + port@5 { + reg = <5>; + label = "cpu"; + ethernet = <&gmac0>; + phy-mode = "rgmii"; + + fixed-link { + speed = <1000>; + full-duplex; + pause; + }; + }; + }; + }; +}; + &mdio1 { rgmii_phy1: ethernet-phy@0 { compatible = "ethernet-phy-ieee802.3-c22"; diff --git a/arch/riscv/boot/dts/microchip/mpfs.dtsi b/arch/riscv/boot/dts/microchip/mpfs.dtsi index c490d6e4bf1a..45efd35d50c5 100644 --- a/arch/riscv/boot/dts/microchip/mpfs.dtsi +++ b/arch/riscv/boot/dts/microchip/mpfs.dtsi @@ -336,6 +336,24 @@ status = 
"disabled"; }; + can0: can@2010c000 { + compatible = "microchip,mpfs-can"; + reg = <0x0 0x2010c000 0x0 0x1000>; + clocks = <&clkcfg CLK_CAN0>; + interrupt-parent = <&plic>; + interrupts = <56>; + status = "disabled"; + }; + + can1: can@2010d000 { + compatible = "microchip,mpfs-can"; + reg = <0x0 0x2010d000 0x0 0x1000>; + clocks = <&clkcfg CLK_CAN1>; + interrupt-parent = <&plic>; + interrupts = <57>; + status = "disabled"; + }; + mac0: ethernet@20110000 { compatible = "cdns,macb"; reg = <0x0 0x20110000 0x0 0x2000>; diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h index 2a3715bf29fe..d926e0f7ef57 100644 --- a/arch/riscv/net/bpf_jit.h +++ b/arch/riscv/net/bpf_jit.h @@ -69,6 +69,7 @@ struct rv_jit_context { struct bpf_prog *prog; u16 *insns; /* RV insns */ int ninsns; + int body_len; int epilogue_offset; int *offset; /* BPF to RV */ int nexentries; diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c index be743d700aa7..737baf8715da 100644 --- a/arch/riscv/net/bpf_jit_core.c +++ b/arch/riscv/net/bpf_jit_core.c @@ -44,7 +44,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) unsigned int prog_size = 0, extable_size = 0; bool tmp_blinded = false, extra_pass = false; struct bpf_prog *tmp, *orig_prog = prog; - int pass = 0, prev_ninsns = 0, i; + int pass = 0, prev_ninsns = 0, prologue_len, i; struct rv_jit_data *jit_data; struct rv_jit_context *ctx; @@ -95,6 +95,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) prog = orig_prog; goto out_offset; } + ctx->body_len = ctx->ninsns; bpf_jit_build_prologue(ctx); ctx->epilogue_offset = ctx->ninsns; bpf_jit_build_epilogue(ctx); @@ -161,6 +162,11 @@ skip_init_ctx: if (!prog->is_func || extra_pass) { bpf_jit_binary_lock_ro(jit_data->header); + prologue_len = ctx->epilogue_offset - ctx->body_len; + for (i = 0; i < prog->len; i++) + ctx->offset[i] = ninsns_rvoff(prologue_len + + ctx->offset[i]); + bpf_prog_fill_jited_linfo(prog, ctx->offset); out_offset: 
kfree(ctx->offset); kfree(jit_data); diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c index 3e726ee91fdc..324148686953 100644 --- a/drivers/atm/iphase.c +++ b/drivers/atm/iphase.c @@ -739,7 +739,7 @@ static u16 ia_eeprom_get (IADEV *iadev, u32 addr) u32 t; int i; /* - * Read the first bit that was clocked with the falling edge of the + * Read the first bit that was clocked with the falling edge of * the last command data clock */ NVRAM_CMD(IAREAD + addr); diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.c b/drivers/isdn/hardware/mISDN/hfcsusb.c index cd5642cef01f..651f2f8f685b 100644 --- a/drivers/isdn/hardware/mISDN/hfcsusb.c +++ b/drivers/isdn/hardware/mISDN/hfcsusb.c @@ -1557,7 +1557,7 @@ reset_hfcsusb(struct hfcsusb *hw) write_reg(hw, HFCUSB_USB_SIZE, (hw->packet_size / 8) | ((hw->packet_size / 8) << 4)); - /* set USB_SIZE_I to match the the wMaxPacketSize for ISO transfers */ + /* set USB_SIZE_I to match the wMaxPacketSize for ISO transfers */ write_reg(hw, HFCUSB_USB_SIZE_I, hw->iso_packet_size); /* enable PCM/GCI master mode */ diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index b2a4f998c180..5de243899de8 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -499,6 +499,8 @@ config NET_SB1000 source "drivers/net/phy/Kconfig" +source "drivers/net/can/Kconfig" + source "drivers/net/mctp/Kconfig" source "drivers/net/mdio/Kconfig" diff --git a/drivers/net/amt.c b/drivers/net/amt.c index be2719a3ba70..732f4c0daa73 100644 --- a/drivers/net/amt.c +++ b/drivers/net/amt.c @@ -1373,11 +1373,11 @@ static void amt_add_srcs(struct amt_dev *amt, struct amt_tunnel_list *tunnel, int i; if (!v6) { - igmp_grec = (struct igmpv3_grec *)grec; + igmp_grec = grec; nsrcs = ntohs(igmp_grec->grec_nsrcs); } else { #if IS_ENABLED(CONFIG_IPV6) - mld_grec = (struct mld2_grec *)grec; + mld_grec = grec; nsrcs = ntohs(mld_grec->grec_nsrcs); #else return; @@ -1458,11 +1458,11 @@ static void amt_lookup_act_srcs(struct amt_tunnel_list *tunnel, int i, j; if (!v6) { - 
igmp_grec = (struct igmpv3_grec *)grec; + igmp_grec = grec; nsrcs = ntohs(igmp_grec->grec_nsrcs); } else { #if IS_ENABLED(CONFIG_IPV6) - mld_grec = (struct mld2_grec *)grec; + mld_grec = grec; nsrcs = ntohs(mld_grec->grec_nsrcs); #else return; diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 6ba4c83fe5fc..e75acb14d066 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1026,12 +1026,38 @@ out: } +/** + * bond_choose_primary_or_current - select the primary or high priority slave + * @bond: our bonding struct + * + * - Check if there is a primary link. If the primary link was set and is up, + * go on and do link reselection. + * + * - If primary link is not set or down, find the highest priority link. + * If the highest priority link is not current slave, set it as primary + * link and do link reselection. + */ static struct slave *bond_choose_primary_or_current(struct bonding *bond) { struct slave *prim = rtnl_dereference(bond->primary_slave); struct slave *curr = rtnl_dereference(bond->curr_active_slave); + struct slave *slave, *hprio = NULL; + struct list_head *iter; if (!prim || prim->link != BOND_LINK_UP) { + bond_for_each_slave(bond, slave, iter) { + if (slave->link == BOND_LINK_UP) { + hprio = hprio ?: slave; + if (slave->prio > hprio->prio) + hprio = slave; + } + } + + if (hprio && hprio != curr) { + prim = hprio; + goto link_reselect; + } + if (!curr || curr->link != BOND_LINK_UP) return NULL; return curr; @@ -1042,6 +1068,7 @@ static struct slave *bond_choose_primary_or_current(struct bonding *bond) return prim; } +link_reselect: if (!curr || curr->link != BOND_LINK_UP) return prim; @@ -6220,45 +6247,33 @@ int bond_create(struct net *net, const char *name) { struct net_device *bond_dev; struct bonding *bond; - struct alb_bond_info *bond_info; - int res; + int res = -ENOMEM; rtnl_lock(); bond_dev = alloc_netdev_mq(sizeof(struct bonding), name ? 
name : "bond%d", NET_NAME_UNKNOWN, bond_setup, tx_queues); - if (!bond_dev) { - pr_err("%s: eek! can't alloc netdev!\n", name); - rtnl_unlock(); - return -ENOMEM; - } + if (!bond_dev) + goto out; - /* - * Initialize rx_hashtbl_used_head to RLB_NULL_INDEX. - * It is set to 0 by default which is wrong. - */ bond = netdev_priv(bond_dev); - bond_info = &(BOND_ALB_INFO(bond)); - bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX; - dev_net_set(bond_dev, net); bond_dev->rtnl_link_ops = &bond_link_ops; res = register_netdevice(bond_dev); if (res < 0) { free_netdev(bond_dev); - rtnl_unlock(); - - return res; + goto out; } netif_carrier_off(bond_dev); bond_work_init_all(bond); +out: rtnl_unlock(); - return 0; + return res; } static int __net_init bond_net_init(struct net *net) diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c index 6f404f9c34e3..c2d080fc4fc4 100644 --- a/drivers/net/bonding/bond_netlink.c +++ b/drivers/net/bonding/bond_netlink.c @@ -27,6 +27,7 @@ static size_t bond_get_slave_size(const struct net_device *bond_dev, nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_AD_AGGREGATOR_ID */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE */ + nla_total_size(sizeof(s32)) + /* IFLA_BOND_SLAVE_PRIO */ 0; } @@ -53,6 +54,9 @@ static int bond_fill_slave_info(struct sk_buff *skb, if (nla_put_u16(skb, IFLA_BOND_SLAVE_QUEUE_ID, slave->queue_id)) goto nla_put_failure; + if (nla_put_s32(skb, IFLA_BOND_SLAVE_PRIO, slave->prio)) + goto nla_put_failure; + if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) { const struct aggregator *agg; const struct port *ad_port; @@ -117,6 +121,7 @@ static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = { static const struct nla_policy bond_slave_policy[IFLA_BOND_SLAVE_MAX + 1] = { [IFLA_BOND_SLAVE_QUEUE_ID] = { .type = NLA_U16 }, + [IFLA_BOND_SLAVE_PRIO] = { .type = NLA_S32 }, }; static int 
bond_validate(struct nlattr *tb[], struct nlattr *data[], @@ -151,7 +156,18 @@ static int bond_slave_changelink(struct net_device *bond_dev, snprintf(queue_id_str, sizeof(queue_id_str), "%s:%u\n", slave_dev->name, queue_id); bond_opt_initstr(&newval, queue_id_str); - err = __bond_opt_set(bond, BOND_OPT_QUEUE_ID, &newval); + err = __bond_opt_set(bond, BOND_OPT_QUEUE_ID, &newval, + data[IFLA_BOND_SLAVE_QUEUE_ID], extack); + if (err) + return err; + } + + if (data[IFLA_BOND_SLAVE_PRIO]) { + int prio = nla_get_s32(data[IFLA_BOND_SLAVE_PRIO]); + + bond_opt_slave_initval(&newval, &slave_dev, prio); + err = __bond_opt_set(bond, BOND_OPT_PRIO, &newval, + data[IFLA_BOND_SLAVE_PRIO], extack); if (err) return err; } @@ -175,7 +191,8 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], int mode = nla_get_u8(data[IFLA_BOND_MODE]); bond_opt_initval(&newval, mode); - err = __bond_opt_set(bond, BOND_OPT_MODE, &newval); + err = __bond_opt_set(bond, BOND_OPT_MODE, &newval, + data[IFLA_BOND_MODE], extack); if (err) return err; } @@ -192,7 +209,8 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], active_slave = slave_dev->name; } bond_opt_initstr(&newval, active_slave); - err = __bond_opt_set(bond, BOND_OPT_ACTIVE_SLAVE, &newval); + err = __bond_opt_set(bond, BOND_OPT_ACTIVE_SLAVE, &newval, + data[IFLA_BOND_ACTIVE_SLAVE], extack); if (err) return err; } @@ -200,7 +218,8 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], miimon = nla_get_u32(data[IFLA_BOND_MIIMON]); bond_opt_initval(&newval, miimon); - err = __bond_opt_set(bond, BOND_OPT_MIIMON, &newval); + err = __bond_opt_set(bond, BOND_OPT_MIIMON, &newval, + data[IFLA_BOND_MIIMON], extack); if (err) return err; } @@ -208,7 +227,8 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], int updelay = nla_get_u32(data[IFLA_BOND_UPDELAY]); bond_opt_initval(&newval, updelay); - err = __bond_opt_set(bond, BOND_OPT_UPDELAY, 
&newval); + err = __bond_opt_set(bond, BOND_OPT_UPDELAY, &newval, + data[IFLA_BOND_UPDELAY], extack); if (err) return err; } @@ -216,7 +236,8 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], int downdelay = nla_get_u32(data[IFLA_BOND_DOWNDELAY]); bond_opt_initval(&newval, downdelay); - err = __bond_opt_set(bond, BOND_OPT_DOWNDELAY, &newval); + err = __bond_opt_set(bond, BOND_OPT_DOWNDELAY, &newval, + data[IFLA_BOND_DOWNDELAY], extack); if (err) return err; } @@ -224,7 +245,8 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], int delay = nla_get_u32(data[IFLA_BOND_PEER_NOTIF_DELAY]); bond_opt_initval(&newval, delay); - err = __bond_opt_set(bond, BOND_OPT_PEER_NOTIF_DELAY, &newval); + err = __bond_opt_set(bond, BOND_OPT_PEER_NOTIF_DELAY, &newval, + data[IFLA_BOND_PEER_NOTIF_DELAY], extack); if (err) return err; } @@ -232,7 +254,8 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], int use_carrier = nla_get_u8(data[IFLA_BOND_USE_CARRIER]); bond_opt_initval(&newval, use_carrier); - err = __bond_opt_set(bond, BOND_OPT_USE_CARRIER, &newval); + err = __bond_opt_set(bond, BOND_OPT_USE_CARRIER, &newval, + data[IFLA_BOND_USE_CARRIER], extack); if (err) return err; } @@ -240,12 +263,14 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], int arp_interval = nla_get_u32(data[IFLA_BOND_ARP_INTERVAL]); if (arp_interval && miimon) { - netdev_err(bond->dev, "ARP monitoring cannot be used with MII monitoring\n"); + NL_SET_ERR_MSG_ATTR(extack, data[IFLA_BOND_ARP_INTERVAL], + "ARP monitoring cannot be used with MII monitoring"); return -EINVAL; } bond_opt_initval(&newval, arp_interval); - err = __bond_opt_set(bond, BOND_OPT_ARP_INTERVAL, &newval); + err = __bond_opt_set(bond, BOND_OPT_ARP_INTERVAL, &newval, + data[IFLA_BOND_ARP_INTERVAL], extack); if (err) return err; } @@ -264,7 +289,9 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], 
bond_opt_initval(&newval, (__force u64)target); err = __bond_opt_set(bond, BOND_OPT_ARP_TARGETS, - &newval); + &newval, + data[IFLA_BOND_ARP_IP_TARGET], + extack); if (err) break; i++; @@ -292,7 +319,9 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], bond_opt_initextra(&newval, &addr6, sizeof(addr6)); err = __bond_opt_set(bond, BOND_OPT_NS_TARGETS, - &newval); + &newval, + data[IFLA_BOND_NS_IP6_TARGET], + extack); if (err) break; i++; @@ -307,12 +336,14 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], int arp_validate = nla_get_u32(data[IFLA_BOND_ARP_VALIDATE]); if (arp_validate && miimon) { - netdev_err(bond->dev, "ARP validating cannot be used with MII monitoring\n"); + NL_SET_ERR_MSG_ATTR(extack, data[IFLA_BOND_ARP_INTERVAL], + "ARP validating cannot be used with MII monitoring"); return -EINVAL; } bond_opt_initval(&newval, arp_validate); - err = __bond_opt_set(bond, BOND_OPT_ARP_VALIDATE, &newval); + err = __bond_opt_set(bond, BOND_OPT_ARP_VALIDATE, &newval, + data[IFLA_BOND_ARP_VALIDATE], extack); if (err) return err; } @@ -321,7 +352,8 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], nla_get_u32(data[IFLA_BOND_ARP_ALL_TARGETS]); bond_opt_initval(&newval, arp_all_targets); - err = __bond_opt_set(bond, BOND_OPT_ARP_ALL_TARGETS, &newval); + err = __bond_opt_set(bond, BOND_OPT_ARP_ALL_TARGETS, &newval, + data[IFLA_BOND_ARP_ALL_TARGETS], extack); if (err) return err; } @@ -335,7 +367,8 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], primary = dev->name; bond_opt_initstr(&newval, primary); - err = __bond_opt_set(bond, BOND_OPT_PRIMARY, &newval); + err = __bond_opt_set(bond, BOND_OPT_PRIMARY, &newval, + data[IFLA_BOND_PRIMARY], extack); if (err) return err; } @@ -344,7 +377,8 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], nla_get_u8(data[IFLA_BOND_PRIMARY_RESELECT]); bond_opt_initval(&newval, primary_reselect); 
- err = __bond_opt_set(bond, BOND_OPT_PRIMARY_RESELECT, &newval); + err = __bond_opt_set(bond, BOND_OPT_PRIMARY_RESELECT, &newval, + data[IFLA_BOND_PRIMARY_RESELECT], extack); if (err) return err; } @@ -353,7 +387,8 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], nla_get_u8(data[IFLA_BOND_FAIL_OVER_MAC]); bond_opt_initval(&newval, fail_over_mac); - err = __bond_opt_set(bond, BOND_OPT_FAIL_OVER_MAC, &newval); + err = __bond_opt_set(bond, BOND_OPT_FAIL_OVER_MAC, &newval, + data[IFLA_BOND_FAIL_OVER_MAC], extack); if (err) return err; } @@ -362,7 +397,8 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], nla_get_u8(data[IFLA_BOND_XMIT_HASH_POLICY]); bond_opt_initval(&newval, xmit_hash_policy); - err = __bond_opt_set(bond, BOND_OPT_XMIT_HASH, &newval); + err = __bond_opt_set(bond, BOND_OPT_XMIT_HASH, &newval, + data[IFLA_BOND_XMIT_HASH_POLICY], extack); if (err) return err; } @@ -371,7 +407,8 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], nla_get_u32(data[IFLA_BOND_RESEND_IGMP]); bond_opt_initval(&newval, resend_igmp); - err = __bond_opt_set(bond, BOND_OPT_RESEND_IGMP, &newval); + err = __bond_opt_set(bond, BOND_OPT_RESEND_IGMP, &newval, + data[IFLA_BOND_RESEND_IGMP], extack); if (err) return err; } @@ -380,7 +417,8 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], nla_get_u8(data[IFLA_BOND_NUM_PEER_NOTIF]); bond_opt_initval(&newval, num_peer_notif); - err = __bond_opt_set(bond, BOND_OPT_NUM_PEER_NOTIF, &newval); + err = __bond_opt_set(bond, BOND_OPT_NUM_PEER_NOTIF, &newval, + data[IFLA_BOND_NUM_PEER_NOTIF], extack); if (err) return err; } @@ -389,7 +427,8 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], nla_get_u8(data[IFLA_BOND_ALL_SLAVES_ACTIVE]); bond_opt_initval(&newval, all_slaves_active); - err = __bond_opt_set(bond, BOND_OPT_ALL_SLAVES_ACTIVE, &newval); + err = __bond_opt_set(bond, BOND_OPT_ALL_SLAVES_ACTIVE, 
&newval, + data[IFLA_BOND_ALL_SLAVES_ACTIVE], extack); if (err) return err; } @@ -398,7 +437,8 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], nla_get_u32(data[IFLA_BOND_MIN_LINKS]); bond_opt_initval(&newval, min_links); - err = __bond_opt_set(bond, BOND_OPT_MINLINKS, &newval); + err = __bond_opt_set(bond, BOND_OPT_MINLINKS, &newval, + data[IFLA_BOND_MIN_LINKS], extack); if (err) return err; } @@ -407,7 +447,8 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], nla_get_u32(data[IFLA_BOND_LP_INTERVAL]); bond_opt_initval(&newval, lp_interval); - err = __bond_opt_set(bond, BOND_OPT_LP_INTERVAL, &newval); + err = __bond_opt_set(bond, BOND_OPT_LP_INTERVAL, &newval, + data[IFLA_BOND_LP_INTERVAL], extack); if (err) return err; } @@ -416,7 +457,8 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], nla_get_u32(data[IFLA_BOND_PACKETS_PER_SLAVE]); bond_opt_initval(&newval, packets_per_slave); - err = __bond_opt_set(bond, BOND_OPT_PACKETS_PER_SLAVE, &newval); + err = __bond_opt_set(bond, BOND_OPT_PACKETS_PER_SLAVE, &newval, + data[IFLA_BOND_PACKETS_PER_SLAVE], extack); if (err) return err; } @@ -425,7 +467,8 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], int lacp_active = nla_get_u8(data[IFLA_BOND_AD_LACP_ACTIVE]); bond_opt_initval(&newval, lacp_active); - err = __bond_opt_set(bond, BOND_OPT_LACP_ACTIVE, &newval); + err = __bond_opt_set(bond, BOND_OPT_LACP_ACTIVE, &newval, + data[IFLA_BOND_AD_LACP_ACTIVE], extack); if (err) return err; } @@ -435,7 +478,8 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], nla_get_u8(data[IFLA_BOND_AD_LACP_RATE]); bond_opt_initval(&newval, lacp_rate); - err = __bond_opt_set(bond, BOND_OPT_LACP_RATE, &newval); + err = __bond_opt_set(bond, BOND_OPT_LACP_RATE, &newval, + data[IFLA_BOND_AD_LACP_RATE], extack); if (err) return err; } @@ -444,7 +488,8 @@ static int bond_changelink(struct net_device 
*bond_dev, struct nlattr *tb[], nla_get_u8(data[IFLA_BOND_AD_SELECT]); bond_opt_initval(&newval, ad_select); - err = __bond_opt_set(bond, BOND_OPT_AD_SELECT, &newval); + err = __bond_opt_set(bond, BOND_OPT_AD_SELECT, &newval, + data[IFLA_BOND_AD_SELECT], extack); if (err) return err; } @@ -453,7 +498,8 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], nla_get_u16(data[IFLA_BOND_AD_ACTOR_SYS_PRIO]); bond_opt_initval(&newval, actor_sys_prio); - err = __bond_opt_set(bond, BOND_OPT_AD_ACTOR_SYS_PRIO, &newval); + err = __bond_opt_set(bond, BOND_OPT_AD_ACTOR_SYS_PRIO, &newval, + data[IFLA_BOND_AD_ACTOR_SYS_PRIO], extack); if (err) return err; } @@ -462,7 +508,8 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], nla_get_u16(data[IFLA_BOND_AD_USER_PORT_KEY]); bond_opt_initval(&newval, port_key); - err = __bond_opt_set(bond, BOND_OPT_AD_USER_PORT_KEY, &newval); + err = __bond_opt_set(bond, BOND_OPT_AD_USER_PORT_KEY, &newval, + data[IFLA_BOND_AD_USER_PORT_KEY], extack); if (err) return err; } @@ -472,7 +519,8 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], bond_opt_initval(&newval, nla_get_u64(data[IFLA_BOND_AD_ACTOR_SYSTEM])); - err = __bond_opt_set(bond, BOND_OPT_AD_ACTOR_SYSTEM, &newval); + err = __bond_opt_set(bond, BOND_OPT_AD_ACTOR_SYSTEM, &newval, + data[IFLA_BOND_AD_ACTOR_SYSTEM], extack); if (err) return err; } @@ -480,7 +528,8 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], int dynamic_lb = nla_get_u8(data[IFLA_BOND_TLB_DYNAMIC_LB]); bond_opt_initval(&newval, dynamic_lb); - err = __bond_opt_set(bond, BOND_OPT_TLB_DYNAMIC_LB, &newval); + err = __bond_opt_set(bond, BOND_OPT_TLB_DYNAMIC_LB, &newval, + data[IFLA_BOND_TLB_DYNAMIC_LB], extack); if (err) return err; } @@ -489,7 +538,8 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], int missed_max = nla_get_u8(data[IFLA_BOND_MISSED_MAX]); bond_opt_initval(&newval, 
missed_max); - err = __bond_opt_set(bond, BOND_OPT_MISSED_MAX, &newval); + err = __bond_opt_set(bond, BOND_OPT_MISSED_MAX, &newval, + data[IFLA_BOND_MISSED_MAX], extack); if (err) return err; } diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 1f8323ad5282..3498db1c1b3c 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -40,6 +40,8 @@ static int bond_option_arp_validate_set(struct bonding *bond, const struct bond_opt_value *newval); static int bond_option_arp_all_targets_set(struct bonding *bond, const struct bond_opt_value *newval); +static int bond_option_prio_set(struct bonding *bond, + const struct bond_opt_value *newval); static int bond_option_primary_set(struct bonding *bond, const struct bond_opt_value *newval); static int bond_option_primary_reselect_set(struct bonding *bond, @@ -365,6 +367,16 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = { .values = bond_intmax_tbl, .set = bond_option_miimon_set }, + [BOND_OPT_PRIO] = { + .id = BOND_OPT_PRIO, + .name = "prio", + .desc = "Link priority for failover re-selection", + .flags = BOND_OPTFLAG_RAWVAL, + .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_ACTIVEBACKUP) | + BIT(BOND_MODE_TLB) | + BIT(BOND_MODE_ALB)), + .set = bond_option_prio_set + }, [BOND_OPT_PRIMARY] = { .id = BOND_OPT_PRIMARY, .name = "primary", @@ -632,27 +644,35 @@ static int bond_opt_check_deps(struct bonding *bond, } static void bond_opt_dep_print(struct bonding *bond, - const struct bond_option *opt) + const struct bond_option *opt, + struct nlattr *bad_attr, + struct netlink_ext_ack *extack) { const struct bond_opt_value *modeval; struct bond_params *params; params = &bond->params; modeval = bond_opt_get_val(BOND_OPT_MODE, params->mode); - if (test_bit(params->mode, &opt->unsuppmodes)) + if (test_bit(params->mode, &opt->unsuppmodes)) { netdev_err(bond->dev, "option %s: mode dependency failed, not supported in mode %s(%llu)\n", opt->name, 
modeval->string, modeval->value); + NL_SET_ERR_MSG_ATTR(extack, bad_attr, + "option not supported in mode"); + } } static void bond_opt_error_interpret(struct bonding *bond, const struct bond_option *opt, - int error, const struct bond_opt_value *val) + int error, const struct bond_opt_value *val, + struct nlattr *bad_attr, + struct netlink_ext_ack *extack) { const struct bond_opt_value *minval, *maxval; char *p; switch (error) { case -EINVAL: + NL_SET_ERR_MSG_ATTR(extack, bad_attr, "invalid option value"); if (val) { if (val->string) { /* sometimes RAWVAL opts may have new lines */ @@ -674,13 +694,17 @@ static void bond_opt_error_interpret(struct bonding *bond, opt->name, minval ? minval->value : 0, maxval->value); break; case -EACCES: - bond_opt_dep_print(bond, opt); + bond_opt_dep_print(bond, opt, bad_attr, extack); break; case -ENOTEMPTY: + NL_SET_ERR_MSG_ATTR(extack, bad_attr, + "unable to set option because the bond device has slaves"); netdev_err(bond->dev, "option %s: unable to set because the bond device has slaves\n", opt->name); break; case -EBUSY: + NL_SET_ERR_MSG_ATTR(extack, bad_attr, + "unable to set option because the bond is up"); netdev_err(bond->dev, "option %s: unable to set because the bond device is up\n", opt->name); break; @@ -691,6 +715,8 @@ static void bond_opt_error_interpret(struct bonding *bond, *p = '\0'; netdev_err(bond->dev, "option %s: interface %s does not exist!\n", opt->name, val->string); + NL_SET_ERR_MSG_ATTR(extack, bad_attr, + "interface does not exist"); } break; default: @@ -703,13 +729,17 @@ static void bond_opt_error_interpret(struct bonding *bond, * @bond: target bond device * @option: option to set * @val: value to set it to + * @bad_attr: netlink attribute that caused the error + * @extack: extended netlink error structure, used when an error message + * needs to be returned to the caller via netlink * * This function is used to change the bond's option value, it can be * used for both enabling/changing an option and
for disabling it. RTNL lock * must be obtained before calling this function. */ int __bond_opt_set(struct bonding *bond, - unsigned int option, struct bond_opt_value *val) + unsigned int option, struct bond_opt_value *val, + struct nlattr *bad_attr, struct netlink_ext_ack *extack) { const struct bond_opt_value *retval = NULL; const struct bond_option *opt; @@ -731,7 +761,7 @@ int __bond_opt_set(struct bonding *bond, ret = opt->set(bond, retval); out: if (ret) - bond_opt_error_interpret(bond, opt, ret, val); + bond_opt_error_interpret(bond, opt, ret, val, bad_attr, extack); return ret; } @@ -753,7 +783,7 @@ int __bond_opt_set_notify(struct bonding *bond, ASSERT_RTNL(); - ret = __bond_opt_set(bond, option, val); + ret = __bond_opt_set(bond, option, val, NULL, NULL); if (!ret && (bond->dev->reg_state == NETREG_REGISTERED)) call_netdevice_notifiers(NETDEV_CHANGEINFODATA, bond->dev); @@ -1288,6 +1318,27 @@ static int bond_option_missed_max_set(struct bonding *bond, return 0; } +static int bond_option_prio_set(struct bonding *bond, + const struct bond_opt_value *newval) +{ + struct slave *slave; + + slave = bond_slave_get_rtnl(newval->slave_dev); + if (!slave) { + netdev_dbg(newval->slave_dev, "%s called on NULL slave\n", __func__); + return -ENODEV; + } + slave->prio = newval->value; + + if (rtnl_dereference(bond->primary_slave)) + slave_warn(bond->dev, slave->dev, + "prio updated, but will not affect failover re-selection as primary slave have been set\n"); + else + bond_select_active_slave(bond); + + return 0; +} + static int bond_option_primary_set(struct bonding *bond, const struct bond_opt_value *newval) { diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig index b2dcc1e5a388..806f15146f69 100644 --- a/drivers/net/can/Kconfig +++ b/drivers/net/can/Kconfig @@ -1,5 +1,26 @@ # SPDX-License-Identifier: GPL-2.0-only -menu "CAN Device Drivers" + +menuconfig CAN_DEV + tristate "CAN Device Drivers" + default y + depends on CAN + help + Controller Area Network 
(CAN) is a serial communications protocol up to + 1Mbit/s for its original release (now known as Classical CAN) and up + to 8Mbit/s for the more recent CAN with Flexible Data-Rate + (CAN-FD). The CAN bus was originally mainly for automotive, but is now + widely used in marine (NMEA2000), industrial, and medical + applications. More information on the CAN network protocol family + PF_CAN is contained in <Documentation/networking/can.rst>. + + This section contains all the CAN(-FD) device drivers including the + virtual ones. If you own such devices or plan to use the virtual CAN + interfaces to develop applications, say Y here. + + To compile as a module, choose M here: the module will be called + can-dev. + +if CAN_DEV config CAN_VCAN tristate "Virtual Local CAN Interface (vcan)" @@ -48,15 +69,22 @@ config CAN_SLCAN can be changed by the 'maxdev=xx' module option. This driver can also be built as a module. If so, the module will be called slcan. -config CAN_DEV - tristate "Platform CAN drivers with Netlink support" +config CAN_NETLINK + bool "CAN device drivers with Netlink support" default y help - Enables the common framework for platform CAN drivers with Netlink - support. This is the standard library for CAN drivers. - If unsure, say Y. + Enables the common framework for CAN device drivers. This is the + standard library and provides features for the Netlink interface such + as bittiming validation, support of CAN error states, device restart + and others. -if CAN_DEV + The additional features selected by this option will be added to the + can-dev module. + + This is required by all platform and hardware CAN drivers. If you + plan to use such devices or if unsure, say Y. + +if CAN_NETLINK config CAN_CALC_BITTIMING bool "CAN bit-timing calculation" @@ -69,8 +97,15 @@ config CAN_CALC_BITTIMING source clock frequencies.
Disabling saves some space, but then the bit-timing parameters must be specified directly using the Netlink arguments "tq", "prop_seg", "phase_seg1", "phase_seg2" and "sjw". + + The additional features selected by this option will be added to the + can-dev module. + If unsure, say Y. +config CAN_RX_OFFLOAD + bool + config CAN_AT91 tristate "Atmel AT91 onchip CAN controller" depends on (ARCH_AT91 || COMPILE_TEST) && HAS_IOMEM @@ -82,6 +117,7 @@ config CAN_FLEXCAN tristate "Support for Freescale FLEXCAN based chips" depends on OF || COLDFIRE || COMPILE_TEST depends on HAS_IOMEM + select CAN_RX_OFFLOAD help Say Y here if you want to support for Freescale FlexCAN. @@ -131,6 +167,7 @@ config CAN_SUN4I config CAN_TI_HECC depends on ARM tristate "TI High End CAN Controller" + select CAN_RX_OFFLOAD help Driver for TI HECC (High End CAN Controller) module found on many TI devices. The device specifications are available from www.ti.com @@ -164,7 +201,7 @@ source "drivers/net/can/softing/Kconfig" source "drivers/net/can/spi/Kconfig" source "drivers/net/can/usb/Kconfig" -endif +endif #CAN_NETLINK config CAN_DEBUG_DEVICES bool "CAN devices debugging messages" @@ -174,4 +211,4 @@ config CAN_DEBUG_DEVICES a problem with CAN support and want to see more of what is going on. 
-endmenu +endif #CAN_DEV diff --git a/drivers/net/can/dev/Makefile b/drivers/net/can/dev/Makefile index af2901db473c..633687d6b6c0 100644 --- a/drivers/net/can/dev/Makefile +++ b/drivers/net/can/dev/Makefile @@ -1,9 +1,12 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_CAN_DEV) += can-dev.o -can-dev-y += bittiming.o -can-dev-y += dev.o -can-dev-y += length.o -can-dev-y += netlink.o -can-dev-y += rx-offload.o -can-dev-y += skb.o +obj-$(CONFIG_CAN_DEV) += can-dev.o + +can-dev-y += skb.o + +can-dev-$(CONFIG_CAN_CALC_BITTIMING) += calc_bittiming.o +can-dev-$(CONFIG_CAN_NETLINK) += bittiming.o +can-dev-$(CONFIG_CAN_NETLINK) += dev.o +can-dev-$(CONFIG_CAN_NETLINK) += length.o +can-dev-$(CONFIG_CAN_NETLINK) += netlink.o +can-dev-$(CONFIG_CAN_RX_OFFLOAD) += rx-offload.o diff --git a/drivers/net/can/dev/bittiming.c b/drivers/net/can/dev/bittiming.c index c1e76f0a5064..7ae80763c960 100644 --- a/drivers/net/can/dev/bittiming.c +++ b/drivers/net/can/dev/bittiming.c @@ -4,205 +4,8 @@ * Copyright (C) 2008-2009 Wolfgang Grandegger <wg@grandegger.com> */ -#include <linux/units.h> #include <linux/can/dev.h> -#ifdef CONFIG_CAN_CALC_BITTIMING -#define CAN_CALC_MAX_ERROR 50 /* in one-tenth of a percent */ - -/* Bit-timing calculation derived from: - * - * Code based on LinCAN sources and H8S2638 project - * Copyright 2004-2006 Pavel Pisa - DCE FELK CVUT cz - * Copyright 2005 Stanislav Marek - * email: pisa@cmp.felk.cvut.cz - * - * Calculates proper bit-timing parameters for a specified bit-rate - * and sample-point, which can then be used to set the bit-timing - * registers of the CAN controller. You can find more information - * in the header file linux/can/netlink.h. 
- */ -static int -can_update_sample_point(const struct can_bittiming_const *btc, - const unsigned int sample_point_nominal, const unsigned int tseg, - unsigned int *tseg1_ptr, unsigned int *tseg2_ptr, - unsigned int *sample_point_error_ptr) -{ - unsigned int sample_point_error, best_sample_point_error = UINT_MAX; - unsigned int sample_point, best_sample_point = 0; - unsigned int tseg1, tseg2; - int i; - - for (i = 0; i <= 1; i++) { - tseg2 = tseg + CAN_SYNC_SEG - - (sample_point_nominal * (tseg + CAN_SYNC_SEG)) / - 1000 - i; - tseg2 = clamp(tseg2, btc->tseg2_min, btc->tseg2_max); - tseg1 = tseg - tseg2; - if (tseg1 > btc->tseg1_max) { - tseg1 = btc->tseg1_max; - tseg2 = tseg - tseg1; - } - - sample_point = 1000 * (tseg + CAN_SYNC_SEG - tseg2) / - (tseg + CAN_SYNC_SEG); - sample_point_error = abs(sample_point_nominal - sample_point); - - if (sample_point <= sample_point_nominal && - sample_point_error < best_sample_point_error) { - best_sample_point = sample_point; - best_sample_point_error = sample_point_error; - *tseg1_ptr = tseg1; - *tseg2_ptr = tseg2; - } - } - - if (sample_point_error_ptr) - *sample_point_error_ptr = best_sample_point_error; - - return best_sample_point; -} - -int can_calc_bittiming(const struct net_device *dev, struct can_bittiming *bt, - const struct can_bittiming_const *btc) -{ - struct can_priv *priv = netdev_priv(dev); - unsigned int bitrate; /* current bitrate */ - unsigned int bitrate_error; /* difference between current and nominal value */ - unsigned int best_bitrate_error = UINT_MAX; - unsigned int sample_point_error; /* difference between current and nominal value */ - unsigned int best_sample_point_error = UINT_MAX; - unsigned int sample_point_nominal; /* nominal sample point */ - unsigned int best_tseg = 0; /* current best value for tseg */ - unsigned int best_brp = 0; /* current best value for brp */ - unsigned int brp, tsegall, tseg, tseg1 = 0, tseg2 = 0; - u64 v64; - - /* Use CiA recommended sample points */ - if 
(bt->sample_point) { - sample_point_nominal = bt->sample_point; - } else { - if (bt->bitrate > 800 * KILO /* BPS */) - sample_point_nominal = 750; - else if (bt->bitrate > 500 * KILO /* BPS */) - sample_point_nominal = 800; - else - sample_point_nominal = 875; - } - - /* tseg even = round down, odd = round up */ - for (tseg = (btc->tseg1_max + btc->tseg2_max) * 2 + 1; - tseg >= (btc->tseg1_min + btc->tseg2_min) * 2; tseg--) { - tsegall = CAN_SYNC_SEG + tseg / 2; - - /* Compute all possible tseg choices (tseg=tseg1+tseg2) */ - brp = priv->clock.freq / (tsegall * bt->bitrate) + tseg % 2; - - /* choose brp step which is possible in system */ - brp = (brp / btc->brp_inc) * btc->brp_inc; - if (brp < btc->brp_min || brp > btc->brp_max) - continue; - - bitrate = priv->clock.freq / (brp * tsegall); - bitrate_error = abs(bt->bitrate - bitrate); - - /* tseg brp biterror */ - if (bitrate_error > best_bitrate_error) - continue; - - /* reset sample point error if we have a better bitrate */ - if (bitrate_error < best_bitrate_error) - best_sample_point_error = UINT_MAX; - - can_update_sample_point(btc, sample_point_nominal, tseg / 2, - &tseg1, &tseg2, &sample_point_error); - if (sample_point_error >= best_sample_point_error) - continue; - - best_sample_point_error = sample_point_error; - best_bitrate_error = bitrate_error; - best_tseg = tseg / 2; - best_brp = brp; - - if (bitrate_error == 0 && sample_point_error == 0) - break; - } - - if (best_bitrate_error) { - /* Error in one-tenth of a percent */ - v64 = (u64)best_bitrate_error * 1000; - do_div(v64, bt->bitrate); - bitrate_error = (u32)v64; - if (bitrate_error > CAN_CALC_MAX_ERROR) { - netdev_err(dev, - "bitrate error %d.%d%% too high\n", - bitrate_error / 10, bitrate_error % 10); - return -EDOM; - } - netdev_warn(dev, "bitrate error %d.%d%%\n", - bitrate_error / 10, bitrate_error % 10); - } - - /* real sample point */ - bt->sample_point = can_update_sample_point(btc, sample_point_nominal, - best_tseg, &tseg1, &tseg2, - 
NULL); - - v64 = (u64)best_brp * 1000 * 1000 * 1000; - do_div(v64, priv->clock.freq); - bt->tq = (u32)v64; - bt->prop_seg = tseg1 / 2; - bt->phase_seg1 = tseg1 - bt->prop_seg; - bt->phase_seg2 = tseg2; - - /* check for sjw user settings */ - if (!bt->sjw || !btc->sjw_max) { - bt->sjw = 1; - } else { - /* bt->sjw is at least 1 -> sanitize upper bound to sjw_max */ - if (bt->sjw > btc->sjw_max) - bt->sjw = btc->sjw_max; - /* bt->sjw must not be higher than tseg2 */ - if (tseg2 < bt->sjw) - bt->sjw = tseg2; - } - - bt->brp = best_brp; - - /* real bitrate */ - bt->bitrate = priv->clock.freq / - (bt->brp * (CAN_SYNC_SEG + tseg1 + tseg2)); - - return 0; -} - -void can_calc_tdco(struct can_tdc *tdc, const struct can_tdc_const *tdc_const, - const struct can_bittiming *dbt, - u32 *ctrlmode, u32 ctrlmode_supported) - -{ - if (!tdc_const || !(ctrlmode_supported & CAN_CTRLMODE_TDC_AUTO)) - return; - - *ctrlmode &= ~CAN_CTRLMODE_TDC_MASK; - - /* As specified in ISO 11898-1 section 11.3.3 "Transmitter - * delay compensation" (TDC) is only applicable if data BRP is - * one or two. - */ - if (dbt->brp == 1 || dbt->brp == 2) { - /* Sample point in clock periods */ - u32 sample_point_in_tc = (CAN_SYNC_SEG + dbt->prop_seg + - dbt->phase_seg1) * dbt->brp; - - if (sample_point_in_tc < tdc_const->tdco_min) - return; - tdc->tdco = min(sample_point_in_tc, tdc_const->tdco_max); - *ctrlmode |= CAN_CTRLMODE_TDC_AUTO; - } -} -#endif /* CONFIG_CAN_CALC_BITTIMING */ - /* Checks the validity of the specified bit-timing parameters prop_seg, * phase_seg1, phase_seg2 and sjw and tries to determine the bitrate * prescaler value brp. 
You can find more information in the header diff --git a/drivers/net/can/dev/calc_bittiming.c b/drivers/net/can/dev/calc_bittiming.c new file mode 100644 index 000000000000..d3caa040614d --- /dev/null +++ b/drivers/net/can/dev/calc_bittiming.c @@ -0,0 +1,202 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2005 Marc Kleine-Budde, Pengutronix + * Copyright (C) 2006 Andrey Volkov, Varma Electronics + * Copyright (C) 2008-2009 Wolfgang Grandegger <wg@grandegger.com> + */ + +#include <linux/units.h> +#include <linux/can/dev.h> + +#define CAN_CALC_MAX_ERROR 50 /* in one-tenth of a percent */ + +/* Bit-timing calculation derived from: + * + * Code based on LinCAN sources and H8S2638 project + * Copyright 2004-2006 Pavel Pisa - DCE FELK CVUT cz + * Copyright 2005 Stanislav Marek + * email: pisa@cmp.felk.cvut.cz + * + * Calculates proper bit-timing parameters for a specified bit-rate + * and sample-point, which can then be used to set the bit-timing + * registers of the CAN controller. You can find more information + * in the header file linux/can/netlink.h. 
+ */ +static int +can_update_sample_point(const struct can_bittiming_const *btc, + const unsigned int sample_point_nominal, const unsigned int tseg, + unsigned int *tseg1_ptr, unsigned int *tseg2_ptr, + unsigned int *sample_point_error_ptr) +{ + unsigned int sample_point_error, best_sample_point_error = UINT_MAX; + unsigned int sample_point, best_sample_point = 0; + unsigned int tseg1, tseg2; + int i; + + for (i = 0; i <= 1; i++) { + tseg2 = tseg + CAN_SYNC_SEG - + (sample_point_nominal * (tseg + CAN_SYNC_SEG)) / + 1000 - i; + tseg2 = clamp(tseg2, btc->tseg2_min, btc->tseg2_max); + tseg1 = tseg - tseg2; + if (tseg1 > btc->tseg1_max) { + tseg1 = btc->tseg1_max; + tseg2 = tseg - tseg1; + } + + sample_point = 1000 * (tseg + CAN_SYNC_SEG - tseg2) / + (tseg + CAN_SYNC_SEG); + sample_point_error = abs(sample_point_nominal - sample_point); + + if (sample_point <= sample_point_nominal && + sample_point_error < best_sample_point_error) { + best_sample_point = sample_point; + best_sample_point_error = sample_point_error; + *tseg1_ptr = tseg1; + *tseg2_ptr = tseg2; + } + } + + if (sample_point_error_ptr) + *sample_point_error_ptr = best_sample_point_error; + + return best_sample_point; +} + +int can_calc_bittiming(const struct net_device *dev, struct can_bittiming *bt, + const struct can_bittiming_const *btc) +{ + struct can_priv *priv = netdev_priv(dev); + unsigned int bitrate; /* current bitrate */ + unsigned int bitrate_error; /* difference between current and nominal value */ + unsigned int best_bitrate_error = UINT_MAX; + unsigned int sample_point_error; /* difference between current and nominal value */ + unsigned int best_sample_point_error = UINT_MAX; + unsigned int sample_point_nominal; /* nominal sample point */ + unsigned int best_tseg = 0; /* current best value for tseg */ + unsigned int best_brp = 0; /* current best value for brp */ + unsigned int brp, tsegall, tseg, tseg1 = 0, tseg2 = 0; + u64 v64; + + /* Use CiA recommended sample points */ + if 
(bt->sample_point) { + sample_point_nominal = bt->sample_point; + } else { + if (bt->bitrate > 800 * KILO /* BPS */) + sample_point_nominal = 750; + else if (bt->bitrate > 500 * KILO /* BPS */) + sample_point_nominal = 800; + else + sample_point_nominal = 875; + } + + /* tseg even = round down, odd = round up */ + for (tseg = (btc->tseg1_max + btc->tseg2_max) * 2 + 1; + tseg >= (btc->tseg1_min + btc->tseg2_min) * 2; tseg--) { + tsegall = CAN_SYNC_SEG + tseg / 2; + + /* Compute all possible tseg choices (tseg=tseg1+tseg2) */ + brp = priv->clock.freq / (tsegall * bt->bitrate) + tseg % 2; + + /* choose brp step which is possible in system */ + brp = (brp / btc->brp_inc) * btc->brp_inc; + if (brp < btc->brp_min || brp > btc->brp_max) + continue; + + bitrate = priv->clock.freq / (brp * tsegall); + bitrate_error = abs(bt->bitrate - bitrate); + + /* tseg brp biterror */ + if (bitrate_error > best_bitrate_error) + continue; + + /* reset sample point error if we have a better bitrate */ + if (bitrate_error < best_bitrate_error) + best_sample_point_error = UINT_MAX; + + can_update_sample_point(btc, sample_point_nominal, tseg / 2, + &tseg1, &tseg2, &sample_point_error); + if (sample_point_error >= best_sample_point_error) + continue; + + best_sample_point_error = sample_point_error; + best_bitrate_error = bitrate_error; + best_tseg = tseg / 2; + best_brp = brp; + + if (bitrate_error == 0 && sample_point_error == 0) + break; + } + + if (best_bitrate_error) { + /* Error in one-tenth of a percent */ + v64 = (u64)best_bitrate_error * 1000; + do_div(v64, bt->bitrate); + bitrate_error = (u32)v64; + if (bitrate_error > CAN_CALC_MAX_ERROR) { + netdev_err(dev, + "bitrate error %d.%d%% too high\n", + bitrate_error / 10, bitrate_error % 10); + return -EDOM; + } + netdev_warn(dev, "bitrate error %d.%d%%\n", + bitrate_error / 10, bitrate_error % 10); + } + + /* real sample point */ + bt->sample_point = can_update_sample_point(btc, sample_point_nominal, + best_tseg, &tseg1, &tseg2, + 
NULL); + + v64 = (u64)best_brp * 1000 * 1000 * 1000; + do_div(v64, priv->clock.freq); + bt->tq = (u32)v64; + bt->prop_seg = tseg1 / 2; + bt->phase_seg1 = tseg1 - bt->prop_seg; + bt->phase_seg2 = tseg2; + + /* check for sjw user settings */ + if (!bt->sjw || !btc->sjw_max) { + bt->sjw = 1; + } else { + /* bt->sjw is at least 1 -> sanitize upper bound to sjw_max */ + if (bt->sjw > btc->sjw_max) + bt->sjw = btc->sjw_max; + /* bt->sjw must not be higher than tseg2 */ + if (tseg2 < bt->sjw) + bt->sjw = tseg2; + } + + bt->brp = best_brp; + + /* real bitrate */ + bt->bitrate = priv->clock.freq / + (bt->brp * (CAN_SYNC_SEG + tseg1 + tseg2)); + + return 0; +} + +void can_calc_tdco(struct can_tdc *tdc, const struct can_tdc_const *tdc_const, + const struct can_bittiming *dbt, + u32 *ctrlmode, u32 ctrlmode_supported) + +{ + if (!tdc_const || !(ctrlmode_supported & CAN_CTRLMODE_TDC_AUTO)) + return; + + *ctrlmode &= ~CAN_CTRLMODE_TDC_MASK; + + /* As specified in ISO 11898-1 section 11.3.3 "Transmitter + * delay compensation" (TDC) is only applicable if data BRP is + * one or two. 
+ */ + if (dbt->brp == 1 || dbt->brp == 2) { + /* Sample point in clock periods */ + u32 sample_point_in_tc = (CAN_SYNC_SEG + dbt->prop_seg + + dbt->phase_seg1) * dbt->brp; + + if (sample_point_in_tc < tdc_const->tdco_min) + return; + tdc->tdco = min(sample_point_in_tc, tdc_const->tdco_max); + *ctrlmode |= CAN_CTRLMODE_TDC_AUTO; + } +} diff --git a/drivers/net/can/dev/dev.c b/drivers/net/can/dev/dev.c index 96c9d9db00cf..523eaacfe29e 100644 --- a/drivers/net/can/dev/dev.c +++ b/drivers/net/can/dev/dev.c @@ -4,7 +4,6 @@ * Copyright (C) 2008-2009 Wolfgang Grandegger <wg@grandegger.com> */ -#include <linux/module.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/netdevice.h> @@ -17,12 +16,6 @@ #include <linux/gpio/consumer.h> #include <linux/of.h> -#define MOD_DESC "CAN device driver interface" - -MODULE_DESCRIPTION(MOD_DESC); -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Wolfgang Grandegger <wg@grandegger.com>"); - static void can_update_state_error_stats(struct net_device *dev, enum can_state new_state) { @@ -513,7 +506,7 @@ static __init int can_dev_init(void) err = can_netlink_register(); if (!err) - pr_info(MOD_DESC "\n"); + pr_info("CAN device driver interface\n"); return err; } diff --git a/drivers/net/can/dev/netlink.c b/drivers/net/can/dev/netlink.c index 7633d98e3912..037824011266 100644 --- a/drivers/net/can/dev/netlink.c +++ b/drivers/net/can/dev/netlink.c @@ -176,7 +176,8 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[], * directly via do_set_bitrate(). Bail out if neither * is given. */ - if (!priv->bittiming_const && !priv->do_set_bittiming) + if (!priv->bittiming_const && !priv->do_set_bittiming && + !priv->bitrate_const) return -EOPNOTSUPP; memcpy(&bt, nla_data(data[IFLA_CAN_BITTIMING]), sizeof(bt)); @@ -278,7 +279,8 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[], * directly via do_set_bitrate(). Bail out if neither * is given. 
*/ - if (!priv->data_bittiming_const && !priv->do_set_data_bittiming) + if (!priv->data_bittiming_const && !priv->do_set_data_bittiming && + !priv->data_bitrate_const) return -EOPNOTSUPP; memcpy(&dbt, nla_data(data[IFLA_CAN_DATA_BITTIMING]), diff --git a/drivers/net/can/dev/skb.c b/drivers/net/can/dev/skb.c index 61660248c69e..8bb62dd864c8 100644 --- a/drivers/net/can/dev/skb.c +++ b/drivers/net/can/dev/skb.c @@ -5,6 +5,14 @@ */ #include <linux/can/dev.h> +#include <linux/can/netlink.h> +#include <linux/module.h> + +#define MOD_DESC "CAN device driver interface" + +MODULE_DESCRIPTION(MOD_DESC); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Wolfgang Grandegger <wg@grandegger.com>"); /* Local echo of CAN messages * @@ -252,3 +260,67 @@ struct sk_buff *alloc_can_err_skb(struct net_device *dev, struct can_frame **cf) return skb; } EXPORT_SYMBOL_GPL(alloc_can_err_skb); + +/* Check for outgoing skbs that have not been created by the CAN subsystem */ +static bool can_skb_headroom_valid(struct net_device *dev, struct sk_buff *skb) +{ + /* af_packet creates a headroom of HH_DATA_MOD bytes which is fine */ + if (WARN_ON_ONCE(skb_headroom(skb) < sizeof(struct can_skb_priv))) + return false; + + /* af_packet does not apply CAN skb specific settings */ + if (skb->ip_summed == CHECKSUM_NONE) { + /* init headroom */ + can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + + /* perform proper loopback on capable devices */ + if (dev->flags & IFF_ECHO) + skb->pkt_type = PACKET_LOOPBACK; + else + skb->pkt_type = PACKET_HOST; + + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + } + + return true; +} + +/* Drop a given socketbuffer if it does not contain a valid CAN frame. 
*/ +bool can_dropped_invalid_skb(struct net_device *dev, struct sk_buff *skb) +{ + const struct canfd_frame *cfd = (struct canfd_frame *)skb->data; + struct can_priv *priv = netdev_priv(dev); + + if (skb->protocol == htons(ETH_P_CAN)) { + if (unlikely(skb->len != CAN_MTU || + cfd->len > CAN_MAX_DLEN)) + goto inval_skb; + } else if (skb->protocol == htons(ETH_P_CANFD)) { + if (unlikely(skb->len != CANFD_MTU || + cfd->len > CANFD_MAX_DLEN)) + goto inval_skb; + } else { + goto inval_skb; + } + + if (!can_skb_headroom_valid(dev, skb)) { + goto inval_skb; + } else if (priv->ctrlmode & CAN_CTRLMODE_LISTENONLY) { + netdev_info_once(dev, + "interface in listen only mode, dropping skb\n"); + goto inval_skb; + } + + return false; + +inval_skb: + kfree_skb(skb); + dev->stats.tx_dropped++; + return true; +} +EXPORT_SYMBOL_GPL(can_dropped_invalid_skb); diff --git a/drivers/net/can/m_can/Kconfig b/drivers/net/can/m_can/Kconfig index 45ad1b3f0cd0..fc2afab36279 100644 --- a/drivers/net/can/m_can/Kconfig +++ b/drivers/net/can/m_can/Kconfig @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only menuconfig CAN_M_CAN tristate "Bosch M_CAN support" + select CAN_RX_OFFLOAD help Say Y here if you want support for Bosch M_CAN controller framework. This is common support for devices that embed the Bosch M_CAN IP. diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 5d0c82d8b9a9..4f90e17354f2 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1348,8 +1348,8 @@ static void m_can_chip_config(struct net_device *dev) /* set bittiming params */ m_can_set_bittiming(dev); - /* enable internal timestamp generation, with a prescalar of 16. The - * prescalar is applied to the nominal bit timing + /* enable internal timestamp generation, with a prescaler of 16. 
The + * prescaler is applied to the nominal bit timing */ m_can_write(cdev, M_CAN_TSCC, FIELD_PREP(TSCC_TCP_MASK, 0xf)); diff --git a/drivers/net/can/spi/mcp251xfd/Kconfig b/drivers/net/can/spi/mcp251xfd/Kconfig index dd0fc0a54be1..877e4356010d 100644 --- a/drivers/net/can/spi/mcp251xfd/Kconfig +++ b/drivers/net/can/spi/mcp251xfd/Kconfig @@ -2,6 +2,7 @@ config CAN_MCP251XFD tristate "Microchip MCP251xFD SPI CAN controllers" + select CAN_RX_OFFLOAD select REGMAP select WANT_DEV_COREDUMP help diff --git a/drivers/net/can/usb/Kconfig b/drivers/net/can/usb/Kconfig index f959215c9d53..1218f9642f33 100644 --- a/drivers/net/can/usb/Kconfig +++ b/drivers/net/can/usb/Kconfig @@ -14,11 +14,18 @@ config CAN_EMS_USB This driver is for the one channel CPC-USB/ARM7 CAN/USB interface from EMS Dr. Thomas Wuensche (http://www.ems-wuensche.de). -config CAN_ESD_USB2 - tristate "ESD USB/2 CAN/USB interface" +config CAN_ESD_USB + tristate "esd electronics gmbh CAN/USB interfaces" help - This driver supports the CAN-USB/2 interface - from esd electronic system design gmbh (http://www.esd.eu). + This driver adds support for several CAN/USB interfaces + from esd electronics gmbh (https://www.esd.eu). + + The driver supports the following devices: + - esd CAN-USB/2 + - esd CAN-USB/Micro + + To compile this driver as a module, choose M here: the module + will be called esd_usb.
config CAN_ETAS_ES58X tristate "ETAS ES58X CAN/USB interfaces" diff --git a/drivers/net/can/usb/Makefile b/drivers/net/can/usb/Makefile index 748cf31a0d53..1ea16be5743b 100644 --- a/drivers/net/can/usb/Makefile +++ b/drivers/net/can/usb/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_CAN_8DEV_USB) += usb_8dev.o obj-$(CONFIG_CAN_EMS_USB) += ems_usb.o -obj-$(CONFIG_CAN_ESD_USB2) += esd_usb2.o +obj-$(CONFIG_CAN_ESD_USB) += esd_usb.o obj-$(CONFIG_CAN_ETAS_ES58X) += etas_es58x/ obj-$(CONFIG_CAN_GS_USB) += gs_usb.o obj-$(CONFIG_CAN_KVASER_USB) += kvaser_usb/ diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb.c index 286daaaea0b8..8a4bf2961f3d 100644 --- a/drivers/net/can/usb/esd_usb2.c +++ b/drivers/net/can/usb/esd_usb.c @@ -1,8 +1,9 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * CAN driver for esd CAN-USB/2 and CAN-USB/Micro + * CAN driver for esd electronics gmbh CAN-USB/2 and CAN-USB/Micro * - * Copyright (C) 2010-2012 Matthias Fuchs <matthias.fuchs@esd.eu>, esd gmbh + * Copyright (C) 2010-2012 esd electronic system design gmbh, Matthias Fuchs <socketcan@esd.eu> + * Copyright (C) 2022 esd electronics gmbh, Frank Jungclaus <frank.jungclaus@esd.eu> */ #include <linux/signal.h> #include <linux/slab.h> @@ -14,20 +15,24 @@ #include <linux/can/dev.h> #include <linux/can/error.h> -MODULE_AUTHOR("Matthias Fuchs <matthias.fuchs@esd.eu>"); -MODULE_DESCRIPTION("CAN driver for esd CAN-USB/2 and CAN-USB/Micro interfaces"); +MODULE_AUTHOR("Matthias Fuchs <socketcan@esd.eu>"); +MODULE_AUTHOR("Frank Jungclaus <frank.jungclaus@esd.eu>"); +MODULE_DESCRIPTION("CAN driver for esd electronics gmbh CAN-USB/2 and CAN-USB/Micro interfaces"); MODULE_LICENSE("GPL v2"); -/* Define these values to match your devices */ +/* USB vendor and product ID */ #define USB_ESDGMBH_VENDOR_ID 0x0ab4 #define USB_CANUSB2_PRODUCT_ID 0x0010 #define USB_CANUSBM_PRODUCT_ID 0x0011 +/* CAN controller clock frequencies */ #define ESD_USB2_CAN_CLOCK 60000000 #define ESD_USBM_CAN_CLOCK 36000000 
-#define ESD_USB2_MAX_NETS 2 -/* USB2 commands */ +/* Maximum number of CAN nets */ +#define ESD_USB_MAX_NETS 2 + +/* USB commands */ #define CMD_VERSION 1 /* also used for VERSION_REPLY */ #define CMD_CAN_RX 2 /* device to host only */ #define CMD_CAN_TX 3 /* also used for TX_DONE */ @@ -43,13 +48,15 @@ MODULE_LICENSE("GPL v2"); #define ESD_EVENT 0x40000000 #define ESD_IDMASK 0x1fffffff -/* esd CAN event ids used by this driver */ -#define ESD_EV_CAN_ERROR_EXT 2 +/* esd CAN event ids */ +#define ESD_EV_CAN_ERROR_EXT 2 /* CAN controller specific diagnostic data */ /* baudrate message flags */ -#define ESD_USB2_UBR 0x80000000 -#define ESD_USB2_LOM 0x40000000 -#define ESD_USB2_NO_BAUDRATE 0x7fffffff +#define ESD_USB_UBR 0x80000000 +#define ESD_USB_LOM 0x40000000 +#define ESD_USB_NO_BAUDRATE 0x7fffffff + +/* bit timing CAN-USB/2 */ #define ESD_USB2_TSEG1_MIN 1 #define ESD_USB2_TSEG1_MAX 16 #define ESD_USB2_TSEG1_SHIFT 16 @@ -68,7 +75,7 @@ MODULE_LICENSE("GPL v2"); #define ESD_ID_ENABLE 0x80 #define ESD_MAX_ID_SEGMENT 64 -/* SJA1000 ECC register (emulated by usb2 firmware) */ +/* SJA1000 ECC register (emulated by usb firmware) */ #define SJA1000_ECC_SEG 0x1F #define SJA1000_ECC_DIR 0x20 #define SJA1000_ECC_ERR 0x06 @@ -158,7 +165,7 @@ struct set_baudrate_msg { }; /* Main message type used between library and application */ -struct __attribute__ ((packed)) esd_usb2_msg { +struct __packed esd_usb_msg { union { struct header_msg hdr; struct version_msg version; @@ -171,23 +178,23 @@ struct __attribute__ ((packed)) esd_usb2_msg { } msg; }; -static struct usb_device_id esd_usb2_table[] = { +static struct usb_device_id esd_usb_table[] = { {USB_DEVICE(USB_ESDGMBH_VENDOR_ID, USB_CANUSB2_PRODUCT_ID)}, {USB_DEVICE(USB_ESDGMBH_VENDOR_ID, USB_CANUSBM_PRODUCT_ID)}, {} }; -MODULE_DEVICE_TABLE(usb, esd_usb2_table); +MODULE_DEVICE_TABLE(usb, esd_usb_table); -struct esd_usb2_net_priv; +struct esd_usb_net_priv; struct esd_tx_urb_context { - struct esd_usb2_net_priv *priv; + struct 
esd_usb_net_priv *priv; u32 echo_index; }; -struct esd_usb2 { +struct esd_usb { struct usb_device *udev; - struct esd_usb2_net_priv *nets[ESD_USB2_MAX_NETS]; + struct esd_usb_net_priv *nets[ESD_USB_MAX_NETS]; struct usb_anchor rx_submitted; @@ -198,22 +205,22 @@ struct esd_usb2 { dma_addr_t rxbuf_dma[MAX_RX_URBS]; }; -struct esd_usb2_net_priv { +struct esd_usb_net_priv { struct can_priv can; /* must be the first member */ atomic_t active_tx_jobs; struct usb_anchor tx_submitted; struct esd_tx_urb_context tx_contexts[MAX_TX_URBS]; - struct esd_usb2 *usb2; + struct esd_usb *usb; struct net_device *netdev; int index; u8 old_state; struct can_berr_counter bec; }; -static void esd_usb2_rx_event(struct esd_usb2_net_priv *priv, - struct esd_usb2_msg *msg) +static void esd_usb_rx_event(struct esd_usb_net_priv *priv, + struct esd_usb_msg *msg) { struct net_device_stats *stats = &priv->netdev->stats; struct can_frame *cf; @@ -296,8 +303,8 @@ static void esd_usb2_rx_event(struct esd_usb2_net_priv *priv, } } -static void esd_usb2_rx_can_msg(struct esd_usb2_net_priv *priv, - struct esd_usb2_msg *msg) +static void esd_usb_rx_can_msg(struct esd_usb_net_priv *priv, + struct esd_usb_msg *msg) { struct net_device_stats *stats = &priv->netdev->stats; struct can_frame *cf; @@ -311,7 +318,7 @@ static void esd_usb2_rx_can_msg(struct esd_usb2_net_priv *priv, id = le32_to_cpu(msg->msg.rx.id); if (id & ESD_EVENT) { - esd_usb2_rx_event(priv, msg); + esd_usb_rx_event(priv, msg); } else { skb = alloc_can_skb(priv->netdev, &cf); if (skb == NULL) { @@ -338,12 +345,10 @@ static void esd_usb2_rx_can_msg(struct esd_usb2_net_priv *priv, netif_rx(skb); } - - return; } -static void esd_usb2_tx_done_msg(struct esd_usb2_net_priv *priv, - struct esd_usb2_msg *msg) +static void esd_usb_tx_done_msg(struct esd_usb_net_priv *priv, + struct esd_usb_msg *msg) { struct net_device_stats *stats = &priv->netdev->stats; struct net_device *netdev = priv->netdev; @@ -370,9 +375,9 @@ static void 
esd_usb2_tx_done_msg(struct esd_usb2_net_priv *priv, netif_wake_queue(netdev); } -static void esd_usb2_read_bulk_callback(struct urb *urb) +static void esd_usb_read_bulk_callback(struct urb *urb) { - struct esd_usb2 *dev = urb->context; + struct esd_usb *dev = urb->context; int retval; int pos = 0; int i; @@ -394,9 +399,9 @@ static void esd_usb2_read_bulk_callback(struct urb *urb) } while (pos < urb->actual_length) { - struct esd_usb2_msg *msg; + struct esd_usb_msg *msg; - msg = (struct esd_usb2_msg *)(urb->transfer_buffer + pos); + msg = (struct esd_usb_msg *)(urb->transfer_buffer + pos); switch (msg->msg.hdr.cmd) { case CMD_CAN_RX: @@ -405,7 +410,7 @@ static void esd_usb2_read_bulk_callback(struct urb *urb) break; } - esd_usb2_rx_can_msg(dev->nets[msg->msg.rx.net], msg); + esd_usb_rx_can_msg(dev->nets[msg->msg.rx.net], msg); break; case CMD_CAN_TX: @@ -414,8 +419,8 @@ static void esd_usb2_read_bulk_callback(struct urb *urb) break; } - esd_usb2_tx_done_msg(dev->nets[msg->msg.txdone.net], - msg); + esd_usb_tx_done_msg(dev->nets[msg->msg.txdone.net], + msg); break; } @@ -430,7 +435,7 @@ static void esd_usb2_read_bulk_callback(struct urb *urb) resubmit_urb: usb_fill_bulk_urb(urb, dev->udev, usb_rcvbulkpipe(dev->udev, 1), urb->transfer_buffer, RX_BUFFER_SIZE, - esd_usb2_read_bulk_callback, dev); + esd_usb_read_bulk_callback, dev); retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval == -ENODEV) { @@ -442,19 +447,15 @@ resubmit_urb: dev_err(dev->udev->dev.parent, "failed resubmitting read bulk urb: %d\n", retval); } - - return; } -/* - * callback for bulk IN urb - */ -static void esd_usb2_write_bulk_callback(struct urb *urb) +/* callback for bulk IN urb */ +static void esd_usb_write_bulk_callback(struct urb *urb) { struct esd_tx_urb_context *context = urb->context; - struct esd_usb2_net_priv *priv; + struct esd_usb_net_priv *priv; struct net_device *netdev; - size_t size = sizeof(struct esd_usb2_msg); + size_t size = sizeof(struct esd_usb_msg); WARN_ON(!context); @@ 
-478,7 +479,7 @@ static ssize_t firmware_show(struct device *d, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(d); - struct esd_usb2 *dev = usb_get_intfdata(intf); + struct esd_usb *dev = usb_get_intfdata(intf); return sprintf(buf, "%d.%d.%d\n", (dev->version >> 12) & 0xf, @@ -491,7 +492,7 @@ static ssize_t hardware_show(struct device *d, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(d); - struct esd_usb2 *dev = usb_get_intfdata(intf); + struct esd_usb *dev = usb_get_intfdata(intf); return sprintf(buf, "%d.%d.%d\n", (dev->version >> 28) & 0xf, @@ -504,13 +505,13 @@ static ssize_t nets_show(struct device *d, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(d); - struct esd_usb2 *dev = usb_get_intfdata(intf); + struct esd_usb *dev = usb_get_intfdata(intf); return sprintf(buf, "%d", dev->net_count); } static DEVICE_ATTR_RO(nets); -static int esd_usb2_send_msg(struct esd_usb2 *dev, struct esd_usb2_msg *msg) +static int esd_usb_send_msg(struct esd_usb *dev, struct esd_usb_msg *msg) { int actual_length; @@ -522,8 +523,8 @@ static int esd_usb2_send_msg(struct esd_usb2 *dev, struct esd_usb2_msg *msg) 1000); } -static int esd_usb2_wait_msg(struct esd_usb2 *dev, - struct esd_usb2_msg *msg) +static int esd_usb_wait_msg(struct esd_usb *dev, + struct esd_usb_msg *msg) { int actual_length; @@ -535,7 +536,7 @@ static int esd_usb2_wait_msg(struct esd_usb2 *dev, 1000); } -static int esd_usb2_setup_rx_urbs(struct esd_usb2 *dev) +static int esd_usb_setup_rx_urbs(struct esd_usb *dev) { int i, err = 0; @@ -568,7 +569,7 @@ static int esd_usb2_setup_rx_urbs(struct esd_usb2 *dev) usb_fill_bulk_urb(urb, dev->udev, usb_rcvbulkpipe(dev->udev, 1), buf, RX_BUFFER_SIZE, - esd_usb2_read_bulk_callback, dev); + esd_usb_read_bulk_callback, dev); urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; usb_anchor_urb(urb, &dev->rx_submitted); @@ -606,14 +607,12 @@ freeurb: return 0; 
} -/* - * Start interface - */ -static int esd_usb2_start(struct esd_usb2_net_priv *priv) +/* Start interface */ +static int esd_usb_start(struct esd_usb_net_priv *priv) { - struct esd_usb2 *dev = priv->usb2; + struct esd_usb *dev = priv->usb; struct net_device *netdev = priv->netdev; - struct esd_usb2_msg *msg; + struct esd_usb_msg *msg; int err, i; msg = kmalloc(sizeof(*msg), GFP_KERNEL); @@ -622,8 +621,7 @@ static int esd_usb2_start(struct esd_usb2_net_priv *priv) goto out; } - /* - * Enable all IDs + /* Enable all IDs * The IDADD message takes up to 64 32 bit bitmasks (2048 bits). * Each bit represents one 11 bit CAN identifier. A set bit * enables reception of the corresponding CAN identifier. A cleared @@ -644,11 +642,11 @@ static int esd_usb2_start(struct esd_usb2_net_priv *priv) /* enable 29bit extended IDs */ msg->msg.filter.mask[ESD_MAX_ID_SEGMENT] = cpu_to_le32(0x00000001); - err = esd_usb2_send_msg(dev, msg); + err = esd_usb_send_msg(dev, msg); if (err) goto out; - err = esd_usb2_setup_rx_urbs(dev); + err = esd_usb_setup_rx_urbs(dev); if (err) goto out; @@ -664,9 +662,9 @@ out: return err; } -static void unlink_all_urbs(struct esd_usb2 *dev) +static void unlink_all_urbs(struct esd_usb *dev) { - struct esd_usb2_net_priv *priv; + struct esd_usb_net_priv *priv; int i, j; usb_kill_anchored_urbs(&dev->rx_submitted); @@ -687,9 +685,9 @@ static void unlink_all_urbs(struct esd_usb2 *dev) } } -static int esd_usb2_open(struct net_device *netdev) +static int esd_usb_open(struct net_device *netdev) { - struct esd_usb2_net_priv *priv = netdev_priv(netdev); + struct esd_usb_net_priv *priv = netdev_priv(netdev); int err; /* common open */ @@ -698,7 +696,7 @@ static int esd_usb2_open(struct net_device *netdev) return err; /* finally start device */ - err = esd_usb2_start(priv); + err = esd_usb_start(priv); if (err) { netdev_warn(netdev, "couldn't start device: %d\n", err); close_candev(netdev); @@ -710,20 +708,20 @@ static int esd_usb2_open(struct net_device *netdev) 
return 0; } -static netdev_tx_t esd_usb2_start_xmit(struct sk_buff *skb, +static netdev_tx_t esd_usb_start_xmit(struct sk_buff *skb, struct net_device *netdev) { - struct esd_usb2_net_priv *priv = netdev_priv(netdev); - struct esd_usb2 *dev = priv->usb2; + struct esd_usb_net_priv *priv = netdev_priv(netdev); + struct esd_usb *dev = priv->usb; struct esd_tx_urb_context *context = NULL; struct net_device_stats *stats = &netdev->stats; struct can_frame *cf = (struct can_frame *)skb->data; - struct esd_usb2_msg *msg; + struct esd_usb_msg *msg; struct urb *urb; u8 *buf; int i, err; int ret = NETDEV_TX_OK; - size_t size = sizeof(struct esd_usb2_msg); + size_t size = sizeof(struct esd_usb_msg); if (can_dropped_invalid_skb(netdev, skb)) return NETDEV_TX_OK; @@ -745,7 +743,7 @@ static netdev_tx_t esd_usb2_start_xmit(struct sk_buff *skb, goto nobufmem; } - msg = (struct esd_usb2_msg *)buf; + msg = (struct esd_usb_msg *)buf; msg->msg.hdr.len = 3; /* minimal length */ msg->msg.hdr.cmd = CMD_CAN_TX; @@ -771,9 +769,7 @@ static netdev_tx_t esd_usb2_start_xmit(struct sk_buff *skb, } } - /* - * This may never happen. - */ + /* This may never happen */ if (!context) { netdev_warn(netdev, "couldn't find free context\n"); ret = NETDEV_TX_BUSY; @@ -788,7 +784,7 @@ static netdev_tx_t esd_usb2_start_xmit(struct sk_buff *skb, usb_fill_bulk_urb(urb, dev->udev, usb_sndbulkpipe(dev->udev, 2), buf, msg->msg.hdr.len << 2, - esd_usb2_write_bulk_callback, context); + esd_usb_write_bulk_callback, context); urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; @@ -821,8 +817,7 @@ static netdev_tx_t esd_usb2_start_xmit(struct sk_buff *skb, netif_trans_update(netdev); - /* - * Release our reference to this URB, the USB core will eventually free + /* Release our reference to this URB, the USB core will eventually free * it entirely. 
*/ usb_free_urb(urb); @@ -839,24 +834,24 @@ nourbmem: return ret; } -static int esd_usb2_close(struct net_device *netdev) +static int esd_usb_close(struct net_device *netdev) { - struct esd_usb2_net_priv *priv = netdev_priv(netdev); - struct esd_usb2_msg *msg; + struct esd_usb_net_priv *priv = netdev_priv(netdev); + struct esd_usb_msg *msg; int i; msg = kmalloc(sizeof(*msg), GFP_KERNEL); if (!msg) return -ENOMEM; - /* Disable all IDs (see esd_usb2_start()) */ + /* Disable all IDs (see esd_usb_start()) */ msg->msg.hdr.cmd = CMD_IDADD; msg->msg.hdr.len = 2 + ESD_MAX_ID_SEGMENT; msg->msg.filter.net = priv->index; msg->msg.filter.option = ESD_ID_ENABLE; /* start with segment 0 */ for (i = 0; i <= ESD_MAX_ID_SEGMENT; i++) msg->msg.filter.mask[i] = 0; - if (esd_usb2_send_msg(priv->usb2, msg) < 0) + if (esd_usb_send_msg(priv->usb, msg) < 0) netdev_err(netdev, "sending idadd message failed\n"); /* set CAN controller to reset mode */ @@ -864,8 +859,8 @@ static int esd_usb2_close(struct net_device *netdev) msg->msg.hdr.cmd = CMD_SETBAUD; msg->msg.setbaud.net = priv->index; msg->msg.setbaud.rsvd = 0; - msg->msg.setbaud.baud = cpu_to_le32(ESD_USB2_NO_BAUDRATE); - if (esd_usb2_send_msg(priv->usb2, msg) < 0) + msg->msg.setbaud.baud = cpu_to_le32(ESD_USB_NO_BAUDRATE); + if (esd_usb_send_msg(priv->usb, msg) < 0) netdev_err(netdev, "sending setbaud message failed\n"); priv->can.state = CAN_STATE_STOPPED; @@ -879,10 +874,10 @@ static int esd_usb2_close(struct net_device *netdev) return 0; } -static const struct net_device_ops esd_usb2_netdev_ops = { - .ndo_open = esd_usb2_open, - .ndo_stop = esd_usb2_close, - .ndo_start_xmit = esd_usb2_start_xmit, +static const struct net_device_ops esd_usb_netdev_ops = { + .ndo_open = esd_usb_open, + .ndo_stop = esd_usb_close, + .ndo_start_xmit = esd_usb_start_xmit, .ndo_change_mtu = can_change_mtu, }; @@ -900,20 +895,20 @@ static const struct can_bittiming_const esd_usb2_bittiming_const = { static int esd_usb2_set_bittiming(struct net_device 
*netdev) { - struct esd_usb2_net_priv *priv = netdev_priv(netdev); + struct esd_usb_net_priv *priv = netdev_priv(netdev); struct can_bittiming *bt = &priv->can.bittiming; - struct esd_usb2_msg *msg; + struct esd_usb_msg *msg; int err; u32 canbtr; int sjw_shift; - canbtr = ESD_USB2_UBR; + canbtr = ESD_USB_UBR; if (priv->can.ctrlmode & CAN_CTRLMODE_LISTENONLY) - canbtr |= ESD_USB2_LOM; + canbtr |= ESD_USB_LOM; canbtr |= (bt->brp - 1) & (ESD_USB2_BRP_MAX - 1); - if (le16_to_cpu(priv->usb2->udev->descriptor.idProduct) == + if (le16_to_cpu(priv->usb->udev->descriptor.idProduct) == USB_CANUSBM_PRODUCT_ID) sjw_shift = ESD_USBM_SJW_SHIFT; else @@ -941,16 +936,16 @@ static int esd_usb2_set_bittiming(struct net_device *netdev) netdev_info(netdev, "setting BTR=%#x\n", canbtr); - err = esd_usb2_send_msg(priv->usb2, msg); + err = esd_usb_send_msg(priv->usb, msg); kfree(msg); return err; } -static int esd_usb2_get_berr_counter(const struct net_device *netdev, - struct can_berr_counter *bec) +static int esd_usb_get_berr_counter(const struct net_device *netdev, + struct can_berr_counter *bec) { - struct esd_usb2_net_priv *priv = netdev_priv(netdev); + struct esd_usb_net_priv *priv = netdev_priv(netdev); bec->txerr = priv->bec.txerr; bec->rxerr = priv->bec.rxerr; @@ -958,7 +953,7 @@ static int esd_usb2_get_berr_counter(const struct net_device *netdev, return 0; } -static int esd_usb2_set_mode(struct net_device *netdev, enum can_mode mode) +static int esd_usb_set_mode(struct net_device *netdev, enum can_mode mode) { switch (mode) { case CAN_MODE_START: @@ -972,11 +967,11 @@ static int esd_usb2_set_mode(struct net_device *netdev, enum can_mode mode) return 0; } -static int esd_usb2_probe_one_net(struct usb_interface *intf, int index) +static int esd_usb_probe_one_net(struct usb_interface *intf, int index) { - struct esd_usb2 *dev = usb_get_intfdata(intf); + struct esd_usb *dev = usb_get_intfdata(intf); struct net_device *netdev; - struct esd_usb2_net_priv *priv; + struct 
esd_usb_net_priv *priv; int err = 0; int i; @@ -995,7 +990,7 @@ static int esd_usb2_probe_one_net(struct usb_interface *intf, int index) for (i = 0; i < MAX_TX_URBS; i++) priv->tx_contexts[i].echo_index = MAX_TX_URBS; - priv->usb2 = dev; + priv->usb = dev; priv->netdev = netdev; priv->index = index; @@ -1013,12 +1008,12 @@ static int esd_usb2_probe_one_net(struct usb_interface *intf, int index) priv->can.bittiming_const = &esd_usb2_bittiming_const; priv->can.do_set_bittiming = esd_usb2_set_bittiming; - priv->can.do_set_mode = esd_usb2_set_mode; - priv->can.do_get_berr_counter = esd_usb2_get_berr_counter; + priv->can.do_set_mode = esd_usb_set_mode; + priv->can.do_get_berr_counter = esd_usb_get_berr_counter; netdev->flags |= IFF_ECHO; /* we support local echo */ - netdev->netdev_ops = &esd_usb2_netdev_ops; + netdev->netdev_ops = &esd_usb_netdev_ops; SET_NETDEV_DEV(netdev, &intf->dev); netdev->dev_id = index; @@ -1038,17 +1033,16 @@ done: return err; } -/* - * probe function for new USB2 devices +/* probe function for new USB devices * * check version information and number of available * CAN interfaces */ -static int esd_usb2_probe(struct usb_interface *intf, +static int esd_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { - struct esd_usb2 *dev; - struct esd_usb2_msg *msg; + struct esd_usb *dev; + struct esd_usb_msg *msg; int i, err; dev = kzalloc(sizeof(*dev), GFP_KERNEL); @@ -1076,13 +1070,13 @@ static int esd_usb2_probe(struct usb_interface *intf, msg->msg.version.flags = 0; msg->msg.version.drv_version = 0; - err = esd_usb2_send_msg(dev, msg); + err = esd_usb_send_msg(dev, msg); if (err < 0) { dev_err(&intf->dev, "sending version message failed\n"); goto free_msg; } - err = esd_usb2_wait_msg(dev, msg); + err = esd_usb_wait_msg(dev, msg); if (err < 0) { dev_err(&intf->dev, "no version message answer\n"); goto free_msg; @@ -1105,7 +1099,7 @@ static int esd_usb2_probe(struct usb_interface *intf, /* do per device probing */ for (i = 0; i < 
dev->net_count; i++) - esd_usb2_probe_one_net(intf, i); + esd_usb_probe_one_net(intf, i); free_msg: kfree(msg); @@ -1115,12 +1109,10 @@ done: return err; } -/* - * called by the usb core when the device is removed from the system - */ -static void esd_usb2_disconnect(struct usb_interface *intf) +/* called by the usb core when the device is removed from the system */ +static void esd_usb_disconnect(struct usb_interface *intf) { - struct esd_usb2 *dev = usb_get_intfdata(intf); + struct esd_usb *dev = usb_get_intfdata(intf); struct net_device *netdev; int i; @@ -1144,11 +1136,11 @@ static void esd_usb2_disconnect(struct usb_interface *intf) } /* usb specific object needed to register this driver with the usb subsystem */ -static struct usb_driver esd_usb2_driver = { - .name = "esd_usb2", - .probe = esd_usb2_probe, - .disconnect = esd_usb2_disconnect, - .id_table = esd_usb2_table, +static struct usb_driver esd_usb_driver = { + .name = "esd_usb", + .probe = esd_usb_probe, + .disconnect = esd_usb_disconnect, + .id_table = esd_usb_table, }; -module_usb_driver(esd_usb2_driver); +module_usb_driver(esd_usb_driver); diff --git a/drivers/net/can/usb/etas_es58x/es58x_core.c b/drivers/net/can/usb/etas_es58x/es58x_core.c index 2d73ebbf3836..7353745f92d7 100644 --- a/drivers/net/can/usb/etas_es58x/es58x_core.c +++ b/drivers/net/can/usb/etas_es58x/es58x_core.c @@ -1707,7 +1707,7 @@ static int es58x_alloc_rx_urbs(struct es58x_device *es58x_dev) { const struct device *dev = es58x_dev->dev; const struct es58x_parameters *param = es58x_dev->param; - size_t rx_buf_len = es58x_dev->rx_max_packet_size; + u16 rx_buf_len = usb_maxpacket(es58x_dev->udev, es58x_dev->rx_pipe); struct urb *urb; u8 *buf; int i; @@ -1739,7 +1739,7 @@ static int es58x_alloc_rx_urbs(struct es58x_device *es58x_dev) dev_err(dev, "%s: Could not setup any rx URBs\n", __func__); return ret; } - dev_dbg(dev, "%s: Allocated %d rx URBs each of size %zu\n", + dev_dbg(dev, "%s: Allocated %d rx URBs each of size %u\n", 
__func__, i, rx_buf_len); return ret; @@ -2223,7 +2223,6 @@ static struct es58x_device *es58x_init_es58x_dev(struct usb_interface *intf, ep_in->bEndpointAddress); es58x_dev->tx_pipe = usb_sndbulkpipe(es58x_dev->udev, ep_out->bEndpointAddress); - es58x_dev->rx_max_packet_size = le16_to_cpu(ep_in->wMaxPacketSize); return es58x_dev; } diff --git a/drivers/net/can/usb/etas_es58x/es58x_core.h b/drivers/net/can/usb/etas_es58x/es58x_core.h index e5033cb5e695..d769bdf740b7 100644 --- a/drivers/net/can/usb/etas_es58x/es58x_core.h +++ b/drivers/net/can/usb/etas_es58x/es58x_core.h @@ -380,7 +380,6 @@ struct es58x_operators { * @timestamps: a temporary buffer to store the time stamps before * feeding them to es58x_can_get_echo_skb(). Can only be used * in RX branches. - * @rx_max_packet_size: Maximum length of bulk-in URB. * @num_can_ch: Number of CAN channel (i.e. number of elements of @netdev). * @opened_channel_cnt: number of channels opened. Free of race * conditions because its two users (net_device_ops:ndo_open() @@ -401,8 +400,8 @@ struct es58x_device { const struct es58x_parameters *param; const struct es58x_operators *ops; - int rx_pipe; - int tx_pipe; + unsigned int rx_pipe; + unsigned int tx_pipe; struct usb_anchor rx_urbs; struct usb_anchor tx_urbs_busy; @@ -414,7 +413,6 @@ struct es58x_device { u64 timestamps[ES58X_ECHO_BULK_MAX]; - u16 rx_max_packet_size; u8 num_can_ch; u8 opened_channel_cnt; diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index 8a3b7b103ca4..393b2d9f9d2a 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* Xilinx CAN device driver * - * Copyright (C) 2012 - 2014 Xilinx, Inc. + * Copyright (C) 2012 - 2022 Xilinx, Inc. * Copyright (C) 2009 PetaLogix. All rights reserved. * Copyright (C) 2017 - 2018 Sandvik Mining and Construction Oy * @@ -9,6 +9,7 @@ * This driver is developed for Axi CAN IP and for Zynq CANPS Controller. 
*/ +#include <linux/bitfield.h> #include <linux/clk.h> #include <linux/errno.h> #include <linux/init.h> @@ -50,7 +51,7 @@ enum xcan_reg { /* only on CAN FD cores */ XCAN_F_BRPR_OFFSET = 0x088, /* Data Phase Baud Rate - * Prescalar + * Prescaler */ XCAN_F_BTR_OFFSET = 0x08C, /* Data Phase Bit Timing */ XCAN_TRR_OFFSET = 0x0090, /* TX Buffer Ready Request */ @@ -86,6 +87,8 @@ enum xcan_reg { #define XCAN_MSR_LBACK_MASK 0x00000002 /* Loop back mode select */ #define XCAN_MSR_SLEEP_MASK 0x00000001 /* Sleep mode select */ #define XCAN_BRPR_BRP_MASK 0x000000FF /* Baud rate prescaler */ +#define XCAN_BRPR_TDCO_MASK GENMASK(12, 8) /* TDCO */ +#define XCAN_2_BRPR_TDCO_MASK GENMASK(13, 8) /* TDCO for CANFD 2.0 */ #define XCAN_BTR_SJW_MASK 0x00000180 /* Synchronous jump width */ #define XCAN_BTR_TS2_MASK 0x00000070 /* Time segment 2 */ #define XCAN_BTR_TS1_MASK 0x0000000F /* Time segment 1 */ @@ -99,6 +102,7 @@ enum xcan_reg { #define XCAN_ESR_STER_MASK 0x00000004 /* Stuff error */ #define XCAN_ESR_FMER_MASK 0x00000002 /* Form error */ #define XCAN_ESR_CRCER_MASK 0x00000001 /* CRC error */ +#define XCAN_SR_TDCV_MASK GENMASK(22, 16) /* TDCV Value */ #define XCAN_SR_TXFLL_MASK 0x00000400 /* TX FIFO is full */ #define XCAN_SR_ESTAT_MASK 0x00000180 /* Error status */ #define XCAN_SR_ERRWRN_MASK 0x00000040 /* Error warning */ @@ -132,6 +136,7 @@ enum xcan_reg { #define XCAN_DLCR_BRS_MASK 0x04000000 /* BRS Mask in DLC */ /* CAN register bit shift - XCAN_<REG>_<BIT>_SHIFT */ +#define XCAN_BRPR_TDC_ENABLE BIT(16) /* Transmitter Delay Compensation (TDC) Enable */ #define XCAN_BTR_SJW_SHIFT 7 /* Synchronous jump width */ #define XCAN_BTR_TS2_SHIFT 4 /* Time segment 2 */ #define XCAN_BTR_SJW_SHIFT_CANFD 16 /* Synchronous jump width */ @@ -276,6 +281,26 @@ static const struct can_bittiming_const xcan_data_bittiming_const_canfd2 = { .brp_inc = 1, }; +/* Transmission Delay Compensation constants for CANFD 1.0 */ +static const struct can_tdc_const xcan_tdc_const_canfd = { + .tdcv_min = 0, + 
.tdcv_max = 0, /* Manual mode not supported. */ + .tdco_min = 0, + .tdco_max = 32, + .tdcf_min = 0, /* Filter window not supported */ + .tdcf_max = 0, +}; + +/* Transmission Delay Compensation constants for CANFD 2.0 */ +static const struct can_tdc_const xcan_tdc_const_canfd2 = { + .tdcv_min = 0, + .tdcv_max = 0, /* Manual mode not supported. */ + .tdco_min = 0, + .tdco_max = 64, + .tdcf_min = 0, /* Filter window not supported */ + .tdcf_max = 0, +}; + /** * xcan_write_reg_le - Write a value to the device register little endian * @priv: Driver private data structure @@ -405,7 +430,7 @@ static int xcan_set_bittiming(struct net_device *ndev) return -EPERM; } - /* Setting Baud Rate prescalar value in BRPR Register */ + /* Setting Baud Rate prescaler value in BRPR Register */ btr0 = (bt->brp - 1); /* Setting Time Segment 1 in BTR Register */ @@ -422,8 +447,16 @@ static int xcan_set_bittiming(struct net_device *ndev) if (priv->devtype.cantype == XAXI_CANFD || priv->devtype.cantype == XAXI_CANFD_2_0) { - /* Setting Baud Rate prescalar value in F_BRPR Register */ + /* Setting Baud Rate prescaler value in F_BRPR Register */ btr0 = dbt->brp - 1; + if (can_tdc_is_enabled(&priv->can)) { + if (priv->devtype.cantype == XAXI_CANFD) + btr0 |= FIELD_PREP(XCAN_BRPR_TDCO_MASK, priv->can.tdc.tdco) | + XCAN_BRPR_TDC_ENABLE; + else + btr0 |= FIELD_PREP(XCAN_2_BRPR_TDCO_MASK, priv->can.tdc.tdco) | + XCAN_BRPR_TDC_ENABLE; + } /* Setting Time Segment 1 in BTR Register */ btr1 = dbt->prop_seg + dbt->phase_seg1 - 1; @@ -1483,6 +1516,22 @@ static int xcan_get_berr_counter(const struct net_device *ndev, return 0; } +/** + * xcan_get_auto_tdcv - Get Transmitter Delay Compensation Value + * @ndev: Pointer to net_device structure + * @tdcv: Pointer to TDCV value + * + * Return: 0 on success + */ +static int xcan_get_auto_tdcv(const struct net_device *ndev, u32 *tdcv) +{ + struct xcan_priv *priv = netdev_priv(ndev); + + *tdcv = FIELD_GET(XCAN_SR_TDCV_MASK, priv->read_reg(priv, XCAN_SR_OFFSET)); + 
+ return 0; +} + static const struct net_device_ops xcan_netdev_ops = { .ndo_open = xcan_open, .ndo_stop = xcan_close, @@ -1735,17 +1784,24 @@ static int xcan_probe(struct platform_device *pdev) priv->can.ctrlmode_supported = CAN_CTRLMODE_LOOPBACK | CAN_CTRLMODE_BERR_REPORTING; - if (devtype->cantype == XAXI_CANFD) + if (devtype->cantype == XAXI_CANFD) { priv->can.data_bittiming_const = &xcan_data_bittiming_const_canfd; + priv->can.tdc_const = &xcan_tdc_const_canfd; + } - if (devtype->cantype == XAXI_CANFD_2_0) + if (devtype->cantype == XAXI_CANFD_2_0) { priv->can.data_bittiming_const = &xcan_data_bittiming_const_canfd2; + priv->can.tdc_const = &xcan_tdc_const_canfd2; + } if (devtype->cantype == XAXI_CANFD || - devtype->cantype == XAXI_CANFD_2_0) - priv->can.ctrlmode_supported |= CAN_CTRLMODE_FD; + devtype->cantype == XAXI_CANFD_2_0) { + priv->can.ctrlmode_supported |= CAN_CTRLMODE_FD | + CAN_CTRLMODE_TDC_AUTO; + priv->can.do_get_auto_tdcv = xcan_get_auto_tdcv; + } priv->reg_base = addr; priv->tx_max = tx_max; diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig index 6d1fcb08bba1..702d68ae435a 100644 --- a/drivers/net/dsa/Kconfig +++ b/drivers/net/dsa/Kconfig @@ -70,6 +70,15 @@ config NET_DSA_QCA8K source "drivers/net/dsa/realtek/Kconfig" +config NET_DSA_RZN1_A5PSW + tristate "Renesas RZ/N1 A5PSW Ethernet switch support" + depends on OF && ARCH_RZN1 + select NET_DSA_TAG_RZN1_A5PSW + select PCS_RZN1_MIIC + help + This driver supports the A5PSW switch, which is embedded in Renesas + RZ/N1 SoC. 
+ config NET_DSA_SMSC_LAN9303 tristate select NET_DSA_TAG_LAN9303 diff --git a/drivers/net/dsa/Makefile b/drivers/net/dsa/Makefile index e73838c12256..b32907afa702 100644 --- a/drivers/net/dsa/Makefile +++ b/drivers/net/dsa/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_NET_DSA_LANTIQ_GSWIP) += lantiq_gswip.o obj-$(CONFIG_NET_DSA_MT7530) += mt7530.o obj-$(CONFIG_NET_DSA_MV88E6060) += mv88e6060.o obj-$(CONFIG_NET_DSA_QCA8K) += qca8k.o +obj-$(CONFIG_NET_DSA_RZN1_A5PSW) += rzn1_a5psw.o obj-$(CONFIG_NET_DSA_SMSC_LAN9303) += lan9303-core.o obj-$(CONFIG_NET_DSA_SMSC_LAN9303_I2C) += lan9303_i2c.o obj-$(CONFIG_NET_DSA_SMSC_LAN9303_MDIO) += lan9303_mdio.o diff --git a/drivers/net/dsa/microchip/Kconfig b/drivers/net/dsa/microchip/Kconfig index c9e2a8989556..2edb88080790 100644 --- a/drivers/net/dsa/microchip/Kconfig +++ b/drivers/net/dsa/microchip/Kconfig @@ -1,49 +1,29 @@ # SPDX-License-Identifier: GPL-2.0-only -config NET_DSA_MICROCHIP_KSZ_COMMON - select NET_DSA_TAG_KSZ - tristate - -menuconfig NET_DSA_MICROCHIP_KSZ9477 - tristate "Microchip KSZ9477 series switch support" +menuconfig NET_DSA_MICROCHIP_KSZ_COMMON + tristate "Microchip KSZ8795/KSZ9477 series switch support" depends on NET_DSA - select NET_DSA_MICROCHIP_KSZ_COMMON + select NET_DSA_TAG_KSZ help - This driver adds support for Microchip KSZ9477 switch chips. + This driver adds support for Microchip KSZ9477 series switch and + KSZ8795/KSZ88x3 switch chips. config NET_DSA_MICROCHIP_KSZ9477_I2C - tristate "KSZ9477 series I2C connected switch driver" - depends on NET_DSA_MICROCHIP_KSZ9477 && I2C + tristate "KSZ series I2C connected switch driver" + depends on NET_DSA_MICROCHIP_KSZ_COMMON && I2C select REGMAP_I2C help Select to enable support for registering switches configured through I2C. 
-config NET_DSA_MICROCHIP_KSZ9477_SPI - tristate "KSZ9477 series SPI connected switch driver" - depends on NET_DSA_MICROCHIP_KSZ9477 && SPI +config NET_DSA_MICROCHIP_KSZ_SPI + tristate "KSZ series SPI connected switch driver" + depends on NET_DSA_MICROCHIP_KSZ_COMMON && SPI select REGMAP_SPI help Select to enable support for registering switches configured through SPI. -menuconfig NET_DSA_MICROCHIP_KSZ8795 - tristate "Microchip KSZ8795 series switch support" - depends on NET_DSA - select NET_DSA_MICROCHIP_KSZ_COMMON - help - This driver adds support for Microchip KSZ8795/KSZ88X3 switch chips. - -config NET_DSA_MICROCHIP_KSZ8795_SPI - tristate "KSZ8795 series SPI connected switch driver" - depends on NET_DSA_MICROCHIP_KSZ8795 && SPI - select REGMAP_SPI - help - This driver accesses KSZ8795 chip through SPI. - - It is required to use the KSZ8795 switch driver as the only access - is through SPI. - config NET_DSA_MICROCHIP_KSZ8863_SMI tristate "KSZ series SMI connected switch driver" - depends on NET_DSA_MICROCHIP_KSZ8795 + depends on NET_DSA_MICROCHIP_KSZ_COMMON select MDIO_BITBANG help Select to enable support for registering switches configured through diff --git a/drivers/net/dsa/microchip/Makefile b/drivers/net/dsa/microchip/Makefile index 2a03b21a3386..b2ba7c1bcb93 100644 --- a/drivers/net/dsa/microchip/Makefile +++ b/drivers/net/dsa/microchip/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_NET_DSA_MICROCHIP_KSZ_COMMON) += ksz_common.o -obj-$(CONFIG_NET_DSA_MICROCHIP_KSZ9477) += ksz9477.o +obj-$(CONFIG_NET_DSA_MICROCHIP_KSZ_COMMON) += ksz_switch.o +ksz_switch-objs := ksz_common.o +ksz_switch-objs += ksz9477.o +ksz_switch-objs += ksz8795.o obj-$(CONFIG_NET_DSA_MICROCHIP_KSZ9477_I2C) += ksz9477_i2c.o -obj-$(CONFIG_NET_DSA_MICROCHIP_KSZ9477_SPI) += ksz9477_spi.o -obj-$(CONFIG_NET_DSA_MICROCHIP_KSZ8795) += ksz8795.o -obj-$(CONFIG_NET_DSA_MICROCHIP_KSZ8795_SPI) += ksz8795_spi.o +obj-$(CONFIG_NET_DSA_MICROCHIP_KSZ_SPI) += ksz_spi.o 
obj-$(CONFIG_NET_DSA_MICROCHIP_KSZ8863_SMI) += ksz8863_smi.o diff --git a/drivers/net/dsa/microchip/ksz8.h b/drivers/net/dsa/microchip/ksz8.h index 03da369675c6..de246989c81b 100644 --- a/drivers/net/dsa/microchip/ksz8.h +++ b/drivers/net/dsa/microchip/ksz8.h @@ -7,7 +7,10 @@ #ifndef __KSZ8XXX_H #define __KSZ8XXX_H -#include <linux/kernel.h> + +#include <linux/types.h> +#include <net/dsa.h> +#include "ksz_common.h" enum ksz_regs { REG_IND_CTRL_0, @@ -67,4 +70,50 @@ struct ksz8 { void *priv; }; +int ksz8_setup(struct dsa_switch *ds); +u32 ksz8_get_port_addr(int port, int offset); +void ksz8_cfg_port_member(struct ksz_device *dev, int port, u8 member); +void ksz8_flush_dyn_mac_table(struct ksz_device *dev, int port); +void ksz8_port_setup(struct ksz_device *dev, int port, bool cpu_port); +void ksz8_r_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 *val); +void ksz8_w_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 val); +int ksz8_r_dyn_mac_table(struct ksz_device *dev, u16 addr, u8 *mac_addr, + u8 *fid, u8 *src_port, u8 *timestamp, u16 *entries); +int ksz8_r_sta_mac_table(struct ksz_device *dev, u16 addr, + struct alu_struct *alu); +void ksz8_w_sta_mac_table(struct ksz_device *dev, u16 addr, + struct alu_struct *alu); +void ksz8_r_mib_cnt(struct ksz_device *dev, int port, u16 addr, u64 *cnt); +void ksz8_r_mib_pkt(struct ksz_device *dev, int port, u16 addr, + u64 *dropped, u64 *cnt); +void ksz8_freeze_mib(struct ksz_device *dev, int port, bool freeze); +void ksz8_port_init_cnt(struct ksz_device *dev, int port); +int ksz8_fdb_dump(struct ksz_device *dev, int port, + dsa_fdb_dump_cb_t *cb, void *data); +int ksz8_mdb_add(struct ksz_device *dev, int port, + const struct switchdev_obj_port_mdb *mdb, struct dsa_db db); +int ksz8_mdb_del(struct ksz_device *dev, int port, + const struct switchdev_obj_port_mdb *mdb, struct dsa_db db); +int ksz8_port_vlan_filtering(struct ksz_device *dev, int port, bool flag, + struct netlink_ext_ack *extack); +int ksz8_port_vlan_add(struct 
ksz_device *dev, int port, + const struct switchdev_obj_port_vlan *vlan, + struct netlink_ext_ack *extack); +int ksz8_port_vlan_del(struct ksz_device *dev, int port, + const struct switchdev_obj_port_vlan *vlan); +int ksz8_port_mirror_add(struct ksz_device *dev, int port, + struct dsa_mall_mirror_tc_entry *mirror, + bool ingress, struct netlink_ext_ack *extack); +void ksz8_port_mirror_del(struct ksz_device *dev, int port, + struct dsa_mall_mirror_tc_entry *mirror); +int ksz8_get_stp_reg(void); +void ksz8_get_caps(struct ksz_device *dev, int port, + struct phylink_config *config); +void ksz8_config_cpu_port(struct dsa_switch *ds); +int ksz8_enable_stp_addr(struct ksz_device *dev); +int ksz8_reset_switch(struct ksz_device *dev); +int ksz8_switch_detect(struct ksz_device *dev); +int ksz8_switch_init(struct ksz_device *dev); +void ksz8_switch_exit(struct ksz_device *dev); + #endif diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c index 12a599d5e61a..df7d782e3fcd 100644 --- a/drivers/net/dsa/microchip/ksz8795.c +++ b/drivers/net/dsa/microchip/ksz8795.c @@ -162,7 +162,7 @@ static int ksz8_ind_write8(struct ksz_device *dev, u8 table, u16 addr, u8 data) return ret; } -static int ksz8_reset_switch(struct ksz_device *dev) +int ksz8_reset_switch(struct ksz_device *dev) { if (ksz_is_ksz88x3(dev)) { /* reset switch */ @@ -213,7 +213,7 @@ static void ksz8795_set_prio_queue(struct ksz_device *dev, int port, int queue) true); } -static void ksz8_r_mib_cnt(struct ksz_device *dev, int port, u16 addr, u64 *cnt) +void ksz8_r_mib_cnt(struct ksz_device *dev, int port, u16 addr, u64 *cnt) { struct ksz8 *ksz8 = dev->priv; const u32 *masks; @@ -334,8 +334,8 @@ static void ksz8863_r_mib_pkt(struct ksz_device *dev, int port, u16 addr, } } -static void ksz8_r_mib_pkt(struct ksz_device *dev, int port, u16 addr, - u64 *dropped, u64 *cnt) +void ksz8_r_mib_pkt(struct ksz_device *dev, int port, u16 addr, + u64 *dropped, u64 *cnt) { if (ksz_is_ksz88x3(dev)) 
ksz8863_r_mib_pkt(dev, port, addr, dropped, cnt); @@ -343,7 +343,7 @@ static void ksz8_r_mib_pkt(struct ksz_device *dev, int port, u16 addr, ksz8795_r_mib_pkt(dev, port, addr, dropped, cnt); } -static void ksz8_freeze_mib(struct ksz_device *dev, int port, bool freeze) +void ksz8_freeze_mib(struct ksz_device *dev, int port, bool freeze) { if (ksz_is_ksz88x3(dev)) return; @@ -358,7 +358,7 @@ static void ksz8_freeze_mib(struct ksz_device *dev, int port, bool freeze) ksz_cfg(dev, REG_SW_CTRL_6, BIT(port), false); } -static void ksz8_port_init_cnt(struct ksz_device *dev, int port) +void ksz8_port_init_cnt(struct ksz_device *dev, int port) { struct ksz_port_mib *mib = &dev->ports[port].mib; u64 *dropped; @@ -447,9 +447,8 @@ static int ksz8_valid_dyn_entry(struct ksz_device *dev, u8 *data) return 0; } -static int ksz8_r_dyn_mac_table(struct ksz_device *dev, u16 addr, - u8 *mac_addr, u8 *fid, u8 *src_port, - u8 *timestamp, u16 *entries) +int ksz8_r_dyn_mac_table(struct ksz_device *dev, u16 addr, u8 *mac_addr, + u8 *fid, u8 *src_port, u8 *timestamp, u16 *entries) { struct ksz8 *ksz8 = dev->priv; u32 data_hi, data_lo; @@ -512,8 +511,8 @@ static int ksz8_r_dyn_mac_table(struct ksz_device *dev, u16 addr, return rc; } -static int ksz8_r_sta_mac_table(struct ksz_device *dev, u16 addr, - struct alu_struct *alu) +int ksz8_r_sta_mac_table(struct ksz_device *dev, u16 addr, + struct alu_struct *alu) { struct ksz8 *ksz8 = dev->priv; u32 data_hi, data_lo; @@ -551,8 +550,8 @@ static int ksz8_r_sta_mac_table(struct ksz_device *dev, u16 addr, return -ENXIO; } -static void ksz8_w_sta_mac_table(struct ksz_device *dev, u16 addr, - struct alu_struct *alu) +void ksz8_w_sta_mac_table(struct ksz_device *dev, u16 addr, + struct alu_struct *alu) { struct ksz8 *ksz8 = dev->priv; u32 data_hi, data_lo; @@ -663,7 +662,7 @@ static void ksz8_w_vlan_table(struct ksz_device *dev, u16 vid, u16 vlan) ksz8_w_table(dev, TABLE_VLAN, addr, buf); } -static void ksz8_r_phy(struct ksz_device *dev, u16 phy, u16 
reg, u16 *val) +void ksz8_r_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 *val) { struct ksz8 *ksz8 = dev->priv; u8 restart, speed, ctrl, link; @@ -786,7 +785,7 @@ static void ksz8_r_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 *val) *val = data; } -static void ksz8_w_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 val) +void ksz8_w_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 val) { struct ksz8 *ksz8 = dev->priv; u8 restart, speed, ctrl, data; @@ -898,30 +897,7 @@ static void ksz8_w_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 val) } } -static enum dsa_tag_protocol ksz8_get_tag_protocol(struct dsa_switch *ds, - int port, - enum dsa_tag_protocol mp) -{ - struct ksz_device *dev = ds->priv; - - /* ksz88x3 uses the same tag schema as KSZ9893 */ - return ksz_is_ksz88x3(dev) ? - DSA_TAG_PROTO_KSZ9893 : DSA_TAG_PROTO_KSZ8795; -} - -static u32 ksz8_sw_get_phy_flags(struct dsa_switch *ds, int port) -{ - /* Silicon Errata Sheet (DS80000830A): - * Port 1 does not work with LinkMD Cable-Testing. - * Port 1 does not respond to received PAUSE control frames. 
- */ - if (!port) - return MICREL_KSZ8_P1_ERRATA; - - return 0; -} - -static void ksz8_cfg_port_member(struct ksz_device *dev, int port, u8 member) +void ksz8_cfg_port_member(struct ksz_device *dev, int port, u8 member) { u8 data; @@ -931,12 +907,7 @@ static void ksz8_cfg_port_member(struct ksz_device *dev, int port, u8 member) ksz_pwrite8(dev, port, P_MIRROR_CTRL, data); } -static void ksz8_port_stp_state_set(struct dsa_switch *ds, int port, u8 state) -{ - ksz_port_stp_state_set(ds, port, state, P_STP_CTRL); -} - -static void ksz8_flush_dyn_mac_table(struct ksz_device *dev, int port) +void ksz8_flush_dyn_mac_table(struct ksz_device *dev, int port) { u8 learn[DSA_MAX_PORTS]; int first, index, cnt; @@ -969,11 +940,109 @@ static void ksz8_flush_dyn_mac_table(struct ksz_device *dev, int port) } } -static int ksz8_port_vlan_filtering(struct dsa_switch *ds, int port, bool flag, - struct netlink_ext_ack *extack) +int ksz8_fdb_dump(struct ksz_device *dev, int port, + dsa_fdb_dump_cb_t *cb, void *data) { - struct ksz_device *dev = ds->priv; + int ret = 0; + u16 i = 0; + u16 entries = 0; + u8 timestamp = 0; + u8 fid; + u8 member; + struct alu_struct alu; + + do { + alu.is_static = false; + ret = ksz8_r_dyn_mac_table(dev, i, alu.mac, &fid, &member, + &timestamp, &entries); + if (!ret && (member & BIT(port))) { + ret = cb(alu.mac, alu.fid, alu.is_static, data); + if (ret) + break; + } + i++; + } while (i < entries); + if (i >= entries) + ret = 0; + + return ret; +} + +int ksz8_mdb_add(struct ksz_device *dev, int port, + const struct switchdev_obj_port_mdb *mdb, struct dsa_db db) +{ + struct alu_struct alu; + int index; + int empty = 0; + + alu.port_forward = 0; + for (index = 0; index < dev->info->num_statics; index++) { + if (!ksz8_r_sta_mac_table(dev, index, &alu)) { + /* Found one already in static MAC table. */ + if (!memcmp(alu.mac, mdb->addr, ETH_ALEN) && + alu.fid == mdb->vid) + break; + /* Remember the first empty entry.
*/ + } else if (!empty) { + empty = index + 1; + } + } + + /* no available entry */ + if (index == dev->info->num_statics && !empty) + return -ENOSPC; + + /* add entry */ + if (index == dev->info->num_statics) { + index = empty - 1; + memset(&alu, 0, sizeof(alu)); + memcpy(alu.mac, mdb->addr, ETH_ALEN); + alu.is_static = true; + } + alu.port_forward |= BIT(port); + if (mdb->vid) { + alu.is_use_fid = true; + /* Need a way to map VID to FID. */ + alu.fid = mdb->vid; + } + ksz8_w_sta_mac_table(dev, index, &alu); + + return 0; +} + +int ksz8_mdb_del(struct ksz_device *dev, int port, + const struct switchdev_obj_port_mdb *mdb, struct dsa_db db) +{ + struct alu_struct alu; + int index; + + for (index = 0; index < dev->info->num_statics; index++) { + if (!ksz8_r_sta_mac_table(dev, index, &alu)) { + /* Found one already in static MAC table. */ + if (!memcmp(alu.mac, mdb->addr, ETH_ALEN) && + alu.fid == mdb->vid) + break; + } + } + + /* no available entry */ + if (index == dev->info->num_statics) + goto exit; + + /* clear port */ + alu.port_forward &= ~BIT(port); + if (!alu.port_forward) + alu.is_static = false; + ksz8_w_sta_mac_table(dev, index, &alu); + +exit: + return 0; +} + +int ksz8_port_vlan_filtering(struct ksz_device *dev, int port, bool flag, + struct netlink_ext_ack *extack) +{ if (ksz_is_ksz88x3(dev)) return -ENOTSUPP; @@ -998,12 +1067,11 @@ static void ksz8_port_enable_pvid(struct ksz_device *dev, int port, bool state) } } -static int ksz8_port_vlan_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan, - struct netlink_ext_ack *extack) +int ksz8_port_vlan_add(struct ksz_device *dev, int port, + const struct switchdev_obj_port_vlan *vlan, + struct netlink_ext_ack *extack) { bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; - struct ksz_device *dev = ds->priv; struct ksz_port *p = &dev->ports[port]; u16 data, new_pvid = 0; u8 fid, member, valid; @@ -1071,10 +1139,9 @@ static int ksz8_port_vlan_add(struct dsa_switch *ds, int 
port, return 0; } -static int ksz8_port_vlan_del(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan) +int ksz8_port_vlan_del(struct ksz_device *dev, int port, + const struct switchdev_obj_port_vlan *vlan) { - struct ksz_device *dev = ds->priv; u16 data, pvid; u8 fid, member, valid; @@ -1104,12 +1171,10 @@ static int ksz8_port_vlan_del(struct dsa_switch *ds, int port, return 0; } -static int ksz8_port_mirror_add(struct dsa_switch *ds, int port, - struct dsa_mall_mirror_tc_entry *mirror, - bool ingress, struct netlink_ext_ack *extack) +int ksz8_port_mirror_add(struct ksz_device *dev, int port, + struct dsa_mall_mirror_tc_entry *mirror, + bool ingress, struct netlink_ext_ack *extack) { - struct ksz_device *dev = ds->priv; - if (ingress) { ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_RX, true); dev->mirror_rx |= BIT(port); @@ -1128,10 +1193,9 @@ static int ksz8_port_mirror_add(struct dsa_switch *ds, int port, return 0; } -static void ksz8_port_mirror_del(struct dsa_switch *ds, int port, - struct dsa_mall_mirror_tc_entry *mirror) +void ksz8_port_mirror_del(struct ksz_device *dev, int port, + struct dsa_mall_mirror_tc_entry *mirror) { - struct ksz_device *dev = ds->priv; u8 data; if (mirror->ingress) { @@ -1197,7 +1261,7 @@ static void ksz8795_cpu_interface_select(struct ksz_device *dev, int port) p->phydev.duplex = 1; } -static void ksz8_port_setup(struct ksz_device *dev, int port, bool cpu_port) +void ksz8_port_setup(struct ksz_device *dev, int port, bool cpu_port) { struct dsa_switch *ds = dev->ds; struct ksz8 *ksz8 = dev->priv; @@ -1234,7 +1298,7 @@ static void ksz8_port_setup(struct ksz_device *dev, int port, bool cpu_port) ksz8_cfg_port_member(dev, port, member); } -static void ksz8_config_cpu_port(struct dsa_switch *ds) +void ksz8_config_cpu_port(struct dsa_switch *ds) { struct ksz_device *dev = ds->priv; struct ksz8 *ksz8 = dev->priv; @@ -1258,7 +1322,7 @@ static void ksz8_config_cpu_port(struct dsa_switch *ds) for (i = 0; i < 
dev->phy_port_cnt; i++) { p = &dev->ports[i]; - ksz8_port_stp_state_set(ds, i, BR_STATE_DISABLED); + ksz_port_stp_state_set(ds, i, BR_STATE_DISABLED); /* Last port may be disabled. */ if (i == dev->phy_port_cnt) @@ -1272,7 +1336,7 @@ static void ksz8_config_cpu_port(struct dsa_switch *ds) continue; if (!ksz_is_ksz88x3(dev)) { ksz_pread8(dev, i, regs[P_REMOTE_STATUS], &remote); - if (remote & PORT_FIBER_MODE) + if (remote & KSZ8_PORT_FIBER_MODE) p->fiber = 1; } if (p->fiber) @@ -1301,22 +1365,26 @@ static int ksz8_handle_global_errata(struct dsa_switch *ds) return ret; } -static int ksz8_setup(struct dsa_switch *ds) +int ksz8_enable_stp_addr(struct ksz_device *dev) { - struct ksz_device *dev = ds->priv; struct alu_struct alu; - int i, ret = 0; - dev->vlan_cache = devm_kcalloc(dev->dev, sizeof(struct vlan_table), - dev->info->num_vlans, GFP_KERNEL); - if (!dev->vlan_cache) - return -ENOMEM; + /* Setup STP address for STP operation. */ + memset(&alu, 0, sizeof(alu)); + ether_addr_copy(alu.mac, eth_stp_addr); + alu.is_static = true; + alu.is_override = true; + alu.port_forward = dev->info->cpu_ports; - ret = ksz8_reset_switch(dev); - if (ret) { - dev_err(ds->dev, "failed to reset switch\n"); - return ret; - } + ksz8_w_sta_mac_table(dev, 0, &alu); + + return 0; +} + +int ksz8_setup(struct dsa_switch *ds) +{ + struct ksz_device *dev = ds->priv; + int i; ksz_cfg(dev, S_REPLACE_VID_CTRL, SW_FLOW_CTRL, true); @@ -1335,10 +1403,6 @@ static int ksz8_setup(struct dsa_switch *ds) UNICAST_VLAN_BOUNDARY | NO_EXC_COLLISION_DROP, UNICAST_VLAN_BOUNDARY | NO_EXC_COLLISION_DROP); - ksz8_config_cpu_port(ds); - - ksz_cfg(dev, REG_SW_CTRL_2, MULTICAST_STORM_DISABLE, true); - ksz_cfg(dev, S_REPLACE_VID_CTRL, SW_REPLACE_VID, false); ksz_cfg(dev, S_MIRROR_CTRL, SW_MIRROR_RX_TX, false); @@ -1346,38 +1410,15 @@ static int ksz8_setup(struct dsa_switch *ds) if (!ksz_is_ksz88x3(dev)) ksz_cfg(dev, REG_SW_CTRL_19, SW_INS_TAG_ENABLE, true); - /* set broadcast storm protection 10% rate */ - 
regmap_update_bits(dev->regmap[1], S_REPLACE_VID_CTRL, - BROADCAST_STORM_RATE, - (BROADCAST_STORM_VALUE * - BROADCAST_STORM_PROT_RATE) / 100); - for (i = 0; i < (dev->info->num_vlans / 4); i++) ksz8_r_vlan_entries(dev, i); - /* Setup STP address for STP operation. */ - memset(&alu, 0, sizeof(alu)); - ether_addr_copy(alu.mac, eth_stp_addr); - alu.is_static = true; - alu.is_override = true; - alu.port_forward = dev->info->cpu_ports; - - ksz8_w_sta_mac_table(dev, 0, &alu); - - ksz_init_mib_timer(dev); - - ds->configure_vlan_while_not_filtering = false; - return ksz8_handle_global_errata(ds); } -static void ksz8_get_caps(struct dsa_switch *ds, int port, - struct phylink_config *config) +void ksz8_get_caps(struct ksz_device *dev, int port, + struct phylink_config *config) { - struct ksz_device *dev = ds->priv; - - ksz_phylink_get_caps(ds, port, config); - config->mac_capabilities = MAC_10 | MAC_100; /* Silicon Errata Sheet (DS80000830A): @@ -1393,88 +1434,15 @@ static void ksz8_get_caps(struct dsa_switch *ds, int port, config->mac_capabilities |= MAC_ASYM_PAUSE; } -static const struct dsa_switch_ops ksz8_switch_ops = { - .get_tag_protocol = ksz8_get_tag_protocol, - .get_phy_flags = ksz8_sw_get_phy_flags, - .setup = ksz8_setup, - .phy_read = ksz_phy_read16, - .phy_write = ksz_phy_write16, - .phylink_get_caps = ksz8_get_caps, - .phylink_mac_link_down = ksz_mac_link_down, - .port_enable = ksz_enable_port, - .get_strings = ksz_get_strings, - .get_ethtool_stats = ksz_get_ethtool_stats, - .get_sset_count = ksz_sset_count, - .port_bridge_join = ksz_port_bridge_join, - .port_bridge_leave = ksz_port_bridge_leave, - .port_stp_state_set = ksz8_port_stp_state_set, - .port_fast_age = ksz_port_fast_age, - .port_vlan_filtering = ksz8_port_vlan_filtering, - .port_vlan_add = ksz8_port_vlan_add, - .port_vlan_del = ksz8_port_vlan_del, - .port_fdb_dump = ksz_port_fdb_dump, - .port_mdb_add = ksz_port_mdb_add, - .port_mdb_del = ksz_port_mdb_del, - .port_mirror_add = ksz8_port_mirror_add, - 
.port_mirror_del = ksz8_port_mirror_del, -}; - -static u32 ksz8_get_port_addr(int port, int offset) +u32 ksz8_get_port_addr(int port, int offset) { return PORT_CTRL_ADDR(port, offset); } -static int ksz8_switch_detect(struct ksz_device *dev) -{ - u8 id1, id2; - u16 id16; - int ret; - - /* read chip id */ - ret = ksz_read16(dev, REG_CHIP_ID0, &id16); - if (ret) - return ret; - - id1 = id16 >> 8; - id2 = id16 & SW_CHIP_ID_M; - - switch (id1) { - case KSZ87_FAMILY_ID: - if ((id2 != CHIP_ID_94 && id2 != CHIP_ID_95)) - return -ENODEV; - - if (id2 == CHIP_ID_95) { - u8 val; - - id2 = 0x95; - ksz_read8(dev, REG_PORT_STATUS_0, &val); - if (val & PORT_FIBER_MODE) - id2 = 0x65; - } else if (id2 == CHIP_ID_94) { - id2 = 0x94; - } - break; - case KSZ88_FAMILY_ID: - if (id2 != CHIP_ID_63) - return -ENODEV; - break; - default: - dev_err(dev->dev, "invalid family id: %d\n", id1); - return -ENODEV; - } - id16 &= ~0xff; - id16 |= id2; - dev->chip_id = id16; - - return 0; -} - -static int ksz8_switch_init(struct ksz_device *dev) +int ksz8_switch_init(struct ksz_device *dev) { struct ksz8 *ksz8 = dev->priv; - dev->ds->ops = &ksz8_switch_ops; - dev->cpu_port = fls(dev->info->cpu_ports) - 1; dev->phy_port_cnt = dev->info->port_cnt - 1; dev->port_mask = (BIT(dev->phy_port_cnt) - 1) | dev->info->cpu_ports; @@ -1502,37 +1470,11 @@ static int ksz8_switch_init(struct ksz_device *dev) return 0; } -static void ksz8_switch_exit(struct ksz_device *dev) +void ksz8_switch_exit(struct ksz_device *dev) { ksz8_reset_switch(dev); } -static const struct ksz_dev_ops ksz8_dev_ops = { - .get_port_addr = ksz8_get_port_addr, - .cfg_port_member = ksz8_cfg_port_member, - .flush_dyn_mac_table = ksz8_flush_dyn_mac_table, - .port_setup = ksz8_port_setup, - .r_phy = ksz8_r_phy, - .w_phy = ksz8_w_phy, - .r_dyn_mac_table = ksz8_r_dyn_mac_table, - .r_sta_mac_table = ksz8_r_sta_mac_table, - .w_sta_mac_table = ksz8_w_sta_mac_table, - .r_mib_cnt = ksz8_r_mib_cnt, - .r_mib_pkt = ksz8_r_mib_pkt, - .freeze_mib = 
ksz8_freeze_mib, - .port_init_cnt = ksz8_port_init_cnt, - .shutdown = ksz8_reset_switch, - .detect = ksz8_switch_detect, - .init = ksz8_switch_init, - .exit = ksz8_switch_exit, -}; - -int ksz8_switch_register(struct ksz_device *dev) -{ - return ksz_switch_register(dev, &ksz8_dev_ops); -} -EXPORT_SYMBOL(ksz8_switch_register); - MODULE_AUTHOR("Tristram Ha <Tristram.Ha@microchip.com>"); MODULE_DESCRIPTION("Microchip KSZ8795 Series Switch DSA Driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/net/dsa/microchip/ksz8795_reg.h b/drivers/net/dsa/microchip/ksz8795_reg.h index 4109433b6b6c..32d985296520 100644 --- a/drivers/net/dsa/microchip/ksz8795_reg.h +++ b/drivers/net/dsa/microchip/ksz8795_reg.h @@ -14,22 +14,8 @@ #define KS_PRIO_M 0x3 #define KS_PRIO_S 2 -#define REG_CHIP_ID0 0x00 - -#define KSZ87_FAMILY_ID 0x87 -#define KSZ88_FAMILY_ID 0x88 - -#define REG_CHIP_ID1 0x01 - -#define SW_CHIP_ID_M 0xF0 -#define SW_CHIP_ID_S 4 #define SW_REVISION_M 0x0E #define SW_REVISION_S 1 -#define SW_START 0x01 - -#define CHIP_ID_94 0x60 -#define CHIP_ID_95 0x90 -#define CHIP_ID_63 0x30 #define KSZ8863_REG_SW_RESET 0x43 @@ -57,7 +43,6 @@ #define REG_SW_CTRL_2 0x04 #define UNICAST_VLAN_BOUNDARY BIT(7) -#define MULTICAST_STORM_DISABLE BIT(6) #define SW_BACK_PRESSURE BIT(5) #define FAIR_FLOW_CTRL BIT(4) #define NO_EXC_COLLISION_DROP BIT(3) @@ -77,13 +62,9 @@ #define SW_FLOW_CTRL BIT(5) #define SW_10_MBIT BIT(4) #define SW_REPLACE_VID BIT(3) -#define BROADCAST_STORM_RATE_HI 0x07 #define REG_SW_CTRL_5 0x07 -#define BROADCAST_STORM_RATE_LO 0xFF -#define BROADCAST_STORM_RATE 0x07FF - #define REG_SW_CTRL_6 0x08 #define SW_MIB_COUNTER_FLUSH BIT(7) @@ -217,8 +198,6 @@ #define REG_PORT_4_STATUS_0 0x48 /* For KSZ8765. 
*/ -#define PORT_FIBER_MODE BIT(7) - #define PORT_REMOTE_ASYM_PAUSE BIT(5) #define PORT_REMOTE_SYM_PAUSE BIT(4) #define PORT_REMOTE_100BTX_FD BIT(3) @@ -322,7 +301,6 @@ #define REG_PORT_CTRL_5 0x05 -#define REG_PORT_STATUS_0 0x08 #define REG_PORT_STATUS_1 0x09 #define REG_PORT_LINK_MD_CTRL 0x0A #define REG_PORT_LINK_MD_RESULT 0x0B @@ -813,12 +791,6 @@ #define REG_IND_EEE_GLOB2_LO 0x34 #define REG_IND_EEE_GLOB2_HI 0x35 -/* Driver set switch broadcast storm protection at 10% rate. */ -#define BROADCAST_STORM_PROT_RATE 10 - -/* 148,800 frames * 67 ms / 100 */ -#define BROADCAST_STORM_VALUE 9969 - /** * MIB_COUNTER_VALUE 00-00000000-3FFFFFFF * MIB_TOTAL_BYTES 00-0000000F-FFFFFFFF diff --git a/drivers/net/dsa/microchip/ksz8863_smi.c b/drivers/net/dsa/microchip/ksz8863_smi.c index b6f99e641dca..d71df05b8b7b 100644 --- a/drivers/net/dsa/microchip/ksz8863_smi.c +++ b/drivers/net/dsa/microchip/ksz8863_smi.c @@ -174,7 +174,7 @@ static int ksz8863_smi_probe(struct mdio_device *mdiodev) if (mdiodev->dev.platform_data) dev->pdata = mdiodev->dev.platform_data; - ret = ksz8_switch_register(dev); + ret = ksz_switch_register(dev); /* Main DSA driver may not be started yet. */ if (ret) diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index ab40b700cf1a..fa498ad8ca40 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -17,6 +17,7 @@ #include "ksz9477_reg.h" #include "ksz_common.h" +#include "ksz9477.h" /* Used with variable features to indicate capabilities. */ #define GBIT_SUPPORT BIT(0) @@ -47,9 +48,8 @@ static void ksz9477_port_cfg32(struct ksz_device *dev, int port, int offset, bits, set ? 
bits : 0); } -static int ksz9477_change_mtu(struct dsa_switch *ds, int port, int mtu) +int ksz9477_change_mtu(struct ksz_device *dev, int port, int mtu) { - struct ksz_device *dev = ds->priv; u16 frame_size, max_frame = 0; int i; @@ -65,7 +65,7 @@ static int ksz9477_change_mtu(struct dsa_switch *ds, int port, int mtu) REG_SW_MTU_MASK, max_frame); } -static int ksz9477_max_mtu(struct dsa_switch *ds, int port) +int ksz9477_max_mtu(struct ksz_device *dev, int port) { return KSZ9477_MAX_FRAME_SIZE - VLAN_ETH_HLEN - ETH_FCS_LEN; } @@ -175,7 +175,7 @@ static int ksz9477_wait_alu_sta_ready(struct ksz_device *dev) 10, 1000); } -static int ksz9477_reset_switch(struct ksz_device *dev) +int ksz9477_reset_switch(struct ksz_device *dev) { u8 data8; u32 data32; @@ -198,12 +198,6 @@ static int ksz9477_reset_switch(struct ksz_device *dev) ksz_write32(dev, REG_SW_PORT_INT_MASK__4, 0x7F); ksz_read32(dev, REG_SW_PORT_INT_STATUS__4, &data32); - /* set broadcast storm protection 10% rate */ - regmap_update_bits(dev->regmap[1], REG_SW_MAC_CTRL_2, - BROADCAST_STORM_RATE, - (BROADCAST_STORM_VALUE * - BROADCAST_STORM_PROT_RATE) / 100); - data8 = SW_ENABLE_REFCLKO; if (dev->synclko_disable) data8 = 0; @@ -214,8 +208,7 @@ static int ksz9477_reset_switch(struct ksz_device *dev) return 0; } -static void ksz9477_r_mib_cnt(struct ksz_device *dev, int port, u16 addr, - u64 *cnt) +void ksz9477_r_mib_cnt(struct ksz_device *dev, int port, u16 addr, u64 *cnt) { struct ksz_port *p = &dev->ports[port]; unsigned int val; @@ -242,14 +235,14 @@ static void ksz9477_r_mib_cnt(struct ksz_device *dev, int port, u16 addr, *cnt += data; } -static void ksz9477_r_mib_pkt(struct ksz_device *dev, int port, u16 addr, - u64 *dropped, u64 *cnt) +void ksz9477_r_mib_pkt(struct ksz_device *dev, int port, u16 addr, + u64 *dropped, u64 *cnt) { addr = dev->info->mib_names[addr].index; ksz9477_r_mib_cnt(dev, port, addr, cnt); } -static void ksz9477_freeze_mib(struct ksz_device *dev, int port, bool freeze) +void 
ksz9477_freeze_mib(struct ksz_device *dev, int port, bool freeze) { u32 val = freeze ? MIB_COUNTER_FLUSH_FREEZE : 0; struct ksz_port *p = &dev->ports[port]; @@ -263,7 +256,7 @@ static void ksz9477_freeze_mib(struct ksz_device *dev, int port, bool freeze) mutex_unlock(&p->mib.cnt_mutex); } -static void ksz9477_port_init_cnt(struct ksz_device *dev, int port) +void ksz9477_port_init_cnt(struct ksz_device *dev, int port) { struct ksz_port_mib *mib = &dev->ports[port].mib; @@ -276,21 +269,8 @@ static void ksz9477_port_init_cnt(struct ksz_device *dev, int port) mutex_unlock(&mib->cnt_mutex); } -static enum dsa_tag_protocol ksz9477_get_tag_protocol(struct dsa_switch *ds, - int port, - enum dsa_tag_protocol mp) -{ - enum dsa_tag_protocol proto = DSA_TAG_PROTO_KSZ9477; - struct ksz_device *dev = ds->priv; - - if (dev->features & IS_9893) - proto = DSA_TAG_PROTO_KSZ9893; - return proto; -} - -static int ksz9477_phy_read16(struct dsa_switch *ds, int addr, int reg) +void ksz9477_r_phy(struct ksz_device *dev, u16 addr, u16 reg, u16 *data) { - struct ksz_device *dev = ds->priv; u16 val = 0xffff; /* No real PHY after this. Simulate the PHY. @@ -335,39 +315,28 @@ static int ksz9477_phy_read16(struct dsa_switch *ds, int addr, int reg) ksz_pread16(dev, addr, 0x100 + (reg << 1), &val); } - return val; + *data = val; } -static int ksz9477_phy_write16(struct dsa_switch *ds, int addr, int reg, - u16 val) +void ksz9477_w_phy(struct ksz_device *dev, u16 addr, u16 reg, u16 val) { - struct ksz_device *dev = ds->priv; - /* No real PHY after this. */ if (addr >= dev->phy_port_cnt) - return 0; + return; /* No gigabit support. Do not write to this register. 
*/ if (!(dev->features & GBIT_SUPPORT) && reg == MII_CTRL1000) - return 0; - ksz_pwrite16(dev, addr, 0x100 + (reg << 1), val); + return; - return 0; + ksz_pwrite16(dev, addr, 0x100 + (reg << 1), val); } -static void ksz9477_cfg_port_member(struct ksz_device *dev, int port, - u8 member) +void ksz9477_cfg_port_member(struct ksz_device *dev, int port, u8 member) { ksz_pwrite32(dev, port, REG_PORT_VLAN_MEMBERSHIP__4, member); } -static void ksz9477_port_stp_state_set(struct dsa_switch *ds, int port, - u8 state) -{ - ksz_port_stp_state_set(ds, port, state, P_STP_CTRL); -} - -static void ksz9477_flush_dyn_mac_table(struct ksz_device *dev, int port) +void ksz9477_flush_dyn_mac_table(struct ksz_device *dev, int port) { u8 data; @@ -389,12 +358,9 @@ static void ksz9477_flush_dyn_mac_table(struct ksz_device *dev, int port) } } -static int ksz9477_port_vlan_filtering(struct dsa_switch *ds, int port, - bool flag, - struct netlink_ext_ack *extack) +int ksz9477_port_vlan_filtering(struct ksz_device *dev, int port, + bool flag, struct netlink_ext_ack *extack) { - struct ksz_device *dev = ds->priv; - if (flag) { ksz_port_cfg(dev, port, REG_PORT_LUE_CTRL, PORT_VLAN_LOOKUP_VID_0, true); @@ -408,11 +374,10 @@ static int ksz9477_port_vlan_filtering(struct dsa_switch *ds, int port, return 0; } -static int ksz9477_port_vlan_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan, - struct netlink_ext_ack *extack) +int ksz9477_port_vlan_add(struct ksz_device *dev, int port, + const struct switchdev_obj_port_vlan *vlan, + struct netlink_ext_ack *extack) { - struct ksz_device *dev = ds->priv; u32 vlan_table[3]; bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; int err; @@ -445,10 +410,9 @@ static int ksz9477_port_vlan_add(struct dsa_switch *ds, int port, return 0; } -static int ksz9477_port_vlan_del(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan) +int ksz9477_port_vlan_del(struct ksz_device *dev, int port, + const struct 
switchdev_obj_port_vlan *vlan) { - struct ksz_device *dev = ds->priv; bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; u32 vlan_table[3]; u16 pvid; @@ -479,11 +443,9 @@ static int ksz9477_port_vlan_del(struct dsa_switch *ds, int port, return 0; } -static int ksz9477_port_fdb_add(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid, - struct dsa_db db) +int ksz9477_fdb_add(struct ksz_device *dev, int port, + const unsigned char *addr, u16 vid, struct dsa_db db) { - struct ksz_device *dev = ds->priv; u32 alu_table[4]; u32 data; int ret = 0; @@ -537,11 +499,9 @@ exit: return ret; } -static int ksz9477_port_fdb_del(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid, - struct dsa_db db) +int ksz9477_fdb_del(struct ksz_device *dev, int port, + const unsigned char *addr, u16 vid, struct dsa_db db) { - struct ksz_device *dev = ds->priv; u32 alu_table[4]; u32 data; int ret = 0; @@ -628,10 +588,9 @@ static void ksz9477_convert_alu(struct alu_struct *alu, u32 *alu_table) alu->mac[5] = alu_table[3] & 0xFF; } -static int ksz9477_port_fdb_dump(struct dsa_switch *ds, int port, - dsa_fdb_dump_cb_t *cb, void *data) +int ksz9477_fdb_dump(struct ksz_device *dev, int port, + dsa_fdb_dump_cb_t *cb, void *data) { - struct ksz_device *dev = ds->priv; int ret = 0; u32 ksz_data; u32 alu_table[4]; @@ -680,11 +639,9 @@ exit: return ret; } -static int ksz9477_port_mdb_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb, - struct dsa_db db) +int ksz9477_mdb_add(struct ksz_device *dev, int port, + const struct switchdev_obj_port_mdb *mdb, struct dsa_db db) { - struct ksz_device *dev = ds->priv; u32 static_table[4]; u32 data; int index; @@ -756,11 +713,9 @@ exit: return err; } -static int ksz9477_port_mdb_del(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb, - struct dsa_db db) +int ksz9477_mdb_del(struct ksz_device *dev, int port, + const struct switchdev_obj_port_mdb *mdb, struct dsa_db db) { - 
struct ksz_device *dev = ds->priv; u32 static_table[4]; u32 data; int index; @@ -832,11 +787,10 @@ exit: return ret; } -static int ksz9477_port_mirror_add(struct dsa_switch *ds, int port, - struct dsa_mall_mirror_tc_entry *mirror, - bool ingress, struct netlink_ext_ack *extack) +int ksz9477_port_mirror_add(struct ksz_device *dev, int port, + struct dsa_mall_mirror_tc_entry *mirror, + bool ingress, struct netlink_ext_ack *extack) { - struct ksz_device *dev = ds->priv; u8 data; int p; @@ -872,10 +826,9 @@ static int ksz9477_port_mirror_add(struct dsa_switch *ds, int port, return 0; } -static void ksz9477_port_mirror_del(struct dsa_switch *ds, int port, - struct dsa_mall_mirror_tc_entry *mirror) +void ksz9477_port_mirror_del(struct ksz_device *dev, int port, + struct dsa_mall_mirror_tc_entry *mirror) { - struct ksz_device *dev = ds->priv; bool in_use = false; u8 data; int p; @@ -1097,16 +1050,17 @@ static void ksz9477_phy_errata_setup(struct ksz_device *dev, int port) ksz9477_port_mmd_write(dev, port, 0x1c, 0x20, 0xeeee); } -static void ksz9477_get_caps(struct dsa_switch *ds, int port, - struct phylink_config *config) +void ksz9477_get_caps(struct ksz_device *dev, int port, + struct phylink_config *config) { - ksz_phylink_get_caps(ds, port, config); + config->mac_capabilities = MAC_10 | MAC_100 | MAC_ASYM_PAUSE | + MAC_SYM_PAUSE; - config->mac_capabilities = MAC_10 | MAC_100 | MAC_1000FD | - MAC_ASYM_PAUSE | MAC_SYM_PAUSE; + if (dev->features & GBIT_SUPPORT) + config->mac_capabilities |= MAC_1000FD; } -static void ksz9477_port_setup(struct ksz_device *dev, int port, bool cpu_port) +void ksz9477_port_setup(struct ksz_device *dev, int port, bool cpu_port) { struct ksz_port *p = &dev->ports[port]; struct dsa_switch *ds = dev->ds; @@ -1203,7 +1157,7 @@ static void ksz9477_port_setup(struct ksz_device *dev, int port, bool cpu_port) ksz_pread16(dev, port, REG_PORT_PHY_INT_ENABLE, &data16); } -static void ksz9477_config_cpu_port(struct dsa_switch *ds) +void 
ksz9477_config_cpu_port(struct dsa_switch *ds) { struct ksz_device *dev = ds->priv; struct ksz_port *p; @@ -1260,7 +1214,7 @@ static void ksz9477_config_cpu_port(struct dsa_switch *ds) continue; p = &dev->ports[i]; - ksz9477_port_stp_state_set(ds, i, BR_STATE_DISABLED); + ksz_port_stp_state_set(ds, i, BR_STATE_DISABLED); p->on = 1; if (i < dev->phy_port_cnt) p->phy = 1; @@ -1273,22 +1227,41 @@ static void ksz9477_config_cpu_port(struct dsa_switch *ds) } } -static int ksz9477_setup(struct dsa_switch *ds) +int ksz9477_enable_stp_addr(struct ksz_device *dev) { - struct ksz_device *dev = ds->priv; - int ret = 0; + u32 data; + int ret; - dev->vlan_cache = devm_kcalloc(dev->dev, sizeof(struct vlan_table), - dev->info->num_vlans, GFP_KERNEL); - if (!dev->vlan_cache) - return -ENOMEM; + /* Enable Reserved multicast table */ + ksz_cfg(dev, REG_SW_LUE_CTRL_0, SW_RESV_MCAST_ENABLE, true); - ret = ksz9477_reset_switch(dev); - if (ret) { - dev_err(ds->dev, "failed to reset switch\n"); + /* Set the Override bit for forwarding BPDU packet to CPU */ + ret = ksz_write32(dev, REG_SW_ALU_VAL_B, + ALU_V_OVERRIDE | BIT(dev->cpu_port)); + if (ret < 0) + return ret; + + data = ALU_STAT_START | ALU_RESV_MCAST_ADDR; + + ret = ksz_write32(dev, REG_SW_ALU_STAT_CTRL__4, data); + if (ret < 0) + return ret; + + /* wait to be finished */ + ret = ksz9477_wait_alu_sta_ready(dev); + if (ret < 0) { + dev_err(dev->dev, "Failed to update Reserved Multicast table\n"); return ret; } + return 0; +} + +int ksz9477_setup(struct dsa_switch *ds) +{ + struct ksz_device *dev = ds->priv; + int ret = 0; + /* Required for port partitioning. 
*/ ksz9477_cfg32(dev, REG_SW_QM_CTRL__4, UNICAST_VLAN_BOUNDARY, true); @@ -1305,69 +1278,27 @@ static int ksz9477_setup(struct dsa_switch *ds) if (ret) return ret; - ksz9477_config_cpu_port(ds); - - ksz_cfg(dev, REG_SW_MAC_CTRL_1, MULTICAST_STORM_DISABLE, true); - /* queue based egress rate limit */ ksz_cfg(dev, REG_SW_MAC_CTRL_5, SW_OUT_RATE_LIMIT_QUEUE_BASED, true); /* enable global MIB counter freeze function */ ksz_cfg(dev, REG_SW_MAC_CTRL_6, SW_MIB_COUNTER_FREEZE, true); - /* start switch */ - ksz_cfg(dev, REG_SW_OPERATION, SW_START, true); - - ksz_init_mib_timer(dev); - - ds->configure_vlan_while_not_filtering = false; - return 0; } -static const struct dsa_switch_ops ksz9477_switch_ops = { - .get_tag_protocol = ksz9477_get_tag_protocol, - .setup = ksz9477_setup, - .phy_read = ksz9477_phy_read16, - .phy_write = ksz9477_phy_write16, - .phylink_mac_link_down = ksz_mac_link_down, - .phylink_get_caps = ksz9477_get_caps, - .port_enable = ksz_enable_port, - .get_strings = ksz_get_strings, - .get_ethtool_stats = ksz_get_ethtool_stats, - .get_sset_count = ksz_sset_count, - .port_bridge_join = ksz_port_bridge_join, - .port_bridge_leave = ksz_port_bridge_leave, - .port_stp_state_set = ksz9477_port_stp_state_set, - .port_fast_age = ksz_port_fast_age, - .port_vlan_filtering = ksz9477_port_vlan_filtering, - .port_vlan_add = ksz9477_port_vlan_add, - .port_vlan_del = ksz9477_port_vlan_del, - .port_fdb_dump = ksz9477_port_fdb_dump, - .port_fdb_add = ksz9477_port_fdb_add, - .port_fdb_del = ksz9477_port_fdb_del, - .port_mdb_add = ksz9477_port_mdb_add, - .port_mdb_del = ksz9477_port_mdb_del, - .port_mirror_add = ksz9477_port_mirror_add, - .port_mirror_del = ksz9477_port_mirror_del, - .get_stats64 = ksz_get_stats64, - .port_change_mtu = ksz9477_change_mtu, - .port_max_mtu = ksz9477_max_mtu, -}; - -static u32 ksz9477_get_port_addr(int port, int offset) +u32 ksz9477_get_port_addr(int port, int offset) { return PORT_CTRL_ADDR(port, offset); } -static int 
ksz9477_switch_detect(struct ksz_device *dev) +int ksz9477_switch_init(struct ksz_device *dev) { u8 data8; - u8 id_hi; - u8 id_lo; - u32 id32; int ret; + dev->port_mask = (1 << dev->info->port_cnt) - 1; + /* turn off SPI DO Edge select */ ret = ksz_read8(dev, REG_SW_GLOBAL_SERIAL_CTRL_0, &data8); if (ret) @@ -1378,10 +1309,6 @@ static int ksz9477_switch_detect(struct ksz_device *dev) if (ret) return ret; - /* read chip id */ - ret = ksz_read32(dev, REG_CHIP_ID0__1, &id32); - if (ret) - return ret; ret = ksz_read8(dev, REG_GLOBAL_OPTIONS, &data8); if (ret) return ret; @@ -1392,12 +1319,7 @@ static int ksz9477_switch_detect(struct ksz_device *dev) /* Default capability is gigabit capable. */ dev->features = GBIT_SUPPORT; - dev_dbg(dev->dev, "Switch detect: ID=%08x%02x\n", id32, data8); - id_hi = (u8)(id32 >> 16); - id_lo = (u8)(id32 >> 8); - if ((id_lo & 0xf) == 3) { - /* Chip is from KSZ9893 design. */ - dev_info(dev->dev, "Found KSZ9893\n"); + if (dev->chip_id == KSZ9893_CHIP_ID) { dev->features |= IS_9893; /* Chip does not support gigabit. */ @@ -1405,7 +1327,6 @@ static int ksz9477_switch_detect(struct ksz_device *dev) dev->features &= ~GBIT_SUPPORT; dev->phy_port_cnt = 2; } else { - dev_info(dev->dev, "Found KSZ9477 or compatible\n"); /* Chip uses new XMII register definitions. */ dev->features |= NEW_XMII; @@ -1414,72 +1335,14 @@ static int ksz9477_switch_detect(struct ksz_device *dev) dev->features &= ~GBIT_SUPPORT; } - /* Change chip id to known ones so it can be matched against them. 
*/ - id32 = (id_hi << 16) | (id_lo << 8); - - dev->chip_id = id32; - return 0; } -static int ksz9477_switch_init(struct ksz_device *dev) -{ - dev->ds->ops = &ksz9477_switch_ops; - - dev->port_mask = (1 << dev->info->port_cnt) - 1; - - return 0; -} - -static void ksz9477_switch_exit(struct ksz_device *dev) +void ksz9477_switch_exit(struct ksz_device *dev) { ksz9477_reset_switch(dev); } -static const struct ksz_dev_ops ksz9477_dev_ops = { - .get_port_addr = ksz9477_get_port_addr, - .cfg_port_member = ksz9477_cfg_port_member, - .flush_dyn_mac_table = ksz9477_flush_dyn_mac_table, - .port_setup = ksz9477_port_setup, - .r_mib_cnt = ksz9477_r_mib_cnt, - .r_mib_pkt = ksz9477_r_mib_pkt, - .r_mib_stat64 = ksz_r_mib_stats64, - .freeze_mib = ksz9477_freeze_mib, - .port_init_cnt = ksz9477_port_init_cnt, - .shutdown = ksz9477_reset_switch, - .detect = ksz9477_switch_detect, - .init = ksz9477_switch_init, - .exit = ksz9477_switch_exit, -}; - -int ksz9477_switch_register(struct ksz_device *dev) -{ - int ret, i; - struct phy_device *phydev; - - ret = ksz_switch_register(dev, &ksz9477_dev_ops); - if (ret) - return ret; - - for (i = 0; i < dev->phy_port_cnt; ++i) { - if (!dsa_is_user_port(dev->ds, i)) - continue; - - phydev = dsa_to_port(dev->ds, i)->slave->phydev; - - /* The MAC actually cannot run in 1000 half-duplex mode. */ - phy_remove_link_mode(phydev, - ETHTOOL_LINK_MODE_1000baseT_Half_BIT); - - /* PHY does not support gigabit. 
*/ - if (!(dev->features & GBIT_SUPPORT)) - phy_remove_link_mode(phydev, - ETHTOOL_LINK_MODE_1000baseT_Full_BIT); - } - return ret; -} -EXPORT_SYMBOL(ksz9477_switch_register); - MODULE_AUTHOR("Woojung Huh <Woojung.Huh@microchip.com>"); MODULE_DESCRIPTION("Microchip KSZ9477 Series Switch DSA Driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/net/dsa/microchip/ksz9477.h b/drivers/net/dsa/microchip/ksz9477.h new file mode 100644 index 000000000000..cd278b307b3c --- /dev/null +++ b/drivers/net/dsa/microchip/ksz9477.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Microchip KSZ9477 series Header file + * + * Copyright (C) 2017-2022 Microchip Technology Inc. + */ + +#ifndef __KSZ9477_H +#define __KSZ9477_H + +#include <net/dsa.h> +#include "ksz_common.h" + +int ksz9477_setup(struct dsa_switch *ds); +u32 ksz9477_get_port_addr(int port, int offset); +void ksz9477_cfg_port_member(struct ksz_device *dev, int port, u8 member); +void ksz9477_flush_dyn_mac_table(struct ksz_device *dev, int port); +void ksz9477_port_setup(struct ksz_device *dev, int port, bool cpu_port); +void ksz9477_r_phy(struct ksz_device *dev, u16 addr, u16 reg, u16 *data); +void ksz9477_w_phy(struct ksz_device *dev, u16 addr, u16 reg, u16 val); +void ksz9477_r_mib_cnt(struct ksz_device *dev, int port, u16 addr, u64 *cnt); +void ksz9477_r_mib_pkt(struct ksz_device *dev, int port, u16 addr, + u64 *dropped, u64 *cnt); +void ksz9477_freeze_mib(struct ksz_device *dev, int port, bool freeze); +void ksz9477_port_init_cnt(struct ksz_device *dev, int port); +int ksz9477_port_vlan_filtering(struct ksz_device *dev, int port, + bool flag, struct netlink_ext_ack *extack); +int ksz9477_port_vlan_add(struct ksz_device *dev, int port, + const struct switchdev_obj_port_vlan *vlan, + struct netlink_ext_ack *extack); +int ksz9477_port_vlan_del(struct ksz_device *dev, int port, + const struct switchdev_obj_port_vlan *vlan); +int ksz9477_port_mirror_add(struct ksz_device *dev, int port, + struct 
dsa_mall_mirror_tc_entry *mirror, + bool ingress, struct netlink_ext_ack *extack); +void ksz9477_port_mirror_del(struct ksz_device *dev, int port, + struct dsa_mall_mirror_tc_entry *mirror); +int ksz9477_get_stp_reg(void); +void ksz9477_get_caps(struct ksz_device *dev, int port, + struct phylink_config *config); +int ksz9477_fdb_dump(struct ksz_device *dev, int port, + dsa_fdb_dump_cb_t *cb, void *data); +int ksz9477_fdb_add(struct ksz_device *dev, int port, + const unsigned char *addr, u16 vid, struct dsa_db db); +int ksz9477_fdb_del(struct ksz_device *dev, int port, + const unsigned char *addr, u16 vid, struct dsa_db db); +int ksz9477_mdb_add(struct ksz_device *dev, int port, + const struct switchdev_obj_port_mdb *mdb, struct dsa_db db); +int ksz9477_mdb_del(struct ksz_device *dev, int port, + const struct switchdev_obj_port_mdb *mdb, struct dsa_db db); +int ksz9477_change_mtu(struct ksz_device *dev, int port, int mtu); +int ksz9477_max_mtu(struct ksz_device *dev, int port); +void ksz9477_config_cpu_port(struct dsa_switch *ds); +int ksz9477_enable_stp_addr(struct ksz_device *dev); +int ksz9477_reset_switch(struct ksz_device *dev); +int ksz9477_dsa_init(struct ksz_device *dev); +int ksz9477_switch_init(struct ksz_device *dev); +void ksz9477_switch_exit(struct ksz_device *dev); + +#endif diff --git a/drivers/net/dsa/microchip/ksz9477_i2c.c b/drivers/net/dsa/microchip/ksz9477_i2c.c index faa3163c86b0..99966514d444 100644 --- a/drivers/net/dsa/microchip/ksz9477_i2c.c +++ b/drivers/net/dsa/microchip/ksz9477_i2c.c @@ -41,7 +41,7 @@ static int ksz9477_i2c_probe(struct i2c_client *i2c, if (i2c->dev.platform_data) dev->pdata = i2c->dev.platform_data; - ret = ksz9477_switch_register(dev); + ret = ksz_switch_register(dev); /* Main DSA driver may not be started yet. 
*/ if (ret) @@ -71,8 +71,8 @@ static void ksz9477_i2c_shutdown(struct i2c_client *i2c) if (!dev) return; - if (dev->dev_ops->shutdown) - dev->dev_ops->shutdown(dev); + if (dev->dev_ops->reset) + dev->dev_ops->reset(dev); dsa_switch_shutdown(dev->ds); diff --git a/drivers/net/dsa/microchip/ksz9477_reg.h b/drivers/net/dsa/microchip/ksz9477_reg.h index 7a2c8d4767af..c0ad83753b13 100644 --- a/drivers/net/dsa/microchip/ksz9477_reg.h +++ b/drivers/net/dsa/microchip/ksz9477_reg.h @@ -25,7 +25,6 @@ #define REG_CHIP_ID2__1 0x0002 -#define CHIP_ID_63 0x63 #define CHIP_ID_66 0x66 #define CHIP_ID_67 0x67 #define CHIP_ID_77 0x77 @@ -166,7 +165,6 @@ #define SW_DOUBLE_TAG BIT(7) #define SW_RESET BIT(1) -#define SW_START BIT(0) #define REG_SW_MAC_ADDR_0 0x0302 #define REG_SW_MAC_ADDR_1 0x0303 @@ -266,7 +264,6 @@ #define REG_SW_MAC_CTRL_1 0x0331 -#define MULTICAST_STORM_DISABLE BIT(6) #define SW_BACK_PRESSURE BIT(5) #define FAIR_FLOW_CTRL BIT(4) #define NO_EXC_COLLISION_DROP BIT(3) @@ -277,13 +274,9 @@ #define REG_SW_MAC_CTRL_2 0x0332 #define SW_REPLACE_VID BIT(3) -#define BROADCAST_STORM_RATE_HI 0x07 #define REG_SW_MAC_CTRL_3 0x0333 -#define BROADCAST_STORM_RATE_LO 0xFF -#define BROADCAST_STORM_RATE 0x07FF - #define REG_SW_MAC_CTRL_4 0x0334 #define SW_PASS_PAUSE BIT(3) @@ -1653,12 +1646,6 @@ #define PTP_TRIG_UNIT_M (BIT(MAX_TRIG_UNIT) - 1) #define PTP_TS_UNIT_M (BIT(MAX_TIMESTAMP_UNIT) - 1) -/* Driver set switch broadcast storm protection at 10% rate. 
*/ -#define BROADCAST_STORM_PROT_RATE 10 - -/* 148,800 frames * 67 ms / 100 */ -#define BROADCAST_STORM_VALUE 9969 - #define KSZ9477_MAX_FRAME_SIZE 9000 #endif /* KSZ9477_REGS_H */ diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c b/drivers/net/dsa/microchip/ksz9477_spi.c deleted file mode 100644 index 1bc8b0cbe458..000000000000 --- a/drivers/net/dsa/microchip/ksz9477_spi.c +++ /dev/null @@ -1,150 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Microchip KSZ9477 series register access through SPI - * - * Copyright (C) 2017-2019 Microchip Technology Inc. - */ - -#include <asm/unaligned.h> - -#include <linux/delay.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/regmap.h> -#include <linux/spi/spi.h> - -#include "ksz_common.h" - -#define SPI_ADDR_SHIFT 24 -#define SPI_ADDR_ALIGN 3 -#define SPI_TURNAROUND_SHIFT 5 - -KSZ_REGMAP_TABLE(ksz9477, 32, SPI_ADDR_SHIFT, - SPI_TURNAROUND_SHIFT, SPI_ADDR_ALIGN); - -static int ksz9477_spi_probe(struct spi_device *spi) -{ - struct regmap_config rc; - struct ksz_device *dev; - int i, ret; - - dev = ksz_switch_alloc(&spi->dev, spi); - if (!dev) - return -ENOMEM; - - for (i = 0; i < ARRAY_SIZE(ksz9477_regmap_config); i++) { - rc = ksz9477_regmap_config[i]; - rc.lock_arg = &dev->regmap_mutex; - dev->regmap[i] = devm_regmap_init_spi(spi, &rc); - if (IS_ERR(dev->regmap[i])) { - ret = PTR_ERR(dev->regmap[i]); - dev_err(&spi->dev, - "Failed to initialize regmap%i: %d\n", - ksz9477_regmap_config[i].val_bits, ret); - return ret; - } - } - - if (spi->dev.platform_data) - dev->pdata = spi->dev.platform_data; - - /* setup spi */ - spi->mode = SPI_MODE_3; - ret = spi_setup(spi); - if (ret) - return ret; - - ret = ksz9477_switch_register(dev); - - /* Main DSA driver may not be started yet. 
*/ - if (ret) - return ret; - - spi_set_drvdata(spi, dev); - - return 0; -} - -static void ksz9477_spi_remove(struct spi_device *spi) -{ - struct ksz_device *dev = spi_get_drvdata(spi); - - if (dev) - ksz_switch_remove(dev); - - spi_set_drvdata(spi, NULL); -} - -static void ksz9477_spi_shutdown(struct spi_device *spi) -{ - struct ksz_device *dev = spi_get_drvdata(spi); - - if (dev) - dsa_switch_shutdown(dev->ds); - - spi_set_drvdata(spi, NULL); -} - -static const struct of_device_id ksz9477_dt_ids[] = { - { - .compatible = "microchip,ksz9477", - .data = &ksz_switch_chips[KSZ9477] - }, - { - .compatible = "microchip,ksz9897", - .data = &ksz_switch_chips[KSZ9897] - }, - { - .compatible = "microchip,ksz9893", - .data = &ksz_switch_chips[KSZ9893] - }, - { - .compatible = "microchip,ksz9563", - .data = &ksz_switch_chips[KSZ9893] - }, - { - .compatible = "microchip,ksz8563", - .data = &ksz_switch_chips[KSZ9893] - }, - { - .compatible = "microchip,ksz9567", - .data = &ksz_switch_chips[KSZ9567] - }, - {}, -}; -MODULE_DEVICE_TABLE(of, ksz9477_dt_ids); - -static const struct spi_device_id ksz9477_spi_ids[] = { - { "ksz9477" }, - { "ksz9897" }, - { "ksz9893" }, - { "ksz9563" }, - { "ksz8563" }, - { "ksz9567" }, - { }, -}; -MODULE_DEVICE_TABLE(spi, ksz9477_spi_ids); - -static struct spi_driver ksz9477_spi_driver = { - .driver = { - .name = "ksz9477-switch", - .owner = THIS_MODULE, - .of_match_table = of_match_ptr(ksz9477_dt_ids), - }, - .id_table = ksz9477_spi_ids, - .probe = ksz9477_spi_probe, - .remove = ksz9477_spi_remove, - .shutdown = ksz9477_spi_shutdown, -}; - -module_spi_driver(ksz9477_spi_driver); - -MODULE_ALIAS("spi:ksz9477"); -MODULE_ALIAS("spi:ksz9897"); -MODULE_ALIAS("spi:ksz9893"); -MODULE_ALIAS("spi:ksz9563"); -MODULE_ALIAS("spi:ksz8563"); -MODULE_ALIAS("spi:ksz9567"); -MODULE_AUTHOR("Woojung Huh <Woojung.Huh@microchip.com>"); -MODULE_DESCRIPTION("Microchip KSZ9477 Series Switch SPI access Driver"); -MODULE_LICENSE("GPL"); diff --git 
a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 9ca8c8d7740f..59582eb3bcaf 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -16,10 +16,13 @@ #include <linux/if_bridge.h> #include <linux/of_device.h> #include <linux/of_net.h> +#include <linux/micrel_phy.h> #include <net/dsa.h> #include <net/switchdev.h> #include "ksz_common.h" +#include "ksz8.h" +#include "ksz9477.h" #define MIB_COUNTER_NUM 0x20 @@ -138,6 +141,66 @@ static const struct ksz_mib_names ksz9477_mib_names[] = { { 0x83, "tx_discards" }, }; +static const struct ksz_dev_ops ksz8_dev_ops = { + .setup = ksz8_setup, + .get_port_addr = ksz8_get_port_addr, + .cfg_port_member = ksz8_cfg_port_member, + .flush_dyn_mac_table = ksz8_flush_dyn_mac_table, + .port_setup = ksz8_port_setup, + .r_phy = ksz8_r_phy, + .w_phy = ksz8_w_phy, + .r_mib_pkt = ksz8_r_mib_pkt, + .freeze_mib = ksz8_freeze_mib, + .port_init_cnt = ksz8_port_init_cnt, + .fdb_dump = ksz8_fdb_dump, + .mdb_add = ksz8_mdb_add, + .mdb_del = ksz8_mdb_del, + .vlan_filtering = ksz8_port_vlan_filtering, + .vlan_add = ksz8_port_vlan_add, + .vlan_del = ksz8_port_vlan_del, + .mirror_add = ksz8_port_mirror_add, + .mirror_del = ksz8_port_mirror_del, + .get_caps = ksz8_get_caps, + .config_cpu_port = ksz8_config_cpu_port, + .enable_stp_addr = ksz8_enable_stp_addr, + .reset = ksz8_reset_switch, + .init = ksz8_switch_init, + .exit = ksz8_switch_exit, +}; + +static const struct ksz_dev_ops ksz9477_dev_ops = { + .setup = ksz9477_setup, + .get_port_addr = ksz9477_get_port_addr, + .cfg_port_member = ksz9477_cfg_port_member, + .flush_dyn_mac_table = ksz9477_flush_dyn_mac_table, + .port_setup = ksz9477_port_setup, + .r_phy = ksz9477_r_phy, + .w_phy = ksz9477_w_phy, + .r_mib_cnt = ksz9477_r_mib_cnt, + .r_mib_pkt = ksz9477_r_mib_pkt, + .r_mib_stat64 = ksz_r_mib_stats64, + .freeze_mib = ksz9477_freeze_mib, + .port_init_cnt = ksz9477_port_init_cnt, + .vlan_filtering = 
ksz9477_port_vlan_filtering, + .vlan_add = ksz9477_port_vlan_add, + .vlan_del = ksz9477_port_vlan_del, + .mirror_add = ksz9477_port_mirror_add, + .mirror_del = ksz9477_port_mirror_del, + .get_caps = ksz9477_get_caps, + .fdb_dump = ksz9477_fdb_dump, + .fdb_add = ksz9477_fdb_add, + .fdb_del = ksz9477_fdb_del, + .mdb_add = ksz9477_mdb_add, + .mdb_del = ksz9477_mdb_del, + .change_mtu = ksz9477_change_mtu, + .max_mtu = ksz9477_max_mtu, + .config_cpu_port = ksz9477_config_cpu_port, + .enable_stp_addr = ksz9477_enable_stp_addr, + .reset = ksz9477_reset_switch, + .init = ksz9477_switch_init, + .exit = ksz9477_switch_exit, +}; + const struct ksz_chip_data ksz_switch_chips[] = { [KSZ8795] = { .chip_id = KSZ8795_CHIP_ID, @@ -147,10 +210,15 @@ const struct ksz_chip_data ksz_switch_chips[] = { .num_statics = 8, .cpu_ports = 0x10, /* can be configured as cpu port */ .port_cnt = 5, /* total cpu and user ports */ + .ops = &ksz8_dev_ops, .ksz87xx_eee_link_erratum = true, .mib_names = ksz9477_mib_names, .mib_cnt = ARRAY_SIZE(ksz9477_mib_names), .reg_mib_cnt = MIB_COUNTER_NUM, + .stp_ctrl_reg = 0x02, + .broadcast_ctrl_reg = 0x06, + .multicast_ctrl_reg = 0x04, + .start_ctrl_reg = 0x01, .supports_mii = {false, false, false, false, true}, .supports_rmii = {false, false, false, false, true}, .supports_rgmii = {false, false, false, false, true}, @@ -179,10 +247,15 @@ const struct ksz_chip_data ksz_switch_chips[] = { .num_statics = 8, .cpu_ports = 0x10, /* can be configured as cpu port */ .port_cnt = 5, /* total cpu and user ports */ + .ops = &ksz8_dev_ops, .ksz87xx_eee_link_erratum = true, .mib_names = ksz9477_mib_names, .mib_cnt = ARRAY_SIZE(ksz9477_mib_names), .reg_mib_cnt = MIB_COUNTER_NUM, + .stp_ctrl_reg = 0x02, + .broadcast_ctrl_reg = 0x06, + .multicast_ctrl_reg = 0x04, + .start_ctrl_reg = 0x01, .supports_mii = {false, false, false, false, true}, .supports_rmii = {false, false, false, false, true}, .supports_rgmii = {false, false, false, false, true}, @@ -197,10 +270,15 @@ const 
struct ksz_chip_data ksz_switch_chips[] = { .num_statics = 8, .cpu_ports = 0x10, /* can be configured as cpu port */ .port_cnt = 5, /* total cpu and user ports */ + .ops = &ksz8_dev_ops, .ksz87xx_eee_link_erratum = true, .mib_names = ksz9477_mib_names, .mib_cnt = ARRAY_SIZE(ksz9477_mib_names), .reg_mib_cnt = MIB_COUNTER_NUM, + .stp_ctrl_reg = 0x02, + .broadcast_ctrl_reg = 0x06, + .multicast_ctrl_reg = 0x04, + .start_ctrl_reg = 0x01, .supports_mii = {false, false, false, false, true}, .supports_rmii = {false, false, false, false, true}, .supports_rgmii = {false, false, false, false, true}, @@ -215,9 +293,14 @@ const struct ksz_chip_data ksz_switch_chips[] = { .num_statics = 8, .cpu_ports = 0x4, /* can be configured as cpu port */ .port_cnt = 3, + .ops = &ksz8_dev_ops, .mib_names = ksz88xx_mib_names, .mib_cnt = ARRAY_SIZE(ksz88xx_mib_names), .reg_mib_cnt = MIB_COUNTER_NUM, + .stp_ctrl_reg = 0x02, + .broadcast_ctrl_reg = 0x06, + .multicast_ctrl_reg = 0x04, + .start_ctrl_reg = 0x01, .supports_mii = {false, false, true}, .supports_rmii = {false, false, true}, .internal_phy = {true, true, false}, @@ -231,10 +314,15 @@ const struct ksz_chip_data ksz_switch_chips[] = { .num_statics = 16, .cpu_ports = 0x7F, /* can be configured as cpu port */ .port_cnt = 7, /* total physical port count */ + .ops = &ksz9477_dev_ops, .phy_errata_9477 = true, .mib_names = ksz9477_mib_names, .mib_cnt = ARRAY_SIZE(ksz9477_mib_names), .reg_mib_cnt = MIB_COUNTER_NUM, + .stp_ctrl_reg = 0x0B04, + .broadcast_ctrl_reg = 0x0332, + .multicast_ctrl_reg = 0x0331, + .start_ctrl_reg = 0x0300, .supports_mii = {false, false, false, false, false, true, false}, .supports_rmii = {false, false, false, false, @@ -253,10 +341,15 @@ const struct ksz_chip_data ksz_switch_chips[] = { .num_statics = 16, .cpu_ports = 0x7F, /* can be configured as cpu port */ .port_cnt = 7, /* total physical port count */ + .ops = &ksz9477_dev_ops, .phy_errata_9477 = true, .mib_names = ksz9477_mib_names, .mib_cnt = 
ARRAY_SIZE(ksz9477_mib_names), .reg_mib_cnt = MIB_COUNTER_NUM, + .stp_ctrl_reg = 0x0B04, + .broadcast_ctrl_reg = 0x0332, + .multicast_ctrl_reg = 0x0331, + .start_ctrl_reg = 0x0300, .supports_mii = {false, false, false, false, false, true, true}, .supports_rmii = {false, false, false, false, @@ -275,9 +368,14 @@ const struct ksz_chip_data ksz_switch_chips[] = { .num_statics = 16, .cpu_ports = 0x07, /* can be configured as cpu port */ .port_cnt = 3, /* total port count */ + .ops = &ksz9477_dev_ops, .mib_names = ksz9477_mib_names, .mib_cnt = ARRAY_SIZE(ksz9477_mib_names), .reg_mib_cnt = MIB_COUNTER_NUM, + .stp_ctrl_reg = 0x0B04, + .broadcast_ctrl_reg = 0x0332, + .multicast_ctrl_reg = 0x0331, + .start_ctrl_reg = 0x0300, .supports_mii = {false, false, true}, .supports_rmii = {false, false, true}, .supports_rgmii = {false, false, true}, @@ -292,10 +390,15 @@ const struct ksz_chip_data ksz_switch_chips[] = { .num_statics = 16, .cpu_ports = 0x7F, /* can be configured as cpu port */ .port_cnt = 7, /* total physical port count */ + .ops = &ksz9477_dev_ops, .phy_errata_9477 = true, .mib_names = ksz9477_mib_names, .mib_cnt = ARRAY_SIZE(ksz9477_mib_names), .reg_mib_cnt = MIB_COUNTER_NUM, + .stp_ctrl_reg = 0x0B04, + .broadcast_ctrl_reg = 0x0332, + .multicast_ctrl_reg = 0x0331, + .start_ctrl_reg = 0x0300, .supports_mii = {false, false, false, false, false, true, true}, .supports_rmii = {false, false, false, false, @@ -317,6 +420,10 @@ const struct ksz_chip_data ksz_switch_chips[] = { .mib_names = ksz9477_mib_names, .mib_cnt = ARRAY_SIZE(ksz9477_mib_names), .reg_mib_cnt = MIB_COUNTER_NUM, + .stp_ctrl_reg = 0x0B04, + .broadcast_ctrl_reg = 0x0332, + .multicast_ctrl_reg = 0x0331, + .start_ctrl_reg = 0x0300, .supports_mii = {false, false, false, false, true}, .supports_rmii = {false, false, false, false, true}, .supports_rgmii = {false, false, false, false, true}, @@ -334,6 +441,10 @@ const struct ksz_chip_data ksz_switch_chips[] = { .mib_names = ksz9477_mib_names, .mib_cnt = 
ARRAY_SIZE(ksz9477_mib_names), .reg_mib_cnt = MIB_COUNTER_NUM, + .stp_ctrl_reg = 0x0B04, + .broadcast_ctrl_reg = 0x0332, + .multicast_ctrl_reg = 0x0331, + .start_ctrl_reg = 0x0300, .supports_mii = {false, false, false, false, true, true}, .supports_rmii = {false, false, false, false, true, true}, .supports_rgmii = {false, false, false, false, true, true}, @@ -351,6 +462,10 @@ const struct ksz_chip_data ksz_switch_chips[] = { .mib_names = ksz9477_mib_names, .mib_cnt = ARRAY_SIZE(ksz9477_mib_names), .reg_mib_cnt = MIB_COUNTER_NUM, + .stp_ctrl_reg = 0x0B04, + .broadcast_ctrl_reg = 0x0332, + .multicast_ctrl_reg = 0x0331, + .start_ctrl_reg = 0x0300, .supports_mii = {false, false, false, false, true, true, false, false}, .supports_rmii = {false, false, false, false, @@ -372,6 +487,10 @@ const struct ksz_chip_data ksz_switch_chips[] = { .mib_names = ksz9477_mib_names, .mib_cnt = ARRAY_SIZE(ksz9477_mib_names), .reg_mib_cnt = MIB_COUNTER_NUM, + .stp_ctrl_reg = 0x0B04, + .broadcast_ctrl_reg = 0x0332, + .multicast_ctrl_reg = 0x0331, + .start_ctrl_reg = 0x0300, .supports_mii = {false, false, false, false, true, true, false, false}, .supports_rmii = {false, false, false, false, @@ -393,6 +512,10 @@ const struct ksz_chip_data ksz_switch_chips[] = { .mib_names = ksz9477_mib_names, .mib_cnt = ARRAY_SIZE(ksz9477_mib_names), .reg_mib_cnt = MIB_COUNTER_NUM, + .stp_ctrl_reg = 0x0B04, + .broadcast_ctrl_reg = 0x0332, + .multicast_ctrl_reg = 0x0331, + .start_ctrl_reg = 0x0300, .supports_mii = {false, false, false, false, true, true, false, false}, .supports_rmii = {false, false, false, false, @@ -436,8 +559,8 @@ static int ksz_check_device_id(struct ksz_device *dev) return 0; } -void ksz_phylink_get_caps(struct dsa_switch *ds, int port, - struct phylink_config *config) +static void ksz_phylink_get_caps(struct dsa_switch *ds, int port, + struct phylink_config *config) { struct ksz_device *dev = ds->priv; @@ -456,8 +579,10 @@ void ksz_phylink_get_caps(struct dsa_switch *ds, int port, if 
(dev->info->internal_phy[port]) __set_bit(PHY_INTERFACE_MODE_INTERNAL, config->supported_interfaces); + + if (dev->dev_ops->get_caps) + dev->dev_ops->get_caps(dev, port, config); } -EXPORT_SYMBOL_GPL(ksz_phylink_get_caps); void ksz_r_mib_stats64(struct ksz_device *dev, int port) { @@ -500,10 +625,9 @@ void ksz_r_mib_stats64(struct ksz_device *dev, int port) spin_unlock(&mib->stats64_lock); } -EXPORT_SYMBOL_GPL(ksz_r_mib_stats64); -void ksz_get_stats64(struct dsa_switch *ds, int port, - struct rtnl_link_stats64 *s) +static void ksz_get_stats64(struct dsa_switch *ds, int port, + struct rtnl_link_stats64 *s) { struct ksz_device *dev = ds->priv; struct ksz_port_mib *mib; @@ -514,10 +638,9 @@ void ksz_get_stats64(struct dsa_switch *ds, int port, memcpy(s, &mib->stats64, sizeof(*s)); spin_unlock(&mib->stats64_lock); } -EXPORT_SYMBOL_GPL(ksz_get_stats64); -void ksz_get_strings(struct dsa_switch *ds, int port, - u32 stringset, uint8_t *buf) +static void ksz_get_strings(struct dsa_switch *ds, int port, + u32 stringset, uint8_t *buf) { struct ksz_device *dev = ds->priv; int i; @@ -530,9 +653,8 @@ void ksz_get_strings(struct dsa_switch *ds, int port, dev->info->mib_names[i].string, ETH_GSTRING_LEN); } } -EXPORT_SYMBOL_GPL(ksz_get_strings); -void ksz_update_port_member(struct ksz_device *dev, int port) +static void ksz_update_port_member(struct ksz_device *dev, int port) { struct ksz_port *p = &dev->ports[port]; struct dsa_switch *ds = dev->ds; @@ -589,7 +711,52 @@ void ksz_update_port_member(struct ksz_device *dev, int port) dev->dev_ops->cfg_port_member(dev, port, port_member | cpu_port); } -EXPORT_SYMBOL_GPL(ksz_update_port_member); + +static int ksz_setup(struct dsa_switch *ds) +{ + struct ksz_device *dev = ds->priv; + int ret; + + dev->vlan_cache = devm_kcalloc(dev->dev, sizeof(struct vlan_table), + dev->info->num_vlans, GFP_KERNEL); + if (!dev->vlan_cache) + return -ENOMEM; + + ret = dev->dev_ops->reset(dev); + if (ret) { + dev_err(ds->dev, "failed to reset switch\n"); + 
return ret; + } + + /* set broadcast storm protection 10% rate */ + regmap_update_bits(dev->regmap[1], dev->info->broadcast_ctrl_reg, + BROADCAST_STORM_RATE, + (BROADCAST_STORM_VALUE * + BROADCAST_STORM_PROT_RATE) / 100); + + dev->dev_ops->config_cpu_port(ds); + + dev->dev_ops->enable_stp_addr(dev); + + regmap_update_bits(dev->regmap[0], dev->info->multicast_ctrl_reg, + MULTICAST_STORM_DISABLE, MULTICAST_STORM_DISABLE); + + ksz_init_mib_timer(dev); + + ds->configure_vlan_while_not_filtering = false; + + if (dev->dev_ops->setup) { + ret = dev->dev_ops->setup(ds); + if (ret) + return ret; + } + + /* start switch */ + regmap_update_bits(dev->regmap[0], dev->info->start_ctrl_reg, + SW_START, SW_START); + + return 0; +} static void port_r_cnt(struct ksz_device *dev, int port) { @@ -667,9 +834,8 @@ void ksz_init_mib_timer(struct ksz_device *dev) memset(mib->counters, 0, dev->info->mib_cnt * sizeof(u64)); } } -EXPORT_SYMBOL_GPL(ksz_init_mib_timer); -int ksz_phy_read16(struct dsa_switch *ds, int addr, int reg) +static int ksz_phy_read16(struct dsa_switch *ds, int addr, int reg) { struct ksz_device *dev = ds->priv; u16 val = 0xffff; @@ -678,9 +844,8 @@ int ksz_phy_read16(struct dsa_switch *ds, int addr, int reg) return val; } -EXPORT_SYMBOL_GPL(ksz_phy_read16); -int ksz_phy_write16(struct dsa_switch *ds, int addr, int reg, u16 val) +static int ksz_phy_write16(struct dsa_switch *ds, int addr, int reg, u16 val) { struct ksz_device *dev = ds->priv; @@ -688,10 +853,25 @@ int ksz_phy_write16(struct dsa_switch *ds, int addr, int reg, u16 val) return 0; } -EXPORT_SYMBOL_GPL(ksz_phy_write16); -void ksz_mac_link_down(struct dsa_switch *ds, int port, unsigned int mode, - phy_interface_t interface) +static u32 ksz_get_phy_flags(struct dsa_switch *ds, int port) +{ + struct ksz_device *dev = ds->priv; + + if (dev->chip_id == KSZ8830_CHIP_ID) { + /* Silicon Errata Sheet (DS80000830A): + * Port 1 does not work with LinkMD Cable-Testing. 
+ * Port 1 does not respond to received PAUSE control frames. + */ + if (!port) + return MICREL_KSZ8_P1_ERRATA; + } + + return 0; +} + +static void ksz_mac_link_down(struct dsa_switch *ds, int port, + unsigned int mode, phy_interface_t interface) { struct ksz_device *dev = ds->priv; struct ksz_port *p = &dev->ports[port]; @@ -702,9 +882,8 @@ void ksz_mac_link_down(struct dsa_switch *ds, int port, unsigned int mode, if (dev->mib_read_interval) schedule_delayed_work(&dev->mib_read, 0); } -EXPORT_SYMBOL_GPL(ksz_mac_link_down); -int ksz_sset_count(struct dsa_switch *ds, int port, int sset) +static int ksz_sset_count(struct dsa_switch *ds, int port, int sset) { struct ksz_device *dev = ds->priv; @@ -713,9 +892,9 @@ int ksz_sset_count(struct dsa_switch *ds, int port, int sset) return dev->info->mib_cnt; } -EXPORT_SYMBOL_GPL(ksz_sset_count); -void ksz_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *buf) +static void ksz_get_ethtool_stats(struct dsa_switch *ds, int port, + uint64_t *buf) { const struct dsa_port *dp = dsa_to_port(ds, port); struct ksz_device *dev = ds->priv; @@ -731,12 +910,11 @@ void ksz_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *buf) memcpy(buf, mib->counters, dev->info->mib_cnt * sizeof(u64)); mutex_unlock(&mib->cnt_mutex); } -EXPORT_SYMBOL_GPL(ksz_get_ethtool_stats); -int ksz_port_bridge_join(struct dsa_switch *ds, int port, - struct dsa_bridge bridge, - bool *tx_fwd_offload, - struct netlink_ext_ack *extack) +static int ksz_port_bridge_join(struct dsa_switch *ds, int port, + struct dsa_bridge bridge, + bool *tx_fwd_offload, + struct netlink_ext_ack *extack) { /* port_stp_state_set() will be called after to put the port in * appropriate state so there is no need to do anything. 
@@ -744,135 +922,83 @@ int ksz_port_bridge_join(struct dsa_switch *ds, int port, return 0; } -EXPORT_SYMBOL_GPL(ksz_port_bridge_join); -void ksz_port_bridge_leave(struct dsa_switch *ds, int port, - struct dsa_bridge bridge) +static void ksz_port_bridge_leave(struct dsa_switch *ds, int port, + struct dsa_bridge bridge) { /* port_stp_state_set() will be called after to put the port in * forwarding state so there is no need to do anything. */ } -EXPORT_SYMBOL_GPL(ksz_port_bridge_leave); -void ksz_port_fast_age(struct dsa_switch *ds, int port) +static void ksz_port_fast_age(struct dsa_switch *ds, int port) { struct ksz_device *dev = ds->priv; dev->dev_ops->flush_dyn_mac_table(dev, port); } -EXPORT_SYMBOL_GPL(ksz_port_fast_age); -int ksz_port_fdb_dump(struct dsa_switch *ds, int port, dsa_fdb_dump_cb_t *cb, - void *data) +static int ksz_port_fdb_add(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid, + struct dsa_db db) { struct ksz_device *dev = ds->priv; - int ret = 0; - u16 i = 0; - u16 entries = 0; - u8 timestamp = 0; - u8 fid; - u8 member; - struct alu_struct alu; - - do { - alu.is_static = false; - ret = dev->dev_ops->r_dyn_mac_table(dev, i, alu.mac, &fid, - &member, &timestamp, - &entries); - if (!ret && (member & BIT(port))) { - ret = cb(alu.mac, alu.fid, alu.is_static, data); - if (ret) - break; - } - i++; - } while (i < entries); - if (i >= entries) - ret = 0; - return ret; + if (!dev->dev_ops->fdb_add) + return -EOPNOTSUPP; + + return dev->dev_ops->fdb_add(dev, port, addr, vid, db); } -EXPORT_SYMBOL_GPL(ksz_port_fdb_dump); -int ksz_port_mdb_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb, - struct dsa_db db) +static int ksz_port_fdb_del(struct dsa_switch *ds, int port, + const unsigned char *addr, + u16 vid, struct dsa_db db) { struct ksz_device *dev = ds->priv; - struct alu_struct alu; - int index; - int empty = 0; - - alu.port_forward = 0; - for (index = 0; index < dev->info->num_statics; index++) { - if
(!dev->dev_ops->r_sta_mac_table(dev, index, &alu)) { - /* Found one already in static MAC table. */ - if (!memcmp(alu.mac, mdb->addr, ETH_ALEN) && - alu.fid == mdb->vid) - break; - /* Remember the first empty entry. */ - } else if (!empty) { - empty = index + 1; - } - } - /* no available entry */ - if (index == dev->info->num_statics && !empty) - return -ENOSPC; + if (!dev->dev_ops->fdb_del) + return -EOPNOTSUPP; - /* add entry */ - if (index == dev->info->num_statics) { - index = empty - 1; - memset(&alu, 0, sizeof(alu)); - memcpy(alu.mac, mdb->addr, ETH_ALEN); - alu.is_static = true; - } - alu.port_forward |= BIT(port); - if (mdb->vid) { - alu.is_use_fid = true; + return dev->dev_ops->fdb_del(dev, port, addr, vid, db); +} - /* Need a way to map VID to FID. */ - alu.fid = mdb->vid; - } - dev->dev_ops->w_sta_mac_table(dev, index, &alu); +static int ksz_port_fdb_dump(struct dsa_switch *ds, int port, + dsa_fdb_dump_cb_t *cb, void *data) +{ + struct ksz_device *dev = ds->priv; - return 0; + if (!dev->dev_ops->fdb_dump) + return -EOPNOTSUPP; + + return dev->dev_ops->fdb_dump(dev, port, cb, data); } -EXPORT_SYMBOL_GPL(ksz_port_mdb_add); -int ksz_port_mdb_del(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb, - struct dsa_db db) +static int ksz_port_mdb_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db) { struct ksz_device *dev = ds->priv; - struct alu_struct alu; - int index; - - for (index = 0; index < dev->info->num_statics; index++) { - if (!dev->dev_ops->r_sta_mac_table(dev, index, &alu)) { - /* Found one already in static MAC table. 
*/ - if (!memcmp(alu.mac, mdb->addr, ETH_ALEN) && - alu.fid == mdb->vid) - break; - } - } - /* no available entry */ - if (index == dev->info->num_statics) - goto exit; + if (!dev->dev_ops->mdb_add) + return -EOPNOTSUPP; - /* clear port */ - alu.port_forward &= ~BIT(port); - if (!alu.port_forward) - alu.is_static = false; - dev->dev_ops->w_sta_mac_table(dev, index, &alu); + return dev->dev_ops->mdb_add(dev, port, mdb, db); +} -exit: - return 0; +static int ksz_port_mdb_del(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db) +{ + struct ksz_device *dev = ds->priv; + + if (!dev->dev_ops->mdb_del) + return -EOPNOTSUPP; + + return dev->dev_ops->mdb_del(dev, port, mdb, db); } -EXPORT_SYMBOL_GPL(ksz_port_mdb_del); -int ksz_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy) +static int ksz_enable_port(struct dsa_switch *ds, int port, + struct phy_device *phy) { struct ksz_device *dev = ds->priv; @@ -888,14 +1014,15 @@ int ksz_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy) return 0; } -EXPORT_SYMBOL_GPL(ksz_enable_port); -void ksz_port_stp_state_set(struct dsa_switch *ds, int port, - u8 state, int reg) +void ksz_port_stp_state_set(struct dsa_switch *ds, int port, u8 state) { struct ksz_device *dev = ds->priv; struct ksz_port *p; u8 data; + int reg; + + reg = dev->info->stp_ctrl_reg; ksz_pread8(dev, port, reg, &data); data &= ~(PORT_TX_ENABLE | PORT_RX_ENABLE | PORT_LEARN_DISABLE); @@ -928,7 +1055,202 @@ void ksz_port_stp_state_set(struct dsa_switch *ds, int port, ksz_update_port_member(dev, port); } -EXPORT_SYMBOL_GPL(ksz_port_stp_state_set); + +static enum dsa_tag_protocol ksz_get_tag_protocol(struct dsa_switch *ds, + int port, + enum dsa_tag_protocol mp) +{ + struct ksz_device *dev = ds->priv; + enum dsa_tag_protocol proto = DSA_TAG_PROTO_NONE; + + if (dev->chip_id == KSZ8795_CHIP_ID || + dev->chip_id == KSZ8794_CHIP_ID || + dev->chip_id == KSZ8765_CHIP_ID) + proto = 
DSA_TAG_PROTO_KSZ8795; + + if (dev->chip_id == KSZ8830_CHIP_ID || + dev->chip_id == KSZ9893_CHIP_ID) + proto = DSA_TAG_PROTO_KSZ9893; + + if (dev->chip_id == KSZ9477_CHIP_ID || + dev->chip_id == KSZ9897_CHIP_ID || + dev->chip_id == KSZ9567_CHIP_ID) + proto = DSA_TAG_PROTO_KSZ9477; + + return proto; +} + +static int ksz_port_vlan_filtering(struct dsa_switch *ds, int port, + bool flag, struct netlink_ext_ack *extack) +{ + struct ksz_device *dev = ds->priv; + + if (!dev->dev_ops->vlan_filtering) + return -EOPNOTSUPP; + + return dev->dev_ops->vlan_filtering(dev, port, flag, extack); +} + +static int ksz_port_vlan_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan, + struct netlink_ext_ack *extack) +{ + struct ksz_device *dev = ds->priv; + + if (!dev->dev_ops->vlan_add) + return -EOPNOTSUPP; + + return dev->dev_ops->vlan_add(dev, port, vlan, extack); +} + +static int ksz_port_vlan_del(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan) +{ + struct ksz_device *dev = ds->priv; + + if (!dev->dev_ops->vlan_del) + return -EOPNOTSUPP; + + return dev->dev_ops->vlan_del(dev, port, vlan); +} + +static int ksz_port_mirror_add(struct dsa_switch *ds, int port, + struct dsa_mall_mirror_tc_entry *mirror, + bool ingress, struct netlink_ext_ack *extack) +{ + struct ksz_device *dev = ds->priv; + + if (!dev->dev_ops->mirror_add) + return -EOPNOTSUPP; + + return dev->dev_ops->mirror_add(dev, port, mirror, ingress, extack); +} + +static void ksz_port_mirror_del(struct dsa_switch *ds, int port, + struct dsa_mall_mirror_tc_entry *mirror) +{ + struct ksz_device *dev = ds->priv; + + if (dev->dev_ops->mirror_del) + dev->dev_ops->mirror_del(dev, port, mirror); +} + +static int ksz_change_mtu(struct dsa_switch *ds, int port, int mtu) +{ + struct ksz_device *dev = ds->priv; + + if (!dev->dev_ops->change_mtu) + return -EOPNOTSUPP; + + return dev->dev_ops->change_mtu(dev, port, mtu); +} + +static int ksz_max_mtu(struct dsa_switch *ds, int 
port) +{ + struct ksz_device *dev = ds->priv; + + if (!dev->dev_ops->max_mtu) + return -EOPNOTSUPP; + + return dev->dev_ops->max_mtu(dev, port); +} + +static int ksz_switch_detect(struct ksz_device *dev) +{ + u8 id1, id2; + u16 id16; + u32 id32; + int ret; + + /* read chip id */ + ret = ksz_read16(dev, REG_CHIP_ID0, &id16); + if (ret) + return ret; + + id1 = FIELD_GET(SW_FAMILY_ID_M, id16); + id2 = FIELD_GET(SW_CHIP_ID_M, id16); + + switch (id1) { + case KSZ87_FAMILY_ID: + if (id2 == KSZ87_CHIP_ID_95) { + u8 val; + + dev->chip_id = KSZ8795_CHIP_ID; + + ksz_read8(dev, KSZ8_PORT_STATUS_0, &val); + if (val & KSZ8_PORT_FIBER_MODE) + dev->chip_id = KSZ8765_CHIP_ID; + } else if (id2 == KSZ87_CHIP_ID_94) { + dev->chip_id = KSZ8794_CHIP_ID; + } else { + return -ENODEV; + } + break; + case KSZ88_FAMILY_ID: + if (id2 == KSZ88_CHIP_ID_63) + dev->chip_id = KSZ8830_CHIP_ID; + else + return -ENODEV; + break; + default: + ret = ksz_read32(dev, REG_CHIP_ID0, &id32); + if (ret) + return ret; + + dev->chip_rev = FIELD_GET(SW_REV_ID_M, id32); + id32 &= ~0xFF; + + switch (id32) { + case KSZ9477_CHIP_ID: + case KSZ9897_CHIP_ID: + case KSZ9893_CHIP_ID: + case KSZ9567_CHIP_ID: + case LAN9370_CHIP_ID: + case LAN9371_CHIP_ID: + case LAN9372_CHIP_ID: + case LAN9373_CHIP_ID: + case LAN9374_CHIP_ID: + dev->chip_id = id32; + break; + default: + dev_err(dev->dev, + "unsupported switch detected %x)\n", id32); + return -ENODEV; + } + } + return 0; +} + +static const struct dsa_switch_ops ksz_switch_ops = { + .get_tag_protocol = ksz_get_tag_protocol, + .get_phy_flags = ksz_get_phy_flags, + .setup = ksz_setup, + .phy_read = ksz_phy_read16, + .phy_write = ksz_phy_write16, + .phylink_get_caps = ksz_phylink_get_caps, + .phylink_mac_link_down = ksz_mac_link_down, + .port_enable = ksz_enable_port, + .get_strings = ksz_get_strings, + .get_ethtool_stats = ksz_get_ethtool_stats, + .get_sset_count = ksz_sset_count, + .port_bridge_join = ksz_port_bridge_join, + .port_bridge_leave = ksz_port_bridge_leave, + 
.port_stp_state_set = ksz_port_stp_state_set, + .port_fast_age = ksz_port_fast_age, + .port_vlan_filtering = ksz_port_vlan_filtering, + .port_vlan_add = ksz_port_vlan_add, + .port_vlan_del = ksz_port_vlan_del, + .port_fdb_dump = ksz_port_fdb_dump, + .port_fdb_add = ksz_port_fdb_add, + .port_fdb_del = ksz_port_fdb_del, + .port_mdb_add = ksz_port_mdb_add, + .port_mdb_del = ksz_port_mdb_del, + .port_mirror_add = ksz_port_mirror_add, + .port_mirror_del = ksz_port_mirror_del, + .get_stats64 = ksz_get_stats64, + .port_change_mtu = ksz_change_mtu, + .port_max_mtu = ksz_max_mtu, +}; struct ksz_device *ksz_switch_alloc(struct device *base, void *priv) { @@ -941,6 +1263,7 @@ struct ksz_device *ksz_switch_alloc(struct device *base, void *priv) ds->dev = base; ds->num_ports = DSA_MAX_PORTS; + ds->ops = &ksz_switch_ops; swdev = devm_kzalloc(base, sizeof(*swdev), GFP_KERNEL); if (!swdev) @@ -956,8 +1279,7 @@ struct ksz_device *ksz_switch_alloc(struct device *base, void *priv) } EXPORT_SYMBOL(ksz_switch_alloc); -int ksz_switch_register(struct ksz_device *dev, - const struct ksz_dev_ops *ops) +int ksz_switch_register(struct ksz_device *dev) { const struct ksz_chip_data *info; struct device_node *port, *ports; @@ -986,10 +1308,9 @@ int ksz_switch_register(struct ksz_device *dev, mutex_init(&dev->alu_mutex); mutex_init(&dev->vlan_mutex); - dev->dev_ops = ops; - - if (dev->dev_ops->detect(dev)) - return -EINVAL; + ret = ksz_switch_detect(dev); + if (ret) + return ret; info = ksz_lookup_info(dev->chip_id); if (!info) @@ -998,10 +1319,15 @@ int ksz_switch_register(struct ksz_device *dev, /* Update the compatible info with the probed one */ dev->info = info; + dev_info(dev->dev, "found switch: %s, rev %i\n", + dev->info->dev_name, dev->chip_rev); + ret = ksz_check_device_id(dev); if (ret) return ret; + dev->dev_ops = dev->info->ops; + ret = dev->dev_ops->init(dev); if (ret) return ret; @@ -1072,7 +1398,7 @@ int ksz_switch_register(struct ksz_device *dev, /* Start the MIB timer. 
*/ schedule_delayed_work(&dev->mib_read, 0); - return 0; + return ret; } EXPORT_SYMBOL(ksz_switch_register); diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index 8500eaedad67..0e7f15efbb79 100644 --- a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -41,11 +41,16 @@ struct ksz_chip_data { int num_statics; int cpu_ports; int port_cnt; + const struct ksz_dev_ops *ops; bool phy_errata_9477; bool ksz87xx_eee_link_erratum; const struct ksz_mib_names *mib_names; int mib_cnt; u8 reg_mib_cnt; + int stp_ctrl_reg; + int broadcast_ctrl_reg; + int multicast_ctrl_reg; + int start_ctrl_reg; bool supports_mii[KSZ_MAX_NUM_PORTS]; bool supports_rmii[KSZ_MAX_NUM_PORTS]; bool supports_rgmii[KSZ_MAX_NUM_PORTS]; @@ -90,6 +95,7 @@ struct ksz_device { /* chip specific data */ u32 chip_id; + u8 chip_rev; int cpu_port; /* port connected to CPU */ int phy_port_cnt; phy_interface_t compat_interface; @@ -160,6 +166,7 @@ struct alu_struct { }; struct ksz_dev_ops { + int (*setup)(struct dsa_switch *ds); u32 (*get_port_addr)(int port, int offset); void (*cfg_port_member)(struct ksz_device *dev, int port, u8 member); void (*flush_dyn_mac_table)(struct ksz_device *dev, int port); @@ -167,71 +174,57 @@ struct ksz_dev_ops { void (*port_setup)(struct ksz_device *dev, int port, bool cpu_port); void (*r_phy)(struct ksz_device *dev, u16 phy, u16 reg, u16 *val); void (*w_phy)(struct ksz_device *dev, u16 phy, u16 reg, u16 val); - int (*r_dyn_mac_table)(struct ksz_device *dev, u16 addr, u8 *mac_addr, - u8 *fid, u8 *src_port, u8 *timestamp, - u16 *entries); - int (*r_sta_mac_table)(struct ksz_device *dev, u16 addr, - struct alu_struct *alu); - void (*w_sta_mac_table)(struct ksz_device *dev, u16 addr, - struct alu_struct *alu); void (*r_mib_cnt)(struct ksz_device *dev, int port, u16 addr, u64 *cnt); void (*r_mib_pkt)(struct ksz_device *dev, int port, u16 addr, u64 *dropped, u64 *cnt); void (*r_mib_stat64)(struct ksz_device 
*dev, int port); + int (*vlan_filtering)(struct ksz_device *dev, int port, + bool flag, struct netlink_ext_ack *extack); + int (*vlan_add)(struct ksz_device *dev, int port, + const struct switchdev_obj_port_vlan *vlan, + struct netlink_ext_ack *extack); + int (*vlan_del)(struct ksz_device *dev, int port, + const struct switchdev_obj_port_vlan *vlan); + int (*mirror_add)(struct ksz_device *dev, int port, + struct dsa_mall_mirror_tc_entry *mirror, + bool ingress, struct netlink_ext_ack *extack); + void (*mirror_del)(struct ksz_device *dev, int port, + struct dsa_mall_mirror_tc_entry *mirror); + int (*fdb_add)(struct ksz_device *dev, int port, + const unsigned char *addr, u16 vid, struct dsa_db db); + int (*fdb_del)(struct ksz_device *dev, int port, + const unsigned char *addr, u16 vid, struct dsa_db db); + int (*fdb_dump)(struct ksz_device *dev, int port, + dsa_fdb_dump_cb_t *cb, void *data); + int (*mdb_add)(struct ksz_device *dev, int port, + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db); + int (*mdb_del)(struct ksz_device *dev, int port, + const struct switchdev_obj_port_mdb *mdb, + struct dsa_db db); + void (*get_caps)(struct ksz_device *dev, int port, + struct phylink_config *config); + int (*change_mtu)(struct ksz_device *dev, int port, int mtu); + int (*max_mtu)(struct ksz_device *dev, int port); void (*freeze_mib)(struct ksz_device *dev, int port, bool freeze); void (*port_init_cnt)(struct ksz_device *dev, int port); - int (*shutdown)(struct ksz_device *dev); - int (*detect)(struct ksz_device *dev); + void (*config_cpu_port)(struct dsa_switch *ds); + int (*enable_stp_addr)(struct ksz_device *dev); + int (*reset)(struct ksz_device *dev); int (*init)(struct ksz_device *dev); void (*exit)(struct ksz_device *dev); }; struct ksz_device *ksz_switch_alloc(struct device *base, void *priv); -int ksz_switch_register(struct ksz_device *dev, - const struct ksz_dev_ops *ops); +int ksz_switch_register(struct ksz_device *dev); void ksz_switch_remove(struct 
ksz_device *dev); -int ksz8_switch_register(struct ksz_device *dev); -int ksz9477_switch_register(struct ksz_device *dev); - -void ksz_update_port_member(struct ksz_device *dev, int port); void ksz_init_mib_timer(struct ksz_device *dev); void ksz_r_mib_stats64(struct ksz_device *dev, int port); -void ksz_get_stats64(struct dsa_switch *ds, int port, - struct rtnl_link_stats64 *s); -void ksz_phylink_get_caps(struct dsa_switch *ds, int port, - struct phylink_config *config); +void ksz_port_stp_state_set(struct dsa_switch *ds, int port, u8 state); extern const struct ksz_chip_data ksz_switch_chips[]; -/* Common DSA access functions */ - -int ksz_phy_read16(struct dsa_switch *ds, int addr, int reg); -int ksz_phy_write16(struct dsa_switch *ds, int addr, int reg, u16 val); -void ksz_mac_link_down(struct dsa_switch *ds, int port, unsigned int mode, - phy_interface_t interface); -int ksz_sset_count(struct dsa_switch *ds, int port, int sset); -void ksz_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *buf); -int ksz_port_bridge_join(struct dsa_switch *ds, int port, - struct dsa_bridge bridge, bool *tx_fwd_offload, - struct netlink_ext_ack *extack); -void ksz_port_bridge_leave(struct dsa_switch *ds, int port, - struct dsa_bridge bridge); -void ksz_port_stp_state_set(struct dsa_switch *ds, int port, - u8 state, int reg); -void ksz_port_fast_age(struct dsa_switch *ds, int port); -int ksz_port_fdb_dump(struct dsa_switch *ds, int port, dsa_fdb_dump_cb_t *cb, - void *data); -int ksz_port_mdb_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb, - struct dsa_db db); -int ksz_port_mdb_del(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb, - struct dsa_db db); -int ksz_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy); -void ksz_get_strings(struct dsa_switch *ds, int port, - u32 stringset, uint8_t *buf); - /* Common register access functions */ static inline int ksz_read8(struct ksz_device *dev, u32 
reg, u8 *val) @@ -353,6 +346,37 @@ static inline void ksz_regmap_unlock(void *__mtx) #define PORT_RX_ENABLE BIT(1) #define PORT_LEARN_DISABLE BIT(0) +/* Switch ID Defines */ +#define REG_CHIP_ID0 0x00 + +#define SW_FAMILY_ID_M GENMASK(15, 8) +#define KSZ87_FAMILY_ID 0x87 +#define KSZ88_FAMILY_ID 0x88 + +#define KSZ8_PORT_STATUS_0 0x08 +#define KSZ8_PORT_FIBER_MODE BIT(7) + +#define SW_CHIP_ID_M GENMASK(7, 4) +#define KSZ87_CHIP_ID_94 0x6 +#define KSZ87_CHIP_ID_95 0x9 +#define KSZ88_CHIP_ID_63 0x3 + +#define SW_REV_ID_M GENMASK(7, 4) + +/* Driver set switch broadcast storm protection at 10% rate. */ +#define BROADCAST_STORM_PROT_RATE 10 + +/* 148,800 frames * 67 ms / 100 */ +#define BROADCAST_STORM_VALUE 9969 + +#define BROADCAST_STORM_RATE_HI 0x07 +#define BROADCAST_STORM_RATE_LO 0xFF +#define BROADCAST_STORM_RATE 0x07FF + +#define MULTICAST_STORM_DISABLE BIT(6) + +#define SW_START 0x01 + /* Regmap tables generation */ #define KSZ_SPI_OP_RD 3 #define KSZ_SPI_OP_WR 2 diff --git a/drivers/net/dsa/microchip/ksz8795_spi.c b/drivers/net/dsa/microchip/ksz_spi.c index 961a74c359a8..344ff92db099 100644 --- a/drivers/net/dsa/microchip/ksz8795_spi.c +++ b/drivers/net/dsa/microchip/ksz_spi.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* - * Microchip KSZ8795 series register access through SPI + * Microchip ksz series register access through SPI * * Copyright (C) 2017 Microchip Technology Inc. 
* Tristram Ha <Tristram.Ha@microchip.com> @@ -25,13 +25,20 @@ #define KSZ8863_SPI_ADDR_ALIGN 8 #define KSZ8863_SPI_TURNAROUND_SHIFT 0 +#define KSZ9477_SPI_ADDR_SHIFT 24 +#define KSZ9477_SPI_ADDR_ALIGN 3 +#define KSZ9477_SPI_TURNAROUND_SHIFT 5 + KSZ_REGMAP_TABLE(ksz8795, 16, KSZ8795_SPI_ADDR_SHIFT, KSZ8795_SPI_TURNAROUND_SHIFT, KSZ8795_SPI_ADDR_ALIGN); KSZ_REGMAP_TABLE(ksz8863, 16, KSZ8863_SPI_ADDR_SHIFT, KSZ8863_SPI_TURNAROUND_SHIFT, KSZ8863_SPI_ADDR_ALIGN); -static int ksz8795_spi_probe(struct spi_device *spi) +KSZ_REGMAP_TABLE(ksz9477, 32, KSZ9477_SPI_ADDR_SHIFT, + KSZ9477_SPI_TURNAROUND_SHIFT, KSZ9477_SPI_ADDR_ALIGN); + +static int ksz_spi_probe(struct spi_device *spi) { const struct regmap_config *regmap_config; const struct ksz_chip_data *chip; @@ -57,8 +64,12 @@ static int ksz8795_spi_probe(struct spi_device *spi) if (chip->chip_id == KSZ8830_CHIP_ID) regmap_config = ksz8863_regmap_config; - else + else if (chip->chip_id == KSZ8795_CHIP_ID || + chip->chip_id == KSZ8794_CHIP_ID || + chip->chip_id == KSZ8765_CHIP_ID) regmap_config = ksz8795_regmap_config; + else + regmap_config = ksz9477_regmap_config; for (i = 0; i < ARRAY_SIZE(ksz8795_regmap_config); i++) { rc = regmap_config[i]; @@ -82,7 +93,7 @@ static int ksz8795_spi_probe(struct spi_device *spi) if (ret) return ret; - ret = ksz8_switch_register(dev); + ret = ksz_switch_register(dev); /* Main DSA driver may not be started yet. 
*/ if (ret) @@ -93,7 +104,7 @@ static int ksz8795_spi_probe(struct spi_device *spi) return 0; } -static void ksz8795_spi_remove(struct spi_device *spi) +static void ksz_spi_remove(struct spi_device *spi) { struct ksz_device *dev = spi_get_drvdata(spi); @@ -103,22 +114,22 @@ static void ksz8795_spi_remove(struct spi_device *spi) spi_set_drvdata(spi, NULL); } -static void ksz8795_spi_shutdown(struct spi_device *spi) +static void ksz_spi_shutdown(struct spi_device *spi) { struct ksz_device *dev = spi_get_drvdata(spi); if (!dev) return; - if (dev->dev_ops->shutdown) - dev->dev_ops->shutdown(dev); + if (dev->dev_ops->reset) + dev->dev_ops->reset(dev); dsa_switch_shutdown(dev->ds); spi_set_drvdata(spi, NULL); } -static const struct of_device_id ksz8795_dt_ids[] = { +static const struct of_device_id ksz_dt_ids[] = { { .compatible = "microchip,ksz8765", .data = &ksz_switch_chips[KSZ8765] @@ -139,34 +150,70 @@ static const struct of_device_id ksz8795_dt_ids[] = { .compatible = "microchip,ksz8873", .data = &ksz_switch_chips[KSZ8830] }, + { + .compatible = "microchip,ksz9477", + .data = &ksz_switch_chips[KSZ9477] + }, + { + .compatible = "microchip,ksz9897", + .data = &ksz_switch_chips[KSZ9897] + }, + { + .compatible = "microchip,ksz9893", + .data = &ksz_switch_chips[KSZ9893] + }, + { + .compatible = "microchip,ksz9563", + .data = &ksz_switch_chips[KSZ9893] + }, + { + .compatible = "microchip,ksz8563", + .data = &ksz_switch_chips[KSZ9893] + }, + { + .compatible = "microchip,ksz9567", + .data = &ksz_switch_chips[KSZ9567] + }, {}, }; -MODULE_DEVICE_TABLE(of, ksz8795_dt_ids); +MODULE_DEVICE_TABLE(of, ksz_dt_ids); -static const struct spi_device_id ksz8795_spi_ids[] = { +static const struct spi_device_id ksz_spi_ids[] = { { "ksz8765" }, { "ksz8794" }, { "ksz8795" }, { "ksz8863" }, { "ksz8873" }, + { "ksz9477" }, + { "ksz9897" }, + { "ksz9893" }, + { "ksz9563" }, + { "ksz8563" }, + { "ksz9567" }, { }, }; -MODULE_DEVICE_TABLE(spi, ksz8795_spi_ids); +MODULE_DEVICE_TABLE(spi, 
ksz_spi_ids); -static struct spi_driver ksz8795_spi_driver = { +static struct spi_driver ksz_spi_driver = { .driver = { - .name = "ksz8795-switch", + .name = "ksz-switch", .owner = THIS_MODULE, - .of_match_table = of_match_ptr(ksz8795_dt_ids), + .of_match_table = of_match_ptr(ksz_dt_ids), }, - .id_table = ksz8795_spi_ids, - .probe = ksz8795_spi_probe, - .remove = ksz8795_spi_remove, - .shutdown = ksz8795_spi_shutdown, + .id_table = ksz_spi_ids, + .probe = ksz_spi_probe, + .remove = ksz_spi_remove, + .shutdown = ksz_spi_shutdown, }; -module_spi_driver(ksz8795_spi_driver); +module_spi_driver(ksz_spi_driver); +MODULE_ALIAS("spi:ksz9477"); +MODULE_ALIAS("spi:ksz9897"); +MODULE_ALIAS("spi:ksz9893"); +MODULE_ALIAS("spi:ksz9563"); +MODULE_ALIAS("spi:ksz8563"); +MODULE_ALIAS("spi:ksz9567"); MODULE_AUTHOR("Tristram Ha <Tristram.Ha@microchip.com>"); -MODULE_DESCRIPTION("Microchip KSZ8795 Series Switch SPI Driver"); +MODULE_DESCRIPTION("Microchip ksz Series Switch SPI Driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 2b02d823d497..835807911be0 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -1038,6 +1038,7 @@ static int mt7530_port_enable(struct dsa_switch *ds, int port, struct phy_device *phy) { + struct dsa_port *dp = dsa_to_port(ds, port); struct mt7530_priv *priv = ds->priv; mutex_lock(&priv->reg_mutex); @@ -1046,7 +1047,11 @@ mt7530_port_enable(struct dsa_switch *ds, int port, * restore the port matrix if the port is the member of a certain * bridge. 
*/ - priv->ports[port].pm |= PCR_MATRIX(BIT(MT7530_CPU_PORT)); + if (dsa_port_is_user(dp)) { + struct dsa_port *cpu_dp = dp->cpu_dp; + + priv->ports[port].pm |= PCR_MATRIX(BIT(cpu_dp->index)); + } priv->ports[port].enable = true; mt7530_rmw(priv, MT7530_PCR_P(port), PCR_MATRIX_MASK, priv->ports[port].pm); @@ -1195,7 +1200,8 @@ mt7530_port_bridge_join(struct dsa_switch *ds, int port, struct netlink_ext_ack *extack) { struct dsa_port *dp = dsa_to_port(ds, port), *other_dp; - u32 port_bitmap = BIT(MT7530_CPU_PORT); + struct dsa_port *cpu_dp = dp->cpu_dp; + u32 port_bitmap = BIT(cpu_dp->index); struct mt7530_priv *priv = ds->priv; mutex_lock(&priv->reg_mutex); @@ -1272,9 +1278,12 @@ mt7530_port_set_vlan_unaware(struct dsa_switch *ds, int port) * the CPU port get out of VLAN filtering mode. */ if (all_user_ports_removed) { - mt7530_write(priv, MT7530_PCR_P(MT7530_CPU_PORT), + struct dsa_port *dp = dsa_to_port(ds, port); + struct dsa_port *cpu_dp = dp->cpu_dp; + + mt7530_write(priv, MT7530_PCR_P(cpu_dp->index), PCR_MATRIX(dsa_user_ports(priv->ds))); - mt7530_write(priv, MT7530_PVC_P(MT7530_CPU_PORT), PORT_SPEC_TAG + mt7530_write(priv, MT7530_PVC_P(cpu_dp->index), PORT_SPEC_TAG | PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT)); } } @@ -1312,6 +1321,7 @@ mt7530_port_bridge_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge) { struct dsa_port *dp = dsa_to_port(ds, port), *other_dp; + struct dsa_port *cpu_dp = dp->cpu_dp; struct mt7530_priv *priv = ds->priv; mutex_lock(&priv->reg_mutex); @@ -1340,8 +1350,8 @@ mt7530_port_bridge_leave(struct dsa_switch *ds, int port, */ if (priv->ports[port].enable) mt7530_rmw(priv, MT7530_PCR_P(port), PCR_MATRIX_MASK, - PCR_MATRIX(BIT(MT7530_CPU_PORT))); - priv->ports[port].pm = PCR_MATRIX(BIT(MT7530_CPU_PORT)); + PCR_MATRIX(BIT(cpu_dp->index))); + priv->ports[port].pm = PCR_MATRIX(BIT(cpu_dp->index)); /* When a port is removed from the bridge, the port would be set up * back to the default as is at initial boot which is a VLAN-unaware 
@@ -1508,6 +1518,9 @@ static int mt7530_port_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering, struct netlink_ext_ack *extack) { + struct dsa_port *dp = dsa_to_port(ds, port); + struct dsa_port *cpu_dp = dp->cpu_dp; + if (vlan_filtering) { /* The port is being kept as VLAN-unaware port when bridge is * set up with vlan_filtering not being set, Otherwise, the @@ -1515,7 +1528,7 @@ mt7530_port_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering, * for becoming a VLAN-aware port. */ mt7530_port_set_vlan_aware(ds, port); - mt7530_port_set_vlan_aware(ds, MT7530_CPU_PORT); + mt7530_port_set_vlan_aware(ds, cpu_dp->index); } else { mt7530_port_set_vlan_unaware(ds, port); } @@ -1527,11 +1540,11 @@ static void mt7530_hw_vlan_add(struct mt7530_priv *priv, struct mt7530_hw_vlan_entry *entry) { + struct dsa_port *dp = dsa_to_port(priv->ds, entry->port); u8 new_members; u32 val; - new_members = entry->old_members | BIT(entry->port) | - BIT(MT7530_CPU_PORT); + new_members = entry->old_members | BIT(entry->port); /* Validate the entry with independent learning, create egress tag per * VLAN and joining the port as one of the port members. @@ -1542,22 +1555,20 @@ mt7530_hw_vlan_add(struct mt7530_priv *priv, /* Decide whether adding tag or not for those outgoing packets from the * port inside the VLAN. - */ - val = entry->untagged ? MT7530_VLAN_EGRESS_UNTAG : - MT7530_VLAN_EGRESS_TAG; - mt7530_rmw(priv, MT7530_VAWD2, - ETAG_CTRL_P_MASK(entry->port), - ETAG_CTRL_P(entry->port, val)); - - /* CPU port is always taken as a tagged port for serving more than one + * CPU port is always taken as a tagged port for serving more than one * VLANs across and also being applied with egress type stack mode for * that VLAN tags would be appended after hardware special tag used as * DSA tag. 
*/ + if (dsa_port_is_cpu(dp)) + val = MT7530_VLAN_EGRESS_STACK; + else if (entry->untagged) + val = MT7530_VLAN_EGRESS_UNTAG; + else + val = MT7530_VLAN_EGRESS_TAG; mt7530_rmw(priv, MT7530_VAWD2, - ETAG_CTRL_P_MASK(MT7530_CPU_PORT), - ETAG_CTRL_P(MT7530_CPU_PORT, - MT7530_VLAN_EGRESS_STACK)); + ETAG_CTRL_P_MASK(entry->port), + ETAG_CTRL_P(entry->port, val)); } static void @@ -1576,11 +1587,7 @@ mt7530_hw_vlan_del(struct mt7530_priv *priv, return; } - /* If certain member apart from CPU port is still alive in the VLAN, - * the entry would be kept valid. Otherwise, the entry is got to be - * disabled. - */ - if (new_members && new_members != BIT(MT7530_CPU_PORT)) { + if (new_members) { val = IVL_MAC | VTAG_EN | PORT_MEM(new_members) | VLAN_VALID; mt7530_write(priv, MT7530_VAWD1, val); @@ -2098,11 +2105,12 @@ static int mt7530_setup(struct dsa_switch *ds) { struct mt7530_priv *priv = ds->priv; + struct device_node *dn = NULL; struct device_node *phy_node; struct device_node *mac_np; struct mt7530_dummy_poll p; phy_interface_t interface; - struct device_node *dn; + struct dsa_port *cpu_dp; u32 id, val; int ret, i; @@ -2110,7 +2118,19 @@ mt7530_setup(struct dsa_switch *ds) * controller also is the container for two GMACs nodes representing * as two netdev instances. 
*/ - dn = dsa_to_port(ds, MT7530_CPU_PORT)->master->dev.of_node->parent; + dsa_switch_for_each_cpu_port(cpu_dp, ds) { + dn = cpu_dp->master->dev.of_node->parent; + /* It doesn't matter which CPU port is found first, + * their masters should share the same parent OF node + */ + break; + } + + if (!dn) { + dev_err(ds->dev, "parent OF node of DSA master not found"); + return -EINVAL; + } + ds->assisted_learning_on_cpu_port = true; ds->mtu_enforcement_ingress = true; @@ -2272,6 +2292,7 @@ mt7531_setup(struct dsa_switch *ds) { struct mt7530_priv *priv = ds->priv; struct mt7530_dummy_poll p; + struct dsa_port *cpu_dp; u32 val, id; int ret, i; @@ -2344,8 +2365,11 @@ mt7531_setup(struct dsa_switch *ds) CORE_PLL_GROUP4, val); /* BPDU to CPU port */ - mt7530_rmw(priv, MT7531_CFC, MT7531_CPU_PMAP_MASK, - BIT(MT7530_CPU_PORT)); + dsa_switch_for_each_cpu_port(cpu_dp, ds) { + mt7530_rmw(priv, MT7531_CFC, MT7531_CPU_PMAP_MASK, + BIT(cpu_dp->index)); + break; + } mt7530_rmw(priv, MT753X_BPC, MT753X_BPDU_PORT_FW_MASK, MT753X_BPDU_CPU_ONLY); diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index 71e36b69b96d..e509af95c354 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -8,7 +8,6 @@ #define MT7530_NUM_PORTS 7 #define MT7530_NUM_PHYS 5 -#define MT7530_CPU_PORT 6 #define MT7530_NUM_FDB_RECORDS 2048 #define MT7530_ALL_MEMBERS 0xff diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 0b49d243e00b..37b649501500 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -449,9 +449,6 @@ static int mv88e6xxx_port_setup_mac(struct mv88e6xxx_chip *chip, int port, goto restore_link; } - if (speed == SPEED_MAX && chip->info->ops->port_max_speed_mode) - mode = chip->info->ops->port_max_speed_mode(port); - if (chip->info->ops->port_set_pause) { err = chip->info->ops->port_set_pause(chip, port, pause); if (err) @@ -3280,28 +3277,51 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, 
int port) { struct device_node *phy_handle = NULL; struct dsa_switch *ds = chip->ds; + phy_interface_t mode; struct dsa_port *dp; - int tx_amp; + int tx_amp, speed; int err; u16 reg; chip->ports[port].chip = chip; chip->ports[port].port = port; + dp = dsa_to_port(ds, port); + /* MAC Forcing register: don't force link, speed, duplex or flow control * state to any particular values on physical ports, but force the CPU * port and all DSA ports to their maximum bandwidth and full duplex. */ - if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) + if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) { + unsigned long caps = dp->pl_config.mac_capabilities; + + if (chip->info->ops->port_max_speed_mode) + mode = chip->info->ops->port_max_speed_mode(port); + else + mode = PHY_INTERFACE_MODE_NA; + + if (caps & MAC_10000FD) + speed = SPEED_10000; + else if (caps & MAC_5000FD) + speed = SPEED_5000; + else if (caps & MAC_2500FD) + speed = SPEED_2500; + else if (caps & MAC_1000) + speed = SPEED_1000; + else if (caps & MAC_100) + speed = SPEED_100; + else + speed = SPEED_10; + err = mv88e6xxx_port_setup_mac(chip, port, LINK_FORCED_UP, - SPEED_MAX, DUPLEX_FULL, - PAUSE_OFF, - PHY_INTERFACE_MODE_NA); - else + speed, DUPLEX_FULL, + PAUSE_OFF, mode); + } else { err = mv88e6xxx_port_setup_mac(chip, port, LINK_UNFORCED, SPEED_UNFORCED, DUPLEX_UNFORCED, PAUSE_ON, PHY_INTERFACE_MODE_NA); + } if (err) return err; @@ -3473,7 +3493,6 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) } if (chip->info->ops->serdes_set_tx_amplitude) { - dp = dsa_to_port(ds, port); if (dp) phy_handle = of_parse_phandle(dp->dn, "phy-handle", 0); diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h index 5e03cfe50156..e693154cf803 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.h +++ b/drivers/net/dsa/mv88e6xxx/chip.h @@ -488,14 +488,13 @@ struct mv88e6xxx_ops { int (*port_set_pause)(struct mv88e6xxx_chip *chip, int port, int pause); -#define SPEED_MAX 
INT_MAX #define SPEED_UNFORCED -2 #define DUPLEX_UNFORCED -2 /* Port's MAC speed (in Mbps) and MAC duplex mode * * Depending on the chip, 10, 100, 200, 1000, 2500, 10000 are valid. - * Use SPEED_UNFORCED for normal detection, SPEED_MAX for max value. + * Use SPEED_UNFORCED for normal detection. * * Use DUPLEX_HALF or DUPLEX_FULL to force half or full duplex, * or DUPLEX_UNFORCED for normal duplex detection. diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c index 795b3128768f..90c55f23b7c9 100644 --- a/drivers/net/dsa/mv88e6xxx/port.c +++ b/drivers/net/dsa/mv88e6xxx/port.c @@ -294,28 +294,10 @@ static int mv88e6xxx_port_set_speed_duplex(struct mv88e6xxx_chip *chip, return 0; } -/* Support 10, 100, 200 Mbps (e.g. 88E6065 family) */ -int mv88e6065_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port, - int speed, int duplex) -{ - if (speed == SPEED_MAX) - speed = 200; - - if (speed > 200) - return -EOPNOTSUPP; - - /* Setting 200 Mbps on port 0 to 3 selects 100 Mbps */ - return mv88e6xxx_port_set_speed_duplex(chip, port, speed, false, false, - duplex); -} - /* Support 10, 100, 1000 Mbps (e.g. 88E6185 family) */ int mv88e6185_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port, int speed, int duplex) { - if (speed == SPEED_MAX) - speed = 1000; - if (speed == 200 || speed > 1000) return -EOPNOTSUPP; @@ -327,9 +309,6 @@ int mv88e6185_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port, int mv88e6250_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port, int speed, int duplex) { - if (speed == SPEED_MAX) - speed = 100; - if (speed > 100) return -EOPNOTSUPP; @@ -341,9 +320,6 @@ int mv88e6250_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port, int mv88e6341_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port, int speed, int duplex) { - if (speed == SPEED_MAX) - speed = port < 5 ? 
1000 : 2500; - if (speed > 2500) return -EOPNOTSUPP; @@ -369,9 +345,6 @@ phy_interface_t mv88e6341_port_max_speed_mode(int port) int mv88e6352_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port, int speed, int duplex) { - if (speed == SPEED_MAX) - speed = 1000; - if (speed > 1000) return -EOPNOTSUPP; @@ -386,9 +359,6 @@ int mv88e6352_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port, int mv88e6390_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port, int speed, int duplex) { - if (speed == SPEED_MAX) - speed = port < 9 ? 1000 : 2500; - if (speed > 2500) return -EOPNOTSUPP; @@ -414,9 +384,6 @@ phy_interface_t mv88e6390_port_max_speed_mode(int port) int mv88e6390x_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port, int speed, int duplex) { - if (speed == SPEED_MAX) - speed = port < 9 ? 1000 : 10000; - if (speed == 200 && port != 0) return -EOPNOTSUPP; @@ -445,9 +412,6 @@ int mv88e6393x_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port, u16 reg, ctrl; int err; - if (speed == SPEED_MAX) - speed = (port > 0 && port < 9) ? 
1000 : 10000; - if (speed == 200 && port != 0) return -EOPNOTSUPP; diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h index e0a705d82019..cb04243f37c1 100644 --- a/drivers/net/dsa/mv88e6xxx/port.h +++ b/drivers/net/dsa/mv88e6xxx/port.h @@ -342,8 +342,6 @@ int mv88e6xxx_port_set_link(struct mv88e6xxx_chip *chip, int port, int link); int mv88e6xxx_port_sync_link(struct mv88e6xxx_chip *chip, int port, unsigned int mode, bool isup); int mv88e6185_port_sync_link(struct mv88e6xxx_chip *chip, int port, unsigned int mode, bool isup); -int mv88e6065_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port, - int speed, int duplex); int mv88e6185_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port, int speed, int duplex); int mv88e6250_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port, diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index 570d0204b7be..dd9085ae0922 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1196,10 +1196,13 @@ static void vsc9959_tas_gcl_set(struct ocelot *ocelot, const u32 gcl_ix, static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port, struct tc_taprio_qopt_offload *taprio) { + struct ocelot_port *ocelot_port = ocelot->ports[port]; struct timespec64 base_ts; int ret, i; u32 val; + mutex_lock(&ocelot->tas_lock); + if (!taprio->enable) { ocelot_rmw_rix(ocelot, QSYS_TAG_CONFIG_INIT_GATE_STATE(0xFF), @@ -1207,15 +1210,20 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port, QSYS_TAG_CONFIG_INIT_GATE_STATE_M, QSYS_TAG_CONFIG, port); + mutex_unlock(&ocelot->tas_lock); return 0; } if (taprio->cycle_time > NSEC_PER_SEC || - taprio->cycle_time_extension >= NSEC_PER_SEC) - return -EINVAL; + taprio->cycle_time_extension >= NSEC_PER_SEC) { + ret = -EINVAL; + goto err; + } - if (taprio->num_entries > VSC9959_TAS_GCL_ENTRY_MAX) - return -ERANGE; + if (taprio->num_entries > 
VSC9959_TAS_GCL_ENTRY_MAX) { + ret = -ERANGE; + goto err; + } /* Enable guard band. The switch will schedule frames without taking * their length into account. Thus we'll always need to enable the @@ -1236,8 +1244,10 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port, * config is pending, need reset the TAS module */ val = ocelot_read(ocelot, QSYS_PARAM_STATUS_REG_8); - if (val & QSYS_PARAM_STATUS_REG_8_CONFIG_PENDING) - return -EBUSY; + if (val & QSYS_PARAM_STATUS_REG_8_CONFIG_PENDING) { + ret = -EBUSY; + goto err; + } ocelot_rmw_rix(ocelot, QSYS_TAG_CONFIG_ENABLE | @@ -1248,6 +1258,8 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port, QSYS_TAG_CONFIG_SCH_TRAFFIC_QUEUES_M, QSYS_TAG_CONFIG, port); + ocelot_port->base_time = taprio->base_time; + vsc9959_new_base_time(ocelot, taprio->base_time, taprio->cycle_time, &base_ts); ocelot_write(ocelot, base_ts.tv_nsec, QSYS_PARAM_CFG_REG_1); @@ -1271,9 +1283,67 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port, !(val & QSYS_TAS_PARAM_CFG_CTRL_CONFIG_CHANGE), 10, 100000); +err: + mutex_unlock(&ocelot->tas_lock); + return ret; } +static void vsc9959_tas_clock_adjust(struct ocelot *ocelot) +{ + struct ocelot_port *ocelot_port; + struct timespec64 base_ts; + u64 cycletime; + int port; + u32 val; + + mutex_lock(&ocelot->tas_lock); + + for (port = 0; port < ocelot->num_phys_ports; port++) { + val = ocelot_read_rix(ocelot, QSYS_TAG_CONFIG, port); + if (!(val & QSYS_TAG_CONFIG_ENABLE)) + continue; + + ocelot_rmw(ocelot, + QSYS_TAS_PARAM_CFG_CTRL_PORT_NUM(port), + QSYS_TAS_PARAM_CFG_CTRL_PORT_NUM_M, + QSYS_TAS_PARAM_CFG_CTRL); + + ocelot_rmw_rix(ocelot, + QSYS_TAG_CONFIG_INIT_GATE_STATE(0xFF), + QSYS_TAG_CONFIG_ENABLE | + QSYS_TAG_CONFIG_INIT_GATE_STATE_M, + QSYS_TAG_CONFIG, port); + + cycletime = ocelot_read(ocelot, QSYS_PARAM_CFG_REG_4); + ocelot_port = ocelot->ports[port]; + + vsc9959_new_base_time(ocelot, ocelot_port->base_time, + cycletime, &base_ts); + + 
ocelot_write(ocelot, base_ts.tv_nsec, QSYS_PARAM_CFG_REG_1); + ocelot_write(ocelot, lower_32_bits(base_ts.tv_sec), + QSYS_PARAM_CFG_REG_2); + val = upper_32_bits(base_ts.tv_sec); + ocelot_rmw(ocelot, + QSYS_PARAM_CFG_REG_3_BASE_TIME_SEC_MSB(val), + QSYS_PARAM_CFG_REG_3_BASE_TIME_SEC_MSB_M, + QSYS_PARAM_CFG_REG_3); + + ocelot_rmw(ocelot, QSYS_TAS_PARAM_CFG_CTRL_CONFIG_CHANGE, + QSYS_TAS_PARAM_CFG_CTRL_CONFIG_CHANGE, + QSYS_TAS_PARAM_CFG_CTRL); + + ocelot_rmw_rix(ocelot, + QSYS_TAG_CONFIG_INIT_GATE_STATE(0xFF) | + QSYS_TAG_CONFIG_ENABLE, + QSYS_TAG_CONFIG_ENABLE | + QSYS_TAG_CONFIG_INIT_GATE_STATE_M, + QSYS_TAG_CONFIG, port); + } + mutex_unlock(&ocelot->tas_lock); +} + static int vsc9959_qos_port_cbs_set(struct dsa_switch *ds, int port, struct tc_cbs_qopt_offload *cbs_qopt) { @@ -2210,6 +2280,7 @@ static const struct ocelot_ops vsc9959_ops = { .psfp_filter_del = vsc9959_psfp_filter_del, .psfp_stats_get = vsc9959_psfp_stats_get, .cut_through_fwd = vsc9959_cut_through_fwd, + .tas_clock_adjust = vsc9959_tas_clock_adjust, }; static const struct felix_info felix_info_vsc9959 = { diff --git a/drivers/net/dsa/qca/ar9331.c b/drivers/net/dsa/qca/ar9331.c index e5098cfe44bc..f23ce56fa591 100644 --- a/drivers/net/dsa/qca/ar9331.c +++ b/drivers/net/dsa/qca/ar9331.c @@ -818,7 +818,7 @@ static int __ar9331_mdio_write(struct mii_bus *sbus, u8 mode, u16 reg, u16 val) FIELD_GET(AR9331_SW_LOW_ADDR_PHY, reg); r = FIELD_GET(AR9331_SW_LOW_ADDR_REG, reg); - return mdiobus_write(sbus, p, r, val); + return __mdiobus_write(sbus, p, r, val); } static int __ar9331_mdio_read(struct mii_bus *sbus, u16 reg) @@ -829,7 +829,7 @@ static int __ar9331_mdio_read(struct mii_bus *sbus, u16 reg) FIELD_GET(AR9331_SW_LOW_ADDR_PHY, reg); r = FIELD_GET(AR9331_SW_LOW_ADDR_REG, reg); - return mdiobus_read(sbus, p, r); + return __mdiobus_read(sbus, p, r); } static int ar9331_mdio_read(void *ctx, const void *reg_buf, size_t reg_len, @@ -849,6 +849,8 @@ static int ar9331_mdio_read(void *ctx, const void *reg_buf, 
size_t reg_len, return 0; } + mutex_lock_nested(&sbus->mdio_lock, MDIO_MUTEX_NESTED); + ret = __ar9331_mdio_read(sbus, reg); if (ret < 0) goto error; @@ -860,9 +862,13 @@ static int ar9331_mdio_read(void *ctx, const void *reg_buf, size_t reg_len, *(u32 *)val_buf |= ret << 16; + mutex_unlock(&sbus->mdio_lock); + return 0; error: + mutex_unlock(&sbus->mdio_lock); dev_err_ratelimited(&sbus->dev, "Bus error. Failed to read register.\n"); + return ret; } @@ -872,12 +878,15 @@ static int ar9331_mdio_write(void *ctx, u32 reg, u32 val) struct mii_bus *sbus = priv->sbus; int ret; + mutex_lock_nested(&sbus->mdio_lock, MDIO_MUTEX_NESTED); if (reg == AR9331_SW_REG_PAGE) { ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_PAGE, 0, val); if (ret < 0) goto error; + mutex_unlock(&sbus->mdio_lock); + return 0; } @@ -897,10 +906,14 @@ static int ar9331_mdio_write(void *ctx, u32 reg, u32 val) if (ret < 0) goto error; + mutex_unlock(&sbus->mdio_lock); + return 0; error: + mutex_unlock(&sbus->mdio_lock); dev_err_ratelimited(&sbus->dev, "Bus error. 
Failed to write register.\n"); + return ret; } diff --git a/drivers/net/dsa/realtek/rtl8365mb.c b/drivers/net/dsa/realtek/rtl8365mb.c index 769f672e9128..da31d8b839ac 100644 --- a/drivers/net/dsa/realtek/rtl8365mb.c +++ b/drivers/net/dsa/realtek/rtl8365mb.c @@ -101,27 +101,14 @@ #include "realtek.h" -/* Chip-specific data and limits */ -#define RTL8365MB_CHIP_ID_8365MB_VC 0x6367 -#define RTL8365MB_CHIP_VER_8365MB_VC 0x0040 - -#define RTL8365MB_CHIP_ID_8367S 0x6367 -#define RTL8365MB_CHIP_VER_8367S 0x00A0 - -#define RTL8365MB_CHIP_ID_8367RB 0x6367 -#define RTL8365MB_CHIP_VER_8367RB 0x0020 - /* Family-specific data and limits */ #define RTL8365MB_PHYADDRMAX 7 #define RTL8365MB_NUM_PHYREGS 32 #define RTL8365MB_PHYREGMAX (RTL8365MB_NUM_PHYREGS - 1) -/* RTL8370MB and RTL8310SR, possibly suportable by this driver, have 10 ports */ -#define RTL8365MB_MAX_NUM_PORTS 10 +#define RTL8365MB_MAX_NUM_PORTS 11 +#define RTL8365MB_MAX_NUM_EXTINTS 3 #define RTL8365MB_LEARN_LIMIT_MAX 2112 -/* valid for all 6-port or less variants */ -static const int rtl8365mb_extint_port_map[] = { -1, -1, -1, -1, -1, -1, 1, 2, -1, -1}; - /* Chip identification registers */ #define RTL8365MB_CHIP_ID_REG 0x1300 @@ -201,7 +188,7 @@ static const int rtl8365mb_extint_port_map[] = { -1, -1, -1, -1, -1, -1, 1, 2, /* The PHY OCP addresses of PHY registers 0~31 start here */ #define RTL8365MB_PHY_OCP_ADDR_PHYREG_BASE 0xA400 -/* EXT interface port mode values - used in DIGITAL_INTERFACE_SELECT */ +/* External interface port mode values - used in DIGITAL_INTERFACE_SELECT */ #define RTL8365MB_EXT_PORT_MODE_DISABLE 0 #define RTL8365MB_EXT_PORT_MODE_RGMII 1 #define RTL8365MB_EXT_PORT_MODE_MII_MAC 2 @@ -217,19 +204,7 @@ static const int rtl8365mb_extint_port_map[] = { -1, -1, -1, -1, -1, -1, 1, 2, #define RTL8365MB_EXT_PORT_MODE_1000X 12 #define RTL8365MB_EXT_PORT_MODE_100FX 13 -/* Realtek docs and driver uses logic number as EXT_PORT0=16, EXT_PORT1=17, - * EXT_PORT2=18, to interact with switch ports. 
That logic number is internally - * converted to either a physical port number (0..9) or an external interface id (0..2), - * depending on which function was called. The external interface id is calculated as - * (ext_id=logic_port-15), while the logical to physical map depends on the chip id/version. - * - * EXT_PORT0 mentioned in datasheets and rtl8367c driver is used in this driver - * as extid==1, EXT_PORT2, mentioned in Realtek rtl8367c driver for 10-port switches, - * would have an ext_id of 3 (out of range for most extint macros) and ext_id 0 does - * not seem to be used as well for this family. - */ - -/* EXT interface mode configuration registers 0~1 */ +/* External interface mode configuration registers 0~1 */ #define RTL8365MB_DIGITAL_INTERFACE_SELECT_REG0 0x1305 /* EXT1 */ #define RTL8365MB_DIGITAL_INTERFACE_SELECT_REG1 0x13C3 /* EXT2 */ #define RTL8365MB_DIGITAL_INTERFACE_SELECT_REG(_extint) \ @@ -241,7 +216,7 @@ static const int rtl8365mb_extint_port_map[] = { -1, -1, -1, -1, -1, -1, 1, 2, #define RTL8365MB_DIGITAL_INTERFACE_SELECT_MODE_OFFSET(_extint) \ (((_extint) % 2) * 4) -/* EXT interface RGMII TX/RX delay configuration registers 0~2 */ +/* External interface RGMII TX/RX delay configuration registers 0~2 */ #define RTL8365MB_EXT_RGMXF_REG0 0x1306 /* EXT0 */ #define RTL8365MB_EXT_RGMXF_REG1 0x1307 /* EXT1 */ #define RTL8365MB_EXT_RGMXF_REG2 0x13C5 /* EXT2 */ @@ -258,7 +233,7 @@ static const int rtl8365mb_extint_port_map[] = { -1, -1, -1, -1, -1, -1, 1, 2, #define RTL8365MB_PORT_SPEED_100M 1 #define RTL8365MB_PORT_SPEED_1000M 2 -/* EXT interface force configuration registers 0~2 */ +/* External interface force configuration registers 0~2 */ #define RTL8365MB_DIGITAL_INTERFACE_FORCE_REG0 0x1310 /* EXT0 */ #define RTL8365MB_DIGITAL_INTERFACE_FORCE_REG1 0x1311 /* EXT1 */ #define RTL8365MB_DIGITAL_INTERFACE_FORCE_REG2 0x13C4 /* EXT2 */ @@ -490,6 +465,95 @@ static const struct rtl8365mb_jam_tbl_entry rtl8365mb_init_jam_common[] = { { 0x1D32, 0x0002 }, 
}; +enum rtl8365mb_phy_interface_mode { + RTL8365MB_PHY_INTERFACE_MODE_INVAL = 0, + RTL8365MB_PHY_INTERFACE_MODE_INTERNAL = BIT(0), + RTL8365MB_PHY_INTERFACE_MODE_MII = BIT(1), + RTL8365MB_PHY_INTERFACE_MODE_TMII = BIT(2), + RTL8365MB_PHY_INTERFACE_MODE_RMII = BIT(3), + RTL8365MB_PHY_INTERFACE_MODE_RGMII = BIT(4), + RTL8365MB_PHY_INTERFACE_MODE_SGMII = BIT(5), + RTL8365MB_PHY_INTERFACE_MODE_HSGMII = BIT(6), +}; + +/** + * struct rtl8365mb_extint - external interface info + * @port: the port with an external interface + * @id: the external interface ID, which is either 0, 1, or 2 + * @supported_interfaces: a bitmask of supported PHY interface modes + * + * Represents a mapping: port -> { id, supported_interfaces }. To be embedded + * in &struct rtl8365mb_chip_info for every port with an external interface. + */ +struct rtl8365mb_extint { + int port; + int id; + unsigned int supported_interfaces; +}; + +/** + * struct rtl8365mb_chip_info - static chip-specific info + * @name: human-readable chip name + * @chip_id: chip identifier + * @chip_ver: chip silicon revision + * @extints: available external interfaces + * @jam_table: chip-specific initialization jam table + * @jam_size: size of the chip's jam table + * + * These data are specific to a given chip in the family of switches supported + * by this driver. When adding support for another chip in the family, a new + * chip info should be added to the rtl8365mb_chip_infos array. 
+ */ +struct rtl8365mb_chip_info { + const char *name; + u32 chip_id; + u32 chip_ver; + const struct rtl8365mb_extint extints[RTL8365MB_MAX_NUM_EXTINTS]; + const struct rtl8365mb_jam_tbl_entry *jam_table; + size_t jam_size; +}; + +/* Chip info for each supported switch in the family */ +#define PHY_INTF(_mode) (RTL8365MB_PHY_INTERFACE_MODE_ ## _mode) +static const struct rtl8365mb_chip_info rtl8365mb_chip_infos[] = { + { + .name = "RTL8365MB-VC", + .chip_id = 0x6367, + .chip_ver = 0x0040, + .extints = { + { 6, 1, PHY_INTF(MII) | PHY_INTF(TMII) | + PHY_INTF(RMII) | PHY_INTF(RGMII) }, + }, + .jam_table = rtl8365mb_init_jam_8365mb_vc, + .jam_size = ARRAY_SIZE(rtl8365mb_init_jam_8365mb_vc), + }, + { + .name = "RTL8367S", + .chip_id = 0x6367, + .chip_ver = 0x00A0, + .extints = { + { 6, 1, PHY_INTF(SGMII) | PHY_INTF(HSGMII) }, + { 7, 2, PHY_INTF(MII) | PHY_INTF(TMII) | + PHY_INTF(RMII) | PHY_INTF(RGMII) }, + }, + .jam_table = rtl8365mb_init_jam_8365mb_vc, + .jam_size = ARRAY_SIZE(rtl8365mb_init_jam_8365mb_vc), + }, + { + .name = "RTL8367RB-VB", + .chip_id = 0x6367, + .chip_ver = 0x0020, + .extints = { + { 6, 1, PHY_INTF(MII) | PHY_INTF(TMII) | + PHY_INTF(RMII) | PHY_INTF(RGMII) }, + { 7, 2, PHY_INTF(MII) | PHY_INTF(TMII) | + PHY_INTF(RMII) | PHY_INTF(RGMII) }, + }, + .jam_table = rtl8365mb_init_jam_8365mb_vc, + .jam_size = ARRAY_SIZE(rtl8365mb_init_jam_8365mb_vc), + }, +}; + enum rtl8365mb_stp_state { RTL8365MB_STP_STATE_DISABLED = 0, RTL8365MB_STP_STATE_BLOCKING = 1, @@ -559,33 +623,23 @@ struct rtl8365mb_port { }; /** - * struct rtl8365mb - private chip-specific driver data + * struct rtl8365mb - driver private data * @priv: pointer to parent realtek_priv data * @irq: registered IRQ or zero - * @chip_id: chip identifier - * @chip_ver: chip silicon revision - * @port_mask: mask of all ports - * @learn_limit_max: maximum number of L2 addresses the chip can learn + * @chip_info: chip-specific info about the attached switch * @cpu: CPU tagging and CPU port configuration 
for this chip * @mib_lock: prevent concurrent reads of MIB counters * @ports: per-port data - * @jam_table: chip-specific initialization jam table - * @jam_size: size of the chip's jam table * * Private data for this driver. */ struct rtl8365mb { struct realtek_priv *priv; int irq; - u32 chip_id; - u32 chip_ver; - u32 port_mask; - u32 learn_limit_max; + const struct rtl8365mb_chip_info *chip_info; struct rtl8365mb_cpu cpu; struct mutex mib_lock; struct rtl8365mb_port ports[RTL8365MB_MAX_NUM_PORTS]; - const struct rtl8365mb_jam_tbl_entry *jam_table; - size_t jam_size; }; static int rtl8365mb_phy_poll_busy(struct realtek_priv *priv) @@ -780,6 +834,26 @@ static int rtl8365mb_dsa_phy_write(struct dsa_switch *ds, int phy, int regnum, return rtl8365mb_phy_write(ds->priv, phy, regnum, val); } +static const struct rtl8365mb_extint * +rtl8365mb_get_port_extint(struct realtek_priv *priv, int port) +{ + struct rtl8365mb *mb = priv->chip_data; + int i; + + for (i = 0; i < RTL8365MB_MAX_NUM_EXTINTS; i++) { + const struct rtl8365mb_extint *extint = + &mb->chip_info->extints[i]; + + if (!extint->supported_interfaces) + continue; + + if (extint->port == port) + return extint; + } + + return NULL; +} + static enum dsa_tag_protocol rtl8365mb_get_tag_protocol(struct dsa_switch *ds, int port, enum dsa_tag_protocol mp) @@ -800,20 +874,17 @@ rtl8365mb_get_tag_protocol(struct dsa_switch *ds, int port, static int rtl8365mb_ext_config_rgmii(struct realtek_priv *priv, int port, phy_interface_t interface) { + const struct rtl8365mb_extint *extint = + rtl8365mb_get_port_extint(priv, port); struct device_node *dn; struct dsa_port *dp; int tx_delay = 0; int rx_delay = 0; - int ext_int; u32 val; int ret; - ext_int = rtl8365mb_extint_port_map[port]; - - if (ext_int <= 0) { - dev_err(priv->dev, "Port %d is not an external interface port\n", port); - return -EINVAL; - } + if (!extint) + return -ENODEV; dp = dsa_to_port(priv->ds, port); dn = dp->dn; @@ -847,7 +918,7 @@ static int 
rtl8365mb_ext_config_rgmii(struct realtek_priv *priv, int port, tx_delay = val / 2; else dev_warn(priv->dev, - "EXT interface TX delay must be 0 or 2 ns\n"); + "RGMII TX delay must be 0 or 2 ns\n"); } if (!of_property_read_u32(dn, "rx-internal-delay-ps", &val)) { @@ -857,11 +928,11 @@ static int rtl8365mb_ext_config_rgmii(struct realtek_priv *priv, int port, rx_delay = val; else dev_warn(priv->dev, - "EXT interface RX delay must be 0 to 2.1 ns\n"); + "RGMII RX delay must be 0 to 2.1 ns\n"); } ret = regmap_update_bits( - priv->map, RTL8365MB_EXT_RGMXF_REG(ext_int), + priv->map, RTL8365MB_EXT_RGMXF_REG(extint->id), RTL8365MB_EXT_RGMXF_TXDELAY_MASK | RTL8365MB_EXT_RGMXF_RXDELAY_MASK, FIELD_PREP(RTL8365MB_EXT_RGMXF_TXDELAY_MASK, tx_delay) | @@ -870,11 +941,11 @@ static int rtl8365mb_ext_config_rgmii(struct realtek_priv *priv, int port, return ret; ret = regmap_update_bits( - priv->map, RTL8365MB_DIGITAL_INTERFACE_SELECT_REG(ext_int), - RTL8365MB_DIGITAL_INTERFACE_SELECT_MODE_MASK(ext_int), + priv->map, RTL8365MB_DIGITAL_INTERFACE_SELECT_REG(extint->id), + RTL8365MB_DIGITAL_INTERFACE_SELECT_MODE_MASK(extint->id), RTL8365MB_EXT_PORT_MODE_RGMII << RTL8365MB_DIGITAL_INTERFACE_SELECT_MODE_OFFSET( - ext_int)); + extint->id)); if (ret) return ret; @@ -885,21 +956,18 @@ static int rtl8365mb_ext_config_forcemode(struct realtek_priv *priv, int port, bool link, int speed, int duplex, bool tx_pause, bool rx_pause) { + const struct rtl8365mb_extint *extint = + rtl8365mb_get_port_extint(priv, port); u32 r_tx_pause; u32 r_rx_pause; u32 r_duplex; u32 r_speed; u32 r_link; - int ext_int; int val; int ret; - ext_int = rtl8365mb_extint_port_map[port]; - - if (ext_int <= 0) { - dev_err(priv->dev, "Port %d is not an external interface port\n", port); - return -EINVAL; - } + if (!extint) + return -ENODEV; if (link) { /* Force the link up with the desired configuration */ @@ -947,7 +1015,7 @@ static int rtl8365mb_ext_config_forcemode(struct realtek_priv *priv, int port, r_duplex) | 
FIELD_PREP(RTL8365MB_DIGITAL_INTERFACE_FORCE_SPEED_MASK, r_speed); ret = regmap_write(priv->map, - RTL8365MB_DIGITAL_INTERFACE_FORCE_REG(ext_int), + RTL8365MB_DIGITAL_INTERFACE_FORCE_REG(extint->id), val); if (ret) return ret; @@ -958,7 +1026,13 @@ static int rtl8365mb_ext_config_forcemode(struct realtek_priv *priv, int port, static void rtl8365mb_phylink_get_caps(struct dsa_switch *ds, int port, struct phylink_config *config) { - if (dsa_is_user_port(ds, port)) { + const struct rtl8365mb_extint *extint = + rtl8365mb_get_port_extint(ds->priv, port); + + config->mac_capabilities = MAC_SYM_PAUSE | MAC_ASYM_PAUSE | + MAC_10 | MAC_100 | MAC_1000FD; + + if (!extint) { __set_bit(PHY_INTERFACE_MODE_INTERNAL, config->supported_interfaces); @@ -967,12 +1041,16 @@ static void rtl8365mb_phylink_get_caps(struct dsa_switch *ds, int port, */ __set_bit(PHY_INTERFACE_MODE_GMII, config->supported_interfaces); - } else if (dsa_is_cpu_port(ds, port)) { - phy_interface_set_rgmii(config->supported_interfaces); + return; } - config->mac_capabilities = MAC_SYM_PAUSE | MAC_ASYM_PAUSE | - MAC_10 | MAC_100 | MAC_1000FD; + /* Populate according to the modes supported by _this driver_, + * not necessarily the modes supported by the hardware, some of + * which remain unimplemented. + */ + + if (extint->supported_interfaces & RTL8365MB_PHY_INTERFACE_MODE_RGMII) + phy_interface_set_rgmii(config->supported_interfaces); } static void rtl8365mb_phylink_mac_config(struct dsa_switch *ds, int port, @@ -1091,15 +1169,13 @@ static void rtl8365mb_port_stp_state_set(struct dsa_switch *ds, int port, static int rtl8365mb_port_set_learning(struct realtek_priv *priv, int port, bool enable) { - struct rtl8365mb *mb = priv->chip_data; - /* Enable/disable learning by limiting the number of L2 addresses the * port can learn. Realtek documentation states that a limit of zero * disables learning. When enabling learning, set it to the chip's * maximum. 
*/ return regmap_write(priv->map, RTL8365MB_LUT_PORT_LEARN_LIMIT_REG(port), - enable ? mb->learn_limit_max : 0); + enable ? RTL8365MB_LEARN_LIMIT_MAX : 0); } static int rtl8365mb_port_set_isolation(struct realtek_priv *priv, int port, @@ -1489,13 +1565,10 @@ static irqreturn_t rtl8365mb_irq(int irq, void *data) { struct realtek_priv *priv = data; unsigned long line_changes = 0; - struct rtl8365mb *mb; u32 stat; int line; int ret; - mb = priv->chip_data; - ret = rtl8365mb_get_and_clear_status_reg(priv, RTL8365MB_INTR_STATUS_REG, &stat); if (ret) @@ -1520,7 +1593,7 @@ static irqreturn_t rtl8365mb_irq(int irq, void *data) linkdown_ind = FIELD_GET(RTL8365MB_PORT_LINKDOWN_IND_MASK, val); - line_changes = (linkup_ind | linkdown_ind) & mb->port_mask; + line_changes = linkup_ind | linkdown_ind; } if (!line_changes) @@ -1792,14 +1865,17 @@ static int rtl8365mb_change_tag_protocol(struct dsa_switch *ds, static int rtl8365mb_switch_init(struct realtek_priv *priv) { struct rtl8365mb *mb = priv->chip_data; + const struct rtl8365mb_chip_info *ci; int ret; int i; + ci = mb->chip_info; + /* Do any chip-specific init jam before getting to the common stuff */ - if (mb->jam_table) { - for (i = 0; i < mb->jam_size; i++) { - ret = regmap_write(priv->map, mb->jam_table[i].reg, - mb->jam_table[i].val); + if (ci->jam_table) { + for (i = 0; i < ci->jam_size; i++) { + ret = regmap_write(priv->map, ci->jam_table[i].reg, + ci->jam_table[i].val); if (ret) return ret; } @@ -1972,6 +2048,7 @@ static int rtl8365mb_detect(struct realtek_priv *priv) u32 chip_id; u32 chip_ver; int ret; + int i; ret = rtl8365mb_get_chip_id_and_ver(priv->map, &chip_id, &chip_ver); if (ret) { @@ -1980,54 +2057,32 @@ static int rtl8365mb_detect(struct realtek_priv *priv) return ret; } - switch (chip_id) { - case RTL8365MB_CHIP_ID_8365MB_VC: - switch (chip_ver) { - case RTL8365MB_CHIP_VER_8365MB_VC: - dev_info(priv->dev, - "found an RTL8365MB-VC switch (ver=0x%04x)\n", - chip_ver); - break; - case 
RTL8365MB_CHIP_VER_8367RB: - dev_info(priv->dev, - "found an RTL8367RB-VB switch (ver=0x%04x)\n", - chip_ver); - break; - case RTL8365MB_CHIP_VER_8367S: - dev_info(priv->dev, - "found an RTL8367S switch (ver=0x%04x)\n", - chip_ver); + for (i = 0; i < ARRAY_SIZE(rtl8365mb_chip_infos); i++) { + const struct rtl8365mb_chip_info *ci = &rtl8365mb_chip_infos[i]; + + if (ci->chip_id == chip_id && ci->chip_ver == chip_ver) { + mb->chip_info = ci; break; - default: - dev_err(priv->dev, "unrecognized switch version (ver=0x%04x)", - chip_ver); - return -ENODEV; } + } - priv->num_ports = RTL8365MB_MAX_NUM_PORTS; - - mb->priv = priv; - mb->chip_id = chip_id; - mb->chip_ver = chip_ver; - mb->port_mask = GENMASK(priv->num_ports - 1, 0); - mb->learn_limit_max = RTL8365MB_LEARN_LIMIT_MAX; - mb->jam_table = rtl8365mb_init_jam_8365mb_vc; - mb->jam_size = ARRAY_SIZE(rtl8365mb_init_jam_8365mb_vc); - - mb->cpu.trap_port = RTL8365MB_MAX_NUM_PORTS; - mb->cpu.insert = RTL8365MB_CPU_INSERT_TO_ALL; - mb->cpu.position = RTL8365MB_CPU_POS_AFTER_SA; - mb->cpu.rx_length = RTL8365MB_CPU_RXLEN_64BYTES; - mb->cpu.format = RTL8365MB_CPU_FORMAT_8BYTES; - - break; - default: + if (!mb->chip_info) { dev_err(priv->dev, - "found an unknown Realtek switch (id=0x%04x, ver=0x%04x)\n", - chip_id, chip_ver); + "unrecognized switch (id=0x%04x, ver=0x%04x)", chip_id, + chip_ver); return -ENODEV; } + dev_info(priv->dev, "found an %s switch\n", mb->chip_info->name); + + priv->num_ports = RTL8365MB_MAX_NUM_PORTS; + mb->priv = priv; + mb->cpu.trap_port = RTL8365MB_MAX_NUM_PORTS; + mb->cpu.insert = RTL8365MB_CPU_INSERT_TO_ALL; + mb->cpu.position = RTL8365MB_CPU_POS_AFTER_SA; + mb->cpu.rx_length = RTL8365MB_CPU_RXLEN_64BYTES; + mb->cpu.format = RTL8365MB_CPU_FORMAT_8BYTES; + return 0; } diff --git a/drivers/net/dsa/rzn1_a5psw.c b/drivers/net/dsa/rzn1_a5psw.c new file mode 100644 index 000000000000..3e910da98ae2 --- /dev/null +++ b/drivers/net/dsa/rzn1_a5psw.c @@ -0,0 +1,1062 @@ +// SPDX-License-Identifier: 
GPL-2.0-only +/* + * Copyright (C) 2022 Schneider-Electric + * + * Clément Léger <clement.leger@bootlin.com> + */ + +#include <linux/clk.h> +#include <linux/etherdevice.h> +#include <linux/if_bridge.h> +#include <linux/if_ether.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_mdio.h> +#include <net/dsa.h> + +#include "rzn1_a5psw.h" + +struct a5psw_stats { + u16 offset; + const char name[ETH_GSTRING_LEN]; +}; + +#define STAT_DESC(_offset) { \ + .offset = A5PSW_##_offset, \ + .name = __stringify(_offset), \ +} + +static const struct a5psw_stats a5psw_stats[] = { + STAT_DESC(aFramesTransmittedOK), + STAT_DESC(aFramesReceivedOK), + STAT_DESC(aFrameCheckSequenceErrors), + STAT_DESC(aAlignmentErrors), + STAT_DESC(aOctetsTransmittedOK), + STAT_DESC(aOctetsReceivedOK), + STAT_DESC(aTxPAUSEMACCtrlFrames), + STAT_DESC(aRxPAUSEMACCtrlFrames), + STAT_DESC(ifInErrors), + STAT_DESC(ifOutErrors), + STAT_DESC(ifInUcastPkts), + STAT_DESC(ifInMulticastPkts), + STAT_DESC(ifInBroadcastPkts), + STAT_DESC(ifOutDiscards), + STAT_DESC(ifOutUcastPkts), + STAT_DESC(ifOutMulticastPkts), + STAT_DESC(ifOutBroadcastPkts), + STAT_DESC(etherStatsDropEvents), + STAT_DESC(etherStatsOctets), + STAT_DESC(etherStatsPkts), + STAT_DESC(etherStatsUndersizePkts), + STAT_DESC(etherStatsOversizePkts), + STAT_DESC(etherStatsPkts64Octets), + STAT_DESC(etherStatsPkts65to127Octets), + STAT_DESC(etherStatsPkts128to255Octets), + STAT_DESC(etherStatsPkts256to511Octets), + STAT_DESC(etherStatsPkts1024to1518Octets), + STAT_DESC(etherStatsPkts1519toXOctets), + STAT_DESC(etherStatsJabbers), + STAT_DESC(etherStatsFragments), + STAT_DESC(VLANReceived), + STAT_DESC(VLANTransmitted), + STAT_DESC(aDeferred), + STAT_DESC(aMultipleCollisions), + STAT_DESC(aSingleCollisions), + STAT_DESC(aLateCollisions), + STAT_DESC(aExcessiveCollisions), + STAT_DESC(aCarrierSenseErrors), +}; + +static void a5psw_reg_writel(struct a5psw *a5psw, int offset, u32 value) +{ + writel(value, 
a5psw->base + offset); +} + +static u32 a5psw_reg_readl(struct a5psw *a5psw, int offset) +{ + return readl(a5psw->base + offset); +} + +static void a5psw_reg_rmw(struct a5psw *a5psw, int offset, u32 mask, u32 val) +{ + u32 reg; + + spin_lock(&a5psw->reg_lock); + + reg = a5psw_reg_readl(a5psw, offset); + reg &= ~mask; + reg |= val; + a5psw_reg_writel(a5psw, offset, reg); + + spin_unlock(&a5psw->reg_lock); +} + +static enum dsa_tag_protocol a5psw_get_tag_protocol(struct dsa_switch *ds, + int port, + enum dsa_tag_protocol mp) +{ + return DSA_TAG_PROTO_RZN1_A5PSW; +} + +static void a5psw_port_pattern_set(struct a5psw *a5psw, int port, int pattern, + bool enable) +{ + u32 rx_match = 0; + + if (enable) + rx_match |= A5PSW_RXMATCH_CONFIG_PATTERN(pattern); + + a5psw_reg_rmw(a5psw, A5PSW_RXMATCH_CONFIG(port), + A5PSW_RXMATCH_CONFIG_PATTERN(pattern), rx_match); +} + +static void a5psw_port_mgmtfwd_set(struct a5psw *a5psw, int port, bool enable) +{ + /* Enable "management forward" pattern matching, this will forward + * packets from this port only towards the management port and thus + * isolate the port. 
+ */ + a5psw_port_pattern_set(a5psw, port, A5PSW_PATTERN_MGMTFWD, enable); +} + +static void a5psw_port_enable_set(struct a5psw *a5psw, int port, bool enable) +{ + u32 port_ena = 0; + + if (enable) + port_ena |= A5PSW_PORT_ENA_TX_RX(port); + + a5psw_reg_rmw(a5psw, A5PSW_PORT_ENA, A5PSW_PORT_ENA_TX_RX(port), + port_ena); +} + +static int a5psw_lk_execute_ctrl(struct a5psw *a5psw, u32 *ctrl) +{ + int ret; + + a5psw_reg_writel(a5psw, A5PSW_LK_ADDR_CTRL, *ctrl); + + ret = readl_poll_timeout(a5psw->base + A5PSW_LK_ADDR_CTRL, *ctrl, + !(*ctrl & A5PSW_LK_ADDR_CTRL_BUSY), + A5PSW_LK_BUSY_USEC_POLL, A5PSW_CTRL_TIMEOUT); + if (ret) + dev_err(a5psw->dev, "LK_CTRL timeout waiting for BUSY bit\n"); + + return ret; +} + +static void a5psw_port_fdb_flush(struct a5psw *a5psw, int port) +{ + u32 ctrl = A5PSW_LK_ADDR_CTRL_DELETE_PORT | BIT(port); + + mutex_lock(&a5psw->lk_lock); + a5psw_lk_execute_ctrl(a5psw, &ctrl); + mutex_unlock(&a5psw->lk_lock); +} + +static void a5psw_port_authorize_set(struct a5psw *a5psw, int port, + bool authorize) +{ + u32 reg = a5psw_reg_readl(a5psw, A5PSW_AUTH_PORT(port)); + + if (authorize) + reg |= A5PSW_AUTH_PORT_AUTHORIZED; + else + reg &= ~A5PSW_AUTH_PORT_AUTHORIZED; + + a5psw_reg_writel(a5psw, A5PSW_AUTH_PORT(port), reg); +} + +static void a5psw_port_disable(struct dsa_switch *ds, int port) +{ + struct a5psw *a5psw = ds->priv; + + a5psw_port_authorize_set(a5psw, port, false); + a5psw_port_enable_set(a5psw, port, false); +} + +static int a5psw_port_enable(struct dsa_switch *ds, int port, + struct phy_device *phy) +{ + struct a5psw *a5psw = ds->priv; + + a5psw_port_authorize_set(a5psw, port, true); + a5psw_port_enable_set(a5psw, port, true); + + return 0; +} + +static int a5psw_port_change_mtu(struct dsa_switch *ds, int port, int new_mtu) +{ + struct a5psw *a5psw = ds->priv; + + new_mtu += ETH_HLEN + A5PSW_EXTRA_MTU_LEN + ETH_FCS_LEN; + a5psw_reg_writel(a5psw, A5PSW_FRM_LENGTH(port), new_mtu); + + return 0; +} + +static int a5psw_port_max_mtu(struct 
dsa_switch *ds, int port) +{ + return A5PSW_MAX_MTU; +} + +static void a5psw_phylink_get_caps(struct dsa_switch *ds, int port, + struct phylink_config *config) +{ + unsigned long *intf = config->supported_interfaces; + + config->mac_capabilities = MAC_1000FD; + + if (dsa_is_cpu_port(ds, port)) { + /* GMII is used internally and GMAC2 is connected to the switch + * using 1000Mbps Full-Duplex mode only (cf ethernet manual) + */ + __set_bit(PHY_INTERFACE_MODE_GMII, intf); + } else { + config->mac_capabilities |= MAC_100 | MAC_10; + phy_interface_set_rgmii(intf); + __set_bit(PHY_INTERFACE_MODE_RMII, intf); + __set_bit(PHY_INTERFACE_MODE_MII, intf); + } +} + +static struct phylink_pcs * +a5psw_phylink_mac_select_pcs(struct dsa_switch *ds, int port, + phy_interface_t interface) +{ + struct dsa_port *dp = dsa_to_port(ds, port); + struct a5psw *a5psw = ds->priv; + + if (!dsa_port_is_cpu(dp) && a5psw->pcs[port]) + return a5psw->pcs[port]; + + return NULL; +} + +static void a5psw_phylink_mac_link_down(struct dsa_switch *ds, int port, + unsigned int mode, + phy_interface_t interface) +{ + struct a5psw *a5psw = ds->priv; + u32 cmd_cfg; + + cmd_cfg = a5psw_reg_readl(a5psw, A5PSW_CMD_CFG(port)); + cmd_cfg &= ~(A5PSW_CMD_CFG_RX_ENA | A5PSW_CMD_CFG_TX_ENA); + a5psw_reg_writel(a5psw, A5PSW_CMD_CFG(port), cmd_cfg); +} + +static void a5psw_phylink_mac_link_up(struct dsa_switch *ds, int port, + unsigned int mode, + phy_interface_t interface, + struct phy_device *phydev, int speed, + int duplex, bool tx_pause, bool rx_pause) +{ + u32 cmd_cfg = A5PSW_CMD_CFG_RX_ENA | A5PSW_CMD_CFG_TX_ENA | + A5PSW_CMD_CFG_TX_CRC_APPEND; + struct a5psw *a5psw = ds->priv; + + if (speed == SPEED_1000) + cmd_cfg |= A5PSW_CMD_CFG_ETH_SPEED; + + if (duplex == DUPLEX_HALF) + cmd_cfg |= A5PSW_CMD_CFG_HD_ENA; + + cmd_cfg |= A5PSW_CMD_CFG_CNTL_FRM_ENA; + + if (!rx_pause) + cmd_cfg &= ~A5PSW_CMD_CFG_PAUSE_IGNORE; + + a5psw_reg_writel(a5psw, A5PSW_CMD_CFG(port), cmd_cfg); +} + +static int 
a5psw_set_ageing_time(struct dsa_switch *ds, unsigned int msecs) +{ + struct a5psw *a5psw = ds->priv; + unsigned long rate; + u64 max, tmp; + u32 agetime; + + rate = clk_get_rate(a5psw->clk); + max = div64_ul(((u64)A5PSW_LK_AGETIME_MASK * A5PSW_TABLE_ENTRIES * 1024), + rate) * 1000; + if (msecs > max) + return -EINVAL; + + tmp = div_u64(rate, MSEC_PER_SEC); + agetime = div_u64(msecs * tmp, 1024 * A5PSW_TABLE_ENTRIES); + + a5psw_reg_writel(a5psw, A5PSW_LK_AGETIME, agetime); + + return 0; +} + +static void a5psw_flooding_set_resolution(struct a5psw *a5psw, int port, + bool set) +{ + u8 offsets[] = {A5PSW_UCAST_DEF_MASK, A5PSW_BCAST_DEF_MASK, + A5PSW_MCAST_DEF_MASK}; + int i; + + if (set) + a5psw->bridged_ports |= BIT(port); + else + a5psw->bridged_ports &= ~BIT(port); + + for (i = 0; i < ARRAY_SIZE(offsets); i++) + a5psw_reg_writel(a5psw, offsets[i], a5psw->bridged_ports); +} + +static int a5psw_port_bridge_join(struct dsa_switch *ds, int port, + struct dsa_bridge bridge, + bool *tx_fwd_offload, + struct netlink_ext_ack *extack) +{ + struct a5psw *a5psw = ds->priv; + + /* We only support 1 bridge device */ + if (a5psw->br_dev && bridge.dev != a5psw->br_dev) { + NL_SET_ERR_MSG_MOD(extack, + "Forwarding offload supported for a single bridge"); + return -EOPNOTSUPP; + } + + a5psw->br_dev = bridge.dev; + a5psw_flooding_set_resolution(a5psw, port, true); + a5psw_port_mgmtfwd_set(a5psw, port, false); + + return 0; +} + +static void a5psw_port_bridge_leave(struct dsa_switch *ds, int port, + struct dsa_bridge bridge) +{ + struct a5psw *a5psw = ds->priv; + + a5psw_flooding_set_resolution(a5psw, port, false); + a5psw_port_mgmtfwd_set(a5psw, port, true); + + /* No more ports bridged */ + if (a5psw->bridged_ports == BIT(A5PSW_CPU_PORT)) + a5psw->br_dev = NULL; +} + +static void a5psw_port_stp_state_set(struct dsa_switch *ds, int port, u8 state) +{ + u32 mask = A5PSW_INPUT_LEARN_DIS(port) | A5PSW_INPUT_LEARN_BLOCK(port); + struct a5psw *a5psw = ds->priv; + u32 reg = 0; + + switch 
(state) { + case BR_STATE_DISABLED: + case BR_STATE_BLOCKING: + reg |= A5PSW_INPUT_LEARN_DIS(port); + reg |= A5PSW_INPUT_LEARN_BLOCK(port); + break; + case BR_STATE_LISTENING: + reg |= A5PSW_INPUT_LEARN_DIS(port); + break; + case BR_STATE_LEARNING: + reg |= A5PSW_INPUT_LEARN_BLOCK(port); + break; + case BR_STATE_FORWARDING: + default: + break; + } + + a5psw_reg_rmw(a5psw, A5PSW_INPUT_LEARN, mask, reg); +} + +static void a5psw_port_fast_age(struct dsa_switch *ds, int port) +{ + struct a5psw *a5psw = ds->priv; + + a5psw_port_fdb_flush(a5psw, port); +} + +static int a5psw_lk_execute_lookup(struct a5psw *a5psw, union lk_data *lk_data, + u16 *entry) +{ + u32 ctrl; + int ret; + + a5psw_reg_writel(a5psw, A5PSW_LK_DATA_LO, lk_data->lo); + a5psw_reg_writel(a5psw, A5PSW_LK_DATA_HI, lk_data->hi); + + ctrl = A5PSW_LK_ADDR_CTRL_LOOKUP; + ret = a5psw_lk_execute_ctrl(a5psw, &ctrl); + if (ret) + return ret; + + *entry = ctrl & A5PSW_LK_ADDR_CTRL_ADDRESS; + + return 0; +} + +static int a5psw_port_fdb_add(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid, + struct dsa_db db) +{ + struct a5psw *a5psw = ds->priv; + union lk_data lk_data = {0}; + bool inc_learncount = false; + int ret = 0; + u16 entry; + u32 reg; + + ether_addr_copy(lk_data.entry.mac, addr); + lk_data.entry.port_mask = BIT(port); + + mutex_lock(&a5psw->lk_lock); + + /* Set the value to be written in the lookup table */ + ret = a5psw_lk_execute_lookup(a5psw, &lk_data, &entry); + if (ret) + goto lk_unlock; + + lk_data.hi = a5psw_reg_readl(a5psw, A5PSW_LK_DATA_HI); + if (!lk_data.entry.valid) { + inc_learncount = true; + /* port_mask set to 0x1f when entry is not valid, clear it */ + lk_data.entry.port_mask = 0; + lk_data.entry.prio = 0; + } + + lk_data.entry.port_mask |= BIT(port); + lk_data.entry.is_static = 1; + lk_data.entry.valid = 1; + + a5psw_reg_writel(a5psw, A5PSW_LK_DATA_HI, lk_data.hi); + + reg = A5PSW_LK_ADDR_CTRL_WRITE | entry; + ret = a5psw_lk_execute_ctrl(a5psw, &reg); + if (ret) + goto
lk_unlock; + + if (inc_learncount) { + reg = A5PSW_LK_LEARNCOUNT_MODE_INC; + a5psw_reg_writel(a5psw, A5PSW_LK_LEARNCOUNT, reg); + } + +lk_unlock: + mutex_unlock(&a5psw->lk_lock); + + return ret; +} + +static int a5psw_port_fdb_del(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid, + struct dsa_db db) +{ + struct a5psw *a5psw = ds->priv; + union lk_data lk_data = {0}; + bool clear = false; + u16 entry; + u32 reg; + int ret; + + ether_addr_copy(lk_data.entry.mac, addr); + + mutex_lock(&a5psw->lk_lock); + + ret = a5psw_lk_execute_lookup(a5psw, &lk_data, &entry); + if (ret) + goto lk_unlock; + + lk_data.hi = a5psw_reg_readl(a5psw, A5PSW_LK_DATA_HI); + + /* Our hardware does not associate any VID to the FDB entries so this + * means that if two entries were added for the same mac but for + * different VID, then, on the deletion of the first one, we would also + * delete the second one. Since there is unfortunately nothing we can do + * about that, do not return an error... 
+ */ + if (!lk_data.entry.valid) + goto lk_unlock; + + lk_data.entry.port_mask &= ~BIT(port); + /* If there is no more port in the mask, clear the entry */ + if (lk_data.entry.port_mask == 0) + clear = true; + + a5psw_reg_writel(a5psw, A5PSW_LK_DATA_HI, lk_data.hi); + + reg = entry; + if (clear) + reg |= A5PSW_LK_ADDR_CTRL_CLEAR; + else + reg |= A5PSW_LK_ADDR_CTRL_WRITE; + + ret = a5psw_lk_execute_ctrl(a5psw, &reg); + if (ret) + goto lk_unlock; + + /* Decrement LEARNCOUNT */ + if (clear) { + reg = A5PSW_LK_LEARNCOUNT_MODE_DEC; + a5psw_reg_writel(a5psw, A5PSW_LK_LEARNCOUNT, reg); + } + +lk_unlock: + mutex_unlock(&a5psw->lk_lock); + + return ret; +} + +static int a5psw_port_fdb_dump(struct dsa_switch *ds, int port, + dsa_fdb_dump_cb_t *cb, void *data) +{ + struct a5psw *a5psw = ds->priv; + union lk_data lk_data; + int i = 0, ret = 0; + u32 reg; + + mutex_lock(&a5psw->lk_lock); + + for (i = 0; i < A5PSW_TABLE_ENTRIES; i++) { + reg = A5PSW_LK_ADDR_CTRL_READ | A5PSW_LK_ADDR_CTRL_WAIT | i; + + ret = a5psw_lk_execute_ctrl(a5psw, &reg); + if (ret) + goto out_unlock; + + lk_data.hi = a5psw_reg_readl(a5psw, A5PSW_LK_DATA_HI); + /* If entry is not valid or does not contain the port, skip */ + if (!lk_data.entry.valid || + !(lk_data.entry.port_mask & BIT(port))) + continue; + + lk_data.lo = a5psw_reg_readl(a5psw, A5PSW_LK_DATA_LO); + + ret = cb(lk_data.entry.mac, 0, lk_data.entry.is_static, data); + if (ret) + goto out_unlock; + } + +out_unlock: + mutex_unlock(&a5psw->lk_lock); + + return ret; +} + +static u64 a5psw_read_stat(struct a5psw *a5psw, u32 offset, int port) +{ + u32 reg_lo, reg_hi; + + reg_lo = a5psw_reg_readl(a5psw, offset + A5PSW_PORT_OFFSET(port)); + /* A5PSW_STATS_HIWORD is latched on stat read */ + reg_hi = a5psw_reg_readl(a5psw, A5PSW_STATS_HIWORD); + + return ((u64)reg_hi << 32) | reg_lo; +} + +static void a5psw_get_strings(struct dsa_switch *ds, int port, u32 stringset, + uint8_t *data) +{ + unsigned int u; + + if (stringset != ETH_SS_STATS) + return; + + for (u =
0; u < ARRAY_SIZE(a5psw_stats); u++) { + memcpy(data + u * ETH_GSTRING_LEN, a5psw_stats[u].name, + ETH_GSTRING_LEN); + } +} + +static void a5psw_get_ethtool_stats(struct dsa_switch *ds, int port, + uint64_t *data) +{ + struct a5psw *a5psw = ds->priv; + unsigned int u; + + for (u = 0; u < ARRAY_SIZE(a5psw_stats); u++) + data[u] = a5psw_read_stat(a5psw, a5psw_stats[u].offset, port); +} + +static int a5psw_get_sset_count(struct dsa_switch *ds, int port, int sset) +{ + if (sset != ETH_SS_STATS) + return 0; + + return ARRAY_SIZE(a5psw_stats); +} + +static void a5psw_get_eth_mac_stats(struct dsa_switch *ds, int port, + struct ethtool_eth_mac_stats *mac_stats) +{ + struct a5psw *a5psw = ds->priv; + +#define RD(name) a5psw_read_stat(a5psw, A5PSW_##name, port) + mac_stats->FramesTransmittedOK = RD(aFramesTransmittedOK); + mac_stats->SingleCollisionFrames = RD(aSingleCollisions); + mac_stats->MultipleCollisionFrames = RD(aMultipleCollisions); + mac_stats->FramesReceivedOK = RD(aFramesReceivedOK); + mac_stats->FrameCheckSequenceErrors = RD(aFrameCheckSequenceErrors); + mac_stats->AlignmentErrors = RD(aAlignmentErrors); + mac_stats->OctetsTransmittedOK = RD(aOctetsTransmittedOK); + mac_stats->FramesWithDeferredXmissions = RD(aDeferred); + mac_stats->LateCollisions = RD(aLateCollisions); + mac_stats->FramesAbortedDueToXSColls = RD(aExcessiveCollisions); + mac_stats->FramesLostDueToIntMACXmitError = RD(ifOutErrors); + mac_stats->CarrierSenseErrors = RD(aCarrierSenseErrors); + mac_stats->OctetsReceivedOK = RD(aOctetsReceivedOK); + mac_stats->FramesLostDueToIntMACRcvError = RD(ifInErrors); + mac_stats->MulticastFramesXmittedOK = RD(ifOutMulticastPkts); + mac_stats->BroadcastFramesXmittedOK = RD(ifOutBroadcastPkts); + mac_stats->FramesWithExcessiveDeferral = RD(aDeferred); + mac_stats->MulticastFramesReceivedOK = RD(ifInMulticastPkts); + mac_stats->BroadcastFramesReceivedOK = RD(ifInBroadcastPkts); +#undef RD +} + +static const struct ethtool_rmon_hist_range a5psw_rmon_ranges[] = { 
+ { 0, 64 }, + { 65, 127 }, + { 128, 255 }, + { 256, 511 }, + { 512, 1023 }, + { 1024, 1518 }, + { 1519, A5PSW_MAX_MTU }, + {} +}; + +static void a5psw_get_rmon_stats(struct dsa_switch *ds, int port, + struct ethtool_rmon_stats *rmon_stats, + const struct ethtool_rmon_hist_range **ranges) +{ + struct a5psw *a5psw = ds->priv; + +#define RD(name) a5psw_read_stat(a5psw, A5PSW_##name, port) + rmon_stats->undersize_pkts = RD(etherStatsUndersizePkts); + rmon_stats->oversize_pkts = RD(etherStatsOversizePkts); + rmon_stats->fragments = RD(etherStatsFragments); + rmon_stats->jabbers = RD(etherStatsJabbers); + rmon_stats->hist[0] = RD(etherStatsPkts64Octets); + rmon_stats->hist[1] = RD(etherStatsPkts65to127Octets); + rmon_stats->hist[2] = RD(etherStatsPkts128to255Octets); + rmon_stats->hist[3] = RD(etherStatsPkts256to511Octets); + rmon_stats->hist[4] = RD(etherStatsPkts512to1023Octets); + rmon_stats->hist[5] = RD(etherStatsPkts1024to1518Octets); + rmon_stats->hist[6] = RD(etherStatsPkts1519toXOctets); +#undef RD + + *ranges = a5psw_rmon_ranges; +} + +static void a5psw_get_eth_ctrl_stats(struct dsa_switch *ds, int port, + struct ethtool_eth_ctrl_stats *ctrl_stats) +{ + struct a5psw *a5psw = ds->priv; + u64 stat; + + stat = a5psw_read_stat(a5psw, A5PSW_aTxPAUSEMACCtrlFrames, port); + ctrl_stats->MACControlFramesTransmitted = stat; + stat = a5psw_read_stat(a5psw, A5PSW_aRxPAUSEMACCtrlFrames, port); + ctrl_stats->MACControlFramesReceived = stat; +} + +static int a5psw_setup(struct dsa_switch *ds) +{ + struct a5psw *a5psw = ds->priv; + int port, vlan, ret; + struct dsa_port *dp; + u32 reg; + + /* Validate that there is only 1 CPU port with index A5PSW_CPU_PORT */ + dsa_switch_for_each_cpu_port(dp, ds) { + if (dp->index != A5PSW_CPU_PORT) { + dev_err(a5psw->dev, "Invalid CPU port\n"); + return -EINVAL; + } + } + + /* Configure management port */ + reg = A5PSW_CPU_PORT | A5PSW_MGMT_CFG_DISCARD; + a5psw_reg_writel(a5psw, A5PSW_MGMT_CFG, reg); + + /* Set pattern 0 to forward all 
frame to mgmt port */ + a5psw_reg_writel(a5psw, A5PSW_PATTERN_CTRL(A5PSW_PATTERN_MGMTFWD), + A5PSW_PATTERN_CTRL_MGMTFWD); + + /* Enable port tagging */ + reg = FIELD_PREP(A5PSW_MGMT_TAG_CFG_TAGFIELD, ETH_P_DSA_A5PSW); + reg |= A5PSW_MGMT_TAG_CFG_ENABLE | A5PSW_MGMT_TAG_CFG_ALL_FRAMES; + a5psw_reg_writel(a5psw, A5PSW_MGMT_TAG_CFG, reg); + + /* Enable normal switch operation */ + reg = A5PSW_LK_ADDR_CTRL_BLOCKING | A5PSW_LK_ADDR_CTRL_LEARNING | + A5PSW_LK_ADDR_CTRL_AGEING | A5PSW_LK_ADDR_CTRL_ALLOW_MIGR | + A5PSW_LK_ADDR_CTRL_CLEAR_TABLE; + a5psw_reg_writel(a5psw, A5PSW_LK_CTRL, reg); + + ret = readl_poll_timeout(a5psw->base + A5PSW_LK_CTRL, reg, + !(reg & A5PSW_LK_ADDR_CTRL_CLEAR_TABLE), + A5PSW_LK_BUSY_USEC_POLL, A5PSW_CTRL_TIMEOUT); + if (ret) { + dev_err(a5psw->dev, "Failed to clear lookup table\n"); + return ret; + } + + /* Reset learn count to 0 */ + reg = A5PSW_LK_LEARNCOUNT_MODE_SET; + a5psw_reg_writel(a5psw, A5PSW_LK_LEARNCOUNT, reg); + + /* Clear VLAN resource table */ + reg = A5PSW_VLAN_RES_WR_PORTMASK | A5PSW_VLAN_RES_WR_TAGMASK; + for (vlan = 0; vlan < A5PSW_VLAN_COUNT; vlan++) + a5psw_reg_writel(a5psw, A5PSW_VLAN_RES(vlan), reg); + + /* Reset all ports */ + dsa_switch_for_each_port(dp, ds) { + port = dp->index; + + /* Reset the port */ + a5psw_reg_writel(a5psw, A5PSW_CMD_CFG(port), + A5PSW_CMD_CFG_SW_RESET); + + /* Enable only CPU port */ + a5psw_port_enable_set(a5psw, port, dsa_port_is_cpu(dp)); + + if (dsa_port_is_unused(dp)) + continue; + + /* Enable egress flooding for CPU port */ + if (dsa_port_is_cpu(dp)) + a5psw_flooding_set_resolution(a5psw, port, true); + + /* Enable management forward only for user ports */ + if (dsa_port_is_user(dp)) + a5psw_port_mgmtfwd_set(a5psw, port, true); + } + + return 0; +} + +static const struct dsa_switch_ops a5psw_switch_ops = { + .get_tag_protocol = a5psw_get_tag_protocol, + .setup = a5psw_setup, + .port_disable = a5psw_port_disable, + .port_enable = a5psw_port_enable, + .phylink_get_caps = a5psw_phylink_get_caps, 
+ .phylink_mac_select_pcs = a5psw_phylink_mac_select_pcs, + .phylink_mac_link_down = a5psw_phylink_mac_link_down, + .phylink_mac_link_up = a5psw_phylink_mac_link_up, + .port_change_mtu = a5psw_port_change_mtu, + .port_max_mtu = a5psw_port_max_mtu, + .get_sset_count = a5psw_get_sset_count, + .get_strings = a5psw_get_strings, + .get_ethtool_stats = a5psw_get_ethtool_stats, + .get_eth_mac_stats = a5psw_get_eth_mac_stats, + .get_eth_ctrl_stats = a5psw_get_eth_ctrl_stats, + .get_rmon_stats = a5psw_get_rmon_stats, + .set_ageing_time = a5psw_set_ageing_time, + .port_bridge_join = a5psw_port_bridge_join, + .port_bridge_leave = a5psw_port_bridge_leave, + .port_stp_state_set = a5psw_port_stp_state_set, + .port_fast_age = a5psw_port_fast_age, + .port_fdb_add = a5psw_port_fdb_add, + .port_fdb_del = a5psw_port_fdb_del, + .port_fdb_dump = a5psw_port_fdb_dump, +}; + +static int a5psw_mdio_wait_busy(struct a5psw *a5psw) +{ + u32 status; + int err; + + err = readl_poll_timeout(a5psw->base + A5PSW_MDIO_CFG_STATUS, status, + !(status & A5PSW_MDIO_CFG_STATUS_BUSY), 10, + 1000 * USEC_PER_MSEC); + if (err) + dev_err(a5psw->dev, "MDIO command timeout\n"); + + return err; +} + +static int a5psw_mdio_read(struct mii_bus *bus, int phy_id, int phy_reg) +{ + struct a5psw *a5psw = bus->priv; + u32 cmd, status; + int ret; + + if (phy_reg & MII_ADDR_C45) + return -EOPNOTSUPP; + + cmd = A5PSW_MDIO_COMMAND_READ; + cmd |= FIELD_PREP(A5PSW_MDIO_COMMAND_REG_ADDR, phy_reg); + cmd |= FIELD_PREP(A5PSW_MDIO_COMMAND_PHY_ADDR, phy_id); + + a5psw_reg_writel(a5psw, A5PSW_MDIO_COMMAND, cmd); + + ret = a5psw_mdio_wait_busy(a5psw); + if (ret) + return ret; + + ret = a5psw_reg_readl(a5psw, A5PSW_MDIO_DATA) & A5PSW_MDIO_DATA_MASK; + + status = a5psw_reg_readl(a5psw, A5PSW_MDIO_CFG_STATUS); + if (status & A5PSW_MDIO_CFG_STATUS_READERR) + return -EIO; + + return ret; +} + +static int a5psw_mdio_write(struct mii_bus *bus, int phy_id, int phy_reg, + u16 phy_data) +{ + struct a5psw *a5psw = bus->priv; + u32 cmd; + + 
if (phy_reg & MII_ADDR_C45) + return -EOPNOTSUPP; + + cmd = FIELD_PREP(A5PSW_MDIO_COMMAND_REG_ADDR, phy_reg); + cmd |= FIELD_PREP(A5PSW_MDIO_COMMAND_PHY_ADDR, phy_id); + + a5psw_reg_writel(a5psw, A5PSW_MDIO_COMMAND, cmd); + a5psw_reg_writel(a5psw, A5PSW_MDIO_DATA, phy_data); + + return a5psw_mdio_wait_busy(a5psw); +} + +static int a5psw_mdio_config(struct a5psw *a5psw, u32 mdio_freq) +{ + unsigned long rate; + unsigned long div; + u32 cfgstatus; + + rate = clk_get_rate(a5psw->hclk); + div = ((rate / mdio_freq) / 2); + if (div > FIELD_MAX(A5PSW_MDIO_CFG_STATUS_CLKDIV) || + div < A5PSW_MDIO_CLK_DIV_MIN) { + dev_err(a5psw->dev, "MDIO clock div %ld out of range\n", div); + return -ERANGE; + } + + cfgstatus = FIELD_PREP(A5PSW_MDIO_CFG_STATUS_CLKDIV, div); + + a5psw_reg_writel(a5psw, A5PSW_MDIO_CFG_STATUS, cfgstatus); + + return 0; +} + +static int a5psw_probe_mdio(struct a5psw *a5psw, struct device_node *node) +{ + struct device *dev = a5psw->dev; + struct mii_bus *bus; + u32 mdio_freq; + int ret; + + if (of_property_read_u32(node, "clock-frequency", &mdio_freq)) + mdio_freq = A5PSW_MDIO_DEF_FREQ; + + ret = a5psw_mdio_config(a5psw, mdio_freq); + if (ret) + return ret; + + bus = devm_mdiobus_alloc(dev); + if (!bus) + return -ENOMEM; + + bus->name = "a5psw_mdio"; + bus->read = a5psw_mdio_read; + bus->write = a5psw_mdio_write; + bus->priv = a5psw; + bus->parent = dev; + snprintf(bus->id, MII_BUS_ID_SIZE, "%s", dev_name(dev)); + + a5psw->mii_bus = bus; + + return devm_of_mdiobus_register(dev, bus, node); +} + +static void a5psw_pcs_free(struct a5psw *a5psw) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(a5psw->pcs); i++) { + if (a5psw->pcs[i]) + miic_destroy(a5psw->pcs[i]); + } +} + +static int a5psw_pcs_get(struct a5psw *a5psw) +{ + struct device_node *ports, *port, *pcs_node; + struct phylink_pcs *pcs; + int ret; + u32 reg; + + ports = of_get_child_by_name(a5psw->dev->of_node, "ethernet-ports"); + if (!ports) + return -EINVAL; + + for_each_available_child_of_node(ports, port) 
{ + pcs_node = of_parse_phandle(port, "pcs-handle", 0); + if (!pcs_node) + continue; + + if (of_property_read_u32(port, "reg", ®)) { + ret = -EINVAL; + goto free_pcs; + } + + if (reg >= ARRAY_SIZE(a5psw->pcs)) { + ret = -ENODEV; + goto free_pcs; + } + + pcs = miic_create(a5psw->dev, pcs_node); + if (IS_ERR(pcs)) { + dev_err(a5psw->dev, "Failed to create PCS for port %d\n", + reg); + ret = PTR_ERR(pcs); + goto free_pcs; + } + + a5psw->pcs[reg] = pcs; + } + of_node_put(ports); + + return 0; + +free_pcs: + of_node_put(port); + of_node_put(ports); + a5psw_pcs_free(a5psw); + + return ret; +} + +static int a5psw_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct device_node *mdio; + struct dsa_switch *ds; + struct a5psw *a5psw; + int ret; + + a5psw = devm_kzalloc(dev, sizeof(*a5psw), GFP_KERNEL); + if (!a5psw) + return -ENOMEM; + + a5psw->dev = dev; + mutex_init(&a5psw->lk_lock); + spin_lock_init(&a5psw->reg_lock); + a5psw->base = devm_platform_ioremap_resource(pdev, 0); + if (!a5psw->base) + return -EINVAL; + + ret = a5psw_pcs_get(a5psw); + if (ret) + return ret; + + a5psw->hclk = devm_clk_get(dev, "hclk"); + if (IS_ERR(a5psw->hclk)) { + dev_err(dev, "failed get hclk clock\n"); + ret = PTR_ERR(a5psw->hclk); + goto free_pcs; + } + + a5psw->clk = devm_clk_get(dev, "clk"); + if (IS_ERR(a5psw->clk)) { + dev_err(dev, "failed get clk_switch clock\n"); + ret = PTR_ERR(a5psw->clk); + goto free_pcs; + } + + ret = clk_prepare_enable(a5psw->clk); + if (ret) + goto free_pcs; + + ret = clk_prepare_enable(a5psw->hclk); + if (ret) + goto clk_disable; + + mdio = of_get_child_by_name(dev->of_node, "mdio"); + if (of_device_is_available(mdio)) { + ret = a5psw_probe_mdio(a5psw, mdio); + if (ret) { + of_node_put(mdio); + dev_err(dev, "Failed to register MDIO: %d\n", ret); + goto hclk_disable; + } + } + + of_node_put(mdio); + + ds = &a5psw->ds; + ds->dev = dev; + ds->num_ports = A5PSW_PORTS_NUM; + ds->ops = &a5psw_switch_ops; + ds->priv = a5psw; + + ret = 
dsa_register_switch(ds); + if (ret) { + dev_err(dev, "Failed to register DSA switch: %d\n", ret); + goto hclk_disable; + } + + return 0; + +hclk_disable: + clk_disable_unprepare(a5psw->hclk); +clk_disable: + clk_disable_unprepare(a5psw->clk); +free_pcs: + a5psw_pcs_free(a5psw); + + return ret; +} + +static int a5psw_remove(struct platform_device *pdev) +{ + struct a5psw *a5psw = platform_get_drvdata(pdev); + + if (!a5psw) + return 0; + + dsa_unregister_switch(&a5psw->ds); + a5psw_pcs_free(a5psw); + clk_disable_unprepare(a5psw->hclk); + clk_disable_unprepare(a5psw->clk); + + platform_set_drvdata(pdev, NULL); + + return 0; +} + +static void a5psw_shutdown(struct platform_device *pdev) +{ + struct a5psw *a5psw = platform_get_drvdata(pdev); + + if (!a5psw) + return; + + dsa_switch_shutdown(&a5psw->ds); + + platform_set_drvdata(pdev, NULL); +} + +static const struct of_device_id a5psw_of_mtable[] = { + { .compatible = "renesas,rzn1-a5psw", }, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(of, a5psw_of_mtable); + +static struct platform_driver a5psw_driver = { + .driver = { + .name = "rzn1_a5psw", + .of_match_table = of_match_ptr(a5psw_of_mtable), + }, + .probe = a5psw_probe, + .remove = a5psw_remove, + .shutdown = a5psw_shutdown, +}; +module_platform_driver(a5psw_driver); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Renesas RZ/N1 Advanced 5-port Switch driver"); +MODULE_AUTHOR("Clément Léger <clement.leger@bootlin.com>"); diff --git a/drivers/net/dsa/rzn1_a5psw.h b/drivers/net/dsa/rzn1_a5psw.h new file mode 100644 index 000000000000..c67abd49c013 --- /dev/null +++ b/drivers/net/dsa/rzn1_a5psw.h @@ -0,0 +1,259 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2022 Schneider Electric + * + * Clément Léger <clement.leger@bootlin.com> + */ + +#include <linux/clk.h> +#include <linux/debugfs.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_mdio.h> +#include <linux/platform_device.h> +#include 
<linux/pcs-rzn1-miic.h> +#include <net/dsa.h> + +#define A5PSW_REVISION 0x0 +#define A5PSW_PORT_OFFSET(port) (0x400 * (port)) + +#define A5PSW_PORT_ENA 0x8 +#define A5PSW_PORT_ENA_RX_SHIFT 16 +#define A5PSW_PORT_ENA_TX_RX(port) (BIT((port) + A5PSW_PORT_ENA_RX_SHIFT) | \ + BIT(port)) +#define A5PSW_UCAST_DEF_MASK 0xC + +#define A5PSW_VLAN_VERIFY 0x10 +#define A5PSW_VLAN_VERI_SHIFT 0 +#define A5PSW_VLAN_DISC_SHIFT 16 + +#define A5PSW_BCAST_DEF_MASK 0x14 +#define A5PSW_MCAST_DEF_MASK 0x18 + +#define A5PSW_INPUT_LEARN 0x1C +#define A5PSW_INPUT_LEARN_DIS(p) BIT((p) + 16) +#define A5PSW_INPUT_LEARN_BLOCK(p) BIT(p) + +#define A5PSW_MGMT_CFG 0x20 +#define A5PSW_MGMT_CFG_DISCARD BIT(7) + +#define A5PSW_MODE_CFG 0x24 +#define A5PSW_MODE_STATS_RESET BIT(31) + +#define A5PSW_VLAN_IN_MODE 0x28 +#define A5PSW_VLAN_IN_MODE_PORT_SHIFT(port) ((port) * 2) +#define A5PSW_VLAN_IN_MODE_PORT(port) (GENMASK(1, 0) << \ + A5PSW_VLAN_IN_MODE_PORT_SHIFT(port)) +#define A5PSW_VLAN_IN_MODE_SINGLE_PASSTHROUGH 0x0 +#define A5PSW_VLAN_IN_MODE_SINGLE_REPLACE 0x1 +#define A5PSW_VLAN_IN_MODE_TAG_ALWAYS 0x2 + +#define A5PSW_VLAN_OUT_MODE 0x2C +#define A5PSW_VLAN_OUT_MODE_PORT(port) (GENMASK(1, 0) << ((port) * 2)) +#define A5PSW_VLAN_OUT_MODE_DIS 0x0 +#define A5PSW_VLAN_OUT_MODE_STRIP 0x1 +#define A5PSW_VLAN_OUT_MODE_TAG_THROUGH 0x2 +#define A5PSW_VLAN_OUT_MODE_TRANSPARENT 0x3 + +#define A5PSW_VLAN_IN_MODE_ENA 0x30 +#define A5PSW_VLAN_TAG_ID 0x34 + +#define A5PSW_SYSTEM_TAGINFO(port) (0x200 + A5PSW_PORT_OFFSET(port)) + +#define A5PSW_AUTH_PORT(port) (0x240 + 4 * (port)) +#define A5PSW_AUTH_PORT_AUTHORIZED BIT(0) + +#define A5PSW_VLAN_RES(entry) (0x280 + 4 * (entry)) +#define A5PSW_VLAN_RES_WR_PORTMASK BIT(30) +#define A5PSW_VLAN_RES_WR_TAGMASK BIT(29) +#define A5PSW_VLAN_RES_RD_TAGMASK BIT(28) +#define A5PSW_VLAN_RES_ID GENMASK(16, 5) +#define A5PSW_VLAN_RES_PORTMASK GENMASK(4, 0) + +#define A5PSW_RXMATCH_CONFIG(port) (0x3e80 + 4 * (port)) +#define A5PSW_RXMATCH_CONFIG_PATTERN(p) BIT(p) + +#define 
A5PSW_PATTERN_CTRL(p) (0x3eb0 + 4 * (p)) +#define A5PSW_PATTERN_CTRL_MGMTFWD BIT(1) + +#define A5PSW_LK_CTRL 0x400 +#define A5PSW_LK_ADDR_CTRL_BLOCKING BIT(0) +#define A5PSW_LK_ADDR_CTRL_LEARNING BIT(1) +#define A5PSW_LK_ADDR_CTRL_AGEING BIT(2) +#define A5PSW_LK_ADDR_CTRL_ALLOW_MIGR BIT(3) +#define A5PSW_LK_ADDR_CTRL_CLEAR_TABLE BIT(6) + +#define A5PSW_LK_ADDR_CTRL 0x408 +#define A5PSW_LK_ADDR_CTRL_BUSY BIT(31) +#define A5PSW_LK_ADDR_CTRL_DELETE_PORT BIT(30) +#define A5PSW_LK_ADDR_CTRL_CLEAR BIT(29) +#define A5PSW_LK_ADDR_CTRL_LOOKUP BIT(28) +#define A5PSW_LK_ADDR_CTRL_WAIT BIT(27) +#define A5PSW_LK_ADDR_CTRL_READ BIT(26) +#define A5PSW_LK_ADDR_CTRL_WRITE BIT(25) +#define A5PSW_LK_ADDR_CTRL_ADDRESS GENMASK(12, 0) + +#define A5PSW_LK_DATA_LO 0x40C +#define A5PSW_LK_DATA_HI 0x410 +#define A5PSW_LK_DATA_HI_VALID BIT(16) +#define A5PSW_LK_DATA_HI_PORT BIT(16) + +#define A5PSW_LK_LEARNCOUNT 0x418 +#define A5PSW_LK_LEARNCOUNT_COUNT GENMASK(13, 0) +#define A5PSW_LK_LEARNCOUNT_MODE GENMASK(31, 30) +#define A5PSW_LK_LEARNCOUNT_MODE_SET 0x0 +#define A5PSW_LK_LEARNCOUNT_MODE_INC 0x1 +#define A5PSW_LK_LEARNCOUNT_MODE_DEC 0x2 + +#define A5PSW_MGMT_TAG_CFG 0x480 +#define A5PSW_MGMT_TAG_CFG_TAGFIELD GENMASK(31, 16) +#define A5PSW_MGMT_TAG_CFG_ALL_FRAMES BIT(1) +#define A5PSW_MGMT_TAG_CFG_ENABLE BIT(0) + +#define A5PSW_LK_AGETIME 0x41C +#define A5PSW_LK_AGETIME_MASK GENMASK(23, 0) + +#define A5PSW_MDIO_CFG_STATUS 0x700 +#define A5PSW_MDIO_CFG_STATUS_CLKDIV GENMASK(15, 7) +#define A5PSW_MDIO_CFG_STATUS_READERR BIT(1) +#define A5PSW_MDIO_CFG_STATUS_BUSY BIT(0) + +#define A5PSW_MDIO_COMMAND 0x704 +/* Register is named TRAININIT in datasheet and should be set when reading */ +#define A5PSW_MDIO_COMMAND_READ BIT(15) +#define A5PSW_MDIO_COMMAND_PHY_ADDR GENMASK(9, 5) +#define A5PSW_MDIO_COMMAND_REG_ADDR GENMASK(4, 0) + +#define A5PSW_MDIO_DATA 0x708 +#define A5PSW_MDIO_DATA_MASK GENMASK(15, 0) + +#define A5PSW_CMD_CFG(port) (0x808 + A5PSW_PORT_OFFSET(port)) +#define 
A5PSW_CMD_CFG_CNTL_FRM_ENA BIT(23) +#define A5PSW_CMD_CFG_SW_RESET BIT(13) +#define A5PSW_CMD_CFG_TX_CRC_APPEND BIT(11) +#define A5PSW_CMD_CFG_HD_ENA BIT(10) +#define A5PSW_CMD_CFG_PAUSE_IGNORE BIT(8) +#define A5PSW_CMD_CFG_CRC_FWD BIT(6) +#define A5PSW_CMD_CFG_ETH_SPEED BIT(3) +#define A5PSW_CMD_CFG_RX_ENA BIT(1) +#define A5PSW_CMD_CFG_TX_ENA BIT(0) + +#define A5PSW_FRM_LENGTH(port) (0x814 + A5PSW_PORT_OFFSET(port)) +#define A5PSW_FRM_LENGTH_MASK GENMASK(13, 0) + +#define A5PSW_STATUS(port) (0x840 + A5PSW_PORT_OFFSET(port)) + +#define A5PSW_STATS_HIWORD 0x900 + +/* Stats */ +#define A5PSW_aFramesTransmittedOK 0x868 +#define A5PSW_aFramesReceivedOK 0x86C +#define A5PSW_aFrameCheckSequenceErrors 0x870 +#define A5PSW_aAlignmentErrors 0x874 +#define A5PSW_aOctetsTransmittedOK 0x878 +#define A5PSW_aOctetsReceivedOK 0x87C +#define A5PSW_aTxPAUSEMACCtrlFrames 0x880 +#define A5PSW_aRxPAUSEMACCtrlFrames 0x884 +/* If */ +#define A5PSW_ifInErrors 0x888 +#define A5PSW_ifOutErrors 0x88C +#define A5PSW_ifInUcastPkts 0x890 +#define A5PSW_ifInMulticastPkts 0x894 +#define A5PSW_ifInBroadcastPkts 0x898 +#define A5PSW_ifOutDiscards 0x89C +#define A5PSW_ifOutUcastPkts 0x8A0 +#define A5PSW_ifOutMulticastPkts 0x8A4 +#define A5PSW_ifOutBroadcastPkts 0x8A8 +/* Ether */ +#define A5PSW_etherStatsDropEvents 0x8AC +#define A5PSW_etherStatsOctets 0x8B0 +#define A5PSW_etherStatsPkts 0x8B4 +#define A5PSW_etherStatsUndersizePkts 0x8B8 +#define A5PSW_etherStatsOversizePkts 0x8BC +#define A5PSW_etherStatsPkts64Octets 0x8C0 +#define A5PSW_etherStatsPkts65to127Octets 0x8C4 +#define A5PSW_etherStatsPkts128to255Octets 0x8C8 +#define A5PSW_etherStatsPkts256to511Octets 0x8CC +#define A5PSW_etherStatsPkts512to1023Octets 0x8D0 +#define A5PSW_etherStatsPkts1024to1518Octets 0x8D4 +#define A5PSW_etherStatsPkts1519toXOctets 0x8D8 +#define A5PSW_etherStatsJabbers 0x8DC +#define A5PSW_etherStatsFragments 0x8E0 + +#define A5PSW_VLANReceived 0x8E8 +#define A5PSW_VLANTransmitted 0x8EC + +#define A5PSW_aDeferred 
0x910 +#define A5PSW_aMultipleCollisions 0x914 +#define A5PSW_aSingleCollisions 0x918 +#define A5PSW_aLateCollisions 0x91C +#define A5PSW_aExcessiveCollisions 0x920 +#define A5PSW_aCarrierSenseErrors 0x924 + +#define A5PSW_VLAN_TAG(prio, id) (((prio) << 12) | (id)) +#define A5PSW_PORTS_NUM 5 +#define A5PSW_CPU_PORT (A5PSW_PORTS_NUM - 1) +#define A5PSW_MDIO_DEF_FREQ 2500000 +#define A5PSW_MDIO_TIMEOUT 100 +#define A5PSW_JUMBO_LEN (10 * SZ_1K) +#define A5PSW_MDIO_CLK_DIV_MIN 5 +#define A5PSW_TAG_LEN 8 +#define A5PSW_VLAN_COUNT 32 + +/* Ensure enough space for 2 VLAN tags */ +#define A5PSW_EXTRA_MTU_LEN (A5PSW_TAG_LEN + 8) +#define A5PSW_MAX_MTU (A5PSW_JUMBO_LEN - A5PSW_EXTRA_MTU_LEN) + +#define A5PSW_PATTERN_MGMTFWD 0 + +#define A5PSW_LK_BUSY_USEC_POLL 10 +#define A5PSW_CTRL_TIMEOUT 1000 +#define A5PSW_TABLE_ENTRIES 8192 + +struct fdb_entry { + u8 mac[ETH_ALEN]; + u16 valid:1; + u16 is_static:1; + u16 prio:3; + u16 port_mask:5; + u16 reserved:6; +} __packed; + +union lk_data { + struct { + u32 lo; + u32 hi; + }; + struct fdb_entry entry; +}; + +/** + * struct a5psw - switch struct + * @base: Base address of the switch + * @hclk: hclk_switch clock + * @clk: clk_switch clock + * @dev: Device associated to the switch + * @mii_bus: MDIO bus struct + * @mdio_freq: MDIO bus frequency requested + * @pcs: Array of PCS connected to the switch ports (not for the CPU) + * @ds: DSA switch struct + * @stats_lock: lock to access statistics (shared HI counter) + * @lk_lock: Lock for the lookup table + * @reg_lock: Lock for register read-modify-write operation + * @bridged_ports: Mask of ports that are bridged and should be flooded + * @br_dev: Bridge net device + */ +struct a5psw { + void __iomem *base; + struct clk *hclk; + struct clk *clk; + struct device *dev; + struct mii_bus *mii_bus; + struct phylink_pcs *pcs[A5PSW_PORTS_NUM - 1]; + struct dsa_switch ds; + struct mutex lk_lock; + spinlock_t reg_lock; + u32 bridged_ports; + struct net_device *br_dev; +}; diff --git 
a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 72b6fc1932b5..b253e27bcfb4 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -2330,7 +2330,7 @@ int sja1105_static_config_reload(struct sja1105_private *priv, else mode = MLO_AN_PHY; - rc = xpcs_do_config(xpcs, priv->phy_mode[i], mode); + rc = xpcs_do_config(xpcs, priv->phy_mode[i], mode, NULL); if (rc < 0) goto out; diff --git a/drivers/net/eql.c b/drivers/net/eql.c index 557ca8ff9dec..ca3e4700a813 100644 --- a/drivers/net/eql.c +++ b/drivers/net/eql.c @@ -225,7 +225,7 @@ static void eql_kill_one_slave(slave_queue_t *queue, slave_t *slave) list_del(&slave->list); queue->num_slaves--; slave->dev->flags &= ~IFF_SLAVE; - dev_put_track(slave->dev, &slave->dev_tracker); + netdev_put(slave->dev, &slave->dev_tracker); kfree(slave); } @@ -399,7 +399,7 @@ static int __eql_insert_slave(slave_queue_t *queue, slave_t *slave) if (duplicate_slave) eql_kill_one_slave(queue, duplicate_slave); - dev_hold_track(slave->dev, &slave->dev_tracker, GFP_ATOMIC); + netdev_hold(slave->dev, &slave->dev_tracker, GFP_ATOMIC); list_add(&slave->list, &queue->all_slaves); queue->num_slaves++; slave->dev->flags |= IFF_SLAVE; diff --git a/drivers/net/ethernet/altera/altera_utils.h b/drivers/net/ethernet/altera/altera_utils.h index b7d772f2dcbb..3c2e32fb7389 100644 --- a/drivers/net/ethernet/altera/altera_utils.h +++ b/drivers/net/ethernet/altera/altera_utils.h @@ -3,11 +3,12 @@ * Copyright (C) 2014 Altera Corporation. 
All rights reserved */ -#include <linux/kernel.h> - #ifndef __ALTERA_UTILS_H__ #define __ALTERA_UTILS_H__ +#include <linux/compiler.h> +#include <linux/types.h> + void tse_set_bit(void __iomem *ioaddr, size_t offs, u32 bit_mask); void tse_clear_bit(void __iomem *ioaddr, size_t offs, u32 bit_mask); int tse_bit_is_set(void __iomem *ioaddr, size_t offs, u32 bit_mask); diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c index cac509708e9d..1c6ea6766aa1 100644 --- a/drivers/net/ethernet/atheros/ag71xx.c +++ b/drivers/net/ethernet/atheros/ag71xx.c @@ -946,7 +946,7 @@ static unsigned int ag71xx_max_frame_len(unsigned int mtu) return ETH_HLEN + VLAN_HLEN + mtu + ETH_FCS_LEN; } -static void ag71xx_hw_set_macaddr(struct ag71xx *ag, unsigned char *mac) +static void ag71xx_hw_set_macaddr(struct ag71xx *ag, const unsigned char *mac) { u32 t; diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 698438a2ee0f..514d61dd91c7 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -388,7 +388,7 @@ static int bcm_enet_receive_queue(struct net_device *dev, int budget) priv->rx_buf_size, DMA_FROM_DEVICE); priv->rx_buf[desc_idx] = NULL; - skb = build_skb(buf, priv->rx_frag_size); + skb = napi_build_skb(buf, priv->rx_frag_size); if (unlikely(!skb)) { skb_free_frag(buf); dev->stats.rx_dropped++; @@ -468,7 +468,7 @@ static int bcm_enet_tx_reclaim(struct net_device *dev, int force) dev->stats.tx_errors++; bytes += skb->len; - dev_kfree_skb(skb); + napi_consume_skb(skb, !force); released++; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 56b46b8206a7..b474a4fe4039 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -10658,7 +10658,7 @@ static void __bnxt_close_nic(struct bnxt *bp, bool irq_re_init, while (bnxt_drv_busy(bp)) 
msleep(20); - /* Flush rings and and disable interrupts */ + /* Flush rings and disable interrupts */ bnxt_shutdown_nic(bp, irq_re_init); /* TODO CHIMP_FW: Link/PHY related cleanup if (link_re_init) */ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index ddf2f3963abe..c4ed43604ddc 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -307,7 +307,7 @@ int bnxt_set_vf_bw(struct net_device *dev, int vf_id, int min_tx_rate, return -EINVAL; } - if (min_tx_rate > pf_link_speed || min_tx_rate > max_tx_rate) { + if (min_tx_rate > pf_link_speed) { netdev_info(bp->dev, "min tx rate %d is invalid for VF %d\n", min_tx_rate, vf_id); return -EINVAL; diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index d89098f4ede8..d0ea8dbfa213 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -4913,8 +4913,8 @@ static int macb_probe(struct platform_device *pdev) /* MTU range: 68 - 1500 or 10240 */ dev->min_mtu = GEM_MTU_MIN_SIZE; - if (bp->caps & MACB_CAPS_JUMBO) - dev->max_mtu = gem_readl(bp, JML) - ETH_HLEN - ETH_FCS_LEN; + if ((bp->caps & MACB_CAPS_JUMBO) && bp->jumbo_max_len) + dev->max_mtu = bp->jumbo_max_len - ETH_HLEN - ETH_FCS_LEN; else dev->max_mtu = ETH_DATA_LEN; diff --git a/drivers/net/ethernet/cadence/macb_ptp.c b/drivers/net/ethernet/cadence/macb_ptp.c index 9559c16078f9..e6cb20aaa76a 100644 --- a/drivers/net/ethernet/cadence/macb_ptp.c +++ b/drivers/net/ethernet/cadence/macb_ptp.c @@ -434,7 +434,7 @@ int gem_get_hwtst(struct net_device *dev, struct ifreq *rq) return 0; } -static int gem_ptp_set_one_step_sync(struct macb *bp, u8 enable) +static void gem_ptp_set_one_step_sync(struct macb *bp, u8 enable) { u32 reg_val; @@ -444,8 +444,6 @@ static int gem_ptp_set_one_step_sync(struct macb *bp, u8 enable) macb_writel(bp, NCR, reg_val | MACB_BIT(OSSMODE)); 
else macb_writel(bp, NCR, reg_val & ~MACB_BIT(OSSMODE)); - - return 0; } int gem_set_hwtst(struct net_device *dev, struct ifreq *ifr, int cmd) @@ -468,8 +466,7 @@ int gem_set_hwtst(struct net_device *dev, struct ifreq *ifr, int cmd) case HWTSTAMP_TX_OFF: break; case HWTSTAMP_TX_ONESTEP_SYNC: - if (gem_ptp_set_one_step_sync(bp, 1) != 0) - return -ERANGE; + gem_ptp_set_one_step_sync(bp, 1); tx_bd_control = TSTAMP_ALL_FRAMES; break; case HWTSTAMP_TX_ON: diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index 7de3800437c9..c2822e635f89 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -2859,7 +2859,7 @@ static const struct net_device_ops cxgb4vf_netdev_ops = { * address stored on the adapter * @adapter: The adapter * - * Find the the port mask for the VF based on the index of mac + * Find the port mask for the VF based on the index of mac * address stored in the adapter. If no mac address is stored on * the adapter for the VF, use the port mask received from the * firmware. diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c index d546993bda09..1c52592d3b65 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c @@ -877,7 +877,7 @@ int t4vf_get_sge_params(struct adapter *adapter) /* T4 uses a single control field to specify both the PCIe Padding and * Packing Boundary. T5 introduced the ability to specify these - * separately with the Padding Boundary in SGE_CONTROL and and Packing + * separately with the Padding Boundary in SGE_CONTROL and Packing * Boundary in SGE_CONTROL2. So for T5 and later we need to grab * SGE_CONTROL in order to determine how ingress packet data will be * laid out in Packed Buffer Mode. 
Unfortunately, older versions of diff --git a/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c b/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c index d081168c95fa..da42dd53a87c 100644 --- a/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c +++ b/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c @@ -78,6 +78,7 @@ static const char * const txq_stat_names[] = { "tx_cso", "tx_tso", "tx_encapsulated_tso", + "tx_uso", "tx_more", "tx_queue_stops", "tx_queue_restarts", @@ -778,6 +779,7 @@ static void fun_get_ethtool_stats(struct net_device *netdev, ADD_STAT(txs.tx_cso); ADD_STAT(txs.tx_tso); ADD_STAT(txs.tx_encap_tso); + ADD_STAT(txs.tx_uso); ADD_STAT(txs.tx_more); ADD_STAT(txs.tx_nstops); ADD_STAT(txs.tx_nrestarts); diff --git a/drivers/net/ethernet/fungible/funeth/funeth_main.c b/drivers/net/ethernet/fungible/funeth/funeth_main.c index 9485cf699c5d..f247b7ad3a88 100644 --- a/drivers/net/ethernet/fungible/funeth/funeth_main.c +++ b/drivers/net/ethernet/fungible/funeth/funeth_main.c @@ -1357,7 +1357,8 @@ static const struct net_device_ops fun_netdev_ops = { #define GSO_ENCAP_FLAGS (NETIF_F_GSO_GRE | NETIF_F_GSO_IPXIP4 | \ NETIF_F_GSO_IPXIP6 | NETIF_F_GSO_UDP_TUNNEL | \ NETIF_F_GSO_UDP_TUNNEL_CSUM) -#define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN) +#define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN | \ + NETIF_F_GSO_UDP_L4) #define VLAN_FEAT (NETIF_F_SG | NETIF_F_HW_CSUM | TSO_FLAGS | \ GSO_ENCAP_FLAGS | NETIF_F_HIGHDMA) diff --git a/drivers/net/ethernet/fungible/funeth/funeth_tx.c b/drivers/net/ethernet/fungible/funeth/funeth_tx.c index ff6e29237253..0a4a590218ba 100644 --- a/drivers/net/ethernet/fungible/funeth/funeth_tx.c +++ b/drivers/net/ethernet/fungible/funeth/funeth_tx.c @@ -130,6 +130,7 @@ static unsigned int write_pkt_desc(struct sk_buff *skb, struct funeth_txq *q, struct fun_dataop_gl *gle; const struct tcphdr *th; unsigned int ngle, i; + unsigned int l4_hlen; u16 flags; if (unlikely(map_skb(skb, q->dma_dev, addrs, 
lens))) { @@ -178,6 +179,7 @@ static unsigned int write_pkt_desc(struct sk_buff *skb, struct funeth_txq *q, FUN_ETH_UPDATE_INNER_L3_LEN; } th = inner_tcp_hdr(skb); + l4_hlen = __tcp_hdrlen(th); fun_eth_offload_init(&req->offload, flags, shinfo->gso_size, tcp_hdr_doff_flags(th), 0, @@ -185,6 +187,24 @@ static unsigned int write_pkt_desc(struct sk_buff *skb, struct funeth_txq *q, skb_inner_transport_offset(skb), skb_network_offset(skb), ol4_ofst); FUN_QSTAT_INC(q, tx_encap_tso); + } else if (shinfo->gso_type & SKB_GSO_UDP_L4) { + flags = FUN_ETH_INNER_LSO | FUN_ETH_INNER_UDP | + FUN_ETH_UPDATE_INNER_L4_CKSUM | + FUN_ETH_UPDATE_INNER_L4_LEN | + FUN_ETH_UPDATE_INNER_L3_LEN; + + if (ip_hdr(skb)->version == 4) + flags |= FUN_ETH_UPDATE_INNER_L3_CKSUM; + else + flags |= FUN_ETH_INNER_IPV6; + + l4_hlen = sizeof(struct udphdr); + fun_eth_offload_init(&req->offload, flags, + shinfo->gso_size, + cpu_to_be16(l4_hlen << 10), 0, + skb_network_offset(skb), + skb_transport_offset(skb), 0, 0); + FUN_QSTAT_INC(q, tx_uso); } else { /* HW considers one set of headers as inner */ flags = FUN_ETH_INNER_LSO | @@ -195,6 +215,7 @@ static unsigned int write_pkt_desc(struct sk_buff *skb, struct funeth_txq *q, else flags |= FUN_ETH_UPDATE_INNER_L3_CKSUM; th = tcp_hdr(skb); + l4_hlen = __tcp_hdrlen(th); fun_eth_offload_init(&req->offload, flags, shinfo->gso_size, tcp_hdr_doff_flags(th), 0, @@ -209,7 +230,7 @@ static unsigned int write_pkt_desc(struct sk_buff *skb, struct funeth_txq *q, extra_pkts = shinfo->gso_segs - 1; extra_bytes = (be16_to_cpu(req->offload.inner_l4_off) + - __tcp_hdrlen(th)) * extra_pkts; + l4_hlen) * extra_pkts; } else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { flags = FUN_ETH_UPDATE_INNER_L4_CKSUM; if (skb->csum_offset == offsetof(struct udphdr, check)) diff --git a/drivers/net/ethernet/fungible/funeth/funeth_txrx.h b/drivers/net/ethernet/fungible/funeth/funeth_txrx.h index 04c9f91b7489..1711f82cad71 100644 --- a/drivers/net/ethernet/fungible/funeth/funeth_txrx.h 
+++ b/drivers/net/ethernet/fungible/funeth/funeth_txrx.h @@ -82,6 +82,7 @@ struct funeth_txq_stats { /* per Tx queue SW counters */ u64 tx_cso; /* # of packets with checksum offload */ u64 tx_tso; /* # of non-encapsulated TSO super-packets */ u64 tx_encap_tso; /* # of encapsulated TSO super-packets */ + u64 tx_uso; /* # of non-encapsulated UDP LSO super-packets */ u64 tx_more; /* # of DBs elided due to xmit_more */ u64 tx_nstops; /* # of times the queue has stopped */ u64 tx_nrestarts; /* # of times the queue has restarted */ diff --git a/drivers/net/ethernet/huawei/hinic/hinic_sriov.c b/drivers/net/ethernet/huawei/hinic/hinic_sriov.c index 01e7d3c0b68e..df555847afb5 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_sriov.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_sriov.c @@ -852,12 +852,6 @@ int hinic_ndo_set_vf_bw(struct net_device *netdev, return -EINVAL; } - if (max_tx_rate < min_tx_rate) { - netif_err(nic_dev, drv, netdev, "Max rate %d must be greater than or equal to min rate %d\n", - max_tx_rate, min_tx_rate); - return -EINVAL; - } - err = hinic_port_link_state(nic_dev, &link_state); if (err) { netif_err(nic_dev, drv, netdev, diff --git a/drivers/net/ethernet/intel/e1000/e1000_hw.c b/drivers/net/ethernet/intel/e1000/e1000_hw.c index 1042e79a1397..f8860f24ede0 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_hw.c +++ b/drivers/net/ethernet/intel/e1000/e1000_hw.c @@ -4376,7 +4376,7 @@ void e1000_rar_set(struct e1000_hw *hw, u8 *addr, u32 index) /** * e1000_write_vfta - Writes a value to the specified offset in the VLAN filter table. 
* @hw: Struct containing variables accessed by shared code - * @offset: Offset in VLAN filer table to write + * @offset: Offset in VLAN filter table to write * @value: Value to write into VLAN filter table */ void e1000_write_vfta(struct e1000_hw *hw, u32 offset, u32 value) @@ -4396,7 +4396,7 @@ void e1000_write_vfta(struct e1000_hw *hw, u32 offset, u32 value) } /** - * e1000_clear_vfta - Clears the VLAN filer table + * e1000_clear_vfta - Clears the VLAN filter table * @hw: Struct containing variables accessed by shared code */ static void e1000_clear_vfta(struct e1000_hw *hw) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c index 30ca9ee1900b..f2fba6e1d0f7 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c @@ -1825,7 +1825,7 @@ static void fm10k_sm_mbx_process_error(struct fm10k_mbx_info *mbx) fm10k_sm_mbx_connect_reset(mbx); break; case FM10K_STATE_CONNECT: - /* try connnecting at lower version */ + /* try connecting at lower version */ if (mbx->remote) { while (mbx->local > 1) mbx->local--; diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 18558a019353..57f4ec4f8d2f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -565,6 +565,7 @@ struct i40e_pf { #define I40E_FLAG_DISABLE_FW_LLDP BIT(24) #define I40E_FLAG_RS_FEC BIT(25) #define I40E_FLAG_BASE_R_FEC BIT(26) +#define I40E_FLAG_VF_VLAN_PRUNING BIT(27) /* TOTAL_PORT_SHUTDOWN * Allows to physically disable the link on the NIC's port. 
* If enabled, (after link down request from the OS) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 19704f5c8291..55841816272b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -457,6 +457,8 @@ static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = { I40E_PRIV_FLAG("disable-fw-lldp", I40E_FLAG_DISABLE_FW_LLDP, 0), I40E_PRIV_FLAG("rs-fec", I40E_FLAG_RS_FEC, 0), I40E_PRIV_FLAG("base-r-fec", I40E_FLAG_BASE_R_FEC, 0), + I40E_PRIV_FLAG("vf-vlan-pruning", + I40E_FLAG_VF_VLAN_PRUNING, 0), }; #define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_gstrings_priv_flags) @@ -1141,6 +1143,71 @@ static int i40e_get_link_ksettings(struct net_device *netdev, return 0; } +#define I40E_LBIT_SIZE 8 +/** + * i40e_speed_to_link_speed - Translate decimal speed to i40e_aq_link_speed + * @speed: speed in decimal + * @ks: ethtool ksettings + * + * Return i40e_aq_link_speed based on speed + **/ +static enum i40e_aq_link_speed +i40e_speed_to_link_speed(__u32 speed, const struct ethtool_link_ksettings *ks) +{ + enum i40e_aq_link_speed link_speed = I40E_LINK_SPEED_UNKNOWN; + bool speed_changed = false; + int i, j; + + static const struct { + __u32 speed; + enum i40e_aq_link_speed link_speed; + __u8 bit[I40E_LBIT_SIZE]; + } i40e_speed_lut[] = { +#define I40E_LBIT(mode) ETHTOOL_LINK_MODE_ ## mode ##_Full_BIT + {SPEED_100, I40E_LINK_SPEED_100MB, {I40E_LBIT(100baseT)} }, + {SPEED_1000, I40E_LINK_SPEED_1GB, + {I40E_LBIT(1000baseT), I40E_LBIT(1000baseX), + I40E_LBIT(1000baseKX)} }, + {SPEED_10000, I40E_LINK_SPEED_10GB, + {I40E_LBIT(10000baseT), I40E_LBIT(10000baseKR), + I40E_LBIT(10000baseLR), I40E_LBIT(10000baseCR), + I40E_LBIT(10000baseSR), I40E_LBIT(10000baseKX4)} }, + + {SPEED_25000, I40E_LINK_SPEED_25GB, + {I40E_LBIT(25000baseCR), I40E_LBIT(25000baseKR), + I40E_LBIT(25000baseSR)} }, + {SPEED_40000, I40E_LINK_SPEED_40GB, + {I40E_LBIT(40000baseKR4), 
I40E_LBIT(40000baseCR4), + I40E_LBIT(40000baseSR4), I40E_LBIT(40000baseLR4)} }, + {SPEED_20000, I40E_LINK_SPEED_20GB, + {I40E_LBIT(20000baseKR2)} }, + {SPEED_2500, I40E_LINK_SPEED_2_5GB, {I40E_LBIT(2500baseT)} }, + {SPEED_5000, I40E_LINK_SPEED_5GB, {I40E_LBIT(2500baseT)} } +#undef I40E_LBIT +}; + + for (i = 0; i < ARRAY_SIZE(i40e_speed_lut); i++) { + if (i40e_speed_lut[i].speed == speed) { + for (j = 0; j < I40E_LBIT_SIZE; j++) { + if (test_bit(i40e_speed_lut[i].bit[j], + ks->link_modes.supported)) { + speed_changed = true; + break; + } + if (!i40e_speed_lut[i].bit[j]) + break; + } + if (speed_changed) { + link_speed = i40e_speed_lut[i].link_speed; + break; + } + } + } + return link_speed; +} + +#undef I40E_LBIT_SIZE + /** * i40e_set_link_ksettings - Set Speed and Duplex * @netdev: network interface device structure @@ -1157,12 +1224,14 @@ static int i40e_set_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings copy_ks; struct i40e_aq_set_phy_config config; struct i40e_pf *pf = np->vsi->back; + enum i40e_aq_link_speed link_speed; struct i40e_vsi *vsi = np->vsi; struct i40e_hw *hw = &pf->hw; bool autoneg_changed = false; i40e_status status = 0; int timeout = 50; int err = 0; + __u32 speed; u8 autoneg; /* Changing port settings is not supported if this isn't the @@ -1195,6 +1264,7 @@ static int i40e_set_link_ksettings(struct net_device *netdev, /* save autoneg out of ksettings */ autoneg = copy_ks.base.autoneg; + speed = copy_ks.base.speed; /* get our own copy of the bits to check against */ memset(&safe_ks, 0, sizeof(struct ethtool_link_ksettings)); @@ -1213,6 +1283,7 @@ static int i40e_set_link_ksettings(struct net_device *netdev, /* set autoneg back to what it currently is */ copy_ks.base.autoneg = safe_ks.base.autoneg; + copy_ks.base.speed = safe_ks.base.speed; /* If copy_ks.base and safe_ks.base are not the same now, then they are * trying to set something that we do not support. 
@@ -1329,6 +1400,27 @@ static int i40e_set_link_ksettings(struct net_device *netdev, 40000baseLR4_Full)) config.link_speed |= I40E_LINK_SPEED_40GB; + /* Autonegotiation must be disabled to change speed */ + if ((speed != SPEED_UNKNOWN && safe_ks.base.speed != speed) && + (autoneg == AUTONEG_DISABLE || + (safe_ks.base.autoneg == AUTONEG_DISABLE && !autoneg_changed))) { + link_speed = i40e_speed_to_link_speed(speed, ks); + if (link_speed == I40E_LINK_SPEED_UNKNOWN) { + netdev_info(netdev, "Given speed is not supported\n"); + err = -EOPNOTSUPP; + goto done; + } else { + config.link_speed = link_speed; + } + } else { + if (safe_ks.base.speed != speed) { + netdev_info(netdev, + "Unable to set speed, disable autoneg\n"); + err = -EOPNOTSUPP; + goto done; + } + } + /* If speed didn't get set, set it to what it currently is. * This is needed because if advertise is 0 (as it is when autoneg * is disabled) then speed won't get set. @@ -5294,6 +5386,13 @@ flags_complete: return -EOPNOTSUPP; } + if ((changed_flags & I40E_FLAG_VF_VLAN_PRUNING) && + pf->num_alloc_vfs) { + dev_warn(&pf->pdev->dev, + "Changing vf-vlan-pruning flag while VF(s) are active is not supported\n"); + return -EOPNOTSUPP; + } + if ((changed_flags & new_flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED) && (new_flags & I40E_FLAG_MFP_ENABLED)) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 72576bb3e94d..797f61b2cd96 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1369,6 +1369,114 @@ static int i40e_correct_mac_vlan_filters(struct i40e_vsi *vsi, } /** + * i40e_get_vf_new_vlan - Get new vlan id on a vf + * @vsi: the vsi to configure + * @new_mac: new mac filter to be added + * @f: existing mac filter, replaced with new_mac->f if new_mac is not NULL + * @vlan_filters: the number of active VLAN filters + * @trusted: flag if the VF is trusted + * + * Get new VLAN id based on current VLAN 
filters, trust, PVID + * and vf-vlan-prune-disable flag. + * + * Returns the value of the new vlan filter or + * the old value if no new filter is needed. + */ +static s16 i40e_get_vf_new_vlan(struct i40e_vsi *vsi, + struct i40e_new_mac_filter *new_mac, + struct i40e_mac_filter *f, + int vlan_filters, + bool trusted) +{ + s16 pvid = le16_to_cpu(vsi->info.pvid); + struct i40e_pf *pf = vsi->back; + bool is_any; + + if (new_mac) + f = new_mac->f; + + if (pvid && f->vlan != pvid) + return pvid; + + is_any = (trusted || + !(pf->flags & I40E_FLAG_VF_VLAN_PRUNING)); + + if ((vlan_filters && f->vlan == I40E_VLAN_ANY) || + (!is_any && !vlan_filters && f->vlan == I40E_VLAN_ANY) || + (is_any && !vlan_filters && f->vlan == 0)) { + if (is_any) + return I40E_VLAN_ANY; + else + return 0; + } + + return f->vlan; +} + +/** + * i40e_correct_vf_mac_vlan_filters - Correct non-VLAN VF filters if necessary + * @vsi: the vsi to configure + * @tmp_add_list: list of filters ready to be added + * @tmp_del_list: list of filters ready to be deleted + * @vlan_filters: the number of active VLAN filters + * @trusted: flag if the VF is trusted + * + * Correct VF VLAN filters based on current VLAN filters, trust, PVID + * and vf-vlan-prune-disable flag. + * + * In case of memory allocation failure return -ENOMEM. Otherwise, return 0. + * + * This function is only expected to be called from within + * i40e_sync_vsi_filters. 
+ * + * NOTE: This function expects to be called while under the + * mac_filter_hash_lock + */ +static int i40e_correct_vf_mac_vlan_filters(struct i40e_vsi *vsi, + struct hlist_head *tmp_add_list, + struct hlist_head *tmp_del_list, + int vlan_filters, + bool trusted) +{ + struct i40e_mac_filter *f, *add_head; + struct i40e_new_mac_filter *new_mac; + struct hlist_node *h; + int bkt, new_vlan; + + hlist_for_each_entry(new_mac, tmp_add_list, hlist) { + new_mac->f->vlan = i40e_get_vf_new_vlan(vsi, new_mac, NULL, + vlan_filters, trusted); + } + + hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { + new_vlan = i40e_get_vf_new_vlan(vsi, NULL, f, vlan_filters, + trusted); + if (new_vlan != f->vlan) { + add_head = i40e_add_filter(vsi, f->macaddr, new_vlan); + if (!add_head) + return -ENOMEM; + /* Create a temporary i40e_new_mac_filter */ + new_mac = kzalloc(sizeof(*new_mac), GFP_ATOMIC); + if (!new_mac) + return -ENOMEM; + new_mac->f = add_head; + new_mac->state = add_head->state; + + /* Add the new filter to the tmp list */ + hlist_add_head(&new_mac->hlist, tmp_add_list); + + /* Put the original filter into the delete list */ + f->state = I40E_FILTER_REMOVE; + hash_del(&f->hlist); + hlist_add_head(&f->hlist, tmp_del_list); + } + } + + vsi->has_vlan_filter = !!vlan_filters; + return 0; +} + +/** * i40e_rm_default_mac_filter - Remove the default MAC filter set by NVM * @vsi: the PF Main VSI - inappropriate for any other VSI * @macaddr: the MAC address @@ -2423,10 +2531,14 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) vlan_filters++; } - retval = i40e_correct_mac_vlan_filters(vsi, - &tmp_add_list, - &tmp_del_list, - vlan_filters); + if (vsi->type != I40E_VSI_SRIOV) + retval = i40e_correct_mac_vlan_filters + (vsi, &tmp_add_list, &tmp_del_list, + vlan_filters); + else + retval = i40e_correct_vf_mac_vlan_filters + (vsi, &tmp_add_list, &tmp_del_list, + vlan_filters, pf->vf[vsi->vf_id].trusted); hlist_for_each_entry(new, &tmp_add_list, hlist) 
netdev_hw_addr_refcnt(new->f, vsi->netdev, 1); @@ -2855,8 +2967,21 @@ int i40e_add_vlan_all_mac(struct i40e_vsi *vsi, s16 vid) int bkt; hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { - if (f->state == I40E_FILTER_REMOVE) + /* If we're asked to add a filter that has been marked for + * removal, it is safe to simply restore it to active state. + * __i40e_del_filter will have simply deleted any filters which + * were previously marked NEW or FAILED, so if it is currently + * marked REMOVE it must have previously been ACTIVE. Since we + * haven't yet run the sync filters task, just restore this + * filter to the ACTIVE state so that the sync task leaves it + * in place. + */ + if (f->state == I40E_FILTER_REMOVE && f->vlan == vid) { + f->state = I40E_FILTER_ACTIVE; + continue; + } else if (f->state == I40E_FILTER_REMOVE) { continue; + } add_f = i40e_add_filter(vsi, f->macaddr, vid); if (!add_f) { dev_info(&vsi->back->pdev->dev, @@ -4037,7 +4162,6 @@ static void i40e_free_misc_vector(struct i40e_pf *pf) i40e_flush(&pf->hw); if (pf->flags & I40E_FLAG_MSIX_ENABLED && pf->msix_entries) { - synchronize_irq(pf->msix_entries[0].vector); free_irq(pf->msix_entries[0].vector, pf); clear_bit(__I40E_MISC_IRQ_REQUESTED, pf->state); } @@ -4776,7 +4900,6 @@ static void i40e_vsi_free_irq(struct i40e_vsi *vsi) irq_set_affinity_notifier(irq_num, NULL); /* remove our suggested affinity mask for this IRQ */ irq_update_affinity_hint(irq_num, NULL); - synchronize_irq(irq_num); free_irq(irq_num, vsi->q_vectors[i]); /* Tear down the interrupt queue link list diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 7bc1174edf6b..a327189deda0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1483,10 +1483,8 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring) if (!rx_ring->rx_bi) return; - if (rx_ring->skb) { - dev_kfree_skb(rx_ring->skb); - rx_ring->skb = NULL; - } + 
dev_kfree_skb(rx_ring->skb); + rx_ring->skb = NULL; if (rx_ring->xsk_pool) { i40e_xsk_clean_rx_ring(rx_ring); @@ -2291,16 +2289,14 @@ int i40e_xmit_xdp_tx_ring(struct xdp_buff *xdp, struct i40e_ring *xdp_ring) * i40e_run_xdp - run an XDP program * @rx_ring: Rx ring being processed * @xdp: XDP buffer containing the frame + * @xdp_prog: XDP program to run **/ -static int i40e_run_xdp(struct i40e_ring *rx_ring, struct xdp_buff *xdp) +static int i40e_run_xdp(struct i40e_ring *rx_ring, struct xdp_buff *xdp, struct bpf_prog *xdp_prog) { int err, result = I40E_XDP_PASS; struct i40e_ring *xdp_ring; - struct bpf_prog *xdp_prog; u32 act; - xdp_prog = READ_ONCE(rx_ring->xdp_prog); - if (!xdp_prog) goto xdp_out; @@ -2445,6 +2441,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) unsigned int offset = rx_ring->rx_offset; struct sk_buff *skb = rx_ring->skb; unsigned int xdp_xmit = 0; + struct bpf_prog *xdp_prog; bool failure = false; struct xdp_buff xdp; int xdp_res = 0; @@ -2454,6 +2451,8 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) #endif xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq); + xdp_prog = READ_ONCE(rx_ring->xdp_prog); + while (likely(total_rx_packets < (unsigned int)budget)) { struct i40e_rx_buffer *rx_buffer; union i40e_rx_desc *rx_desc; @@ -2509,11 +2508,12 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) hard_start = page_address(rx_buffer->page) + rx_buffer->page_offset - offset; xdp_prepare_buff(&xdp, hard_start, offset, size, true); + xdp_buff_clear_frags_flag(&xdp); #if (PAGE_SIZE > 4096) /* At larger PAGE_SIZE, frame_sz depend on len size */ xdp.frame_sz = i40e_rx_frame_truesize(rx_ring, size); #endif - xdp_res = i40e_run_xdp(rx_ring, &xdp); + xdp_res = i40e_run_xdp(rx_ring, &xdp, xdp_prog); } if (xdp_res) { @@ -3713,35 +3713,55 @@ u16 i40e_lan_select_queue(struct net_device *netdev, static int i40e_xmit_xdp_ring(struct xdp_frame *xdpf, struct i40e_ring *xdp_ring) { - u16 i = 
xdp_ring->next_to_use; - struct i40e_tx_buffer *tx_bi; - struct i40e_tx_desc *tx_desc; + struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf); + u8 nr_frags = unlikely(xdp_frame_has_frags(xdpf)) ? sinfo->nr_frags : 0; + u16 i = 0, index = xdp_ring->next_to_use; + struct i40e_tx_buffer *tx_head = &xdp_ring->tx_bi[index]; + struct i40e_tx_buffer *tx_bi = tx_head; + struct i40e_tx_desc *tx_desc = I40E_TX_DESC(xdp_ring, index); void *data = xdpf->data; u32 size = xdpf->len; - dma_addr_t dma; - if (!unlikely(I40E_DESC_UNUSED(xdp_ring))) { + if (unlikely(I40E_DESC_UNUSED(xdp_ring) < 1 + nr_frags)) { xdp_ring->tx_stats.tx_busy++; return I40E_XDP_CONSUMED; } - dma = dma_map_single(xdp_ring->dev, data, size, DMA_TO_DEVICE); - if (dma_mapping_error(xdp_ring->dev, dma)) - return I40E_XDP_CONSUMED; - tx_bi = &xdp_ring->tx_bi[i]; - tx_bi->bytecount = size; - tx_bi->gso_segs = 1; - tx_bi->xdpf = xdpf; + tx_head->bytecount = xdp_get_frame_len(xdpf); + tx_head->gso_segs = 1; + tx_head->xdpf = xdpf; - /* record length, and DMA address */ - dma_unmap_len_set(tx_bi, len, size); - dma_unmap_addr_set(tx_bi, dma, dma); + for (;;) { + dma_addr_t dma; - tx_desc = I40E_TX_DESC(xdp_ring, i); - tx_desc->buffer_addr = cpu_to_le64(dma); - tx_desc->cmd_type_offset_bsz = build_ctob(I40E_TX_DESC_CMD_ICRC - | I40E_TXD_CMD, - 0, size, 0); + dma = dma_map_single(xdp_ring->dev, data, size, DMA_TO_DEVICE); + if (dma_mapping_error(xdp_ring->dev, dma)) + goto unmap; + + /* record length, and DMA address */ + dma_unmap_len_set(tx_bi, len, size); + dma_unmap_addr_set(tx_bi, dma, dma); + + tx_desc->buffer_addr = cpu_to_le64(dma); + tx_desc->cmd_type_offset_bsz = + build_ctob(I40E_TX_DESC_CMD_ICRC, 0, size, 0); + + if (++index == xdp_ring->count) + index = 0; + + if (i == nr_frags) + break; + + tx_bi = &xdp_ring->tx_bi[index]; + tx_desc = I40E_TX_DESC(xdp_ring, index); + + data = skb_frag_address(&sinfo->frags[i]); + size = skb_frag_size(&sinfo->frags[i]); + i++; + } + + 
tx_desc->cmd_type_offset_bsz |= + cpu_to_le64(I40E_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT); /* Make certain all of the status bits have been updated * before next_to_watch is written. @@ -3749,14 +3769,30 @@ static int i40e_xmit_xdp_ring(struct xdp_frame *xdpf, smp_wmb(); xdp_ring->xdp_tx_active++; - i++; - if (i == xdp_ring->count) - i = 0; - tx_bi->next_to_watch = tx_desc; - xdp_ring->next_to_use = i; + tx_head->next_to_watch = tx_desc; + xdp_ring->next_to_use = index; return I40E_XDP_TX; + +unmap: + for (;;) { + tx_bi = &xdp_ring->tx_bi[index]; + if (dma_unmap_len(tx_bi, len)) + dma_unmap_page(xdp_ring->dev, + dma_unmap_addr(tx_bi, dma), + dma_unmap_len(tx_bi, len), + DMA_TO_DEVICE); + dma_unmap_len_set(tx_bi, len, 0); + if (tx_bi == tx_head) + break; + + if (!index) + index += xdp_ring->count; + index--; + } + + return I40E_XDP_CONSUMED; } /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 033ea71763e3..d01fb592778c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -4349,6 +4349,7 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, /* duplicate request, so just return success */ goto error_pvid; + i40e_vlan_stripping_enable(vsi); i40e_vc_reset_vf(vf, true); /* During reset the VF got a new VSI, so refresh a pointer. */ vsi = pf->vsi[vf->lan_vsi_idx]; @@ -4364,7 +4365,7 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, * MAC addresses deleted. 
*/ if ((!(vlan_id || qos) || - vlanprio != le16_to_cpu(vsi->info.pvid)) && + vlanprio != le16_to_cpu(vsi->info.pvid)) && vsi->info.pvid) { ret = i40e_add_vlan_all_mac(vsi, I40E_VLAN_ANY); if (ret) { @@ -4727,6 +4728,11 @@ int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting) goto out; vf->trusted = setting; + + /* request PF to sync mac/vlan filters for the VF */ + set_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state); + pf->vsi[vf->lan_vsi_idx]->flags |= I40E_VSI_FLAG_FILTER_CHANGED; + i40e_vc_reset_vf(vf, true); dev_info(&pf->pdev->dev, "VF %u is now %strusted\n", vf_id, setting ? "" : "un"); diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index af3e7e6afc85..6d4009e0cbd6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -143,20 +143,17 @@ int i40e_xsk_pool_setup(struct i40e_vsi *vsi, struct xsk_buff_pool *pool, * i40e_run_xdp_zc - Executes an XDP program on an xdp_buff * @rx_ring: Rx ring * @xdp: xdp_buff used as input to the XDP program + * @xdp_prog: XDP program to run * * Returns any of I40E_XDP_{PASS, CONSUMED, TX, REDIR} **/ -static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp) +static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp, + struct bpf_prog *xdp_prog) { int err, result = I40E_XDP_PASS; struct i40e_ring *xdp_ring; - struct bpf_prog *xdp_prog; u32 act; - /* NB! xdp_prog will always be !NULL, due to the fact that - * this path is enabled by setting an XDP program. - */ - xdp_prog = READ_ONCE(rx_ring->xdp_prog); act = bpf_prog_run_xdp(xdp_prog, xdp); if (likely(act == XDP_REDIRECT)) { @@ -339,9 +336,15 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) u16 next_to_clean = rx_ring->next_to_clean; u16 count_mask = rx_ring->count - 1; unsigned int xdp_res, xdp_xmit = 0; + struct bpf_prog *xdp_prog; bool failure = false; u16 cleaned_count; + /* NB! 
xdp_prog will always be !NULL, due to the fact that + * this path is enabled by setting an XDP program. + */ + xdp_prog = READ_ONCE(rx_ring->xdp_prog); + while (likely(total_rx_packets < (unsigned int)budget)) { union i40e_rx_desc *rx_desc; unsigned int rx_packets; @@ -378,7 +381,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) xsk_buff_set_size(bi, size); xsk_buff_dma_sync_for_cpu(bi, rx_ring->xsk_pool); - xdp_res = i40e_run_xdp_zc(rx_ring, bi); + xdp_res = i40e_run_xdp_zc(rx_ring, bi, xdp_prog); i40e_handle_xdp_result_zc(rx_ring, bi, rx_desc, &rx_packets, &rx_bytes, size, xdp_res, &failure); if (failure) diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 49aed3e506a6..fda1198d2c00 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -146,7 +146,8 @@ struct iavf_mac_filter { u8 remove:1; /* filter needs to be removed */ u8 add:1; /* filter needs to be added */ u8 is_primary:1; /* filter is a default VF MAC */ - u8 padding:4; + u8 add_handled:1; /* received response for filter add */ + u8 padding:3; }; }; @@ -248,6 +249,7 @@ struct iavf_adapter { struct work_struct adminq_task; struct delayed_work client_task; wait_queue_head_t down_waitqueue; + wait_queue_head_t vc_waitqueue; struct iavf_q_vector *q_vectors; struct list_head vlan_filter_list; struct list_head mac_filter_list; @@ -292,6 +294,7 @@ struct iavf_adapter { #define IAVF_FLAG_QUEUES_DISABLED BIT(17) #define IAVF_FLAG_SETUP_NETDEV_FEATURES BIT(18) #define IAVF_FLAG_REINIT_MSIX_NEEDED BIT(20) +#define IAVF_FLAG_INITIAL_MAC_SET BIT(23) /* duplicates for common code */ #define IAVF_FLAG_DCB_ENABLED 0 /* flags for admin queue service task */ @@ -559,6 +562,8 @@ void iavf_enable_vlan_stripping_v2(struct iavf_adapter *adapter, u16 tpid); void iavf_disable_vlan_stripping_v2(struct iavf_adapter *adapter, u16 tpid); void iavf_enable_vlan_insertion_v2(struct iavf_adapter *adapter, u16 tpid); void 
iavf_disable_vlan_insertion_v2(struct iavf_adapter *adapter, u16 tpid); +int iavf_replace_primary_mac(struct iavf_adapter *adapter, + const u8 *new_mac); void iavf_set_vlan_offload_features(struct iavf_adapter *adapter, netdev_features_t prev_features, diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index f3ecb3bca33d..541103909ef4 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -983,6 +983,7 @@ struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter, list_add_tail(&f->list, &adapter->mac_filter_list); f->add = true; + f->add_handled = false; f->is_new_mac = true; f->is_primary = ether_addr_equal(macaddr, adapter->hw.mac.addr); adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER; @@ -994,47 +995,132 @@ struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter, } /** - * iavf_set_mac - NDO callback to set port mac address - * @netdev: network interface device structure - * @p: pointer to an address structure + * iavf_replace_primary_mac - Replace current primary address + * @adapter: board private structure + * @new_mac: new MAC address to be applied * - * Returns 0 on success, negative on failure + * Replace current dev_addr and send request to PF for removal of previous + * primary MAC address filter and addition of new primary MAC filter. + * Return 0 for success, -ENOMEM for failure. + * + * Do not call this with mac_vlan_list_lock! 
**/ -static int iavf_set_mac(struct net_device *netdev, void *p) +int iavf_replace_primary_mac(struct iavf_adapter *adapter, + const u8 *new_mac) { - struct iavf_adapter *adapter = netdev_priv(netdev); struct iavf_hw *hw = &adapter->hw; struct iavf_mac_filter *f; - struct sockaddr *addr = p; - - if (!is_valid_ether_addr(addr->sa_data)) - return -EADDRNOTAVAIL; - - if (ether_addr_equal(netdev->dev_addr, addr->sa_data)) - return 0; spin_lock_bh(&adapter->mac_vlan_list_lock); + list_for_each_entry(f, &adapter->mac_filter_list, list) { + f->is_primary = false; + } + f = iavf_find_filter(adapter, hw->mac.addr); if (f) { f->remove = true; - f->is_primary = true; adapter->aq_required |= IAVF_FLAG_AQ_DEL_MAC_FILTER; } - f = iavf_add_filter(adapter, addr->sa_data); + f = iavf_add_filter(adapter, new_mac); + if (f) { + /* Always send the request to add if changing primary MAC + * even if filter is already present on the list + */ f->is_primary = true; - ether_addr_copy(hw->mac.addr, addr->sa_data); + f->add = true; + adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER; + ether_addr_copy(hw->mac.addr, new_mac); } spin_unlock_bh(&adapter->mac_vlan_list_lock); /* schedule the watchdog task to immediately process the request */ - if (f) + if (f) { queue_work(iavf_wq, &adapter->watchdog_task.work); + return 0; + } + return -ENOMEM; +} + +/** + * iavf_is_mac_set_handled - wait for a response to set MAC from PF + * @netdev: network interface device structure + * @macaddr: MAC address to set + * + * Returns true on success, false on failure + */ +static bool iavf_is_mac_set_handled(struct net_device *netdev, + const u8 *macaddr) +{ + struct iavf_adapter *adapter = netdev_priv(netdev); + struct iavf_mac_filter *f; + bool ret = false; + + spin_lock_bh(&adapter->mac_vlan_list_lock); + + f = iavf_find_filter(adapter, macaddr); - return (f == NULL) ? 
-ENOMEM : 0; + if (!f || (!f->add && f->add_handled)) + ret = true; + + spin_unlock_bh(&adapter->mac_vlan_list_lock); + + return ret; +} + +/** + * iavf_set_mac - NDO callback to set port MAC address + * @netdev: network interface device structure + * @p: pointer to an address structure + * + * Returns 0 on success, negative on failure + */ +static int iavf_set_mac(struct net_device *netdev, void *p) +{ + struct iavf_adapter *adapter = netdev_priv(netdev); + struct sockaddr *addr = p; + bool handle_mac = iavf_is_mac_set_handled(netdev, addr->sa_data); + int ret; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + ret = iavf_replace_primary_mac(adapter, addr->sa_data); + + if (ret) + return ret; + + /* If this is an initial set MAC during VF spawn do not wait */ + if (adapter->flags & IAVF_FLAG_INITIAL_MAC_SET) { + adapter->flags &= ~IAVF_FLAG_INITIAL_MAC_SET; + return 0; + } + + if (handle_mac) + goto done; + + ret = wait_event_interruptible_timeout(adapter->vc_waitqueue, false, msecs_to_jiffies(2500)); + + /* If ret < 0 then it means wait was interrupted. + * If ret == 0 then it means we got a timeout. 
+ * else it means we got response for set MAC from PF, + * check if netdev MAC was updated to requested MAC, + * if yes then set MAC succeeded otherwise it failed return -EACCES + */ + if (ret < 0) + return ret; + + if (!ret) + return -EAGAIN; + +done: + if (!ether_addr_equal(netdev->dev_addr, addr->sa_data)) + return -EACCES; + + return 0; } /** @@ -2451,6 +2537,8 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter) ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr); } + adapter->flags |= IAVF_FLAG_INITIAL_MAC_SET; + adapter->tx_desc_count = IAVF_DEFAULT_TXD; adapter->rx_desc_count = IAVF_DEFAULT_RXD; err = iavf_init_interrupt_scheme(adapter); @@ -4681,6 +4769,9 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Setup the wait queue for indicating transition to down status */ init_waitqueue_head(&adapter->down_waitqueue); + /* Setup the wait queue for indicating virtchannel events */ + init_waitqueue_head(&adapter->vc_waitqueue); + return 0; err_ioremap: diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 782450d5c12f..e2b4ba98f71e 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -598,6 +598,8 @@ static void iavf_mac_add_ok(struct iavf_adapter *adapter) spin_lock_bh(&adapter->mac_vlan_list_lock); list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) { f->is_new_mac = false; + if (!f->add && !f->add_handled) + f->add_handled = true; } spin_unlock_bh(&adapter->mac_vlan_list_lock); } @@ -618,6 +620,9 @@ static void iavf_mac_add_reject(struct iavf_adapter *adapter) if (f->remove && ether_addr_equal(f->macaddr, netdev->dev_addr)) f->remove = false; + if (!f->add && !f->add_handled) + f->add_handled = true; + if (f->is_new_mac) { list_del(&f->list); kfree(f); @@ -1932,6 +1937,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, iavf_mac_add_reject(adapter); /* 
restore administratively set MAC address */ ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr); + wake_up(&adapter->vc_waitqueue); break; case VIRTCHNL_OP_DEL_VLAN: dev_err(&adapter->pdev->dev, "Failed to delete VLAN filter, error %s\n", @@ -2091,7 +2097,13 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, if (!v_retval) iavf_mac_add_ok(adapter); if (!ether_addr_equal(netdev->dev_addr, adapter->hw.mac.addr)) - eth_hw_addr_set(netdev, adapter->hw.mac.addr); + if (!ether_addr_equal(netdev->dev_addr, + adapter->hw.mac.addr)) { + netif_addr_lock_bh(netdev); + eth_hw_addr_set(netdev, adapter->hw.mac.addr); + netif_addr_unlock_bh(netdev); + } + wake_up(&adapter->vc_waitqueue); break; case VIRTCHNL_OP_GET_STATS: { struct iavf_eth_stats *stats = @@ -2121,10 +2133,11 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, /* restore current mac address */ ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr); } else { + netif_addr_lock_bh(netdev); /* refresh current mac address if changed */ - eth_hw_addr_set(netdev, adapter->hw.mac.addr); ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr); + netif_addr_unlock_bh(netdev); } spin_lock_bh(&adapter->mac_vlan_list_lock); iavf_add_filter(adapter, adapter->hw.mac.addr); @@ -2160,6 +2173,10 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, } fallthrough; case VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS: { + struct iavf_mac_filter *f; + bool was_mac_changed; + u64 aq_required = 0; + if (v_opcode == VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS) memcpy(&adapter->vlan_v2_caps, msg, min_t(u16, msglen, @@ -2167,6 +2184,46 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, iavf_process_config(adapter); adapter->flags |= IAVF_FLAG_SETUP_NETDEV_FEATURES; + was_mac_changed = !ether_addr_equal(netdev->dev_addr, + adapter->hw.mac.addr); + + spin_lock_bh(&adapter->mac_vlan_list_lock); + + /* re-add all MAC filters */ + list_for_each_entry(f, &adapter->mac_filter_list, list) { + if (was_mac_changed && 
+ ether_addr_equal(netdev->dev_addr, f->macaddr)) + ether_addr_copy(f->macaddr, + adapter->hw.mac.addr); + + f->is_new_mac = true; + f->add = true; + f->add_handled = false; + f->remove = false; + } + + /* re-add all VLAN filters */ + if (VLAN_FILTERING_ALLOWED(adapter)) { + struct iavf_vlan_filter *vlf; + + if (!list_empty(&adapter->vlan_filter_list)) { + list_for_each_entry(vlf, + &adapter->vlan_filter_list, + list) + vlf->add = true; + + aq_required |= IAVF_FLAG_AQ_ADD_VLAN_FILTER; + } + } + + spin_unlock_bh(&adapter->mac_vlan_list_lock); + + netif_addr_lock_bh(netdev); + eth_hw_addr_set(netdev, adapter->hw.mac.addr); + netif_addr_unlock_bh(netdev); + + adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER | + aq_required; } break; case VIRTCHNL_OP_ENABLE_QUEUES: diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c index 5d10c4f84a36..ead6d50fc0ad 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c @@ -852,7 +852,7 @@ ice_create_init_fdir_rule(struct ice_pf *pf, enum ice_fltr_ptype flow) if (!seg) return -ENOMEM; - tun_seg = devm_kcalloc(dev, sizeof(*seg), ICE_FD_HW_SEG_MAX, + tun_seg = devm_kcalloc(dev, ICE_FD_HW_SEG_MAX, sizeof(*tun_seg), GFP_KERNEL); if (!tun_seg) { devm_kfree(dev, seg); @@ -1214,7 +1214,7 @@ ice_cfg_fdir_xtrct_seq(struct ice_pf *pf, struct ethtool_rx_flow_spec *fsp, if (!seg) return -ENOMEM; - tun_seg = devm_kcalloc(dev, sizeof(*seg), ICE_FD_HW_SEG_MAX, + tun_seg = devm_kcalloc(dev, ICE_FD_HW_SEG_MAX, sizeof(*tun_seg), GFP_KERNEL); if (!tun_seg) { devm_kfree(dev, seg); diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.c b/drivers/net/ethernet/intel/ice/ice_gnss.c index 57586a2e6dec..c6d755f707aa 100644 --- a/drivers/net/ethernet/intel/ice/ice_gnss.c +++ b/drivers/net/ethernet/intel/ice/ice_gnss.c @@ -17,13 +17,13 @@ static void ice_gnss_read(struct kthread_work *work) struct gnss_serial *gnss = container_of(work, 
struct gnss_serial, read_work.work); struct ice_aqc_link_topo_addr link_topo; - u8 i2c_params, bytes_read; + unsigned int i, bytes_read, data_len; struct tty_port *port; struct ice_pf *pf; struct ice_hw *hw; __be16 data_len_b; char *buf = NULL; - u16 i, data_len; + u8 i2c_params; int err = 0; pf = gnss->back; @@ -65,7 +65,7 @@ static void ice_gnss_read(struct kthread_work *work) mdelay(10); } - data_len = min(data_len, (u16)PAGE_SIZE); + data_len = min_t(typeof(data_len), data_len, PAGE_SIZE); data_len = tty_buffer_request_room(port, data_len); if (!data_len) { err = -ENOMEM; @@ -74,9 +74,10 @@ static void ice_gnss_read(struct kthread_work *work) /* Read received data */ for (i = 0; i < data_len; i += bytes_read) { - u16 bytes_left = data_len - i; + unsigned int bytes_left = data_len - i; - bytes_read = min_t(typeof(bytes_left), bytes_left, ICE_MAX_I2C_DATA_SIZE); + bytes_read = min_t(typeof(bytes_left), bytes_left, + ICE_MAX_I2C_DATA_SIZE); err = ice_aq_read_i2c(hw, link_topo, ICE_GNSS_UBX_I2C_BUS_ADDR, cpu_to_le16(ICE_GNSS_UBX_EMPTY_DATA), diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index f7f9c973ec54..a6c4be5e5566 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -887,6 +887,9 @@ static void ice_set_dflt_vsi_ctx(struct ice_hw *hw, struct ice_vsi_ctx *ctxt) (ICE_AQ_VSI_OUTER_TAG_VLAN_8100 << ICE_AQ_VSI_OUTER_TAG_TYPE_S) & ICE_AQ_VSI_OUTER_TAG_TYPE_M; + ctxt->info.outer_vlan_flags |= + FIELD_PREP(ICE_AQ_VSI_OUTER_VLAN_EMODE_M, + ICE_AQ_VSI_OUTER_VLAN_EMODE_NOTHING); } /* Have 1:1 UP mapping for both ingress/egress tables */ table |= ICE_UP_TABLE_TRANSLATE(0, 0); @@ -2419,7 +2422,7 @@ static void ice_set_agg_vsi(struct ice_vsi *vsi) agg_id); return; } - /* aggregator node is created, store the neeeded info */ + /* aggregator node is created, store the needed info */ agg_node->valid = true; agg_node->agg_id = agg_id; } diff --git 
a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index bb1721f1321d..86093b2511d8 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -1593,16 +1593,6 @@ ice_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, goto out_put_vf; } - /* when max_tx_rate is zero that means no max Tx rate limiting, so only - * check if max_tx_rate is non-zero - */ - if (max_tx_rate && min_tx_rate > max_tx_rate) { - dev_err(dev, "Cannot set min Tx rate %d Mbps greater than max Tx rate %d Mbps\n", - min_tx_rate, max_tx_rate); - ret = -EINVAL; - goto out_put_vf; - } - if (min_tx_rate && ice_is_dcb_active(pf)) { dev_err(dev, "DCB on PF is currently enabled. VF min Tx rate limiting not allowed on this PF.\n"); ret = -EOPNOTSUPP; diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index 4547bc1f7cee..b2b5d2ee83a5 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -360,6 +360,54 @@ static u16 ice_vc_get_max_frame_size(struct ice_vf *vf) } /** + * ice_vc_get_vlan_caps + * @hw: pointer to the hw + * @vf: pointer to the VF info + * @vsi: pointer to the VSI + * @driver_caps: current driver caps + * + * Return 0 if there is no VLAN caps supported, or VLAN caps value + */ +static u32 +ice_vc_get_vlan_caps(struct ice_hw *hw, struct ice_vf *vf, struct ice_vsi *vsi, + u32 driver_caps) +{ + if (ice_is_eswitch_mode_switchdev(vf->pf)) + /* In switchdev setting VLAN from VF isn't supported */ + return 0; + + if (driver_caps & VIRTCHNL_VF_OFFLOAD_VLAN_V2) { + /* VLAN offloads based on current device configuration */ + return VIRTCHNL_VF_OFFLOAD_VLAN_V2; + } else if (driver_caps & VIRTCHNL_VF_OFFLOAD_VLAN) { + /* allow VF to negotiate VIRTCHNL_VF_OFFLOAD explicitly for + * these two conditions, which amounts to guest VLAN filtering + * and offloads being based on the inner VLAN or the 
+ * inner/single VLAN respectively and don't allow VF to + * negotiate VIRTCHNL_VF_OFFLOAD in any other cases + */ + if (ice_is_dvm_ena(hw) && ice_vf_is_port_vlan_ena(vf)) { + return VIRTCHNL_VF_OFFLOAD_VLAN; + } else if (!ice_is_dvm_ena(hw) && + !ice_vf_is_port_vlan_ena(vf)) { + /* configure backward compatible support for VFs that + * only support VIRTCHNL_VF_OFFLOAD_VLAN, the PF is + * configured in SVM, and no port VLAN is configured + */ + ice_vf_vsi_cfg_svm_legacy_vlan_mode(vsi); + return VIRTCHNL_VF_OFFLOAD_VLAN; + } else if (ice_is_dvm_ena(hw)) { + /* configure software offloaded VLAN support when DVM + * is enabled, but no port VLAN is enabled + */ + ice_vf_vsi_cfg_dvm_legacy_vlan_mode(vsi); + } + } + + return 0; +} + +/** * ice_vc_get_vf_res_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer @@ -402,33 +450,8 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) goto err; } - if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_VLAN_V2) { - /* VLAN offloads based on current device configuration */ - vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_VLAN_V2; - } else if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_VLAN) { - /* allow VF to negotiate VIRTCHNL_VF_OFFLOAD explicitly for - * these two conditions, which amounts to guest VLAN filtering - * and offloads being based on the inner VLAN or the - * inner/single VLAN respectively and don't allow VF to - * negotiate VIRTCHNL_VF_OFFLOAD in any other cases - */ - if (ice_is_dvm_ena(hw) && ice_vf_is_port_vlan_ena(vf)) { - vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_VLAN; - } else if (!ice_is_dvm_ena(hw) && - !ice_vf_is_port_vlan_ena(vf)) { - vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_VLAN; - /* configure backward compatible support for VFs that - * only support VIRTCHNL_VF_OFFLOAD_VLAN, the PF is - * configured in SVM, and no port VLAN is configured - */ - ice_vf_vsi_cfg_svm_legacy_vlan_mode(vsi); - } else if (ice_is_dvm_ena(hw)) { - /* configure software offloaded VLAN support when DVM - * is enabled, 
but no port VLAN is enabled - */ - ice_vf_vsi_cfg_dvm_legacy_vlan_mode(vsi); - } - } + vfres->vf_cap_flags |= ice_vc_get_vlan_caps(hw, vf, vsi, + vf->driver_caps); if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PF) { vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_PF; @@ -3528,42 +3551,6 @@ ice_vc_repr_del_mac(struct ice_vf __always_unused *vf, u8 __always_unused *msg) VIRTCHNL_STATUS_SUCCESS, NULL, 0); } -static int ice_vc_repr_add_vlan(struct ice_vf *vf, u8 __always_unused *msg) -{ - dev_dbg(ice_pf_to_dev(vf->pf), - "Can't add VLAN in switchdev mode for VF %d\n", vf->vf_id); - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ADD_VLAN, - VIRTCHNL_STATUS_SUCCESS, NULL, 0); -} - -static int ice_vc_repr_del_vlan(struct ice_vf *vf, u8 __always_unused *msg) -{ - dev_dbg(ice_pf_to_dev(vf->pf), - "Can't delete VLAN in switchdev mode for VF %d\n", vf->vf_id); - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DEL_VLAN, - VIRTCHNL_STATUS_SUCCESS, NULL, 0); -} - -static int ice_vc_repr_ena_vlan_stripping(struct ice_vf *vf) -{ - dev_dbg(ice_pf_to_dev(vf->pf), - "Can't enable VLAN stripping in switchdev mode for VF %d\n", - vf->vf_id); - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_VLAN_STRIPPING, - VIRTCHNL_STATUS_ERR_NOT_SUPPORTED, - NULL, 0); -} - -static int ice_vc_repr_dis_vlan_stripping(struct ice_vf *vf) -{ - dev_dbg(ice_pf_to_dev(vf->pf), - "Can't disable VLAN stripping in switchdev mode for VF %d\n", - vf->vf_id); - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING, - VIRTCHNL_STATUS_ERR_NOT_SUPPORTED, - NULL, 0); -} - static int ice_vc_repr_cfg_promiscuous_mode(struct ice_vf *vf, u8 __always_unused *msg) { @@ -3590,10 +3577,10 @@ static const struct ice_virtchnl_ops ice_virtchnl_repr_ops = { .config_rss_lut = ice_vc_config_rss_lut, .get_stats_msg = ice_vc_get_stats_msg, .cfg_promiscuous_mode_msg = ice_vc_repr_cfg_promiscuous_mode, - .add_vlan_msg = ice_vc_repr_add_vlan, - .remove_vlan_msg = ice_vc_repr_del_vlan, - .ena_vlan_stripping = 
ice_vc_repr_ena_vlan_stripping, - .dis_vlan_stripping = ice_vc_repr_dis_vlan_stripping, + .add_vlan_msg = ice_vc_add_vlan_msg, + .remove_vlan_msg = ice_vc_remove_vlan_msg, + .ena_vlan_stripping = ice_vc_ena_vlan_stripping, + .dis_vlan_stripping = ice_vc_dis_vlan_stripping, .handle_rss_cfg_msg = ice_vc_handle_rss_cfg, .add_fdir_fltr_msg = ice_vc_add_fdir_fltr, .del_fdir_fltr_msg = ice_vc_del_fdir_fltr, diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h index ca5429774994..fa028928482f 100644 --- a/drivers/net/ethernet/intel/igb/e1000_defines.h +++ b/drivers/net/ethernet/intel/igb/e1000_defines.h @@ -1033,9 +1033,6 @@ #define E1000_VFTA_ENTRY_MASK 0x7F #define E1000_VFTA_ENTRY_BIT_SHIFT_MASK 0x1F -/* DMA Coalescing register fields */ -#define E1000_PCIEMISC_LX_DECISION 0x00000080 /* Lx power on DMA coal */ - /* Tx Rate-Scheduler Config fields */ #define E1000_RTTBCNRC_RS_ENA 0x80000000 #define E1000_RTTBCNRC_RF_DEC_MASK 0x00003FFF diff --git a/drivers/net/ethernet/intel/igb/e1000_regs.h b/drivers/net/ethernet/intel/igb/e1000_regs.h index 9cb49980ec2d..eb9f6da9208a 100644 --- a/drivers/net/ethernet/intel/igb/e1000_regs.h +++ b/drivers/net/ethernet/intel/igb/e1000_regs.h @@ -116,7 +116,6 @@ #define E1000_DMCRTRH 0x05DD0 /* Receive Packet Rate Threshold */ #define E1000_DMCCNT 0x05DD4 /* Current Rx Count */ #define E1000_FCRTC 0x02170 /* Flow Control Rx high watermark */ -#define E1000_PCIEMISC 0x05BB8 /* PCIE misc config register */ /* TX Rate Limit Registers */ #define E1000_RTTDQSEL 0x3604 /* Tx Desc Plane Queue Select - WO */ diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_hw.c b/drivers/net/ethernet/intel/ixgb/ixgb_hw.c index c8d1e815ec6b..98bd3267b99b 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_hw.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_hw.c @@ -576,7 +576,7 @@ ixgb_rar_set(struct ixgb_hw *hw, * Writes a value to the specified offset in the VLAN filter table. 
* * hw - Struct containing variables accessed by shared code - * offset - Offset in VLAN filer table to write + * offset - Offset in VLAN filter table to write * value - Value to write into VLAN filter table *****************************************************************************/ void @@ -588,7 +588,7 @@ ixgb_write_vfta(struct ixgb_hw *hw, } /****************************************************************************** - * Clears the VLAN filer table + * Clears the VLAN filter table * * hw - Struct containing variables accessed by shared code *****************************************************************************/ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 921a4d977d65..48444ab9e0b1 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -167,12 +167,46 @@ enum ixgbe_tx_flags { #define IXGBE_82599_VF_DEVICE_ID 0x10ED #define IXGBE_X540_VF_DEVICE_ID 0x1515 +#define UPDATE_VF_COUNTER_32bit(reg, last_counter, counter) \ + { \ + u32 current_counter = IXGBE_READ_REG(hw, reg); \ + if (current_counter < last_counter) \ + counter += 0x100000000LL; \ + last_counter = current_counter; \ + counter &= 0xFFFFFFFF00000000LL; \ + counter |= current_counter; \ + } + +#define UPDATE_VF_COUNTER_36bit(reg_lsb, reg_msb, last_counter, counter) \ + { \ + u64 current_counter_lsb = IXGBE_READ_REG(hw, reg_lsb); \ + u64 current_counter_msb = IXGBE_READ_REG(hw, reg_msb); \ + u64 current_counter = (current_counter_msb << 32) | \ + current_counter_lsb; \ + if (current_counter < last_counter) \ + counter += 0x1000000000LL; \ + last_counter = current_counter; \ + counter &= 0xFFFFFFF000000000LL; \ + counter |= current_counter; \ + } + +struct vf_stats { + u64 gprc; + u64 gorc; + u64 gptc; + u64 gotc; + u64 mprc; +}; + struct vf_data_storage { struct pci_dev *vfdev; unsigned char vf_mac_addresses[ETH_ALEN]; u16 vf_mc_hashes[IXGBE_MAX_VF_MC_ENTRIES]; u16 num_vf_mc_hashes; bool 
clear_to_send; + struct vf_stats vfstats; + struct vf_stats last_vfstats; + struct vf_stats saved_rst_vfstats; bool pf_set_mac; u16 pf_vlan; /* When set, guest VLAN config not allowed. */ u16 pf_qos; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c index 95c92fe890a1..100388968e4d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c @@ -879,7 +879,7 @@ static s32 ixgbe_set_vfta_82598(struct ixgbe_hw *hw, u32 vlan, u32 vind, * ixgbe_clear_vfta_82598 - Clear VLAN filter table * @hw: pointer to hardware structure * - * Clears the VLAN filer table, and the VMDq index associated with the filter + * Clears the VLAN filter table, and the VMDq index associated with the filter **/ static s32 ixgbe_clear_vfta_82598(struct ixgbe_hw *hw) { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index 4c26c4b92f07..38c4609bd429 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -3237,7 +3237,7 @@ vfta_update: * ixgbe_clear_vfta_generic - Clear VLAN filter table * @hw: pointer to hardware structure * - * Clears the VLAN filer table, and the VMDq index associated with the filter + * Clears the VLAN filter table, and the VMDq index associated with the filter **/ s32 ixgbe_clear_vfta_generic(struct ixgbe_hw *hw) { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 77c2e70b0860..5c62e9963650 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -5549,6 +5549,47 @@ static int ixgbe_non_sfp_link_config(struct ixgbe_hw *hw) return ret; } +/** + * ixgbe_clear_vf_stats_counters - Clear out VF stats after reset + * @adapter: board private structure + * + * On a reset we need to clear out the VF stats or accounting gets + * messed 
up because they're not clear on read. + **/ +static void ixgbe_clear_vf_stats_counters(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + int i; + + for (i = 0; i < adapter->num_vfs; i++) { + adapter->vfinfo[i].last_vfstats.gprc = + IXGBE_READ_REG(hw, IXGBE_PVFGPRC(i)); + adapter->vfinfo[i].saved_rst_vfstats.gprc += + adapter->vfinfo[i].vfstats.gprc; + adapter->vfinfo[i].vfstats.gprc = 0; + adapter->vfinfo[i].last_vfstats.gptc = + IXGBE_READ_REG(hw, IXGBE_PVFGPTC(i)); + adapter->vfinfo[i].saved_rst_vfstats.gptc += + adapter->vfinfo[i].vfstats.gptc; + adapter->vfinfo[i].vfstats.gptc = 0; + adapter->vfinfo[i].last_vfstats.gorc = + IXGBE_READ_REG(hw, IXGBE_PVFGORC_LSB(i)); + adapter->vfinfo[i].saved_rst_vfstats.gorc += + adapter->vfinfo[i].vfstats.gorc; + adapter->vfinfo[i].vfstats.gorc = 0; + adapter->vfinfo[i].last_vfstats.gotc = + IXGBE_READ_REG(hw, IXGBE_PVFGOTC_LSB(i)); + adapter->vfinfo[i].saved_rst_vfstats.gotc += + adapter->vfinfo[i].vfstats.gotc; + adapter->vfinfo[i].vfstats.gotc = 0; + adapter->vfinfo[i].last_vfstats.mprc = + IXGBE_READ_REG(hw, IXGBE_PVFMPRC(i)); + adapter->vfinfo[i].saved_rst_vfstats.mprc += + adapter->vfinfo[i].vfstats.mprc; + adapter->vfinfo[i].vfstats.mprc = 0; + } +} + static void ixgbe_setup_gpie(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; @@ -5684,6 +5725,7 @@ static void ixgbe_up_complete(struct ixgbe_adapter *adapter) adapter->link_check_timeout = jiffies; mod_timer(&adapter->service_timer, jiffies); + ixgbe_clear_vf_stats_counters(adapter); /* Set PF Reset Done bit so PF/VF Mail Ops can work */ ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT); ctrl_ext |= IXGBE_CTRL_EXT_PFRSTD; @@ -7271,6 +7313,32 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter) netdev->stats.rx_length_errors = hwstats->rlec; netdev->stats.rx_crc_errors = hwstats->crcerrs; netdev->stats.rx_missed_errors = total_mpc; + + /* VF Stats Collection - skip while resetting because these + * are not clear on read and 
otherwise you'll sometimes get + * crazy values. + */ + if (!test_bit(__IXGBE_RESETTING, &adapter->state)) { + for (i = 0; i < adapter->num_vfs; i++) { + UPDATE_VF_COUNTER_32bit(IXGBE_PVFGPRC(i), + adapter->vfinfo[i].last_vfstats.gprc, + adapter->vfinfo[i].vfstats.gprc); + UPDATE_VF_COUNTER_32bit(IXGBE_PVFGPTC(i), + adapter->vfinfo[i].last_vfstats.gptc, + adapter->vfinfo[i].vfstats.gptc); + UPDATE_VF_COUNTER_36bit(IXGBE_PVFGORC_LSB(i), + IXGBE_PVFGORC_MSB(i), + adapter->vfinfo[i].last_vfstats.gorc, + adapter->vfinfo[i].vfstats.gorc); + UPDATE_VF_COUNTER_36bit(IXGBE_PVFGOTC_LSB(i), + IXGBE_PVFGOTC_MSB(i), + adapter->vfinfo[i].last_vfstats.gotc, + adapter->vfinfo[i].vfstats.gotc); + UPDATE_VF_COUNTER_32bit(IXGBE_PVFMPRC(i), + adapter->vfinfo[i].last_vfstats.mprc, + adapter->vfinfo[i].vfstats.mprc); + } + } } /** @@ -9022,6 +9090,23 @@ static void ixgbe_get_stats64(struct net_device *netdev, stats->rx_missed_errors = netdev->stats.rx_missed_errors; } +static int ixgbe_ndo_get_vf_stats(struct net_device *netdev, int vf, + struct ifla_vf_stats *vf_stats) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + if (vf < 0 || vf >= adapter->num_vfs) + return -EINVAL; + + vf_stats->rx_packets = adapter->vfinfo[vf].vfstats.gprc; + vf_stats->rx_bytes = adapter->vfinfo[vf].vfstats.gorc; + vf_stats->tx_packets = adapter->vfinfo[vf].vfstats.gptc; + vf_stats->tx_bytes = adapter->vfinfo[vf].vfstats.gotc; + vf_stats->multicast = adapter->vfinfo[vf].vfstats.mprc; + + return 0; +} + #ifdef CONFIG_IXGBE_DCB /** * ixgbe_validate_rtr - verify 802.1Qp to Rx packet buffer mapping is valid. 
@@ -10338,6 +10423,7 @@ static const struct net_device_ops ixgbe_netdev_ops = { .ndo_set_vf_rss_query_en = ixgbe_ndo_set_vf_rss_query_en, .ndo_set_vf_trust = ixgbe_ndo_set_vf_trust, .ndo_get_vf_config = ixgbe_ndo_get_vf_config, + .ndo_get_vf_stats = ixgbe_ndo_get_vf_stats, .ndo_get_stats64 = ixgbe_get_stats64, .ndo_setup_tc = __ixgbe_setup_tc, #ifdef IXGBE_FCOE diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index d4e63f0644c3..67e49aac50fe 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -77,7 +77,7 @@ static int __ixgbe_enable_sriov(struct ixgbe_adapter *adapter, IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN); adapter->bridge_mode = BRIDGE_MODE_VEB; - /* limit trafffic classes based on VFs enabled */ + /* limit traffic classes based on VFs enabled */ if ((adapter->hw.mac.type == ixgbe_mac_82599EB) && (num_vfs < 16)) { adapter->dcb_cfg.num_tcs.pg_tcs = MAX_TRAFFIC_CLASS; adapter->dcb_cfg.num_tcs.pfc_tcs = MAX_TRAFFIC_CLASS; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 6da9880d766a..7f7ea468ffa9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -2533,6 +2533,13 @@ enum { #define IXGBE_PVFTXDCTL(P) (0x06028 + (0x40 * (P))) #define IXGBE_PVFTDWBAL(P) (0x06038 + (0x40 * (P))) #define IXGBE_PVFTDWBAH(P) (0x0603C + (0x40 * (P))) +#define IXGBE_PVFGPRC(x) (0x0101C + (0x40 * (x))) +#define IXGBE_PVFGPTC(x) (0x08300 + (0x04 * (x))) +#define IXGBE_PVFGORC_LSB(x) (0x01020 + (0x40 * (x))) +#define IXGBE_PVFGORC_MSB(x) (0x0D020 + (0x40 * (x))) +#define IXGBE_PVFGOTC_LSB(x) (0x08400 + (0x08 * (x))) +#define IXGBE_PVFGOTC_MSB(x) (0x08404 + (0x08 * (x))) +#define IXGBE_PVFMPRC(x) (0x0D01C + (0x40 * (x))) #define IXGBE_PVFTDWBALn(q_per_pool, vf_number, vf_q_index) \ (IXGBE_PVFTDWBAL((q_per_pool)*(vf_number) + 
(vf_q_index))) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 59c9a10f83ba..6beb3d4873a3 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1444,8 +1444,8 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, int done = 0, bytes = 0; while (done < budget) { + unsigned int pktlen, *rxdcsum; struct net_device *netdev; - unsigned int pktlen; dma_addr_t dma_addr; u32 hash, reason; int mac = 0; @@ -1512,7 +1512,13 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, pktlen = RX_DMA_GET_PLEN0(trxd.rxd2); skb->dev = netdev; skb_put(skb, pktlen); - if (trxd.rxd4 & eth->soc->txrx.rx_dma_l4_valid) + + if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) + rxdcsum = &trxd.rxd3; + else + rxdcsum = &trxd.rxd4; + + if (*rxdcsum & eth->soc->txrx.rx_dma_l4_valid) skb->ip_summed = CHECKSUM_UNNECESSARY; else skb_checksum_none_assert(skb); @@ -3761,6 +3767,7 @@ static const struct mtk_soc_data mt7986_data = { .txd_size = sizeof(struct mtk_tx_dma_v2), .rxd_size = sizeof(struct mtk_rx_dma_v2), .rx_irq_done_mask = MTK_RX_DONE_INT_V2, + .rx_dma_l4_valid = RX_DMA_L4_VALID_V2, .dma_max_len = MTK_TX_DMA_BUF_LEN_V2, .dma_len_offset = 8, }, diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c index 84621b4cb15b..b03e1c66bac0 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c @@ -19,8 +19,6 @@ #include "mlxbf_gige.h" #include "mlxbf_gige_regs.h" -#define DRV_NAME "mlxbf_gige" - /* Allocate SKB whose payload pointer aligns with the Bluefield * hardware DMA limitation, i.e. DMA operation can't cross * a 4KB boundary. 
A maximum packet size of 2KB is assumed in the @@ -427,7 +425,7 @@ static struct platform_driver mlxbf_gige_driver = { .remove = mlxbf_gige_remove, .shutdown = mlxbf_gige_shutdown, .driver = { - .name = DRV_NAME, + .name = KBUILD_MODNAME, .acpi_match_table = ACPI_PTR(mlxbf_gige_acpi_match), }, }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index 1a465fd5d8b3..c57e293cca25 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -12,7 +12,6 @@ mlxsw_i2c-objs := i2c.o obj-$(CONFIG_MLXSW_SPECTRUM) += mlxsw_spectrum.o mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ spectrum_switchdev.o spectrum_router.o \ - spectrum_router_xm.o \ spectrum1_kvdl.o spectrum2_kvdl.o \ spectrum_kvdl.o \ spectrum_acl_tcam.o spectrum_acl_ctcam.o \ diff --git a/drivers/net/ethernet/mellanox/mlxsw/cmd.h b/drivers/net/ethernet/mellanox/mlxsw/cmd.h index 51b260d54237..8a89c2773294 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/cmd.h +++ b/drivers/net/ethernet/mellanox/mlxsw/cmd.h @@ -343,23 +343,6 @@ static inline int mlxsw_cmd_boardinfo(struct mlxsw_core *mlxsw_core, 0, 0, false, out_mbox, MLXSW_CMD_MBOX_SIZE); } -/* cmd_mbox_xm_num_local_ports - * Number of local_ports connected to the xm. - * Each local port is a 4x - * Spectrum-2/3: 25G - * Spectrum-4: 50G - */ -MLXSW_ITEM32(cmd_mbox, boardinfo, xm_num_local_ports, 0x00, 4, 3); - -/* cmd_mbox_xm_exists - * An XM (eXtanded Mezanine, e.g. used for the XLT) is connected on the board. - */ -MLXSW_ITEM32(cmd_mbox, boardinfo, xm_exists, 0x00, 0, 1); - -/* cmd_mbox_xm_local_port_entry - */ -MLXSW_ITEM_BIT_ARRAY(cmd_mbox, boardinfo, xm_local_port_entry, 0x04, 4, 8); - /* cmd_mbox_boardinfo_intapin * When PCIe interrupt messages are being used, this value is used for clearing * an interrupt. When using MSI-X, this register is not used. 
@@ -674,12 +657,6 @@ MLXSW_ITEM32(cmd_mbox, config_profile, set_kvd_hash_double_size, 0x0C, 26, 1); */ MLXSW_ITEM32(cmd_mbox, config_profile, set_cqe_version, 0x08, 0, 1); -/* cmd_mbox_config_set_kvh_xlt_cache_mode - * Capability bit. Setting a bit to 1 configures the profile - * according to the mailbox contents. - */ -MLXSW_ITEM32(cmd_mbox, config_profile, set_kvh_xlt_cache_mode, 0x08, 3, 1); - /* cmd_mbox_config_profile_max_vepa_channels * Maximum number of VEPA channels per port (0 through 16) * 0 - multi-channel VEPA is disabled @@ -736,16 +713,25 @@ MLXSW_ITEM32(cmd_mbox, config_profile, max_flood_tables, 0x30, 16, 4); */ MLXSW_ITEM32(cmd_mbox, config_profile, max_vid_flood_tables, 0x30, 8, 4); +enum mlxsw_cmd_mbox_config_profile_flood_mode { + /* Mixed mode, where: + * max_flood_tables indicates the number of single-entry tables. + * max_vid_flood_tables indicates the number of per-VID tables. + * max_fid_offset_flood_tables indicates the number of FID-offset + * tables. max_fid_flood_tables indicates the number of per-FID tables. + * Reserved when unified bridge model is used. + */ + MLXSW_CMD_MBOX_CONFIG_PROFILE_FLOOD_MODE_MIXED = 3, + /* Controlled flood tables. Reserved when legacy bridge model is + * used. + */ + MLXSW_CMD_MBOX_CONFIG_PROFILE_FLOOD_MODE_CONTROLLED = 4, +}; + /* cmd_mbox_config_profile_flood_mode * Flooding mode to use. - * 0-2 - Backward compatible modes for SwitchX devices. - * 3 - Mixed mode, where: - * max_flood_tables indicates the number of single-entry tables. - * max_vid_flood_tables indicates the number of per-VID tables. - * max_fid_offset_flood_tables indicates the number of FID-offset tables. - * max_fid_flood_tables indicates the number of per-FID tables. */ -MLXSW_ITEM32(cmd_mbox, config_profile, flood_mode, 0x30, 0, 2); +MLXSW_ITEM32(cmd_mbox, config_profile, flood_mode, 0x30, 0, 3); /* cmd_mbox_config_profile_max_fid_offset_flood_tables * Maximum number of FID-offset flooding tables. 
@@ -806,13 +792,6 @@ MLXSW_ITEM32(cmd_mbox, config_profile, adaptive_routing_group_cap, 0x4C, 0, 16); */ MLXSW_ITEM32(cmd_mbox, config_profile, arn, 0x50, 31, 1); -/* cmd_mbox_config_profile_kvh_xlt_cache_mode - * KVH XLT cache mode: - * 0 - XLT can use all KVH as best-effort - * 1 - XLT cache uses 1/2 KVH - */ -MLXSW_ITEM32(cmd_mbox, config_profile, kvh_xlt_cache_mode, 0x50, 8, 4); - /* cmd_mbox_config_kvd_linear_size * KVD Linear Size * Valid for Spectrum only diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index fc52832241b3..ab1cebf227fb 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -3151,18 +3151,6 @@ mlxsw_core_port_linecard_get(struct mlxsw_core *mlxsw_core, return mlxsw_core_port->linecard; } -bool mlxsw_core_port_is_xm(const struct mlxsw_core *mlxsw_core, u16 local_port) -{ - const struct mlxsw_bus_info *bus_info = mlxsw_core->bus_info; - int i; - - for (i = 0; i < bus_info->xm_local_ports_count; i++) - if (bus_info->xm_local_ports[i] == local_port) - return true; - return false; -} -EXPORT_SYMBOL(mlxsw_core_port_is_xm); - void mlxsw_core_ports_remove_selected(struct mlxsw_core *mlxsw_core, bool (*selector)(void *priv, u16 local_port), void *priv) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index c2a891287047..d1e8b8b8d0c1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -261,7 +261,6 @@ mlxsw_core_port_devlink_port_get(struct mlxsw_core *mlxsw_core, struct mlxsw_linecard * mlxsw_core_port_linecard_get(struct mlxsw_core *mlxsw_core, u16 local_port); -bool mlxsw_core_port_is_xm(const struct mlxsw_core *mlxsw_core, u16 local_port); void mlxsw_core_ports_remove_selected(struct mlxsw_core *mlxsw_core, bool (*selector)(void *priv, u16 local_port), @@ -296,8 +295,7 @@ struct mlxsw_config_profile { used_max_pkey:1, used_ar_sec:1, 
used_adaptive_routing_group_cap:1, - used_kvd_sizes:1, - used_kvh_xlt_cache_mode:1; + used_kvd_sizes:1; u8 max_vepa_channels; u16 max_mid; u16 max_pgt; @@ -319,7 +317,6 @@ struct mlxsw_config_profile { u32 kvd_linear_size; u8 kvd_hash_single_parts; u8 kvd_hash_double_parts; - u8 kvh_xlt_cache_mode; struct mlxsw_swid_config swid_config[MLXSW_CONFIG_PROFILE_SWID_COUNT]; }; @@ -478,8 +475,6 @@ struct mlxsw_fw_rev { u16 can_reset_minor; }; -#define MLXSW_BUS_INFO_XM_LOCAL_PORTS_MAX 4 - struct mlxsw_bus_info { const char *device_kind; const char *device_name; @@ -488,10 +483,7 @@ struct mlxsw_bus_info { u8 vsd[MLXSW_CMD_BOARDINFO_VSD_LEN]; u8 psid[MLXSW_CMD_BOARDINFO_PSID_LEN]; u8 low_frequency:1, - read_frc_capable:1, - xm_exists:1; - u8 xm_local_ports_count; - u8 xm_local_ports[MLXSW_BUS_INFO_XM_LOCAL_PORTS_MAX]; + read_frc_capable:1; }; struct mlxsw_hwmon; diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c index d9660d4cce96..d9bf584234a6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/minimal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c @@ -359,8 +359,7 @@ static int mlxsw_m_ports_create(struct mlxsw_m *mlxsw_m) /* Create port objects for each valid entry */ devl_lock(devlink); for (i = 0; i < mlxsw_m->max_ports; i++) { - if (mlxsw_m->module_to_port[i] > 0 && - !mlxsw_core_port_is_xm(mlxsw_m->core, i)) { + if (mlxsw_m->module_to_port[i] > 0) { err = mlxsw_m_port_create(mlxsw_m, mlxsw_m->module_to_port[i], i); diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index f91dde4df152..4687dabaaf09 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1252,12 +1252,6 @@ static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox, mlxsw_cmd_mbox_config_profile_kvd_hash_double_size_set(mbox, MLXSW_RES_GET(res, KVD_DOUBLE_SIZE)); } - if (profile->used_kvh_xlt_cache_mode) { - 
mlxsw_cmd_mbox_config_profile_set_kvh_xlt_cache_mode_set( - mbox, 1); - mlxsw_cmd_mbox_config_profile_kvh_xlt_cache_mode_set( - mbox, profile->kvh_xlt_cache_mode); - } for (i = 0; i < MLXSW_CONFIG_PROFILE_SWID_COUNT; i++) mlxsw_pci_config_profile_swid_config(mlxsw_pci, mbox, i, @@ -1271,30 +1265,6 @@ static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox, return mlxsw_cmd_config_profile_set(mlxsw_pci->core, mbox); } -static int mlxsw_pci_boardinfo_xm_process(struct mlxsw_pci *mlxsw_pci, - struct mlxsw_bus_info *bus_info, - char *mbox) -{ - int count = mlxsw_cmd_mbox_boardinfo_xm_num_local_ports_get(mbox); - int i; - - if (!mlxsw_cmd_mbox_boardinfo_xm_exists_get(mbox)) - return 0; - - bus_info->xm_exists = true; - - if (count > MLXSW_BUS_INFO_XM_LOCAL_PORTS_MAX) { - dev_err(&mlxsw_pci->pdev->dev, "Invalid number of XM local ports\n"); - return -EINVAL; - } - bus_info->xm_local_ports_count = count; - for (i = 0; i < count; i++) - bus_info->xm_local_ports[i] = - mlxsw_cmd_mbox_boardinfo_xm_local_port_entry_get(mbox, - i); - return 0; -} - static int mlxsw_pci_boardinfo(struct mlxsw_pci *mlxsw_pci, char *mbox) { struct mlxsw_bus_info *bus_info = &mlxsw_pci->bus_info; @@ -1306,8 +1276,7 @@ static int mlxsw_pci_boardinfo(struct mlxsw_pci *mlxsw_pci, char *mbox) return err; mlxsw_cmd_mbox_boardinfo_vsd_memcpy_from(mbox, bus_info->vsd); mlxsw_cmd_mbox_boardinfo_psid_memcpy_from(mbox, bus_info->psid); - - return mlxsw_pci_boardinfo_xm_process(mlxsw_pci, bus_info, mbox); + return 0; } static int mlxsw_pci_fw_area_init(struct mlxsw_pci *mlxsw_pci, char *mbox, @@ -1582,6 +1551,14 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core, if (err) goto err_config_profile; + /* Some resources depend on unified bridge model, which is configured + * as part of config_profile. Query the resources again to get correct + * values. 
+ */ + err = mlxsw_core_resources_query(mlxsw_core, mbox, res); + if (err) + goto err_requery_resources; + err = mlxsw_pci_aqs_init(mlxsw_pci, mbox); if (err) goto err_aqs_init; @@ -1599,6 +1576,7 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core, err_request_eq_irq: mlxsw_pci_aqs_fini(mlxsw_pci); err_aqs_init: +err_requery_resources: err_config_profile: err_cqe_v_check: err_query_resources: diff --git a/drivers/net/ethernet/mellanox/mlxsw/port.h b/drivers/net/ethernet/mellanox/mlxsw/port.h index 741fd2989d12..ac4d4ea51597 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/port.h +++ b/drivers/net/ethernet/mellanox/mlxsw/port.h @@ -15,8 +15,6 @@ #define MLXSW_PORT_SWID_TYPE_IB 1 #define MLXSW_PORT_SWID_TYPE_ETH 2 -#define MLXSW_PORT_MID 0xd000 - #define MLXSW_PORT_MAX_IB_PHY_PORTS 36 #define MLXSW_PORT_MAX_IB_PORTS (MLXSW_PORT_MAX_IB_PHY_PORTS + 1) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 93af6c974ece..7961f0c55fa6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -322,6 +322,18 @@ MLXSW_ITEM32_INDEXED(reg, sfd, rec_action, MLXSW_REG_SFD_BASE_LEN, 28, 4, MLXSW_ITEM32_INDEXED(reg, sfd, uc_sub_port, MLXSW_REG_SFD_BASE_LEN, 16, 8, MLXSW_REG_SFD_REC_LEN, 0x08, false); +/* reg_sfd_uc_set_vid + * Set VID. + * 0 - Do not update VID. + * 1 - Set VID. + * For Spectrum-2 when set_vid=0 and smpe_valid=1, the smpe will modify the vid. + * Access: RW + * + * Note: Reserved when legacy bridge model is used. 
+ */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_set_vid, MLXSW_REG_SFD_BASE_LEN, 31, 1, + MLXSW_REG_SFD_REC_LEN, 0x08, false); + /* reg_sfd_uc_fid_vid * Filtering ID or VLAN ID * For SwitchX and SwitchX-2: @@ -335,6 +347,15 @@ MLXSW_ITEM32_INDEXED(reg, sfd, uc_sub_port, MLXSW_REG_SFD_BASE_LEN, 16, 8, MLXSW_ITEM32_INDEXED(reg, sfd, uc_fid_vid, MLXSW_REG_SFD_BASE_LEN, 0, 16, MLXSW_REG_SFD_REC_LEN, 0x08, false); +/* reg_sfd_uc_vid + * New VID when set_vid=1. + * Access: RW + * + * Note: Reserved when legacy bridge model is used and when set_vid=0. + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_vid, MLXSW_REG_SFD_BASE_LEN, 16, 12, + MLXSW_REG_SFD_REC_LEN, 0x0C, false); + /* reg_sfd_uc_system_port * Unique port identifier for the final destination of the packet. * Access: RW @@ -379,6 +400,18 @@ static inline void mlxsw_reg_sfd_uc_pack(char *payload, int rec_index, MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_sub_port, MLXSW_REG_SFD_BASE_LEN, 16, 8, MLXSW_REG_SFD_REC_LEN, 0x08, false); +/* reg_sfd_uc_lag_set_vid + * Set VID. + * 0 - Do not update VID. + * 1 - Set VID. + * For Spectrum-2 when set_vid=0 and smpe_valid=1, the smpe will modify the vid. + * Access: RW + * + * Note: Reserved when legacy bridge model is used. + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_set_vid, MLXSW_REG_SFD_BASE_LEN, 31, 1, + MLXSW_REG_SFD_REC_LEN, 0x08, false); + /* reg_sfd_uc_lag_fid_vid * Filtering ID or VLAN ID * For SwitchX and SwitchX-2: @@ -393,8 +426,10 @@ MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_fid_vid, MLXSW_REG_SFD_BASE_LEN, 0, 16, MLXSW_REG_SFD_REC_LEN, 0x08, false); /* reg_sfd_uc_lag_lag_vid - * Indicates VID in case of vFIDs. Reserved for FIDs. + * New vlan ID. * Access: RW + * + * Note: Reserved when legacy bridge model is used and set_vid=0. */ MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_lag_vid, MLXSW_REG_SFD_BASE_LEN, 16, 12, MLXSW_REG_SFD_REC_LEN, 0x0C, false); @@ -997,7 +1032,7 @@ static inline void mlxsw_reg_spaft_pack(char *payload, u16 local_port, * to packet types used for flooding. 
*/ #define MLXSW_REG_SFGC_ID 0x2011 -#define MLXSW_REG_SFGC_LEN 0x10 +#define MLXSW_REG_SFGC_LEN 0x14 MLXSW_REG_DEFINE(sfgc, MLXSW_REG_SFGC_ID, MLXSW_REG_SFGC_LEN); @@ -1054,12 +1089,6 @@ MLXSW_ITEM32(reg, sfgc, table_type, 0x04, 16, 3); */ MLXSW_ITEM32(reg, sfgc, flood_table, 0x04, 0, 6); -/* reg_sfgc_mid - * The multicast ID for the swid. Not supported for Spectrum - * Access: RW - */ -MLXSW_ITEM32(reg, sfgc, mid, 0x08, 0, 16); - /* reg_sfgc_counter_set_type * Counter Set Type for flow counters. * Access: RW @@ -1072,6 +1101,14 @@ MLXSW_ITEM32(reg, sfgc, counter_set_type, 0x0C, 24, 8); */ MLXSW_ITEM32(reg, sfgc, counter_index, 0x0C, 0, 24); +/* reg_sfgc_mid_base + * MID Base. + * Access: RW + * + * Note: Reserved when legacy bridge model is used. + */ +MLXSW_ITEM32(reg, sfgc, mid_base, 0x10, 0, 16); + static inline void mlxsw_reg_sfgc_pack(char *payload, enum mlxsw_reg_sfgc_type type, enum mlxsw_reg_sfgc_bridge_type bridge_type, @@ -1083,7 +1120,6 @@ mlxsw_reg_sfgc_pack(char *payload, enum mlxsw_reg_sfgc_type type, mlxsw_reg_sfgc_bridge_type_set(payload, bridge_type); mlxsw_reg_sfgc_table_type_set(payload, table_type); mlxsw_reg_sfgc_flood_table_set(payload, flood_table); - mlxsw_reg_sfgc_mid_set(payload, MLXSW_PORT_MID); } /* SFDF - Switch Filtering DB Flush @@ -1516,7 +1552,7 @@ static inline void mlxsw_reg_spmlr_pack(char *payload, u16 local_port, * virtualized ports. 
*/ #define MLXSW_REG_SVFA_ID 0x201C -#define MLXSW_REG_SVFA_LEN 0x10 +#define MLXSW_REG_SVFA_LEN 0x18 MLXSW_REG_DEFINE(svfa, MLXSW_REG_SVFA_ID, MLXSW_REG_SVFA_LEN); @@ -1537,6 +1573,7 @@ MLXSW_ITEM32_LP(reg, svfa, 0x00, 16, 0x00, 12); enum mlxsw_reg_svfa_mt { MLXSW_REG_SVFA_MT_VID_TO_FID, MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, + MLXSW_REG_SVFA_MT_VNI_TO_FID, }; /* reg_svfa_mapping_table @@ -1586,20 +1623,73 @@ MLXSW_ITEM32(reg, svfa, counter_set_type, 0x08, 24, 8); */ MLXSW_ITEM32(reg, svfa, counter_index, 0x08, 0, 24); -static inline void mlxsw_reg_svfa_pack(char *payload, u16 local_port, - enum mlxsw_reg_svfa_mt mt, bool valid, - u16 fid, u16 vid) +/* reg_svfa_vni + * Virtual Network Identifier. + * Access: Index + * + * Note: Reserved when mapping_table is not 2 (VNI mapping table). + */ +MLXSW_ITEM32(reg, svfa, vni, 0x10, 0, 24); + +/* reg_svfa_irif_v + * Ingress RIF valid. + * 0 - Ingress RIF is not valid, no ingress RIF assigned. + * 1 - Ingress RIF valid. + * Must not be set for a non enabled RIF. + * Access: RW + * + * Note: Reserved when legacy bridge model is used. + */ +MLXSW_ITEM32(reg, svfa, irif_v, 0x14, 24, 1); + +/* reg_svfa_irif + * Ingress RIF (Router Interface). + * Range is 0..cap_max_router_interfaces-1. + * Access: RW + * + * Note: Reserved when legacy bridge model is used and when irif_v=0. + */ +MLXSW_ITEM32(reg, svfa, irif, 0x14, 0, 16); + +static inline void __mlxsw_reg_svfa_pack(char *payload, + enum mlxsw_reg_svfa_mt mt, bool valid, + u16 fid) { MLXSW_REG_ZERO(svfa, payload); - local_port = mt == MLXSW_REG_SVFA_MT_VID_TO_FID ? 
0 : local_port; mlxsw_reg_svfa_swid_set(payload, 0); - mlxsw_reg_svfa_local_port_set(payload, local_port); mlxsw_reg_svfa_mapping_table_set(payload, mt); mlxsw_reg_svfa_v_set(payload, valid); mlxsw_reg_svfa_fid_set(payload, fid); +} + +static inline void mlxsw_reg_svfa_port_vid_pack(char *payload, u16 local_port, + bool valid, u16 fid, u16 vid) +{ + enum mlxsw_reg_svfa_mt mt = MLXSW_REG_SVFA_MT_PORT_VID_TO_FID; + + __mlxsw_reg_svfa_pack(payload, mt, valid, fid); + mlxsw_reg_svfa_local_port_set(payload, local_port); mlxsw_reg_svfa_vid_set(payload, vid); } +static inline void mlxsw_reg_svfa_vid_pack(char *payload, bool valid, u16 fid, + u16 vid) +{ + enum mlxsw_reg_svfa_mt mt = MLXSW_REG_SVFA_MT_VID_TO_FID; + + __mlxsw_reg_svfa_pack(payload, mt, valid, fid); + mlxsw_reg_svfa_vid_set(payload, vid); +} + +static inline void mlxsw_reg_svfa_vni_pack(char *payload, bool valid, u16 fid, + u32 vni) +{ + enum mlxsw_reg_svfa_mt mt = MLXSW_REG_SVFA_MT_VNI_TO_FID; + + __mlxsw_reg_svfa_pack(payload, mt, valid, fid); + mlxsw_reg_svfa_vni_set(payload, vni); +} + /* SPVTR - Switch Port VLAN Stacking Register * ------------------------------------------ * The Switch Port VLAN Stacking register configures the VLAN mode of the port @@ -1741,7 +1831,7 @@ static inline void mlxsw_reg_svpe_pack(char *payload, u16 local_port, * Creates and configures FIDs. */ #define MLXSW_REG_SFMR_ID 0x201F -#define MLXSW_REG_SFMR_LEN 0x18 +#define MLXSW_REG_SFMR_LEN 0x30 MLXSW_REG_DEFINE(sfmr, MLXSW_REG_SFMR_ID, MLXSW_REG_SFMR_LEN); @@ -1764,6 +1854,28 @@ MLXSW_ITEM32(reg, sfmr, op, 0x00, 24, 4); */ MLXSW_ITEM32(reg, sfmr, fid, 0x00, 0, 16); +/* reg_sfmr_flood_rsp + * Router sub-port flooding table. + * 0 - Regular flooding table. + * 1 - Router sub-port flooding table. For this FID the flooding is per + * router-sub-port local_port. Must not be set for a FID which is not a + * router-sub-port and must be set prior to enabling the relevant RIF. 
+ * Access: RW + * + * Note: Reserved when legacy bridge model is used. + */ +MLXSW_ITEM32(reg, sfmr, flood_rsp, 0x08, 31, 1); + +/* reg_sfmr_flood_bridge_type + * Flood bridge type (see SFGC.bridge_type). + * 0 - type_0. + * 1 - type_1. + * Access: RW + * + * Note: Reserved when legacy bridge model is used and when flood_rsp=1. + */ +MLXSW_ITEM32(reg, sfmr, flood_bridge_type, 0x08, 28, 1); + /* reg_sfmr_fid_offset * FID offset. * Used to point into the flooding table selected by SFGC register if @@ -1800,12 +1912,52 @@ MLXSW_ITEM32(reg, sfmr, vv, 0x10, 31, 1); /* reg_sfmr_vni * Virtual Network Identifier. + * When legacy bridge model is used, a given VNI can only be assigned to one + * FID. When unified bridge model is used, it configures only the FID->VNI, + * the VNI->FID is done by SVFA. * Access: RW - * - * Note: A given VNI can only be assigned to one FID. */ MLXSW_ITEM32(reg, sfmr, vni, 0x10, 0, 24); +/* reg_sfmr_irif_v + * Ingress RIF valid. + * 0 - Ingress RIF is not valid, no ingress RIF assigned. + * 1 - Ingress RIF valid. + * Must not be set for a non valid RIF. + * Access: RW + * + * Note: Reserved when legacy bridge model is used. + */ +MLXSW_ITEM32(reg, sfmr, irif_v, 0x14, 24, 1); + +/* reg_sfmr_irif + * Ingress RIF (Router Interface). + * Range is 0..cap_max_router_interfaces-1. + * Access: RW + * + * Note: Reserved when legacy bridge model is used and when irif_v=0. + */ +MLXSW_ITEM32(reg, sfmr, irif, 0x14, 0, 16); + +/* reg_sfmr_smpe_valid + * SMPE is valid. + * Access: RW + * + * Note: Reserved when legacy bridge model is used, when flood_rsp=1 and on + * Spectrum-1. + */ +MLXSW_ITEM32(reg, sfmr, smpe_valid, 0x28, 20, 1); + +/* reg_sfmr_smpe + * Switch multicast port to egress VID. + * Range is 0..cap_max_rmpe-1 + * Access: RW + * + * Note: Reserved when legacy bridge model is used, when flood_rsp=1 and on + * Spectrum-1. 
+ */ +MLXSW_ITEM32(reg, sfmr, smpe, 0x28, 0, 16); + static inline void mlxsw_reg_sfmr_pack(char *payload, enum mlxsw_reg_sfmr_op op, u16 fid, u16 fid_offset) @@ -2013,6 +2165,45 @@ static inline void mlxsw_reg_spevet_pack(char *payload, u16 local_port, mlxsw_reg_spevet_et_vlan_set(payload, et_vlan); } +/* SMPE - Switch Multicast Port to Egress VID + * ------------------------------------------ + * The switch multicast port to egress VID maps + * {egress_port, SMPE index} -> {VID}. + */ +#define MLXSW_REG_SMPE_ID 0x202B +#define MLXSW_REG_SMPE_LEN 0x0C + +MLXSW_REG_DEFINE(smpe, MLXSW_REG_SMPE_ID, MLXSW_REG_SMPE_LEN); + +/* reg_smpe_local_port + * Local port number. + * CPU port is not supported. + * Access: Index + */ +MLXSW_ITEM32_LP(reg, smpe, 0x00, 16, 0x00, 12); + +/* reg_smpe_smpe_index + * Switch multicast port to egress VID. + * Range is 0..cap_max_rmpe-1. + * Access: Index + */ +MLXSW_ITEM32(reg, smpe, smpe_index, 0x04, 0, 16); + +/* reg_smpe_evid + * Egress VID. + * Access: RW + */ +MLXSW_ITEM32(reg, smpe, evid, 0x08, 0, 12); + +static inline void mlxsw_reg_smpe_pack(char *payload, u16 local_port, + u16 smpe_index, u16 evid) +{ + MLXSW_REG_ZERO(smpe, payload); + mlxsw_reg_smpe_local_port_set(payload, local_port); + mlxsw_reg_smpe_smpe_index_set(payload, smpe_index); + mlxsw_reg_smpe_evid_set(payload, evid); +} + /* SFTR-V2 - Switch Flooding Table Version 2 Register * -------------------------------------------------- * The switch flooding table is used for flooding packet replication. The table @@ -2107,6 +2298,23 @@ MLXSW_ITEM32(reg, smid2, swid, 0x00, 24, 8); */ MLXSW_ITEM32(reg, smid2, mid, 0x00, 0, 16); +/* reg_smid2_smpe_valid + * SMPE is valid. + * When not valid, the egress VID will not be modified by the SMPE table. + * Access: RW + * + * Note: Reserved when legacy bridge model is used and on Spectrum-2. + */ +MLXSW_ITEM32(reg, smid2, smpe_valid, 0x08, 20, 1); + +/* reg_smid2_smpe + * Switch multicast port to egress VID. 
+ * Access: RW + * + * Note: Reserved when legacy bridge model is used and on Spectrum-2. + */ +MLXSW_ITEM32(reg, smid2, smpe, 0x08, 0, 16); + /* reg_smid2_port * Local port memebership (1 bit per port). * Access: RW @@ -2120,13 +2328,15 @@ MLXSW_ITEM_BIT_ARRAY(reg, smid2, port, 0x20, 0x80, 1); MLXSW_ITEM_BIT_ARRAY(reg, smid2, port_mask, 0xA0, 0x80, 1); static inline void mlxsw_reg_smid2_pack(char *payload, u16 mid, u16 port, - bool set) + bool set, bool smpe_valid, u16 smpe) { MLXSW_REG_ZERO(smid2, payload); mlxsw_reg_smid2_swid_set(payload, 0); mlxsw_reg_smid2_mid_set(payload, mid); mlxsw_reg_smid2_port_set(payload, port, set); mlxsw_reg_smid2_port_mask_set(payload, port, 1); + mlxsw_reg_smid2_smpe_valid_set(payload, smpe_valid); + mlxsw_reg_smid2_smpe_set(payload, smpe_valid ? smpe : 0); } /* CWTP - Congetion WRED ECN TClass Profile @@ -6701,31 +6911,32 @@ MLXSW_ITEM32(reg, ritr, if_vrrp_id_ipv4, 0x1C, 0, 8); /* VLAN Interface */ -/* reg_ritr_vlan_if_vid +/* reg_ritr_vlan_if_vlan_id * VLAN ID. * Access: RW */ -MLXSW_ITEM32(reg, ritr, vlan_if_vid, 0x08, 0, 12); +MLXSW_ITEM32(reg, ritr, vlan_if_vlan_id, 0x08, 0, 12); + +/* reg_ritr_vlan_if_efid + * Egress FID. + * Used to connect the RIF to a bridge. + * Access: RW + * + * Note: Reserved when legacy bridge model is used and on Spectrum-1. + */ +MLXSW_ITEM32(reg, ritr, vlan_if_efid, 0x0C, 0, 16); /* FID Interface */ /* reg_ritr_fid_if_fid - * Filtering ID. Used to connect a bridge to the router. Only FIDs from - * the vFID range are supported. + * Filtering ID. Used to connect a bridge to the router. + * When legacy bridge model is used, only FIDs from the vFID range are + * supported. When unified bridge model is used, this is the egress FID for + * router to bridge. 
* Access: RW */ MLXSW_ITEM32(reg, ritr, fid_if_fid, 0x08, 0, 16); -static inline void mlxsw_reg_ritr_fid_set(char *payload, - enum mlxsw_reg_ritr_if_type rif_type, - u16 fid) -{ - if (rif_type == MLXSW_REG_RITR_FID_IF) - mlxsw_reg_ritr_fid_if_fid_set(payload, fid); - else - mlxsw_reg_ritr_vlan_if_vid_set(payload, fid); -} - /* Sub-port Interface */ /* reg_ritr_sp_if_lag @@ -6742,6 +6953,16 @@ MLXSW_ITEM32(reg, ritr, sp_if_lag, 0x08, 24, 1); */ MLXSW_ITEM32(reg, ritr, sp_if_system_port, 0x08, 0, 16); +/* reg_ritr_sp_if_efid + * Egress filtering ID. + * Used to connect the eRIF to a bridge if eRIF-ACL has modified the DMAC or + * the VID. + * Access: RW + * + * Note: Reserved when legacy bridge model is used. + */ +MLXSW_ITEM32(reg, ritr, sp_if_efid, 0x0C, 0, 16); + /* reg_ritr_sp_if_vid * VLAN ID. * Access: RW @@ -6918,6 +7139,20 @@ static inline void mlxsw_reg_ritr_mac_pack(char *payload, const char *mac) } static inline void +mlxsw_reg_ritr_vlan_if_pack(char *payload, bool enable, u16 rif, u16 vr_id, + u16 mtu, const char *mac, u8 mac_profile_id, + u16 vlan_id, u16 efid) +{ + enum mlxsw_reg_ritr_if_type type = MLXSW_REG_RITR_VLAN_IF; + + mlxsw_reg_ritr_pack(payload, enable, type, rif, vr_id, mtu); + mlxsw_reg_ritr_if_mac_memcpy_to(payload, mac); + mlxsw_reg_ritr_if_mac_profile_id_set(payload, mac_profile_id); + mlxsw_reg_ritr_vlan_if_vlan_id_set(payload, vlan_id); + mlxsw_reg_ritr_vlan_if_efid_set(payload, efid); +} + +static inline void mlxsw_reg_ritr_loopback_ipip_common_pack(char *payload, enum mlxsw_reg_ritr_loopback_ipip_type ipip_type, enum mlxsw_reg_ritr_loopback_ipip_options options, @@ -7848,11 +8083,10 @@ static inline void mlxsw_reg_ralue_pack4(char *payload, enum mlxsw_reg_ralxx_protocol protocol, enum mlxsw_reg_ralue_op op, u16 virtual_router, u8 prefix_len, - u32 *dip) + u32 dip) { mlxsw_reg_ralue_pack(payload, protocol, op, virtual_router, prefix_len); - if (dip) - mlxsw_reg_ralue_dip4_set(payload, *dip); + mlxsw_reg_ralue_dip4_set(payload, dip); } 
static inline void mlxsw_reg_ralue_pack6(char *payload, @@ -7862,8 +8096,7 @@ static inline void mlxsw_reg_ralue_pack6(char *payload, const void *dip) { mlxsw_reg_ralue_pack(payload, protocol, op, virtual_router, prefix_len); - if (dip) - mlxsw_reg_ralue_dip6_memcpy_to(payload, dip); + mlxsw_reg_ralue_dip6_memcpy_to(payload, dip); } static inline void @@ -8926,656 +9159,62 @@ mlxsw_reg_rmft2_ipv6_pack(char *payload, bool v, u16 offset, u16 virtual_router, mlxsw_reg_rmft2_sip6_mask_memcpy_to(payload, (void *)&sip6_mask); } -/* RXLTE - Router XLT Enable Register - * ---------------------------------- - * The RXLTE enables XLT (eXtended Lookup Table) LPM lookups if a capable - * XM is present on the system. - */ - -#define MLXSW_REG_RXLTE_ID 0x8050 -#define MLXSW_REG_RXLTE_LEN 0x0C - -MLXSW_REG_DEFINE(rxlte, MLXSW_REG_RXLTE_ID, MLXSW_REG_RXLTE_LEN); - -/* reg_rxlte_virtual_router - * Virtual router ID associated with the router interface. - * Range is 0..cap_max_virtual_routers-1 - * Access: Index - */ -MLXSW_ITEM32(reg, rxlte, virtual_router, 0x00, 0, 16); - -enum mlxsw_reg_rxlte_protocol { - MLXSW_REG_RXLTE_PROTOCOL_IPV4, - MLXSW_REG_RXLTE_PROTOCOL_IPV6, -}; - -/* reg_rxlte_protocol - * Access: Index - */ -MLXSW_ITEM32(reg, rxlte, protocol, 0x04, 0, 4); - -/* reg_rxlte_lpm_xlt_en - * Access: RW - */ -MLXSW_ITEM32(reg, rxlte, lpm_xlt_en, 0x08, 0, 1); - -static inline void mlxsw_reg_rxlte_pack(char *payload, u16 virtual_router, - enum mlxsw_reg_rxlte_protocol protocol, - bool lpm_xlt_en) -{ - MLXSW_REG_ZERO(rxlte, payload); - mlxsw_reg_rxlte_virtual_router_set(payload, virtual_router); - mlxsw_reg_rxlte_protocol_set(payload, protocol); - mlxsw_reg_rxlte_lpm_xlt_en_set(payload, lpm_xlt_en); -} - -/* RXLTM - Router XLT M select Register - * ------------------------------------ - * The RXLTM configures and selects the M for the XM lookups. 
- */ - -#define MLXSW_REG_RXLTM_ID 0x8051 -#define MLXSW_REG_RXLTM_LEN 0x14 - -MLXSW_REG_DEFINE(rxltm, MLXSW_REG_RXLTM_ID, MLXSW_REG_RXLTM_LEN); - -/* reg_rxltm_m0_val_v6 - * Global M0 value For IPv6. - * Range 0..128 - * Access: RW - */ -MLXSW_ITEM32(reg, rxltm, m0_val_v6, 0x10, 16, 8); - -/* reg_rxltm_m0_val_v4 - * Global M0 value For IPv4. - * Range 0..32 - * Access: RW - */ -MLXSW_ITEM32(reg, rxltm, m0_val_v4, 0x10, 0, 6); - -static inline void mlxsw_reg_rxltm_pack(char *payload, u8 m0_val_v4, u8 m0_val_v6) -{ - MLXSW_REG_ZERO(rxltm, payload); - mlxsw_reg_rxltm_m0_val_v6_set(payload, m0_val_v6); - mlxsw_reg_rxltm_m0_val_v4_set(payload, m0_val_v4); -} - -/* RLCMLD - Router LPM Cache ML Delete Register - * -------------------------------------------- - * The RLCMLD register is used to bulk delete the XLT-LPM cache ML entries. - * This can be used by SW when L is increased or decreased, thus need to - * remove entries with old ML values. - */ - -#define MLXSW_REG_RLCMLD_ID 0x8055 -#define MLXSW_REG_RLCMLD_LEN 0x30 - -MLXSW_REG_DEFINE(rlcmld, MLXSW_REG_RLCMLD_ID, MLXSW_REG_RLCMLD_LEN); - -enum mlxsw_reg_rlcmld_select { - MLXSW_REG_RLCMLD_SELECT_ML_ENTRIES, - MLXSW_REG_RLCMLD_SELECT_M_ENTRIES, - MLXSW_REG_RLCMLD_SELECT_M_AND_ML_ENTRIES, -}; - -/* reg_rlcmld_select - * Which entries to delete. - * Access: Index - */ -MLXSW_ITEM32(reg, rlcmld, select, 0x00, 16, 2); - -enum mlxsw_reg_rlcmld_filter_fields { - MLXSW_REG_RLCMLD_FILTER_FIELDS_BY_PROTOCOL = 0x04, - MLXSW_REG_RLCMLD_FILTER_FIELDS_BY_VIRTUAL_ROUTER = 0x08, - MLXSW_REG_RLCMLD_FILTER_FIELDS_BY_DIP = 0x10, -}; - -/* reg_rlcmld_filter_fields - * If a bit is '0' then the relevant field is ignored. - * Access: Index +/* REIV - Router Egress Interface to VID Register + * ---------------------------------------------- + * The REIV register maps {eRIF, egress_port} -> VID. + * This mapping is done at the egress, after the ACLs. 
+ * This mapping always takes effect after router, regardless of cast + * (for unicast/multicast/port-base multicast), regardless of eRIF type and + * regardless of bridge decisions (e.g. SFD for unicast or SMPE). + * Reserved when the RIF is a loopback RIF. + * + * Note: Reserved when legacy bridge model is used. */ -MLXSW_ITEM32(reg, rlcmld, filter_fields, 0x00, 0, 8); +#define MLXSW_REG_REIV_ID 0x8034 +#define MLXSW_REG_REIV_BASE_LEN 0x20 /* base length, without records */ +#define MLXSW_REG_REIV_REC_LEN 0x04 /* record length */ +#define MLXSW_REG_REIV_REC_MAX_COUNT 256 /* firmware limitation */ +#define MLXSW_REG_REIV_LEN (MLXSW_REG_REIV_BASE_LEN + \ + MLXSW_REG_REIV_REC_LEN * \ + MLXSW_REG_REIV_REC_MAX_COUNT) -enum mlxsw_reg_rlcmld_protocol { - MLXSW_REG_RLCMLD_PROTOCOL_UC_IPV4, - MLXSW_REG_RLCMLD_PROTOCOL_UC_IPV6, -}; - -/* reg_rlcmld_protocol - * Access: Index - */ -MLXSW_ITEM32(reg, rlcmld, protocol, 0x08, 0, 4); +MLXSW_REG_DEFINE(reiv, MLXSW_REG_REIV_ID, MLXSW_REG_REIV_LEN); -/* reg_rlcmld_virtual_router - * Virtual router ID. - * Range is 0..cap_max_virtual_routers-1 +/* reg_reiv_port_page + * Port page - elport_record[0] is 256*port_page. * Access: Index */ -MLXSW_ITEM32(reg, rlcmld, virtual_router, 0x0C, 0, 16); +MLXSW_ITEM32(reg, reiv, port_page, 0x00, 0, 4); -/* reg_rlcmld_dip - * The prefix of the route or of the marker that the object of the LPM - * is compared with. The most significant bits of the dip are the prefix. +/* reg_reiv_erif + * Egress RIF. + * Range is 0..cap_max_router_interfaces-1. 
* Access: Index */ -MLXSW_ITEM32(reg, rlcmld, dip4, 0x1C, 0, 32); -MLXSW_ITEM_BUF(reg, rlcmld, dip6, 0x10, 16); - -/* reg_rlcmld_dip_mask - * per bit: - * 0: no match - * 1: match - * Access: Index - */ -MLXSW_ITEM32(reg, rlcmld, dip_mask4, 0x2C, 0, 32); -MLXSW_ITEM_BUF(reg, rlcmld, dip_mask6, 0x20, 16); - -static inline void __mlxsw_reg_rlcmld_pack(char *payload, - enum mlxsw_reg_rlcmld_select select, - enum mlxsw_reg_rlcmld_protocol protocol, - u16 virtual_router) -{ - u8 filter_fields = MLXSW_REG_RLCMLD_FILTER_FIELDS_BY_PROTOCOL | - MLXSW_REG_RLCMLD_FILTER_FIELDS_BY_VIRTUAL_ROUTER | - MLXSW_REG_RLCMLD_FILTER_FIELDS_BY_DIP; - - MLXSW_REG_ZERO(rlcmld, payload); - mlxsw_reg_rlcmld_select_set(payload, select); - mlxsw_reg_rlcmld_filter_fields_set(payload, filter_fields); - mlxsw_reg_rlcmld_protocol_set(payload, protocol); - mlxsw_reg_rlcmld_virtual_router_set(payload, virtual_router); -} - -static inline void mlxsw_reg_rlcmld_pack4(char *payload, - enum mlxsw_reg_rlcmld_select select, - u16 virtual_router, - u32 dip, u32 dip_mask) -{ - __mlxsw_reg_rlcmld_pack(payload, select, - MLXSW_REG_RLCMLD_PROTOCOL_UC_IPV4, - virtual_router); - mlxsw_reg_rlcmld_dip4_set(payload, dip); - mlxsw_reg_rlcmld_dip_mask4_set(payload, dip_mask); -} - -static inline void mlxsw_reg_rlcmld_pack6(char *payload, - enum mlxsw_reg_rlcmld_select select, - u16 virtual_router, - const void *dip, const void *dip_mask) -{ - __mlxsw_reg_rlcmld_pack(payload, select, - MLXSW_REG_RLCMLD_PROTOCOL_UC_IPV6, - virtual_router); - mlxsw_reg_rlcmld_dip6_memcpy_to(payload, dip); - mlxsw_reg_rlcmld_dip_mask6_memcpy_to(payload, dip_mask); -} +MLXSW_ITEM32(reg, reiv, erif, 0x04, 0, 16); -/* RLPMCE - Router LPM Cache Enable Register - * ----------------------------------------- - * Allows disabling the LPM cache. Can be changed on the fly. 
- */ - -#define MLXSW_REG_RLPMCE_ID 0x8056 -#define MLXSW_REG_RLPMCE_LEN 0x4 - -MLXSW_REG_DEFINE(rlpmce, MLXSW_REG_RLPMCE_ID, MLXSW_REG_RLPMCE_LEN); - -/* reg_rlpmce_flush - * Flush: - * 0: do not flush the cache (default) - * 1: flush (clear) the cache - * Access: WO - */ -MLXSW_ITEM32(reg, rlpmce, flush, 0x00, 4, 1); - -/* reg_rlpmce_disable - * LPM cache: - * 0: enabled (default) - * 1: disabled - * Access: RW - */ -MLXSW_ITEM32(reg, rlpmce, disable, 0x00, 0, 1); - -static inline void mlxsw_reg_rlpmce_pack(char *payload, bool flush, - bool disable) -{ - MLXSW_REG_ZERO(rlpmce, payload); - mlxsw_reg_rlpmce_flush_set(payload, flush); - mlxsw_reg_rlpmce_disable_set(payload, disable); -} - -/* Note that XLTQ, XMDR, XRMT and XRALXX register positions violate the rule - * of ordering register definitions by the ID. However, XRALXX pack helpers are - * using RALXX pack helpers, RALXX registers have higher IDs. - * Also XMDR is using RALUE enums. XLRQ and XRMT are just put alongside with the - * related registers. - */ - -/* XLTQ - XM Lookup Table Query Register - * ------------------------------------- - */ -#define MLXSW_REG_XLTQ_ID 0x7802 -#define MLXSW_REG_XLTQ_LEN 0x2C - -MLXSW_REG_DEFINE(xltq, MLXSW_REG_XLTQ_ID, MLXSW_REG_XLTQ_LEN); - -enum mlxsw_reg_xltq_xm_device_id { - MLXSW_REG_XLTQ_XM_DEVICE_ID_UNKNOWN, - MLXSW_REG_XLTQ_XM_DEVICE_ID_XLT = 0xCF71, -}; - -/* reg_xltq_xm_device_id - * XM device ID. 
- * Access: RO - */ -MLXSW_ITEM32(reg, xltq, xm_device_id, 0x04, 0, 16); - -/* reg_xltq_xlt_cap_ipv4_lpm - * Access: RO - */ -MLXSW_ITEM32(reg, xltq, xlt_cap_ipv4_lpm, 0x10, 0, 1); - -/* reg_xltq_xlt_cap_ipv6_lpm - * Access: RO - */ -MLXSW_ITEM32(reg, xltq, xlt_cap_ipv6_lpm, 0x10, 1, 1); - -/* reg_xltq_cap_xlt_entries - * Number of XLT entries - * Note: SW must not fill more than 80% in order to avoid overflow - * Access: RO - */ -MLXSW_ITEM32(reg, xltq, cap_xlt_entries, 0x20, 0, 32); - -/* reg_xltq_cap_xlt_mtable - * XLT M-Table max size - * Access: RO - */ -MLXSW_ITEM32(reg, xltq, cap_xlt_mtable, 0x24, 0, 32); - -static inline void mlxsw_reg_xltq_pack(char *payload) -{ - MLXSW_REG_ZERO(xltq, payload); -} - -static inline void mlxsw_reg_xltq_unpack(char *payload, u16 *xm_device_id, bool *xlt_cap_ipv4_lpm, - bool *xlt_cap_ipv6_lpm, u32 *cap_xlt_entries, - u32 *cap_xlt_mtable) -{ - *xm_device_id = mlxsw_reg_xltq_xm_device_id_get(payload); - *xlt_cap_ipv4_lpm = mlxsw_reg_xltq_xlt_cap_ipv4_lpm_get(payload); - *xlt_cap_ipv6_lpm = mlxsw_reg_xltq_xlt_cap_ipv6_lpm_get(payload); - *cap_xlt_entries = mlxsw_reg_xltq_cap_xlt_entries_get(payload); - *cap_xlt_mtable = mlxsw_reg_xltq_cap_xlt_mtable_get(payload); -} - -/* XMDR - XM Direct Register - * ------------------------- - * The XMDR allows direct access to the XM device via the switch. - * Working in synchronous mode. FW waits for response from the XLT - * for each command. FW acks the XMDR accordingly. 
- */ -#define MLXSW_REG_XMDR_ID 0x7803 -#define MLXSW_REG_XMDR_BASE_LEN 0x20 -#define MLXSW_REG_XMDR_TRANS_LEN 0x80 -#define MLXSW_REG_XMDR_LEN (MLXSW_REG_XMDR_BASE_LEN + \ - MLXSW_REG_XMDR_TRANS_LEN) - -MLXSW_REG_DEFINE(xmdr, MLXSW_REG_XMDR_ID, MLXSW_REG_XMDR_LEN); - -/* reg_xmdr_bulk_entry - * Bulk_entry - * 0: Last entry - immediate flush of XRT-cache - * 1: Bulk entry - do not flush the XRT-cache - * Access: OP - */ -MLXSW_ITEM32(reg, xmdr, bulk_entry, 0x04, 8, 1); - -/* reg_xmdr_num_rec - * Number of records for Direct access to XM - * Supported: 0..4 commands (except NOP which is a filler) - * 0 commands is reserved when bulk_entry = 1. - * 0 commands is allowed when bulk_entry = 0 for immediate XRT-cache flush. +/* reg_reiv_rec_update + * Update enable (when write): + * 0 - Do not update the entry. + * 1 - Update the entry. * Access: OP */ -MLXSW_ITEM32(reg, xmdr, num_rec, 0x04, 0, 4); - -/* reg_xmdr_reply_vect - * Reply Vector - * Bit i for command index i+1 - * values per bit: - * 0: failed - * 1: succeeded - * e.g. if commands 1, 2, 4 succeeded and command 3 failed then binary - * value will be 0b1011 - * Access: RO - */ -MLXSW_ITEM_BIT_ARRAY(reg, xmdr, reply_vect, 0x08, 4, 1); - -static inline void mlxsw_reg_xmdr_pack(char *payload, bool bulk_entry) -{ - MLXSW_REG_ZERO(xmdr, payload); - mlxsw_reg_xmdr_bulk_entry_set(payload, bulk_entry); -} - -enum mlxsw_reg_xmdr_c_cmd_id { - MLXSW_REG_XMDR_C_CMD_ID_LT_ROUTE_V4 = 0x30, - MLXSW_REG_XMDR_C_CMD_ID_LT_ROUTE_V6 = 0x31, -}; - -#define MLXSW_REG_XMDR_C_LT_ROUTE_V4_LEN 32 -#define MLXSW_REG_XMDR_C_LT_ROUTE_V6_LEN 48 - -/* reg_xmdr_c_cmd_id - */ -MLXSW_ITEM32(reg, xmdr_c, cmd_id, 0x00, 24, 8); - -/* reg_xmdr_c_seq_number - */ -MLXSW_ITEM32(reg, xmdr_c, seq_number, 0x00, 12, 12); - -enum mlxsw_reg_xmdr_c_ltr_op { - /* Activity is set */ - MLXSW_REG_XMDR_C_LTR_OP_WRITE = 0, - /* There is no update mask. All fields are updated. 
*/ - MLXSW_REG_XMDR_C_LTR_OP_UPDATE = 1, - MLXSW_REG_XMDR_C_LTR_OP_DELETE = 2, -}; - -/* reg_xmdr_c_ltr_op - * Operation. - */ -MLXSW_ITEM32(reg, xmdr_c, ltr_op, 0x04, 24, 8); - -/* reg_xmdr_c_ltr_trap_action - * Trap action. - * Values are defined in enum mlxsw_reg_ralue_trap_action. - */ -MLXSW_ITEM32(reg, xmdr_c, ltr_trap_action, 0x04, 20, 4); - -enum mlxsw_reg_xmdr_c_ltr_trap_id_num { - MLXSW_REG_XMDR_C_LTR_TRAP_ID_NUM_RTR_INGRESS0, - MLXSW_REG_XMDR_C_LTR_TRAP_ID_NUM_RTR_INGRESS1, - MLXSW_REG_XMDR_C_LTR_TRAP_ID_NUM_RTR_INGRESS2, - MLXSW_REG_XMDR_C_LTR_TRAP_ID_NUM_RTR_INGRESS3, -}; - -/* reg_xmdr_c_ltr_trap_id_num - * Trap-ID number. - */ -MLXSW_ITEM32(reg, xmdr_c, ltr_trap_id_num, 0x04, 16, 4); - -/* reg_xmdr_c_ltr_virtual_router - * Virtual Router ID. - * Range is 0..cap_max_virtual_routers-1 - */ -MLXSW_ITEM32(reg, xmdr_c, ltr_virtual_router, 0x04, 0, 16); - -/* reg_xmdr_c_ltr_prefix_len - * Number of bits in the prefix of the LPM route. - */ -MLXSW_ITEM32(reg, xmdr_c, ltr_prefix_len, 0x08, 24, 8); - -/* reg_xmdr_c_ltr_bmp_len - * The best match prefix length in the case that there is no match for - * longer prefixes. - * If (entry_type != MARKER_ENTRY), bmp_len must be equal to prefix_len - */ -MLXSW_ITEM32(reg, xmdr_c, ltr_bmp_len, 0x08, 16, 8); - -/* reg_xmdr_c_ltr_entry_type - * Entry type. - * Values are defined in enum mlxsw_reg_ralue_entry_type. - */ -MLXSW_ITEM32(reg, xmdr_c, ltr_entry_type, 0x08, 4, 4); - -enum mlxsw_reg_xmdr_c_ltr_action_type { - MLXSW_REG_XMDR_C_LTR_ACTION_TYPE_LOCAL, - MLXSW_REG_XMDR_C_LTR_ACTION_TYPE_REMOTE, - MLXSW_REG_XMDR_C_LTR_ACTION_TYPE_IP2ME, -}; - -/* reg_xmdr_c_ltr_action_type - * Action Type. - */ -MLXSW_ITEM32(reg, xmdr_c, ltr_action_type, 0x08, 0, 4); - -/* reg_xmdr_c_ltr_erif - * Egress Router Interface. - * Only relevant in case of LOCAL action. 
- */ -MLXSW_ITEM32(reg, xmdr_c, ltr_erif, 0x10, 0, 16); +MLXSW_ITEM32_INDEXED(reg, reiv, rec_update, MLXSW_REG_REIV_BASE_LEN, 31, 1, + MLXSW_REG_REIV_REC_LEN, 0x00, false); -/* reg_xmdr_c_ltr_adjacency_index - * Points to the first entry of the group-based ECMP. - * Only relevant in case of REMOTE action. - */ -MLXSW_ITEM32(reg, xmdr_c, ltr_adjacency_index, 0x10, 0, 24); - -#define MLXSW_REG_XMDR_C_LTR_POINTER_TO_TUNNEL_DISABLED_MAGIC 0xFFFFFF - -/* reg_xmdr_c_ltr_pointer_to_tunnel - * Only relevant in case of IP2ME action. - */ -MLXSW_ITEM32(reg, xmdr_c, ltr_pointer_to_tunnel, 0x10, 0, 24); - -/* reg_xmdr_c_ltr_ecmp_size - * Amount of sequential entries starting - * from the adjacency_index (the number of ECMPs). - * The valid range is 1-64, 512, 1024, 2048 and 4096. - * Only relevant in case of REMOTE action. - */ -MLXSW_ITEM32(reg, xmdr_c, ltr_ecmp_size, 0x14, 0, 32); - -/* reg_xmdr_c_ltr_dip* - * The prefix of the route or of the marker that the object of the LPM - * is compared with. The most significant bits of the dip are the prefix. - * The least significant bits must be '0' if the prefix_len is smaller - * than 128 for IPv6 or smaller than 32 for IPv4. 
- */ -MLXSW_ITEM32(reg, xmdr_c, ltr_dip4, 0x1C, 0, 32); -MLXSW_ITEM_BUF(reg, xmdr_c, ltr_dip6, 0x1C, 16); - -static inline void -mlxsw_reg_xmdr_c_ltr_pack(char *xmdr_payload, unsigned int trans_offset, - enum mlxsw_reg_xmdr_c_cmd_id cmd_id, u16 seq_number, - enum mlxsw_reg_xmdr_c_ltr_op op, u16 virtual_router, - u8 prefix_len) -{ - char *payload = xmdr_payload + MLXSW_REG_XMDR_BASE_LEN + trans_offset; - u8 num_rec = mlxsw_reg_xmdr_num_rec_get(xmdr_payload); - - mlxsw_reg_xmdr_num_rec_set(xmdr_payload, num_rec + 1); - - mlxsw_reg_xmdr_c_cmd_id_set(payload, cmd_id); - mlxsw_reg_xmdr_c_seq_number_set(payload, seq_number); - mlxsw_reg_xmdr_c_ltr_op_set(payload, op); - mlxsw_reg_xmdr_c_ltr_virtual_router_set(payload, virtual_router); - mlxsw_reg_xmdr_c_ltr_prefix_len_set(payload, prefix_len); - mlxsw_reg_xmdr_c_ltr_entry_type_set(payload, - MLXSW_REG_RALUE_ENTRY_TYPE_ROUTE_ENTRY); - mlxsw_reg_xmdr_c_ltr_bmp_len_set(payload, prefix_len); -} - -static inline unsigned int -mlxsw_reg_xmdr_c_ltr_pack4(char *xmdr_payload, unsigned int trans_offset, - u16 seq_number, enum mlxsw_reg_xmdr_c_ltr_op op, - u16 virtual_router, u8 prefix_len, u32 *dip) -{ - char *payload = xmdr_payload + MLXSW_REG_XMDR_BASE_LEN + trans_offset; - - mlxsw_reg_xmdr_c_ltr_pack(xmdr_payload, trans_offset, - MLXSW_REG_XMDR_C_CMD_ID_LT_ROUTE_V4, - seq_number, op, virtual_router, prefix_len); - if (dip) - mlxsw_reg_xmdr_c_ltr_dip4_set(payload, *dip); - return MLXSW_REG_XMDR_C_LT_ROUTE_V4_LEN; -} - -static inline unsigned int -mlxsw_reg_xmdr_c_ltr_pack6(char *xmdr_payload, unsigned int trans_offset, - u16 seq_number, enum mlxsw_reg_xmdr_c_ltr_op op, - u16 virtual_router, u8 prefix_len, const void *dip) -{ - char *payload = xmdr_payload + MLXSW_REG_XMDR_BASE_LEN + trans_offset; - - mlxsw_reg_xmdr_c_ltr_pack(xmdr_payload, trans_offset, - MLXSW_REG_XMDR_C_CMD_ID_LT_ROUTE_V6, - seq_number, op, virtual_router, prefix_len); - if (dip) - mlxsw_reg_xmdr_c_ltr_dip6_memcpy_to(payload, dip); - return 
MLXSW_REG_XMDR_C_LT_ROUTE_V6_LEN; -} - -static inline void -mlxsw_reg_xmdr_c_ltr_act_remote_pack(char *xmdr_payload, unsigned int trans_offset, - enum mlxsw_reg_ralue_trap_action trap_action, - enum mlxsw_reg_xmdr_c_ltr_trap_id_num trap_id_num, - u32 adjacency_index, u16 ecmp_size) -{ - char *payload = xmdr_payload + MLXSW_REG_XMDR_BASE_LEN + trans_offset; - - mlxsw_reg_xmdr_c_ltr_action_type_set(payload, MLXSW_REG_XMDR_C_LTR_ACTION_TYPE_REMOTE); - mlxsw_reg_xmdr_c_ltr_trap_action_set(payload, trap_action); - mlxsw_reg_xmdr_c_ltr_trap_id_num_set(payload, trap_id_num); - mlxsw_reg_xmdr_c_ltr_adjacency_index_set(payload, adjacency_index); - mlxsw_reg_xmdr_c_ltr_ecmp_size_set(payload, ecmp_size); -} - -static inline void -mlxsw_reg_xmdr_c_ltr_act_local_pack(char *xmdr_payload, unsigned int trans_offset, - enum mlxsw_reg_ralue_trap_action trap_action, - enum mlxsw_reg_xmdr_c_ltr_trap_id_num trap_id_num, u16 erif) -{ - char *payload = xmdr_payload + MLXSW_REG_XMDR_BASE_LEN + trans_offset; - - mlxsw_reg_xmdr_c_ltr_action_type_set(payload, MLXSW_REG_XMDR_C_LTR_ACTION_TYPE_LOCAL); - mlxsw_reg_xmdr_c_ltr_trap_action_set(payload, trap_action); - mlxsw_reg_xmdr_c_ltr_trap_id_num_set(payload, trap_id_num); - mlxsw_reg_xmdr_c_ltr_erif_set(payload, erif); -} - -static inline void mlxsw_reg_xmdr_c_ltr_act_ip2me_pack(char *xmdr_payload, - unsigned int trans_offset) -{ - char *payload = xmdr_payload + MLXSW_REG_XMDR_BASE_LEN + trans_offset; - - mlxsw_reg_xmdr_c_ltr_action_type_set(payload, MLXSW_REG_XMDR_C_LTR_ACTION_TYPE_IP2ME); - mlxsw_reg_xmdr_c_ltr_pointer_to_tunnel_set(payload, - MLXSW_REG_XMDR_C_LTR_POINTER_TO_TUNNEL_DISABLED_MAGIC); -} - -static inline void mlxsw_reg_xmdr_c_ltr_act_ip2me_tun_pack(char *xmdr_payload, - unsigned int trans_offset, - u32 pointer_to_tunnel) -{ - char *payload = xmdr_payload + MLXSW_REG_XMDR_BASE_LEN + trans_offset; - - mlxsw_reg_xmdr_c_ltr_action_type_set(payload, MLXSW_REG_XMDR_C_LTR_ACTION_TYPE_IP2ME); - 
mlxsw_reg_xmdr_c_ltr_pointer_to_tunnel_set(payload, pointer_to_tunnel); -} - -/* XRMT - XM Router M Table Register - * --------------------------------- - * The XRMT configures the M-Table for the XLT-LPM. - */ -#define MLXSW_REG_XRMT_ID 0x7810 -#define MLXSW_REG_XRMT_LEN 0x14 - -MLXSW_REG_DEFINE(xrmt, MLXSW_REG_XRMT_ID, MLXSW_REG_XRMT_LEN); - -/* reg_xrmt_index - * Index in M-Table. - * Range 0..cap_xlt_mtable-1 - * Access: Index - */ -MLXSW_ITEM32(reg, xrmt, index, 0x04, 0, 20); - -/* reg_xrmt_l0_val +/* reg_reiv_rec_evid + * Egress VID. + * Range is 0..4095. * Access: RW */ -MLXSW_ITEM32(reg, xrmt, l0_val, 0x10, 24, 8); - -static inline void mlxsw_reg_xrmt_pack(char *payload, u32 index, u8 l0_val) -{ - MLXSW_REG_ZERO(xrmt, payload); - mlxsw_reg_xrmt_index_set(payload, index); - mlxsw_reg_xrmt_l0_val_set(payload, l0_val); -} - -/* XRALTA - XM Router Algorithmic LPM Tree Allocation Register - * ----------------------------------------------------------- - * The XRALTA is used to allocate the XLT LPM trees. - * - * This register embeds original RALTA register. - */ -#define MLXSW_REG_XRALTA_ID 0x7811 -#define MLXSW_REG_XRALTA_LEN 0x08 -#define MLXSW_REG_XRALTA_RALTA_OFFSET 0x04 +MLXSW_ITEM32_INDEXED(reg, reiv, rec_evid, MLXSW_REG_REIV_BASE_LEN, 0, 12, + MLXSW_REG_REIV_REC_LEN, 0x00, false); -MLXSW_REG_DEFINE(xralta, MLXSW_REG_XRALTA_ID, MLXSW_REG_XRALTA_LEN); - -static inline void mlxsw_reg_xralta_pack(char *payload, bool alloc, - enum mlxsw_reg_ralxx_protocol protocol, - u8 tree_id) +static inline void mlxsw_reg_reiv_pack(char *payload, u8 port_page, u16 erif) { - char *ralta_payload = payload + MLXSW_REG_XRALTA_RALTA_OFFSET; - - MLXSW_REG_ZERO(xralta, payload); - mlxsw_reg_ralta_pack(ralta_payload, alloc, protocol, tree_id); -} - -/* XRALST - XM Router Algorithmic LPM Structure Tree Register - * ---------------------------------------------------------- - * The XRALST is used to set and query the structure of an XLT LPM tree. 
- * - * This register embeds original RALST register. - */ -#define MLXSW_REG_XRALST_ID 0x7812 -#define MLXSW_REG_XRALST_LEN 0x108 -#define MLXSW_REG_XRALST_RALST_OFFSET 0x04 - -MLXSW_REG_DEFINE(xralst, MLXSW_REG_XRALST_ID, MLXSW_REG_XRALST_LEN); - -static inline void mlxsw_reg_xralst_pack(char *payload, u8 root_bin, u8 tree_id) -{ - char *ralst_payload = payload + MLXSW_REG_XRALST_RALST_OFFSET; - - MLXSW_REG_ZERO(xralst, payload); - mlxsw_reg_ralst_pack(ralst_payload, root_bin, tree_id); -} - -static inline void mlxsw_reg_xralst_bin_pack(char *payload, u8 bin_number, - u8 left_child_bin, - u8 right_child_bin) -{ - char *ralst_payload = payload + MLXSW_REG_XRALST_RALST_OFFSET; - - mlxsw_reg_ralst_bin_pack(ralst_payload, bin_number, left_child_bin, - right_child_bin); -} - -/* XRALTB - XM Router Algorithmic LPM Tree Binding Register - * -------------------------------------------------------- - * The XRALTB register is used to bind virtual router and protocol - * to an allocated LPM tree. - * - * This register embeds original RALTB register. 
- */ -#define MLXSW_REG_XRALTB_ID 0x7813 -#define MLXSW_REG_XRALTB_LEN 0x08 -#define MLXSW_REG_XRALTB_RALTB_OFFSET 0x04 - -MLXSW_REG_DEFINE(xraltb, MLXSW_REG_XRALTB_ID, MLXSW_REG_XRALTB_LEN); - -static inline void mlxsw_reg_xraltb_pack(char *payload, u16 virtual_router, - enum mlxsw_reg_ralxx_protocol protocol, - u8 tree_id) -{ - char *raltb_payload = payload + MLXSW_REG_XRALTB_RALTB_OFFSET; - - MLXSW_REG_ZERO(xraltb, payload); - mlxsw_reg_raltb_pack(raltb_payload, virtual_router, protocol, tree_id); + MLXSW_REG_ZERO(reiv, payload); + mlxsw_reg_reiv_port_page_set(payload, port_page); + mlxsw_reg_reiv_erif_set(payload, erif); } /* MFCR - Management Fan Control Register @@ -13011,6 +12650,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(spvmlr), MLXSW_REG(spvc), MLXSW_REG(spevet), + MLXSW_REG(smpe), MLXSW_REG(sftr2), MLXSW_REG(smid2), MLXSW_REG(cwtp), @@ -13084,16 +12724,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(rigr2), MLXSW_REG(recr2), MLXSW_REG(rmft2), - MLXSW_REG(rxlte), - MLXSW_REG(rxltm), - MLXSW_REG(rlcmld), - MLXSW_REG(rlpmce), - MLXSW_REG(xltq), - MLXSW_REG(xmdr), - MLXSW_REG(xrmt), - MLXSW_REG(xralta), - MLXSW_REG(xralst), - MLXSW_REG(xraltb), + MLXSW_REG(reiv), MLXSW_REG(mfcr), MLXSW_REG(mfsc), MLXSW_REG(mfsm), diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index cafd206e8d7e..e58acd397edf 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2105,9 +2105,6 @@ static int mlxsw_sp_port_module_info_init(struct mlxsw_sp *mlxsw_sp) return -ENOMEM; for (i = 1; i < max_ports; i++) { - if (mlxsw_core_port_is_xm(mlxsw_sp->core, i)) - continue; - port_mapping = &mlxsw_sp->port_mapping[i]; err = mlxsw_sp_port_module_info_get(mlxsw_sp, i, port_mapping); if (err) @@ -3235,6 +3232,7 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->router_ops = &mlxsw_sp1_router_ops; 
mlxsw_sp->listeners = mlxsw_sp1_listener; mlxsw_sp->listeners_count = ARRAY_SIZE(mlxsw_sp1_listener); + mlxsw_sp->fid_family_arr = mlxsw_sp1_fid_family_arr; mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP1; return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack); @@ -3267,6 +3265,7 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->router_ops = &mlxsw_sp2_router_ops; mlxsw_sp->listeners = mlxsw_sp2_listener; mlxsw_sp->listeners_count = ARRAY_SIZE(mlxsw_sp2_listener); + mlxsw_sp->fid_family_arr = mlxsw_sp2_fid_family_arr; mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP2; return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack); @@ -3299,6 +3298,7 @@ static int mlxsw_sp3_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->router_ops = &mlxsw_sp2_router_ops; mlxsw_sp->listeners = mlxsw_sp2_listener; mlxsw_sp->listeners_count = ARRAY_SIZE(mlxsw_sp2_listener); + mlxsw_sp->fid_family_arr = mlxsw_sp2_fid_family_arr; mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3; return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack); @@ -3331,6 +3331,7 @@ static int mlxsw_sp4_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->router_ops = &mlxsw_sp2_router_ops; mlxsw_sp->listeners = mlxsw_sp2_listener; mlxsw_sp->listeners_count = ARRAY_SIZE(mlxsw_sp2_listener); + mlxsw_sp->fid_family_arr = mlxsw_sp2_fid_family_arr; mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP4; return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack); @@ -3379,7 +3380,7 @@ static const struct mlxsw_config_profile mlxsw_sp1_config_profile = { .max_mid = MLXSW_SP_MID_MAX, .used_flood_tables = 1, .used_flood_mode = 1, - .flood_mode = 3, + .flood_mode = MLXSW_CMD_MBOX_CONFIG_PROFILE_FLOOD_MODE_MIXED, .max_fid_flood_tables = 3, .fid_flood_table_size = MLXSW_SP_FID_FLOOD_TABLE_SIZE, .used_max_ib_mc = 1, @@ -3403,15 +3404,13 @@ static const struct mlxsw_config_profile mlxsw_sp2_config_profile = { .max_mid = MLXSW_SP_MID_MAX, .used_flood_tables = 1, 
.used_flood_mode = 1, - .flood_mode = 3, + .flood_mode = MLXSW_CMD_MBOX_CONFIG_PROFILE_FLOOD_MODE_MIXED, .max_fid_flood_tables = 3, .fid_flood_table_size = MLXSW_SP_FID_FLOOD_TABLE_SIZE, .used_max_ib_mc = 1, .max_ib_mc = 0, .used_max_pkey = 1, .max_pkey = 0, - .used_kvh_xlt_cache_mode = 1, - .kvh_xlt_cache_mode = 1, .swid_config = { { .used_type = 1, @@ -3585,6 +3584,25 @@ mlxsw_sp_resources_rif_mac_profile_register(struct mlxsw_core *mlxsw_core) &size_params); } +static int mlxsw_sp_resources_rifs_register(struct mlxsw_core *mlxsw_core) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_core); + struct devlink_resource_size_params size_params; + u64 max_rifs; + + if (!MLXSW_CORE_RES_VALID(mlxsw_core, MAX_RIFS)) + return -EIO; + + max_rifs = MLXSW_CORE_RES_GET(mlxsw_core, MAX_RIFS); + devlink_resource_size_params_init(&size_params, max_rifs, max_rifs, + 1, DEVLINK_RESOURCE_UNIT_ENTRY); + + return devlink_resource_register(devlink, "rifs", max_rifs, + MLXSW_SP_RESOURCE_RIFS, + DEVLINK_RESOURCE_ID_PARENT_TOP, + &size_params); +} + static int mlxsw_sp1_resources_register(struct mlxsw_core *mlxsw_core) { int err; @@ -3609,8 +3627,13 @@ static int mlxsw_sp1_resources_register(struct mlxsw_core *mlxsw_core) if (err) goto err_resources_rif_mac_profile_register; + err = mlxsw_sp_resources_rifs_register(mlxsw_core); + if (err) + goto err_resources_rifs_register; + return 0; +err_resources_rifs_register: err_resources_rif_mac_profile_register: err_policer_resources_register: err_resources_counter_register: @@ -3643,8 +3666,13 @@ static int mlxsw_sp2_resources_register(struct mlxsw_core *mlxsw_core) if (err) goto err_resources_rif_mac_profile_register; + err = mlxsw_sp_resources_rifs_register(mlxsw_core); + if (err) + goto err_resources_rifs_register; + return 0; +err_resources_rifs_register: err_resources_rif_mac_profile_register: err_policer_resources_register: err_resources_counter_register: diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index a60d2bbd3aa6..80006a631333 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -68,6 +68,7 @@ enum mlxsw_sp_resource_id { MLXSW_SP_RESOURCE_GLOBAL_POLICERS, MLXSW_SP_RESOURCE_SINGLE_RATE_POLICERS, MLXSW_SP_RESOURCE_RIF_MAC_PROFILES, + MLXSW_SP_RESOURCE_RIFS, }; struct mlxsw_sp_port; @@ -83,7 +84,7 @@ struct mlxsw_sp_upper { enum mlxsw_sp_rif_type { MLXSW_SP_RIF_TYPE_SUBPORT, - MLXSW_SP_RIF_TYPE_VLAN, + MLXSW_SP_RIF_TYPE_VLAN_EMU, MLXSW_SP_RIF_TYPE_FID, MLXSW_SP_RIF_TYPE_IPIP_LB, /* IP-in-IP loopback. */ MLXSW_SP_RIF_TYPE_MAX, @@ -210,6 +211,7 @@ struct mlxsw_sp { const struct mlxsw_sp_mall_ops *mall_ops; const struct mlxsw_sp_router_ops *router_ops; const struct mlxsw_listener *listeners; + const struct mlxsw_sp_fid_family **fid_family_arr; size_t listeners_count; u32 lowest_shaper_bs; struct rhashtable ipv6_addr_ht; @@ -1236,7 +1238,6 @@ int mlxsw_sp_setup_tc_block_qevent_mark(struct mlxsw_sp_port *mlxsw_sp_port, /* spectrum_fid.c */ bool mlxsw_sp_fid_is_dummy(struct mlxsw_sp *mlxsw_sp, u16 fid_index); -bool mlxsw_sp_fid_lag_vid_valid(const struct mlxsw_sp_fid *fid); struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_index(struct mlxsw_sp *mlxsw_sp, u16 fid_index); int mlxsw_sp_fid_nve_ifindex(const struct mlxsw_sp_fid *fid, int *nve_ifindex); @@ -1286,6 +1287,9 @@ void mlxsw_sp_port_fids_fini(struct mlxsw_sp_port *mlxsw_sp_port); int mlxsw_sp_fids_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_fids_fini(struct mlxsw_sp *mlxsw_sp); +extern const struct mlxsw_sp_fid_family *mlxsw_sp1_fid_family_arr[]; +extern const struct mlxsw_sp_fid_family *mlxsw_sp2_fid_family_arr[]; + /* spectrum_mr.c */ enum mlxsw_sp_mr_route_prio { MLXSW_SP_MR_ROUTE_PRIO_SG, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c index ce80931f0402..fb04fbec7c82 100644 --- 
a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c @@ -22,11 +22,18 @@ struct mlxsw_sp_fid_core { unsigned int *port_fid_mappings; }; +struct mlxsw_sp_fid_port_vid { + struct list_head list; + u16 local_port; + u16 vid; +}; + struct mlxsw_sp_fid { struct list_head list; struct mlxsw_sp_rif *rif; refcount_t ref_count; u16 fid_index; + u16 fid_offset; struct mlxsw_sp_fid_family *fid_family; struct rhash_head ht_node; @@ -37,6 +44,7 @@ struct mlxsw_sp_fid { int nve_ifindex; u8 vni_valid:1, nve_flood_index_valid:1; + struct list_head port_vid_list; /* Ordered by local port. */ }; struct mlxsw_sp_fid_8021q { @@ -81,10 +89,9 @@ struct mlxsw_sp_fid_ops { struct mlxsw_sp_port *port, u16 vid); void (*port_vid_unmap)(struct mlxsw_sp_fid *fid, struct mlxsw_sp_port *port, u16 vid); - int (*vni_set)(struct mlxsw_sp_fid *fid, __be32 vni); + int (*vni_set)(struct mlxsw_sp_fid *fid); void (*vni_clear)(struct mlxsw_sp_fid *fid); - int (*nve_flood_index_set)(struct mlxsw_sp_fid *fid, - u32 nve_flood_index); + int (*nve_flood_index_set)(struct mlxsw_sp_fid *fid); void (*nve_flood_index_clear)(struct mlxsw_sp_fid *fid); void (*fdb_clear_offload)(const struct mlxsw_sp_fid *fid, const struct net_device *nve_dev); @@ -102,7 +109,6 @@ struct mlxsw_sp_fid_family { enum mlxsw_sp_rif_type rif_type; const struct mlxsw_sp_fid_ops *ops; struct mlxsw_sp *mlxsw_sp; - u8 lag_vid_valid:1; }; static const int mlxsw_sp_sfgc_uc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = { @@ -137,11 +143,6 @@ bool mlxsw_sp_fid_is_dummy(struct mlxsw_sp *mlxsw_sp, u16 fid_index) return fid_family->start_index == fid_index; } -bool mlxsw_sp_fid_lag_vid_valid(const struct mlxsw_sp_fid *fid) -{ - return fid->fid_family->lag_vid_valid; -} - struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_index(struct mlxsw_sp *mlxsw_sp, u16 fid_index) { @@ -206,17 +207,20 @@ int mlxsw_sp_fid_nve_flood_index_set(struct mlxsw_sp_fid *fid, const struct mlxsw_sp_fid_ops *ops = fid_family->ops; 
int err; - if (WARN_ON(!ops->nve_flood_index_set || fid->nve_flood_index_valid)) + if (WARN_ON(fid->nve_flood_index_valid)) return -EINVAL; - err = ops->nve_flood_index_set(fid, nve_flood_index); - if (err) - return err; - fid->nve_flood_index = nve_flood_index; fid->nve_flood_index_valid = true; + err = ops->nve_flood_index_set(fid); + if (err) + goto err_nve_flood_index_set; return 0; + +err_nve_flood_index_set: + fid->nve_flood_index_valid = false; + return err; } void mlxsw_sp_fid_nve_flood_index_clear(struct mlxsw_sp_fid *fid) @@ -224,7 +228,7 @@ void mlxsw_sp_fid_nve_flood_index_clear(struct mlxsw_sp_fid *fid) struct mlxsw_sp_fid_family *fid_family = fid->fid_family; const struct mlxsw_sp_fid_ops *ops = fid_family->ops; - if (WARN_ON(!ops->nve_flood_index_clear || !fid->nve_flood_index_valid)) + if (WARN_ON(!fid->nve_flood_index_valid)) return; fid->nve_flood_index_valid = false; @@ -244,7 +248,7 @@ int mlxsw_sp_fid_vni_set(struct mlxsw_sp_fid *fid, enum mlxsw_sp_nve_type type, struct mlxsw_sp *mlxsw_sp = fid_family->mlxsw_sp; int err; - if (WARN_ON(!ops->vni_set || fid->vni_valid)) + if (WARN_ON(fid->vni_valid)) return -EINVAL; fid->nve_type = type; @@ -256,15 +260,15 @@ int mlxsw_sp_fid_vni_set(struct mlxsw_sp_fid *fid, enum mlxsw_sp_nve_type type, if (err) return err; - err = ops->vni_set(fid, vni); + fid->vni_valid = true; + err = ops->vni_set(fid); if (err) goto err_vni_set; - fid->vni_valid = true; - return 0; err_vni_set: + fid->vni_valid = false; rhashtable_remove_fast(&mlxsw_sp->fid_core->vni_ht, &fid->vni_ht_node, mlxsw_sp_fid_vni_ht_params); return err; @@ -276,7 +280,7 @@ void mlxsw_sp_fid_vni_clear(struct mlxsw_sp_fid *fid) const struct mlxsw_sp_fid_ops *ops = fid_family->ops; struct mlxsw_sp *mlxsw_sp = fid_family->mlxsw_sp; - if (WARN_ON(!ops->vni_clear || !fid->vni_valid)) + if (WARN_ON(!fid->vni_valid)) return; fid->vni_valid = false; @@ -405,6 +409,7 @@ static void mlxsw_sp_fid_8021q_setup(struct mlxsw_sp_fid *fid, const void *arg) u16 vid = 
*(u16 *) arg; mlxsw_sp_fid_8021q_fid(fid)->vid = vid; + fid->fid_offset = 0; } static enum mlxsw_reg_sfmr_op mlxsw_sp_sfmr_op(bool valid) @@ -413,38 +418,38 @@ static enum mlxsw_reg_sfmr_op mlxsw_sp_sfmr_op(bool valid) MLXSW_REG_SFMR_OP_DESTROY_FID; } -static int mlxsw_sp_fid_op(struct mlxsw_sp *mlxsw_sp, u16 fid_index, - u16 fid_offset, bool valid) +static int mlxsw_sp_fid_op(const struct mlxsw_sp_fid *fid, bool valid) { + struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp; char sfmr_pl[MLXSW_REG_SFMR_LEN]; - mlxsw_reg_sfmr_pack(sfmr_pl, mlxsw_sp_sfmr_op(valid), fid_index, - fid_offset); + mlxsw_reg_sfmr_pack(sfmr_pl, mlxsw_sp_sfmr_op(valid), fid->fid_index, + fid->fid_offset); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); } -static int mlxsw_sp_fid_vni_op(struct mlxsw_sp *mlxsw_sp, u16 fid_index, - __be32 vni, bool vni_valid, u32 nve_flood_index, - bool nve_flood_index_valid) +static int mlxsw_sp_fid_edit_op(const struct mlxsw_sp_fid *fid) { + struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp; char sfmr_pl[MLXSW_REG_SFMR_LEN]; - mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_CREATE_FID, fid_index, - 0); - mlxsw_reg_sfmr_vv_set(sfmr_pl, vni_valid); - mlxsw_reg_sfmr_vni_set(sfmr_pl, be32_to_cpu(vni)); - mlxsw_reg_sfmr_vtfp_set(sfmr_pl, nve_flood_index_valid); - mlxsw_reg_sfmr_nve_tunnel_flood_ptr_set(sfmr_pl, nve_flood_index); + mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_CREATE_FID, + fid->fid_index, fid->fid_offset); + mlxsw_reg_sfmr_vv_set(sfmr_pl, fid->vni_valid); + mlxsw_reg_sfmr_vni_set(sfmr_pl, be32_to_cpu(fid->vni)); + mlxsw_reg_sfmr_vtfp_set(sfmr_pl, fid->nve_flood_index_valid); + mlxsw_reg_sfmr_nve_tunnel_flood_ptr_set(sfmr_pl, fid->nve_flood_index); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); } -static int __mlxsw_sp_fid_port_vid_map(struct mlxsw_sp *mlxsw_sp, u16 fid_index, +static int __mlxsw_sp_fid_port_vid_map(const struct mlxsw_sp_fid *fid, u16 local_port, u16 vid, bool valid) { - enum 
mlxsw_reg_svfa_mt mt = MLXSW_REG_SVFA_MT_PORT_VID_TO_FID; + struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp; char svfa_pl[MLXSW_REG_SVFA_LEN]; - mlxsw_reg_svfa_pack(svfa_pl, local_port, mt, valid, fid_index, vid); + mlxsw_reg_svfa_port_vid_pack(svfa_pl, local_port, valid, fid->fid_index, + vid); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(svfa), svfa_pl); } @@ -459,20 +464,19 @@ static void mlxsw_sp_fid_8021d_setup(struct mlxsw_sp_fid *fid, const void *arg) int br_ifindex = *(int *) arg; mlxsw_sp_fid_8021d_fid(fid)->br_ifindex = br_ifindex; + fid->fid_offset = 0; } static int mlxsw_sp_fid_8021d_configure(struct mlxsw_sp_fid *fid) { - struct mlxsw_sp_fid_family *fid_family = fid->fid_family; - - return mlxsw_sp_fid_op(fid_family->mlxsw_sp, fid->fid_index, 0, true); + return mlxsw_sp_fid_op(fid, true); } static void mlxsw_sp_fid_8021d_deconfigure(struct mlxsw_sp_fid *fid) { if (fid->vni_valid) mlxsw_sp_nve_fid_disable(fid->fid_family->mlxsw_sp, fid); - mlxsw_sp_fid_op(fid->fid_family->mlxsw_sp, fid->fid_index, 0, false); + mlxsw_sp_fid_op(fid, false); } static int mlxsw_sp_fid_8021d_index_alloc(struct mlxsw_sp_fid *fid, @@ -505,7 +509,6 @@ static u16 mlxsw_sp_fid_8021d_flood_index(const struct mlxsw_sp_fid *fid) static int mlxsw_sp_port_vp_mode_trans(struct mlxsw_sp_port *mlxsw_sp_port) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; int err; @@ -517,7 +520,7 @@ static int mlxsw_sp_port_vp_mode_trans(struct mlxsw_sp_port *mlxsw_sp_port) if (!fid) continue; - err = __mlxsw_sp_fid_port_vid_map(mlxsw_sp, fid->fid_index, + err = __mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port->local_port, vid, true); if (err) @@ -540,8 +543,7 @@ err_fid_port_vid_map: if (!fid) continue; - __mlxsw_sp_fid_port_vid_map(mlxsw_sp, fid->fid_index, - mlxsw_sp_port->local_port, vid, + __mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port->local_port, vid, false); } return err; @@ -549,7 +551,6 @@ err_fid_port_vid_map: static void 
mlxsw_sp_port_vlan_mode_trans(struct mlxsw_sp_port *mlxsw_sp_port) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, false); @@ -562,12 +563,49 @@ static void mlxsw_sp_port_vlan_mode_trans(struct mlxsw_sp_port *mlxsw_sp_port) if (!fid) continue; - __mlxsw_sp_fid_port_vid_map(mlxsw_sp, fid->fid_index, - mlxsw_sp_port->local_port, vid, + __mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port->local_port, vid, false); } } +static int +mlxsw_sp_fid_port_vid_list_add(struct mlxsw_sp_fid *fid, u16 local_port, + u16 vid) +{ + struct mlxsw_sp_fid_port_vid *port_vid, *tmp_port_vid; + + port_vid = kzalloc(sizeof(*port_vid), GFP_KERNEL); + if (!port_vid) + return -ENOMEM; + + port_vid->local_port = local_port; + port_vid->vid = vid; + + list_for_each_entry(tmp_port_vid, &fid->port_vid_list, list) { + if (tmp_port_vid->local_port > local_port) + break; + } + + list_add_tail(&port_vid->list, &tmp_port_vid->list); + return 0; +} + +static void +mlxsw_sp_fid_port_vid_list_del(struct mlxsw_sp_fid *fid, u16 local_port, + u16 vid) +{ + struct mlxsw_sp_fid_port_vid *port_vid, *tmp; + + list_for_each_entry_safe(port_vid, tmp, &fid->port_vid_list, list) { + if (port_vid->local_port != local_port || port_vid->vid != vid) + continue; + + list_del(&port_vid->list); + kfree(port_vid); + return; + } +} + static int mlxsw_sp_fid_8021d_port_vid_map(struct mlxsw_sp_fid *fid, struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) @@ -576,11 +614,16 @@ static int mlxsw_sp_fid_8021d_port_vid_map(struct mlxsw_sp_fid *fid, u16 local_port = mlxsw_sp_port->local_port; int err; - err = __mlxsw_sp_fid_port_vid_map(mlxsw_sp, fid->fid_index, - mlxsw_sp_port->local_port, vid, true); + err = __mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port->local_port, vid, + true); if (err) return err; + err = mlxsw_sp_fid_port_vid_list_add(fid, mlxsw_sp_port->local_port, + vid); + if (err) + goto err_port_vid_list_add; + if 
(mlxsw_sp->fid_core->port_fid_mappings[local_port]++ == 0) { err = mlxsw_sp_port_vp_mode_trans(mlxsw_sp_port); if (err) @@ -591,8 +634,9 @@ static int mlxsw_sp_fid_8021d_port_vid_map(struct mlxsw_sp_fid *fid, err_port_vp_mode_trans: mlxsw_sp->fid_core->port_fid_mappings[local_port]--; - __mlxsw_sp_fid_port_vid_map(mlxsw_sp, fid->fid_index, - mlxsw_sp_port->local_port, vid, false); + mlxsw_sp_fid_port_vid_list_del(fid, mlxsw_sp_port->local_port, vid); +err_port_vid_list_add: + __mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port->local_port, vid, false); return err; } @@ -606,43 +650,28 @@ mlxsw_sp_fid_8021d_port_vid_unmap(struct mlxsw_sp_fid *fid, if (mlxsw_sp->fid_core->port_fid_mappings[local_port] == 1) mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port); mlxsw_sp->fid_core->port_fid_mappings[local_port]--; - __mlxsw_sp_fid_port_vid_map(mlxsw_sp, fid->fid_index, - mlxsw_sp_port->local_port, vid, false); + mlxsw_sp_fid_port_vid_list_del(fid, mlxsw_sp_port->local_port, vid); + __mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port->local_port, vid, false); } -static int mlxsw_sp_fid_8021d_vni_set(struct mlxsw_sp_fid *fid, __be32 vni) +static int mlxsw_sp_fid_8021d_vni_set(struct mlxsw_sp_fid *fid) { - struct mlxsw_sp_fid_family *fid_family = fid->fid_family; - - return mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index, vni, - true, fid->nve_flood_index, - fid->nve_flood_index_valid); + return mlxsw_sp_fid_edit_op(fid); } static void mlxsw_sp_fid_8021d_vni_clear(struct mlxsw_sp_fid *fid) { - struct mlxsw_sp_fid_family *fid_family = fid->fid_family; - - mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index, 0, false, - fid->nve_flood_index, fid->nve_flood_index_valid); + mlxsw_sp_fid_edit_op(fid); } -static int mlxsw_sp_fid_8021d_nve_flood_index_set(struct mlxsw_sp_fid *fid, - u32 nve_flood_index) +static int mlxsw_sp_fid_8021d_nve_flood_index_set(struct mlxsw_sp_fid *fid) { - struct mlxsw_sp_fid_family *fid_family = fid->fid_family; - - return 
mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index, - fid->vni, fid->vni_valid, nve_flood_index, - true); + return mlxsw_sp_fid_edit_op(fid); } static void mlxsw_sp_fid_8021d_nve_flood_index_clear(struct mlxsw_sp_fid *fid) { - struct mlxsw_sp_fid_family *fid_family = fid->fid_family; - - mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index, fid->vni, - fid->vni_valid, 0, false); + mlxsw_sp_fid_edit_op(fid); } static void @@ -699,7 +728,6 @@ static const struct mlxsw_sp_fid_family mlxsw_sp_fid_8021d_family = { .nr_flood_tables = ARRAY_SIZE(mlxsw_sp_fid_8021d_flood_tables), .rif_type = MLXSW_SP_RIF_TYPE_FID, .ops = &mlxsw_sp_fid_8021d_ops, - .lag_vid_valid = 1, }; static bool @@ -746,11 +774,15 @@ static const struct mlxsw_sp_fid_family mlxsw_sp_fid_8021q_emu_family = { .end_index = MLXSW_SP_FID_8021Q_EMU_END, .flood_tables = mlxsw_sp_fid_8021d_flood_tables, .nr_flood_tables = ARRAY_SIZE(mlxsw_sp_fid_8021d_flood_tables), - .rif_type = MLXSW_SP_RIF_TYPE_VLAN, + .rif_type = MLXSW_SP_RIF_TYPE_VLAN_EMU, .ops = &mlxsw_sp_fid_8021q_emu_ops, - .lag_vid_valid = 1, }; +static void mlxsw_sp_fid_rfid_setup(struct mlxsw_sp_fid *fid, const void *arg) +{ + fid->fid_offset = 0; +} + static int mlxsw_sp_fid_rfid_configure(struct mlxsw_sp_fid *fid) { /* rFIDs are allocated by the device during init */ @@ -787,6 +819,11 @@ static int mlxsw_sp_fid_rfid_port_vid_map(struct mlxsw_sp_fid *fid, u16 local_port = mlxsw_sp_port->local_port; int err; + err = mlxsw_sp_fid_port_vid_list_add(fid, mlxsw_sp_port->local_port, + vid); + if (err) + return err; + /* We only need to transition the port to virtual mode since * {Port, VID} => FID is done by the firmware upon RIF creation. 
*/ @@ -800,6 +837,7 @@ static int mlxsw_sp_fid_rfid_port_vid_map(struct mlxsw_sp_fid *fid, err_port_vp_mode_trans: mlxsw_sp->fid_core->port_fid_mappings[local_port]--; + mlxsw_sp_fid_port_vid_list_del(fid, mlxsw_sp_port->local_port, vid); return err; } @@ -813,15 +851,41 @@ mlxsw_sp_fid_rfid_port_vid_unmap(struct mlxsw_sp_fid *fid, if (mlxsw_sp->fid_core->port_fid_mappings[local_port] == 1) mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port); mlxsw_sp->fid_core->port_fid_mappings[local_port]--; + mlxsw_sp_fid_port_vid_list_del(fid, mlxsw_sp_port->local_port, vid); +} + +static int mlxsw_sp_fid_rfid_vni_set(struct mlxsw_sp_fid *fid) +{ + return -EOPNOTSUPP; +} + +static void mlxsw_sp_fid_rfid_vni_clear(struct mlxsw_sp_fid *fid) +{ + WARN_ON_ONCE(1); +} + +static int mlxsw_sp_fid_rfid_nve_flood_index_set(struct mlxsw_sp_fid *fid) +{ + return -EOPNOTSUPP; +} + +static void mlxsw_sp_fid_rfid_nve_flood_index_clear(struct mlxsw_sp_fid *fid) +{ + WARN_ON_ONCE(1); } static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_rfid_ops = { + .setup = mlxsw_sp_fid_rfid_setup, .configure = mlxsw_sp_fid_rfid_configure, .deconfigure = mlxsw_sp_fid_rfid_deconfigure, .index_alloc = mlxsw_sp_fid_rfid_index_alloc, .compare = mlxsw_sp_fid_rfid_compare, .port_vid_map = mlxsw_sp_fid_rfid_port_vid_map, .port_vid_unmap = mlxsw_sp_fid_rfid_port_vid_unmap, + .vni_set = mlxsw_sp_fid_rfid_vni_set, + .vni_clear = mlxsw_sp_fid_rfid_vni_clear, + .nve_flood_index_set = mlxsw_sp_fid_rfid_nve_flood_index_set, + .nve_flood_index_clear = mlxsw_sp_fid_rfid_nve_flood_index_clear, }; #define MLXSW_SP_RFID_BASE (15 * 1024) @@ -836,16 +900,19 @@ static const struct mlxsw_sp_fid_family mlxsw_sp_fid_rfid_family = { .ops = &mlxsw_sp_fid_rfid_ops, }; -static int mlxsw_sp_fid_dummy_configure(struct mlxsw_sp_fid *fid) +static void mlxsw_sp_fid_dummy_setup(struct mlxsw_sp_fid *fid, const void *arg) { - struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp; + fid->fid_offset = 0; +} - return mlxsw_sp_fid_op(mlxsw_sp, 
fid->fid_index, 0, true); +static int mlxsw_sp_fid_dummy_configure(struct mlxsw_sp_fid *fid) +{ + return mlxsw_sp_fid_op(fid, true); } static void mlxsw_sp_fid_dummy_deconfigure(struct mlxsw_sp_fid *fid) { - mlxsw_sp_fid_op(fid->fid_family->mlxsw_sp, fid->fid_index, 0, false); + mlxsw_sp_fid_op(fid, false); } static int mlxsw_sp_fid_dummy_index_alloc(struct mlxsw_sp_fid *fid, @@ -862,11 +929,36 @@ static bool mlxsw_sp_fid_dummy_compare(const struct mlxsw_sp_fid *fid, return true; } +static int mlxsw_sp_fid_dummy_vni_set(struct mlxsw_sp_fid *fid) +{ + return -EOPNOTSUPP; +} + +static void mlxsw_sp_fid_dummy_vni_clear(struct mlxsw_sp_fid *fid) +{ + WARN_ON_ONCE(1); +} + +static int mlxsw_sp_fid_dummy_nve_flood_index_set(struct mlxsw_sp_fid *fid) +{ + return -EOPNOTSUPP; +} + +static void mlxsw_sp_fid_dummy_nve_flood_index_clear(struct mlxsw_sp_fid *fid) +{ + WARN_ON_ONCE(1); +} + static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_dummy_ops = { + .setup = mlxsw_sp_fid_dummy_setup, .configure = mlxsw_sp_fid_dummy_configure, .deconfigure = mlxsw_sp_fid_dummy_deconfigure, .index_alloc = mlxsw_sp_fid_dummy_index_alloc, .compare = mlxsw_sp_fid_dummy_compare, + .vni_set = mlxsw_sp_fid_dummy_vni_set, + .vni_clear = mlxsw_sp_fid_dummy_vni_clear, + .nve_flood_index_set = mlxsw_sp_fid_dummy_nve_flood_index_set, + .nve_flood_index_clear = mlxsw_sp_fid_dummy_nve_flood_index_clear, }; static const struct mlxsw_sp_fid_family mlxsw_sp_fid_dummy_family = { @@ -877,7 +969,14 @@ static const struct mlxsw_sp_fid_family mlxsw_sp_fid_dummy_family = { .ops = &mlxsw_sp_fid_dummy_ops, }; -static const struct mlxsw_sp_fid_family *mlxsw_sp_fid_family_arr[] = { +const struct mlxsw_sp_fid_family *mlxsw_sp1_fid_family_arr[] = { + [MLXSW_SP_FID_TYPE_8021Q] = &mlxsw_sp_fid_8021q_emu_family, + [MLXSW_SP_FID_TYPE_8021D] = &mlxsw_sp_fid_8021d_family, + [MLXSW_SP_FID_TYPE_RFID] = &mlxsw_sp_fid_rfid_family, + [MLXSW_SP_FID_TYPE_DUMMY] = &mlxsw_sp_fid_dummy_family, +}; + +const struct mlxsw_sp_fid_family 
*mlxsw_sp2_fid_family_arr[] = { [MLXSW_SP_FID_TYPE_8021Q] = &mlxsw_sp_fid_8021q_emu_family, [MLXSW_SP_FID_TYPE_8021D] = &mlxsw_sp_fid_8021d_family, [MLXSW_SP_FID_TYPE_RFID] = &mlxsw_sp_fid_rfid_family, @@ -919,6 +1018,8 @@ static struct mlxsw_sp_fid *mlxsw_sp_fid_get(struct mlxsw_sp *mlxsw_sp, fid = kzalloc(fid_family->fid_size, GFP_KERNEL); if (!fid) return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&fid->port_vid_list); fid->fid_family = fid_family; err = fid->fid_family->ops->index_alloc(fid, arg, &fid_index); @@ -927,8 +1028,7 @@ static struct mlxsw_sp_fid *mlxsw_sp_fid_get(struct mlxsw_sp *mlxsw_sp, fid->fid_index = fid_index; __set_bit(fid_index - fid_family->start_index, fid_family->fids_bitmap); - if (fid->fid_family->ops->setup) - fid->fid_family->ops->setup(fid, arg); + fid->fid_family->ops->setup(fid, arg); err = fid->fid_family->ops->configure(fid); if (err) @@ -967,6 +1067,7 @@ void mlxsw_sp_fid_put(struct mlxsw_sp_fid *fid) fid->fid_family->ops->deconfigure(fid); __clear_bit(fid->fid_index - fid_family->start_index, fid_family->fids_bitmap); + WARN_ON_ONCE(!list_empty(&fid->port_vid_list)); kfree(fid); } @@ -1144,7 +1245,7 @@ int mlxsw_sp_fids_init(struct mlxsw_sp *mlxsw_sp) for (i = 0; i < MLXSW_SP_FID_TYPE_MAX; i++) { err = mlxsw_sp_fid_family_register(mlxsw_sp, - mlxsw_sp_fid_family_arr[i]); + mlxsw_sp->fid_family_arr[i]); if (err) goto err_fid_ops_register; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 9dbb573d53ea..0b103fc68a1a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -443,65 +443,12 @@ struct mlxsw_sp_fib_entry_decap { u32 tunnel_index; }; -static struct mlxsw_sp_fib_entry_priv * -mlxsw_sp_fib_entry_priv_create(const struct mlxsw_sp_router_ll_ops *ll_ops) -{ - struct mlxsw_sp_fib_entry_priv *priv; - - if (!ll_ops->fib_entry_priv_size) - /* No need to have priv */ - return NULL; - - 
priv = kzalloc(sizeof(*priv) + ll_ops->fib_entry_priv_size, GFP_KERNEL); - if (!priv) - return ERR_PTR(-ENOMEM); - refcount_set(&priv->refcnt, 1); - return priv; -} - -static void -mlxsw_sp_fib_entry_priv_destroy(struct mlxsw_sp_fib_entry_priv *priv) -{ - kfree(priv); -} - -static void mlxsw_sp_fib_entry_priv_hold(struct mlxsw_sp_fib_entry_priv *priv) -{ - refcount_inc(&priv->refcnt); -} - -static void mlxsw_sp_fib_entry_priv_put(struct mlxsw_sp_fib_entry_priv *priv) -{ - if (!priv || !refcount_dec_and_test(&priv->refcnt)) - return; - mlxsw_sp_fib_entry_priv_destroy(priv); -} - -static void mlxsw_sp_fib_entry_op_ctx_priv_hold(struct mlxsw_sp_fib_entry_op_ctx *op_ctx, - struct mlxsw_sp_fib_entry_priv *priv) -{ - if (!priv) - return; - mlxsw_sp_fib_entry_priv_hold(priv); - list_add(&priv->list, &op_ctx->fib_entry_priv_list); -} - -static void mlxsw_sp_fib_entry_op_ctx_priv_put_all(struct mlxsw_sp_fib_entry_op_ctx *op_ctx) -{ - struct mlxsw_sp_fib_entry_priv *priv, *tmp; - - list_for_each_entry_safe(priv, tmp, &op_ctx->fib_entry_priv_list, list) - mlxsw_sp_fib_entry_priv_put(priv); - INIT_LIST_HEAD(&op_ctx->fib_entry_priv_list); -} - struct mlxsw_sp_fib_entry { struct mlxsw_sp_fib_node *fib_node; enum mlxsw_sp_fib_entry_type type; struct list_head nexthop_group_node; struct mlxsw_sp_nexthop_group *nh_group; struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. 
*/ - struct mlxsw_sp_fib_entry_priv *priv; }; struct mlxsw_sp_fib4_entry { @@ -537,7 +484,6 @@ struct mlxsw_sp_fib { struct mlxsw_sp_vr *vr; struct mlxsw_sp_lpm_tree *lpm_tree; enum mlxsw_sp_l3proto proto; - const struct mlxsw_sp_router_ll_ops *ll_ops; }; struct mlxsw_sp_vr { @@ -551,45 +497,16 @@ struct mlxsw_sp_vr { refcount_t ul_rif_refcnt; }; -static int mlxsw_sp_router_ll_basic_init(struct mlxsw_sp *mlxsw_sp, u16 vr_id, - enum mlxsw_sp_l3proto proto) -{ - return 0; -} - -static int mlxsw_sp_router_ll_basic_ralta_write(struct mlxsw_sp *mlxsw_sp, char *xralta_pl) -{ - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), - xralta_pl + MLXSW_REG_XRALTA_RALTA_OFFSET); -} - -static int mlxsw_sp_router_ll_basic_ralst_write(struct mlxsw_sp *mlxsw_sp, char *xralst_pl) -{ - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), - xralst_pl + MLXSW_REG_XRALST_RALST_OFFSET); -} - -static int mlxsw_sp_router_ll_basic_raltb_write(struct mlxsw_sp *mlxsw_sp, char *xraltb_pl) -{ - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), - xraltb_pl + MLXSW_REG_XRALTB_RALTB_OFFSET); -} - static const struct rhashtable_params mlxsw_sp_fib_ht_params; static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr, enum mlxsw_sp_l3proto proto) { - const struct mlxsw_sp_router_ll_ops *ll_ops = mlxsw_sp->router->proto_ll_ops[proto]; struct mlxsw_sp_lpm_tree *lpm_tree; struct mlxsw_sp_fib *fib; int err; - err = ll_ops->init(mlxsw_sp, vr->id, proto); - if (err) - return ERR_PTR(err); - lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto]; fib = kzalloc(sizeof(*fib), GFP_KERNEL); if (!fib) @@ -601,7 +518,6 @@ static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp, fib->proto = proto; fib->vr = vr; fib->lpm_tree = lpm_tree; - fib->ll_ops = ll_ops; mlxsw_sp_lpm_tree_hold(lpm_tree); err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id); if (err) @@ -640,36 +556,33 @@ mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp) } 
static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_sp_router_ll_ops *ll_ops, struct mlxsw_sp_lpm_tree *lpm_tree) { - char xralta_pl[MLXSW_REG_XRALTA_LEN]; + char ralta_pl[MLXSW_REG_RALTA_LEN]; - mlxsw_reg_xralta_pack(xralta_pl, true, - (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto, - lpm_tree->id); - return ll_ops->ralta_write(mlxsw_sp, xralta_pl); + mlxsw_reg_ralta_pack(ralta_pl, true, + (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto, + lpm_tree->id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); } static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_sp_router_ll_ops *ll_ops, struct mlxsw_sp_lpm_tree *lpm_tree) { - char xralta_pl[MLXSW_REG_XRALTA_LEN]; + char ralta_pl[MLXSW_REG_RALTA_LEN]; - mlxsw_reg_xralta_pack(xralta_pl, false, - (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto, - lpm_tree->id); - ll_ops->ralta_write(mlxsw_sp, xralta_pl); + mlxsw_reg_ralta_pack(ralta_pl, false, + (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto, + lpm_tree->id); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); } static int mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_sp_router_ll_ops *ll_ops, struct mlxsw_sp_prefix_usage *prefix_usage, struct mlxsw_sp_lpm_tree *lpm_tree) { - char xralst_pl[MLXSW_REG_XRALST_LEN]; + char ralst_pl[MLXSW_REG_RALST_LEN]; u8 root_bin = 0; u8 prefix; u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD; @@ -677,20 +590,19 @@ mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) root_bin = prefix; - mlxsw_reg_xralst_pack(xralst_pl, root_bin, lpm_tree->id); + mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id); mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) { if (prefix == 0) continue; - mlxsw_reg_xralst_bin_pack(xralst_pl, prefix, last_prefix, - MLXSW_REG_RALST_BIN_NO_CHILD); + mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix, + 
MLXSW_REG_RALST_BIN_NO_CHILD); last_prefix = prefix; } - return ll_ops->ralst_write(mlxsw_sp, xralst_pl); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl); } static struct mlxsw_sp_lpm_tree * mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_sp_router_ll_ops *ll_ops, struct mlxsw_sp_prefix_usage *prefix_usage, enum mlxsw_sp_l3proto proto) { @@ -701,11 +613,12 @@ mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp, if (!lpm_tree) return ERR_PTR(-EBUSY); lpm_tree->proto = proto; - err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, ll_ops, lpm_tree); + err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree); if (err) return ERR_PTR(err); - err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, ll_ops, prefix_usage, lpm_tree); + err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage, + lpm_tree); if (err) goto err_left_struct_set; memcpy(&lpm_tree->prefix_usage, prefix_usage, @@ -716,15 +629,14 @@ mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp, return lpm_tree; err_left_struct_set: - mlxsw_sp_lpm_tree_free(mlxsw_sp, ll_ops, lpm_tree); + mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree); return ERR_PTR(err); } static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_sp_router_ll_ops *ll_ops, struct mlxsw_sp_lpm_tree *lpm_tree) { - mlxsw_sp_lpm_tree_free(mlxsw_sp, ll_ops, lpm_tree); + mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree); } static struct mlxsw_sp_lpm_tree * @@ -732,7 +644,6 @@ mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_prefix_usage *prefix_usage, enum mlxsw_sp_l3proto proto) { - const struct mlxsw_sp_router_ll_ops *ll_ops = mlxsw_sp->router->proto_ll_ops[proto]; struct mlxsw_sp_lpm_tree *lpm_tree; int i; @@ -746,7 +657,7 @@ mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp, return lpm_tree; } } - return mlxsw_sp_lpm_tree_create(mlxsw_sp, ll_ops, prefix_usage, proto); + return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto); } static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree 
*lpm_tree) @@ -757,11 +668,8 @@ static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree) static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_lpm_tree *lpm_tree) { - const struct mlxsw_sp_router_ll_ops *ll_ops = - mlxsw_sp->router->proto_ll_ops[lpm_tree->proto]; - if (--lpm_tree->ref_count == 0) - mlxsw_sp_lpm_tree_destroy(mlxsw_sp, ll_ops, lpm_tree); + mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree); } #define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */ @@ -851,23 +759,23 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp) static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_fib *fib, u8 tree_id) { - char xraltb_pl[MLXSW_REG_XRALTB_LEN]; + char raltb_pl[MLXSW_REG_RALTB_LEN]; - mlxsw_reg_xraltb_pack(xraltb_pl, fib->vr->id, - (enum mlxsw_reg_ralxx_protocol) fib->proto, - tree_id); - return fib->ll_ops->raltb_write(mlxsw_sp, xraltb_pl); + mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id, + (enum mlxsw_reg_ralxx_protocol) fib->proto, + tree_id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); } static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_fib *fib) { - char xraltb_pl[MLXSW_REG_XRALTB_LEN]; + char raltb_pl[MLXSW_REG_RALTB_LEN]; /* Bind to tree 0 which is default */ - mlxsw_reg_xraltb_pack(xraltb_pl, fib->vr->id, - (enum mlxsw_reg_ralxx_protocol) fib->proto, 0); - return fib->ll_ops->raltb_write(mlxsw_sp, xraltb_pl); + mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id, + (enum mlxsw_reg_ralxx_protocol) fib->proto, 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); } static u32 mlxsw_sp_fix_tb_id(u32 tb_id) @@ -5778,14 +5686,13 @@ mlxsw_sp_fib_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_fib_entry_hw_flags_refresh(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_sp_fib_entry_op op) + enum mlxsw_reg_ralue_op op) { switch (op) { - case 
MLXSW_SP_FIB_ENTRY_OP_WRITE: - case MLXSW_SP_FIB_ENTRY_OP_UPDATE: + case MLXSW_REG_RALUE_OP_WRITE_WRITE: mlxsw_sp_fib_entry_hw_flags_set(mlxsw_sp, fib_entry); break; - case MLXSW_SP_FIB_ENTRY_OP_DELETE: + case MLXSW_REG_RALUE_OP_WRITE_DELETE: mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, fib_entry); break; default: @@ -5793,140 +5700,39 @@ mlxsw_sp_fib_entry_hw_flags_refresh(struct mlxsw_sp *mlxsw_sp, } } -struct mlxsw_sp_fib_entry_op_ctx_basic { - char ralue_pl[MLXSW_REG_RALUE_LEN]; -}; - static void -mlxsw_sp_router_ll_basic_fib_entry_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx, - enum mlxsw_sp_l3proto proto, - enum mlxsw_sp_fib_entry_op op, - u16 virtual_router, u8 prefix_len, - unsigned char *addr, - struct mlxsw_sp_fib_entry_priv *priv) +mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl, + const struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) { - struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv; - enum mlxsw_reg_ralxx_protocol ralxx_proto; - char *ralue_pl = op_ctx_basic->ralue_pl; - enum mlxsw_reg_ralue_op ralue_op; + struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib; + enum mlxsw_reg_ralxx_protocol proto; + u32 *p_dip; - ralxx_proto = (enum mlxsw_reg_ralxx_protocol) proto; + proto = (enum mlxsw_reg_ralxx_protocol) fib->proto; - switch (op) { - case MLXSW_SP_FIB_ENTRY_OP_WRITE: - case MLXSW_SP_FIB_ENTRY_OP_UPDATE: - ralue_op = MLXSW_REG_RALUE_OP_WRITE_WRITE; - break; - case MLXSW_SP_FIB_ENTRY_OP_DELETE: - ralue_op = MLXSW_REG_RALUE_OP_WRITE_DELETE; - break; - default: - WARN_ON_ONCE(1); - return; - } - - switch (proto) { + switch (fib->proto) { case MLXSW_SP_L3_PROTO_IPV4: - mlxsw_reg_ralue_pack4(ralue_pl, ralxx_proto, ralue_op, - virtual_router, prefix_len, (u32 *) addr); + p_dip = (u32 *) fib_entry->fib_node->key.addr; + mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id, + fib_entry->fib_node->key.prefix_len, + *p_dip); break; case MLXSW_SP_L3_PROTO_IPV6: - mlxsw_reg_ralue_pack6(ralue_pl, ralxx_proto, 
ralue_op, - virtual_router, prefix_len, addr); + mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id, + fib_entry->fib_node->key.prefix_len, + fib_entry->fib_node->key.addr); break; } } -static void -mlxsw_sp_router_ll_basic_fib_entry_act_remote_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx, - enum mlxsw_reg_ralue_trap_action trap_action, - u16 trap_id, u32 adjacency_index, u16 ecmp_size) -{ - struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv; - - mlxsw_reg_ralue_act_remote_pack(op_ctx_basic->ralue_pl, trap_action, - trap_id, adjacency_index, ecmp_size); -} - -static void -mlxsw_sp_router_ll_basic_fib_entry_act_local_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx, - enum mlxsw_reg_ralue_trap_action trap_action, - u16 trap_id, u16 local_erif) -{ - struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv; - - mlxsw_reg_ralue_act_local_pack(op_ctx_basic->ralue_pl, trap_action, - trap_id, local_erif); -} - -static void -mlxsw_sp_router_ll_basic_fib_entry_act_ip2me_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx) -{ - struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv; - - mlxsw_reg_ralue_act_ip2me_pack(op_ctx_basic->ralue_pl); -} - -static void -mlxsw_sp_router_ll_basic_fib_entry_act_ip2me_tun_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx, - u32 tunnel_ptr) -{ - struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv; - - mlxsw_reg_ralue_act_ip2me_tun_pack(op_ctx_basic->ralue_pl, tunnel_ptr); -} - -static int -mlxsw_sp_router_ll_basic_fib_entry_commit(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry_op_ctx *op_ctx, - bool *postponed_for_bulk) -{ - struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv; - - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), - op_ctx_basic->ralue_pl); -} - -static bool -mlxsw_sp_router_ll_basic_fib_entry_is_committed(struct mlxsw_sp_fib_entry_priv *priv) -{ - return true; -} - -static void 
mlxsw_sp_fib_entry_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx, - struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_sp_fib_entry_op op) -{ - struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib; - - mlxsw_sp_fib_entry_op_ctx_priv_hold(op_ctx, fib_entry->priv); - fib->ll_ops->fib_entry_pack(op_ctx, fib->proto, op, fib->vr->id, - fib_entry->fib_node->key.prefix_len, - fib_entry->fib_node->key.addr, - fib_entry->priv); -} - -static int mlxsw_sp_fib_entry_commit(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry_op_ctx *op_ctx, - const struct mlxsw_sp_router_ll_ops *ll_ops) -{ - bool postponed_for_bulk = false; - int err; - - err = ll_ops->fib_entry_commit(mlxsw_sp, op_ctx, &postponed_for_bulk); - if (!postponed_for_bulk) - mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx); - return err; -} - static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_sp_fib_entry_op op) + enum mlxsw_reg_ralue_op op) { - const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops; struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group; struct mlxsw_sp_nexthop_group_info *nhgi = nh_group->nhgi; + char ralue_pl[MLXSW_REG_RALUE_LEN]; enum mlxsw_reg_ralue_trap_action trap_action; u16 trap_id = 0; u32 adjacency_index = 0; @@ -5949,20 +5755,19 @@ static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp, trap_id = MLXSW_TRAP_ID_RTR_INGRESS0; } - mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op); - ll_ops->fib_entry_act_remote_pack(op_ctx, trap_action, trap_id, - adjacency_index, ecmp_size); - return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops); + mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); + mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id, + adjacency_index, ecmp_size); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); } static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp, - struct 
mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_sp_fib_entry_op op) + enum mlxsw_reg_ralue_op op) { - const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops; struct mlxsw_sp_rif *rif = fib_entry->nh_group->nhgi->nh_rif; enum mlxsw_reg_ralue_trap_action trap_action; + char ralue_pl[MLXSW_REG_RALUE_LEN]; u16 trap_id = 0; u16 rif_index = 0; @@ -5974,64 +5779,61 @@ static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp, trap_id = MLXSW_TRAP_ID_RTR_INGRESS0; } - mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op); - ll_ops->fib_entry_act_local_pack(op_ctx, trap_action, trap_id, rif_index); - return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops); + mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); + mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, + rif_index); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); } static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_sp_fib_entry_op op) + enum mlxsw_reg_ralue_op op) { - const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops; + char ralue_pl[MLXSW_REG_RALUE_LEN]; - mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op); - ll_ops->fib_entry_act_ip2me_pack(op_ctx); - return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops); + mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); + mlxsw_reg_ralue_act_ip2me_pack(ralue_pl); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); } static int mlxsw_sp_fib_entry_op_blackhole(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_sp_fib_entry_op op) + enum mlxsw_reg_ralue_op op) { - const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops; enum mlxsw_reg_ralue_trap_action trap_action; + char ralue_pl[MLXSW_REG_RALUE_LEN]; trap_action = 
MLXSW_REG_RALUE_TRAP_ACTION_DISCARD_ERROR; - mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op); - ll_ops->fib_entry_act_local_pack(op_ctx, trap_action, 0, 0); - return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops); + mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); + mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, 0, 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); } static int mlxsw_sp_fib_entry_op_unreachable(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_sp_fib_entry_op op) + enum mlxsw_reg_ralue_op op) { - const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops; enum mlxsw_reg_ralue_trap_action trap_action; + char ralue_pl[MLXSW_REG_RALUE_LEN]; u16 trap_id; trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP; trap_id = MLXSW_TRAP_ID_RTR_INGRESS1; - mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op); - ll_ops->fib_entry_act_local_pack(op_ctx, trap_action, trap_id, 0); - return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops); + mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); + mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); } static int mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_sp_fib_entry_op op) + enum mlxsw_reg_ralue_op op) { - const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops; struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry; const struct mlxsw_sp_ipip_ops *ipip_ops; + char ralue_pl[MLXSW_REG_RALUE_LEN]; int err; if (WARN_ON(!ipip_entry)) @@ -6043,55 +5845,54 @@ mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp, if (err) return err; - mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op); - ll_ops->fib_entry_act_ip2me_tun_pack(op_ctx, - fib_entry->decap.tunnel_index); - 
return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops); + mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); + mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl, + fib_entry->decap.tunnel_index); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); } static int mlxsw_sp_fib_entry_op_nve_decap(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_sp_fib_entry_op op) + enum mlxsw_reg_ralue_op op) { - const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops; + char ralue_pl[MLXSW_REG_RALUE_LEN]; - mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op); - ll_ops->fib_entry_act_ip2me_tun_pack(op_ctx, - fib_entry->decap.tunnel_index); - return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops); + mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); + mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl, + fib_entry->decap.tunnel_index); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); } static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_sp_fib_entry_op op) + enum mlxsw_reg_ralue_op op) { switch (fib_entry->type) { case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE: - return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, op_ctx, fib_entry, op); + return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op); case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL: - return mlxsw_sp_fib_entry_op_local(mlxsw_sp, op_ctx, fib_entry, op); + return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op); case MLXSW_SP_FIB_ENTRY_TYPE_TRAP: - return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, op_ctx, fib_entry, op); + return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op); case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE: - return mlxsw_sp_fib_entry_op_blackhole(mlxsw_sp, op_ctx, fib_entry, op); + return mlxsw_sp_fib_entry_op_blackhole(mlxsw_sp, fib_entry, op); case MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE: - return 
mlxsw_sp_fib_entry_op_unreachable(mlxsw_sp, op_ctx, fib_entry, op); + return mlxsw_sp_fib_entry_op_unreachable(mlxsw_sp, fib_entry, + op); case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP: - return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp, op_ctx, fib_entry, op); + return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp, + fib_entry, op); case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP: - return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, op_ctx, fib_entry, op); + return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, fib_entry, op); } return -EINVAL; } static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_sp_fib_entry_op op) + enum mlxsw_reg_ralue_op op) { - int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, op_ctx, fib_entry, op); + int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op); if (err) return err; @@ -6101,35 +5902,18 @@ static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, return err; } -static int __mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry_op_ctx *op_ctx, - struct mlxsw_sp_fib_entry *fib_entry, - bool is_new) -{ - return mlxsw_sp_fib_entry_op(mlxsw_sp, op_ctx, fib_entry, - is_new ? 
MLXSW_SP_FIB_ENTRY_OP_WRITE : - MLXSW_SP_FIB_ENTRY_OP_UPDATE); -} - static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry) { - struct mlxsw_sp_fib_entry_op_ctx *op_ctx = mlxsw_sp->router->ll_op_ctx; - - mlxsw_sp_fib_entry_op_ctx_clear(op_ctx); - return __mlxsw_sp_fib_entry_update(mlxsw_sp, op_ctx, fib_entry, false); + return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, + MLXSW_REG_RALUE_OP_WRITE_WRITE); } static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib_entry *fib_entry) { - const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops; - - if (!ll_ops->fib_entry_is_committed(fib_entry->priv)) - return 0; - return mlxsw_sp_fib_entry_op(mlxsw_sp, op_ctx, fib_entry, - MLXSW_SP_FIB_ENTRY_OP_DELETE); + return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, + MLXSW_REG_RALUE_OP_WRITE_DELETE); } static int @@ -6224,12 +6008,6 @@ mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp, return ERR_PTR(-ENOMEM); fib_entry = &fib4_entry->common; - fib_entry->priv = mlxsw_sp_fib_entry_priv_create(fib_node->fib->ll_ops); - if (IS_ERR(fib_entry->priv)) { - err = PTR_ERR(fib_entry->priv); - goto err_fib_entry_priv_create; - } - err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi); if (err) goto err_nexthop4_group_get; @@ -6258,8 +6036,6 @@ err_fib4_entry_type_set: err_nexthop_group_vr_link: mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common); err_nexthop4_group_get: - mlxsw_sp_fib_entry_priv_put(fib_entry->priv); -err_fib_entry_priv_create: kfree(fib4_entry); return ERR_PTR(err); } @@ -6274,7 +6050,6 @@ static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_nexthop_group_vr_unlink(fib4_entry->common.nh_group, fib_node->fib); mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common); - mlxsw_sp_fib_entry_priv_put(fib4_entry->common.priv); kfree(fib4_entry); } @@ -6512,16 +6287,14 @@ static void 
mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp, } static int mlxsw_sp_fib_node_entry_link(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib_entry *fib_entry) { struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; - bool is_new = !fib_node->fib_entry; int err; fib_node->fib_entry = fib_entry; - err = __mlxsw_sp_fib_entry_update(mlxsw_sp, op_ctx, fib_entry, is_new); + err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); if (err) goto err_fib_entry_update; @@ -6532,25 +6305,14 @@ err_fib_entry_update: return err; } -static int __mlxsw_sp_fib_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry_op_ctx *op_ctx, - struct mlxsw_sp_fib_entry *fib_entry) +static void +mlxsw_sp_fib_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; - int err; - err = mlxsw_sp_fib_entry_del(mlxsw_sp, op_ctx, fib_entry); + mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry); fib_node->fib_entry = NULL; - return err; -} - -static void mlxsw_sp_fib_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry) -{ - struct mlxsw_sp_fib_entry_op_ctx *op_ctx = mlxsw_sp->router->ll_op_ctx; - - mlxsw_sp_fib_entry_op_ctx_clear(op_ctx); - __mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, op_ctx, fib_entry); } static bool mlxsw_sp_fib4_allow_replace(struct mlxsw_sp_fib4_entry *fib4_entry) @@ -6572,7 +6334,6 @@ static bool mlxsw_sp_fib4_allow_replace(struct mlxsw_sp_fib4_entry *fib4_entry) static int mlxsw_sp_router_fib4_replace(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry_op_ctx *op_ctx, const struct fib_entry_notifier_info *fen_info) { struct mlxsw_sp_fib4_entry *fib4_entry, *fib4_replaced; @@ -6607,7 +6368,7 @@ mlxsw_sp_router_fib4_replace(struct mlxsw_sp *mlxsw_sp, } replaced = fib_node->fib_entry; - err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, op_ctx, &fib4_entry->common); + err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, 
&fib4_entry->common); if (err) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n"); goto err_fib_node_entry_link; @@ -6632,23 +6393,20 @@ err_fib4_entry_create: return err; } -static int mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry_op_ctx *op_ctx, - struct fib_entry_notifier_info *fen_info) +static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp, + struct fib_entry_notifier_info *fen_info) { struct mlxsw_sp_fib4_entry *fib4_entry; struct mlxsw_sp_fib_node *fib_node; - int err; fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info); if (!fib4_entry) - return 0; + return; fib_node = fib4_entry->common.fib_node; - err = __mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, op_ctx, &fib4_entry->common); + mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib4_entry->common); mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); - return err; } static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt) @@ -6946,9 +6704,9 @@ static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp); } -static int mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry_op_ctx *op_ctx, - struct mlxsw_sp_fib6_entry *fib6_entry) +static int +mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry) { struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group; struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node; @@ -6971,8 +6729,7 @@ static int mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp, * currently associated with it in the device's table is that * of the old group. Start using the new one instead. 
*/ - err = __mlxsw_sp_fib_entry_update(mlxsw_sp, op_ctx, - &fib6_entry->common, false); + err = mlxsw_sp_fib_entry_update(mlxsw_sp, &fib6_entry->common); if (err) goto err_fib_entry_update; @@ -6996,7 +6753,6 @@ err_nexthop6_group_get: static int mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib6_entry *fib6_entry, struct fib6_info **rt_arr, unsigned int nrt6) { @@ -7014,7 +6770,7 @@ mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp, fib6_entry->nrt6++; } - err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, op_ctx, fib6_entry); + err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry); if (err) goto err_rt6_unwind; @@ -7033,7 +6789,6 @@ err_rt6_unwind: static void mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib6_entry *fib6_entry, struct fib6_info **rt_arr, unsigned int nrt6) { @@ -7051,7 +6806,7 @@ mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); } - mlxsw_sp_nexthop6_group_update(mlxsw_sp, op_ctx, fib6_entry); + mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry); } static int @@ -7137,12 +6892,6 @@ mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp, return ERR_PTR(-ENOMEM); fib_entry = &fib6_entry->common; - fib_entry->priv = mlxsw_sp_fib_entry_priv_create(fib_node->fib->ll_ops); - if (IS_ERR(fib_entry->priv)) { - err = PTR_ERR(fib_entry->priv); - goto err_fib_entry_priv_create; - } - INIT_LIST_HEAD(&fib6_entry->rt6_list); for (i = 0; i < nrt6; i++) { @@ -7184,8 +6933,6 @@ err_rt6_unwind: list_del(&mlxsw_sp_rt6->list); mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); } - mlxsw_sp_fib_entry_priv_put(fib_entry->priv); -err_fib_entry_priv_create: kfree(fib6_entry); return ERR_PTR(err); } @@ -7208,7 +6955,6 @@ static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common); 
mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry); WARN_ON(fib6_entry->nrt6); - mlxsw_sp_fib_entry_priv_put(fib6_entry->common.priv); kfree(fib6_entry); } @@ -7266,8 +7012,8 @@ static bool mlxsw_sp_fib6_allow_replace(struct mlxsw_sp_fib6_entry *fib6_entry) } static int mlxsw_sp_router_fib6_replace(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry_op_ctx *op_ctx, - struct fib6_info **rt_arr, unsigned int nrt6) + struct fib6_info **rt_arr, + unsigned int nrt6) { struct mlxsw_sp_fib6_entry *fib6_entry, *fib6_replaced; struct mlxsw_sp_fib_entry *replaced; @@ -7306,7 +7052,7 @@ static int mlxsw_sp_router_fib6_replace(struct mlxsw_sp *mlxsw_sp, } replaced = fib_node->fib_entry; - err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, op_ctx, &fib6_entry->common); + err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib6_entry->common); if (err) goto err_fib_node_entry_link; @@ -7330,8 +7076,8 @@ err_fib6_entry_create: } static int mlxsw_sp_router_fib6_append(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry_op_ctx *op_ctx, - struct fib6_info **rt_arr, unsigned int nrt6) + struct fib6_info **rt_arr, + unsigned int nrt6) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_node *fib_node; @@ -7359,7 +7105,8 @@ static int mlxsw_sp_router_fib6_append(struct mlxsw_sp *mlxsw_sp, fib6_entry = container_of(fib_node->fib_entry, struct mlxsw_sp_fib6_entry, common); - err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, op_ctx, fib6_entry, rt_arr, nrt6); + err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt_arr, + nrt6); if (err) goto err_fib6_entry_nexthop_add; @@ -7370,17 +7117,16 @@ err_fib6_entry_nexthop_add: return err; } -static int mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry_op_ctx *op_ctx, - struct fib6_info **rt_arr, unsigned int nrt6) +static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp, + struct fib6_info **rt_arr, + unsigned int nrt6) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_node *fib_node; 
struct fib6_info *rt = rt_arr[0]; - int err; if (mlxsw_sp_fib6_rt_should_ignore(rt)) - return 0; + return; /* Multipath routes are first added to the FIB trie and only then * notified. If we vetoed the addition, we will get a delete @@ -7389,22 +7135,22 @@ static int mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp, */ fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt); if (!fib6_entry) - return 0; + return; /* If not all the nexthops are deleted, then only reduce the nexthop * group. */ if (nrt6 != fib6_entry->nrt6) { - mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, op_ctx, fib6_entry, rt_arr, nrt6); - return 0; + mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt_arr, + nrt6); + return; } fib_node = fib6_entry->common.fib_node; - err = __mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, op_ctx, &fib6_entry->common); + mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib6_entry->common); mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); - return err; } static struct mlxsw_sp_mr_table * @@ -7557,15 +7303,15 @@ static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp) } } -struct mlxsw_sp_fib6_event { +struct mlxsw_sp_fib6_event_work { struct fib6_info **rt_arr; unsigned int nrt6; }; -struct mlxsw_sp_fib_event { - struct list_head list; /* node in fib queue */ +struct mlxsw_sp_fib_event_work { + struct work_struct work; union { - struct mlxsw_sp_fib6_event fib6_event; + struct mlxsw_sp_fib6_event_work fib6_work; struct fib_entry_notifier_info fen_info; struct fib_rule_notifier_info fr_info; struct fib_nh_notifier_info fnh_info; @@ -7574,12 +7320,11 @@ struct mlxsw_sp_fib_event { }; struct mlxsw_sp *mlxsw_sp; unsigned long event; - int family; }; static int -mlxsw_sp_router_fib6_event_init(struct mlxsw_sp_fib6_event *fib6_event, - struct fib6_entry_notifier_info *fen6_info) +mlxsw_sp_router_fib6_work_init(struct mlxsw_sp_fib6_event_work *fib6_work, + struct fib6_entry_notifier_info *fen6_info) { struct fib6_info *rt = 
fen6_info->rt; struct fib6_info **rt_arr; @@ -7593,8 +7338,8 @@ mlxsw_sp_router_fib6_event_init(struct mlxsw_sp_fib6_event *fib6_event, if (!rt_arr) return -ENOMEM; - fib6_event->rt_arr = rt_arr; - fib6_event->nrt6 = nrt6; + fib6_work->rt_arr = rt_arr; + fib6_work->nrt6 = nrt6; rt_arr[0] = rt; fib6_info_hold(rt); @@ -7616,242 +7361,182 @@ mlxsw_sp_router_fib6_event_init(struct mlxsw_sp_fib6_event *fib6_event, } static void -mlxsw_sp_router_fib6_event_fini(struct mlxsw_sp_fib6_event *fib6_event) +mlxsw_sp_router_fib6_work_fini(struct mlxsw_sp_fib6_event_work *fib6_work) { int i; - for (i = 0; i < fib6_event->nrt6; i++) - mlxsw_sp_rt6_release(fib6_event->rt_arr[i]); - kfree(fib6_event->rt_arr); + for (i = 0; i < fib6_work->nrt6; i++) + mlxsw_sp_rt6_release(fib6_work->rt_arr[i]); + kfree(fib6_work->rt_arr); } -static void mlxsw_sp_router_fib4_event_process(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry_op_ctx *op_ctx, - struct mlxsw_sp_fib_event *fib_event) +static void mlxsw_sp_router_fib4_event_work(struct work_struct *work) { + struct mlxsw_sp_fib_event_work *fib_work = + container_of(work, struct mlxsw_sp_fib_event_work, work); + struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; int err; + mutex_lock(&mlxsw_sp->router->lock); mlxsw_sp_span_respin(mlxsw_sp); - switch (fib_event->event) { + switch (fib_work->event) { case FIB_EVENT_ENTRY_REPLACE: - err = mlxsw_sp_router_fib4_replace(mlxsw_sp, op_ctx, &fib_event->fen_info); + err = mlxsw_sp_router_fib4_replace(mlxsw_sp, + &fib_work->fen_info); if (err) { - mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx); dev_warn(mlxsw_sp->bus_info->dev, "FIB replace failed.\n"); mlxsw_sp_fib4_offload_failed_flag_set(mlxsw_sp, - &fib_event->fen_info); + &fib_work->fen_info); } - fib_info_put(fib_event->fen_info.fi); + fib_info_put(fib_work->fen_info.fi); break; case FIB_EVENT_ENTRY_DEL: - err = mlxsw_sp_router_fib4_del(mlxsw_sp, op_ctx, &fib_event->fen_info); - if (err) - mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx); - 
fib_info_put(fib_event->fen_info.fi); + mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info); + fib_info_put(fib_work->fen_info.fi); break; case FIB_EVENT_NH_ADD: case FIB_EVENT_NH_DEL: - mlxsw_sp_nexthop4_event(mlxsw_sp, fib_event->event, fib_event->fnh_info.fib_nh); - fib_info_put(fib_event->fnh_info.fib_nh->nh_parent); + mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event, + fib_work->fnh_info.fib_nh); + fib_info_put(fib_work->fnh_info.fib_nh->nh_parent); break; } + mutex_unlock(&mlxsw_sp->router->lock); + kfree(fib_work); } -static void mlxsw_sp_router_fib6_event_process(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry_op_ctx *op_ctx, - struct mlxsw_sp_fib_event *fib_event) +static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) { - struct mlxsw_sp_fib6_event *fib6_event = &fib_event->fib6_event; + struct mlxsw_sp_fib_event_work *fib_work = + container_of(work, struct mlxsw_sp_fib_event_work, work); + struct mlxsw_sp_fib6_event_work *fib6_work = &fib_work->fib6_work; + struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; int err; + mutex_lock(&mlxsw_sp->router->lock); mlxsw_sp_span_respin(mlxsw_sp); - switch (fib_event->event) { + switch (fib_work->event) { case FIB_EVENT_ENTRY_REPLACE: - err = mlxsw_sp_router_fib6_replace(mlxsw_sp, op_ctx, fib_event->fib6_event.rt_arr, - fib_event->fib6_event.nrt6); + err = mlxsw_sp_router_fib6_replace(mlxsw_sp, + fib6_work->rt_arr, + fib6_work->nrt6); if (err) { - mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx); dev_warn(mlxsw_sp->bus_info->dev, "FIB replace failed.\n"); mlxsw_sp_fib6_offload_failed_flag_set(mlxsw_sp, - fib6_event->rt_arr, - fib6_event->nrt6); + fib6_work->rt_arr, + fib6_work->nrt6); } - mlxsw_sp_router_fib6_event_fini(&fib_event->fib6_event); + mlxsw_sp_router_fib6_work_fini(fib6_work); break; case FIB_EVENT_ENTRY_APPEND: - err = mlxsw_sp_router_fib6_append(mlxsw_sp, op_ctx, fib_event->fib6_event.rt_arr, - fib_event->fib6_event.nrt6); + err = mlxsw_sp_router_fib6_append(mlxsw_sp, + 
fib6_work->rt_arr, + fib6_work->nrt6); if (err) { - mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx); dev_warn(mlxsw_sp->bus_info->dev, "FIB append failed.\n"); mlxsw_sp_fib6_offload_failed_flag_set(mlxsw_sp, - fib6_event->rt_arr, - fib6_event->nrt6); + fib6_work->rt_arr, + fib6_work->nrt6); } - mlxsw_sp_router_fib6_event_fini(&fib_event->fib6_event); + mlxsw_sp_router_fib6_work_fini(fib6_work); break; case FIB_EVENT_ENTRY_DEL: - err = mlxsw_sp_router_fib6_del(mlxsw_sp, op_ctx, fib_event->fib6_event.rt_arr, - fib_event->fib6_event.nrt6); - if (err) - mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx); - mlxsw_sp_router_fib6_event_fini(&fib_event->fib6_event); + mlxsw_sp_router_fib6_del(mlxsw_sp, + fib6_work->rt_arr, + fib6_work->nrt6); + mlxsw_sp_router_fib6_work_fini(fib6_work); break; } + mutex_unlock(&mlxsw_sp->router->lock); + kfree(fib_work); } -static void mlxsw_sp_router_fibmr_event_process(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_event *fib_event) +static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work) { + struct mlxsw_sp_fib_event_work *fib_work = + container_of(work, struct mlxsw_sp_fib_event_work, work); + struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; bool replace; int err; rtnl_lock(); mutex_lock(&mlxsw_sp->router->lock); - switch (fib_event->event) { + switch (fib_work->event) { case FIB_EVENT_ENTRY_REPLACE: case FIB_EVENT_ENTRY_ADD: - replace = fib_event->event == FIB_EVENT_ENTRY_REPLACE; + replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE; - err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_event->men_info, replace); + err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info, + replace); if (err) dev_warn(mlxsw_sp->bus_info->dev, "MR entry add failed.\n"); - mr_cache_put(fib_event->men_info.mfc); + mr_cache_put(fib_work->men_info.mfc); break; case FIB_EVENT_ENTRY_DEL: - mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_event->men_info); - mr_cache_put(fib_event->men_info.mfc); + mlxsw_sp_router_fibmr_del(mlxsw_sp, 
&fib_work->men_info); + mr_cache_put(fib_work->men_info.mfc); break; case FIB_EVENT_VIF_ADD: err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp, - &fib_event->ven_info); + &fib_work->ven_info); if (err) dev_warn(mlxsw_sp->bus_info->dev, "MR VIF add failed.\n"); - dev_put(fib_event->ven_info.dev); + dev_put(fib_work->ven_info.dev); break; case FIB_EVENT_VIF_DEL: - mlxsw_sp_router_fibmr_vif_del(mlxsw_sp, &fib_event->ven_info); - dev_put(fib_event->ven_info.dev); + mlxsw_sp_router_fibmr_vif_del(mlxsw_sp, + &fib_work->ven_info); + dev_put(fib_work->ven_info.dev); break; } mutex_unlock(&mlxsw_sp->router->lock); rtnl_unlock(); + kfree(fib_work); } -static void mlxsw_sp_router_fib_event_work(struct work_struct *work) -{ - struct mlxsw_sp_router *router = container_of(work, struct mlxsw_sp_router, fib_event_work); - struct mlxsw_sp_fib_entry_op_ctx *op_ctx = router->ll_op_ctx; - struct mlxsw_sp *mlxsw_sp = router->mlxsw_sp; - struct mlxsw_sp_fib_event *next_fib_event; - struct mlxsw_sp_fib_event *fib_event; - int last_family = AF_UNSPEC; - LIST_HEAD(fib_event_queue); - - spin_lock_bh(&router->fib_event_queue_lock); - list_splice_init(&router->fib_event_queue, &fib_event_queue); - spin_unlock_bh(&router->fib_event_queue_lock); - - /* Router lock is held here to make sure per-instance - * operation context is not used in between FIB4/6 events - * processing. - */ - mutex_lock(&router->lock); - mlxsw_sp_fib_entry_op_ctx_clear(op_ctx); - list_for_each_entry_safe(fib_event, next_fib_event, - &fib_event_queue, list) { - /* Check if the next entry in the queue exists and it is - * of the same type (family and event) as the currect one. - * In that case it is permitted to do the bulking - * of multiple FIB entries to a single register write. 
- */ - op_ctx->bulk_ok = !list_is_last(&fib_event->list, &fib_event_queue) && - fib_event->family == next_fib_event->family && - fib_event->event == next_fib_event->event; - op_ctx->event = fib_event->event; - - /* In case family of this and the previous entry are different, context - * reinitialization is going to be needed now, indicate that. - * Note that since last_family is initialized to AF_UNSPEC, this is always - * going to happen for the first entry processed in the work. - */ - if (fib_event->family != last_family) - op_ctx->initialized = false; - - switch (fib_event->family) { - case AF_INET: - mlxsw_sp_router_fib4_event_process(mlxsw_sp, op_ctx, - fib_event); - break; - case AF_INET6: - mlxsw_sp_router_fib6_event_process(mlxsw_sp, op_ctx, - fib_event); - break; - case RTNL_FAMILY_IP6MR: - case RTNL_FAMILY_IPMR: - /* Unlock here as inside FIBMR the lock is taken again - * under RTNL. The per-instance operation context - * is not used by FIBMR. - */ - mutex_unlock(&router->lock); - mlxsw_sp_router_fibmr_event_process(mlxsw_sp, - fib_event); - mutex_lock(&router->lock); - break; - default: - WARN_ON_ONCE(1); - } - last_family = fib_event->family; - kfree(fib_event); - cond_resched(); - } - WARN_ON_ONCE(!list_empty(&router->ll_op_ctx->fib_entry_priv_list)); - mutex_unlock(&router->lock); -} - -static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event *fib_event, +static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work, struct fib_notifier_info *info) { struct fib_entry_notifier_info *fen_info; struct fib_nh_notifier_info *fnh_info; - switch (fib_event->event) { + switch (fib_work->event) { case FIB_EVENT_ENTRY_REPLACE: case FIB_EVENT_ENTRY_DEL: fen_info = container_of(info, struct fib_entry_notifier_info, info); - fib_event->fen_info = *fen_info; + fib_work->fen_info = *fen_info; /* Take reference on fib_info to prevent it from being - * freed while event is queued. Release it afterwards. + * freed while work is queued. 
Release it afterwards. */ - fib_info_hold(fib_event->fen_info.fi); + fib_info_hold(fib_work->fen_info.fi); break; case FIB_EVENT_NH_ADD: case FIB_EVENT_NH_DEL: fnh_info = container_of(info, struct fib_nh_notifier_info, info); - fib_event->fnh_info = *fnh_info; - fib_info_hold(fib_event->fnh_info.fib_nh->nh_parent); + fib_work->fnh_info = *fnh_info; + fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent); break; } } -static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event *fib_event, +static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, struct fib_notifier_info *info) { struct fib6_entry_notifier_info *fen6_info; int err; - switch (fib_event->event) { + switch (fib_work->event) { case FIB_EVENT_ENTRY_REPLACE: case FIB_EVENT_ENTRY_APPEND: case FIB_EVENT_ENTRY_DEL: fen6_info = container_of(info, struct fib6_entry_notifier_info, info); - err = mlxsw_sp_router_fib6_event_init(&fib_event->fib6_event, - fen6_info); + err = mlxsw_sp_router_fib6_work_init(&fib_work->fib6_work, + fen6_info); if (err) return err; break; @@ -7861,20 +7546,20 @@ static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event *fib_event, } static void -mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event *fib_event, +mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work, struct fib_notifier_info *info) { - switch (fib_event->event) { + switch (fib_work->event) { case FIB_EVENT_ENTRY_REPLACE: case FIB_EVENT_ENTRY_ADD: case FIB_EVENT_ENTRY_DEL: - memcpy(&fib_event->men_info, info, sizeof(fib_event->men_info)); - mr_cache_hold(fib_event->men_info.mfc); + memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info)); + mr_cache_hold(fib_work->men_info.mfc); break; case FIB_EVENT_VIF_ADD: case FIB_EVENT_VIF_DEL: - memcpy(&fib_event->ven_info, info, sizeof(fib_event->ven_info)); - dev_hold(fib_event->ven_info.dev); + memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info)); + dev_hold(fib_work->ven_info.dev); break; } } @@ -7928,7 +7613,7 @@ static 
int mlxsw_sp_router_fib_rule_event(unsigned long event, static int mlxsw_sp_router_fib_event(struct notifier_block *nb, unsigned long event, void *ptr) { - struct mlxsw_sp_fib_event *fib_event; + struct mlxsw_sp_fib_event_work *fib_work; struct fib_notifier_info *info = ptr; struct mlxsw_sp_router *router; int err; @@ -7960,39 +7645,37 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, break; } - fib_event = kzalloc(sizeof(*fib_event), GFP_ATOMIC); - if (!fib_event) + fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC); + if (!fib_work) return NOTIFY_BAD; - fib_event->mlxsw_sp = router->mlxsw_sp; - fib_event->event = event; - fib_event->family = info->family; + fib_work->mlxsw_sp = router->mlxsw_sp; + fib_work->event = event; switch (info->family) { case AF_INET: - mlxsw_sp_router_fib4_event(fib_event, info); + INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work); + mlxsw_sp_router_fib4_event(fib_work, info); break; case AF_INET6: - err = mlxsw_sp_router_fib6_event(fib_event, info); + INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work); + err = mlxsw_sp_router_fib6_event(fib_work, info); if (err) goto err_fib_event; break; case RTNL_FAMILY_IP6MR: case RTNL_FAMILY_IPMR: - mlxsw_sp_router_fibmr_event(fib_event, info); + INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work); + mlxsw_sp_router_fibmr_event(fib_work, info); break; } - /* Enqueue the event and trigger the work */ - spin_lock_bh(&router->fib_event_queue_lock); - list_add_tail(&fib_event->list, &router->fib_event_queue); - spin_unlock_bh(&router->fib_event_queue_lock); - mlxsw_core_schedule_work(&router->fib_event_work); + mlxsw_core_schedule_work(&fib_work->work); return NOTIFY_DONE; err_fib_event: - kfree(fib_event); + kfree(fib_work); return NOTIFY_BAD; } @@ -8035,7 +7718,7 @@ u16 mlxsw_sp_rif_vid(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev) /* We only return the VID for VLAN RIFs. Otherwise we return an * invalid value (0). 
*/ - if (rif->ops->type != MLXSW_SP_RIF_TYPE_VLAN) + if (rif->ops->type != MLXSW_SP_RIF_TYPE_VLAN_EMU) goto out; vid = mlxsw_sp_fid_8021q_vid(rif->fid); @@ -8451,6 +8134,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_rif_counters_alloc(rif); } + atomic_inc(&mlxsw_sp->router->rifs_count); return rif; err_stats_enable: @@ -8480,6 +8164,7 @@ static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) struct mlxsw_sp_vr *vr; int i; + atomic_dec(&mlxsw_sp->router->rifs_count); mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif); vr = &mlxsw_sp->router->vrs[rif->vr_id]; @@ -8638,6 +8323,13 @@ static u64 mlxsw_sp_rif_mac_profiles_occ_get(void *priv) return atomic_read(&mlxsw_sp->router->rif_mac_profiles_count); } +static u64 mlxsw_sp_rifs_occ_get(void *priv) +{ + const struct mlxsw_sp *mlxsw_sp = priv; + + return atomic_read(&mlxsw_sp->router->rifs_count); +} + static struct mlxsw_sp_rif_mac_profile * mlxsw_sp_rif_mac_profile_create(struct mlxsw_sp *mlxsw_sp, const char *mac, struct netlink_ext_ack *extack) @@ -9685,10 +9377,9 @@ static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = { .fid_get = mlxsw_sp_rif_subport_fid_get, }; -static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif, - enum mlxsw_reg_ritr_if_type type, - u16 vid_fid, bool enable) +static int mlxsw_sp_rif_fid_op(struct mlxsw_sp_rif *rif, u16 fid, bool enable) { + enum mlxsw_reg_ritr_if_type type = MLXSW_REG_RITR_FID_IF; struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; char ritr_pl[MLXSW_REG_RITR_LEN]; @@ -9696,7 +9387,7 @@ static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif, rif->dev->mtu); mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr); mlxsw_reg_ritr_if_mac_profile_id_set(ritr_pl, rif->mac_profile_id); - mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid); + mlxsw_reg_ritr_fid_if_fid_set(ritr_pl, fid); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); } @@ -9720,10 +9411,9 @@ static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif, return err; 
rif->mac_profile_id = mac_profile; - err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, - true); + err = mlxsw_sp_rif_fid_op(rif, fid_index, true); if (err) - goto err_rif_vlan_fid_op; + goto err_rif_fid_op; err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, mlxsw_sp_router_port(mlxsw_sp), true); @@ -9750,8 +9440,8 @@ err_fid_bc_flood_set: mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, mlxsw_sp_router_port(mlxsw_sp), false); err_fid_mc_flood_set: - mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false); -err_rif_vlan_fid_op: + mlxsw_sp_rif_fid_op(rif, fid_index, false); +err_rif_fid_op: mlxsw_sp_rif_mac_profile_put(mlxsw_sp, mac_profile); return err; } @@ -9770,7 +9460,7 @@ static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif) mlxsw_sp_router_port(mlxsw_sp), false); mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, mlxsw_sp_router_port(mlxsw_sp), false); - mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false); + mlxsw_sp_rif_fid_op(rif, fid_index, false); mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, rif->mac_profile_id); } @@ -9848,7 +9538,7 @@ static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac) } static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_emu_ops = { - .type = MLXSW_SP_RIF_TYPE_VLAN, + .type = MLXSW_SP_RIF_TYPE_VLAN_EMU, .rif_size = sizeof(struct mlxsw_sp_rif), .configure = mlxsw_sp_rif_fid_configure, .deconfigure = mlxsw_sp_rif_fid_deconfigure, @@ -9926,7 +9616,7 @@ static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_ipip_lb_ops = { static const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[] = { [MLXSW_SP_RIF_TYPE_SUBPORT] = &mlxsw_sp_rif_subport_ops, - [MLXSW_SP_RIF_TYPE_VLAN] = &mlxsw_sp_rif_vlan_emu_ops, + [MLXSW_SP_RIF_TYPE_VLAN_EMU] = &mlxsw_sp_rif_vlan_emu_ops, [MLXSW_SP_RIF_TYPE_FID] = &mlxsw_sp_rif_fid_ops, [MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp1_rif_ipip_lb_ops, }; @@ -9969,6 +9659,7 @@ mlxsw_sp_ul_rif_create(struct 
mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr, if (err) goto ul_rif_op_err; + atomic_inc(&mlxsw_sp->router->rifs_count); return ul_rif; ul_rif_op_err: @@ -9981,6 +9672,7 @@ static void mlxsw_sp_ul_rif_destroy(struct mlxsw_sp_rif *ul_rif) { struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp; + atomic_dec(&mlxsw_sp->router->rifs_count); mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, false); mlxsw_sp->router->rifs[ul_rif->rif_index] = NULL; kfree(ul_rif); @@ -10112,7 +9804,7 @@ static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_ipip_lb_ops = { static const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = { [MLXSW_SP_RIF_TYPE_SUBPORT] = &mlxsw_sp_rif_subport_ops, - [MLXSW_SP_RIF_TYPE_VLAN] = &mlxsw_sp_rif_vlan_emu_ops, + [MLXSW_SP_RIF_TYPE_VLAN_EMU] = &mlxsw_sp_rif_vlan_emu_ops, [MLXSW_SP_RIF_TYPE_FID] = &mlxsw_sp_rif_fid_ops, [MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp2_rif_ipip_lb_ops, }; @@ -10136,10 +9828,15 @@ static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp) idr_init(&mlxsw_sp->router->rif_mac_profiles_idr); atomic_set(&mlxsw_sp->router->rif_mac_profiles_count, 0); + atomic_set(&mlxsw_sp->router->rifs_count, 0); devlink_resource_occ_get_register(devlink, MLXSW_SP_RESOURCE_RIF_MAC_PROFILES, mlxsw_sp_rif_mac_profiles_occ_get, mlxsw_sp); + devlink_resource_occ_get_register(devlink, + MLXSW_SP_RESOURCE_RIFS, + mlxsw_sp_rifs_occ_get, + mlxsw_sp); return 0; } @@ -10149,9 +9846,11 @@ static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp) struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); int i; + WARN_ON_ONCE(atomic_read(&mlxsw_sp->router->rifs_count)); for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) WARN_ON_ONCE(mlxsw_sp->router->rifs[i]); + devlink_resource_occ_get_unregister(devlink, MLXSW_SP_RESOURCE_RIFS); devlink_resource_occ_get_unregister(devlink, MLXSW_SP_RESOURCE_RIF_MAC_PROFILES); WARN_ON(!idr_is_empty(&mlxsw_sp->router->rif_mac_profiles_idr)); @@ -10533,46 +10232,6 @@ static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) 
mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); } -static const struct mlxsw_sp_router_ll_ops mlxsw_sp_router_ll_basic_ops = { - .init = mlxsw_sp_router_ll_basic_init, - .ralta_write = mlxsw_sp_router_ll_basic_ralta_write, - .ralst_write = mlxsw_sp_router_ll_basic_ralst_write, - .raltb_write = mlxsw_sp_router_ll_basic_raltb_write, - .fib_entry_op_ctx_size = sizeof(struct mlxsw_sp_fib_entry_op_ctx_basic), - .fib_entry_pack = mlxsw_sp_router_ll_basic_fib_entry_pack, - .fib_entry_act_remote_pack = mlxsw_sp_router_ll_basic_fib_entry_act_remote_pack, - .fib_entry_act_local_pack = mlxsw_sp_router_ll_basic_fib_entry_act_local_pack, - .fib_entry_act_ip2me_pack = mlxsw_sp_router_ll_basic_fib_entry_act_ip2me_pack, - .fib_entry_act_ip2me_tun_pack = mlxsw_sp_router_ll_basic_fib_entry_act_ip2me_tun_pack, - .fib_entry_commit = mlxsw_sp_router_ll_basic_fib_entry_commit, - .fib_entry_is_committed = mlxsw_sp_router_ll_basic_fib_entry_is_committed, -}; - -static int mlxsw_sp_router_ll_op_ctx_init(struct mlxsw_sp_router *router) -{ - size_t max_size = 0; - int i; - - for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) { - size_t size = router->proto_ll_ops[i]->fib_entry_op_ctx_size; - - if (size > max_size) - max_size = size; - } - router->ll_op_ctx = kzalloc(sizeof(*router->ll_op_ctx) + max_size, - GFP_KERNEL); - if (!router->ll_op_ctx) - return -ENOMEM; - INIT_LIST_HEAD(&router->ll_op_ctx->fib_entry_priv_list); - return 0; -} - -static void mlxsw_sp_router_ll_op_ctx_fini(struct mlxsw_sp_router *router) -{ - WARN_ON(!list_empty(&router->ll_op_ctx->fib_entry_priv_list)); - kfree(router->ll_op_ctx); -} - static int mlxsw_sp_lb_rif_init(struct mlxsw_sp *mlxsw_sp) { u16 lb_rif_index; @@ -10646,23 +10305,9 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, if (err) goto err_router_ops_init; - err = mlxsw_sp_router_xm_init(mlxsw_sp); - if (err) - goto err_xm_init; - - router->proto_ll_ops[MLXSW_SP_L3_PROTO_IPV4] = mlxsw_sp_router_xm_ipv4_is_supported(mlxsw_sp) ? 
- &mlxsw_sp_router_ll_xm_ops : - &mlxsw_sp_router_ll_basic_ops; - router->proto_ll_ops[MLXSW_SP_L3_PROTO_IPV6] = &mlxsw_sp_router_ll_basic_ops; - - err = mlxsw_sp_router_ll_op_ctx_init(router); - if (err) - goto err_ll_op_ctx_init; - INIT_LIST_HEAD(&mlxsw_sp->router->nh_res_grp_list); INIT_DELAYED_WORK(&mlxsw_sp->router->nh_grp_activity_dw, mlxsw_sp_nh_grp_activity_work); - INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list); err = __mlxsw_sp_router_init(mlxsw_sp); if (err) @@ -10715,10 +10360,6 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, if (err) goto err_dscp_init; - INIT_WORK(&router->fib_event_work, mlxsw_sp_router_fib_event_work); - INIT_LIST_HEAD(&router->fib_event_queue); - spin_lock_init(&router->fib_event_queue_lock); - router->inetaddr_nb.notifier_call = mlxsw_sp_inetaddr_event; err = register_inetaddr_notifier(&router->inetaddr_nb); if (err) @@ -10773,7 +10414,6 @@ err_register_inet6addr_notifier: unregister_inetaddr_notifier(&router->inetaddr_nb); err_register_inetaddr_notifier: mlxsw_core_flush_owq(); - WARN_ON(!list_empty(&router->fib_event_queue)); err_dscp_init: err_mp_hash_init: mlxsw_sp_neigh_fini(mlxsw_sp); @@ -10797,10 +10437,6 @@ err_rifs_init: __mlxsw_sp_router_fini(mlxsw_sp); err_router_init: cancel_delayed_work_sync(&mlxsw_sp->router->nh_grp_activity_dw); - mlxsw_sp_router_ll_op_ctx_fini(router); -err_ll_op_ctx_init: - mlxsw_sp_router_xm_fini(mlxsw_sp); -err_xm_init: err_router_ops_init: mutex_destroy(&mlxsw_sp->router->lock); kfree(mlxsw_sp->router); @@ -10819,7 +10455,6 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) unregister_inet6addr_notifier(&mlxsw_sp->router->inet6addr_nb); unregister_inetaddr_notifier(&mlxsw_sp->router->inetaddr_nb); mlxsw_core_flush_owq(); - WARN_ON(!list_empty(&mlxsw_sp->router->fib_event_queue)); mlxsw_sp_neigh_fini(mlxsw_sp); mlxsw_sp_lb_rif_fini(mlxsw_sp); mlxsw_sp_vrs_fini(mlxsw_sp); @@ -10831,8 +10466,6 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) mlxsw_sp_rifs_fini(mlxsw_sp); 
__mlxsw_sp_router_fini(mlxsw_sp); cancel_delayed_work_sync(&mlxsw_sp->router->nh_grp_activity_dw); - mlxsw_sp_router_ll_op_ctx_fini(mlxsw_sp->router); - mlxsw_sp_router_xm_fini(mlxsw_sp); mutex_destroy(&mlxsw_sp->router->lock); kfree(mlxsw_sp->router); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index 37411b74c3e6..b5c83ec7a87f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -15,32 +15,12 @@ struct mlxsw_sp_router_nve_decap { u8 valid:1; }; -struct mlxsw_sp_fib_entry_op_ctx { - u8 bulk_ok:1, /* Indicate to the low-level op it is ok to bulk - * the actual entry with the one that is the next - * in queue. - */ - initialized:1; /* Bit that the low-level op sets in case - * the context priv is initialized. - */ - struct list_head fib_entry_priv_list; - unsigned long event; - unsigned long ll_priv[]; -}; - -static inline void -mlxsw_sp_fib_entry_op_ctx_clear(struct mlxsw_sp_fib_entry_op_ctx *op_ctx) -{ - WARN_ON_ONCE(!list_empty(&op_ctx->fib_entry_priv_list)); - memset(op_ctx, 0, sizeof(*op_ctx)); - INIT_LIST_HEAD(&op_ctx->fib_entry_priv_list); -} - struct mlxsw_sp_router { struct mlxsw_sp *mlxsw_sp; struct mlxsw_sp_rif **rifs; struct idr rif_mac_profiles_idr; atomic_t rif_mac_profiles_count; + atomic_t rifs_count; u8 max_rif_mac_profile; struct mlxsw_sp_vr *vrs; struct rhashtable neigh_ht; @@ -72,14 +52,8 @@ struct mlxsw_sp_router { const struct mlxsw_sp_ipip_ops **ipip_ops_arr; struct mlxsw_sp_router_nve_decap nve_decap_config; struct mutex lock; /* Protects shared router resources */ - struct work_struct fib_event_work; - struct list_head fib_event_queue; - spinlock_t fib_event_queue_lock; /* Protects fib event queue list */ - /* One set of ops for each protocol: IPv4 and IPv6 */ - const struct mlxsw_sp_router_ll_ops *proto_ll_ops[MLXSW_SP_L3_PROTO_MAX]; struct mlxsw_sp_fib_entry_op_ctx *ll_op_ctx; u16 
lb_rif_index; - struct mlxsw_sp_router_xm *xm; const struct mlxsw_sp_adj_grp_size_range *adj_grp_size_ranges; size_t adj_grp_size_ranges_count; struct delayed_work nh_grp_activity_dw; @@ -89,48 +63,6 @@ struct mlxsw_sp_router { u32 adj_trap_index; }; -struct mlxsw_sp_fib_entry_priv { - refcount_t refcnt; - struct list_head list; /* Member in op_ctx->fib_entry_priv_list */ - unsigned long priv[]; -}; - -enum mlxsw_sp_fib_entry_op { - MLXSW_SP_FIB_ENTRY_OP_WRITE, - MLXSW_SP_FIB_ENTRY_OP_UPDATE, - MLXSW_SP_FIB_ENTRY_OP_DELETE, -}; - -/* Low-level router ops. Basically this is to handle the different - * register sets to work with ordinary and XM trees and FIB entries. - */ -struct mlxsw_sp_router_ll_ops { - int (*init)(struct mlxsw_sp *mlxsw_sp, u16 vr_id, - enum mlxsw_sp_l3proto proto); - int (*ralta_write)(struct mlxsw_sp *mlxsw_sp, char *xralta_pl); - int (*ralst_write)(struct mlxsw_sp *mlxsw_sp, char *xralst_pl); - int (*raltb_write)(struct mlxsw_sp *mlxsw_sp, char *xraltb_pl); - size_t fib_entry_op_ctx_size; - size_t fib_entry_priv_size; - void (*fib_entry_pack)(struct mlxsw_sp_fib_entry_op_ctx *op_ctx, - enum mlxsw_sp_l3proto proto, enum mlxsw_sp_fib_entry_op op, - u16 virtual_router, u8 prefix_len, unsigned char *addr, - struct mlxsw_sp_fib_entry_priv *priv); - void (*fib_entry_act_remote_pack)(struct mlxsw_sp_fib_entry_op_ctx *op_ctx, - enum mlxsw_reg_ralue_trap_action trap_action, - u16 trap_id, u32 adjacency_index, u16 ecmp_size); - void (*fib_entry_act_local_pack)(struct mlxsw_sp_fib_entry_op_ctx *op_ctx, - enum mlxsw_reg_ralue_trap_action trap_action, - u16 trap_id, u16 local_erif); - void (*fib_entry_act_ip2me_pack)(struct mlxsw_sp_fib_entry_op_ctx *op_ctx); - void (*fib_entry_act_ip2me_tun_pack)(struct mlxsw_sp_fib_entry_op_ctx *op_ctx, - u32 tunnel_ptr); - int (*fib_entry_commit)(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry_op_ctx *op_ctx, - bool *postponed_for_bulk); - bool (*fib_entry_is_committed)(struct mlxsw_sp_fib_entry_priv *priv); -}; 
- struct mlxsw_sp_rif_ipip_lb; struct mlxsw_sp_rif_ipip_lb_config { enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt; @@ -232,10 +164,4 @@ int mlxsw_sp_ipip_ecn_decap_init(struct mlxsw_sp *mlxsw_sp); struct net_device * mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev); -extern const struct mlxsw_sp_router_ll_ops mlxsw_sp_router_ll_xm_ops; - -int mlxsw_sp_router_xm_init(struct mlxsw_sp *mlxsw_sp); -void mlxsw_sp_router_xm_fini(struct mlxsw_sp *mlxsw_sp); -bool mlxsw_sp_router_xm_ipv4_is_supported(const struct mlxsw_sp *mlxsw_sp); - #endif /* _MLXSW_ROUTER_H_*/ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router_xm.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router_xm.c deleted file mode 100644 index d213af723a2a..000000000000 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router_xm.c +++ /dev/null @@ -1,812 +0,0 @@ -// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 -/* Copyright (c) 2020 Mellanox Technologies. All rights reserved */ - -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/rhashtable.h> - -#include "spectrum.h" -#include "core.h" -#include "reg.h" -#include "spectrum_router.h" - -#define MLXSW_SP_ROUTER_XM_M_VAL 16 - -static const u8 mlxsw_sp_router_xm_m_val[] = { - [MLXSW_SP_L3_PROTO_IPV4] = MLXSW_SP_ROUTER_XM_M_VAL, - [MLXSW_SP_L3_PROTO_IPV6] = 0, /* Currently unused. 
*/ -}; - -#define MLXSW_SP_ROUTER_XM_L_VAL_MAX 16 - -struct mlxsw_sp_router_xm { - bool ipv4_supported; - bool ipv6_supported; - unsigned int entries_size; - struct rhashtable ltable_ht; - struct rhashtable flush_ht; /* Stores items about to be flushed from cache */ - unsigned int flush_count; - bool flush_all_mode; -}; - -struct mlxsw_sp_router_xm_ltable_node { - struct rhash_head ht_node; /* Member of router_xm->ltable_ht */ - u16 mindex; - u8 current_lvalue; - refcount_t refcnt; - unsigned int lvalue_ref[MLXSW_SP_ROUTER_XM_L_VAL_MAX + 1]; -}; - -static const struct rhashtable_params mlxsw_sp_router_xm_ltable_ht_params = { - .key_offset = offsetof(struct mlxsw_sp_router_xm_ltable_node, mindex), - .head_offset = offsetof(struct mlxsw_sp_router_xm_ltable_node, ht_node), - .key_len = sizeof(u16), - .automatic_shrinking = true, -}; - -struct mlxsw_sp_router_xm_flush_info { - bool all; - enum mlxsw_sp_l3proto proto; - u16 virtual_router; - u8 prefix_len; - unsigned char addr[sizeof(struct in6_addr)]; -}; - -struct mlxsw_sp_router_xm_fib_entry { - bool committed; - struct mlxsw_sp_router_xm_ltable_node *ltable_node; /* Parent node */ - u16 mindex; /* Store for processing from commit op */ - u8 lvalue; - struct mlxsw_sp_router_xm_flush_info flush_info; -}; - -#define MLXSW_SP_ROUTE_LL_XM_ENTRIES_MAX \ - (MLXSW_REG_XMDR_TRANS_LEN / MLXSW_REG_XMDR_C_LT_ROUTE_V4_LEN) - -struct mlxsw_sp_fib_entry_op_ctx_xm { - bool initialized; - char xmdr_pl[MLXSW_REG_XMDR_LEN]; - unsigned int trans_offset; /* Offset of the current command within one - * transaction of XMDR register. - */ - unsigned int trans_item_len; /* The current command length. This is used - * to advance 'trans_offset' when the next - * command is appended. 
- */ - unsigned int entries_count; - struct mlxsw_sp_router_xm_fib_entry *entries[MLXSW_SP_ROUTE_LL_XM_ENTRIES_MAX]; -}; - -static int mlxsw_sp_router_ll_xm_init(struct mlxsw_sp *mlxsw_sp, u16 vr_id, - enum mlxsw_sp_l3proto proto) -{ - char rxlte_pl[MLXSW_REG_RXLTE_LEN]; - - mlxsw_reg_rxlte_pack(rxlte_pl, vr_id, - (enum mlxsw_reg_rxlte_protocol) proto, true); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rxlte), rxlte_pl); -} - -static int mlxsw_sp_router_ll_xm_ralta_write(struct mlxsw_sp *mlxsw_sp, char *xralta_pl) -{ - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(xralta), xralta_pl); -} - -static int mlxsw_sp_router_ll_xm_ralst_write(struct mlxsw_sp *mlxsw_sp, char *xralst_pl) -{ - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(xralst), xralst_pl); -} - -static int mlxsw_sp_router_ll_xm_raltb_write(struct mlxsw_sp *mlxsw_sp, char *xraltb_pl) -{ - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(xraltb), xraltb_pl); -} - -static u16 mlxsw_sp_router_ll_xm_mindex_get4(const u32 addr) -{ - /* Currently the M-index is set to linear mode. That means it is defined - * as 16 MSB of IP address. 
- */ - return addr >> MLXSW_SP_ROUTER_XM_L_VAL_MAX; -} - -static u16 mlxsw_sp_router_ll_xm_mindex_get6(const unsigned char *addr) -{ - WARN_ON_ONCE(1); - return 0; /* currently unused */ -} - -static void mlxsw_sp_router_ll_xm_op_ctx_check_init(struct mlxsw_sp_fib_entry_op_ctx *op_ctx, - struct mlxsw_sp_fib_entry_op_ctx_xm *op_ctx_xm) -{ - if (op_ctx->initialized) - return; - op_ctx->initialized = true; - - mlxsw_reg_xmdr_pack(op_ctx_xm->xmdr_pl, true); - op_ctx_xm->trans_offset = 0; - op_ctx_xm->entries_count = 0; -} - -static void mlxsw_sp_router_ll_xm_fib_entry_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx, - enum mlxsw_sp_l3proto proto, - enum mlxsw_sp_fib_entry_op op, - u16 virtual_router, u8 prefix_len, - unsigned char *addr, - struct mlxsw_sp_fib_entry_priv *priv) -{ - struct mlxsw_sp_fib_entry_op_ctx_xm *op_ctx_xm = (void *) op_ctx->ll_priv; - struct mlxsw_sp_router_xm_fib_entry *fib_entry = (void *) priv->priv; - struct mlxsw_sp_router_xm_flush_info *flush_info; - enum mlxsw_reg_xmdr_c_ltr_op xmdr_c_ltr_op; - unsigned int len; - - mlxsw_sp_router_ll_xm_op_ctx_check_init(op_ctx, op_ctx_xm); - - switch (op) { - case MLXSW_SP_FIB_ENTRY_OP_WRITE: - xmdr_c_ltr_op = MLXSW_REG_XMDR_C_LTR_OP_WRITE; - break; - case MLXSW_SP_FIB_ENTRY_OP_UPDATE: - xmdr_c_ltr_op = MLXSW_REG_XMDR_C_LTR_OP_UPDATE; - break; - case MLXSW_SP_FIB_ENTRY_OP_DELETE: - xmdr_c_ltr_op = MLXSW_REG_XMDR_C_LTR_OP_DELETE; - break; - default: - WARN_ON_ONCE(1); - return; - } - - switch (proto) { - case MLXSW_SP_L3_PROTO_IPV4: - len = mlxsw_reg_xmdr_c_ltr_pack4(op_ctx_xm->xmdr_pl, op_ctx_xm->trans_offset, - op_ctx_xm->entries_count, xmdr_c_ltr_op, - virtual_router, prefix_len, (u32 *) addr); - fib_entry->mindex = mlxsw_sp_router_ll_xm_mindex_get4(*((u32 *) addr)); - break; - case MLXSW_SP_L3_PROTO_IPV6: - len = mlxsw_reg_xmdr_c_ltr_pack6(op_ctx_xm->xmdr_pl, op_ctx_xm->trans_offset, - op_ctx_xm->entries_count, xmdr_c_ltr_op, - virtual_router, prefix_len, addr); - fib_entry->mindex = 
mlxsw_sp_router_ll_xm_mindex_get6(addr); - break; - default: - WARN_ON_ONCE(1); - return; - } - if (!op_ctx_xm->trans_offset) - op_ctx_xm->trans_item_len = len; - else - WARN_ON_ONCE(op_ctx_xm->trans_item_len != len); - - op_ctx_xm->entries[op_ctx_xm->entries_count] = fib_entry; - - fib_entry->lvalue = prefix_len > mlxsw_sp_router_xm_m_val[proto] ? - prefix_len - mlxsw_sp_router_xm_m_val[proto] : 0; - - flush_info = &fib_entry->flush_info; - flush_info->proto = proto; - flush_info->virtual_router = virtual_router; - flush_info->prefix_len = prefix_len; - if (addr) - memcpy(flush_info->addr, addr, sizeof(flush_info->addr)); - else - memset(flush_info->addr, 0, sizeof(flush_info->addr)); -} - -static void -mlxsw_sp_router_ll_xm_fib_entry_act_remote_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx, - enum mlxsw_reg_ralue_trap_action trap_action, - u16 trap_id, u32 adjacency_index, u16 ecmp_size) -{ - struct mlxsw_sp_fib_entry_op_ctx_xm *op_ctx_xm = (void *) op_ctx->ll_priv; - - mlxsw_reg_xmdr_c_ltr_act_remote_pack(op_ctx_xm->xmdr_pl, op_ctx_xm->trans_offset, - trap_action, trap_id, adjacency_index, ecmp_size); -} - -static void -mlxsw_sp_router_ll_xm_fib_entry_act_local_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx, - enum mlxsw_reg_ralue_trap_action trap_action, - u16 trap_id, u16 local_erif) -{ - struct mlxsw_sp_fib_entry_op_ctx_xm *op_ctx_xm = (void *) op_ctx->ll_priv; - - mlxsw_reg_xmdr_c_ltr_act_local_pack(op_ctx_xm->xmdr_pl, op_ctx_xm->trans_offset, - trap_action, trap_id, local_erif); -} - -static void -mlxsw_sp_router_ll_xm_fib_entry_act_ip2me_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx) -{ - struct mlxsw_sp_fib_entry_op_ctx_xm *op_ctx_xm = (void *) op_ctx->ll_priv; - - mlxsw_reg_xmdr_c_ltr_act_ip2me_pack(op_ctx_xm->xmdr_pl, op_ctx_xm->trans_offset); -} - -static void -mlxsw_sp_router_ll_xm_fib_entry_act_ip2me_tun_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx, - u32 tunnel_ptr) -{ - struct mlxsw_sp_fib_entry_op_ctx_xm *op_ctx_xm = (void *) op_ctx->ll_priv; - - 
mlxsw_reg_xmdr_c_ltr_act_ip2me_tun_pack(op_ctx_xm->xmdr_pl, op_ctx_xm->trans_offset, - tunnel_ptr); -} - -static struct mlxsw_sp_router_xm_ltable_node * -mlxsw_sp_router_xm_ltable_node_get(struct mlxsw_sp_router_xm *router_xm, u16 mindex) -{ - struct mlxsw_sp_router_xm_ltable_node *ltable_node; - int err; - - ltable_node = rhashtable_lookup_fast(&router_xm->ltable_ht, &mindex, - mlxsw_sp_router_xm_ltable_ht_params); - if (ltable_node) { - refcount_inc(&ltable_node->refcnt); - return ltable_node; - } - ltable_node = kzalloc(sizeof(*ltable_node), GFP_KERNEL); - if (!ltable_node) - return ERR_PTR(-ENOMEM); - ltable_node->mindex = mindex; - refcount_set(&ltable_node->refcnt, 1); - - err = rhashtable_insert_fast(&router_xm->ltable_ht, &ltable_node->ht_node, - mlxsw_sp_router_xm_ltable_ht_params); - if (err) - goto err_insert; - - return ltable_node; - -err_insert: - kfree(ltable_node); - return ERR_PTR(err); -} - -static void mlxsw_sp_router_xm_ltable_node_put(struct mlxsw_sp_router_xm *router_xm, - struct mlxsw_sp_router_xm_ltable_node *ltable_node) -{ - if (!refcount_dec_and_test(&ltable_node->refcnt)) - return; - rhashtable_remove_fast(&router_xm->ltable_ht, &ltable_node->ht_node, - mlxsw_sp_router_xm_ltable_ht_params); - kfree(ltable_node); -} - -static int mlxsw_sp_router_xm_ltable_lvalue_set(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_router_xm_ltable_node *ltable_node) -{ - char xrmt_pl[MLXSW_REG_XRMT_LEN]; - - mlxsw_reg_xrmt_pack(xrmt_pl, ltable_node->mindex, ltable_node->current_lvalue); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(xrmt), xrmt_pl); -} - -struct mlxsw_sp_router_xm_flush_node { - struct rhash_head ht_node; /* Member of router_xm->flush_ht */ - struct list_head list; - struct mlxsw_sp_router_xm_flush_info flush_info; - struct delayed_work dw; - struct mlxsw_sp *mlxsw_sp; - unsigned long start_jiffies; - unsigned int reuses; /* By how many flush calls this was reused. 
*/ - refcount_t refcnt; -}; - -static const struct rhashtable_params mlxsw_sp_router_xm_flush_ht_params = { - .key_offset = offsetof(struct mlxsw_sp_router_xm_flush_node, flush_info), - .head_offset = offsetof(struct mlxsw_sp_router_xm_flush_node, ht_node), - .key_len = sizeof(struct mlxsw_sp_router_xm_flush_info), - .automatic_shrinking = true, -}; - -static struct mlxsw_sp_router_xm_flush_node * -mlxsw_sp_router_xm_cache_flush_node_create(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_router_xm_flush_info *flush_info) -{ - struct mlxsw_sp_router_xm *router_xm = mlxsw_sp->router->xm; - struct mlxsw_sp_router_xm_flush_node *flush_node; - int err; - - flush_node = kzalloc(sizeof(*flush_node), GFP_KERNEL); - if (!flush_node) - return ERR_PTR(-ENOMEM); - - flush_node->flush_info = *flush_info; - err = rhashtable_insert_fast(&router_xm->flush_ht, &flush_node->ht_node, - mlxsw_sp_router_xm_flush_ht_params); - if (err) { - kfree(flush_node); - return ERR_PTR(err); - } - router_xm->flush_count++; - flush_node->mlxsw_sp = mlxsw_sp; - flush_node->start_jiffies = jiffies; - refcount_set(&flush_node->refcnt, 1); - return flush_node; -} - -static void -mlxsw_sp_router_xm_cache_flush_node_hold(struct mlxsw_sp_router_xm_flush_node *flush_node) -{ - if (!flush_node) - return; - refcount_inc(&flush_node->refcnt); -} - -static void -mlxsw_sp_router_xm_cache_flush_node_put(struct mlxsw_sp_router_xm_flush_node *flush_node) -{ - if (!flush_node || !refcount_dec_and_test(&flush_node->refcnt)) - return; - kfree(flush_node); -} - -static void -mlxsw_sp_router_xm_cache_flush_node_destroy(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_router_xm_flush_node *flush_node) -{ - struct mlxsw_sp_router_xm *router_xm = mlxsw_sp->router->xm; - - router_xm->flush_count--; - rhashtable_remove_fast(&router_xm->flush_ht, &flush_node->ht_node, - mlxsw_sp_router_xm_flush_ht_params); - mlxsw_sp_router_xm_cache_flush_node_put(flush_node); -} - -static u32 mlxsw_sp_router_xm_flush_mask4(u8 prefix_len) -{ - 
return GENMASK(31, 32 - prefix_len); -} - -static unsigned char *mlxsw_sp_router_xm_flush_mask6(u8 prefix_len) -{ - static unsigned char mask[sizeof(struct in6_addr)]; - - memset(mask, 0, sizeof(mask)); - memset(mask, 0xff, prefix_len / 8); - mask[prefix_len / 8] = GENMASK(8, 8 - prefix_len % 8); - return mask; -} - -#define MLXSW_SP_ROUTER_XM_CACHE_PARALLEL_FLUSHES_LIMIT 15 -#define MLXSW_SP_ROUTER_XM_CACHE_FLUSH_ALL_MIN_REUSES 15 -#define MLXSW_SP_ROUTER_XM_CACHE_DELAY 50 /* usecs */ -#define MLXSW_SP_ROUTER_XM_CACHE_MAX_WAIT (MLXSW_SP_ROUTER_XM_CACHE_DELAY * 10) - -static void mlxsw_sp_router_xm_cache_flush_work(struct work_struct *work) -{ - struct mlxsw_sp_router_xm_flush_info *flush_info; - struct mlxsw_sp_router_xm_flush_node *flush_node; - char rlcmld_pl[MLXSW_REG_RLCMLD_LEN]; - enum mlxsw_reg_rlcmld_select select; - struct mlxsw_sp *mlxsw_sp; - u32 addr4; - int err; - - flush_node = container_of(work, struct mlxsw_sp_router_xm_flush_node, - dw.work); - mlxsw_sp = flush_node->mlxsw_sp; - flush_info = &flush_node->flush_info; - - if (flush_info->all) { - char rlpmce_pl[MLXSW_REG_RLPMCE_LEN]; - - mlxsw_reg_rlpmce_pack(rlpmce_pl, true, false); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rlpmce), - rlpmce_pl); - if (err) - dev_err(mlxsw_sp->bus_info->dev, "Failed to flush XM cache\n"); - - if (flush_node->reuses < - MLXSW_SP_ROUTER_XM_CACHE_FLUSH_ALL_MIN_REUSES) - /* Leaving flush-all mode. */ - mlxsw_sp->router->xm->flush_all_mode = false; - goto out; - } - - select = MLXSW_REG_RLCMLD_SELECT_M_AND_ML_ENTRIES; - - switch (flush_info->proto) { - case MLXSW_SP_L3_PROTO_IPV4: - addr4 = *((u32 *) flush_info->addr); - addr4 &= mlxsw_sp_router_xm_flush_mask4(flush_info->prefix_len); - - /* In case the flush prefix length is bigger than M-value, - * it makes no sense to flush M entries. So just flush - * the ML entries. 
- */ - if (flush_info->prefix_len > MLXSW_SP_ROUTER_XM_M_VAL) - select = MLXSW_REG_RLCMLD_SELECT_ML_ENTRIES; - - mlxsw_reg_rlcmld_pack4(rlcmld_pl, select, - flush_info->virtual_router, addr4, - mlxsw_sp_router_xm_flush_mask4(flush_info->prefix_len)); - break; - case MLXSW_SP_L3_PROTO_IPV6: - mlxsw_reg_rlcmld_pack6(rlcmld_pl, select, - flush_info->virtual_router, flush_info->addr, - mlxsw_sp_router_xm_flush_mask6(flush_info->prefix_len)); - break; - default: - WARN_ON(true); - goto out; - } - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rlcmld), rlcmld_pl); - if (err) - dev_err(mlxsw_sp->bus_info->dev, "Failed to flush XM cache\n"); - -out: - mlxsw_sp_router_xm_cache_flush_node_destroy(mlxsw_sp, flush_node); -} - -static bool -mlxsw_sp_router_xm_cache_flush_may_cancel(struct mlxsw_sp_router_xm_flush_node *flush_node) -{ - unsigned long max_wait = usecs_to_jiffies(MLXSW_SP_ROUTER_XM_CACHE_MAX_WAIT); - unsigned long delay = usecs_to_jiffies(MLXSW_SP_ROUTER_XM_CACHE_DELAY); - - /* In case there is the same flushing work pending, check - * if we can consolidate with it. We can do it up to MAX_WAIT. - * Cancel the delayed work. If the work was still pending. - */ - if (time_is_before_jiffies(flush_node->start_jiffies + max_wait - delay) && - cancel_delayed_work_sync(&flush_node->dw)) - return true; - return false; -} - -static int -mlxsw_sp_router_xm_cache_flush_schedule(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_router_xm_flush_info *flush_info) -{ - unsigned long delay = usecs_to_jiffies(MLXSW_SP_ROUTER_XM_CACHE_DELAY); - struct mlxsw_sp_router_xm_flush_info flush_all_info = {.all = true}; - struct mlxsw_sp_router_xm *router_xm = mlxsw_sp->router->xm; - struct mlxsw_sp_router_xm_flush_node *flush_node; - - /* Check if the queued number of flushes reached critical amount after - * which it is better to just flush the whole cache. - */ - if (router_xm->flush_count == MLXSW_SP_ROUTER_XM_CACHE_PARALLEL_FLUSHES_LIMIT) - /* Entering flush-all mode. 
*/ - router_xm->flush_all_mode = true; - - if (router_xm->flush_all_mode) - flush_info = &flush_all_info; - - rcu_read_lock(); - flush_node = rhashtable_lookup_fast(&router_xm->flush_ht, flush_info, - mlxsw_sp_router_xm_flush_ht_params); - /* Take a reference so the object is not freed before possible - * delayed work cancel could be done. - */ - mlxsw_sp_router_xm_cache_flush_node_hold(flush_node); - rcu_read_unlock(); - - if (flush_node && mlxsw_sp_router_xm_cache_flush_may_cancel(flush_node)) { - flush_node->reuses++; - mlxsw_sp_router_xm_cache_flush_node_put(flush_node); - /* Original work was within wait period and was canceled. - * That means that the reference is still held and the - * flush_node_put() call above did not free the flush_node. - * Reschedule it with fresh delay. - */ - goto schedule_work; - } else { - mlxsw_sp_router_xm_cache_flush_node_put(flush_node); - } - - flush_node = mlxsw_sp_router_xm_cache_flush_node_create(mlxsw_sp, flush_info); - if (IS_ERR(flush_node)) - return PTR_ERR(flush_node); - INIT_DELAYED_WORK(&flush_node->dw, mlxsw_sp_router_xm_cache_flush_work); - -schedule_work: - mlxsw_core_schedule_dw(&flush_node->dw, delay); - return 0; -} - -static int -mlxsw_sp_router_xm_ml_entry_add(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_router_xm_fib_entry *fib_entry) -{ - struct mlxsw_sp_router_xm *router_xm = mlxsw_sp->router->xm; - struct mlxsw_sp_router_xm_ltable_node *ltable_node; - u8 lvalue = fib_entry->lvalue; - int err; - - ltable_node = mlxsw_sp_router_xm_ltable_node_get(router_xm, - fib_entry->mindex); - if (IS_ERR(ltable_node)) - return PTR_ERR(ltable_node); - if (lvalue > ltable_node->current_lvalue) { - /* The L-value is bigger then the one currently set, update. */ - ltable_node->current_lvalue = lvalue; - err = mlxsw_sp_router_xm_ltable_lvalue_set(mlxsw_sp, - ltable_node); - if (err) - goto err_lvalue_set; - - /* The L value for prefix/M is increased. 
- * Therefore, all entries in M and ML caches matching - * {prefix/M, proto, VR} need to be flushed. Set the flush - * prefix length to M to achieve that. - */ - fib_entry->flush_info.prefix_len = MLXSW_SP_ROUTER_XM_M_VAL; - } - - ltable_node->lvalue_ref[lvalue]++; - fib_entry->ltable_node = ltable_node; - - return 0; - -err_lvalue_set: - mlxsw_sp_router_xm_ltable_node_put(router_xm, ltable_node); - return err; -} - -static void -mlxsw_sp_router_xm_ml_entry_del(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_router_xm_fib_entry *fib_entry) -{ - struct mlxsw_sp_router_xm_ltable_node *ltable_node = - fib_entry->ltable_node; - struct mlxsw_sp_router_xm *router_xm = mlxsw_sp->router->xm; - u8 lvalue = fib_entry->lvalue; - - ltable_node->lvalue_ref[lvalue]--; - if (lvalue == ltable_node->current_lvalue && lvalue && - !ltable_node->lvalue_ref[lvalue]) { - u8 new_lvalue = lvalue - 1; - - /* Find the biggest L-value left out there. */ - while (new_lvalue > 0 && !ltable_node->lvalue_ref[lvalue]) - new_lvalue--; - - ltable_node->current_lvalue = new_lvalue; - mlxsw_sp_router_xm_ltable_lvalue_set(mlxsw_sp, ltable_node); - - /* The L value for prefix/M is decreased. - * Therefore, all entries in M and ML caches matching - * {prefix/M, proto, VR} need to be flushed. Set the flush - * prefix length to M to achieve that. 
- */ - fib_entry->flush_info.prefix_len = MLXSW_SP_ROUTER_XM_M_VAL; - } - mlxsw_sp_router_xm_ltable_node_put(router_xm, ltable_node); -} - -static int -mlxsw_sp_router_xm_ml_entries_add(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry_op_ctx_xm *op_ctx_xm) -{ - struct mlxsw_sp_router_xm_fib_entry *fib_entry; - int err; - int i; - - for (i = 0; i < op_ctx_xm->entries_count; i++) { - fib_entry = op_ctx_xm->entries[i]; - err = mlxsw_sp_router_xm_ml_entry_add(mlxsw_sp, fib_entry); - if (err) - goto rollback; - } - return 0; - -rollback: - for (i--; i >= 0; i--) { - fib_entry = op_ctx_xm->entries[i]; - mlxsw_sp_router_xm_ml_entry_del(mlxsw_sp, fib_entry); - } - return err; -} - -static void -mlxsw_sp_router_xm_ml_entries_del(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry_op_ctx_xm *op_ctx_xm) -{ - struct mlxsw_sp_router_xm_fib_entry *fib_entry; - int i; - - for (i = 0; i < op_ctx_xm->entries_count; i++) { - fib_entry = op_ctx_xm->entries[i]; - mlxsw_sp_router_xm_ml_entry_del(mlxsw_sp, fib_entry); - } -} - -static void -mlxsw_sp_router_xm_ml_entries_cache_flush(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry_op_ctx_xm *op_ctx_xm) -{ - struct mlxsw_sp_router_xm_fib_entry *fib_entry; - int err; - int i; - - for (i = 0; i < op_ctx_xm->entries_count; i++) { - fib_entry = op_ctx_xm->entries[i]; - err = mlxsw_sp_router_xm_cache_flush_schedule(mlxsw_sp, - &fib_entry->flush_info); - if (err) - dev_err(mlxsw_sp->bus_info->dev, "Failed to flush XM cache\n"); - } -} - -static int mlxsw_sp_router_ll_xm_fib_entry_commit(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry_op_ctx *op_ctx, - bool *postponed_for_bulk) -{ - struct mlxsw_sp_fib_entry_op_ctx_xm *op_ctx_xm = (void *) op_ctx->ll_priv; - struct mlxsw_sp_router_xm_fib_entry *fib_entry; - u8 num_rec; - int err; - int i; - - op_ctx_xm->trans_offset += op_ctx_xm->trans_item_len; - op_ctx_xm->entries_count++; - - /* Check if bulking is possible and there is still room for another - * FIB entry record. 
The size of 'trans_item_len' is either size of IPv4 - * command or size of IPv6 command. Not possible to mix those in a - * single XMDR write. - */ - if (op_ctx->bulk_ok && - op_ctx_xm->trans_offset + op_ctx_xm->trans_item_len <= MLXSW_REG_XMDR_TRANS_LEN) { - if (postponed_for_bulk) - *postponed_for_bulk = true; - return 0; - } - - if (op_ctx->event == FIB_EVENT_ENTRY_REPLACE) { - /* The L-table is updated inside. It has to be done before - * the prefix is inserted. - */ - err = mlxsw_sp_router_xm_ml_entries_add(mlxsw_sp, op_ctx_xm); - if (err) - goto out; - } - - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(xmdr), op_ctx_xm->xmdr_pl); - if (err) - goto out; - num_rec = mlxsw_reg_xmdr_num_rec_get(op_ctx_xm->xmdr_pl); - if (num_rec > op_ctx_xm->entries_count) { - dev_err(mlxsw_sp->bus_info->dev, "Invalid XMDR number of records\n"); - err = -EIO; - goto out; - } - for (i = 0; i < num_rec; i++) { - if (!mlxsw_reg_xmdr_reply_vect_get(op_ctx_xm->xmdr_pl, i)) { - dev_err(mlxsw_sp->bus_info->dev, "Command send over XMDR failed\n"); - err = -EIO; - goto out; - } else { - fib_entry = op_ctx_xm->entries[i]; - fib_entry->committed = true; - } - } - - if (op_ctx->event == FIB_EVENT_ENTRY_DEL) - /* The L-table is updated inside. It has to be done after - * the prefix was removed. - */ - mlxsw_sp_router_xm_ml_entries_del(mlxsw_sp, op_ctx_xm); - - /* At the very end, do the XLT cache flushing to evict stale - * M and ML cache entries after prefixes were inserted/removed. 
- */ - mlxsw_sp_router_xm_ml_entries_cache_flush(mlxsw_sp, op_ctx_xm); - -out: - /* Next pack call is going to do reinitialization */ - op_ctx->initialized = false; - return err; -} - -static bool mlxsw_sp_router_ll_xm_fib_entry_is_committed(struct mlxsw_sp_fib_entry_priv *priv) -{ - struct mlxsw_sp_router_xm_fib_entry *fib_entry = (void *) priv->priv; - - return fib_entry->committed; -} - -const struct mlxsw_sp_router_ll_ops mlxsw_sp_router_ll_xm_ops = { - .init = mlxsw_sp_router_ll_xm_init, - .ralta_write = mlxsw_sp_router_ll_xm_ralta_write, - .ralst_write = mlxsw_sp_router_ll_xm_ralst_write, - .raltb_write = mlxsw_sp_router_ll_xm_raltb_write, - .fib_entry_op_ctx_size = sizeof(struct mlxsw_sp_fib_entry_op_ctx_xm), - .fib_entry_priv_size = sizeof(struct mlxsw_sp_router_xm_fib_entry), - .fib_entry_pack = mlxsw_sp_router_ll_xm_fib_entry_pack, - .fib_entry_act_remote_pack = mlxsw_sp_router_ll_xm_fib_entry_act_remote_pack, - .fib_entry_act_local_pack = mlxsw_sp_router_ll_xm_fib_entry_act_local_pack, - .fib_entry_act_ip2me_pack = mlxsw_sp_router_ll_xm_fib_entry_act_ip2me_pack, - .fib_entry_act_ip2me_tun_pack = mlxsw_sp_router_ll_xm_fib_entry_act_ip2me_tun_pack, - .fib_entry_commit = mlxsw_sp_router_ll_xm_fib_entry_commit, - .fib_entry_is_committed = mlxsw_sp_router_ll_xm_fib_entry_is_committed, -}; - -#define MLXSW_SP_ROUTER_XM_MINDEX_SIZE (64 * 1024) - -int mlxsw_sp_router_xm_init(struct mlxsw_sp *mlxsw_sp) -{ - struct mlxsw_sp_router_xm *router_xm; - char rxltm_pl[MLXSW_REG_RXLTM_LEN]; - char xltq_pl[MLXSW_REG_XLTQ_LEN]; - u32 mindex_size; - u16 device_id; - int err; - - if (!mlxsw_sp->bus_info->xm_exists) - return 0; - - router_xm = kzalloc(sizeof(*router_xm), GFP_KERNEL); - if (!router_xm) - return -ENOMEM; - - mlxsw_reg_xltq_pack(xltq_pl); - err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(xltq), xltq_pl); - if (err) - goto err_xltq_query; - mlxsw_reg_xltq_unpack(xltq_pl, &device_id, &router_xm->ipv4_supported, - &router_xm->ipv6_supported, 
&router_xm->entries_size, &mindex_size); - - if (device_id != MLXSW_REG_XLTQ_XM_DEVICE_ID_XLT) { - dev_err(mlxsw_sp->bus_info->dev, "Invalid XM device id\n"); - err = -EINVAL; - goto err_device_id_check; - } - - if (mindex_size != MLXSW_SP_ROUTER_XM_MINDEX_SIZE) { - dev_err(mlxsw_sp->bus_info->dev, "Unexpected M-index size\n"); - err = -EINVAL; - goto err_mindex_size_check; - } - - mlxsw_reg_rxltm_pack(rxltm_pl, mlxsw_sp_router_xm_m_val[MLXSW_SP_L3_PROTO_IPV4], - mlxsw_sp_router_xm_m_val[MLXSW_SP_L3_PROTO_IPV6]); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rxltm), rxltm_pl); - if (err) - goto err_rxltm_write; - - err = rhashtable_init(&router_xm->ltable_ht, &mlxsw_sp_router_xm_ltable_ht_params); - if (err) - goto err_ltable_ht_init; - - err = rhashtable_init(&router_xm->flush_ht, &mlxsw_sp_router_xm_flush_ht_params); - if (err) - goto err_flush_ht_init; - - mlxsw_sp->router->xm = router_xm; - return 0; - -err_flush_ht_init: - rhashtable_destroy(&router_xm->ltable_ht); -err_ltable_ht_init: -err_rxltm_write: -err_mindex_size_check: -err_device_id_check: -err_xltq_query: - kfree(router_xm); - return err; -} - -void mlxsw_sp_router_xm_fini(struct mlxsw_sp *mlxsw_sp) -{ - struct mlxsw_sp_router_xm *router_xm = mlxsw_sp->router->xm; - - if (!mlxsw_sp->bus_info->xm_exists) - return; - - rhashtable_destroy(&router_xm->flush_ht); - rhashtable_destroy(&router_xm->ltable_ht); - kfree(router_xm); -} - -bool mlxsw_sp_router_xm_ipv4_is_supported(const struct mlxsw_sp *mlxsw_sp) -{ - struct mlxsw_sp_router_xm *router_xm = mlxsw_sp->router->xm; - - return router_xm && router_xm->ipv4_supported; -} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index a6d2e806cba9..863c8055746b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -111,10 +111,10 @@ static void mlxsw_sp_bridge_port_mdb_flush(struct mlxsw_sp_port 
*mlxsw_sp_port, struct mlxsw_sp_bridge_port *bridge_port); -static void -mlxsw_sp_bridge_mdb_mc_enable_sync(struct mlxsw_sp_port *mlxsw_sp_port, +static int +mlxsw_sp_bridge_mdb_mc_enable_sync(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_bridge_device - *bridge_device); + *bridge_device, bool mc_enabled); static void mlxsw_sp_port_mrouter_update_mdb(struct mlxsw_sp_port *mlxsw_sp_port, @@ -643,6 +643,64 @@ err_port_bridge_vlan_flood_set: } static int +mlxsw_sp_bridge_vlans_flood_set(struct mlxsw_sp_bridge_vlan *bridge_vlan, + enum mlxsw_sp_flood_type packet_type, + bool member) +{ + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + int err; + + list_for_each_entry(mlxsw_sp_port_vlan, &bridge_vlan->port_vlan_list, + bridge_vlan_node) { + u16 local_port = mlxsw_sp_port_vlan->mlxsw_sp_port->local_port; + + err = mlxsw_sp_fid_flood_set(mlxsw_sp_port_vlan->fid, + packet_type, local_port, member); + if (err) + goto err_fid_flood_set; + } + + return 0; + +err_fid_flood_set: + list_for_each_entry_continue_reverse(mlxsw_sp_port_vlan, + &bridge_vlan->port_vlan_list, + list) { + u16 local_port = mlxsw_sp_port_vlan->mlxsw_sp_port->local_port; + + mlxsw_sp_fid_flood_set(mlxsw_sp_port_vlan->fid, packet_type, + local_port, !member); + } + + return err; +} + +static int +mlxsw_sp_bridge_ports_flood_table_set(struct mlxsw_sp_bridge_port *bridge_port, + enum mlxsw_sp_flood_type packet_type, + bool member) +{ + struct mlxsw_sp_bridge_vlan *bridge_vlan; + int err; + + list_for_each_entry(bridge_vlan, &bridge_port->vlans_list, list) { + err = mlxsw_sp_bridge_vlans_flood_set(bridge_vlan, packet_type, + member); + if (err) + goto err_bridge_vlans_flood_set; + } + + return 0; + +err_bridge_vlans_flood_set: + list_for_each_entry_continue_reverse(bridge_vlan, + &bridge_port->vlans_list, list) + mlxsw_sp_bridge_vlans_flood_set(bridge_vlan, packet_type, + !member); + return err; +} + +static int mlxsw_sp_port_bridge_vlan_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, struct 
mlxsw_sp_bridge_vlan *bridge_vlan, bool set) @@ -842,6 +900,7 @@ static int mlxsw_sp_port_mc_disabled_set(struct mlxsw_sp_port *mlxsw_sp_port, struct net_device *orig_dev, bool mc_disabled) { + enum mlxsw_sp_flood_type packet_type = MLXSW_SP_FLOOD_TYPE_MC; struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_bridge_device *bridge_device; struct mlxsw_sp_bridge_port *bridge_port; @@ -854,26 +913,40 @@ static int mlxsw_sp_port_mc_disabled_set(struct mlxsw_sp_port *mlxsw_sp_port, if (!bridge_device) return 0; - if (bridge_device->multicast_enabled != !mc_disabled) { - bridge_device->multicast_enabled = !mc_disabled; - mlxsw_sp_bridge_mdb_mc_enable_sync(mlxsw_sp_port, - bridge_device); - } + if (bridge_device->multicast_enabled == !mc_disabled) + return 0; + + bridge_device->multicast_enabled = !mc_disabled; + err = mlxsw_sp_bridge_mdb_mc_enable_sync(mlxsw_sp, bridge_device, + !mc_disabled); + if (err) + goto err_mc_enable_sync; list_for_each_entry(bridge_port, &bridge_device->ports_list, list) { - enum mlxsw_sp_flood_type packet_type = MLXSW_SP_FLOOD_TYPE_MC; bool member = mlxsw_sp_mc_flood(bridge_port); - err = mlxsw_sp_bridge_port_flood_table_set(mlxsw_sp_port, - bridge_port, - packet_type, member); + err = mlxsw_sp_bridge_ports_flood_table_set(bridge_port, + packet_type, + member); if (err) - return err; + goto err_flood_table_set; } - bridge_device->multicast_enabled = !mc_disabled; - return 0; + +err_flood_table_set: + list_for_each_entry_continue_reverse(bridge_port, + &bridge_device->ports_list, list) { + bool member = mlxsw_sp_mc_flood(bridge_port); + + mlxsw_sp_bridge_ports_flood_table_set(bridge_port, packet_type, + !member); + } + mlxsw_sp_bridge_mdb_mc_enable_sync(mlxsw_sp, bridge_device, + mc_disabled); +err_mc_enable_sync: + bridge_device->multicast_enabled = mc_disabled; + return err; } static int mlxsw_sp_smid_router_port_set(struct mlxsw_sp *mlxsw_sp, @@ -887,7 +960,7 @@ static int mlxsw_sp_smid_router_port_set(struct mlxsw_sp 
*mlxsw_sp, return -ENOMEM; mlxsw_reg_smid2_pack(smid2_pl, mid_idx, - mlxsw_sp_router_port(mlxsw_sp), add); + mlxsw_sp_router_port(mlxsw_sp), add, false, 0); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(smid2), smid2_pl); kfree(smid2_pl); return err; @@ -1584,7 +1657,7 @@ static int mlxsw_sp_port_smid_full_entry(struct mlxsw_sp *mlxsw_sp, u16 mid_idx, if (!smid2_pl) return -ENOMEM; - mlxsw_reg_smid2_pack(smid2_pl, mid_idx, 0, false); + mlxsw_reg_smid2_pack(smid2_pl, mid_idx, 0, false, false, 0); for (i = 1; i < mlxsw_core_max_ports(mlxsw_sp->core); i++) { if (mlxsw_sp->ports[i]) mlxsw_reg_smid2_port_mask_set(smid2_pl, i, 1); @@ -1615,7 +1688,8 @@ static int mlxsw_sp_port_smid_set(struct mlxsw_sp_port *mlxsw_sp_port, if (!smid2_pl) return -ENOMEM; - mlxsw_reg_smid2_pack(smid2_pl, mid_idx, mlxsw_sp_port->local_port, add); + mlxsw_reg_smid2_pack(smid2_pl, mid_idx, mlxsw_sp_port->local_port, add, + false, 0); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(smid2), smid2_pl); kfree(smid2_pl); return err; @@ -1676,7 +1750,7 @@ mlxsw_sp_mc_get_mrouters_bitmap(unsigned long *flood_bitmap, } } -static bool +static int mlxsw_sp_mc_write_mdb_entry(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_mid *mid, struct mlxsw_sp_bridge_device *bridge_device) @@ -1689,12 +1763,12 @@ mlxsw_sp_mc_write_mdb_entry(struct mlxsw_sp *mlxsw_sp, mid_idx = find_first_zero_bit(mlxsw_sp->bridge->mids_bitmap, MLXSW_SP_MID_MAX); if (mid_idx == MLXSW_SP_MID_MAX) - return false; + return -ENOBUFS; num_of_ports = mlxsw_core_max_ports(mlxsw_sp->core); flood_bitmap = bitmap_alloc(num_of_ports, GFP_KERNEL); if (!flood_bitmap) - return false; + return -ENOMEM; bitmap_copy(flood_bitmap, mid->ports_in_mid, num_of_ports); mlxsw_sp_mc_get_mrouters_bitmap(flood_bitmap, bridge_device, mlxsw_sp); @@ -1704,16 +1778,16 @@ mlxsw_sp_mc_write_mdb_entry(struct mlxsw_sp *mlxsw_sp, bridge_device->mrouter); bitmap_free(flood_bitmap); if (err) - return false; + return err; err = mlxsw_sp_port_mdb_op(mlxsw_sp, mid->addr, 
mid->fid, mid_idx, true); if (err) - return false; + return err; set_bit(mid_idx, mlxsw_sp->bridge->mids_bitmap); mid->in_hw = true; - return true; + return 0; } static int mlxsw_sp_mc_remove_mdb_entry(struct mlxsw_sp *mlxsw_sp, @@ -1735,6 +1809,7 @@ mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp, u16 fid) { struct mlxsw_sp_mid *mid; + int err; mid = kzalloc(sizeof(*mid), GFP_KERNEL); if (!mid) @@ -1752,7 +1827,8 @@ mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp, if (!bridge_device->multicast_enabled) goto out; - if (!mlxsw_sp_mc_write_mdb_entry(mlxsw_sp, mid, bridge_device)) + err = mlxsw_sp_mc_write_mdb_entry(mlxsw_sp, mid, bridge_device); + if (err) goto err_write_mdb_entry; out: @@ -1839,24 +1915,37 @@ err_out: return err; } -static void -mlxsw_sp_bridge_mdb_mc_enable_sync(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_bridge_device - *bridge_device) +static int +mlxsw_sp_bridge_mdb_mc_enable_sync(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_bridge_device *bridge_device, + bool mc_enabled) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_mid *mid; - bool mc_enabled; - - mc_enabled = bridge_device->multicast_enabled; + int err; list_for_each_entry(mid, &bridge_device->mids_list, list) { if (mc_enabled) - mlxsw_sp_mc_write_mdb_entry(mlxsw_sp, mid, - bridge_device); + err = mlxsw_sp_mc_write_mdb_entry(mlxsw_sp, mid, + bridge_device); else + err = mlxsw_sp_mc_remove_mdb_entry(mlxsw_sp, mid); + + if (err) + goto err_mdb_entry_update; + } + + return 0; + +err_mdb_entry_update: + list_for_each_entry_continue_reverse(mid, &bridge_device->mids_list, + list) { + if (mc_enabled) mlxsw_sp_mc_remove_mdb_entry(mlxsw_sp, mid); + else + mlxsw_sp_mc_write_mdb_entry(mlxsw_sp, mid, + bridge_device); } + return err; } static void @@ -2729,8 +2818,7 @@ static void mlxsw_sp_fdb_notify_mac_lag_process(struct mlxsw_sp *mlxsw_sp, bridge_device = bridge_port->bridge_device; vid = bridge_device->vlan_enabled ? 
mlxsw_sp_port_vlan->vid : 0; - lag_vid = mlxsw_sp_fid_lag_vid_valid(mlxsw_sp_port_vlan->fid) ? - mlxsw_sp_port_vlan->vid : 0; + lag_vid = mlxsw_sp_port_vlan->vid; do_fdb_op: err = mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp, lag_id, mac, fid, lag_vid, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c index ed4d0d3448f3..d0baba38d2a3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c @@ -953,16 +953,16 @@ static const struct mlxsw_sp_trap_item mlxsw_sp_trap_items_arr[] = { .trap = MLXSW_SP_TRAP_CONTROL(ARP_REQUEST, NEIGH_DISCOVERY, MIRROR), .listeners_arr = { - MLXSW_SP_RXL_MARK(ARPBC, NEIGH_DISCOVERY, MIRROR_TO_CPU, - false), + MLXSW_SP_RXL_MARK(ROUTER_ARPBC, NEIGH_DISCOVERY, + TRAP_TO_CPU, false), }, }, { .trap = MLXSW_SP_TRAP_CONTROL(ARP_RESPONSE, NEIGH_DISCOVERY, MIRROR), .listeners_arr = { - MLXSW_SP_RXL_MARK(ARPUC, NEIGH_DISCOVERY, MIRROR_TO_CPU, - false), + MLXSW_SP_RXL_MARK(ROUTER_ARPUC, NEIGH_DISCOVERY, + TRAP_TO_CPU, false), }, }, { diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index d888498aed33..8da169663bda 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -27,8 +27,6 @@ enum { MLXSW_TRAP_ID_PKT_SAMPLE = 0x38, MLXSW_TRAP_ID_FID_MISS = 0x3D, MLXSW_TRAP_ID_DECAP_ECN0 = 0x40, - MLXSW_TRAP_ID_ARPBC = 0x50, - MLXSW_TRAP_ID_ARPUC = 0x51, MLXSW_TRAP_ID_MTUERROR = 0x52, MLXSW_TRAP_ID_TTLERROR = 0x53, MLXSW_TRAP_ID_LBERROR = 0x54, @@ -71,6 +69,8 @@ enum { MLXSW_TRAP_ID_IPV6_BFD = 0xD1, MLXSW_TRAP_ID_ROUTER_ALERT_IPV4 = 0xD6, MLXSW_TRAP_ID_ROUTER_ALERT_IPV6 = 0xD7, + MLXSW_TRAP_ID_ROUTER_ARPBC = 0xE0, + MLXSW_TRAP_ID_ROUTER_ARPUC = 0xE1, MLXSW_TRAP_ID_DISCARD_NON_ROUTABLE = 0x11A, MLXSW_TRAP_ID_DISCARD_ROUTER2 = 0x130, MLXSW_TRAP_ID_DISCARD_ROUTER3 = 0x131, diff --git a/drivers/net/ethernet/microchip/lan743x_ethtool.c 
b/drivers/net/ethernet/microchip/lan743x_ethtool.c index c8fe8b31f07b..b1c74e6cb012 100644 --- a/drivers/net/ethernet/microchip/lan743x_ethtool.c +++ b/drivers/net/ethernet/microchip/lan743x_ethtool.c @@ -155,8 +155,8 @@ static int lan743x_otp_write(struct lan743x_adapter *adapter, u32 offset, return 0; } -static int lan743x_hs_syslock_acquire(struct lan743x_adapter *adapter, - u16 timeout) +int lan743x_hs_syslock_acquire(struct lan743x_adapter *adapter, + u16 timeout) { u16 timeout_cnt = 0; u32 val; @@ -192,7 +192,7 @@ static int lan743x_hs_syslock_acquire(struct lan743x_adapter *adapter, return 0; } -static void lan743x_hs_syslock_release(struct lan743x_adapter *adapter) +void lan743x_hs_syslock_release(struct lan743x_adapter *adapter) { u32 val; @@ -1149,7 +1149,12 @@ static void lan743x_ethtool_get_wol(struct net_device *netdev, wol->supported |= WAKE_BCAST | WAKE_UCAST | WAKE_MCAST | WAKE_MAGIC | WAKE_PHY | WAKE_ARP; + if (adapter->is_pci11x1x) + wol->supported |= WAKE_MAGICSECURE; + wol->wolopts |= adapter->wolopts; + if (adapter->wolopts & WAKE_MAGICSECURE) + memcpy(wol->sopass, adapter->sopass, sizeof(wol->sopass)); } static int lan743x_ethtool_set_wol(struct net_device *netdev, @@ -1170,6 +1175,13 @@ static int lan743x_ethtool_set_wol(struct net_device *netdev, adapter->wolopts |= WAKE_PHY; if (wol->wolopts & WAKE_ARP) adapter->wolopts |= WAKE_ARP; + if (wol->wolopts & WAKE_MAGICSECURE && + wol->wolopts & WAKE_MAGIC) { + memcpy(adapter->sopass, wol->sopass, sizeof(wol->sopass)); + adapter->wolopts |= WAKE_MAGICSECURE; + } else { + memset(adapter->sopass, 0, sizeof(u8) * SOPASS_MAX); + } device_set_wakeup_enable(&adapter->pdev->dev, (bool)wol->wolopts); @@ -1178,6 +1190,49 @@ static int lan743x_ethtool_set_wol(struct net_device *netdev, } #endif /* CONFIG_PM */ +static void lan743x_common_regs(struct net_device *dev, + struct ethtool_regs *regs, void *p) + +{ + struct lan743x_adapter *adapter = netdev_priv(dev); + u32 *rb = p; + + memset(p, 0, 
(MAX_LAN743X_ETH_REGS * sizeof(u32))); + + rb[ETH_PRIV_FLAGS] = adapter->flags; + rb[ETH_ID_REV] = lan743x_csr_read(adapter, ID_REV); + rb[ETH_FPGA_REV] = lan743x_csr_read(adapter, FPGA_REV); + rb[ETH_STRAP_READ] = lan743x_csr_read(adapter, STRAP_READ); + rb[ETH_INT_STS] = lan743x_csr_read(adapter, INT_STS); + rb[ETH_HW_CFG] = lan743x_csr_read(adapter, HW_CFG); + rb[ETH_PMT_CTL] = lan743x_csr_read(adapter, PMT_CTL); + rb[ETH_E2P_CMD] = lan743x_csr_read(adapter, E2P_CMD); + rb[ETH_E2P_DATA] = lan743x_csr_read(adapter, E2P_DATA); + rb[ETH_MAC_CR] = lan743x_csr_read(adapter, MAC_CR); + rb[ETH_MAC_RX] = lan743x_csr_read(adapter, MAC_RX); + rb[ETH_MAC_TX] = lan743x_csr_read(adapter, MAC_TX); + rb[ETH_FLOW] = lan743x_csr_read(adapter, MAC_FLOW); + rb[ETH_MII_ACC] = lan743x_csr_read(adapter, MAC_MII_ACC); + rb[ETH_MII_DATA] = lan743x_csr_read(adapter, MAC_MII_DATA); + rb[ETH_EEE_TX_LPI_REQ_DLY] = lan743x_csr_read(adapter, + MAC_EEE_TX_LPI_REQ_DLY_CNT); + rb[ETH_WUCSR] = lan743x_csr_read(adapter, MAC_WUCSR); + rb[ETH_WK_SRC] = lan743x_csr_read(adapter, MAC_WK_SRC); +} + +static int lan743x_get_regs_len(struct net_device *dev) +{ + return MAX_LAN743X_ETH_REGS * sizeof(u32); +} + +static void lan743x_get_regs(struct net_device *dev, + struct ethtool_regs *regs, void *p) +{ + regs->version = LAN743X_ETH_REG_VERSION; + + lan743x_common_regs(dev, regs, p); +} + const struct ethtool_ops lan743x_ethtool_ops = { .get_drvinfo = lan743x_ethtool_get_drvinfo, .get_msglevel = lan743x_ethtool_get_msglevel, @@ -1202,6 +1257,8 @@ const struct ethtool_ops lan743x_ethtool_ops = { .set_eee = lan743x_ethtool_set_eee, .get_link_ksettings = phy_ethtool_get_link_ksettings, .set_link_ksettings = phy_ethtool_set_link_ksettings, + .get_regs_len = lan743x_get_regs_len, + .get_regs = lan743x_get_regs, #ifdef CONFIG_PM .get_wol = lan743x_ethtool_get_wol, .set_wol = lan743x_ethtool_set_wol, diff --git a/drivers/net/ethernet/microchip/lan743x_ethtool.h b/drivers/net/ethernet/microchip/lan743x_ethtool.h 
index d0d11a777a58..7f5996a52488 100644 --- a/drivers/net/ethernet/microchip/lan743x_ethtool.h +++ b/drivers/net/ethernet/microchip/lan743x_ethtool.h @@ -6,6 +6,32 @@ #include "linux/ethtool.h" +#define LAN743X_ETH_REG_VERSION 1 + +enum { + ETH_PRIV_FLAGS, + ETH_ID_REV, + ETH_FPGA_REV, + ETH_STRAP_READ, + ETH_INT_STS, + ETH_HW_CFG, + ETH_PMT_CTL, + ETH_E2P_CMD, + ETH_E2P_DATA, + ETH_MAC_CR, + ETH_MAC_RX, + ETH_MAC_TX, + ETH_FLOW, + ETH_MII_ACC, + ETH_MII_DATA, + ETH_EEE_TX_LPI_REQ_DLY, + ETH_WUCSR, + ETH_WK_SRC, + + /* Add new registers above */ + MAX_LAN743X_ETH_REGS +}; + extern const struct ethtool_ops lan743x_ethtool_ops; #endif /* _LAN743X_ETHTOOL_H */ diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index af81236b4b4e..a9a1dea6d731 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -22,20 +22,36 @@ #define MMD_ACCESS_WRITE 1 #define MMD_ACCESS_READ 2 #define MMD_ACCESS_READ_INC 3 +#define PCS_POWER_STATE_DOWN 0x6 +#define PCS_POWER_STATE_UP 0x4 static void pci11x1x_strap_get_status(struct lan743x_adapter *adapter) { u32 chip_rev; + u32 cfg_load; + u32 hw_cfg; u32 strap; + int ret; + + /* Timeout = 100 (i.e. 
1 sec (10 msce * 100)) */ + ret = lan743x_hs_syslock_acquire(adapter, 100); + if (ret < 0) { + netif_err(adapter, drv, adapter->netdev, + "Sys Lock acquire failed ret:%d\n", ret); + return; + } - strap = lan743x_csr_read(adapter, STRAP_READ); - if (strap & STRAP_READ_USE_SGMII_EN_) { + cfg_load = lan743x_csr_read(adapter, ETH_SYS_CONFIG_LOAD_STARTED_REG); + lan743x_hs_syslock_release(adapter); + hw_cfg = lan743x_csr_read(adapter, HW_CFG); + + if (cfg_load & GEN_SYS_LOAD_STARTED_REG_ETH_ || + hw_cfg & HW_CFG_RST_PROTECT_) { + strap = lan743x_csr_read(adapter, STRAP_READ); if (strap & STRAP_READ_SGMII_EN_) adapter->is_sgmii_en = true; else adapter->is_sgmii_en = false; - netif_dbg(adapter, drv, adapter->netdev, - "STRAP_READ: 0x%08X\n", strap); } else { chip_rev = lan743x_csr_read(adapter, FPGA_REV); if (chip_rev) { @@ -43,12 +59,12 @@ static void pci11x1x_strap_get_status(struct lan743x_adapter *adapter) adapter->is_sgmii_en = true; else adapter->is_sgmii_en = false; - netif_dbg(adapter, drv, adapter->netdev, - "FPGA_REV: 0x%08X\n", chip_rev); } else { adapter->is_sgmii_en = false; } } + netif_dbg(adapter, drv, adapter->netdev, + "SGMII I/F %sable\n", adapter->is_sgmii_en ? 
"En" : "Dis"); } static bool is_pci11x1x_chip(struct lan743x_adapter *adapter) @@ -909,6 +925,318 @@ static int lan743x_mdiobus_c45_write(struct mii_bus *bus, return ret; } +static int lan743x_sgmii_wait_till_not_busy(struct lan743x_adapter *adapter) +{ + u32 data; + int ret; + + ret = readx_poll_timeout(LAN743X_CSR_READ_OP, SGMII_ACC, data, + !(data & SGMII_ACC_SGMII_BZY_), 100, 1000000); + if (ret < 0) + netif_err(adapter, drv, adapter->netdev, + "%s: error %d sgmii wait timeout\n", __func__, ret); + + return ret; +} + +static int lan743x_sgmii_read(struct lan743x_adapter *adapter, u8 mmd, u16 addr) +{ + u32 mmd_access; + int ret; + u32 val; + + if (mmd > 31) { + netif_err(adapter, probe, adapter->netdev, + "%s mmd should <= 31\n", __func__); + return -EINVAL; + } + + mutex_lock(&adapter->sgmii_rw_lock); + /* Load Register Address */ + mmd_access = mmd << SGMII_ACC_SGMII_MMD_SHIFT_; + mmd_access |= (addr | SGMII_ACC_SGMII_BZY_); + lan743x_csr_write(adapter, SGMII_ACC, mmd_access); + ret = lan743x_sgmii_wait_till_not_busy(adapter); + if (ret < 0) + goto sgmii_unlock; + + val = lan743x_csr_read(adapter, SGMII_DATA); + ret = (int)(val & SGMII_DATA_MASK_); + +sgmii_unlock: + mutex_unlock(&adapter->sgmii_rw_lock); + + return ret; +} + +static int lan743x_sgmii_write(struct lan743x_adapter *adapter, + u8 mmd, u16 addr, u16 val) +{ + u32 mmd_access; + int ret; + + if (mmd > 31) { + netif_err(adapter, probe, adapter->netdev, + "%s mmd should <= 31\n", __func__); + return -EINVAL; + } + mutex_lock(&adapter->sgmii_rw_lock); + /* Load Register Data */ + lan743x_csr_write(adapter, SGMII_DATA, (u32)(val & SGMII_DATA_MASK_)); + /* Load Register Address */ + mmd_access = mmd << SGMII_ACC_SGMII_MMD_SHIFT_; + mmd_access |= (addr | SGMII_ACC_SGMII_BZY_ | SGMII_ACC_SGMII_WR_); + lan743x_csr_write(adapter, SGMII_ACC, mmd_access); + ret = lan743x_sgmii_wait_till_not_busy(adapter); + mutex_unlock(&adapter->sgmii_rw_lock); + + return ret; +} + +static int lan743x_sgmii_mpll_set(struct 
lan743x_adapter *adapter, + u16 baud) +{ + int mpllctrl0; + int mpllctrl1; + int miscctrl1; + int ret; + + mpllctrl0 = lan743x_sgmii_read(adapter, MDIO_MMD_VEND2, + VR_MII_GEN2_4_MPLL_CTRL0); + if (mpllctrl0 < 0) + return mpllctrl0; + + mpllctrl0 &= ~VR_MII_MPLL_CTRL0_USE_REFCLK_PAD_; + if (baud == VR_MII_BAUD_RATE_1P25GBPS) { + mpllctrl1 = VR_MII_MPLL_MULTIPLIER_100; + /* mpll_baud_clk/4 */ + miscctrl1 = 0xA; + } else { + mpllctrl1 = VR_MII_MPLL_MULTIPLIER_125; + /* mpll_baud_clk/2 */ + miscctrl1 = 0x5; + } + + ret = lan743x_sgmii_write(adapter, MDIO_MMD_VEND2, + VR_MII_GEN2_4_MPLL_CTRL0, mpllctrl0); + if (ret < 0) + return ret; + + ret = lan743x_sgmii_write(adapter, MDIO_MMD_VEND2, + VR_MII_GEN2_4_MPLL_CTRL1, mpllctrl1); + if (ret < 0) + return ret; + + return lan743x_sgmii_write(adapter, MDIO_MMD_VEND2, + VR_MII_GEN2_4_MISC_CTRL1, miscctrl1); +} + +static int lan743x_sgmii_2_5G_mode_set(struct lan743x_adapter *adapter, + bool enable) +{ + if (enable) + return lan743x_sgmii_mpll_set(adapter, + VR_MII_BAUD_RATE_3P125GBPS); + else + return lan743x_sgmii_mpll_set(adapter, + VR_MII_BAUD_RATE_1P25GBPS); +} + +static int lan743x_is_sgmii_2_5G_mode(struct lan743x_adapter *adapter, + bool *status) +{ + int ret; + + ret = lan743x_sgmii_read(adapter, MDIO_MMD_VEND2, + VR_MII_GEN2_4_MPLL_CTRL1); + if (ret < 0) + return ret; + + if (ret == VR_MII_MPLL_MULTIPLIER_125 || + ret == VR_MII_MPLL_MULTIPLIER_50) + *status = true; + else + *status = false; + + return 0; +} + +static int lan743x_sgmii_aneg_update(struct lan743x_adapter *adapter) +{ + enum lan743x_sgmii_lsd lsd = adapter->sgmii_lsd; + int mii_ctrl; + int dgt_ctrl; + int an_ctrl; + int ret; + + if (lsd == LINK_2500_MASTER || lsd == LINK_2500_SLAVE) + /* Switch to 2.5 Gbps */ + ret = lan743x_sgmii_2_5G_mode_set(adapter, true); + else + /* Switch to 10/100/1000 Mbps clock */ + ret = lan743x_sgmii_2_5G_mode_set(adapter, false); + if (ret < 0) + return ret; + + /* Enable SGMII Auto NEG */ + mii_ctrl = 
lan743x_sgmii_read(adapter, MDIO_MMD_VEND2, MII_BMCR); + if (mii_ctrl < 0) + return mii_ctrl; + + an_ctrl = lan743x_sgmii_read(adapter, MDIO_MMD_VEND2, VR_MII_AN_CTRL); + if (an_ctrl < 0) + return an_ctrl; + + dgt_ctrl = lan743x_sgmii_read(adapter, MDIO_MMD_VEND2, + VR_MII_DIG_CTRL1); + if (dgt_ctrl < 0) + return dgt_ctrl; + + if (lsd == LINK_2500_MASTER || lsd == LINK_2500_SLAVE) { + mii_ctrl &= ~(BMCR_ANENABLE | BMCR_ANRESTART | BMCR_SPEED100); + mii_ctrl |= BMCR_SPEED1000; + dgt_ctrl |= VR_MII_DIG_CTRL1_CL37_TMR_OVR_RIDE_; + dgt_ctrl &= ~VR_MII_DIG_CTRL1_MAC_AUTO_SW_; + /* In order for Auto-Negotiation to operate properly at + * 2.5 Gbps the 1.6ms link timer values must be adjusted + * The VR_MII_LINK_TIMER_CTRL Register must be set to + * 16'h7A1 and The CL37_TMR_OVR_RIDE bit of the + * VR_MII_DIG_CTRL1 Register set to 1 + */ + ret = lan743x_sgmii_write(adapter, MDIO_MMD_VEND2, + VR_MII_LINK_TIMER_CTRL, 0x7A1); + if (ret < 0) + return ret; + } else { + mii_ctrl |= (BMCR_ANENABLE | BMCR_ANRESTART); + an_ctrl &= ~VR_MII_AN_CTRL_SGMII_LINK_STS_; + dgt_ctrl &= ~VR_MII_DIG_CTRL1_CL37_TMR_OVR_RIDE_; + dgt_ctrl |= VR_MII_DIG_CTRL1_MAC_AUTO_SW_; + } + + ret = lan743x_sgmii_write(adapter, MDIO_MMD_VEND2, MII_BMCR, + mii_ctrl); + if (ret < 0) + return ret; + + ret = lan743x_sgmii_write(adapter, MDIO_MMD_VEND2, + VR_MII_DIG_CTRL1, dgt_ctrl); + if (ret < 0) + return ret; + + return lan743x_sgmii_write(adapter, MDIO_MMD_VEND2, + VR_MII_AN_CTRL, an_ctrl); +} + +static int lan743x_pcs_seq_state(struct lan743x_adapter *adapter, u8 state) +{ + u8 wait_cnt = 0; + u32 dig_sts; + + do { + dig_sts = lan743x_sgmii_read(adapter, MDIO_MMD_VEND2, + VR_MII_DIG_STS); + if (((dig_sts & VR_MII_DIG_STS_PSEQ_STATE_MASK_) >> + VR_MII_DIG_STS_PSEQ_STATE_POS_) == state) + break; + usleep_range(1000, 2000); + } while (wait_cnt++ < 10); + + if (wait_cnt >= 10) + return -ETIMEDOUT; + + return 0; +} + +static int lan743x_sgmii_config(struct lan743x_adapter *adapter) +{ + struct net_device *netdev = 
adapter->netdev; + struct phy_device *phydev = netdev->phydev; + enum lan743x_sgmii_lsd lsd = POWER_DOWN; + int mii_ctl; + bool status; + int ret; + + switch (phydev->speed) { + case SPEED_2500: + if (phydev->master_slave_state == MASTER_SLAVE_STATE_MASTER) + lsd = LINK_2500_MASTER; + else + lsd = LINK_2500_SLAVE; + break; + case SPEED_1000: + if (phydev->master_slave_state == MASTER_SLAVE_STATE_MASTER) + lsd = LINK_1000_MASTER; + else + lsd = LINK_1000_SLAVE; + break; + case SPEED_100: + if (phydev->duplex) + lsd = LINK_100FD; + else + lsd = LINK_100HD; + break; + case SPEED_10: + if (phydev->duplex) + lsd = LINK_10FD; + else + lsd = LINK_10HD; + break; + default: + netif_err(adapter, drv, adapter->netdev, + "Invalid speed %d\n", phydev->speed); + return -EINVAL; + } + + adapter->sgmii_lsd = lsd; + ret = lan743x_sgmii_aneg_update(adapter); + if (ret < 0) { + netif_err(adapter, drv, adapter->netdev, + "error %d SGMII cfg failed\n", ret); + return ret; + } + + ret = lan743x_is_sgmii_2_5G_mode(adapter, &status); + if (ret < 0) { + netif_err(adapter, drv, adapter->netdev, + "erro %d SGMII get mode failed\n", ret); + return ret; + } + + if (status) + netif_dbg(adapter, drv, adapter->netdev, + "SGMII 2.5G mode enable\n"); + else + netif_dbg(adapter, drv, adapter->netdev, + "SGMII 1G mode enable\n"); + + /* SGMII/1000/2500BASE-X PCS power down */ + mii_ctl = lan743x_sgmii_read(adapter, MDIO_MMD_VEND2, MII_BMCR); + if (mii_ctl < 0) + return mii_ctl; + + mii_ctl |= BMCR_PDOWN; + ret = lan743x_sgmii_write(adapter, MDIO_MMD_VEND2, MII_BMCR, mii_ctl); + if (ret < 0) + return ret; + + ret = lan743x_pcs_seq_state(adapter, PCS_POWER_STATE_DOWN); + if (ret < 0) + return ret; + + /* SGMII/1000/2500BASE-X PCS power up */ + mii_ctl &= ~BMCR_PDOWN; + ret = lan743x_sgmii_write(adapter, MDIO_MMD_VEND2, MII_BMCR, mii_ctl); + if (ret < 0) + return ret; + + ret = lan743x_pcs_seq_state(adapter, PCS_POWER_STATE_UP); + if (ret < 0) + return ret; + + return 0; +} + static void 
lan743x_mac_set_address(struct lan743x_adapter *adapter, u8 *addr) { @@ -1124,6 +1452,10 @@ static void lan743x_phy_link_status_change(struct net_device *netdev) data |= MAC_CR_CFG_H_; data &= ~MAC_CR_CFG_L_; break; + case SPEED_2500: + data |= MAC_CR_CFG_H_; + data |= MAC_CR_CFG_L_; + break; } lan743x_csr_write(adapter, MAC_CR, data); @@ -1135,6 +1467,10 @@ static void lan743x_phy_link_status_change(struct net_device *netdev) lan743x_phy_update_flowcontrol(adapter, local_advertisement, remote_advertisement); lan743x_ptp_update_latency(adapter, phydev->speed); + if (phydev->interface == PHY_INTERFACE_MODE_SGMII || + phydev->interface == PHY_INTERFACE_MODE_1000BASEX || + phydev->interface == PHY_INTERFACE_MODE_2500BASEX) + lan743x_sgmii_config(adapter); } } @@ -2875,6 +3211,7 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter, adapter->max_vector_count = PCI11X1X_MAX_VECTOR_COUNT; pci11x1x_strap_get_status(adapter); spin_lock_init(&adapter->eth_syslock_spinlock); + mutex_init(&adapter->sgmii_rw_lock); } else { adapter->max_tx_channels = LAN743X_MAX_TX_CHANNELS; adapter->used_tx_channels = LAN743X_USED_TX_CHANNELS; @@ -3124,6 +3461,7 @@ static void lan743x_pm_set_wol(struct lan743x_adapter *adapter) const u8 ipv6_multicast[3] = { 0x33, 0x33 }; const u8 arp_type[2] = { 0x08, 0x06 }; int mask_index; + u32 sopass; u32 pmtctl; u32 wucsr; u32 macrx; @@ -3218,6 +3556,14 @@ static void lan743x_pm_set_wol(struct lan743x_adapter *adapter) pmtctl |= PMT_CTL_RX_FCT_RFE_D3_CLK_OVR_; } + if (adapter->wolopts & WAKE_MAGICSECURE) { + sopass = *(u32 *)adapter->sopass; + lan743x_csr_write(adapter, MAC_MP_SO_LO, sopass); + sopass = *(u16 *)&adapter->sopass[4]; + lan743x_csr_write(adapter, MAC_MP_SO_HI, sopass); + wucsr |= MAC_MP_SO_EN_; + } + lan743x_csr_write(adapter, MAC_WUCSR, wucsr); lan743x_csr_write(adapter, PMT_CTL, pmtctl); lan743x_csr_write(adapter, MAC_RX, macrx); @@ -3228,6 +3574,7 @@ static int lan743x_pm_suspend(struct device *dev) struct pci_dev *pdev = 
to_pci_dev(dev); struct net_device *netdev = pci_get_drvdata(pdev); struct lan743x_adapter *adapter = netdev_priv(netdev); + u32 data; lan743x_pcidev_shutdown(pdev); @@ -3239,6 +3586,18 @@ static int lan743x_pm_suspend(struct device *dev) if (adapter->wolopts) lan743x_pm_set_wol(adapter); + if (adapter->is_pci11x1x) { + /* Save HW_CFG to config again in PM resume */ + data = lan743x_csr_read(adapter, HW_CFG); + adapter->hw_cfg = data; + data |= (HW_CFG_RST_PROTECT_PCIE_ | + HW_CFG_D3_RESET_DIS_ | + HW_CFG_D3_VAUX_OVR_ | + HW_CFG_HOT_RESET_DIS_ | + HW_CFG_RST_PROTECT_); + lan743x_csr_write(adapter, HW_CFG, data); + } + /* Host sets PME_En, put D3hot */ return pci_prepare_to_sleep(pdev); } @@ -3254,6 +3613,10 @@ static int lan743x_pm_resume(struct device *dev) pci_restore_state(pdev); pci_save_state(pdev); + /* Restore HW_CFG that was saved during pm suspend */ + if (adapter->is_pci11x1x) + lan743x_csr_write(adapter, HW_CFG, adapter->hw_cfg); + ret = lan743x_hardware_init(adapter, pdev); if (ret) { netif_err(adapter, probe, adapter->netdev, @@ -3270,6 +3633,9 @@ static int lan743x_pm_resume(struct device *dev) lan743x_netdev_open(netdev); netif_device_attach(netdev); + ret = lan743x_csr_read(adapter, MAC_WK_SRC); + netif_info(adapter, drv, adapter->netdev, + "Wakeup source : 0x%08X\n", ret); return 0; } diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h index 1ca5f3216403..72adae4f2aa0 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.h +++ b/drivers/net/ethernet/microchip/lan743x_main.h @@ -43,6 +43,11 @@ #define STRAP_READ_ADV_PM_DISABLE_ BIT(0) #define HW_CFG (0x010) +#define HW_CFG_RST_PROTECT_PCIE_ BIT(19) +#define HW_CFG_HOT_RESET_DIS_ BIT(15) +#define HW_CFG_D3_VAUX_OVR_ BIT(14) +#define HW_CFG_D3_RESET_DIS_ BIT(13) +#define HW_CFG_RST_PROTECT_ BIT(12) #define HW_CFG_RELOAD_TYPE_ALL_ (0x00000FC0) #define HW_CFG_EE_OTP_RELOAD_ BIT(4) #define HW_CFG_LRST_ BIT(1) @@ -92,6 +97,11 @@ #define 
CONFIG_REG_ADDR_BASE (0x0000) #define ETH_EEPROM_REG_ADDR_BASE (0x0E00) #define ETH_OTP_REG_ADDR_BASE (0x1000) +#define GEN_SYS_CONFIG_LOAD_STARTED_REG (0x0078) +#define ETH_SYS_CONFIG_LOAD_STARTED_REG (ETH_SYS_REG_ADDR_BASE + \ + CONFIG_REG_ADDR_BASE + \ + GEN_SYS_CONFIG_LOAD_STARTED_REG) +#define GEN_SYS_LOAD_STARTED_REG_ETH_ BIT(4) #define SYS_LOCK_REG (0x00A0) #define SYS_LOCK_REG_MAIN_LOCK_ BIT(7) #define SYS_LOCK_REG_GEN_PERI_LOCK_ BIT(5) @@ -214,6 +224,7 @@ #define MAC_EEE_TX_LPI_REQ_DLY_CNT (0x130) #define MAC_WUCSR (0x140) +#define MAC_MP_SO_EN_ BIT(21) #define MAC_WUCSR_RFE_WAKE_EN_ BIT(14) #define MAC_WUCSR_PFDA_EN_ BIT(3) #define MAC_WUCSR_WAKE_EN_ BIT(2) @@ -221,6 +232,8 @@ #define MAC_WUCSR_BCST_EN_ BIT(0) #define MAC_WK_SRC (0x144) +#define MAC_MP_SO_HI (0x148) +#define MAC_MP_SO_LO (0x14C) #define MAC_WUF_CFG0 (0x150) #define MAC_NUM_OF_WUF_CFG (32) @@ -280,11 +293,82 @@ #define MAC_WUCSR2 (0x600) +#define SGMII_ACC (0x720) +#define SGMII_ACC_SGMII_BZY_ BIT(31) +#define SGMII_ACC_SGMII_WR_ BIT(30) +#define SGMII_ACC_SGMII_MMD_SHIFT_ (16) +#define SGMII_ACC_SGMII_MMD_MASK_ GENMASK(20, 16) +#define SGMII_ACC_SGMII_MMD_VSR_ BIT(15) +#define SGMII_ACC_SGMII_ADDR_SHIFT_ (0) +#define SGMII_ACC_SGMII_ADDR_MASK_ GENMASK(15, 0) +#define SGMII_DATA (0x724) +#define SGMII_DATA_SHIFT_ (0) +#define SGMII_DATA_MASK_ GENMASK(15, 0) #define SGMII_CTL (0x728) #define SGMII_CTL_SGMII_ENABLE_ BIT(31) #define SGMII_CTL_LINK_STATUS_SOURCE_ BIT(8) #define SGMII_CTL_SGMII_POWER_DN_ BIT(1) +/* Vendor Specific SGMII MMD details */ +#define SR_VSMMD_PCS_ID1 0x0004 +#define SR_VSMMD_PCS_ID2 0x0005 +#define SR_VSMMD_STS 0x0008 +#define SR_VSMMD_CTRL 0x0009 + +#define VR_MII_DIG_CTRL1 0x8000 +#define VR_MII_DIG_CTRL1_VR_RST_ BIT(15) +#define VR_MII_DIG_CTRL1_R2TLBE_ BIT(14) +#define VR_MII_DIG_CTRL1_EN_VSMMD1_ BIT(13) +#define VR_MII_DIG_CTRL1_CS_EN_ BIT(10) +#define VR_MII_DIG_CTRL1_MAC_AUTO_SW_ BIT(9) +#define VR_MII_DIG_CTRL1_INIT_ BIT(8) +#define 
VR_MII_DIG_CTRL1_DTXLANED_0_ BIT(4) +#define VR_MII_DIG_CTRL1_CL37_TMR_OVR_RIDE_ BIT(3) +#define VR_MII_DIG_CTRL1_EN_2_5G_MODE_ BIT(2) +#define VR_MII_DIG_CTRL1_BYP_PWRUP_ BIT(1) +#define VR_MII_DIG_CTRL1_PHY_MODE_CTRL_ BIT(0) +#define VR_MII_AN_CTRL 0x8001 +#define VR_MII_AN_CTRL_MII_CTRL_ BIT(8) +#define VR_MII_AN_CTRL_SGMII_LINK_STS_ BIT(4) +#define VR_MII_AN_CTRL_TX_CONFIG_ BIT(3) +#define VR_MII_AN_CTRL_1000BASE_X_ (0) +#define VR_MII_AN_CTRL_SGMII_MODE_ (2) +#define VR_MII_AN_CTRL_QSGMII_MODE_ (3) +#define VR_MII_AN_CTRL_PCS_MODE_SHIFT_ (1) +#define VR_MII_AN_CTRL_PCS_MODE_MASK_ GENMASK(2, 1) +#define VR_MII_AN_CTRL_MII_AN_INTR_EN_ BIT(0) +#define VR_MII_AN_INTR_STS 0x8002 +#define VR_MII_AN_INTR_STS_LINK_UP_ BIT(4) +#define VR_MII_AN_INTR_STS_SPEED_MASK_ GENMASK(3, 2) +#define VR_MII_AN_INTR_STS_1000_MBPS_ BIT(3) +#define VR_MII_AN_INTR_STS_100_MBPS_ BIT(2) +#define VR_MII_AN_INTR_STS_10_MBPS_ (0) +#define VR_MII_AN_INTR_STS_FDX_ BIT(1) +#define VR_MII_AN_INTR_STS_CL37_ANCMPLT_INTR_ BIT(0) + +#define VR_MII_LINK_TIMER_CTRL 0x800A +#define VR_MII_DIG_STS 0x8010 +#define VR_MII_DIG_STS_PSEQ_STATE_MASK_ GENMASK(4, 2) +#define VR_MII_DIG_STS_PSEQ_STATE_POS_ (2) +#define VR_MII_GEN2_4_MPLL_CTRL0 0x8078 +#define VR_MII_MPLL_CTRL0_REF_CLK_DIV2_ BIT(12) +#define VR_MII_MPLL_CTRL0_USE_REFCLK_PAD_ BIT(4) +#define VR_MII_GEN2_4_MPLL_CTRL1 0x8079 +#define VR_MII_MPLL_CTRL1_MPLL_MULTIPLIER_ GENMASK(6, 0) +#define VR_MII_BAUD_RATE_3P125GBPS (3125) +#define VR_MII_BAUD_RATE_1P25GBPS (1250) +#define VR_MII_MPLL_MULTIPLIER_125 (125) +#define VR_MII_MPLL_MULTIPLIER_100 (100) +#define VR_MII_MPLL_MULTIPLIER_50 (50) +#define VR_MII_MPLL_MULTIPLIER_40 (40) +#define VR_MII_GEN2_4_MISC_CTRL1 0x809A +#define VR_MII_CTRL1_RX_RATE_0_MASK_ GENMASK(3, 2) +#define VR_MII_CTRL1_RX_RATE_0_SHIFT_ (2) +#define VR_MII_CTRL1_TX_RATE_0_MASK_ GENMASK(1, 0) +#define VR_MII_MPLL_BAUD_CLK (0) +#define VR_MII_MPLL_BAUD_CLK_DIV_2 (1) +#define VR_MII_MPLL_BAUD_CLK_DIV_4 (2) + #define INT_STS (0x780) 
#define INT_BIT_DMA_RX_(channel) BIT(24 + (channel)) #define INT_BIT_ALL_RX_ (0x0F000000) @@ -906,12 +990,28 @@ struct lan743x_rx { struct sk_buff *skb_head, *skb_tail; }; +/* SGMII Link Speed Duplex status */ +enum lan743x_sgmii_lsd { + POWER_DOWN = 0, + LINK_DOWN, + ANEG_BUSY, + LINK_10HD, + LINK_10FD, + LINK_100HD, + LINK_100FD, + LINK_1000_MASTER, + LINK_1000_SLAVE, + LINK_2500_MASTER, + LINK_2500_SLAVE +}; + struct lan743x_adapter { struct net_device *netdev; struct mii_bus *mdiobus; int msg_enable; #ifdef CONFIG_PM u32 wolopts; + u8 sopass[SOPASS_MAX]; #endif struct pci_dev *pdev; struct lan743x_csr csr; @@ -931,12 +1031,16 @@ struct lan743x_adapter { spinlock_t eth_syslock_spinlock; bool eth_syslock_en; u32 eth_syslock_acquire_cnt; + struct mutex sgmii_rw_lock; + /* SGMII Link Speed & Duplex status */ + enum lan743x_sgmii_lsd sgmii_lsd; u8 max_tx_channels; u8 used_tx_channels; u8 max_vector_count; #define LAN743X_ADAPTER_FLAG_OTP BIT(0) u32 flags; + u32 hw_cfg; }; #define LAN743X_COMPONENT_FLAG_RX(channel) BIT(20 + (channel)) @@ -1049,5 +1153,7 @@ struct lan743x_rx_buffer_info { u32 lan743x_csr_read(struct lan743x_adapter *adapter, int offset); void lan743x_csr_write(struct lan743x_adapter *adapter, int offset, u32 data); +int lan743x_hs_syslock_acquire(struct lan743x_adapter *adapter, u16 timeout); +void lan743x_hs_syslock_release(struct lan743x_adapter *adapter); #endif /* _LAN743X_H */ diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c b/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c index 3429660cd2e5..40ef9fad3a77 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c @@ -394,12 +394,10 @@ static int sparx5_handle_port_mdb_add(struct net_device *dev, struct sparx5 *spx5 = port->sparx5; u16 pgid_idx, vid; u32 mact_entry; + bool is_host; int res, err; - if (netif_is_bridge_master(v->obj.orig_dev)) { - sparx5_mact_learn(spx5, PGID_CPU, v->addr, 
v->vid); - return 0; - } + is_host = netif_is_bridge_master(v->obj.orig_dev); /* When VLAN unaware the vlan value is not parsed and we receive vid 0. * Fall back to bridge vid 1. @@ -416,17 +414,33 @@ static int sparx5_handle_port_mdb_add(struct net_device *dev, /* MC_IDX starts after the port masks in the PGID table */ pgid_idx += SPX5_PORTS; - sparx5_pgid_update_mask(port, pgid_idx, true); + + if (is_host) + spx5_rmw(ANA_AC_PGID_MISC_CFG_PGID_CPU_COPY_ENA_SET(1), + ANA_AC_PGID_MISC_CFG_PGID_CPU_COPY_ENA, spx5, + ANA_AC_PGID_MISC_CFG(pgid_idx)); + else + sparx5_pgid_update_mask(port, pgid_idx, true); + } else { err = sparx5_pgid_alloc_mcast(spx5, &pgid_idx); if (err) { netdev_warn(dev, "multicast pgid table full\n"); return err; } - sparx5_pgid_update_mask(port, pgid_idx, true); + + if (is_host) + spx5_rmw(ANA_AC_PGID_MISC_CFG_PGID_CPU_COPY_ENA_SET(1), + ANA_AC_PGID_MISC_CFG_PGID_CPU_COPY_ENA, spx5, + ANA_AC_PGID_MISC_CFG(pgid_idx)); + else + sparx5_pgid_update_mask(port, pgid_idx, true); + err = sparx5_mact_learn(spx5, pgid_idx, v->addr, vid); + if (err) { netdev_warn(dev, "could not learn mac address %pM\n", v->addr); + sparx5_pgid_free(spx5, pgid_idx); sparx5_pgid_update_mask(port, pgid_idx, false); return err; } @@ -463,13 +477,8 @@ static int sparx5_handle_port_mdb_del(struct net_device *dev, struct sparx5_port *port = netdev_priv(dev); struct sparx5 *spx5 = port->sparx5; u16 pgid_idx, vid; - u32 mact_entry, res, pgid_entry[3]; - int err; - - if (netif_is_bridge_master(v->obj.orig_dev)) { - sparx5_mact_forget(spx5, v->addr, v->vid); - return 0; - } + u32 mact_entry, res, pgid_entry[3], misc_cfg; + bool host_ena; if (!br_vlan_enabled(spx5->hw_bridge_dev)) vid = 1; @@ -483,15 +492,21 @@ static int sparx5_handle_port_mdb_del(struct net_device *dev, /* MC_IDX starts after the port masks in the PGID table */ pgid_idx += SPX5_PORTS; - sparx5_pgid_update_mask(port, pgid_idx, false); + + if (netif_is_bridge_master(v->obj.orig_dev)) + 
spx5_rmw(ANA_AC_PGID_MISC_CFG_PGID_CPU_COPY_ENA_SET(0), + ANA_AC_PGID_MISC_CFG_PGID_CPU_COPY_ENA, spx5, + ANA_AC_PGID_MISC_CFG(pgid_idx)); + else + sparx5_pgid_update_mask(port, pgid_idx, false); + + misc_cfg = spx5_rd(spx5, ANA_AC_PGID_MISC_CFG(pgid_idx)); + host_ena = ANA_AC_PGID_MISC_CFG_PGID_CPU_COPY_ENA_GET(misc_cfg); sparx5_pgid_read_mask(spx5, pgid_idx, pgid_entry); - if (bitmap_empty((unsigned long *)pgid_entry, SPX5_PORTS)) { - /* No ports are in MC group. Remove entry */ - err = sparx5_mdb_del_entry(dev, spx5, v->addr, vid, pgid_idx); - if (err) - return err; - } + if (bitmap_empty((unsigned long *)pgid_entry, SPX5_PORTS) && !host_ena) + /* No ports or CPU are in MC group. Remove entry */ + return sparx5_mdb_del_entry(dev, spx5, v->addr, vid, pgid_idx); } return 0; diff --git a/drivers/net/ethernet/microsoft/mana/gdma.h b/drivers/net/ethernet/microsoft/mana/gdma.h index 41ecd156e95f..4a6efe6ada08 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma.h +++ b/drivers/net/ethernet/microsoft/mana/gdma.h @@ -348,6 +348,7 @@ struct gdma_context { struct completion eq_test_event; u32 test_event_eq_id; + bool is_pf; void __iomem *bar0_va; void __iomem *shm_base; void __iomem *db_page_base; @@ -469,6 +470,15 @@ struct gdma_eqe { #define GDMA_REG_DB_PAGE_SIZE 0x10 #define GDMA_REG_SHM_OFFSET 0x18 +#define GDMA_PF_REG_DB_PAGE_SIZE 0xD0 +#define GDMA_PF_REG_DB_PAGE_OFF 0xC8 +#define GDMA_PF_REG_SHM_OFF 0x70 + +#define GDMA_SRIOV_REG_CFG_BASE_OFF 0x108 + +#define MANA_PF_DEVICE_ID 0x00B9 +#define MANA_VF_DEVICE_ID 0x00BA + struct gdma_posted_wqe_info { u32 wqe_size_in_bu; }; diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index 49b85ca578b0..5f9240182351 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -18,7 +18,24 @@ static u64 mana_gd_r64(struct gdma_context *g, u64 offset) return readq(g->bar0_va + offset); } -static void 
mana_gd_init_registers(struct pci_dev *pdev) +static void mana_gd_init_pf_regs(struct pci_dev *pdev) +{ + struct gdma_context *gc = pci_get_drvdata(pdev); + void __iomem *sriov_base_va; + u64 sriov_base_off; + + gc->db_page_size = mana_gd_r32(gc, GDMA_PF_REG_DB_PAGE_SIZE) & 0xFFFF; + gc->db_page_base = gc->bar0_va + + mana_gd_r64(gc, GDMA_PF_REG_DB_PAGE_OFF); + + sriov_base_off = mana_gd_r64(gc, GDMA_SRIOV_REG_CFG_BASE_OFF); + + sriov_base_va = gc->bar0_va + sriov_base_off; + gc->shm_base = sriov_base_va + + mana_gd_r64(gc, sriov_base_off + GDMA_PF_REG_SHM_OFF); +} + +static void mana_gd_init_vf_regs(struct pci_dev *pdev) { struct gdma_context *gc = pci_get_drvdata(pdev); @@ -30,6 +47,16 @@ static void mana_gd_init_registers(struct pci_dev *pdev) gc->shm_base = gc->bar0_va + mana_gd_r64(gc, GDMA_REG_SHM_OFFSET); } +static void mana_gd_init_registers(struct pci_dev *pdev) +{ + struct gdma_context *gc = pci_get_drvdata(pdev); + + if (gc->is_pf) + mana_gd_init_pf_regs(pdev); + else + mana_gd_init_vf_regs(pdev); +} + static int mana_gd_query_max_resources(struct pci_dev *pdev) { struct gdma_context *gc = pci_get_drvdata(pdev); @@ -1304,6 +1331,11 @@ static void mana_gd_cleanup(struct pci_dev *pdev) mana_gd_remove_irqs(pdev); } +static bool mana_is_pf(unsigned short dev_id) +{ + return dev_id == MANA_PF_DEVICE_ID; +} + static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct gdma_context *gc; @@ -1340,10 +1372,10 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (!bar0_va) goto free_gc; + gc->is_pf = mana_is_pf(pdev->device); gc->bar0_va = bar0_va; gc->dev = &pdev->dev; - err = mana_gd_setup(pdev); if (err) goto unmap_bar; @@ -1438,7 +1470,8 @@ static void mana_gd_shutdown(struct pci_dev *pdev) #endif static const struct pci_device_id mana_id_table[] = { - { PCI_DEVICE(PCI_VENDOR_ID_MICROSOFT, 0x00BA) }, + { PCI_DEVICE(PCI_VENDOR_ID_MICROSOFT, MANA_PF_DEVICE_ID) }, + { 
PCI_DEVICE(PCI_VENDOR_ID_MICROSOFT, MANA_VF_DEVICE_ID) }, { } }; diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c index 078d6a5a0768..543a5d5c304f 100644 --- a/drivers/net/ethernet/microsoft/mana/hw_channel.c +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c @@ -158,6 +158,14 @@ static void mana_hwc_init_event_handler(void *ctx, struct gdma_queue *q_self, hwc->rxq->msg_buf->gpa_mkey = val; hwc->txq->msg_buf->gpa_mkey = val; break; + + case HWC_INIT_DATA_PF_DEST_RQ_ID: + hwc->pf_dest_vrq_id = val; + break; + + case HWC_INIT_DATA_PF_DEST_CQ_ID: + hwc->pf_dest_vrcq_id = val; + break; } break; @@ -773,10 +781,13 @@ void mana_hwc_destroy_channel(struct gdma_context *gc) int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len, const void *req, u32 resp_len, void *resp) { + struct gdma_context *gc = hwc->gdma_dev->gdma_context; struct hwc_work_request *tx_wr; struct hwc_wq *txq = hwc->txq; struct gdma_req_hdr *req_msg; struct hwc_caller_ctx *ctx; + u32 dest_vrcq = 0; + u32 dest_vrq = 0; u16 msg_id; int err; @@ -803,7 +814,12 @@ int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len, tx_wr->msg_size = req_len; - err = mana_hwc_post_tx_wqe(txq, tx_wr, 0, 0, false); + if (gc->is_pf) { + dest_vrq = hwc->pf_dest_vrq_id; + dest_vrcq = hwc->pf_dest_vrcq_id; + } + + err = mana_hwc_post_tx_wqe(txq, tx_wr, dest_vrq, dest_vrcq, false); if (err) { dev_err(hwc->dev, "HWC: Failed to post send WQE: %d\n", err); goto out; diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.h b/drivers/net/ethernet/microsoft/mana/hw_channel.h index 31c6e83c454a..6a757a6e2732 100644 --- a/drivers/net/ethernet/microsoft/mana/hw_channel.h +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.h @@ -20,6 +20,8 @@ #define HWC_INIT_DATA_MAX_NUM_CQS 7 #define HWC_INIT_DATA_PDID 8 #define HWC_INIT_DATA_GPA_MKEY 9 +#define HWC_INIT_DATA_PF_DEST_RQ_ID 10 +#define HWC_INIT_DATA_PF_DEST_CQ_ID 11 /* Structures 
labeled with "HW DATA" are exchanged with the hardware. All of * them are naturally aligned and hence don't need __packed. @@ -178,6 +180,9 @@ struct hw_channel_context { struct semaphore sema; struct gdma_resource inflight_msg_res; + u32 pf_dest_vrq_id; + u32 pf_dest_vrcq_id; + struct hwc_caller_ctx *caller_ctx; }; diff --git a/drivers/net/ethernet/microsoft/mana/mana.h b/drivers/net/ethernet/microsoft/mana/mana.h index d36405af9432..d58be64374c8 100644 --- a/drivers/net/ethernet/microsoft/mana/mana.h +++ b/drivers/net/ethernet/microsoft/mana/mana.h @@ -53,12 +53,14 @@ struct mana_stats_rx { u64 bytes; u64 xdp_drop; u64 xdp_tx; + u64 xdp_redirect; struct u64_stats_sync syncp; }; struct mana_stats_tx { u64 packets; u64 bytes; + u64 xdp_xmit; struct u64_stats_sync syncp; }; @@ -311,6 +313,8 @@ struct mana_rxq { struct bpf_prog __rcu *bpf_prog; struct xdp_rxq_info xdp_rxq; struct page *xdp_save_page; + bool xdp_flush; + int xdp_rc; /* XDP redirect return code */ /* MUST BE THE LAST MEMBER: * Each receive buffer has an associated mana_recv_buf_oob. 
@@ -374,6 +378,7 @@ struct mana_port_context { unsigned int num_queues; mana_handle_t port_handle; + mana_handle_t pf_filter_handle; u16 port_idx; @@ -395,6 +400,8 @@ int mana_probe(struct gdma_dev *gd, bool resuming); void mana_remove(struct gdma_dev *gd, bool suspending); void mana_xdp_tx(struct sk_buff *skb, struct net_device *ndev); +int mana_xdp_xmit(struct net_device *ndev, int n, struct xdp_frame **frames, + u32 flags); u32 mana_run_xdp(struct net_device *ndev, struct mana_rxq *rxq, struct xdp_buff *xdp, void *buf_va, uint pkt_len); struct bpf_prog *mana_xdp_get(struct mana_port_context *apc); @@ -420,6 +427,12 @@ enum mana_command_code { MANA_FENCE_RQ = 0x20006, MANA_CONFIG_VPORT_RX = 0x20007, MANA_QUERY_VPORT_CONFIG = 0x20008, + + /* Privileged commands for the PF mode */ + MANA_REGISTER_FILTER = 0x28000, + MANA_DEREGISTER_FILTER = 0x28001, + MANA_REGISTER_HW_PORT = 0x28003, + MANA_DEREGISTER_HW_PORT = 0x28004, }; /* Query Device Configuration */ @@ -547,6 +560,63 @@ struct mana_cfg_rx_steer_resp { struct gdma_resp_hdr hdr; }; /* HW DATA */ +/* Register HW vPort */ +struct mana_register_hw_vport_req { + struct gdma_req_hdr hdr; + u16 attached_gfid; + u8 is_pf_default_vport; + u8 reserved1; + u8 allow_all_ether_types; + u8 reserved2; + u8 reserved3; + u8 reserved4; +}; /* HW DATA */ + +struct mana_register_hw_vport_resp { + struct gdma_resp_hdr hdr; + mana_handle_t hw_vport_handle; +}; /* HW DATA */ + +/* Deregister HW vPort */ +struct mana_deregister_hw_vport_req { + struct gdma_req_hdr hdr; + mana_handle_t hw_vport_handle; +}; /* HW DATA */ + +struct mana_deregister_hw_vport_resp { + struct gdma_resp_hdr hdr; +}; /* HW DATA */ + +/* Register filter */ +struct mana_register_filter_req { + struct gdma_req_hdr hdr; + mana_handle_t vport; + u8 mac_addr[6]; + u8 reserved1; + u8 reserved2; + u8 reserved3; + u8 reserved4; + u16 reserved5; + u32 reserved6; + u32 reserved7; + u32 reserved8; +}; /* HW DATA */ + +struct mana_register_filter_resp { + struct 
gdma_resp_hdr hdr; + mana_handle_t filter_handle; +}; /* HW DATA */ + +/* Deregister filter */ +struct mana_deregister_filter_req { + struct gdma_req_hdr hdr; + mana_handle_t filter_handle; +}; /* HW DATA */ + +struct mana_deregister_filter_resp { + struct gdma_resp_hdr hdr; +}; /* HW DATA */ + #define MANA_MAX_NUM_QUEUES 64 #define MANA_SHORT_VPORT_OFFSET_MAX ((1U << 8) - 1) diff --git a/drivers/net/ethernet/microsoft/mana/mana_bpf.c b/drivers/net/ethernet/microsoft/mana/mana_bpf.c index 1d2f948b5c00..421fd39ff3a8 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_bpf.c +++ b/drivers/net/ethernet/microsoft/mana/mana_bpf.c @@ -32,9 +32,55 @@ void mana_xdp_tx(struct sk_buff *skb, struct net_device *ndev) ndev->stats.tx_dropped++; } +static int mana_xdp_xmit_fm(struct net_device *ndev, struct xdp_frame *frame, + u16 q_idx) +{ + struct sk_buff *skb; + + skb = xdp_build_skb_from_frame(frame, ndev); + if (unlikely(!skb)) + return -ENOMEM; + + skb_set_queue_mapping(skb, q_idx); + + mana_xdp_tx(skb, ndev); + + return 0; +} + +int mana_xdp_xmit(struct net_device *ndev, int n, struct xdp_frame **frames, + u32 flags) +{ + struct mana_port_context *apc = netdev_priv(ndev); + struct mana_stats_tx *tx_stats; + int i, count = 0; + u16 q_idx; + + if (unlikely(!apc->port_is_up)) + return 0; + + q_idx = smp_processor_id() % ndev->real_num_tx_queues; + + for (i = 0; i < n; i++) { + if (mana_xdp_xmit_fm(ndev, frames[i], q_idx)) + break; + + count++; + } + + tx_stats = &apc->tx_qp[q_idx].txq.stats; + + u64_stats_update_begin(&tx_stats->syncp); + tx_stats->xdp_xmit += count; + u64_stats_update_end(&tx_stats->syncp); + + return count; +} + u32 mana_run_xdp(struct net_device *ndev, struct mana_rxq *rxq, struct xdp_buff *xdp, void *buf_va, uint pkt_len) { + struct mana_stats_rx *rx_stats; struct bpf_prog *prog; u32 act = XDP_PASS; @@ -49,12 +95,30 @@ u32 mana_run_xdp(struct net_device *ndev, struct mana_rxq *rxq, act = bpf_prog_run_xdp(prog, xdp); + rx_stats = &rxq->stats; + switch 
(act) { case XDP_PASS: case XDP_TX: case XDP_DROP: break; + case XDP_REDIRECT: + rxq->xdp_rc = xdp_do_redirect(ndev, xdp, prog); + if (!rxq->xdp_rc) { + rxq->xdp_flush = true; + + u64_stats_update_begin(&rx_stats->syncp); + rx_stats->packets++; + rx_stats->bytes += pkt_len; + rx_stats->xdp_redirect++; + u64_stats_update_end(&rx_stats->syncp); + + break; + } + + fallthrough; + case XDP_ABORTED: trace_xdp_exception(ndev, prog, act); break; diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index b1d773823232..9259a74eca40 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -6,6 +6,7 @@ #include <linux/inetdevice.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> +#include <linux/filter.h> #include <linux/mm.h> #include <net/checksum.h> @@ -382,6 +383,7 @@ static const struct net_device_ops mana_devops = { .ndo_validate_addr = eth_validate_addr, .ndo_get_stats64 = mana_get_stats64, .ndo_bpf = mana_bpf, + .ndo_xdp_xmit = mana_xdp_xmit, }; static void mana_cleanup_port_context(struct mana_port_context *apc) @@ -446,6 +448,119 @@ static int mana_verify_resp_hdr(const struct gdma_resp_hdr *resp_hdr, return 0; } +static int mana_pf_register_hw_vport(struct mana_port_context *apc) +{ + struct mana_register_hw_vport_resp resp = {}; + struct mana_register_hw_vport_req req = {}; + int err; + + mana_gd_init_req_hdr(&req.hdr, MANA_REGISTER_HW_PORT, + sizeof(req), sizeof(resp)); + req.attached_gfid = 1; + req.is_pf_default_vport = 1; + req.allow_all_ether_types = 1; + + err = mana_send_request(apc->ac, &req, sizeof(req), &resp, + sizeof(resp)); + if (err) { + netdev_err(apc->ndev, "Failed to register hw vPort: %d\n", err); + return err; + } + + err = mana_verify_resp_hdr(&resp.hdr, MANA_REGISTER_HW_PORT, + sizeof(resp)); + if (err || resp.hdr.status) { + netdev_err(apc->ndev, "Failed to register hw vPort: %d, 0x%x\n", + err, resp.hdr.status); + return 
err ? err : -EPROTO; + } + + apc->port_handle = resp.hw_vport_handle; + return 0; +} + +static void mana_pf_deregister_hw_vport(struct mana_port_context *apc) +{ + struct mana_deregister_hw_vport_resp resp = {}; + struct mana_deregister_hw_vport_req req = {}; + int err; + + mana_gd_init_req_hdr(&req.hdr, MANA_DEREGISTER_HW_PORT, + sizeof(req), sizeof(resp)); + req.hw_vport_handle = apc->port_handle; + + err = mana_send_request(apc->ac, &req, sizeof(req), &resp, + sizeof(resp)); + if (err) { + netdev_err(apc->ndev, "Failed to unregister hw vPort: %d\n", + err); + return; + } + + err = mana_verify_resp_hdr(&resp.hdr, MANA_DEREGISTER_HW_PORT, + sizeof(resp)); + if (err || resp.hdr.status) + netdev_err(apc->ndev, + "Failed to deregister hw vPort: %d, 0x%x\n", + err, resp.hdr.status); +} + +static int mana_pf_register_filter(struct mana_port_context *apc) +{ + struct mana_register_filter_resp resp = {}; + struct mana_register_filter_req req = {}; + int err; + + mana_gd_init_req_hdr(&req.hdr, MANA_REGISTER_FILTER, + sizeof(req), sizeof(resp)); + req.vport = apc->port_handle; + memcpy(req.mac_addr, apc->mac_addr, ETH_ALEN); + + err = mana_send_request(apc->ac, &req, sizeof(req), &resp, + sizeof(resp)); + if (err) { + netdev_err(apc->ndev, "Failed to register filter: %d\n", err); + return err; + } + + err = mana_verify_resp_hdr(&resp.hdr, MANA_REGISTER_FILTER, + sizeof(resp)); + if (err || resp.hdr.status) { + netdev_err(apc->ndev, "Failed to register filter: %d, 0x%x\n", + err, resp.hdr.status); + return err ? 
err : -EPROTO; + } + + apc->pf_filter_handle = resp.filter_handle; + return 0; +} + +static void mana_pf_deregister_filter(struct mana_port_context *apc) +{ + struct mana_deregister_filter_resp resp = {}; + struct mana_deregister_filter_req req = {}; + int err; + + mana_gd_init_req_hdr(&req.hdr, MANA_DEREGISTER_FILTER, + sizeof(req), sizeof(resp)); + req.filter_handle = apc->pf_filter_handle; + + err = mana_send_request(apc->ac, &req, sizeof(req), &resp, + sizeof(resp)); + if (err) { + netdev_err(apc->ndev, "Failed to unregister filter: %d\n", + err); + return; + } + + err = mana_verify_resp_hdr(&resp.hdr, MANA_DEREGISTER_FILTER, + sizeof(resp)); + if (err || resp.hdr.status) + netdev_err(apc->ndev, + "Failed to deregister filter: %d, 0x%x\n", + err, resp.hdr.status); +} + static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver, u32 proto_minor_ver, u32 proto_micro_ver, u16 *max_num_vports) @@ -1007,6 +1122,9 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe, act = mana_run_xdp(ndev, rxq, &xdp, buf_va, pkt_len); + if (act == XDP_REDIRECT && !rxq->xdp_rc) + return; + if (act != XDP_PASS && act != XDP_TX) goto drop_xdp; @@ -1162,11 +1280,14 @@ drop: static void mana_poll_rx_cq(struct mana_cq *cq) { struct gdma_comp *comp = cq->gdma_comp_buf; + struct mana_rxq *rxq = cq->rxq; int comp_read, i; comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, CQE_POLLING_BUFFER); WARN_ON_ONCE(comp_read > CQE_POLLING_BUFFER); + rxq->xdp_flush = false; + for (i = 0; i < comp_read; i++) { if (WARN_ON_ONCE(comp[i].is_sq)) return; @@ -1175,8 +1296,11 @@ static void mana_poll_rx_cq(struct mana_cq *cq) if (WARN_ON_ONCE(comp[i].wq_num != cq->rxq->gdma_id)) return; - mana_process_rx_cqe(cq->rxq, cq, &comp[i]); + mana_process_rx_cqe(rxq, cq, &comp[i]); } + + if (rxq->xdp_flush) + xdp_do_flush(); } static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue) @@ -1653,6 +1777,7 @@ out: static void mana_destroy_vport(struct mana_port_context *apc) 
{ + struct gdma_dev *gd = apc->ac->gdma_dev; struct mana_rxq *rxq; u32 rxq_idx; @@ -1666,6 +1791,9 @@ static void mana_destroy_vport(struct mana_port_context *apc) } mana_destroy_txq(apc); + + if (gd->gdma_context->is_pf) + mana_pf_deregister_hw_vport(apc); } static int mana_create_vport(struct mana_port_context *apc, @@ -1676,6 +1804,12 @@ static int mana_create_vport(struct mana_port_context *apc, apc->default_rxobj = INVALID_MANA_HANDLE; + if (gd->gdma_context->is_pf) { + err = mana_pf_register_hw_vport(apc); + if (err) + return err; + } + err = mana_cfg_vport(apc, gd->pdid, gd->doorbell); if (err) return err; @@ -1755,6 +1889,7 @@ reset_apc: int mana_alloc_queues(struct net_device *ndev) { struct mana_port_context *apc = netdev_priv(ndev); + struct gdma_dev *gd = apc->ac->gdma_dev; int err; err = mana_create_vport(apc, ndev); @@ -1781,6 +1916,12 @@ int mana_alloc_queues(struct net_device *ndev) if (err) goto destroy_vport; + if (gd->gdma_context->is_pf) { + err = mana_pf_register_filter(apc); + if (err) + goto destroy_vport; + } + mana_chn_setxdp(apc, mana_xdp_get(apc)); return 0; @@ -1825,6 +1966,7 @@ int mana_attach(struct net_device *ndev) static int mana_dealloc_queues(struct net_device *ndev) { struct mana_port_context *apc = netdev_priv(ndev); + struct gdma_dev *gd = apc->ac->gdma_dev; struct mana_txq *txq; int i, err; @@ -1833,6 +1975,9 @@ static int mana_dealloc_queues(struct net_device *ndev) mana_chn_setxdp(apc, NULL); + if (gd->gdma_context->is_pf) + mana_pf_deregister_filter(apc); + /* No packet can be transmitted now since apc->port_is_up is false. 
* There is still a tiny chance that mana_poll_tx_cq() can re-enable * a txq because it may not timely see apc->port_is_up being cleared @@ -1915,6 +2060,7 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, apc->max_queues = gc->max_num_queues; apc->num_queues = gc->max_num_queues; apc->port_handle = INVALID_MANA_HANDLE; + apc->pf_filter_handle = INVALID_MANA_HANDLE; apc->port_idx = port_idx; ndev->netdev_ops = &mana_devops; diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c index e13f2453eabb..c530db76880f 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c +++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c @@ -23,7 +23,7 @@ static int mana_get_sset_count(struct net_device *ndev, int stringset) if (stringset != ETH_SS_STATS) return -EINVAL; - return ARRAY_SIZE(mana_eth_stats) + num_queues * 6; + return ARRAY_SIZE(mana_eth_stats) + num_queues * 8; } static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data) @@ -50,6 +50,8 @@ static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data) p += ETH_GSTRING_LEN; sprintf(p, "rx_%d_xdp_tx", i); p += ETH_GSTRING_LEN; + sprintf(p, "rx_%d_xdp_redirect", i); + p += ETH_GSTRING_LEN; } for (i = 0; i < num_queues; i++) { @@ -57,6 +59,8 @@ static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data) p += ETH_GSTRING_LEN; sprintf(p, "tx_%d_bytes", i); p += ETH_GSTRING_LEN; + sprintf(p, "tx_%d_xdp_xmit", i); + p += ETH_GSTRING_LEN; } } @@ -70,6 +74,8 @@ static void mana_get_ethtool_stats(struct net_device *ndev, struct mana_stats_tx *tx_stats; unsigned int start; u64 packets, bytes; + u64 xdp_redirect; + u64 xdp_xmit; u64 xdp_drop; u64 xdp_tx; int q, i = 0; @@ -89,12 +95,14 @@ static void mana_get_ethtool_stats(struct net_device *ndev, bytes = rx_stats->bytes; xdp_drop = rx_stats->xdp_drop; xdp_tx = rx_stats->xdp_tx; + xdp_redirect = rx_stats->xdp_redirect; } while 
(u64_stats_fetch_retry_irq(&rx_stats->syncp, start)); data[i++] = packets; data[i++] = bytes; data[i++] = xdp_drop; data[i++] = xdp_tx; + data[i++] = xdp_redirect; } for (q = 0; q < num_queues; q++) { @@ -104,10 +112,12 @@ static void mana_get_ethtool_stats(struct net_device *ndev, start = u64_stats_fetch_begin_irq(&tx_stats->syncp); packets = tx_stats->packets; bytes = tx_stats->bytes; + xdp_xmit = tx_stats->xdp_xmit; } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start)); data[i++] = packets; data[i++] = bytes; + data[i++] = xdp_xmit; } } diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 8da7e25a47c9..d4649e4ee0e7 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -3367,6 +3367,7 @@ int ocelot_init(struct ocelot *ocelot) mutex_init(&ocelot->ptp_lock); mutex_init(&ocelot->mact_lock); mutex_init(&ocelot->fwd_domain_lock); + mutex_init(&ocelot->tas_lock); spin_lock_init(&ocelot->ptp_clock_lock); spin_lock_init(&ocelot->ts_id_lock); snprintf(queue_name, sizeof(queue_name), "%s-stats", diff --git a/drivers/net/ethernet/mscc/ocelot_ptp.c b/drivers/net/ethernet/mscc/ocelot_ptp.c index 87ad2137ba06..09c703efe946 100644 --- a/drivers/net/ethernet/mscc/ocelot_ptp.c +++ b/drivers/net/ethernet/mscc/ocelot_ptp.c @@ -72,6 +72,10 @@ int ocelot_ptp_settime64(struct ptp_clock_info *ptp, ocelot_write_rix(ocelot, val, PTP_PIN_CFG, TOD_ACC_PIN); spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags); + + if (ocelot->ops->tas_clock_adjust) + ocelot->ops->tas_clock_adjust(ocelot); + return 0; } EXPORT_SYMBOL(ocelot_ptp_settime64); @@ -105,6 +109,9 @@ int ocelot_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) ocelot_write_rix(ocelot, val, PTP_PIN_CFG, TOD_ACC_PIN); spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags); + + if (ocelot->ops->tas_clock_adjust) + ocelot->ops->tas_clock_adjust(ocelot); } else { /* Fall back using ocelot_ptp_settime64 which is not exact. 
*/ struct timespec64 ts; @@ -117,6 +124,7 @@ int ocelot_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) ocelot_ptp_settime64(ptp, &ts); } + return 0; } EXPORT_SYMBOL(ocelot_ptp_adjtime); diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 0147de405365..b456e81a73a4 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -674,9 +674,9 @@ nfp_fl_set_ip6_hop_limit_flow_label(u32 off, __be32 exact, __be32 mask, fl_hl_mask->hop_limit; break; case round_down(offsetof(struct ipv6hdr, flow_lbl), 4): - if (mask & ~IPV6_FLOW_LABEL_MASK || - exact & ~IPV6_FLOW_LABEL_MASK) { - NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit IPv6 flow label action"); + if (mask & ~IPV6_FLOWINFO_MASK || + exact & ~IPV6_FLOWINFO_MASK) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit IPv6 flow info action"); return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index 68e8a2fb1a29..2df2af1da716 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -96,8 +96,6 @@ #define NFP_FL_PUSH_VLAN_PRIO GENMASK(15, 13) #define NFP_FL_PUSH_VLAN_VID GENMASK(11, 0) -#define IPV6_FLOW_LABEL_MASK cpu_to_be32(0x000fffff) - /* LAG ports */ #define NFP_FL_LAG_OUT 0xC0DE0000 diff --git a/drivers/net/ethernet/netronome/nfp/nfd3/dp.c b/drivers/net/ethernet/netronome/nfp/nfd3/dp.c index 7db56abaa582..f9410d59146d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfd3/dp.c +++ b/drivers/net/ethernet/netronome/nfp/nfd3/dp.c @@ -282,7 +282,7 @@ netdev_tx_t nfp_nfd3_tx(struct sk_buff *skb, struct net_device *netdev) txd = &tx_ring->txds[wr_idx]; txd->offset_eop = (nr_frags ? 
0 : NFD3_DESC_TX_EOP) | md_bytes; txd->dma_len = cpu_to_le16(skb_headlen(skb)); - nfp_desc_set_dma_addr(txd, dma_addr); + nfp_desc_set_dma_addr_40b(txd, dma_addr); txd->data_len = cpu_to_le16(skb->len); txd->flags = 0; @@ -320,7 +320,7 @@ netdev_tx_t nfp_nfd3_tx(struct sk_buff *skb, struct net_device *netdev) txd = &tx_ring->txds[wr_idx]; txd->dma_len = cpu_to_le16(fsize); - nfp_desc_set_dma_addr(txd, dma_addr); + nfp_desc_set_dma_addr_40b(txd, dma_addr); txd->offset_eop = md_bytes | ((f == nr_frags - 1) ? NFD3_DESC_TX_EOP : 0); txd->vals8[1] = second_half; @@ -562,8 +562,12 @@ nfp_nfd3_rx_give_one(const struct nfp_net_dp *dp, /* Fill freelist descriptor */ rx_ring->rxds[wr_idx].fld.reserved = 0; rx_ring->rxds[wr_idx].fld.meta_len_dd = 0; - nfp_desc_set_dma_addr(&rx_ring->rxds[wr_idx].fld, - dma_addr + dp->rx_dma_off); + /* DMA address is expanded to 48-bit width in freelist for NFP3800, + * so the *_48b macro is used accordingly, it's also OK to fill + * a 40-bit address since the top 8 bits are set to 0.
+ */ + nfp_desc_set_dma_addr_48b(&rx_ring->rxds[wr_idx].fld, + dma_addr + dp->rx_dma_off); rx_ring->wr_p++; if (!(rx_ring->wr_p % NFP_NET_FL_BATCH)) { @@ -817,7 +821,7 @@ nfp_nfd3_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring, txd = &tx_ring->txds[wr_idx]; txd->offset_eop = NFD3_DESC_TX_EOP; txd->dma_len = cpu_to_le16(pkt_len); - nfp_desc_set_dma_addr(txd, rxbuf->dma_addr + dma_off); + nfp_desc_set_dma_addr_40b(txd, rxbuf->dma_addr + dma_off); txd->data_len = cpu_to_le16(pkt_len); txd->flags = 0; @@ -1193,7 +1197,7 @@ nfp_nfd3_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, txd = &tx_ring->txds[wr_idx]; txd->offset_eop = meta_len | NFD3_DESC_TX_EOP; txd->dma_len = cpu_to_le16(skb_headlen(skb)); - nfp_desc_set_dma_addr(txd, dma_addr); + nfp_desc_set_dma_addr_40b(txd, dma_addr); txd->data_len = cpu_to_le16(skb->len); txd->flags = 0; diff --git a/drivers/net/ethernet/netronome/nfp/nfd3/rings.c b/drivers/net/ethernet/netronome/nfp/nfd3/rings.c index 47604d5e25eb..f31eabdc0631 100644 --- a/drivers/net/ethernet/netronome/nfp/nfd3/rings.c +++ b/drivers/net/ethernet/netronome/nfp/nfd3/rings.c @@ -260,6 +260,7 @@ const struct nfp_dp_ops nfp_nfd3_ops = { .version = NFP_NFD_VER_NFD3, .tx_min_desc_per_pkt = 1, .cap_mask = NFP_NFD3_CFG_CTRL_SUPPORTED, + .dma_mask = DMA_BIT_MASK(40), .poll = nfp_nfd3_poll, .xsk_poll = nfp_nfd3_xsk_poll, .ctrl_poll = nfp_nfd3_ctrl_poll, diff --git a/drivers/net/ethernet/netronome/nfp/nfd3/xsk.c b/drivers/net/ethernet/netronome/nfp/nfd3/xsk.c index c16c4b42ecfd..454fea4c8be2 100644 --- a/drivers/net/ethernet/netronome/nfp/nfd3/xsk.c +++ b/drivers/net/ethernet/netronome/nfp/nfd3/xsk.c @@ -40,7 +40,7 @@ nfp_nfd3_xsk_tx_xdp(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, txd = &tx_ring->txds[wr_idx]; txd->offset_eop = NFD3_DESC_TX_EOP; txd->dma_len = cpu_to_le16(pkt_len); - nfp_desc_set_dma_addr(txd, xrxbuf->dma_addr + pkt_off); + nfp_desc_set_dma_addr_40b(txd, xrxbuf->dma_addr + pkt_off); 
txd->data_len = cpu_to_le16(pkt_len); txd->flags = 0; @@ -361,10 +361,8 @@ static void nfp_nfd3_xsk_tx(struct nfp_net_tx_ring *tx_ring) /* Build TX descriptor. */ txd = &tx_ring->txds[wr_idx]; - nfp_desc_set_dma_addr(txd, - xsk_buff_raw_get_dma(xsk_pool, - desc[i].addr - )); + nfp_desc_set_dma_addr_40b(txd, + xsk_buff_raw_get_dma(xsk_pool, desc[i].addr)); txd->offset_eop = NFD3_DESC_TX_EOP; txd->dma_len = cpu_to_le16(desc[i].len); txd->data_len = cpu_to_le16(desc[i].len); diff --git a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c index e509d6dcba5c..300637e576a8 100644 --- a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c +++ b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c @@ -314,7 +314,7 @@ netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev) FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type); txd->dma_len_type = cpu_to_le16(dlen_type); - nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr); + nfp_desc_set_dma_addr_48b(txd, dma_addr); /* starts at bit 0 */ BUILD_BUG_ON(!(NFDK_DESC_TX_DMA_LEN_HEAD & 1)); @@ -339,7 +339,7 @@ netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev) dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len); txd->dma_len_type = cpu_to_le16(dlen_type); - nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr); + nfp_desc_set_dma_addr_48b(txd, dma_addr); dma_len -= dlen_type; dma_addr += dlen_type + 1; @@ -595,8 +595,8 @@ nfp_nfdk_rx_give_one(const struct nfp_net_dp *dp, /* Fill freelist descriptor */ rx_ring->rxds[wr_idx].fld.reserved = 0; rx_ring->rxds[wr_idx].fld.meta_len_dd = 0; - nfp_desc_set_dma_addr(&rx_ring->rxds[wr_idx].fld, - dma_addr + dp->rx_dma_off); + nfp_desc_set_dma_addr_48b(&rx_ring->rxds[wr_idx].fld, + dma_addr + dp->rx_dma_off); rx_ring->wr_p++; if (!(rx_ring->wr_p % NFP_NET_FL_BATCH)) { @@ -929,7 +929,7 @@ nfp_nfdk_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring, FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type); txd->dma_len_type = cpu_to_le16(dlen_type); - 
nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr); + nfp_desc_set_dma_addr_48b(txd, dma_addr); tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD; dma_len -= tmp_dlen; @@ -940,7 +940,7 @@ nfp_nfdk_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring, dma_len -= 1; dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len); txd->dma_len_type = cpu_to_le16(dlen_type); - nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr); + nfp_desc_set_dma_addr_48b(txd, dma_addr); dlen_type &= NFDK_DESC_TX_DMA_LEN; dma_len -= dlen_type; @@ -1332,7 +1332,7 @@ nfp_nfdk_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type); txd->dma_len_type = cpu_to_le16(dlen_type); - nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr); + nfp_desc_set_dma_addr_48b(txd, dma_addr); tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD; dma_len -= tmp_dlen; @@ -1343,7 +1343,7 @@ nfp_nfdk_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, dma_len -= 1; dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len); txd->dma_len_type = cpu_to_le16(dlen_type); - nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr); + nfp_desc_set_dma_addr_48b(txd, dma_addr); dlen_type &= NFDK_DESC_TX_DMA_LEN; dma_len -= dlen_type; diff --git a/drivers/net/ethernet/netronome/nfp/nfdk/rings.c b/drivers/net/ethernet/netronome/nfp/nfdk/rings.c index 301f11108826..f4d94ae0a349 100644 --- a/drivers/net/ethernet/netronome/nfp/nfdk/rings.c +++ b/drivers/net/ethernet/netronome/nfp/nfdk/rings.c @@ -181,6 +181,7 @@ const struct nfp_dp_ops nfp_nfdk_ops = { .version = NFP_NFD_VER_NFDK, .tx_min_desc_per_pkt = NFDK_TX_DESC_PER_SIMPLE_PKT, .cap_mask = NFP_NFDK_CFG_CTRL_SUPPORTED, + .dma_mask = DMA_BIT_MASK(48), .poll = nfp_nfdk_poll, .ctrl_poll = nfp_nfdk_ctrl_poll, .xmit = nfp_nfdk_tx, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c index 4f88d17536c3..36b173039024 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_main.c +++ 
b/drivers/net/ethernet/netronome/nfp/nfp_main.c @@ -410,7 +410,9 @@ nfp_net_fw_find(struct pci_dev *pdev, struct nfp_pf *pf) return NULL; } - fw_model = nfp_hwinfo_lookup(pf->hwinfo, "assembly.partno"); + fw_model = nfp_hwinfo_lookup(pf->hwinfo, "nffw.partno"); + if (!fw_model) + fw_model = nfp_hwinfo_lookup(pf->hwinfo, "assembly.partno"); if (!fw_model) { dev_err(&pdev->dev, "Error: can't read part number\n"); return NULL; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index 3dd3a92d2e7f..b07cea8e354c 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -115,7 +115,7 @@ struct nfp_nfdk_tx_buf; #define D_IDX(ring, idx) ((idx) & ((ring)->cnt - 1)) /* Convenience macro for writing dma address into RX/TX descriptors */ -#define nfp_desc_set_dma_addr(desc, dma_addr) \ +#define nfp_desc_set_dma_addr_40b(desc, dma_addr) \ do { \ __typeof__(desc) __d = (desc); \ dma_addr_t __addr = (dma_addr); \ @@ -124,13 +124,13 @@ struct nfp_nfdk_tx_buf; __d->dma_addr_hi = upper_32_bits(__addr) & 0xff; \ } while (0) -#define nfp_nfdk_tx_desc_set_dma_addr(desc, dma_addr) \ - do { \ - __typeof__(desc) __d = (desc); \ - dma_addr_t __addr = (dma_addr); \ - \ - __d->dma_addr_hi = cpu_to_le16(upper_32_bits(__addr) & 0xff); \ - __d->dma_addr_lo = cpu_to_le32(lower_32_bits(__addr)); \ +#define nfp_desc_set_dma_addr_48b(desc, dma_addr) \ + do { \ + __typeof__(desc) __d = (desc); \ + dma_addr_t __addr = (dma_addr); \ + \ + __d->dma_addr_hi = cpu_to_le16(upper_32_bits(__addr)); \ + __d->dma_addr_lo = cpu_to_le32(lower_32_bits(__addr)); \ } while (0) /** @@ -225,8 +225,8 @@ struct nfp_net_tx_ring { struct nfp_net_rx_desc { union { struct { - u8 dma_addr_hi; /* High bits of the buf address */ - __le16 reserved; /* Must be zero */ + __le16 dma_addr_hi; /* High bits of the buf address */ + u8 reserved; /* Must be zero */ u8 meta_len_dd; /* Must be zero */ __le32 dma_addr_lo; /* Low 
bits of the buffer address */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 4e56a99087fa..57f284eefeb3 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2040,6 +2040,7 @@ nfp_net_alloc(struct pci_dev *pdev, const struct nfp_dev_info *dev_info, void __iomem *ctrl_bar, bool needs_netdev, unsigned int max_tx_rings, unsigned int max_rx_rings) { + u64 dma_mask = dma_get_mask(&pdev->dev); struct nfp_net *nn; int err; @@ -2085,6 +2086,14 @@ nfp_net_alloc(struct pci_dev *pdev, const struct nfp_dev_info *dev_info, goto err_free_nn; } + if ((dma_mask & nn->dp.ops->dma_mask) != dma_mask) { + dev_err(&pdev->dev, + "DMA mask of loaded firmware: %llx, required DMA mask: %llx\n", + nn->dp.ops->dma_mask, dma_mask); + err = -EINVAL; + goto err_free_nn; + } + nn->max_tx_rings = max_tx_rings; nn->max_rx_rings = max_rx_rings; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_dp.h b/drivers/net/ethernet/netronome/nfp/nfp_net_dp.h index c934cc2d3208..83becb338478 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_dp.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_dp.h @@ -117,6 +117,7 @@ enum nfp_nfd_version { * @version: Indicate dp type * @tx_min_desc_per_pkt: Minimal TX descs needed for each packet * @cap_mask: Mask of supported features + * @dma_mask: DMA addressing capability * @poll: Napi poll for normal rx/tx * @xsk_poll: Napi poll when xsk is enabled * @ctrl_poll: Tasklet poll for ctrl rx/tx @@ -134,6 +135,7 @@ struct nfp_dp_ops { enum nfp_nfd_version version; unsigned int tx_min_desc_per_pkt; u32 cap_mask; + u64 dma_mask; int (*poll)(struct napi_struct *napi, int budget); int (*xsk_poll)(struct napi_struct *napi, int budget); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index df0afd271a21..7475b209353f 100644 --- 
a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -1460,6 +1460,55 @@ static int nfp_net_set_channels(struct net_device *netdev, return nfp_net_set_num_rings(nn, total_rx, total_tx); } +static void nfp_port_get_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pause) +{ + struct nfp_eth_table_port *eth_port; + struct nfp_port *port; + + port = nfp_port_from_netdev(netdev); + eth_port = nfp_port_get_eth_port(port); + if (!eth_port) + return; + + /* Currently pause frame support is fixed */ + pause->autoneg = AUTONEG_DISABLE; + pause->rx_pause = 1; + pause->tx_pause = 1; +} + +static int nfp_net_set_phys_id(struct net_device *netdev, + enum ethtool_phys_id_state state) +{ + struct nfp_eth_table_port *eth_port; + struct nfp_port *port; + int err; + + port = nfp_port_from_netdev(netdev); + eth_port = __nfp_port_get_eth_port(port); + if (!eth_port) + return -EOPNOTSUPP; + + switch (state) { + case ETHTOOL_ID_ACTIVE: + /* Control LED to blink */ + err = nfp_eth_set_idmode(port->app->cpp, eth_port->index, 1); + break; + + case ETHTOOL_ID_INACTIVE: + /* Control LED to normal mode */ + err = nfp_eth_set_idmode(port->app->cpp, eth_port->index, 0); + break; + + case ETHTOOL_ID_ON: + case ETHTOOL_ID_OFF: + default: + return -EOPNOTSUPP; + } + + return err; +} + static const struct ethtool_ops nfp_net_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES | @@ -1492,6 +1541,8 @@ static const struct ethtool_ops nfp_net_ethtool_ops = { .set_link_ksettings = nfp_net_set_link_ksettings, .get_fecparam = nfp_port_get_fecparam, .set_fecparam = nfp_port_set_fecparam, + .get_pauseparam = nfp_port_get_pauseparam, + .set_phys_id = nfp_net_set_phys_id, }; const struct ethtool_ops nfp_port_ethtool_ops = { @@ -1509,6 +1560,8 @@ const struct ethtool_ops nfp_port_ethtool_ops = { .set_link_ksettings = nfp_net_set_link_ksettings, .get_fecparam = nfp_port_get_fecparam, 
.set_fecparam = nfp_port_set_fecparam, + .get_pauseparam = nfp_port_get_pauseparam, + .set_phys_id = nfp_net_set_phys_id, }; void nfp_net_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_xsk.c b/drivers/net/ethernet/netronome/nfp/nfp_net_xsk.c index 86829446c637..aea507aed49d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_xsk.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_xsk.c @@ -70,8 +70,12 @@ void nfp_net_xsk_rx_ring_fill_freelist(struct nfp_net_rx_ring *rx_ring) nfp_net_xsk_rx_bufs_stash(rx_ring, wr_idx, xdp); - nfp_desc_set_dma_addr(&rx_ring->rxds[wr_idx].fld, - rx_ring->xsk_rxbufs[wr_idx].dma_addr); + /* DMA address is expanded to 48-bit width in freelist for NFP3800, + * so the *_48b macro is used accordingly, it's also OK to fill + * a 40-bit address since the top 8 bits are set to 0. + */ + nfp_desc_set_dma_addr_48b(&rx_ring->rxds[wr_idx].fld, + rx_ring->xsk_rxbufs[wr_idx].dma_addr); rx_ring->wr_p++; wr_ptr_add++; diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/crc32.h b/drivers/net/ethernet/netronome/nfp/nfpcore/crc32.h index afab6f0fc564..6ad43c7cefe6 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/crc32.h +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/crc32.h @@ -4,7 +4,6 @@ #ifndef NFP_CRC32_H #define NFP_CRC32_H -#include <linux/kernel.h> #include <linux/crc32.h> /** diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h index ddb34bfb9bef..3d379e937184 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h @@ -13,36 +13,22 @@ #include <linux/ctype.h> #include <linux/types.h> #include <linux/sizes.h> -#include <linux/stringify.h> #ifndef NFP_SUBSYS #define NFP_SUBSYS "nfp" #endif -#define string_format(x) __FILE__ ":" __stringify(__LINE__) ": " x - -#define __nfp_err(cpp, fmt, args...) 
\ - dev_err(nfp_cpp_device(cpp)->parent, NFP_SUBSYS ": " fmt, ## args) -#define __nfp_warn(cpp, fmt, args...) \ - dev_warn(nfp_cpp_device(cpp)->parent, NFP_SUBSYS ": " fmt, ## args) -#define __nfp_info(cpp, fmt, args...) \ - dev_info(nfp_cpp_device(cpp)->parent, NFP_SUBSYS ": " fmt, ## args) -#define __nfp_dbg(cpp, fmt, args...) \ - dev_dbg(nfp_cpp_device(cpp)->parent, NFP_SUBSYS ": " fmt, ## args) -#define __nfp_printk(level, cpp, fmt, args...) \ - dev_printk(level, nfp_cpp_device(cpp)->parent, \ - NFP_SUBSYS ": " fmt, ## args) - #define nfp_err(cpp, fmt, args...) \ - __nfp_err(cpp, string_format(fmt), ## args) + dev_err(nfp_cpp_device(cpp)->parent, NFP_SUBSYS ": " fmt, ## args) #define nfp_warn(cpp, fmt, args...) \ - __nfp_warn(cpp, string_format(fmt), ## args) + dev_warn(nfp_cpp_device(cpp)->parent, NFP_SUBSYS ": " fmt, ## args) #define nfp_info(cpp, fmt, args...) \ - __nfp_info(cpp, string_format(fmt), ## args) + dev_info(nfp_cpp_device(cpp)->parent, NFP_SUBSYS ": " fmt, ## args) #define nfp_dbg(cpp, fmt, args...) \ - __nfp_dbg(cpp, string_format(fmt), ## args) + dev_dbg(nfp_cpp_device(cpp)->parent, NFP_SUBSYS ": " fmt, ## args) #define nfp_printk(level, cpp, fmt, args...) 
\ - __nfp_printk(level, cpp, string_format(fmt), ## args) + dev_printk(level, nfp_cpp_device(cpp)->parent, \ + NFP_SUBSYS ": " fmt, ## args) #define PCI_64BIT_BAR_COUNT 3 diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_dev.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_dev.c index 28384d6d1c6f..0725b51c2a95 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_dev.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_dev.c @@ -9,7 +9,7 @@ const struct nfp_dev_info nfp_dev_info[NFP_DEV_CNT] = { [NFP_DEV_NFP3800] = { - .dma_mask = DMA_BIT_MASK(40), + .dma_mask = DMA_BIT_MASK(48), .qc_idx_mask = GENMASK(8, 0), .qc_addr_offset = 0x400000, .min_qc_size = 512, @@ -21,7 +21,7 @@ const struct nfp_dev_info nfp_dev_info[NFP_DEV_CNT] = { .qc_area_sz = 0x100000, }, [NFP_DEV_NFP3800_VF] = { - .dma_mask = DMA_BIT_MASK(40), + .dma_mask = DMA_BIT_MASK(48), .qc_idx_mask = GENMASK(8, 0), .qc_addr_offset = 0, .min_qc_size = 512, diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h index f5360bae6f75..77d66855be42 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h @@ -196,6 +196,8 @@ int nfp_eth_set_configured(struct nfp_cpp *cpp, unsigned int idx, int nfp_eth_set_fec(struct nfp_cpp *cpp, unsigned int idx, enum nfp_eth_fec mode); +int nfp_eth_set_idmode(struct nfp_cpp *cpp, unsigned int idx, bool state); + static inline bool nfp_eth_can_support_fec(struct nfp_eth_table_port *eth_port) { return !!eth_port->fec_modes_supported; diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c index 311a5be25acb..edd300033735 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c @@ -49,6 +49,7 @@ #define NSP_ETH_CTRL_SET_LANES BIT_ULL(5) #define NSP_ETH_CTRL_SET_ANEG BIT_ULL(6) #define 
NSP_ETH_CTRL_SET_FEC BIT_ULL(7) +#define NSP_ETH_CTRL_SET_IDMODE BIT_ULL(8) enum nfp_eth_raw { NSP_ETH_RAW_PORT = 0, @@ -492,6 +493,35 @@ nfp_eth_set_bit_config(struct nfp_nsp *nsp, unsigned int raw_idx, return 0; } +int nfp_eth_set_idmode(struct nfp_cpp *cpp, unsigned int idx, bool state) +{ + union eth_table_entry *entries; + struct nfp_nsp *nsp; + u64 reg; + + nsp = nfp_eth_config_start(cpp, idx); + if (IS_ERR(nsp)) + return PTR_ERR(nsp); + + /* This feature was added in ABI 0.32 */ + if (nfp_nsp_get_abi_ver_minor(nsp) < 32) { + nfp_err(nfp_nsp_cpp(nsp), + "set id mode operation not supported, please update flash\n"); + return -EOPNOTSUPP; + } + + entries = nfp_nsp_config_entries(nsp); + + reg = le64_to_cpu(entries[idx].control); + reg &= ~NSP_ETH_CTRL_SET_IDMODE; + reg |= FIELD_PREP(NSP_ETH_CTRL_SET_IDMODE, state); + entries[idx].control = cpu_to_le64(reg); + + nfp_nsp_config_set_modified(nsp, true); + + return nfp_eth_config_commit_end(nsp); +} + #define NFP_ETH_SET_BIT_CONFIG(nsp, raw_idx, mask, val, ctrl_bit) \ ({ \ __BF_FIELD_CHECK(mask, 0ULL, val, "NFP_ETH_SET_BIT_CONFIG: "); \ diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c index e90fa97c0ae6..8dd7aa08ecfb 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c @@ -1869,8 +1869,7 @@ int qlcnic_sriov_set_vf_tx_rate(struct net_device *netdev, int vf, if (!min_tx_rate) min_tx_rate = QLC_VF_MIN_TX_RATE; - if (max_tx_rate && - (max_tx_rate >= 10000 || max_tx_rate < min_tx_rate)) { + if (max_tx_rate && max_tx_rate >= 10000) { netdev_err(netdev, "Invalid max Tx rate, allowed range is [%d - %d]", min_tx_rate, QLC_VF_MAX_TX_RATE); @@ -1880,8 +1879,7 @@ int qlcnic_sriov_set_vf_tx_rate(struct net_device *netdev, int vf, if (!max_tx_rate) max_tx_rate = 10000; - if (min_tx_rate && - (min_tx_rate > max_tx_rate || min_tx_rate < QLC_VF_MIN_TX_RATE)) { + if (min_tx_rate && 
min_tx_rate < QLC_VF_MIN_TX_RATE) { netdev_err(netdev, "Invalid min Tx rate, allowed range is [%d - %d]", QLC_VF_MIN_TX_RATE, max_tx_rate); diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c index 06104d2ff5b3..80c95c331c82 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c @@ -1465,7 +1465,7 @@ netdev_tx_t emac_mac_tx_buf_send(struct emac_adapter *adpt, /* Make sure the are enough free descriptors to hold one * maximum-sized SKB. We need one desc for each fragment, * one for the checksum (emac_tso_csum), one for TSO, and - * and one for the SKB header. + * one for the SKB header. */ if (emac_tpd_num_free_descs(tx_q) < (MAX_SKB_FRAGS + 3)) netif_stop_queue(adpt->netdev); diff --git a/drivers/net/ethernet/sfc/falcon/bitfield.h b/drivers/net/ethernet/sfc/falcon/bitfield.h index 5eb178d0c149..78537a53009e 100644 --- a/drivers/net/ethernet/sfc/falcon/bitfield.h +++ b/drivers/net/ethernet/sfc/falcon/bitfield.h @@ -117,7 +117,7 @@ typedef union ef4_oword { * * ( element ) << 4 * - * The result will contain the relevant bits filled in in the range + * The result will contain the relevant bits filled in the range * [0,high-low), with garbage in bits [high-low+1,...). */ #define EF4_EXTRACT_NATIVE(native_element, min, max, low, high) \ diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c index 50baf62b2cbc..a3425b6be3f7 100644 --- a/drivers/net/ethernet/sfc/mcdi.c +++ b/drivers/net/ethernet/sfc/mcdi.c @@ -1261,7 +1261,7 @@ static void efx_mcdi_ev_death(struct efx_nic *efx, int rc) } /* The MC is going down in to BIST mode. set the BIST flag to block - * new MCDI, cancel any outstanding MCDI and and schedule a BIST-type reset + * new MCDI, cancel any outstanding MCDI and schedule a BIST-type reset * (which doesn't actually execute a reset, it waits for the controlling * function to reset it). 
*/ diff --git a/drivers/net/ethernet/sfc/mcdi_pcol.h b/drivers/net/ethernet/sfc/mcdi_pcol.h index ff617b1b38d3..7984f6f84a3c 100644 --- a/drivers/net/ethernet/sfc/mcdi_pcol.h +++ b/drivers/net/ethernet/sfc/mcdi_pcol.h @@ -274,7 +274,7 @@ * MC_CMD_WORKAROUND_BUG26807. * May also returned for other operations such as sub-variant switching. */ #define MC_CMD_ERR_FILTERS_PRESENT 0x1014 -/* The clock whose frequency you've attempted to set set +/* The clock whose frequency you've attempted to set * doesn't exist on this NIC */ #define MC_CMD_ERR_NO_CLOCK 0x1015 /* Returned by MC_CMD_TESTASSERT if the action that should @@ -7782,7 +7782,7 @@ * large number (253) it is not anticipated that this will be needed in the * near future, so can currently be ignored. * - * On Riverhead this command is implemented as a a wrapper for `list` in the + * On Riverhead this command is implemented as a wrapper for `list` in the * sensor_query SPHINX service. */ #define MC_CMD_DYNAMIC_SENSORS_LIST 0x66 @@ -7827,7 +7827,7 @@ * update is in progress, and effectively means the set of usable sensors is * the intersection between the sets of sensors known to the driver and the MC. * - * On Riverhead this command is implemented as a a wrapper for + * On Riverhead this command is implemented as a wrapper for * `get_descriptions` in the sensor_query SPHINX service. */ #define MC_CMD_DYNAMIC_SENSORS_GET_DESCRIPTIONS 0x67 @@ -7876,7 +7876,7 @@ * update is in progress, and effectively means the set of usable sensors is * the intersection between the sets of sensors known to the driver and the MC. * - * On Riverhead this command is implemented as a a wrapper for `get_readings` + * On Riverhead this command is implemented as a wrapper for `get_readings` * in the sensor_query SPHINX service. */ #define MC_CMD_DYNAMIC_SENSORS_GET_READINGS 0x68 @@ -19322,7 +19322,7 @@ * TLV_PORT_MODE_*). 
A superset of MC_CMD_GET_PORT_MODES_OUT/MODES that * contains all modes implemented in firmware for a particular board. Modes * listed in MODES are considered production modes and should be exposed in - * userland tools. Modes listed in in ENGINEERING_MODES, but not in MODES + * userland tools. Modes listed in ENGINEERING_MODES, but not in MODES * should be considered hidden (not to be exposed in userland tools) and for * engineering use only. There are no other semantic differences and any mode * listed in either MODES or ENGINEERING_MODES can be set on the board. diff --git a/drivers/net/ethernet/sfc/siena/mcdi.c b/drivers/net/ethernet/sfc/siena/mcdi.c index 3df0f0eca3b7..3f7899daa86a 100644 --- a/drivers/net/ethernet/sfc/siena/mcdi.c +++ b/drivers/net/ethernet/sfc/siena/mcdi.c @@ -1264,7 +1264,7 @@ static void efx_mcdi_ev_death(struct efx_nic *efx, int rc) } /* The MC is going down in to BIST mode. set the BIST flag to block - * new MCDI, cancel any outstanding MCDI and and schedule a BIST-type reset + * new MCDI, cancel any outstanding MCDI and schedule a BIST-type reset * (which doesn't actually execute a reset, it waits for the controlling * function to reset it). */ diff --git a/drivers/net/ethernet/sfc/siena/mcdi_pcol.h b/drivers/net/ethernet/sfc/siena/mcdi_pcol.h index 89a7fd47b057..a3cc8b7ec732 100644 --- a/drivers/net/ethernet/sfc/siena/mcdi_pcol.h +++ b/drivers/net/ethernet/sfc/siena/mcdi_pcol.h @@ -274,7 +274,7 @@ * MC_CMD_WORKAROUND_BUG26807. * May also returned for other operations such as sub-variant switching. */ #define MC_CMD_ERR_FILTERS_PRESENT 0x1014 -/* The clock whose frequency you've attempted to set set +/* The clock whose frequency you've attempted to set * doesn't exist on this NIC */ #define MC_CMD_ERR_NO_CLOCK 0x1015 /* Returned by MC_CMD_TESTASSERT if the action that should @@ -7782,7 +7782,7 @@ * large number (253) it is not anticipated that this will be needed in the * near future, so can currently be ignored. 
* - * On Riverhead this command is implemented as a a wrapper for `list` in the + * On Riverhead this command is implemented as a wrapper for `list` in the * sensor_query SPHINX service. */ #define MC_CMD_DYNAMIC_SENSORS_LIST 0x66 @@ -7827,7 +7827,7 @@ * update is in progress, and effectively means the set of usable sensors is * the intersection between the sets of sensors known to the driver and the MC. * - * On Riverhead this command is implemented as a a wrapper for + * On Riverhead this command is implemented as a wrapper for * `get_descriptions` in the sensor_query SPHINX service. */ #define MC_CMD_DYNAMIC_SENSORS_GET_DESCRIPTIONS 0x67 @@ -7876,7 +7876,7 @@ * update is in progress, and effectively means the set of usable sensors is * the intersection between the sets of sensors known to the driver and the MC. * - * On Riverhead this command is implemented as a a wrapper for `get_readings` + * On Riverhead this command is implemented as a wrapper for `get_readings` * in the sensor_query SPHINX service. */ #define MC_CMD_DYNAMIC_SENSORS_GET_READINGS 0x68 @@ -16682,7 +16682,7 @@ * TLV_PORT_MODE_*). A superset of MC_CMD_GET_PORT_MODES_OUT/MODES that * contains all modes implemented in firmware for a particular board. Modes * listed in MODES are considered production modes and should be exposed in - * userland tools. Modes listed in in ENGINEERING_MODES, but not in MODES + * userland tools. Modes listed in ENGINEERING_MODES, but not in MODES * should be considered hidden (not to be exposed in userland tools) and for * engineering use only. There are no other semantic differences and any mode * listed in either MODES or ENGINEERING_MODES can be set on the board. 
diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 929cfc22cd0c..31ff35174034 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -91,6 +91,9 @@ config DWMAC_IPQ806X acceleration features available on this SoC. Network devices will behave like standard non-accelerated ethernet interfaces. + Select the QCOM_SOCINFO config flag to enable specific dwmac + fixup based on the ipq806x SoC revision. + config DWMAC_LPC18XX tristate "NXP LPC18xx/43xx DWMAC support" default ARCH_LPC18XX diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 38fe77d1035e..d0e82cb5ae03 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -251,7 +251,6 @@ static void intel_speed_mode_2500(struct net_device *ndev, void *intel_data) priv->plat->mdio_bus_data->xpcs_an_inband = false; } else { priv->plat->max_speed = 1000; - priv->plat->phy_interface = PHY_INTERFACE_MODE_SGMII; priv->plat->mdio_bus_data->xpcs_an_inband = true; } } @@ -443,6 +442,7 @@ static void common_default_data(struct plat_stmmacenet_data *plat) static int intel_mgbe_common_data(struct pci_dev *pdev, struct plat_stmmacenet_data *plat) { + struct fwnode_handle *fwnode; char clk_name[20]; int ret; int i; @@ -561,12 +561,42 @@ static int intel_mgbe_common_data(struct pci_dev *pdev, /* Use the last Rx queue */ plat->vlan_fail_q = plat->rx_queues_to_use - 1; + /* For fixed-link setup, we allow phy-mode setting */ + fwnode = dev_fwnode(&pdev->dev); + if (fwnode) { + int phy_mode; + + /* "phy-mode" setting is optional. If it is set, + * we allow either sgmii or 1000base-x for now. 
+ */ + phy_mode = fwnode_get_phy_mode(fwnode); + if (phy_mode >= 0) { + if (phy_mode == PHY_INTERFACE_MODE_SGMII || + phy_mode == PHY_INTERFACE_MODE_1000BASEX) + plat->phy_interface = phy_mode; + else + dev_warn(&pdev->dev, "Invalid phy-mode\n"); + } + } + /* Intel mgbe SGMII interface uses pcs-xcps */ - if (plat->phy_interface == PHY_INTERFACE_MODE_SGMII) { + if (plat->phy_interface == PHY_INTERFACE_MODE_SGMII || + plat->phy_interface == PHY_INTERFACE_MODE_1000BASEX) { plat->mdio_bus_data->has_xpcs = true; plat->mdio_bus_data->xpcs_an_inband = true; } + /* For fixed-link setup, we clear xpcs_an_inband */ + if (fwnode) { + struct fwnode_handle *fixed_node; + + fixed_node = fwnode_get_named_child_node(fwnode, "fixed-link"); + if (fixed_node) + plat->mdio_bus_data->xpcs_an_inband = false; + + fwnode_handle_put(fixed_node); + } + /* Ensure mdio bus scan skips intel serdes and pcs-xpcs */ plat->mdio_bus_data->phy_mask = 1 << INTEL_MGBE_ADHOC_ADDR; plat->mdio_bus_data->phy_mask |= 1 << INTEL_MGBE_XPCS_ADDR; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c index f7dc8458cde8..e888c8a9c830 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c @@ -27,6 +27,8 @@ #include <linux/stmmac.h> #include <linux/of_mdio.h> #include <linux/module.h> +#include <linux/sys_soc.h> +#include <linux/bitfield.h> #include "stmmac_platform.h" @@ -64,6 +66,17 @@ #define NSS_COMMON_CLK_DIV_SGMII_100 4 #define NSS_COMMON_CLK_DIV_SGMII_10 49 +#define QSGMII_PCS_ALL_CH_CTL 0x80 +#define QSGMII_PCS_CH_SPEED_FORCE BIT(1) +#define QSGMII_PCS_CH_SPEED_10 0x0 +#define QSGMII_PCS_CH_SPEED_100 BIT(2) +#define QSGMII_PCS_CH_SPEED_1000 BIT(3) +#define QSGMII_PCS_CH_SPEED_MASK (QSGMII_PCS_CH_SPEED_FORCE | \ + QSGMII_PCS_CH_SPEED_10 | \ + QSGMII_PCS_CH_SPEED_100 | \ + QSGMII_PCS_CH_SPEED_1000) +#define QSGMII_PCS_CH_SPEED_SHIFT(x) ((x) * 4) + #define 
QSGMII_PCS_CAL_LCKDT_CTL 0x120 #define QSGMII_PCS_CAL_LCKDT_CTL_RST BIT(19) @@ -75,11 +88,20 @@ #define QSGMII_PHY_RX_SIGNAL_DETECT_EN BIT(2) #define QSGMII_PHY_TX_DRIVER_EN BIT(3) #define QSGMII_PHY_QSGMII_EN BIT(7) -#define QSGMII_PHY_PHASE_LOOP_GAIN_OFFSET 12 -#define QSGMII_PHY_RX_DC_BIAS_OFFSET 18 -#define QSGMII_PHY_RX_INPUT_EQU_OFFSET 20 -#define QSGMII_PHY_CDR_PI_SLEW_OFFSET 22 -#define QSGMII_PHY_TX_DRV_AMP_OFFSET 28 +#define QSGMII_PHY_DEEMPHASIS_LVL_MASK GENMASK(11, 10) +#define QSGMII_PHY_DEEMPHASIS_LVL(x) FIELD_PREP(QSGMII_PHY_DEEMPHASIS_LVL_MASK, (x)) +#define QSGMII_PHY_PHASE_LOOP_GAIN_MASK GENMASK(14, 12) +#define QSGMII_PHY_PHASE_LOOP_GAIN(x) FIELD_PREP(QSGMII_PHY_PHASE_LOOP_GAIN_MASK, (x)) +#define QSGMII_PHY_RX_DC_BIAS_MASK GENMASK(19, 18) +#define QSGMII_PHY_RX_DC_BIAS(x) FIELD_PREP(QSGMII_PHY_RX_DC_BIAS_MASK, (x)) +#define QSGMII_PHY_RX_INPUT_EQU_MASK GENMASK(21, 20) +#define QSGMII_PHY_RX_INPUT_EQU(x) FIELD_PREP(QSGMII_PHY_RX_INPUT_EQU_MASK, (x)) +#define QSGMII_PHY_CDR_PI_SLEW_MASK GENMASK(23, 22) +#define QSGMII_PHY_CDR_PI_SLEW(x) FIELD_PREP(QSGMII_PHY_CDR_PI_SLEW_MASK, (x)) +#define QSGMII_PHY_TX_SLEW_MASK GENMASK(27, 26) +#define QSGMII_PHY_TX_SLEW(x) FIELD_PREP(QSGMII_PHY_TX_SLEW_MASK, (x)) +#define QSGMII_PHY_TX_DRV_AMP_MASK GENMASK(31, 28) +#define QSGMII_PHY_TX_DRV_AMP(x) FIELD_PREP(QSGMII_PHY_TX_DRV_AMP_MASK, (x)) struct ipq806x_gmac { struct platform_device *pdev; @@ -242,6 +264,113 @@ static void ipq806x_gmac_fix_mac_speed(void *priv, unsigned int speed) ipq806x_gmac_set_speed(gmac, speed); } +static int +ipq806x_gmac_configure_qsgmii_pcs_speed(struct ipq806x_gmac *gmac) +{ + struct platform_device *pdev = gmac->pdev; + struct device *dev = &pdev->dev; + struct device_node *dn; + int link_speed; + int val = 0; + int ret; + + /* Some bootloaders may apply a wrong configuration and cause + * a non-functioning port. If fixed link is not set, + * reset the force speed bit. 
+ */ + if (!of_phy_is_fixed_link(pdev->dev.of_node)) + goto write; + + dn = of_get_child_by_name(pdev->dev.of_node, "fixed-link"); + ret = of_property_read_u32(dn, "speed", &link_speed); + of_node_put(dn); + if (ret) { + dev_err(dev, "found fixed-link node with no speed"); + return ret; + } + + val = QSGMII_PCS_CH_SPEED_FORCE; + + switch (link_speed) { + case SPEED_1000: + val |= QSGMII_PCS_CH_SPEED_1000; + break; + case SPEED_100: + val |= QSGMII_PCS_CH_SPEED_100; + break; + case SPEED_10: + val |= QSGMII_PCS_CH_SPEED_10; + break; + } + +write: + regmap_update_bits(gmac->qsgmii_csr, QSGMII_PCS_ALL_CH_CTL, + QSGMII_PCS_CH_SPEED_MASK << + QSGMII_PCS_CH_SPEED_SHIFT(gmac->id), + val << + QSGMII_PCS_CH_SPEED_SHIFT(gmac->id)); + + return 0; +} + +static const struct soc_device_attribute ipq806x_gmac_soc_v1[] = { + { + .revision = "1.*", + }, + { + /* sentinel */ + } +}; + +static int +ipq806x_gmac_configure_qsgmii_params(struct ipq806x_gmac *gmac) +{ + struct platform_device *pdev = gmac->pdev; + const struct soc_device_attribute *soc; + struct device *dev = &pdev->dev; + u32 qsgmii_param; + + switch (gmac->id) { + case 1: + soc = soc_device_match(ipq806x_gmac_soc_v1); + + if (soc) + qsgmii_param = QSGMII_PHY_TX_DRV_AMP(0xc) | + QSGMII_PHY_TX_SLEW(0x2) | + QSGMII_PHY_DEEMPHASIS_LVL(0x2); + else + qsgmii_param = QSGMII_PHY_TX_DRV_AMP(0xd) | + QSGMII_PHY_TX_SLEW(0x0) | + QSGMII_PHY_DEEMPHASIS_LVL(0x0); + + qsgmii_param |= QSGMII_PHY_RX_DC_BIAS(0x2); + break; + case 2: + case 3: + qsgmii_param = QSGMII_PHY_RX_DC_BIAS(0x3) | + QSGMII_PHY_TX_DRV_AMP(0xc); + break; + default: /* gmac 0 can't be set in SGMII mode */ + dev_err(dev, "gmac id %d can't be in SGMII mode", gmac->id); + return -EINVAL; + } + + /* Common params across all gmac id */ + qsgmii_param |= QSGMII_PHY_CDR_EN | + QSGMII_PHY_RX_FRONT_EN | + QSGMII_PHY_RX_SIGNAL_DETECT_EN | + QSGMII_PHY_TX_DRIVER_EN | + QSGMII_PHY_QSGMII_EN | + QSGMII_PHY_PHASE_LOOP_GAIN(0x4) | + QSGMII_PHY_RX_INPUT_EQU(0x1) | + 
QSGMII_PHY_CDR_PI_SLEW(0x2); + + regmap_write(gmac->qsgmii_csr, QSGMII_PHY_SGMII_CTL(gmac->id), + qsgmii_param); + + return 0; +} + static int ipq806x_gmac_probe(struct platform_device *pdev) { struct plat_stmmacenet_data *plat_dat; @@ -328,17 +457,13 @@ static int ipq806x_gmac_probe(struct platform_device *pdev) regmap_write(gmac->nss_common, NSS_COMMON_CLK_GATE, val); if (gmac->phy_mode == PHY_INTERFACE_MODE_SGMII) { - regmap_write(gmac->qsgmii_csr, QSGMII_PHY_SGMII_CTL(gmac->id), - QSGMII_PHY_CDR_EN | - QSGMII_PHY_RX_FRONT_EN | - QSGMII_PHY_RX_SIGNAL_DETECT_EN | - QSGMII_PHY_TX_DRIVER_EN | - QSGMII_PHY_QSGMII_EN | - 0x4ul << QSGMII_PHY_PHASE_LOOP_GAIN_OFFSET | - 0x3ul << QSGMII_PHY_RX_DC_BIAS_OFFSET | - 0x1ul << QSGMII_PHY_RX_INPUT_EQU_OFFSET | - 0x2ul << QSGMII_PHY_CDR_PI_SLEW_OFFSET | - 0xCul << QSGMII_PHY_TX_DRV_AMP_OFFSET); + err = ipq806x_gmac_configure_qsgmii_params(gmac); + if (err) + goto err_remove_config_dt; + + err = ipq806x_gmac_configure_qsgmii_pcs_speed(gmac); + if (err) + goto err_remove_config_dt; } plat_dat->has_gmac = true; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index d1a7cf4567bc..fe263cad8248 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1128,18 +1128,20 @@ static void stmmac_check_pcs_mode(struct stmmac_priv *priv) static int stmmac_init_phy(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); - struct device_node *node; + struct fwnode_handle *fwnode; int ret; - node = priv->plat->phylink_node; + fwnode = of_fwnode_handle(priv->plat->phylink_node); + if (!fwnode) + fwnode = dev_fwnode(priv->device); - if (node) - ret = phylink_of_phy_connect(priv->phylink, node, 0); + if (fwnode) + ret = phylink_fwnode_phy_connect(priv->phylink, fwnode, 0); /* Some DT bindings do not set-up the PHY handle. 
Let's try to * manually parse it */ - if (!node || ret) { + if (!fwnode || ret) { int addr = priv->plat->phy_addr; struct phy_device *phydev; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index 03d3d1f7aa4b..5f177ea80725 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -434,9 +434,11 @@ int stmmac_mdio_register(struct net_device *ndev) int err = 0; struct mii_bus *new_bus; struct stmmac_priv *priv = netdev_priv(ndev); + struct fwnode_handle *fwnode = of_fwnode_handle(priv->plat->phylink_node); struct stmmac_mdio_bus_data *mdio_bus_data = priv->plat->mdio_bus_data; struct device_node *mdio_node = priv->plat->mdio_node; struct device *dev = ndev->dev.parent; + struct fwnode_handle *fixed_node; int addr, found, max_addr; if (!mdio_bus_data) @@ -490,6 +492,18 @@ int stmmac_mdio_register(struct net_device *ndev) if (priv->plat->has_xgmac) stmmac_xgmac2_mdio_read(new_bus, 0, MII_ADDR_C45); + /* If fixed-link is set, skip PHY scanning */ + if (!fwnode) + fwnode = dev_fwnode(priv->device); + + if (fwnode) { + fixed_node = fwnode_get_named_child_node(fwnode, "fixed-link"); + if (fixed_node) { + fwnode_handle_put(fixed_node); + goto bus_register_done; + } + } + if (priv->plat->phy_node || mdio_node) goto bus_register_done; diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 9cfe84319ee4..4e46974a69ec 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -823,7 +823,7 @@ static void gsi_channel_program(struct gsi_channel *channel, bool doorbell) /* Now update the scratch registers for GPI protocol */ gpi = &scr.gpi; - gpi->max_outstanding_tre = gsi_channel_trans_tre_max(gsi, channel_id) * + gpi->max_outstanding_tre = channel->trans_tre_max * GSI_RING_ELEMENT_SIZE; gpi->outstanding_threshold = 2 * GSI_RING_ELEMENT_SIZE; @@ -991,75 +991,66 @@ void gsi_resume(struct gsi *gsi) enable_irq(gsi->irq); } -/** - * 
gsi_channel_tx_queued() - Report queued TX transfers for a channel - * @channel: Channel for which to report - * - * Report to the network stack the number of bytes and transactions that - * have been queued to hardware since last call. This and the next function - * supply information used by the network stack for throttling. - * - * For each channel we track the number of transactions used and bytes of - * data those transactions represent. We also track what those values are - * each time this function is called. Subtracting the two tells us - * the number of bytes and transactions that have been added between - * successive calls. - * - * Calling this each time we ring the channel doorbell allows us to - * provide accurate information to the network stack about how much - * work we've given the hardware at any point in time. - */ -void gsi_channel_tx_queued(struct gsi_channel *channel) +void gsi_trans_tx_committed(struct gsi_trans *trans) { + struct gsi_channel *channel = &trans->gsi->channel[trans->channel_id]; + + channel->trans_count++; + channel->byte_count += trans->len; + + trans->trans_count = channel->trans_count; + trans->byte_count = channel->byte_count; +} + +void gsi_trans_tx_queued(struct gsi_trans *trans) +{ + u32 channel_id = trans->channel_id; + struct gsi *gsi = trans->gsi; + struct gsi_channel *channel; u32 trans_count; u32 byte_count; + channel = &gsi->channel[channel_id]; + byte_count = channel->byte_count - channel->queued_byte_count; trans_count = channel->trans_count - channel->queued_trans_count; channel->queued_byte_count = channel->byte_count; channel->queued_trans_count = channel->trans_count; - ipa_gsi_channel_tx_queued(channel->gsi, gsi_channel_id(channel), - trans_count, byte_count); + ipa_gsi_channel_tx_queued(gsi, channel_id, trans_count, byte_count); } /** - * gsi_channel_tx_update() - Report completed TX transfers - * @channel: Channel that has completed transmitting packets - * @trans: Last transation known to be complete + * 
gsi_trans_tx_completed() - Report completed TX transactions + * @trans: TX channel transaction that has completed * - * Compute the number of transactions and bytes that have been transferred - * over a TX channel since the given transaction was committed. Report this - * information to the network stack. + * Report that a transaction on a TX channel has completed. At the time a + * transaction is committed, we record *in the transaction* its channel's + * committed transaction and byte counts. Transactions are completed in + * order, and the difference between the channel's byte/transaction count + * when the transaction was committed and when it completes tells us + * exactly how much data has been transferred while the transaction was + * pending. * - * At the time a transaction is committed, we record its channel's - * committed transaction and byte counts *in the transaction*. - * Completions are signaled by the hardware with an interrupt, and - * we can determine the latest completed transaction at that time. - * - * The difference between the byte/transaction count recorded in - * the transaction and the count last time we recorded a completion - * tells us exactly how much data has been transferred between - * completions. - * - * Calling this each time we learn of a newly-completed transaction - * allows us to provide accurate information to the network stack - * about how much work has been completed by the hardware at a given - * point in time. + * We report this information to the network stack, which uses it to manage + * the rate at which data is sent to hardware. 
*/ -static void -gsi_channel_tx_update(struct gsi_channel *channel, struct gsi_trans *trans) +static void gsi_trans_tx_completed(struct gsi_trans *trans) { - u64 byte_count = trans->byte_count + trans->len; - u64 trans_count = trans->trans_count + 1; + u32 channel_id = trans->channel_id; + struct gsi *gsi = trans->gsi; + struct gsi_channel *channel; + u32 trans_count; + u32 byte_count; + + channel = &gsi->channel[channel_id]; + trans_count = trans->trans_count - channel->compl_trans_count; + byte_count = trans->byte_count - channel->compl_byte_count; - byte_count -= channel->compl_byte_count; - channel->compl_byte_count += byte_count; - trans_count -= channel->compl_trans_count; channel->compl_trans_count += trans_count; + channel->compl_byte_count += byte_count; - ipa_gsi_channel_tx_completed(channel->gsi, gsi_channel_id(channel), - trans_count, byte_count); + ipa_gsi_channel_tx_completed(gsi, channel_id, trans_count, byte_count); } /* Channel control interrupt handler */ @@ -1327,28 +1318,45 @@ static int gsi_irq_init(struct gsi *gsi, struct platform_device *pdev) } /* Return the transaction associated with a transfer completion event */ -static struct gsi_trans *gsi_event_trans(struct gsi_channel *channel, - struct gsi_event *event) +static struct gsi_trans * +gsi_event_trans(struct gsi *gsi, struct gsi_event *event) { + u32 channel_id = event->chid; + struct gsi_channel *channel; + struct gsi_trans *trans; u32 tre_offset; u32 tre_index; + channel = &gsi->channel[channel_id]; + if (WARN(!channel->gsi, "event has bad channel %u\n", channel_id)) + return NULL; + /* Event xfer_ptr records the TRE it's associated with */ tre_offset = lower_32_bits(le64_to_cpu(event->xfer_ptr)); tre_index = gsi_ring_index(&channel->tre_ring, tre_offset); - return gsi_channel_trans_mapped(channel, tre_index); + trans = gsi_channel_trans_mapped(channel, tre_index); + + if (WARN(!trans, "channel %u event with no transaction\n", channel_id)) + return NULL; + + return trans; } /** - * 
gsi_evt_ring_rx_update() - Record lengths of received data - * @evt_ring: Event ring associated with channel that received packets - * @index: Event index in ring reported by hardware + * gsi_evt_ring_update() - Update transaction state from hardware + * @gsi: GSI pointer + * @evt_ring_id: Event ring ID + * @index: Event index in ring reported by hardware * * Events for RX channels contain the actual number of bytes received into * the buffer. Every event has a transaction associated with it, and here * we update transactions to record their actual received lengths. * + * When an event for a TX channel arrives we use information in the + * transaction to report the number of requests and bytes that have been + * transferred. + * * This function is called whenever we learn that the GSI hardware has filled * new events since the last time we checked. The ring's index field tells * the first entry in need of processing. The index provided is the @@ -1359,29 +1367,24 @@ static struct gsi_trans *gsi_event_trans(struct gsi_channel *channel, * * Note that @index always refers to an element *within* the event ring. */ -static void gsi_evt_ring_rx_update(struct gsi_evt_ring *evt_ring, u32 index) +static void gsi_evt_ring_update(struct gsi *gsi, u32 evt_ring_id, u32 index) { - struct gsi_channel *channel = evt_ring->channel; + struct gsi_evt_ring *evt_ring = &gsi->evt_ring[evt_ring_id]; struct gsi_ring *ring = &evt_ring->ring; - struct gsi_trans_info *trans_info; struct gsi_event *event_done; struct gsi_event *event; - struct gsi_trans *trans; - u32 trans_count = 0; - u32 byte_count = 0; u32 event_avail; u32 old_index; - trans_info = &channel->trans_info; - - /* We'll start with the oldest un-processed event. RX channels - * replenish receive buffers in single-TRE transactions, so we - * can just map that event to its transaction. Transactions - * associated with completion events are consecutive. 
+ /* Starting with the oldest un-processed event, determine which + * transaction (and which channel) is associated with the event. + * For RX channels, update each completed transaction with the + * number of bytes that were actually received. For TX channels + * associated with a network device, report to the network stack + * the number of transfers and bytes this completion represents. */ old_index = ring->index; event = gsi_ring_virt(ring, old_index); - trans = gsi_event_trans(channel, event); /* Compute the number of events to process before we wrap, * and determine when we'll be done processing events. @@ -1389,21 +1392,28 @@ static void gsi_evt_ring_rx_update(struct gsi_evt_ring *evt_ring, u32 index) event_avail = ring->count - old_index % ring->count; event_done = gsi_ring_virt(ring, index); do { - trans->len = __le16_to_cpu(event->len); - byte_count += trans->len; - trans_count++; + struct gsi_trans *trans; + + trans = gsi_event_trans(gsi, event); + if (!trans) + return; + + if (trans->direction == DMA_FROM_DEVICE) + trans->len = __le16_to_cpu(event->len); + else + gsi_trans_tx_completed(trans); + + gsi_trans_move_complete(trans); /* Move on to the next event and transaction */ if (--event_avail) event++; else event = gsi_ring_virt(ring, 0); - trans = gsi_trans_pool_next(&trans_info->pool, trans); } while (event != event_done); - /* We record RX bytes when they are received */ - channel->byte_count += byte_count; - channel->trans_count += trans_count; + /* Tell the hardware we've handled these events */ + gsi_evt_ring_doorbell(gsi, evt_ring_id, index); } /* Initialize a ring, including allocating DMA memory for its entries */ @@ -1493,22 +1503,16 @@ static struct gsi_trans *gsi_channel_update(struct gsi_channel *channel) return NULL; /* Get the transaction for the latest completed event. 
*/ - trans = gsi_event_trans(channel, gsi_ring_virt(ring, index - 1)); + trans = gsi_event_trans(gsi, gsi_ring_virt(ring, index - 1)); + if (!trans) + return NULL; /* For RX channels, update each completed transaction with the number * of bytes that were actually received. For TX channels, report * the number of transactions and bytes this completion represents * up the network stack. */ - if (channel->toward_ipa) - gsi_channel_tx_update(channel, trans); - else - gsi_evt_ring_rx_update(evt_ring, index); - - gsi_trans_move_complete(trans); - - /* Tell the hardware we've handled these events */ - gsi_evt_ring_doorbell(gsi, evt_ring_id, index); + gsi_evt_ring_update(gsi, evt_ring_id, index); return gsi_channel_trans_complete(channel); } @@ -2001,9 +2005,10 @@ static void gsi_channel_evt_ring_exit(struct gsi_channel *channel) gsi_evt_ring_id_free(gsi, evt_ring_id); } -static bool gsi_channel_data_valid(struct gsi *gsi, +static bool gsi_channel_data_valid(struct gsi *gsi, bool command, const struct ipa_gsi_endpoint_data *data) { + const struct gsi_channel_data *channel_data; u32 channel_id = data->channel_id; struct device *dev = gsi->dev; @@ -2019,10 +2024,24 @@ static bool gsi_channel_data_valid(struct gsi *gsi, return false; } - if (!data->channel.tlv_count || - data->channel.tlv_count > GSI_TLV_MAX) { + if (command && !data->toward_ipa) { + dev_err(dev, "command channel %u is not TX\n", channel_id); + return false; + } + + channel_data = &data->channel; + + if (!channel_data->tlv_count || + channel_data->tlv_count > GSI_TLV_MAX) { dev_err(dev, "channel %u bad tlv_count %u; must be 1..%u\n", - channel_id, data->channel.tlv_count, GSI_TLV_MAX); + channel_id, channel_data->tlv_count, GSI_TLV_MAX); + return false; + } + + if (command && IPA_COMMAND_TRANS_TRE_MAX > channel_data->tlv_count) { + dev_err(dev, "command TRE max too big for channel %u (%u > %u)\n", + channel_id, IPA_COMMAND_TRANS_TRE_MAX, + channel_data->tlv_count); return false; } @@ -2031,22 +2050,22 @@ 
static bool gsi_channel_data_valid(struct gsi *gsi, * gsi_channel_tre_max() is computed, tre_count has to be almost * twice the TLV FIFO size to satisfy this requirement. */ - if (data->channel.tre_count < 2 * data->channel.tlv_count - 1) { + if (channel_data->tre_count < 2 * channel_data->tlv_count - 1) { dev_err(dev, "channel %u TLV count %u exceeds TRE count %u\n", - channel_id, data->channel.tlv_count, - data->channel.tre_count); + channel_id, channel_data->tlv_count, + channel_data->tre_count); return false; } - if (!is_power_of_2(data->channel.tre_count)) { + if (!is_power_of_2(channel_data->tre_count)) { dev_err(dev, "channel %u bad tre_count %u; not power of 2\n", - channel_id, data->channel.tre_count); + channel_id, channel_data->tre_count); return false; } - if (!is_power_of_2(data->channel.event_count)) { + if (!is_power_of_2(channel_data->event_count)) { dev_err(dev, "channel %u bad event_count %u; not power of 2\n", - channel_id, data->channel.event_count); + channel_id, channel_data->event_count); return false; } @@ -2062,7 +2081,7 @@ static int gsi_channel_init_one(struct gsi *gsi, u32 tre_count; int ret; - if (!gsi_channel_data_valid(gsi, data)) + if (!gsi_channel_data_valid(gsi, command, data)) return -EINVAL; /* Worst case we need an event for every outstanding TRE */ @@ -2080,7 +2099,7 @@ static int gsi_channel_init_one(struct gsi *gsi, channel->gsi = gsi; channel->toward_ipa = data->toward_ipa; channel->command = command; - channel->tlv_count = data->channel.tlv_count; + channel->trans_tre_max = data->channel.tlv_count; channel->tre_count = tre_count; channel->event_count = data->channel.event_count; @@ -2295,13 +2314,5 @@ u32 gsi_channel_tre_max(struct gsi *gsi, u32 channel_id) struct gsi_channel *channel = &gsi->channel[channel_id]; /* Hardware limit is channel->tre_count - 1 */ - return channel->tre_count - (channel->tlv_count - 1); -} - -/* Returns the maximum number of TREs in a single transaction for a channel */ -u32 
gsi_channel_trans_tre_max(struct gsi *gsi, u32 channel_id) -{ - struct gsi_channel *channel = &gsi->channel[channel_id]; - - return channel->tlv_count; + return channel->tre_count - (channel->trans_tre_max - 1); } diff --git a/drivers/net/ipa/gsi.h b/drivers/net/ipa/gsi.h index 5d66116b46b0..bad1a78a96ed 100644 --- a/drivers/net/ipa/gsi.h +++ b/drivers/net/ipa/gsi.h @@ -110,16 +110,16 @@ struct gsi_channel { bool toward_ipa; bool command; /* AP command TX channel or not */ - u8 tlv_count; /* # entries in TLV FIFO */ + u8 trans_tre_max; /* max TREs in a transaction */ u16 tre_count; u16 event_count; struct gsi_ring tre_ring; u32 evt_ring_id; + /* The following counts are used only for TX endpoints */ u64 byte_count; /* total # bytes transferred */ u64 trans_count; /* total # transactions */ - /* The following counts are used only for TX endpoints */ u64 queued_byte_count; /* last reported queued byte count */ u64 queued_trans_count; /* ...and queued trans count */ u64 compl_byte_count; /* last reported completed byte count */ @@ -189,15 +189,6 @@ void gsi_teardown(struct gsi *gsi); u32 gsi_channel_tre_max(struct gsi *gsi, u32 channel_id); /** - * gsi_channel_trans_tre_max() - Maximum TREs in a single transaction - * @gsi: GSI pointer - * @channel_id: Channel whose limit is to be returned - * - * Return: The maximum TRE count per transaction on the channel - */ -u32 gsi_channel_trans_tre_max(struct gsi *gsi, u32 channel_id); - -/** * gsi_channel_start() - Start an allocated GSI channel * @gsi: GSI pointer * @channel_id: Channel to start diff --git a/drivers/net/ipa/gsi_private.h b/drivers/net/ipa/gsi_private.h index ea333a244cf5..0b2516fa21b5 100644 --- a/drivers/net/ipa/gsi_private.h +++ b/drivers/net/ipa/gsi_private.h @@ -16,9 +16,6 @@ struct gsi_channel; #define GSI_RING_ELEMENT_SIZE 16 /* bytes; must be a power of 2 */ -/* Return the entry that follows one provided in a transaction pool */ -void *gsi_trans_pool_next(struct gsi_trans_pool *pool, void *element); - 
/** * gsi_trans_move_complete() - Mark a GSI transaction completed * @trans: Transaction to commit @@ -105,14 +102,21 @@ void gsi_channel_doorbell(struct gsi_channel *channel); void *gsi_ring_virt(struct gsi_ring *ring, u32 index); /** - * gsi_channel_tx_queued() - Report the number of bytes queued to hardware - * @channel: Channel whose bytes have been queued + * gsi_trans_tx_committed() - Record bytes committed for transmit + * @trans: TX endpoint transaction being committed + * + * Report that a TX transaction has been committed. It updates some + * statistics used to manage transmit rates. + */ +void gsi_trans_tx_committed(struct gsi_trans *trans); + +/** + * gsi_trans_tx_queued() - Report a queued TX channel transaction + * @trans: Transaction being passed to hardware * - * This arranges for the the number of transactions and bytes for - * transfer that have been queued to hardware to be reported. It - * passes this information up the network stack so it can be used to - * throttle transmissions. + * Report to the network stack that a TX transaction is being supplied + * to the hardware. */ -void gsi_channel_tx_queued(struct gsi_channel *channel); +void gsi_trans_tx_queued(struct gsi_trans *trans); #endif /* _GSI_PRIVATE_H_ */ diff --git a/drivers/net/ipa/gsi_trans.c b/drivers/net/ipa/gsi_trans.c index 55f8fe7d2668..29496ca15825 100644 --- a/drivers/net/ipa/gsi_trans.c +++ b/drivers/net/ipa/gsi_trans.c @@ -214,26 +214,14 @@ void *gsi_trans_pool_alloc_dma(struct gsi_trans_pool *pool, dma_addr_t *addr) return pool->base + offset; } -/* Return the pool element that immediately follows the one given. - * This only works done if elements are allocated one at a time. 
- */ -void *gsi_trans_pool_next(struct gsi_trans_pool *pool, void *element) +/* Map a TRE ring entry index to the transaction it is associated with */ +static void gsi_trans_map(struct gsi_trans *trans, u32 index) { - void *end = pool->base + pool->count * pool->size; - - WARN_ON(element < pool->base); - WARN_ON(element >= end); - WARN_ON(pool->max_alloc != 1); - - element += pool->size; + struct gsi_channel *channel = &trans->gsi->channel[trans->channel_id]; - return element < end ? element : pool->base; -} + /* The completion event will indicate the last TRE used */ + index += trans->used_count - 1; -/* Map a given ring entry index to the transaction associated with it */ -static void gsi_channel_trans_map(struct gsi_channel *channel, u32 index, - struct gsi_trans *trans) -{ /* Note: index *must* be used modulo the ring count here */ channel->trans_info.map[index % channel->tre_ring.count] = trans; } @@ -340,7 +328,7 @@ struct gsi_trans *gsi_channel_trans_alloc(struct gsi *gsi, u32 channel_id, struct gsi_trans_info *trans_info; struct gsi_trans *trans; - if (WARN_ON(tre_count > gsi_channel_trans_tre_max(gsi, channel_id))) + if (WARN_ON(tre_count > channel->trans_tre_max)) return NULL; trans_info = &channel->trans_info; @@ -351,11 +339,11 @@ struct gsi_trans *gsi_channel_trans_alloc(struct gsi *gsi, u32 channel_id, if (!gsi_trans_tre_reserve(trans_info, tre_count)) return NULL; - /* Allocate and initialize non-zero fields in the the transaction */ + /* Allocate and initialize non-zero fields in the transaction */ trans = gsi_trans_pool_alloc(&trans_info->pool, 1); trans->gsi = gsi; trans->channel_id = channel_id; - trans->tre_count = tre_count; + trans->rsvd_count = tre_count; init_completion(&trans->completion); /* Allocate the scatterlist and (if requested) info entries. */ @@ -405,17 +393,17 @@ void gsi_trans_free(struct gsi_trans *trans) /* Releasing the reserved TREs implicitly frees the sgl[] and * (if present) info[] arrays, plus the transaction itself. 
*/ - gsi_trans_tre_release(trans_info, trans->tre_count); + gsi_trans_tre_release(trans_info, trans->rsvd_count); } /* Add an immediate command to a transaction */ void gsi_trans_cmd_add(struct gsi_trans *trans, void *buf, u32 size, dma_addr_t addr, enum ipa_cmd_opcode opcode) { - u32 which = trans->used++; + u32 which = trans->used_count++; struct scatterlist *sg; - WARN_ON(which >= trans->tre_count); + WARN_ON(which >= trans->rsvd_count); /* Commands are quite different from data transfer requests. * Their payloads come from a pool whose memory is allocated @@ -446,9 +434,9 @@ int gsi_trans_page_add(struct gsi_trans *trans, struct page *page, u32 size, struct scatterlist *sg = &trans->sgl[0]; int ret; - if (WARN_ON(trans->tre_count != 1)) + if (WARN_ON(trans->rsvd_count != 1)) return -EINVAL; - if (WARN_ON(trans->used)) + if (WARN_ON(trans->used_count)) return -EINVAL; sg_set_page(sg, page, size, offset); @@ -456,7 +444,7 @@ int gsi_trans_page_add(struct gsi_trans *trans, struct page *page, u32 size, if (!ret) return -ENOMEM; - trans->used++; /* Transaction now owns the (DMA mapped) page */ + trans->used_count++; /* Transaction now owns the (DMA mapped) page */ return 0; } @@ -465,25 +453,26 @@ int gsi_trans_page_add(struct gsi_trans *trans, struct page *page, u32 size, int gsi_trans_skb_add(struct gsi_trans *trans, struct sk_buff *skb) { struct scatterlist *sg = &trans->sgl[0]; - u32 used; + u32 used_count; int ret; - if (WARN_ON(trans->tre_count != 1)) + if (WARN_ON(trans->rsvd_count != 1)) return -EINVAL; - if (WARN_ON(trans->used)) + if (WARN_ON(trans->used_count)) return -EINVAL; /* skb->len will not be 0 (checked early) */ ret = skb_to_sgvec(skb, sg, 0, skb->len); if (ret < 0) return ret; - used = ret; + used_count = ret; - ret = dma_map_sg(trans->gsi->dev, sg, used, trans->direction); + ret = dma_map_sg(trans->gsi->dev, sg, used_count, trans->direction); if (!ret) return -ENOMEM; - trans->used += used; /* Transaction now owns the (DMA mapped) skb */ + /* 
Transaction now owns the (DMA mapped) skb */ + trans->used_count += used_count; return 0; } @@ -549,7 +538,7 @@ static void gsi_trans_tre_fill(struct gsi_tre *dest_tre, dma_addr_t addr, static void __gsi_trans_commit(struct gsi_trans *trans, bool ring_db) { struct gsi_channel *channel = &trans->gsi->channel[trans->channel_id]; - struct gsi_ring *ring = &channel->tre_ring; + struct gsi_ring *tre_ring = &channel->tre_ring; enum ipa_cmd_opcode opcode = IPA_CMD_NONE; bool bei = channel->toward_ipa; struct gsi_tre *dest_tre; @@ -559,7 +548,7 @@ static void __gsi_trans_commit(struct gsi_trans *trans, bool ring_db) u32 avail; u32 i; - WARN_ON(!trans->used); + WARN_ON(!trans->used_count); /* Consume the entries. If we cross the end of the ring while * filling them we'll switch to the beginning to finish. @@ -567,35 +556,30 @@ static void __gsi_trans_commit(struct gsi_trans *trans, bool ring_db) * transfer request, whose opcode is IPA_CMD_NONE. */ cmd_opcode = channel->command ? &trans->cmd_opcode[0] : NULL; - avail = ring->count - ring->index % ring->count; - dest_tre = gsi_ring_virt(ring, ring->index); - for_each_sg(trans->sgl, sg, trans->used, i) { - bool last_tre = i == trans->used - 1; + avail = tre_ring->count - tre_ring->index % tre_ring->count; + dest_tre = gsi_ring_virt(tre_ring, tre_ring->index); + for_each_sg(trans->sgl, sg, trans->used_count, i) { + bool last_tre = i == trans->used_count - 1; dma_addr_t addr = sg_dma_address(sg); u32 len = sg_dma_len(sg); byte_count += len; if (!avail--) - dest_tre = gsi_ring_virt(ring, 0); + dest_tre = gsi_ring_virt(tre_ring, 0); if (cmd_opcode) opcode = *cmd_opcode++; gsi_trans_tre_fill(dest_tre, addr, len, last_tre, bei, opcode); dest_tre++; } - ring->index += trans->used; - - if (channel->toward_ipa) { - /* We record TX bytes when they are sent */ - trans->len = byte_count; - trans->trans_count = channel->trans_count; - trans->byte_count = channel->byte_count; - channel->trans_count++; - channel->byte_count += byte_count; - 
} + /* Associate the TRE with the transaction */ + gsi_trans_map(trans, tre_ring->index); - /* Associate the last TRE with the transaction */ - gsi_channel_trans_map(channel, ring->index - 1, trans); + tre_ring->index += trans->used_count; + + trans->len = byte_count; + if (channel->toward_ipa) + gsi_trans_tx_committed(trans); gsi_trans_move_pending(trans); @@ -603,7 +587,7 @@ static void __gsi_trans_commit(struct gsi_trans *trans, bool ring_db) if (ring_db || !atomic_read(&channel->trans_info.tre_avail)) { /* Report what we're handing off to hardware for TX channels */ if (channel->toward_ipa) - gsi_channel_tx_queued(channel); + gsi_trans_tx_queued(trans); gsi_channel_doorbell(channel); } } @@ -611,7 +595,7 @@ static void __gsi_trans_commit(struct gsi_trans *trans, bool ring_db) /* Commit a GSI transaction */ void gsi_trans_commit(struct gsi_trans *trans, bool ring_db) { - if (trans->used) + if (trans->used_count) __gsi_trans_commit(trans, ring_db); else gsi_trans_free(trans); @@ -620,7 +604,7 @@ void gsi_trans_commit(struct gsi_trans *trans, bool ring_db) /* Commit a GSI transaction and wait for it to complete */ void gsi_trans_commit_wait(struct gsi_trans *trans) { - if (!trans->used) + if (!trans->used_count) goto out_trans_free; refcount_inc(&trans->refcount); @@ -638,7 +622,7 @@ void gsi_trans_complete(struct gsi_trans *trans) { /* If the entire SGL was mapped when added, unmap it now */ if (trans->direction != DMA_NONE) - dma_unmap_sg(trans->gsi->dev, trans->sgl, trans->used, + dma_unmap_sg(trans->gsi->dev, trans->sgl, trans->used_count, trans->direction); ipa_gsi_trans_complete(trans); @@ -675,7 +659,7 @@ void gsi_channel_trans_cancel_pending(struct gsi_channel *channel) int gsi_trans_read_byte(struct gsi *gsi, u32 channel_id, dma_addr_t addr) { struct gsi_channel *channel = &gsi->channel[channel_id]; - struct gsi_ring *ring = &channel->tre_ring; + struct gsi_ring *tre_ring = &channel->tre_ring; struct gsi_trans_info *trans_info; struct gsi_tre *dest_tre; 
@@ -685,12 +669,12 @@ int gsi_trans_read_byte(struct gsi *gsi, u32 channel_id, dma_addr_t addr) if (!gsi_trans_tre_reserve(trans_info, 1)) return -EBUSY; - /* Now fill the the reserved TRE and tell the hardware */ + /* Now fill the reserved TRE and tell the hardware */ - dest_tre = gsi_ring_virt(ring, ring->index); + dest_tre = gsi_ring_virt(tre_ring, tre_ring->index); gsi_trans_tre_fill(dest_tre, addr, 1, true, false, IPA_CMD_NONE); - ring->index++; + tre_ring->index++; gsi_channel_doorbell(channel); return 0; @@ -745,14 +729,10 @@ int gsi_channel_trans_init(struct gsi *gsi, u32 channel_id) * element is used to fill a single TRE when the transaction is * committed. So we need as many scatterlist elements as the * maximum number of TREs that can be outstanding. - * - * All TREs in a transaction must fit within the channel's TLV FIFO. - * A transaction on a channel can allocate as many TREs as that but - * no more. */ ret = gsi_trans_pool_init(&trans_info->sg_pool, sizeof(struct scatterlist), - tre_max, channel->tlv_count); + tre_max, channel->trans_tre_max); if (ret) goto err_trans_pool_exit; diff --git a/drivers/net/ipa/gsi_trans.h b/drivers/net/ipa/gsi_trans.h index 020c3b32de1d..7084507830c2 100644 --- a/drivers/net/ipa/gsi_trans.h +++ b/drivers/net/ipa/gsi_trans.h @@ -33,9 +33,9 @@ struct gsi_trans_pool; * @gsi: GSI pointer * @channel_id: Channel number transaction is associated with * @cancelled: If set by the core code, transaction was cancelled - * @tre_count: Number of TREs reserved for this transaction - * @used: Number of TREs *used* (could be less than tre_count) - * @len: Total # of transfer bytes represented in sgl[] (set by core) + * @rsvd_count: Number of TREs reserved for this transaction + * @used_count: Number of TREs *used* (could be less than rsvd_count) + * @len: Number of bytes sent or received by the transaction * @data: Preserved but not touched by the core transaction code * @cmd_opcode: Array of command opcodes (command channel only) * 
@sgl: An array of scatter/gather entries managed by core code @@ -45,8 +45,9 @@ struct gsi_trans_pool; * @byte_count: TX channel byte count recorded when transaction committed * @trans_count: Channel transaction count when committed (for BQL accounting) * - * The size used for some fields in this structure were chosen to ensure - * the full structure size is no larger than 128 bytes. + * The @len field is set when the transaction is committed. For RX + * transactions it is updated later to reflect the actual number of bytes + * received. */ struct gsi_trans { struct list_head links; /* gsi_channel lists */ @@ -56,8 +57,8 @@ struct gsi_trans { bool cancelled; /* true if transaction was cancelled */ - u8 tre_count; /* # TREs requested */ - u8 used; /* # entries used in sgl[] */ + u8 rsvd_count; /* # TREs requested */ + u8 used_count; /* # entries used in sgl[] */ u32 len; /* total # bytes across sgl[] */ union { diff --git a/drivers/net/ipa/ipa_cmd.c b/drivers/net/ipa/ipa_cmd.c index e58cd4478fd3..6dea40259b60 100644 --- a/drivers/net/ipa/ipa_cmd.c +++ b/drivers/net/ipa/ipa_cmd.c @@ -353,13 +353,13 @@ int ipa_cmd_pool_init(struct gsi_channel *channel, u32 tre_max) /* This is as good a place as any to validate build constants */ ipa_cmd_validate_build(); - /* Even though command payloads are allocated one at a time, - * a single transaction can require up to tlv_count of them, - * so we treat them as if that many can be allocated at once. + /* Command payloads are allocated one at a time, but a single + * transaction can require up to the maximum supported by the + * channel; treat them as if they were allocated all at once. 
*/ return gsi_trans_pool_init_dma(dev, &trans_info->cmd_pool, sizeof(union ipa_cmd_payload), - tre_max, channel->tlv_count); + tre_max, channel->trans_tre_max); } void ipa_cmd_pool_exit(struct gsi_channel *channel) diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index d3b3255ac3d1..66d2bfdf9e42 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -1020,7 +1020,7 @@ int ipa_endpoint_skb_tx(struct ipa_endpoint *endpoint, struct sk_buff *skb) * If not, see if we can linearize it before giving up. */ nr_frags = skb_shinfo(skb)->nr_frags; - if (1 + nr_frags > endpoint->trans_tre_max) { + if (nr_frags > endpoint->skb_frag_max) { if (skb_linearize(skb)) return -E2BIG; nr_frags = 0; @@ -1368,18 +1368,14 @@ static void ipa_endpoint_status_parse(struct ipa_endpoint *endpoint, } } -/* Complete a TX transaction, command or from ipa_endpoint_skb_tx() */ -static void ipa_endpoint_tx_complete(struct ipa_endpoint *endpoint, - struct gsi_trans *trans) -{ -} - -/* Complete transaction initiated in ipa_endpoint_replenish_one() */ -static void ipa_endpoint_rx_complete(struct ipa_endpoint *endpoint, - struct gsi_trans *trans) +void ipa_endpoint_trans_complete(struct ipa_endpoint *endpoint, + struct gsi_trans *trans) { struct page *page; + if (endpoint->toward_ipa) + return; + if (trans->cancelled) goto done; @@ -1393,15 +1389,6 @@ done: ipa_endpoint_replenish(endpoint); } -void ipa_endpoint_trans_complete(struct ipa_endpoint *endpoint, - struct gsi_trans *trans) -{ - if (endpoint->toward_ipa) - ipa_endpoint_tx_complete(endpoint, trans); - else - ipa_endpoint_rx_complete(endpoint, trans); -} - void ipa_endpoint_trans_release(struct ipa_endpoint *endpoint, struct gsi_trans *trans) { @@ -1721,7 +1708,7 @@ static void ipa_endpoint_setup_one(struct ipa_endpoint *endpoint) if (endpoint->ee_id != GSI_EE_AP) return; - endpoint->trans_tre_max = gsi_channel_trans_tre_max(gsi, channel_id); + endpoint->skb_frag_max = 
gsi->channel[channel_id].trans_tre_max - 1; if (!endpoint->toward_ipa) { /* RX transactions require a single TRE, so the maximum * backlog is the same as the maximum outstanding TREs. diff --git a/drivers/net/ipa/ipa_endpoint.h b/drivers/net/ipa/ipa_endpoint.h index 01790c60bee8..28e0a7386fd7 100644 --- a/drivers/net/ipa/ipa_endpoint.h +++ b/drivers/net/ipa/ipa_endpoint.h @@ -142,7 +142,7 @@ enum ipa_replenish_flag { * @endpoint_id: IPA endpoint number * @toward_ipa: Endpoint direction (true = TX, false = RX) * @config: Default endpoint configuration - * @trans_tre_max: Maximum number of TRE descriptors per transaction + * @skb_frag_max: Maximum allowed number of TX SKB fragments * @evt_ring_id: GSI event ring used by the endpoint * @netdev: Network device pointer, if endpoint uses one * @replenish_flags: Replenishing state flags @@ -157,7 +157,7 @@ struct ipa_endpoint { bool toward_ipa; struct ipa_endpoint_config config; - u32 trans_tre_max; + u32 skb_frag_max; /* Used for netdev TX only */ u32 evt_ring_id; /* Net device this endpoint is associated with, if any */ diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index 3837c897832e..de94921cbef9 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -47,11 +47,11 @@ typedef enum { } ipvl_hdr_type; struct ipvl_pcpu_stats { - u64 rx_pkts; - u64 rx_bytes; - u64 rx_mcast; - u64 tx_pkts; - u64 tx_bytes; + u64_stats_t rx_pkts; + u64_stats_t rx_bytes; + u64_stats_t rx_mcast; + u64_stats_t tx_pkts; + u64_stats_t tx_bytes; struct u64_stats_sync syncp; u32 rx_errs; u32 tx_drps; diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 6ffb27419e64..dfeb5b392e64 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -19,10 +19,10 @@ void ipvlan_count_rx(const struct ipvl_dev *ipvlan, pcptr = this_cpu_ptr(ipvlan->pcpu_stats); u64_stats_update_begin(&pcptr->syncp); - pcptr->rx_pkts++; - pcptr->rx_bytes += len; + 
u64_stats_inc(&pcptr->rx_pkts); + u64_stats_add(&pcptr->rx_bytes, len); if (mcast) - pcptr->rx_mcast++; + u64_stats_inc(&pcptr->rx_mcast); u64_stats_update_end(&pcptr->syncp); } else { this_cpu_inc(ipvlan->pcpu_stats->rx_errs); diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index aa28a29e228c..49ba8a50dfb1 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -224,8 +224,8 @@ static netdev_tx_t ipvlan_start_xmit(struct sk_buff *skb, pcptr = this_cpu_ptr(ipvlan->pcpu_stats); u64_stats_update_begin(&pcptr->syncp); - pcptr->tx_pkts++; - pcptr->tx_bytes += skblen; + u64_stats_inc(&pcptr->tx_pkts); + u64_stats_add(&pcptr->tx_bytes, skblen); u64_stats_update_end(&pcptr->syncp); } else { this_cpu_inc(ipvlan->pcpu_stats->tx_drps); @@ -300,11 +300,11 @@ static void ipvlan_get_stats64(struct net_device *dev, pcptr = per_cpu_ptr(ipvlan->pcpu_stats, idx); do { strt= u64_stats_fetch_begin_irq(&pcptr->syncp); - rx_pkts = pcptr->rx_pkts; - rx_bytes = pcptr->rx_bytes; - rx_mcast = pcptr->rx_mcast; - tx_pkts = pcptr->tx_pkts; - tx_bytes = pcptr->tx_bytes; + rx_pkts = u64_stats_read(&pcptr->rx_pkts); + rx_bytes = u64_stats_read(&pcptr->rx_bytes); + rx_mcast = u64_stats_read(&pcptr->rx_mcast); + tx_pkts = u64_stats_read(&pcptr->tx_pkts); + tx_bytes = u64_stats_read(&pcptr->tx_bytes); } while (u64_stats_fetch_retry_irq(&pcptr->syncp, strt)); @@ -315,8 +315,8 @@ static void ipvlan_get_stats64(struct net_device *dev, s->tx_bytes += tx_bytes; /* u32 values are updated without syncp protection. 
*/ - rx_errs += pcptr->rx_errs; - tx_drps += pcptr->tx_drps; + rx_errs += READ_ONCE(pcptr->rx_errs); + tx_drps += READ_ONCE(pcptr->tx_drps); } s->rx_errors = rx_errs; s->rx_dropped = rx_errs; diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 817577e713d7..c881e1bf6f6e 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -523,8 +523,8 @@ static void count_tx(struct net_device *dev, int ret, int len) struct pcpu_sw_netstats *stats = this_cpu_ptr(dev->tstats); u64_stats_update_begin(&stats->syncp); - stats->tx_packets++; - stats->tx_bytes += len; + u64_stats_inc(&stats->tx_packets); + u64_stats_add(&stats->tx_bytes, len); u64_stats_update_end(&stats->syncp); } } @@ -825,8 +825,8 @@ static void count_rx(struct net_device *dev, int len) struct pcpu_sw_netstats *stats = this_cpu_ptr(dev->tstats); u64_stats_update_begin(&stats->syncp); - stats->rx_packets++; - stats->rx_bytes += len; + u64_stats_inc(&stats->rx_packets); + u64_stats_add(&stats->rx_bytes, len); u64_stats_update_end(&stats->syncp); } @@ -3462,7 +3462,7 @@ static int macsec_dev_init(struct net_device *dev) memcpy(dev->broadcast, real_dev->broadcast, dev->addr_len); /* Get macsec's reference to real_dev */ - dev_hold_track(real_dev, &macsec->dev_tracker, GFP_KERNEL); + netdev_hold(real_dev, &macsec->dev_tracker, GFP_KERNEL); return 0; } @@ -3710,7 +3710,7 @@ static void macsec_free_netdev(struct net_device *dev) free_percpu(macsec->secy.tx_sc.stats); /* Get rid of the macsec's reference to real_dev */ - dev_put_track(macsec->real_dev, &macsec->dev_tracker); + netdev_put(macsec->real_dev, &macsec->dev_tracker); } static void macsec_setup(struct net_device *dev) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index eff75beb1395..1080d6ebff63 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -575,8 +575,8 @@ static netdev_tx_t macvlan_start_xmit(struct sk_buff *skb, pcpu_stats = this_cpu_ptr(vlan->pcpu_stats); u64_stats_update_begin(&pcpu_stats->syncp); - 
pcpu_stats->tx_packets++; - pcpu_stats->tx_bytes += len; + u64_stats_inc(&pcpu_stats->tx_packets); + u64_stats_add(&pcpu_stats->tx_bytes, len); u64_stats_update_end(&pcpu_stats->syncp); } else { this_cpu_inc(vlan->pcpu_stats->tx_dropped); @@ -915,7 +915,7 @@ static int macvlan_init(struct net_device *dev) port->count += 1; /* Get macvlan's reference to lowerdev */ - dev_hold_track(lowerdev, &vlan->dev_tracker, GFP_KERNEL); + netdev_hold(lowerdev, &vlan->dev_tracker, GFP_KERNEL); return 0; } @@ -949,11 +949,11 @@ static void macvlan_dev_get_stats64(struct net_device *dev, p = per_cpu_ptr(vlan->pcpu_stats, i); do { start = u64_stats_fetch_begin_irq(&p->syncp); - rx_packets = p->rx_packets; - rx_bytes = p->rx_bytes; - rx_multicast = p->rx_multicast; - tx_packets = p->tx_packets; - tx_bytes = p->tx_bytes; + rx_packets = u64_stats_read(&p->rx_packets); + rx_bytes = u64_stats_read(&p->rx_bytes); + rx_multicast = u64_stats_read(&p->rx_multicast); + tx_packets = u64_stats_read(&p->tx_packets); + tx_bytes = u64_stats_read(&p->tx_bytes); } while (u64_stats_fetch_retry_irq(&p->syncp, start)); stats->rx_packets += rx_packets; @@ -964,8 +964,8 @@ static void macvlan_dev_get_stats64(struct net_device *dev, /* rx_errors & tx_dropped are u32, updated * without syncp protection. 
*/ - rx_errors += p->rx_errors; - tx_dropped += p->tx_dropped; + rx_errors += READ_ONCE(p->rx_errors); + tx_dropped += READ_ONCE(p->tx_dropped); } stats->rx_errors = rx_errors; stats->rx_dropped = rx_errors; @@ -1185,7 +1185,7 @@ static void macvlan_dev_free(struct net_device *dev) struct macvlan_dev *vlan = netdev_priv(dev); /* Get rid of the macvlan's reference to lowerdev */ - dev_put_track(vlan->lowerdev, &vlan->dev_tracker); + netdev_put(vlan->lowerdev, &vlan->dev_tracker); } void macvlan_common_setup(struct net_device *dev) diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index ab8cd5551020..ddac61d79145 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -721,7 +721,7 @@ restart: __netpoll_cleanup(&nt->np); spin_lock_irqsave(&target_list_lock, flags); - dev_put_track(nt->np.dev, &nt->np.dev_tracker); + netdev_put(nt->np.dev, &nt->np.dev_tracker); nt->np.dev = NULL; nt->enabled = false; stopped = true; diff --git a/drivers/net/pcs/Kconfig b/drivers/net/pcs/Kconfig index 22ba7b0b476d..6289b7c765f1 100644 --- a/drivers/net/pcs/Kconfig +++ b/drivers/net/pcs/Kconfig @@ -6,8 +6,8 @@ menu "PCS device drivers" config PCS_XPCS - tristate "Synopsys DesignWare XPCS controller" - depends on MDIO_DEVICE && MDIO_BUS + tristate + select PHYLINK help This module provides helper functions for Synopsys DesignWare XPCS controllers. @@ -18,4 +18,12 @@ config PCS_LYNX This module provides helpers to phylink for managing the Lynx PCS which is part of the Layerscape and QorIQ Ethernet SERDES. +config PCS_RZN1_MIIC + tristate "Renesas RZ/N1 MII converter" + depends on OF && (ARCH_RZN1 || COMPILE_TEST) + help + This module provides a driver for the MII converter that is available + on RZ/N1 SoCs. This PCS converts MII to RMII/RGMII or can be set in + pass-through mode for MII. 
+ endmenu diff --git a/drivers/net/pcs/Makefile b/drivers/net/pcs/Makefile index 0603d469bd57..0ff5388fcdea 100644 --- a/drivers/net/pcs/Makefile +++ b/drivers/net/pcs/Makefile @@ -5,3 +5,4 @@ pcs_xpcs-$(CONFIG_PCS_XPCS) := pcs-xpcs.o pcs-xpcs-nxp.o obj-$(CONFIG_PCS_XPCS) += pcs_xpcs.o obj-$(CONFIG_PCS_LYNX) += pcs-lynx.o +obj-$(CONFIG_PCS_RZN1_MIIC) += pcs-rzn1-miic.o diff --git a/drivers/net/pcs/pcs-lynx.c b/drivers/net/pcs/pcs-lynx.c index fd3445374955..7d5fc7f54b2f 100644 --- a/drivers/net/pcs/pcs-lynx.c +++ b/drivers/net/pcs/pcs-lynx.c @@ -71,12 +71,10 @@ static void lynx_pcs_get_state_usxgmii(struct mdio_device *pcs, static void lynx_pcs_get_state_2500basex(struct mdio_device *pcs, struct phylink_link_state *state) { - struct mii_bus *bus = pcs->bus; - int addr = pcs->addr; int bmsr, lpa; - bmsr = mdiobus_read(bus, addr, MII_BMSR); - lpa = mdiobus_read(bus, addr, MII_LPA); + bmsr = mdiodev_read(pcs, MII_BMSR); + lpa = mdiodev_read(pcs, MII_LPA); if (bmsr < 0 || lpa < 0) { state->link = false; return; @@ -124,57 +122,39 @@ static void lynx_pcs_get_state(struct phylink_pcs *pcs, state->link, state->an_enabled, state->an_complete); } -static int lynx_pcs_config_1000basex(struct mdio_device *pcs, - unsigned int mode, - const unsigned long *advertising) +static int lynx_pcs_config_giga(struct mdio_device *pcs, unsigned int mode, + phy_interface_t interface, + const unsigned long *advertising) { - struct mii_bus *bus = pcs->bus; - int addr = pcs->addr; u32 link_timer; - int err; - - link_timer = LINK_TIMER_VAL(IEEE8023_LINK_TIMER_NS); - mdiobus_write(bus, addr, LINK_TIMER_LO, link_timer & 0xffff); - mdiobus_write(bus, addr, LINK_TIMER_HI, link_timer >> 16); - - err = mdiobus_modify(bus, addr, IF_MODE, - IF_MODE_SGMII_EN | IF_MODE_USE_SGMII_AN, - 0); - if (err) - return err; - - return phylink_mii_c22_pcs_config(pcs, mode, - PHY_INTERFACE_MODE_1000BASEX, - advertising); -} - -static int lynx_pcs_config_sgmii(struct mdio_device *pcs, unsigned int mode, - const 
unsigned long *advertising) -{ - struct mii_bus *bus = pcs->bus; - int addr = pcs->addr; u16 if_mode; int err; - if_mode = IF_MODE_SGMII_EN; - if (mode == MLO_AN_INBAND) { - u32 link_timer; - - if_mode |= IF_MODE_USE_SGMII_AN; - - /* Adjust link timer for SGMII */ - link_timer = LINK_TIMER_VAL(SGMII_AN_LINK_TIMER_NS); - mdiobus_write(bus, addr, LINK_TIMER_LO, link_timer & 0xffff); - mdiobus_write(bus, addr, LINK_TIMER_HI, link_timer >> 16); + if (interface == PHY_INTERFACE_MODE_1000BASEX) { + link_timer = LINK_TIMER_VAL(IEEE8023_LINK_TIMER_NS); + mdiodev_write(pcs, LINK_TIMER_LO, link_timer & 0xffff); + mdiodev_write(pcs, LINK_TIMER_HI, link_timer >> 16); + + if_mode = 0; + } else { + if_mode = IF_MODE_SGMII_EN; + if (mode == MLO_AN_INBAND) { + if_mode |= IF_MODE_USE_SGMII_AN; + + /* Adjust link timer for SGMII */ + link_timer = LINK_TIMER_VAL(SGMII_AN_LINK_TIMER_NS); + mdiodev_write(pcs, LINK_TIMER_LO, link_timer & 0xffff); + mdiodev_write(pcs, LINK_TIMER_HI, link_timer >> 16); + } } - err = mdiobus_modify(bus, addr, IF_MODE, + + err = mdiodev_modify(pcs, IF_MODE, IF_MODE_SGMII_EN | IF_MODE_USE_SGMII_AN, if_mode); if (err) return err; - return phylink_mii_c22_pcs_config(pcs, mode, PHY_INTERFACE_MODE_SGMII, - advertising); + return phylink_mii_c22_pcs_config(pcs, mode, interface, advertising); } static int lynx_pcs_config_usxgmii(struct mdio_device *pcs, unsigned int mode, @@ -204,10 +184,10 @@ static int lynx_pcs_config(struct phylink_pcs *pcs, unsigned int mode, switch (ifmode) { case PHY_INTERFACE_MODE_1000BASEX: - return lynx_pcs_config_1000basex(lynx->mdio, mode, advertising); case PHY_INTERFACE_MODE_SGMII: case PHY_INTERFACE_MODE_QSGMII: - return lynx_pcs_config_sgmii(lynx->mdio, mode, advertising); + return lynx_pcs_config_giga(lynx->mdio, mode, ifmode, + advertising); case PHY_INTERFACE_MODE_2500BASEX: if (phylink_autoneg_inband(mode)) { dev_err(&lynx->mdio->dev, @@ -237,9 +217,7 @@ static void lynx_pcs_an_restart(struct phylink_pcs *pcs) static void 
lynx_pcs_link_up_sgmii(struct mdio_device *pcs, unsigned int mode, int speed, int duplex) { - struct mii_bus *bus = pcs->bus; u16 if_mode = 0, sgmii_speed; - int addr = pcs->addr; /* The PCS needs to be configured manually only * when not operating on in-band mode @@ -269,7 +247,7 @@ static void lynx_pcs_link_up_sgmii(struct mdio_device *pcs, unsigned int mode, } if_mode |= IF_MODE_SPEED(sgmii_speed); - mdiobus_modify(bus, addr, IF_MODE, + mdiodev_modify(pcs, IF_MODE, IF_MODE_HALF_DUPLEX | IF_MODE_SPEED_MSK, if_mode); } @@ -294,8 +272,6 @@ static void lynx_pcs_link_up_2500basex(struct mdio_device *pcs, unsigned int mode, int speed, int duplex) { - struct mii_bus *bus = pcs->bus; - int addr = pcs->addr; u16 if_mode = 0; if (mode == MLO_AN_INBAND) { @@ -307,7 +283,7 @@ static void lynx_pcs_link_up_2500basex(struct mdio_device *pcs, if_mode |= IF_MODE_HALF_DUPLEX; if_mode |= IF_MODE_SPEED(SGMII_SPEED_2500); - mdiobus_modify(bus, addr, IF_MODE, + mdiodev_modify(pcs, IF_MODE, IF_MODE_HALF_DUPLEX | IF_MODE_SPEED_MSK, if_mode); } diff --git a/drivers/net/pcs/pcs-rzn1-miic.c b/drivers/net/pcs/pcs-rzn1-miic.c new file mode 100644 index 000000000000..8f5e910f443d --- /dev/null +++ b/drivers/net/pcs/pcs-rzn1-miic.c @@ -0,0 +1,520 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022 Schneider Electric + * + * Clément Léger <clement.leger@bootlin.com> + */ + +#include <linux/clk.h> +#include <linux/device.h> +#include <linux/mdio.h> +#include <linux/of.h> +#include <linux/of_platform.h> +#include <linux/pcs-rzn1-miic.h> +#include <linux/phylink.h> +#include <linux/pm_runtime.h> +#include <dt-bindings/net/pcs-rzn1-miic.h> + +#define MIIC_PRCMD 0x0 +#define MIIC_ESID_CODE 0x4 + +#define MIIC_MODCTRL 0x20 +#define MIIC_MODCTRL_SW_MODE GENMASK(4, 0) + +#define MIIC_CONVCTRL(port) (0x100 + (port) * 4) + +#define MIIC_CONVCTRL_CONV_SPEED GENMASK(1, 0) +#define CONV_MODE_10MBPS 0 +#define CONV_MODE_100MBPS 1 +#define CONV_MODE_1000MBPS 2 + +#define 
MIIC_CONVCTRL_CONV_MODE GENMASK(3, 2) +#define CONV_MODE_MII 0 +#define CONV_MODE_RMII 1 +#define CONV_MODE_RGMII 2 + +#define MIIC_CONVCTRL_FULLD BIT(8) +#define MIIC_CONVCTRL_RGMII_LINK BIT(12) +#define MIIC_CONVCTRL_RGMII_DUPLEX BIT(13) +#define MIIC_CONVCTRL_RGMII_SPEED GENMASK(15, 14) + +#define MIIC_CONVRST 0x114 +#define MIIC_CONVRST_PHYIF_RST(port) BIT(port) +#define MIIC_CONVRST_PHYIF_RST_MASK GENMASK(4, 0) + +#define MIIC_SWCTRL 0x304 +#define MIIC_SWDUPC 0x308 + +#define MIIC_MAX_NR_PORTS 5 + +#define MIIC_MODCTRL_CONF_CONV_NUM 6 +#define MIIC_MODCTRL_CONF_NONE -1 + +/** + * struct modctrl_match - Matching table entry for convctrl configuration + * See section 8.2.1 of manual. + * @mode_cfg: Configuration value for convctrl + * @conv: Configuration of ethernet port muxes. First index is SWITCH_PORTIN, + * then index 1 - 5 are CONV1 - CONV5. + */ +struct modctrl_match { + u32 mode_cfg; + u8 conv[MIIC_MODCTRL_CONF_CONV_NUM]; +}; + +static struct modctrl_match modctrl_match_table[] = { + {0x0, {MIIC_RTOS_PORT, MIIC_GMAC1_PORT, MIIC_SWITCH_PORTD, + MIIC_SWITCH_PORTC, MIIC_SERCOS_PORTB, MIIC_SERCOS_PORTA}}, + {0x1, {MIIC_RTOS_PORT, MIIC_GMAC1_PORT, MIIC_SWITCH_PORTD, + MIIC_SWITCH_PORTC, MIIC_ETHERCAT_PORTB, MIIC_ETHERCAT_PORTA}}, + {0x2, {MIIC_RTOS_PORT, MIIC_GMAC1_PORT, MIIC_SWITCH_PORTD, + MIIC_ETHERCAT_PORTC, MIIC_ETHERCAT_PORTB, MIIC_ETHERCAT_PORTA}}, + {0x3, {MIIC_RTOS_PORT, MIIC_GMAC1_PORT, MIIC_SWITCH_PORTD, + MIIC_SWITCH_PORTC, MIIC_SWITCH_PORTB, MIIC_SWITCH_PORTA}}, + + {0x8, {MIIC_RTOS_PORT, MIIC_GMAC1_PORT, MIIC_SWITCH_PORTD, + MIIC_SWITCH_PORTC, MIIC_SERCOS_PORTB, MIIC_SERCOS_PORTA}}, + {0x9, {MIIC_RTOS_PORT, MIIC_GMAC1_PORT, MIIC_SWITCH_PORTD, + MIIC_SWITCH_PORTC, MIIC_ETHERCAT_PORTB, MIIC_ETHERCAT_PORTA}}, + {0xA, {MIIC_RTOS_PORT, MIIC_GMAC1_PORT, MIIC_SWITCH_PORTD, + MIIC_ETHERCAT_PORTC, MIIC_ETHERCAT_PORTB, MIIC_ETHERCAT_PORTA}}, + {0xB, {MIIC_RTOS_PORT, MIIC_GMAC1_PORT, MIIC_SWITCH_PORTD, + MIIC_SWITCH_PORTC, MIIC_SWITCH_PORTB, 
MIIC_SWITCH_PORTA}}, + + {0x10, {MIIC_GMAC2_PORT, MIIC_GMAC1_PORT, MIIC_SWITCH_PORTD, + MIIC_SWITCH_PORTC, MIIC_SERCOS_PORTB, MIIC_SERCOS_PORTA}}, + {0x11, {MIIC_GMAC2_PORT, MIIC_GMAC1_PORT, MIIC_SWITCH_PORTD, + MIIC_SWITCH_PORTC, MIIC_ETHERCAT_PORTB, MIIC_ETHERCAT_PORTA}}, + {0x12, {MIIC_GMAC2_PORT, MIIC_GMAC1_PORT, MIIC_SWITCH_PORTD, + MIIC_ETHERCAT_PORTC, MIIC_ETHERCAT_PORTB, MIIC_ETHERCAT_PORTA}}, + {0x13, {MIIC_GMAC2_PORT, MIIC_GMAC1_PORT, MIIC_SWITCH_PORTD, + MIIC_SWITCH_PORTC, MIIC_SWITCH_PORTB, MIIC_SWITCH_PORTA}} +}; + +static const char * const conf_to_string[] = { + [MIIC_GMAC1_PORT] = "GMAC1_PORT", + [MIIC_GMAC2_PORT] = "GMAC2_PORT", + [MIIC_RTOS_PORT] = "RTOS_PORT", + [MIIC_SERCOS_PORTA] = "SERCOS_PORTA", + [MIIC_SERCOS_PORTB] = "SERCOS_PORTB", + [MIIC_ETHERCAT_PORTA] = "ETHERCAT_PORTA", + [MIIC_ETHERCAT_PORTB] = "ETHERCAT_PORTB", + [MIIC_ETHERCAT_PORTC] = "ETHERCAT_PORTC", + [MIIC_SWITCH_PORTA] = "SWITCH_PORTA", + [MIIC_SWITCH_PORTB] = "SWITCH_PORTB", + [MIIC_SWITCH_PORTC] = "SWITCH_PORTC", + [MIIC_SWITCH_PORTD] = "SWITCH_PORTD", + [MIIC_HSR_PORTA] = "HSR_PORTA", + [MIIC_HSR_PORTB] = "HSR_PORTB", +}; + +static const char *index_to_string[MIIC_MODCTRL_CONF_CONV_NUM] = { + "SWITCH_PORTIN", + "CONV1", + "CONV2", + "CONV3", + "CONV4", + "CONV5", +}; + +/** + * struct miic - MII converter structure + * @base: base address of the MII converter + * @dev: Device associated to the MII converter + * @clks: Clocks used for this device + * @nclk: Number of clocks + * @lock: Lock used for read-modify-write access + */ +struct miic { + void __iomem *base; + struct device *dev; + struct clk_bulk_data *clks; + int nclk; + spinlock_t lock; +}; + +/** + * struct miic_port - Per port MII converter struct + * @miic: backiling to MII converter structure + * @pcs: PCS structure associated to the port + * @port: port number + */ +struct miic_port { + struct miic *miic; + struct phylink_pcs pcs; + int port; +}; + +static struct miic_port *phylink_pcs_to_miic_port(struct 
phylink_pcs *pcs) +{ + return container_of(pcs, struct miic_port, pcs); +} + +static void miic_reg_writel(struct miic *miic, int offset, u32 value) +{ + writel(value, miic->base + offset); +} + +static u32 miic_reg_readl(struct miic *miic, int offset) +{ + return readl(miic->base + offset); +} + +static void miic_reg_rmw(struct miic *miic, int offset, u32 mask, u32 val) +{ + u32 reg; + + spin_lock(&miic->lock); + + reg = miic_reg_readl(miic, offset); + reg &= ~mask; + reg |= val; + miic_reg_writel(miic, offset, reg); + + spin_unlock(&miic->lock); +} + +static void miic_converter_enable(struct miic *miic, int port, int enable) +{ + u32 val = 0; + + if (enable) + val = MIIC_CONVRST_PHYIF_RST(port); + + miic_reg_rmw(miic, MIIC_CONVRST, MIIC_CONVRST_PHYIF_RST(port), val); +} + +static int miic_config(struct phylink_pcs *pcs, unsigned int mode, + phy_interface_t interface, + const unsigned long *advertising, bool permit) +{ + struct miic_port *miic_port = phylink_pcs_to_miic_port(pcs); + struct miic *miic = miic_port->miic; + int port = miic_port->port; + u32 speed, conv_mode, val; + + switch (interface) { + case PHY_INTERFACE_MODE_RMII: + conv_mode = CONV_MODE_RMII; + speed = CONV_MODE_100MBPS; + break; + case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_ID: + case PHY_INTERFACE_MODE_RGMII_TXID: + case PHY_INTERFACE_MODE_RGMII_RXID: + conv_mode = CONV_MODE_RGMII; + speed = CONV_MODE_1000MBPS; + break; + case PHY_INTERFACE_MODE_MII: + conv_mode = CONV_MODE_MII; + /* When in MII mode, speed should be set to 0 (which is actually + * CONV_MODE_10MBPS) + */ + speed = CONV_MODE_10MBPS; + break; + default: + return -EOPNOTSUPP; + } + + val = FIELD_PREP(MIIC_CONVCTRL_CONV_MODE, conv_mode) | + FIELD_PREP(MIIC_CONVCTRL_CONV_SPEED, speed); + + miic_reg_rmw(miic, MIIC_CONVCTRL(port), + MIIC_CONVCTRL_CONV_MODE | MIIC_CONVCTRL_CONV_SPEED, val); + miic_converter_enable(miic_port->miic, miic_port->port, 1); + + return 0; +} + +static void miic_link_up(struct phylink_pcs 
*pcs, unsigned int mode, + phy_interface_t interface, int speed, int duplex) +{ + struct miic_port *miic_port = phylink_pcs_to_miic_port(pcs); + struct miic *miic = miic_port->miic; + u32 conv_speed = 0, val = 0; + int port = miic_port->port; + + if (duplex == DUPLEX_FULL) + val |= MIIC_CONVCTRL_FULLD; + + /* No speed in MII through-mode */ + if (interface != PHY_INTERFACE_MODE_MII) { + switch (speed) { + case SPEED_1000: + conv_speed = CONV_MODE_1000MBPS; + break; + case SPEED_100: + conv_speed = CONV_MODE_100MBPS; + break; + case SPEED_10: + conv_speed = CONV_MODE_10MBPS; + break; + default: + return; + } + } + + val |= FIELD_PREP(MIIC_CONVCTRL_CONV_SPEED, conv_speed); + + miic_reg_rmw(miic, MIIC_CONVCTRL(port), + (MIIC_CONVCTRL_CONV_SPEED | MIIC_CONVCTRL_FULLD), val); +} + +static int miic_validate(struct phylink_pcs *pcs, unsigned long *supported, + const struct phylink_link_state *state) +{ + if (phy_interface_mode_is_rgmii(state->interface) || + state->interface == PHY_INTERFACE_MODE_RMII || + state->interface == PHY_INTERFACE_MODE_MII) + return 1; + + return -EINVAL; +} + +static const struct phylink_pcs_ops miic_phylink_ops = { + .pcs_validate = miic_validate, + .pcs_config = miic_config, + .pcs_link_up = miic_link_up, +}; + +struct phylink_pcs *miic_create(struct device *dev, struct device_node *np) +{ + struct platform_device *pdev; + struct miic_port *miic_port; + struct device_node *pcs_np; + struct miic *miic; + u32 port; + + if (!of_device_is_available(np)) + return ERR_PTR(-ENODEV); + + if (of_property_read_u32(np, "reg", &port)) + return ERR_PTR(-EINVAL); + + if (port > MIIC_MAX_NR_PORTS || port < 1) + return ERR_PTR(-EINVAL); + + /* The PCS pdev is attached to the parent node */ + pcs_np = of_get_parent(np); + if (!pcs_np) + return ERR_PTR(-ENODEV); + + if (!of_device_is_available(pcs_np)) { + of_node_put(pcs_np); + return ERR_PTR(-ENODEV); + } + + pdev = of_find_device_by_node(pcs_np); + of_node_put(pcs_np); + if (!pdev || 
!platform_get_drvdata(pdev)) + return ERR_PTR(-EPROBE_DEFER); + + miic_port = kzalloc(sizeof(*miic_port), GFP_KERNEL); + if (!miic_port) + return ERR_PTR(-ENOMEM); + + miic = platform_get_drvdata(pdev); + device_link_add(dev, miic->dev, DL_FLAG_AUTOREMOVE_CONSUMER); + + miic_port->miic = miic; + miic_port->port = port - 1; + miic_port->pcs.ops = &miic_phylink_ops; + + return &miic_port->pcs; +} +EXPORT_SYMBOL(miic_create); + +void miic_destroy(struct phylink_pcs *pcs) +{ + struct miic_port *miic_port = phylink_pcs_to_miic_port(pcs); + + miic_converter_enable(miic_port->miic, miic_port->port, 0); + kfree(miic_port); +} +EXPORT_SYMBOL(miic_destroy); + +static int miic_init_hw(struct miic *miic, u32 cfg_mode) +{ + int port; + + /* Unlock write access to accessory registers (cf datasheet). If this + * is going to be used in conjunction with the Cortex-M3, this sequence + * will have to be moved in register write + */ + miic_reg_writel(miic, MIIC_PRCMD, 0x00A5); + miic_reg_writel(miic, MIIC_PRCMD, 0x0001); + miic_reg_writel(miic, MIIC_PRCMD, 0xFFFE); + miic_reg_writel(miic, MIIC_PRCMD, 0x0001); + + miic_reg_writel(miic, MIIC_MODCTRL, + FIELD_PREP(MIIC_MODCTRL_SW_MODE, cfg_mode)); + + for (port = 0; port < MIIC_MAX_NR_PORTS; port++) { + miic_converter_enable(miic, port, 0); + /* Disable speed/duplex control from these registers, datasheet + * says switch registers should be used to setup switch port + * speed and duplex. 
+ */ + miic_reg_writel(miic, MIIC_SWCTRL, 0x0); + miic_reg_writel(miic, MIIC_SWDUPC, 0x0); + } + + return 0; +} + +static bool miic_modctrl_match(s8 table_val[MIIC_MODCTRL_CONF_CONV_NUM], + s8 dt_val[MIIC_MODCTRL_CONF_CONV_NUM]) +{ + int i; + + for (i = 0; i < MIIC_MODCTRL_CONF_CONV_NUM; i++) { + if (dt_val[i] == MIIC_MODCTRL_CONF_NONE) + continue; + + if (dt_val[i] != table_val[i]) + return false; + } + + return true; +} + +static void miic_dump_conf(struct device *dev, + s8 conf[MIIC_MODCTRL_CONF_CONV_NUM]) +{ + const char *conf_name; + int i; + + for (i = 0; i < MIIC_MODCTRL_CONF_CONV_NUM; i++) { + if (conf[i] != MIIC_MODCTRL_CONF_NONE) + conf_name = conf_to_string[conf[i]]; + else + conf_name = "NONE"; + + dev_err(dev, "%s: %s\n", index_to_string[i], conf_name); + } +} + +static int miic_match_dt_conf(struct device *dev, + s8 dt_val[MIIC_MODCTRL_CONF_CONV_NUM], + u32 *mode_cfg) +{ + struct modctrl_match *table_entry; + int i; + + for (i = 0; i < ARRAY_SIZE(modctrl_match_table); i++) { + table_entry = &modctrl_match_table[i]; + + if (miic_modctrl_match(table_entry->conv, dt_val)) { + *mode_cfg = table_entry->mode_cfg; + return 0; + } + } + + dev_err(dev, "Failed to apply requested configuration\n"); + miic_dump_conf(dev, dt_val); + + return -EINVAL; +} + +static int miic_parse_dt(struct device *dev, u32 *mode_cfg) +{ + s8 dt_val[MIIC_MODCTRL_CONF_CONV_NUM]; + struct device_node *np = dev->of_node; + struct device_node *conv; + u32 conf; + int port; + + memset(dt_val, MIIC_MODCTRL_CONF_NONE, sizeof(dt_val)); + + if (of_property_read_u32(np, "renesas,miic-switch-portin", &conf) == 0) + dt_val[0] = conf; + + for_each_child_of_node(np, conv) { + if (of_property_read_u32(conv, "reg", &port)) + continue; + + if (!of_device_is_available(conv)) + continue; + + if (of_property_read_u32(conv, "renesas,miic-input", &conf) == 0) + dt_val[port] = conf; + } + + return miic_match_dt_conf(dev, dt_val, mode_cfg); +} + +static int miic_probe(struct platform_device *pdev) +{ + 
struct device *dev = &pdev->dev; + struct miic *miic; + u32 mode_cfg; + int ret; + + ret = miic_parse_dt(dev, &mode_cfg); + if (ret < 0) + return ret; + + miic = devm_kzalloc(dev, sizeof(*miic), GFP_KERNEL); + if (!miic) + return -ENOMEM; + + spin_lock_init(&miic->lock); + miic->dev = dev; + miic->base = devm_platform_ioremap_resource(pdev, 0); + if (!miic->base) + return -EINVAL; + + ret = devm_pm_runtime_enable(dev); + if (ret < 0) + return ret; + + ret = pm_runtime_resume_and_get(dev); + if (ret < 0) + return ret; + + ret = miic_init_hw(miic, mode_cfg); + if (ret) + goto disable_runtime_pm; + + /* miic_create() relies on that fact that data are attached to the + * platform device to determine if the driver is ready so this needs to + * be the last thing to be done after everything is initialized + * properly. + */ + platform_set_drvdata(pdev, miic); + + return 0; + +disable_runtime_pm: + pm_runtime_put(dev); + + return ret; +} + +static int miic_remove(struct platform_device *pdev) +{ + pm_runtime_put(&pdev->dev); + + return 0; +} + +static const struct of_device_id miic_of_mtable[] = { + { .compatible = "renesas,rzn1-miic" }, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(of, miic_of_mtable); + +static struct platform_driver miic_driver = { + .driver = { + .name = "rzn1_miic", + .suppress_bind_attrs = true, + .of_match_table = miic_of_mtable, + }, + .probe = miic_probe, + .remove = miic_remove, +}; +module_platform_driver(miic_driver); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Renesas MII converter PCS driver"); +MODULE_AUTHOR("Clément Léger <clement.leger@bootlin.com>"); diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index 4cfd05c15aee..ab0af1d2531f 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -77,6 +77,14 @@ static const int xpcs_sgmii_features[] = { __ETHTOOL_LINK_MODE_MASK_NBITS, }; +static const int xpcs_1000basex_features[] = { + ETHTOOL_LINK_MODE_Pause_BIT, + ETHTOOL_LINK_MODE_Asym_Pause_BIT, + 
ETHTOOL_LINK_MODE_Autoneg_BIT, + ETHTOOL_LINK_MODE_1000baseX_Full_BIT, + __ETHTOOL_LINK_MODE_MASK_NBITS, +}; + static const int xpcs_2500basex_features[] = { ETHTOOL_LINK_MODE_Pause_BIT, ETHTOOL_LINK_MODE_Asym_Pause_BIT, @@ -102,6 +110,10 @@ static const phy_interface_t xpcs_sgmii_interfaces[] = { PHY_INTERFACE_MODE_SGMII, }; +static const phy_interface_t xpcs_1000basex_interfaces[] = { + PHY_INTERFACE_MODE_1000BASEX, +}; + static const phy_interface_t xpcs_2500basex_interfaces[] = { PHY_INTERFACE_MODE_2500BASEX, PHY_INTERFACE_MODE_MAX, @@ -112,6 +124,7 @@ enum { DW_XPCS_10GKR, DW_XPCS_XLGMII, DW_XPCS_SGMII, + DW_XPCS_1000BASEX, DW_XPCS_2500BASEX, DW_XPCS_INTERFACE_MAX, }; @@ -189,6 +202,14 @@ int xpcs_write(struct dw_xpcs *xpcs, int dev, u32 reg, u16 val) return mdiobus_c45_write(bus, addr, dev, reg, val); } +static int xpcs_modify_changed(struct dw_xpcs *xpcs, int dev, u32 reg, + u16 mask, u16 set) +{ + u32 reg_addr = mdiobus_c45_addr(dev, reg); + + return mdiodev_modify_changed(xpcs->mdiodev, reg_addr, mask, set); +} + static int xpcs_read_vendor(struct dw_xpcs *xpcs, int dev, u32 reg) { return xpcs_read(xpcs, dev, DW_VENDOR | reg); @@ -237,6 +258,7 @@ static int xpcs_soft_reset(struct dw_xpcs *xpcs, break; case DW_AN_C37_SGMII: case DW_2500BASEX: + case DW_AN_C37_1000BASEX: dev = MDIO_MMD_VEND2; break; default: @@ -772,6 +794,68 @@ static int xpcs_config_aneg_c37_sgmii(struct dw_xpcs *xpcs, unsigned int mode) return ret; } +static int xpcs_config_aneg_c37_1000basex(struct dw_xpcs *xpcs, unsigned int mode, + const unsigned long *advertising) +{ + phy_interface_t interface = PHY_INTERFACE_MODE_1000BASEX; + int ret, mdio_ctrl, adv; + bool changed = 0; + + /* According to Chap 7.12, to set 1000BASE-X C37 AN, AN must + * be disabled first:- + * 1) VR_MII_MMD_CTRL Bit(12)[AN_ENABLE] = 0b + * 2) VR_MII_AN_CTRL Bit(2:1)[PCS_MODE] = 00b (1000BASE-X C37) + */ + mdio_ctrl = xpcs_read(xpcs, MDIO_MMD_VEND2, DW_VR_MII_MMD_CTRL); + if (mdio_ctrl < 0) + return mdio_ctrl; + + 
if (mdio_ctrl & AN_CL37_EN) { + ret = xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_MMD_CTRL, + mdio_ctrl & ~AN_CL37_EN); + if (ret < 0) + return ret; + } + + ret = xpcs_read(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_CTRL); + if (ret < 0) + return ret; + + ret &= ~DW_VR_MII_PCS_MODE_MASK; + ret = xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_CTRL, ret); + if (ret < 0) + return ret; + + /* Check for advertising changes and update the C45 MII ADV + * register accordingly. + */ + adv = phylink_mii_c22_pcs_encode_advertisement(interface, + advertising); + if (adv >= 0) { + ret = xpcs_modify_changed(xpcs, MDIO_MMD_VEND2, + MII_ADVERTISE, 0xffff, adv); + if (ret < 0) + return ret; + + changed = ret; + } + + /* Clear CL37 AN complete status */ + ret = xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_INTR_STS, 0); + if (ret < 0) + return ret; + + if (phylink_autoneg_inband(mode) && + linkmode_test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, advertising)) { + ret = xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_MMD_CTRL, + mdio_ctrl | AN_CL37_EN); + if (ret < 0) + return ret; + } + + return changed; +} + static int xpcs_config_2500basex(struct dw_xpcs *xpcs) { int ret; @@ -795,7 +879,7 @@ static int xpcs_config_2500basex(struct dw_xpcs *xpcs) } int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface, - unsigned int mode) + unsigned int mode, const unsigned long *advertising) { const struct xpcs_compat *compat; int ret; @@ -817,6 +901,12 @@ int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface, if (ret) return ret; break; + case DW_AN_C37_1000BASEX: + ret = xpcs_config_aneg_c37_1000basex(xpcs, mode, + advertising); + if (ret) + return ret; + break; case DW_2500BASEX: ret = xpcs_config_2500basex(xpcs); if (ret) @@ -843,7 +933,7 @@ static int xpcs_config(struct phylink_pcs *pcs, unsigned int mode, { struct dw_xpcs *xpcs = phylink_pcs_to_xpcs(pcs); - return xpcs_do_config(xpcs, interface, mode); + return xpcs_do_config(xpcs, interface, mode, advertising); } static int 
xpcs_get_state_c73(struct dw_xpcs *xpcs, @@ -864,7 +954,7 @@ static int xpcs_get_state_c73(struct dw_xpcs *xpcs, state->link = 0; - return xpcs_do_config(xpcs, state->interface, MLO_AN_INBAND); + return xpcs_do_config(xpcs, state->interface, MLO_AN_INBAND, NULL); } if (state->an_enabled && xpcs_aneg_done_c73(xpcs, state, compat)) { @@ -921,6 +1011,29 @@ static int xpcs_get_state_c37_sgmii(struct dw_xpcs *xpcs, return 0; } +static int xpcs_get_state_c37_1000basex(struct dw_xpcs *xpcs, + struct phylink_link_state *state) +{ + int lpa, bmsr; + + if (state->an_enabled) { + /* Reset link state */ + state->link = false; + + lpa = xpcs_read(xpcs, MDIO_MMD_VEND2, MII_LPA); + if (lpa < 0 || lpa & LPA_RFAULT) + return lpa; + + bmsr = xpcs_read(xpcs, MDIO_MMD_VEND2, MII_BMSR); + if (bmsr < 0) + return bmsr; + + phylink_mii_c22_pcs_decode_state(state, bmsr, lpa); + } + + return 0; +} + static void xpcs_get_state(struct phylink_pcs *pcs, struct phylink_link_state *state) { @@ -948,6 +1061,13 @@ static void xpcs_get_state(struct phylink_pcs *pcs, ERR_PTR(ret)); } break; + case DW_AN_C37_1000BASEX: + ret = xpcs_get_state_c37_1000basex(xpcs, state); + if (ret) { + pr_err("xpcs_get_state_c37_1000basex returned %pe\n", + ERR_PTR(ret)); + } + break; default: return; } @@ -961,22 +1081,35 @@ static void xpcs_link_up_sgmii(struct dw_xpcs *xpcs, unsigned int mode, if (phylink_autoneg_inband(mode)) return; + val = mii_bmcr_encode_fixed(speed, duplex); + ret = xpcs_write(xpcs, MDIO_MMD_VEND2, MDIO_CTRL1, val); + if (ret) + pr_err("%s: xpcs_write returned %pe\n", __func__, ERR_PTR(ret)); +} + +static void xpcs_link_up_1000basex(struct dw_xpcs *xpcs, unsigned int mode, + int speed, int duplex) +{ + int val, ret; + + if (phylink_autoneg_inband(mode)) + return; + switch (speed) { case SPEED_1000: val = BMCR_SPEED1000; break; case SPEED_100: - val = BMCR_SPEED100; - break; case SPEED_10: - val = BMCR_SPEED10; - break; default: + pr_err("%s: speed = %d\n", __func__, speed); return; } if (duplex 
== DUPLEX_FULL) val |= BMCR_FULLDPLX; + else + pr_err("%s: half duplex not supported\n", __func__); ret = xpcs_write(xpcs, MDIO_MMD_VEND2, MDIO_CTRL1, val); if (ret) @@ -992,9 +1125,23 @@ void xpcs_link_up(struct phylink_pcs *pcs, unsigned int mode, return xpcs_config_usxgmii(xpcs, speed); if (interface == PHY_INTERFACE_MODE_SGMII) return xpcs_link_up_sgmii(xpcs, mode, speed, duplex); + if (interface == PHY_INTERFACE_MODE_1000BASEX) + return xpcs_link_up_1000basex(xpcs, mode, speed, duplex); } EXPORT_SYMBOL_GPL(xpcs_link_up); +static void xpcs_an_restart(struct phylink_pcs *pcs) +{ + struct dw_xpcs *xpcs = phylink_pcs_to_xpcs(pcs); + int ret; + + ret = xpcs_read(xpcs, MDIO_MMD_VEND2, MDIO_CTRL1); + if (ret >= 0) { + ret |= BMCR_ANRESTART; + xpcs_write(xpcs, MDIO_MMD_VEND2, MDIO_CTRL1, ret); + } +} + static u32 xpcs_get_id(struct dw_xpcs *xpcs) { int ret; @@ -1060,6 +1207,12 @@ static const struct xpcs_compat synopsys_xpcs_compat[DW_XPCS_INTERFACE_MAX] = { .num_interfaces = ARRAY_SIZE(xpcs_sgmii_interfaces), .an_mode = DW_AN_C37_SGMII, }, + [DW_XPCS_1000BASEX] = { + .supported = xpcs_1000basex_features, + .interface = xpcs_1000basex_interfaces, + .num_interfaces = ARRAY_SIZE(xpcs_1000basex_interfaces), + .an_mode = DW_AN_C37_1000BASEX, + }, [DW_XPCS_2500BASEX] = { .supported = xpcs_2500basex_features, .interface = xpcs_2500basex_interfaces, @@ -1115,6 +1268,7 @@ static const struct phylink_pcs_ops xpcs_phylink_ops = { .pcs_validate = xpcs_validate, .pcs_config = xpcs_config, .pcs_get_state = xpcs_get_state, + .pcs_an_restart = xpcs_an_restart, .pcs_link_up = xpcs_link_up, }; diff --git a/drivers/net/pcs/pcs-xpcs.h b/drivers/net/pcs/pcs-xpcs.h index 35651d32a224..770df50323a0 100644 --- a/drivers/net/pcs/pcs-xpcs.h +++ b/drivers/net/pcs/pcs-xpcs.h @@ -109,7 +109,6 @@ int xpcs_read(struct dw_xpcs *xpcs, int dev, u32 reg); int xpcs_write(struct dw_xpcs *xpcs, int dev, u32 reg, u16 val); - int nxp_sja1105_sgmii_pma_config(struct dw_xpcs *xpcs); int 
nxp_sja1110_sgmii_pma_config(struct dw_xpcs *xpcs); int nxp_sja1110_2500basex_pma_config(struct dw_xpcs *xpcs); diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 9fee639ee5c8..c57a0262fb64 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -104,6 +104,8 @@ config AX88796B_PHY config BROADCOM_PHY tristate "Broadcom 54XX PHYs" select BCM_NET_PHYLIB + select BCM_NET_PHYPTP if NETWORK_PHY_TIMESTAMPING + depends on PTP_1588_CLOCK_OPTIONAL help Currently supports the BCM5411, BCM5421, BCM5461, BCM54616S, BCM5464, BCM5481, BCM54810 and BCM5482 PHYs. @@ -160,6 +162,9 @@ config BCM_CYGNUS_PHY config BCM_NET_PHYLIB tristate +config BCM_NET_PHYPTP + tristate + config CICADA_PHY tristate "Cicada PHYs" help @@ -216,6 +221,8 @@ config MARVELL_88X2222_PHY config MAXLINEAR_GPHY tristate "Maxlinear Ethernet PHYs" + select POLYNOMIAL if HWMON + depends on HWMON || HWMON=n help Support for the Maxlinear GPY115, GPY211, GPY212, GPY215, GPY241, GPY245 PHYs. diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index b12b1d86fc99..f7138d3c896b 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -47,6 +47,7 @@ obj-$(CONFIG_BCM84881_PHY) += bcm84881.o obj-$(CONFIG_BCM87XX_PHY) += bcm87xx.o obj-$(CONFIG_BCM_CYGNUS_PHY) += bcm-cygnus.o obj-$(CONFIG_BCM_NET_PHYLIB) += bcm-phy-lib.o +obj-$(CONFIG_BCM_NET_PHYPTP) += bcm-phy-ptp.o obj-$(CONFIG_BROADCOM_PHY) += broadcom.o obj-$(CONFIG_CICADA_PHY) += cicada.o obj-$(CONFIG_CORTINA_PHY) += cortina.o diff --git a/drivers/net/phy/aquantia_main.c b/drivers/net/phy/aquantia_main.c index c7047f5d7a9b..8b7a46db30e0 100644 --- a/drivers/net/phy/aquantia_main.c +++ b/drivers/net/phy/aquantia_main.c @@ -22,6 +22,7 @@ #define PHY_ID_AQR107 0x03a1b4e0 #define PHY_ID_AQCS109 0x03a1b5c2 #define PHY_ID_AQR405 0x03a1b4b0 +#define PHY_ID_AQR113C 0x31c31c12 #define MDIO_PHYXS_VEND_IF_STATUS 0xe812 #define MDIO_PHYXS_VEND_IF_STATUS_TYPE_MASK GENMASK(7, 3) @@ -697,6 +698,24 @@ static struct 
phy_driver aqr_driver[] = { .handle_interrupt = aqr_handle_interrupt, .read_status = aqr_read_status, }, +{ + PHY_ID_MATCH_MODEL(PHY_ID_AQR113C), + .name = "Aquantia AQR113C", + .probe = aqr107_probe, + .config_init = aqr107_config_init, + .config_aneg = aqr_config_aneg, + .config_intr = aqr_config_intr, + .handle_interrupt = aqr_handle_interrupt, + .read_status = aqr107_read_status, + .get_tunable = aqr107_get_tunable, + .set_tunable = aqr107_set_tunable, + .suspend = aqr107_suspend, + .resume = aqr107_resume, + .get_sset_count = aqr107_get_sset_count, + .get_strings = aqr107_get_strings, + .get_stats = aqr107_get_stats, + .link_change_notify = aqr107_link_change_notify, +}, }; module_phy_driver(aqr_driver); @@ -709,6 +728,7 @@ static struct mdio_device_id __maybe_unused aqr_tbl[] = { { PHY_ID_MATCH_MODEL(PHY_ID_AQR107) }, { PHY_ID_MATCH_MODEL(PHY_ID_AQCS109) }, { PHY_ID_MATCH_MODEL(PHY_ID_AQR405) }, + { PHY_ID_MATCH_MODEL(PHY_ID_AQR113C) }, { } }; diff --git a/drivers/net/phy/bcm-phy-lib.h b/drivers/net/phy/bcm-phy-lib.h index c3842f87c33b..9902fb182099 100644 --- a/drivers/net/phy/bcm-phy-lib.h +++ b/drivers/net/phy/bcm-phy-lib.h @@ -87,4 +87,23 @@ int bcm_phy_cable_test_start_rdb(struct phy_device *phydev); int bcm_phy_cable_test_start(struct phy_device *phydev); int bcm_phy_cable_test_get_status(struct phy_device *phydev, bool *finished); +#if IS_ENABLED(CONFIG_BCM_NET_PHYPTP) +struct bcm_ptp_private *bcm_ptp_probe(struct phy_device *phydev); +void bcm_ptp_config_init(struct phy_device *phydev); +void bcm_ptp_stop(struct bcm_ptp_private *priv); +#else +static inline struct bcm_ptp_private *bcm_ptp_probe(struct phy_device *phydev) +{ + return NULL; +} + +static inline void bcm_ptp_config_init(struct phy_device *phydev) +{ +} + +static inline void bcm_ptp_stop(struct bcm_ptp_private *priv) +{ +} +#endif + #endif /* _LINUX_BCM_PHY_LIB_H */ diff --git a/drivers/net/phy/bcm-phy-ptp.c b/drivers/net/phy/bcm-phy-ptp.c new file mode 100644 index 
000000000000..ef00d6163061 --- /dev/null +++ b/drivers/net/phy/bcm-phy-ptp.c @@ -0,0 +1,944 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022 Meta Platforms Inc. + * Copyright (C) 2022 Jonathan Lemon <jonathan.lemon@gmail.com> + */ + +#include <asm/unaligned.h> +#include <linux/mii.h> +#include <linux/phy.h> +#include <linux/ptp_classify.h> +#include <linux/ptp_clock_kernel.h> +#include <linux/net_tstamp.h> +#include <linux/netdevice.h> +#include <linux/workqueue.h> + +#include "bcm-phy-lib.h" + +/* IEEE 1588 Expansion registers */ +#define SLICE_CTRL 0x0810 +#define SLICE_TX_EN BIT(0) +#define SLICE_RX_EN BIT(8) +#define TX_EVENT_MODE 0x0811 +#define MODE_TX_UPDATE_CF BIT(0) +#define MODE_TX_REPLACE_TS_CF BIT(1) +#define MODE_TX_REPLACE_TS GENMASK(1, 0) +#define RX_EVENT_MODE 0x0819 +#define MODE_RX_UPDATE_CF BIT(0) +#define MODE_RX_INSERT_TS_48 BIT(1) +#define MODE_RX_INSERT_TS_64 GENMASK(1, 0) + +#define MODE_EVT_SHIFT_SYNC 0 +#define MODE_EVT_SHIFT_DELAY_REQ 2 +#define MODE_EVT_SHIFT_PDELAY_REQ 4 +#define MODE_EVT_SHIFT_PDELAY_RESP 6 + +#define MODE_SEL_SHIFT_PORT 0 +#define MODE_SEL_SHIFT_CPU 8 + +#define RX_MODE_SEL(sel, evt, act) \ + (((MODE_RX_##act) << (MODE_EVT_SHIFT_##evt)) << (MODE_SEL_SHIFT_##sel)) + +#define TX_MODE_SEL(sel, evt, act) \ + (((MODE_TX_##act) << (MODE_EVT_SHIFT_##evt)) << (MODE_SEL_SHIFT_##sel)) + +/* needs global TS capture first */ +#define TX_TS_CAPTURE 0x0821 +#define TX_TS_CAP_EN BIT(0) +#define RX_TS_CAPTURE 0x0822 +#define RX_TS_CAP_EN BIT(0) + +#define TIME_CODE_0 0x0854 +#define TIME_CODE_1 0x0855 +#define TIME_CODE_2 0x0856 +#define TIME_CODE_3 0x0857 +#define TIME_CODE_4 0x0858 + +#define DPLL_SELECT 0x085b +#define DPLL_HB_MODE2 BIT(6) + +#define SHADOW_CTRL 0x085c +#define SHADOW_LOAD 0x085d +#define TIME_CODE_LOAD BIT(10) +#define SYNC_OUT_LOAD BIT(9) +#define NCO_TIME_LOAD BIT(7) +#define FREQ_LOAD BIT(6) +#define INTR_MASK 0x085e +#define INTR_STATUS 0x085f +#define INTC_FSYNC BIT(0) +#define INTC_SOP 
BIT(1) + +#define NCO_FREQ_LSB 0x0873 +#define NCO_FREQ_MSB 0x0874 + +#define NCO_TIME_0 0x0875 +#define NCO_TIME_1 0x0876 +#define NCO_TIME_2_CTRL 0x0877 +#define FREQ_MDIO_SEL BIT(14) + +#define SYNC_OUT_0 0x0878 +#define SYNC_OUT_1 0x0879 +#define SYNC_OUT_2 0x087a + +#define SYNC_IN_DIVIDER 0x087b + +#define SYNOUT_TS_0 0x087c +#define SYNOUT_TS_1 0x087d +#define SYNOUT_TS_2 0x087e + +#define NSE_CTRL 0x087f +#define NSE_GMODE_EN GENMASK(15, 14) +#define NSE_CAPTURE_EN BIT(13) +#define NSE_INIT BIT(12) +#define NSE_CPU_FRAMESYNC BIT(5) +#define NSE_SYNC1_FRAMESYNC BIT(3) +#define NSE_FRAMESYNC_MASK GENMASK(5, 2) +#define NSE_PEROUT_EN BIT(1) +#define NSE_ONESHOT_EN BIT(0) +#define NSE_SYNC_OUT_MASK GENMASK(1, 0) + +#define TS_READ_CTRL 0x0885 +#define TS_READ_START BIT(0) +#define TS_READ_END BIT(1) + +#define HB_REG_0 0x0886 +#define HB_REG_1 0x0887 +#define HB_REG_2 0x0888 +#define HB_REG_3 0x08ec +#define HB_REG_4 0x08ed +#define HB_STAT_CTRL 0x088e +#define HB_READ_START BIT(10) +#define HB_READ_END BIT(11) +#define HB_READ_MASK GENMASK(11, 10) + +#define TS_REG_0 0x0889 +#define TS_REG_1 0x088a +#define TS_REG_2 0x088b +#define TS_REG_3 0x08c4 + +#define TS_INFO_0 0x088c +#define TS_INFO_1 0x088d + +#define TIMECODE_CTRL 0x08c3 +#define TX_TIMECODE_SEL GENMASK(7, 0) +#define RX_TIMECODE_SEL GENMASK(15, 8) + +#define TIME_SYNC 0x0ff5 +#define TIME_SYNC_EN BIT(0) + +struct bcm_ptp_private { + struct phy_device *phydev; + struct mii_timestamper mii_ts; + struct ptp_clock *ptp_clock; + struct ptp_clock_info ptp_info; + struct ptp_pin_desc pin; + struct mutex mutex; + struct sk_buff_head tx_queue; + int tx_type; + bool hwts_rx; + u16 nse_ctrl; + bool pin_active; + struct delayed_work pin_work; +}; + +struct bcm_ptp_skb_cb { + unsigned long timeout; + u16 seq_id; + u8 msgtype; + bool discard; +}; + +struct bcm_ptp_capture { + ktime_t hwtstamp; + u16 seq_id; + u8 msgtype; + bool tx_dir; +}; + +#define BCM_SKB_CB(skb) ((struct bcm_ptp_skb_cb *)(skb)->cb) +#define 
SKB_TS_TIMEOUT 10 /* jiffies */ + +#define BCM_MAX_PULSE_8NS ((1U << 9) - 1) +#define BCM_MAX_PERIOD_8NS ((1U << 30) - 1) + +#define BRCM_PHY_MODEL(phydev) \ + ((phydev)->drv->phy_id & (phydev)->drv->phy_id_mask) + +static struct bcm_ptp_private *mii2priv(struct mii_timestamper *mii_ts) +{ + return container_of(mii_ts, struct bcm_ptp_private, mii_ts); +} + +static struct bcm_ptp_private *ptp2priv(struct ptp_clock_info *info) +{ + return container_of(info, struct bcm_ptp_private, ptp_info); +} + +static void bcm_ptp_get_framesync_ts(struct phy_device *phydev, + struct timespec64 *ts) +{ + u16 hb[4]; + + bcm_phy_write_exp(phydev, HB_STAT_CTRL, HB_READ_START); + + hb[0] = bcm_phy_read_exp(phydev, HB_REG_0); + hb[1] = bcm_phy_read_exp(phydev, HB_REG_1); + hb[2] = bcm_phy_read_exp(phydev, HB_REG_2); + hb[3] = bcm_phy_read_exp(phydev, HB_REG_3); + + bcm_phy_write_exp(phydev, HB_STAT_CTRL, HB_READ_END); + bcm_phy_write_exp(phydev, HB_STAT_CTRL, 0); + + ts->tv_sec = (hb[3] << 16) | hb[2]; + ts->tv_nsec = (hb[1] << 16) | hb[0]; +} + +static u16 bcm_ptp_framesync_disable(struct phy_device *phydev, u16 orig_ctrl) +{ + u16 ctrl = orig_ctrl & ~(NSE_FRAMESYNC_MASK | NSE_CAPTURE_EN); + + bcm_phy_write_exp(phydev, NSE_CTRL, ctrl); + + return ctrl; +} + +static void bcm_ptp_framesync_restore(struct phy_device *phydev, u16 orig_ctrl) +{ + if (orig_ctrl & NSE_FRAMESYNC_MASK) + bcm_phy_write_exp(phydev, NSE_CTRL, orig_ctrl); +} + +static void bcm_ptp_framesync(struct phy_device *phydev, u16 ctrl) +{ + /* trigger framesync - must have 0->1 transition. 
*/ + bcm_phy_write_exp(phydev, NSE_CTRL, ctrl | NSE_CPU_FRAMESYNC); +} + +static int bcm_ptp_framesync_ts(struct phy_device *phydev, + struct ptp_system_timestamp *sts, + struct timespec64 *ts, + u16 orig_ctrl) +{ + u16 ctrl, reg; + int i; + + ctrl = bcm_ptp_framesync_disable(phydev, orig_ctrl); + + ptp_read_system_prets(sts); + + /* trigger framesync + capture */ + bcm_ptp_framesync(phydev, ctrl | NSE_CAPTURE_EN); + + ptp_read_system_postts(sts); + + /* poll for FSYNC interrupt from TS capture */ + for (i = 0; i < 10; i++) { + reg = bcm_phy_read_exp(phydev, INTR_STATUS); + if (reg & INTC_FSYNC) { + bcm_ptp_get_framesync_ts(phydev, ts); + break; + } + } + + bcm_ptp_framesync_restore(phydev, orig_ctrl); + + return reg & INTC_FSYNC ? 0 : -ETIMEDOUT; +} + +static int bcm_ptp_gettimex(struct ptp_clock_info *info, + struct timespec64 *ts, + struct ptp_system_timestamp *sts) +{ + struct bcm_ptp_private *priv = ptp2priv(info); + int err; + + mutex_lock(&priv->mutex); + err = bcm_ptp_framesync_ts(priv->phydev, sts, ts, priv->nse_ctrl); + mutex_unlock(&priv->mutex); + + return err; +} + +static int bcm_ptp_settime_locked(struct bcm_ptp_private *priv, + const struct timespec64 *ts) +{ + struct phy_device *phydev = priv->phydev; + u16 ctrl; + u64 ns; + + ctrl = bcm_ptp_framesync_disable(phydev, priv->nse_ctrl); + + /* set up time code */ + bcm_phy_write_exp(phydev, TIME_CODE_0, ts->tv_nsec); + bcm_phy_write_exp(phydev, TIME_CODE_1, ts->tv_nsec >> 16); + bcm_phy_write_exp(phydev, TIME_CODE_2, ts->tv_sec); + bcm_phy_write_exp(phydev, TIME_CODE_3, ts->tv_sec >> 16); + bcm_phy_write_exp(phydev, TIME_CODE_4, ts->tv_sec >> 32); + + /* set NCO counter to match */ + ns = timespec64_to_ns(ts); + bcm_phy_write_exp(phydev, NCO_TIME_0, ns >> 4); + bcm_phy_write_exp(phydev, NCO_TIME_1, ns >> 20); + bcm_phy_write_exp(phydev, NCO_TIME_2_CTRL, (ns >> 36) & 0xfff); + + /* set up load on next frame sync (auto-clears due to NSE_INIT) */ + bcm_phy_write_exp(phydev, SHADOW_LOAD, TIME_CODE_LOAD | 
NCO_TIME_LOAD); + + /* must have NSE_INIT in order to write time code */ + bcm_ptp_framesync(phydev, ctrl | NSE_INIT); + + bcm_ptp_framesync_restore(phydev, priv->nse_ctrl); + + return 0; +} + +static int bcm_ptp_settime(struct ptp_clock_info *info, + const struct timespec64 *ts) +{ + struct bcm_ptp_private *priv = ptp2priv(info); + int err; + + mutex_lock(&priv->mutex); + err = bcm_ptp_settime_locked(priv, ts); + mutex_unlock(&priv->mutex); + + return err; +} + +static int bcm_ptp_adjtime_locked(struct bcm_ptp_private *priv, + s64 delta_ns) +{ + struct timespec64 ts; + int err; + s64 ns; + + err = bcm_ptp_framesync_ts(priv->phydev, NULL, &ts, priv->nse_ctrl); + if (!err) { + ns = timespec64_to_ns(&ts) + delta_ns; + ts = ns_to_timespec64(ns); + err = bcm_ptp_settime_locked(priv, &ts); + } + return err; +} + +static int bcm_ptp_adjtime(struct ptp_clock_info *info, s64 delta_ns) +{ + struct bcm_ptp_private *priv = ptp2priv(info); + int err; + + mutex_lock(&priv->mutex); + err = bcm_ptp_adjtime_locked(priv, delta_ns); + mutex_unlock(&priv->mutex); + + return err; +} + +/* A 125Mhz clock should adjust 8ns per pulse. + * The frequency adjustment base is 0x8000 0000, or 8*2^28. + * + * Frequency adjustment is + * adj = scaled_ppm * 8*2^28 / (10^6 * 2^16) + * which simplifies to: + * adj = scaled_ppm * 2^9 / 5^6 + */ +static int bcm_ptp_adjfine(struct ptp_clock_info *info, long scaled_ppm) +{ + struct bcm_ptp_private *priv = ptp2priv(info); + int neg_adj = 0; + u32 diff, freq; + u16 ctrl; + u64 adj; + + if (scaled_ppm < 0) { + neg_adj = 1; + scaled_ppm = -scaled_ppm; + } + + adj = scaled_ppm << 9; + diff = div_u64(adj, 15625); + freq = (8 << 28) + (neg_adj ? 
-diff : diff); + + mutex_lock(&priv->mutex); + + ctrl = bcm_ptp_framesync_disable(priv->phydev, priv->nse_ctrl); + + bcm_phy_write_exp(priv->phydev, NCO_FREQ_LSB, freq); + bcm_phy_write_exp(priv->phydev, NCO_FREQ_MSB, freq >> 16); + + bcm_phy_write_exp(priv->phydev, NCO_TIME_2_CTRL, FREQ_MDIO_SEL); + + /* load on next framesync */ + bcm_phy_write_exp(priv->phydev, SHADOW_LOAD, FREQ_LOAD); + + bcm_ptp_framesync(priv->phydev, ctrl); + + /* clear load */ + bcm_phy_write_exp(priv->phydev, SHADOW_LOAD, 0); + + bcm_ptp_framesync_restore(priv->phydev, priv->nse_ctrl); + + mutex_unlock(&priv->mutex); + + return 0; +} + +static bool bcm_ptp_rxtstamp(struct mii_timestamper *mii_ts, + struct sk_buff *skb, int type) +{ + struct bcm_ptp_private *priv = mii2priv(mii_ts); + struct skb_shared_hwtstamps *hwts; + struct ptp_header *header; + u32 sec, nsec; + u8 *data; + int off; + + if (!priv->hwts_rx) + return false; + + header = ptp_parse_header(skb, type); + if (!header) + return false; + + data = (u8 *)(header + 1); + sec = get_unaligned_be32(data); + nsec = get_unaligned_be32(data + 4); + + hwts = skb_hwtstamps(skb); + hwts->hwtstamp = ktime_set(sec, nsec); + + off = data - skb->data + 8; + if (off < skb->len) { + memmove(data, data + 8, skb->len - off); + __pskb_trim(skb, skb->len - 8); + } + + return false; +} + +static bool bcm_ptp_get_tstamp(struct bcm_ptp_private *priv, + struct bcm_ptp_capture *capts) +{ + struct phy_device *phydev = priv->phydev; + u16 ts[4], reg; + u32 sec, nsec; + + mutex_lock(&priv->mutex); + + reg = bcm_phy_read_exp(phydev, INTR_STATUS); + if ((reg & INTC_SOP) == 0) { + mutex_unlock(&priv->mutex); + return false; + } + + bcm_phy_write_exp(phydev, TS_READ_CTRL, TS_READ_START); + + ts[0] = bcm_phy_read_exp(phydev, TS_REG_0); + ts[1] = bcm_phy_read_exp(phydev, TS_REG_1); + ts[2] = bcm_phy_read_exp(phydev, TS_REG_2); + ts[3] = bcm_phy_read_exp(phydev, TS_REG_3); + + /* not in be32 format for some reason */ + capts->seq_id = bcm_phy_read_exp(priv->phydev, 
TS_INFO_0); + + reg = bcm_phy_read_exp(phydev, TS_INFO_1); + capts->msgtype = reg >> 12; + capts->tx_dir = !!(reg & BIT(11)); + + bcm_phy_write_exp(phydev, TS_READ_CTRL, TS_READ_END); + bcm_phy_write_exp(phydev, TS_READ_CTRL, 0); + + mutex_unlock(&priv->mutex); + + sec = (ts[3] << 16) | ts[2]; + nsec = (ts[1] << 16) | ts[0]; + capts->hwtstamp = ktime_set(sec, nsec); + + return true; +} + +static void bcm_ptp_match_tstamp(struct bcm_ptp_private *priv, + struct bcm_ptp_capture *capts) +{ + struct skb_shared_hwtstamps hwts; + struct sk_buff *skb, *ts_skb; + unsigned long flags; + bool first = false; + + ts_skb = NULL; + spin_lock_irqsave(&priv->tx_queue.lock, flags); + skb_queue_walk(&priv->tx_queue, skb) { + if (BCM_SKB_CB(skb)->seq_id == capts->seq_id && + BCM_SKB_CB(skb)->msgtype == capts->msgtype) { + first = skb_queue_is_first(&priv->tx_queue, skb); + __skb_unlink(skb, &priv->tx_queue); + ts_skb = skb; + break; + } + } + spin_unlock_irqrestore(&priv->tx_queue.lock, flags); + + /* TX captures one-step packets, discard them if needed. */ + if (ts_skb) { + if (BCM_SKB_CB(ts_skb)->discard) { + kfree_skb(ts_skb); + } else { + memset(&hwts, 0, sizeof(hwts)); + hwts.hwtstamp = capts->hwtstamp; + skb_complete_tx_timestamp(ts_skb, &hwts); + } + } + + /* not first match, try and expire entries */ + if (!first) { + while ((skb = skb_dequeue(&priv->tx_queue))) { + if (!time_after(jiffies, BCM_SKB_CB(skb)->timeout)) { + skb_queue_head(&priv->tx_queue, skb); + break; + } + kfree_skb(skb); + } + } +} + +static long bcm_ptp_do_aux_work(struct ptp_clock_info *info) +{ + struct bcm_ptp_private *priv = ptp2priv(info); + struct bcm_ptp_capture capts; + bool reschedule = false; + + while (!skb_queue_empty_lockless(&priv->tx_queue)) { + if (!bcm_ptp_get_tstamp(priv, &capts)) { + reschedule = true; + break; + } + bcm_ptp_match_tstamp(priv, &capts); + } + + return reschedule ? 
1 : -1; +} + +static int bcm_ptp_cancel_func(struct bcm_ptp_private *priv) +{ + if (!priv->pin_active) + return 0; + + priv->pin_active = false; + + priv->nse_ctrl &= ~(NSE_SYNC_OUT_MASK | NSE_SYNC1_FRAMESYNC | + NSE_CAPTURE_EN); + bcm_phy_write_exp(priv->phydev, NSE_CTRL, priv->nse_ctrl); + + cancel_delayed_work_sync(&priv->pin_work); + + return 0; +} + +static void bcm_ptp_perout_work(struct work_struct *pin_work) +{ + struct bcm_ptp_private *priv = + container_of(pin_work, struct bcm_ptp_private, pin_work.work); + struct phy_device *phydev = priv->phydev; + struct timespec64 ts; + u64 ns, next; + u16 ctrl; + + mutex_lock(&priv->mutex); + + /* no longer running */ + if (!priv->pin_active) { + mutex_unlock(&priv->mutex); + return; + } + + bcm_ptp_framesync_ts(phydev, NULL, &ts, priv->nse_ctrl); + + /* this is 1PPS only */ + next = NSEC_PER_SEC - ts.tv_nsec; + ts.tv_sec += next < NSEC_PER_MSEC ? 2 : 1; + ts.tv_nsec = 0; + + ns = timespec64_to_ns(&ts); + + /* force 0->1 transition for ONESHOT */ + ctrl = bcm_ptp_framesync_disable(phydev, + priv->nse_ctrl & ~NSE_ONESHOT_EN); + + bcm_phy_write_exp(phydev, SYNOUT_TS_0, ns & 0xfff0); + bcm_phy_write_exp(phydev, SYNOUT_TS_1, ns >> 16); + bcm_phy_write_exp(phydev, SYNOUT_TS_2, ns >> 32); + + /* load values on next framesync */ + bcm_phy_write_exp(phydev, SHADOW_LOAD, SYNC_OUT_LOAD); + + bcm_ptp_framesync(phydev, ctrl | NSE_ONESHOT_EN | NSE_INIT); + + priv->nse_ctrl |= NSE_ONESHOT_EN; + bcm_ptp_framesync_restore(phydev, priv->nse_ctrl); + + mutex_unlock(&priv->mutex); + + next = next + NSEC_PER_MSEC; + schedule_delayed_work(&priv->pin_work, nsecs_to_jiffies(next)); +} + +static int bcm_ptp_perout_locked(struct bcm_ptp_private *priv, + struct ptp_perout_request *req, int on) +{ + struct phy_device *phydev = priv->phydev; + u64 period, pulse; + u16 val; + + if (!on) + return bcm_ptp_cancel_func(priv); + + /* 1PPS */ + if (req->period.sec != 1 || req->period.nsec != 0) + return -EINVAL; + + period = BCM_MAX_PERIOD_8NS; /* 
write nonzero value */ + + if (req->flags & PTP_PEROUT_PHASE) + return -EOPNOTSUPP; + + if (req->flags & PTP_PEROUT_DUTY_CYCLE) + pulse = ktime_to_ns(ktime_set(req->on.sec, req->on.nsec)); + else + pulse = (u64)BCM_MAX_PULSE_8NS << 3; + + /* convert to 8ns units */ + pulse >>= 3; + + if (!pulse || pulse > period || pulse > BCM_MAX_PULSE_8NS) + return -EINVAL; + + bcm_phy_write_exp(phydev, SYNC_OUT_0, period); + + val = ((pulse & 0x3) << 14) | ((period >> 16) & 0x3fff); + bcm_phy_write_exp(phydev, SYNC_OUT_1, val); + + val = ((pulse >> 2) & 0x7f) | (pulse << 7); + bcm_phy_write_exp(phydev, SYNC_OUT_2, val); + + if (priv->pin_active) + cancel_delayed_work_sync(&priv->pin_work); + + priv->pin_active = true; + INIT_DELAYED_WORK(&priv->pin_work, bcm_ptp_perout_work); + schedule_delayed_work(&priv->pin_work, 0); + + return 0; +} + +static void bcm_ptp_extts_work(struct work_struct *pin_work) +{ + struct bcm_ptp_private *priv = + container_of(pin_work, struct bcm_ptp_private, pin_work.work); + struct phy_device *phydev = priv->phydev; + struct ptp_clock_event event; + struct timespec64 ts; + u16 reg; + + mutex_lock(&priv->mutex); + + /* no longer running */ + if (!priv->pin_active) { + mutex_unlock(&priv->mutex); + return; + } + + reg = bcm_phy_read_exp(phydev, INTR_STATUS); + if ((reg & INTC_FSYNC) == 0) + goto out; + + bcm_ptp_get_framesync_ts(phydev, &ts); + + event.index = 0; + event.type = PTP_CLOCK_EXTTS; + event.timestamp = timespec64_to_ns(&ts); + ptp_clock_event(priv->ptp_clock, &event); + +out: + mutex_unlock(&priv->mutex); + schedule_delayed_work(&priv->pin_work, HZ / 4); +} + +static int bcm_ptp_extts_locked(struct bcm_ptp_private *priv, int on) +{ + struct phy_device *phydev = priv->phydev; + + if (!on) + return bcm_ptp_cancel_func(priv); + + if (priv->pin_active) + cancel_delayed_work_sync(&priv->pin_work); + + bcm_ptp_framesync_disable(phydev, priv->nse_ctrl); + + priv->nse_ctrl |= NSE_SYNC1_FRAMESYNC | NSE_CAPTURE_EN; + + bcm_ptp_framesync_restore(phydev, 
priv->nse_ctrl); + + priv->pin_active = true; + INIT_DELAYED_WORK(&priv->pin_work, bcm_ptp_extts_work); + schedule_delayed_work(&priv->pin_work, 0); + + return 0; +} + +static int bcm_ptp_enable(struct ptp_clock_info *info, + struct ptp_clock_request *rq, int on) +{ + struct bcm_ptp_private *priv = ptp2priv(info); + int err = -EBUSY; + + mutex_lock(&priv->mutex); + + switch (rq->type) { + case PTP_CLK_REQ_PEROUT: + if (priv->pin.func == PTP_PF_PEROUT) + err = bcm_ptp_perout_locked(priv, &rq->perout, on); + break; + case PTP_CLK_REQ_EXTTS: + if (priv->pin.func == PTP_PF_EXTTS) + err = bcm_ptp_extts_locked(priv, on); + break; + default: + err = -EOPNOTSUPP; + break; + } + + mutex_unlock(&priv->mutex); + + return err; +} + +static int bcm_ptp_verify(struct ptp_clock_info *info, unsigned int pin, + enum ptp_pin_function func, unsigned int chan) +{ + switch (func) { + case PTP_PF_NONE: + case PTP_PF_EXTTS: + case PTP_PF_PEROUT: + break; + default: + return -EOPNOTSUPP; + } + return 0; +} + +static const struct ptp_clock_info bcm_ptp_clock_info = { + .owner = THIS_MODULE, + .name = KBUILD_MODNAME, + .max_adj = 100000000, + .gettimex64 = bcm_ptp_gettimex, + .settime64 = bcm_ptp_settime, + .adjtime = bcm_ptp_adjtime, + .adjfine = bcm_ptp_adjfine, + .enable = bcm_ptp_enable, + .verify = bcm_ptp_verify, + .do_aux_work = bcm_ptp_do_aux_work, + .n_pins = 1, + .n_per_out = 1, + .n_ext_ts = 1, +}; + +static void bcm_ptp_txtstamp(struct mii_timestamper *mii_ts, + struct sk_buff *skb, int type) +{ + struct bcm_ptp_private *priv = mii2priv(mii_ts); + struct ptp_header *hdr; + bool discard = false; + int msgtype; + + hdr = ptp_parse_header(skb, type); + if (!hdr) + goto out; + msgtype = ptp_get_msgtype(hdr, type); + + switch (priv->tx_type) { + case HWTSTAMP_TX_ONESTEP_P2P: + if (msgtype == PTP_MSGTYPE_PDELAY_RESP) + discard = true; + fallthrough; + case HWTSTAMP_TX_ONESTEP_SYNC: + if (msgtype == PTP_MSGTYPE_SYNC) + discard = true; + fallthrough; + case HWTSTAMP_TX_ON: + 
BCM_SKB_CB(skb)->timeout = jiffies + SKB_TS_TIMEOUT; + BCM_SKB_CB(skb)->seq_id = be16_to_cpu(hdr->sequence_id); + BCM_SKB_CB(skb)->msgtype = msgtype; + BCM_SKB_CB(skb)->discard = discard; + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + skb_queue_tail(&priv->tx_queue, skb); + ptp_schedule_worker(priv->ptp_clock, 0); + return; + default: + break; + } + +out: + kfree_skb(skb); +} + +static int bcm_ptp_hwtstamp(struct mii_timestamper *mii_ts, + struct ifreq *ifr) +{ + struct bcm_ptp_private *priv = mii2priv(mii_ts); + struct hwtstamp_config cfg; + u16 mode, ctrl; + + if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) + return -EFAULT; + + switch (cfg.rx_filter) { + case HWTSTAMP_FILTER_NONE: + priv->hwts_rx = false; + break; + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + priv->hwts_rx = true; + break; + default: + return -ERANGE; + } + + priv->tx_type = cfg.tx_type; + + ctrl = priv->hwts_rx ? SLICE_RX_EN : 0; + ctrl |= priv->tx_type != HWTSTAMP_TX_OFF ? 
SLICE_TX_EN : 0; + + mode = TX_MODE_SEL(PORT, SYNC, REPLACE_TS) | + TX_MODE_SEL(PORT, DELAY_REQ, REPLACE_TS) | + TX_MODE_SEL(PORT, PDELAY_REQ, REPLACE_TS) | + TX_MODE_SEL(PORT, PDELAY_RESP, REPLACE_TS); + + bcm_phy_write_exp(priv->phydev, TX_EVENT_MODE, mode); + + mode = RX_MODE_SEL(PORT, SYNC, INSERT_TS_64) | + RX_MODE_SEL(PORT, DELAY_REQ, INSERT_TS_64) | + RX_MODE_SEL(PORT, PDELAY_REQ, INSERT_TS_64) | + RX_MODE_SEL(PORT, PDELAY_RESP, INSERT_TS_64); + + bcm_phy_write_exp(priv->phydev, RX_EVENT_MODE, mode); + + bcm_phy_write_exp(priv->phydev, SLICE_CTRL, ctrl); + + if (ctrl & SLICE_TX_EN) + bcm_phy_write_exp(priv->phydev, TX_TS_CAPTURE, TX_TS_CAP_EN); + else + ptp_cancel_worker_sync(priv->ptp_clock); + + /* purge existing data */ + skb_queue_purge(&priv->tx_queue); + + return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0; +} + +static int bcm_ptp_ts_info(struct mii_timestamper *mii_ts, + struct ethtool_ts_info *ts_info) +{ + struct bcm_ptp_private *priv = mii2priv(mii_ts); + + ts_info->phc_index = ptp_clock_index(priv->ptp_clock); + ts_info->so_timestamping = + SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + ts_info->tx_types = + BIT(HWTSTAMP_TX_ON) | + BIT(HWTSTAMP_TX_OFF) | + BIT(HWTSTAMP_TX_ONESTEP_SYNC) | + BIT(HWTSTAMP_TX_ONESTEP_P2P); + ts_info->rx_filters = + BIT(HWTSTAMP_FILTER_NONE) | + BIT(HWTSTAMP_FILTER_PTP_V2_EVENT); + + return 0; +} + +void bcm_ptp_stop(struct bcm_ptp_private *priv) +{ + ptp_cancel_worker_sync(priv->ptp_clock); + bcm_ptp_cancel_func(priv); +} +EXPORT_SYMBOL_GPL(bcm_ptp_stop); + +void bcm_ptp_config_init(struct phy_device *phydev) +{ + /* init network sync engine */ + bcm_phy_write_exp(phydev, NSE_CTRL, NSE_GMODE_EN | NSE_INIT); + + /* enable time sync (TX/RX SOP capture) */ + bcm_phy_write_exp(phydev, TIME_SYNC, TIME_SYNC_EN); + + /* use sec.nsec heartbeat capture */ + bcm_phy_write_exp(phydev, DPLL_SELECT, DPLL_HB_MODE2); + + /* use 64 bit timecode for TX */ + 
bcm_phy_write_exp(phydev, TIMECODE_CTRL, TX_TIMECODE_SEL); + + /* always allow FREQ_LOAD on framesync */ + bcm_phy_write_exp(phydev, SHADOW_CTRL, FREQ_LOAD); + + bcm_phy_write_exp(phydev, SYNC_IN_DIVIDER, 1); +} +EXPORT_SYMBOL_GPL(bcm_ptp_config_init); + +static void bcm_ptp_init(struct bcm_ptp_private *priv) +{ + priv->nse_ctrl = NSE_GMODE_EN; + + mutex_init(&priv->mutex); + skb_queue_head_init(&priv->tx_queue); + + priv->mii_ts.rxtstamp = bcm_ptp_rxtstamp; + priv->mii_ts.txtstamp = bcm_ptp_txtstamp; + priv->mii_ts.hwtstamp = bcm_ptp_hwtstamp; + priv->mii_ts.ts_info = bcm_ptp_ts_info; + + priv->phydev->mii_ts = &priv->mii_ts; +} + +struct bcm_ptp_private *bcm_ptp_probe(struct phy_device *phydev) +{ + struct bcm_ptp_private *priv; + struct ptp_clock *clock; + + switch (BRCM_PHY_MODEL(phydev)) { + case PHY_ID_BCM54210E: + break; + default: + return NULL; + } + + priv = devm_kzalloc(&phydev->mdio.dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return ERR_PTR(-ENOMEM); + + priv->ptp_info = bcm_ptp_clock_info; + + snprintf(priv->pin.name, sizeof(priv->pin.name), "SYNC_OUT"); + priv->ptp_info.pin_config = &priv->pin; + + clock = ptp_clock_register(&priv->ptp_info, &phydev->mdio.dev); + if (IS_ERR(clock)) + return ERR_CAST(clock); + priv->ptp_clock = clock; + + priv->phydev = phydev; + bcm_ptp_init(priv); + + return priv; +} +EXPORT_SYMBOL_GPL(bcm_ptp_probe); + +MODULE_LICENSE("GPL"); diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index e36809aa6d30..876bc45ede60 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -27,6 +27,11 @@ MODULE_DESCRIPTION("Broadcom PHY driver"); MODULE_AUTHOR("Maciej W. 
Rozycki"); MODULE_LICENSE("GPL"); +struct bcm54xx_phy_priv { + u64 *stats; + struct bcm_ptp_private *ptp; +}; + static int bcm54xx_config_clock_delay(struct phy_device *phydev) { int rc, val; @@ -313,6 +318,22 @@ static void bcm54xx_adjust_rxrefclk(struct phy_device *phydev) bcm_phy_write_shadow(phydev, BCM54XX_SHD_APD, val); } +static void bcm54xx_ptp_stop(struct phy_device *phydev) +{ + struct bcm54xx_phy_priv *priv = phydev->priv; + + if (priv->ptp) + bcm_ptp_stop(priv->ptp); +} + +static void bcm54xx_ptp_config_init(struct phy_device *phydev) +{ + struct bcm54xx_phy_priv *priv = phydev->priv; + + if (priv->ptp) + bcm_ptp_config_init(phydev); +} + static int bcm54xx_config_init(struct phy_device *phydev) { int reg, err, val; @@ -390,6 +411,8 @@ static int bcm54xx_config_init(struct phy_device *phydev) bcm_phy_write_exp(phydev, BCM_EXP_MULTICOLOR, val); } + bcm54xx_ptp_config_init(phydev); + return 0; } @@ -418,6 +441,8 @@ static int bcm54xx_suspend(struct phy_device *phydev) { int ret; + bcm54xx_ptp_stop(phydev); + /* We cannot use a read/modify/write here otherwise the PHY gets into * a bad state where its LEDs keep flashing, thus defeating the purpose * of low power mode. 
@@ -741,10 +766,6 @@ static irqreturn_t brcm_fet_handle_interrupt(struct phy_device *phydev) return IRQ_HANDLED; } -struct bcm54xx_phy_priv { - u64 *stats; -}; - static int bcm54xx_phy_probe(struct phy_device *phydev) { struct bcm54xx_phy_priv *priv; @@ -761,6 +782,10 @@ static int bcm54xx_phy_probe(struct phy_device *phydev) if (!priv->stats) return -ENOMEM; + priv->ptp = bcm_ptp_probe(phydev); + if (IS_ERR(priv->ptp)) + return PTR_ERR(priv->ptp); + return 0; } diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index 13dafe7a29bd..1e38039c5c56 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -14,6 +14,7 @@ #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/bitfield.h> +#include <linux/nvmem-consumer.h> #include <dt-bindings/net/ti-dp83867.h> @@ -522,6 +523,51 @@ static int dp83867_verify_rgmii_cfg(struct phy_device *phydev) } #if IS_ENABLED(CONFIG_OF_MDIO) +static int dp83867_of_init_io_impedance(struct phy_device *phydev) +{ + struct dp83867_private *dp83867 = phydev->priv; + struct device *dev = &phydev->mdio.dev; + struct device_node *of_node = dev->of_node; + struct nvmem_cell *cell; + u8 *buf, val; + int ret; + + cell = of_nvmem_cell_get(of_node, "io_impedance_ctrl"); + if (IS_ERR(cell)) { + ret = PTR_ERR(cell); + if (ret != -ENOENT) + return phydev_err_probe(phydev, ret, + "failed to get nvmem cell io_impedance_ctrl\n"); + + /* If no nvmem cell, check for the boolean properties. 
*/ + if (of_property_read_bool(of_node, "ti,max-output-impedance")) + dp83867->io_impedance = DP83867_IO_MUX_CFG_IO_IMPEDANCE_MAX; + else if (of_property_read_bool(of_node, "ti,min-output-impedance")) + dp83867->io_impedance = DP83867_IO_MUX_CFG_IO_IMPEDANCE_MIN; + else + dp83867->io_impedance = -1; /* leave at default */ + + return 0; + } + + buf = nvmem_cell_read(cell, NULL); + nvmem_cell_put(cell); + + if (IS_ERR(buf)) + return PTR_ERR(buf); + + val = *buf; + kfree(buf); + + if ((val & DP83867_IO_MUX_CFG_IO_IMPEDANCE_MASK) != val) { + phydev_err(phydev, "nvmem cell 'io_impedance_ctrl' contents out of range\n"); + return -ERANGE; + } + dp83867->io_impedance = val; + + return 0; +} + static int dp83867_of_init(struct phy_device *phydev) { struct dp83867_private *dp83867 = phydev->priv; @@ -549,12 +595,9 @@ static int dp83867_of_init(struct phy_device *phydev) } } - if (of_property_read_bool(of_node, "ti,max-output-impedance")) - dp83867->io_impedance = DP83867_IO_MUX_CFG_IO_IMPEDANCE_MAX; - else if (of_property_read_bool(of_node, "ti,min-output-impedance")) - dp83867->io_impedance = DP83867_IO_MUX_CFG_IO_IMPEDANCE_MIN; - else - dp83867->io_impedance = -1; /* leave at default */ + ret = dp83867_of_init_io_impedance(phydev); + if (ret) + return ret; dp83867->rxctrl_strap_quirk = of_property_read_bool(of_node, "ti,dp83867-rxctrl-strap-quirk"); diff --git a/drivers/net/phy/dp83td510.c b/drivers/net/phy/dp83td510.c index 1ae792b0daaa..3cd9a77f9532 100644 --- a/drivers/net/phy/dp83td510.c +++ b/drivers/net/phy/dp83td510.c @@ -27,6 +27,27 @@ #define DP83TD510E_AN_STAT_1 0x60c #define DP83TD510E_MASTER_SLAVE_RESOL_FAIL BIT(15) +#define DP83TD510E_MSE_DETECT 0xa85 + +#define DP83TD510_SQI_MAX 7 + +/* Register values are converted to SNR(dB) as suggested by + * "Application Report - DP83TD510E Cable Diagnostics Toolkit": + * SNR(dB) = -10 * log10 (VAL/2^17) - 1.76 dB. 
+ * SQI ranges are implemented according to "OPEN ALLIANCE - Advanced diagnostic + * features for 100BASE-T1 automotive Ethernet PHYs" + */ +static const u16 dp83td510_mse_sqi_map[] = { + 0x0569, /* < 18dB */ + 0x044c, /* 18dB =< SNR < 19dB */ + 0x0369, /* 19dB =< SNR < 20dB */ + 0x02b6, /* 20dB =< SNR < 21dB */ + 0x0227, /* 21dB =< SNR < 22dB */ + 0x01b6, /* 22dB =< SNR < 23dB */ + 0x015b, /* 23dB =< SNR < 24dB */ + 0x0000 /* 24dB =< SNR */ +}; + static int dp83td510_config_intr(struct phy_device *phydev) { int ret; @@ -164,6 +185,32 @@ static int dp83td510_config_aneg(struct phy_device *phydev) return genphy_c45_check_and_restart_aneg(phydev, changed); } +static int dp83td510_get_sqi(struct phy_device *phydev) +{ + int sqi, ret; + u16 mse_val; + + if (!phydev->link) + return 0; + + ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, DP83TD510E_MSE_DETECT); + if (ret < 0) + return ret; + + mse_val = 0xFFFF & ret; + for (sqi = 0; sqi < ARRAY_SIZE(dp83td510_mse_sqi_map); sqi++) { + if (mse_val >= dp83td510_mse_sqi_map[sqi]) + return sqi; + } + + return -EINVAL; +} + +static int dp83td510_get_sqi_max(struct phy_device *phydev) +{ + return DP83TD510_SQI_MAX; +} + static int dp83td510_get_features(struct phy_device *phydev) { /* This PHY can't respond on MDIO bus if no RMII clock is enabled. 
@@ -192,6 +239,8 @@ static struct phy_driver dp83td510_driver[] = { .get_features = dp83td510_get_features, .config_intr = dp83td510_config_intr, .handle_interrupt = dp83td510_handle_interrupt, + .get_sqi = dp83td510_get_sqi, + .get_sqi_max = dp83td510_get_sqi_max, .suspend = genphy_suspend, .resume = genphy_resume, diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c index 03abe6233bbb..aef739c20ac4 100644 --- a/drivers/net/phy/fixed_phy.c +++ b/drivers/net/phy/fixed_phy.c @@ -353,6 +353,7 @@ static int __init fixed_mdio_bus_init(void) fmb->mii_bus->parent = &pdev->dev; fmb->mii_bus->read = &fixed_mdio_read; fmb->mii_bus->write = &fixed_mdio_write; + fmb->mii_bus->phy_mask = ~0; ret = mdiobus_register(fmb->mii_bus); if (ret) diff --git a/drivers/net/phy/marvell-88x2222.c b/drivers/net/phy/marvell-88x2222.c index d8b31d4d2a73..f070776ca904 100644 --- a/drivers/net/phy/marvell-88x2222.c +++ b/drivers/net/phy/marvell-88x2222.c @@ -490,6 +490,7 @@ static int mv2222_sfp_insert(void *upstream, const struct sfp_eeprom_id *id) dev = &phydev->mdio.dev; sfp_parse_support(phydev->sfp_bus, id, sfp_supported); + phydev->port = sfp_parse_port(phydev->sfp_bus, id, sfp_supported); sfp_interface = sfp_select_interface(phydev->sfp_bus, sfp_supported); dev_info(dev, "%s SFP module inserted\n", phy_modes(sfp_interface)); @@ -526,6 +527,7 @@ static void mv2222_sfp_remove(void *upstream) priv->line_interface = PHY_INTERFACE_MODE_NA; linkmode_zero(priv->supported); + phydev->port = PORT_NONE; } static void mv2222_sfp_link_up(void *upstream) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index d777c8851ed6..a714150f5e8c 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -1991,15 +1991,9 @@ static int m88e1510_loopback(struct phy_device *phydev, bool enable) int err; if (enable) { - u16 bmcr_ctl = 0, mscr2_ctl = 0; + u16 bmcr_ctl, mscr2_ctl = 0; - if (phydev->speed == SPEED_1000) - bmcr_ctl = BMCR_SPEED1000; - else if 
(phydev->speed == SPEED_100) - bmcr_ctl = BMCR_SPEED100; - - if (phydev->duplex == DUPLEX_FULL) - bmcr_ctl |= BMCR_FULLDPLX; + bmcr_ctl = mii_bmcr_encode_fixed(phydev->speed, phydev->duplex); err = phy_write(phydev, MII_BMCR, bmcr_ctl); if (err < 0) diff --git a/drivers/net/phy/mxl-gpy.c b/drivers/net/phy/mxl-gpy.c index 5ce1bf03bbd7..5b99acf44337 100644 --- a/drivers/net/phy/mxl-gpy.c +++ b/drivers/net/phy/mxl-gpy.c @@ -8,7 +8,9 @@ #include <linux/module.h> #include <linux/bitfield.h> +#include <linux/hwmon.h> #include <linux/phy.h> +#include <linux/polynomial.h> #include <linux/netdevice.h> /* PHY ID */ @@ -64,6 +66,10 @@ #define VSPEC1_SGMII_ANEN_ANRS (VSPEC1_SGMII_CTRL_ANEN | \ VSPEC1_SGMII_CTRL_ANRS) +/* Temperature sensor */ +#define VPSPEC1_TEMP_STA 0x0E +#define VPSPEC1_TEMP_STA_DATA GENMASK(9, 0) + /* WoL */ #define VPSPEC2_WOL_CTL 0x0E06 #define VPSPEC2_WOL_AD01 0x0E08 @@ -80,6 +86,102 @@ static const struct { {9, 0x73}, }; +#if IS_ENABLED(CONFIG_HWMON) +/* The original translation formulae of the temperature (in degrees of Celsius) + * are as follows: + * + * T = -2.5761e-11*(N^4) + 9.7332e-8*(N^3) + -1.9165e-4*(N^2) + + * 3.0762e-1*(N^1) + -5.2156e1 + * + * where [-52.156, 137.961]C and N = [0, 1023]. + * + * They must be accordingly altered to be suitable for the integer arithmetics. + * The technique is called 'factor redistribution', which just makes sure the + * multiplications and divisions are made so to have a result of the operations + * within the integer numbers limit. In addition we need to translate the + * formulae to accept millidegrees of Celsius. Here what it looks like after + * the alterations: + * + * T = -25761e-12*(N^4) + 97332e-9*(N^3) + -191650e-6*(N^2) + + * 307620e-3*(N^1) + -52156 + * + * where T = [-52156, 137961]mC and N = [0, 1023]. 
+ */ +static const struct polynomial poly_N_to_temp = { + .terms = { + {4, -25761, 1000, 1}, + {3, 97332, 1000, 1}, + {2, -191650, 1000, 1}, + {1, 307620, 1000, 1}, + {0, -52156, 1, 1} + } +}; + +static int gpy_hwmon_read(struct device *dev, + enum hwmon_sensor_types type, + u32 attr, int channel, long *value) +{ + struct phy_device *phydev = dev_get_drvdata(dev); + int ret; + + ret = phy_read_mmd(phydev, MDIO_MMD_VEND1, VPSPEC1_TEMP_STA); + if (ret < 0) + return ret; + if (!ret) + return -ENODATA; + + *value = polynomial_calc(&poly_N_to_temp, + FIELD_GET(VPSPEC1_TEMP_STA_DATA, ret)); + + return 0; +} + +static umode_t gpy_hwmon_is_visible(const void *data, + enum hwmon_sensor_types type, + u32 attr, int channel) +{ + return 0444; +} + +static const struct hwmon_channel_info *gpy_hwmon_info[] = { + HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT), + NULL +}; + +static const struct hwmon_ops gpy_hwmon_hwmon_ops = { + .is_visible = gpy_hwmon_is_visible, + .read = gpy_hwmon_read, +}; + +static const struct hwmon_chip_info gpy_hwmon_chip_info = { + .ops = &gpy_hwmon_hwmon_ops, + .info = gpy_hwmon_info, +}; + +static int gpy_hwmon_register(struct phy_device *phydev) +{ + struct device *dev = &phydev->mdio.dev; + struct device *hwmon_dev; + char *hwmon_name; + + hwmon_name = devm_hwmon_sanitize_name(dev, dev_name(dev)); + if (IS_ERR(hwmon_name)) + return PTR_ERR(hwmon_name); + + hwmon_dev = devm_hwmon_device_register_with_info(dev, hwmon_name, + phydev, + &gpy_hwmon_chip_info, + NULL); + + return PTR_ERR_OR_ZERO(hwmon_dev); +} +#else +static int gpy_hwmon_register(struct phy_device *phydev) +{ + return 0; +} +#endif + static int gpy_config_init(struct phy_device *phydev) { int ret; @@ -109,6 +211,10 @@ static int gpy_probe(struct phy_device *phydev) if (ret < 0) return ret; + ret = gpy_hwmon_register(phydev); + if (ret) + return ret; + phydev_info(phydev, "Firmware Version: 0x%04X (%s)\n", ret, (ret & PHY_FWV_REL_MASK) ? 
"release" : "test"); @@ -295,6 +401,9 @@ static void gpy_update_interface(struct phy_device *phydev) ret); break; } + + if (phydev->speed == SPEED_2500 || phydev->speed == SPEED_1000) + genphy_read_master_slave(phydev); } static int gpy_read_status(struct phy_device *phydev) diff --git a/drivers/net/phy/nxp-tja11xx.c b/drivers/net/phy/nxp-tja11xx.c index 9944cc501806..2a8195c50d14 100644 --- a/drivers/net/phy/nxp-tja11xx.c +++ b/drivers/net/phy/nxp-tja11xx.c @@ -444,15 +444,10 @@ static int tja11xx_hwmon_register(struct phy_device *phydev, struct tja11xx_priv *priv) { struct device *dev = &phydev->mdio.dev; - int i; - - priv->hwmon_name = devm_kstrdup(dev, dev_name(dev), GFP_KERNEL); - if (!priv->hwmon_name) - return -ENOMEM; - for (i = 0; priv->hwmon_name[i]; i++) - if (hwmon_is_bad_char(priv->hwmon_name[i])) - priv->hwmon_name[i] = '_'; + priv->hwmon_name = devm_hwmon_sanitize_name(dev, dev_name(dev)); + if (IS_ERR(priv->hwmon_name)) + return PTR_ERR(priv->hwmon_name); priv->hwmon_dev = devm_hwmon_device_register_with_info(dev, priv->hwmon_name, diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 431a8719c635..7885bceff773 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -2001,18 +2001,12 @@ EXPORT_SYMBOL(genphy_config_eee_advert); */ int genphy_setup_forced(struct phy_device *phydev) { - u16 ctl = 0; + u16 ctl; phydev->pause = 0; phydev->asym_pause = 0; - if (SPEED_1000 == phydev->speed) - ctl |= BMCR_SPEED1000; - else if (SPEED_100 == phydev->speed) - ctl |= BMCR_SPEED100; - - if (DUPLEX_FULL == phydev->duplex) - ctl |= BMCR_FULLDPLX; + ctl = mii_bmcr_encode_fixed(phydev->speed, phydev->duplex); return phy_modify(phydev, MII_BMCR, ~(BMCR_LOOPBACK | BMCR_ISOLATE | BMCR_PDOWN), ctl); @@ -2614,13 +2608,7 @@ int genphy_loopback(struct phy_device *phydev, bool enable) u16 val, ctl = BMCR_LOOPBACK; int ret; - if (phydev->speed == SPEED_1000) - ctl |= BMCR_SPEED1000; - else if (phydev->speed == SPEED_100) - ctl 
|= BMCR_SPEED100; - - if (phydev->duplex == DUPLEX_FULL) - ctl |= BMCR_FULLDPLX; + ctl |= mii_bmcr_encode_fixed(phydev->speed, phydev->duplex); phy_modify(phydev, MII_BMCR, ~0, ctl); diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 066684b80919..e20cdab824db 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -2991,6 +2991,7 @@ int phylink_mii_c22_pcs_encode_advertisement(phy_interface_t interface, adv |= ADVERTISE_1000XPSE_ASYM; return adv; case PHY_INTERFACE_MODE_SGMII: + case PHY_INTERFACE_MODE_QSGMII: return 0x0001; default: /* Nothing to do for other modes */ diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 9a5d5a10560f..81a529c3dbe4 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -1290,7 +1290,7 @@ static const struct hwmon_chip_info sfp_hwmon_chip_info = { static void sfp_hwmon_probe(struct work_struct *work) { struct sfp *sfp = container_of(work, struct sfp, hwmon_probe.work); - int err, i; + int err; /* hwmon interface needs to access 16bit registers in atomic way to * guarantee coherency of the diagnostic monitoring data. 
If it is not @@ -1318,16 +1318,12 @@ static void sfp_hwmon_probe(struct work_struct *work) return; } - sfp->hwmon_name = kstrdup(dev_name(sfp->dev), GFP_KERNEL); - if (!sfp->hwmon_name) { + sfp->hwmon_name = hwmon_sanitize_name(dev_name(sfp->dev)); + if (IS_ERR(sfp->hwmon_name)) { dev_err(sfp->dev, "out of memory for hwmon name\n"); return; } - for (i = 0; sfp->hwmon_name[i]; i++) - if (hwmon_is_bad_char(sfp->hwmon_name[i])) - sfp->hwmon_name[i] = '_'; - sfp->hwmon_dev = hwmon_device_register_with_info(sfp->dev, sfp->hwmon_name, sfp, &sfp_hwmon_chip_info, diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index 96d3c40932d8..69423b8965b3 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -121,10 +121,7 @@ static int smsc_phy_config_init(struct phy_device *phydev) /* Enable energy detect mode for this SMSC Transceivers */ rc = phy_write(phydev, MII_LAN83C185_CTRL_STATUS, rc | MII_LAN83C185_EDPWRDOWN); - if (rc < 0) - return rc; - - return smsc_phy_ack_interrupt(phydev); + return rc; } static int smsc_phy_reset(struct phy_device *phydev) @@ -146,11 +143,6 @@ static int smsc_phy_reset(struct phy_device *phydev) return genphy_soft_reset(phydev); } -static int lan911x_config_init(struct phy_device *phydev) -{ - return smsc_phy_ack_interrupt(phydev); -} - static int lan87xx_config_aneg(struct phy_device *phydev) { int rc; @@ -420,9 +412,6 @@ static struct phy_driver smsc_phy_driver[] = { .probe = smsc_phy_probe, - /* basic functions */ - .config_init = lan911x_config_init, - /* IRQ related */ .config_intr = smsc_phy_config_intr, .handle_interrupt = smsc_phy_handle_interrupt, diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 4a365f15533e..9206c660a72e 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -2968,7 +2968,7 @@ ppp_unregister_channel(struct ppp_channel *chan) chan->ppp = NULL; /* - * This ensures that we have returned from any calls into the + * This ensures that we have 
returned from any calls into * the channel's start_xmit or ioctl routine before we proceed. */ down_write(&pch->chan_sem); diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index b07dde6f0abf..aac133a1e27a 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -749,10 +749,10 @@ static rx_handler_result_t team_handle_frame(struct sk_buff **pskb) pcpu_stats = this_cpu_ptr(team->pcpu_stats); u64_stats_update_begin(&pcpu_stats->syncp); - pcpu_stats->rx_packets++; - pcpu_stats->rx_bytes += skb->len; + u64_stats_inc(&pcpu_stats->rx_packets); + u64_stats_add(&pcpu_stats->rx_bytes, skb->len); if (skb->pkt_type == PACKET_MULTICAST) - pcpu_stats->rx_multicast++; + u64_stats_inc(&pcpu_stats->rx_multicast); u64_stats_update_end(&pcpu_stats->syncp); skb->dev = team->dev; @@ -1720,8 +1720,8 @@ static netdev_tx_t team_xmit(struct sk_buff *skb, struct net_device *dev) pcpu_stats = this_cpu_ptr(team->pcpu_stats); u64_stats_update_begin(&pcpu_stats->syncp); - pcpu_stats->tx_packets++; - pcpu_stats->tx_bytes += len; + u64_stats_inc(&pcpu_stats->tx_packets); + u64_stats_add(&pcpu_stats->tx_bytes, len); u64_stats_update_end(&pcpu_stats->syncp); } else { this_cpu_inc(team->pcpu_stats->tx_dropped); @@ -1854,11 +1854,11 @@ team_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) p = per_cpu_ptr(team->pcpu_stats, i); do { start = u64_stats_fetch_begin_irq(&p->syncp); - rx_packets = p->rx_packets; - rx_bytes = p->rx_bytes; - rx_multicast = p->rx_multicast; - tx_packets = p->tx_packets; - tx_bytes = p->tx_bytes; + rx_packets = u64_stats_read(&p->rx_packets); + rx_bytes = u64_stats_read(&p->rx_bytes); + rx_multicast = u64_stats_read(&p->rx_multicast); + tx_packets = u64_stats_read(&p->tx_packets); + tx_bytes = u64_stats_read(&p->tx_bytes); } while (u64_stats_fetch_retry_irq(&p->syncp, start)); stats->rx_packets += rx_packets; @@ -1870,9 +1870,9 @@ team_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) * rx_dropped, tx_dropped & 
rx_nohandler are u32, * updated without syncp protection. */ - rx_dropped += p->rx_dropped; - tx_dropped += p->tx_dropped; - rx_nohandler += p->rx_nohandler; + rx_dropped += READ_ONCE(p->rx_dropped); + tx_dropped += READ_ONCE(p->tx_dropped); + rx_nohandler += READ_ONCE(p->rx_nohandler); } stats->rx_dropped = rx_dropped; stats->tx_dropped = tx_dropped; diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index bd03e16f98a1..35110814ba22 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -2088,6 +2088,11 @@ static const struct usb_device_id products[] = { USB_DEVICE(0x0424, 0x9E08), .driver_info = (unsigned long) &smsc95xx_info, }, + { + /* Microchip's EVB-LAN8670-USB 10BASE-T1S Ethernet Device */ + USB_DEVICE(0x184F, 0x0051), + .driver_info = (unsigned long)&smsc95xx_info, + }, { }, /* END */ }; MODULE_DEVICE_TABLE(usb, products); diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 1cb6dab3e2d0..713cfc2723b8 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -337,8 +337,8 @@ void usbnet_skb_return (struct usbnet *dev, struct sk_buff *skb) skb->protocol = eth_type_trans (skb, dev->net); flags = u64_stats_update_begin_irqsave(&stats64->syncp); - stats64->rx_packets++; - stats64->rx_bytes += skb->len; + u64_stats_inc(&stats64->rx_packets); + u64_stats_add(&stats64->rx_bytes, skb->len); u64_stats_update_end_irqrestore(&stats64->syncp, flags); netif_dbg(dev, rx_status, dev->net, "< rx, len %zu, type 0x%x\n", @@ -849,13 +849,11 @@ int usbnet_stop (struct net_device *net) mpn = !test_and_clear_bit(EVENT_NO_RUNTIME_PM, &dev->flags); - /* deferred work (task, timer, softirq) must also stop. - * can't flush_scheduled_work() until we drop rtnl (later), - * else workers could deadlock; so make workers a NOP. 
- */ + /* deferred work (timer, softirq, task) must also stop */ dev->flags = 0; del_timer_sync (&dev->delay); tasklet_kill (&dev->bh); + cancel_work_sync(&dev->kevent); if (!pm) usb_autopm_put_interface(dev->intf); @@ -1258,8 +1256,8 @@ static void tx_complete (struct urb *urb) unsigned long flags; flags = u64_stats_update_begin_irqsave(&stats64->syncp); - stats64->tx_packets += entry->packets; - stats64->tx_bytes += entry->length; + u64_stats_add(&stats64->tx_packets, entry->packets); + u64_stats_add(&stats64->tx_bytes, entry->length); u64_stats_update_end_irqrestore(&stats64->syncp, flags); } else { dev->net->stats.tx_errors++; @@ -1619,8 +1617,6 @@ void usbnet_disconnect (struct usb_interface *intf) net = dev->net; unregister_netdev (net); - cancel_work_sync(&dev->kevent); - usb_scuttle_anchored_urbs(&dev->deferred); if (dev->driver_info->unbind) diff --git a/drivers/net/vmxnet3/Makefile b/drivers/net/vmxnet3/Makefile index 7a38925f4165..a666a88ac1ff 100644 --- a/drivers/net/vmxnet3/Makefile +++ b/drivers/net/vmxnet3/Makefile @@ -2,7 +2,7 @@ # # Linux driver for VMware's vmxnet3 ethernet NIC. # -# Copyright (C) 2007-2021, VMware, Inc. All Rights Reserved. +# Copyright (C) 2007-2022, VMware, Inc. All Rights Reserved. # # This program is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the diff --git a/drivers/net/vmxnet3/upt1_defs.h b/drivers/net/vmxnet3/upt1_defs.h index f9f3a23d1698..41c0660a0c54 100644 --- a/drivers/net/vmxnet3/upt1_defs.h +++ b/drivers/net/vmxnet3/upt1_defs.h @@ -1,7 +1,7 @@ /* * Linux driver for VMware's vmxnet3 ethernet NIC. * - * Copyright (C) 2008-2021, VMware, Inc. All Rights Reserved. + * Copyright (C) 2008-2022, VMware, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the diff --git a/drivers/net/vmxnet3/vmxnet3_defs.h b/drivers/net/vmxnet3/vmxnet3_defs.h index 74d4e8bc4abc..41d6767283a6 100644 --- a/drivers/net/vmxnet3/vmxnet3_defs.h +++ b/drivers/net/vmxnet3/vmxnet3_defs.h @@ -1,7 +1,7 @@ /* * Linux driver for VMware's vmxnet3 ethernet NIC. * - * Copyright (C) 2008-2021, VMware, Inc. All Rights Reserved. + * Copyright (C) 2008-2022, VMware, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -40,7 +40,13 @@ enum { VMXNET3_REG_MACL = 0x28, /* MAC Address Low */ VMXNET3_REG_MACH = 0x30, /* MAC Address High */ VMXNET3_REG_ICR = 0x38, /* Interrupt Cause Register */ - VMXNET3_REG_ECR = 0x40 /* Event Cause Register */ + VMXNET3_REG_ECR = 0x40, /* Event Cause Register */ + VMXNET3_REG_DCR = 0x48, /* Device capability register, + * from 0x48 to 0x80 + */ + VMXNET3_REG_PTCR = 0x88, /* Passthru capbility register + * from 0x88 to 0xb0 + */ }; /* BAR 0 */ @@ -51,8 +57,18 @@ enum { VMXNET3_REG_RXPROD2 = 0xA00 /* Rx Producer Index for ring 2 */ }; -#define VMXNET3_PT_REG_SIZE 4096 /* BAR 0 */ -#define VMXNET3_VD_REG_SIZE 4096 /* BAR 1 */ +/* For Large PT BAR, the following offset to DB register */ +enum { + VMXNET3_REG_LB_TXPROD = 0x1000, /* Tx Producer Index */ + VMXNET3_REG_LB_RXPROD = 0x1400, /* Rx Producer Index for ring 1 */ + VMXNET3_REG_LB_RXPROD2 = 0x1800, /* Rx Producer Index for ring 2 */ +}; + +#define VMXNET3_PT_REG_SIZE 4096 /* BAR 0 */ +#define VMXNET3_LARGE_PT_REG_SIZE 8192 /* large PT pages */ +#define VMXNET3_VD_REG_SIZE 4096 /* BAR 1 */ +#define VMXNET3_LARGE_BAR0_REG_SIZE (4096 * 4096) /* LARGE BAR 0 */ +#define VMXNET3_OOB_REG_SIZE (4094 * 4096) /* OOB pages */ #define VMXNET3_REG_ALIGN 8 /* All registers are 8-byte aligned. 
*/ #define VMXNET3_REG_ALIGN_MASK 0x7 @@ -83,6 +99,9 @@ enum { VMXNET3_CMD_SET_COALESCE, VMXNET3_CMD_REGISTER_MEMREGS, VMXNET3_CMD_SET_RSS_FIELDS, + VMXNET3_CMD_RESERVED4, + VMXNET3_CMD_RESERVED5, + VMXNET3_CMD_SET_RING_BUFFER_SIZE, VMXNET3_CMD_FIRST_GET = 0xF00D0000, VMXNET3_CMD_GET_QUEUE_STATUS = VMXNET3_CMD_FIRST_GET, @@ -101,6 +120,9 @@ enum { VMXNET3_CMD_GET_RESERVED2, VMXNET3_CMD_GET_RESERVED3, VMXNET3_CMD_GET_MAX_QUEUES_CONF, + VMXNET3_CMD_GET_RESERVED4, + VMXNET3_CMD_GET_MAX_CAPABILITIES, + VMXNET3_CMD_GET_DCR0_REG, }; /* @@ -126,17 +148,17 @@ struct Vmxnet3_TxDesc { #ifdef __BIG_ENDIAN_BITFIELD u32 msscof:14; /* MSS, checksum offset, flags */ - u32 ext1:1; + u32 ext1:1; /* set to 1 to indicate inner csum/tso, vmxnet3 v7 */ u32 dtype:1; /* descriptor type */ - u32 oco:1; + u32 oco:1; /* Outer csum offload */ u32 gen:1; /* generation bit */ u32 len:14; #else u32 len:14; u32 gen:1; /* generation bit */ - u32 oco:1; + u32 oco:1; /* Outer csum offload */ u32 dtype:1; /* descriptor type */ - u32 ext1:1; + u32 ext1:1; /* set to 1 to indicate inner csum/tso, vmxnet3 v7 */ u32 msscof:14; /* MSS, checksum offset, flags */ #endif /* __BIG_ENDIAN_BITFIELD */ @@ -240,11 +262,13 @@ struct Vmxnet3_RxCompDesc { u32 rqID:10; /* rx queue/ring ID */ u32 sop:1; /* Start of Packet */ u32 eop:1; /* End of Packet */ - u32 ext1:2; + u32 ext1:2; /* bit 0: indicating v4/v6/.. is for inner header */ + /* bit 1: indicating rssType is based on inner header */ u32 rxdIdx:12; /* Index of the RxDesc */ #else u32 rxdIdx:12; /* Index of the RxDesc */ - u32 ext1:2; + u32 ext1:2; /* bit 0: indicating v4/v6/.. 
is for inner header */ + /* bit 1: indicating rssType is based on inner header */ u32 eop:1; /* End of Packet */ u32 sop:1; /* Start of Packet */ u32 rqID:10; /* rx queue/ring ID */ @@ -378,6 +402,8 @@ union Vmxnet3_GenericDesc { /* max # of tx descs for a non-tso pkt */ #define VMXNET3_MAX_TXD_PER_PKT 16 +/* max # of tx descs for a tso pkt */ +#define VMXNET3_MAX_TSO_TXD_PER_PKT 24 /* Max size of a single rx buffer */ #define VMXNET3_MAX_RX_BUF_SIZE ((1 << 14) - 1) @@ -724,6 +750,13 @@ enum Vmxnet3_RSSField { VMXNET3_RSS_FIELDS_ESPIP6 = 0x0020, }; +struct Vmxnet3_RingBufferSize { + __le16 ring1BufSizeType0; + __le16 ring1BufSizeType1; + __le16 ring2BufSizeType1; + __le16 pad; +}; + /* If the command data <= 16 bytes, use the shared memory directly. * otherwise, use variable length configuration descriptor. */ @@ -731,6 +764,7 @@ union Vmxnet3_CmdInfo { struct Vmxnet3_VariableLenConfDesc varConf; struct Vmxnet3_SetPolling setPolling; enum Vmxnet3_RSSField setRssFields; + struct Vmxnet3_RingBufferSize ringBufSize; __le64 data[2]; }; @@ -801,4 +835,30 @@ struct Vmxnet3_DriverShared { #define VMXNET3_LINK_UP (10000 << 16 | 1) /* 10 Gbps, up */ #define VMXNET3_LINK_DOWN 0 +#define VMXNET3_DCR_ERROR 31 /* error when bit 31 of DCR is set */ +#define VMXNET3_CAP_UDP_RSS 0 /* bit 0 of DCR 0 */ +#define VMXNET3_CAP_ESP_RSS_IPV4 1 /* bit 1 of DCR 0 */ +#define VMXNET3_CAP_GENEVE_CHECKSUM_OFFLOAD 2 /* bit 2 of DCR 0 */ +#define VMXNET3_CAP_GENEVE_TSO 3 /* bit 3 of DCR 0 */ +#define VMXNET3_CAP_VXLAN_CHECKSUM_OFFLOAD 4 /* bit 4 of DCR 0 */ +#define VMXNET3_CAP_VXLAN_TSO 5 /* bit 5 of DCR 0 */ +#define VMXNET3_CAP_GENEVE_OUTER_CHECKSUM_OFFLOAD 6 /* bit 6 of DCR 0 */ +#define VMXNET3_CAP_VXLAN_OUTER_CHECKSUM_OFFLOAD 7 /* bit 7 of DCR 0 */ +#define VMXNET3_CAP_PKT_STEERING_IPV4 8 /* bit 8 of DCR 0 */ +#define VMXNET3_CAP_VERSION_4_MAX VMXNET3_CAP_PKT_STEERING_IPV4 +#define VMXNET3_CAP_ESP_RSS_IPV6 9 /* bit 9 of DCR 0 */ +#define VMXNET3_CAP_VERSION_5_MAX VMXNET3_CAP_ESP_RSS_IPV6 
+#define VMXNET3_CAP_ESP_OVER_UDP_RSS 10 /* bit 10 of DCR 0 */ +#define VMXNET3_CAP_INNER_RSS 11 /* bit 11 of DCR 0 */ +#define VMXNET3_CAP_INNER_ESP_RSS 12 /* bit 12 of DCR 0 */ +#define VMXNET3_CAP_CRC32_HASH_FUNC 13 /* bit 13 of DCR 0 */ +#define VMXNET3_CAP_VERSION_6_MAX VMXNET3_CAP_CRC32_HASH_FUNC +#define VMXNET3_CAP_OAM_FILTER 14 /* bit 14 of DCR 0 */ +#define VMXNET3_CAP_ESP_QS 15 /* bit 15 of DCR 0 */ +#define VMXNET3_CAP_LARGE_BAR 16 /* bit 16 of DCR 0 */ +#define VMXNET3_CAP_OOORX_COMP 17 /* bit 17 of DCR 0 */ +#define VMXNET3_CAP_VERSION_7_MAX 18 +/* when new capability is introduced, update VMXNET3_CAP_MAX */ +#define VMXNET3_CAP_MAX VMXNET3_CAP_VERSION_7_MAX + #endif /* _VMXNET3_DEFS_H_ */ diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 93e8d119d45f..19c414733747 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1,7 +1,7 @@ /* * Linux driver for VMware's vmxnet3 ethernet NIC. * - * Copyright (C) 2008-2021, VMware, Inc. All Rights Reserved. + * Copyright (C) 2008-2022, VMware, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -130,6 +130,20 @@ vmxnet3_tq_stop(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter) netif_stop_subqueue(adapter->netdev, (tq - adapter->tx_queue)); } +/* Check if capability is supported by UPT device or + * UPT is even requested + */ +bool +vmxnet3_check_ptcapability(u32 cap_supported, u32 cap) +{ + if (cap_supported & (1UL << VMXNET3_DCR_ERROR) || + cap_supported & (1UL << cap)) { + return true; + } + + return false; +} + /* * Check the link state. This may start or stop the tx queue. 
@@ -571,6 +585,7 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx, rbi = rbi_base + ring->next2fill; gd = ring->base + ring->next2fill; + rbi->comp_state = VMXNET3_RXD_COMP_PENDING; if (rbi->buf_type == VMXNET3_RX_BUF_SKB) { if (rbi->skb == NULL) { @@ -630,8 +645,10 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx, /* Fill the last buffer but dont mark it ready, or else the * device will think that the queue is full */ - if (num_allocated == num_to_alloc) + if (num_allocated == num_to_alloc) { + rbi->comp_state = VMXNET3_RXD_COMP_DONE; break; + } gd->dword[2] |= cpu_to_le32(ring->gen << VMXNET3_RXD_GEN_SHIFT); num_allocated++; @@ -1044,6 +1061,23 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, } tq->stats.copy_skb_header++; } + if (unlikely(count > VMXNET3_MAX_TSO_TXD_PER_PKT)) { + /* tso pkts must not use more than + * VMXNET3_MAX_TSO_TXD_PER_PKT entries + */ + if (skb_linearize(skb) != 0) { + tq->stats.drop_too_many_frags++; + goto drop_pkt; + } + tq->stats.linearized++; + + /* recalculate the # of descriptors to use */ + count = VMXNET3_TXD_NEEDED(skb_headlen(skb)) + 1; + if (unlikely(count > VMXNET3_MAX_TSO_TXD_PER_PKT)) { + tq->stats.drop_too_many_frags++; + goto drop_pkt; + } + } if (skb->encapsulation) { vmxnet3_prepare_inner_tso(skb, &ctx); } else { @@ -1127,7 +1161,12 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, if (ctx.mss) { if (VMXNET3_VERSION_GE_4(adapter) && skb->encapsulation) { gdesc->txd.hlen = ctx.l4_offset + ctx.l4_hdr_size; - gdesc->txd.om = VMXNET3_OM_ENCAP; + if (VMXNET3_VERSION_GE_7(adapter)) { + gdesc->txd.om = VMXNET3_OM_TSO; + gdesc->txd.ext1 = 1; + } else { + gdesc->txd.om = VMXNET3_OM_ENCAP; + } gdesc->txd.msscof = ctx.mss; if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM) @@ -1144,8 +1183,15 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, skb->encapsulation) { gdesc->txd.hlen = ctx.l4_offset + ctx.l4_hdr_size; - gdesc->txd.om 
= VMXNET3_OM_ENCAP; - gdesc->txd.msscof = 0; /* Reserved */ + if (VMXNET3_VERSION_GE_7(adapter)) { + gdesc->txd.om = VMXNET3_OM_CSUM; + gdesc->txd.msscof = ctx.l4_offset + + skb->csum_offset; + gdesc->txd.ext1 = 1; + } else { + gdesc->txd.om = VMXNET3_OM_ENCAP; + gdesc->txd.msscof = 0; /* Reserved */ + } } else { gdesc->txd.hlen = ctx.l4_offset; gdesc->txd.om = VMXNET3_OM_CSUM; @@ -1193,7 +1239,7 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, if (tx_num_deferred >= le32_to_cpu(tq->shared->txThreshold)) { tq->shared->txNumDeferred = 0; VMXNET3_WRITE_BAR0_REG(adapter, - VMXNET3_REG_TXPROD + tq->qid * 8, + adapter->tx_prod_offset + tq->qid * 8, tq->tx_ring.next2fill); } @@ -1345,14 +1391,15 @@ static int vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, struct vmxnet3_adapter *adapter, int quota) { - static const u32 rxprod_reg[2] = { - VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2 + u32 rxprod_reg[2] = { + adapter->rx_prod_offset, adapter->rx_prod2_offset }; u32 num_pkts = 0; bool skip_page_frags = false; struct Vmxnet3_RxCompDesc *rcd; struct vmxnet3_rx_ctx *ctx = &rq->rx_ctx; u16 segCnt = 0, mss = 0; + int comp_offset, fill_offset; #ifdef __BIG_ENDIAN_BITFIELD struct Vmxnet3_RxDesc rxCmdDesc; struct Vmxnet3_RxCompDesc rxComp; @@ -1625,9 +1672,15 @@ not_lro: rcd_done: /* device may have skipped some rx descs */ - ring->next2comp = idx; - num_to_alloc = vmxnet3_cmd_ring_desc_avail(ring); ring = rq->rx_ring + ring_idx; + rbi->comp_state = VMXNET3_RXD_COMP_DONE; + + comp_offset = vmxnet3_cmd_ring_desc_avail(ring); + fill_offset = (idx > ring->next2fill ? 0 : ring->size) + + idx - ring->next2fill - 1; + if (!ring->isOutOfOrder || fill_offset >= comp_offset) + ring->next2comp = idx; + num_to_alloc = vmxnet3_cmd_ring_desc_avail(ring); /* Ensure that the writes to rxd->gen bits will be observed * after all other writes to rxd objects. 
@@ -1635,18 +1688,38 @@ rcd_done: dma_wmb(); while (num_to_alloc) { - vmxnet3_getRxDesc(rxd, &ring->base[ring->next2fill].rxd, - &rxCmdDesc); - BUG_ON(!rxd->addr); + rbi = rq->buf_info[ring_idx] + ring->next2fill; + if (!(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_OOORX_COMP))) + goto refill_buf; + if (ring_idx == 0) { + /* ring0 Type1 buffers can get skipped; re-fill them */ + if (rbi->buf_type != VMXNET3_RX_BUF_SKB) + goto refill_buf; + } + if (rbi->comp_state == VMXNET3_RXD_COMP_DONE) { +refill_buf: + vmxnet3_getRxDesc(rxd, &ring->base[ring->next2fill].rxd, + &rxCmdDesc); + WARN_ON(!rxd->addr); + + /* Recv desc is ready to be used by the device */ + rxd->gen = ring->gen; + vmxnet3_cmd_ring_adv_next2fill(ring); + rbi->comp_state = VMXNET3_RXD_COMP_PENDING; + num_to_alloc--; + } else { + /* rx completion hasn't occurred */ + ring->isOutOfOrder = 1; + break; + } + } - /* Recv desc is ready to be used by the device */ - rxd->gen = ring->gen; - vmxnet3_cmd_ring_adv_next2fill(ring); - num_to_alloc--; + if (num_to_alloc == 0) { + ring->isOutOfOrder = 0; } /* if needed, update the register */ - if (unlikely(rq->shared->updateRxProd)) { + if (unlikely(rq->shared->updateRxProd) && (ring->next2fill & 0xf) == 0) { VMXNET3_WRITE_BAR0_REG(adapter, rxprod_reg[ring_idx] + rq->qid * 8, ring->next2fill); @@ -1810,6 +1883,7 @@ vmxnet3_rq_init(struct vmxnet3_rx_queue *rq, memset(rq->rx_ring[i].base, 0, rq->rx_ring[i].size * sizeof(struct Vmxnet3_RxDesc)); rq->rx_ring[i].gen = VMXNET3_INIT_GEN; + rq->rx_ring[i].isOutOfOrder = 0; } if (vmxnet3_rq_alloc_rx_buf(rq, 0, rq->rx_ring[0].size - 1, adapter) == 0) { @@ -2000,8 +2074,17 @@ vmxnet3_poll_rx_only(struct napi_struct *napi, int budget) rxd_done = vmxnet3_rq_rx_complete(rq, adapter, budget); if (rxd_done < budget) { + struct Vmxnet3_RxCompDesc *rcd; +#ifdef __BIG_ENDIAN_BITFIELD + struct Vmxnet3_RxCompDesc rxComp; +#endif napi_complete_done(napi, rxd_done); vmxnet3_enable_intr(adapter, rq->comp_ring.intr_idx); + /* after unmasking 
the interrupt, check if any descriptors were completed */ + vmxnet3_getRxComp(rcd, &rq->comp_ring.base[rq->comp_ring.next2proc].rcd, + &rxComp); + if (rcd->gen == rq->comp_ring.gen && napi_reschedule(napi)) + vmxnet3_disable_intr(adapter, rq->comp_ring.intr_idx); } return rxd_done; } @@ -2627,6 +2710,23 @@ vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter) } static void +vmxnet3_init_bufsize(struct vmxnet3_adapter *adapter) +{ + struct Vmxnet3_DriverShared *shared = adapter->shared; + union Vmxnet3_CmdInfo *cmdInfo = &shared->cu.cmdInfo; + unsigned long flags; + + if (!VMXNET3_VERSION_GE_7(adapter)) + return; + + cmdInfo->ringBufSize = adapter->ringBufSize; + spin_lock_irqsave(&adapter->cmd_lock, flags); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_SET_RING_BUFFER_SIZE); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); +} + +static void vmxnet3_init_coalesce(struct vmxnet3_adapter *adapter) { struct Vmxnet3_DriverShared *shared = adapter->shared; @@ -2671,6 +2771,36 @@ vmxnet3_init_rssfields(struct vmxnet3_adapter *adapter) adapter->rss_fields = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); } else { + if (VMXNET3_VERSION_GE_7(adapter)) { + if ((adapter->rss_fields & VMXNET3_RSS_FIELDS_UDPIP4 || + adapter->rss_fields & VMXNET3_RSS_FIELDS_UDPIP6) && + vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_UDP_RSS)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_UDP_RSS; + } else { + adapter->dev_caps[0] &= ~(1UL << VMXNET3_CAP_UDP_RSS); + } + + if ((adapter->rss_fields & VMXNET3_RSS_FIELDS_ESPIP4) && + vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_ESP_RSS_IPV4)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_ESP_RSS_IPV4; + } else { + adapter->dev_caps[0] &= ~(1UL << VMXNET3_CAP_ESP_RSS_IPV4); + } + + if ((adapter->rss_fields & VMXNET3_RSS_FIELDS_ESPIP6) && + vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_ESP_RSS_IPV6)) { + adapter->dev_caps[0] |= 1UL << 
VMXNET3_CAP_ESP_RSS_IPV6; + } else { + adapter->dev_caps[0] &= ~(1UL << VMXNET3_CAP_ESP_RSS_IPV6); + } + + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DCR, adapter->dev_caps[0]); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_DCR0_REG); + adapter->dev_caps[0] = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + } cmdInfo->setRssFields = adapter->rss_fields; VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_SET_RSS_FIELDS); @@ -2734,14 +2864,15 @@ vmxnet3_activate_dev(struct vmxnet3_adapter *adapter) goto activate_err; } + vmxnet3_init_bufsize(adapter); vmxnet3_init_coalesce(adapter); vmxnet3_init_rssfields(adapter); for (i = 0; i < adapter->num_rx_queues; i++) { VMXNET3_WRITE_BAR0_REG(adapter, - VMXNET3_REG_RXPROD + i * VMXNET3_REG_ALIGN, + adapter->rx_prod_offset + i * VMXNET3_REG_ALIGN, adapter->rx_queue[i].rx_ring[0].next2fill); - VMXNET3_WRITE_BAR0_REG(adapter, (VMXNET3_REG_RXPROD2 + + VMXNET3_WRITE_BAR0_REG(adapter, (adapter->rx_prod2_offset + (i * VMXNET3_REG_ALIGN)), adapter->rx_queue[i].rx_ring[1].next2fill); } @@ -2907,19 +3038,29 @@ static void vmxnet3_adjust_rx_ring_size(struct vmxnet3_adapter *adapter) { size_t sz, i, ring0_size, ring1_size, comp_size; - if (adapter->netdev->mtu <= VMXNET3_MAX_SKB_BUF_SIZE - - VMXNET3_MAX_ETH_HDR_SIZE) { - adapter->skb_buf_size = adapter->netdev->mtu + - VMXNET3_MAX_ETH_HDR_SIZE; - if (adapter->skb_buf_size < VMXNET3_MIN_T0_BUF_SIZE) - adapter->skb_buf_size = VMXNET3_MIN_T0_BUF_SIZE; - - adapter->rx_buf_per_pkt = 1; + /* With version7 ring1 will have only T0 buffers */ + if (!VMXNET3_VERSION_GE_7(adapter)) { + if (adapter->netdev->mtu <= VMXNET3_MAX_SKB_BUF_SIZE - + VMXNET3_MAX_ETH_HDR_SIZE) { + adapter->skb_buf_size = adapter->netdev->mtu + + VMXNET3_MAX_ETH_HDR_SIZE; + if (adapter->skb_buf_size < VMXNET3_MIN_T0_BUF_SIZE) + adapter->skb_buf_size = VMXNET3_MIN_T0_BUF_SIZE; + + adapter->rx_buf_per_pkt = 1; + } else { + adapter->skb_buf_size = VMXNET3_MAX_SKB_BUF_SIZE; + sz = 
adapter->netdev->mtu - VMXNET3_MAX_SKB_BUF_SIZE + + VMXNET3_MAX_ETH_HDR_SIZE; + adapter->rx_buf_per_pkt = 1 + (sz + PAGE_SIZE - 1) / PAGE_SIZE; + } } else { - adapter->skb_buf_size = VMXNET3_MAX_SKB_BUF_SIZE; - sz = adapter->netdev->mtu - VMXNET3_MAX_SKB_BUF_SIZE + - VMXNET3_MAX_ETH_HDR_SIZE; - adapter->rx_buf_per_pkt = 1 + (sz + PAGE_SIZE - 1) / PAGE_SIZE; + adapter->skb_buf_size = min((int)adapter->netdev->mtu + VMXNET3_MAX_ETH_HDR_SIZE, + VMXNET3_MAX_SKB_BUF_SIZE); + adapter->rx_buf_per_pkt = 1; + adapter->ringBufSize.ring1BufSizeType0 = cpu_to_le16(adapter->skb_buf_size); + adapter->ringBufSize.ring1BufSizeType1 = 0; + adapter->ringBufSize.ring2BufSizeType1 = cpu_to_le16(PAGE_SIZE); } /* @@ -2935,6 +3076,11 @@ vmxnet3_adjust_rx_ring_size(struct vmxnet3_adapter *adapter) ring1_size = (ring1_size + sz - 1) / sz * sz; ring1_size = min_t(u32, ring1_size, VMXNET3_RX_RING2_MAX_SIZE / sz * sz); + /* For v7 and later, keep ring size power of 2 for UPT */ + if (VMXNET3_VERSION_GE_7(adapter)) { + ring0_size = rounddown_pow_of_two(ring0_size); + ring1_size = rounddown_pow_of_two(ring1_size); + } comp_size = ring0_size + ring1_size; for (i = 0; i < adapter->num_rx_queues; i++) { @@ -3185,6 +3331,54 @@ vmxnet3_declare_features(struct vmxnet3_adapter *adapter) NETIF_F_GSO_UDP_TUNNEL_CSUM; } + if (VMXNET3_VERSION_GE_7(adapter)) { + unsigned long flags; + + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_GENEVE_CHECKSUM_OFFLOAD)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_GENEVE_CHECKSUM_OFFLOAD; + } + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_VXLAN_CHECKSUM_OFFLOAD)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_VXLAN_CHECKSUM_OFFLOAD; + } + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_GENEVE_TSO)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_GENEVE_TSO; + } + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_VXLAN_TSO)) { + adapter->dev_caps[0] |= 1UL << 
VMXNET3_CAP_VXLAN_TSO; + } + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_GENEVE_OUTER_CHECKSUM_OFFLOAD)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_GENEVE_OUTER_CHECKSUM_OFFLOAD; + } + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_VXLAN_OUTER_CHECKSUM_OFFLOAD)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_VXLAN_OUTER_CHECKSUM_OFFLOAD; + } + + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DCR, adapter->dev_caps[0]); + spin_lock_irqsave(&adapter->cmd_lock, flags); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_DCR0_REG); + adapter->dev_caps[0] = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); + + if (!(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_GENEVE_CHECKSUM_OFFLOAD)) && + !(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_VXLAN_CHECKSUM_OFFLOAD)) && + !(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_GENEVE_TSO)) && + !(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_VXLAN_TSO))) { + netdev->hw_enc_features &= ~NETIF_F_GSO_UDP_TUNNEL; + netdev->hw_features &= ~NETIF_F_GSO_UDP_TUNNEL; + } + if (!(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_GENEVE_OUTER_CHECKSUM_OFFLOAD)) && + !(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_VXLAN_OUTER_CHECKSUM_OFFLOAD))) { + netdev->hw_enc_features &= ~NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->hw_features &= ~NETIF_F_GSO_UDP_TUNNEL_CSUM; + } + } + netdev->vlan_features = netdev->hw_features & ~(NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX); @@ -3472,7 +3666,12 @@ vmxnet3_probe_device(struct pci_dev *pdev, goto err_alloc_pci; ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_VRRS); - if (ver & (1 << VMXNET3_REV_6)) { + if (ver & (1 << VMXNET3_REV_7)) { + VMXNET3_WRITE_BAR1_REG(adapter, + VMXNET3_REG_VRRS, + 1 << VMXNET3_REV_7); + adapter->version = VMXNET3_REV_7 + 1; + } else if (ver & (1 << VMXNET3_REV_6)) { VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_VRRS, 1 << VMXNET3_REV_6); @@ -3520,6 +3719,39 @@ 
vmxnet3_probe_device(struct pci_dev *pdev, goto err_ver; } + if (VMXNET3_VERSION_GE_7(adapter)) { + adapter->devcap_supported[0] = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_DCR); + adapter->ptcap_supported[0] = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_PTCR); + if (adapter->devcap_supported[0] & (1UL << VMXNET3_CAP_LARGE_BAR)) { + adapter->dev_caps[0] = adapter->devcap_supported[0] & + (1UL << VMXNET3_CAP_LARGE_BAR); + } + if (!(adapter->ptcap_supported[0] & (1UL << VMXNET3_DCR_ERROR)) && + adapter->ptcap_supported[0] & (1UL << VMXNET3_CAP_OOORX_COMP) && + adapter->devcap_supported[0] & (1UL << VMXNET3_CAP_OOORX_COMP)) { + adapter->dev_caps[0] |= adapter->devcap_supported[0] & + (1UL << VMXNET3_CAP_OOORX_COMP); + } + if (adapter->dev_caps[0]) + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DCR, adapter->dev_caps[0]); + + spin_lock_irqsave(&adapter->cmd_lock, flags); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_DCR0_REG); + adapter->dev_caps[0] = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); + } + + if (VMXNET3_VERSION_GE_7(adapter) && + adapter->dev_caps[0] & (1UL << VMXNET3_CAP_LARGE_BAR)) { + adapter->tx_prod_offset = VMXNET3_REG_LB_TXPROD; + adapter->rx_prod_offset = VMXNET3_REG_LB_RXPROD; + adapter->rx_prod2_offset = VMXNET3_REG_LB_RXPROD2; + } else { + adapter->tx_prod_offset = VMXNET3_REG_TXPROD; + adapter->rx_prod_offset = VMXNET3_REG_RXPROD; + adapter->rx_prod2_offset = VMXNET3_REG_RXPROD2; + } + if (VMXNET3_VERSION_GE_6(adapter)) { spin_lock_irqsave(&adapter->cmd_lock, flags); VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c index 3172d46c0335..c3eaf1b864ed 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethtool.c +++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c @@ -1,7 +1,7 @@ /* * Linux driver for VMware's vmxnet3 ethernet NIC. * - * Copyright (C) 2008-2021, VMware, Inc. All Rights Reserved. 
+ * Copyright (C) 2008-2022, VMware, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -298,7 +298,7 @@ netdev_features_t vmxnet3_features_check(struct sk_buff *skb, return features; } -static void vmxnet3_enable_encap_offloads(struct net_device *netdev) +static void vmxnet3_enable_encap_offloads(struct net_device *netdev, netdev_features_t features) { struct vmxnet3_adapter *adapter = netdev_priv(netdev); @@ -306,8 +306,56 @@ static void vmxnet3_enable_encap_offloads(struct net_device *netdev) netdev->hw_enc_features |= NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_TSO | NETIF_F_TSO6 | - NETIF_F_LRO | NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_UDP_TUNNEL_CSUM; + NETIF_F_LRO; + if (features & NETIF_F_GSO_UDP_TUNNEL) + netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL; + if (features & NETIF_F_GSO_UDP_TUNNEL_CSUM) + netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM; + } + if (VMXNET3_VERSION_GE_7(adapter)) { + unsigned long flags; + + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_GENEVE_CHECKSUM_OFFLOAD)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_GENEVE_CHECKSUM_OFFLOAD; + } + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_VXLAN_CHECKSUM_OFFLOAD)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_VXLAN_CHECKSUM_OFFLOAD; + } + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_GENEVE_TSO)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_GENEVE_TSO; + } + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_VXLAN_TSO)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_VXLAN_TSO; + } + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_GENEVE_OUTER_CHECKSUM_OFFLOAD)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_GENEVE_OUTER_CHECKSUM_OFFLOAD; + } + if 
(vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_VXLAN_OUTER_CHECKSUM_OFFLOAD)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_VXLAN_OUTER_CHECKSUM_OFFLOAD; + } + + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DCR, adapter->dev_caps[0]); + spin_lock_irqsave(&adapter->cmd_lock, flags); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_DCR0_REG); + adapter->dev_caps[0] = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); + + if (!(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_GENEVE_CHECKSUM_OFFLOAD)) && + !(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_VXLAN_CHECKSUM_OFFLOAD)) && + !(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_GENEVE_TSO)) && + !(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_VXLAN_TSO))) { + netdev->hw_enc_features &= ~NETIF_F_GSO_UDP_TUNNEL; + } + if (!(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_GENEVE_OUTER_CHECKSUM_OFFLOAD)) && + !(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_VXLAN_OUTER_CHECKSUM_OFFLOAD))) { + netdev->hw_enc_features &= ~NETIF_F_GSO_UDP_TUNNEL_CSUM; + } } } @@ -322,6 +370,22 @@ static void vmxnet3_disable_encap_offloads(struct net_device *netdev) NETIF_F_LRO | NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM); } + if (VMXNET3_VERSION_GE_7(adapter)) { + unsigned long flags; + + adapter->dev_caps[0] &= ~(1UL << VMXNET3_CAP_GENEVE_CHECKSUM_OFFLOAD | + 1UL << VMXNET3_CAP_VXLAN_CHECKSUM_OFFLOAD | + 1UL << VMXNET3_CAP_GENEVE_TSO | + 1UL << VMXNET3_CAP_VXLAN_TSO | + 1UL << VMXNET3_CAP_GENEVE_OUTER_CHECKSUM_OFFLOAD | + 1UL << VMXNET3_CAP_VXLAN_OUTER_CHECKSUM_OFFLOAD); + + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DCR, adapter->dev_caps[0]); + spin_lock_irqsave(&adapter->cmd_lock, flags); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_DCR0_REG); + adapter->dev_caps[0] = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); + } } int vmxnet3_set_features(struct net_device *netdev, 
netdev_features_t features) @@ -357,8 +421,8 @@ int vmxnet3_set_features(struct net_device *netdev, netdev_features_t features) adapter->shared->devRead.misc.uptFeatures &= ~UPT1_F_RXVLAN; - if ((features & tun_offload_mask) != 0 && !udp_tun_enabled) { - vmxnet3_enable_encap_offloads(netdev); + if ((features & tun_offload_mask) != 0) { + vmxnet3_enable_encap_offloads(netdev, features); adapter->shared->devRead.misc.uptFeatures |= UPT1_F_RXINNEROFLD; } else if ((features & tun_offload_mask) == 0 && @@ -462,7 +526,7 @@ vmxnet3_get_regs(struct net_device *netdev, struct ethtool_regs *regs, void *p) for (i = 0; i < adapter->num_tx_queues; i++) { struct vmxnet3_tx_queue *tq = &adapter->tx_queue[i]; - buf[j++] = VMXNET3_READ_BAR0_REG(adapter, VMXNET3_REG_TXPROD + + buf[j++] = VMXNET3_READ_BAR0_REG(adapter, adapter->tx_prod_offset + i * VMXNET3_REG_ALIGN); buf[j++] = VMXNET3_GET_ADDR_LO(tq->tx_ring.basePA); @@ -490,9 +554,9 @@ vmxnet3_get_regs(struct net_device *netdev, struct ethtool_regs *regs, void *p) for (i = 0; i < adapter->num_rx_queues; i++) { struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i]; - buf[j++] = VMXNET3_READ_BAR0_REG(adapter, VMXNET3_REG_RXPROD + + buf[j++] = VMXNET3_READ_BAR0_REG(adapter, adapter->rx_prod_offset + i * VMXNET3_REG_ALIGN); - buf[j++] = VMXNET3_READ_BAR0_REG(adapter, VMXNET3_REG_RXPROD2 + + buf[j++] = VMXNET3_READ_BAR0_REG(adapter, adapter->rx_prod2_offset + i * VMXNET3_REG_ALIGN); buf[j++] = VMXNET3_GET_ADDR_LO(rq->rx_ring[0].basePA); @@ -660,6 +724,13 @@ vmxnet3_set_ringparam(struct net_device *netdev, new_rx_ring2_size = min_t(u32, new_rx_ring2_size, VMXNET3_RX_RING2_MAX_SIZE); + /* For v7 and later, keep ring size power of 2 for UPT */ + if (VMXNET3_VERSION_GE_7(adapter)) { + new_tx_ring_size = rounddown_pow_of_two(new_tx_ring_size); + new_rx_ring_size = rounddown_pow_of_two(new_rx_ring_size); + new_rx_ring2_size = rounddown_pow_of_two(new_rx_ring2_size); + } + /* rx data ring buffer size has to be a multiple of * 
VMXNET3_RXDATA_DESC_SIZE_ALIGN */ @@ -913,6 +984,39 @@ vmxnet3_set_rss_hash_opt(struct net_device *netdev, union Vmxnet3_CmdInfo *cmdInfo = &shared->cu.cmdInfo; unsigned long flags; + if (VMXNET3_VERSION_GE_7(adapter)) { + if ((rss_fields & VMXNET3_RSS_FIELDS_UDPIP4 || + rss_fields & VMXNET3_RSS_FIELDS_UDPIP6) && + vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_UDP_RSS)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_UDP_RSS; + } else { + adapter->dev_caps[0] &= ~(1UL << VMXNET3_CAP_UDP_RSS); + } + if ((rss_fields & VMXNET3_RSS_FIELDS_ESPIP4) && + vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_ESP_RSS_IPV4)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_ESP_RSS_IPV4; + } else { + adapter->dev_caps[0] &= ~(1UL << VMXNET3_CAP_ESP_RSS_IPV4); + } + if ((rss_fields & VMXNET3_RSS_FIELDS_ESPIP6) && + vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_ESP_RSS_IPV6)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_ESP_RSS_IPV6; + } else { + adapter->dev_caps[0] &= ~(1UL << VMXNET3_CAP_ESP_RSS_IPV6); + } + + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DCR, + adapter->dev_caps[0]); + spin_lock_irqsave(&adapter->cmd_lock, flags); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_GET_DCR0_REG); + adapter->dev_caps[0] = VMXNET3_READ_BAR1_REG(adapter, + VMXNET3_REG_CMD); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); + } spin_lock_irqsave(&adapter->cmd_lock, flags); cmdInfo->setRssFields = rss_fields; VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h index 7027ff483fa5..3367db23aa13 100644 --- a/drivers/net/vmxnet3/vmxnet3_int.h +++ b/drivers/net/vmxnet3/vmxnet3_int.h @@ -1,7 +1,7 @@ /* * Linux driver for VMware's vmxnet3 ethernet NIC. * - * Copyright (C) 2008-2021, VMware, Inc. All Rights Reserved. + * Copyright (C) 2008-2022, VMware, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -69,18 +69,19 @@ /* * Version numbers */ -#define VMXNET3_DRIVER_VERSION_STRING "1.6.0.0-k" +#define VMXNET3_DRIVER_VERSION_STRING "1.7.0.0-k" /* Each byte of this 32-bit integer encodes a version number in * VMXNET3_DRIVER_VERSION_STRING. */ -#define VMXNET3_DRIVER_VERSION_NUM 0x01060000 +#define VMXNET3_DRIVER_VERSION_NUM 0x01070000 #if defined(CONFIG_PCI_MSI) /* RSS only makes sense if MSI-X is supported. */ #define VMXNET3_RSS #endif +#define VMXNET3_REV_7 6 /* Vmxnet3 Rev. 7 */ #define VMXNET3_REV_6 5 /* Vmxnet3 Rev. 6 */ #define VMXNET3_REV_5 4 /* Vmxnet3 Rev. 5 */ #define VMXNET3_REV_4 3 /* Vmxnet3 Rev. 4 */ @@ -135,6 +136,7 @@ struct vmxnet3_cmd_ring { u32 next2fill; u32 next2comp; u8 gen; + u8 isOutOfOrder; dma_addr_t basePA; }; @@ -259,9 +261,13 @@ enum vmxnet3_rx_buf_type { VMXNET3_RX_BUF_PAGE = 2 }; +#define VMXNET3_RXD_COMP_PENDING 0 +#define VMXNET3_RXD_COMP_DONE 1 + struct vmxnet3_rx_buf_info { enum vmxnet3_rx_buf_type buf_type; u16 len; + u8 comp_state; union { struct sk_buff *skb; struct page *page; @@ -402,6 +408,13 @@ struct vmxnet3_adapter { dma_addr_t pm_conf_pa; dma_addr_t rss_conf_pa; bool queuesExtEnabled; + struct Vmxnet3_RingBufferSize ringBufSize; + u32 devcap_supported[8]; + u32 ptcap_supported[8]; + u32 dev_caps[8]; + u16 tx_prod_offset; + u16 rx_prod_offset; + u16 rx_prod2_offset; }; #define VMXNET3_WRITE_BAR0_REG(adapter, reg, val) \ @@ -431,11 +444,13 @@ struct vmxnet3_adapter { (adapter->version >= VMXNET3_REV_5 + 1) #define VMXNET3_VERSION_GE_6(adapter) \ (adapter->version >= VMXNET3_REV_6 + 1) +#define VMXNET3_VERSION_GE_7(adapter) \ + (adapter->version >= VMXNET3_REV_7 + 1) /* must be a multiple of VMXNET3_RING_SIZE_ALIGN */ #define VMXNET3_DEF_TX_RING_SIZE 512 #define VMXNET3_DEF_RX_RING_SIZE 1024 -#define VMXNET3_DEF_RX_RING2_SIZE 256 +#define VMXNET3_DEF_RX_RING2_SIZE 512 
#define VMXNET3_DEF_RXDATA_DESC_SIZE 128 @@ -494,6 +509,7 @@ void vmxnet3_set_ethtool_ops(struct net_device *netdev); void vmxnet3_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats); +bool vmxnet3_check_ptcapability(u32 cap_supported, u32 cap); extern char vmxnet3_driver_name[]; #endif diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index cfc30ce4c6e1..40445a12c682 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -814,8 +814,8 @@ static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf) */ if (rt6) { dst = &rt6->dst; - dev_replace_track(dst->dev, net->loopback_dev, - &dst->dev_tracker, GFP_KERNEL); + netdev_ref_replace(dst->dev, net->loopback_dev, + &dst->dev_tracker, GFP_KERNEL); dst->dev = net->loopback_dev; dst_release(dst); } @@ -1061,8 +1061,8 @@ static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf) */ if (rth) { dst = &rth->dst; - dev_replace_track(dst->dev, net->loopback_dev, - &dst->dev_tracker, GFP_KERNEL); + netdev_ref_replace(dst->dev, net->loopback_dev, + &dst->dev_tracker, GFP_KERNEL); dst->dev = net->loopback_dev; dst_release(dst); } diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 265d4a0245e7..8b0710b576c2 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -2385,15 +2385,15 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, vxlan_snoop(dev, &loopback, eth_hdr(skb)->h_source, 0, vni); u64_stats_update_begin(&tx_stats->syncp); - tx_stats->tx_packets++; - tx_stats->tx_bytes += len; + u64_stats_inc(&tx_stats->tx_packets); + u64_stats_add(&tx_stats->tx_bytes, len); u64_stats_update_end(&tx_stats->syncp); vxlan_vnifilter_count(src_vxlan, vni, NULL, VXLAN_VNI_STATS_TX, len); if (__netif_rx(skb) == NET_RX_SUCCESS) { u64_stats_update_begin(&rx_stats->syncp); - rx_stats->rx_packets++; - rx_stats->rx_bytes += len; + u64_stats_inc(&rx_stats->rx_packets); + u64_stats_add(&rx_stats->rx_bytes, len); 
u64_stats_update_end(&rx_stats->syncp); vxlan_vnifilter_count(dst_vxlan, vni, NULL, VXLAN_VNI_STATS_RX, len); diff --git a/drivers/net/wan/farsync.h b/drivers/net/wan/farsync.h index 5f43568a9715..63908dbbb02d 100644 --- a/drivers/net/wan/farsync.h +++ b/drivers/net/wan/farsync.h @@ -43,7 +43,7 @@ * This version number is incremented with each official release of the * package and is a simplified number for normal user reference. * Individual files are tracked by the version control system and may - * have individual versions (or IDs) that move much faster than the + * have individual versions (or IDs) that move much faster than * the release version as individual updates are tracked. */ #define FST_USER_VERSION "1.04" diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c index 7b8df406c773..7135d51d2d87 100644 --- a/drivers/net/wireguard/receive.c +++ b/drivers/net/wireguard/receive.c @@ -19,15 +19,8 @@ /* Must be called with bh disabled. */ static void update_rx_stats(struct wg_peer *peer, size_t len) { - struct pcpu_sw_netstats *tstats = - get_cpu_ptr(peer->device->dev->tstats); - - u64_stats_update_begin(&tstats->syncp); - ++tstats->rx_packets; - tstats->rx_bytes += len; + dev_sw_netstats_rx_add(peer->device->dev, len); peer->rx_bytes += len; - u64_stats_update_end(&tstats->syncp); - put_cpu_ptr(tstats); } #define SKB_TYPE_LE32(skb) (((struct message_header *)(skb)->data)->type) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 2f746eb64507..bd408d260e9c 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -290,8 +290,7 @@ static inline int hwsim_net_set_netgroup(struct net *net) { struct hwsim_net *hwsim_net = net_generic(net, hwsim_net_id); - hwsim_net->netgroup = ida_simple_get(&hwsim_netgroup_ida, - 0, 0, GFP_KERNEL); + hwsim_net->netgroup = ida_alloc(&hwsim_netgroup_ida, GFP_KERNEL); return hwsim_net->netgroup >= 0 ? 
0 : -ENOMEM; } @@ -4733,7 +4732,7 @@ static void __net_exit hwsim_exit_net(struct net *net) NULL); } - ida_simple_remove(&hwsim_netgroup_ida, hwsim_net_get_netgroup(net)); + ida_free(&hwsim_netgroup_ida, hwsim_net_get_netgroup(net)); } static struct pernet_operations hwsim_net_ops = { diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c index 8d8378bafd9b..1ac4684fab25 100644 --- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c +++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c @@ -20,9 +20,11 @@ static const struct ieee80211_txrx_stypes wilc_wfi_cfg80211_mgmt_types[NUM_NL80211_IFTYPES] = { [NL80211_IFTYPE_STATION] = { - .tx = 0xffff, + .tx = BIT(IEEE80211_STYPE_ACTION >> 4) | + BIT(IEEE80211_STYPE_AUTH >> 4), .rx = BIT(IEEE80211_STYPE_ACTION >> 4) | - BIT(IEEE80211_STYPE_PROBE_REQ >> 4) + BIT(IEEE80211_STYPE_PROBE_REQ >> 4) | + BIT(IEEE80211_STYPE_AUTH >> 4) }, [NL80211_IFTYPE_AP] = { .tx = 0xffff, @@ -305,6 +307,7 @@ static int connect(struct wiphy *wiphy, struct net_device *dev, int ret; u32 i; u8 security = WILC_FW_SEC_NO; + enum mfptype mfp_type = WILC_FW_MFP_NONE; enum authtype auth_type = WILC_FW_AUTH_ANY; u32 cipher_group; struct cfg80211_bss *bss; @@ -313,32 +316,9 @@ static int connect(struct wiphy *wiphy, struct net_device *dev, vif->connecting = true; - memset(priv->wep_key, 0, sizeof(priv->wep_key)); - memset(priv->wep_key_len, 0, sizeof(priv->wep_key_len)); - cipher_group = sme->crypto.cipher_group; if (cipher_group != 0) { - if (cipher_group == WLAN_CIPHER_SUITE_WEP40) { - security = WILC_FW_SEC_WEP; - - priv->wep_key_len[sme->key_idx] = sme->key_len; - memcpy(priv->wep_key[sme->key_idx], sme->key, - sme->key_len); - - wilc_set_wep_default_keyid(vif, sme->key_idx); - wilc_add_wep_key_bss_sta(vif, sme->key, sme->key_len, - sme->key_idx); - } else if (cipher_group == WLAN_CIPHER_SUITE_WEP104) { - security = WILC_FW_SEC_WEP_EXTENDED; - - priv->wep_key_len[sme->key_idx] = 
sme->key_len; - memcpy(priv->wep_key[sme->key_idx], sme->key, - sme->key_len); - - wilc_set_wep_default_keyid(vif, sme->key_idx); - wilc_add_wep_key_bss_sta(vif, sme->key, sme->key_len, - sme->key_idx); - } else if (sme->crypto.wpa_versions & NL80211_WPA_VERSION_2) { + if (sme->crypto.wpa_versions & NL80211_WPA_VERSION_2) { if (cipher_group == WLAN_CIPHER_SUITE_TKIP) security = WILC_FW_SEC_WPA2_TKIP; else @@ -373,8 +353,14 @@ static int connect(struct wiphy *wiphy, struct net_device *dev, auth_type = WILC_FW_AUTH_OPEN_SYSTEM; break; - case NL80211_AUTHTYPE_SHARED_KEY: - auth_type = WILC_FW_AUTH_SHARED_KEY; + case NL80211_AUTHTYPE_SAE: + auth_type = WILC_FW_AUTH_SAE; + if (sme->ssid_len) { + memcpy(vif->auth.ssid.ssid, sme->ssid, sme->ssid_len); + vif->auth.ssid.ssid_len = sme->ssid_len; + } + vif->auth.key_mgmt_suite = cpu_to_be32(sme->crypto.akm_suites[0]); + ether_addr_copy(vif->auth.bssid, sme->bssid); break; default: @@ -384,6 +370,10 @@ static int connect(struct wiphy *wiphy, struct net_device *dev, if (sme->crypto.n_akm_suites) { if (sme->crypto.akm_suites[0] == WLAN_AKM_SUITE_8021X) auth_type = WILC_FW_AUTH_IEEE8021; + else if (sme->crypto.akm_suites[0] == WLAN_AKM_SUITE_PSK_SHA256) + auth_type = WILC_FW_AUTH_OPEN_SYSTEM_SHA256; + else if (sme->crypto.akm_suites[0] == WLAN_AKM_SUITE_8021X_SHA256) + auth_type = WILC_FW_AUTH_IEE8021X_SHA256; } if (wfi_drv->usr_scan_req.scan_result) { @@ -427,6 +417,13 @@ static int connect(struct wiphy *wiphy, struct net_device *dev, wfi_drv->conn_info.arg = priv; wfi_drv->conn_info.param = join_params; + if (sme->mfp == NL80211_MFP_OPTIONAL) + mfp_type = WILC_FW_MFP_OPTIONAL; + else if (sme->mfp == NL80211_MFP_REQUIRED) + mfp_type = WILC_FW_MFP_REQUIRED; + + wfi_drv->conn_info.mfp_type = mfp_type; + ret = wilc_set_join_req(vif, bss->bssid, sme->ie, sme->ie_len); if (ret) { netdev_err(dev, "wilc_set_join_req(): Error\n"); @@ -487,14 +484,6 @@ static int disconnect(struct wiphy *wiphy, struct net_device *dev, return ret; } 
-static inline void wilc_wfi_cfg_copy_wep_info(struct wilc_priv *priv, - u8 key_index, - struct key_params *params) -{ - priv->wep_key_len[key_index] = params->key_len; - memcpy(priv->wep_key[key_index], params->key, params->key_len); -} - static int wilc_wfi_cfg_allocate_wpa_entry(struct wilc_priv *priv, u8 idx) { if (!priv->wilc_gtk[idx]) { @@ -514,6 +503,18 @@ static int wilc_wfi_cfg_allocate_wpa_entry(struct wilc_priv *priv, u8 idx) return 0; } +static int wilc_wfi_cfg_allocate_wpa_igtk_entry(struct wilc_priv *priv, u8 idx) +{ + idx -= 4; + if (!priv->wilc_igtk[idx]) { + priv->wilc_igtk[idx] = kzalloc(sizeof(*priv->wilc_igtk[idx]), + GFP_KERNEL); + if (!priv->wilc_igtk[idx]) + return -ENOMEM; + } + return 0; +} + static int wilc_wfi_cfg_copy_wpa_info(struct wilc_wfi_key *key_info, struct key_params *params) { @@ -550,35 +551,9 @@ static int add_key(struct wiphy *wiphy, struct net_device *netdev, u8 key_index, u8 op_mode; struct wilc_vif *vif = netdev_priv(netdev); struct wilc_priv *priv = &vif->priv; + struct wilc_wfi_key *key; switch (params->cipher) { - case WLAN_CIPHER_SUITE_WEP40: - case WLAN_CIPHER_SUITE_WEP104: - if (priv->wdev.iftype == NL80211_IFTYPE_AP) { - wilc_wfi_cfg_copy_wep_info(priv, key_index, params); - - if (params->cipher == WLAN_CIPHER_SUITE_WEP40) - mode = WILC_FW_SEC_WEP; - else - mode = WILC_FW_SEC_WEP_EXTENDED; - - ret = wilc_add_wep_key_bss_ap(vif, params->key, - params->key_len, - key_index, mode, - WILC_FW_AUTH_OPEN_SYSTEM); - break; - } - if (memcmp(params->key, priv->wep_key[key_index], - params->key_len)) { - wilc_wfi_cfg_copy_wep_info(priv, key_index, params); - - ret = wilc_add_wep_key_bss_sta(vif, params->key, - params->key_len, - key_index); - } - - break; - case WLAN_CIPHER_SUITE_TKIP: case WLAN_CIPHER_SUITE_CCMP: if (priv->wdev.iftype == NL80211_IFTYPE_AP || @@ -640,6 +615,26 @@ static int add_key(struct wiphy *wiphy, struct net_device *netdev, u8 key_index, key_index); break; + case WLAN_CIPHER_SUITE_AES_CMAC: + ret = 
wilc_wfi_cfg_allocate_wpa_igtk_entry(priv, key_index); + if (ret) + return -ENOMEM; + + key = priv->wilc_igtk[key_index - 4]; + ret = wilc_wfi_cfg_copy_wpa_info(key, params); + if (ret) + return -ENOMEM; + + if (priv->wdev.iftype == NL80211_IFTYPE_AP || + priv->wdev.iftype == NL80211_IFTYPE_P2P_GO) + op_mode = WILC_AP_MODE; + else + op_mode = WILC_STATION_MODE; + + ret = wilc_add_igtk(vif, params->key, keylen, params->seq, + params->seq_len, mac_addr, op_mode, + key_index); + break; default: netdev_err(netdev, "%s: Unsupported cipher\n", __func__); @@ -657,30 +652,34 @@ static int del_key(struct wiphy *wiphy, struct net_device *netdev, struct wilc_vif *vif = netdev_priv(netdev); struct wilc_priv *priv = &vif->priv; - if (priv->wilc_gtk[key_index]) { - kfree(priv->wilc_gtk[key_index]->key); - priv->wilc_gtk[key_index]->key = NULL; - kfree(priv->wilc_gtk[key_index]->seq); - priv->wilc_gtk[key_index]->seq = NULL; - - kfree(priv->wilc_gtk[key_index]); - priv->wilc_gtk[key_index] = NULL; - } - - if (priv->wilc_ptk[key_index]) { - kfree(priv->wilc_ptk[key_index]->key); - priv->wilc_ptk[key_index]->key = NULL; - kfree(priv->wilc_ptk[key_index]->seq); - priv->wilc_ptk[key_index]->seq = NULL; - kfree(priv->wilc_ptk[key_index]); - priv->wilc_ptk[key_index] = NULL; - } - - if (key_index <= 3 && priv->wep_key_len[key_index]) { - memset(priv->wep_key[key_index], 0, - priv->wep_key_len[key_index]); - priv->wep_key_len[key_index] = 0; - wilc_remove_wep_key(vif, key_index); + if (!pairwise && (key_index == 4 || key_index == 5)) { + key_index -= 4; + if (priv->wilc_igtk[key_index]) { + kfree(priv->wilc_igtk[key_index]->key); + priv->wilc_igtk[key_index]->key = NULL; + kfree(priv->wilc_igtk[key_index]->seq); + priv->wilc_igtk[key_index]->seq = NULL; + kfree(priv->wilc_igtk[key_index]); + priv->wilc_igtk[key_index] = NULL; + } + } else { + if (priv->wilc_gtk[key_index]) { + kfree(priv->wilc_gtk[key_index]->key); + priv->wilc_gtk[key_index]->key = NULL; + 
kfree(priv->wilc_gtk[key_index]->seq); + priv->wilc_gtk[key_index]->seq = NULL; + + kfree(priv->wilc_gtk[key_index]); + priv->wilc_gtk[key_index] = NULL; + } + if (priv->wilc_ptk[key_index]) { + kfree(priv->wilc_ptk[key_index]->key); + priv->wilc_ptk[key_index]->key = NULL; + kfree(priv->wilc_ptk[key_index]->seq); + priv->wilc_ptk[key_index]->seq = NULL; + kfree(priv->wilc_ptk[key_index]); + priv->wilc_ptk[key_index] = NULL; + } } return 0; @@ -695,11 +694,20 @@ static int get_key(struct wiphy *wiphy, struct net_device *netdev, u8 key_index, struct key_params key_params; if (!pairwise) { - key_params.key = priv->wilc_gtk[key_index]->key; - key_params.cipher = priv->wilc_gtk[key_index]->cipher; - key_params.key_len = priv->wilc_gtk[key_index]->key_len; - key_params.seq = priv->wilc_gtk[key_index]->seq; - key_params.seq_len = priv->wilc_gtk[key_index]->seq_len; + if (key_index == 4 || key_index == 5) { + key_index -= 4; + key_params.key = priv->wilc_igtk[key_index]->key; + key_params.cipher = priv->wilc_igtk[key_index]->cipher; + key_params.key_len = priv->wilc_igtk[key_index]->key_len; + key_params.seq = priv->wilc_igtk[key_index]->seq; + key_params.seq_len = priv->wilc_igtk[key_index]->seq_len; + } else { + key_params.key = priv->wilc_gtk[key_index]->key; + key_params.cipher = priv->wilc_gtk[key_index]->cipher; + key_params.key_len = priv->wilc_gtk[key_index]->key_len; + key_params.seq = priv->wilc_gtk[key_index]->seq; + key_params.seq_len = priv->wilc_gtk[key_index]->seq_len; + } } else { key_params.key = priv->wilc_ptk[key_index]->key; key_params.cipher = priv->wilc_ptk[key_index]->cipher; @@ -713,14 +721,19 @@ static int get_key(struct wiphy *wiphy, struct net_device *netdev, u8 key_index, return 0; } +/* wiphy_new_nm() will WARNON if not present */ static int set_default_key(struct wiphy *wiphy, struct net_device *netdev, u8 key_index, bool unicast, bool multicast) { - struct wilc_vif *vif = netdev_priv(netdev); + return 0; +} - wilc_set_wep_default_keyid(vif, 
key_index); +static int set_default_mgmt_key(struct wiphy *wiphy, struct net_device *netdev, + u8 key_index) +{ + struct wilc_vif *vif = netdev_priv(netdev); - return 0; + return wilc_set_default_mgmt_key_index(vif, key_index); } static int get_station(struct wiphy *wiphy, struct net_device *dev, @@ -977,6 +990,18 @@ static inline void wilc_wfi_cfg_parse_ch_attr(u8 *buf, u32 len, u8 sta_ch) } } +bool wilc_wfi_mgmt_frame_rx(struct wilc_vif *vif, u8 *buff, u32 size) +{ + struct wilc *wl = vif->wilc; + struct wilc_priv *priv = &vif->priv; + int freq, ret; + + freq = ieee80211_channel_to_frequency(wl->op_ch, NL80211_BAND_2GHZ); + ret = cfg80211_rx_mgmt(&priv->wdev, freq, 0, buff, size, 0); + + return ret; +} + void wilc_wfi_p2p_rx(struct wilc_vif *vif, u8 *buff, u32 size) { struct wilc *wl = vif->wilc; @@ -1162,8 +1187,14 @@ static int mgmt_tx(struct wiphy *wiphy, goto out_txq_add_pkt; } - if (!ieee80211_is_public_action((struct ieee80211_hdr *)buf, len)) + if (!ieee80211_is_public_action((struct ieee80211_hdr *)buf, len)) { + if (chan) + wilc_set_mac_chnl_num(vif, chan->hw_value); + else + wilc_set_mac_chnl_num(vif, vif->wilc->op_ch); + goto out_set_timeout; + } d = (struct wilc_p2p_pub_act_frame *)(&mgmt->u.action); if (d->oui_type != WLAN_OUI_TYPE_WFA_P2P || @@ -1230,6 +1261,7 @@ void wilc_update_mgmt_frame_registrations(struct wiphy *wiphy, struct wilc_vif *vif = netdev_priv(wdev->netdev); u32 presp_bit = BIT(IEEE80211_STYPE_PROBE_REQ >> 4); u32 action_bit = BIT(IEEE80211_STYPE_ACTION >> 4); + u32 pauth_bit = BIT(IEEE80211_STYPE_AUTH >> 4); if (wl->initialized) { bool prev = vif->mgmt_reg_stypes & presp_bit; @@ -1243,10 +1275,26 @@ void wilc_update_mgmt_frame_registrations(struct wiphy *wiphy, if (now != prev) wilc_frame_register(vif, IEEE80211_STYPE_ACTION, now); + + prev = vif->mgmt_reg_stypes & pauth_bit; + now = upd->interface_stypes & pauth_bit; + if (now != prev) + wilc_frame_register(vif, IEEE80211_STYPE_AUTH, now); } vif->mgmt_reg_stypes = - 
upd->interface_stypes & (presp_bit | action_bit); + upd->interface_stypes & (presp_bit | action_bit | pauth_bit); +} + +static int external_auth(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_external_auth_params *auth) +{ + struct wilc_vif *vif = netdev_priv(dev); + + if (auth->status == WLAN_STATUS_SUCCESS) + wilc_set_external_auth_param(vif, auth); + + return 0; } static int set_cqm_rssi_config(struct wiphy *wiphy, struct net_device *dev, @@ -1647,6 +1695,7 @@ static const struct cfg80211_ops wilc_cfg80211_ops = { .del_key = del_key, .get_key = get_key, .set_default_key = set_default_key, + .set_default_mgmt_key = set_default_mgmt_key, .add_virtual_intf = add_virtual_intf, .del_virtual_intf = del_virtual_intf, .change_virtual_intf = change_virtual_intf, @@ -1662,6 +1711,7 @@ static const struct cfg80211_ops wilc_cfg80211_ops = { .change_bss = change_bss, .set_wiphy_params = set_wiphy_params, + .external_auth = external_auth, .set_pmksa = set_pmksa, .del_pmksa = del_pmksa, .flush_pmksa = flush_pmksa, @@ -1804,7 +1854,7 @@ struct wilc *wilc_create_wiphy(struct device *dev) BIT(NL80211_IFTYPE_P2P_GO) | BIT(NL80211_IFTYPE_P2P_CLIENT); wiphy->flags |= WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL; - + wiphy->features |= NL80211_FEATURE_SAE; set_wiphy_dev(wiphy, dev); wl->wiphy = wiphy; ret = wiphy_register(wiphy); diff --git a/drivers/net/wireless/microchip/wilc1000/fw.h b/drivers/net/wireless/microchip/wilc1000/fw.h index 1114530d03e4..5c5cac4aab02 100644 --- a/drivers/net/wireless/microchip/wilc1000/fw.h +++ b/drivers/net/wireless/microchip/wilc1000/fw.h @@ -41,21 +41,23 @@ struct wilc_drv_handler { u8 mode; } __packed; -struct wilc_wep_key { - u8 index; +struct wilc_sta_wpa_ptk { + u8 mac_addr[ETH_ALEN]; u8 key_len; u8 key[]; } __packed; -struct wilc_sta_wpa_ptk { +struct wilc_ap_wpa_ptk { u8 mac_addr[ETH_ALEN]; + u8 index; u8 key_len; u8 key[]; } __packed; -struct wilc_ap_wpa_ptk { - u8 mac_addr[ETH_ALEN]; +struct wilc_wpa_igtk { u8 index; + u8 pn_len; + u8 
pn[6]; u8 key_len; u8 key[]; } __packed; @@ -116,4 +118,13 @@ struct wilc_join_bss_param { struct wilc_noa_opp_enable opp_en; }; } __packed; + +struct wilc_external_auth_param { + u8 action; + u8 bssid[ETH_ALEN]; + u8 ssid[IEEE80211_MAX_SSID_LEN]; + u8 ssid_len; + __le32 key_mgmt_suites; + __le16 status; +} __packed; #endif diff --git a/drivers/net/wireless/microchip/wilc1000/hif.c b/drivers/net/wireless/microchip/wilc1000/hif.c index 71b44cfe0dfc..4038a254465f 100644 --- a/drivers/net/wireless/microchip/wilc1000/hif.c +++ b/drivers/net/wireless/microchip/wilc1000/hif.c @@ -271,12 +271,19 @@ error: static int wilc_send_connect_wid(struct wilc_vif *vif) { int result = 0; - struct wid wid_list[4]; + struct wid wid_list[5]; u32 wid_cnt = 0; struct host_if_drv *hif_drv = vif->hif_drv; struct wilc_conn_info *conn_attr = &hif_drv->conn_info; struct wilc_join_bss_param *bss_param = conn_attr->param; + + wid_list[wid_cnt].id = WID_SET_MFP; + wid_list[wid_cnt].type = WID_CHAR; + wid_list[wid_cnt].size = sizeof(char); + wid_list[wid_cnt].val = (s8 *)&conn_attr->mfp_type; + wid_cnt++; + wid_list[wid_cnt].id = WID_INFO_ELEMENT_ASSOCIATE; wid_list[wid_cnt].type = WID_BIN_DATA; wid_list[wid_cnt].val = conn_attr->req_ies; @@ -306,7 +313,10 @@ static int wilc_send_connect_wid(struct wilc_vif *vif) netdev_err(vif->ndev, "failed to send config packet\n"); goto error; } else { - hif_drv->hif_state = HOST_IF_WAITING_CONN_RESP; + if (conn_attr->auth_type == WILC_FW_AUTH_SAE) + hif_drv->hif_state = HOST_IF_EXTERNAL_AUTH; + else + hif_drv->hif_state = HOST_IF_WAITING_CONN_RESP; } return 0; @@ -665,7 +675,12 @@ static void handle_rcvd_gnrl_async_info(struct work_struct *work) goto free_msg; } - if (hif_drv->hif_state == HOST_IF_WAITING_CONN_RESP) { + + if (hif_drv->hif_state == HOST_IF_EXTERNAL_AUTH) { + cfg80211_external_auth_request(vif->ndev, &vif->auth, + GFP_KERNEL); + hif_drv->hif_state = HOST_IF_WAITING_CONN_RESP; + } else if (hif_drv->hif_state == HOST_IF_WAITING_CONN_RESP) { 
host_int_parse_assoc_resp_info(vif, mac_info->status); } else if (mac_info->status == WILC_MAC_STATUS_DISCONNECTED) { if (hif_drv->hif_state == HOST_IF_CONNECTED) { @@ -710,7 +725,8 @@ int wilc_disconnect(struct wilc_vif *vif) } if (conn_info->conn_result) { - if (hif_drv->hif_state == HOST_IF_WAITING_CONN_RESP) + if (hif_drv->hif_state == HOST_IF_WAITING_CONN_RESP || + hif_drv->hif_state == HOST_IF_EXTERNAL_AUTH) del_timer(&hif_drv->connect_timer); conn_info->conn_result(CONN_DISCONN_EVENT_DISCONN_NOTIF, 0, @@ -986,6 +1002,31 @@ void wilc_set_wowlan_trigger(struct wilc_vif *vif, bool enabled) pr_err("Failed to send wowlan trigger config packet\n"); } +int wilc_set_external_auth_param(struct wilc_vif *vif, + struct cfg80211_external_auth_params *auth) +{ + int ret; + struct wid wid; + struct wilc_external_auth_param *param; + + wid.id = WID_EXTERNAL_AUTH_PARAM; + wid.type = WID_BIN_DATA; + wid.size = sizeof(*param); + param = kzalloc(sizeof(*param), GFP_KERNEL); + if (!param) + return -EINVAL; + + wid.val = (u8 *)param; + param->action = auth->action; + ether_addr_copy(param->bssid, auth->bssid); + memcpy(param->ssid, auth->ssid.ssid, auth->ssid.ssid_len); + param->ssid_len = auth->ssid.ssid_len; + ret = wilc_send_config_pkt(vif, WILC_SET_CFG, &wid, 1); + + kfree(param); + return ret; +} + static void handle_scan_timer(struct work_struct *work) { struct host_if_msg *msg = container_of(work, struct host_if_msg, work); @@ -1038,108 +1079,6 @@ static void timer_connect_cb(struct timer_list *t) kfree(msg); } -int wilc_remove_wep_key(struct wilc_vif *vif, u8 index) -{ - struct wid wid; - int result; - - wid.id = WID_REMOVE_WEP_KEY; - wid.type = WID_STR; - wid.size = sizeof(char); - wid.val = &index; - - result = wilc_send_config_pkt(vif, WILC_SET_CFG, &wid, 1); - if (result) - netdev_err(vif->ndev, - "Failed to send remove wep key config packet\n"); - return result; -} - -int wilc_set_wep_default_keyid(struct wilc_vif *vif, u8 index) -{ - struct wid wid; - int result; - 
- wid.id = WID_KEY_ID; - wid.type = WID_CHAR; - wid.size = sizeof(char); - wid.val = &index; - result = wilc_send_config_pkt(vif, WILC_SET_CFG, &wid, 1); - if (result) - netdev_err(vif->ndev, - "Failed to send wep default key config packet\n"); - - return result; -} - -int wilc_add_wep_key_bss_sta(struct wilc_vif *vif, const u8 *key, u8 len, - u8 index) -{ - struct wid wid; - int result; - struct wilc_wep_key *wep_key; - - wid.id = WID_ADD_WEP_KEY; - wid.type = WID_STR; - wid.size = sizeof(*wep_key) + len; - wep_key = kzalloc(wid.size, GFP_KERNEL); - if (!wep_key) - return -ENOMEM; - - wid.val = (u8 *)wep_key; - - wep_key->index = index; - wep_key->key_len = len; - memcpy(wep_key->key, key, len); - - result = wilc_send_config_pkt(vif, WILC_SET_CFG, &wid, 1); - if (result) - netdev_err(vif->ndev, - "Failed to add wep key config packet\n"); - - kfree(wep_key); - return result; -} - -int wilc_add_wep_key_bss_ap(struct wilc_vif *vif, const u8 *key, u8 len, - u8 index, u8 mode, enum authtype auth_type) -{ - struct wid wid_list[3]; - int result; - struct wilc_wep_key *wep_key; - - wid_list[0].id = WID_11I_MODE; - wid_list[0].type = WID_CHAR; - wid_list[0].size = sizeof(char); - wid_list[0].val = &mode; - - wid_list[1].id = WID_AUTH_TYPE; - wid_list[1].type = WID_CHAR; - wid_list[1].size = sizeof(char); - wid_list[1].val = (s8 *)&auth_type; - - wid_list[2].id = WID_WEP_KEY_VALUE; - wid_list[2].type = WID_STR; - wid_list[2].size = sizeof(*wep_key) + len; - wep_key = kzalloc(wid_list[2].size, GFP_KERNEL); - if (!wep_key) - return -ENOMEM; - - wid_list[2].val = (u8 *)wep_key; - - wep_key->index = index; - wep_key->key_len = len; - memcpy(wep_key->key, key, len); - result = wilc_send_config_pkt(vif, WILC_SET_CFG, wid_list, - ARRAY_SIZE(wid_list)); - if (result) - netdev_err(vif->ndev, - "Failed to add wep ap key config packet\n"); - - kfree(wep_key); - return result; -} - int wilc_add_ptk(struct wilc_vif *vif, const u8 *ptk, u8 ptk_key_len, const u8 *mac_addr, const u8 
*rx_mic, const u8 *tx_mic, u8 mode, u8 cipher_mode, u8 index) @@ -1211,6 +1150,36 @@ int wilc_add_ptk(struct wilc_vif *vif, const u8 *ptk, u8 ptk_key_len, return result; } +int wilc_add_igtk(struct wilc_vif *vif, const u8 *igtk, u8 igtk_key_len, + const u8 *pn, u8 pn_len, const u8 *mac_addr, u8 mode, u8 index) +{ + int result = 0; + u8 t_key_len = igtk_key_len; + struct wid wid; + struct wilc_wpa_igtk *key_buf; + + key_buf = kzalloc(sizeof(*key_buf) + t_key_len, GFP_KERNEL); + if (!key_buf) + return -ENOMEM; + + key_buf->index = index; + + memcpy(&key_buf->pn[0], pn, pn_len); + key_buf->pn_len = pn_len; + + memcpy(&key_buf->key[0], igtk, igtk_key_len); + key_buf->key_len = t_key_len; + + wid.id = WID_ADD_IGTK; + wid.type = WID_STR; + wid.size = sizeof(*key_buf) + t_key_len; + wid.val = (s8 *)key_buf; + result = wilc_send_config_pkt(vif, WILC_SET_CFG, &wid, 1); + kfree(key_buf); + + return result; +} + int wilc_add_rx_gtk(struct wilc_vif *vif, const u8 *rx_gtk, u8 gtk_key_len, u8 index, u32 key_rsc_len, const u8 *key_rsc, const u8 *rx_mic, const u8 *tx_mic, u8 mode, @@ -1749,6 +1718,10 @@ void wilc_frame_register(struct wilc_vif *vif, u16 frame_type, bool reg) reg_frame.reg_id = WILC_FW_PROBE_REQ_IDX; break; + case IEEE80211_STYPE_AUTH: + reg_frame.reg_id = WILC_FW_AUTH_REQ_IDX; + break; + default: break; } @@ -1996,3 +1969,20 @@ int wilc_get_tx_power(struct wilc_vif *vif, u8 *tx_power) return wilc_send_config_pkt(vif, WILC_GET_CFG, &wid, 1); } + +int wilc_set_default_mgmt_key_index(struct wilc_vif *vif, u8 index) +{ + struct wid wid; + int result; + + wid.id = WID_DEFAULT_MGMT_KEY_ID; + wid.type = WID_CHAR; + wid.size = sizeof(char); + wid.val = &index; + result = wilc_send_config_pkt(vif, WILC_SET_CFG, &wid, 1); + if (result) + netdev_err(vif->ndev, + "Failed to send default mgmt key index\n"); + + return result; +} diff --git a/drivers/net/wireless/microchip/wilc1000/hif.h b/drivers/net/wireless/microchip/wilc1000/hif.h index 77616fc77575..d8dd94dcfe14 100644 --- 
a/drivers/net/wireless/microchip/wilc1000/hif.h +++ b/drivers/net/wireless/microchip/wilc1000/hif.h @@ -47,6 +47,7 @@ enum host_if_state { HOST_IF_WAITING_CONN_RESP = 3, HOST_IF_CONNECTED = 4, HOST_IF_P2P_LISTEN = 5, + HOST_IF_EXTERNAL_AUTH = 6, HOST_IF_FORCE_32BIT = 0xFFFFFFFF }; @@ -107,6 +108,7 @@ struct wilc_conn_info { u8 bssid[ETH_ALEN]; u8 security; enum authtype auth_type; + enum mfptype mfp_type; u8 ch; u8 *req_ies; size_t req_ies_len; @@ -151,15 +153,12 @@ struct host_if_drv { }; struct wilc_vif; -int wilc_remove_wep_key(struct wilc_vif *vif, u8 index); -int wilc_set_wep_default_keyid(struct wilc_vif *vif, u8 index); -int wilc_add_wep_key_bss_sta(struct wilc_vif *vif, const u8 *key, u8 len, - u8 index); -int wilc_add_wep_key_bss_ap(struct wilc_vif *vif, const u8 *key, u8 len, - u8 index, u8 mode, enum authtype auth_type); int wilc_add_ptk(struct wilc_vif *vif, const u8 *ptk, u8 ptk_key_len, const u8 *mac_addr, const u8 *rx_mic, const u8 *tx_mic, u8 mode, u8 cipher_mode, u8 index); +int wilc_add_igtk(struct wilc_vif *vif, const u8 *igtk, u8 igtk_key_len, + const u8 *pn, u8 pn_len, const u8 *mac_addr, u8 mode, + u8 index); s32 wilc_get_inactive_time(struct wilc_vif *vif, const u8 *mac, u32 *out_val); int wilc_add_rx_gtk(struct wilc_vif *vif, const u8 *rx_gtk, u8 gtk_key_len, @@ -208,9 +207,12 @@ int wilc_get_vif_idx(struct wilc_vif *vif); int wilc_set_tx_power(struct wilc_vif *vif, u8 tx_power); int wilc_get_tx_power(struct wilc_vif *vif, u8 *tx_power); void wilc_set_wowlan_trigger(struct wilc_vif *vif, bool enabled); +int wilc_set_external_auth_param(struct wilc_vif *vif, + struct cfg80211_external_auth_params *param); void wilc_scan_complete_received(struct wilc *wilc, u8 *buffer, u32 length); void wilc_network_info_received(struct wilc *wilc, u8 *buffer, u32 length); void wilc_gnrl_async_info_received(struct wilc *wilc, u8 *buffer, u32 length); void *wilc_parse_join_bss_param(struct cfg80211_bss *bss, struct cfg80211_crypto_settings *crypto); +int 
wilc_set_default_mgmt_key_index(struct wilc_vif *vif, u8 index); #endif diff --git a/drivers/net/wireless/microchip/wilc1000/netdev.c b/drivers/net/wireless/microchip/wilc1000/netdev.c index 3c292e3464c2..fcc4e61592ee 100644 --- a/drivers/net/wireless/microchip/wilc1000/netdev.c +++ b/drivers/net/wireless/microchip/wilc1000/netdev.c @@ -835,15 +835,24 @@ void wilc_frmw_to_host(struct wilc *wilc, u8 *buff, u32 size, } } -void wilc_wfi_mgmt_rx(struct wilc *wilc, u8 *buff, u32 size) +void wilc_wfi_mgmt_rx(struct wilc *wilc, u8 *buff, u32 size, bool is_auth) { int srcu_idx; struct wilc_vif *vif; srcu_idx = srcu_read_lock(&wilc->srcu); list_for_each_entry_rcu(vif, &wilc->vif_list, list) { + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buff; u16 type = le16_to_cpup((__le16 *)buff); u32 type_bit = BIT(type >> 4); + u32 auth_bit = BIT(IEEE80211_STYPE_AUTH >> 4); + + if ((vif->mgmt_reg_stypes & auth_bit && + ieee80211_is_auth(mgmt->frame_control)) && + vif->iftype == WILC_STATION_MODE && is_auth) { + wilc_wfi_mgmt_frame_rx(vif, buff, size); + break; + } if (vif->priv.p2p_listen_state && vif->mgmt_reg_stypes & type_bit) diff --git a/drivers/net/wireless/microchip/wilc1000/netdev.h b/drivers/net/wireless/microchip/wilc1000/netdev.h index a067274c2014..822e65d00f53 100644 --- a/drivers/net/wireless/microchip/wilc1000/netdev.h +++ b/drivers/net/wireless/microchip/wilc1000/netdev.h @@ -45,12 +45,6 @@ struct wilc_wfi_key { u32 cipher; }; -struct wilc_wfi_wep_key { - u8 *key; - u8 key_len; - u8 key_idx; -}; - struct sta_info { u8 sta_associated_bss[WILC_MAX_NUM_STA][ETH_ALEN]; }; @@ -63,8 +57,6 @@ struct wilc_wfi_p2p_listen_params { }; static const u32 wilc_cipher_suites[] = { - WLAN_CIPHER_SUITE_WEP40, - WLAN_CIPHER_SUITE_WEP104, WLAN_CIPHER_SUITE_TKIP, WLAN_CIPHER_SUITE_CCMP, WLAN_CIPHER_SUITE_AES_CMAC @@ -132,13 +124,12 @@ struct wilc_priv { struct net_device *dev; struct host_if_drv *hif_drv; struct wilc_pmkid_attr pmkid_list; - u8 wep_key[4][WLAN_KEY_LEN_WEP104]; - 
u8 wep_key_len[4]; /* The real interface that the monitor is on */ struct net_device *real_ndev; struct wilc_wfi_key *wilc_gtk[WILC_MAX_NUM_STA]; struct wilc_wfi_key *wilc_ptk[WILC_MAX_NUM_STA]; + struct wilc_wfi_key *wilc_igtk[2]; u8 wilc_groupkey; /* mutexes */ @@ -195,6 +186,7 @@ struct wilc_vif { struct wilc_priv priv; struct list_head list; struct cfg80211_bss *bss; + struct cfg80211_external_auth_params auth; }; struct wilc_tx_queue_status { @@ -288,7 +280,7 @@ struct wilc_wfi_mon_priv { void wilc_frmw_to_host(struct wilc *wilc, u8 *buff, u32 size, u32 pkt_offset); void wilc_mac_indicate(struct wilc *wilc); void wilc_netdev_cleanup(struct wilc *wilc); -void wilc_wfi_mgmt_rx(struct wilc *wilc, u8 *buff, u32 size); +void wilc_wfi_mgmt_rx(struct wilc *wilc, u8 *buff, u32 size, bool is_auth); void wilc_wlan_set_bssid(struct net_device *wilc_netdev, const u8 *bssid, u8 mode); struct wilc_vif *wilc_netdev_ifc_init(struct wilc *wl, const char *name, diff --git a/drivers/net/wireless/microchip/wilc1000/spi.c b/drivers/net/wireless/microchip/wilc1000/spi.c index 18420e954402..2ae8dd3411ac 100644 --- a/drivers/net/wireless/microchip/wilc1000/spi.c +++ b/drivers/net/wireless/microchip/wilc1000/spi.c @@ -191,11 +191,11 @@ static void wilc_wlan_power(struct wilc *wilc, bool on) /* assert ENABLE: */ gpiod_set_value(gpios->enable, 1); mdelay(5); - /* deassert RESET: */ - gpiod_set_value(gpios->reset, 0); - } else { /* assert RESET: */ gpiod_set_value(gpios->reset, 1); + } else { + /* deassert RESET: */ + gpiod_set_value(gpios->reset, 0); /* deassert ENABLE: */ gpiod_set_value(gpios->enable, 0); } diff --git a/drivers/net/wireless/microchip/wilc1000/wlan.c b/drivers/net/wireless/microchip/wilc1000/wlan.c index 48441f0389ca..f3f504d12873 100644 --- a/drivers/net/wireless/microchip/wilc1000/wlan.c +++ b/drivers/net/wireless/microchip/wilc1000/wlan.c @@ -968,7 +968,8 @@ static void wilc_wlan_handle_rx_buff(struct wilc *wilc, u8 *buffer, int size) if (pkt_offset & 
IS_MANAGMEMENT) { buff_ptr += HOST_HDR_OFFSET; - wilc_wfi_mgmt_rx(wilc, buff_ptr, pkt_len); + wilc_wfi_mgmt_rx(wilc, buff_ptr, pkt_len, + pkt_offset & IS_MGMT_AUTH_PKT); } else { if (!is_cfg_packet) { wilc_frmw_to_host(wilc, buff_ptr, pkt_len, diff --git a/drivers/net/wireless/microchip/wilc1000/wlan.h b/drivers/net/wireless/microchip/wilc1000/wlan.h index eb7978166d73..b45e72789a0e 100644 --- a/drivers/net/wireless/microchip/wilc1000/wlan.h +++ b/drivers/net/wireless/microchip/wilc1000/wlan.h @@ -305,6 +305,7 @@ #define IS_MANAGMEMENT 0x100 #define IS_MANAGMEMENT_CALLBACK 0x080 #define IS_MGMT_STATUS_SUCCES 0x040 +#define IS_MGMT_AUTH_PKT 0x010 #define WILC_WID_TYPE GENMASK(15, 12) #define WILC_VMM_ENTRY_FULL_RETRY 1 @@ -423,6 +424,7 @@ int wilc_wlan_get_num_conn_ifcs(struct wilc *wilc); netdev_tx_t wilc_mac_xmit(struct sk_buff *skb, struct net_device *dev); void wilc_wfi_p2p_rx(struct wilc_vif *vif, u8 *buff, u32 size); +bool wilc_wfi_mgmt_frame_rx(struct wilc_vif *vif, u8 *buff, u32 size); void host_wakeup_notify(struct wilc *wilc); void host_sleep_notify(struct wilc *wilc); void chip_allow_sleep(struct wilc *wilc); diff --git a/drivers/net/wireless/microchip/wilc1000/wlan_if.h b/drivers/net/wireless/microchip/wilc1000/wlan_if.h index 6eb7eb4ac294..df2f5a63bdf6 100644 --- a/drivers/net/wireless/microchip/wilc1000/wlan_if.h +++ b/drivers/net/wireless/microchip/wilc1000/wlan_if.h @@ -85,7 +85,16 @@ enum authtype { WILC_FW_AUTH_OPEN_SYSTEM = 1, WILC_FW_AUTH_SHARED_KEY = 2, WILC_FW_AUTH_ANY = 3, - WILC_FW_AUTH_IEEE8021 = 5 + WILC_FW_AUTH_IEEE8021 = 5, + WILC_FW_AUTH_SAE = 7, + WILC_FW_AUTH_IEE8021X_SHA256 = 9, + WILC_FW_AUTH_OPEN_SYSTEM_SHA256 = 13 +}; + +enum mfptype { + WILC_FW_MFP_NONE = 0x0, + WILC_FW_MFP_OPTIONAL = 0x1, + WILC_FW_MFP_REQUIRED = 0x2 }; enum site_survey { @@ -176,7 +185,8 @@ enum { enum { WILC_FW_ACTION_FRM_IDX = 0, - WILC_FW_PROBE_REQ_IDX = 1 + WILC_FW_PROBE_REQ_IDX = 1, + WILC_FW_AUTH_REQ_IDX = 2 }; enum wid_type { @@ -657,6 +667,9 @@ enum { 
WID_LOG_TERMINAL_SWITCH = 0x00CD, WID_TX_POWER = 0x00CE, WID_WOWLAN_TRIGGER = 0X00CF, + WID_SET_MFP = 0x00D0, + + WID_DEFAULT_MGMT_KEY_ID = 0x00D2, /* EMAC Short WID list */ /* RTS Threshold */ /* @@ -746,6 +759,7 @@ enum { WID_REMOVE_KEY = 0x301E, WID_ASSOC_REQ_INFO = 0x301F, WID_ASSOC_RES_INFO = 0x3020, + WID_ADD_IGTK = 0x3022, WID_MANUFACTURER = 0x3026, /* Added for CAPI tool */ WID_MODEL_NAME = 0x3027, /* Added for CAPI tool */ WID_MODEL_NUM = 0x3028, /* Added for CAPI tool */ @@ -789,7 +803,7 @@ enum { WID_ADD_BEACON = 0x408a, WID_SETUP_MULTICAST_FILTER = 0x408b, - + WID_EXTERNAL_AUTH_PARAM = 0x408d, /* Miscellaneous WIDs */ WID_ALL = 0x7FFE, WID_MAX = 0xFFFF diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c index 87e98ab068ed..1f57a0055bbd 100644 --- a/drivers/net/wireless/ray_cs.c +++ b/drivers/net/wireless/ray_cs.c @@ -1643,38 +1643,34 @@ static void authenticate_timeout(struct timer_list *t) /*===========================================================================*/ static int parse_addr(char *in_str, UCHAR *out) { + int i, k; int len; - int i, j, k; - int status; if (in_str == NULL) return 0; - if ((len = strlen(in_str)) < 2) + len = strnlen(in_str, ADDRLEN * 2 + 1) - 1; + if (len < 1) return 0; memset(out, 0, ADDRLEN); - status = 1; - j = len - 1; - if (j > 12) - j = 12; i = 5; - while (j > 0) { - if ((k = hex_to_bin(in_str[j--])) != -1) + while (len > 0) { + if ((k = hex_to_bin(in_str[len--])) != -1) out[i] = k; else return 0; - if (j == 0) + if (len == 0) break; - if ((k = hex_to_bin(in_str[j--])) != -1) + if ((k = hex_to_bin(in_str[len--])) != -1) out[i] += k << 4; else return 0; if (!i--) break; } - return status; + return 1; } /*===========================================================================*/ diff --git a/drivers/net/wireless/realtek/rtlwifi/debug.c b/drivers/net/wireless/realtek/rtlwifi/debug.c index 901cdfe3723c..0b1bc04cb6ad 100644 --- a/drivers/net/wireless/realtek/rtlwifi/debug.c +++ 
b/drivers/net/wireless/realtek/rtlwifi/debug.c @@ -329,8 +329,8 @@ static ssize_t rtl_debugfs_set_write_h2c(struct file *filp, tmp_len = (count > sizeof(tmp) - 1 ? sizeof(tmp) - 1 : count); - if (!buffer || copy_from_user(tmp, buffer, tmp_len)) - return count; + if (copy_from_user(tmp, buffer, tmp_len)) + return -EFAULT; tmp[tmp_len] = '\0'; @@ -340,8 +340,8 @@ static ssize_t rtl_debugfs_set_write_h2c(struct file *filp, &h2c_data[4], &h2c_data[5], &h2c_data[6], &h2c_data[7]); - if (h2c_len <= 0) - return count; + if (h2c_len == 0) + return -EINVAL; for (i = 0; i < h2c_len; i++) h2c_data_packed[i] = (u8)h2c_data[i]; diff --git a/drivers/net/wireless/realtek/rtw88/debug.c b/drivers/net/wireless/realtek/rtw88/debug.c index 1a52ff585fbc..7cde6bcf253b 100644 --- a/drivers/net/wireless/realtek/rtw88/debug.c +++ b/drivers/net/wireless/realtek/rtw88/debug.c @@ -269,11 +269,7 @@ static int rtw_debugfs_get_rsvd_page(struct seq_file *m, void *v) for (i = 0 ; i < buf_size ; i += 8) { if (i % page_size == 0) seq_printf(m, "PAGE %d\n", (i + offset) / page_size); - seq_printf(m, "%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", - *(buf + i), *(buf + i + 1), - *(buf + i + 2), *(buf + i + 3), - *(buf + i + 4), *(buf + i + 5), - *(buf + i + 6), *(buf + i + 7)); + seq_printf(m, "%8ph\n", buf + i); } vfree(buf); diff --git a/drivers/net/wireless/realtek/rtw88/main.c b/drivers/net/wireless/realtek/rtw88/main.c index efabd5b1bf5b..a44b1810165d 100644 --- a/drivers/net/wireless/realtek/rtw88/main.c +++ b/drivers/net/wireless/realtek/rtw88/main.c @@ -1383,9 +1383,12 @@ void rtw_core_scan_start(struct rtw_dev *rtwdev, struct rtw_vif *rtwvif, void rtw_core_scan_complete(struct rtw_dev *rtwdev, struct ieee80211_vif *vif, bool hw_scan) { - struct rtw_vif *rtwvif = (struct rtw_vif *)vif->drv_priv; + struct rtw_vif *rtwvif = vif ? 
(struct rtw_vif *)vif->drv_priv : NULL; u32 config = 0; + if (!rtwvif) + return; + clear_bit(RTW_FLAG_SCANNING, rtwdev->flags); clear_bit(RTW_FLAG_DIG_DISABLE, rtwdev->flags); diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.c b/drivers/net/wireless/realtek/rtw88/rtw8723d.c index 93cce44df531..993bd6b1d723 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8723d.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.c @@ -2701,7 +2701,7 @@ static const struct rtw_reg_domain coex_info_hw_regs_8723d[] = { {0x953, BIT(1), RTW_REG_DOMAIN_MAC8}, }; -struct rtw_chip_info rtw8723d_hw_spec = { +const struct rtw_chip_info rtw8723d_hw_spec = { .ops = &rtw8723d_ops, .id = RTW_CHIP_TYPE_8723D, .fw_name = "rtw88/rtw8723d_fw.bin", diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.h b/drivers/net/wireless/realtek/rtw88/rtw8723d.h index 41d35174a542..4641f6e047b4 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8723d.h +++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.h @@ -72,6 +72,8 @@ struct rtw8723d_efuse { struct rtw8723de_efuse e; }; +extern const struct rtw_chip_info rtw8723d_hw_spec; + /* phy status page0 */ #define GET_PHY_STAT_P0_PWDB(phy_stat) \ le32_get_bits(*((__le32 *)(phy_stat) + 0x00), GENMASK(15, 8)) diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723de.c b/drivers/net/wireless/realtek/rtw88/rtw8723de.c index 2dd689441e8d..abbaafa32851 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8723de.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8723de.c @@ -5,7 +5,7 @@ #include <linux/module.h> #include <linux/pci.h> #include "pci.h" -#include "rtw8723de.h" +#include "rtw8723d.h" static const struct pci_device_id rtw_8723de_id_table[] = { { diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723de.h b/drivers/net/wireless/realtek/rtw88/rtw8723de.h deleted file mode 100644 index 2b4894846a07..000000000000 --- a/drivers/net/wireless/realtek/rtw88/rtw8723de.h +++ /dev/null @@ -1,10 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ -/* 
Copyright(c) 2018-2019 Realtek Corporation - */ - -#ifndef __RTW_8723DE_H_ -#define __RTW_8723DE_H_ - -extern struct rtw_chip_info rtw8723d_hw_spec; - -#endif diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821c.c b/drivers/net/wireless/realtek/rtw88/rtw8821c.c index 488a7ddd507c..025262a8970e 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8821c.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8821c.c @@ -1879,7 +1879,7 @@ static const struct rtw_reg_domain coex_info_hw_regs_8821c[] = { {0x60A, MASKBYTE0, RTW_REG_DOMAIN_MAC8}, }; -struct rtw_chip_info rtw8821c_hw_spec = { +const struct rtw_chip_info rtw8821c_hw_spec = { .ops = &rtw8821c_ops, .id = RTW_CHIP_TYPE_8821C, .fw_name = "rtw88/rtw8821c_fw.bin", diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821c.h b/drivers/net/wireless/realtek/rtw88/rtw8821c.h index d9fbddd7b0f3..2698801fc35d 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8821c.h +++ b/drivers/net/wireless/realtek/rtw88/rtw8821c.h @@ -84,6 +84,8 @@ _rtw_write32s_mask(struct rtw_dev *rtwdev, u32 addr, u32 mask, u32 data) rtw_write32_mask(rtwdev, addr + 0x200, mask, data); } +extern const struct rtw_chip_info rtw8821c_hw_spec; + #define rtw_write32s_mask(rtwdev, addr, mask, data) \ do { \ BUILD_BUG_ON((addr) < 0xC00 || (addr) >= 0xD00); \ diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821ce.c b/drivers/net/wireless/realtek/rtw88/rtw8821ce.c index 56d22f9de904..f3d971feda04 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8821ce.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8821ce.c @@ -5,7 +5,7 @@ #include <linux/module.h> #include <linux/pci.h> #include "pci.h" -#include "rtw8821ce.h" +#include "rtw8821c.h" static const struct pci_device_id rtw_8821ce_id_table[] = { { diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821ce.h b/drivers/net/wireless/realtek/rtw88/rtw8821ce.h deleted file mode 100644 index 54142acca534..000000000000 --- a/drivers/net/wireless/realtek/rtw88/rtw8821ce.h +++ /dev/null @@ -1,10 +0,0 @@ -/* 
SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ -/* Copyright(c) 2018-2019 Realtek Corporation - */ - -#ifndef __RTW_8821CE_H_ -#define __RTW_8821CE_H_ - -extern struct rtw_chip_info rtw8821c_hw_spec; - -#endif diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822b.c b/drivers/net/wireless/realtek/rtw88/rtw8822b.c index dccd722b8e62..321848870561 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8822b.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8822b.c @@ -2497,7 +2497,7 @@ static struct rtw_hw_reg_offset rtw8822b_edcca_th[] = { [EDCCA_TH_H2L_IDX] = {{.addr = 0x8a4, .mask = MASKBYTE1}, .offset = 0}, }; -struct rtw_chip_info rtw8822b_hw_spec = { +const struct rtw_chip_info rtw8822b_hw_spec = { .ops = &rtw8822b_ops, .id = RTW_CHIP_TYPE_8822B, .fw_name = "rtw88/rtw8822b_fw.bin", diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822b.h b/drivers/net/wireless/realtek/rtw88/rtw8822b.h index 3fff8b881854..01d3644e0c94 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8822b.h +++ b/drivers/net/wireless/realtek/rtw88/rtw8822b.h @@ -187,4 +187,6 @@ _rtw_write32s_mask(struct rtw_dev *rtwdev, u32 addr, u32 mask, u32 data) #define REG_ANTWT 0x1904 #define REG_IQKFAILMSK 0x1bf0 +extern const struct rtw_chip_info rtw8822b_hw_spec; + #endif diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822be.c b/drivers/net/wireless/realtek/rtw88/rtw8822be.c index 62ee7e62cac0..4994950776cd 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8822be.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8822be.c @@ -5,7 +5,7 @@ #include <linux/module.h> #include <linux/pci.h> #include "pci.h" -#include "rtw8822be.h" +#include "rtw8822b.h" static const struct pci_device_id rtw_8822be_id_table[] = { { diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822be.h b/drivers/net/wireless/realtek/rtw88/rtw8822be.h deleted file mode 100644 index 6668460d664d..000000000000 --- a/drivers/net/wireless/realtek/rtw88/rtw8822be.h +++ /dev/null @@ -1,10 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 OR 
BSD-3-Clause */ -/* Copyright(c) 2018-2019 Realtek Corporation - */ - -#ifndef __RTW_8822BE_H_ -#define __RTW_8822BE_H_ - -extern struct rtw_chip_info rtw8822b_hw_spec; - -#endif diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.c b/drivers/net/wireless/realtek/rtw88/rtw8822c.c index c043b5c520b9..09f9e4adcf34 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8822c.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.c @@ -5310,7 +5310,7 @@ static const struct rtw_reg_domain coex_info_hw_regs_8822c[] = { {0xc50, MASKBYTE0, RTW_REG_DOMAIN_MAC8}, }; -struct rtw_chip_info rtw8822c_hw_spec = { +const struct rtw_chip_info rtw8822c_hw_spec = { .ops = &rtw8822c_ops, .id = RTW_CHIP_TYPE_8822C, .fw_name = "rtw88/rtw8822c_fw.bin", diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.h b/drivers/net/wireless/realtek/rtw88/rtw8822c.h index 8201955e1f21..479d5d769c52 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8822c.h +++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.h @@ -118,6 +118,8 @@ enum rtw8822c_dpk_one_shot_action { void rtw8822c_parse_tbl_dpk(struct rtw_dev *rtwdev, const struct rtw_table *tbl); +extern const struct rtw_chip_info rtw8822c_hw_spec; + #define RTW_DECL_TABLE_DPK(name) \ const struct rtw_table name ## _tbl = { \ .data = name, \ diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822ce.c b/drivers/net/wireless/realtek/rtw88/rtw8822ce.c index 3845b1333dc3..e26c6bc82936 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8822ce.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8822ce.c @@ -5,7 +5,7 @@ #include <linux/module.h> #include <linux/pci.h> #include "pci.h" -#include "rtw8822ce.h" +#include "rtw8822c.h" static const struct pci_device_id rtw_8822ce_id_table[] = { { diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822ce.h b/drivers/net/wireless/realtek/rtw88/rtw8822ce.h deleted file mode 100644 index fee32d7a4504..000000000000 --- a/drivers/net/wireless/realtek/rtw88/rtw8822ce.h +++ /dev/null @@ -1,10 +0,0 @@ -/* 
SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ -/* Copyright(c) 2018-2019 Realtek Corporation - */ - -#ifndef __RTW_8822CE_H_ -#define __RTW_8822CE_H_ - -extern struct rtw_chip_info rtw8822c_hw_spec; - -#endif diff --git a/drivers/net/wireless/realtek/rtw89/cam.c b/drivers/net/wireless/realtek/rtw89/cam.c index 8a26adeb23fb..db3c55f0ccd0 100644 --- a/drivers/net/wireless/realtek/rtw89/cam.c +++ b/drivers/net/wireless/realtek/rtw89/cam.c @@ -602,11 +602,18 @@ int rtw89_cam_fill_bssid_cam_info(struct rtw89_dev *rtwdev, struct ieee80211_vif *vif = rtwvif_to_vif(rtwvif); struct rtw89_bssid_cam_entry *bssid_cam = &rtwvif->bssid_cam; u8 bss_color = vif->bss_conf.he_bss_color.color; + u8 bss_mask; + + if (vif->bss_conf.nontransmitted) + bss_mask = RTW89_BSSID_MATCH_5_BYTES; + else + bss_mask = RTW89_BSSID_MATCH_ALL; FWCMD_SET_ADDR_BSSID_IDX(cmd, bssid_cam->bssid_cam_idx); FWCMD_SET_ADDR_BSSID_OFFSET(cmd, bssid_cam->offset); FWCMD_SET_ADDR_BSSID_LEN(cmd, bssid_cam->len); FWCMD_SET_ADDR_BSSID_VALID(cmd, bssid_cam->valid); + FWCMD_SET_ADDR_BSSID_MASK(cmd, bss_mask); FWCMD_SET_ADDR_BSSID_BB_SEL(cmd, bssid_cam->phy_idx); FWCMD_SET_ADDR_BSSID_BSS_COLOR(cmd, bss_color); diff --git a/drivers/net/wireless/realtek/rtw89/cam.h b/drivers/net/wireless/realtek/rtw89/cam.h index a3931d3e40d2..74a6c4748d64 100644 --- a/drivers/net/wireless/realtek/rtw89/cam.h +++ b/drivers/net/wireless/realtek/rtw89/cam.h @@ -9,6 +9,9 @@ #define RTW89_SEC_CAM_LEN 20 +#define RTW89_BSSID_MATCH_ALL GENMASK(5, 0) +#define RTW89_BSSID_MATCH_5_BYTES GENMASK(4, 0) + static inline void FWCMD_SET_ADDR_IDX(void *cmd, u32 value) { le32p_replace_bits((__le32 *)(cmd) + 1, value, GENMASK(7, 0)); @@ -309,6 +312,11 @@ static inline void FWCMD_SET_ADDR_BSSID_BB_SEL(void *cmd, u32 value) le32p_replace_bits((__le32 *)(cmd) + 13, value, BIT(1)); } +static inline void FWCMD_SET_ADDR_BSSID_MASK(void *cmd, u32 value) +{ + le32p_replace_bits((__le32 *)(cmd) + 13, value, GENMASK(7, 2)); +} + static inline void 
FWCMD_SET_ADDR_BSSID_BSS_COLOR(void *cmd, u32 value) { le32p_replace_bits((__le32 *)(cmd) + 13, value, GENMASK(13, 8)); diff --git a/drivers/net/wireless/realtek/rtw89/core.c b/drivers/net/wireless/realtek/rtw89/core.c index a6a90572e74b..d2f2a3d65ef6 100644 --- a/drivers/net/wireless/realtek/rtw89/core.c +++ b/drivers/net/wireless/realtek/rtw89/core.c @@ -1343,6 +1343,47 @@ struct rtw89_vif_rx_stats_iter_data { const u8 *bssid; }; +static void rtw89_stats_trigger_frame(struct rtw89_dev *rtwdev, + struct ieee80211_vif *vif, + struct sk_buff *skb) +{ + struct rtw89_vif *rtwvif = (struct rtw89_vif *)vif->drv_priv; + struct ieee80211_trigger *tf = (struct ieee80211_trigger *)skb->data; + u8 *pos, *end, type; + u16 aid; + + if (!ether_addr_equal(vif->bss_conf.bssid, tf->ta) || + rtwvif->wifi_role != RTW89_WIFI_ROLE_STATION || + rtwvif->net_type == RTW89_NET_TYPE_NO_LINK) + return; + + type = le64_get_bits(tf->common_info, IEEE80211_TRIGGER_TYPE_MASK); + if (type != IEEE80211_TRIGGER_TYPE_BASIC) + return; + + end = (u8 *)tf + skb->len; + pos = tf->variable; + + while (end - pos >= RTW89_TF_BASIC_USER_INFO_SZ) { + aid = RTW89_GET_TF_USER_INFO_AID12(pos); + rtw89_debug(rtwdev, RTW89_DBG_TXRX, + "[TF] aid: %d, ul_mcs: %d, rua: %d\n", + aid, RTW89_GET_TF_USER_INFO_UL_MCS(pos), + RTW89_GET_TF_USER_INFO_RUA(pos)); + + if (aid == RTW89_TF_PAD) + break; + + if (aid == vif->bss_conf.aid) { + rtwvif->stats.rx_tf_acc++; + rtwdev->stats.rx_tf_acc++; + break; + } + + pos += RTW89_TF_BASIC_USER_INFO_SZ; + } +} + static void rtw89_vif_rx_stats_iter(void *data, u8 *mac, struct ieee80211_vif *vif) { @@ -1355,6 +1396,11 @@ static void rtw89_vif_rx_stats_iter(void *data, u8 *mac, struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; const u8 *bssid = iter_data->bssid; + if (ieee80211_is_trigger(hdr->frame_control)) { + rtw89_stats_trigger_frame(rtwdev, vif, skb); + return; + } + if (!ether_addr_equal(vif->bss_conf.bssid, bssid)) return; @@ -1608,7 +1654,7 @@ static void 
rtw89_core_update_rx_status(struct rtw89_dev *rtwdev, if (rtwdev->scanning && RTW89_CHK_FW_FEATURE(SCAN_OFFLOAD, &rtwdev->fw)) { - u8 chan = hal->current_channel; + u8 chan = hal->current_primary_channel; u8 band = hal->current_band_type; enum nl80211_band nl_band; @@ -2023,6 +2069,8 @@ static bool rtw89_traffic_stats_calc(struct rtw89_dev *rtwdev, stats->rx_unicast = 0; stats->tx_cnt = 0; stats->rx_cnt = 0; + stats->rx_tf_periodic = stats->rx_tf_acc; + stats->rx_tf_acc = 0; if (tx_tfc_lv != stats->tx_tfc_lv || rx_tfc_lv != stats->rx_tfc_lv) return true; @@ -2875,7 +2923,10 @@ void rtw89_core_scan_start(struct rtw89_dev *rtwdev, struct rtw89_vif *rtwvif, void rtw89_core_scan_complete(struct rtw89_dev *rtwdev, struct ieee80211_vif *vif, bool hw_scan) { - struct rtw89_vif *rtwvif = (struct rtw89_vif *)vif->drv_priv; + struct rtw89_vif *rtwvif = vif ? (struct rtw89_vif *)vif->drv_priv : NULL; + + if (!rtwvif) + return; ether_addr_copy(rtwvif->mac_addr, vif->addr); rtw89_fw_h2c_cam(rtwdev, rtwvif, NULL, NULL); @@ -3008,6 +3059,7 @@ static int rtw89_core_register_hw(struct rtw89_dev *rtwdev) ieee80211_hw_set(hw, SUPPORTS_PS); ieee80211_hw_set(hw, SUPPORTS_DYNAMIC_PS); ieee80211_hw_set(hw, SINGLE_SCAN_ON_ALL_BANDS); + ieee80211_hw_set(hw, SUPPORTS_MULTI_BSSID); hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION) | BIT(NL80211_IFTYPE_AP); diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h index e8a77225a90f..239d47d0ec6d 100644 --- a/drivers/net/wireless/realtek/rtw89/core.h +++ b/drivers/net/wireless/realtek/rtw89/core.h @@ -55,6 +55,16 @@ enum htc_om_channel_width { #define RTW89_HTC_MASK_HTC_OM_DL_MU_MIMO_RR BIT(16) #define RTW89_HTC_MASK_HTC_OM_UL_MU_DATA_DIS BIT(17) +#define RTW89_TF_PAD GENMASK(11, 0) +#define RTW89_TF_BASIC_USER_INFO_SZ 6 + +#define RTW89_GET_TF_USER_INFO_AID12(data) \ + le32_get_bits(*((const __le32 *)(data)), GENMASK(11, 0)) +#define RTW89_GET_TF_USER_INFO_RUA(data) \ + le32_get_bits(*((const 
__le32 *)(data)), GENMASK(19, 12)) +#define RTW89_GET_TF_USER_INFO_UL_MCS(data) \ + le32_get_bits(*((const __le32 *)(data)), GENMASK(24, 21)) + enum rtw89_subband { RTW89_CH_2G = 0, RTW89_CH_5G_BAND_1 = 1, @@ -943,6 +953,10 @@ struct rtw89_traffic_stats { u32 rx_throughput; u32 tx_throughput_raw; u32 rx_throughput_raw; + + u32 rx_tf_acc; + u32 rx_tf_periodic; + enum rtw89_tfc_lv tx_tfc_lv; enum rtw89_tfc_lv rx_tfc_lv; struct ewma_tp tx_ewma_tp; @@ -2550,9 +2564,24 @@ enum rtw89_sar_sources { RTW89_SAR_SOURCE_NR, }; +enum rtw89_sar_subband { + RTW89_SAR_2GHZ_SUBBAND, + RTW89_SAR_5GHZ_SUBBAND_1_2, /* U-NII-1 and U-NII-2 */ + RTW89_SAR_5GHZ_SUBBAND_2_E, /* U-NII-2-Extended */ + RTW89_SAR_5GHZ_SUBBAND_3, /* U-NII-3 */ + RTW89_SAR_6GHZ_SUBBAND_5_L, /* U-NII-5 lower part */ + RTW89_SAR_6GHZ_SUBBAND_5_H, /* U-NII-5 higher part */ + RTW89_SAR_6GHZ_SUBBAND_6, /* U-NII-6 */ + RTW89_SAR_6GHZ_SUBBAND_7_L, /* U-NII-7 lower part */ + RTW89_SAR_6GHZ_SUBBAND_7_H, /* U-NII-7 higher part */ + RTW89_SAR_6GHZ_SUBBAND_8, /* U-NII-8 */ + + RTW89_SAR_SUBBAND_NR, +}; + struct rtw89_sar_cfg_common { - bool set[RTW89_SUBBAND_NR]; - s32 cfg[RTW89_SUBBAND_NR]; + bool set[RTW89_SAR_SUBBAND_NR]; + s32 cfg[RTW89_SAR_SUBBAND_NR]; }; struct rtw89_sar_info { @@ -2646,6 +2675,10 @@ struct rtw89_lck_info { u8 thermal[RF_PATH_MAX]; }; +struct rtw89_rx_dck_info { + u8 thermal[RF_PATH_MAX]; +}; + struct rtw89_iqk_info { bool lok_cor_fail[RTW89_IQK_CHS_NR][RTW89_IQK_PATH_NR]; bool lok_fin_fail[RTW89_IQK_CHS_NR][RTW89_IQK_PATH_NR]; @@ -2776,13 +2809,20 @@ enum rtw89_multi_cfo_mode { enum rtw89_phy_cfo_status { RTW89_PHY_DCFO_STATE_NORMAL = 0, RTW89_PHY_DCFO_STATE_ENHANCE = 1, + RTW89_PHY_DCFO_STATE_HOLD = 2, RTW89_PHY_DCFO_STATE_MAX }; +enum rtw89_phy_cfo_ul_ofdma_acc_mode { + RTW89_CFO_UL_OFDMA_ACC_DISABLE = 0, + RTW89_CFO_UL_OFDMA_ACC_ENABLE = 1 +}; + struct rtw89_cfo_tracking_info { u16 cfo_timer_ms; bool cfo_trig_by_timer_en; enum rtw89_phy_cfo_status phy_cfo_status; + enum 
rtw89_phy_cfo_ul_ofdma_acc_mode cfo_ul_ofdma_acc_mode; u8 phy_cfo_trk_cnt; bool is_adjust; enum rtw89_multi_cfo_mode rtw89_multi_cfo_mode; @@ -3125,6 +3165,7 @@ struct rtw89_dev { struct rtw89_dpk_info dpk; struct rtw89_mcc_info mcc; struct rtw89_lck_info lck; + struct rtw89_rx_dck_info rx_dck; bool is_tssi_mode[RF_PATH_MAX]; bool is_bt_iqk_timeout; diff --git a/drivers/net/wireless/realtek/rtw89/debug.c b/drivers/net/wireless/realtek/rtw89/debug.c index 7820bc3ab3b4..f00f81916f2f 100644 --- a/drivers/net/wireless/realtek/rtw89/debug.c +++ b/drivers/net/wireless/realtek/rtw89/debug.c @@ -2376,7 +2376,8 @@ static int rtw89_debug_priv_phy_info_get(struct seq_file *m, void *v) seq_printf(m, "TP TX: %u [%u] Mbps (lv: %d), RX: %u [%u] Mbps (lv: %d)\n", stats->tx_throughput, stats->tx_throughput_raw, stats->tx_tfc_lv, stats->rx_throughput, stats->rx_throughput_raw, stats->rx_tfc_lv); - seq_printf(m, "Beacon: %u\n", pkt_stat->beacon_nr); + seq_printf(m, "Beacon: %u, TF: %u\n", pkt_stat->beacon_nr, + stats->rx_tf_periodic); seq_printf(m, "Avg packet length: TX=%u, RX=%u\n", stats->tx_avg_len, stats->rx_avg_len); diff --git a/drivers/net/wireless/realtek/rtw89/debug.h b/drivers/net/wireless/realtek/rtw89/debug.h index de72155ad1fe..561b04faf703 100644 --- a/drivers/net/wireless/realtek/rtw89/debug.h +++ b/drivers/net/wireless/realtek/rtw89/debug.h @@ -24,6 +24,7 @@ enum rtw89_debug_mask { RTW89_DBG_BTC = BIT(13), RTW89_DBG_BF = BIT(14), RTW89_DBG_HW_SCAN = BIT(15), + RTW89_DBG_SAR = BIT(16), }; enum rtw89_debug_mac_reg_sel { diff --git a/drivers/net/wireless/realtek/rtw89/fw.c b/drivers/net/wireless/realtek/rtw89/fw.c index 4718aced1428..2d9c3157d878 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.c +++ b/drivers/net/wireless/realtek/rtw89/fw.c @@ -2257,7 +2257,7 @@ static int rtw89_hw_scan_add_chan_list(struct rtw89_dev *rtwdev, list_add_tail(&ch_info->list, &chan_list); off_chan_time += ch_info->period; } - rtw89_fw_h2c_scan_list_offload(rtwdev, list_len, &chan_list); 
+ ret = rtw89_fw_h2c_scan_list_offload(rtwdev, list_len, &chan_list); out: list_for_each_entry_safe(ch_info, tmp, &chan_list, list) { @@ -2339,6 +2339,9 @@ void rtw89_hw_scan_complete(struct rtw89_dev *rtwdev, struct ieee80211_vif *vif, rtwvif->scan_req = NULL; rtwvif->scan_ies = NULL; rtwdev->scan_info.scanning_vif = NULL; + + if (rtwvif->net_type != RTW89_NET_TYPE_NO_LINK) + rtw89_store_op_chan(rtwdev, false); } void rtw89_hw_scan_abort(struct rtw89_dev *rtwdev, struct ieee80211_vif *vif) @@ -2365,20 +2368,27 @@ int rtw89_hw_scan_offload(struct rtw89_dev *rtwdev, struct ieee80211_vif *vif, if (ret) goto out; } - rtw89_fw_h2c_scan_offload(rtwdev, &opt, rtwvif); + ret = rtw89_fw_h2c_scan_offload(rtwdev, &opt, rtwvif); out: return ret; } -void rtw89_store_op_chan(struct rtw89_dev *rtwdev) +void rtw89_store_op_chan(struct rtw89_dev *rtwdev, bool backup) { struct rtw89_hw_scan_info *scan_info = &rtwdev->scan_info; struct rtw89_hal *hal = &rtwdev->hal; - scan_info->op_pri_ch = hal->current_primary_channel; - scan_info->op_chan = hal->current_channel; - scan_info->op_bw = hal->current_band_width; - scan_info->op_band = hal->current_band_type; + if (backup) { + scan_info->op_pri_ch = hal->current_primary_channel; + scan_info->op_chan = hal->current_channel; + scan_info->op_bw = hal->current_band_width; + scan_info->op_band = hal->current_band_type; + } else { + hal->current_primary_channel = scan_info->op_pri_ch; + hal->current_channel = scan_info->op_chan; + hal->current_band_width = scan_info->op_bw; + hal->current_band_type = scan_info->op_band; + } } #define H2C_FW_CPU_EXCEPTION_LEN 4 diff --git a/drivers/net/wireless/realtek/rtw89/fw.h b/drivers/net/wireless/realtek/rtw89/fw.h index 95a55c4213db..e75ad22aa85d 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.h +++ b/drivers/net/wireless/realtek/rtw89/fw.h @@ -2633,17 +2633,14 @@ int rtw89_fw_msg_reg(struct rtw89_dev *rtwdev, struct rtw89_mac_c2h_info *c2h_info); int rtw89_fw_h2c_fw_log(struct rtw89_dev *rtwdev, 
bool enable); void rtw89_fw_st_dbg_dump(struct rtw89_dev *rtwdev); -void rtw89_store_op_chan(struct rtw89_dev *rtwdev); +void rtw89_store_op_chan(struct rtw89_dev *rtwdev, bool backup); void rtw89_hw_scan_start(struct rtw89_dev *rtwdev, struct ieee80211_vif *vif, struct ieee80211_scan_request *req); void rtw89_hw_scan_complete(struct rtw89_dev *rtwdev, struct ieee80211_vif *vif, bool aborted); int rtw89_hw_scan_offload(struct rtw89_dev *rtwdev, struct ieee80211_vif *vif, bool enable); -void rtw89_hw_scan_status_report(struct rtw89_dev *rtwdev, struct sk_buff *skb); -void rtw89_hw_scan_chan_switch(struct rtw89_dev *rtwdev, struct sk_buff *skb); void rtw89_hw_scan_abort(struct rtw89_dev *rtwdev, struct ieee80211_vif *vif); -void rtw89_store_op_chan(struct rtw89_dev *rtwdev); int rtw89_fw_h2c_trigger_cpu_exception(struct rtw89_dev *rtwdev); #endif diff --git a/drivers/net/wireless/realtek/rtw89/mac.c b/drivers/net/wireless/realtek/rtw89/mac.c index 3cf892912c1d..93124b815825 100644 --- a/drivers/net/wireless/realtek/rtw89/mac.c +++ b/drivers/net/wireless/realtek/rtw89/mac.c @@ -3681,17 +3681,20 @@ rtw89_mac_c2h_scanofld_rsp(struct rtw89_dev *rtwdev, struct sk_buff *c2h, rtw89_hw_scan_complete(rtwdev, vif, false); break; case RTW89_SCAN_ENTER_CH_NOTIFY: - if (rtw89_is_op_chan(rtwdev, band, chan)) + hal->prev_band_type = hal->current_band_type; + hal->current_band_type = band; + hal->prev_primary_channel = hal->current_primary_channel; + hal->current_primary_channel = chan; + hal->current_channel = chan; + hal->current_band_width = RTW89_CHANNEL_WIDTH_20; + if (rtw89_is_op_chan(rtwdev, band, chan)) { + rtw89_store_op_chan(rtwdev, false); ieee80211_wake_queues(rtwdev->hw); + } break; default: return; } - - hal->prev_band_type = hal->current_band_type; - hal->prev_primary_channel = hal->current_channel; - hal->current_channel = chan; - hal->current_band_type = band; } static void diff --git a/drivers/net/wireless/realtek/rtw89/mac.h 
b/drivers/net/wireless/realtek/rtw89/mac.h index 9f511c8d8a37..f66619354734 100644 --- a/drivers/net/wireless/realtek/rtw89/mac.h +++ b/drivers/net/wireless/realtek/rtw89/mac.h @@ -666,6 +666,7 @@ enum mac_ax_err_info { MAC_AX_ERR_L2_ERR_APB_BBRF_TO_RX4281 = 0x2360, MAC_AX_ERR_L2_ERR_APB_BBRF_TO_OTHERS = 0x2370, MAC_AX_ERR_L2_RESET_DONE = 0x2400, + MAC_AX_ERR_L2_ERR_WDT_TIMEOUT_INT = 0x2599, MAC_AX_ERR_CPU_EXCEPTION = 0x3000, MAC_AX_ERR_ASSERTION = 0x4000, MAC_AX_GET_ERR_MAX, diff --git a/drivers/net/wireless/realtek/rtw89/mac80211.c b/drivers/net/wireless/realtek/rtw89/mac80211.c index f24e4a208376..6d0c62c545a7 100644 --- a/drivers/net/wireless/realtek/rtw89/mac80211.c +++ b/drivers/net/wireless/realtek/rtw89/mac80211.c @@ -350,7 +350,7 @@ static void rtw89_ops_bss_info_changed(struct ieee80211_hw *hw, rtw89_phy_set_bss_color(rtwdev, vif); rtw89_chip_cfg_txpwr_ul_tb_offset(rtwdev, vif); rtw89_mac_port_update(rtwdev, rtwvif); - rtw89_store_op_chan(rtwdev); + rtw89_store_op_chan(rtwdev, true); } else { /* Abort ongoing scan if cancel_scan isn't issued * when disconnected by peer diff --git a/drivers/net/wireless/realtek/rtw89/pci.c b/drivers/net/wireless/realtek/rtw89/pci.c index 0ef7821b2e0f..25872dfb4da1 100644 --- a/drivers/net/wireless/realtek/rtw89/pci.c +++ b/drivers/net/wireless/realtek/rtw89/pci.c @@ -738,6 +738,9 @@ static irqreturn_t rtw89_pci_interrupt_threadfn(int irq, void *dev) if (unlikely(isrs.halt_c2h_isrs & B_AX_HALT_C2H_INT_EN)) rtw89_ser_notify(rtwdev, rtw89_mac_get_err_status(rtwdev)); + if (unlikely(isrs.halt_c2h_isrs & B_AX_WDT_TIMEOUT_INT_EN)) + rtw89_ser_notify(rtwdev, MAC_AX_ERR_L2_ERR_WDT_TIMEOUT_INT); + if (unlikely(rtwpci->under_recovery)) goto enable_intr; @@ -3126,7 +3129,7 @@ static void rtw89_pci_recovery_intr_mask_v1(struct rtw89_dev *rtwdev) struct rtw89_pci *rtwpci = (struct rtw89_pci *)rtwdev->priv; rtwpci->ind_intrs = B_AX_HS0ISR_IND_INT_EN; - rtwpci->halt_c2h_intrs = B_AX_HALT_C2H_INT_EN; + rtwpci->halt_c2h_intrs = 
B_AX_HALT_C2H_INT_EN | B_AX_WDT_TIMEOUT_INT_EN; rtwpci->intrs[0] = 0; rtwpci->intrs[1] = 0; } @@ -3138,7 +3141,7 @@ static void rtw89_pci_default_intr_mask_v1(struct rtw89_dev *rtwdev) rtwpci->ind_intrs = B_AX_HCI_AXIDMA_INT_EN | B_AX_HS1ISR_IND_INT_EN | B_AX_HS0ISR_IND_INT_EN; - rtwpci->halt_c2h_intrs = B_AX_HALT_C2H_INT_EN; + rtwpci->halt_c2h_intrs = B_AX_HALT_C2H_INT_EN | B_AX_WDT_TIMEOUT_INT_EN; rtwpci->intrs[0] = B_AX_TXDMA_STUCK_INT_EN | B_AX_RXDMA_INT_EN | B_AX_RXP1DMA_INT_EN | @@ -3155,7 +3158,7 @@ static void rtw89_pci_low_power_intr_mask_v1(struct rtw89_dev *rtwdev) rtwpci->ind_intrs = B_AX_HS1ISR_IND_INT_EN | B_AX_HS0ISR_IND_INT_EN; - rtwpci->halt_c2h_intrs = B_AX_HALT_C2H_INT_EN; + rtwpci->halt_c2h_intrs = B_AX_HALT_C2H_INT_EN | B_AX_WDT_TIMEOUT_INT_EN; rtwpci->intrs[0] = 0; rtwpci->intrs[1] = B_AX_GPIO18_INT_EN; } diff --git a/drivers/net/wireless/realtek/rtw89/pci.h b/drivers/net/wireless/realtek/rtw89/pci.h index bb585ed19190..a118647213e3 100644 --- a/drivers/net/wireless/realtek/rtw89/pci.h +++ b/drivers/net/wireless/realtek/rtw89/pci.h @@ -94,6 +94,7 @@ /* Interrupts */ #define R_AX_HIMR0 0x01A0 +#define B_AX_WDT_TIMEOUT_INT_EN BIT(22) #define B_AX_HALT_C2H_INT_EN BIT(21) #define R_AX_HISR0 0x01A4 diff --git a/drivers/net/wireless/realtek/rtw89/phy.c b/drivers/net/wireless/realtek/rtw89/phy.c index 762cdba9d3cf..217aacb6e8c1 100644 --- a/drivers/net/wireless/realtek/rtw89/phy.c +++ b/drivers/net/wireless/realtek/rtw89/phy.c @@ -2151,6 +2151,7 @@ static void rtw89_phy_cfo_init(struct rtw89_dev *rtwdev) cfo->cfo_trig_by_timer_en = false; cfo->phy_cfo_trk_cnt = 0; cfo->phy_cfo_status = RTW89_PHY_DCFO_STATE_NORMAL; + cfo->cfo_ul_ofdma_acc_mode = RTW89_CFO_UL_OFDMA_ACC_ENABLE; } static void rtw89_phy_cfo_crystal_cap_adjust(struct rtw89_dev *rtwdev, @@ -2419,6 +2420,13 @@ void rtw89_phy_cfo_track(struct rtw89_dev *rtwdev) { struct rtw89_cfo_tracking_info *cfo = &rtwdev->cfo_tracking; struct rtw89_traffic_stats *stats = &rtwdev->stats; + bool is_ul_ofdma 
= false, ofdma_acc_en = false; + + if (stats->rx_tf_periodic > CFO_TF_CNT_TH) + is_ul_ofdma = true; + if (cfo->cfo_ul_ofdma_acc_mode == RTW89_CFO_UL_OFDMA_ACC_ENABLE && + is_ul_ofdma) + ofdma_acc_en = true; switch (cfo->phy_cfo_status) { case RTW89_PHY_DCFO_STATE_NORMAL: @@ -2430,16 +2438,26 @@ void rtw89_phy_cfo_track(struct rtw89_dev *rtwdev) } break; case RTW89_PHY_DCFO_STATE_ENHANCE: - if (cfo->phy_cfo_trk_cnt >= CFO_PERIOD_CNT) { + if (stats->tx_throughput <= CFO_TP_LOWER) + cfo->phy_cfo_status = RTW89_PHY_DCFO_STATE_NORMAL; + else if (ofdma_acc_en && + cfo->phy_cfo_trk_cnt >= CFO_PERIOD_CNT) + cfo->phy_cfo_status = RTW89_PHY_DCFO_STATE_HOLD; + else + cfo->phy_cfo_trk_cnt++; + + if (cfo->phy_cfo_status == RTW89_PHY_DCFO_STATE_NORMAL) { cfo->phy_cfo_trk_cnt = 0; cfo->cfo_trig_by_timer_en = false; } - if (cfo->cfo_trig_by_timer_en == 1) - cfo->phy_cfo_trk_cnt++; + break; + case RTW89_PHY_DCFO_STATE_HOLD: if (stats->tx_throughput <= CFO_TP_LOWER) { cfo->phy_cfo_status = RTW89_PHY_DCFO_STATE_NORMAL; cfo->phy_cfo_trk_cnt = 0; cfo->cfo_trig_by_timer_en = false; + } else { + cfo->phy_cfo_trk_cnt++; } break; default: diff --git a/drivers/net/wireless/realtek/rtw89/phy.h b/drivers/net/wireless/realtek/rtw89/phy.h index 291660154d58..e20636f54b55 100644 --- a/drivers/net/wireless/realtek/rtw89/phy.h +++ b/drivers/net/wireless/realtek/rtw89/phy.h @@ -62,6 +62,7 @@ #define CFO_COMP_PERIOD 250 #define CFO_COMP_WEIGHT 8 #define MAX_CFO_TOLERANCE 30 +#define CFO_TF_CNT_TH 300 #define CCX_MAX_PERIOD 2097 #define CCX_MAX_PERIOD_UNIT 32 diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852c.c b/drivers/net/wireless/realtek/rtw89/rtw8852c.c index 64840c8d9efe..b697aef2faf2 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852c.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852c.c @@ -1861,6 +1861,7 @@ static void rtw8852c_rfk_track(struct rtw89_dev *rtwdev) { rtw8852c_dpk_track(rtwdev); rtw8852c_lck_track(rtwdev); + rtw8852c_rx_dck_track(rtwdev); } static u32 
rtw8852c_bb_cal_txpwr_ref(struct rtw89_dev *rtwdev, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852c_rfk.c b/drivers/net/wireless/realtek/rtw89/rtw8852c_rfk.c index dfb9caba9bc4..4186d825d19b 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852c_rfk.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852c_rfk.c @@ -3864,6 +3864,7 @@ void rtw8852c_iqk(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx) void rtw8852c_rx_dck(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy, bool is_afe) { + struct rtw89_rx_dck_info *rx_dck = &rtwdev->rx_dck; u8 path, kpath; u32 rf_reg5; @@ -3883,6 +3884,7 @@ void rtw8852c_rx_dck(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy, bool is_a rtw89_write_rf(rtwdev, path, RR_RSV1, RR_RSV1_RST, 0x0); rtw89_write_rf(rtwdev, path, RR_MOD, RR_MOD_MASK, RR_MOD_V_RX); _set_rx_dck(rtwdev, phy, path, is_afe); + rx_dck->thermal[path] = ewma_thermal_read(&rtwdev->phystat.avg_thermal[path]); rtw89_write_rf(rtwdev, path, RR_RSV1, RFREG_MASK, rf_reg5); if (rtwdev->is_tssi_mode[path]) @@ -3891,6 +3893,31 @@ void rtw8852c_rx_dck(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy, bool is_a } } +#define RTW8852C_RX_DCK_TH 8 + +void rtw8852c_rx_dck_track(struct rtw89_dev *rtwdev) +{ + struct rtw89_rx_dck_info *rx_dck = &rtwdev->rx_dck; + u8 cur_thermal; + int delta; + int path; + + for (path = 0; path < RF_PATH_NUM_8852C; path++) { + cur_thermal = + ewma_thermal_read(&rtwdev->phystat.avg_thermal[path]); + delta = abs((int)cur_thermal - rx_dck->thermal[path]); + + rtw89_debug(rtwdev, RTW89_DBG_RFK_TRACK, + "[RX_DCK] path=%d current thermal=0x%x delta=0x%x\n", + path, cur_thermal, delta); + + if (delta >= RTW8852C_RX_DCK_TH) { + rtw8852c_rx_dck(rtwdev, RTW89_PHY_0, false); + return; + } + } +} + void rtw8852c_dpk(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx) { u32 tx_en; diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852c_rfk.h b/drivers/net/wireless/realtek/rtw89/rtw8852c_rfk.h index c32756f0c01a..5118a49da8d3 100644 --- 
a/drivers/net/wireless/realtek/rtw89/rtw8852c_rfk.h +++ b/drivers/net/wireless/realtek/rtw89/rtw8852c_rfk.h @@ -12,6 +12,7 @@ void rtw8852c_rck(struct rtw89_dev *rtwdev); void rtw8852c_dack(struct rtw89_dev *rtwdev); void rtw8852c_iqk(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx); void rtw8852c_rx_dck(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx, bool is_afe); +void rtw8852c_rx_dck_track(struct rtw89_dev *rtwdev); void rtw8852c_dpk(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy); void rtw8852c_dpk_track(struct rtw89_dev *rtwdev); void rtw8852c_tssi(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy); diff --git a/drivers/net/wireless/realtek/rtw89/sar.c b/drivers/net/wireless/realtek/rtw89/sar.c index 097c87899cea..eb2d3ec28775 100644 --- a/drivers/net/wireless/realtek/rtw89/sar.c +++ b/drivers/net/wireless/realtek/rtw89/sar.c @@ -5,15 +5,122 @@ #include "debug.h" #include "sar.h" +static enum rtw89_sar_subband rtw89_sar_get_subband(struct rtw89_dev *rtwdev, + u32 center_freq) +{ + switch (center_freq) { + default: + rtw89_debug(rtwdev, RTW89_DBG_SAR, + "center freq: %u to SAR subband is unhandled\n", + center_freq); + fallthrough; + case 2412 ... 2484: + return RTW89_SAR_2GHZ_SUBBAND; + case 5180 ... 5320: + return RTW89_SAR_5GHZ_SUBBAND_1_2; + case 5500 ... 5720: + return RTW89_SAR_5GHZ_SUBBAND_2_E; + case 5745 ... 5825: + return RTW89_SAR_5GHZ_SUBBAND_3; + case 5955 ... 6155: + return RTW89_SAR_6GHZ_SUBBAND_5_L; + case 6175 ... 6415: + return RTW89_SAR_6GHZ_SUBBAND_5_H; + case 6435 ... 6515: + return RTW89_SAR_6GHZ_SUBBAND_6; + case 6535 ... 6695: + return RTW89_SAR_6GHZ_SUBBAND_7_L; + case 6715 ... 6855: + return RTW89_SAR_6GHZ_SUBBAND_7_H; + + /* freq 6875 (ch 185, 20MHz) spans RTW89_SAR_6GHZ_SUBBAND_7_H + * and RTW89_SAR_6GHZ_SUBBAND_8, so directly describe it with + * struct rtw89_sar_span in the following. + */ + + case 6895 ... 
7115: + return RTW89_SAR_6GHZ_SUBBAND_8; + } +} + +struct rtw89_sar_span { + enum rtw89_sar_subband subband_low; + enum rtw89_sar_subband subband_high; +}; + +#define RTW89_SAR_SPAN_VALID(span) ((span)->subband_high) + +#define RTW89_SAR_6GHZ_SPAN_HEAD 6145 +#define RTW89_SAR_6GHZ_SPAN_IDX(center_freq) \ + ((((int)(center_freq) - RTW89_SAR_6GHZ_SPAN_HEAD) / 5) / 2) + +#define RTW89_DECL_SAR_6GHZ_SPAN(center_freq, subband_l, subband_h) \ + [RTW89_SAR_6GHZ_SPAN_IDX(center_freq)] = { \ + .subband_low = RTW89_SAR_6GHZ_ ## subband_l, \ + .subband_high = RTW89_SAR_6GHZ_ ## subband_h, \ + } + +/* Since 6GHz SAR subbands are not edge aligned, some cases span two SAR + * subbands. In the following, we describe each of them with rtw89_sar_span. + */ +static const struct rtw89_sar_span rtw89_sar_overlapping_6ghz[] = { + RTW89_DECL_SAR_6GHZ_SPAN(6145, SUBBAND_5_L, SUBBAND_5_H), + RTW89_DECL_SAR_6GHZ_SPAN(6165, SUBBAND_5_L, SUBBAND_5_H), + RTW89_DECL_SAR_6GHZ_SPAN(6185, SUBBAND_5_L, SUBBAND_5_H), + RTW89_DECL_SAR_6GHZ_SPAN(6505, SUBBAND_6, SUBBAND_7_L), + RTW89_DECL_SAR_6GHZ_SPAN(6525, SUBBAND_6, SUBBAND_7_L), + RTW89_DECL_SAR_6GHZ_SPAN(6545, SUBBAND_6, SUBBAND_7_L), + RTW89_DECL_SAR_6GHZ_SPAN(6665, SUBBAND_7_L, SUBBAND_7_H), + RTW89_DECL_SAR_6GHZ_SPAN(6705, SUBBAND_7_L, SUBBAND_7_H), + RTW89_DECL_SAR_6GHZ_SPAN(6825, SUBBAND_7_H, SUBBAND_8), + RTW89_DECL_SAR_6GHZ_SPAN(6865, SUBBAND_7_H, SUBBAND_8), + RTW89_DECL_SAR_6GHZ_SPAN(6875, SUBBAND_7_H, SUBBAND_8), + RTW89_DECL_SAR_6GHZ_SPAN(6885, SUBBAND_7_H, SUBBAND_8), +}; + static int rtw89_query_sar_config_common(struct rtw89_dev *rtwdev, s32 *cfg) { struct rtw89_sar_cfg_common *rtwsar = &rtwdev->sar.cfg_common; - enum rtw89_subband subband = rtwdev->hal.current_subband; + struct rtw89_hal *hal = &rtwdev->hal; + enum rtw89_band band = hal->current_band_type; + u32 center_freq = hal->current_freq; + const struct rtw89_sar_span *span = NULL; + enum rtw89_sar_subband subband_l, subband_h; + int idx; + + if (band == RTW89_BAND_6G) { + 
idx = RTW89_SAR_6GHZ_SPAN_IDX(center_freq); + /* To decrease size of rtw89_sar_overlapping_6ghz[], + * RTW89_SAR_6GHZ_SPAN_IDX() truncates the leading NULLs + * to make first span as index 0 of the table. So, if center + * frequency is less than the first one, it will get negative. + */ + if (idx >= 0 && idx < ARRAY_SIZE(rtw89_sar_overlapping_6ghz)) + span = &rtw89_sar_overlapping_6ghz[idx]; + } + + if (span && RTW89_SAR_SPAN_VALID(span)) { + subband_l = span->subband_low; + subband_h = span->subband_high; + } else { + subband_l = rtw89_sar_get_subband(rtwdev, center_freq); + subband_h = subband_l; + } + + rtw89_debug(rtwdev, RTW89_DBG_SAR, + "for {band %u, center_freq %u}, SAR subband: {%u, %u}\n", + band, center_freq, subband_l, subband_h); - if (!rtwsar->set[subband]) + if (!rtwsar->set[subband_l] && !rtwsar->set[subband_h]) return -ENODATA; - *cfg = rtwsar->cfg[subband]; + if (!rtwsar->set[subband_l]) + *cfg = rtwsar->cfg[subband_h]; + else if (!rtwsar->set[subband_h]) + *cfg = rtwsar->cfg[subband_l]; + else + *cfg = min(rtwsar->cfg[subband_l], rtwsar->cfg[subband_h]); + return 0; } @@ -128,21 +235,20 @@ exit: return ret; } -static const u8 rtw89_common_sar_subband_map[] = { - RTW89_CH_2G, - RTW89_CH_5G_BAND_1, - RTW89_CH_5G_BAND_3, - RTW89_CH_5G_BAND_4, -}; - static const struct cfg80211_sar_freq_ranges rtw89_common_sar_freq_ranges[] = { { .start_freq = 2412, .end_freq = 2484, }, { .start_freq = 5180, .end_freq = 5320, }, { .start_freq = 5500, .end_freq = 5720, }, { .start_freq = 5745, .end_freq = 5825, }, + { .start_freq = 5955, .end_freq = 6155, }, + { .start_freq = 6175, .end_freq = 6415, }, + { .start_freq = 6435, .end_freq = 6515, }, + { .start_freq = 6535, .end_freq = 6695, }, + { .start_freq = 6715, .end_freq = 6875, }, + { .start_freq = 6875, .end_freq = 7115, }, }; -static_assert(ARRAY_SIZE(rtw89_common_sar_subband_map) == +static_assert(RTW89_SAR_SUBBAND_NR == ARRAY_SIZE(rtw89_common_sar_freq_ranges)); const struct cfg80211_sar_capa rtw89_sar_capa =
{ @@ -159,7 +265,6 @@ int rtw89_ops_set_sar_specs(struct ieee80211_hw *hw, u8 fct; u32 freq_start; u32 freq_end; - u32 band; s32 power; u32 i, idx; @@ -175,15 +280,14 @@ int rtw89_ops_set_sar_specs(struct ieee80211_hw *hw, freq_start = rtw89_common_sar_freq_ranges[idx].start_freq; freq_end = rtw89_common_sar_freq_ranges[idx].end_freq; - band = rtw89_common_sar_subband_map[idx]; power = sar->sub_specs[i].power; - rtw89_info(rtwdev, "On freq %u to %u, ", freq_start, freq_end); - rtw89_info(rtwdev, "set SAR power limit %d (unit: 1/%lu dBm)\n", - power, BIT(fct)); + rtw89_debug(rtwdev, RTW89_DBG_SAR, + "On freq %u to %u, set SAR limit %d (unit: 1/%lu dBm)\n", + freq_start, freq_end, power, BIT(fct)); - sar_common.set[band] = true; - sar_common.cfg[band] = power; + sar_common.set[idx] = true; + sar_common.cfg[idx] = power; } return rtw89_apply_sar_common(rtwdev, &sar_common); diff --git a/drivers/net/wireless/silabs/wfx/fwio.c b/drivers/net/wireless/silabs/wfx/fwio.c index 3d1b8a135dc0..52c7f560b062 100644 --- a/drivers/net/wireless/silabs/wfx/fwio.c +++ b/drivers/net/wireless/silabs/wfx/fwio.c @@ -286,8 +286,7 @@ static int load_firmware_secure(struct wfx_dev *wdev) error: kfree(buf); - if (fw) - release_firmware(fw); + release_firmware(fw); if (ret) print_boot_status(wdev); return ret; diff --git a/drivers/net/wireless/st/cw1200/bh.c b/drivers/net/wireless/st/cw1200/bh.c index 10e019cddcc6..3b4ded2ac801 100644 --- a/drivers/net/wireless/st/cw1200/bh.c +++ b/drivers/net/wireless/st/cw1200/bh.c @@ -327,18 +327,12 @@ static int cw1200_bh_rx_helper(struct cw1200_common *priv, if (WARN_ON(wsm_handle_rx(priv, wsm_id, wsm, &skb_rx))) goto err; - if (skb_rx) { - dev_kfree_skb(skb_rx); - skb_rx = NULL; - } + dev_kfree_skb(skb_rx); return 0; err: - if (skb_rx) { - dev_kfree_skb(skb_rx); - skb_rx = NULL; - } + dev_kfree_skb(skb_rx); return -1; } diff --git a/drivers/net/wireless/virt_wifi.c b/drivers/net/wireless/virt_wifi.c index 514f2c1124b6..ba14d83353a4 100644 --- 
a/drivers/net/wireless/virt_wifi.c +++ b/drivers/net/wireless/virt_wifi.c @@ -654,7 +654,7 @@ static int __init virt_wifi_init_module(void) { int err; - /* Guaranteed to be locallly-administered and not multicast. */ + /* Guaranteed to be locally-administered and not multicast. */ eth_random_addr(fake_router_bssid); err = register_netdevice_notifier(&virt_wifi_notifier); diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index d9dea4829c86..8174d7b2966c 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -48,7 +48,6 @@ #include <linux/debugfs.h> typedef unsigned int pending_ring_idx_t; -#define INVALID_PENDING_RING_IDX (~0U) struct pending_tx_info { struct xen_netif_tx_request req; /* tx request */ @@ -82,8 +81,6 @@ struct xenvif_rx_meta { /* Discriminate from any valid pending_idx value. */ #define INVALID_PENDING_IDX 0xFFFF -#define MAX_BUFFER_OFFSET XEN_PAGE_SIZE - #define MAX_PENDING_REQS XEN_NETIF_TX_RING_SIZE /* The maximum number of frags is derived from the size of a grant (same @@ -367,11 +364,6 @@ void xenvif_free(struct xenvif *vif); int xenvif_xenbus_init(void); void xenvif_xenbus_fini(void); -int xenvif_schedulable(struct xenvif *vif); - -int xenvif_queue_stopped(struct xenvif_queue *queue); -void xenvif_wake_queue(struct xenvif_queue *queue); - /* (Un)Map communication rings. 
*/ void xenvif_unmap_frontend_data_rings(struct xenvif_queue *queue); int xenvif_map_frontend_data_rings(struct xenvif_queue *queue, @@ -394,7 +386,6 @@ int xenvif_dealloc_kthread(void *data); irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data); bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread); -void xenvif_rx_action(struct xenvif_queue *queue); void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb); void xenvif_carrier_on(struct xenvif *vif); @@ -403,9 +394,6 @@ void xenvif_carrier_on(struct xenvif *vif); void xenvif_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *ubuf, bool zerocopy_success); -/* Unmap a pending page and release it back to the guest */ -void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx); - static inline pending_ring_idx_t nr_pending_reqs(struct xenvif_queue *queue) { return MAX_PENDING_REQS - diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 8e035374a370..fb32ae82d9b0 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -69,7 +69,7 @@ void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue) wake_up(&queue->dealloc_wq); } -int xenvif_schedulable(struct xenvif *vif) +static int xenvif_schedulable(struct xenvif *vif) { return netif_running(vif->dev) && test_bit(VIF_STATUS_CONNECTED, &vif->status) && @@ -177,20 +177,6 @@ irqreturn_t xenvif_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -int xenvif_queue_stopped(struct xenvif_queue *queue) -{ - struct net_device *dev = queue->vif->dev; - unsigned int id = queue->id; - return netif_tx_queue_stopped(netdev_get_tx_queue(dev, id)); -} - -void xenvif_wake_queue(struct xenvif_queue *queue) -{ - struct net_device *dev = queue->vif->dev; - unsigned int id = queue->id; - netif_tx_wake_queue(netdev_get_tx_queue(dev, id)); -} - static u16 xenvif_select_queue(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev) { diff --git 
a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index d93814c14a23..fc61a4418737 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -112,6 +112,8 @@ static void make_tx_response(struct xenvif_queue *queue, s8 st); static void push_tx_responses(struct xenvif_queue *queue); +static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx); + static inline int tx_work_todo(struct xenvif_queue *queue); static inline unsigned long idx_to_pfn(struct xenvif_queue *queue, @@ -1418,7 +1420,7 @@ static void push_tx_responses(struct xenvif_queue *queue) notify_remote_via_irq(queue->tx_irq); } -void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx) +static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx) { int ret; struct gnttab_unmap_grant_ref tx_unmap_op; diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index dbac4c03d21a..8df2c736fd23 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -486,7 +486,7 @@ static void xenvif_rx_skb(struct xenvif_queue *queue) #define RX_BATCH_SIZE 64 -void xenvif_rx_action(struct xenvif_queue *queue) +static void xenvif_rx_action(struct xenvif_queue *queue) { struct sk_buff_head completed_skbs; unsigned int work_done = 0; diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index 4519ef42b458..e59ea2173aac 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2020 Facebook */ +#include <linux/bits.h> #include <linux/err.h> #include <linux/kernel.h> #include <linux/module.h> @@ -88,10 +89,10 @@ struct tod_reg { #define TOD_CTRL_DISABLE_FMT_A BIT(17) #define TOD_CTRL_DISABLE_FMT_B BIT(16) #define TOD_CTRL_ENABLE BIT(0) -#define TOD_CTRL_GNSS_MASK ((1U << 4) - 1) +#define TOD_CTRL_GNSS_MASK GENMASK(3, 0) #define TOD_CTRL_GNSS_SHIFT 24 -#define TOD_STATUS_UTC_MASK 0xff +#define TOD_STATUS_UTC_MASK GENMASK(7, 
0) #define TOD_STATUS_UTC_VALID BIT(8) #define TOD_STATUS_LEAP_ANNOUNCE BIT(12) #define TOD_STATUS_LEAP_VALID BIT(16) @@ -205,7 +206,7 @@ struct frequency_reg { #define FREQ_STATUS_VALID BIT(31) #define FREQ_STATUS_ERROR BIT(30) #define FREQ_STATUS_OVERRUN BIT(29) -#define FREQ_STATUS_MASK (BIT(24) - 1) +#define FREQ_STATUS_MASK GENMASK(23, 0) struct ptp_ocp_flash_info { const char *name; @@ -674,9 +675,9 @@ static const struct ocp_selector ptp_ocp_clock[] = { { } }; +#define SMA_DISABLE BIT(16) #define SMA_ENABLE BIT(15) -#define SMA_SELECT_MASK ((1U << 15) - 1) -#define SMA_DISABLE 0x10000 +#define SMA_SELECT_MASK GENMASK(14, 0) static const struct ocp_selector ptp_ocp_sma_in[] = { { .name = "10Mhz", .value = 0x0000 }, @@ -2154,7 +2155,7 @@ ptp_ocp_fb_set_pins(struct ptp_ocp *bp) struct ptp_pin_desc *config; int i; - config = kzalloc(sizeof(*config) * 4, GFP_KERNEL); + config = kcalloc(4, sizeof(*config), GFP_KERNEL); if (!config) return -ENOMEM; @@ -3440,7 +3441,7 @@ ptp_ocp_tod_status_show(struct seq_file *s, void *data) val = ioread32(&bp->tod->utc_status); seq_printf(s, "UTC status register: 0x%08X\n", val); - seq_printf(s, "UTC offset: %d valid:%d\n", + seq_printf(s, "UTC offset: %ld valid:%d\n", val & TOD_STATUS_UTC_MASK, val & TOD_STATUS_UTC_VALID ? 1 : 0); seq_printf(s, "Leap second info valid:%d, Leap second announce %d\n", val & TOD_STATUS_LEAP_VALID ? 
1 : 0, @@ -3700,10 +3701,8 @@ ptp_ocp_detach(struct ptp_ocp *bp) serial8250_unregister_port(bp->mac_port); if (bp->nmea_port != -1) serial8250_unregister_port(bp->nmea_port); - if (bp->spi_flash) - platform_device_unregister(bp->spi_flash); - if (bp->i2c_ctrl) - platform_device_unregister(bp->i2c_ctrl); + platform_device_unregister(bp->spi_flash); + platform_device_unregister(bp->i2c_ctrl); if (bp->i2c_clk) clk_hw_unregister_fixed_rate(bp->i2c_clk); if (bp->n_irqs) @@ -3773,7 +3772,6 @@ ptp_ocp_probe(struct pci_dev *pdev, const struct pci_device_id *id) out: ptp_ocp_detach(bp); - pci_set_drvdata(pdev, NULL); out_disable: pci_disable_device(pdev); out_free: @@ -3789,7 +3787,6 @@ ptp_ocp_remove(struct pci_dev *pdev) devlink_unregister(devlink); ptp_ocp_detach(bp); - pci_set_drvdata(pdev, NULL); pci_disable_device(pdev); devlink_free(devlink); diff --git a/include/dt-bindings/net/pcs-rzn1-miic.h b/include/dt-bindings/net/pcs-rzn1-miic.h new file mode 100644 index 000000000000..784782eaec9e --- /dev/null +++ b/include/dt-bindings/net/pcs-rzn1-miic.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (C) 2022 Schneider-Electric + * + * Clément Léger <clement.leger@bootlin.com> + */ + +#ifndef _DT_BINDINGS_PCS_RZN1_MIIC +#define _DT_BINDINGS_PCS_RZN1_MIIC + +/* + * Refer to the datasheet [1] section 8.2.1, Internal Connection of Ethernet + * Ports to check the available combination + * + * [1] REN_r01uh0750ej0140-rzn1-introduction_MAT_20210228.pdf + */ + +#define MIIC_GMAC1_PORT 0 +#define MIIC_GMAC2_PORT 1 +#define MIIC_RTOS_PORT 2 +#define MIIC_SERCOS_PORTA 3 +#define MIIC_SERCOS_PORTB 4 +#define MIIC_ETHERCAT_PORTA 5 +#define MIIC_ETHERCAT_PORTB 6 +#define MIIC_ETHERCAT_PORTC 7 +#define MIIC_SWITCH_PORTA 8 +#define MIIC_SWITCH_PORTB 9 +#define MIIC_SWITCH_PORTC 10 +#define MIIC_SWITCH_PORTD 11 +#define MIIC_HSR_PORTA 12 +#define MIIC_HSR_PORTB 13 + +#endif diff --git a/include/linux/bpf.h b/include/linux/bpf.h index
2b914a56a2c5..0edd7d2c0064 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -5,6 +5,7 @@ #define _LINUX_BPF_H 1 #include <uapi/linux/bpf.h> +#include <uapi/linux/filter.h> #include <linux/workqueue.h> #include <linux/file.h> @@ -22,8 +23,10 @@ #include <linux/sched/mm.h> #include <linux/slab.h> #include <linux/percpu-refcount.h> +#include <linux/stddef.h> #include <linux/bpfptr.h> #include <linux/btf.h> +#include <linux/rcupdate_trace.h> struct bpf_verifier_env; struct bpf_verifier_log; @@ -398,6 +401,9 @@ enum bpf_type_flag { /* DYNPTR points to a ringbuf record. */ DYNPTR_TYPE_RINGBUF = BIT(9 + BPF_BASE_TYPE_BITS), + /* Size is known at compile time. */ + MEM_FIXED_SIZE = BIT(10 + BPF_BASE_TYPE_BITS), + __BPF_TYPE_FLAG_MAX, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; @@ -461,6 +467,8 @@ enum bpf_arg_type { * all bytes or clear them in error case. */ ARG_PTR_TO_UNINIT_MEM = MEM_UNINIT | ARG_PTR_TO_MEM, + /* Pointer to valid memory of size known at compile time. */ + ARG_PTR_TO_FIXED_SIZE_MEM = MEM_FIXED_SIZE | ARG_PTR_TO_MEM, /* This must be the last entry. Its purpose is to ensure the enum is * wide enough to hold the higher bits reserved for bpf_type_flag. @@ -526,6 +534,14 @@ struct bpf_func_proto { u32 *arg5_btf_id; }; u32 *arg_btf_id[5]; + struct { + size_t arg1_size; + size_t arg2_size; + size_t arg3_size; + size_t arg4_size; + size_t arg5_size; + }; + size_t arg_size[5]; }; int *ret_btf_id; /* return value btf_id */ bool (*allowed)(const struct bpf_prog *prog); @@ -1084,6 +1100,40 @@ struct bpf_prog_aux { }; }; +struct bpf_prog { + u16 pages; /* Number of allocated pages */ + u16 jited:1, /* Is our filter JIT'ed? */ + jit_requested:1,/* archs need to JIT the prog */ + gpl_compatible:1, /* Is filter GPL compatible? */ + cb_access:1, /* Is control block accessed? */ + dst_needed:1, /* Do we need dst entry? 
*/ + blinding_requested:1, /* needs constant blinding */ + blinded:1, /* Was blinded */ + is_func:1, /* program is a bpf function */ + kprobe_override:1, /* Do we override a kprobe? */ + has_callchain_buf:1, /* callchain buffer allocated? */ + enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */ + call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */ + call_get_func_ip:1, /* Do we call get_func_ip() */ + tstamp_type_access:1; /* Accessed __sk_buff->tstamp_type */ + enum bpf_prog_type type; /* Type of BPF program */ + enum bpf_attach_type expected_attach_type; /* For some prog types */ + u32 len; /* Number of filter blocks */ + u32 jited_len; /* Size of jited insns in bytes */ + u8 tag[BPF_TAG_SIZE]; + struct bpf_prog_stats __percpu *stats; + int __percpu *active; + unsigned int (*bpf_func)(const void *ctx, + const struct bpf_insn *insn); + struct bpf_prog_aux *aux; /* Auxiliary fields */ + struct sock_fprog_kern *orig_prog; /* Original BPF program */ + /* Instructions for interpreter */ + union { + DECLARE_FLEX_ARRAY(struct sock_filter, insns); + DECLARE_FLEX_ARRAY(struct bpf_insn, insnsi); + }; +}; + struct bpf_array_aux { /* Programs with direct jumps into programs part of this array. 
*/ struct list_head poke_progs; @@ -1336,6 +1386,8 @@ extern struct bpf_empty_prog_array bpf_empty_prog_array; struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags); void bpf_prog_array_free(struct bpf_prog_array *progs); +/* Use when traversal over the bpf_prog_array uses tasks_trace rcu */ +void bpf_prog_array_free_sleepable(struct bpf_prog_array *progs); int bpf_prog_array_length(struct bpf_prog_array *progs); bool bpf_prog_array_is_empty(struct bpf_prog_array *array); int bpf_prog_array_copy_to_user(struct bpf_prog_array *progs, @@ -1427,6 +1479,55 @@ bpf_prog_run_array(const struct bpf_prog_array *array, return ret; } +/* Notes on RCU design for bpf_prog_arrays containing sleepable programs: + * + * We use the tasks_trace rcu flavor read section to protect the bpf_prog_array + * overall. As a result, we must use the bpf_prog_array_free_sleepable + * in order to use the tasks_trace rcu grace period. + * + * When a non-sleepable program is inside the array, we take the rcu read + * section and disable preemption for that program alone, so it can access + * rcu-protected dynamically sized maps. 
+ */ +static __always_inline u32 +bpf_prog_run_array_sleepable(const struct bpf_prog_array __rcu *array_rcu, + const void *ctx, bpf_prog_run_fn run_prog) +{ + const struct bpf_prog_array_item *item; + const struct bpf_prog *prog; + const struct bpf_prog_array *array; + struct bpf_run_ctx *old_run_ctx; + struct bpf_trace_run_ctx run_ctx; + u32 ret = 1; + + might_fault(); + + rcu_read_lock_trace(); + migrate_disable(); + + array = rcu_dereference_check(array_rcu, rcu_read_lock_trace_held()); + if (unlikely(!array)) + goto out; + old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); + item = &array->items[0]; + while ((prog = READ_ONCE(item->prog))) { + if (!prog->aux->sleepable) + rcu_read_lock(); + + run_ctx.bpf_cookie = item->bpf_cookie; + ret &= run_prog(prog, ctx); + item++; + + if (!prog->aux->sleepable) + rcu_read_unlock(); + } + bpf_reset_run_ctx(old_run_ctx); +out: + migrate_enable(); + rcu_read_unlock_trace(); + return ret; +} + #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); extern struct mutex bpf_stats_enabled_mutex; @@ -2104,6 +2205,7 @@ int sock_map_bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr); void sock_map_unhash(struct sock *sk); +void sock_map_destroy(struct sock *sk); void sock_map_close(struct sock *sk, long timeout); #else static inline int bpf_prog_offload_init(struct bpf_prog *prog, @@ -2261,12 +2363,9 @@ extern const struct bpf_func_proto bpf_for_each_map_elem_proto; extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto; extern const struct bpf_func_proto bpf_sk_setsockopt_proto; extern const struct bpf_func_proto bpf_sk_getsockopt_proto; -extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto; extern const struct bpf_func_proto bpf_find_vma_proto; extern const struct bpf_func_proto bpf_loop_proto; -extern const struct bpf_func_proto bpf_strncmp_proto; extern const struct bpf_func_proto bpf_copy_from_user_task_proto; -extern const struct bpf_func_proto bpf_kptr_xchg_proto; const 
struct bpf_func_proto *tracing_prog_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index e8439f6cbe57..3930c963fa67 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -299,7 +299,7 @@ struct bpf_verifier_state { * If is_state_visited() sees a state with branches > 0 it means * there is a loop. If such state is exactly equal to the current state * it's an infinite loop. Note states_equal() checks for states - * equvalency, so two states being 'states_equal' does not mean + * equivalency, so two states being 'states_equal' does not mean * infinite loop. The exact comparison is provided by * states_maybe_looping() function. It's a stronger pre-check and * much faster than states_equal(). diff --git a/include/linux/btf.h b/include/linux/btf.h index 2611cea2c2b6..1bfed7fa0428 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -177,6 +177,19 @@ static inline bool btf_type_is_enum(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM; } +static inline bool btf_is_any_enum(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM || + BTF_INFO_KIND(t->info) == BTF_KIND_ENUM64; +} + +static inline bool btf_kind_core_compat(const struct btf_type *t1, + const struct btf_type *t2) +{ + return BTF_INFO_KIND(t1->info) == BTF_INFO_KIND(t2->info) || + (btf_is_any_enum(t1) && btf_is_any_enum(t2)); +} + static inline bool str_is_empty(const char *s) { return !s || !s[0]; @@ -192,6 +205,16 @@ static inline bool btf_is_enum(const struct btf_type *t) return btf_kind(t) == BTF_KIND_ENUM; } +static inline bool btf_is_enum64(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_ENUM64; +} + +static inline u64 btf_enum64_value(const struct btf_enum64 *e) +{ + return ((u64)e->val_hi32 << 32) | e->val_lo32; +} + static inline bool btf_is_composite(const struct btf_type *t) { u16 kind = btf_kind(t); @@ -332,6 
+355,11 @@ static inline struct btf_enum *btf_enum(const struct btf_type *t) return (struct btf_enum *)(t + 1); } +static inline struct btf_enum64 *btf_enum64(const struct btf_type *t) +{ + return (struct btf_enum64 *)(t + 1); +} + static inline const struct btf_var_secinfo *btf_type_var_secinfo( const struct btf_type *t) { diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index fdb22b00674a..182749e858b3 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -31,6 +31,7 @@ struct sk_buff *alloc_canfd_skb(struct net_device *dev, struct canfd_frame **cfd); struct sk_buff *alloc_can_err_skb(struct net_device *dev, struct can_frame **cf); +bool can_dropped_invalid_skb(struct net_device *dev, struct sk_buff *skb); /* * The struct can_skb_priv is used to transport additional information along @@ -96,64 +97,6 @@ static inline struct sk_buff *can_create_echo_skb(struct sk_buff *skb) return nskb; } -/* Check for outgoing skbs that have not been created by the CAN subsystem */ -static inline bool can_skb_headroom_valid(struct net_device *dev, - struct sk_buff *skb) -{ - /* af_packet creates a headroom of HH_DATA_MOD bytes which is fine */ - if (WARN_ON_ONCE(skb_headroom(skb) < sizeof(struct can_skb_priv))) - return false; - - /* af_packet does not apply CAN skb specific settings */ - if (skb->ip_summed == CHECKSUM_NONE) { - /* init headroom */ - can_skb_prv(skb)->ifindex = dev->ifindex; - can_skb_prv(skb)->skbcnt = 0; - - skb->ip_summed = CHECKSUM_UNNECESSARY; - - /* perform proper loopback on capable devices */ - if (dev->flags & IFF_ECHO) - skb->pkt_type = PACKET_LOOPBACK; - else - skb->pkt_type = PACKET_HOST; - - skb_reset_mac_header(skb); - skb_reset_network_header(skb); - skb_reset_transport_header(skb); - } - - return true; -} - -/* Drop a given socketbuffer if it does not contain a valid CAN frame. 
*/ -static inline bool can_dropped_invalid_skb(struct net_device *dev, - struct sk_buff *skb) -{ - const struct canfd_frame *cfd = (struct canfd_frame *)skb->data; - - if (skb->protocol == htons(ETH_P_CAN)) { - if (unlikely(skb->len != CAN_MTU || - cfd->len > CAN_MAX_DLEN)) - goto inval_skb; - } else if (skb->protocol == htons(ETH_P_CANFD)) { - if (unlikely(skb->len != CANFD_MTU || - cfd->len > CANFD_MAX_DLEN)) - goto inval_skb; - } else - goto inval_skb; - - if (!can_skb_headroom_valid(dev, skb)) - goto inval_skb; - - return false; - -inval_skb: - kfree_skb(skb); - dev->stats.tx_dropped++; - return true; -} - static inline bool can_is_canfd_skb(const struct sk_buff *skb) { /* the CAN specific type of skb is identified by its data length */ diff --git a/include/linux/filter.h b/include/linux/filter.h index ed0c0ff42ad5..d0cbb31b1b4d 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -559,40 +559,6 @@ struct bpf_prog_stats { struct u64_stats_sync syncp; } __aligned(2 * sizeof(u64)); -struct bpf_prog { - u16 pages; /* Number of allocated pages */ - u16 jited:1, /* Is our filter JIT'ed? */ - jit_requested:1,/* archs need to JIT the prog */ - gpl_compatible:1, /* Is filter GPL compatible? */ - cb_access:1, /* Is control block accessed? */ - dst_needed:1, /* Do we need dst entry? */ - blinding_requested:1, /* needs constant blinding */ - blinded:1, /* Was blinded */ - is_func:1, /* program is a bpf function */ - kprobe_override:1, /* Do we override a kprobe? */ - has_callchain_buf:1, /* callchain buffer allocated? 
*/ - enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */ - call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */ - call_get_func_ip:1, /* Do we call get_func_ip() */ - tstamp_type_access:1; /* Accessed __sk_buff->tstamp_type */ - enum bpf_prog_type type; /* Type of BPF program */ - enum bpf_attach_type expected_attach_type; /* For some prog types */ - u32 len; /* Number of filter blocks */ - u32 jited_len; /* Size of jited insns in bytes */ - u8 tag[BPF_TAG_SIZE]; - struct bpf_prog_stats __percpu *stats; - int __percpu *active; - unsigned int (*bpf_func)(const void *ctx, - const struct bpf_insn *insn); - struct bpf_prog_aux *aux; /* Auxiliary fields */ - struct sock_fprog_kern *orig_prog; /* Original BPF program */ - /* Instructions for interpreter */ - union { - DECLARE_FLEX_ARRAY(struct sock_filter, insns); - DECLARE_FLEX_ARRAY(struct bpf_insn, insnsi); - }; -}; - struct sk_filter { refcount_t refcnt; struct rcu_head rcu; diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 75d40acb60c1..5c65ae6b8154 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -76,6 +76,7 @@ #define IEEE80211_STYPE_ACTION 0x00D0 /* control */ +#define IEEE80211_STYPE_TRIGGER 0x0020 #define IEEE80211_STYPE_CTL_EXT 0x0060 #define IEEE80211_STYPE_BACK_REQ 0x0080 #define IEEE80211_STYPE_BACK 0x0090 @@ -295,6 +296,17 @@ static inline u16 ieee80211_sn_sub(u16 sn1, u16 sn2) #define IEEE80211_HT_CTL_LEN 4 +/* trigger type within common_info of trigger frame */ +#define IEEE80211_TRIGGER_TYPE_MASK 0xf +#define IEEE80211_TRIGGER_TYPE_BASIC 0x0 +#define IEEE80211_TRIGGER_TYPE_BFRP 0x1 +#define IEEE80211_TRIGGER_TYPE_MU_BAR 0x2 +#define IEEE80211_TRIGGER_TYPE_MU_RTS 0x3 +#define IEEE80211_TRIGGER_TYPE_BSRP 0x4 +#define IEEE80211_TRIGGER_TYPE_GCR_MU_BAR 0x5 +#define IEEE80211_TRIGGER_TYPE_BQRP 0x6 +#define IEEE80211_TRIGGER_TYPE_NFRP 0x7 + struct ieee80211_hdr { __le16 frame_control; __le16 duration_id; 
@@ -324,6 +336,15 @@ struct ieee80211_qos_hdr { __le16 qos_ctrl; } __packed __aligned(2); +struct ieee80211_trigger { + __le16 frame_control; + __le16 duration; + u8 ra[ETH_ALEN]; + u8 ta[ETH_ALEN]; + __le64 common_info; + u8 variable[]; +} __packed __aligned(2); + /** * ieee80211_has_tods - check if IEEE80211_FCTL_TODS is set * @fc: frame control bytes in little-endian byteorder @@ -730,6 +751,16 @@ static inline bool ieee80211_is_qos_nullfunc(__le16 fc) } /** + * ieee80211_is_trigger - check if frame is trigger frame + * @fc: frame control field in little-endian byteorder + */ +static inline bool ieee80211_is_trigger(__le16 fc) +{ + return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == + cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_TRIGGER); +} + +/** * ieee80211_is_any_nullfunc - check if frame is regular or QoS nullfunc frame * @fc: frame control bytes in little-endian byteorder */ diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index b42294739063..523025106a64 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -46,10 +46,10 @@ static inline void macvlan_count_rx(const struct macvlan_dev *vlan, pcpu_stats = get_cpu_ptr(vlan->pcpu_stats); u64_stats_update_begin(&pcpu_stats->syncp); - pcpu_stats->rx_packets++; - pcpu_stats->rx_bytes += len; + u64_stats_inc(&pcpu_stats->rx_packets); + u64_stats_add(&pcpu_stats->rx_bytes, len); if (multicast) - pcpu_stats->rx_multicast++; + u64_stats_inc(&pcpu_stats->rx_multicast); u64_stats_update_end(&pcpu_stats->syncp); put_cpu_ptr(vlan->pcpu_stats); } else { diff --git a/include/linux/if_team.h b/include/linux/if_team.h index add607943c95..fc985e5c739d 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -12,11 +12,11 @@ #include <uapi/linux/if_team.h> struct team_pcpu_stats { - u64 rx_packets; - u64 rx_bytes; - u64 rx_multicast; - u64 tx_packets; - u64 tx_bytes; + u64_stats_t rx_packets; + u64_stats_t rx_bytes; + u64_stats_t rx_multicast; + 
u64_stats_t tx_packets; + u64_stats_t tx_bytes; struct u64_stats_sync syncp; u32 rx_dropped; u32 tx_dropped; diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 2be4dd7e90a9..e00c4ee81ff7 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -118,11 +118,11 @@ static inline void vlan_drop_rx_stag_filter_info(struct net_device *dev) * @tx_dropped: number of tx drops */ struct vlan_pcpu_stats { - u64 rx_packets; - u64 rx_bytes; - u64 rx_multicast; - u64 tx_packets; - u64 tx_bytes; + u64_stats_t rx_packets; + u64_stats_t rx_bytes; + u64_stats_t rx_multicast; + u64_stats_t tx_packets; + u64_stats_t tx_bytes; struct u64_stats_sync syncp; u32 rx_errors; u32 tx_dropped; diff --git a/include/linux/mii.h b/include/linux/mii.h index 5ee13083cec7..d5a959ce4877 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -545,4 +545,39 @@ static inline u8 mii_resolve_flowctrl_fdx(u16 lcladv, u16 rmtadv) return cap; } +/** + * mii_bmcr_encode_fixed - encode fixed speed/duplex settings to a BMCR value + * @speed: a SPEED_* value + * @duplex: a DUPLEX_* value + * + * Encode the speed and duplex to a BMCR value. 2500, 1000, 100 and 10 Mbps are + * supported. 2500Mbps is encoded to 1000Mbps. Other speeds are encoded as 10 + * Mbps. Unknown duplex values are encoded to half-duplex. 
+ */ +static inline u16 mii_bmcr_encode_fixed(int speed, int duplex) +{ + u16 bmcr; + + switch (speed) { + case SPEED_2500: + case SPEED_1000: + bmcr = BMCR_SPEED1000; + break; + + case SPEED_100: + bmcr = BMCR_SPEED100; + break; + + case SPEED_10: + default: + bmcr = BMCR_SPEED10; + break; + } + + if (duplex == DUPLEX_FULL) + bmcr |= BMCR_FULLDPLX; + + return bmcr; +} + #endif /* __LINUX_MII_H__ */ diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index e05ee9f001ff..9dd4bf157255 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -26,7 +26,7 @@ * @remote: Remote address for tunnels */ struct vif_device { - struct net_device *dev; + struct net_device __rcu *dev; netdevice_tracker dev_tracker; unsigned long bytes_in, bytes_out; unsigned long pkt_in, pkt_out; @@ -52,6 +52,7 @@ static inline int mr_call_vif_notifier(struct notifier_block *nb, unsigned short family, enum fib_event_type event_type, struct vif_device *vif, + struct net_device *vif_dev, unsigned short vif_index, u32 tb_id, struct netlink_ext_ack *extack) { @@ -60,7 +61,7 @@ static inline int mr_call_vif_notifier(struct notifier_block *nb, .family = family, .extack = extack, }, - .dev = vif->dev, + .dev = vif_dev, .vif_index = vif_index, .vif_flags = vif->flags, .tb_id = tb_id, @@ -73,6 +74,7 @@ static inline int mr_call_vif_notifiers(struct net *net, unsigned short family, enum fib_event_type event_type, struct vif_device *vif, + struct net_device *vif_dev, unsigned short vif_index, u32 tb_id, unsigned int *ipmr_seq) { @@ -80,7 +82,7 @@ static inline int mr_call_vif_notifiers(struct net *net, .info = { .family = family, }, - .dev = vif->dev, + .dev = vif_dev, .vif_index = vif_index, .vif_flags = vif->flags, .tb_id = tb_id, @@ -98,7 +100,8 @@ static inline int mr_call_vif_notifiers(struct net *net, #define MAXVIFS 32 #endif -#define VIF_EXISTS(_mrt, _idx) (!!((_mrt)->vif_table[_idx].dev)) +/* Note: This helper is deprecated. 
*/ +#define VIF_EXISTS(_mrt, _idx) (!!rcu_access_pointer((_mrt)->vif_table[_idx].dev)) /* mfc_flags: * MFC_STATIC - the entry was added statically (not by a routing daemon) @@ -305,7 +308,7 @@ int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family, struct netlink_ext_ack *extack), struct mr_table *(*mr_iter)(struct net *net, struct mr_table *mrt), - rwlock_t *mrt_lock, struct netlink_ext_ack *extack); + struct netlink_ext_ack *extack); #else static inline void vif_device_init(struct vif_device *v, struct net_device *dev, @@ -360,7 +363,7 @@ static inline int mr_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack), struct mr_table *(*mr_iter)(struct net *net, struct mr_table *mrt), - rwlock_t *mrt_lock, struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack) { return -EINVAL; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2563d30736e9..1a3cb93c3dcc 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2636,10 +2636,10 @@ struct packet_offload { /* often modified stats are per-CPU, other are shared (netdev->stats) */ struct pcpu_sw_netstats { - u64 rx_packets; - u64 rx_bytes; - u64 tx_packets; - u64 tx_bytes; + u64_stats_t rx_packets; + u64_stats_t rx_bytes; + u64_stats_t tx_packets; + u64_stats_t tx_bytes; struct u64_stats_sync syncp; } __aligned(4 * sizeof(u64)); @@ -2656,8 +2656,8 @@ static inline void dev_sw_netstats_rx_add(struct net_device *dev, unsigned int l struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); u64_stats_update_begin(&tstats->syncp); - tstats->rx_bytes += len; - tstats->rx_packets++; + u64_stats_add(&tstats->rx_bytes, len); + u64_stats_inc(&tstats->rx_packets); u64_stats_update_end(&tstats->syncp); } @@ -2668,8 +2668,8 @@ static inline void dev_sw_netstats_tx_add(struct net_device *dev, struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); u64_stats_update_begin(&tstats->syncp); - tstats->tx_bytes += len; - 
tstats->tx_packets += packets; + u64_stats_add(&tstats->tx_bytes, len); + u64_stats_add(&tstats->tx_packets, packets); u64_stats_update_end(&tstats->syncp); } @@ -3981,8 +3981,8 @@ static inline void netdev_tracker_free(struct net_device *dev, #endif } -static inline void dev_hold_track(struct net_device *dev, - netdevice_tracker *tracker, gfp_t gfp) +static inline void netdev_hold(struct net_device *dev, + netdevice_tracker *tracker, gfp_t gfp) { if (dev) { __dev_hold(dev); @@ -3990,8 +3990,8 @@ static inline void dev_hold_track(struct net_device *dev, } } -static inline void dev_put_track(struct net_device *dev, - netdevice_tracker *tracker) +static inline void netdev_put(struct net_device *dev, + netdevice_tracker *tracker) { if (dev) { netdev_tracker_free(dev, tracker); @@ -4004,11 +4004,11 @@ static inline void dev_put_track(struct net_device *dev, * @dev: network device * * Hold reference to device to keep it from being freed. - * Try using dev_hold_track() instead. + * Try using netdev_hold() instead. */ static inline void dev_hold(struct net_device *dev) { - dev_hold_track(dev, NULL, GFP_ATOMIC); + netdev_hold(dev, NULL, GFP_ATOMIC); } /** @@ -4016,17 +4016,17 @@ static inline void dev_hold(struct net_device *dev) * @dev: network device * * Release reference to device to allow it to be freed. - * Try using dev_put_track() instead. + * Try using netdev_put() instead. 
*/ static inline void dev_put(struct net_device *dev) { - dev_put_track(dev, NULL); + netdev_put(dev, NULL); } -static inline void dev_replace_track(struct net_device *odev, - struct net_device *ndev, - netdevice_tracker *tracker, - gfp_t gfp) +static inline void netdev_ref_replace(struct net_device *odev, + struct net_device *ndev, + netdevice_tracker *tracker, + gfp_t gfp) { if (odev) netdev_tracker_free(odev, tracker); diff --git a/include/linux/pcs-rzn1-miic.h b/include/linux/pcs-rzn1-miic.h new file mode 100644 index 000000000000..56d12b21365d --- /dev/null +++ b/include/linux/pcs-rzn1-miic.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022 Schneider Electric + * + * Clément Léger <clement.leger@bootlin.com> + */ + +#ifndef __LINUX_PCS_MIIC_H +#define __LINUX_PCS_MIIC_H + +struct phylink; +struct device_node; + +struct phylink_pcs *miic_create(struct device *dev, struct device_node *np); + +void miic_destroy(struct phylink_pcs *pcs); + +#endif /* __LINUX_PCS_MIIC_H */ diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index 266eb26fb029..d2da1e0b4a92 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -17,6 +17,7 @@ #define DW_AN_C73 1 #define DW_AN_C37_SGMII 2 #define DW_2500BASEX 3 +#define DW_AN_C37_1000BASEX 4 struct xpcs_id; @@ -30,7 +31,7 @@ int xpcs_get_an_mode(struct dw_xpcs *xpcs, phy_interface_t interface); void xpcs_link_up(struct phylink_pcs *pcs, unsigned int mode, phy_interface_t interface, int speed, int duplex); int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface, - unsigned int mode); + unsigned int mode, const unsigned long *advertising); void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces); int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns, int enable); diff --git a/include/linux/phy.h b/include/linux/phy.h index 508f1149665b..bed9a347481b 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1539,6 
+1539,9 @@ static inline void phy_device_reset(struct phy_device *phydev, int value) #define phydev_err(_phydev, format, args...) \ dev_err(&_phydev->mdio.dev, format, ##args) +#define phydev_err_probe(_phydev, err, format, args...) \ + dev_err_probe(&_phydev->mdio.dev, err, format, ##args) + #define phydev_info(_phydev, format, args...) \ dev_info(&_phydev->mdio.dev, format, ##args) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d3d10556f0fa..f6a27ab19202 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -43,6 +43,7 @@ #include <linux/netfilter/nf_conntrack_common.h> #endif #include <net/net_debug.h> +#include <net/dropreason.h> /** * DOC: skb checksums @@ -337,184 +338,6 @@ struct sk_buff_head { struct sk_buff; -/* The reason of skb drop, which is used in kfree_skb_reason(). - * en...maybe they should be splited by group? - * - * Each item here should also be in 'TRACE_SKB_DROP_REASON', which is - * used to translate the reason to string. - */ -enum skb_drop_reason { - SKB_NOT_DROPPED_YET = 0, - SKB_DROP_REASON_NOT_SPECIFIED, /* drop reason is not specified */ - SKB_DROP_REASON_NO_SOCKET, /* socket not found */ - SKB_DROP_REASON_PKT_TOO_SMALL, /* packet size is too small */ - SKB_DROP_REASON_TCP_CSUM, /* TCP checksum error */ - SKB_DROP_REASON_SOCKET_FILTER, /* dropped by socket filter */ - SKB_DROP_REASON_UDP_CSUM, /* UDP checksum error */ - SKB_DROP_REASON_NETFILTER_DROP, /* dropped by netfilter */ - SKB_DROP_REASON_OTHERHOST, /* packet don't belong to current - * host (interface is in promisc - * mode) - */ - SKB_DROP_REASON_IP_CSUM, /* IP checksum error */ - SKB_DROP_REASON_IP_INHDR, /* there is something wrong with - * IP header (see - * IPSTATS_MIB_INHDRERRORS) - */ - SKB_DROP_REASON_IP_RPFILTER, /* IP rpfilter validate failed. 
- * see the document for rp_filter - * in ip-sysctl.rst for more - * information - */ - SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST, /* destination address of L2 - * is multicast, but L3 is - * unicast. - */ - SKB_DROP_REASON_XFRM_POLICY, /* xfrm policy check failed */ - SKB_DROP_REASON_IP_NOPROTO, /* no support for IP protocol */ - SKB_DROP_REASON_SOCKET_RCVBUFF, /* socket receive buff is full */ - SKB_DROP_REASON_PROTO_MEM, /* proto memory limition, such as - * udp packet drop out of - * udp_memory_allocated. - */ - SKB_DROP_REASON_TCP_MD5NOTFOUND, /* no MD5 hash and one - * expected, corresponding - * to LINUX_MIB_TCPMD5NOTFOUND - */ - SKB_DROP_REASON_TCP_MD5UNEXPECTED, /* MD5 hash and we're not - * expecting one, corresponding - * to LINUX_MIB_TCPMD5UNEXPECTED - */ - SKB_DROP_REASON_TCP_MD5FAILURE, /* MD5 hash and its wrong, - * corresponding to - * LINUX_MIB_TCPMD5FAILURE - */ - SKB_DROP_REASON_SOCKET_BACKLOG, /* failed to add skb to socket - * backlog (see - * LINUX_MIB_TCPBACKLOGDROP) - */ - SKB_DROP_REASON_TCP_FLAGS, /* TCP flags invalid */ - SKB_DROP_REASON_TCP_ZEROWINDOW, /* TCP receive window size is zero, - * see LINUX_MIB_TCPZEROWINDOWDROP - */ - SKB_DROP_REASON_TCP_OLD_DATA, /* the TCP data reveived is already - * received before (spurious retrans - * may happened), see - * LINUX_MIB_DELAYEDACKLOST - */ - SKB_DROP_REASON_TCP_OVERWINDOW, /* the TCP data is out of window, - * the seq of the first byte exceed - * the right edges of receive - * window - */ - SKB_DROP_REASON_TCP_OFOMERGE, /* the data of skb is already in - * the ofo queue, corresponding to - * LINUX_MIB_TCPOFOMERGE - */ - SKB_DROP_REASON_TCP_RFC7323_PAWS, /* PAWS check, corresponding to - * LINUX_MIB_PAWSESTABREJECTED - */ - SKB_DROP_REASON_TCP_INVALID_SEQUENCE, /* Not acceptable SEQ field */ - SKB_DROP_REASON_TCP_RESET, /* Invalid RST packet */ - SKB_DROP_REASON_TCP_INVALID_SYN, /* Incoming packet has unexpected SYN flag */ - SKB_DROP_REASON_TCP_CLOSE, /* TCP socket in CLOSE state */ - 
SKB_DROP_REASON_TCP_FASTOPEN, /* dropped by FASTOPEN request socket */ - SKB_DROP_REASON_TCP_OLD_ACK, /* TCP ACK is old, but in window */ - SKB_DROP_REASON_TCP_TOO_OLD_ACK, /* TCP ACK is too old */ - SKB_DROP_REASON_TCP_ACK_UNSENT_DATA, /* TCP ACK for data we haven't sent yet */ - SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE, /* pruned from TCP OFO queue */ - SKB_DROP_REASON_TCP_OFO_DROP, /* data already in receive queue */ - SKB_DROP_REASON_IP_OUTNOROUTES, /* route lookup failed */ - SKB_DROP_REASON_BPF_CGROUP_EGRESS, /* dropped by - * BPF_PROG_TYPE_CGROUP_SKB - * eBPF program - */ - SKB_DROP_REASON_IPV6DISABLED, /* IPv6 is disabled on the device */ - SKB_DROP_REASON_NEIGH_CREATEFAIL, /* failed to create neigh - * entry - */ - SKB_DROP_REASON_NEIGH_FAILED, /* neigh entry in failed state */ - SKB_DROP_REASON_NEIGH_QUEUEFULL, /* arp_queue for neigh - * entry is full - */ - SKB_DROP_REASON_NEIGH_DEAD, /* neigh entry is dead */ - SKB_DROP_REASON_TC_EGRESS, /* dropped in TC egress HOOK */ - SKB_DROP_REASON_QDISC_DROP, /* dropped by qdisc when packet - * outputting (failed to enqueue to - * current qdisc) - */ - SKB_DROP_REASON_CPU_BACKLOG, /* failed to enqueue the skb to - * the per CPU backlog queue. 
This - * can be caused by backlog queue - * full (see netdev_max_backlog in - * net.rst) or RPS flow limit - */ - SKB_DROP_REASON_XDP, /* dropped by XDP in input path */ - SKB_DROP_REASON_TC_INGRESS, /* dropped in TC ingress HOOK */ - SKB_DROP_REASON_UNHANDLED_PROTO, /* protocol not implemented - * or not supported - */ - SKB_DROP_REASON_SKB_CSUM, /* sk_buff checksum computation - * error - */ - SKB_DROP_REASON_SKB_GSO_SEG, /* gso segmentation error */ - SKB_DROP_REASON_SKB_UCOPY_FAULT, /* failed to copy data from - * user space, e.g., via - * zerocopy_sg_from_iter() - * or skb_orphan_frags_rx() - */ - SKB_DROP_REASON_DEV_HDR, /* device driver specific - * header/metadata is invalid - */ - /* the device is not ready to xmit/recv due to any of its data - * structure that is not up/ready/initialized, e.g., the IFF_UP is - * not set, or driver specific tun->tfiles[txq] is not initialized - */ - SKB_DROP_REASON_DEV_READY, - SKB_DROP_REASON_FULL_RING, /* ring buffer is full */ - SKB_DROP_REASON_NOMEM, /* error due to OOM */ - SKB_DROP_REASON_HDR_TRUNC, /* failed to trunc/extract the header - * from networking data, e.g., failed - * to pull the protocol header from - * frags via pskb_may_pull() - */ - SKB_DROP_REASON_TAP_FILTER, /* dropped by (ebpf) filter directly - * attached to tun/tap, e.g., via - * TUNSETFILTEREBPF - */ - SKB_DROP_REASON_TAP_TXFILTER, /* dropped by tx filter implemented - * at tun/tap, e.g., check_filter() - */ - SKB_DROP_REASON_ICMP_CSUM, /* ICMP checksum error */ - SKB_DROP_REASON_INVALID_PROTO, /* the packet doesn't follow RFC - * 2211, such as a broadcasts - * ICMP_TIMESTAMP - */ - SKB_DROP_REASON_IP_INADDRERRORS, /* host unreachable, corresponding - * to IPSTATS_MIB_INADDRERRORS - */ - SKB_DROP_REASON_IP_INNOROUTES, /* network unreachable, corresponding - * to IPSTATS_MIB_INADDRERRORS - */ - SKB_DROP_REASON_PKT_TOO_BIG, /* packet size is too big (maybe exceed - * the MTU) - */ - SKB_DROP_REASON_MAX, -}; - -#define SKB_DR_INIT(name, reason) \ - 
enum skb_drop_reason name = SKB_DROP_REASON_##reason -#define SKB_DR(name) \ - SKB_DR_INIT(name, NOT_SPECIFIED) -#define SKB_DR_SET(name, reason) \ - (name = SKB_DROP_REASON_##reason) -#define SKB_DR_OR(name, reason) \ - do { \ - if (name == SKB_DROP_REASON_NOT_SPECIFIED || \ - name == SKB_NOT_DROPPED_YET) \ - SKB_DR_SET(name, reason); \ - } while (0) - /* To allow 64K frame to be packed as single skb without frag_list we * require 64K/PAGE_SIZE pages plus 1 additional page to allow for * buffers which do not start on a page boundary. @@ -2529,6 +2352,18 @@ static inline unsigned int skb_pagelen(const struct sk_buff *skb) } /** + * skb_len_add - adds a number to len fields of skb + * @skb: buffer to add len to + * @delta: number of bytes to add + */ +static inline void skb_len_add(struct sk_buff *skb, int delta) +{ + skb->len += delta; + skb->data_len += delta; + skb->truesize += delta; +} + +/** * __skb_fill_page_desc - initialise a paged fragment in an skb * @skb: buffer containing fragment to be initialised * @i: paged fragment index to initialise @@ -2940,8 +2775,14 @@ static inline void skb_set_network_header(struct sk_buff *skb, const int offset) skb->network_header += offset; } +static inline int skb_mac_header_was_set(const struct sk_buff *skb) +{ + return skb->mac_header != (typeof(skb->mac_header))~0U; +} + static inline unsigned char *skb_mac_header(const struct sk_buff *skb) { + DEBUG_NET_WARN_ON_ONCE(!skb_mac_header_was_set(skb)); return skb->head + skb->mac_header; } @@ -2952,14 +2793,10 @@ static inline int skb_mac_offset(const struct sk_buff *skb) static inline u32 skb_mac_header_len(const struct sk_buff *skb) { + DEBUG_NET_WARN_ON_ONCE(!skb_mac_header_was_set(skb)); return skb->network_header - skb->mac_header; } -static inline int skb_mac_header_was_set(const struct sk_buff *skb) -{ - return skb->mac_header != (typeof(skb->mac_header))~0U; -} - static inline void skb_unset_mac_header(struct sk_buff *skb) { skb->mac_header = 
(typeof(skb->mac_header))~0U; diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index c5a2d6f50f25..153b6dec9b6a 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -95,6 +95,7 @@ struct sk_psock { spinlock_t link_lock; refcount_t refcnt; void (*saved_unhash)(struct sock *sk); + void (*saved_destroy)(struct sock *sk); void (*saved_close)(struct sock *sk, long timeout); void (*saved_write_space)(struct sock *sk); void (*saved_data_ready)(struct sock *sk); diff --git a/include/linux/socket.h b/include/linux/socket.h index 17311ad9f9af..414b8c7bb8f7 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -428,10 +428,6 @@ extern int __sys_recvfrom(int fd, void __user *ubuf, size_t size, extern int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags, struct sockaddr __user *addr, int addr_len); -extern int __sys_accept4_file(struct file *file, unsigned file_flags, - struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen, int flags, - unsigned long nofile); extern struct file *do_accept(struct file *file, unsigned file_flags, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen, int flags); diff --git a/include/net/af_unix.h b/include/net/af_unix.h index a7ef624ed726..480fa579787e 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -16,12 +16,11 @@ void wait_for_unix_gc(void); struct sock *unix_get_socket(struct file *filp); struct sock *unix_peer_get(struct sock *sk); -#define UNIX_HASH_SIZE 256 +#define UNIX_HASH_MOD (256 - 1) +#define UNIX_HASH_SIZE (256 * 2) #define UNIX_HASH_BITS 8 extern unsigned int unix_tot_inflight; -extern spinlock_t unix_table_locks[2 * UNIX_HASH_SIZE]; -extern struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; struct unix_address { refcount_t refcnt; diff --git a/include/net/bond_options.h b/include/net/bond_options.h index 61b49063791c..d2aea5cf1e41 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -67,6 +67,7 @@ 
enum { BOND_OPT_LACP_ACTIVE, BOND_OPT_MISSED_MAX, BOND_OPT_NS_TARGETS, + BOND_OPT_PRIO, BOND_OPT_LAST }; @@ -83,7 +84,10 @@ struct bond_opt_value { char *string; u64 value; u32 flags; - char extra[BOND_OPT_EXTRA_MAXLEN]; + union { + char extra[BOND_OPT_EXTRA_MAXLEN]; + struct net_device *slave_dev; + }; }; struct bonding; @@ -107,7 +111,8 @@ struct bond_option { }; int __bond_opt_set(struct bonding *bond, unsigned int option, - struct bond_opt_value *val); + struct bond_opt_value *val, + struct nlattr *bad_attr, struct netlink_ext_ack *extack); int __bond_opt_set_notify(struct bonding *bond, unsigned int option, struct bond_opt_value *val); int bond_opt_tryset_rtnl(struct bonding *bond, unsigned int option, char *buf); @@ -132,13 +137,16 @@ static inline void __bond_opt_init(struct bond_opt_value *optval, optval->value = value; else if (string) optval->string = string; - else if (extra_len <= BOND_OPT_EXTRA_MAXLEN) + + if (extra && extra_len <= BOND_OPT_EXTRA_MAXLEN) memcpy(optval->extra, extra, extra_len); } #define bond_opt_initval(optval, value) __bond_opt_init(optval, NULL, value, NULL, 0) #define bond_opt_initstr(optval, str) __bond_opt_init(optval, str, ULLONG_MAX, NULL, 0) #define bond_opt_initextra(optval, extra, extra_len) \ __bond_opt_init(optval, NULL, ULLONG_MAX, extra, extra_len) +#define bond_opt_slave_initval(optval, slave_dev, value) \ + __bond_opt_init(optval, NULL, value, slave_dev, sizeof(struct net_device *)) void bond_option_arp_ip_targets_clear(struct bonding *bond); #if IS_ENABLED(CONFIG_IPV6) diff --git a/include/net/bonding.h b/include/net/bonding.h index cb904d356e31..6e78d657aa05 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -178,6 +178,7 @@ struct slave { u32 speed; u16 queue_id; u8 perm_hwaddr[MAX_ADDR_LEN]; + int prio; struct ad_slave_info *ad_info; struct tlb_slave_info tlb_info; #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/include/net/dropreason.h b/include/net/dropreason.h new file mode 100644 index 
000000000000..fae9b40e54fa --- /dev/null +++ b/include/net/dropreason.h @@ -0,0 +1,256 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _LINUX_DROPREASON_H +#define _LINUX_DROPREASON_H + +/** + * enum skb_drop_reason - the reasons of skb drops + * + * The reason of skb drop, which is used in kfree_skb_reason(). + */ +enum skb_drop_reason { + /** + * @SKB_NOT_DROPPED_YET: skb is not dropped yet (used for no-drop case) + */ + SKB_NOT_DROPPED_YET = 0, + /** @SKB_DROP_REASON_NOT_SPECIFIED: drop reason is not specified */ + SKB_DROP_REASON_NOT_SPECIFIED, + /** @SKB_DROP_REASON_NO_SOCKET: socket not found */ + SKB_DROP_REASON_NO_SOCKET, + /** @SKB_DROP_REASON_PKT_TOO_SMALL: packet size is too small */ + SKB_DROP_REASON_PKT_TOO_SMALL, + /** @SKB_DROP_REASON_TCP_CSUM: TCP checksum error */ + SKB_DROP_REASON_TCP_CSUM, + /** @SKB_DROP_REASON_SOCKET_FILTER: dropped by socket filter */ + SKB_DROP_REASON_SOCKET_FILTER, + /** @SKB_DROP_REASON_UDP_CSUM: UDP checksum error */ + SKB_DROP_REASON_UDP_CSUM, + /** @SKB_DROP_REASON_NETFILTER_DROP: dropped by netfilter */ + SKB_DROP_REASON_NETFILTER_DROP, + /** + * @SKB_DROP_REASON_OTHERHOST: packet doesn't belong to the current host + * (interface is in promisc mode) + */ + SKB_DROP_REASON_OTHERHOST, + /** @SKB_DROP_REASON_IP_CSUM: IP checksum error */ + SKB_DROP_REASON_IP_CSUM, + /** + * @SKB_DROP_REASON_IP_INHDR: there is something wrong with IP header (see + * IPSTATS_MIB_INHDRERRORS) + */ + SKB_DROP_REASON_IP_INHDR, + /** + * @SKB_DROP_REASON_IP_RPFILTER: IP rpfilter validation failed. See the + * documentation for rp_filter in ip-sysctl.rst for more information + */ + SKB_DROP_REASON_IP_RPFILTER, + /** + * @SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST: destination address of L2 is + * multicast, but L3 is unicast.
+ */ + SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST, + /** @SKB_DROP_REASON_XFRM_POLICY: xfrm policy check failed */ + SKB_DROP_REASON_XFRM_POLICY, + /** @SKB_DROP_REASON_IP_NOPROTO: no support for IP protocol */ + SKB_DROP_REASON_IP_NOPROTO, + /** @SKB_DROP_REASON_SOCKET_RCVBUFF: socket receive buff is full */ + SKB_DROP_REASON_SOCKET_RCVBUFF, + /** + * @SKB_DROP_REASON_PROTO_MEM: proto memory limitation, such as udp packet + * drop out of udp_memory_allocated. + */ + SKB_DROP_REASON_PROTO_MEM, + /** + * @SKB_DROP_REASON_TCP_MD5NOTFOUND: no MD5 hash and one expected, + * corresponding to LINUX_MIB_TCPMD5NOTFOUND + */ + SKB_DROP_REASON_TCP_MD5NOTFOUND, + /** + * @SKB_DROP_REASON_TCP_MD5UNEXPECTED: MD5 hash and we're not expecting + * one, corresponding to LINUX_MIB_TCPMD5UNEXPECTED + */ + SKB_DROP_REASON_TCP_MD5UNEXPECTED, + /** + * @SKB_DROP_REASON_TCP_MD5FAILURE: MD5 hash and it's wrong, corresponding + * to LINUX_MIB_TCPMD5FAILURE + */ + SKB_DROP_REASON_TCP_MD5FAILURE, + /** + * @SKB_DROP_REASON_SOCKET_BACKLOG: failed to add skb to socket backlog ( + * see LINUX_MIB_TCPBACKLOGDROP) + */ + SKB_DROP_REASON_SOCKET_BACKLOG, + /** @SKB_DROP_REASON_TCP_FLAGS: TCP flags invalid */ + SKB_DROP_REASON_TCP_FLAGS, + /** + * @SKB_DROP_REASON_TCP_ZEROWINDOW: TCP receive window size is zero, + * see LINUX_MIB_TCPZEROWINDOWDROP + */ + SKB_DROP_REASON_TCP_ZEROWINDOW, + /** + * @SKB_DROP_REASON_TCP_OLD_DATA: the TCP data received was already + * received before (spurious retrans may have happened), see + * LINUX_MIB_DELAYEDACKLOST + */ + SKB_DROP_REASON_TCP_OLD_DATA, + /** + * @SKB_DROP_REASON_TCP_OVERWINDOW: the TCP data is out of window, + * the seq of the first byte exceeds the right edge of the receive + * window + */ + SKB_DROP_REASON_TCP_OVERWINDOW, + /** + * @SKB_DROP_REASON_TCP_OFOMERGE: the data of skb is already in the ofo + * queue, corresponding to LINUX_MIB_TCPOFOMERGE + */ + SKB_DROP_REASON_TCP_OFOMERGE, + /** + * @SKB_DROP_REASON_TCP_RFC7323_PAWS: PAWS check, corresponding to + *
LINUX_MIB_PAWSESTABREJECTED + */ + SKB_DROP_REASON_TCP_RFC7323_PAWS, + /** @SKB_DROP_REASON_TCP_INVALID_SEQUENCE: Not acceptable SEQ field */ + SKB_DROP_REASON_TCP_INVALID_SEQUENCE, + /** @SKB_DROP_REASON_TCP_RESET: Invalid RST packet */ + SKB_DROP_REASON_TCP_RESET, + /** + * @SKB_DROP_REASON_TCP_INVALID_SYN: Incoming packet has unexpected + * SYN flag + */ + SKB_DROP_REASON_TCP_INVALID_SYN, + /** @SKB_DROP_REASON_TCP_CLOSE: TCP socket in CLOSE state */ + SKB_DROP_REASON_TCP_CLOSE, + /** @SKB_DROP_REASON_TCP_FASTOPEN: dropped by FASTOPEN request socket */ + SKB_DROP_REASON_TCP_FASTOPEN, + /** @SKB_DROP_REASON_TCP_OLD_ACK: TCP ACK is old, but in window */ + SKB_DROP_REASON_TCP_OLD_ACK, + /** @SKB_DROP_REASON_TCP_TOO_OLD_ACK: TCP ACK is too old */ + SKB_DROP_REASON_TCP_TOO_OLD_ACK, + /** + * @SKB_DROP_REASON_TCP_ACK_UNSENT_DATA: TCP ACK for data we haven't + * sent yet + */ + SKB_DROP_REASON_TCP_ACK_UNSENT_DATA, + /** @SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE: pruned from TCP OFO queue */ + SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE, + /** @SKB_DROP_REASON_TCP_OFO_DROP: data already in receive queue */ + SKB_DROP_REASON_TCP_OFO_DROP, + /** @SKB_DROP_REASON_IP_OUTNOROUTES: route lookup failed */ + SKB_DROP_REASON_IP_OUTNOROUTES, + /** + * @SKB_DROP_REASON_BPF_CGROUP_EGRESS: dropped by BPF_PROG_TYPE_CGROUP_SKB + * eBPF program + */ + SKB_DROP_REASON_BPF_CGROUP_EGRESS, + /** @SKB_DROP_REASON_IPV6DISABLED: IPv6 is disabled on the device */ + SKB_DROP_REASON_IPV6DISABLED, + /** @SKB_DROP_REASON_NEIGH_CREATEFAIL: failed to create neigh entry */ + SKB_DROP_REASON_NEIGH_CREATEFAIL, + /** @SKB_DROP_REASON_NEIGH_FAILED: neigh entry in failed state */ + SKB_DROP_REASON_NEIGH_FAILED, + /** @SKB_DROP_REASON_NEIGH_QUEUEFULL: arp_queue for neigh entry is full */ + SKB_DROP_REASON_NEIGH_QUEUEFULL, + /** @SKB_DROP_REASON_NEIGH_DEAD: neigh entry is dead */ + SKB_DROP_REASON_NEIGH_DEAD, + /** @SKB_DROP_REASON_TC_EGRESS: dropped in TC egress HOOK */ + SKB_DROP_REASON_TC_EGRESS, + /** + * 
@SKB_DROP_REASON_QDISC_DROP: dropped by qdisc when packet outputting ( + * failed to enqueue to current qdisc) + */ + SKB_DROP_REASON_QDISC_DROP, + /** + * @SKB_DROP_REASON_CPU_BACKLOG: failed to enqueue the skb to the per CPU + * backlog queue. This can be caused by backlog queue full (see + * netdev_max_backlog in net.rst) or RPS flow limit + */ + SKB_DROP_REASON_CPU_BACKLOG, + /** @SKB_DROP_REASON_XDP: dropped by XDP in input path */ + SKB_DROP_REASON_XDP, + /** @SKB_DROP_REASON_TC_INGRESS: dropped in TC ingress HOOK */ + SKB_DROP_REASON_TC_INGRESS, + /** @SKB_DROP_REASON_UNHANDLED_PROTO: protocol not implemented or not supported */ + SKB_DROP_REASON_UNHANDLED_PROTO, + /** @SKB_DROP_REASON_SKB_CSUM: sk_buff checksum computation error */ + SKB_DROP_REASON_SKB_CSUM, + /** @SKB_DROP_REASON_SKB_GSO_SEG: gso segmentation error */ + SKB_DROP_REASON_SKB_GSO_SEG, + /** + * @SKB_DROP_REASON_SKB_UCOPY_FAULT: failed to copy data from user space, + * e.g., via zerocopy_sg_from_iter() or skb_orphan_frags_rx() + */ + SKB_DROP_REASON_SKB_UCOPY_FAULT, + /** @SKB_DROP_REASON_DEV_HDR: device driver specific header/metadata is invalid */ + SKB_DROP_REASON_DEV_HDR, + /** + * @SKB_DROP_REASON_DEV_READY: the device is not ready to xmit/recv due to + * any of its data structure that is not up/ready/initialized, + * e.g., the IFF_UP is not set, or driver specific tun->tfiles[txq] + * is not initialized + */ + SKB_DROP_REASON_DEV_READY, + /** @SKB_DROP_REASON_FULL_RING: ring buffer is full */ + SKB_DROP_REASON_FULL_RING, + /** @SKB_DROP_REASON_NOMEM: error due to OOM */ + SKB_DROP_REASON_NOMEM, + /** + * @SKB_DROP_REASON_HDR_TRUNC: failed to trunc/extract the header from + * networking data, e.g., failed to pull the protocol header from + * frags via pskb_may_pull() + */ + SKB_DROP_REASON_HDR_TRUNC, + /** + * @SKB_DROP_REASON_TAP_FILTER: dropped by (ebpf) filter directly attached + * to tun/tap, e.g., via TUNSETFILTEREBPF + */ + SKB_DROP_REASON_TAP_FILTER, + /** + * 
@SKB_DROP_REASON_TAP_TXFILTER: dropped by tx filter implemented at + * tun/tap, e.g., check_filter() + */ + SKB_DROP_REASON_TAP_TXFILTER, + /** @SKB_DROP_REASON_ICMP_CSUM: ICMP checksum error */ + SKB_DROP_REASON_ICMP_CSUM, + /** + * @SKB_DROP_REASON_INVALID_PROTO: the packet doesn't follow RFC 1122, + * such as a broadcast ICMP_TIMESTAMP + */ + SKB_DROP_REASON_INVALID_PROTO, + /** + * @SKB_DROP_REASON_IP_INADDRERRORS: host unreachable, corresponding to + * IPSTATS_MIB_INADDRERRORS + */ + SKB_DROP_REASON_IP_INADDRERRORS, + /** + * @SKB_DROP_REASON_IP_INNOROUTES: network unreachable, corresponding to + * IPSTATS_MIB_INNOROUTES + */ + SKB_DROP_REASON_IP_INNOROUTES, + /** + * @SKB_DROP_REASON_PKT_TOO_BIG: packet size is too big (may exceed the + * MTU) + */ + SKB_DROP_REASON_PKT_TOO_BIG, + /** + * @SKB_DROP_REASON_MAX: the maximum of drop reason, which shouldn't be + * used as a real 'reason' + */ + SKB_DROP_REASON_MAX, +}; + +#define SKB_DR_INIT(name, reason) \ + enum skb_drop_reason name = SKB_DROP_REASON_##reason +#define SKB_DR(name) \ + SKB_DR_INIT(name, NOT_SPECIFIED) +#define SKB_DR_SET(name, reason) \ + (name = SKB_DROP_REASON_##reason) +#define SKB_DR_OR(name, reason) \ + do { \ + if (name == SKB_DROP_REASON_NOT_SPECIFIED || \ + name == SKB_NOT_DROPPED_YET) \ + SKB_DR_SET(name, reason); \ + } while (0) + +extern const char * const drop_reasons[]; + +#endif diff --git a/include/net/dsa.h b/include/net/dsa.h index 14f07275852b..33283eeda697 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -53,6 +53,7 @@ struct phylink_link_state; #define DSA_TAG_PROTO_SJA1110_VALUE 23 #define DSA_TAG_PROTO_RTL8_4_VALUE 24 #define DSA_TAG_PROTO_RTL8_4T_VALUE 25 +#define DSA_TAG_PROTO_RZN1_A5PSW_VALUE 26 enum dsa_tag_protocol { DSA_TAG_PROTO_NONE = DSA_TAG_PROTO_NONE_VALUE, @@ -81,6 +82,7 @@ enum dsa_tag_protocol { DSA_TAG_PROTO_SJA1110 = DSA_TAG_PROTO_SJA1110_VALUE, DSA_TAG_PROTO_RTL8_4 = DSA_TAG_PROTO_RTL8_4_VALUE, DSA_TAG_PROTO_RTL8_4T =
DSA_TAG_PROTO_RTL8_4T_VALUE, + DSA_TAG_PROTO_RZN1_A5PSW = DSA_TAG_PROTO_RZN1_A5PSW_VALUE, }; struct dsa_switch; @@ -888,6 +890,9 @@ struct dsa_switch_ops { struct ethtool_eth_mac_stats *mac_stats); void (*get_eth_ctrl_stats)(struct dsa_switch *ds, int port, struct ethtool_eth_ctrl_stats *ctrl_stats); + void (*get_rmon_stats)(struct dsa_switch *ds, int port, + struct ethtool_rmon_stats *rmon_stats, + const struct ethtool_rmon_hist_range **ranges); void (*get_stats64)(struct dsa_switch *ds, int port, struct rtnl_link_stats64 *s); void (*self_test)(struct dsa_switch *ds, int port, diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index c24fa934221d..70cbc4a72669 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -456,8 +456,8 @@ static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len) struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats); u64_stats_update_begin(&tstats->syncp); - tstats->tx_bytes += pkt_len; - tstats->tx_packets++; + u64_stats_add(&tstats->tx_bytes, pkt_len); + u64_stats_inc(&tstats->tx_packets); u64_stats_update_end(&tstats->syncp); put_cpu_ptr(tstats); } else { diff --git a/include/net/mac80211.h b/include/net/mac80211.h index ebadb2103968..5c9e97eca739 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1959,36 +1959,6 @@ struct ieee80211_key_seq { }; /** - * struct ieee80211_cipher_scheme - cipher scheme - * - * This structure contains a cipher scheme information defining - * the secure packet crypto handling. 
- * - * @cipher: a cipher suite selector - * @iftype: a cipher iftype bit mask indicating an allowed cipher usage - * @hdr_len: a length of a security header used the cipher - * @pn_len: a length of a packet number in the security header - * @pn_off: an offset of pn from the beginning of the security header - * @key_idx_off: an offset of key index byte in the security header - * @key_idx_mask: a bit mask of key_idx bits - * @key_idx_shift: a bit shift needed to get key_idx - * key_idx value calculation: - * (sec_header_base[key_idx_off] & key_idx_mask) >> key_idx_shift - * @mic_len: a mic length in bytes - */ -struct ieee80211_cipher_scheme { - u32 cipher; - u16 iftype; - u8 hdr_len; - u8 pn_len; - u8 pn_off; - u8 key_idx_off; - u8 key_idx_mask; - u8 key_idx_shift; - u8 mic_len; -}; - -/** * enum set_key_cmd - key command * * Used with the set_key() callback in &struct ieee80211_ops, this @@ -2664,9 +2634,6 @@ enum ieee80211_hw_flags { * deliver to a WMM STA during any Service Period triggered by the WMM STA. * Use IEEE80211_WMM_IE_STA_QOSINFO_SP_* for correct values. * - * @n_cipher_schemes: a size of an array of cipher schemes definitions. - * @cipher_schemes: a pointer to an array of cipher scheme definitions - * supported by HW. * @max_nan_de_entries: maximum number of NAN DE functions supported by the * device. 
* @@ -2716,8 +2683,6 @@ struct ieee80211_hw { netdev_features_t netdev_features; u8 uapsd_queues; u8 uapsd_max_sp_len; - u8 n_cipher_schemes; - const struct ieee80211_cipher_scheme *cipher_schemes; u8 max_nan_de_entries; u8 tx_sk_pacing_shift; u8 weight_multiplier; diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index c4f5601f6e32..20a2992901c2 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -120,7 +120,9 @@ struct net { struct netns_core core; struct netns_mib mib; struct netns_packet packet; +#if IS_ENABLED(CONFIG_UNIX) struct netns_unix unx; +#endif struct netns_nexthop nexthop; struct netns_ipv4 ipv4; #if IS_ENABLED(CONFIG_IPV6) diff --git a/include/net/netns/unix.h b/include/net/netns/unix.h index 91a3d7e39198..6f1a33df061d 100644 --- a/include/net/netns/unix.h +++ b/include/net/netns/unix.h @@ -5,8 +5,14 @@ #ifndef __NETNS_UNIX_H__ #define __NETNS_UNIX_H__ +struct unix_table { + spinlock_t *locks; + struct hlist_head *buckets; +}; + struct ctl_table_header; struct netns_unix { + struct unix_table table; int sysctl_max_dgram_qlen; struct ctl_table_header *ctl; }; diff --git a/include/net/raw.h b/include/net/raw.h index 8ad8df594853..d224376360e1 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -20,9 +20,8 @@ extern struct proto raw_prot; extern struct raw_hashinfo raw_v4_hashinfo; -struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, - unsigned short num, __be32 raddr, - __be32 laddr, int dif, int sdif); +bool raw_v4_match(struct net *net, struct sock *sk, unsigned short num, + __be32 raddr, __be32 laddr, int dif, int sdif); int raw_abort(struct sock *sk, int err); void raw_icmp_error(struct sk_buff *, int, u32); @@ -33,10 +32,19 @@ int raw_rcv(struct sock *, struct sk_buff *); #define RAW_HTABLE_SIZE MAX_INET_PROTOS struct raw_hashinfo { - rwlock_t lock; - struct hlist_head ht[RAW_HTABLE_SIZE]; + spinlock_t lock; + struct hlist_nulls_head ht[RAW_HTABLE_SIZE]; }; +static inline void 
raw_hashinfo_init(struct raw_hashinfo *hashinfo) +{ + int i; + + spin_lock_init(&hashinfo->lock); + for (i = 0; i < RAW_HTABLE_SIZE; i++) + INIT_HLIST_NULLS_HEAD(&hashinfo->ht[i], i); +} + #ifdef CONFIG_PROC_FS int raw_proc_init(void); void raw_proc_exit(void); diff --git a/include/net/rawv6.h b/include/net/rawv6.h index 53d86b6055e8..bc70909625f6 100644 --- a/include/net/rawv6.h +++ b/include/net/rawv6.h @@ -3,11 +3,12 @@ #define _NET_RAWV6_H #include <net/protocol.h> +#include <net/raw.h> extern struct raw_hashinfo raw_v6_hashinfo; -struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, - unsigned short num, const struct in6_addr *loc_addr, - const struct in6_addr *rmt_addr, int dif, int sdif); +bool raw_v6_match(struct net *net, struct sock *sk, unsigned short num, + const struct in6_addr *loc_addr, + const struct in6_addr *rmt_addr, int dif, int sdif); int raw_abort(struct sock *sk, int err); diff --git a/include/net/sock.h b/include/net/sock.h index 72ca97ccb460..40bbd0e8925b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -609,7 +609,7 @@ void sock_net_set(struct sock *sk, struct net *net) int sk_set_peek_off(struct sock *sk, int val); -static inline int sk_peek_offset(struct sock *sk, int flags) +static inline int sk_peek_offset(const struct sock *sk, int flags) { if (unlikely(flags & MSG_PEEK)) { return READ_ONCE(sk->sk_peek_off); @@ -849,7 +849,7 @@ static inline void sk_add_bind_node(struct sock *sk, ({ tpos = (typeof(*tpos) *)((void *)pos - offset); 1;}); \ pos = rcu_dereference(hlist_next_rcu(pos))) -static inline struct user_namespace *sk_user_ns(struct sock *sk) +static inline struct user_namespace *sk_user_ns(const struct sock *sk) { /* Careful only use this in a context where these parameters * can not change and must all be valid, such as recvmsg from @@ -895,7 +895,7 @@ enum sock_flags { #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) -static inline void sock_copy_flags(struct sock 
*nsk, struct sock *osk) +static inline void sock_copy_flags(struct sock *nsk, const struct sock *osk) { nsk->sk_flags = osk->sk_flags; } @@ -1240,6 +1240,7 @@ struct proto { void (*enter_memory_pressure)(struct sock *sk); void (*leave_memory_pressure)(struct sock *sk); atomic_long_t *memory_allocated; /* Current allocated memory. */ + int __percpu *per_cpu_fw_alloc; struct percpu_counter *sockets_allocated; /* Current number of sockets. */ /* @@ -1383,21 +1384,46 @@ static inline bool sk_under_memory_pressure(const struct sock *sk) } static inline long -sk_memory_allocated(const struct sock *sk) +proto_memory_allocated(const struct proto *prot) { - return atomic_long_read(sk->sk_prot->memory_allocated); + return max(0L, atomic_long_read(prot->memory_allocated)); } static inline long +sk_memory_allocated(const struct sock *sk) +{ + return proto_memory_allocated(sk->sk_prot); +} + +/* 1 MB per cpu, in page units */ +#define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT)) + +static inline void sk_memory_allocated_add(struct sock *sk, int amt) { - return atomic_long_add_return(amt, sk->sk_prot->memory_allocated); + int local_reserve; + + preempt_disable(); + local_reserve = __this_cpu_add_return(*sk->sk_prot->per_cpu_fw_alloc, amt); + if (local_reserve >= SK_MEMORY_PCPU_RESERVE) { + __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve); + atomic_long_add(local_reserve, sk->sk_prot->memory_allocated); + } + preempt_enable(); } static inline void sk_memory_allocated_sub(struct sock *sk, int amt) { - atomic_long_sub(amt, sk->sk_prot->memory_allocated); + int local_reserve; + + preempt_disable(); + local_reserve = __this_cpu_sub_return(*sk->sk_prot->per_cpu_fw_alloc, amt); + if (local_reserve <= -SK_MEMORY_PCPU_RESERVE) { + __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve); + atomic_long_add(local_reserve, sk->sk_prot->memory_allocated); + } + preempt_enable(); } #define SK_ALLOC_PERCPU_COUNTER_BATCH 16 @@ -1426,12 +1452,6 @@ 
proto_sockets_allocated_sum_positive(struct proto *prot) return percpu_counter_sum_positive(prot->sockets_allocated); } -static inline long -proto_memory_allocated(struct proto *prot) -{ - return atomic_long_read(prot->memory_allocated); -} - static inline bool proto_memory_pressure(struct proto *prot) { @@ -1518,30 +1538,18 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind); void __sk_mem_reduce_allocated(struct sock *sk, int amount); void __sk_mem_reclaim(struct sock *sk, int amount); -/* We used to have PAGE_SIZE here, but systems with 64KB pages - * do not necessarily have 16x time more memory than 4KB ones. - */ -#define SK_MEM_QUANTUM 4096 -#define SK_MEM_QUANTUM_SHIFT ilog2(SK_MEM_QUANTUM) #define SK_MEM_SEND 0 #define SK_MEM_RECV 1 -/* sysctl_mem values are in pages, we convert them in SK_MEM_QUANTUM units */ +/* sysctl_mem values are in pages */ static inline long sk_prot_mem_limits(const struct sock *sk, int index) { - long val = sk->sk_prot->sysctl_mem[index]; - -#if PAGE_SIZE > SK_MEM_QUANTUM - val <<= PAGE_SHIFT - SK_MEM_QUANTUM_SHIFT; -#elif PAGE_SIZE < SK_MEM_QUANTUM - val >>= SK_MEM_QUANTUM_SHIFT - PAGE_SHIFT; -#endif - return val; + return sk->sk_prot->sysctl_mem[index]; } static inline int sk_mem_pages(int amt) { - return (amt + SK_MEM_QUANTUM - 1) >> SK_MEM_QUANTUM_SHIFT; + return (amt + PAGE_SIZE - 1) >> PAGE_SHIFT; } static inline bool sk_has_account(struct sock *sk) @@ -1552,19 +1560,23 @@ static inline bool sk_has_account(struct sock *sk) static inline bool sk_wmem_schedule(struct sock *sk, int size) { + int delta; + if (!sk_has_account(sk)) return true; - return size <= sk->sk_forward_alloc || - __sk_mem_schedule(sk, size, SK_MEM_SEND); + delta = size - sk->sk_forward_alloc; + return delta <= 0 || __sk_mem_schedule(sk, delta, SK_MEM_SEND); } static inline bool sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size) { + int delta; + if (!sk_has_account(sk)) return true; - return size <= sk->sk_forward_alloc || - 
__sk_mem_schedule(sk, size, SK_MEM_RECV) || + delta = size - sk->sk_forward_alloc; + return delta <= 0 || __sk_mem_schedule(sk, delta, SK_MEM_RECV) || skb_pfmemalloc(skb); } @@ -1590,7 +1602,7 @@ static inline void sk_mem_reclaim(struct sock *sk) reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk); - if (reclaimable >= SK_MEM_QUANTUM) + if (reclaimable >= (int)PAGE_SIZE) __sk_mem_reclaim(sk, reclaimable); } @@ -1600,19 +1612,6 @@ static inline void sk_mem_reclaim_final(struct sock *sk) sk_mem_reclaim(sk); } -static inline void sk_mem_reclaim_partial(struct sock *sk) -{ - int reclaimable; - - if (!sk_has_account(sk)) - return; - - reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk); - - if (reclaimable > SK_MEM_QUANTUM) - __sk_mem_reclaim(sk, reclaimable - 1); -} - static inline void sk_mem_charge(struct sock *sk, int size) { if (!sk_has_account(sk)) @@ -1620,29 +1619,17 @@ static inline void sk_mem_charge(struct sock *sk, int size) sk->sk_forward_alloc -= size; } -/* the following macros control memory reclaiming in sk_mem_uncharge() +/* the following macros control memory reclaiming in mptcp_rmem_uncharge() */ #define SK_RECLAIM_THRESHOLD (1 << 21) #define SK_RECLAIM_CHUNK (1 << 20) static inline void sk_mem_uncharge(struct sock *sk, int size) { - int reclaimable; - if (!sk_has_account(sk)) return; sk->sk_forward_alloc += size; - reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk); - - /* Avoid a possible overflow. - * TCP send queues can make this happen, if sk_mem_reclaim() - * is not called and more than 2 GBytes are released at once. - * - * If we reach 2 MBytes, reclaim 1 MBytes right now, there is - * no need to hold that much forward allocation anyway. 
- */ - if (unlikely(reclaimable >= SK_RECLAIM_THRESHOLD)) - __sk_mem_reclaim(sk, SK_RECLAIM_CHUNK); + sk_mem_reclaim(sk); } /* @@ -2232,9 +2219,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro if (err) return err; - skb->len += copy; - skb->data_len += copy; - skb->truesize += copy; + skb_len_add(skb, copy); sk_wmem_queued_add(sk, copy); sk_mem_charge(sk, copy); return 0; diff --git a/include/net/tcp.h b/include/net/tcp.h index 1e99f5c61f84..c21a9b516f1e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -253,6 +253,8 @@ extern long sysctl_tcp_mem[3]; #define TCP_RACK_NO_DUPTHRESH 0x4 /* Do not use DUPACK threshold in RACK */ extern atomic_long_t tcp_memory_allocated; +DECLARE_PER_CPU(int, tcp_memory_per_cpu_fw_alloc); + extern struct percpu_counter tcp_sockets_allocated; extern unsigned long tcp_memory_pressure; @@ -432,6 +434,7 @@ u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph, struct tcphdr *th, u32 *cookie); u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph, struct tcphdr *th, u32 *cookie); +u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss); u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops, const struct tcp_request_sock_ops *af_ops, struct sock *sk, struct tcphdr *th); diff --git a/include/net/udp.h b/include/net/udp.h index b83a00330566..b60eea2e3fae 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -95,6 +95,7 @@ static inline struct udp_hslot *udp_hashslot2(struct udp_table *table, extern struct proto udp_prot; extern atomic_long_t udp_memory_allocated; +DECLARE_PER_CPU(int, udp_memory_per_cpu_fw_alloc); /* sysctl variables for udp */ extern long sysctl_udp_mem[3]; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index c39d910d4b45..9287712ad977 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1923,7 +1923,7 @@ static inline void xfrm_dev_state_free(struct xfrm_state *x) if (dev->xfrmdev_ops->xdo_dev_state_free) 
dev->xfrmdev_ops->xdo_dev_state_free(x); xso->dev = NULL; - dev_put_track(dev, &xso->dev_tracker); + netdev_put(dev, &xso->dev_tracker); } } #else diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 5f88385a7748..3737570116c3 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -575,6 +575,7 @@ struct ocelot_ops { int (*psfp_stats_get)(struct ocelot *ocelot, struct flow_cls_offload *f, struct flow_stats *stats); void (*cut_through_fwd)(struct ocelot *ocelot); + void (*tas_clock_adjust)(struct ocelot *ocelot); }; struct ocelot_vcap_policer { @@ -691,6 +692,9 @@ struct ocelot_port { int bridge_num; int speed; + + /* Store the AdminBaseTime of EST fetched from userspace. */ + s64 base_time; }; struct ocelot { @@ -757,6 +761,9 @@ struct ocelot { /* Lock for serializing forwarding domain changes */ struct mutex fwd_domain_lock; + /* Lock for serializing Time-Aware Shaper changes */ + struct mutex tas_lock; + struct workqueue_struct *owq; u8 ptp:1; diff --git a/include/trace/events/net.h b/include/trace/events/net.h index 032b431b987b..da611a7aaf97 100644 --- a/include/trace/events/net.h +++ b/include/trace/events/net.h @@ -136,7 +136,7 @@ DECLARE_EVENT_CLASS(net_dev_template, __assign_str(name, skb->dev->name); ), - TP_printk("dev=%s skbaddr=%px len=%u", + TP_printk("dev=%s skbaddr=%p len=%u", __get_str(name), __entry->skbaddr, __entry->len) ) diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h index 59c945b66f9c..a3995925cb05 100644 --- a/include/trace/events/qdisc.h +++ b/include/trace/events/qdisc.h @@ -41,7 +41,7 @@ TRACE_EVENT(qdisc_dequeue, __entry->txq_state = txq->state; ), - TP_printk("dequeue ifindex=%d qdisc handle=0x%X parent=0x%X txq_state=0x%lX packets=%d skbaddr=%px", + TP_printk("dequeue ifindex=%d qdisc handle=0x%X parent=0x%X txq_state=0x%lX packets=%d skbaddr=%p", __entry->ifindex, __entry->handle, __entry->parent, __entry->txq_state, __entry->packets, __entry->skbaddr ) ); @@ -70,7 +70,7 
@@ TRACE_EVENT(qdisc_enqueue, __entry->parent = qdisc->parent; ), - TP_printk("enqueue ifindex=%d qdisc handle=0x%X parent=0x%X skbaddr=%px", + TP_printk("enqueue ifindex=%d qdisc handle=0x%X parent=0x%X skbaddr=%p", __entry->ifindex, __entry->handle, __entry->parent, __entry->skbaddr) ); diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index a477bf907498..45264e4bb254 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -9,92 +9,6 @@ #include <linux/netdevice.h> #include <linux/tracepoint.h> -#define TRACE_SKB_DROP_REASON \ - EM(SKB_DROP_REASON_NOT_SPECIFIED, NOT_SPECIFIED) \ - EM(SKB_DROP_REASON_NO_SOCKET, NO_SOCKET) \ - EM(SKB_DROP_REASON_PKT_TOO_SMALL, PKT_TOO_SMALL) \ - EM(SKB_DROP_REASON_TCP_CSUM, TCP_CSUM) \ - EM(SKB_DROP_REASON_SOCKET_FILTER, SOCKET_FILTER) \ - EM(SKB_DROP_REASON_UDP_CSUM, UDP_CSUM) \ - EM(SKB_DROP_REASON_NETFILTER_DROP, NETFILTER_DROP) \ - EM(SKB_DROP_REASON_OTHERHOST, OTHERHOST) \ - EM(SKB_DROP_REASON_IP_CSUM, IP_CSUM) \ - EM(SKB_DROP_REASON_IP_INHDR, IP_INHDR) \ - EM(SKB_DROP_REASON_IP_RPFILTER, IP_RPFILTER) \ - EM(SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST, \ - UNICAST_IN_L2_MULTICAST) \ - EM(SKB_DROP_REASON_XFRM_POLICY, XFRM_POLICY) \ - EM(SKB_DROP_REASON_IP_NOPROTO, IP_NOPROTO) \ - EM(SKB_DROP_REASON_SOCKET_RCVBUFF, SOCKET_RCVBUFF) \ - EM(SKB_DROP_REASON_PROTO_MEM, PROTO_MEM) \ - EM(SKB_DROP_REASON_TCP_MD5NOTFOUND, TCP_MD5NOTFOUND) \ - EM(SKB_DROP_REASON_TCP_MD5UNEXPECTED, \ - TCP_MD5UNEXPECTED) \ - EM(SKB_DROP_REASON_TCP_MD5FAILURE, TCP_MD5FAILURE) \ - EM(SKB_DROP_REASON_SOCKET_BACKLOG, SOCKET_BACKLOG) \ - EM(SKB_DROP_REASON_TCP_FLAGS, TCP_FLAGS) \ - EM(SKB_DROP_REASON_TCP_ZEROWINDOW, TCP_ZEROWINDOW) \ - EM(SKB_DROP_REASON_TCP_OLD_DATA, TCP_OLD_DATA) \ - EM(SKB_DROP_REASON_TCP_OVERWINDOW, TCP_OVERWINDOW) \ - EM(SKB_DROP_REASON_TCP_OFOMERGE, TCP_OFOMERGE) \ - EM(SKB_DROP_REASON_TCP_OFO_DROP, TCP_OFO_DROP) \ - EM(SKB_DROP_REASON_TCP_RFC7323_PAWS, TCP_RFC7323_PAWS) \ - 
EM(SKB_DROP_REASON_TCP_INVALID_SEQUENCE, \ - TCP_INVALID_SEQUENCE) \ - EM(SKB_DROP_REASON_TCP_RESET, TCP_RESET) \ - EM(SKB_DROP_REASON_TCP_INVALID_SYN, TCP_INVALID_SYN) \ - EM(SKB_DROP_REASON_TCP_CLOSE, TCP_CLOSE) \ - EM(SKB_DROP_REASON_TCP_FASTOPEN, TCP_FASTOPEN) \ - EM(SKB_DROP_REASON_TCP_OLD_ACK, TCP_OLD_ACK) \ - EM(SKB_DROP_REASON_TCP_TOO_OLD_ACK, TCP_TOO_OLD_ACK) \ - EM(SKB_DROP_REASON_TCP_ACK_UNSENT_DATA, \ - TCP_ACK_UNSENT_DATA) \ - EM(SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE, \ - TCP_OFO_QUEUE_PRUNE) \ - EM(SKB_DROP_REASON_IP_OUTNOROUTES, IP_OUTNOROUTES) \ - EM(SKB_DROP_REASON_BPF_CGROUP_EGRESS, \ - BPF_CGROUP_EGRESS) \ - EM(SKB_DROP_REASON_IPV6DISABLED, IPV6DISABLED) \ - EM(SKB_DROP_REASON_NEIGH_CREATEFAIL, NEIGH_CREATEFAIL) \ - EM(SKB_DROP_REASON_NEIGH_FAILED, NEIGH_FAILED) \ - EM(SKB_DROP_REASON_NEIGH_QUEUEFULL, NEIGH_QUEUEFULL) \ - EM(SKB_DROP_REASON_NEIGH_DEAD, NEIGH_DEAD) \ - EM(SKB_DROP_REASON_TC_EGRESS, TC_EGRESS) \ - EM(SKB_DROP_REASON_QDISC_DROP, QDISC_DROP) \ - EM(SKB_DROP_REASON_CPU_BACKLOG, CPU_BACKLOG) \ - EM(SKB_DROP_REASON_XDP, XDP) \ - EM(SKB_DROP_REASON_TC_INGRESS, TC_INGRESS) \ - EM(SKB_DROP_REASON_UNHANDLED_PROTO, UNHANDLED_PROTO) \ - EM(SKB_DROP_REASON_SKB_CSUM, SKB_CSUM) \ - EM(SKB_DROP_REASON_SKB_GSO_SEG, SKB_GSO_SEG) \ - EM(SKB_DROP_REASON_SKB_UCOPY_FAULT, SKB_UCOPY_FAULT) \ - EM(SKB_DROP_REASON_DEV_HDR, DEV_HDR) \ - EM(SKB_DROP_REASON_DEV_READY, DEV_READY) \ - EM(SKB_DROP_REASON_FULL_RING, FULL_RING) \ - EM(SKB_DROP_REASON_NOMEM, NOMEM) \ - EM(SKB_DROP_REASON_HDR_TRUNC, HDR_TRUNC) \ - EM(SKB_DROP_REASON_TAP_FILTER, TAP_FILTER) \ - EM(SKB_DROP_REASON_TAP_TXFILTER, TAP_TXFILTER) \ - EM(SKB_DROP_REASON_ICMP_CSUM, ICMP_CSUM) \ - EM(SKB_DROP_REASON_INVALID_PROTO, INVALID_PROTO) \ - EM(SKB_DROP_REASON_IP_INADDRERRORS, IP_INADDRERRORS) \ - EM(SKB_DROP_REASON_IP_INNOROUTES, IP_INNOROUTES) \ - EM(SKB_DROP_REASON_PKT_TOO_BIG, PKT_TOO_BIG) \ - EMe(SKB_DROP_REASON_MAX, MAX) - -#undef EM -#undef EMe - -#define EM(a, b) TRACE_DEFINE_ENUM(a); -#define 
EMe(a, b) TRACE_DEFINE_ENUM(a); - -TRACE_SKB_DROP_REASON - -#undef EM -#undef EMe -#define EM(a, b) { a, #b }, -#define EMe(a, b) { a, #b } - /* * Tracepoint for free an sk_buff: */ @@ -121,8 +35,7 @@ TRACE_EVENT(kfree_skb, TP_printk("skbaddr=%p protocol=%u location=%p reason: %s", __entry->skbaddr, __entry->protocol, __entry->location, - __print_symbolic(__entry->reason, - TRACE_SKB_DROP_REASON)) + drop_reasons[__entry->reason]) ); TRACE_EVENT(consume_skb, diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f4009dbdf62d..e81362891596 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3597,10 +3597,11 @@ union bpf_attr { * * *iph* points to the start of the IPv4 or IPv6 header, while * *iph_len* contains **sizeof**\ (**struct iphdr**) or - * **sizeof**\ (**struct ip6hdr**). + * **sizeof**\ (**struct ipv6hdr**). * * *th* points to the start of the TCP header, while *th_len* - * contains **sizeof**\ (**struct tcphdr**). + * contains the length of the TCP header (at least + * **sizeof**\ (**struct tcphdr**)). * Return * 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative * error otherwise. @@ -3783,10 +3784,11 @@ union bpf_attr { * * *iph* points to the start of the IPv4 or IPv6 header, while * *iph_len* contains **sizeof**\ (**struct iphdr**) or - * **sizeof**\ (**struct ip6hdr**). + * **sizeof**\ (**struct ipv6hdr**). * * *th* points to the start of the TCP header, while *th_len* - * contains the length of the TCP header. + * contains the length of the TCP header with options (at least + * **sizeof**\ (**struct tcphdr**)). * Return * On success, lower 32 bits hold the generated SYN cookie in * followed by 16 bits which hold the MSS value for that cookie, @@ -5249,6 +5251,80 @@ union bpf_attr { * Pointer to the underlying dynptr data, NULL if the dynptr is * read-only, if the dynptr is invalid, or if the offset and length * is out of bounds. 
+ * + * s64 bpf_tcp_raw_gen_syncookie_ipv4(struct iphdr *iph, struct tcphdr *th, u32 th_len) + * Description + * Try to issue a SYN cookie for the packet with corresponding + * IPv4/TCP headers, *iph* and *th*, without depending on a + * listening socket. + * + * *iph* points to the IPv4 header. + * + * *th* points to the start of the TCP header, while *th_len* + * contains the length of the TCP header (at least + * **sizeof**\ (**struct tcphdr**)). + * Return + * On success, lower 32 bits hold the generated SYN cookie in + * followed by 16 bits which hold the MSS value for that cookie, + * and the top 16 bits are unused. + * + * On failure, the returned value is one of the following: + * + * **-EINVAL** if *th_len* is invalid. + * + * s64 bpf_tcp_raw_gen_syncookie_ipv6(struct ipv6hdr *iph, struct tcphdr *th, u32 th_len) + * Description + * Try to issue a SYN cookie for the packet with corresponding + * IPv6/TCP headers, *iph* and *th*, without depending on a + * listening socket. + * + * *iph* points to the IPv6 header. + * + * *th* points to the start of the TCP header, while *th_len* + * contains the length of the TCP header (at least + * **sizeof**\ (**struct tcphdr**)). + * Return + * On success, lower 32 bits hold the generated SYN cookie in + * followed by 16 bits which hold the MSS value for that cookie, + * and the top 16 bits are unused. + * + * On failure, the returned value is one of the following: + * + * **-EINVAL** if *th_len* is invalid. + * + * **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin. + * + * long bpf_tcp_raw_check_syncookie_ipv4(struct iphdr *iph, struct tcphdr *th) + * Description + * Check whether *iph* and *th* contain a valid SYN cookie ACK + * without depending on a listening socket. + * + * *iph* points to the IPv4 header. + * + * *th* points to the TCP header. + * Return + * 0 if *iph* and *th* are a valid SYN cookie ACK. 
+ * + * On failure, the returned value is one of the following: + * + * **-EACCES** if the SYN cookie is not valid. + * + * long bpf_tcp_raw_check_syncookie_ipv6(struct ipv6hdr *iph, struct tcphdr *th) + * Description + * Check whether *iph* and *th* contain a valid SYN cookie ACK + * without depending on a listening socket. + * + * *iph* points to the IPv6 header. + * + * *th* points to the TCP header. + * Return + * 0 if *iph* and *th* are a valid SYN cookie ACK. + * + * On failure, the returned value is one of the following: + * + * **-EACCES** if the SYN cookie is not valid. + * + * **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5455,6 +5531,10 @@ union bpf_attr { FN(dynptr_read), \ FN(dynptr_write), \ FN(dynptr_data), \ + FN(tcp_raw_gen_syncookie_ipv4), \ + FN(tcp_raw_gen_syncookie_ipv6), \ + FN(tcp_raw_check_syncookie_ipv4), \ + FN(tcp_raw_check_syncookie_ipv6), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h index a9162a6c0284..ec1798b6d3ff 100644 --- a/include/uapi/linux/btf.h +++ b/include/uapi/linux/btf.h @@ -36,10 +36,10 @@ struct btf_type { * bits 24-28: kind (e.g. int, ptr, array...etc) * bits 29-30: unused * bit 31: kind_flag, currently used by - * struct, union and fwd + * struct, union, enum, fwd and enum64 */ __u32 info; - /* "size" is used by INT, ENUM, STRUCT, UNION and DATASEC. + /* "size" is used by INT, ENUM, STRUCT, UNION, DATASEC and ENUM64. * "size" tells the size of the type it is describing. 
* * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, @@ -63,7 +63,7 @@ enum { BTF_KIND_ARRAY = 3, /* Array */ BTF_KIND_STRUCT = 4, /* Struct */ BTF_KIND_UNION = 5, /* Union */ - BTF_KIND_ENUM = 6, /* Enumeration */ + BTF_KIND_ENUM = 6, /* Enumeration up to 32-bit values */ BTF_KIND_FWD = 7, /* Forward */ BTF_KIND_TYPEDEF = 8, /* Typedef */ BTF_KIND_VOLATILE = 9, /* Volatile */ @@ -76,6 +76,7 @@ enum { BTF_KIND_FLOAT = 16, /* Floating point */ BTF_KIND_DECL_TAG = 17, /* Decl Tag */ BTF_KIND_TYPE_TAG = 18, /* Type Tag */ + BTF_KIND_ENUM64 = 19, /* Enumeration up to 64-bit values */ NR_BTF_KINDS, BTF_KIND_MAX = NR_BTF_KINDS - 1, @@ -186,4 +187,14 @@ struct btf_decl_tag { __s32 component_idx; }; +/* BTF_KIND_ENUM64 is followed by multiple "struct btf_enum64". + * The exact number of btf_enum64 is stored in the vlen (of the + * info in "struct btf_type"). + */ +struct btf_enum64 { + __u32 name_off; + __u32 val_lo32; + __u32 val_hi32; +}; + #endif /* _UAPI__LINUX_BTF_H__ */ diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 1d0bccc3fa54..d370165bc621 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -116,6 +116,7 @@ #define ETH_P_QINQ3 0x9300 /* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_EDSA 0xDADA /* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_DSA_8021Q 0xDADB /* Fake VLAN Header for DSA [ NOT AN OFFICIALLY REGISTERED ID ] */ +#define ETH_P_DSA_A5PSW 0xE001 /* A5PSW Tag Value [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_IFE 0xED3E /* ForCES inter-FE LFB type */ #define ETH_P_AF_IUCV 0xFBFB /* IBM af_iucv [ NOT AN OFFICIALLY REGISTERED ID ] */ diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 5f58dcfe2787..e36d9d2c65a7 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -963,6 +963,7 @@ enum { IFLA_BOND_SLAVE_AD_AGGREGATOR_ID, IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE, 
IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE, + IFLA_BOND_SLAVE_PRIO, __IFLA_BOND_SLAVE_MAX, }; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index d9490e3062a7..98f905f16411 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -5874,7 +5874,7 @@ enum nl80211_ap_sme_features { * @NL80211_FEATURE_INACTIVITY_TIMER: This driver takes care of freeing up * the connected inactive stations in AP mode. * @NL80211_FEATURE_CELL_BASE_REG_HINTS: This driver has been tested - * to work properly to suppport receiving regulatory hints from + * to work properly to support receiving regulatory hints from * cellular base stations. * @NL80211_FEATURE_P2P_DEVICE_NEEDS_CHANNEL: (no longer available, only * here to reserve the value for API/ABI compatibility) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index eb12d4f705cc..d003d4d8242a 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -309,6 +309,7 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = { [BTF_KIND_FLOAT] = "FLOAT", [BTF_KIND_DECL_TAG] = "DECL_TAG", [BTF_KIND_TYPE_TAG] = "TYPE_TAG", + [BTF_KIND_ENUM64] = "ENUM64", }; const char *btf_type_str(const struct btf_type *t) @@ -666,6 +667,7 @@ static bool btf_type_has_size(const struct btf_type *t) case BTF_KIND_ENUM: case BTF_KIND_DATASEC: case BTF_KIND_FLOAT: + case BTF_KIND_ENUM64: return true; } @@ -711,6 +713,11 @@ static const struct btf_decl_tag *btf_type_decl_tag(const struct btf_type *t) return (const struct btf_decl_tag *)(t + 1); } +static const struct btf_enum64 *btf_type_enum64(const struct btf_type *t) +{ + return (const struct btf_enum64 *)(t + 1); +} + static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t) { return kind_ops[BTF_INFO_KIND(t->info)]; @@ -1019,6 +1026,7 @@ static const char *btf_show_name(struct btf_show *show) parens = "{"; break; case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: prefix = "enum"; break; default: @@ -1834,6 +1842,7 @@ __btf_resolve_size(const struct 
btf *btf, const struct btf_type *type, case BTF_KIND_UNION: case BTF_KIND_ENUM: case BTF_KIND_FLOAT: + case BTF_KIND_ENUM64: size = type->size; goto resolved; @@ -3670,6 +3679,7 @@ static s32 btf_enum_check_meta(struct btf_verifier_env *env, { const struct btf_enum *enums = btf_type_enum(t); struct btf *btf = env->btf; + const char *fmt_str; u16 i, nr_enums; u32 meta_needed; @@ -3683,11 +3693,6 @@ static s32 btf_enum_check_meta(struct btf_verifier_env *env, return -EINVAL; } - if (btf_type_kflag(t)) { - btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); - return -EINVAL; - } - if (t->size > 8 || !is_power_of_2(t->size)) { btf_verifier_log_type(env, t, "Unexpected size"); return -EINVAL; @@ -3718,7 +3723,8 @@ static s32 btf_enum_check_meta(struct btf_verifier_env *env, if (env->log.level == BPF_LOG_KERNEL) continue; - btf_verifier_log(env, "\t%s val=%d\n", + fmt_str = btf_type_kflag(t) ? "\t%s val=%d\n" : "\t%s val=%u\n"; + btf_verifier_log(env, fmt_str, __btf_name_by_offset(btf, enums[i].name_off), enums[i].val); } @@ -3759,7 +3765,10 @@ static void btf_enum_show(const struct btf *btf, const struct btf_type *t, return; } - btf_show_type_value(show, "%d", v); + if (btf_type_kflag(t)) + btf_show_type_value(show, "%d", v); + else + btf_show_type_value(show, "%u", v); btf_show_end_type(show); } @@ -3772,6 +3781,109 @@ static struct btf_kind_operations enum_ops = { .show = btf_enum_show, }; +static s32 btf_enum64_check_meta(struct btf_verifier_env *env, + const struct btf_type *t, + u32 meta_left) +{ + const struct btf_enum64 *enums = btf_type_enum64(t); + struct btf *btf = env->btf; + const char *fmt_str; + u16 i, nr_enums; + u32 meta_needed; + + nr_enums = btf_type_vlen(t); + meta_needed = nr_enums * sizeof(*enums); + + if (meta_left < meta_needed) { + btf_verifier_log_basic(env, t, + "meta_left:%u meta_needed:%u", + meta_left, meta_needed); + return -EINVAL; + } + + if (t->size > 8 || !is_power_of_2(t->size)) { + btf_verifier_log_type(env, t, "Unexpected 
size"); + return -EINVAL; + } + + /* enum type either no name or a valid one */ + if (t->name_off && + !btf_name_valid_identifier(env->btf, t->name_off)) { + btf_verifier_log_type(env, t, "Invalid name"); + return -EINVAL; + } + + btf_verifier_log_type(env, t, NULL); + + for (i = 0; i < nr_enums; i++) { + if (!btf_name_offset_valid(btf, enums[i].name_off)) { + btf_verifier_log(env, "\tInvalid name_offset:%u", + enums[i].name_off); + return -EINVAL; + } + + /* enum member must have a valid name */ + if (!enums[i].name_off || + !btf_name_valid_identifier(btf, enums[i].name_off)) { + btf_verifier_log_type(env, t, "Invalid name"); + return -EINVAL; + } + + if (env->log.level == BPF_LOG_KERNEL) + continue; + + fmt_str = btf_type_kflag(t) ? "\t%s val=%lld\n" : "\t%s val=%llu\n"; + btf_verifier_log(env, fmt_str, + __btf_name_by_offset(btf, enums[i].name_off), + btf_enum64_value(enums + i)); + } + + return meta_needed; +} + +static void btf_enum64_show(const struct btf *btf, const struct btf_type *t, + u32 type_id, void *data, u8 bits_offset, + struct btf_show *show) +{ + const struct btf_enum64 *enums = btf_type_enum64(t); + u32 i, nr_enums = btf_type_vlen(t); + void *safe_data; + s64 v; + + safe_data = btf_show_start_type(show, t, type_id, data); + if (!safe_data) + return; + + v = *(u64 *)safe_data; + + for (i = 0; i < nr_enums; i++) { + if (v != btf_enum64_value(enums + i)) + continue; + + btf_show_type_value(show, "%s", + __btf_name_by_offset(btf, + enums[i].name_off)); + + btf_show_end_type(show); + return; + } + + if (btf_type_kflag(t)) + btf_show_type_value(show, "%lld", v); + else + btf_show_type_value(show, "%llu", v); + btf_show_end_type(show); +} + +static struct btf_kind_operations enum64_ops = { + .check_meta = btf_enum64_check_meta, + .resolve = btf_df_resolve, + .check_member = btf_enum_check_member, + .check_kflag_member = btf_enum_check_kflag_member, + .log_details = btf_enum_log, + .show = btf_enum64_show, +}; + static s32 
btf_func_proto_check_meta(struct btf_verifier_env *env, const struct btf_type *t, u32 meta_left) @@ -4438,6 +4550,7 @@ static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS] = { [BTF_KIND_FLOAT] = &float_ops, [BTF_KIND_DECL_TAG] = &decl_tag_ops, [BTF_KIND_TYPE_TAG] = &modifier_ops, + [BTF_KIND_ENUM64] = &enum64_ops, }; static s32 btf_check_meta(struct btf_verifier_env *env, @@ -5304,7 +5417,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, /* skip modifiers */ while (btf_type_is_modifier(t)) t = btf_type_by_id(btf, t->type); - if (btf_type_is_small_int(t) || btf_type_is_enum(t)) + if (btf_type_is_small_int(t) || btf_is_any_enum(t)) /* accessing a scalar */ return true; if (!btf_type_is_ptr(t)) { @@ -5768,7 +5881,7 @@ static int __get_type_size(struct btf *btf, u32 btf_id, if (btf_type_is_ptr(t)) /* kernel size of pointer. Not BPF's size of pointer*/ return sizeof(void *); - if (btf_type_is_int(t) || btf_type_is_enum(t)) + if (btf_type_is_int(t) || btf_is_any_enum(t)) return t->size; *bad_type = t; return -EINVAL; @@ -5916,7 +6029,7 @@ static int btf_check_func_type_match(struct bpf_verifier_log *log, * to context only. And only global functions can be replaced. * Hence type check only those types. */ - if (btf_type_is_int(t1) || btf_type_is_enum(t1)) + if (btf_type_is_int(t1) || btf_is_any_enum(t1)) continue; if (!btf_type_is_ptr(t1)) { bpf_log(log, @@ -6414,7 +6527,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, t = btf_type_by_id(btf, t->type); while (btf_type_is_modifier(t)) t = btf_type_by_id(btf, t->type); - if (!btf_type_is_int(t) && !btf_type_is_enum(t)) { + if (!btf_type_is_int(t) && !btf_is_any_enum(t)) { bpf_log(log, "Global function %s() doesn't return scalar. 
Only those are supported.\n", tname); @@ -6429,7 +6542,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, t = btf_type_by_id(btf, args[i].type); while (btf_type_is_modifier(t)) t = btf_type_by_id(btf, t->type); - if (btf_type_is_int(t) || btf_type_is_enum(t)) { + if (btf_type_is_int(t) || btf_is_any_enum(t)) { reg->type = SCALAR_VALUE; continue; } @@ -7341,6 +7454,7 @@ recur: case BTF_KIND_UNION: case BTF_KIND_ENUM: case BTF_KIND_FWD: + case BTF_KIND_ENUM64: return 1; case BTF_KIND_INT: /* just reject deprecated bitfield-like integers; all other @@ -7393,10 +7507,10 @@ recur: * field-based relocations. This function assumes that root types were already * checked for name match. Beyond that initial root-level name check, names * are completely ignored. Compatibility rules are as follows: - * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but + * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs/ENUM64s are considered compatible, but * kind should match for local and target types (i.e., STRUCT is not * compatible with UNION); - * - for ENUMs, the size is ignored; + * - for ENUMs/ENUM64s, the size is ignored; * - for INT, size and signedness are ignored; * - for ARRAY, dimensionality is ignored, element types are checked for * compatibility recursively; diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index afb414b26d01..7a394f7c205c 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -721,6 +721,60 @@ static struct bpf_prog_list *find_detach_entry(struct list_head *progs, } /** + * purge_effective_progs() - After compute_effective_progs fails to alloc new + * cgrp->bpf.inactive table we can recover by + * recomputing the array in place. 
+ * + * @cgrp: The cgroup which descendants to traverse + * @prog: A program to detach or NULL + * @link: A link to detach or NULL + * @atype: Type of detach operation + */ +static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog, + struct bpf_cgroup_link *link, + enum cgroup_bpf_attach_type atype) +{ + struct cgroup_subsys_state *css; + struct bpf_prog_array *progs; + struct bpf_prog_list *pl; + struct list_head *head; + struct cgroup *cg; + int pos; + + /* recompute effective prog array in place */ + css_for_each_descendant_pre(css, &cgrp->self) { + struct cgroup *desc = container_of(css, struct cgroup, self); + + if (percpu_ref_is_zero(&desc->bpf.refcnt)) + continue; + + /* find position of link or prog in effective progs array */ + for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) { + if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI)) + continue; + + head = &cg->bpf.progs[atype]; + list_for_each_entry(pl, head, node) { + if (!prog_list_prog(pl)) + continue; + if (pl->prog == prog && pl->link == link) + goto found; + pos++; + } + } +found: + BUG_ON(!cg); + progs = rcu_dereference_protected( + desc->bpf.effective[atype], + lockdep_is_held(&cgroup_mutex)); + + /* Remove the program from the array */ + WARN_ONCE(bpf_prog_array_delete_safe_at(progs, pos), + "Failed to purge a prog from array at index %d", pos); + } +} + +/** * __cgroup_bpf_detach() - Detach the program or link from a cgroup, and * propagate the change to descendants * @cgrp: The cgroup which descendants to traverse @@ -739,7 +793,6 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, struct bpf_prog_list *pl; struct list_head *progs; u32 flags; - int err; atype = to_cgroup_bpf_attach_type(type); if (atype < 0) @@ -761,9 +814,12 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, pl->prog = NULL; pl->link = NULL; - err = update_effective_progs(cgrp, atype); - if (err) - goto cleanup; + if (update_effective_progs(cgrp, 
atype)) { + /* if updating the effective array failed, replace the prog with a dummy prog */ + pl->prog = old_prog; + pl->link = link; + purge_effective_progs(cgrp, old_prog, link, atype); + } /* now can actually delete it from this cgroup list */ list_del(&pl->node); @@ -775,12 +831,6 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, bpf_prog_put(old_prog); static_branch_dec(&cgroup_bpf_enabled_key[atype]); return 0; - -cleanup: - /* restore back prog or link */ - pl->prog = old_prog; - pl->link = link; - return err; } static int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 5f6f3f829b36..b5ffebcce6cc 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -176,7 +176,7 @@ void bpf_prog_jit_attempt_done(struct bpf_prog *prog) * here is relative to the prog itself instead of the main prog. * This array has one entry for each xlated bpf insn. * - * jited_off is the byte off to the last byte of the jited insn. + * jited_off is the byte off to the end of the jited insn. 
* * Hence, with * insn_start: @@ -2279,6 +2279,21 @@ void bpf_prog_array_free(struct bpf_prog_array *progs) kfree_rcu(progs, rcu); } +static void __bpf_prog_array_free_sleepable_cb(struct rcu_head *rcu) +{ + struct bpf_prog_array *progs; + + progs = container_of(rcu, struct bpf_prog_array, rcu); + kfree_rcu(progs, rcu); +} + +void bpf_prog_array_free_sleepable(struct bpf_prog_array *progs) +{ + if (!progs || progs == &bpf_empty_prog_array.hdr) + return; + call_rcu_tasks_trace(&progs->rcu, __bpf_prog_array_free_sleepable_cb); +} + int bpf_prog_array_length(struct bpf_prog_array *array) { struct bpf_prog_array_item *item; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 225806a02efb..a1c84d256f83 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -584,7 +584,7 @@ BPF_CALL_3(bpf_strncmp, const char *, s1, u32, s1_sz, const char *, s2) return strncmp(s1, s2, s1_sz); } -const struct bpf_func_proto bpf_strncmp_proto = { +static const struct bpf_func_proto bpf_strncmp_proto = { .func = bpf_strncmp, .gpl_only = false, .ret_type = RET_INTEGER, @@ -1402,7 +1402,7 @@ BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr) */ #define BPF_PTR_POISON ((void *)((0xeB9FUL << 2) + POISON_POINTER_DELTA)) -const struct bpf_func_proto bpf_kptr_xchg_proto = { +static const struct bpf_func_proto bpf_kptr_xchg_proto = { .func = bpf_kptr_xchg, .gpl_only = false, .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, @@ -1487,7 +1487,7 @@ error: return err; } -const struct bpf_func_proto bpf_dynptr_from_mem_proto = { +static const struct bpf_func_proto bpf_dynptr_from_mem_proto = { .func = bpf_dynptr_from_mem, .gpl_only = false, .ret_type = RET_INTEGER, @@ -1513,7 +1513,7 @@ BPF_CALL_4(bpf_dynptr_read, void *, dst, u32, len, struct bpf_dynptr_kern *, src return 0; } -const struct bpf_func_proto bpf_dynptr_read_proto = { +static const struct bpf_func_proto bpf_dynptr_read_proto = { .func = bpf_dynptr_read, .gpl_only = false, .ret_type = RET_INTEGER, @@ -1539,7 +1539,7 @@ 
BPF_CALL_4(bpf_dynptr_write, struct bpf_dynptr_kern *, dst, u32, offset, void *, return 0; } -const struct bpf_func_proto bpf_dynptr_write_proto = { +static const struct bpf_func_proto bpf_dynptr_write_proto = { .func = bpf_dynptr_write, .gpl_only = false, .ret_type = RET_INTEGER, @@ -1566,7 +1566,7 @@ BPF_CALL_3(bpf_dynptr_data, struct bpf_dynptr_kern *, ptr, u32, offset, u32, len return (unsigned long)(ptr->data + ptr->offset + offset); } -const struct bpf_func_proto bpf_dynptr_data_proto = { +static const struct bpf_func_proto bpf_dynptr_data_proto = { .func = bpf_dynptr_data, .gpl_only = false, .ret_type = RET_PTR_TO_DYNPTR_MEM_OR_NULL, diff --git a/kernel/bpf/percpu_freelist.c b/kernel/bpf/percpu_freelist.c index 3d897de89061..00b874c8e889 100644 --- a/kernel/bpf/percpu_freelist.c +++ b/kernel/bpf/percpu_freelist.c @@ -31,7 +31,7 @@ static inline void pcpu_freelist_push_node(struct pcpu_freelist_head *head, struct pcpu_freelist_node *node) { node->next = head->first; - head->first = node; + WRITE_ONCE(head->first, node); } static inline void ___pcpu_freelist_push(struct pcpu_freelist_head *head, @@ -130,14 +130,17 @@ static struct pcpu_freelist_node *___pcpu_freelist_pop(struct pcpu_freelist *s) orig_cpu = cpu = raw_smp_processor_id(); while (1) { head = per_cpu_ptr(s->freelist, cpu); + if (!READ_ONCE(head->first)) + goto next_cpu; raw_spin_lock(&head->lock); node = head->first; if (node) { - head->first = node->next; + WRITE_ONCE(head->first, node->next); raw_spin_unlock(&head->lock); return node; } raw_spin_unlock(&head->lock); +next_cpu: cpu = cpumask_next(cpu, cpu_possible_mask); if (cpu >= nr_cpu_ids) cpu = 0; @@ -146,10 +149,12 @@ static struct pcpu_freelist_node *___pcpu_freelist_pop(struct pcpu_freelist *s) } /* per cpu lists are all empty, try extralist */ + if (!READ_ONCE(s->extralist.first)) + return NULL; raw_spin_lock(&s->extralist.lock); node = s->extralist.first; if (node) - s->extralist.first = node->next; + WRITE_ONCE(s->extralist.first, 
node->next); raw_spin_unlock(&s->extralist.lock); return node; } @@ -164,15 +169,18 @@ ___pcpu_freelist_pop_nmi(struct pcpu_freelist *s) orig_cpu = cpu = raw_smp_processor_id(); while (1) { head = per_cpu_ptr(s->freelist, cpu); + if (!READ_ONCE(head->first)) + goto next_cpu; if (raw_spin_trylock(&head->lock)) { node = head->first; if (node) { - head->first = node->next; + WRITE_ONCE(head->first, node->next); raw_spin_unlock(&head->lock); return node; } raw_spin_unlock(&head->lock); } +next_cpu: cpu = cpumask_next(cpu, cpu_possible_mask); if (cpu >= nr_cpu_ids) cpu = 0; @@ -181,11 +189,11 @@ ___pcpu_freelist_pop_nmi(struct pcpu_freelist *s) } /* cannot pop from per cpu lists, try extralist */ - if (!raw_spin_trylock(&s->extralist.lock)) + if (!READ_ONCE(s->extralist.first) || !raw_spin_trylock(&s->extralist.lock)) return NULL; node = s->extralist.first; if (node) - s->extralist.first = node->next; + WRITE_ONCE(s->extralist.first, node->next); raw_spin_unlock(&s->extralist.lock); return node; } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 2b69306d3c6e..7d5af5b99f0d 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -4090,14 +4090,15 @@ static int bpf_prog_get_info_by_fd(struct file *file, info.nr_jited_line_info = 0; if (info.nr_jited_line_info && ulen) { if (bpf_dump_raw_ok(file->f_cred)) { + unsigned long line_addr; __u64 __user *user_linfo; u32 i; user_linfo = u64_to_user_ptr(info.jited_line_info); ulen = min_t(u32, info.nr_jited_line_info, ulen); for (i = 0; i < ulen; i++) { - if (put_user((__u64)(long)prog->aux->jited_linfo[i], - &user_linfo[i])) + line_addr = (unsigned long)prog->aux->jited_linfo[i]; + if (put_user((__u64)line_addr, &user_linfo[i])) return -EFAULT; } } else { @@ -5130,7 +5131,7 @@ BPF_CALL_4(bpf_kallsyms_lookup_name, const char *, name, int, name_sz, int, flag return *res ? 
0 : -ENOENT; } -const struct bpf_func_proto bpf_kallsyms_lookup_name_proto = { +static const struct bpf_func_proto bpf_kallsyms_lookup_name_proto = { .func = bpf_kallsyms_lookup_name, .gpl_only = false, .ret_type = RET_INTEGER, diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index aedac2ac02b9..2859901ffbe3 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5848,6 +5848,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno]; enum bpf_arg_type arg_type = fn->arg_type[arg]; enum bpf_reg_type type = reg->type; + u32 *arg_btf_id = NULL; int err = 0; if (arg_type == ARG_DONTCARE) @@ -5884,7 +5885,11 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, */ goto skip_type_check; - err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg], meta); + /* arg_btf_id and arg_size are in a union. */ + if (base_type(arg_type) == ARG_PTR_TO_BTF_ID) + arg_btf_id = fn->arg_btf_id[arg]; + + err = check_reg_type(env, regno, arg_type, arg_btf_id, meta); if (err) return err; @@ -6011,6 +6016,11 @@ skip_type_check: * next is_mem_size argument below. 
*/ meta->raw_mode = arg_type & MEM_UNINIT; + if (arg_type & MEM_FIXED_SIZE) { + err = check_helper_mem_access(env, regno, + fn->arg_size[arg], false, + meta); + } } else if (arg_type_is_mem_size(arg_type)) { bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO); @@ -6400,11 +6410,19 @@ static bool check_raw_mode_ok(const struct bpf_func_proto *fn) return count <= 1; } -static bool check_args_pair_invalid(enum bpf_arg_type arg_curr, - enum bpf_arg_type arg_next) +static bool check_args_pair_invalid(const struct bpf_func_proto *fn, int arg) { - return (base_type(arg_curr) == ARG_PTR_TO_MEM) != - arg_type_is_mem_size(arg_next); + bool is_fixed = fn->arg_type[arg] & MEM_FIXED_SIZE; + bool has_size = fn->arg_size[arg] != 0; + bool is_next_size = false; + + if (arg + 1 < ARRAY_SIZE(fn->arg_type)) + is_next_size = arg_type_is_mem_size(fn->arg_type[arg + 1]); + + if (base_type(fn->arg_type[arg]) != ARG_PTR_TO_MEM) + return is_next_size; + + return has_size == is_next_size || is_next_size == is_fixed; } static bool check_arg_pair_ok(const struct bpf_func_proto *fn) @@ -6415,11 +6433,11 @@ static bool check_arg_pair_ok(const struct bpf_func_proto *fn) * helper function specification. 
*/ if (arg_type_is_mem_size(fn->arg1_type) || - base_type(fn->arg5_type) == ARG_PTR_TO_MEM || - check_args_pair_invalid(fn->arg1_type, fn->arg2_type) || - check_args_pair_invalid(fn->arg2_type, fn->arg3_type) || - check_args_pair_invalid(fn->arg3_type, fn->arg4_type) || - check_args_pair_invalid(fn->arg4_type, fn->arg5_type)) + check_args_pair_invalid(fn, 0) || + check_args_pair_invalid(fn, 1) || + check_args_pair_invalid(fn, 2) || + check_args_pair_invalid(fn, 3) || + check_args_pair_invalid(fn, 4)) return false; return true; @@ -6460,7 +6478,10 @@ static bool check_btf_id_ok(const struct bpf_func_proto *fn) if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i]) return false; - if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i]) + if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i] && + /* arg_btf_id and arg_size are in a union. */ + (base_type(fn->arg_type[i]) != ARG_PTR_TO_MEM || + !(fn->arg_type[i] & MEM_FIXED_SIZE))) return false; } @@ -10901,7 +10922,7 @@ static int check_btf_func(struct bpf_verifier_env *env, goto err_free; ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL); scalar_return = - btf_type_is_small_int(ret_type) || btf_type_is_enum(ret_type); + btf_type_is_small_int(ret_type) || btf_is_any_enum(ret_type); if (i && !scalar_return && env->subprog_info[i].has_ld_abs) { verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n"); goto err_free; @@ -14829,8 +14850,8 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) } if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING && - prog->type != BPF_PROG_TYPE_LSM) { - verbose(env, "Only fentry/fexit/fmod_ret and lsm programs can be sleepable\n"); + prog->type != BPF_PROG_TYPE_LSM && prog->type != BPF_PROG_TYPE_KPROBE) { + verbose(env, "Only fentry/fexit/fmod_ret, lsm, and kprobe/uprobe programs can be sleepable\n"); return -EINVAL; } diff --git a/kernel/events/core.c b/kernel/events/core.c index 
80782cddb1da..48bae58d240e 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10068,26 +10068,30 @@ static inline bool perf_event_is_tracing(struct perf_event *event) int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog, u64 bpf_cookie) { - bool is_kprobe, is_tracepoint, is_syscall_tp; + bool is_kprobe, is_uprobe, is_tracepoint, is_syscall_tp; if (!perf_event_is_tracing(event)) return perf_event_set_bpf_handler(event, prog, bpf_cookie); - is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE; + is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_KPROBE; + is_uprobe = event->tp_event->flags & TRACE_EVENT_FL_UPROBE; is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT; is_syscall_tp = is_syscall_trace_event(event->tp_event); - if (!is_kprobe && !is_tracepoint && !is_syscall_tp) + if (!is_kprobe && !is_uprobe && !is_tracepoint && !is_syscall_tp) /* bpf programs can only be attached to u/kprobe or tracepoint */ return -EINVAL; - if ((is_kprobe && prog->type != BPF_PROG_TYPE_KPROBE) || + if (((is_kprobe || is_uprobe) && prog->type != BPF_PROG_TYPE_KPROBE) || (is_tracepoint && prog->type != BPF_PROG_TYPE_TRACEPOINT) || (is_syscall_tp && prog->type != BPF_PROG_TYPE_TRACEPOINT)) return -EINVAL; + if (prog->type == BPF_PROG_TYPE_KPROBE && prog->aux->sleepable && !is_uprobe) + /* only uprobe programs are allowed to be sleepable */ + return -EINVAL; + /* Kprobe override only works for kprobes, not uprobes. 
*/ - if (prog->kprobe_override && - !(event->tp_event->flags & TRACE_EVENT_FL_KPROBE)) + if (prog->kprobe_override && !is_kprobe) return -EINVAL; if (is_tracepoint || is_syscall_tp) { diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 88589d74a892..68e5cdd24cef 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1936,7 +1936,7 @@ int perf_event_attach_bpf_prog(struct perf_event *event, event->prog = prog; event->bpf_cookie = bpf_cookie; rcu_assign_pointer(event->tp_event->prog_array, new_array); - bpf_prog_array_free(old_array); + bpf_prog_array_free_sleepable(old_array); unlock: mutex_unlock(&bpf_event_mutex); @@ -1962,7 +1962,7 @@ void perf_event_detach_bpf_prog(struct perf_event *event) bpf_prog_array_delete_safe(old_array, event->prog); } else { rcu_assign_pointer(event->tp_event->prog_array, new_array); - bpf_prog_array_free(old_array); + bpf_prog_array_free_sleepable(old_array); } bpf_prog_put(event->prog); diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index c3dc4f859a6b..1f5351cae25f 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -16,6 +16,7 @@ #include <linux/namei.h> #include <linux/string.h> #include <linux/rculist.h> +#include <linux/filter.h> #include "trace_dynevent.h" #include "trace_probe.h" @@ -1345,9 +1346,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu, if (bpf_prog_array_valid(call)) { u32 ret; - preempt_disable(); - ret = trace_call_bpf(call, regs); - preempt_enable(); + ret = bpf_prog_run_array_sleepable(call->prog_array, regs, bpf_prog_run); if (!ret) return; } diff --git a/net/6lowpan/nhc.c b/net/6lowpan/nhc.c index d6bbbd4ab38b..7b374595328d 100644 --- a/net/6lowpan/nhc.c +++ b/net/6lowpan/nhc.c @@ -12,77 +12,26 @@ #include "nhc.h" -static struct rb_root rb_root = RB_ROOT; -static struct lowpan_nhc *lowpan_nexthdr_nhcs[NEXTHDR_MAX + 1]; +static const struct lowpan_nhc *lowpan_nexthdr_nhcs[NEXTHDR_MAX + 1]; static 
DEFINE_SPINLOCK(lowpan_nhc_lock); -static int lowpan_nhc_insert(struct lowpan_nhc *nhc) +static const struct lowpan_nhc *lowpan_nhc_by_nhcid(struct sk_buff *skb) { - struct rb_node **new = &rb_root.rb_node, *parent = NULL; - - /* Figure out where to put new node */ - while (*new) { - struct lowpan_nhc *this = rb_entry(*new, struct lowpan_nhc, - node); - int result, len_dif, len; - - len_dif = nhc->idlen - this->idlen; - - if (nhc->idlen < this->idlen) - len = nhc->idlen; - else - len = this->idlen; - - result = memcmp(nhc->id, this->id, len); - if (!result) - result = len_dif; - - parent = *new; - if (result < 0) - new = &((*new)->rb_left); - else if (result > 0) - new = &((*new)->rb_right); - else - return -EEXIST; - } + const struct lowpan_nhc *nhc; + int i; + u8 id; - /* Add new node and rebalance tree. */ - rb_link_node(&nhc->node, parent, new); - rb_insert_color(&nhc->node, &rb_root); + if (!pskb_may_pull(skb, 1)) + return NULL; - return 0; -} + id = *skb->data; -static void lowpan_nhc_remove(struct lowpan_nhc *nhc) -{ - rb_erase(&nhc->node, &rb_root); -} + for (i = 0; i < NEXTHDR_MAX + 1; i++) { + nhc = lowpan_nexthdr_nhcs[i]; + if (!nhc) + continue; -static struct lowpan_nhc *lowpan_nhc_by_nhcid(const struct sk_buff *skb) -{ - struct rb_node *node = rb_root.rb_node; - const u8 *nhcid_skb_ptr = skb->data; - - while (node) { - struct lowpan_nhc *nhc = rb_entry(node, struct lowpan_nhc, - node); - u8 nhcid_skb_ptr_masked[LOWPAN_NHC_MAX_ID_LEN]; - int result, i; - - if (nhcid_skb_ptr + nhc->idlen > skb->data + skb->len) - return NULL; - - /* copy and mask afterwards the nhid value from skb */ - memcpy(nhcid_skb_ptr_masked, nhcid_skb_ptr, nhc->idlen); - for (i = 0; i < nhc->idlen; i++) - nhcid_skb_ptr_masked[i] &= nhc->idmask[i]; - - result = memcmp(nhcid_skb_ptr_masked, nhc->id, nhc->idlen); - if (result < 0) - node = node->rb_left; - else if (result > 0) - node = node->rb_right; - else + if ((id & nhc->idmask) == nhc->id) return nhc; } @@ -92,7 +41,7 @@ static 
struct lowpan_nhc *lowpan_nhc_by_nhcid(const struct sk_buff *skb) int lowpan_nhc_check_compression(struct sk_buff *skb, const struct ipv6hdr *hdr, u8 **hc_ptr) { - struct lowpan_nhc *nhc; + const struct lowpan_nhc *nhc; int ret = 0; spin_lock_bh(&lowpan_nhc_lock); @@ -110,7 +59,7 @@ int lowpan_nhc_do_compression(struct sk_buff *skb, const struct ipv6hdr *hdr, u8 **hc_ptr) { int ret; - struct lowpan_nhc *nhc; + const struct lowpan_nhc *nhc; spin_lock_bh(&lowpan_nhc_lock); @@ -153,7 +102,7 @@ int lowpan_nhc_do_uncompression(struct sk_buff *skb, const struct net_device *dev, struct ipv6hdr *hdr) { - struct lowpan_nhc *nhc; + const struct lowpan_nhc *nhc; int ret; spin_lock_bh(&lowpan_nhc_lock); @@ -189,18 +138,9 @@ int lowpan_nhc_do_uncompression(struct sk_buff *skb, return 0; } -int lowpan_nhc_add(struct lowpan_nhc *nhc) +int lowpan_nhc_add(const struct lowpan_nhc *nhc) { - int ret; - - if (!nhc->idlen || !nhc->idsetup) - return -EINVAL; - - WARN_ONCE(nhc->idlen > LOWPAN_NHC_MAX_ID_LEN, - "LOWPAN_NHC_MAX_ID_LEN should be updated to %zd.\n", - nhc->idlen); - - nhc->idsetup(nhc); + int ret = 0; spin_lock_bh(&lowpan_nhc_lock); @@ -209,10 +149,6 @@ int lowpan_nhc_add(struct lowpan_nhc *nhc) goto out; } - ret = lowpan_nhc_insert(nhc); - if (ret < 0) - goto out; - lowpan_nexthdr_nhcs[nhc->nexthdr] = nhc; out: spin_unlock_bh(&lowpan_nhc_lock); @@ -220,11 +156,10 @@ out: } EXPORT_SYMBOL(lowpan_nhc_add); -void lowpan_nhc_del(struct lowpan_nhc *nhc) +void lowpan_nhc_del(const struct lowpan_nhc *nhc) { spin_lock_bh(&lowpan_nhc_lock); - lowpan_nhc_remove(nhc); lowpan_nexthdr_nhcs[nhc->nexthdr] = NULL; spin_unlock_bh(&lowpan_nhc_lock); diff --git a/net/6lowpan/nhc.h b/net/6lowpan/nhc.h index 67951c40734b..ab7b4977c32b 100644 --- a/net/6lowpan/nhc.h +++ b/net/6lowpan/nhc.h @@ -16,24 +16,20 @@ * @_name: const char * of common header compression name. * @_nexthdr: ipv6 nexthdr field for the header compression. * @_nexthdrlen: ipv6 nexthdr len for the reserved space. 
- * @_idsetup: callback to setup id and mask values. - * @_idlen: len for the next header id and mask, should be always the same. + * @_id: one byte nhc id value. + * @_idmask: one byte nhc id mask value. * @_uncompress: callback for uncompression call. * @_compress: callback for compression call. */ #define LOWPAN_NHC(__nhc, _name, _nexthdr, \ - _hdrlen, _idsetup, _idlen, \ + _hdrlen, _id, _idmask, \ _uncompress, _compress) \ -static u8 __nhc##_val[_idlen]; \ -static u8 __nhc##_mask[_idlen]; \ -static struct lowpan_nhc __nhc = { \ +static const struct lowpan_nhc __nhc = { \ .name = _name, \ .nexthdr = _nexthdr, \ .nexthdrlen = _hdrlen, \ - .id = __nhc##_val, \ - .idmask = __nhc##_mask, \ - .idlen = _idlen, \ - .idsetup = _idsetup, \ + .id = _id, \ + .idmask = _idmask, \ .uncompress = _uncompress, \ .compress = _compress, \ } @@ -53,27 +49,21 @@ module_exit(__nhc##_exit); /** * struct lowpan_nhc - hold 6lowpan next hdr compression ifnformation * - * @node: holder for the rbtree. * @name: name of the specific next header compression * @nexthdr: next header value of the protocol which should be compressed. * @nexthdrlen: ipv6 nexthdr len for the reserved space. - * @id: array for nhc id. Note this need to be in network byteorder. - * @mask: array for nhc id mask. Note this need to be in network byteorder. - * @len: the length of the next header id and mask. - * @setup: callback to setup fill the next header id value and mask. + * @id: one byte nhc id value. + * @idmask: one byte nhc id mask value. * @compress: callback to do the header compression. * @uncompress: callback to do the header uncompression. 
*/ struct lowpan_nhc { - struct rb_node node; const char *name; - const u8 nexthdr; - const size_t nexthdrlen; - u8 *id; - u8 *idmask; - const size_t idlen; + u8 nexthdr; + size_t nexthdrlen; + u8 id; + u8 idmask; - void (*idsetup)(struct lowpan_nhc *nhc); int (*uncompress)(struct sk_buff *skb, size_t needed); int (*compress)(struct sk_buff *skb, u8 **hc_ptr); }; @@ -126,14 +116,14 @@ int lowpan_nhc_do_uncompression(struct sk_buff *skb, * * @nhc: nhc which should be add. */ -int lowpan_nhc_add(struct lowpan_nhc *nhc); +int lowpan_nhc_add(const struct lowpan_nhc *nhc); /** * lowpan_nhc_del - delete a next header compression from framework * * @nhc: nhc which should be delete. */ -void lowpan_nhc_del(struct lowpan_nhc *nhc); +void lowpan_nhc_del(const struct lowpan_nhc *nhc); /** * lowpan_nhc_init - adding all default nhcs diff --git a/net/6lowpan/nhc_dest.c b/net/6lowpan/nhc_dest.c index 4768a9459212..0cbcc7806469 100644 --- a/net/6lowpan/nhc_dest.c +++ b/net/6lowpan/nhc_dest.c @@ -6,18 +6,11 @@ #include "nhc.h" -#define LOWPAN_NHC_DEST_IDLEN 1 #define LOWPAN_NHC_DEST_ID_0 0xe6 #define LOWPAN_NHC_DEST_MASK_0 0xfe -static void dest_nhid_setup(struct lowpan_nhc *nhc) -{ - nhc->id[0] = LOWPAN_NHC_DEST_ID_0; - nhc->idmask[0] = LOWPAN_NHC_DEST_MASK_0; -} - LOWPAN_NHC(nhc_dest, "RFC6282 Destination Options", NEXTHDR_DEST, 0, - dest_nhid_setup, LOWPAN_NHC_DEST_IDLEN, NULL, NULL); + LOWPAN_NHC_DEST_ID_0, LOWPAN_NHC_DEST_MASK_0, NULL, NULL); module_lowpan_nhc(nhc_dest); MODULE_DESCRIPTION("6LoWPAN next header RFC6282 Destination Options compression"); diff --git a/net/6lowpan/nhc_fragment.c b/net/6lowpan/nhc_fragment.c index be85f07715bd..9414552df0ac 100644 --- a/net/6lowpan/nhc_fragment.c +++ b/net/6lowpan/nhc_fragment.c @@ -5,18 +5,11 @@ #include "nhc.h" -#define LOWPAN_NHC_FRAGMENT_IDLEN 1 #define LOWPAN_NHC_FRAGMENT_ID_0 0xe4 #define LOWPAN_NHC_FRAGMENT_MASK_0 0xfe -static void fragment_nhid_setup(struct lowpan_nhc *nhc) -{ - nhc->id[0] = LOWPAN_NHC_FRAGMENT_ID_0; - 
nhc->idmask[0] = LOWPAN_NHC_FRAGMENT_MASK_0; -} - LOWPAN_NHC(nhc_fragment, "RFC6282 Fragment", NEXTHDR_FRAGMENT, 0, - fragment_nhid_setup, LOWPAN_NHC_FRAGMENT_IDLEN, NULL, NULL); + LOWPAN_NHC_FRAGMENT_ID_0, LOWPAN_NHC_FRAGMENT_MASK_0, NULL, NULL); module_lowpan_nhc(nhc_fragment); MODULE_DESCRIPTION("6LoWPAN next header RFC6282 Fragment compression"); diff --git a/net/6lowpan/nhc_ghc_ext_dest.c b/net/6lowpan/nhc_ghc_ext_dest.c index a9137f1733be..e4745ddd10a8 100644 --- a/net/6lowpan/nhc_ghc_ext_dest.c +++ b/net/6lowpan/nhc_ghc_ext_dest.c @@ -5,18 +5,11 @@ #include "nhc.h" -#define LOWPAN_GHC_EXT_DEST_IDLEN 1 #define LOWPAN_GHC_EXT_DEST_ID_0 0xb6 #define LOWPAN_GHC_EXT_DEST_MASK_0 0xfe -static void dest_ghid_setup(struct lowpan_nhc *nhc) -{ - nhc->id[0] = LOWPAN_GHC_EXT_DEST_ID_0; - nhc->idmask[0] = LOWPAN_GHC_EXT_DEST_MASK_0; -} - LOWPAN_NHC(ghc_ext_dest, "RFC7400 Destination Extension Header", NEXTHDR_DEST, - 0, dest_ghid_setup, LOWPAN_GHC_EXT_DEST_IDLEN, NULL, NULL); + 0, LOWPAN_GHC_EXT_DEST_ID_0, LOWPAN_GHC_EXT_DEST_MASK_0, NULL, NULL); module_lowpan_nhc(ghc_ext_dest); MODULE_DESCRIPTION("6LoWPAN generic header destination extension compression"); diff --git a/net/6lowpan/nhc_ghc_ext_frag.c b/net/6lowpan/nhc_ghc_ext_frag.c index d49b745918e0..220e5abfa946 100644 --- a/net/6lowpan/nhc_ghc_ext_frag.c +++ b/net/6lowpan/nhc_ghc_ext_frag.c @@ -5,19 +5,12 @@ #include "nhc.h" -#define LOWPAN_GHC_EXT_FRAG_IDLEN 1 #define LOWPAN_GHC_EXT_FRAG_ID_0 0xb4 #define LOWPAN_GHC_EXT_FRAG_MASK_0 0xfe -static void frag_ghid_setup(struct lowpan_nhc *nhc) -{ - nhc->id[0] = LOWPAN_GHC_EXT_FRAG_ID_0; - nhc->idmask[0] = LOWPAN_GHC_EXT_FRAG_MASK_0; -} - LOWPAN_NHC(ghc_ext_frag, "RFC7400 Fragmentation Extension Header", - NEXTHDR_FRAGMENT, 0, frag_ghid_setup, - LOWPAN_GHC_EXT_FRAG_IDLEN, NULL, NULL); + NEXTHDR_FRAGMENT, 0, LOWPAN_GHC_EXT_FRAG_ID_0, + LOWPAN_GHC_EXT_FRAG_MASK_0, NULL, NULL); module_lowpan_nhc(ghc_ext_frag); MODULE_DESCRIPTION("6LoWPAN generic header fragmentation extension 
compression"); diff --git a/net/6lowpan/nhc_ghc_ext_hop.c b/net/6lowpan/nhc_ghc_ext_hop.c index 3beedf5140a3..9b0de4da7379 100644 --- a/net/6lowpan/nhc_ghc_ext_hop.c +++ b/net/6lowpan/nhc_ghc_ext_hop.c @@ -5,18 +5,11 @@ #include "nhc.h" -#define LOWPAN_GHC_EXT_HOP_IDLEN 1 #define LOWPAN_GHC_EXT_HOP_ID_0 0xb0 #define LOWPAN_GHC_EXT_HOP_MASK_0 0xfe -static void hop_ghid_setup(struct lowpan_nhc *nhc) -{ - nhc->id[0] = LOWPAN_GHC_EXT_HOP_ID_0; - nhc->idmask[0] = LOWPAN_GHC_EXT_HOP_MASK_0; -} - LOWPAN_NHC(ghc_ext_hop, "RFC7400 Hop-by-Hop Extension Header", NEXTHDR_HOP, 0, - hop_ghid_setup, LOWPAN_GHC_EXT_HOP_IDLEN, NULL, NULL); + LOWPAN_GHC_EXT_HOP_ID_0, LOWPAN_GHC_EXT_HOP_MASK_0, NULL, NULL); module_lowpan_nhc(ghc_ext_hop); MODULE_DESCRIPTION("6LoWPAN generic header hop-by-hop extension compression"); diff --git a/net/6lowpan/nhc_ghc_ext_route.c b/net/6lowpan/nhc_ghc_ext_route.c index 70dc0ea3cf66..3e86faec59c9 100644 --- a/net/6lowpan/nhc_ghc_ext_route.c +++ b/net/6lowpan/nhc_ghc_ext_route.c @@ -5,18 +5,11 @@ #include "nhc.h" -#define LOWPAN_GHC_EXT_ROUTE_IDLEN 1 #define LOWPAN_GHC_EXT_ROUTE_ID_0 0xb2 #define LOWPAN_GHC_EXT_ROUTE_MASK_0 0xfe -static void route_ghid_setup(struct lowpan_nhc *nhc) -{ - nhc->id[0] = LOWPAN_GHC_EXT_ROUTE_ID_0; - nhc->idmask[0] = LOWPAN_GHC_EXT_ROUTE_MASK_0; -} - LOWPAN_NHC(ghc_ext_route, "RFC7400 Routing Extension Header", NEXTHDR_ROUTING, - 0, route_ghid_setup, LOWPAN_GHC_EXT_ROUTE_IDLEN, NULL, NULL); + 0, LOWPAN_GHC_EXT_ROUTE_ID_0, LOWPAN_GHC_EXT_ROUTE_MASK_0, NULL, NULL); module_lowpan_nhc(ghc_ext_route); MODULE_DESCRIPTION("6LoWPAN generic header routing extension compression"); diff --git a/net/6lowpan/nhc_ghc_icmpv6.c b/net/6lowpan/nhc_ghc_icmpv6.c index 339ceffc25a9..1634f3eb0be8 100644 --- a/net/6lowpan/nhc_ghc_icmpv6.c +++ b/net/6lowpan/nhc_ghc_icmpv6.c @@ -5,18 +5,11 @@ #include "nhc.h" -#define LOWPAN_GHC_ICMPV6_IDLEN 1 #define LOWPAN_GHC_ICMPV6_ID_0 0xdf #define LOWPAN_GHC_ICMPV6_MASK_0 0xff -static void 
icmpv6_ghid_setup(struct lowpan_nhc *nhc) -{ - nhc->id[0] = LOWPAN_GHC_ICMPV6_ID_0; - nhc->idmask[0] = LOWPAN_GHC_ICMPV6_MASK_0; -} - LOWPAN_NHC(ghc_icmpv6, "RFC7400 ICMPv6", NEXTHDR_ICMP, 0, - icmpv6_ghid_setup, LOWPAN_GHC_ICMPV6_IDLEN, NULL, NULL); + LOWPAN_GHC_ICMPV6_ID_0, LOWPAN_GHC_ICMPV6_MASK_0, NULL, NULL); module_lowpan_nhc(ghc_icmpv6); MODULE_DESCRIPTION("6LoWPAN generic header ICMPv6 compression"); diff --git a/net/6lowpan/nhc_ghc_udp.c b/net/6lowpan/nhc_ghc_udp.c index f47fec601e73..4ac4813b77ad 100644 --- a/net/6lowpan/nhc_ghc_udp.c +++ b/net/6lowpan/nhc_ghc_udp.c @@ -5,18 +5,11 @@ #include "nhc.h" -#define LOWPAN_GHC_UDP_IDLEN 1 #define LOWPAN_GHC_UDP_ID_0 0xd0 #define LOWPAN_GHC_UDP_MASK_0 0xf8 -static void udp_ghid_setup(struct lowpan_nhc *nhc) -{ - nhc->id[0] = LOWPAN_GHC_UDP_ID_0; - nhc->idmask[0] = LOWPAN_GHC_UDP_MASK_0; -} - LOWPAN_NHC(ghc_udp, "RFC7400 UDP", NEXTHDR_UDP, 0, - udp_ghid_setup, LOWPAN_GHC_UDP_IDLEN, NULL, NULL); + LOWPAN_GHC_UDP_ID_0, LOWPAN_GHC_UDP_MASK_0, NULL, NULL); module_lowpan_nhc(ghc_udp); MODULE_DESCRIPTION("6LoWPAN generic header UDP compression"); diff --git a/net/6lowpan/nhc_hop.c b/net/6lowpan/nhc_hop.c index 158fc1906327..182087dfd09d 100644 --- a/net/6lowpan/nhc_hop.c +++ b/net/6lowpan/nhc_hop.c @@ -5,18 +5,11 @@ #include "nhc.h" -#define LOWPAN_NHC_HOP_IDLEN 1 #define LOWPAN_NHC_HOP_ID_0 0xe0 #define LOWPAN_NHC_HOP_MASK_0 0xfe -static void hop_nhid_setup(struct lowpan_nhc *nhc) -{ - nhc->id[0] = LOWPAN_NHC_HOP_ID_0; - nhc->idmask[0] = LOWPAN_NHC_HOP_MASK_0; -} - LOWPAN_NHC(nhc_hop, "RFC6282 Hop-by-Hop Options", NEXTHDR_HOP, 0, - hop_nhid_setup, LOWPAN_NHC_HOP_IDLEN, NULL, NULL); + LOWPAN_NHC_HOP_ID_0, LOWPAN_NHC_HOP_MASK_0, NULL, NULL); module_lowpan_nhc(nhc_hop); MODULE_DESCRIPTION("6LoWPAN next header RFC6282 Hop-by-Hop Options compression"); diff --git a/net/6lowpan/nhc_ipv6.c b/net/6lowpan/nhc_ipv6.c index 08b7589e5b38..20242360b1d4 100644 --- a/net/6lowpan/nhc_ipv6.c +++ b/net/6lowpan/nhc_ipv6.c @@ -5,18 +5,11 
@@ #include "nhc.h" -#define LOWPAN_NHC_IPV6_IDLEN 1 #define LOWPAN_NHC_IPV6_ID_0 0xee #define LOWPAN_NHC_IPV6_MASK_0 0xfe -static void ipv6_nhid_setup(struct lowpan_nhc *nhc) -{ - nhc->id[0] = LOWPAN_NHC_IPV6_ID_0; - nhc->idmask[0] = LOWPAN_NHC_IPV6_MASK_0; -} - -LOWPAN_NHC(nhc_ipv6, "RFC6282 IPv6", NEXTHDR_IPV6, 0, ipv6_nhid_setup, - LOWPAN_NHC_IPV6_IDLEN, NULL, NULL); +LOWPAN_NHC(nhc_ipv6, "RFC6282 IPv6", NEXTHDR_IPV6, 0, LOWPAN_NHC_IPV6_ID_0, + LOWPAN_NHC_IPV6_MASK_0, NULL, NULL); module_lowpan_nhc(nhc_ipv6); MODULE_DESCRIPTION("6LoWPAN next header RFC6282 IPv6 compression"); diff --git a/net/6lowpan/nhc_mobility.c b/net/6lowpan/nhc_mobility.c index ac8fca689828..1c31d872c804 100644 --- a/net/6lowpan/nhc_mobility.c +++ b/net/6lowpan/nhc_mobility.c @@ -5,18 +5,11 @@ #include "nhc.h" -#define LOWPAN_NHC_MOBILITY_IDLEN 1 #define LOWPAN_NHC_MOBILITY_ID_0 0xe8 #define LOWPAN_NHC_MOBILITY_MASK_0 0xfe -static void mobility_nhid_setup(struct lowpan_nhc *nhc) -{ - nhc->id[0] = LOWPAN_NHC_MOBILITY_ID_0; - nhc->idmask[0] = LOWPAN_NHC_MOBILITY_MASK_0; -} - LOWPAN_NHC(nhc_mobility, "RFC6282 Mobility", NEXTHDR_MOBILITY, 0, - mobility_nhid_setup, LOWPAN_NHC_MOBILITY_IDLEN, NULL, NULL); + LOWPAN_NHC_MOBILITY_ID_0, LOWPAN_NHC_MOBILITY_MASK_0, NULL, NULL); module_lowpan_nhc(nhc_mobility); MODULE_DESCRIPTION("6LoWPAN next header RFC6282 Mobility compression"); diff --git a/net/6lowpan/nhc_routing.c b/net/6lowpan/nhc_routing.c index 1c174023de42..dae03ebf7021 100644 --- a/net/6lowpan/nhc_routing.c +++ b/net/6lowpan/nhc_routing.c @@ -5,18 +5,11 @@ #include "nhc.h" -#define LOWPAN_NHC_ROUTING_IDLEN 1 #define LOWPAN_NHC_ROUTING_ID_0 0xe2 #define LOWPAN_NHC_ROUTING_MASK_0 0xfe -static void routing_nhid_setup(struct lowpan_nhc *nhc) -{ - nhc->id[0] = LOWPAN_NHC_ROUTING_ID_0; - nhc->idmask[0] = LOWPAN_NHC_ROUTING_MASK_0; -} - LOWPAN_NHC(nhc_routing, "RFC6282 Routing", NEXTHDR_ROUTING, 0, - routing_nhid_setup, LOWPAN_NHC_ROUTING_IDLEN, NULL, NULL); + LOWPAN_NHC_ROUTING_ID_0, 
LOWPAN_NHC_ROUTING_MASK_0, NULL, NULL); module_lowpan_nhc(nhc_routing); MODULE_DESCRIPTION("6LoWPAN next header RFC6282 Routing compression"); diff --git a/net/6lowpan/nhc_udp.c b/net/6lowpan/nhc_udp.c index 33f17bd8cda7..0a506c77283d 100644 --- a/net/6lowpan/nhc_udp.c +++ b/net/6lowpan/nhc_udp.c @@ -14,7 +14,6 @@ #define LOWPAN_NHC_UDP_MASK 0xF8 #define LOWPAN_NHC_UDP_ID 0xF0 -#define LOWPAN_NHC_UDP_IDLEN 1 #define LOWPAN_NHC_UDP_4BIT_PORT 0xF0B0 #define LOWPAN_NHC_UDP_4BIT_MASK 0xFFF0 @@ -169,14 +168,8 @@ static int udp_compress(struct sk_buff *skb, u8 **hc_ptr) return 0; } -static void udp_nhid_setup(struct lowpan_nhc *nhc) -{ - nhc->id[0] = LOWPAN_NHC_UDP_ID; - nhc->idmask[0] = LOWPAN_NHC_UDP_MASK; -} - LOWPAN_NHC(nhc_udp, "RFC6282 UDP", NEXTHDR_UDP, sizeof(struct udphdr), - udp_nhid_setup, LOWPAN_NHC_UDP_IDLEN, udp_uncompress, udp_compress); + LOWPAN_NHC_UDP_ID, LOWPAN_NHC_UDP_MASK, udp_uncompress, udp_compress); module_lowpan_nhc(nhc_udp); MODULE_DESCRIPTION("6LoWPAN next header RFC6282 UDP compression"); diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index acf8c791f320..5aa8144101dc 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -63,10 +63,10 @@ bool vlan_do_receive(struct sk_buff **skbp) rx_stats = this_cpu_ptr(vlan_dev_priv(vlan_dev)->vlan_pcpu_stats); u64_stats_update_begin(&rx_stats->syncp); - rx_stats->rx_packets++; - rx_stats->rx_bytes += skb->len; + u64_stats_inc(&rx_stats->rx_packets); + u64_stats_add(&rx_stats->rx_bytes, skb->len); if (skb->pkt_type == PACKET_MULTICAST) - rx_stats->rx_multicast++; + u64_stats_inc(&rx_stats->rx_multicast); u64_stats_update_end(&rx_stats->syncp); return true; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 839f2020b015..035812b0461c 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -128,8 +128,8 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, stats = this_cpu_ptr(vlan->vlan_pcpu_stats); u64_stats_update_begin(&stats->syncp); - 
stats->tx_packets++; - stats->tx_bytes += len; + u64_stats_inc(&stats->tx_packets); + u64_stats_add(&stats->tx_bytes, len); u64_stats_update_end(&stats->syncp); } else { this_cpu_inc(vlan->vlan_pcpu_stats->tx_dropped); @@ -615,7 +615,7 @@ static int vlan_dev_init(struct net_device *dev) return -ENOMEM; /* Get vlan's reference to real_dev */ - dev_hold_track(real_dev, &vlan->dev_tracker, GFP_KERNEL); + netdev_hold(real_dev, &vlan->dev_tracker, GFP_KERNEL); return 0; } @@ -713,11 +713,11 @@ static void vlan_dev_get_stats64(struct net_device *dev, p = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i); do { start = u64_stats_fetch_begin_irq(&p->syncp); - rxpackets = p->rx_packets; - rxbytes = p->rx_bytes; - rxmulticast = p->rx_multicast; - txpackets = p->tx_packets; - txbytes = p->tx_bytes; + rxpackets = u64_stats_read(&p->rx_packets); + rxbytes = u64_stats_read(&p->rx_bytes); + rxmulticast = u64_stats_read(&p->rx_multicast); + txpackets = u64_stats_read(&p->tx_packets); + txbytes = u64_stats_read(&p->tx_bytes); } while (u64_stats_fetch_retry_irq(&p->syncp, start)); stats->rx_packets += rxpackets; @@ -726,8 +726,8 @@ static void vlan_dev_get_stats64(struct net_device *dev, stats->tx_packets += txpackets; stats->tx_bytes += txbytes; /* rx_errors & tx_dropped are u32 */ - rx_errors += p->rx_errors; - tx_dropped += p->tx_dropped; + rx_errors += READ_ONCE(p->rx_errors); + tx_dropped += READ_ONCE(p->tx_dropped); } stats->rx_errors = rx_errors; stats->tx_dropped = tx_dropped; @@ -852,7 +852,7 @@ static void vlan_dev_free(struct net_device *dev) vlan->vlan_pcpu_stats = NULL; /* Get rid of the vlan's reference to real_dev */ - dev_put_track(vlan->real_dev, &vlan->dev_tracker); + netdev_put(vlan->real_dev, &vlan->dev_tracker); } void vlan_setup(struct net_device *dev) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 4c7030ed8d33..bbac3cb4dc99 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -102,7 +102,8 @@ again: ax25_disconnect(s, ENETUNREACH); 
s->ax25_dev = NULL; if (sk->sk_socket) { - dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker); + netdev_put(ax25_dev->dev, + &ax25_dev->dev_tracker); ax25_dev_put(ax25_dev); } ax25_cb_del(s); @@ -1065,7 +1066,7 @@ static int ax25_release(struct socket *sock) del_timer_sync(&ax25->t3timer); del_timer_sync(&ax25->idletimer); } - dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker); + netdev_put(ax25_dev->dev, &ax25_dev->dev_tracker); ax25_dev_put(ax25_dev); } @@ -1146,7 +1147,7 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (ax25_dev) { ax25_fillin_cb(ax25, ax25_dev); - dev_hold_track(ax25_dev->dev, &ax25_dev->dev_tracker, GFP_ATOMIC); + netdev_hold(ax25_dev->dev, &ax25_dev->dev_tracker, GFP_ATOMIC); } done: diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c index 95a76d571c44..c5462486dbca 100644 --- a/net/ax25/ax25_dev.c +++ b/net/ax25/ax25_dev.c @@ -52,7 +52,8 @@ void ax25_dev_device_up(struct net_device *dev) { ax25_dev *ax25_dev; - if ((ax25_dev = kzalloc(sizeof(*ax25_dev), GFP_ATOMIC)) == NULL) { + ax25_dev = kzalloc(sizeof(*ax25_dev), GFP_KERNEL); + if (!ax25_dev) { printk(KERN_ERR "AX.25: ax25_dev_device_up - out of memory\n"); return; } @@ -60,7 +61,7 @@ void ax25_dev_device_up(struct net_device *dev) refcount_set(&ax25_dev->refcount, 1); dev->ax25_ptr = ax25_dev; ax25_dev->dev = dev; - dev_hold_track(dev, &ax25_dev->dev_tracker, GFP_ATOMIC); + netdev_hold(dev, &ax25_dev->dev_tracker, GFP_KERNEL); ax25_dev->forward = NULL; ax25_dev->device_up = true; @@ -136,7 +137,7 @@ unlock_put: spin_unlock_bh(&ax25_dev_lock); ax25_dev_put(ax25_dev); dev->ax25_ptr = NULL; - dev_put_track(dev, &ax25_dev->dev_tracker); + netdev_put(dev, &ax25_dev->dev_tracker); ax25_dev_put(ax25_dev); } @@ -205,7 +206,7 @@ void __exit ax25_dev_free(void) ax25_dev = ax25_dev_list; while (ax25_dev != NULL) { s = ax25_dev; - dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker); + netdev_put(ax25_dev->dev, &ax25_dev->dev_tracker); ax25_dev = 
ax25_dev->next; kfree(s); } diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 56f059b3c242..2ca96acbc50a 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -1420,9 +1420,6 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, void *data; int ret; - if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR) - return -EINVAL; - if (kattr->test.flags || kattr->test.cpu || kattr->test.batch_size) return -EINVAL; @@ -1487,9 +1484,6 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat u32 retval, duration; int ret = -EINVAL; - if (prog->type != BPF_PROG_TYPE_SK_LOOKUP) - return -EINVAL; - if (kattr->test.flags || kattr->test.cpu || kattr->test.batch_size) return -EINVAL; diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 47fcbade7389..a84a7cfb9d6d 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -274,7 +274,7 @@ static void destroy_nbp(struct net_bridge_port *p) p->br = NULL; p->dev = NULL; - dev_put_track(dev, &p->dev_tracker); + netdev_put(dev, &p->dev_tracker); kobject_put(&p->kobj); } @@ -423,7 +423,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, return ERR_PTR(-ENOMEM); p->br = br; - dev_hold_track(dev, &p->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &p->dev_tracker, GFP_KERNEL); p->dev = dev; p->path_cost = port_cost(dev); p->priority = 0x8000 >> BR_PORT_BITS; @@ -434,7 +434,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, br_stp_port_timer_init(p); err = br_multicast_add_port(p); if (err) { - dev_put_track(dev, &p->dev_tracker); + netdev_put(dev, &p->dev_tracker); kfree(p); p = ERR_PTR(err); } @@ -615,7 +615,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev, err = dev_set_allmulti(dev, 1); if (err) { br_multicast_del_port(p); - dev_put_track(dev, &p->dev_tracker); + netdev_put(dev, &p->dev_tracker); kfree(p); /* kobject not yet init'd, manually free */ goto err1; } @@ -725,7 +725,7 @@ err3: sysfs_remove_link(br->ifobj, p->dev->name); err2: 
br_multicast_del_port(p); - dev_put_track(dev, &p->dev_tracker); + netdev_put(dev, &p->dev_tracker); kobject_put(&p->kobj); dev_set_allmulti(dev, -1); err1: diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index fdcc641fc89a..589ff497d50c 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -1025,8 +1025,8 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, NL_SET_ERR_MSG_MOD(extack, "Port belongs to a different bridge device"); return -EINVAL; } - if (p->state == BR_STATE_DISABLED) { - NL_SET_ERR_MSG_MOD(extack, "Port is in disabled state"); + if (p->state == BR_STATE_DISABLED && entry->state != MDB_PERMANENT) { + NL_SET_ERR_MSG_MOD(extack, "Port is in disabled state and entry is not permanent"); return -EINVAL; } vg = nbp_vlan_group(p); @@ -1086,9 +1086,6 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry, if (!p->key.port || p->key.port->dev->ifindex != entry->ifindex) continue; - if (p->key.port->state == BR_STATE_DISABLED) - goto unlock; - br_multicast_del_pg(mp, p, pp); err = 0; break; @@ -1124,8 +1121,14 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, return -ENODEV; p = br_port_get_rtnl(pdev); - if (!p || p->br != br || p->state == BR_STATE_DISABLED) + if (!p) { + NL_SET_ERR_MSG_MOD(extack, "Net device is not a bridge port"); + return -EINVAL; + } + if (p->br != br) { + NL_SET_ERR_MSG_MOD(extack, "Port belongs to a different bridge device"); return -EINVAL; + } vg = nbp_vlan_group(p); } else { vg = br_vlan_group(br); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index bb01776d2d88..1ef14a099c6b 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1770,10 +1770,10 @@ static int br_fill_linkxstats(struct sk_buff *skb, if (v->vid == pvid) vxi.flags |= BRIDGE_VLAN_INFO_PVID; br_vlan_get_stats(v, &stats); - vxi.rx_bytes = stats.rx_bytes; - vxi.rx_packets = stats.rx_packets; - vxi.tx_bytes = stats.tx_bytes; - vxi.tx_packets = stats.tx_packets; + 
vxi.rx_bytes = u64_stats_read(&stats.rx_bytes); + vxi.rx_packets = u64_stats_read(&stats.rx_packets); + vxi.tx_bytes = u64_stats_read(&stats.tx_bytes); + vxi.tx_packets = u64_stats_read(&stats.tx_packets); if (nla_put(skb, BRIDGE_XSTATS_VLAN, sizeof(vxi), &vxi)) goto nla_put_failure; diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 0f5e75ccac79..6e53dc991409 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -505,8 +505,8 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br, if (br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) { stats = this_cpu_ptr(v->stats); u64_stats_update_begin(&stats->syncp); - stats->tx_bytes += skb->len; - stats->tx_packets++; + u64_stats_add(&stats->tx_bytes, skb->len); + u64_stats_inc(&stats->tx_packets); u64_stats_update_end(&stats->syncp); } @@ -624,8 +624,8 @@ static bool __allowed_ingress(const struct net_bridge *br, if (br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) { stats = this_cpu_ptr(v->stats); u64_stats_update_begin(&stats->syncp); - stats->rx_bytes += skb->len; - stats->rx_packets++; + u64_stats_add(&stats->rx_bytes, skb->len); + u64_stats_inc(&stats->rx_packets); u64_stats_update_end(&stats->syncp); } @@ -1379,16 +1379,16 @@ void br_vlan_get_stats(const struct net_bridge_vlan *v, cpu_stats = per_cpu_ptr(v->stats, i); do { start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); - rxpackets = cpu_stats->rx_packets; - rxbytes = cpu_stats->rx_bytes; - txbytes = cpu_stats->tx_bytes; - txpackets = cpu_stats->tx_packets; + rxpackets = u64_stats_read(&cpu_stats->rx_packets); + rxbytes = u64_stats_read(&cpu_stats->rx_bytes); + txbytes = u64_stats_read(&cpu_stats->tx_bytes); + txpackets = u64_stats_read(&cpu_stats->tx_packets); } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); - stats->rx_packets += rxpackets; - stats->rx_bytes += rxbytes; - stats->tx_bytes += txbytes; - stats->tx_packets += txpackets; + u64_stats_add(&stats->rx_packets, rxpackets); + u64_stats_add(&stats->rx_bytes, rxbytes); + 
u64_stats_add(&stats->tx_bytes, txbytes); + u64_stats_add(&stats->tx_packets, txpackets); } } @@ -1779,14 +1779,18 @@ static bool br_vlan_stats_fill(struct sk_buff *skb, return false; br_vlan_get_stats(v, &stats); - if (nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_RX_BYTES, stats.rx_bytes, + if (nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_RX_BYTES, + u64_stats_read(&stats.rx_bytes), BRIDGE_VLANDB_STATS_PAD) || nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_RX_PACKETS, - stats.rx_packets, BRIDGE_VLANDB_STATS_PAD) || - nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_TX_BYTES, stats.tx_bytes, + u64_stats_read(&stats.rx_packets), + BRIDGE_VLANDB_STATS_PAD) || + nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_TX_BYTES, + u64_stats_read(&stats.tx_bytes), BRIDGE_VLANDB_STATS_PAD) || nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_TX_PACKETS, - stats.tx_packets, BRIDGE_VLANDB_STATS_PAD)) + u64_stats_read(&stats.tx_packets), + BRIDGE_VLANDB_STATS_PAD)) goto out_err; nla_nest_end(skb, nest); diff --git a/net/can/Kconfig b/net/can/Kconfig index a9ac5ffab286..cb56be8e3862 100644 --- a/net/can/Kconfig +++ b/net/can/Kconfig @@ -15,7 +15,8 @@ menuconfig CAN PF_CAN is contained in <Documentation/networking/can.rst>. If you want CAN support you should say Y here and also to the - specific driver for your controller(s) below. + specific driver for your controller(s) under the Network device + support section. if CAN @@ -69,6 +70,4 @@ config CAN_ISOTP If you want to perform automotive vehicle diagnostic services (UDS), say 'y'. 
-source "drivers/net/can/Kconfig" - endif diff --git a/net/core/.gitignore b/net/core/.gitignore new file mode 100644 index 000000000000..df1e74372cce --- /dev/null +++ b/net/core/.gitignore @@ -0,0 +1 @@ +dropreason_str.c diff --git a/net/core/Makefile b/net/core/Makefile index a8e4f737692b..e8ce3bd283a6 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -4,7 +4,8 @@ # obj-y := sock.o request_sock.o skbuff.o datagram.o stream.o scm.o \ - gen_stats.o gen_estimator.o net_namespace.o secure_seq.o flow_dissector.o + gen_stats.o gen_estimator.o net_namespace.o secure_seq.o \ + flow_dissector.o dropreason_str.o obj-$(CONFIG_SYSCTL) += sysctl_net_core.o @@ -39,3 +40,23 @@ obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o obj-$(CONFIG_BPF_SYSCALL) += sock_map.o obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o obj-$(CONFIG_OF) += of_net.o + +clean-files := dropreason_str.c + +quiet_cmd_dropreason_str = GEN $@ +cmd_dropreason_str = awk -F ',' 'BEGIN{ print "\#include <net/dropreason.h>\n"; \ + print "const char * const drop_reasons[] = {" }\ + /^enum skb_drop/ { dr=1; }\ + /^\};/ { dr=0; }\ + /^\tSKB_DROP_REASON_/ {\ + if (dr) {\ + sub(/\tSKB_DROP_REASON_/, "", $$1);\ + printf "\t[SKB_DROP_REASON_%s] = \"%s\",\n", $$1, $$1;\ + }\ + }\ + END{ print "};" }' $< > $@ + +$(obj)/dropreason_str.c: $(srctree)/include/net/dropreason.h + $(call cmd,dropreason_str) + +$(obj)/dropreason_str.o: $(obj)/dropreason_str.c diff --git a/net/core/datagram.c b/net/core/datagram.c index 50f4faeea76c..35791f86bd1a 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -320,7 +320,6 @@ EXPORT_SYMBOL(skb_recv_datagram); void skb_free_datagram(struct sock *sk, struct sk_buff *skb) { consume_skb(skb); - sk_mem_reclaim_partial(sk); } EXPORT_SYMBOL(skb_free_datagram); @@ -336,7 +335,6 @@ void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len) slow = lock_sock_fast(sk); sk_peek_offset_bwd(sk, len); skb_orphan(skb); - sk_mem_reclaim_partial(sk); unlock_sock_fast(sk, slow); /* skb is 
now orphaned, can be freed outside of locked section */ @@ -396,7 +394,6 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags) NULL); kfree_skb(skb); - sk_mem_reclaim_partial(sk); return err; } EXPORT_SYMBOL(skb_kill_datagram); diff --git a/net/core/dev.c b/net/core/dev.c index 8e6f22961206..978ed0622d8f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3927,7 +3927,7 @@ int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) skb->pkt_type = PACKET_LOOPBACK; if (skb->ip_summed == CHECKSUM_NONE) skb->ip_summed = CHECKSUM_UNNECESSARY; - WARN_ON(!skb_dst(skb)); + DEBUG_NET_WARN_ON_ONCE(!skb_dst(skb)); skb_dst_force(skb); netif_rx(skb); return 0; @@ -6353,6 +6353,23 @@ int dev_set_threaded(struct net_device *dev, bool threaded) } EXPORT_SYMBOL(dev_set_threaded); +/* Double check that napi_get_frags() allocates skbs with + * skb->head being backed by slab, not a page fragment. + * This is to make sure bug fixed in 3226b158e67c + * ("net: avoid 32 x truesize under-estimation for tiny skbs") + * does not accidentally come back. + */ +static void napi_get_frags_check(struct napi_struct *napi) +{ + struct sk_buff *skb; + + local_bh_disable(); + skb = napi_get_frags(napi); + WARN_ON_ONCE(skb && skb->head_frag); + napi_free_frags(napi); + local_bh_enable(); +} + void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight) { @@ -6380,6 +6397,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, set_bit(NAPI_STATE_NPSVC, &napi->state); list_add_rcu(&napi->dev_list, &dev->napi_list); napi_hash_add(napi); + napi_get_frags_check(napi); /* Create kthread for this napi if dev->threaded is set. * Clear dev->threaded if kthread creation failed so that * threaded mode will not be enabled in napi_enable(). 
@@ -7465,7 +7483,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, adj->ref_nr = 1; adj->private = private; adj->ignore = false; - dev_hold_track(adj_dev, &adj->dev_tracker, GFP_KERNEL); + netdev_hold(adj_dev, &adj->dev_tracker, GFP_KERNEL); pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d; dev_hold on %s\n", dev->name, adj_dev->name, adj->ref_nr, adj_dev->name); @@ -7494,7 +7512,7 @@ remove_symlinks: if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list); free_adj: - dev_put_track(adj_dev, &adj->dev_tracker); + netdev_put(adj_dev, &adj->dev_tracker); kfree(adj); return ret; @@ -7536,7 +7554,7 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev, list_del_rcu(&adj->list); pr_debug("adjacency: dev_put for %s, because link removed from %s to %s\n", adj_dev->name, dev->name, adj_dev->name); - dev_put_track(adj_dev, &adj->dev_tracker); + netdev_put(adj_dev, &adj->dev_tracker); kfree_rcu(adj, rcu); } @@ -10064,7 +10082,7 @@ int register_netdevice(struct net_device *dev) dev_init_scheduler(dev); - dev_hold_track(dev, &dev->dev_registered_tracker, GFP_KERNEL); + netdev_hold(dev, &dev->dev_registered_tracker, GFP_KERNEL); list_netdevice(dev); add_device_randomness(dev->dev_addr, dev->addr_len); @@ -10463,23 +10481,23 @@ void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s, int cpu; for_each_possible_cpu(cpu) { + u64 rx_packets, rx_bytes, tx_packets, tx_bytes; const struct pcpu_sw_netstats *stats; - struct pcpu_sw_netstats tmp; unsigned int start; stats = per_cpu_ptr(netstats, cpu); do { start = u64_stats_fetch_begin_irq(&stats->syncp); - tmp.rx_packets = stats->rx_packets; - tmp.rx_bytes = stats->rx_bytes; - tmp.tx_packets = stats->tx_packets; - tmp.tx_bytes = stats->tx_bytes; + rx_packets = u64_stats_read(&stats->rx_packets); + rx_bytes = u64_stats_read(&stats->rx_bytes); + tx_packets = u64_stats_read(&stats->tx_packets); + tx_bytes = 
u64_stats_read(&stats->tx_bytes); } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); - s->rx_packets += tmp.rx_packets; - s->rx_bytes += tmp.rx_bytes; - s->tx_packets += tmp.tx_packets; - s->tx_bytes += tmp.tx_bytes; + s->rx_packets += rx_packets; + s->rx_bytes += rx_bytes; + s->tx_packets += tx_packets; + s->tx_bytes += tx_bytes; } } EXPORT_SYMBOL_GPL(dev_fetch_sw_netstats); @@ -10873,7 +10891,7 @@ void unregister_netdevice_many(struct list_head *head) synchronize_net(); list_for_each_entry(dev, head, unreg_list) { - dev_put_track(dev, &dev->dev_registered_tracker); + netdev_put(dev, &dev->dev_registered_tracker); net_set_todo(dev); } diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 4f6be442ae7e..7674bb9f3076 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -384,10 +384,10 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, return -ENODEV; if (!netif_is_bridge_master(dev)) return -EOPNOTSUPP; - dev_hold_track(dev, &dev_tracker, GFP_KERNEL); + netdev_hold(dev, &dev_tracker, GFP_KERNEL); rtnl_unlock(); err = br_ioctl_call(net, netdev_priv(dev), cmd, ifr, NULL); - dev_put_track(dev, &dev_tracker); + netdev_put(dev, &dev_tracker); rtnl_lock(); return err; diff --git a/net/core/devlink.c b/net/core/devlink.c index 5cc88490f18f..db61f3a341cb 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -7946,8 +7946,8 @@ static int devlink_nl_cmd_health_reporter_test_doit(struct sk_buff *skb, } struct devlink_stats { - u64 rx_bytes; - u64 rx_packets; + u64_stats_t rx_bytes; + u64_stats_t rx_packets; struct u64_stats_sync syncp; }; @@ -8104,12 +8104,12 @@ static void devlink_trap_stats_read(struct devlink_stats __percpu *trap_stats, cpu_stats = per_cpu_ptr(trap_stats, i); do { start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); - rx_packets = cpu_stats->rx_packets; - rx_bytes = cpu_stats->rx_bytes; + rx_packets = u64_stats_read(&cpu_stats->rx_packets); + rx_bytes = u64_stats_read(&cpu_stats->rx_bytes); } 
while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); - stats->rx_packets += rx_packets; - stats->rx_bytes += rx_bytes; + u64_stats_add(&stats->rx_packets, rx_packets); + u64_stats_add(&stats->rx_bytes, rx_bytes); } } @@ -8127,11 +8127,13 @@ devlink_trap_group_stats_put(struct sk_buff *msg, return -EMSGSIZE; if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS, - stats.rx_packets, DEVLINK_ATTR_PAD)) + u64_stats_read(&stats.rx_packets), + DEVLINK_ATTR_PAD)) goto nla_put_failure; if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES, - stats.rx_bytes, DEVLINK_ATTR_PAD)) + u64_stats_read(&stats.rx_bytes), + DEVLINK_ATTR_PAD)) goto nla_put_failure; nla_nest_end(msg, attr); @@ -8171,11 +8173,13 @@ static int devlink_trap_stats_put(struct sk_buff *msg, struct devlink *devlink, goto nla_put_failure; if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS, - stats.rx_packets, DEVLINK_ATTR_PAD)) + u64_stats_read(&stats.rx_packets), + DEVLINK_ATTR_PAD)) goto nla_put_failure; if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES, - stats.rx_bytes, DEVLINK_ATTR_PAD)) + u64_stats_read(&stats.rx_bytes), + DEVLINK_ATTR_PAD)) goto nla_put_failure; nla_nest_end(msg, attr); @@ -11641,8 +11645,8 @@ devlink_trap_stats_update(struct devlink_stats __percpu *trap_stats, stats = this_cpu_ptr(trap_stats); u64_stats_update_begin(&stats->syncp); - stats->rx_bytes += skb_len; - stats->rx_packets++; + u64_stats_add(&stats->rx_bytes, skb_len); + u64_stats_inc(&stats->rx_packets); u64_stats_update_end(&stats->syncp); } diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 41cac0e4834e..75501e1bdd25 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -48,19 +48,6 @@ static int trace_state = TRACE_OFF; static bool monitor_hw; -#undef EM -#undef EMe - -#define EM(a, b) [a] = #b, -#define EMe(a, b) [a] = #b - -/* drop_reasons is used to translate 'enum skb_drop_reason' to string, - * which is reported to user space. 
- */ -static const char * const drop_reasons[] = { - TRACE_SKB_DROP_REASON -}; - /* net_dm_mutex * * An overall lock guarding every operation coming from userspace. @@ -68,7 +55,7 @@ static const char * const drop_reasons[] = { static DEFINE_MUTEX(net_dm_mutex); struct net_dm_stats { - u64 dropped; + u64_stats_t dropped; struct u64_stats_sync syncp; }; @@ -543,7 +530,7 @@ static void net_dm_packet_trace_kfree_skb_hit(void *ignore, unlock_free: spin_unlock_irqrestore(&data->drop_queue.lock, flags); u64_stats_update_begin(&data->stats.syncp); - data->stats.dropped++; + u64_stats_inc(&data->stats.dropped); u64_stats_update_end(&data->stats.syncp); consume_skb(nskb); } @@ -877,7 +864,8 @@ net_dm_hw_metadata_copy(const struct devlink_trap_metadata *metadata) } hw_metadata->input_dev = metadata->input_dev; - dev_hold_track(hw_metadata->input_dev, &hw_metadata->dev_tracker, GFP_ATOMIC); + netdev_hold(hw_metadata->input_dev, &hw_metadata->dev_tracker, + GFP_ATOMIC); return hw_metadata; @@ -893,7 +881,7 @@ free_hw_metadata: static void net_dm_hw_metadata_free(struct devlink_trap_metadata *hw_metadata) { - dev_put_track(hw_metadata->input_dev, &hw_metadata->dev_tracker); + netdev_put(hw_metadata->input_dev, &hw_metadata->dev_tracker); kfree(hw_metadata->fa_cookie); kfree(hw_metadata->trap_name); kfree(hw_metadata->trap_group_name); @@ -998,7 +986,7 @@ net_dm_hw_trap_packet_probe(void *ignore, const struct devlink *devlink, unlock_free: spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags); u64_stats_update_begin(&hw_data->stats.syncp); - hw_data->stats.dropped++; + u64_stats_inc(&hw_data->stats.dropped); u64_stats_update_end(&hw_data->stats.syncp); net_dm_hw_metadata_free(n_hw_metadata); free: @@ -1445,10 +1433,10 @@ static void net_dm_stats_read(struct net_dm_stats *stats) do { start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); - dropped = cpu_stats->dropped; + dropped = u64_stats_read(&cpu_stats->dropped); } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, 
start)); - stats->dropped += dropped; + u64_stats_add(&stats->dropped, dropped); } } @@ -1464,7 +1452,7 @@ static int net_dm_stats_put(struct sk_buff *msg) return -EMSGSIZE; if (nla_put_u64_64bit(msg, NET_DM_ATTR_STATS_DROPPED, - stats.dropped, NET_DM_ATTR_PAD)) + u64_stats_read(&stats.dropped), NET_DM_ATTR_PAD)) goto nla_put_failure; nla_nest_end(msg, attr); @@ -1489,10 +1477,10 @@ static void net_dm_hw_stats_read(struct net_dm_stats *stats) do { start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); - dropped = cpu_stats->dropped; + dropped = u64_stats_read(&cpu_stats->dropped); } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); - stats->dropped += dropped; + u64_stats_add(&stats->dropped, dropped); } } @@ -1508,7 +1496,7 @@ static int net_dm_hw_stats_put(struct sk_buff *msg) return -EMSGSIZE; if (nla_put_u64_64bit(msg, NET_DM_ATTR_STATS_DROPPED, - stats.dropped, NET_DM_ATTR_PAD)) + u64_stats_read(&stats.dropped), NET_DM_ATTR_PAD)) goto nla_put_failure; nla_nest_end(msg, attr); diff --git a/net/core/dst.c b/net/core/dst.c index d16c2c9bfebd..bc9c9be4e080 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -49,7 +49,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops, unsigned short flags) { dst->dev = dev; - dev_hold_track(dev, &dst->dev_tracker, GFP_ATOMIC); + netdev_hold(dev, &dst->dev_tracker, GFP_ATOMIC); dst->ops = ops; dst_init_metrics(dst, dst_default_metrics.metrics, true); dst->expires = 0UL; @@ -117,7 +117,7 @@ struct dst_entry *dst_destroy(struct dst_entry * dst) if (dst->ops->destroy) dst->ops->destroy(dst); - dev_put_track(dst->dev, &dst->dev_tracker); + netdev_put(dst->dev, &dst->dev_tracker); lwtstate_put(dst->lwtstate); @@ -159,8 +159,8 @@ void dst_dev_put(struct dst_entry *dst) dst->input = dst_discard; dst->output = dst_discard_out; dst->dev = blackhole_netdev; - dev_replace_track(dev, blackhole_netdev, &dst->dev_tracker, - GFP_ATOMIC); + netdev_ref_replace(dev, blackhole_netdev, &dst->dev_tracker, + GFP_ATOMIC); } 
EXPORT_SYMBOL(dst_dev_put); diff --git a/net/core/failover.c b/net/core/failover.c index dcaa92a85ea2..864d2d83eff4 100644 --- a/net/core/failover.c +++ b/net/core/failover.c @@ -252,7 +252,7 @@ struct failover *failover_register(struct net_device *dev, return ERR_PTR(-ENOMEM); rcu_assign_pointer(failover->ops, ops); - dev_hold_track(dev, &failover->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &failover->dev_tracker, GFP_KERNEL); dev->priv_flags |= IFF_FAILOVER; rcu_assign_pointer(failover->failover_dev, dev); @@ -285,7 +285,7 @@ void failover_unregister(struct failover *failover) failover_dev->name); failover_dev->priv_flags &= ~IFF_FAILOVER; - dev_put_track(failover_dev, &failover->dev_tracker); + netdev_put(failover_dev, &failover->dev_tracker); spin_lock(&failover_lock); list_del(&failover->list); diff --git a/net/core/filter.c b/net/core/filter.c index 5d16d66727fc..994d91680b12 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6463,8 +6463,6 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, /* bpf_skc_lookup performs the core lookup for different types of sockets, * taking a reference on the socket if it doesn't have the flag SOCK_RCU_FREE. - * Returns the socket as an 'unsigned long' to simplify the casting in the - * callers to satisfy BPF_CALL declarations. 
*/ static struct sock * __bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, @@ -7466,6 +7464,114 @@ static const struct bpf_func_proto bpf_skb_set_tstamp_proto = { .arg3_type = ARG_ANYTHING, }; +#ifdef CONFIG_SYN_COOKIES +BPF_CALL_3(bpf_tcp_raw_gen_syncookie_ipv4, struct iphdr *, iph, + struct tcphdr *, th, u32, th_len) +{ + u32 cookie; + u16 mss; + + if (unlikely(th_len < sizeof(*th) || th_len != th->doff * 4)) + return -EINVAL; + + mss = tcp_parse_mss_option(th, 0) ?: TCP_MSS_DEFAULT; + cookie = __cookie_v4_init_sequence(iph, th, &mss); + + return cookie | ((u64)mss << 32); +} + +static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv4_proto = { + .func = bpf_tcp_raw_gen_syncookie_ipv4, + .gpl_only = true, /* __cookie_v4_init_sequence() is GPL */ + .pkt_access = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM, + .arg1_size = sizeof(struct iphdr), + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, +}; + +BPF_CALL_3(bpf_tcp_raw_gen_syncookie_ipv6, struct ipv6hdr *, iph, + struct tcphdr *, th, u32, th_len) +{ +#if IS_BUILTIN(CONFIG_IPV6) + const u16 mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - + sizeof(struct ipv6hdr); + u32 cookie; + u16 mss; + + if (unlikely(th_len < sizeof(*th) || th_len != th->doff * 4)) + return -EINVAL; + + mss = tcp_parse_mss_option(th, 0) ?: mss_clamp; + cookie = __cookie_v6_init_sequence(iph, th, &mss); + + return cookie | ((u64)mss << 32); +#else + return -EPROTONOSUPPORT; +#endif +} + +static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv6_proto = { + .func = bpf_tcp_raw_gen_syncookie_ipv6, + .gpl_only = true, /* __cookie_v6_init_sequence() is GPL */ + .pkt_access = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM, + .arg1_size = sizeof(struct ipv6hdr), + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, +}; + +BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv4, struct iphdr *, iph, + struct tcphdr *, th) +{ + u32 cookie = 
ntohl(th->ack_seq) - 1; + + if (__cookie_v4_check(iph, th, cookie) > 0) + return 0; + + return -EACCES; +} + +static const struct bpf_func_proto bpf_tcp_raw_check_syncookie_ipv4_proto = { + .func = bpf_tcp_raw_check_syncookie_ipv4, + .gpl_only = true, /* __cookie_v4_check is GPL */ + .pkt_access = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM, + .arg1_size = sizeof(struct iphdr), + .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM, + .arg2_size = sizeof(struct tcphdr), +}; + +BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv6, struct ipv6hdr *, iph, + struct tcphdr *, th) +{ +#if IS_BUILTIN(CONFIG_IPV6) + u32 cookie = ntohl(th->ack_seq) - 1; + + if (__cookie_v6_check(iph, th, cookie) > 0) + return 0; + + return -EACCES; +#else + return -EPROTONOSUPPORT; +#endif +} + +static const struct bpf_func_proto bpf_tcp_raw_check_syncookie_ipv6_proto = { + .func = bpf_tcp_raw_check_syncookie_ipv6, + .gpl_only = true, /* __cookie_v6_check is GPL */ + .pkt_access = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM, + .arg1_size = sizeof(struct ipv6hdr), + .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM, + .arg2_size = sizeof(struct tcphdr), +}; +#endif /* CONFIG_SYN_COOKIES */ + #endif /* CONFIG_INET */ bool bpf_helper_changes_pkt_data(void *func) @@ -7829,6 +7935,16 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_assign_proto; case BPF_FUNC_skb_set_tstamp: return &bpf_skb_set_tstamp_proto; +#ifdef CONFIG_SYN_COOKIES + case BPF_FUNC_tcp_raw_gen_syncookie_ipv4: + return &bpf_tcp_raw_gen_syncookie_ipv4_proto; + case BPF_FUNC_tcp_raw_gen_syncookie_ipv6: + return &bpf_tcp_raw_gen_syncookie_ipv6_proto; + case BPF_FUNC_tcp_raw_check_syncookie_ipv4: + return &bpf_tcp_raw_check_syncookie_ipv4_proto; + case BPF_FUNC_tcp_raw_check_syncookie_ipv6: + return &bpf_tcp_raw_check_syncookie_ipv6_proto; +#endif #endif default: return bpf_sk_base_func_proto(func_id); @@ -7878,6 +7994,16 @@ xdp_func_proto(enum bpf_func_id 
func_id, const struct bpf_prog *prog) return &bpf_tcp_check_syncookie_proto; case BPF_FUNC_tcp_gen_syncookie: return &bpf_tcp_gen_syncookie_proto; +#ifdef CONFIG_SYN_COOKIES + case BPF_FUNC_tcp_raw_gen_syncookie_ipv4: + return &bpf_tcp_raw_gen_syncookie_ipv4_proto; + case BPF_FUNC_tcp_raw_gen_syncookie_ipv6: + return &bpf_tcp_raw_gen_syncookie_ipv6_proto; + case BPF_FUNC_tcp_raw_check_syncookie_ipv4: + return &bpf_tcp_raw_check_syncookie_ipv4_proto; + case BPF_FUNC_tcp_raw_check_syncookie_ipv6: + return &bpf_tcp_raw_check_syncookie_ipv6_proto; +#endif #endif default: return bpf_sk_base_func_proto(func_id); diff --git a/net/core/link_watch.c b/net/core/link_watch.c index a244d3bade7d..aa6cb1f90966 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -110,7 +110,7 @@ static void linkwatch_add_event(struct net_device *dev) spin_lock_irqsave(&lweventlist_lock, flags); if (list_empty(&dev->link_watch_list)) { list_add_tail(&dev->link_watch_list, &lweventlist); - dev_hold_track(dev, &dev->linkwatch_dev_tracker, GFP_ATOMIC); + netdev_hold(dev, &dev->linkwatch_dev_tracker, GFP_ATOMIC); } spin_unlock_irqrestore(&lweventlist_lock, flags); } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 54625287ee5b..d8ec70622ecb 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -624,7 +624,7 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey, memcpy(n->primary_key, pkey, key_len); n->dev = dev; - dev_hold_track(dev, &n->dev_tracker, GFP_ATOMIC); + netdev_hold(dev, &n->dev_tracker, GFP_ATOMIC); /* Protocol specific setup. 
*/ if (tbl->constructor && (error = tbl->constructor(n)) < 0) { @@ -770,10 +770,10 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, write_pnet(&n->net, net); memcpy(n->key, pkey, key_len); n->dev = dev; - dev_hold_track(dev, &n->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &n->dev_tracker, GFP_KERNEL); if (tbl->pconstructor && tbl->pconstructor(n)) { - dev_put_track(dev, &n->dev_tracker); + netdev_put(dev, &n->dev_tracker); kfree(n); n = NULL; goto out; @@ -805,7 +805,7 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey, write_unlock_bh(&tbl->lock); if (tbl->pdestructor) tbl->pdestructor(n); - dev_put_track(n->dev, &n->dev_tracker); + netdev_put(n->dev, &n->dev_tracker); kfree(n); return 0; } @@ -838,7 +838,7 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl, n->next = NULL; if (tbl->pdestructor) tbl->pdestructor(n); - dev_put_track(n->dev, &n->dev_tracker); + netdev_put(n->dev, &n->dev_tracker); kfree(n); } return -ENOENT; @@ -879,7 +879,7 @@ void neigh_destroy(struct neighbour *neigh) if (dev->netdev_ops->ndo_neigh_destroy) dev->netdev_ops->ndo_neigh_destroy(dev, neigh); - dev_put_track(dev, &neigh->dev_tracker); + netdev_put(dev, &neigh->dev_tracker); neigh_parms_put(neigh->parms); neigh_dbg(2, "neigh %p is destroyed\n", neigh); @@ -1671,13 +1671,13 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, refcount_set(&p->refcnt, 1); p->reachable_time = neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); - dev_hold_track(dev, &p->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &p->dev_tracker, GFP_KERNEL); p->dev = dev; write_pnet(&p->net, net); p->sysctl_table = NULL; if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) { - dev_put_track(dev, &p->dev_tracker); + netdev_put(dev, &p->dev_tracker); kfree(p); return NULL; } @@ -1708,7 +1708,7 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) list_del(&parms->list); parms->dead = 1; write_unlock_bh(&tbl->lock); - 
dev_put_track(parms->dev, &parms->dev_tracker); + netdev_put(parms->dev, &parms->dev_tracker); call_rcu(&parms->rcu_head, neigh_rcu_free_parms); } EXPORT_SYMBOL(neigh_parms_release); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index a3642569fe53..d61afd21aab5 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1017,7 +1017,7 @@ static void rx_queue_release(struct kobject *kobj) #endif memset(kobj, 0, sizeof(*kobj)); - dev_put_track(queue->dev, &queue->dev_tracker); + netdev_put(queue->dev, &queue->dev_tracker); } static const void *rx_queue_namespace(struct kobject *kobj) @@ -1057,7 +1057,7 @@ static int rx_queue_add_kobject(struct net_device *dev, int index) /* Kobject_put later will trigger rx_queue_release call which * decreases dev refcount: Take that reference here */ - dev_hold_track(queue->dev, &queue->dev_tracker, GFP_KERNEL); + netdev_hold(queue->dev, &queue->dev_tracker, GFP_KERNEL); kobj->kset = dev->queues_kset; error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL, @@ -1620,7 +1620,7 @@ static void netdev_queue_release(struct kobject *kobj) struct netdev_queue *queue = to_netdev_queue(kobj); memset(kobj, 0, sizeof(*kobj)); - dev_put_track(queue->dev, &queue->dev_tracker); + netdev_put(queue->dev, &queue->dev_tracker); } static const void *netdev_queue_namespace(struct kobject *kobj) @@ -1660,7 +1660,7 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index) /* Kobject_put later will trigger netdev_queue_release call * which decreases dev refcount: Take that reference here */ - dev_hold_track(queue->dev, &queue->dev_tracker, GFP_KERNEL); + netdev_hold(queue->dev, &queue->dev_tracker, GFP_KERNEL); kobj->kset = dev->queues_kset; error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL, diff --git a/net/core/netpoll.c b/net/core/netpoll.c index db724463e7cd..5d27067b72d5 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -853,7 +853,7 @@ void netpoll_cleanup(struct netpoll *np) if (!np->dev) goto 
out; __netpoll_cleanup(np); - dev_put_track(np->dev, &np->dev_tracker); + netdev_put(np->dev, &np->dev_tracker); np->dev = NULL; out: rtnl_unlock(); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 84b62cd7bc57..88906ba6d9a7 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2100,7 +2100,7 @@ static int pktgen_setup_dev(const struct pktgen_net *pn, /* Clean old setups */ if (pkt_dev->odev) { - dev_put_track(pkt_dev->odev, &pkt_dev->dev_tracker); + netdev_put(pkt_dev->odev, &pkt_dev->dev_tracker); pkt_dev->odev = NULL; } @@ -3807,7 +3807,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) return add_dev_to_thread(t, pkt_dev); out2: - dev_put_track(pkt_dev->odev, &pkt_dev->dev_tracker); + netdev_put(pkt_dev->odev, &pkt_dev->dev_tracker); out1: #ifdef CONFIG_XFRM free_SAs(pkt_dev); @@ -3901,7 +3901,7 @@ static int pktgen_remove_device(struct pktgen_thread *t, /* Dis-associate from the interface */ if (pkt_dev->odev) { - dev_put_track(pkt_dev->odev, &pkt_dev->dev_tracker); + netdev_put(pkt_dev->odev, &pkt_dev->dev_tracker); pkt_dev->odev = NULL; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5b3559cb1d82..c62e42d0c531 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -91,6 +91,9 @@ static struct kmem_cache *skbuff_ext_cache __ro_after_init; int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS; EXPORT_SYMBOL(sysctl_max_skb_frags); +/* The array 'drop_reasons' is auto-generated in dropreason_str.c */ +EXPORT_SYMBOL(drop_reasons); + /** * skb_panic - private function for out-of-line support * @skb: buffer @@ -172,13 +175,14 @@ static struct sk_buff *napi_skb_cache_get(void) struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); struct sk_buff *skb; - if (unlikely(!nc->skb_count)) + if (unlikely(!nc->skb_count)) { nc->skb_count = kmem_cache_alloc_bulk(skbuff_head_cache, GFP_ATOMIC, NAPI_SKB_CACHE_BULK, nc->skb_cache); - if (unlikely(!nc->skb_count)) - return NULL; + if (unlikely(!nc->skb_count)) + 
return NULL; + } skb = nc->skb_cache[--nc->skb_count]; kasan_unpoison_object_data(skbuff_head_cache, skb); @@ -557,6 +561,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, struct sk_buff *skb; void *data; + DEBUG_NET_WARN_ON_ONCE(!in_softirq()); len += NET_SKB_PAD + NET_IP_ALIGN; /* If requested length is either too small or too big, @@ -725,7 +730,7 @@ void skb_release_head_state(struct sk_buff *skb) { skb_dst_drop(skb); if (skb->destructor) { - WARN_ON(in_hardirq()); + DEBUG_NET_WARN_ON_ONCE(in_hardirq()); skb->destructor(skb); } #if IS_ENABLED(CONFIG_NF_CONNTRACK) @@ -978,7 +983,7 @@ void napi_consume_skb(struct sk_buff *skb, int budget) return; } - lockdep_assert_in_softirq(); + DEBUG_NET_WARN_ON_ONCE(!in_softirq()); if (!skb_unref(skb)) return; @@ -3190,9 +3195,7 @@ skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen) } } - to->truesize += len + plen; - to->len += len + plen; - to->data_len += len + plen; + skb_len_add(to, len + plen); if (unlikely(skb_orphan_frags(from, GFP_ATOMIC))) { skb_tx_error(from); @@ -3629,13 +3632,8 @@ onlymerged: tgt->ip_summed = CHECKSUM_PARTIAL; skb->ip_summed = CHECKSUM_PARTIAL; - /* Yak, is it really working this way? Some helper please? 
*/ - skb->len -= shiftlen; - skb->data_len -= shiftlen; - skb->truesize -= shiftlen; - tgt->len += shiftlen; - tgt->data_len += shiftlen; - tgt->truesize += shiftlen; + skb_len_add(skb, -shiftlen); + skb_len_add(tgt, shiftlen); return shiftlen; } diff --git a/net/core/skmsg.c b/net/core/skmsg.c index b0fcd0200e84..fc69154bbc88 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -720,6 +720,7 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node) psock->eval = __SK_NONE; psock->sk_proto = prot; psock->saved_unhash = prot->unhash; + psock->saved_destroy = prot->destroy; psock->saved_close = prot->close; psock->saved_write_space = sk->sk_write_space; diff --git a/net/core/sock.c b/net/core/sock.c index 2ff40dd0a7a6..92a0296ccb18 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -991,7 +991,7 @@ EXPORT_SYMBOL(sock_set_mark); static void sock_release_reserved_memory(struct sock *sk, int bytes) { /* Round down bytes to multiple of pages */ - bytes &= ~(SK_MEM_QUANTUM - 1); + bytes = round_down(bytes, PAGE_SIZE); WARN_ON(bytes > sk->sk_reserved_mem); sk->sk_reserved_mem -= bytes; @@ -1019,7 +1019,8 @@ static int sock_reserve_memory(struct sock *sk, int bytes) return -ENOMEM; /* pre-charge to forward_alloc */ - allocated = sk_memory_allocated_add(sk, pages); + sk_memory_allocated_add(sk, pages); + allocated = sk_memory_allocated(sk); /* If the system goes into memory pressure with this * precharge, give up and return error. 
*/ @@ -1028,9 +1029,9 @@ static int sock_reserve_memory(struct sock *sk, int bytes) mem_cgroup_uncharge_skmem(sk->sk_memcg, pages); return -ENOMEM; } - sk->sk_forward_alloc += pages << SK_MEM_QUANTUM_SHIFT; + sk->sk_forward_alloc += pages << PAGE_SHIFT; - sk->sk_reserved_mem += pages << SK_MEM_QUANTUM_SHIFT; + sk->sk_reserved_mem += pages << PAGE_SHIFT; return 0; } @@ -2844,7 +2845,7 @@ void __release_sock(struct sock *sk) do { next = skb->next; prefetch(next); - WARN_ON_ONCE(skb_dst_is_noref(skb)); + DEBUG_NET_WARN_ON_ONCE(skb_dst_is_noref(skb)); skb_mark_not_on_list(skb); sk_backlog_rcv(sk, skb); @@ -2906,11 +2907,13 @@ EXPORT_SYMBOL(sk_wait_data); */ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) { - struct proto *prot = sk->sk_prot; - long allocated = sk_memory_allocated_add(sk, amt); bool memcg_charge = mem_cgroup_sockets_enabled && sk->sk_memcg; + struct proto *prot = sk->sk_prot; bool charged = true; + long allocated; + sk_memory_allocated_add(sk, amt); + allocated = sk_memory_allocated(sk); if (memcg_charge && !(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt, gfp_memcg_charge()))) @@ -2987,7 +2990,6 @@ suppress_allocation: return 0; } -EXPORT_SYMBOL(__sk_mem_raise_allocated); /** * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated @@ -3003,10 +3005,10 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind) { int ret, amt = sk_mem_pages(size); - sk->sk_forward_alloc += amt << SK_MEM_QUANTUM_SHIFT; + sk->sk_forward_alloc += amt << PAGE_SHIFT; ret = __sk_mem_raise_allocated(sk, size, amt, kind); if (!ret) - sk->sk_forward_alloc -= amt << SK_MEM_QUANTUM_SHIFT; + sk->sk_forward_alloc -= amt << PAGE_SHIFT; return ret; } EXPORT_SYMBOL(__sk_mem_schedule); @@ -3029,17 +3031,16 @@ void __sk_mem_reduce_allocated(struct sock *sk, int amount) (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0))) sk_leave_memory_pressure(sk); } -EXPORT_SYMBOL(__sk_mem_reduce_allocated); /** * __sk_mem_reclaim - reclaim 
sk_forward_alloc and memory_allocated * @sk: socket - * @amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple) + * @amount: number of bytes (rounded down to a PAGE_SIZE multiple) */ void __sk_mem_reclaim(struct sock *sk, int amount) { - amount >>= SK_MEM_QUANTUM_SHIFT; - sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT; + amount >>= PAGE_SHIFT; + sk->sk_forward_alloc -= amount << PAGE_SHIFT; __sk_mem_reduce_allocated(sk, amount); } EXPORT_SYMBOL(__sk_mem_reclaim); @@ -3798,6 +3799,10 @@ int proto_register(struct proto *prot, int alloc_slab) pr_err("%s: missing sysctl_mem\n", prot->name); return -EINVAL; } + if (prot->memory_allocated && !prot->per_cpu_fw_alloc) { + pr_err("%s: missing per_cpu_fw_alloc\n", prot->name); + return -EINVAL; + } if (alloc_slab) { prot->slab = kmem_cache_create_usercopy(prot->name, prot->obj_size, 0, diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 81d4b4756a02..9f08ccfaf6da 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -1561,6 +1561,29 @@ void sock_map_unhash(struct sock *sk) } EXPORT_SYMBOL_GPL(sock_map_unhash); +void sock_map_destroy(struct sock *sk) +{ + void (*saved_destroy)(struct sock *sk); + struct sk_psock *psock; + + rcu_read_lock(); + psock = sk_psock_get(sk); + if (unlikely(!psock)) { + rcu_read_unlock(); + if (sk->sk_prot->destroy) + sk->sk_prot->destroy(sk); + return; + } + + saved_destroy = psock->saved_destroy; + sock_map_remove_links(sk, psock); + rcu_read_unlock(); + sk_psock_stop(psock, true); + sk_psock_put(sk, psock); + saved_destroy(sk); +} +EXPORT_SYMBOL_GPL(sock_map_destroy); + void sock_map_close(struct sock *sk, long timeout) { void (*saved_close)(struct sock *sk, long timeout); diff --git a/net/core/stream.c b/net/core/stream.c index 06b36c730ce8..ccc083cdef23 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -196,13 +196,13 @@ void sk_stream_kill_queues(struct sock *sk) __skb_queue_purge(&sk->sk_receive_queue); /* Next, the write queue. 
*/ - WARN_ON(!skb_queue_empty(&sk->sk_write_queue)); + WARN_ON_ONCE(!skb_queue_empty(&sk->sk_write_queue)); /* Account for returned memory. */ sk_mem_reclaim_final(sk); - WARN_ON(sk->sk_wmem_queued); - WARN_ON(sk->sk_forward_alloc); + WARN_ON_ONCE(sk->sk_wmem_queued); + WARN_ON_ONCE(sk->sk_forward_alloc); /* It is _impossible_ for the backlog to contain anything * when we get here. All user references to this socket diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index dc92a67baea3..aa4f43f52499 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -149,6 +149,7 @@ static DEFINE_RWLOCK(dn_hash_lock); static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE]; static struct hlist_head dn_wild_sk; static atomic_long_t decnet_memory_allocated; +static DEFINE_PER_CPU(int, decnet_memory_per_cpu_fw_alloc); static int __dn_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen, int flags); @@ -454,7 +455,10 @@ static struct proto dn_proto = { .owner = THIS_MODULE, .enter_memory_pressure = dn_enter_memory_pressure, .memory_pressure = &dn_memory_pressure, + .memory_allocated = &decnet_memory_allocated, + .per_cpu_fw_alloc = &decnet_memory_per_cpu_fw_alloc, + .sysctl_mem = sysctl_decnet_mem, .sysctl_wmem = sysctl_decnet_wmem, .sysctl_rmem = sysctl_decnet_rmem, diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 8cb87b5067ee..63853fff4e2f 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -132,6 +132,13 @@ config NET_DSA_TAG_RTL8_4 Say Y or M if you want to enable support for tagging frames for Realtek switches with 8 byte protocol 4 tags, such as the Realtek RTL8365MB-VC. +config NET_DSA_TAG_RZN1_A5PSW + tristate "Tag driver for Renesas RZ/N1 A5PSW switch" + help + Say Y or M if you want to enable support for tagging frames for + Renesas RZ/N1 embedded switch that uses an 8 byte tag located after + destination MAC address. 
+ config NET_DSA_TAG_LAN9303 tristate "Tag driver for SMSC/Microchip LAN9303 family of switches" help diff --git a/net/dsa/Makefile b/net/dsa/Makefile index 9f75820e7c98..af28c24ead18 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -17,6 +17,7 @@ obj-$(CONFIG_NET_DSA_TAG_OCELOT_8021Q) += tag_ocelot_8021q.o obj-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o obj-$(CONFIG_NET_DSA_TAG_RTL4_A) += tag_rtl4_a.o obj-$(CONFIG_NET_DSA_TAG_RTL8_4) += tag_rtl8_4.o +obj-$(CONFIG_NET_DSA_TAG_RZN1_A5PSW) += tag_rzn1_a5psw.o obj-$(CONFIG_NET_DSA_TAG_SJA1105) += tag_sja1105.o obj-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o obj-$(CONFIG_NET_DSA_TAG_XRS700X) += tag_xrs700x.o diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 801a5d445833..760ca58307a3 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -935,10 +935,10 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev, s = per_cpu_ptr(dev->tstats, i); do { start = u64_stats_fetch_begin_irq(&s->syncp); - tx_packets = s->tx_packets; - tx_bytes = s->tx_bytes; - rx_packets = s->rx_packets; - rx_bytes = s->rx_bytes; + tx_packets = u64_stats_read(&s->tx_packets); + tx_bytes = u64_stats_read(&s->tx_bytes); + rx_packets = u64_stats_read(&s->rx_packets); + rx_bytes = u64_stats_read(&s->rx_bytes); } while (u64_stats_fetch_retry_irq(&s->syncp, start)); data[0] += tx_packets; data[1] += tx_bytes; @@ -1002,6 +1002,18 @@ dsa_slave_get_eth_ctrl_stats(struct net_device *dev, ds->ops->get_eth_ctrl_stats(ds, dp->index, ctrl_stats); } +static void +dsa_slave_get_rmon_stats(struct net_device *dev, + struct ethtool_rmon_stats *rmon_stats, + const struct ethtool_rmon_hist_range **ranges) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (ds->ops->get_rmon_stats) + ds->ops->get_rmon_stats(ds, dp->index, rmon_stats, ranges); +} + static void dsa_slave_net_selftest(struct net_device *ndev, struct ethtool_test *etest, u64 *buf) { @@ -2081,6 +2093,7 @@ static const struct ethtool_ops 
dsa_slave_ethtool_ops = { .get_eth_phy_stats = dsa_slave_get_eth_phy_stats, .get_eth_mac_stats = dsa_slave_get_eth_mac_stats, .get_eth_ctrl_stats = dsa_slave_get_eth_ctrl_stats, + .get_rmon_stats = dsa_slave_get_rmon_stats, .set_wol = dsa_slave_set_wol, .get_wol = dsa_slave_get_wol, .set_eee = dsa_slave_set_eee, @@ -2460,8 +2473,9 @@ static int dsa_slave_changeupper(struct net_device *dev, if (!err) dsa_bridge_mtu_normalization(dp); if (err == -EOPNOTSUPP) { - NL_SET_ERR_MSG_MOD(extack, - "Offloading not supported"); + if (!extack->_msg) + NL_SET_ERR_MSG_MOD(extack, + "Offloading not supported"); err = 0; } err = notifier_from_errno(err); diff --git a/net/dsa/tag_rzn1_a5psw.c b/net/dsa/tag_rzn1_a5psw.c new file mode 100644 index 000000000000..e2a5ee6ae688 --- /dev/null +++ b/net/dsa/tag_rzn1_a5psw.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2022 Schneider Electric + * + * Clément Léger <clement.leger@bootlin.com> + */ + +#include <linux/bitfield.h> +#include <linux/etherdevice.h> +#include <linux/if_ether.h> +#include <net/dsa.h> + +#include "dsa_priv.h" + +/* To define the outgoing port and to discover the incoming port a TAG is + * inserted after Src MAC : + * + * Dest MAC Src MAC TAG Type + * ...| 1 2 3 4 5 6 | 1 2 3 4 5 6 | 1 2 3 4 5 6 7 8 | 1 2 |... 
+ * |<--------------->| + * + * See struct a5psw_tag for layout + */ + +#define ETH_P_DSA_A5PSW 0xE001 +#define A5PSW_TAG_LEN 8 +#define A5PSW_CTRL_DATA_FORCE_FORWARD BIT(0) +/* This is both used for xmit tag and rcv tagging */ +#define A5PSW_CTRL_DATA_PORT GENMASK(3, 0) + +struct a5psw_tag { + __be16 ctrl_tag; + __be16 ctrl_data; + __be16 ctrl_data2_hi; + __be16 ctrl_data2_lo; +}; + +static struct sk_buff *a5psw_tag_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct a5psw_tag *ptag; + u32 data2_val; + + BUILD_BUG_ON(sizeof(*ptag) != A5PSW_TAG_LEN); + + /* The Ethernet switch we are interfaced with needs packets to be at + * least 60 bytes otherwise they will be discarded when they enter the + * switch port logic. + */ + if (__skb_put_padto(skb, ETH_ZLEN, false)) + return NULL; + + /* provide 'A5PSW_TAG_LEN' bytes additional space */ + skb_push(skb, A5PSW_TAG_LEN); + + /* make room between MACs and Ether-Type to insert tag */ + dsa_alloc_etype_header(skb, A5PSW_TAG_LEN); + + ptag = dsa_etype_header_pos_tx(skb); + + data2_val = FIELD_PREP(A5PSW_CTRL_DATA_PORT, BIT(dp->index)); + ptag->ctrl_tag = htons(ETH_P_DSA_A5PSW); + ptag->ctrl_data = htons(A5PSW_CTRL_DATA_FORCE_FORWARD); + ptag->ctrl_data2_lo = htons(data2_val); + ptag->ctrl_data2_hi = 0; + + return skb; +} + +static struct sk_buff *a5psw_tag_rcv(struct sk_buff *skb, + struct net_device *dev) +{ + struct a5psw_tag *tag; + int port; + + if (unlikely(!pskb_may_pull(skb, A5PSW_TAG_LEN))) { + dev_warn_ratelimited(&dev->dev, + "Dropping packet, cannot pull\n"); + return NULL; + } + + tag = dsa_etype_header_pos_rx(skb); + + if (tag->ctrl_tag != htons(ETH_P_DSA_A5PSW)) { + dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid TAG marker\n"); + return NULL; + } + + port = FIELD_GET(A5PSW_CTRL_DATA_PORT, ntohs(tag->ctrl_data)); + + skb->dev = dsa_master_find_slave(dev, 0, port); + if (!skb->dev) + return NULL; + + skb_pull_rcsum(skb, A5PSW_TAG_LEN); + 
dsa_strip_etype_header(skb, A5PSW_TAG_LEN); + + dsa_default_offload_fwd_mark(skb); + + return skb; +} + +static const struct dsa_device_ops a5psw_netdev_ops = { + .name = "a5psw", + .proto = DSA_TAG_PROTO_RZN1_A5PSW, + .xmit = a5psw_tag_xmit, + .rcv = a5psw_tag_rcv, + .needed_headroom = A5PSW_TAG_LEN, +}; + +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_A5PSW); +module_dsa_tag_driver(a5psw_netdev_ops); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 326e14ee05db..6a7308de192d 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -369,22 +369,9 @@ EXPORT_SYMBOL(ethtool_convert_legacy_u32_to_link_mode); bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, const unsigned long *src) { - bool retval = true; - - /* TODO: following test will soon always be true */ - if (__ETHTOOL_LINK_MODE_MASK_NBITS > 32) { - __ETHTOOL_DECLARE_LINK_MODE_MASK(ext); - - linkmode_zero(ext); - bitmap_fill(ext, 32); - bitmap_complement(ext, ext, __ETHTOOL_LINK_MODE_MASK_NBITS); - if (linkmode_intersects(ext, src)) { - /* src mask goes beyond bit 31 */ - retval = false; - } - } *legacy_u32 = src[0]; - return retval; + return find_next_bit(src, __ETHTOOL_LINK_MODE_MASK_NBITS, 32) == + __ETHTOOL_LINK_MODE_MASK_NBITS; } EXPORT_SYMBOL(ethtool_convert_link_mode_to_legacy_u32); @@ -2010,7 +1997,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) * removal of the device. 
*/ busy = true; - dev_hold_track(dev, &dev_tracker, GFP_KERNEL); + netdev_hold(dev, &dev_tracker, GFP_KERNEL); rtnl_unlock(); if (rc == 0) { @@ -2034,7 +2021,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) } rtnl_lock(); - dev_put_track(dev, &dev_tracker); + netdev_put(dev, &dev_tracker); busy = false; (void) ops->set_phys_id(dev, ETHTOOL_ID_INACTIVE); diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 5fe8f4ae2ceb..e26079e11835 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -402,7 +402,7 @@ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info) ops->cleanup_data(reply_data); genlmsg_end(rskb, reply_payload); - dev_put_track(req_info->dev, &req_info->dev_tracker); + netdev_put(req_info->dev, &req_info->dev_tracker); kfree(reply_data); kfree(req_info); return genlmsg_reply(rskb, info); @@ -414,7 +414,7 @@ err_cleanup: if (ops->cleanup_data) ops->cleanup_data(reply_data); err_dev: - dev_put_track(req_info->dev, &req_info->dev_tracker); + netdev_put(req_info->dev, &req_info->dev_tracker); kfree(reply_data); kfree(req_info); return ret; @@ -550,7 +550,7 @@ static int ethnl_default_start(struct netlink_callback *cb) * same parser as for non-dump (doit) requests is used, it * would take reference to the device if it finds one */ - dev_put_track(req_info->dev, &req_info->dev_tracker); + netdev_put(req_info->dev, &req_info->dev_tracker); req_info->dev = NULL; } if (ret < 0) diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 7919ddb2371c..c0d587611854 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -237,7 +237,7 @@ struct ethnl_req_info { static inline void ethnl_parse_header_dev_put(struct ethnl_req_info *req_info) { - dev_put_track(req_info->dev, &req_info->dev_tracker); + netdev_put(req_info->dev, &req_info->dev_tracker); } /** diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 93da9f783bec..da81f56fdd1c 100644 --- a/net/ipv4/af_inet.c +++ 
b/net/ipv4/af_inet.c @@ -148,10 +148,10 @@ void inet_sock_destruct(struct sock *sk) return; } - WARN_ON(atomic_read(&sk->sk_rmem_alloc)); - WARN_ON(refcount_read(&sk->sk_wmem_alloc)); - WARN_ON(sk->sk_wmem_queued); - WARN_ON(sk_forward_alloc_get(sk)); + WARN_ON_ONCE(atomic_read(&sk->sk_rmem_alloc)); + WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc)); + WARN_ON_ONCE(sk->sk_wmem_queued); + WARN_ON_ONCE(sk_forward_alloc_get(sk)); kfree(rcu_dereference_protected(inet->inet_opt, 1)); dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1)); @@ -1929,6 +1929,8 @@ static int __init inet_init(void) sock_skb_cb_check_size(sizeof(struct inet_skb_parm)); + raw_hashinfo_init(&raw_v4_hashinfo); + rc = proto_register(&tcp_prot, 1); if (rc) goto out; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index b2366ad540e6..92b778e423df 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -244,7 +244,7 @@ void in_dev_finish_destroy(struct in_device *idev) #ifdef NET_REFCNT_DEBUG pr_debug("%s: %p=%s\n", __func__, idev, dev ? 
dev->name : "NIL"); #endif - dev_put_track(dev, &idev->dev_tracker); + netdev_put(dev, &idev->dev_tracker); if (!idev->dead) pr_err("Freeing alive in_device %p\n", idev); else @@ -272,7 +272,7 @@ static struct in_device *inetdev_init(struct net_device *dev) if (IPV4_DEVCONF(in_dev->cnf, FORWARDING)) dev_disable_lro(dev); /* Reference in_dev->dev */ - dev_hold_track(dev, &in_dev->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL); /* Account for reference dev->ip_ptr (below) */ refcount_set(&in_dev->refcnt, 1); diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index b21238df3301..7eae8d686e20 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -502,9 +502,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * nfrags++; - skb->len += tailen; - skb->data_len += tailen; - skb->truesize += tailen; + skb_len_add(skb, tailen); if (sk && sk_fullsock(sk)) refcount_add(tailen, &sk->sk_wmem_alloc); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index a57ba23571c9..a5439a8414d4 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -211,7 +211,7 @@ static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp) void fib_nh_common_release(struct fib_nh_common *nhc) { - dev_put_track(nhc->nhc_dev, &nhc->nhc_dev_tracker); + netdev_put(nhc->nhc_dev, &nhc->nhc_dev_tracker); lwtstate_put(nhc->nhc_lwtstate); rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output); rt_fibinfo_free(&nhc->nhc_rth_input); @@ -1057,7 +1057,8 @@ static int fib_check_nh_v6_gw(struct net *net, struct fib_nh *nh, err = ipv6_stub->fib6_nh_init(net, &fib6_nh, &cfg, GFP_KERNEL, extack); if (!err) { nh->fib_nh_dev = fib6_nh.fib_nh_dev; - dev_hold_track(nh->fib_nh_dev, &nh->fib_nh_dev_tracker, GFP_KERNEL); + netdev_hold(nh->fib_nh_dev, &nh->fib_nh_dev_tracker, + GFP_KERNEL); nh->fib_nh_oif = nh->fib_nh_dev->ifindex; nh->fib_nh_scope = RT_SCOPE_LINK; @@ -1141,7 +1142,7 @@ static int fib_check_nh_v4_gw(struct net *net, struct 
fib_nh *nh, u32 table, if (!netif_carrier_ok(dev)) nh->fib_nh_flags |= RTNH_F_LINKDOWN; nh->fib_nh_dev = dev; - dev_hold_track(dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC); + netdev_hold(dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC); nh->fib_nh_scope = RT_SCOPE_LINK; return 0; } @@ -1195,7 +1196,7 @@ static int fib_check_nh_v4_gw(struct net *net, struct fib_nh *nh, u32 table, "No egress device for nexthop gateway"); goto out; } - dev_hold_track(dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC); + netdev_hold(dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC); if (!netif_carrier_ok(dev)) nh->fib_nh_flags |= RTNH_F_LINKDOWN; err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN; @@ -1229,7 +1230,7 @@ static int fib_check_nh_nongw(struct net *net, struct fib_nh *nh, } nh->fib_nh_dev = in_dev->dev; - dev_hold_track(nh->fib_nh_dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC); + netdev_hold(nh->fib_nh_dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC); nh->fib_nh_scope = RT_SCOPE_HOST; if (!netif_carrier_ok(nh->fib_nh_dev)) nh->fib_nh_flags |= RTNH_F_LINKDOWN; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 00b4bf26fd93..5e32a2f86fbd 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1214,9 +1214,7 @@ alloc_new_skb: pfrag->offset += copy; skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); - skb->len += copy; - skb->data_len += copy; - skb->truesize += copy; + skb_len_add(skb, copy); wmem_alloc_delta += copy; } else { err = skb_zerocopy_iter_dgram(skb, from, copy); @@ -1443,9 +1441,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, skb->csum = csum_block_add(skb->csum, csum, skb->len); } - skb->len += len; - skb->data_len += len; - skb->truesize += len; + skb_len_add(skb, len); refcount_add(len, &sk->sk_wmem_alloc); offset += len; size -= len; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 13e6329784fb..73651d17e51f 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -77,7 +77,12 @@ struct ipmr_result { * Note that the changes are semaphored via 
rtnl_lock. */ -static DEFINE_RWLOCK(mrt_lock); +static DEFINE_SPINLOCK(mrt_lock); + +static struct net_device *vif_dev_read(const struct vif_device *vif) +{ + return rcu_dereference(vif->dev); +} /* Multicast router control variables */ @@ -100,11 +105,11 @@ static void ipmr_free_table(struct mr_table *mrt); static void ip_mr_forward(struct net *net, struct mr_table *mrt, struct net_device *dev, struct sk_buff *skb, struct mfc_cache *cache, int local); -static int ipmr_cache_report(struct mr_table *mrt, +static int ipmr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt, vifi_t vifi, int assert); static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, int cmd); -static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt); +static void igmpmsg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt); static void mroute_clean_tables(struct mr_table *mrt, int flags); static void ipmr_expire_process(struct timer_list *t); @@ -501,11 +506,15 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) return err; } - read_lock(&mrt_lock); dev->stats.tx_bytes += skb->len; dev->stats.tx_packets++; - ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT); - read_unlock(&mrt_lock); + rcu_read_lock(); + + /* Pairs with WRITE_ONCE() in vif_add() and vif_delete() */ + ipmr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num), + IGMPMSG_WHOLEPKT); + + rcu_read_unlock(); kfree_skb(skb); return NETDEV_TX_OK; } @@ -572,6 +581,7 @@ static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb, { struct net_device *reg_dev = NULL; struct iphdr *encap; + int vif_num; encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); /* Check that: @@ -584,11 +594,10 @@ static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb, ntohs(encap->tot_len) + pimlen > skb->len) return 1; - read_lock(&mrt_lock); - if (mrt->mroute_reg_vif_num >= 0) - reg_dev = 
mrt->vif_table[mrt->mroute_reg_vif_num].dev; - read_unlock(&mrt_lock); - + /* Pairs with WRITE_ONCE() in vif_add()/vif_delete() */ + vif_num = READ_ONCE(mrt->mroute_reg_vif_num); + if (vif_num >= 0) + reg_dev = vif_dev_read(&mrt->vif_table[vif_num]); if (!reg_dev) return 1; @@ -614,10 +623,11 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) static int call_ipmr_vif_entry_notifiers(struct net *net, enum fib_event_type event_type, struct vif_device *vif, + struct net_device *vif_dev, vifi_t vif_index, u32 tb_id) { return mr_call_vif_notifiers(net, RTNL_FAMILY_IPMR, event_type, - vif, vif_index, tb_id, + vif, vif_dev, vif_index, tb_id, &net->ipv4.ipmr_seq); } @@ -649,22 +659,19 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify, v = &mrt->vif_table[vifi]; - if (VIF_EXISTS(mrt, vifi)) - call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, vifi, - mrt->id); - - write_lock_bh(&mrt_lock); - dev = v->dev; - v->dev = NULL; - - if (!dev) { - write_unlock_bh(&mrt_lock); + dev = rtnl_dereference(v->dev); + if (!dev) return -EADDRNOTAVAIL; - } - if (vifi == mrt->mroute_reg_vif_num) - mrt->mroute_reg_vif_num = -1; + spin_lock(&mrt_lock); + call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, dev, + vifi, mrt->id); + RCU_INIT_POINTER(v->dev, NULL); + if (vifi == mrt->mroute_reg_vif_num) { + /* Pairs with READ_ONCE() in ipmr_cache_report() and reg_vif_xmit() */ + WRITE_ONCE(mrt->mroute_reg_vif_num, -1); + } if (vifi + 1 == mrt->maxvif) { int tmp; @@ -672,10 +679,10 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify, if (VIF_EXISTS(mrt, tmp)) break; } - mrt->maxvif = tmp+1; + WRITE_ONCE(mrt->maxvif, tmp + 1); } - write_unlock_bh(&mrt_lock); + spin_unlock(&mrt_lock); dev_set_allmulti(dev, -1); @@ -691,7 +698,7 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify, if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER) && !notify) unregister_netdevice_queue(dev, head); - dev_put_track(dev, &v->dev_tracker);
+ netdev_put(dev, &v->dev_tracker); return 0; } @@ -777,7 +784,7 @@ out: spin_unlock(&mfc_unres_lock); } -/* Fill oifs list. It is called under write locked mrt_lock. */ +/* Fill oifs list. It is called under locked mrt_lock. */ static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache, unsigned char *ttls) { @@ -889,15 +896,18 @@ static int vif_add(struct net *net, struct mr_table *mrt, v->remote = vifc->vifc_rmt_addr.s_addr; /* And finish update writing critical data */ - write_lock_bh(&mrt_lock); - v->dev = dev; + spin_lock(&mrt_lock); + rcu_assign_pointer(v->dev, dev); netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC); - if (v->flags & VIFF_REGISTER) - mrt->mroute_reg_vif_num = vifi; + if (v->flags & VIFF_REGISTER) { + /* Pairs with READ_ONCE() in ipmr_cache_report() and reg_vif_xmit() */ + WRITE_ONCE(mrt->mroute_reg_vif_num, vifi); + } if (vifi+1 > mrt->maxvif) - mrt->maxvif = vifi+1; - write_unlock_bh(&mrt_lock); - call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, vifi, mrt->id); + WRITE_ONCE(mrt->maxvif, vifi + 1); + spin_unlock(&mrt_lock); + call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, dev, + vifi, mrt->id); return 0; } @@ -1001,9 +1011,9 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, /* Bounce a cache query up to mrouted and netlink. * - * Called under mrt_lock. + * Called under rcu_read_lock(). 
*/ -static int ipmr_cache_report(struct mr_table *mrt, +static int ipmr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt, vifi_t vifi, int assert) { const int ihl = ip_hdrlen(pkt); @@ -1038,8 +1048,11 @@ static int ipmr_cache_report(struct mr_table *mrt, msg->im_vif = vifi; msg->im_vif_hi = vifi >> 8; } else { - msg->im_vif = mrt->mroute_reg_vif_num; - msg->im_vif_hi = mrt->mroute_reg_vif_num >> 8; + /* Pairs with WRITE_ONCE() in vif_add() and vif_delete() */ + int vif_num = READ_ONCE(mrt->mroute_reg_vif_num); + + msg->im_vif = vif_num; + msg->im_vif_hi = vif_num >> 8; } ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2; ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) + @@ -1064,10 +1077,8 @@ static int ipmr_cache_report(struct mr_table *mrt, skb->transport_header = skb->network_header; } - rcu_read_lock(); mroute_sk = rcu_dereference(mrt->mroute_sk); if (!mroute_sk) { - rcu_read_unlock(); kfree_skb(skb); return -EINVAL; } @@ -1076,7 +1087,7 @@ static int ipmr_cache_report(struct mr_table *mrt, /* Deliver to mrouted */ ret = sock_queue_rcv_skb(mroute_sk, skb); - rcu_read_unlock(); + if (ret < 0) { net_warn_ratelimited("mroute: pending queue full, dropping entries\n"); kfree_skb(skb); @@ -1086,6 +1097,7 @@ static int ipmr_cache_report(struct mr_table *mrt, } /* Queue a packet for resolution. It gets locked cache entry! 
*/ +/* Called under rcu_read_lock() */ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb, struct net_device *dev) { @@ -1198,12 +1210,12 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, mfc->mfcc_mcastgrp.s_addr, parent); rcu_read_unlock(); if (c) { - write_lock_bh(&mrt_lock); + spin_lock(&mrt_lock); c->_c.mfc_parent = mfc->mfcc_parent; ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls); if (!mrtsock) c->_c.mfc_flags |= MFC_STATIC; - write_unlock_bh(&mrt_lock); + spin_unlock(&mrt_lock); call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c, mrt->id); mroute_netlink_event(mrt, c, RTM_NEWROUTE); @@ -1598,20 +1610,20 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) if (vr.vifi >= mrt->maxvif) return -EINVAL; vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif); - read_lock(&mrt_lock); + rcu_read_lock(); vif = &mrt->vif_table[vr.vifi]; if (VIF_EXISTS(mrt, vr.vifi)) { - vr.icount = vif->pkt_in; - vr.ocount = vif->pkt_out; - vr.ibytes = vif->bytes_in; - vr.obytes = vif->bytes_out; - read_unlock(&mrt_lock); + vr.icount = READ_ONCE(vif->pkt_in); + vr.ocount = READ_ONCE(vif->pkt_out); + vr.ibytes = READ_ONCE(vif->bytes_in); + vr.obytes = READ_ONCE(vif->bytes_out); + rcu_read_unlock(); if (copy_to_user(arg, &vr, sizeof(vr))) return -EFAULT; return 0; } - read_unlock(&mrt_lock); + rcu_read_unlock(); return -EADDRNOTAVAIL; case SIOCGETSGCNT: if (copy_from_user(&sr, arg, sizeof(sr))) @@ -1673,20 +1685,20 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) if (vr.vifi >= mrt->maxvif) return -EINVAL; vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif); - read_lock(&mrt_lock); + rcu_read_lock(); vif = &mrt->vif_table[vr.vifi]; if (VIF_EXISTS(mrt, vr.vifi)) { - vr.icount = vif->pkt_in; - vr.ocount = vif->pkt_out; - vr.ibytes = vif->bytes_in; - vr.obytes = vif->bytes_out; - read_unlock(&mrt_lock); + vr.icount = READ_ONCE(vif->pkt_in); + vr.ocount = READ_ONCE(vif->pkt_out); + vr.ibytes 
= READ_ONCE(vif->bytes_in); + vr.obytes = READ_ONCE(vif->bytes_out); + rcu_read_unlock(); if (copy_to_user(arg, &vr, sizeof(vr))) return -EFAULT; return 0; } - read_unlock(&mrt_lock); + rcu_read_unlock(); return -EADDRNOTAVAIL; case SIOCGETSGCNT: if (copy_from_user(&sr, arg, sizeof(sr))) @@ -1726,7 +1738,7 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v ipmr_for_each_table(mrt, net) { v = &mrt->vif_table[0]; for (ct = 0; ct < mrt->maxvif; ct++, v++) { - if (v->dev == dev) + if (rcu_access_pointer(v->dev) == dev) vif_delete(mrt, ct, 1, NULL); } } @@ -1804,26 +1816,28 @@ static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt, } #endif -/* Processing handlers for ipmr_forward */ +/* Processing handlers for ipmr_forward, under rcu_read_lock() */ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, int in_vifi, struct sk_buff *skb, int vifi) { const struct iphdr *iph = ip_hdr(skb); struct vif_device *vif = &mrt->vif_table[vifi]; + struct net_device *vif_dev; struct net_device *dev; struct rtable *rt; struct flowi4 fl4; int encap = 0; - if (!vif->dev) + vif_dev = vif_dev_read(vif); + if (!vif_dev) goto out_free; if (vif->flags & VIFF_REGISTER) { - vif->pkt_out++; - vif->bytes_out += skb->len; - vif->dev->stats.tx_bytes += skb->len; - vif->dev->stats.tx_packets++; + WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1); + WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len); + vif_dev->stats.tx_bytes += skb->len; + vif_dev->stats.tx_packets++; ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT); goto out_free; } @@ -1868,8 +1882,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, goto out_free; } - vif->pkt_out++; - vif->bytes_out += skb->len; + WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1); + WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len); skb_dst_drop(skb); skb_dst_set(skb, &rt->dst); @@ -1881,8 +1895,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, if 
(vif->flags & VIFF_TUNNEL) { ip_encap(net, skb, vif->local, vif->remote); /* FIXME: extra output firewall step used to be here. --RR */ - vif->dev->stats.tx_packets++; - vif->dev->stats.tx_bytes += skb->len; + vif_dev->stats.tx_packets++; + vif_dev->stats.tx_bytes += skb->len; } IPCB(skb)->flags |= IPSKB_FORWARDED; @@ -1906,18 +1920,20 @@ out_free: kfree_skb(skb); } -static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev) +/* Called with mrt_lock or rcu_read_lock() */ +static int ipmr_find_vif(const struct mr_table *mrt, struct net_device *dev) { int ct; - - for (ct = mrt->maxvif-1; ct >= 0; ct--) { - if (mrt->vif_table[ct].dev == dev) + /* Pairs with WRITE_ONCE() in vif_delete()/vif_add() */ + for (ct = READ_ONCE(mrt->maxvif) - 1; ct >= 0; ct--) { + if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev) break; } return ct; } /* "local" means that we should preserve one skb (for local delivery) */ +/* Called under rcu_read_lock() */ static void ip_mr_forward(struct net *net, struct mr_table *mrt, struct net_device *dev, struct sk_buff *skb, struct mfc_cache *c, int local) @@ -1944,7 +1960,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt, } /* Wrong interface: drop packet and (maybe) send PIM assert. */ - if (mrt->vif_table[vif].dev != dev) { + if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) { if (rt_is_output_route(skb_rtable(skb))) { /* It is our own packet, looped back. * Very complicated situation...
@@ -1983,8 +1999,10 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt, } forward: - mrt->vif_table[vif].pkt_in++; - mrt->vif_table[vif].bytes_in += skb->len; + WRITE_ONCE(mrt->vif_table[vif].pkt_in, + mrt->vif_table[vif].pkt_in + 1); + WRITE_ONCE(mrt->vif_table[vif].bytes_in, + mrt->vif_table[vif].bytes_in + skb->len); /* Forward the frame */ if (c->mfc_origin == htonl(INADDR_ANY) && @@ -2140,22 +2158,14 @@ int ip_mr_input(struct sk_buff *skb) skb = skb2; } - read_lock(&mrt_lock); vif = ipmr_find_vif(mrt, dev); - if (vif >= 0) { - int err2 = ipmr_cache_unresolved(mrt, vif, skb, dev); - read_unlock(&mrt_lock); - - return err2; - } - read_unlock(&mrt_lock); + if (vif >= 0) + return ipmr_cache_unresolved(mrt, vif, skb, dev); kfree_skb(skb); return -ENODEV; } - read_lock(&mrt_lock); ip_mr_forward(net, mrt, dev, skb, cache, local); - read_unlock(&mrt_lock); if (local) return ip_local_deliver(skb); @@ -2252,18 +2262,15 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, int vif = -1; dev = skb->dev; - read_lock(&mrt_lock); if (dev) vif = ipmr_find_vif(mrt, dev); if (vif < 0) { - read_unlock(&mrt_lock); rcu_read_unlock(); return -ENODEV; } skb2 = skb_realloc_headroom(skb, sizeof(struct iphdr)); if (!skb2) { - read_unlock(&mrt_lock); rcu_read_unlock(); return -ENOMEM; } @@ -2277,14 +2284,11 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, iph->daddr = daddr; iph->version = 0; err = ipmr_cache_unresolved(mrt, vif, skb2, dev); - read_unlock(&mrt_lock); rcu_read_unlock(); return err; } - read_lock(&mrt_lock); err = mr_fill_mroute(mrt, skb, &cache->_c, rtm); - read_unlock(&mrt_lock); rcu_read_unlock(); return err; } @@ -2404,7 +2408,7 @@ static size_t igmpmsg_netlink_msgsize(size_t payloadlen) return len; } -static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt) +static void igmpmsg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt) { struct net *net = read_pnet(&mrt->net); struct nlmsghdr *nlh; @@ -2744,18 
+2748,21 @@ static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb) static bool ipmr_fill_vif(struct mr_table *mrt, u32 vifid, struct sk_buff *skb) { + struct net_device *vif_dev; struct nlattr *vif_nest; struct vif_device *vif; + vif = &mrt->vif_table[vifid]; + vif_dev = rtnl_dereference(vif->dev); /* if the VIF doesn't exist just continue */ - if (!VIF_EXISTS(mrt, vifid)) + if (!vif_dev) return true; - vif = &mrt->vif_table[vifid]; vif_nest = nla_nest_start_noflag(skb, IPMRA_VIF); if (!vif_nest) return false; - if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, vif->dev->ifindex) || + + if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, vif_dev->ifindex) || nla_put_u32(skb, IPMRA_VIFA_VIF_ID, vifid) || nla_put_u16(skb, IPMRA_VIFA_FLAGS, vif->flags) || nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_IN, vif->bytes_in, @@ -2887,7 +2894,7 @@ out: */ static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(mrt_lock) + __acquires(RCU) { struct mr_vif_iter *iter = seq->private; struct net *net = seq_file_net(seq); @@ -2899,14 +2906,14 @@ static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) iter->mrt = mrt; - read_lock(&mrt_lock); + rcu_read_lock(); return mr_vif_seq_start(seq, pos); } static void ipmr_vif_seq_stop(struct seq_file *seq, void *v) - __releases(mrt_lock) + __releases(RCU) { - read_unlock(&mrt_lock); + rcu_read_unlock(); } static int ipmr_vif_seq_show(struct seq_file *seq, void *v) @@ -2919,9 +2926,11 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v) "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n"); } else { const struct vif_device *vif = v; - const char *name = vif->dev ? - vif->dev->name : "none"; + const struct net_device *vif_dev; + const char *name; + vif_dev = vif_dev_read(vif); + name = vif_dev ? 
vif_dev->name : "none"; seq_printf(seq, "%2td %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", vif - mrt->vif_table, @@ -3017,7 +3026,7 @@ static int ipmr_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack) { return mr_dump(net, nb, RTNL_FAMILY_IPMR, ipmr_rules_dump, - ipmr_mr_table_iter, &mrt_lock, extack); + ipmr_mr_table_iter, extack); } static const struct fib_notifier_ops ipmr_notifier_ops_template = { diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c index aa8738a91210..271dc03fc6db 100644 --- a/net/ipv4/ipmr_base.c +++ b/net/ipv4/ipmr_base.c @@ -13,7 +13,7 @@ void vif_device_init(struct vif_device *v, unsigned short flags, unsigned short get_iflink_mask) { - v->dev = NULL; + RCU_INIT_POINTER(v->dev, NULL); v->bytes_in = 0; v->bytes_out = 0; v->pkt_in = 0; @@ -208,6 +208,7 @@ EXPORT_SYMBOL(mr_mfc_seq_next); int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct mr_mfc *c, struct rtmsg *rtm) { + struct net_device *vif_dev; struct rta_mfc_stats mfcs; struct nlattr *mp_attr; struct rtnexthop *nhp; @@ -220,10 +221,13 @@ int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, return -ENOENT; } - if (VIF_EXISTS(mrt, c->mfc_parent) && - nla_put_u32(skb, RTA_IIF, - mrt->vif_table[c->mfc_parent].dev->ifindex) < 0) + rcu_read_lock(); + vif_dev = rcu_dereference(mrt->vif_table[c->mfc_parent].dev); + if (vif_dev && nla_put_u32(skb, RTA_IIF, vif_dev->ifindex) < 0) { + rcu_read_unlock(); return -EMSGSIZE; + } + rcu_read_unlock(); if (c->mfc_flags & MFC_OFFLOAD) rtm->rtm_flags |= RTNH_F_OFFLOAD; @@ -232,23 +236,27 @@ int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, if (!mp_attr) return -EMSGSIZE; + rcu_read_lock(); for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { - if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { - struct vif_device *vif; + struct vif_device *vif = &mrt->vif_table[ct]; + + vif_dev = rcu_dereference(vif->dev); + if (vif_dev && c->mfc_un.res.ttls[ct] < 255) { nhp = 
nla_reserve_nohdr(skb, sizeof(*nhp)); if (!nhp) { + rcu_read_unlock(); nla_nest_cancel(skb, mp_attr); return -EMSGSIZE; } nhp->rtnh_flags = 0; nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; - vif = &mrt->vif_table[ct]; - nhp->rtnh_ifindex = vif->dev->ifindex; + nhp->rtnh_ifindex = vif_dev->ifindex; nhp->rtnh_len = sizeof(*nhp); } } + rcu_read_unlock(); nla_nest_end(skb, mp_attr); @@ -275,13 +283,14 @@ static bool mr_mfc_uses_dev(const struct mr_table *mrt, int ct; for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { - if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { - const struct vif_device *vif; - - vif = &mrt->vif_table[ct]; - if (vif->dev == dev) - return true; - } + const struct net_device *vif_dev; + const struct vif_device *vif; + + vif = &mrt->vif_table[ct]; + vif_dev = rcu_access_pointer(vif->dev); + if (vif_dev && c->mfc_un.res.ttls[ct] < 255 && + vif_dev == dev) + return true; } return false; } @@ -390,7 +399,6 @@ int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family, struct netlink_ext_ack *extack), struct mr_table *(*mr_iter)(struct net *net, struct mr_table *mrt), - rwlock_t *mrt_lock, struct netlink_ext_ack *extack) { struct mr_table *mrt; @@ -402,22 +410,25 @@ int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family, for (mrt = mr_iter(net, NULL); mrt; mrt = mr_iter(net, mrt)) { struct vif_device *v = &mrt->vif_table[0]; + struct net_device *vif_dev; struct mr_mfc *mfc; int vifi; /* Notifiy on table VIF entries */ - read_lock(mrt_lock); + rcu_read_lock(); for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) { - if (!v->dev) + vif_dev = rcu_dereference(v->dev); + if (!vif_dev) continue; err = mr_call_vif_notifier(nb, family, - FIB_EVENT_VIF_ADD, - v, vifi, mrt->id, extack); + FIB_EVENT_VIF_ADD, v, + vif_dev, vifi, + mrt->id, extack); if (err) break; } - read_unlock(mrt_lock); + rcu_read_unlock(); if (err) return err; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 
3c6101def7d6..b83c2bd9d722 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -50,7 +50,7 @@ struct ping_table { struct hlist_nulls_head hash[PING_HTABLE_SIZE]; - rwlock_t lock; + spinlock_t lock; }; static struct ping_table ping_table; @@ -82,7 +82,7 @@ int ping_get_port(struct sock *sk, unsigned short ident) struct sock *sk2 = NULL; isk = inet_sk(sk); - write_lock_bh(&ping_table.lock); + spin_lock(&ping_table.lock); if (ident == 0) { u32 i; u16 result = ping_port_rover + 1; @@ -128,14 +128,15 @@ next_port: if (sk_unhashed(sk)) { pr_debug("was not hashed\n"); sock_hold(sk); - hlist_nulls_add_head(&sk->sk_nulls_node, hlist); + sock_set_flag(sk, SOCK_RCU_FREE); + hlist_nulls_add_head_rcu(&sk->sk_nulls_node, hlist); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); } - write_unlock_bh(&ping_table.lock); + spin_unlock(&ping_table.lock); return 0; fail: - write_unlock_bh(&ping_table.lock); + spin_unlock(&ping_table.lock); return 1; } EXPORT_SYMBOL_GPL(ping_get_port); @@ -153,19 +154,19 @@ void ping_unhash(struct sock *sk) struct inet_sock *isk = inet_sk(sk); pr_debug("ping_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num); - write_lock_bh(&ping_table.lock); + spin_lock(&ping_table.lock); if (sk_hashed(sk)) { - hlist_nulls_del(&sk->sk_nulls_node); - sk_nulls_node_init(&sk->sk_nulls_node); + hlist_nulls_del_init_rcu(&sk->sk_nulls_node); sock_put(sk); isk->inet_num = 0; isk->inet_sport = 0; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); } - write_unlock_bh(&ping_table.lock); + spin_unlock(&ping_table.lock); } EXPORT_SYMBOL_GPL(ping_unhash); +/* Called under rcu_read_lock() */ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) { struct hlist_nulls_head *hslot = ping_hashslot(&ping_table, net, ident); @@ -190,8 +191,6 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) return NULL; } - read_lock_bh(&ping_table.lock); - ping_portaddr_for_each_entry(sk, hnode, hslot) { isk = inet_sk(sk); @@ -230,13 
+229,11 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) sk->sk_bound_dev_if != sdif) continue; - sock_hold(sk); goto exit; } sk = NULL; exit: - read_unlock_bh(&ping_table.lock); return sk; } @@ -592,7 +589,7 @@ void ping_err(struct sk_buff *skb, int offset, u32 info) sk->sk_err = err; sk_error_report(sk); out: - sock_put(sk); + return; } EXPORT_SYMBOL_GPL(ping_err); @@ -998,7 +995,6 @@ enum skb_drop_reason ping_rcv(struct sk_buff *skb) reason = __ping_queue_rcv_skb(sk, skb2); else reason = SKB_DROP_REASON_NOMEM; - sock_put(sk); } if (reason) @@ -1084,13 +1080,13 @@ static struct sock *ping_get_idx(struct seq_file *seq, loff_t pos) } void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family) - __acquires(ping_table.lock) + __acquires(RCU) { struct ping_iter_state *state = seq->private; state->bucket = 0; state->family = family; - read_lock_bh(&ping_table.lock); + rcu_read_lock(); return *pos ? ping_get_idx(seq, *pos-1) : SEQ_START_TOKEN; } @@ -1116,9 +1112,9 @@ void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos) EXPORT_SYMBOL_GPL(ping_seq_next); void ping_seq_stop(struct seq_file *seq, void *v) - __releases(ping_table.lock) + __releases(RCU) { - read_unlock_bh(&ping_table.lock); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(ping_seq_stop); @@ -1202,5 +1198,5 @@ void __init ping_init(void) for (i = 0; i < PING_HTABLE_SIZE; i++) INIT_HLIST_NULLS_HEAD(&ping_table.hash[i], i); - rwlock_init(&ping_table.lock); + spin_lock_init(&ping_table.lock); } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index bbd717805b10..006c1f0ed8b4 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -85,21 +85,20 @@ struct raw_frag_vec { int hlen; }; -struct raw_hashinfo raw_v4_hashinfo = { - .lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock), -}; +struct raw_hashinfo raw_v4_hashinfo; EXPORT_SYMBOL_GPL(raw_v4_hashinfo); int raw_hash_sk(struct sock *sk) { struct raw_hashinfo *h = sk->sk_prot->h.raw_hash; - struct hlist_head *head; + struct 
hlist_nulls_head *hlist; - head = &h->ht[inet_sk(sk)->inet_num & (RAW_HTABLE_SIZE - 1)]; + hlist = &h->ht[inet_sk(sk)->inet_num & (RAW_HTABLE_SIZE - 1)]; - write_lock_bh(&h->lock); - sk_add_node(sk, head); - write_unlock_bh(&h->lock); + spin_lock(&h->lock); + __sk_nulls_add_node_rcu(sk, hlist); + sock_set_flag(sk, SOCK_RCU_FREE); + spin_unlock(&h->lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); return 0; @@ -110,31 +109,26 @@ void raw_unhash_sk(struct sock *sk) { struct raw_hashinfo *h = sk->sk_prot->h.raw_hash; - write_lock_bh(&h->lock); - if (sk_del_node_init(sk)) + spin_lock(&h->lock); + if (__sk_nulls_del_node_init_rcu(sk)) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - write_unlock_bh(&h->lock); + spin_unlock(&h->lock); } EXPORT_SYMBOL_GPL(raw_unhash_sk); -struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, - unsigned short num, __be32 raddr, __be32 laddr, - int dif, int sdif) +bool raw_v4_match(struct net *net, struct sock *sk, unsigned short num, + __be32 raddr, __be32 laddr, int dif, int sdif) { - sk_for_each_from(sk) { - struct inet_sock *inet = inet_sk(sk); - - if (net_eq(sock_net(sk), net) && inet->inet_num == num && - !(inet->inet_daddr && inet->inet_daddr != raddr) && - !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) && - raw_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif)) - goto found; /* gotcha */ - } - sk = NULL; -found: - return sk; + struct inet_sock *inet = inet_sk(sk); + + if (net_eq(sock_net(sk), net) && inet->inet_num == num && + !(inet->inet_daddr && inet->inet_daddr != raddr) && + !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) && + raw_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif)) + return true; + return false; } -EXPORT_SYMBOL_GPL(__raw_v4_lookup); +EXPORT_SYMBOL_GPL(raw_v4_match); /* * 0 - deliver @@ -168,23 +162,20 @@ static int icmp_filter(const struct sock *sk, const struct sk_buff *skb) */ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash) { + 
struct net *net = dev_net(skb->dev); + struct hlist_nulls_head *hlist; + struct hlist_nulls_node *hnode; int sdif = inet_sdif(skb); int dif = inet_iif(skb); - struct sock *sk; - struct hlist_head *head; int delivered = 0; - struct net *net; - - read_lock(&raw_v4_hashinfo.lock); - head = &raw_v4_hashinfo.ht[hash]; - if (hlist_empty(head)) - goto out; - - net = dev_net(skb->dev); - sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol, - iph->saddr, iph->daddr, dif, sdif); + struct sock *sk; - while (sk) { + hlist = &raw_v4_hashinfo.ht[hash]; + rcu_read_lock(); + sk_nulls_for_each(sk, hnode, hlist) { + if (!raw_v4_match(net, sk, iph->protocol, + iph->saddr, iph->daddr, dif, sdif)) + continue; delivered = 1; if ((iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) && ip_mc_sf_allow(sk, iph->daddr, iph->saddr, @@ -195,31 +186,16 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash) if (clone) raw_rcv(sk, clone); } - sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol, - iph->saddr, iph->daddr, - dif, sdif); } -out: - read_unlock(&raw_v4_hashinfo.lock); + rcu_read_unlock(); return delivered; } int raw_local_deliver(struct sk_buff *skb, int protocol) { - int hash; - struct sock *raw_sk; - - hash = protocol & (RAW_HTABLE_SIZE - 1); - raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]); - - /* If there maybe a raw socket we must check - if not we - * don't care less - */ - if (raw_sk && !raw_v4_input(skb, ip_hdr(skb), hash)) - raw_sk = NULL; - - return raw_sk != NULL; + int hash = protocol & (RAW_HTABLE_SIZE - 1); + return raw_v4_input(skb, ip_hdr(skb), hash); } static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info) @@ -286,31 +262,27 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info) void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info) { - int hash; - struct sock *raw_sk; + struct net *net = dev_net(skb->dev); + struct hlist_nulls_head *hlist; + struct hlist_nulls_node *hnode; + int dif = 
skb->dev->ifindex; + int sdif = inet_sdif(skb); const struct iphdr *iph; - struct net *net; + struct sock *sk; + int hash; hash = protocol & (RAW_HTABLE_SIZE - 1); + hlist = &raw_v4_hashinfo.ht[hash]; - read_lock(&raw_v4_hashinfo.lock); - raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]); - if (raw_sk) { - int dif = skb->dev->ifindex; - int sdif = inet_sdif(skb); - + rcu_read_lock(); + sk_nulls_for_each(sk, hnode, hlist) { iph = (const struct iphdr *)skb->data; - net = dev_net(skb->dev); - - while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol, - iph->daddr, iph->saddr, - dif, sdif)) != NULL) { - raw_err(raw_sk, skb, info); - raw_sk = sk_next(raw_sk); - iph = (const struct iphdr *)skb->data; - } + if (!raw_v4_match(net, sk, iph->protocol, + iph->daddr, iph->saddr, dif, sdif)) + continue; + raw_err(sk, skb, info); } - read_unlock(&raw_v4_hashinfo.lock); + rcu_read_unlock(); } static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) @@ -971,44 +943,41 @@ struct proto raw_prot = { }; #ifdef CONFIG_PROC_FS -static struct sock *raw_get_first(struct seq_file *seq) +static struct sock *raw_get_first(struct seq_file *seq, int bucket) { - struct sock *sk; struct raw_hashinfo *h = pde_data(file_inode(seq->file)); struct raw_iter_state *state = raw_seq_private(seq); + struct hlist_nulls_head *hlist; + struct hlist_nulls_node *hnode; + struct sock *sk; - for (state->bucket = 0; state->bucket < RAW_HTABLE_SIZE; + for (state->bucket = bucket; state->bucket < RAW_HTABLE_SIZE; ++state->bucket) { - sk_for_each(sk, &h->ht[state->bucket]) + hlist = &h->ht[state->bucket]; + sk_nulls_for_each(sk, hnode, hlist) { if (sock_net(sk) == seq_file_net(seq)) - goto found; + return sk; + } } - sk = NULL; -found: - return sk; + return NULL; } static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk) { - struct raw_hashinfo *h = pde_data(file_inode(seq->file)); struct raw_iter_state *state = raw_seq_private(seq); do { - sk = sk_next(sk); -try_again: - ; + sk = sk_nulls_next(sk); } 
while (sk && sock_net(sk) != seq_file_net(seq)); - if (!sk && ++state->bucket < RAW_HTABLE_SIZE) { - sk = sk_head(&h->ht[state->bucket]); - goto try_again; - } + if (!sk) + return raw_get_first(seq, state->bucket + 1); return sk; } static struct sock *raw_get_idx(struct seq_file *seq, loff_t pos) { - struct sock *sk = raw_get_first(seq); + struct sock *sk = raw_get_first(seq, 0); if (sk) while (pos && (sk = raw_get_next(seq, sk)) != NULL) @@ -1017,11 +986,9 @@ static struct sock *raw_get_idx(struct seq_file *seq, loff_t pos) } void *raw_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(&h->lock) + __acquires(RCU) { - struct raw_hashinfo *h = pde_data(file_inode(seq->file)); - - read_lock(&h->lock); + rcu_read_lock(); return *pos ? raw_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } EXPORT_SYMBOL_GPL(raw_seq_start); @@ -1031,7 +998,7 @@ void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos) struct sock *sk; if (v == SEQ_START_TOKEN) - sk = raw_get_first(seq); + sk = raw_get_first(seq, 0); else sk = raw_get_next(seq, v); ++*pos; @@ -1040,11 +1007,9 @@ void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos) EXPORT_SYMBOL_GPL(raw_seq_next); void raw_seq_stop(struct seq_file *seq, void *v) - __releases(&h->lock) + __releases(RCU) { - struct raw_hashinfo *h = pde_data(file_inode(seq->file)); - - read_unlock(&h->lock); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(raw_seq_stop); @@ -1106,6 +1071,7 @@ static __net_initdata struct pernet_operations raw_net_ops = { int __init raw_proc_init(void) { + return register_pernet_subsys(&raw_net_ops); } diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c index ccacbde30a2c..999321834b94 100644 --- a/net/ipv4/raw_diag.c +++ b/net/ipv4/raw_diag.c @@ -34,57 +34,57 @@ raw_get_hashinfo(const struct inet_diag_req_v2 *r) * use helper to figure it out. 
*/ -static struct sock *raw_lookup(struct net *net, struct sock *from, - const struct inet_diag_req_v2 *req) +static bool raw_lookup(struct net *net, struct sock *sk, + const struct inet_diag_req_v2 *req) { struct inet_diag_req_raw *r = (void *)req; - struct sock *sk = NULL; if (r->sdiag_family == AF_INET) - sk = __raw_v4_lookup(net, from, r->sdiag_raw_protocol, - r->id.idiag_dst[0], - r->id.idiag_src[0], - r->id.idiag_if, 0); + return raw_v4_match(net, sk, r->sdiag_raw_protocol, + r->id.idiag_dst[0], + r->id.idiag_src[0], + r->id.idiag_if, 0); #if IS_ENABLED(CONFIG_IPV6) else - sk = __raw_v6_lookup(net, from, r->sdiag_raw_protocol, - (const struct in6_addr *)r->id.idiag_src, - (const struct in6_addr *)r->id.idiag_dst, - r->id.idiag_if, 0); + return raw_v6_match(net, sk, r->sdiag_raw_protocol, + (const struct in6_addr *)r->id.idiag_src, + (const struct in6_addr *)r->id.idiag_dst, + r->id.idiag_if, 0); #endif - return sk; + return false; } static struct sock *raw_sock_get(struct net *net, const struct inet_diag_req_v2 *r) { struct raw_hashinfo *hashinfo = raw_get_hashinfo(r); - struct sock *sk = NULL, *s; + struct hlist_nulls_head *hlist; + struct hlist_nulls_node *hnode; + struct sock *sk; int slot; if (IS_ERR(hashinfo)) return ERR_CAST(hashinfo); - read_lock(&hashinfo->lock); + rcu_read_lock(); for (slot = 0; slot < RAW_HTABLE_SIZE; slot++) { - sk_for_each(s, &hashinfo->ht[slot]) { - sk = raw_lookup(net, s, r); - if (sk) { + hlist = &hashinfo->ht[slot]; + sk_nulls_for_each(sk, hnode, hlist) { + if (raw_lookup(net, sk, r)) { /* * Grab it and keep until we fill - * diag meaage to be reported, so + * diag message to be reported, so * caller should call sock_put then. - * We can do that because we're keeping - * hashinfo->lock here. */ - sock_hold(sk); - goto out_unlock; + if (refcount_inc_not_zero(&sk->sk_refcnt)) + goto out_unlock; } } } + sk = ERR_PTR(-ENOENT); out_unlock: - read_unlock(&hashinfo->lock); + rcu_read_unlock(); - return sk ? 
sk : ERR_PTR(-ENOENT); + return sk; } static int raw_diag_dump_one(struct netlink_callback *cb, @@ -142,6 +142,8 @@ static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, struct raw_hashinfo *hashinfo = raw_get_hashinfo(r); struct net *net = sock_net(skb->sk); struct inet_diag_dump_data *cb_data; + struct hlist_nulls_head *hlist; + struct hlist_nulls_node *hnode; int num, s_num, slot, s_slot; struct sock *sk = NULL; struct nlattr *bc; @@ -154,11 +156,12 @@ static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, s_slot = cb->args[0]; num = s_num = cb->args[1]; - read_lock(&hashinfo->lock); + rcu_read_lock(); for (slot = s_slot; slot < RAW_HTABLE_SIZE; s_num = 0, slot++) { num = 0; - sk_for_each(sk, &hashinfo->ht[slot]) { + hlist = &hashinfo->ht[slot]; + sk_nulls_for_each(sk, hnode, hlist) { struct inet_sock *inet = inet_sk(sk); if (!net_eq(sock_net(sk), net)) @@ -181,7 +184,7 @@ next: } out_unlock: - read_unlock(&hashinfo->lock); + rcu_read_unlock(); cb->args[0] = slot; cb->args[1] = num; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 356f535f3443..2d16bcc7d346 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1550,9 +1550,8 @@ void rt_flush_dev(struct net_device *dev) if (rt->dst.dev != dev) continue; rt->dst.dev = blackhole_netdev; - dev_replace_track(dev, blackhole_netdev, - &rt->dst.dev_tracker, - GFP_ATOMIC); + netdev_ref_replace(dev, blackhole_netdev, + &rt->dst.dev_tracker, GFP_ATOMIC); list_move(&rt->rt_uncached, &ul->quarantine); } spin_unlock_bh(&ul->lock); @@ -2851,7 +2850,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or new->output = dst_discard_out; new->dev = net->loopback_dev; - dev_hold_track(new->dev, &new->dev_tracker, GFP_ATOMIC); + netdev_hold(new->dev, &new->dev_tracker, GFP_ATOMIC); rt->rt_is_input = ort->rt_is_input; rt->rt_iif = ort->rt_iif; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 028513d3e2a2..f7309452bdce 100644 --- a/net/ipv4/tcp.c +++ 
b/net/ipv4/tcp.c @@ -294,6 +294,8 @@ EXPORT_SYMBOL(sysctl_tcp_mem); atomic_long_t tcp_memory_allocated ____cacheline_aligned_in_smp; /* Current allocated memory. */ EXPORT_SYMBOL(tcp_memory_allocated); +DEFINE_PER_CPU(int, tcp_memory_per_cpu_fw_alloc); +EXPORT_PER_CPU_SYMBOL_GPL(tcp_memory_per_cpu_fw_alloc); #if IS_ENABLED(CONFIG_SMC) DEFINE_STATIC_KEY_FALSE(tcp_have_smc); @@ -856,9 +858,6 @@ struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, { struct sk_buff *skb; - if (unlikely(tcp_under_memory_pressure(sk))) - sk_mem_reclaim_partial(sk); - skb = alloc_skb_fclone(size + MAX_TCP_HEADER, gfp); if (likely(skb)) { bool mem_scheduled; @@ -952,6 +951,24 @@ static int tcp_downgrade_zcopy_pure(struct sock *sk, struct sk_buff *skb) return 0; } + +static int tcp_wmem_schedule(struct sock *sk, int copy) +{ + int left; + + if (likely(sk_wmem_schedule(sk, copy))) + return copy; + + /* We could be in trouble if we have nothing queued. + * Use whatever is left in sk->sk_forward_alloc and tcp_wmem[0] + * to guarantee some progress. 
+ */ + left = sock_net(sk)->ipv4.sysctl_tcp_wmem[0] - sk->sk_wmem_queued; + if (left > 0) + sk_forced_mem_schedule(sk, min(left, copy)); + return min(copy, sk->sk_forward_alloc); +} + static struct sk_buff *tcp_build_frag(struct sock *sk, int size_goal, int flags, struct page *page, int offset, size_t *size) { @@ -987,7 +1004,11 @@ new_segment: tcp_mark_push(tp, skb); goto new_segment; } - if (tcp_downgrade_zcopy_pure(sk, skb) || !sk_wmem_schedule(sk, copy)) + if (tcp_downgrade_zcopy_pure(sk, skb)) + return NULL; + + copy = tcp_wmem_schedule(sk, copy); + if (!copy) return NULL; if (can_coalesce) { @@ -1335,8 +1356,11 @@ new_segment: copy = min_t(int, copy, pfrag->size - pfrag->offset); - if (tcp_downgrade_zcopy_pure(sk, skb) || - !sk_wmem_schedule(sk, copy)) + if (tcp_downgrade_zcopy_pure(sk, skb)) + goto wait_for_space; + + copy = tcp_wmem_schedule(sk, copy); + if (!copy) goto wait_for_space; err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb, @@ -1363,7 +1387,8 @@ new_segment: skb_shinfo(skb)->flags |= SKBFL_PURE_ZEROCOPY; if (!skb_zcopy_pure(skb)) { - if (!sk_wmem_schedule(sk, copy)) + copy = tcp_wmem_schedule(sk, copy); + if (!copy) goto wait_for_space; } @@ -2762,8 +2787,6 @@ void __tcp_close(struct sock *sk, long timeout) __kfree_skb(skb); } - sk_mem_reclaim(sk); - /* If socket has been already reset (e.g. in tcp_reset()) - kill it. 
*/ if (sk->sk_state == TCP_CLOSE) goto adjudge_to_death; @@ -2871,7 +2894,6 @@ adjudge_to_death: } } if (sk->sk_state != TCP_CLOSE) { - sk_mem_reclaim(sk); if (tcp_check_oom(sk, 0)) { tcp_set_state(sk, TCP_CLOSE); tcp_send_active_reset(sk, GFP_ATOMIC); @@ -2949,7 +2971,6 @@ void tcp_write_queue_purge(struct sock *sk) } tcp_rtx_queue_purge(sk); INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue); - sk_mem_reclaim(sk); tcp_clear_all_retrans_hints(tcp_sk(sk)); tcp_sk(sk)->packets_out = 0; inet_csk(sk)->icsk_backoff = 0; @@ -4651,11 +4672,11 @@ void __init tcp_init(void) max_wshare = min(4UL*1024*1024, limit); max_rshare = min(6UL*1024*1024, limit); - init_net.ipv4.sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; + init_net.ipv4.sysctl_tcp_wmem[0] = PAGE_SIZE; init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024; init_net.ipv4.sysctl_tcp_wmem[2] = max(64*1024, max_wshare); - init_net.ipv4.sysctl_tcp_rmem[0] = SK_MEM_QUANTUM; + init_net.ipv4.sysctl_tcp_rmem[0] = PAGE_SIZE; init_net.ipv4.sysctl_tcp_rmem[1] = 131072; init_net.ipv4.sysctl_tcp_rmem[2] = max(131072, max_rshare); diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 0d3f68bb51c0..a1626afe87a1 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -540,6 +540,7 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS], struct proto *base) { prot[TCP_BPF_BASE] = *base; + prot[TCP_BPF_BASE].destroy = sock_map_destroy; prot[TCP_BPF_BASE].close = sock_map_close; prot[TCP_BPF_BASE].recvmsg = tcp_bpf_recvmsg; prot[TCP_BPF_BASE].sock_is_readable = sk_msg_is_readable; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2e2a9ece9af2..80cb112ef142 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -805,7 +805,6 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb) * restart window, so that we send ACKs quickly. 
*/ tcp_incr_quickack(sk, TCP_MAX_QUICKACKS); - sk_mem_reclaim(sk); } } icsk->icsk_ack.lrcvtime = now; @@ -3967,7 +3966,7 @@ static bool smc_parse_options(const struct tcphdr *th, /* Try to parse the MSS option from the TCP header. Return 0 on failure, clamped * value on success. */ -static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss) +u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss) { const unsigned char *ptr = (const unsigned char *)(th + 1); int length = (th->doff * 4) - sizeof(struct tcphdr); @@ -4006,6 +4005,7 @@ static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss) } return mss; } +EXPORT_SYMBOL_GPL(tcp_parse_mss_option); /* Look for tcp options. Normally only called on SYN and SYNACK packets. * But, this can also be called on packets in the established flow when @@ -4390,7 +4390,6 @@ void tcp_fin(struct sock *sk) skb_rbtree_purge(&tp->out_of_order_queue); if (tcp_is_sack(tp)) tcp_sack_reset(&tp->rx_opt); - sk_mem_reclaim(sk); if (!sock_flag(sk, SOCK_DEAD)) { sk->sk_state_change(sk); @@ -5287,7 +5286,7 @@ new_range: before(TCP_SKB_CB(skb)->end_seq, start)) { /* Do not attempt collapsing tiny skbs */ if (range_truesize != head->truesize || - end - start >= SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)) { + end - start >= SKB_WITH_OVERHEAD(PAGE_SIZE)) { tcp_collapse(sk, NULL, &tp->out_of_order_queue, head, skb, start, end); } else { @@ -5336,7 +5335,6 @@ static bool tcp_prune_ofo_queue(struct sock *sk) tcp_drop_reason(sk, rb_to_skb(node), SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE); if (!prev || goal <= 0) { - sk_mem_reclaim(sk); if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && !tcp_under_memory_pressure(sk)) break; @@ -5383,7 +5381,6 @@ static int tcp_prune_queue(struct sock *sk) skb_peek(&sk->sk_receive_queue), NULL, tp->copied_seq, tp->rcv_nxt); - sk_mem_reclaim(sk); if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) return 0; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index da5a3c44c4fb..68d0d8a008e2 
100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -3049,7 +3049,10 @@ struct proto tcp_prot = { .stream_memory_free = tcp_stream_memory_free, .sockets_allocated = &tcp_sockets_allocated, .orphan_count = &tcp_orphan_count, + .memory_allocated = &tcp_memory_allocated, + .per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc, + .memory_pressure = &tcp_memory_pressure, .sysctl_mem = sysctl_tcp_mem, .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem), diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 1c054431e358..18c913a2347a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3362,12 +3362,13 @@ void tcp_xmit_retransmit_queue(struct sock *sk) */ void sk_forced_mem_schedule(struct sock *sk, int size) { - int amt; + int delta, amt; - if (size <= sk->sk_forward_alloc) + delta = size - sk->sk_forward_alloc; + if (delta <= 0) return; - amt = sk_mem_pages(size); - sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; + amt = sk_mem_pages(delta); + sk->sk_forward_alloc += amt << PAGE_SHIFT; sk_memory_allocated_add(sk, amt); if (mem_cgroup_sockets_enabled && sk->sk_memcg) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 20cf4a98c69d..2208755e8efc 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -290,15 +290,13 @@ void tcp_delack_timer_handler(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); - sk_mem_reclaim_partial(sk); - if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) - goto out; + return; if (time_after(icsk->icsk_ack.timeout, jiffies)) { sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout); - goto out; + return; } icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER; @@ -317,10 +315,6 @@ void tcp_delack_timer_handler(struct sock *sk) tcp_send_ack(sk); __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS); } - -out: - if (tcp_under_memory_pressure(sk)) - sk_mem_reclaim(sk); } @@ -600,11 +594,11 @@ void 
tcp_write_timer_handler(struct sock *sk) if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) || !icsk->icsk_pending) - goto out; + return; if (time_after(icsk->icsk_timeout, jiffies)) { sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); - goto out; + return; } tcp_mstamp_refresh(tcp_sk(sk)); @@ -626,9 +620,6 @@ void tcp_write_timer_handler(struct sock *sk) tcp_probe_timer(sk); break; } - -out: - sk_mem_reclaim(sk); } static void tcp_write_timer(struct timer_list *t) @@ -743,8 +734,6 @@ static void tcp_keepalive_timer (struct timer_list *t) elapsed = keepalive_time_when(tp) - elapsed; } - sk_mem_reclaim(sk); - resched: inet_csk_reset_keepalive_timer (sk, elapsed); goto out; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index aa9f2ec3dc46..6172b4750a88 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -125,6 +125,8 @@ EXPORT_SYMBOL(sysctl_udp_mem); atomic_long_t udp_memory_allocated ____cacheline_aligned_in_smp; EXPORT_SYMBOL(udp_memory_allocated); +DEFINE_PER_CPU(int, udp_memory_per_cpu_fw_alloc); +EXPORT_PER_CPU_SYMBOL_GPL(udp_memory_per_cpu_fw_alloc); #define MAX_UDP_PORTS 65536 #define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN) @@ -1461,11 +1463,11 @@ static void udp_rmem_release(struct sock *sk, int size, int partial, sk->sk_forward_alloc += size; - amt = (sk->sk_forward_alloc - partial) & ~(SK_MEM_QUANTUM - 1); + amt = (sk->sk_forward_alloc - partial) & ~(PAGE_SIZE - 1); sk->sk_forward_alloc -= amt; if (amt) - __sk_mem_reduce_allocated(sk, amt >> SK_MEM_QUANTUM_SHIFT); + __sk_mem_reduce_allocated(sk, amt >> PAGE_SHIFT); atomic_sub(size, &sk->sk_rmem_alloc); @@ -1558,7 +1560,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) spin_lock(&list->lock); if (size >= sk->sk_forward_alloc) { amt = sk_mem_pages(size); - delta = amt << SK_MEM_QUANTUM_SHIFT; + delta = amt << PAGE_SHIFT; if (!__sk_mem_raise_allocated(sk, delta, amt, SK_MEM_RECV)) { err = -ENOBUFS; spin_unlock(&list->lock); @@ -2946,6 +2948,8 @@ struct 
proto udp_prot = { .psock_update_sk_prot = udp_bpf_update_proto, #endif .memory_allocated = &udp_memory_allocated, + .per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc, + .sysctl_mem = sysctl_udp_mem, .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min), .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min), @@ -3263,8 +3267,8 @@ EXPORT_SYMBOL(udp_flow_hashrnd); static void __udp_sysctl_init(struct net *net) { - net->ipv4.sysctl_udp_rmem_min = SK_MEM_QUANTUM; - net->ipv4.sysctl_udp_wmem_min = SK_MEM_QUANTUM; + net->ipv4.sysctl_udp_rmem_min = PAGE_SIZE; + net->ipv4.sysctl_udp_wmem_min = PAGE_SIZE; #ifdef CONFIG_NET_L3_MASTER_DEV net->ipv4.sysctl_udp_l3mdev_accept = 0; diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index cd1cd68adeec..6e08a76ae1e7 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -51,7 +51,10 @@ struct proto udplite_prot = { .unhash = udp_lib_unhash, .rehash = udp_v4_rehash, .get_port = udp_v4_get_port, + .memory_allocated = &udp_memory_allocated, + .per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc, + .sysctl_mem = sysctl_udp_mem, .obj_size = sizeof(struct udp_sock), .h.udp_table = &udplite_table, diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 6fde0b184791..3d0dfa6cf9f9 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -75,7 +75,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt.rt_iif = fl4->flowi4_iif; xdst->u.dst.dev = dev; - dev_hold_track(dev, &xdst->u.dst.dev_tracker, GFP_ATOMIC); + netdev_hold(dev, &xdst->u.dst.dev_tracker, GFP_ATOMIC); /* Sheit... I remember I did this right. 
Apparently, * it was magically lost, so this code needs audit */ diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1b1932502e9e..3497ad1362c0 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -398,13 +398,13 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) if (ndev->cnf.forwarding) dev_disable_lro(dev); /* We refer to the device */ - dev_hold_track(dev, &ndev->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &ndev->dev_tracker, GFP_KERNEL); if (snmp6_alloc_dev(ndev) < 0) { netdev_dbg(dev, "%s: cannot allocate memory for statistics\n", __func__); neigh_parms_release(&nd_tbl, ndev->nd_parms); - dev_put_track(dev, &ndev->dev_tracker); + netdev_put(dev, &ndev->dev_tracker); kfree(ndev); return ERR_PTR(err); } diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index 881d1477d24a..507a8353a6bd 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -263,7 +263,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev) #ifdef NET_REFCNT_DEBUG pr_debug("%s: %s\n", __func__, dev ? 
dev->name : "NIL"); #endif - dev_put_track(dev, &idev->dev_tracker); + netdev_put(dev, &idev->dev_tracker); if (!idev->dead) { pr_warn("Freeing alive inet6 device %p\n", idev); return; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 70564ddccc46..658823e91eca 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -63,6 +63,7 @@ #include <net/compat.h> #include <net/xfrm.h> #include <net/ioam6.h> +#include <net/rawv6.h> #include <linux/uaccess.h> #include <linux/mroute6.h> @@ -1073,6 +1074,8 @@ static int __init inet6_init(void) goto out; } + raw_hashinfo_init(&raw_v6_hashinfo); + err = proto_register(&tcpv6_prot, 1); if (err) goto out; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index a9051df0625d..1bd10ae332e8 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -398,7 +398,7 @@ static void ip6erspan_tunnel_uninit(struct net_device *dev) ip6erspan_tunnel_unlink_md(ign, t); ip6gre_tunnel_unlink(ign, t); dst_cache_reset(&t->dst_cache); - dev_put_track(dev, &t->dev_tracker); + netdev_put(dev, &t->dev_tracker); } static void ip6gre_tunnel_uninit(struct net_device *dev) @@ -411,7 +411,7 @@ static void ip6gre_tunnel_uninit(struct net_device *dev) if (ign->fb_tunnel_dev == dev) WRITE_ONCE(ign->fb_tunnel_dev, NULL); dst_cache_reset(&t->dst_cache); - dev_put_track(dev, &t->dev_tracker); + netdev_put(dev, &t->dev_tracker); } @@ -1500,7 +1500,7 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) } ip6gre_tnl_init_features(dev); - dev_hold_track(dev, &tunnel->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &tunnel->dev_tracker, GFP_KERNEL); return 0; cleanup_dst_cache_init: @@ -1892,7 +1892,7 @@ static int ip6erspan_tap_init(struct net_device *dev) dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; ip6erspan_tnl_link_config(tunnel, 1); - dev_hold_track(dev, &tunnel->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &tunnel->dev_tracker, GFP_KERNEL); return 0; cleanup_dst_cache_init: diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 
19325b7600bb..c7279f205817 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -381,7 +381,7 @@ ip6_tnl_dev_uninit(struct net_device *dev) else ip6_tnl_unlink(ip6n, t); dst_cache_reset(&t->dst_cache); - dev_put_track(dev, &t->dev_tracker); + netdev_put(dev, &t->dev_tracker); } /** @@ -796,7 +796,6 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, struct sk_buff *skb), bool log_ecn_err) { - struct pcpu_sw_netstats *tstats; const struct ipv6hdr *ipv6h = ipv6_hdr(skb); int err; @@ -856,11 +855,7 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, } } - tstats = this_cpu_ptr(tunnel->dev->tstats); - u64_stats_update_begin(&tstats->syncp); - tstats->rx_packets++; - tstats->rx_bytes += skb->len; - u64_stats_update_end(&tstats->syncp); + dev_sw_netstats_rx_add(tunnel->dev, skb->len); skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev))); @@ -1889,7 +1884,7 @@ ip6_tnl_dev_init_gen(struct net_device *dev) dev->min_mtu = ETH_MIN_MTU; dev->max_mtu = IP6_MAX_MTU - dev->hard_header_len; - dev_hold_track(dev, &t->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &t->dev_tracker, GFP_KERNEL); return 0; destroy_dst: diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 3a434d75925c..8fe59a79e800 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -293,7 +293,7 @@ static void vti6_dev_uninit(struct net_device *dev) RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL); else vti6_tnl_unlink(ip6n, t); - dev_put_track(dev, &t->dev_tracker); + netdev_put(dev, &t->dev_tracker); } static int vti6_input_proto(struct sk_buff *skb, int nexthdr, __be32 spi, @@ -936,7 +936,7 @@ static inline int vti6_dev_init_gen(struct net_device *dev) dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; - dev_hold_track(dev, &t->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &t->dev_tracker, GFP_KERNEL); return 0; } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 4e74bc61a3db..ec6e1509fc7c 
100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -62,7 +62,12 @@ struct ip6mr_result { Note that the changes are semaphored via rtnl_lock. */ -static DEFINE_RWLOCK(mrt_lock); +static DEFINE_SPINLOCK(mrt_lock); + +static struct net_device *vif_dev_read(const struct vif_device *vif) +{ + return rcu_dereference(vif->dev); +} /* Multicast router control variables */ @@ -85,11 +90,11 @@ static void ip6mr_free_table(struct mr_table *mrt); static void ip6_mr_forward(struct net *net, struct mr_table *mrt, struct net_device *dev, struct sk_buff *skb, struct mfc6_cache *cache); -static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, +static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt, mifi_t mifi, int assert); static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc, int cmd); -static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt); +static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt); static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb); static void mroute_clean_tables(struct mr_table *mrt, int flags); @@ -398,7 +403,7 @@ static void ip6mr_free_table(struct mr_table *mrt) */ static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(mrt_lock) + __acquires(RCU) { struct mr_vif_iter *iter = seq->private; struct net *net = seq_file_net(seq); @@ -410,14 +415,14 @@ static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos) iter->mrt = mrt; - read_lock(&mrt_lock); + rcu_read_lock(); return mr_vif_seq_start(seq, pos); } static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v) - __releases(mrt_lock) + __releases(RCU) { - read_unlock(&mrt_lock); + rcu_read_unlock(); } static int ip6mr_vif_seq_show(struct seq_file *seq, void *v) @@ -430,7 +435,11 @@ static int ip6mr_vif_seq_show(struct seq_file *seq, void *v) "Interface BytesIn PktsIn BytesOut PktsOut Flags\n"); } else { const struct vif_device *vif 
= v; - const char *name = vif->dev ? vif->dev->name : "none"; + const struct net_device *vif_dev; + const char *name; + + vif_dev = vif_dev_read(vif); + name = vif_dev ? vif_dev->name : "none"; seq_printf(seq, "%2td %-10s %8ld %7ld %8ld %7ld %05X\n", @@ -549,13 +558,11 @@ static int pim6_rcv(struct sk_buff *skb) if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) goto drop; - reg_vif_num = mrt->mroute_reg_vif_num; - read_lock(&mrt_lock); + /* Pairs with WRITE_ONCE() in mif6_add()/mif6_delete() */ + reg_vif_num = READ_ONCE(mrt->mroute_reg_vif_num); if (reg_vif_num >= 0) - reg_dev = mrt->vif_table[reg_vif_num].dev; - dev_hold(reg_dev); - read_unlock(&mrt_lock); + reg_dev = vif_dev_read(&mrt->vif_table[reg_vif_num]); if (!reg_dev) goto drop; @@ -570,7 +577,6 @@ static int pim6_rcv(struct sk_buff *skb) netif_rx(skb); - dev_put(reg_dev); return 0; drop: kfree_skb(skb); @@ -600,11 +606,12 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) goto tx_err; - read_lock(&mrt_lock); dev->stats.tx_bytes += skb->len; dev->stats.tx_packets++; - ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT); - read_unlock(&mrt_lock); + rcu_read_lock(); + ip6mr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num), + MRT6MSG_WHOLEPKT); + rcu_read_unlock(); kfree_skb(skb); return NETDEV_TX_OK; @@ -670,10 +677,11 @@ failure: static int call_ip6mr_vif_entry_notifiers(struct net *net, enum fib_event_type event_type, struct vif_device *vif, + struct net_device *vif_dev, mifi_t vif_index, u32 tb_id) { return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type, - vif, vif_index, tb_id, + vif, vif_dev, vif_index, tb_id, &net->ipv6.ipmr_seq); } @@ -698,23 +706,21 @@ static int mif6_delete(struct mr_table *mrt, int vifi, int notify, v = &mrt->vif_table[vifi]; - if (VIF_EXISTS(mrt, vifi)) - call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net), - FIB_EVENT_VIF_DEL, v, vifi, - mrt->id); - - write_lock_bh(&mrt_lock); - dev = v->dev; - v->dev 
= NULL; - - if (!dev) { - write_unlock_bh(&mrt_lock); + dev = rtnl_dereference(v->dev); + if (!dev) return -EADDRNOTAVAIL; - } + + call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net), + FIB_EVENT_VIF_DEL, v, dev, + vifi, mrt->id); + spin_lock(&mrt_lock); + RCU_INIT_POINTER(v->dev, NULL); #ifdef CONFIG_IPV6_PIMSM_V2 - if (vifi == mrt->mroute_reg_vif_num) - mrt->mroute_reg_vif_num = -1; + if (vifi == mrt->mroute_reg_vif_num) { + /* Pairs with READ_ONCE() in ip6mr_cache_report() and reg_vif_xmit() */ + WRITE_ONCE(mrt->mroute_reg_vif_num, -1); + } #endif if (vifi + 1 == mrt->maxvif) { @@ -723,10 +729,10 @@ static int mif6_delete(struct mr_table *mrt, int vifi, int notify, if (VIF_EXISTS(mrt, tmp)) break; } - mrt->maxvif = tmp + 1; + WRITE_ONCE(mrt->maxvif, tmp + 1); } - write_unlock_bh(&mrt_lock); + spin_unlock(&mrt_lock); dev_set_allmulti(dev, -1); @@ -741,7 +747,7 @@ static int mif6_delete(struct mr_table *mrt, int vifi, int notify, if ((v->flags & MIFF_REGISTER) && !notify) unregister_netdevice_queue(dev, head); - dev_put_track(dev, &v->dev_tracker); + netdev_put(dev, &v->dev_tracker); return 0; } @@ -826,7 +832,7 @@ static void ipmr_expire_process(struct timer_list *t) spin_unlock(&mfc_unres_lock); } -/* Fill oifs list. It is called under write locked mrt_lock. */ +/* Fill oifs list. It is called under locked mrt_lock. 
*/ static void ip6mr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache, @@ -912,18 +918,18 @@ static int mif6_add(struct net *net, struct mr_table *mrt, MIFF_REGISTER); /* And finish update writing critical data */ - write_lock_bh(&mrt_lock); - v->dev = dev; + spin_lock(&mrt_lock); + rcu_assign_pointer(v->dev, dev); netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC); #ifdef CONFIG_IPV6_PIMSM_V2 if (v->flags & MIFF_REGISTER) - mrt->mroute_reg_vif_num = vifi; + WRITE_ONCE(mrt->mroute_reg_vif_num, vifi); #endif if (vifi + 1 > mrt->maxvif) - mrt->maxvif = vifi + 1; - write_unlock_bh(&mrt_lock); + WRITE_ONCE(mrt->maxvif, vifi + 1); + spin_unlock(&mrt_lock); call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, - v, vifi, mrt->id); + v, dev, vifi, mrt->id); return 0; } @@ -1028,10 +1034,10 @@ static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt, /* * Bounce a cache query up to pim6sd and netlink. * - * Called under mrt_lock. + * Called under rcu_read_lock() */ -static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, +static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt, mifi_t mifi, int assert) { struct sock *mroute6_sk; @@ -1072,7 +1078,7 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, if (assert == MRT6MSG_WRMIFWHOLE) msg->im6_mif = mifi; else - msg->im6_mif = mrt->mroute_reg_vif_num; + msg->im6_mif = READ_ONCE(mrt->mroute_reg_vif_num); msg->im6_pad = 0; msg->im6_src = ipv6_hdr(pkt)->saddr; msg->im6_dst = ipv6_hdr(pkt)->daddr; @@ -1107,10 +1113,8 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, skb->ip_summed = CHECKSUM_UNNECESSARY; } - rcu_read_lock(); mroute6_sk = rcu_dereference(mrt->mroute_sk); if (!mroute6_sk) { - rcu_read_unlock(); kfree_skb(skb); return -EINVAL; } @@ -1119,7 +1123,7 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, /* Deliver to user space multicast routing algorithms */ ret = 
sock_queue_rcv_skb(mroute6_sk, skb); - rcu_read_unlock(); + if (ret < 0) { net_warn_ratelimited("mroute6: pending queue full, dropping entries\n"); kfree_skb(skb); @@ -1243,7 +1247,7 @@ static int ip6mr_device_event(struct notifier_block *this, ip6mr_for_each_table(mrt, net) { v = &mrt->vif_table[0]; for (ct = 0; ct < mrt->maxvif; ct++, v++) { - if (v->dev == dev) + if (rcu_access_pointer(v->dev) == dev) mif6_delete(mrt, ct, 1, NULL); } } @@ -1262,7 +1266,7 @@ static int ip6mr_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack) { return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump, - ip6mr_mr_table_iter, &mrt_lock, extack); + ip6mr_mr_table_iter, extack); } static struct notifier_block ip6_mr_notifier = { @@ -1437,12 +1441,12 @@ static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt, &mfc->mf6cc_mcastgrp.sin6_addr, parent); rcu_read_unlock(); if (c) { - write_lock_bh(&mrt_lock); + spin_lock(&mrt_lock); c->_c.mfc_parent = mfc->mf6cc_parent; ip6mr_update_thresholds(mrt, &c->_c, ttls); if (!mrtsock) c->_c.mfc_flags |= MFC_STATIC; - write_unlock_bh(&mrt_lock); + spin_unlock(&mrt_lock); call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c, mrt->id); mr6_netlink_event(mrt, c, RTM_NEWROUTE); @@ -1560,7 +1564,7 @@ static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk) struct net *net = sock_net(sk); rtnl_lock(); - write_lock_bh(&mrt_lock); + spin_lock(&mrt_lock); if (rtnl_dereference(mrt->mroute_sk)) { err = -EADDRINUSE; } else { @@ -1568,7 +1572,7 @@ static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk) sock_set_flag(sk, SOCK_RCU_FREE); atomic_inc(&net->ipv6.devconf_all->mc_forwarding); } - write_unlock_bh(&mrt_lock); + spin_unlock(&mrt_lock); if (!err) inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, @@ -1598,14 +1602,14 @@ int ip6mr_sk_done(struct sock *sk) rtnl_lock(); ip6mr_for_each_table(mrt, net) { if (sk == rtnl_dereference(mrt->mroute_sk)) { - write_lock_bh(&mrt_lock); + 
spin_lock(&mrt_lock); RCU_INIT_POINTER(mrt->mroute_sk, NULL); /* Note that mroute_sk had SOCK_RCU_FREE set, * so the RCU grace period before sk freeing * is guaranteed by sk_destruct() */ atomic_dec(&devconf->mc_forwarding); - write_unlock_bh(&mrt_lock); + spin_unlock(&mrt_lock); inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, NETCONFA_IFINDEX_ALL, @@ -1891,20 +1895,20 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) if (vr.mifi >= mrt->maxvif) return -EINVAL; vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif); - read_lock(&mrt_lock); + rcu_read_lock(); vif = &mrt->vif_table[vr.mifi]; if (VIF_EXISTS(mrt, vr.mifi)) { - vr.icount = vif->pkt_in; - vr.ocount = vif->pkt_out; - vr.ibytes = vif->bytes_in; - vr.obytes = vif->bytes_out; - read_unlock(&mrt_lock); + vr.icount = READ_ONCE(vif->pkt_in); + vr.ocount = READ_ONCE(vif->pkt_out); + vr.ibytes = READ_ONCE(vif->bytes_in); + vr.obytes = READ_ONCE(vif->bytes_out); + rcu_read_unlock(); if (copy_to_user(arg, &vr, sizeof(vr))) return -EFAULT; return 0; } - read_unlock(&mrt_lock); + rcu_read_unlock(); return -EADDRNOTAVAIL; case SIOCGETSGCNT_IN6: if (copy_from_user(&sr, arg, sizeof(sr))) @@ -1966,20 +1970,20 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) if (vr.mifi >= mrt->maxvif) return -EINVAL; vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif); - read_lock(&mrt_lock); + rcu_read_lock(); vif = &mrt->vif_table[vr.mifi]; if (VIF_EXISTS(mrt, vr.mifi)) { - vr.icount = vif->pkt_in; - vr.ocount = vif->pkt_out; - vr.ibytes = vif->bytes_in; - vr.obytes = vif->bytes_out; - read_unlock(&mrt_lock); + vr.icount = READ_ONCE(vif->pkt_in); + vr.ocount = READ_ONCE(vif->pkt_out); + vr.ibytes = READ_ONCE(vif->bytes_in); + vr.obytes = READ_ONCE(vif->bytes_out); + rcu_read_unlock(); if (copy_to_user(arg, &vr, sizeof(vr))) return -EFAULT; return 0; } - read_unlock(&mrt_lock); + rcu_read_unlock(); return -EADDRNOTAVAIL; case SIOCGETSGCNT_IN6: if (copy_from_user(&sr, arg, 
sizeof(sr))) @@ -2021,21 +2025,22 @@ static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct static int ip6mr_forward2(struct net *net, struct mr_table *mrt, struct sk_buff *skb, int vifi) { - struct ipv6hdr *ipv6h; struct vif_device *vif = &mrt->vif_table[vifi]; - struct net_device *dev; + struct net_device *vif_dev; + struct ipv6hdr *ipv6h; struct dst_entry *dst; struct flowi6 fl6; - if (!vif->dev) + vif_dev = vif_dev_read(vif); + if (!vif_dev) goto out_free; #ifdef CONFIG_IPV6_PIMSM_V2 if (vif->flags & MIFF_REGISTER) { - vif->pkt_out++; - vif->bytes_out += skb->len; - vif->dev->stats.tx_bytes += skb->len; - vif->dev->stats.tx_packets++; + WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1); + WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len); + vif_dev->stats.tx_bytes += skb->len; + vif_dev->stats.tx_packets++; ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT); goto out_free; } @@ -2068,14 +2073,13 @@ static int ip6mr_forward2(struct net *net, struct mr_table *mrt, * not mrouter) cannot join to more than one interface - it will * result in receiving multiple packets. */ - dev = vif->dev; - skb->dev = dev; - vif->pkt_out++; - vif->bytes_out += skb->len; + skb->dev = vif_dev; + WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1); + WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len); /* We are about to write */ /* XXX: extension headers? 
*/ - if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev))) + if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(vif_dev))) goto out_free; ipv6h = ipv6_hdr(skb); @@ -2084,7 +2088,7 @@ static int ip6mr_forward2(struct net *net, struct mr_table *mrt, IP6CB(skb)->flags |= IP6SKB_FORWARDED; return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, - net, NULL, skb, skb->dev, dev, + net, NULL, skb, skb->dev, vif_dev, ip6mr_forward2_finish); out_free: @@ -2092,17 +2096,20 @@ out_free: return 0; } +/* Called with rcu_read_lock() */ static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev) { int ct; - for (ct = mrt->maxvif - 1; ct >= 0; ct--) { - if (mrt->vif_table[ct].dev == dev) + /* Pairs with WRITE_ONCE() in mif6_delete()/mif6_add() */ + for (ct = READ_ONCE(mrt->maxvif) - 1; ct >= 0; ct--) { + if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev) break; } return ct; } +/* Called under rcu_read_lock() */ static void ip6_mr_forward(struct net *net, struct mr_table *mrt, struct net_device *dev, struct sk_buff *skb, struct mfc6_cache *c) @@ -2122,20 +2129,18 @@ static void ip6_mr_forward(struct net *net, struct mr_table *mrt, /* For an (*,G) entry, we only check that the incoming * interface is part of the static tree. */ - rcu_read_lock(); cache_proxy = mr_mfc_find_any_parent(mrt, vif); if (cache_proxy && cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) { rcu_read_unlock(); goto forward; } - rcu_read_unlock(); } /* * Wrong interface: drop packet and (maybe) send PIM assert. 
*/ - if (mrt->vif_table[vif].dev != dev) { + if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) { c->_c.mfc_un.res.wrong_if++; if (true_vifi >= 0 && mrt->mroute_do_assert && @@ -2159,8 +2164,10 @@ static void ip6_mr_forward(struct net *net, struct mr_table *mrt, } forward: - mrt->vif_table[vif].pkt_in++; - mrt->vif_table[vif].bytes_in += skb->len; + WRITE_ONCE(mrt->vif_table[vif].pkt_in, + mrt->vif_table[vif].pkt_in + 1); + WRITE_ONCE(mrt->vif_table[vif].bytes_in, + mrt->vif_table[vif].bytes_in + skb->len); /* * Forward the frame @@ -2238,7 +2245,6 @@ int ip6_mr_input(struct sk_buff *skb) return err; } - read_lock(&mrt_lock); cache = ip6mr_cache_find(mrt, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); if (!cache) { @@ -2259,19 +2265,15 @@ int ip6_mr_input(struct sk_buff *skb) vif = ip6mr_find_vif(mrt, dev); if (vif >= 0) { int err = ip6mr_cache_unresolved(mrt, vif, skb, dev); - read_unlock(&mrt_lock); return err; } - read_unlock(&mrt_lock); kfree_skb(skb); return -ENODEV; } ip6_mr_forward(net, mrt, dev, skb, cache); - read_unlock(&mrt_lock); - return 0; } @@ -2287,7 +2289,7 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, if (!mrt) return -ENOENT; - read_lock(&mrt_lock); + rcu_read_lock(); cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr); if (!cache && skb->dev) { int vif = ip6mr_find_vif(mrt, skb->dev); @@ -2305,14 +2307,14 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, dev = skb->dev; if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) { - read_unlock(&mrt_lock); + rcu_read_unlock(); return -ENODEV; } /* really correct? 
*/ skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC); if (!skb2) { - read_unlock(&mrt_lock); + rcu_read_unlock(); return -ENOMEM; } @@ -2335,13 +2337,13 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, iph->daddr = rt->rt6i_dst.addr; err = ip6mr_cache_unresolved(mrt, vif, skb2, dev); - read_unlock(&mrt_lock); + rcu_read_unlock(); return err; } err = mr_fill_mroute(mrt, skb, &cache->_c, rtm); - read_unlock(&mrt_lock); + rcu_read_unlock(); return err; } @@ -2460,7 +2462,7 @@ static size_t mrt6msg_netlink_msgsize(size_t payloadlen) return len; } -static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt) +static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt) { struct net *net = read_pnet(&mrt->net); struct nlmsghdr *nlh; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 3b7cbd522b54..722de9dd0ff7 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -61,46 +61,30 @@ #define ICMPV6_HDRLEN 4 /* ICMPv6 header, RFC 4443 Section 2.1 */ -struct raw_hashinfo raw_v6_hashinfo = { - .lock = __RW_LOCK_UNLOCKED(raw_v6_hashinfo.lock), -}; +struct raw_hashinfo raw_v6_hashinfo; EXPORT_SYMBOL_GPL(raw_v6_hashinfo); -struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, - unsigned short num, const struct in6_addr *loc_addr, - const struct in6_addr *rmt_addr, int dif, int sdif) +bool raw_v6_match(struct net *net, struct sock *sk, unsigned short num, + const struct in6_addr *loc_addr, + const struct in6_addr *rmt_addr, int dif, int sdif) { - bool is_multicast = ipv6_addr_is_multicast(loc_addr); - - sk_for_each_from(sk) - if (inet_sk(sk)->inet_num == num) { - - if (!net_eq(sock_net(sk), net)) - continue; - - if (!ipv6_addr_any(&sk->sk_v6_daddr) && - !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) - continue; - - if (!raw_sk_bound_dev_eq(net, sk->sk_bound_dev_if, - dif, sdif)) - continue; - - if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { - if (ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr)) - goto found; 
- if (is_multicast && - inet6_mc_check(sk, loc_addr, rmt_addr)) - goto found; - continue; - } - goto found; - } - sk = NULL; -found: - return sk; + if (inet_sk(sk)->inet_num != num || + !net_eq(sock_net(sk), net) || + (!ipv6_addr_any(&sk->sk_v6_daddr) && + !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) || + !raw_sk_bound_dev_eq(net, sk->sk_bound_dev_if, + dif, sdif)) + return false; + + if (ipv6_addr_any(&sk->sk_v6_rcv_saddr) || + ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr) || + (ipv6_addr_is_multicast(loc_addr) && + inet6_mc_check(sk, loc_addr, rmt_addr))) + return true; + + return false; } -EXPORT_SYMBOL_GPL(__raw_v6_lookup); +EXPORT_SYMBOL_GPL(raw_v6_match); /* * 0 - deliver @@ -156,31 +140,27 @@ EXPORT_SYMBOL(rawv6_mh_filter_unregister); */ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) { + struct net *net = dev_net(skb->dev); + struct hlist_nulls_head *hlist; + struct hlist_nulls_node *hnode; const struct in6_addr *saddr; const struct in6_addr *daddr; struct sock *sk; bool delivered = false; __u8 hash; - struct net *net; saddr = &ipv6_hdr(skb)->saddr; daddr = saddr + 1; hash = nexthdr & (RAW_HTABLE_SIZE - 1); - - read_lock(&raw_v6_hashinfo.lock); - sk = sk_head(&raw_v6_hashinfo.ht[hash]); - - if (!sk) - goto out; - - net = dev_net(skb->dev); - sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr, - inet6_iif(skb), inet6_sdif(skb)); - - while (sk) { + hlist = &raw_v6_hashinfo.ht[hash]; + rcu_read_lock(); + sk_nulls_for_each(sk, hnode, hlist) { int filtered; + if (!raw_v6_match(net, sk, nexthdr, daddr, saddr, + inet6_iif(skb), inet6_sdif(skb))) + continue; delivered = true; switch (nexthdr) { case IPPROTO_ICMPV6: @@ -219,23 +199,14 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) rawv6_rcv(sk, clone); } } - sk = __raw_v6_lookup(net, sk_next(sk), nexthdr, daddr, saddr, - inet6_iif(skb), inet6_sdif(skb)); } -out: - read_unlock(&raw_v6_hashinfo.lock); + rcu_read_unlock(); return delivered; } bool raw6_local_deliver(struct sk_buff 
*skb, int nexthdr) { - struct sock *raw_sk; - - raw_sk = sk_head(&raw_v6_hashinfo.ht[nexthdr & (RAW_HTABLE_SIZE - 1)]); - if (raw_sk && !ipv6_raw_deliver(skb, nexthdr)) - raw_sk = NULL; - - return raw_sk != NULL; + return ipv6_raw_deliver(skb, nexthdr); } /* This cleans up af_inet6 a bit. -DaveM */ @@ -361,30 +332,25 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, void raw6_icmp_error(struct sk_buff *skb, int nexthdr, u8 type, u8 code, int inner_offset, __be32 info) { + struct net *net = dev_net(skb->dev); + struct hlist_nulls_head *hlist; + struct hlist_nulls_node *hnode; struct sock *sk; int hash; - const struct in6_addr *saddr, *daddr; - struct net *net; hash = nexthdr & (RAW_HTABLE_SIZE - 1); - - read_lock(&raw_v6_hashinfo.lock); - sk = sk_head(&raw_v6_hashinfo.ht[hash]); - if (sk) { + hlist = &raw_v6_hashinfo.ht[hash]; + rcu_read_lock(); + sk_nulls_for_each(sk, hnode, hlist) { /* Note: ipv6_hdr(skb) != skb->data */ const struct ipv6hdr *ip6h = (const struct ipv6hdr *)skb->data; - saddr = &ip6h->saddr; - daddr = &ip6h->daddr; - net = dev_net(skb->dev); - - while ((sk = __raw_v6_lookup(net, sk, nexthdr, saddr, daddr, - inet6_iif(skb), inet6_iif(skb)))) { - rawv6_err(sk, skb, NULL, type, code, - inner_offset, info); - sk = sk_next(sk); - } + + if (!raw_v6_match(net, sk, nexthdr, &ip6h->saddr, &ip6h->daddr, + inet6_iif(skb), inet6_iif(skb))) + continue; + rawv6_err(sk, skb, NULL, type, code, inner_offset, info); } - read_unlock(&raw_v6_hashinfo.lock); + rcu_read_unlock(); } static inline int rawv6_rcv_skb(struct sock *sk, struct sk_buff *skb) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d25dc83bac62..0be01a4d48c1 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -182,9 +182,9 @@ static void rt6_uncached_list_flush_dev(struct net_device *dev) if (rt_dev == dev) { rt->dst.dev = blackhole_netdev; - dev_replace_track(rt_dev, blackhole_netdev, - &rt->dst.dev_tracker, - GFP_ATOMIC); + netdev_ref_replace(rt_dev, blackhole_netdev, + 
&rt->dst.dev_tracker, + GFP_ATOMIC); handled = true; } if (handled) @@ -607,7 +607,7 @@ static void rt6_probe_deferred(struct work_struct *w) addrconf_addr_solict_mult(&work->target, &mcaddr); ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0); - dev_put_track(work->dev, &work->dev_tracker); + netdev_put(work->dev, &work->dev_tracker); kfree(work); } @@ -661,7 +661,7 @@ static void rt6_probe(struct fib6_nh *fib6_nh) } else { INIT_WORK(&work->work, rt6_probe_deferred); work->target = *nh_gw; - dev_hold_track(dev, &work->dev_tracker, GFP_ATOMIC); + netdev_hold(dev, &work->dev_tracker, GFP_ATOMIC); work->dev = dev; schedule_work(&work->work); } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index c0b138c20992..fab89fd978f0 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -521,7 +521,7 @@ static void ipip6_tunnel_uninit(struct net_device *dev) ipip6_tunnel_del_prl(tunnel, NULL); } dst_cache_reset(&tunnel->dst_cache); - dev_put_track(dev, &tunnel->dev_tracker); + netdev_put(dev, &tunnel->dev_tracker); } static int ipip6_err(struct sk_buff *skb, u32 info) @@ -686,8 +686,6 @@ static int ipip6_rcv(struct sk_buff *skb) tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->saddr, iph->daddr, sifindex); if (tunnel) { - struct pcpu_sw_netstats *tstats; - if (tunnel->parms.iph.protocol != IPPROTO_IPV6 && tunnel->parms.iph.protocol != 0) goto out; @@ -724,11 +722,7 @@ static int ipip6_rcv(struct sk_buff *skb) } } - tstats = this_cpu_ptr(tunnel->dev->tstats); - u64_stats_update_begin(&tstats->syncp); - tstats->rx_packets++; - tstats->rx_bytes += skb->len; - u64_stats_update_end(&tstats->syncp); + dev_sw_netstats_rx_add(tunnel->dev, skb->len); netif_rx(skb); @@ -1463,7 +1457,7 @@ static int ipip6_tunnel_init(struct net_device *dev) dev->tstats = NULL; return err; } - dev_hold_track(dev, &tunnel->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &tunnel->dev_tracker, GFP_KERNEL); return 0; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 
f37dd4aa91c6..c72448ba6dc9 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2159,7 +2159,10 @@ struct proto tcpv6_prot = { .leave_memory_pressure = tcp_leave_memory_pressure, .stream_memory_free = tcp_stream_memory_free, .sockets_allocated = &tcp_sockets_allocated, + .memory_allocated = &tcp_memory_allocated, + .per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc, + .memory_pressure = &tcp_memory_pressure, .orphan_count = &tcp_orphan_count, .sysctl_mem = sysctl_tcp_mem, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 55afd7f39c04..be074f07073a 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1740,7 +1740,10 @@ struct proto udpv6_prot = { #ifdef CONFIG_BPF_SYSCALL .psock_update_sk_prot = udp_bpf_update_proto, #endif + .memory_allocated = &udp_memory_allocated, + .per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc, + .sysctl_mem = sysctl_udp_mem, .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min), .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min), diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index fbb700d3f437..b70725856259 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -48,7 +48,10 @@ struct proto udplitev6_prot = { .unhash = udp_lib_unhash, .rehash = udp_v6_rehash, .get_port = udp_v6_get_port, + .memory_allocated = &udp_memory_allocated, + .per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc, + .sysctl_mem = sysctl_udp_mem, .obj_size = sizeof(struct udp6_sock), .h.udp_table = &udplite_table, diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index e64e427a51cf..4a4b0e49ec92 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -73,11 +73,11 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, struct rt6_info *rt = (struct rt6_info *)xdst->route; xdst->u.dst.dev = dev; - dev_hold_track(dev, &xdst->u.dst.dev_tracker, GFP_ATOMIC); + netdev_hold(dev, &xdst->u.dst.dev_tracker, GFP_ATOMIC); xdst->u.rt6.rt6i_idev = in6_dev_get(dev); if 
(!xdst->u.rt6.rt6i_idev) { - dev_put_track(dev, &xdst->u.dst.dev_tracker); + netdev_put(dev, &xdst->u.dst.dev_tracker); return -ENODEV; } diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index a0385ddbffcf..498a0c35b7bb 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -278,8 +278,6 @@ static void iucv_sock_destruct(struct sock *sk) skb_queue_purge(&sk->sk_receive_queue); skb_queue_purge(&sk->sk_error_queue); - sk_mem_reclaim(sk); - if (!sock_flag(sk, SOCK_DEAD)) { pr_err("Attempt to release alive iucv socket %p\n", sk); return; diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 7f555d2e5357..da7fe94bea2e 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -224,7 +224,7 @@ static int llc_ui_release(struct socket *sock) } else { release_sock(sk); } - dev_put_track(llc->dev, &llc->dev_tracker); + netdev_put(llc->dev, &llc->dev_tracker); sock_put(sk); llc_sk_free(sk); out: diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index f7896f257e1b..881efbfb96f6 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -5,7 +5,7 @@ * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation */ #include <linux/ieee80211.h> @@ -438,7 +438,6 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct sta_info *sta = NULL; - const struct ieee80211_cipher_scheme *cs = NULL; struct ieee80211_key *key; int err; @@ -456,23 +455,12 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, if (WARN_ON_ONCE(fips_enabled)) return -EINVAL; break; - case WLAN_CIPHER_SUITE_CCMP: - case WLAN_CIPHER_SUITE_CCMP_256: - case WLAN_CIPHER_SUITE_AES_CMAC: - case WLAN_CIPHER_SUITE_BIP_CMAC_256: - case 
WLAN_CIPHER_SUITE_BIP_GMAC_128: - case WLAN_CIPHER_SUITE_BIP_GMAC_256: - case WLAN_CIPHER_SUITE_GCMP: - case WLAN_CIPHER_SUITE_GCMP_256: - break; default: - cs = ieee80211_cs_get(local, params->cipher, sdata->vif.type); break; } key = ieee80211_key_alloc(params->cipher, key_idx, params->key_len, - params->key, params->seq_len, params->seq, - cs); + params->key, params->seq_len, params->seq); if (IS_ERR(key)) return PTR_ERR(key); @@ -537,9 +525,6 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, break; } - if (sta) - sta->cipher_scheme = cs; - err = ieee80211_key_link(key, sdata, sta); out_unlock: @@ -548,33 +533,53 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, return err; } +static struct ieee80211_key * +ieee80211_lookup_key(struct ieee80211_sub_if_data *sdata, + u8 key_idx, bool pairwise, const u8 *mac_addr) +{ + struct ieee80211_local *local = sdata->local; + struct sta_info *sta; + + if (mac_addr) { + sta = sta_info_get_bss(sdata, mac_addr); + if (!sta) + return NULL; + + if (pairwise && key_idx < NUM_DEFAULT_KEYS) + return rcu_dereference_check_key_mtx(local, + sta->ptk[key_idx]); + + if (!pairwise && + key_idx < NUM_DEFAULT_KEYS + + NUM_DEFAULT_MGMT_KEYS + + NUM_DEFAULT_BEACON_KEYS) + return rcu_dereference_check_key_mtx(local, + sta->deflink.gtk[key_idx]); + + return NULL; + } + + if (key_idx < NUM_DEFAULT_KEYS + + NUM_DEFAULT_MGMT_KEYS + + NUM_DEFAULT_BEACON_KEYS) + return rcu_dereference_check_key_mtx(local, + sdata->keys[key_idx]); + + return NULL; +} + static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, u8 key_idx, bool pairwise, const u8 *mac_addr) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; - struct sta_info *sta; - struct ieee80211_key *key = NULL; + struct ieee80211_key *key; int ret; mutex_lock(&local->sta_mtx); mutex_lock(&local->key_mtx); - if (mac_addr) { - ret = -ENOENT; - - sta = 
sta_info_get_bss(sdata, mac_addr); - if (!sta) - goto out_unlock; - - if (pairwise) - key = key_mtx_dereference(local, sta->ptk[key_idx]); - else - key = key_mtx_dereference(local, - sta->deflink.gtk[key_idx]); - } else - key = key_mtx_dereference(local, sdata->keys[key_idx]); - + key = ieee80211_lookup_key(sdata, key_idx, pairwise, mac_addr); if (!key) { ret = -ENOENT; goto out_unlock; @@ -597,10 +602,9 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, struct key_params *params)) { struct ieee80211_sub_if_data *sdata; - struct sta_info *sta = NULL; u8 seq[6] = {0}; struct key_params params; - struct ieee80211_key *key = NULL; + struct ieee80211_key *key; u64 pn64; u32 iv32; u16 iv16; @@ -611,20 +615,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, rcu_read_lock(); - if (mac_addr) { - sta = sta_info_get_bss(sdata, mac_addr); - if (!sta) - goto out; - - if (pairwise && key_idx < NUM_DEFAULT_KEYS) - key = rcu_dereference(sta->ptk[key_idx]); - else if (!pairwise && - key_idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS + - NUM_DEFAULT_BEACON_KEYS) - key = rcu_dereference(sta->deflink.gtk[key_idx]); - } else - key = rcu_dereference(sdata->keys[key_idx]); - + key = ieee80211_lookup_key(sdata, key_idx, pairwise, mac_addr); if (!key) goto out; @@ -1207,9 +1198,6 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, params->crypto.control_port_over_nl80211; sdata->control_port_no_preauth = params->crypto.control_port_no_preauth; - sdata->encrypt_headroom = ieee80211_cs_headroom(sdata->local, - &params->crypto, - sdata->vif.type); list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) { vlan->control_port_protocol = @@ -1220,10 +1208,6 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, params->crypto.control_port_over_nl80211; vlan->control_port_no_preauth = params->crypto.control_port_no_preauth; - vlan->encrypt_headroom = - ieee80211_cs_headroom(sdata->local, - 
&params->crypto, - vlan->vif.type); } sdata->vif.bss_conf.dtim_period = params->dtim_period; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 86ef0a46a68c..1cf331572de1 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -5,7 +5,7 @@ * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2015 Intel Mobile Communications GmbH - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation */ #ifndef IEEE80211_I_H @@ -944,7 +944,6 @@ struct ieee80211_sub_if_data { bool control_port_no_encrypt; bool control_port_no_preauth; bool control_port_over_nl80211; - int encrypt_headroom; atomic_t num_tx_queued; struct ieee80211_tx_queue_params tx_conf[IEEE80211_NUM_ACS]; @@ -2483,14 +2482,6 @@ void ieee80211_dfs_radar_detected_work(struct work_struct *work); int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata, struct cfg80211_csa_settings *csa_settings); -bool ieee80211_cs_valid(const struct ieee80211_cipher_scheme *cs); -bool ieee80211_cs_list_valid(const struct ieee80211_cipher_scheme *cs, int n); -const struct ieee80211_cipher_scheme * -ieee80211_cs_get(struct ieee80211_local *local, u32 cipher, - enum nl80211_iftype iftype); -int ieee80211_cs_headroom(struct ieee80211_local *local, - struct cfg80211_crypto_settings *crypto, - enum nl80211_iftype iftype); void ieee80211_recalc_dtim(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata); int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 41531478437c..fb8d102fca48 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -8,7 +8,7 @@ * Copyright 2008, Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (c) 2016 Intel Deutschland GmbH - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 
Intel Corporation */ #include <linux/slab.h> #include <linux/kernel.h> @@ -1036,8 +1036,6 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local) wiphy_name(local->hw.wiphy)); sdata->wdev.iftype = NL80211_IFTYPE_MONITOR; - sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM; - ieee80211_set_default_queues(sdata); ret = drv_add_interface(local, sdata); @@ -1644,7 +1642,6 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, sdata->control_port_no_encrypt = false; sdata->control_port_over_nl80211 = false; sdata->control_port_no_preauth = false; - sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM; sdata->vif.bss_conf.idle = true; sdata->vif.bss_conf.txpower = INT_MIN; /* unset */ @@ -2116,8 +2113,6 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, sdata->ap_power_level = IEEE80211_UNSET_POWER_LEVEL; sdata->user_power_level = local->user_power_level; - sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM; - /* setup type-dependent data */ ieee80211_setup_sdata(sdata, type); diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 0fcf8aebedc4..c3476de4b14d 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -6,7 +6,7 @@ * Copyright 2007-2008 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2015-2017 Intel Deutschland GmbH - * Copyright 2018-2020 Intel Corporation + * Copyright 2018-2020, 2022 Intel Corporation */ #include <linux/if_ether.h> @@ -531,8 +531,7 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, struct ieee80211_key * ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, const u8 *key_data, - size_t seq_len, const u8 *seq, - const struct ieee80211_cipher_scheme *cs) + size_t seq_len, const u8 *seq) { struct ieee80211_key *key; int i, j, err; @@ -675,21 +674,6 @@ ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, return ERR_PTR(err); } break; - default: - if (cs) { - if (seq_len && seq_len != 
cs->pn_len) { - kfree(key); - return ERR_PTR(-EINVAL); - } - - key->conf.iv_len = cs->hdr_len; - key->conf.icv_len = cs->mic_len; - for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++) - for (j = 0; j < seq_len; j++) - key->u.gen.rx_pn[i][j] = - seq[seq_len - j - 1]; - key->flags |= KEY_FLAG_CIPHER_SCHEME; - } } memcpy(key->conf.key, key_data, key_len); INIT_LIST_HEAD(&key->list); @@ -1294,7 +1278,7 @@ ieee80211_gtk_rekey_add(struct ieee80211_vif *vif, key = ieee80211_key_alloc(keyconf->cipher, keyconf->keyidx, keyconf->keylen, keyconf->key, - 0, NULL, NULL); + 0, NULL); if (IS_ERR(key)) return ERR_CAST(key); diff --git a/net/mac80211/key.h b/net/mac80211/key.h index 1e326c89d721..e994dcea1ce3 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -2,7 +2,7 @@ /* * Copyright 2002-2004, Instant802 Networks, Inc. * Copyright 2005, Devicescape Software, Inc. - * Copyright (C) 2019 Intel Corporation + * Copyright (C) 2019, 2022 Intel Corporation */ #ifndef IEEE80211_KEY_H @@ -30,12 +30,10 @@ struct sta_info; * @KEY_FLAG_UPLOADED_TO_HARDWARE: Indicates that this key is present * in the hardware for TX crypto hardware acceleration. * @KEY_FLAG_TAINTED: Key is tainted and packets should be dropped. - * @KEY_FLAG_CIPHER_SCHEME: This key is for a hardware cipher scheme */ enum ieee80211_internal_key_flags { KEY_FLAG_UPLOADED_TO_HARDWARE = BIT(0), KEY_FLAG_TAINTED = BIT(1), - KEY_FLAG_CIPHER_SCHEME = BIT(2), }; enum ieee80211_internal_tkip_state { @@ -140,8 +138,7 @@ struct ieee80211_key { struct ieee80211_key * ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, const u8 *key_data, - size_t seq_len, const u8 *seq, - const struct ieee80211_cipher_scheme *cs); + size_t seq_len, const u8 *seq); /* * Insert a key into data structures (sdata, sta if necessary) * to make it used, free old key. On failure, also free the new key. 
@@ -166,6 +163,8 @@ void ieee80211_reenable_keys(struct ieee80211_sub_if_data *sdata); #define key_mtx_dereference(local, ref) \ rcu_dereference_protected(ref, lockdep_is_held(&((local)->key_mtx))) +#define rcu_dereference_check_key_mtx(local, ref) \ + rcu_dereference_check(ref, lockdep_is_held(&((local)->key_mtx))) void ieee80211_delayed_tailroom_dec(struct work_struct *wk); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 5a385d4146b9..4f3e93c0819b 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -5,7 +5,7 @@ * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation */ #include <net/mac80211.h> @@ -778,7 +778,7 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) { bool have_wep = !fips_enabled; /* FIPS does not permit the use of RC4 */ bool have_mfp = ieee80211_hw_check(&local->hw, MFP_CAPABLE); - int n_suites = 0, r = 0, w = 0; + int r = 0, w = 0; u32 *suites; static const u32 cipher_suites[] = { /* keep WEP first, it may be removed below */ @@ -824,10 +824,9 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) continue; suites[w++] = suite; } - } else if (!local->hw.cipher_schemes) { - /* If the driver doesn't have cipher schemes, there's nothing - * else to do other than assign the (software supported and - * perhaps offloaded) cipher suites. 
+ } else { + /* assign the (software supported and perhaps offloaded) + * cipher suites */ local->hw.wiphy->cipher_suites = cipher_suites; local->hw.wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites); @@ -842,58 +841,6 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) /* not dynamically allocated, so just return */ return 0; - } else { - const struct ieee80211_cipher_scheme *cs; - - cs = local->hw.cipher_schemes; - - /* Driver specifies cipher schemes only (but not cipher suites - * including the schemes) - * - * We start counting ciphers defined by schemes, TKIP, CCMP, - * CCMP-256, GCMP, and GCMP-256 - */ - n_suites = local->hw.n_cipher_schemes + 5; - - /* check if we have WEP40 and WEP104 */ - if (have_wep) - n_suites += 2; - - /* check if we have AES_CMAC, BIP-CMAC-256, BIP-GMAC-128, - * BIP-GMAC-256 - */ - if (have_mfp) - n_suites += 4; - - suites = kmalloc_array(n_suites, sizeof(u32), GFP_KERNEL); - if (!suites) - return -ENOMEM; - - suites[w++] = WLAN_CIPHER_SUITE_CCMP; - suites[w++] = WLAN_CIPHER_SUITE_CCMP_256; - suites[w++] = WLAN_CIPHER_SUITE_TKIP; - suites[w++] = WLAN_CIPHER_SUITE_GCMP; - suites[w++] = WLAN_CIPHER_SUITE_GCMP_256; - - if (have_wep) { - suites[w++] = WLAN_CIPHER_SUITE_WEP40; - suites[w++] = WLAN_CIPHER_SUITE_WEP104; - } - - if (have_mfp) { - suites[w++] = WLAN_CIPHER_SUITE_AES_CMAC; - suites[w++] = WLAN_CIPHER_SUITE_BIP_CMAC_256; - suites[w++] = WLAN_CIPHER_SUITE_BIP_GMAC_128; - suites[w++] = WLAN_CIPHER_SUITE_BIP_GMAC_256; - } - - for (r = 0; r < local->hw.n_cipher_schemes; r++) { - suites[w++] = cs[r].cipher; - if (WARN_ON(cs[r].pn_len > IEEE80211_MAX_PN_LEN)) { - kfree(suites); - return -EINVAL; - } - } } local->hw.wiphy->cipher_suites = suites; @@ -1168,12 +1115,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (local->hw.wiphy->max_scan_ie_len) local->hw.wiphy->max_scan_ie_len -= local->scan_ies_len; - if (WARN_ON(!ieee80211_cs_list_valid(local->hw.cipher_schemes, - local->hw.n_cipher_schemes))) { - 
result = -EINVAL; - goto fail_workqueue; - } - result = ieee80211_init_cipher_suites(local); if (result < 0) goto fail_workqueue; diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 58ebdcd69d05..45e7c1b307bc 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008, 2009 open80211s Ltd. - * Copyright (C) 2019, 2021 Intel Corporation + * Copyright (C) 2019, 2021-2022 Intel Corporation * Author: Luis Carlos Cobo <luisca@cozybit.com> */ @@ -247,13 +247,13 @@ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata, return -EAGAIN; skb = dev_alloc_skb(local->tx_headroom + - sdata->encrypt_headroom + + IEEE80211_ENCRYPT_HEADROOM + IEEE80211_ENCRYPT_TAILROOM + hdr_len + 2 + 15 /* PERR IE */); if (!skb) return -1; - skb_reserve(skb, local->tx_headroom + sdata->encrypt_headroom); + skb_reserve(skb, local->tx_headroom + IEEE80211_ENCRYPT_HEADROOM); mgmt = skb_put_zero(skb, hdr_len); mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 58d48dcae030..6d5ad71ef02c 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -8,7 +8,7 @@ * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015 - 2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2021 Intel Corporation + * Copyright (C) 2018 - 2022 Intel Corporation */ #include <linux/delay.h> @@ -2496,8 +2496,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, memset(ifmgd->tx_tspec, 0, sizeof(ifmgd->tx_tspec)); cancel_delayed_work_sync(&ifmgd->tx_tspec_wk); - sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM; - bss_conf->pwr_reduction = 0; bss_conf->tx_pwr_env_num = 0; memset(bss_conf->tx_pwr_env, 0, sizeof(bss_conf->tx_pwr_env)); @@ -6071,8 +6069,6 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, 
sdata->control_port_over_nl80211 = req->crypto.control_port_over_nl80211; sdata->control_port_no_preauth = req->crypto.control_port_no_preauth; - sdata->encrypt_headroom = ieee80211_cs_headroom(local, &req->crypto, - sdata->vif.type); /* kick off associate process */ diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 3c08ae04ddbc..a9f4e90ad893 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -6,7 +6,7 @@ * Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright(c) 2015 - 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation */ #include <linux/jiffies.h> @@ -1009,43 +1009,20 @@ static int ieee80211_get_mmie_keyidx(struct sk_buff *skb) return -1; } -static int ieee80211_get_keyid(struct sk_buff *skb, - const struct ieee80211_cipher_scheme *cs) +static int ieee80211_get_keyid(struct sk_buff *skb) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - __le16 fc; - int hdrlen; - int minlen; - u8 key_idx_off; - u8 key_idx_shift; + __le16 fc = hdr->frame_control; + int hdrlen = ieee80211_hdrlen(fc); u8 keyid; - fc = hdr->frame_control; - hdrlen = ieee80211_hdrlen(fc); - - if (cs) { - minlen = hdrlen + cs->hdr_len; - key_idx_off = hdrlen + cs->key_idx_off; - key_idx_shift = cs->key_idx_shift; - } else { - /* WEP, TKIP, CCMP and GCMP */ - minlen = hdrlen + IEEE80211_WEP_IV_LEN; - key_idx_off = hdrlen + 3; - key_idx_shift = 6; - } - - if (unlikely(skb->len < minlen)) + /* WEP, TKIP, CCMP and GCMP */ + if (unlikely(skb->len < hdrlen + IEEE80211_WEP_IV_LEN)) return -EINVAL; - skb_copy_bits(skb, key_idx_off, &keyid, 1); + skb_copy_bits(skb, hdrlen + 3, &keyid, 1); - if (cs) - keyid &= cs->key_idx_mask; - keyid >>= key_idx_shift; - - /* cs could use more than the usual two bits for the keyid */ - if (unlikely(keyid >= NUM_DEFAULT_KEYS)) - return -EINVAL; + keyid >>= 6; return keyid; } @@ -1916,7 +1893,6 @@ 
ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) struct ieee80211_key *ptk_idx = NULL; int mmie_keyidx = -1; __le16 fc; - const struct ieee80211_cipher_scheme *cs = NULL; if (ieee80211_is_ext(hdr->frame_control)) return RX_CONTINUE; @@ -1959,8 +1935,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) if (ieee80211_has_protected(fc) && !(status->flag & RX_FLAG_IV_STRIPPED)) { - cs = rx->sta->cipher_scheme; - keyid = ieee80211_get_keyid(rx->skb, cs); + keyid = ieee80211_get_keyid(rx->skb); if (unlikely(keyid < 0)) return RX_DROP_UNUSABLE; @@ -2065,7 +2040,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) (status->flag & RX_FLAG_IV_STRIPPED)) return RX_CONTINUE; - keyidx = ieee80211_get_keyid(rx->skb, cs); + keyidx = ieee80211_get_keyid(rx->skb); if (unlikely(keyidx < 0)) return RX_DROP_UNUSABLE; @@ -2131,7 +2106,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) result = ieee80211_crypto_gcmp_decrypt(rx); break; default: - result = ieee80211_crypto_hw_decrypt(rx); + result = RX_DROP_UNUSABLE; } /* the hdr variable is invalid after the decrypt handlers */ @@ -2945,7 +2920,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) tailroom = IEEE80211_ENCRYPT_TAILROOM; fwd_skb = skb_copy_expand(skb, local->tx_headroom + - sdata->encrypt_headroom, + IEEE80211_ENCRYPT_HEADROOM, tailroom, GFP_ATOMIC); if (!fwd_skb) goto out; diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 35c390bedfba..aa6950aa49a9 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -3,7 +3,7 @@ * Copyright 2002-2005, Devicescape Software, Inc. * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright(c) 2015-2017 Intel Deutschland GmbH - * Copyright(c) 2020-2021 Intel Corporation + * Copyright(c) 2020-2022 Intel Corporation */ #ifndef STA_INFO_H @@ -616,7 +616,6 @@ struct link_sta_info { * taken from HT/VHT capabilities or VHT operating mode notification * @known_smps_mode: the smps_mode the client thinks we are in. 
Relevant for * AP only. - * @cipher_scheme: optional cipher scheme for this station * @cparams: CoDel parameters for this station. * @reserved_tid: reserved TID (if any, otherwise IEEE80211_TID_UNRESERVED) * @fast_tx: TX fastpath information @@ -700,7 +699,6 @@ struct sta_info { #endif enum ieee80211_smps_mode known_smps_mode; - const struct ieee80211_cipher_scheme *cipher_scheme; struct codel_params cparams; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 0e4efc08c762..37fe72bb5ab0 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -5,7 +5,7 @@ * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * Transmit and frame generation functions. */ @@ -882,7 +882,7 @@ static int ieee80211_fragment(struct ieee80211_tx_data *tx, rem -= fraglen; tmp = dev_alloc_skb(local->tx_headroom + frag_threshold + - tx->sdata->encrypt_headroom + + IEEE80211_ENCRYPT_HEADROOM + IEEE80211_ENCRYPT_TAILROOM); if (!tmp) return -ENOMEM; @@ -890,7 +890,7 @@ static int ieee80211_fragment(struct ieee80211_tx_data *tx, __skb_queue_tail(&tx->skbs, tmp); skb_reserve(tmp, - local->tx_headroom + tx->sdata->encrypt_headroom); + local->tx_headroom + IEEE80211_ENCRYPT_HEADROOM); /* copy control information */ memcpy(tmp->cb, skb->cb, sizeof(tmp->cb)); @@ -1040,8 +1040,6 @@ ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx) case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: return ieee80211_crypto_gcmp_encrypt(tx); - default: - return ieee80211_crypto_hw_encrypt(tx); } return TX_DROP; @@ -2013,7 +2011,7 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, headroom = local->tx_headroom; if (encrypt != ENCRYPT_NO) - headroom += sdata->encrypt_headroom; + headroom += IEEE80211_ENCRYPT_HEADROOM; headroom -= skb_headroom(skb); headroom = max_t(int, 0, headroom); @@ -2867,7 
+2865,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, */ if (head_need > 0 || skb_cloned(skb)) { - head_need += sdata->encrypt_headroom; + head_need += IEEE80211_ENCRYPT_HEADROOM; head_need += local->tx_headroom; head_need = max_t(int, 0, head_need); if (ieee80211_skb_resize(sdata, skb, head_need, ENCRYPT_DATA)) { @@ -3128,15 +3126,6 @@ void ieee80211_check_fast_xmit(struct sta_info *sta) /* we don't know how to generate IVs for this at all */ if (WARN_ON(gen_iv)) goto out; - /* pure hardware keys are OK, of course */ - if (!(build.key->flags & KEY_FLAG_CIPHER_SCHEME)) - break; - /* cipher scheme might require space allocation */ - if (iv_spc && - build.key->conf.iv_len > IEEE80211_FAST_XMIT_MAX_IV) - goto out; - if (iv_spc) - build.hdr_len += build.key->conf.iv_len; } fc |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 1e26b5235add..9e6c4dcef280 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -6,7 +6,7 @@ * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * utilities for mac80211 */ @@ -4212,74 +4212,6 @@ int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata, return 0; } -bool ieee80211_cs_valid(const struct ieee80211_cipher_scheme *cs) -{ - return !(cs == NULL || cs->cipher == 0 || - cs->hdr_len < cs->pn_len + cs->pn_off || - cs->hdr_len <= cs->key_idx_off || - cs->key_idx_shift > 7 || - cs->key_idx_mask == 0); -} - -bool ieee80211_cs_list_valid(const struct ieee80211_cipher_scheme *cs, int n) -{ - int i; - - /* Ensure we have enough iftype bitmap space for all iftype values */ - WARN_ON((NUM_NL80211_IFTYPES / 8 + 1) > sizeof(cs[0].iftype)); - - for (i = 0; i < n; i++) - if (!ieee80211_cs_valid(&cs[i])) - return false; - - return true; -} - 
-const struct ieee80211_cipher_scheme * -ieee80211_cs_get(struct ieee80211_local *local, u32 cipher, - enum nl80211_iftype iftype) -{ - const struct ieee80211_cipher_scheme *l = local->hw.cipher_schemes; - int n = local->hw.n_cipher_schemes; - int i; - const struct ieee80211_cipher_scheme *cs = NULL; - - for (i = 0; i < n; i++) { - if (l[i].cipher == cipher) { - cs = &l[i]; - break; - } - } - - if (!cs || !(cs->iftype & BIT(iftype))) - return NULL; - - return cs; -} - -int ieee80211_cs_headroom(struct ieee80211_local *local, - struct cfg80211_crypto_settings *crypto, - enum nl80211_iftype iftype) -{ - const struct ieee80211_cipher_scheme *cs; - int headroom = IEEE80211_ENCRYPT_HEADROOM; - int i; - - for (i = 0; i < crypto->n_ciphers_pairwise; i++) { - cs = ieee80211_cs_get(local, crypto->ciphers_pairwise[i], - iftype); - - if (cs && headroom < cs->hdr_len) - headroom = cs->hdr_len; - } - - cs = ieee80211_cs_get(local, crypto->cipher_group, iftype); - if (cs && headroom < cs->hdr_len) - headroom = cs->hdr_len; - - return headroom; -} - static bool ieee80211_extend_noa_desc(struct ieee80211_noa_data *data, u32 tsf, int i) { diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 5fd8a3e8b5b4..93ec2f349748 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -3,7 +3,7 @@ * Copyright 2002-2004, Instant802 Networks, Inc. 
* Copyright 2008, Jouni Malinen <j@w1.fi> * Copyright (C) 2016-2017 Intel Deutschland GmbH - * Copyright (C) 2020-2021 Intel Corporation + * Copyright (C) 2020-2022 Intel Corporation */ #include <linux/netdevice.h> @@ -778,102 +778,6 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx) return RX_CONTINUE; } -static ieee80211_tx_result -ieee80211_crypto_cs_encrypt(struct ieee80211_tx_data *tx, - struct sk_buff *skb) -{ - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - struct ieee80211_key *key = tx->key; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - int hdrlen; - u8 *pos, iv_len = key->conf.iv_len; - - if (info->control.hw_key && - !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE)) { - /* hwaccel has no need for preallocated head room */ - return TX_CONTINUE; - } - - if (unlikely(skb_headroom(skb) < iv_len && - pskb_expand_head(skb, iv_len, 0, GFP_ATOMIC))) - return TX_DROP; - - hdrlen = ieee80211_hdrlen(hdr->frame_control); - - pos = skb_push(skb, iv_len); - memmove(pos, pos + iv_len, hdrlen); - - return TX_CONTINUE; -} - -static inline int ieee80211_crypto_cs_pn_compare(u8 *pn1, u8 *pn2, int len) -{ - int i; - - /* pn is little endian */ - for (i = len - 1; i >= 0; i--) { - if (pn1[i] < pn2[i]) - return -1; - else if (pn1[i] > pn2[i]) - return 1; - } - - return 0; -} - -static ieee80211_rx_result -ieee80211_crypto_cs_decrypt(struct ieee80211_rx_data *rx) -{ - struct ieee80211_key *key = rx->key; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; - const struct ieee80211_cipher_scheme *cs = NULL; - int hdrlen = ieee80211_hdrlen(hdr->frame_control); - struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); - int data_len; - u8 *rx_pn; - u8 *skb_pn; - u8 qos_tid; - - if (!rx->sta || !rx->sta->cipher_scheme || - !(status->flag & RX_FLAG_DECRYPTED)) - return RX_DROP_UNUSABLE; - - if (!ieee80211_is_data(hdr->frame_control)) - return RX_CONTINUE; - - cs = rx->sta->cipher_scheme; - - 
data_len = rx->skb->len - hdrlen - cs->hdr_len; - - if (data_len < 0) - return RX_DROP_UNUSABLE; - - if (ieee80211_is_data_qos(hdr->frame_control)) - qos_tid = ieee80211_get_tid(hdr); - else - qos_tid = 0; - - if (skb_linearize(rx->skb)) - return RX_DROP_UNUSABLE; - - rx_pn = key->u.gen.rx_pn[qos_tid]; - skb_pn = rx->skb->data + hdrlen + cs->pn_off; - - if (ieee80211_crypto_cs_pn_compare(skb_pn, rx_pn, cs->pn_len) <= 0) - return RX_DROP_UNUSABLE; - - memcpy(rx_pn, skb_pn, cs->pn_len); - - /* remove security header and MIC */ - if (pskb_trim(rx->skb, rx->skb->len - cs->mic_len)) - return RX_DROP_UNUSABLE; - - memmove(rx->skb->data + cs->hdr_len, rx->skb->data, hdrlen); - skb_pull(rx->skb, cs->hdr_len); - - return RX_CONTINUE; -} - static void bip_aad(struct sk_buff *skb, u8 *aad) { __le16 mask_fc; @@ -1212,38 +1116,3 @@ ieee80211_crypto_aes_gmac_decrypt(struct ieee80211_rx_data *rx) return RX_CONTINUE; } - -ieee80211_tx_result -ieee80211_crypto_hw_encrypt(struct ieee80211_tx_data *tx) -{ - struct sk_buff *skb; - struct ieee80211_tx_info *info = NULL; - ieee80211_tx_result res; - - skb_queue_walk(&tx->skbs, skb) { - info = IEEE80211_SKB_CB(skb); - - /* handle hw-only algorithm */ - if (!info->control.hw_key) - return TX_DROP; - - if (tx->key->flags & KEY_FLAG_CIPHER_SCHEME) { - res = ieee80211_crypto_cs_encrypt(tx, skb); - if (res != TX_CONTINUE) - return res; - } - } - - ieee80211_tx_set_protected(tx); - - return TX_CONTINUE; -} - -ieee80211_rx_result -ieee80211_crypto_hw_decrypt(struct ieee80211_rx_data *rx) -{ - if (rx->sta && rx->sta->cipher_scheme) - return ieee80211_crypto_cs_decrypt(rx); - - return RX_DROP_UNUSABLE; -} diff --git a/net/mac80211/wpa.h b/net/mac80211/wpa.h index af3272284e85..a9a81abb5479 100644 --- a/net/mac80211/wpa.h +++ b/net/mac80211/wpa.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright 2002-2004, Instant802 Networks, Inc. 
+ * Copyright (C) 2022 Intel Corporation */ #ifndef WPA_H @@ -39,10 +40,6 @@ ieee80211_tx_result ieee80211_crypto_aes_gmac_encrypt(struct ieee80211_tx_data *tx); ieee80211_rx_result ieee80211_crypto_aes_gmac_decrypt(struct ieee80211_rx_data *rx); -ieee80211_tx_result -ieee80211_crypto_hw_encrypt(struct ieee80211_tx_data *tx); -ieee80211_rx_result -ieee80211_crypto_hw_decrypt(struct ieee80211_rx_data *rx); ieee80211_tx_result ieee80211_crypto_gcmp_encrypt(struct ieee80211_tx_data *tx); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 17e13396024a..e0fb9f96c45c 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -167,8 +167,8 @@ static bool mptcp_ooo_try_coalesce(struct mptcp_sock *msk, struct sk_buff *to, static void __mptcp_rmem_reclaim(struct sock *sk, int amount) { - amount >>= SK_MEM_QUANTUM_SHIFT; - mptcp_sk(sk)->rmem_fwd_alloc -= amount << SK_MEM_QUANTUM_SHIFT; + amount >>= PAGE_SHIFT; + mptcp_sk(sk)->rmem_fwd_alloc -= amount << PAGE_SHIFT; __sk_mem_reduce_allocated(sk, amount); } @@ -327,7 +327,7 @@ static bool mptcp_rmem_schedule(struct sock *sk, struct sock *ssk, int size) return true; amt = sk_mem_pages(size); - amount = amt << SK_MEM_QUANTUM_SHIFT; + amount = amt << PAGE_SHIFT; msk->rmem_fwd_alloc += amount; if (!__sk_mem_raise_allocated(sk, size, amt, SK_MEM_RECV)) { if (ssk->sk_forward_alloc < amount) { @@ -972,10 +972,10 @@ static void __mptcp_mem_reclaim_partial(struct sock *sk) lockdep_assert_held_once(&sk->sk_lock.slock); - if (reclaimable > SK_MEM_QUANTUM) + if (reclaimable > (int)PAGE_SIZE) __mptcp_rmem_reclaim(sk, reclaimable - 1); - sk_mem_reclaim_partial(sk); + sk_mem_reclaim(sk); } static void mptcp_mem_reclaim_partial(struct sock *sk) @@ -3437,7 +3437,10 @@ static struct proto mptcp_prot = { .get_port = mptcp_get_port, .forward_alloc_get = mptcp_forward_alloc_get, .sockets_allocated = &mptcp_sockets_allocated, + .memory_allocated = &tcp_memory_allocated, + .per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc, + 
.memory_pressure = &tcp_memory_pressure, .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem), .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem), diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index b498dac4e1e0..2f61d5bdce1a 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -115,7 +115,7 @@ error_master_upper_dev_unlink: error_unlock: rtnl_unlock(); error_put: - dev_put_track(vport->dev, &vport->dev_tracker); + netdev_put(vport->dev, &vport->dev_tracker); error_free_vport: ovs_vport_free(vport); return ERR_PTR(err); @@ -137,7 +137,7 @@ static void vport_netdev_free(struct rcu_head *rcu) { struct vport *vport = container_of(rcu, struct vport, rcu); - dev_put_track(vport->dev, &vport->dev_tracker); + netdev_put(vport->dev, &vport->dev_tracker); ovs_vport_free(vport); } @@ -173,7 +173,7 @@ void ovs_netdev_tunnel_destroy(struct vport *vport) */ if (vport->dev->reg_state == NETREG_REGISTERED) rtnl_delete_link(vport->dev); - dev_put_track(vport->dev, &vport->dev_tracker); + netdev_put(vport->dev, &vport->dev_tracker); vport->dev = NULL; rtnl_unlock(); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ca6e92a22923..d08c4728523b 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3134,7 +3134,7 @@ static int packet_release(struct socket *sock) packet_cached_dev_reset(po); if (po->prot_hook.dev) { - dev_put_track(po->prot_hook.dev, &po->prot_hook.dev_tracker); + netdev_put(po->prot_hook.dev, &po->prot_hook.dev_tracker); po->prot_hook.dev = NULL; } spin_unlock(&po->bind_lock); @@ -3235,15 +3235,15 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, WRITE_ONCE(po->num, proto); po->prot_hook.type = proto; - dev_put_track(po->prot_hook.dev, &po->prot_hook.dev_tracker); + netdev_put(po->prot_hook.dev, &po->prot_hook.dev_tracker); if (unlikely(unlisted)) { po->prot_hook.dev = NULL; WRITE_ONCE(po->ifindex, -1); 
packet_cached_dev_reset(po); } else { - dev_hold_track(dev, &po->prot_hook.dev_tracker, - GFP_ATOMIC); + netdev_hold(dev, &po->prot_hook.dev_tracker, + GFP_ATOMIC); po->prot_hook.dev = dev; WRITE_ONCE(po->ifindex, dev ? dev->ifindex : 0); packet_cached_dev_assign(po, dev); @@ -4167,8 +4167,8 @@ static int packet_notifier(struct notifier_block *this, if (msg == NETDEV_UNREGISTER) { packet_cached_dev_reset(po); WRITE_ONCE(po->ifindex, -1); - dev_put_track(po->prot_hook.dev, - &po->prot_hook.dev_tracker); + netdev_put(po->prot_hook.dev, + &po->prot_hook.dev_tracker); po->prot_hook.dev = NULL; } spin_unlock(&po->bind_lock); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index ebb92fb072ab..a1d70cf86843 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -79,7 +79,7 @@ static void tcf_mirred_release(struct tc_action *a) /* last reference to action, no need to lock */ dev = rcu_dereference_protected(m->tcfm_dev, 1); - dev_put_track(dev, &m->tcfm_dev_tracker); + netdev_put(dev, &m->tcfm_dev_tracker); } static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = { @@ -181,7 +181,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, mac_header_xmit = dev_is_mac_header_xmit(ndev); odev = rcu_replace_pointer(m->tcfm_dev, ndev, lockdep_is_held(&m->tcf_lock)); - dev_put_track(odev, &m->tcfm_dev_tracker); + netdev_put(odev, &m->tcfm_dev_tracker); netdev_tracker_alloc(ndev, &m->tcfm_dev_tracker, GFP_ATOMIC); m->tcfm_mac_header_xmit = mac_header_xmit; } @@ -402,7 +402,7 @@ static int mirred_device_event(struct notifier_block *unused, list_for_each_entry(m, &mirred_list, tcfm_list) { spin_lock_bh(&m->tcf_lock); if (tcf_mirred_dev_dereference(m) == dev) { - dev_put_track(dev, &m->tcfm_dev_tracker); + netdev_put(dev, &m->tcfm_dev_tracker); /* Note : no rcu grace period necessary, as * net_device are already rcu protected. 
*/ diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index e3c0e8ea2dbb..bf87b50837a8 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1292,7 +1292,7 @@ err_out5: if (ops->destroy) ops->destroy(sch); err_out3: - dev_put_track(dev, &sch->dev_tracker); + netdev_put(dev, &sch->dev_tracker); qdisc_free(sch); err_out2: module_put(ops->owner); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index dba0b3e24af5..cc6eabee2830 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -541,7 +541,7 @@ static void dev_watchdog(struct timer_list *t) spin_unlock(&dev->tx_global_lock); if (release) - dev_put_track(dev, &dev->watchdog_dev_tracker); + netdev_put(dev, &dev->watchdog_dev_tracker); } void __netdev_watchdog_up(struct net_device *dev) @@ -551,7 +551,8 @@ void __netdev_watchdog_up(struct net_device *dev) dev->watchdog_timeo = 5*HZ; if (!mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + dev->watchdog_timeo))) - dev_hold_track(dev, &dev->watchdog_dev_tracker, GFP_ATOMIC); + netdev_hold(dev, &dev->watchdog_dev_tracker, + GFP_ATOMIC); } } EXPORT_SYMBOL_GPL(__netdev_watchdog_up); @@ -565,7 +566,7 @@ static void dev_watchdog_down(struct net_device *dev) { netif_tx_lock_bh(dev); if (del_timer(&dev->watchdog_timer)) - dev_put_track(dev, &dev->watchdog_dev_tracker); + netdev_put(dev, &dev->watchdog_dev_tracker); netif_tx_unlock_bh(dev); } @@ -975,7 +976,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, sch->enqueue = ops->enqueue; sch->dequeue = ops->dequeue; sch->dev_queue = dev_queue; - dev_hold_track(dev, &sch->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &sch->dev_tracker, GFP_KERNEL); refcount_set(&sch->refcnt, 1); return sch; @@ -1067,7 +1068,7 @@ static void qdisc_destroy(struct Qdisc *qdisc) ops->destroy(qdisc); module_put(ops->owner); - dev_put_track(qdisc_dev(qdisc), &qdisc->dev_tracker); + netdev_put(qdisc_dev(qdisc), &qdisc->dev_tracker); trace_qdisc_destroy(qdisc); diff --git a/net/sctp/protocol.c 
b/net/sctp/protocol.c index 35928fefae33..fa500ea3a1f1 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1523,11 +1523,11 @@ static __init int sctp_init(void) limit = (sysctl_sctp_mem[1]) << (PAGE_SHIFT - 7); max_share = min(4UL*1024*1024, limit); - sysctl_sctp_rmem[0] = SK_MEM_QUANTUM; /* give each asoc 1 page min */ + sysctl_sctp_rmem[0] = PAGE_SIZE; /* give each asoc 1 page min */ sysctl_sctp_rmem[1] = 1500 * SKB_TRUESIZE(1); sysctl_sctp_rmem[2] = max(sysctl_sctp_rmem[1], max_share); - sysctl_sctp_wmem[0] = SK_MEM_QUANTUM; + sysctl_sctp_wmem[0] = PAGE_SIZE; sysctl_sctp_wmem[1] = 16*1024; sysctl_sctp_wmem[2] = max(64*1024, max_share); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 52edee1322fc..f6ee7f4040c1 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -6590,8 +6590,6 @@ static int sctp_eat_data(const struct sctp_association *asoc, pr_debug("%s: under pressure, reneging for tsn:%u\n", __func__, tsn); deliver = SCTP_CMD_RENEGE; - } else { - sk_mem_reclaim(sk); } } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 6d37d2dfb3da..171f1a35d205 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -93,6 +93,7 @@ static int sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, static unsigned long sctp_memory_pressure; static atomic_long_t sctp_memory_allocated; +static DEFINE_PER_CPU(int, sctp_memory_per_cpu_fw_alloc); struct percpu_counter sctp_sockets_allocated; static void sctp_enter_memory_pressure(struct sock *sk) @@ -1823,9 +1824,6 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc, if (sctp_wspace(asoc) < (int)msg_len) sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc)); - if (sk_under_memory_pressure(sk)) - sk_mem_reclaim(sk); - if (sctp_wspace(asoc) <= 0 || !sk_wmem_schedule(sk, msg_len)) { timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len); @@ -9194,8 +9192,6 @@ static int sctp_wait_for_sndbuf(struct 
sctp_association *asoc, long *timeo_p, goto do_error; if (signal_pending(current)) goto do_interrupted; - if (sk_under_memory_pressure(sk)) - sk_mem_reclaim(sk); if ((int)msg_len <= sctp_wspace(asoc) && sk_wmem_schedule(sk, msg_len)) break; @@ -9657,7 +9653,10 @@ struct proto sctp_prot = { .sysctl_wmem = sysctl_sctp_wmem, .memory_pressure = &sctp_memory_pressure, .enter_memory_pressure = sctp_enter_memory_pressure, + .memory_allocated = &sctp_memory_allocated, + .per_cpu_fw_alloc = &sctp_memory_per_cpu_fw_alloc, + .sockets_allocated = &sctp_sockets_allocated, }; @@ -9700,7 +9699,10 @@ struct proto sctpv6_prot = { .sysctl_wmem = sysctl_sctp_wmem, .memory_pressure = &sctp_memory_pressure, .enter_memory_pressure = sctp_enter_memory_pressure, + .memory_allocated = &sctp_memory_allocated, + .per_cpu_fw_alloc = &sctp_memory_per_cpu_fw_alloc, + .sockets_allocated = &sctp_sockets_allocated, }; #endif /* IS_ENABLED(CONFIG_IPV6) */ diff --git a/net/sctp/stream_interleave.c b/net/sctp/stream_interleave.c index 6b13f737ebf2..bb22b71df7a3 100644 --- a/net/sctp/stream_interleave.c +++ b/net/sctp/stream_interleave.c @@ -979,8 +979,6 @@ static void sctp_renege_events(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, if (freed >= needed && sctp_ulpevent_idata(ulpq, chunk, gfp) <= 0) sctp_intl_start_pd(ulpq, gfp); - - sk_mem_reclaim(asoc->base.sk); } static void sctp_intl_stream_abort_pd(struct sctp_ulpq *ulpq, __u16 sid, diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 407fed46931b..0a8510a0c5e6 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -1100,12 +1100,8 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, else if (retval == 1) sctp_ulpq_reasm_drain(ulpq); } - - sk_mem_reclaim(asoc->base.sk); } - - /* Notify the application if an association is aborted and in * partial delivery mode. Send up any pending received messages. 
*/ diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 7055ed10e316..4c3bf6db7038 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -120,7 +120,8 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name) smc_pnet_match(pnetelem->pnet_name, pnet_name)) { list_del(&pnetelem->list); if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev) { - dev_put_track(pnetelem->ndev, &pnetelem->dev_tracker); + netdev_put(pnetelem->ndev, + &pnetelem->dev_tracker); pr_warn_ratelimited("smc: net device %s " "erased user defined " "pnetid %.16s\n", @@ -196,7 +197,7 @@ static int smc_pnet_add_by_ndev(struct net_device *ndev) list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { if (pnetelem->type == SMC_PNET_ETH && !pnetelem->ndev && !strncmp(pnetelem->eth_name, ndev->name, IFNAMSIZ)) { - dev_hold_track(ndev, &pnetelem->dev_tracker, GFP_ATOMIC); + netdev_hold(ndev, &pnetelem->dev_tracker, GFP_ATOMIC); pnetelem->ndev = ndev; rc = 0; pr_warn_ratelimited("smc: adding net device %s with " @@ -227,7 +228,7 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev) mutex_lock(&pnettable->lock); list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev == ndev) { - dev_put_track(pnetelem->ndev, &pnetelem->dev_tracker); + netdev_put(pnetelem->ndev, &pnetelem->dev_tracker); pnetelem->ndev = NULL; rc = 0; pr_warn_ratelimited("smc: removing net device %s with " diff --git a/net/socket.c b/net/socket.c index 96300cdc0625..3d7eb2a79e82 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1878,10 +1878,8 @@ out_fd: return ERR_PTR(err); } -int __sys_accept4_file(struct file *file, unsigned file_flags, - struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen, int flags, - unsigned long nofile) +static int __sys_accept4_file(struct file *file, struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen, int flags) { struct file *newfile; int newfd; @@ -1892,11 
+1890,11 @@ int __sys_accept4_file(struct file *file, unsigned file_flags, if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; - newfd = __get_unused_fd_flags(flags, nofile); + newfd = get_unused_fd_flags(flags); if (unlikely(newfd < 0)) return newfd; - newfile = do_accept(file, file_flags, upeer_sockaddr, upeer_addrlen, + newfile = do_accept(file, 0, upeer_sockaddr, upeer_addrlen, flags); if (IS_ERR(newfile)) { put_unused_fd(newfd); @@ -1926,9 +1924,8 @@ int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, f = fdget(fd); if (f.file) { - ret = __sys_accept4_file(f.file, 0, upeer_sockaddr, - upeer_addrlen, flags, - rlimit(RLIMIT_NOFILE)); + ret = __sys_accept4_file(f.file, upeer_sockaddr, + upeer_addrlen, flags); fdput(f); } diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 474f76383033..8cc42aea19c7 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -64,7 +64,7 @@ void switchdev_deferred_process(void) while ((dfitem = switchdev_deferred_dequeue())) { dfitem->func(dfitem->dev, dfitem->data); - dev_put_track(dfitem->dev, &dfitem->dev_tracker); + netdev_put(dfitem->dev, &dfitem->dev_tracker); kfree(dfitem); } } @@ -91,7 +91,7 @@ static int switchdev_deferred_enqueue(struct net_device *dev, dfitem->dev = dev; dfitem->func = func; memcpy(dfitem->data, data, data_len); - dev_hold_track(dev, &dfitem->dev_tracker, GFP_ATOMIC); + netdev_hold(dev, &dfitem->dev_tracker, GFP_ATOMIC); spin_lock_bh(&deferred_lock); list_add_tail(&dfitem->list, &deferred); spin_unlock_bh(&deferred_lock); diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 932c87b98eca..35cac7733fd3 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -788,7 +788,7 @@ int tipc_attach_loopback(struct net *net) if (!dev) return -ENODEV; - dev_hold_track(dev, &tn->loopback_pt.dev_tracker, GFP_KERNEL); + netdev_hold(dev, &tn->loopback_pt.dev_tracker, GFP_KERNEL); tn->loopback_pt.dev = dev; 
tn->loopback_pt.type = htons(ETH_P_TIPC); tn->loopback_pt.func = tipc_loopback_rcv_pkt; @@ -801,7 +801,7 @@ void tipc_detach_loopback(struct net *net) struct tipc_net *tn = tipc_net(net); dev_remove_pack(&tn->loopback_pt); - dev_put_track(net->loopback_dev, &tn->loopback_pt.dev_tracker); + netdev_put(net->loopback_dev, &tn->loopback_pt.dev_tracker); } /* Caller should hold rtnl_lock to protect the bearer */ diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 1d8ba233d047..d1180370fdf4 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -1202,14 +1202,3 @@ void tipc_dest_list_purge(struct list_head *l) kfree(dst); } } - -int tipc_dest_list_len(struct list_head *l) -{ - struct tipc_dest *dst; - int i = 0; - - list_for_each_entry(dst, l, list) { - i++; - } - return i; -} diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index 259f95e3d99c..3bcd9ef8cee3 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -151,6 +151,5 @@ bool tipc_dest_push(struct list_head *l, u32 node, u32 port); bool tipc_dest_pop(struct list_head *l, u32 *node, u32 *port); bool tipc_dest_del(struct list_head *l, u32 node, u32 port); void tipc_dest_list_purge(struct list_head *l); -int tipc_dest_list_len(struct list_head *l); #endif diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 2206e6f8902d..49f6626330c3 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -118,15 +118,11 @@ #include "scm.h" -spinlock_t unix_table_locks[2 * UNIX_HASH_SIZE]; -EXPORT_SYMBOL_GPL(unix_table_locks); -struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; -EXPORT_SYMBOL_GPL(unix_socket_table); static atomic_long_t unix_nr_socks; /* SMP locking strategy: - * hash table is protected with spinlock unix_table_locks - * each socket state is protected by separate spin lock. + * hash table is protected with spinlock. + * each socket state is protected by separate spinlock. 
*/ static unsigned int unix_unbound_hash(struct sock *sk) @@ -137,12 +133,12 @@ static unsigned int unix_unbound_hash(struct sock *sk) hash ^= hash >> 8; hash ^= sk->sk_type; - return UNIX_HASH_SIZE + (hash & (UNIX_HASH_SIZE - 1)); + return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD); } static unsigned int unix_bsd_hash(struct inode *i) { - return i->i_ino & (UNIX_HASH_SIZE - 1); + return i->i_ino & UNIX_HASH_MOD; } static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr, @@ -155,26 +151,28 @@ static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr, hash ^= hash >> 8; hash ^= type; - return hash & (UNIX_HASH_SIZE - 1); + return hash & UNIX_HASH_MOD; } -static void unix_table_double_lock(unsigned int hash1, unsigned int hash2) +static void unix_table_double_lock(struct net *net, + unsigned int hash1, unsigned int hash2) { /* hash1 and hash2 is never the same because - * one is between 0 and UNIX_HASH_SIZE - 1, and - * another is between UNIX_HASH_SIZE and UNIX_HASH_SIZE * 2. + * one is between 0 and UNIX_HASH_MOD, and + * another is between UNIX_HASH_MOD + 1 and UNIX_HASH_SIZE - 1. 
*/ if (hash1 > hash2) swap(hash1, hash2); - spin_lock(&unix_table_locks[hash1]); - spin_lock_nested(&unix_table_locks[hash2], SINGLE_DEPTH_NESTING); + spin_lock(&net->unx.table.locks[hash1]); + spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING); } -static void unix_table_double_unlock(unsigned int hash1, unsigned int hash2) +static void unix_table_double_unlock(struct net *net, + unsigned int hash1, unsigned int hash2) { - spin_unlock(&unix_table_locks[hash1]); - spin_unlock(&unix_table_locks[hash2]); + spin_unlock(&net->unx.table.locks[hash1]); + spin_unlock(&net->unx.table.locks[hash2]); } #ifdef CONFIG_SECURITY_NETWORK @@ -300,34 +298,34 @@ static void __unix_remove_socket(struct sock *sk) sk_del_node_init(sk); } -static void __unix_insert_socket(struct sock *sk) +static void __unix_insert_socket(struct net *net, struct sock *sk) { - WARN_ON(!sk_unhashed(sk)); - sk_add_node(sk, &unix_socket_table[sk->sk_hash]); + DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk)); + sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]); } -static void __unix_set_addr_hash(struct sock *sk, struct unix_address *addr, - unsigned int hash) +static void __unix_set_addr_hash(struct net *net, struct sock *sk, + struct unix_address *addr, unsigned int hash) { __unix_remove_socket(sk); smp_store_release(&unix_sk(sk)->addr, addr); sk->sk_hash = hash; - __unix_insert_socket(sk); + __unix_insert_socket(net, sk); } -static void unix_remove_socket(struct sock *sk) +static void unix_remove_socket(struct net *net, struct sock *sk) { - spin_lock(&unix_table_locks[sk->sk_hash]); + spin_lock(&net->unx.table.locks[sk->sk_hash]); __unix_remove_socket(sk); - spin_unlock(&unix_table_locks[sk->sk_hash]); + spin_unlock(&net->unx.table.locks[sk->sk_hash]); } -static void unix_insert_unbound_socket(struct sock *sk) +static void unix_insert_unbound_socket(struct net *net, struct sock *sk) { - spin_lock(&unix_table_locks[sk->sk_hash]); - __unix_insert_socket(sk); - 
spin_unlock(&unix_table_locks[sk->sk_hash]); + spin_lock(&net->unx.table.locks[sk->sk_hash]); + __unix_insert_socket(net, sk); + spin_unlock(&net->unx.table.locks[sk->sk_hash]); } static struct sock *__unix_find_socket_byname(struct net *net, @@ -336,12 +334,9 @@ static struct sock *__unix_find_socket_byname(struct net *net, { struct sock *s; - sk_for_each(s, &unix_socket_table[hash]) { + sk_for_each(s, &net->unx.table.buckets[hash]) { struct unix_sock *u = unix_sk(s); - if (!net_eq(sock_net(s), net)) - continue; - if (u->addr->len == len && !memcmp(u->addr->name, sunname, len)) return s; @@ -355,30 +350,30 @@ static inline struct sock *unix_find_socket_byname(struct net *net, { struct sock *s; - spin_lock(&unix_table_locks[hash]); + spin_lock(&net->unx.table.locks[hash]); s = __unix_find_socket_byname(net, sunname, len, hash); if (s) sock_hold(s); - spin_unlock(&unix_table_locks[hash]); + spin_unlock(&net->unx.table.locks[hash]); return s; } -static struct sock *unix_find_socket_byinode(struct inode *i) +static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i) { unsigned int hash = unix_bsd_hash(i); struct sock *s; - spin_lock(&unix_table_locks[hash]); - sk_for_each(s, &unix_socket_table[hash]) { + spin_lock(&net->unx.table.locks[hash]); + sk_for_each(s, &net->unx.table.buckets[hash]) { struct dentry *dentry = unix_sk(s)->path.dentry; if (dentry && d_backing_inode(dentry) == i) { sock_hold(s); - spin_unlock(&unix_table_locks[hash]); + spin_unlock(&net->unx.table.locks[hash]); return s; } } - spin_unlock(&unix_table_locks[hash]); + spin_unlock(&net->unx.table.locks[hash]); return NULL; } @@ -554,9 +549,9 @@ static void unix_sock_destructor(struct sock *sk) u->oob_skb = NULL; } #endif - WARN_ON(refcount_read(&sk->sk_wmem_alloc)); - WARN_ON(!sk_unhashed(sk)); - WARN_ON(sk->sk_socket); + DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc)); + DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk)); + DEBUG_NET_WARN_ON_ONCE(sk->sk_socket); if 
(!sock_flag(sk, SOCK_DEAD)) { pr_info("Attempt to release alive unix socket: %p\n", sk); return; @@ -576,12 +571,12 @@ static void unix_sock_destructor(struct sock *sk) static void unix_release_sock(struct sock *sk, int embrion) { struct unix_sock *u = unix_sk(sk); - struct path path; struct sock *skpair; struct sk_buff *skb; + struct path path; int state; - unix_remove_socket(sk); + unix_remove_socket(sock_net(sk), sk); /* Clear state */ unix_state_lock(sk); @@ -930,9 +925,9 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, init_waitqueue_head(&u->peer_wait); init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay); memset(&u->scm_stat, 0, sizeof(struct scm_stat)); - unix_insert_unbound_socket(sk); + unix_insert_unbound_socket(net, sk); - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + sock_prot_inuse_add(net, sk->sk_prot, 1); return sk; @@ -1015,7 +1010,7 @@ static struct sock *unix_find_bsd(struct net *net, struct sockaddr_un *sunaddr, if (!S_ISSOCK(inode->i_mode)) goto path_put; - sk = unix_find_socket_byinode(inode); + sk = unix_find_socket_byinode(net, inode); if (!sk) goto path_put; @@ -1074,6 +1069,7 @@ static int unix_autobind(struct sock *sk) { unsigned int new_hash, old_hash = sk->sk_hash; struct unix_sock *u = unix_sk(sk); + struct net *net = sock_net(sk); struct unix_address *addr; u32 lastnum, ordernum; int err; @@ -1102,11 +1098,10 @@ retry: sprintf(addr->name->sun_path + 1, "%05x", ordernum); new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); - unix_table_double_lock(old_hash, new_hash); + unix_table_double_lock(net, old_hash, new_hash); - if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len, - new_hash)) { - unix_table_double_unlock(old_hash, new_hash); + if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) { + unix_table_double_unlock(net, old_hash, new_hash); /* __unix_find_socket_byname() may take long time if many names * are already in use. 
@@ -1123,8 +1118,8 @@ retry: goto retry; } - __unix_set_addr_hash(sk, addr, new_hash); - unix_table_double_unlock(old_hash, new_hash); + __unix_set_addr_hash(net, sk, addr, new_hash); + unix_table_double_unlock(net, old_hash, new_hash); err = 0; out: mutex_unlock(&u->bindlock); @@ -1138,6 +1133,7 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr, (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask()); unsigned int new_hash, old_hash = sk->sk_hash; struct unix_sock *u = unix_sk(sk); + struct net *net = sock_net(sk); struct user_namespace *ns; // barf... struct unix_address *addr; struct dentry *dentry; @@ -1178,11 +1174,11 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr, goto out_unlock; new_hash = unix_bsd_hash(d_backing_inode(dentry)); - unix_table_double_lock(old_hash, new_hash); + unix_table_double_lock(net, old_hash, new_hash); u->path.mnt = mntget(parent.mnt); u->path.dentry = dget(dentry); - __unix_set_addr_hash(sk, addr, new_hash); - unix_table_double_unlock(old_hash, new_hash); + __unix_set_addr_hash(net, sk, addr, new_hash); + unix_table_double_unlock(net, old_hash, new_hash); mutex_unlock(&u->bindlock); done_path_create(&parent, dentry); return 0; @@ -1205,6 +1201,7 @@ static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr, { unsigned int new_hash, old_hash = sk->sk_hash; struct unix_sock *u = unix_sk(sk); + struct net *net = sock_net(sk); struct unix_address *addr; int err; @@ -1222,19 +1219,18 @@ static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr, } new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); - unix_table_double_lock(old_hash, new_hash); + unix_table_double_lock(net, old_hash, new_hash); - if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len, - new_hash)) + if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) goto out_spin; - __unix_set_addr_hash(sk, addr, new_hash); - unix_table_double_unlock(old_hash, 
new_hash); + __unix_set_addr_hash(net, sk, addr, new_hash); + unix_table_double_unlock(net, old_hash, new_hash); mutex_unlock(&u->bindlock); return 0; out_spin: - unix_table_double_unlock(old_hash, new_hash); + unix_table_double_unlock(net, old_hash, new_hash); err = -EADDRINUSE; out_mutex: mutex_unlock(&u->bindlock); @@ -1293,9 +1289,8 @@ static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2) static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags) { - struct sock *sk = sock->sk; - struct net *net = sock_net(sk); struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr; + struct sock *sk = sock->sk; struct sock *other; int err; @@ -1316,7 +1311,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, } restart: - other = unix_find_other(net, sunaddr, alen, sock->type); + other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type); if (IS_ERR(other)) { err = PTR_ERR(other); goto out; @@ -1404,15 +1399,13 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) { struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; - struct sock *sk = sock->sk; - struct net *net = sock_net(sk); + struct sock *sk = sock->sk, *newsk = NULL, *other = NULL; struct unix_sock *u = unix_sk(sk), *newu, *otheru; - struct sock *newsk = NULL; - struct sock *other = NULL; + struct net *net = sock_net(sk); struct sk_buff *skb = NULL; - int st; - int err; long timeo; + int err; + int st; err = unix_validate_addr(sunaddr, addr_len); if (err) @@ -1432,7 +1425,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, */ /* create new sock for complete connection */ - newsk = unix_create1(sock_net(sk), NULL, 0, sock->type); + newsk = unix_create1(net, NULL, 0, sock->type); if (IS_ERR(newsk)) { err = PTR_ERR(newsk); newsk = NULL; @@ -1541,9 +1534,9 @@ restart: * * The contents of *(otheru->addr) and otheru->path * are seen fully set up here, 
since we have found - * otheru in hash under unix_table_locks. Insertion - * into the hash chain we'd found it in had been done - * in an earlier critical area protected by unix_table_locks, + * otheru in hash under its lock. Insertion into the + * hash chain we'd found it in had been done in an + * earlier critical area protected by the chain's lock, * the same one where we'd set *(otheru->addr) contents, * as well as otheru->path and otheru->addr itself. * @@ -1840,17 +1833,15 @@ static void scm_stat_del(struct sock *sk, struct sk_buff *skb) static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { - struct sock *sk = sock->sk; - struct net *net = sock_net(sk); - struct unix_sock *u = unix_sk(sk); DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); - struct sock *other = NULL; - int err; - struct sk_buff *skb; - long timeo; + struct sock *sk = sock->sk, *other = NULL; + struct unix_sock *u = unix_sk(sk); struct scm_cookie scm; + struct sk_buff *skb; int data_len = 0; int sk_locked; + long timeo; + int err; wait_for_unix_gc(); err = scm_send(sock, msg, &scm, false); @@ -1917,7 +1908,7 @@ restart: if (sunaddr == NULL) goto out_free; - other = unix_find_other(net, sunaddr, msg->msg_namelen, + other = unix_find_other(sock_net(sk), sunaddr, msg->msg_namelen, sk->sk_type); if (IS_ERR(other)) { err = PTR_ERR(other); @@ -3226,12 +3217,11 @@ static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos) { unsigned long offset = get_offset(*pos); unsigned long bucket = get_bucket(*pos); - struct sock *sk; unsigned long count = 0; + struct sock *sk; - for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) { - if (sock_net(sk) != seq_file_net(seq)) - continue; + for (sk = sk_head(&seq_file_net(seq)->unx.table.buckets[bucket]); + sk; sk = sk_next(sk)) { if (++count == offset) break; } @@ -3242,16 +3232,17 @@ static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos) static struct sock 
*unix_get_first(struct seq_file *seq, loff_t *pos) { unsigned long bucket = get_bucket(*pos); + struct net *net = seq_file_net(seq); struct sock *sk; - while (bucket < ARRAY_SIZE(unix_socket_table)) { - spin_lock(&unix_table_locks[bucket]); + while (bucket < UNIX_HASH_SIZE) { + spin_lock(&net->unx.table.locks[bucket]); sk = unix_from_bucket(seq, pos); if (sk) return sk; - spin_unlock(&unix_table_locks[bucket]); + spin_unlock(&net->unx.table.locks[bucket]); *pos = set_bucket_offset(++bucket, 1); } @@ -3264,11 +3255,12 @@ static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk, { unsigned long bucket = get_bucket(*pos); - for (sk = sk_next(sk); sk; sk = sk_next(sk)) - if (sock_net(sk) == seq_file_net(seq)) - return sk; + sk = sk_next(sk); + if (sk) + return sk; - spin_unlock(&unix_table_locks[bucket]); + + spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]); *pos = set_bucket_offset(++bucket, 1); @@ -3298,7 +3290,7 @@ static void unix_seq_stop(struct seq_file *seq, void *v) struct sock *sk = v; if (sk) - spin_unlock(&unix_table_locks[sk->sk_hash]); + spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]); } static int unix_seq_show(struct seq_file *seq, void *v) @@ -3323,7 +3315,7 @@ static int unix_seq_show(struct seq_file *seq, void *v) (s->sk_state == TCP_ESTABLISHED ? 
SS_CONNECTING : SS_DISCONNECTING), sock_i_ino(s)); - if (u->addr) { // under unix_table_locks here + if (u->addr) { // under a hash table lock here int i, len; seq_putc(seq, ' '); @@ -3393,9 +3385,6 @@ static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk) iter->batch[iter->end_sk++] = start_sk; for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) { - if (sock_net(sk) != seq_file_net(seq)) - continue; - if (iter->end_sk < iter->max_sk) { sock_hold(sk); iter->batch[iter->end_sk++] = sk; @@ -3404,7 +3393,7 @@ static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk) expected++; } - spin_unlock(&unix_table_locks[start_sk->sk_hash]); + spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]); return expected; } @@ -3564,7 +3553,7 @@ static const struct net_proto_family unix_family_ops = { static int __net_init unix_net_init(struct net *net) { - int error = -ENOMEM; + int i; net->unx.sysctl_max_dgram_qlen = 10; if (unix_sysctl_register(net)) @@ -3572,18 +3561,44 @@ static int __net_init unix_net_init(struct net *net) #ifdef CONFIG_PROC_FS if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops, - sizeof(struct seq_net_private))) { - unix_sysctl_unregister(net); - goto out; + sizeof(struct seq_net_private))) + goto err_sysctl; +#endif + + net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE, + sizeof(spinlock_t), GFP_KERNEL); + if (!net->unx.table.locks) + goto err_proc; + + net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE, + sizeof(struct hlist_head), + GFP_KERNEL); + if (!net->unx.table.buckets) + goto free_locks; + + for (i = 0; i < UNIX_HASH_SIZE; i++) { + spin_lock_init(&net->unx.table.locks[i]); + INIT_HLIST_HEAD(&net->unx.table.buckets[i]); } + + return 0; + +free_locks: + kvfree(net->unx.table.locks); +err_proc: +#ifdef CONFIG_PROC_FS + remove_proc_entry("unix", net->proc_net); +err_sysctl: #endif - error = 0; + unix_sysctl_unregister(net); out: - return error; + return -ENOMEM; } static void 
__net_exit unix_net_exit(struct net *net) { + kvfree(net->unx.table.buckets); + kvfree(net->unx.table.locks); unix_sysctl_unregister(net); remove_proc_entry("unix", net->proc_net); } @@ -3667,13 +3682,10 @@ static void __init bpf_iter_register(void) static int __init af_unix_init(void) { - int i, rc = -1; + int rc = -1; BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb)); - for (i = 0; i < 2 * UNIX_HASH_SIZE; i++) - spin_lock_init(&unix_table_locks[i]); - rc = proto_register(&unix_dgram_proto, 1); if (rc != 0) { pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); diff --git a/net/unix/diag.c b/net/unix/diag.c index bb0b5ea1655f..105f522a89fe 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -13,7 +13,7 @@ static int sk_diag_dump_name(struct sock *sk, struct sk_buff *nlskb) { - /* might or might not have unix_table_locks */ + /* might or might not have a hash table lock */ struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr); if (!addr) @@ -195,25 +195,21 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_r static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) { - struct unix_diag_req *req; - int num, s_num, slot, s_slot; struct net *net = sock_net(skb->sk); + int num, s_num, slot, s_slot; + struct unix_diag_req *req; req = nlmsg_data(cb->nlh); s_slot = cb->args[0]; num = s_num = cb->args[1]; - for (slot = s_slot; - slot < ARRAY_SIZE(unix_socket_table); - s_num = 0, slot++) { + for (slot = s_slot; slot < UNIX_HASH_SIZE; s_num = 0, slot++) { struct sock *sk; num = 0; - spin_lock(&unix_table_locks[slot]); - sk_for_each(sk, &unix_socket_table[slot]) { - if (!net_eq(sock_net(sk), net)) - continue; + spin_lock(&net->unx.table.locks[slot]); + sk_for_each(sk, &net->unx.table.buckets[slot]) { if (num < s_num) goto next; if (!(req->udiag_states & (1 << sk->sk_state))) @@ -222,13 +218,13 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) 
NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI) < 0) { - spin_unlock(&unix_table_locks[slot]); + spin_unlock(&net->unx.table.locks[slot]); goto done; } next: num++; } - spin_unlock(&unix_table_locks[slot]); + spin_unlock(&net->unx.table.locks[slot]); } done: cb->args[0] = slot; @@ -237,20 +233,21 @@ done: return skb->len; } -static struct sock *unix_lookup_by_ino(unsigned int ino) +static struct sock *unix_lookup_by_ino(struct net *net, unsigned int ino) { struct sock *sk; int i; - for (i = 0; i < ARRAY_SIZE(unix_socket_table); i++) { - spin_lock(&unix_table_locks[i]); - sk_for_each(sk, &unix_socket_table[i]) + for (i = 0; i < UNIX_HASH_SIZE; i++) { + spin_lock(&net->unx.table.locks[i]); + sk_for_each(sk, &net->unx.table.buckets[i]) { if (ino == sock_i_ino(sk)) { sock_hold(sk); - spin_unlock(&unix_table_locks[i]); + spin_unlock(&net->unx.table.locks[i]); return sk; } - spin_unlock(&unix_table_locks[i]); + } + spin_unlock(&net->unx.table.locks[i]); } return NULL; } @@ -259,21 +256,20 @@ static int unix_diag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh, struct unix_diag_req *req) { - int err = -EINVAL; - struct sock *sk; - struct sk_buff *rep; - unsigned int extra_len; struct net *net = sock_net(in_skb->sk); + unsigned int extra_len; + struct sk_buff *rep; + struct sock *sk; + int err; + err = -EINVAL; if (req->udiag_ino == 0) goto out_nosk; - sk = unix_lookup_by_ino(req->udiag_ino); + sk = unix_lookup_by_ino(net, req->udiag_ino); err = -ENOENT; if (sk == NULL) goto out_nosk; - if (!net_eq(sock_net(sk), net)) - goto out; err = sock_diag_check_cookie(sk, req->udiag_cookie); if (err) @@ -308,7 +304,6 @@ out_nosk: static int unix_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) { int hdrlen = sizeof(struct unix_diag_req); - struct net *net = sock_net(skb->sk); if (nlmsg_len(h) < hdrlen) return -EINVAL; @@ -317,7 +312,7 @@ static int unix_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) struct netlink_dump_control c = { 
.dump = unix_diag_dump, }; - return netlink_dump_start(net->diag_nlsk, skb, h, &c); + return netlink_dump_start(sock_net(skb->sk)->diag_nlsk, skb, h, &c); } else return unix_diag_get_exact(skb, h, nlmsg_data(h)); } diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index f01ef6bda390..869b9b9b9fad 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -57,7 +57,7 @@ static int xdp_umem_addr_map(struct xdp_umem *umem, struct page **pages, static void xdp_umem_release(struct xdp_umem *umem) { umem->zc = false; - ida_simple_remove(&umem_ida, umem->id); + ida_free(&umem_ida, umem->id); xdp_umem_addr_unmap(umem); xdp_umem_unpin_pages(umem); @@ -242,7 +242,7 @@ struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr) if (!umem) return ERR_PTR(-ENOMEM); - err = ida_simple_get(&umem_ida, 0, 0, GFP_KERNEL); + err = ida_alloc(&umem_ida, GFP_KERNEL); if (err < 0) { kfree(umem); return ERR_PTR(err); @@ -251,7 +251,7 @@ struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr) err = xdp_umem_reg(umem, mr); if (err) { - ida_simple_remove(&umem_ida, umem->id); + ida_free(&umem_ida, umem->id); kfree(umem); return ERR_PTR(err); } diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 35c7e89b2e7d..637ca8838436 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -275,7 +275,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, xso->dev = NULL; xso->dir = 0; xso->real_dev = NULL; - dev_put_track(dev, &xso->dev_tracker); + netdev_put(dev, &xso->dev_tracker); if (err != -EOPNOTSUPP) return err; diff --git a/samples/bpf/xdp_fwd_user.c b/samples/bpf/xdp_fwd_user.c index 1828487bae9a..84f57f1209ce 100644 --- a/samples/bpf/xdp_fwd_user.c +++ b/samples/bpf/xdp_fwd_user.c @@ -47,17 +47,60 @@ static int do_attach(int idx, int prog_fd, int map_fd, const char *name) return err; } -static int do_detach(int idx, const char *name) +static int do_detach(int ifindex, const char *ifname, const char *app_name) { - int err; + 
LIBBPF_OPTS(bpf_xdp_attach_opts, opts); + struct bpf_prog_info prog_info = {}; + char prog_name[BPF_OBJ_NAME_LEN]; + __u32 info_len, curr_prog_id; + int prog_fd; + int err = 1; + + if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) { + printf("ERROR: bpf_xdp_query_id failed (%s)\n", + strerror(errno)); + return err; + } - err = bpf_xdp_detach(idx, xdp_flags, NULL); - if (err < 0) - printf("ERROR: failed to detach program from %s\n", name); + if (!curr_prog_id) { + printf("ERROR: flags(0x%x) xdp prog is not attached to %s\n", + xdp_flags, ifname); + return err; + } + info_len = sizeof(prog_info); + prog_fd = bpf_prog_get_fd_by_id(curr_prog_id); + if (prog_fd < 0) { + printf("ERROR: bpf_prog_get_fd_by_id failed (%s)\n", + strerror(errno)); + return prog_fd; + } + + err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len); + if (err) { + printf("ERROR: bpf_obj_get_info_by_fd failed (%s)\n", + strerror(errno)); + goto close_out; + } + snprintf(prog_name, sizeof(prog_name), "%s_prog", app_name); + prog_name[BPF_OBJ_NAME_LEN - 1] = '\0'; + + if (strcmp(prog_info.name, prog_name)) { + printf("ERROR: %s isn't attached to %s\n", app_name, ifname); + err = 1; + goto close_out; + } + + opts.old_prog_fd = prog_fd; + err = bpf_xdp_detach(ifindex, xdp_flags, &opts); + if (err < 0) + printf("ERROR: failed to detach program from %s (%s)\n", + ifname, strerror(errno)); /* TODO: Remember to cleanup map, when adding use of shared map * bpf_map_delete_elem((map_fd, &idx); */ +close_out: + close(prog_fd); return err; } @@ -169,7 +212,7 @@ int main(int argc, char **argv) return 1; } if (!attach) { - err = do_detach(idx, argv[i]); + err = do_detach(idx, argv[i], prog_name); if (err) ret = err; } else { diff --git a/samples/bpf/xdp_router_ipv4.bpf.c b/samples/bpf/xdp_router_ipv4.bpf.c index 248119ca7938..0643330d1d2e 100644 --- a/samples/bpf/xdp_router_ipv4.bpf.c +++ b/samples/bpf/xdp_router_ipv4.bpf.c @@ -150,6 +150,15 @@ int xdp_router_ipv4_prog(struct xdp_md *ctx) dest_mac = 
bpf_map_lookup_elem(&arp_table, &prefix_value->gw); + if (!dest_mac) { + /* Forward the packet to the kernel in + * order to trigger ARP discovery for + * the default gw. + */ + if (rec) + NO_TEAR_INC(rec->xdp_pass); + return XDP_PASS; + } } } diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index 855b937e7585..a0ec321469bd 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -635,6 +635,8 @@ class PrinterHelpers(Printer): 'struct bpf_timer', 'struct mptcp_sock', 'struct bpf_dynptr', + 'struct iphdr', + 'struct ipv6hdr', ] known_types = { '...', @@ -686,6 +688,8 @@ class PrinterHelpers(Printer): 'struct bpf_timer', 'struct mptcp_sock', 'struct bpf_dynptr', + 'struct iphdr', + 'struct ipv6hdr', } mapped_types = { 'u8': '__u8', diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst index a17e9aa314fd..bd015ec9847b 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst @@ -31,11 +31,17 @@ CGROUP COMMANDS | **bpftool** **cgroup help** | | *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } -| *ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** | -| **bind4** | **bind6** | **post_bind4** | **post_bind6** | **connect4** | **connect6** | -| **getpeername4** | **getpeername6** | **getsockname4** | **getsockname6** | **sendmsg4** | -| **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | **getsockopt** | **setsockopt** | -| **sock_release** } +| *ATTACH_TYPE* := { **cgroup_inet_ingress** | **cgroup_inet_egress** | +| **cgroup_inet_sock_create** | **cgroup_sock_ops** | +| **cgroup_device** | **cgroup_inet4_bind** | **cgroup_inet6_bind** | +| **cgroup_inet4_post_bind** | **cgroup_inet6_post_bind** | +| **cgroup_inet4_connect** | **cgroup_inet6_connect** | +| **cgroup_inet4_getpeername** | **cgroup_inet6_getpeername** | +| **cgroup_inet4_getsockname** | 
**cgroup_inet6_getsockname** | +| **cgroup_udp4_sendmsg** | **cgroup_udp6_sendmsg** | +| **cgroup_udp4_recvmsg** | **cgroup_udp6_recvmsg** | +| **cgroup_sysctl** | **cgroup_getsockopt** | **cgroup_setsockopt** | +| **cgroup_inet_sock_release** } | *ATTACH_FLAGS* := { **multi** | **override** } DESCRIPTION diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index a2e9359e554c..eb1b2a254eb1 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -53,8 +53,9 @@ PROG COMMANDS | **cgroup/getsockopt** | **cgroup/setsockopt** | **cgroup/sock_release** | | **struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup** | } -| *ATTACH_TYPE* := { -| **msg_verdict** | **skb_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector** +| *ATTACH_TYPE* := { +| **sk_msg_verdict** | **sk_skb_verdict** | **sk_skb_stream_verdict** | +| **sk_skb_stream_parser** | **flow_dissector** | } | *METRICs* := { | **cycles** | **instructions** | **l1d_loads** | **llc_misses** | diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index c6d2c77d0252..c19e0e4c41bd 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -53,7 +53,7 @@ $(LIBBPF_INTERNAL_HDRS): $(LIBBPF_HDRS_DIR)/%.h: $(BPF_DIR)/%.h | $(LIBBPF_HDRS_ $(LIBBPF_BOOTSTRAP): $(wildcard $(BPF_DIR)/*.[ch] $(BPF_DIR)/Makefile) | $(LIBBPF_BOOTSTRAP_OUTPUT) $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_BOOTSTRAP_OUTPUT) \ DESTDIR=$(LIBBPF_BOOTSTRAP_DESTDIR:/=) prefix= \ - ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD) $@ install_headers + ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD) AR=$(HOSTAR) $@ install_headers $(LIBBPF_BOOTSTRAP_INTERNAL_HDRS): $(LIBBPF_BOOTSTRAP_HDRS_DIR)/%.h: $(BPF_DIR)/%.h | $(LIBBPF_BOOTSTRAP_HDRS_DIR) $(call QUIET_INSTALL, $@) diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 
5df8d72c5179..91f89a9a5b36 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -407,8 +407,8 @@ _bpftool() return 0 ;; 5) - local BPFTOOL_PROG_ATTACH_TYPES='msg_verdict \ - skb_verdict stream_verdict stream_parser \ + local BPFTOOL_PROG_ATTACH_TYPES='sk_msg_verdict \ + sk_skb_verdict sk_skb_stream_verdict sk_skb_stream_parser \ flow_dissector' COMPREPLY=( $( compgen -W "$BPFTOOL_PROG_ATTACH_TYPES" -- "$cur" ) ) return 0 @@ -1039,12 +1039,14 @@ _bpftool() return 0 ;; attach|detach) - local BPFTOOL_CGROUP_ATTACH_TYPES='ingress egress \ - sock_create sock_ops device \ - bind4 bind6 post_bind4 post_bind6 connect4 connect6 \ - getpeername4 getpeername6 getsockname4 getsockname6 \ - sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl getsockopt \ - setsockopt sock_release' + local BPFTOOL_CGROUP_ATTACH_TYPES='cgroup_inet_ingress cgroup_inet_egress \ + cgroup_inet_sock_create cgroup_sock_ops cgroup_device cgroup_inet4_bind \ + cgroup_inet6_bind cgroup_inet4_post_bind cgroup_inet6_post_bind \ + cgroup_inet4_connect cgroup_inet6_connect cgroup_inet4_getpeername \ + cgroup_inet6_getpeername cgroup_inet4_getsockname cgroup_inet6_getsockname \ + cgroup_udp4_sendmsg cgroup_udp6_sendmsg cgroup_udp4_recvmsg \ + cgroup_udp6_recvmsg cgroup_sysctl cgroup_getsockopt cgroup_setsockopt \ + cgroup_inet_sock_release' local ATTACH_FLAGS='multi override' local PROG_TYPE='id pinned tag name' # Check for $prev = $command first diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 7e6accb9d9f7..0744bd1150be 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -40,6 +40,7 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = { [BTF_KIND_FLOAT] = "FLOAT", [BTF_KIND_DECL_TAG] = "DECL_TAG", [BTF_KIND_TYPE_TAG] = "TYPE_TAG", + [BTF_KIND_ENUM64] = "ENUM64", }; struct btf_attach_point { @@ -212,26 +213,76 @@ static int dump_btf_type(const struct btf *btf, __u32 id, case BTF_KIND_ENUM: { const struct btf_enum *v = (const 
void *)(t + 1); __u16 vlen = BTF_INFO_VLEN(t->info); + const char *encoding; int i; + encoding = btf_kflag(t) ? "SIGNED" : "UNSIGNED"; if (json_output) { + jsonw_string_field(w, "encoding", encoding); jsonw_uint_field(w, "size", t->size); jsonw_uint_field(w, "vlen", vlen); jsonw_name(w, "values"); jsonw_start_array(w); } else { - printf(" size=%u vlen=%u", t->size, vlen); + printf(" encoding=%s size=%u vlen=%u", encoding, t->size, vlen); + } + for (i = 0; i < vlen; i++, v++) { + const char *name = btf_str(btf, v->name_off); + + if (json_output) { + jsonw_start_object(w); + jsonw_string_field(w, "name", name); + if (btf_kflag(t)) + jsonw_int_field(w, "val", v->val); + else + jsonw_uint_field(w, "val", v->val); + jsonw_end_object(w); + } else { + if (btf_kflag(t)) + printf("\n\t'%s' val=%d", name, v->val); + else + printf("\n\t'%s' val=%u", name, v->val); + } + } + if (json_output) + jsonw_end_array(w); + break; + } + case BTF_KIND_ENUM64: { + const struct btf_enum64 *v = btf_enum64(t); + __u16 vlen = btf_vlen(t); + const char *encoding; + int i; + + encoding = btf_kflag(t) ? 
"SIGNED" : "UNSIGNED"; + if (json_output) { + jsonw_string_field(w, "encoding", encoding); + jsonw_uint_field(w, "size", t->size); + jsonw_uint_field(w, "vlen", vlen); + jsonw_name(w, "values"); + jsonw_start_array(w); + } else { + printf(" encoding=%s size=%u vlen=%u", encoding, t->size, vlen); } for (i = 0; i < vlen; i++, v++) { const char *name = btf_str(btf, v->name_off); + __u64 val = ((__u64)v->val_hi32 << 32) | v->val_lo32; if (json_output) { jsonw_start_object(w); jsonw_string_field(w, "name", name); - jsonw_uint_field(w, "val", v->val); + if (btf_kflag(t)) + jsonw_int_field(w, "val", val); + else + jsonw_uint_field(w, "val", val); jsonw_end_object(w); } else { - printf("\n\t'%s' val=%u", name, v->val); + if (btf_kflag(t)) + printf("\n\t'%s' val=%lldLL", name, + (unsigned long long)val); + else + printf("\n\t'%s' val=%lluULL", name, + (unsigned long long)val); } } if (json_output) diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c index f5dddf8ef404..125798b0bc5d 100644 --- a/tools/bpf/bpftool/btf_dumper.c +++ b/tools/bpf/bpftool/btf_dumper.c @@ -182,6 +182,32 @@ static int btf_dumper_enum(const struct btf_dumper *d, return 0; } +static int btf_dumper_enum64(const struct btf_dumper *d, + const struct btf_type *t, + const void *data) +{ + const struct btf_enum64 *enums = btf_enum64(t); + __u32 val_lo32, val_hi32; + __u64 value; + __u16 i; + + value = *(__u64 *)data; + val_lo32 = (__u32)value; + val_hi32 = value >> 32; + + for (i = 0; i < btf_vlen(t); i++) { + if (val_lo32 == enums[i].val_lo32 && val_hi32 == enums[i].val_hi32) { + jsonw_string(d->jw, + btf__name_by_offset(d->btf, + enums[i].name_off)); + return 0; + } + } + + jsonw_int(d->jw, value); + return 0; +} + static bool is_str_array(const struct btf *btf, const struct btf_array *arr, const char *s) { @@ -542,6 +568,8 @@ static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id, return btf_dumper_array(d, type_id, data); case BTF_KIND_ENUM: return 
btf_dumper_enum(d, t, data); + case BTF_KIND_ENUM64: + return btf_dumper_enum64(d, t, data); case BTF_KIND_PTR: btf_dumper_ptr(d, t, data); return 0; @@ -618,6 +646,7 @@ static int __btf_dumper_type_only(const struct btf *btf, __u32 type_id, btf__name_by_offset(btf, t->name_off)); break; case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: BTF_PRINT_ARG("enum %s ", btf__name_by_offset(btf, t->name_off)); break; diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c index effe136119d7..42421fe47a58 100644 --- a/tools/bpf/bpftool/cgroup.c +++ b/tools/bpf/bpftool/cgroup.c @@ -21,25 +21,43 @@ #define HELP_SPEC_ATTACH_FLAGS \ "ATTACH_FLAGS := { multi | override }" -#define HELP_SPEC_ATTACH_TYPES \ - " ATTACH_TYPE := { ingress | egress | sock_create |\n" \ - " sock_ops | device | bind4 | bind6 |\n" \ - " post_bind4 | post_bind6 | connect4 |\n" \ - " connect6 | getpeername4 | getpeername6 |\n" \ - " getsockname4 | getsockname6 | sendmsg4 |\n" \ - " sendmsg6 | recvmsg4 | recvmsg6 |\n" \ - " sysctl | getsockopt | setsockopt |\n" \ - " sock_release }" +#define HELP_SPEC_ATTACH_TYPES \ + " ATTACH_TYPE := { cgroup_inet_ingress | cgroup_inet_egress |\n" \ + " cgroup_inet_sock_create | cgroup_sock_ops |\n" \ + " cgroup_device | cgroup_inet4_bind |\n" \ + " cgroup_inet6_bind | cgroup_inet4_post_bind |\n" \ + " cgroup_inet6_post_bind | cgroup_inet4_connect |\n" \ + " cgroup_inet6_connect | cgroup_inet4_getpeername |\n" \ + " cgroup_inet6_getpeername | cgroup_inet4_getsockname |\n" \ + " cgroup_inet6_getsockname | cgroup_udp4_sendmsg |\n" \ + " cgroup_udp6_sendmsg | cgroup_udp4_recvmsg |\n" \ + " cgroup_udp6_recvmsg | cgroup_sysctl |\n" \ + " cgroup_getsockopt | cgroup_setsockopt |\n" \ + " cgroup_inet_sock_release }" static unsigned int query_flags; static enum bpf_attach_type parse_attach_type(const char *str) { + const char *attach_type_str; enum bpf_attach_type type; - for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) { - if (attach_type_name[type] && - is_prefix(str, 
attach_type_name[type])) + for (type = 0; ; type++) { + attach_type_str = libbpf_bpf_attach_type_str(type); + if (!attach_type_str) + break; + if (!strcmp(str, attach_type_str)) + return type; + } + + /* Also check traditionally used attach type strings. For these we keep + * allowing prefixed usage. + */ + for (type = 0; ; type++) { + attach_type_str = bpf_attach_type_input_str(type); + if (!attach_type_str) + break; + if (is_prefix(str, attach_type_str)) return type; } @@ -52,6 +70,7 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type, { char prog_name[MAX_PROG_FULL_NAME]; struct bpf_prog_info info = {}; + const char *attach_type_str; __u32 info_len = sizeof(info); int prog_fd; @@ -64,13 +83,13 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type, return -1; } + attach_type_str = libbpf_bpf_attach_type_str(attach_type); get_prog_full_name(&info, prog_fd, prog_name, sizeof(prog_name)); if (json_output) { jsonw_start_object(json_wtr); jsonw_uint_field(json_wtr, "id", info.id); - if (attach_type < ARRAY_SIZE(attach_type_name)) - jsonw_string_field(json_wtr, "attach_type", - attach_type_name[attach_type]); + if (attach_type_str) + jsonw_string_field(json_wtr, "attach_type", attach_type_str); else jsonw_uint_field(json_wtr, "attach_type", attach_type); jsonw_string_field(json_wtr, "attach_flags", @@ -79,8 +98,8 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type, jsonw_end_object(json_wtr); } else { printf("%s%-8u ", level ? 
" " : "", info.id); - if (attach_type < ARRAY_SIZE(attach_type_name)) - printf("%-15s", attach_type_name[attach_type]); + if (attach_type_str) + printf("%-15s", attach_type_str); else printf("type %-10u", attach_type); printf(" %-15s %-15s\n", attach_flags_str, prog_name); diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index c740142c24d8..a0d4acd7c54a 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -17,6 +17,7 @@ #include <linux/magic.h> #include <net/if.h> #include <sys/mount.h> +#include <sys/resource.h> #include <sys/stat.h> #include <sys/vfs.h> @@ -31,52 +32,6 @@ #define BPF_FS_MAGIC 0xcafe4a11 #endif -const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = { - [BPF_CGROUP_INET_INGRESS] = "ingress", - [BPF_CGROUP_INET_EGRESS] = "egress", - [BPF_CGROUP_INET_SOCK_CREATE] = "sock_create", - [BPF_CGROUP_INET_SOCK_RELEASE] = "sock_release", - [BPF_CGROUP_SOCK_OPS] = "sock_ops", - [BPF_CGROUP_DEVICE] = "device", - [BPF_CGROUP_INET4_BIND] = "bind4", - [BPF_CGROUP_INET6_BIND] = "bind6", - [BPF_CGROUP_INET4_CONNECT] = "connect4", - [BPF_CGROUP_INET6_CONNECT] = "connect6", - [BPF_CGROUP_INET4_POST_BIND] = "post_bind4", - [BPF_CGROUP_INET6_POST_BIND] = "post_bind6", - [BPF_CGROUP_INET4_GETPEERNAME] = "getpeername4", - [BPF_CGROUP_INET6_GETPEERNAME] = "getpeername6", - [BPF_CGROUP_INET4_GETSOCKNAME] = "getsockname4", - [BPF_CGROUP_INET6_GETSOCKNAME] = "getsockname6", - [BPF_CGROUP_UDP4_SENDMSG] = "sendmsg4", - [BPF_CGROUP_UDP6_SENDMSG] = "sendmsg6", - [BPF_CGROUP_SYSCTL] = "sysctl", - [BPF_CGROUP_UDP4_RECVMSG] = "recvmsg4", - [BPF_CGROUP_UDP6_RECVMSG] = "recvmsg6", - [BPF_CGROUP_GETSOCKOPT] = "getsockopt", - [BPF_CGROUP_SETSOCKOPT] = "setsockopt", - [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser", - [BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict", - [BPF_SK_SKB_VERDICT] = "sk_skb_verdict", - [BPF_SK_MSG_VERDICT] = "sk_msg_verdict", - [BPF_LIRC_MODE2] = "lirc_mode2", - [BPF_FLOW_DISSECTOR] = "flow_dissector", 
- [BPF_TRACE_RAW_TP] = "raw_tp", - [BPF_TRACE_FENTRY] = "fentry", - [BPF_TRACE_FEXIT] = "fexit", - [BPF_MODIFY_RETURN] = "mod_ret", - [BPF_LSM_MAC] = "lsm_mac", - [BPF_SK_LOOKUP] = "sk_lookup", - [BPF_TRACE_ITER] = "trace_iter", - [BPF_XDP_DEVMAP] = "xdp_devmap", - [BPF_XDP_CPUMAP] = "xdp_cpumap", - [BPF_XDP] = "xdp", - [BPF_SK_REUSEPORT_SELECT] = "sk_skb_reuseport_select", - [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_skb_reuseport_select_or_migrate", - [BPF_PERF_EVENT] = "perf_event", - [BPF_TRACE_KPROBE_MULTI] = "trace_kprobe_multi", -}; - void p_err(const char *fmt, ...) { va_list ap; @@ -118,6 +73,13 @@ static bool is_bpffs(char *path) return (unsigned long)st_fs.f_type == BPF_FS_MAGIC; } +void set_max_rlimit(void) +{ + struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY }; + + setrlimit(RLIMIT_MEMLOCK, &rinf); +} + static int mnt_fs(const char *target, const char *type, char *buff, size_t bufflen) { @@ -1009,3 +971,39 @@ bool equal_fn_for_key_as_id(const void *k1, const void *k2, void *ctx) { return k1 == k2; } + +const char *bpf_attach_type_input_str(enum bpf_attach_type t) +{ + switch (t) { + case BPF_CGROUP_INET_INGRESS: return "ingress"; + case BPF_CGROUP_INET_EGRESS: return "egress"; + case BPF_CGROUP_INET_SOCK_CREATE: return "sock_create"; + case BPF_CGROUP_INET_SOCK_RELEASE: return "sock_release"; + case BPF_CGROUP_SOCK_OPS: return "sock_ops"; + case BPF_CGROUP_DEVICE: return "device"; + case BPF_CGROUP_INET4_BIND: return "bind4"; + case BPF_CGROUP_INET6_BIND: return "bind6"; + case BPF_CGROUP_INET4_CONNECT: return "connect4"; + case BPF_CGROUP_INET6_CONNECT: return "connect6"; + case BPF_CGROUP_INET4_POST_BIND: return "post_bind4"; + case BPF_CGROUP_INET6_POST_BIND: return "post_bind6"; + case BPF_CGROUP_INET4_GETPEERNAME: return "getpeername4"; + case BPF_CGROUP_INET6_GETPEERNAME: return "getpeername6"; + case BPF_CGROUP_INET4_GETSOCKNAME: return "getsockname4"; + case BPF_CGROUP_INET6_GETSOCKNAME: return "getsockname6"; + case 
BPF_CGROUP_UDP4_SENDMSG: return "sendmsg4"; + case BPF_CGROUP_UDP6_SENDMSG: return "sendmsg6"; + case BPF_CGROUP_SYSCTL: return "sysctl"; + case BPF_CGROUP_UDP4_RECVMSG: return "recvmsg4"; + case BPF_CGROUP_UDP6_RECVMSG: return "recvmsg6"; + case BPF_CGROUP_GETSOCKOPT: return "getsockopt"; + case BPF_CGROUP_SETSOCKOPT: return "setsockopt"; + case BPF_TRACE_RAW_TP: return "raw_tp"; + case BPF_TRACE_FENTRY: return "fentry"; + case BPF_TRACE_FEXIT: return "fexit"; + case BPF_MODIFY_RETURN: return "mod_ret"; + case BPF_SK_REUSEPORT_SELECT: return "sk_skb_reuseport_select"; + case BPF_SK_REUSEPORT_SELECT_OR_MIGRATE: return "sk_skb_reuseport_select_or_migrate"; + default: return libbpf_bpf_attach_type_str(t); + } +} diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index d12f46051aac..bac4ef428a02 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -548,8 +548,8 @@ static bool probe_prog_type_ifindex(enum bpf_prog_type prog_type, __u32 ifindex) } static void -probe_prog_type(enum bpf_prog_type prog_type, bool *supported_types, - const char *define_prefix, __u32 ifindex) +probe_prog_type(enum bpf_prog_type prog_type, const char *prog_type_str, + bool *supported_types, const char *define_prefix, __u32 ifindex) { char feat_name[128], plain_desc[128], define_name[128]; const char *plain_comment = "eBPF program_type "; @@ -580,20 +580,16 @@ probe_prog_type(enum bpf_prog_type prog_type, bool *supported_types, supported_types[prog_type] |= res; - if (!prog_type_name[prog_type]) { - p_info("program type name not found (type %d)", prog_type); - return; - } maxlen = sizeof(plain_desc) - strlen(plain_comment) - 1; - if (strlen(prog_type_name[prog_type]) > maxlen) { + if (strlen(prog_type_str) > maxlen) { p_info("program type name too long"); return; } - sprintf(feat_name, "have_%s_prog_type", prog_type_name[prog_type]); - sprintf(define_name, "%s_prog_type", prog_type_name[prog_type]); + sprintf(feat_name, "have_%s_prog_type", 
prog_type_str); + sprintf(define_name, "%s_prog_type", prog_type_str); uppercase(define_name, sizeof(define_name)); - sprintf(plain_desc, "%s%s", plain_comment, prog_type_name[prog_type]); + sprintf(plain_desc, "%s%s", plain_comment, prog_type_str); print_bool_feature(feat_name, plain_desc, define_name, res, define_prefix); } @@ -619,8 +615,8 @@ static bool probe_map_type_ifindex(enum bpf_map_type map_type, __u32 ifindex) } static void -probe_map_type(enum bpf_map_type map_type, const char *define_prefix, - __u32 ifindex) +probe_map_type(enum bpf_map_type map_type, char const *map_type_str, + const char *define_prefix, __u32 ifindex) { char feat_name[128], plain_desc[128], define_name[128]; const char *plain_comment = "eBPF map_type "; @@ -645,20 +641,16 @@ probe_map_type(enum bpf_map_type map_type, const char *define_prefix, * check required for unprivileged users */ - if (!map_type_name[map_type]) { - p_info("map type name not found (type %d)", map_type); - return; - } maxlen = sizeof(plain_desc) - strlen(plain_comment) - 1; - if (strlen(map_type_name[map_type]) > maxlen) { + if (strlen(map_type_str) > maxlen) { p_info("map type name too long"); return; } - sprintf(feat_name, "have_%s_map_type", map_type_name[map_type]); - sprintf(define_name, "%s_map_type", map_type_name[map_type]); + sprintf(feat_name, "have_%s_map_type", map_type_str); + sprintf(define_name, "%s_map_type", map_type_str); uppercase(define_name, sizeof(define_name)); - sprintf(plain_desc, "%s%s", plain_comment, map_type_name[map_type]); + sprintf(plain_desc, "%s%s", plain_comment, map_type_str); print_bool_feature(feat_name, plain_desc, define_name, res, define_prefix); } @@ -728,10 +720,10 @@ probe_helper_for_progtype(enum bpf_prog_type prog_type, bool supported_type, } static void -probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type, +probe_helpers_for_progtype(enum bpf_prog_type prog_type, + const char *prog_type_str, bool supported_type, const char *define_prefix, 
__u32 ifindex) { - const char *ptype_name = prog_type_name[prog_type]; char feat_name[128]; unsigned int id; bool probe_res = false; @@ -747,12 +739,12 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type, } if (json_output) { - sprintf(feat_name, "%s_available_helpers", ptype_name); + sprintf(feat_name, "%s_available_helpers", prog_type_str); jsonw_name(json_wtr, feat_name); jsonw_start_array(json_wtr); } else if (!define_prefix) { printf("eBPF helpers supported for program type %s:", - ptype_name); + prog_type_str); } for (id = 1; id < ARRAY_SIZE(helper_name); id++) { @@ -768,7 +760,7 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type, /* fallthrough */ default: probe_res |= probe_helper_for_progtype(prog_type, supported_type, - define_prefix, id, ptype_name, + define_prefix, id, prog_type_str, ifindex); } } @@ -943,30 +935,47 @@ static void section_program_types(bool *supported_types, const char *define_prefix, __u32 ifindex) { - unsigned int i; + unsigned int prog_type = BPF_PROG_TYPE_UNSPEC; + const char *prog_type_str; print_start_section("program_types", "Scanning eBPF program types...", "/*** eBPF program types ***/", define_prefix); - for (i = BPF_PROG_TYPE_UNSPEC + 1; i < prog_type_name_size; i++) - probe_prog_type(i, supported_types, define_prefix, ifindex); + while (true) { + prog_type++; + prog_type_str = libbpf_bpf_prog_type_str(prog_type); + /* libbpf will return NULL for variants unknown to it. 
*/ + if (!prog_type_str) + break; + + probe_prog_type(prog_type, prog_type_str, supported_types, define_prefix, + ifindex); + } print_end_section(); } static void section_map_types(const char *define_prefix, __u32 ifindex) { - unsigned int i; + unsigned int map_type = BPF_MAP_TYPE_UNSPEC; + const char *map_type_str; print_start_section("map_types", "Scanning eBPF map types...", "/*** eBPF map types ***/", define_prefix); - for (i = BPF_MAP_TYPE_UNSPEC + 1; i < map_type_name_size; i++) - probe_map_type(i, define_prefix, ifindex); + while (true) { + map_type++; + map_type_str = libbpf_bpf_map_type_str(map_type); + /* libbpf will return NULL for variants unknown to it. */ + if (!map_type_str) + break; + + probe_map_type(map_type, map_type_str, define_prefix, ifindex); + } print_end_section(); } @@ -974,7 +983,8 @@ static void section_map_types(const char *define_prefix, __u32 ifindex) static void section_helpers(bool *supported_types, const char *define_prefix, __u32 ifindex) { - unsigned int i; + unsigned int prog_type = BPF_PROG_TYPE_UNSPEC; + const char *prog_type_str; print_start_section("helpers", "Scanning eBPF helper functions...", @@ -996,9 +1006,18 @@ section_helpers(bool *supported_types, const char *define_prefix, __u32 ifindex) " %sBPF__PROG_TYPE_ ## prog_type ## __HELPER_ ## helper\n", define_prefix, define_prefix, define_prefix, define_prefix); - for (i = BPF_PROG_TYPE_UNSPEC + 1; i < prog_type_name_size; i++) - probe_helpers_for_progtype(i, supported_types[i], define_prefix, + while (true) { + prog_type++; + prog_type_str = libbpf_bpf_prog_type_str(prog_type); + /* libbpf will return NULL for variants unknown to it. 
*/ + if (!prog_type_str) + break; + + probe_helpers_for_progtype(prog_type, prog_type_str, + supported_types[prog_type], + define_prefix, ifindex); + } print_end_section(); } @@ -1148,6 +1167,8 @@ static int do_probe(int argc, char **argv) __u32 ifindex = 0; char *ifname; + set_max_rlimit(); + while (argc) { if (is_prefix(*argv, "kernel")) { if (target != COMPONENT_UNSPEC) { diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index 4c9477ff748d..480cbd859359 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -474,6 +474,9 @@ static void codegen_asserts(struct bpf_object *obj, const char *obj_name) const struct btf_type *sec; char map_ident[256], var_ident[256]; + if (!btf) + return; + codegen("\ \n\ __attribute__((unused)) static void \n\ @@ -1747,6 +1750,7 @@ btfgen_mark_type(struct btfgen_info *info, unsigned int type_id, bool follow_poi case BTF_KIND_INT: case BTF_KIND_FLOAT: case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: case BTF_KIND_STRUCT: case BTF_KIND_UNION: break; diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index 6353a789322b..7a20931c3250 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -13,19 +13,6 @@ #include "json_writer.h" #include "main.h" -static const char * const link_type_name[] = { - [BPF_LINK_TYPE_UNSPEC] = "unspec", - [BPF_LINK_TYPE_RAW_TRACEPOINT] = "raw_tracepoint", - [BPF_LINK_TYPE_TRACING] = "tracing", - [BPF_LINK_TYPE_CGROUP] = "cgroup", - [BPF_LINK_TYPE_ITER] = "iter", - [BPF_LINK_TYPE_NETNS] = "netns", - [BPF_LINK_TYPE_XDP] = "xdp", - [BPF_LINK_TYPE_PERF_EVENT] = "perf_event", - [BPF_LINK_TYPE_KPROBE_MULTI] = "kprobe_multi", - [BPF_LINK_TYPE_STRUCT_OPS] = "struct_ops", -}; - static struct hashmap *link_table; static int link_parse_fd(int *argc, char ***argv) @@ -67,9 +54,12 @@ static int link_parse_fd(int *argc, char ***argv) static void show_link_header_json(struct bpf_link_info *info, json_writer_t *wtr) { + const char *link_type_str; + jsonw_uint_field(wtr, "id", 
info->id); - if (info->type < ARRAY_SIZE(link_type_name)) - jsonw_string_field(wtr, "type", link_type_name[info->type]); + link_type_str = libbpf_bpf_link_type_str(info->type); + if (link_type_str) + jsonw_string_field(wtr, "type", link_type_str); else jsonw_uint_field(wtr, "type", info->type); @@ -78,9 +68,11 @@ show_link_header_json(struct bpf_link_info *info, json_writer_t *wtr) static void show_link_attach_type_json(__u32 attach_type, json_writer_t *wtr) { - if (attach_type < ARRAY_SIZE(attach_type_name)) - jsonw_string_field(wtr, "attach_type", - attach_type_name[attach_type]); + const char *attach_type_str; + + attach_type_str = libbpf_bpf_attach_type_str(attach_type); + if (attach_type_str) + jsonw_string_field(wtr, "attach_type", attach_type_str); else jsonw_uint_field(wtr, "attach_type", attach_type); } @@ -121,6 +113,7 @@ static int get_prog_info(int prog_id, struct bpf_prog_info *info) static int show_link_close_json(int fd, struct bpf_link_info *info) { struct bpf_prog_info prog_info; + const char *prog_type_str; int err; jsonw_start_object(json_wtr); @@ -137,12 +130,12 @@ static int show_link_close_json(int fd, struct bpf_link_info *info) if (err) return err; - if (prog_info.type < prog_type_name_size) - jsonw_string_field(json_wtr, "prog_type", - prog_type_name[prog_info.type]); + prog_type_str = libbpf_bpf_prog_type_str(prog_info.type); + /* libbpf will return NULL for variants unknown to it. 
*/ + if (prog_type_str) + jsonw_string_field(json_wtr, "prog_type", prog_type_str); else - jsonw_uint_field(json_wtr, "prog_type", - prog_info.type); + jsonw_uint_field(json_wtr, "prog_type", prog_info.type); show_link_attach_type_json(info->tracing.attach_type, json_wtr); @@ -184,9 +177,12 @@ static int show_link_close_json(int fd, struct bpf_link_info *info) static void show_link_header_plain(struct bpf_link_info *info) { + const char *link_type_str; + printf("%u: ", info->id); - if (info->type < ARRAY_SIZE(link_type_name)) - printf("%s ", link_type_name[info->type]); + link_type_str = libbpf_bpf_link_type_str(info->type); + if (link_type_str) + printf("%s ", link_type_str); else printf("type %u ", info->type); @@ -195,8 +191,11 @@ static void show_link_header_plain(struct bpf_link_info *info) static void show_link_attach_type_plain(__u32 attach_type) { - if (attach_type < ARRAY_SIZE(attach_type_name)) - printf("attach_type %s ", attach_type_name[attach_type]); + const char *attach_type_str; + + attach_type_str = libbpf_bpf_attach_type_str(attach_type); + if (attach_type_str) + printf("attach_type %s ", attach_type_str); else printf("attach_type %u ", attach_type); } @@ -214,6 +213,7 @@ static void show_iter_plain(struct bpf_link_info *info) static int show_link_close_plain(int fd, struct bpf_link_info *info) { struct bpf_prog_info prog_info; + const char *prog_type_str; int err; show_link_header_plain(info); @@ -228,9 +228,10 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info) if (err) return err; - if (prog_info.type < prog_type_name_size) - printf("\n\tprog_type %s ", - prog_type_name[prog_info.type]); + prog_type_str = libbpf_bpf_prog_type_str(prog_info.type); + /* libbpf will return NULL for variants unknown to it. 
*/ + if (prog_type_str) + printf("\n\tprog_type %s ", prog_type_str); else printf("\n\tprog_type %u ", prog_info.type); diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 9062ef2b8767..451cefc2d0da 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -508,8 +508,6 @@ int main(int argc, char **argv) * mode for loading generated skeleton. */ libbpf_set_strict_mode(LIBBPF_STRICT_ALL & ~LIBBPF_STRICT_MAP_DEFINITIONS); - } else { - libbpf_set_strict_mode(LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK); } argc -= optind; diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index aa99ffab451a..589cb76b227a 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -63,14 +63,8 @@ static inline void *u64_to_ptr(__u64 ptr) #define HELP_SPEC_LINK \ "LINK := { id LINK_ID | pinned FILE }" -extern const char * const prog_type_name[]; -extern const size_t prog_type_name_size; - extern const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE]; -extern const char * const map_type_name[]; -extern const size_t map_type_name_size; - /* keep in sync with the definition in skeleton/pid_iter.bpf.c */ enum bpf_obj_type { BPF_OBJ_UNKNOWN, @@ -102,6 +96,8 @@ int detect_common_prefix(const char *arg, ...); void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep); void usage(void) __noreturn; +void set_max_rlimit(void); + int mount_tracefs(const char *target); struct obj_ref { @@ -249,6 +245,20 @@ int print_all_levels(__maybe_unused enum libbpf_print_level level, size_t hash_fn_for_key_as_id(const void *key, void *ctx); bool equal_fn_for_key_as_id(const void *k1, const void *k2, void *ctx); +/* bpf_attach_type_input_str - convert the provided attach type value into a + * textual representation that we accept for input purposes. 
+ * + * This function is similar in nature to libbpf_bpf_attach_type_str, but + * recognizes some attach type names that have been used by the program in the + * past and which do not follow the string inference scheme that libbpf uses. + * These textual representations should only be used for user input. + * + * @t: The attach type + * Returns a pointer to a static string identifying the attach type. NULL is + * returned for unknown bpf_attach_type values. + */ +const char *bpf_attach_type_input_str(enum bpf_attach_type t); + static inline void *u32_as_hash_field(__u32 x) { return (void *)(uintptr_t)x; diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 877387ef79c7..38b6bc9c26c3 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -22,42 +22,6 @@ #include "json_writer.h" #include "main.h" -const char * const map_type_name[] = { - [BPF_MAP_TYPE_UNSPEC] = "unspec", - [BPF_MAP_TYPE_HASH] = "hash", - [BPF_MAP_TYPE_ARRAY] = "array", - [BPF_MAP_TYPE_PROG_ARRAY] = "prog_array", - [BPF_MAP_TYPE_PERF_EVENT_ARRAY] = "perf_event_array", - [BPF_MAP_TYPE_PERCPU_HASH] = "percpu_hash", - [BPF_MAP_TYPE_PERCPU_ARRAY] = "percpu_array", - [BPF_MAP_TYPE_STACK_TRACE] = "stack_trace", - [BPF_MAP_TYPE_CGROUP_ARRAY] = "cgroup_array", - [BPF_MAP_TYPE_LRU_HASH] = "lru_hash", - [BPF_MAP_TYPE_LRU_PERCPU_HASH] = "lru_percpu_hash", - [BPF_MAP_TYPE_LPM_TRIE] = "lpm_trie", - [BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps", - [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps", - [BPF_MAP_TYPE_DEVMAP] = "devmap", - [BPF_MAP_TYPE_DEVMAP_HASH] = "devmap_hash", - [BPF_MAP_TYPE_SOCKMAP] = "sockmap", - [BPF_MAP_TYPE_CPUMAP] = "cpumap", - [BPF_MAP_TYPE_XSKMAP] = "xskmap", - [BPF_MAP_TYPE_SOCKHASH] = "sockhash", - [BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage", - [BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray", - [BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE] = "percpu_cgroup_storage", - [BPF_MAP_TYPE_QUEUE] = "queue", - [BPF_MAP_TYPE_STACK] = "stack", - 
[BPF_MAP_TYPE_SK_STORAGE] = "sk_storage", - [BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops", - [BPF_MAP_TYPE_RINGBUF] = "ringbuf", - [BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage", - [BPF_MAP_TYPE_TASK_STORAGE] = "task_storage", - [BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter", -}; - -const size_t map_type_name_size = ARRAY_SIZE(map_type_name); - static struct hashmap *map_table; static bool map_is_per_cpu(__u32 type) @@ -81,12 +45,18 @@ static bool map_is_map_of_progs(__u32 type) static int map_type_from_str(const char *type) { + const char *map_type_str; unsigned int i; - for (i = 0; i < ARRAY_SIZE(map_type_name); i++) + for (i = 0; ; i++) { + map_type_str = libbpf_bpf_map_type_str(i); + if (!map_type_str) + break; + /* Don't allow prefixing in case of possible future shadowing */ - if (map_type_name[i] && !strcmp(map_type_name[i], type)) + if (!strcmp(map_type_str, type)) return i; + } return -1; } @@ -472,9 +442,12 @@ static int parse_elem(char **argv, struct bpf_map_info *info, static void show_map_header_json(struct bpf_map_info *info, json_writer_t *wtr) { + const char *map_type_str; + jsonw_uint_field(wtr, "id", info->id); - if (info->type < ARRAY_SIZE(map_type_name)) - jsonw_string_field(wtr, "type", map_type_name[info->type]); + map_type_str = libbpf_bpf_map_type_str(info->type); + if (map_type_str) + jsonw_string_field(wtr, "type", map_type_str); else jsonw_uint_field(wtr, "type", info->type); @@ -513,10 +486,12 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) if (owner_prog_type) { unsigned int prog_type = atoi(owner_prog_type); + const char *prog_type_str; - if (prog_type < prog_type_name_size) + prog_type_str = libbpf_bpf_prog_type_str(prog_type); + if (prog_type_str) jsonw_string_field(json_wtr, "owner_prog_type", - prog_type_name[prog_type]); + prog_type_str); else jsonw_uint_field(json_wtr, "owner_prog_type", prog_type); @@ -559,9 +534,13 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) static void 
show_map_header_plain(struct bpf_map_info *info) { + const char *map_type_str; + printf("%u: ", info->id); - if (info->type < ARRAY_SIZE(map_type_name)) - printf("%s ", map_type_name[info->type]); + + map_type_str = libbpf_bpf_map_type_str(info->type); + if (map_type_str) + printf("%s ", map_type_str); else printf("type %u ", info->type); @@ -597,10 +576,11 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) printf("\n\t"); if (owner_prog_type) { unsigned int prog_type = atoi(owner_prog_type); + const char *prog_type_str; - if (prog_type < prog_type_name_size) - printf("owner_prog_type %s ", - prog_type_name[prog_type]); + prog_type_str = libbpf_bpf_prog_type_str(prog_type); + if (prog_type_str) + printf("owner_prog_type %s ", prog_type_str); else printf("owner_prog_type %d ", prog_type); } @@ -876,9 +856,13 @@ map_dump(int fd, struct bpf_map_info *info, json_writer_t *wtr, } if (info->type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY && - info->value_size != 8) + info->value_size != 8) { + const char *map_type_str; + + map_type_str = libbpf_bpf_map_type_str(info->type); p_info("Warning: cannot read values from %s map with value_size != 8", - map_type_name[info->type]); + map_type_str); + } while (true) { err = bpf_map_get_next_key(fd, prev_key, key); if (err) { @@ -1342,6 +1326,8 @@ static int do_create(int argc, char **argv) goto exit; } + set_max_rlimit(); + fd = bpf_map_create(map_type, map_name, key_size, value_size, max_entries, &attr); if (fd < 0) { p_err("map create failed: %s", strerror(errno)); diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c index e2d00d3cd868..bb6c969a114a 100644 --- a/tools/bpf/bpftool/pids.c +++ b/tools/bpf/bpftool/pids.c @@ -108,6 +108,7 @@ int build_obj_refs_table(struct hashmap **map, enum bpf_obj_type type) p_err("failed to create hashmap for PID references"); return -1; } + set_max_rlimit(); skel = pid_iter_bpf__open(); if (!skel) { diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 
5c2c63df92e8..f081de398b60 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -36,54 +36,28 @@ #define BPF_METADATA_PREFIX "bpf_metadata_" #define BPF_METADATA_PREFIX_LEN (sizeof(BPF_METADATA_PREFIX) - 1) -const char * const prog_type_name[] = { - [BPF_PROG_TYPE_UNSPEC] = "unspec", - [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter", - [BPF_PROG_TYPE_KPROBE] = "kprobe", - [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls", - [BPF_PROG_TYPE_SCHED_ACT] = "sched_act", - [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint", - [BPF_PROG_TYPE_XDP] = "xdp", - [BPF_PROG_TYPE_PERF_EVENT] = "perf_event", - [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb", - [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock", - [BPF_PROG_TYPE_LWT_IN] = "lwt_in", - [BPF_PROG_TYPE_LWT_OUT] = "lwt_out", - [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit", - [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops", - [BPF_PROG_TYPE_SK_SKB] = "sk_skb", - [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device", - [BPF_PROG_TYPE_SK_MSG] = "sk_msg", - [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint", - [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr", - [BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local", - [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2", - [BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport", - [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector", - [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl", - [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable", - [BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt", - [BPF_PROG_TYPE_TRACING] = "tracing", - [BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops", - [BPF_PROG_TYPE_EXT] = "ext", - [BPF_PROG_TYPE_LSM] = "lsm", - [BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup", - [BPF_PROG_TYPE_SYSCALL] = "syscall", -}; - -const size_t prog_type_name_size = ARRAY_SIZE(prog_type_name); - enum dump_mode { DUMP_JITED, DUMP_XLATED, }; +static const bool attach_types[] = { + [BPF_SK_SKB_STREAM_PARSER] = true, + [BPF_SK_SKB_STREAM_VERDICT] = true, + [BPF_SK_SKB_VERDICT] = true, + [BPF_SK_MSG_VERDICT] = true, + [BPF_FLOW_DISSECTOR] 
= true, + [__MAX_BPF_ATTACH_TYPE] = false, +}; + +/* Textual representations traditionally used by the program and kept around + * for the sake of backwards compatibility. + */ static const char * const attach_type_strings[] = { [BPF_SK_SKB_STREAM_PARSER] = "stream_parser", [BPF_SK_SKB_STREAM_VERDICT] = "stream_verdict", [BPF_SK_SKB_VERDICT] = "skb_verdict", [BPF_SK_MSG_VERDICT] = "msg_verdict", - [BPF_FLOW_DISSECTOR] = "flow_dissector", [__MAX_BPF_ATTACH_TYPE] = NULL, }; @@ -94,6 +68,14 @@ static enum bpf_attach_type parse_attach_type(const char *str) enum bpf_attach_type type; for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) { + if (attach_types[type]) { + const char *attach_type_str; + + attach_type_str = libbpf_bpf_attach_type_str(type); + if (!strcmp(str, attach_type_str)) + return type; + } + if (attach_type_strings[type] && is_prefix(str, attach_type_strings[type])) return type; @@ -428,12 +410,14 @@ out_free: static void print_prog_header_json(struct bpf_prog_info *info, int fd) { + const char *prog_type_str; char prog_name[MAX_PROG_FULL_NAME]; jsonw_uint_field(json_wtr, "id", info->id); - if (info->type < ARRAY_SIZE(prog_type_name)) - jsonw_string_field(json_wtr, "type", - prog_type_name[info->type]); + prog_type_str = libbpf_bpf_prog_type_str(info->type); + + if (prog_type_str) + jsonw_string_field(json_wtr, "type", prog_type_str); else jsonw_uint_field(json_wtr, "type", info->type); @@ -515,11 +499,13 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) static void print_prog_header_plain(struct bpf_prog_info *info, int fd) { + const char *prog_type_str; char prog_name[MAX_PROG_FULL_NAME]; printf("%u: ", info->id); - if (info->type < ARRAY_SIZE(prog_type_name)) - printf("%s ", prog_type_name[info->type]); + prog_type_str = libbpf_bpf_prog_type_str(info->type); + if (prog_type_str) + printf("%s ", prog_type_str); else printf("type %u ", info->type); @@ -1604,6 +1590,8 @@ static int load_with_options(int argc, char **argv, bool 
first_prog_only) } } + set_max_rlimit(); + if (verifier_logs) /* log_level1 + log_level2 + stats, but not stable UAPI */ open_opts.kernel_log_level = 1 + 2 + 4; @@ -2301,6 +2289,7 @@ static int do_profile(int argc, char **argv) } } + set_max_rlimit(); err = profiler_bpf__load(profile_obj); if (err) { p_err("failed to load profile_obj"); @@ -2374,8 +2363,8 @@ static int do_help(int argc, char **argv) " cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n" " cgroup/getsockopt | cgroup/setsockopt | cgroup/sock_release |\n" " struct_ops | fentry | fexit | freplace | sk_lookup }\n" - " ATTACH_TYPE := { msg_verdict | skb_verdict | stream_verdict |\n" - " stream_parser | flow_dissector }\n" + " ATTACH_TYPE := { sk_msg_verdict | sk_skb_verdict | sk_skb_stream_verdict |\n" + " sk_skb_stream_parser | flow_dissector }\n" " METRIC := { cycles | instructions | l1d_loads | llc_misses | itlb_misses | dtlb_misses }\n" " " HELP_SPEC_OPTIONS " |\n" " {-f|--bpffs} | {-m|--mapcompat} | {-n|--nomount} |\n" diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c index 2535f079ed67..e08a6ff2866c 100644 --- a/tools/bpf/bpftool/struct_ops.c +++ b/tools/bpf/bpftool/struct_ops.c @@ -501,6 +501,8 @@ static int do_register(int argc, char **argv) if (libbpf_get_error(obj)) return -1; + set_max_rlimit(); + if (bpf_object__load(obj)) { bpf_object__close(obj); return -1; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index f4009dbdf62d..e81362891596 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3597,10 +3597,11 @@ union bpf_attr { * * *iph* points to the start of the IPv4 or IPv6 header, while * *iph_len* contains **sizeof**\ (**struct iphdr**) or - * **sizeof**\ (**struct ip6hdr**). + * **sizeof**\ (**struct ipv6hdr**). * * *th* points to the start of the TCP header, while *th_len* - * contains **sizeof**\ (**struct tcphdr**). 
+ * contains the length of the TCP header (at least + * **sizeof**\ (**struct tcphdr**)). * Return * 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative * error otherwise. @@ -3783,10 +3784,11 @@ union bpf_attr { * * *iph* points to the start of the IPv4 or IPv6 header, while * *iph_len* contains **sizeof**\ (**struct iphdr**) or - * **sizeof**\ (**struct ip6hdr**). + * **sizeof**\ (**struct ipv6hdr**). * * *th* points to the start of the TCP header, while *th_len* - * contains the length of the TCP header. + * contains the length of the TCP header with options (at least + * **sizeof**\ (**struct tcphdr**)). * Return * On success, lower 32 bits hold the generated SYN cookie in * followed by 16 bits which hold the MSS value for that cookie, @@ -5249,6 +5251,80 @@ union bpf_attr { * Pointer to the underlying dynptr data, NULL if the dynptr is * read-only, if the dynptr is invalid, or if the offset and length * is out of bounds. + * + * s64 bpf_tcp_raw_gen_syncookie_ipv4(struct iphdr *iph, struct tcphdr *th, u32 th_len) + * Description + * Try to issue a SYN cookie for the packet with corresponding + * IPv4/TCP headers, *iph* and *th*, without depending on a + * listening socket. + * + * *iph* points to the IPv4 header. + * + * *th* points to the start of the TCP header, while *th_len* + * contains the length of the TCP header (at least + * **sizeof**\ (**struct tcphdr**)). + * Return + * On success, lower 32 bits hold the generated SYN cookie in + * followed by 16 bits which hold the MSS value for that cookie, + * and the top 16 bits are unused. + * + * On failure, the returned value is one of the following: + * + * **-EINVAL** if *th_len* is invalid. + * + * s64 bpf_tcp_raw_gen_syncookie_ipv6(struct ipv6hdr *iph, struct tcphdr *th, u32 th_len) + * Description + * Try to issue a SYN cookie for the packet with corresponding + * IPv6/TCP headers, *iph* and *th*, without depending on a + * listening socket. + * + * *iph* points to the IPv6 header. 
+ * + * *th* points to the start of the TCP header, while *th_len* + * contains the length of the TCP header (at least + * **sizeof**\ (**struct tcphdr**)). + * Return + * On success, lower 32 bits hold the generated SYN cookie in + * followed by 16 bits which hold the MSS value for that cookie, + * and the top 16 bits are unused. + * + * On failure, the returned value is one of the following: + * + * **-EINVAL** if *th_len* is invalid. + * + * **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin. + * + * long bpf_tcp_raw_check_syncookie_ipv4(struct iphdr *iph, struct tcphdr *th) + * Description + * Check whether *iph* and *th* contain a valid SYN cookie ACK + * without depending on a listening socket. + * + * *iph* points to the IPv4 header. + * + * *th* points to the TCP header. + * Return + * 0 if *iph* and *th* are a valid SYN cookie ACK. + * + * On failure, the returned value is one of the following: + * + * **-EACCES** if the SYN cookie is not valid. + * + * long bpf_tcp_raw_check_syncookie_ipv6(struct ipv6hdr *iph, struct tcphdr *th) + * Description + * Check whether *iph* and *th* contain a valid SYN cookie ACK + * without depending on a listening socket. + * + * *iph* points to the IPv6 header. + * + * *th* points to the TCP header. + * Return + * 0 if *iph* and *th* are a valid SYN cookie ACK. + * + * On failure, the returned value is one of the following: + * + * **-EACCES** if the SYN cookie is not valid. + * + * **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin. 
*/ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5455,6 +5531,10 @@ union bpf_attr { FN(dynptr_read), \ FN(dynptr_write), \ FN(dynptr_data), \ + FN(tcp_raw_gen_syncookie_ipv4), \ + FN(tcp_raw_gen_syncookie_ipv6), \ + FN(tcp_raw_check_syncookie_ipv4), \ + FN(tcp_raw_check_syncookie_ipv6), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h index a9162a6c0284..ec1798b6d3ff 100644 --- a/tools/include/uapi/linux/btf.h +++ b/tools/include/uapi/linux/btf.h @@ -36,10 +36,10 @@ struct btf_type { * bits 24-28: kind (e.g. int, ptr, array...etc) * bits 29-30: unused * bit 31: kind_flag, currently used by - * struct, union and fwd + * struct, union, enum, fwd and enum64 */ __u32 info; - /* "size" is used by INT, ENUM, STRUCT, UNION and DATASEC. + /* "size" is used by INT, ENUM, STRUCT, UNION, DATASEC and ENUM64. * "size" tells the size of the type it is describing. * * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, @@ -63,7 +63,7 @@ enum { BTF_KIND_ARRAY = 3, /* Array */ BTF_KIND_STRUCT = 4, /* Struct */ BTF_KIND_UNION = 5, /* Union */ - BTF_KIND_ENUM = 6, /* Enumeration */ + BTF_KIND_ENUM = 6, /* Enumeration up to 32-bit values */ BTF_KIND_FWD = 7, /* Forward */ BTF_KIND_TYPEDEF = 8, /* Typedef */ BTF_KIND_VOLATILE = 9, /* Volatile */ @@ -76,6 +76,7 @@ enum { BTF_KIND_FLOAT = 16, /* Floating point */ BTF_KIND_DECL_TAG = 17, /* Decl Tag */ BTF_KIND_TYPE_TAG = 18, /* Type Tag */ + BTF_KIND_ENUM64 = 19, /* Enumeration up to 64-bit values */ NR_BTF_KINDS, BTF_KIND_MAX = NR_BTF_KINDS - 1, @@ -186,4 +187,14 @@ struct btf_decl_tag { __s32 component_idx; }; +/* BTF_KIND_ENUM64 is followed by multiple "struct btf_enum64". + * The exact number of btf_enum64 is stored in the vlen (of the + * info in "struct btf_type"). 
+ */ +struct btf_enum64 { + __u32 name_off; + __u32 val_lo32; + __u32 val_hi32; +}; + #endif /* _UAPI__LINUX_BTF_H__ */ diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index b339bf2196ca..0242f31e339c 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -890,6 +890,7 @@ enum { IFLA_BOND_SLAVE_AD_AGGREGATOR_ID, IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE, IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE, + IFLA_BOND_SLAVE_PRIO, __IFLA_BOND_SLAVE_MAX, }; diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index bb1e06eb1eca..ae1520f7e1b0 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -130,7 +130,7 @@ static inline __u64 ptr_to_u64(const void *ptr) /* Ensure given dynamically allocated memory region pointed to by *data* with * capacity of *cap_cnt* elements each taking *elem_sz* bytes has enough - * memory to accomodate *add_cnt* new elements, assuming *cur_cnt* elements + * memory to accommodate *add_cnt* new elements, assuming *cur_cnt* elements * are already used. At most *max_cnt* elements can be ever allocated. 
* If necessary, memory is reallocated and all existing data is copied over, * new pointer to the memory region is stored at *data, new memory region @@ -305,6 +305,8 @@ static int btf_type_size(const struct btf_type *t) return base_size + sizeof(__u32); case BTF_KIND_ENUM: return base_size + vlen * sizeof(struct btf_enum); + case BTF_KIND_ENUM64: + return base_size + vlen * sizeof(struct btf_enum64); case BTF_KIND_ARRAY: return base_size + sizeof(struct btf_array); case BTF_KIND_STRUCT: @@ -334,6 +336,7 @@ static void btf_bswap_type_base(struct btf_type *t) static int btf_bswap_type_rest(struct btf_type *t) { struct btf_var_secinfo *v; + struct btf_enum64 *e64; struct btf_member *m; struct btf_array *a; struct btf_param *p; @@ -361,6 +364,13 @@ static int btf_bswap_type_rest(struct btf_type *t) e->val = bswap_32(e->val); } return 0; + case BTF_KIND_ENUM64: + for (i = 0, e64 = btf_enum64(t); i < vlen; i++, e64++) { + e64->name_off = bswap_32(e64->name_off); + e64->val_lo32 = bswap_32(e64->val_lo32); + e64->val_hi32 = bswap_32(e64->val_hi32); + } + return 0; case BTF_KIND_ARRAY: a = btf_array(t); a->type = bswap_32(a->type); @@ -472,9 +482,22 @@ const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id) static int determine_ptr_size(const struct btf *btf) { + static const char * const long_aliases[] = { + "long", + "long int", + "int long", + "unsigned long", + "long unsigned", + "unsigned long int", + "unsigned int long", + "long unsigned int", + "long int unsigned", + "int unsigned long", + "int long unsigned", + }; const struct btf_type *t; const char *name; - int i, n; + int i, j, n; if (btf->base_btf && btf->base_btf->ptr_sz > 0) return btf->base_btf->ptr_sz; @@ -485,15 +508,16 @@ static int determine_ptr_size(const struct btf *btf) if (!btf_is_int(t)) continue; + if (t->size != 4 && t->size != 8) + continue; + name = btf__name_by_offset(btf, t->name_off); if (!name) continue; - if (strcmp(name, "long int") == 0 || - strcmp(name, "long unsigned 
int") == 0) { - if (t->size != 4 && t->size != 8) - continue; - return t->size; + for (j = 0; j < ARRAY_SIZE(long_aliases); j++) { + if (strcmp(name, long_aliases[j]) == 0) + return t->size; } } @@ -597,6 +621,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id) case BTF_KIND_STRUCT: case BTF_KIND_UNION: case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: case BTF_KIND_DATASEC: case BTF_KIND_FLOAT: size = t->size; @@ -644,6 +669,7 @@ int btf__align_of(const struct btf *btf, __u32 id) switch (kind) { case BTF_KIND_INT: case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: case BTF_KIND_FLOAT: return min(btf_ptr_sz(btf), (size_t)t->size); case BTF_KIND_PTR: @@ -2115,20 +2141,8 @@ int btf__add_field(struct btf *btf, const char *name, int type_id, return 0; } -/* - * Append new BTF_KIND_ENUM type with: - * - *name* - name of the enum, can be NULL or empty for anonymous enums; - * - *byte_sz* - size of the enum, in bytes. - * - * Enum initially has no enum values in it (and corresponds to enum forward - * declaration). Enumerator values can be added by btf__add_enum_value() - * immediately after btf__add_enum() succeeds. - * - * Returns: - * - >0, type ID of newly added BTF type; - * - <0, on error. - */ -int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz) +static int btf_add_enum_common(struct btf *btf, const char *name, __u32 byte_sz, + bool is_signed, __u8 kind) { struct btf_type *t; int sz, name_off = 0; @@ -2153,13 +2167,35 @@ int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz) /* start out with vlen=0; it will be adjusted when adding enum values */ t->name_off = name_off; - t->info = btf_type_info(BTF_KIND_ENUM, 0, 0); + t->info = btf_type_info(kind, 0, is_signed); t->size = byte_sz; return btf_commit_type(btf, sz); } /* + * Append new BTF_KIND_ENUM type with: + * - *name* - name of the enum, can be NULL or empty for anonymous enums; + * - *byte_sz* - size of the enum, in bytes. 
+ * + * Enum initially has no enum values in it (and corresponds to enum forward + * declaration). Enumerator values can be added by btf__add_enum_value() + * immediately after btf__add_enum() succeeds. + * + * Returns: + * - >0, type ID of newly added BTF type; + * - <0, on error. + */ +int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz) +{ + /* + * set the signedness to be unsigned, it will change to signed + * if any later enumerator is negative. + */ + return btf_add_enum_common(btf, name, byte_sz, false, BTF_KIND_ENUM); +} + +/* * Append new enum value for the current ENUM type with: * - *name* - name of the enumerator value, can't be NULL or empty; * - *value* - integer value corresponding to enum value *name*; @@ -2206,6 +2242,82 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value) t = btf_last_type(btf); btf_type_inc_vlen(t); + /* if negative value, set signedness to signed */ + if (value < 0) + t->info = btf_type_info(btf_kind(t), btf_vlen(t), true); + + btf->hdr->type_len += sz; + btf->hdr->str_off += sz; + return 0; +} + +/* + * Append new BTF_KIND_ENUM64 type with: + * - *name* - name of the enum, can be NULL or empty for anonymous enums; + * - *byte_sz* - size of the enum, in bytes. + * - *is_signed* - whether the enum values are signed or not; + * + * Enum initially has no enum values in it (and corresponds to enum forward + * declaration). Enumerator values can be added by btf__add_enum64_value() + * immediately after btf__add_enum64() succeeds. + * + * Returns: + * - >0, type ID of newly added BTF type; + * - <0, on error. 
+ */ +int btf__add_enum64(struct btf *btf, const char *name, __u32 byte_sz, + bool is_signed) +{ + return btf_add_enum_common(btf, name, byte_sz, is_signed, + BTF_KIND_ENUM64); +} + +/* + * Append new enum value for the current ENUM64 type with: + * - *name* - name of the enumerator value, can't be NULL or empty; + * - *value* - integer value corresponding to enum value *name*; + * Returns: + * - 0, on success; + * - <0, on error. + */ +int btf__add_enum64_value(struct btf *btf, const char *name, __u64 value) +{ + struct btf_enum64 *v; + struct btf_type *t; + int sz, name_off; + + /* last type should be BTF_KIND_ENUM64 */ + if (btf->nr_types == 0) + return libbpf_err(-EINVAL); + t = btf_last_type(btf); + if (!btf_is_enum64(t)) + return libbpf_err(-EINVAL); + + /* non-empty name */ + if (!name || !name[0]) + return libbpf_err(-EINVAL); + + /* decompose and invalidate raw data */ + if (btf_ensure_modifiable(btf)) + return libbpf_err(-ENOMEM); + + sz = sizeof(struct btf_enum64); + v = btf_add_type_mem(btf, sz); + if (!v) + return libbpf_err(-ENOMEM); + + name_off = btf__add_str(btf, name); + if (name_off < 0) + return name_off; + + v->name_off = name_off; + v->val_lo32 = (__u32)value; + v->val_hi32 = value >> 32; + + /* update parent type's vlen */ + t = btf_last_type(btf); + btf_type_inc_vlen(t); + btf->hdr->type_len += sz; btf->hdr->str_off += sz; return 0; @@ -3470,7 +3582,7 @@ static bool btf_equal_int_tag(struct btf_type *t1, struct btf_type *t2) return info1 == info2; } -/* Calculate type signature hash of ENUM. */ +/* Calculate type signature hash of ENUM/ENUM64. 
*/ static long btf_hash_enum(struct btf_type *t) { long h; @@ -3504,9 +3616,31 @@ static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2) return true; } +static bool btf_equal_enum64(struct btf_type *t1, struct btf_type *t2) +{ + const struct btf_enum64 *m1, *m2; + __u16 vlen; + int i; + + if (!btf_equal_common(t1, t2)) + return false; + + vlen = btf_vlen(t1); + m1 = btf_enum64(t1); + m2 = btf_enum64(t2); + for (i = 0; i < vlen; i++) { + if (m1->name_off != m2->name_off || m1->val_lo32 != m2->val_lo32 || + m1->val_hi32 != m2->val_hi32) + return false; + m1++; + m2++; + } + return true; +} + static inline bool btf_is_enum_fwd(struct btf_type *t) { - return btf_is_enum(t) && btf_vlen(t) == 0; + return btf_is_any_enum(t) && btf_vlen(t) == 0; } static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2) @@ -3519,6 +3653,17 @@ static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2) t1->size == t2->size; } +static bool btf_compat_enum64(struct btf_type *t1, struct btf_type *t2) +{ + if (!btf_is_enum_fwd(t1) && !btf_is_enum_fwd(t2)) + return btf_equal_enum64(t1, t2); + + /* ignore vlen when comparing */ + return t1->name_off == t2->name_off && + (t1->info & ~0xffff) == (t2->info & ~0xffff) && + t1->size == t2->size; +} + /* * Calculate type signature hash of STRUCT/UNION, ignoring referenced type IDs, * as referenced type IDs equivalence is established separately during type @@ -3731,6 +3876,7 @@ static int btf_dedup_prep(struct btf_dedup *d) h = btf_hash_int_decl_tag(t); break; case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: h = btf_hash_enum(t); break; case BTF_KIND_STRUCT: @@ -3820,6 +3966,27 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) } break; + case BTF_KIND_ENUM64: + h = btf_hash_enum(t); + for_each_dedup_cand(d, hash_entry, h) { + cand_id = (__u32)(long)hash_entry->value; + cand = btf_type_by_id(d->btf, cand_id); + if (btf_equal_enum64(t, cand)) { + new_id = cand_id; + break; + } + if (btf_compat_enum64(t, 
cand)) { + if (btf_is_enum_fwd(t)) { + /* resolve fwd to full enum */ + new_id = cand_id; + break; + } + /* resolve canonical enum fwd to full enum */ + d->map[cand_id] = type_id; + } + } + break; + case BTF_KIND_FWD: case BTF_KIND_FLOAT: h = btf_hash_common(t); @@ -4115,6 +4282,9 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, case BTF_KIND_ENUM: return btf_compat_enum(cand_type, canon_type); + case BTF_KIND_ENUM64: + return btf_compat_enum64(cand_type, canon_type); + case BTF_KIND_FWD: case BTF_KIND_FLOAT: return btf_equal_common(cand_type, canon_type); @@ -4717,6 +4887,7 @@ int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ct case BTF_KIND_INT: case BTF_KIND_FLOAT: case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: return 0; case BTF_KIND_FWD: @@ -4811,6 +4982,16 @@ int btf_type_visit_str_offs(struct btf_type *t, str_off_visit_fn visit, void *ct } break; } + case BTF_KIND_ENUM64: { + struct btf_enum64 *m = btf_enum64(t); + + for (i = 0, n = btf_vlen(t); i < n; i++, m++) { + err = visit(&m->name_off, ctx); + if (err) + return err; + } + break; + } case BTF_KIND_FUNC_PROTO: { struct btf_param *m = btf_params(t); diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 951ac7475794..9fb416eb5644 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -215,6 +215,8 @@ LIBBPF_API int btf__add_field(struct btf *btf, const char *name, int field_type_ /* enum construction APIs */ LIBBPF_API int btf__add_enum(struct btf *btf, const char *name, __u32 bytes_sz); LIBBPF_API int btf__add_enum_value(struct btf *btf, const char *name, __s64 value); +LIBBPF_API int btf__add_enum64(struct btf *btf, const char *name, __u32 bytes_sz, bool is_signed); +LIBBPF_API int btf__add_enum64_value(struct btf *btf, const char *name, __u64 value); enum btf_fwd_kind { BTF_FWD_STRUCT = 0, @@ -393,9 +395,10 @@ btf_dump__dump_type_data(struct btf_dump *d, __u32 id, #ifndef BTF_KIND_FLOAT #define BTF_KIND_FLOAT 16 /* Floating point */ #endif -/* 
The kernel header switched to enums, so these two were never #defined */ +/* The kernel header switched to enums, so the following were never #defined */ #define BTF_KIND_DECL_TAG 17 /* Decl Tag */ #define BTF_KIND_TYPE_TAG 18 /* Type Tag */ +#define BTF_KIND_ENUM64 19 /* Enum for up-to 64bit values */ static inline __u16 btf_kind(const struct btf_type *t) { @@ -454,6 +457,11 @@ static inline bool btf_is_enum(const struct btf_type *t) return btf_kind(t) == BTF_KIND_ENUM; } +static inline bool btf_is_enum64(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_ENUM64; +} + static inline bool btf_is_fwd(const struct btf_type *t) { return btf_kind(t) == BTF_KIND_FWD; @@ -524,6 +532,18 @@ static inline bool btf_is_type_tag(const struct btf_type *t) return btf_kind(t) == BTF_KIND_TYPE_TAG; } +static inline bool btf_is_any_enum(const struct btf_type *t) +{ + return btf_is_enum(t) || btf_is_enum64(t); +} + +static inline bool btf_kind_core_compat(const struct btf_type *t1, + const struct btf_type *t2) +{ + return btf_kind(t1) == btf_kind(t2) || + (btf_is_any_enum(t1) && btf_is_any_enum(t2)); +} + static inline __u8 btf_int_encoding(const struct btf_type *t) { return BTF_INT_ENCODING(*(__u32 *)(t + 1)); @@ -549,6 +569,16 @@ static inline struct btf_enum *btf_enum(const struct btf_type *t) return (struct btf_enum *)(t + 1); } +static inline struct btf_enum64 *btf_enum64(const struct btf_type *t) +{ + return (struct btf_enum64 *)(t + 1); +} + +static inline __u64 btf_enum64_value(const struct btf_enum64 *e) +{ + return ((__u64)e->val_hi32 << 32) | e->val_lo32; +} + static inline struct btf_member *btf_members(const struct btf_type *t) { return (struct btf_member *)(t + 1); diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 6b1bc1f43728..f5275f819027 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -318,6 +318,7 @@ static int btf_dump_mark_referenced(struct btf_dump *d) switch (btf_kind(t)) { case BTF_KIND_INT: case 
BTF_KIND_ENUM: + case BTF_KIND_ENUM64: case BTF_KIND_FWD: case BTF_KIND_FLOAT: break; @@ -538,6 +539,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr) return 1; } case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: case BTF_KIND_FWD: /* * non-anonymous or non-referenced enums are top-level @@ -739,6 +741,7 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id) tstate->emit_state = EMITTED; break; case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: if (top_level_def) { btf_dump_emit_enum_def(d, id, t, 0); btf_dump_printf(d, ";\n\n"); @@ -989,38 +992,81 @@ static void btf_dump_emit_enum_fwd(struct btf_dump *d, __u32 id, btf_dump_printf(d, "enum %s", btf_dump_type_name(d, id)); } -static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id, - const struct btf_type *t, - int lvl) +static void btf_dump_emit_enum32_val(struct btf_dump *d, + const struct btf_type *t, + int lvl, __u16 vlen) { const struct btf_enum *v = btf_enum(t); - __u16 vlen = btf_vlen(t); + bool is_signed = btf_kflag(t); + const char *fmt_str; const char *name; size_t dup_cnt; int i; + for (i = 0; i < vlen; i++, v++) { + name = btf_name_of(d, v->name_off); + /* enumerators share namespace with typedef idents */ + dup_cnt = btf_dump_name_dups(d, d->ident_names, name); + if (dup_cnt > 1) { + fmt_str = is_signed ? "\n%s%s___%zd = %d," : "\n%s%s___%zd = %u,"; + btf_dump_printf(d, fmt_str, pfx(lvl + 1), name, dup_cnt, v->val); + } else { + fmt_str = is_signed ? 
"\n%s%s = %d," : "\n%s%s = %u,"; + btf_dump_printf(d, fmt_str, pfx(lvl + 1), name, v->val); + } + } +} + +static void btf_dump_emit_enum64_val(struct btf_dump *d, + const struct btf_type *t, + int lvl, __u16 vlen) +{ + const struct btf_enum64 *v = btf_enum64(t); + bool is_signed = btf_kflag(t); + const char *fmt_str; + const char *name; + size_t dup_cnt; + __u64 val; + int i; + + for (i = 0; i < vlen; i++, v++) { + name = btf_name_of(d, v->name_off); + dup_cnt = btf_dump_name_dups(d, d->ident_names, name); + val = btf_enum64_value(v); + if (dup_cnt > 1) { + fmt_str = is_signed ? "\n%s%s___%zd = %lldLL," + : "\n%s%s___%zd = %lluULL,"; + btf_dump_printf(d, fmt_str, + pfx(lvl + 1), name, dup_cnt, + (unsigned long long)val); + } else { + fmt_str = is_signed ? "\n%s%s = %lldLL," + : "\n%s%s = %lluULL,"; + btf_dump_printf(d, fmt_str, + pfx(lvl + 1), name, + (unsigned long long)val); + } + } +} +static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id, + const struct btf_type *t, + int lvl) +{ + __u16 vlen = btf_vlen(t); + btf_dump_printf(d, "enum%s%s", t->name_off ? 
" " : "", btf_dump_type_name(d, id)); - if (vlen) { - btf_dump_printf(d, " {"); - for (i = 0; i < vlen; i++, v++) { - name = btf_name_of(d, v->name_off); - /* enumerators share namespace with typedef idents */ - dup_cnt = btf_dump_name_dups(d, d->ident_names, name); - if (dup_cnt > 1) { - btf_dump_printf(d, "\n%s%s___%zu = %u,", - pfx(lvl + 1), name, dup_cnt, - (__u32)v->val); - } else { - btf_dump_printf(d, "\n%s%s = %u,", - pfx(lvl + 1), name, - (__u32)v->val); - } - } - btf_dump_printf(d, "\n%s}", pfx(lvl)); - } + if (!vlen) + return; + + btf_dump_printf(d, " {"); + if (btf_is_enum(t)) + btf_dump_emit_enum32_val(d, t, lvl, vlen); + else + btf_dump_emit_enum64_val(d, t, lvl, vlen); + btf_dump_printf(d, "\n%s}", pfx(lvl)); } static void btf_dump_emit_fwd_def(struct btf_dump *d, __u32 id, @@ -1178,6 +1224,7 @@ skip_mod: break; case BTF_KIND_INT: case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: case BTF_KIND_FWD: case BTF_KIND_STRUCT: case BTF_KIND_UNION: @@ -1312,6 +1359,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d, btf_dump_emit_struct_fwd(d, id, t); break; case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: btf_dump_emit_mods(d, decls); /* inline anonymous enum */ if (t->name_off == 0 && !d->skip_anon_defs) @@ -1988,7 +2036,8 @@ static int btf_dump_get_enum_value(struct btf_dump *d, __u32 id, __s64 *value) { - /* handle unaligned enum value */ + bool is_signed = btf_kflag(t); + if (!ptr_is_aligned(d->btf, id, data)) { __u64 val; int err; @@ -2005,13 +2054,13 @@ static int btf_dump_get_enum_value(struct btf_dump *d, *value = *(__s64 *)data; return 0; case 4: - *value = *(__s32 *)data; + *value = is_signed ? *(__s32 *)data : *(__u32 *)data; return 0; case 2: - *value = *(__s16 *)data; + *value = is_signed ? *(__s16 *)data : *(__u16 *)data; return 0; case 1: - *value = *(__s8 *)data; + *value = is_signed ? 
*(__s8 *)data : *(__u8 *)data; return 0; default: pr_warn("unexpected size %d for enum, id:[%u]\n", t->size, id); @@ -2024,7 +2073,7 @@ static int btf_dump_enum_data(struct btf_dump *d, __u32 id, const void *data) { - const struct btf_enum *e; + bool is_signed; __s64 value; int i, err; @@ -2032,14 +2081,31 @@ static int btf_dump_enum_data(struct btf_dump *d, if (err) return err; - for (i = 0, e = btf_enum(t); i < btf_vlen(t); i++, e++) { - if (value != e->val) - continue; - btf_dump_type_values(d, "%s", btf_name_of(d, e->name_off)); - return 0; - } + is_signed = btf_kflag(t); + if (btf_is_enum(t)) { + const struct btf_enum *e; + + for (i = 0, e = btf_enum(t); i < btf_vlen(t); i++, e++) { + if (value != e->val) + continue; + btf_dump_type_values(d, "%s", btf_name_of(d, e->name_off)); + return 0; + } - btf_dump_type_values(d, "%d", value); + btf_dump_type_values(d, is_signed ? "%d" : "%u", value); + } else { + const struct btf_enum64 *e; + + for (i = 0, e = btf_enum64(t); i < btf_vlen(t); i++, e++) { + if (value != btf_enum64_value(e)) + continue; + btf_dump_type_values(d, "%s", btf_name_of(d, e->name_off)); + return 0; + } + + btf_dump_type_values(d, is_signed ? 
"%lldLL" : "%lluULL", + (unsigned long long)value); + } return 0; } @@ -2099,6 +2165,7 @@ static int btf_dump_type_data_check_overflow(struct btf_dump *d, case BTF_KIND_FLOAT: case BTF_KIND_PTR: case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: if (data + bits_offset / 8 + size > d->typed_dump->data_end) return -E2BIG; break; @@ -2203,6 +2270,7 @@ static int btf_dump_type_data_check_zero(struct btf_dump *d, return -ENODATA; } case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: err = btf_dump_get_enum_value(d, t, data, id, &value); if (err) return err; @@ -2275,6 +2343,7 @@ static int btf_dump_dump_type_data(struct btf_dump *d, err = btf_dump_struct_data(d, t, id, data); break; case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: /* handle bitfield and int enum values */ if (bit_sz) { __u64 print_num; diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e89cc9c885b3..49e359cd34df 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -72,6 +72,134 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj); static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog); +static const char * const attach_type_name[] = { + [BPF_CGROUP_INET_INGRESS] = "cgroup_inet_ingress", + [BPF_CGROUP_INET_EGRESS] = "cgroup_inet_egress", + [BPF_CGROUP_INET_SOCK_CREATE] = "cgroup_inet_sock_create", + [BPF_CGROUP_INET_SOCK_RELEASE] = "cgroup_inet_sock_release", + [BPF_CGROUP_SOCK_OPS] = "cgroup_sock_ops", + [BPF_CGROUP_DEVICE] = "cgroup_device", + [BPF_CGROUP_INET4_BIND] = "cgroup_inet4_bind", + [BPF_CGROUP_INET6_BIND] = "cgroup_inet6_bind", + [BPF_CGROUP_INET4_CONNECT] = "cgroup_inet4_connect", + [BPF_CGROUP_INET6_CONNECT] = "cgroup_inet6_connect", + [BPF_CGROUP_INET4_POST_BIND] = "cgroup_inet4_post_bind", + [BPF_CGROUP_INET6_POST_BIND] = "cgroup_inet6_post_bind", + [BPF_CGROUP_INET4_GETPEERNAME] = "cgroup_inet4_getpeername", + [BPF_CGROUP_INET6_GETPEERNAME] = "cgroup_inet6_getpeername", + [BPF_CGROUP_INET4_GETSOCKNAME] = 
"cgroup_inet4_getsockname", + [BPF_CGROUP_INET6_GETSOCKNAME] = "cgroup_inet6_getsockname", + [BPF_CGROUP_UDP4_SENDMSG] = "cgroup_udp4_sendmsg", + [BPF_CGROUP_UDP6_SENDMSG] = "cgroup_udp6_sendmsg", + [BPF_CGROUP_SYSCTL] = "cgroup_sysctl", + [BPF_CGROUP_UDP4_RECVMSG] = "cgroup_udp4_recvmsg", + [BPF_CGROUP_UDP6_RECVMSG] = "cgroup_udp6_recvmsg", + [BPF_CGROUP_GETSOCKOPT] = "cgroup_getsockopt", + [BPF_CGROUP_SETSOCKOPT] = "cgroup_setsockopt", + [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser", + [BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict", + [BPF_SK_SKB_VERDICT] = "sk_skb_verdict", + [BPF_SK_MSG_VERDICT] = "sk_msg_verdict", + [BPF_LIRC_MODE2] = "lirc_mode2", + [BPF_FLOW_DISSECTOR] = "flow_dissector", + [BPF_TRACE_RAW_TP] = "trace_raw_tp", + [BPF_TRACE_FENTRY] = "trace_fentry", + [BPF_TRACE_FEXIT] = "trace_fexit", + [BPF_MODIFY_RETURN] = "modify_return", + [BPF_LSM_MAC] = "lsm_mac", + [BPF_SK_LOOKUP] = "sk_lookup", + [BPF_TRACE_ITER] = "trace_iter", + [BPF_XDP_DEVMAP] = "xdp_devmap", + [BPF_XDP_CPUMAP] = "xdp_cpumap", + [BPF_XDP] = "xdp", + [BPF_SK_REUSEPORT_SELECT] = "sk_reuseport_select", + [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_reuseport_select_or_migrate", + [BPF_PERF_EVENT] = "perf_event", + [BPF_TRACE_KPROBE_MULTI] = "trace_kprobe_multi", +}; + +static const char * const link_type_name[] = { + [BPF_LINK_TYPE_UNSPEC] = "unspec", + [BPF_LINK_TYPE_RAW_TRACEPOINT] = "raw_tracepoint", + [BPF_LINK_TYPE_TRACING] = "tracing", + [BPF_LINK_TYPE_CGROUP] = "cgroup", + [BPF_LINK_TYPE_ITER] = "iter", + [BPF_LINK_TYPE_NETNS] = "netns", + [BPF_LINK_TYPE_XDP] = "xdp", + [BPF_LINK_TYPE_PERF_EVENT] = "perf_event", + [BPF_LINK_TYPE_KPROBE_MULTI] = "kprobe_multi", + [BPF_LINK_TYPE_STRUCT_OPS] = "struct_ops", +}; + +static const char * const map_type_name[] = { + [BPF_MAP_TYPE_UNSPEC] = "unspec", + [BPF_MAP_TYPE_HASH] = "hash", + [BPF_MAP_TYPE_ARRAY] = "array", + [BPF_MAP_TYPE_PROG_ARRAY] = "prog_array", + [BPF_MAP_TYPE_PERF_EVENT_ARRAY] = "perf_event_array", + 
[BPF_MAP_TYPE_PERCPU_HASH] = "percpu_hash", + [BPF_MAP_TYPE_PERCPU_ARRAY] = "percpu_array", + [BPF_MAP_TYPE_STACK_TRACE] = "stack_trace", + [BPF_MAP_TYPE_CGROUP_ARRAY] = "cgroup_array", + [BPF_MAP_TYPE_LRU_HASH] = "lru_hash", + [BPF_MAP_TYPE_LRU_PERCPU_HASH] = "lru_percpu_hash", + [BPF_MAP_TYPE_LPM_TRIE] = "lpm_trie", + [BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps", + [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps", + [BPF_MAP_TYPE_DEVMAP] = "devmap", + [BPF_MAP_TYPE_DEVMAP_HASH] = "devmap_hash", + [BPF_MAP_TYPE_SOCKMAP] = "sockmap", + [BPF_MAP_TYPE_CPUMAP] = "cpumap", + [BPF_MAP_TYPE_XSKMAP] = "xskmap", + [BPF_MAP_TYPE_SOCKHASH] = "sockhash", + [BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage", + [BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray", + [BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE] = "percpu_cgroup_storage", + [BPF_MAP_TYPE_QUEUE] = "queue", + [BPF_MAP_TYPE_STACK] = "stack", + [BPF_MAP_TYPE_SK_STORAGE] = "sk_storage", + [BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops", + [BPF_MAP_TYPE_RINGBUF] = "ringbuf", + [BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage", + [BPF_MAP_TYPE_TASK_STORAGE] = "task_storage", + [BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter", +}; + +static const char * const prog_type_name[] = { + [BPF_PROG_TYPE_UNSPEC] = "unspec", + [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter", + [BPF_PROG_TYPE_KPROBE] = "kprobe", + [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls", + [BPF_PROG_TYPE_SCHED_ACT] = "sched_act", + [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint", + [BPF_PROG_TYPE_XDP] = "xdp", + [BPF_PROG_TYPE_PERF_EVENT] = "perf_event", + [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb", + [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock", + [BPF_PROG_TYPE_LWT_IN] = "lwt_in", + [BPF_PROG_TYPE_LWT_OUT] = "lwt_out", + [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit", + [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops", + [BPF_PROG_TYPE_SK_SKB] = "sk_skb", + [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device", + [BPF_PROG_TYPE_SK_MSG] = "sk_msg", + [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint", + 
[BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr", + [BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local", + [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2", + [BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport", + [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector", + [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl", + [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable", + [BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt", + [BPF_PROG_TYPE_TRACING] = "tracing", + [BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops", + [BPF_PROG_TYPE_EXT] = "ext", + [BPF_PROG_TYPE_LSM] = "lsm", + [BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup", + [BPF_PROG_TYPE_SYSCALL] = "syscall", +}; + static int __base_pr(enum libbpf_print_level level, const char *format, va_list args) { @@ -2114,6 +2242,7 @@ static const char *__btf_kind_str(__u16 kind) case BTF_KIND_FLOAT: return "float"; case BTF_KIND_DECL_TAG: return "decl_tag"; case BTF_KIND_TYPE_TAG: return "type_tag"; + case BTF_KIND_ENUM64: return "enum64"; default: return "unknown"; } } @@ -2642,12 +2771,13 @@ static bool btf_needs_sanitization(struct bpf_object *obj) bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); + bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64); return !has_func || !has_datasec || !has_func_global || !has_float || - !has_decl_tag || !has_type_tag; + !has_decl_tag || !has_type_tag || !has_enum64; } -static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) +static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) { bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC); bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC); @@ -2655,6 +2785,8 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); bool 
has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); + bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64); + int enum64_placeholder_id = 0; struct btf_type *t; int i, j, vlen; @@ -2717,8 +2849,32 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) /* replace TYPE_TAG with a CONST */ t->name_off = 0; t->info = BTF_INFO_ENC(BTF_KIND_CONST, 0, 0); - } + } else if (!has_enum64 && btf_is_enum(t)) { + /* clear the kflag */ + t->info = btf_type_info(btf_kind(t), btf_vlen(t), false); + } else if (!has_enum64 && btf_is_enum64(t)) { + /* replace ENUM64 with a union */ + struct btf_member *m; + + if (enum64_placeholder_id == 0) { + enum64_placeholder_id = btf__add_int(btf, "enum64_placeholder", 1, 0); + if (enum64_placeholder_id < 0) + return enum64_placeholder_id; + + t = (struct btf_type *)btf__type_by_id(btf, i); + } + + m = btf_members(t); + vlen = btf_vlen(t); + t->info = BTF_INFO_ENC(BTF_KIND_UNION, 0, vlen); + for (j = 0; j < vlen; j++, m++) { + m->type = enum64_placeholder_id; + m->offset = 0; + } + } } + + return 0; } static bool libbpf_needs_btf(const struct bpf_object *obj) @@ -3056,7 +3212,9 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) /* enforce 8-byte pointers for BPF-targeted BTFs */ btf__set_pointer_size(obj->btf, 8); - bpf_object__sanitize_btf(obj, kern_btf); + err = bpf_object__sanitize_btf(obj, kern_btf); + if (err) + return err; } if (obj->gen_loader) { @@ -3563,6 +3721,10 @@ static enum kcfg_type find_kcfg_type(const struct btf *btf, int id, if (strcmp(name, "libbpf_tristate")) return KCFG_UNKNOWN; return KCFG_TRISTATE; + case BTF_KIND_ENUM64: + if (strcmp(name, "libbpf_tristate")) + return KCFG_UNKNOWN; + return KCFG_TRISTATE; case BTF_KIND_ARRAY: if (btf_array(t)->nelems == 0) return KCFG_UNKNOWN; @@ -4746,6 +4908,17 @@ static int probe_kern_bpf_cookie(void) return probe_fd(ret); } +static int probe_kern_btf_enum64(void) +{ + static const char strs[] = "\0enum64"; + __u32 types[] = { + 
BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + enum kern_feature_result { FEAT_UNKNOWN = 0, FEAT_SUPPORTED = 1, @@ -4811,6 +4984,9 @@ static struct kern_feature_desc { [FEAT_BPF_COOKIE] = { "BPF cookie support", probe_kern_bpf_cookie, }, + [FEAT_BTF_ENUM64] = { + "BTF_KIND_ENUM64 support", probe_kern_btf_enum64, + }, }; bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) @@ -4943,11 +5119,6 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) static void bpf_map__destroy(struct bpf_map *map); -static bool is_pow_of_2(size_t x) -{ - return x && (x & (x - 1)); -} - static size_t adjust_ringbuf_sz(size_t sz) { __u32 page_sz = sysconf(_SC_PAGE_SIZE); @@ -5353,7 +5524,7 @@ int bpf_core_add_cands(struct bpf_core_cand *local_cand, n = btf__type_cnt(targ_btf); for (i = targ_start_id; i < n; i++) { t = btf__type_by_id(targ_btf, i); - if (btf_kind(t) != btf_kind(local_t)) + if (!btf_kind_core_compat(t, local_t)) continue; targ_name = btf__name_by_offset(targ_btf, t->name_off); @@ -5567,7 +5738,7 @@ int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, /* caller made sure that names match (ignoring flavor suffix) */ local_type = btf__type_by_id(local_btf, local_id); targ_type = btf__type_by_id(targ_btf, targ_id); - if (btf_kind(local_type) != btf_kind(targ_type)) + if (!btf_kind_core_compat(local_type, targ_type)) return 0; recur: @@ -5580,7 +5751,7 @@ recur: if (!local_type || !targ_type) return -EINVAL; - if (btf_kind(local_type) != btf_kind(targ_type)) + if (!btf_kind_core_compat(local_type, targ_type)) return 0; switch (btf_kind(local_type)) { @@ -5588,6 +5759,7 @@ recur: case BTF_KIND_STRUCT: case BTF_KIND_UNION: case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: case BTF_KIND_FWD: return 1; case BTF_KIND_INT: @@ -9005,8 +9177,10 @@ static const struct bpf_sec_def section_defs[] = { 
SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), SEC_DEF("kprobe+", KPROBE, 0, SEC_NONE, attach_kprobe), SEC_DEF("uprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), + SEC_DEF("uprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe), SEC_DEF("kretprobe+", KPROBE, 0, SEC_NONE, attach_kprobe), SEC_DEF("uretprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), + SEC_DEF("uretprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe), SEC_DEF("kprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), SEC_DEF("kretprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), SEC_DEF("usdt+", KPROBE, 0, SEC_NONE, attach_usdt), @@ -9300,6 +9474,38 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, return libbpf_err(-ESRCH); } +const char *libbpf_bpf_attach_type_str(enum bpf_attach_type t) +{ + if (t < 0 || t >= ARRAY_SIZE(attach_type_name)) + return NULL; + + return attach_type_name[t]; +} + +const char *libbpf_bpf_link_type_str(enum bpf_link_type t) +{ + if (t < 0 || t >= ARRAY_SIZE(link_type_name)) + return NULL; + + return link_type_name[t]; +} + +const char *libbpf_bpf_map_type_str(enum bpf_map_type t) +{ + if (t < 0 || t >= ARRAY_SIZE(map_type_name)) + return NULL; + + return map_type_name[t]; +} + +const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t) +{ + if (t < 0 || t >= ARRAY_SIZE(prog_type_name)) + return NULL; + + return prog_type_name[t]; +} + static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj, size_t offset) { @@ -10988,43 +11194,6 @@ static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe, return pfd; } -/* uprobes deal in relative offsets; subtract the base address associated with - * the mapped binary. See Documentation/trace/uprobetracer.rst for more - * details. 
- */ -static long elf_find_relative_offset(const char *filename, Elf *elf, long addr) -{ - size_t n; - int i; - - if (elf_getphdrnum(elf, &n)) { - pr_warn("elf: failed to find program headers for '%s': %s\n", filename, - elf_errmsg(-1)); - return -ENOENT; - } - - for (i = 0; i < n; i++) { - int seg_start, seg_end, seg_offset; - GElf_Phdr phdr; - - if (!gelf_getphdr(elf, i, &phdr)) { - pr_warn("elf: failed to get program header %d from '%s': %s\n", i, filename, - elf_errmsg(-1)); - return -ENOENT; - } - if (phdr.p_type != PT_LOAD || !(phdr.p_flags & PF_X)) - continue; - - seg_start = phdr.p_vaddr; - seg_end = seg_start + phdr.p_memsz; - seg_offset = phdr.p_offset; - if (addr >= seg_start && addr < seg_end) - return addr - seg_start + seg_offset; - } - pr_warn("elf: failed to find prog header containing 0x%lx in '%s'\n", addr, filename); - return -ENOENT; -} - /* Return next ELF section of sh_type after scn, or first of that type if scn is NULL. */ static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn) { @@ -11111,6 +11280,8 @@ static long elf_find_func_offset(const char *binary_path, const char *name) for (idx = 0; idx < nr_syms; idx++) { int curr_bind; GElf_Sym sym; + Elf_Scn *sym_scn; + GElf_Shdr sym_sh; if (!gelf_getsym(symbols, idx, &sym)) continue; @@ -11148,12 +11319,28 @@ static long elf_find_func_offset(const char *binary_path, const char *name) continue; } } - ret = sym.st_value; + + /* Transform symbol's virtual address (absolute for + * binaries and relative for shared libs) into file + * offset, which is what kernel is expecting for + * uprobe/uretprobe attachment. + * See Documentation/trace/uprobetracer.rst for more + * details. + * This is done by looking up symbol's containing + * section's header and using it's virtual address + * (sh_addr) and corresponding file offset (sh_offset) + * to transform sym.st_value (virtual address) into + * desired final file offset. 
+ */ + sym_scn = elf_getscn(elf, sym.st_shndx); + if (!sym_scn) + continue; + if (!gelf_getshdr(sym_scn, &sym_sh)) + continue; + + ret = sym.st_value - sym_sh.sh_addr + sym_sh.sh_offset; last_bind = curr_bind; } - /* For binaries that are not shared libraries, we need relative offset */ - if (ret > 0 && !is_shared_lib) - ret = elf_find_relative_offset(binary_path, elf, ret); if (ret > 0) break; } @@ -11386,7 +11573,8 @@ static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf break; case 3: case 4: - opts.retprobe = strcmp(probe_type, "uretprobe") == 0; + opts.retprobe = strcmp(probe_type, "uretprobe") == 0 || + strcmp(probe_type, "uretprobe.s") == 0; if (opts.retprobe && offset != 0) { pr_warn("prog '%s': uretprobes do not support offset specification\n", prog->name); diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 9e9a3fd3edd8..fa27969da0da 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -51,6 +51,42 @@ enum libbpf_errno { LIBBPF_API int libbpf_strerror(int err, char *buf, size_t size); +/** + * @brief **libbpf_bpf_attach_type_str()** converts the provided attach type + * value into a textual representation. + * @param t The attach type. + * @return Pointer to a static string identifying the attach type. NULL is + * returned for unknown **bpf_attach_type** values. + */ +LIBBPF_API const char *libbpf_bpf_attach_type_str(enum bpf_attach_type t); + +/** + * @brief **libbpf_bpf_link_type_str()** converts the provided link type value + * into a textual representation. + * @param t The link type. + * @return Pointer to a static string identifying the link type. NULL is + * returned for unknown **bpf_link_type** values. + */ +LIBBPF_API const char *libbpf_bpf_link_type_str(enum bpf_link_type t); + +/** + * @brief **libbpf_bpf_map_type_str()** converts the provided map type value + * into a textual representation. + * @param t The map type. + * @return Pointer to a static string identifying the map type. 
NULL is + * returned for unknown **bpf_map_type** values. + */ +LIBBPF_API const char *libbpf_bpf_map_type_str(enum bpf_map_type t); + +/** + * @brief **libbpf_bpf_prog_type_str()** converts the provided program type + * value into a textual representation. + * @param t The program type. + * @return Pointer to a static string identifying the program type. NULL is + * returned for unknown **bpf_prog_type** values. + */ +LIBBPF_API const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t); + enum libbpf_print_level { LIBBPF_WARN, LIBBPF_INFO, @@ -71,7 +107,7 @@ struct bpf_object_open_attr { }; struct bpf_object_open_opts { - /* size of this struct, for forward/backward compatiblity */ + /* size of this struct, for forward/backward compatibility */ size_t sz; /* object name override, if provided: * - for object open from file, this will override setting object diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 52973cffc20c..116a2a8ee7c2 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -461,5 +461,13 @@ LIBBPF_0.8.0 { } LIBBPF_0.7.0; LIBBPF_1.0.0 { + global: + btf__add_enum64; + btf__add_enum64_value; + libbpf_bpf_attach_type_str; + libbpf_bpf_link_type_str; + libbpf_bpf_map_type_str; + libbpf_bpf_prog_type_str; + local: *; }; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 4abdbe2fea9d..a1ad145ffa74 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -351,6 +351,8 @@ enum kern_feature_id { FEAT_MEMCG_ACCOUNT, /* BPF cookie (bpf_get_attach_cookie() BPF helper) support */ FEAT_BPF_COOKIE, + /* BTF_KIND_ENUM64 support and BTF_KIND_ENUM kflag support */ + FEAT_BTF_ENUM64, __FEAT_CNT, }; @@ -580,4 +582,9 @@ struct bpf_link * usdt_manager_attach_usdt(struct usdt_manager *man, const char *usdt_provider, const char *usdt_name, __u64 usdt_cookie); +static inline bool is_pow_of_2(size_t x) +{ + return x && (x & (x - 1)) == 0; +} + #endif /* 
__LIBBPF_LIBBPF_INTERNAL_H */ diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index 9aa016fb55aa..4ac02c28e152 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -697,11 +697,6 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, return err; } -static bool is_pow_of_2(size_t x) -{ - return x && (x & (x - 1)) == 0; -} - static int linker_sanity_check_elf(struct src_obj *obj) { struct src_sec *sec; @@ -1340,6 +1335,7 @@ recur: case BTF_KIND_STRUCT: case BTF_KIND_UNION: case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: case BTF_KIND_FWD: case BTF_KIND_FUNC: case BTF_KIND_VAR: @@ -1362,6 +1358,7 @@ recur: case BTF_KIND_INT: case BTF_KIND_FLOAT: case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: /* ignore encoding for int and enum values for enum */ if (t1->size != t2->size) { pr_warn("global '%s': incompatible %s '%s' size %u and %u\n", diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c index ba4453dfd1ed..6ad3c3891a9a 100644 --- a/tools/lib/bpf/relo_core.c +++ b/tools/lib/bpf/relo_core.c @@ -167,7 +167,7 @@ static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind) * just a parsed access string representation): [0, 1, 2, 3]. * * High-level spec will capture only 3 points: - * - intial zero-index access by pointer (&s->... is the same as &s[0]...); + * - initial zero-index access by pointer (&s->... is the same as &s[0]...); * - field 'a' access (corresponds to '2' in low-level spec); * - array element #3 access (corresponds to '3' in low-level spec). 
* @@ -186,7 +186,7 @@ int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, struct bpf_core_accessor *acc; const struct btf_type *t; const char *name, *spec_str; - __u32 id; + __u32 id, name_off; __s64 sz; spec_str = btf__name_by_offset(btf, relo->access_str_off); @@ -231,11 +231,13 @@ int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, spec->len++; if (core_relo_is_enumval_based(relo->kind)) { - if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t)) + if (!btf_is_any_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t)) return -EINVAL; /* record enumerator name in a first accessor */ - acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off); + name_off = btf_is_enum(t) ? btf_enum(t)[access_idx].name_off + : btf_enum64(t)[access_idx].name_off; + acc->name = btf__name_by_offset(btf, name_off); return 0; } @@ -340,7 +342,7 @@ recur: if (btf_is_composite(local_type) && btf_is_composite(targ_type)) return 1; - if (btf_kind(local_type) != btf_kind(targ_type)) + if (!btf_kind_core_compat(local_type, targ_type)) return 0; switch (btf_kind(local_type)) { @@ -348,6 +350,7 @@ recur: case BTF_KIND_FLOAT: return 1; case BTF_KIND_FWD: + case BTF_KIND_ENUM64: case BTF_KIND_ENUM: { const char *local_name, *targ_name; size_t local_len, targ_len; @@ -477,6 +480,7 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec, const struct bpf_core_accessor *local_acc; struct bpf_core_accessor *targ_acc; int i, sz, matched; + __u32 name_off; memset(targ_spec, 0, sizeof(*targ_spec)); targ_spec->btf = targ_btf; @@ -494,18 +498,22 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec, if (core_relo_is_enumval_based(local_spec->relo_kind)) { size_t local_essent_len, targ_essent_len; - const struct btf_enum *e; const char *targ_name; /* has to resolve to an enum */ targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id); - if (!btf_is_enum(targ_type)) + if (!btf_is_any_enum(targ_type)) 
return 0; local_essent_len = bpf_core_essential_name_len(local_acc->name); - for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) { - targ_name = btf__name_by_offset(targ_spec->btf, e->name_off); + for (i = 0; i < btf_vlen(targ_type); i++) { + if (btf_is_enum(targ_type)) + name_off = btf_enum(targ_type)[i].name_off; + else + name_off = btf_enum64(targ_type)[i].name_off; + + targ_name = btf__name_by_offset(targ_spec->btf, name_off); targ_essent_len = bpf_core_essential_name_len(targ_name); if (targ_essent_len != local_essent_len) continue; @@ -583,7 +591,7 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec, static int bpf_core_calc_field_relo(const char *prog_name, const struct bpf_core_relo *relo, const struct bpf_core_spec *spec, - __u32 *val, __u32 *field_sz, __u32 *type_id, + __u64 *val, __u32 *field_sz, __u32 *type_id, bool *validate) { const struct bpf_core_accessor *acc; @@ -680,8 +688,7 @@ static int bpf_core_calc_field_relo(const char *prog_name, *val = byte_sz; break; case BPF_CORE_FIELD_SIGNED: - /* enums will be assumed unsigned */ - *val = btf_is_enum(mt) || + *val = (btf_is_any_enum(mt) && BTF_INFO_KFLAG(mt->info)) || (btf_int_encoding(mt) & BTF_INT_SIGNED); if (validate) *validate = true; /* signedness is never ambiguous */ @@ -708,7 +715,7 @@ static int bpf_core_calc_field_relo(const char *prog_name, static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo, const struct bpf_core_spec *spec, - __u32 *val, bool *validate) + __u64 *val, bool *validate) { __s64 sz; @@ -751,10 +758,9 @@ static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo, static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo, const struct bpf_core_spec *spec, - __u32 *val) + __u64 *val) { const struct btf_type *t; - const struct btf_enum *e; switch (relo->kind) { case BPF_CORE_ENUMVAL_EXISTS: @@ -764,8 +770,10 @@ static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo, if (!spec) return -EUCLEAN; 
/* request instruction poisoning */ t = btf_type_by_id(spec->btf, spec->spec[0].type_id); - e = btf_enum(t) + spec->spec[0].idx; - *val = e->val; + if (btf_is_enum(t)) + *val = btf_enum(t)[spec->spec[0].idx].val; + else + *val = btf_enum64_value(btf_enum64(t) + spec->spec[0].idx); break; default: return -EOPNOTSUPP; @@ -929,7 +937,7 @@ int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn, int insn_idx, const struct bpf_core_relo *relo, int relo_idx, const struct bpf_core_relo_res *res) { - __u32 orig_val, new_val; + __u64 orig_val, new_val; __u8 class; class = BPF_CLASS(insn->code); @@ -954,28 +962,30 @@ poison: if (BPF_SRC(insn->code) != BPF_K) return -EINVAL; if (res->validate && insn->imm != orig_val) { - pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n", + pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %llu -> %llu\n", prog_name, relo_idx, - insn_idx, insn->imm, orig_val, new_val); + insn_idx, insn->imm, (unsigned long long)orig_val, + (unsigned long long)new_val); return -EINVAL; } orig_val = insn->imm; insn->imm = new_val; - pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n", + pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %llu -> %llu\n", prog_name, relo_idx, insn_idx, - orig_val, new_val); + (unsigned long long)orig_val, (unsigned long long)new_val); break; case BPF_LDX: case BPF_ST: case BPF_STX: if (res->validate && insn->off != orig_val) { - pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n", - prog_name, relo_idx, insn_idx, insn->off, orig_val, new_val); + pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %llu -> %llu\n", + prog_name, relo_idx, insn_idx, insn->off, (unsigned long long)orig_val, + (unsigned long long)new_val); return -EINVAL; } if (new_val > SHRT_MAX) { - pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n", - 
prog_name, relo_idx, insn_idx, new_val); + pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %llu\n", + prog_name, relo_idx, insn_idx, (unsigned long long)new_val); return -ERANGE; } if (res->fail_memsz_adjust) { @@ -987,8 +997,9 @@ poison: orig_val = insn->off; insn->off = new_val; - pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n", - prog_name, relo_idx, insn_idx, orig_val, new_val); + pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %llu -> %llu\n", + prog_name, relo_idx, insn_idx, (unsigned long long)orig_val, + (unsigned long long)new_val); if (res->new_sz != res->orig_sz) { int insn_bytes_sz, insn_bpf_sz; @@ -1024,20 +1035,20 @@ poison: return -EINVAL; } - imm = insn[0].imm + ((__u64)insn[1].imm << 32); + imm = (__u32)insn[0].imm | ((__u64)insn[1].imm << 32); if (res->validate && imm != orig_val) { - pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n", + pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %llu -> %llu\n", prog_name, relo_idx, insn_idx, (unsigned long long)imm, - orig_val, new_val); + (unsigned long long)orig_val, (unsigned long long)new_val); return -EINVAL; } insn[0].imm = new_val; - insn[1].imm = 0; /* currently only 32-bit values are supported */ - pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n", + insn[1].imm = new_val >> 32; + pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %llu\n", prog_name, relo_idx, insn_idx, - (unsigned long long)imm, new_val); + (unsigned long long)imm, (unsigned long long)new_val); break; } default: @@ -1057,7 +1068,6 @@ poison: int bpf_core_format_spec(char *buf, size_t buf_sz, const struct bpf_core_spec *spec) { const struct btf_type *t; - const struct btf_enum *e; const char *s; __u32 type_id; int i, len = 0; @@ -1086,10 +1096,23 @@ int bpf_core_format_spec(char *buf, size_t buf_sz, const struct bpf_core_spec *s if 
(core_relo_is_enumval_based(spec->relo_kind)) { t = skip_mods_and_typedefs(spec->btf, type_id, NULL); - e = btf_enum(t) + spec->raw_spec[0]; - s = btf__name_by_offset(spec->btf, e->name_off); + if (btf_is_enum(t)) { + const struct btf_enum *e; + const char *fmt_str; + + e = btf_enum(t) + spec->raw_spec[0]; + s = btf__name_by_offset(spec->btf, e->name_off); + fmt_str = BTF_INFO_KFLAG(t->info) ? "::%s = %d" : "::%s = %u"; + append_buf(fmt_str, s, e->val); + } else { + const struct btf_enum64 *e; + const char *fmt_str; - append_buf("::%s = %u", s, e->val); + e = btf_enum64(t) + spec->raw_spec[0]; + s = btf__name_by_offset(spec->btf, e->name_off); + fmt_str = BTF_INFO_KFLAG(t->info) ? "::%s = %lld" : "::%s = %llu"; + append_buf(fmt_str, s, (unsigned long long)btf_enum64_value(e)); + } return len; } @@ -1148,11 +1171,11 @@ int bpf_core_format_spec(char *buf, size_t buf_sz, const struct bpf_core_spec *s * 3. It is supported and expected that there might be multiple flavors * matching the spec. As long as all the specs resolve to the same set of * offsets across all candidates, there is no error. If there is any - * ambiguity, CO-RE relocation will fail. This is necessary to accomodate - * imprefection of BTF deduplication, which can cause slight duplication of + * ambiguity, CO-RE relocation will fail. This is necessary to accommodate + * imperfection of BTF deduplication, which can cause slight duplication of * the same BTF type, if some directly or indirectly referenced (by * pointer) type gets resolved to different actual types in different - * object files. If such situation occurs, deduplicated BTF will end up + * object files. If such a situation occurs, deduplicated BTF will end up * with two (or more) structurally identical types, which differ only in * types they refer to through pointer. This should be OK in most cases and * is not an error. 
@@ -1261,10 +1284,12 @@ int bpf_core_calc_relo_insn(const char *prog_name, * decision and value, otherwise it's dangerous to * proceed due to ambiguity */ - pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n", + pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %llu != %s %llu\n", prog_name, relo_idx, - cand_res.poison ? "failure" : "success", cand_res.new_val, - targ_res->poison ? "failure" : "success", targ_res->new_val); + cand_res.poison ? "failure" : "success", + (unsigned long long)cand_res.new_val, + targ_res->poison ? "failure" : "success", + (unsigned long long)targ_res->new_val); return -EINVAL; } diff --git a/tools/lib/bpf/relo_core.h b/tools/lib/bpf/relo_core.h index 073039d8ca4f..7df0da082f2c 100644 --- a/tools/lib/bpf/relo_core.h +++ b/tools/lib/bpf/relo_core.h @@ -46,9 +46,9 @@ struct bpf_core_spec { struct bpf_core_relo_res { /* expected value in the instruction, unless validate == false */ - __u32 orig_val; + __u64 orig_val; /* new value that needs to be patched up to */ - __u32 new_val; + __u64 new_val; /* relocation unsuccessful, poison instruction, but don't fail load */ bool poison; /* some relocations can't be validated against orig_val */ diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index f1c9339cfbbc..5159207cbfd9 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -441,7 +441,7 @@ static int parse_elf_segs(Elf *elf, const char *path, struct elf_seg **segs, siz return 0; } -static int parse_lib_segs(int pid, const char *lib_path, struct elf_seg **segs, size_t *seg_cnt) +static int parse_vma_segs(int pid, const char *lib_path, struct elf_seg **segs, size_t *seg_cnt) { char path[PATH_MAX], line[PATH_MAX], mode[16]; size_t seg_start, seg_end, seg_off; @@ -531,35 +531,40 @@ err_out: return err; } -static struct elf_seg *find_elf_seg(struct elf_seg *segs, size_t seg_cnt, long addr, bool relative) +static struct elf_seg *find_elf_seg(struct elf_seg *segs, size_t seg_cnt, long 
virtaddr) { struct elf_seg *seg; int i; - if (relative) { - /* for shared libraries, address is relative offset and thus - * should be fall within logical offset-based range of - * [offset_start, offset_end) - */ - for (i = 0, seg = segs; i < seg_cnt; i++, seg++) { - if (seg->offset <= addr && addr < seg->offset + (seg->end - seg->start)) - return seg; - } - } else { - /* for binaries, address is absolute and thus should be within - * absolute address range of [seg_start, seg_end) - */ - for (i = 0, seg = segs; i < seg_cnt; i++, seg++) { - if (seg->start <= addr && addr < seg->end) - return seg; - } + /* for ELF binaries (both executables and shared libraries), we are + * given virtual address (absolute for executables, relative for + * libraries) which should match address range of [seg_start, seg_end) + */ + for (i = 0, seg = segs; i < seg_cnt; i++, seg++) { + if (seg->start <= virtaddr && virtaddr < seg->end) + return seg; } + return NULL; +} +static struct elf_seg *find_vma_seg(struct elf_seg *segs, size_t seg_cnt, long offset) +{ + struct elf_seg *seg; + int i; + + /* for VMA segments from /proc/<pid>/maps file, provided "address" is + * actually a file offset, so it should fall within logical + * offset-based range of [offset_start, offset_end) + */ + for (i = 0, seg = segs; i < seg_cnt; i++, seg++) { + if (seg->offset <= offset && offset < seg->offset + (seg->end - seg->start)) + return seg; + } return NULL; } -static int parse_usdt_note(Elf *elf, const char *path, long base_addr, - GElf_Nhdr *nhdr, const char *data, size_t name_off, size_t desc_off, +static int parse_usdt_note(Elf *elf, const char *path, GElf_Nhdr *nhdr, + const char *data, size_t name_off, size_t desc_off, struct usdt_note *usdt_note); static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie); @@ -568,8 +573,8 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * const char *usdt_provider, const char *usdt_name, __u64
usdt_cookie, struct usdt_target **out_targets, size_t *out_target_cnt) { - size_t off, name_off, desc_off, seg_cnt = 0, lib_seg_cnt = 0, target_cnt = 0; - struct elf_seg *segs = NULL, *lib_segs = NULL; + size_t off, name_off, desc_off, seg_cnt = 0, vma_seg_cnt = 0, target_cnt = 0; + struct elf_seg *segs = NULL, *vma_segs = NULL; struct usdt_target *targets = NULL, *target; long base_addr = 0; Elf_Scn *notes_scn, *base_scn; @@ -613,8 +618,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * struct elf_seg *seg = NULL; void *tmp; - err = parse_usdt_note(elf, path, base_addr, &nhdr, - data->d_buf, name_off, desc_off, &note); + err = parse_usdt_note(elf, path, &nhdr, data->d_buf, name_off, desc_off, &note); if (err) goto err_out; @@ -654,30 +658,29 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * usdt_rel_ip += base_addr - note.base_addr; } - if (ehdr.e_type == ET_EXEC) { - /* When attaching uprobes (which what USDTs basically - * are) kernel expects a relative IP to be specified, - * so if we are attaching to an executable ELF binary - * (i.e., not a shared library), we need to calculate - * proper relative IP based on ELF's load address - */ - seg = find_elf_seg(segs, seg_cnt, usdt_abs_ip, false /* relative */); - if (!seg) { - err = -ESRCH; - pr_warn("usdt: failed to find ELF program segment for '%s:%s' in '%s' at IP 0x%lx\n", - usdt_provider, usdt_name, path, usdt_abs_ip); - goto err_out; - } - if (!seg->is_exec) { - err = -ESRCH; - pr_warn("usdt: matched ELF binary '%s' segment [0x%lx, 0x%lx) for '%s:%s' at IP 0x%lx is not executable\n", - path, seg->start, seg->end, usdt_provider, usdt_name, - usdt_abs_ip); - goto err_out; - } + /* When attaching uprobes (which is what USDTs basically are) + * kernel expects file offset to be specified, not a relative + * virtual address, so we need to translate virtual address to + * file offset, for both ET_EXEC and ET_DYN binaries.
+ */ + seg = find_elf_seg(segs, seg_cnt, usdt_abs_ip); + if (!seg) { + err = -ESRCH; + pr_warn("usdt: failed to find ELF program segment for '%s:%s' in '%s' at IP 0x%lx\n", + usdt_provider, usdt_name, path, usdt_abs_ip); + goto err_out; + } + if (!seg->is_exec) { + err = -ESRCH; + pr_warn("usdt: matched ELF binary '%s' segment [0x%lx, 0x%lx) for '%s:%s' at IP 0x%lx is not executable\n", + path, seg->start, seg->end, usdt_provider, usdt_name, + usdt_abs_ip); + goto err_out; + } + /* translate from virtual address to file offset */ + usdt_rel_ip = usdt_abs_ip - seg->start + seg->offset; - usdt_rel_ip = usdt_abs_ip - (seg->start - seg->offset); - } else if (!man->has_bpf_cookie) { /* ehdr.e_type == ET_DYN */ + if (ehdr.e_type == ET_DYN && !man->has_bpf_cookie) { /* If we don't have BPF cookie support but need to * attach to a shared library, we'll need to know and * record absolute addresses of attach points due to @@ -697,9 +700,9 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * goto err_out; } - /* lib_segs are lazily initialized only if necessary */ - if (lib_seg_cnt == 0) { - err = parse_lib_segs(pid, path, &lib_segs, &lib_seg_cnt); + /* vma_segs are lazily initialized only if necessary */ + if (vma_seg_cnt == 0) { + err = parse_vma_segs(pid, path, &vma_segs, &vma_seg_cnt); if (err) { pr_warn("usdt: failed to get memory segments in PID %d for shared library '%s': %d\n", pid, path, err); @@ -707,7 +710,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * } } - seg = find_elf_seg(lib_segs, lib_seg_cnt, usdt_rel_ip, true /* relative */); + seg = find_vma_seg(vma_segs, vma_seg_cnt, usdt_rel_ip); if (!seg) { err = -ESRCH; pr_warn("usdt: failed to find shared lib memory segment for '%s:%s' in '%s' at relative IP 0x%lx\n", @@ -715,7 +718,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * goto err_out; } - usdt_abs_ip = seg->start + (usdt_rel_ip - seg->offset); + 
usdt_abs_ip = seg->start - seg->offset + usdt_rel_ip; } pr_debug("usdt: probe for '%s:%s' in %s '%s': addr 0x%lx base 0x%lx (resolved abs_ip 0x%lx rel_ip 0x%lx) args '%s' in segment [0x%lx, 0x%lx) at offset 0x%lx\n", @@ -723,7 +726,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * note.loc_addr, note.base_addr, usdt_abs_ip, usdt_rel_ip, note.args, seg ? seg->start : 0, seg ? seg->end : 0, seg ? seg->offset : 0); - /* Adjust semaphore address to be a relative offset */ + /* Adjust semaphore address to be a file offset */ if (note.sema_addr) { if (!man->has_sema_refcnt) { pr_warn("usdt: kernel doesn't support USDT semaphore refcounting for '%s:%s' in '%s'\n", @@ -732,7 +735,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * goto err_out; } - seg = find_elf_seg(segs, seg_cnt, note.sema_addr, false /* relative */); + seg = find_elf_seg(segs, seg_cnt, note.sema_addr); if (!seg) { err = -ESRCH; pr_warn("usdt: failed to find ELF loadable segment with semaphore of '%s:%s' in '%s' at 0x%lx\n", @@ -747,7 +750,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * goto err_out; } - usdt_sema_off = note.sema_addr - (seg->start - seg->offset); + usdt_sema_off = note.sema_addr - seg->start + seg->offset; pr_debug("usdt: sema for '%s:%s' in %s '%s': addr 0x%lx base 0x%lx (resolved 0x%lx) in segment [0x%lx, 0x%lx] at offset 0x%lx\n", usdt_provider, usdt_name, ehdr.e_type == ET_EXEC ? 
"exec" : "lib ", @@ -770,7 +773,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * target->rel_ip = usdt_rel_ip; target->sema_off = usdt_sema_off; - /* notes->args references strings from Elf itself, so they can + /* notes.args references strings from Elf itself, so they can * be referenced safely until elf_end() call */ target->spec_str = note.args; @@ -788,7 +791,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * err_out: free(segs); - free(lib_segs); + free(vma_segs); if (err < 0) free(targets); return err; @@ -1089,8 +1092,8 @@ err_out: /* Parse out USDT ELF note from '.note.stapsdt' section. * Logic inspired by perf's code. */ -static int parse_usdt_note(Elf *elf, const char *path, long base_addr, - GElf_Nhdr *nhdr, const char *data, size_t name_off, size_t desc_off, +static int parse_usdt_note(Elf *elf, const char *path, GElf_Nhdr *nhdr, + const char *data, size_t name_off, size_t desc_off, struct usdt_note *note) { const char *provider, *name, *args; diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 595565eb68c0..ca2f47f45670 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -43,3 +43,4 @@ test_cpp *.tmp xdpxceiver xdp_redirect_multi +xdp_synproxy diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 2d3c8c8f558a..cb8e552e1418 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -82,7 +82,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \ TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \ flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \ test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \ - xdpxceiver xdp_redirect_multi + xdpxceiver xdp_redirect_multi xdp_synproxy TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read @@ -168,17 +168,26 @@ $(OUTPUT)/%:%.c $(call 
msg,BINARY,,$@) $(Q)$(LINK.c) $^ $(LDLIBS) -o $@ +# LLVM's ld.lld doesn't support all the architectures, so use it only on x86 +ifeq ($(SRCARCH),x86) +LLD := lld +else +LLD := ld +endif + # Filter out -static for liburandom_read.so and its dependent targets so that static builds # do not fail. Static builds leave urandom_read relying on system-wide shared libraries. $(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c $(call msg,LIB,,$@) - $(Q)$(CC) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $^ $(LDLIBS) -fPIC -shared -o $@ + $(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $^ $(LDLIBS) \ + -fuse-ld=$(LLD) -Wl,-znoseparate-code -fPIC -shared -o $@ $(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_read.so $(call msg,BINARY,,$@) - $(Q)$(CC) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \ - liburandom_read.so $(LDLIBS) \ - -Wl,-rpath=. -Wl,--build-id=sha1 -o $@ + $(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \ + liburandom_read.so $(LDLIBS) \ + -fuse-ld=$(LLD) -Wl,-znoseparate-code \ + -Wl,-rpath=. 
-Wl,--build-id=sha1 -o $@ $(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch]) $(call msg,MOD,,$@) @@ -502,6 +511,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \ cap_helpers.c TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \ $(OUTPUT)/liburandom_read.so \ + $(OUTPUT)/xdp_synproxy \ ima_setup.sh \ $(wildcard progs/btf_dump_test_case_*.c) TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE @@ -560,6 +570,7 @@ $(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \ $(OUTPUT)/bench_bloom_filter_map.o: $(OUTPUT)/bloom_filter_bench.skel.h $(OUTPUT)/bench_bpf_loop.o: $(OUTPUT)/bpf_loop_bench.skel.h $(OUTPUT)/bench_strncmp.o: $(OUTPUT)/strncmp_bench.skel.h +$(OUTPUT)/bench_bpf_hashmap_full_update.o: $(OUTPUT)/bpf_hashmap_full_update_bench.skel.h $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ) $(OUTPUT)/bench: LDLIBS += -lm $(OUTPUT)/bench: $(OUTPUT)/bench.o \ @@ -571,13 +582,16 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \ $(OUTPUT)/bench_ringbufs.o \ $(OUTPUT)/bench_bloom_filter_map.o \ $(OUTPUT)/bench_bpf_loop.o \ - $(OUTPUT)/bench_strncmp.o + $(OUTPUT)/bench_strncmp.o \ + $(OUTPUT)/bench_bpf_hashmap_full_update.o $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \ prog_tests/tests.h map_tests/tests.h verifier/tests.h \ feature bpftool \ - $(addprefix $(OUTPUT)/,*.o *.skel.h *.lskel.h *.subskel.h no_alu32 bpf_gcc bpf_testmod.ko) + $(addprefix $(OUTPUT)/,*.o *.skel.h *.lskel.h *.subskel.h \ + no_alu32 bpf_gcc bpf_testmod.ko \ + liburandom_read.so) .PHONY: docs docs-clean diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index f061cc20e776..d8aa62be996b 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -396,6 +396,7 @@ extern const struct bench bench_hashmap_with_bloom; extern const struct 
bench bench_bpf_loop; extern const struct bench bench_strncmp_no_helper; extern const struct bench bench_strncmp_helper; +extern const struct bench bench_bpf_hashmap_full_update; static const struct bench *benchs[] = { &bench_count_global, @@ -430,6 +431,7 @@ static const struct bench *benchs[] = { &bench_bpf_loop, &bench_strncmp_no_helper, &bench_strncmp_helper, + &bench_bpf_hashmap_full_update, }; static void setup_benchmark() diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c new file mode 100644 index 000000000000..cec51e0ff4b8 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Bytedance */ + +#include <argp.h> +#include "bench.h" +#include "bpf_hashmap_full_update_bench.skel.h" +#include "bpf_util.h" + +/* BPF triggering benchmarks */ +static struct ctx { + struct bpf_hashmap_full_update_bench *skel; +} ctx; + +#define MAX_LOOP_NUM 10000 + +static void validate(void) +{ + if (env.consumer_cnt != 1) { + fprintf(stderr, "benchmark doesn't support multi-consumer!\n"); + exit(1); + } +} + +static void *producer(void *input) +{ + while (true) { + /* trigger the bpf program */ + syscall(__NR_getpgid); + } + + return NULL; +} + +static void *consumer(void *input) +{ + return NULL; +} + +static void measure(struct bench_res *res) +{ +} + +static void setup(void) +{ + struct bpf_link *link; + int map_fd, i, max_entries; + + setup_libbpf(); + + ctx.skel = bpf_hashmap_full_update_bench__open_and_load(); + if (!ctx.skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + ctx.skel->bss->nr_loops = MAX_LOOP_NUM; + + link = bpf_program__attach(ctx.skel->progs.benchmark); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } + + /* fill hash_map */ + map_fd = bpf_map__fd(ctx.skel->maps.hash_map_bench); + max_entries = 
bpf_map__max_entries(ctx.skel->maps.hash_map_bench); + for (i = 0; i < max_entries; i++) + bpf_map_update_elem(map_fd, &i, &i, BPF_ANY); +} + +void hashmap_report_final(struct bench_res res[], int res_cnt) +{ + unsigned int nr_cpus = bpf_num_possible_cpus(); + int i; + + for (i = 0; i < nr_cpus; i++) { + u64 time = ctx.skel->bss->percpu_time[i]; + + if (!time) + continue; + + printf("%d:hash_map_full_perf %lld events per sec\n", + i, ctx.skel->bss->nr_loops * 1000000000ll / time); + } +} + +const struct bench bench_bpf_hashmap_full_update = { + .name = "bpf-hashmap-ful-update", + .validate = validate, + .setup = setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = NULL, + .report_final = hashmap_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh b/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh new file mode 100755 index 000000000000..1e2de838f9fa --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./benchs/run_common.sh + +set -eufo pipefail + +nr_threads=`expr $(cat /proc/cpuinfo | grep "processor"| wc -l) - 1` +summary=$($RUN_BENCH -p $nr_threads bpf-hashmap-ful-update) +printf "$summary" +printf "\n" diff --git a/tools/testing/selftests/bpf/btf_helpers.c b/tools/testing/selftests/bpf/btf_helpers.c index b5941d514e17..1c1c2c26690a 100644 --- a/tools/testing/selftests/bpf/btf_helpers.c +++ b/tools/testing/selftests/bpf/btf_helpers.c @@ -26,11 +26,12 @@ static const char * const btf_kind_str_mapping[] = { [BTF_KIND_FLOAT] = "FLOAT", [BTF_KIND_DECL_TAG] = "DECL_TAG", [BTF_KIND_TYPE_TAG] = "TYPE_TAG", + [BTF_KIND_ENUM64] = "ENUM64", }; static const char *btf_kind_str(__u16 kind) { - if (kind > BTF_KIND_TYPE_TAG) + if (kind > BTF_KIND_ENUM64) return "UNKNOWN"; return btf_kind_str_mapping[kind]; } @@ -139,14 +140,32 @@ 
int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id) } case BTF_KIND_ENUM: { const struct btf_enum *v = btf_enum(t); + const char *fmt_str; - fprintf(out, " size=%u vlen=%u", t->size, vlen); + fmt_str = btf_kflag(t) ? "\n\t'%s' val=%d" : "\n\t'%s' val=%u"; + fprintf(out, " encoding=%s size=%u vlen=%u", + btf_kflag(t) ? "SIGNED" : "UNSIGNED", t->size, vlen); for (i = 0; i < vlen; i++, v++) { - fprintf(out, "\n\t'%s' val=%u", + fprintf(out, fmt_str, btf_str(btf, v->name_off), v->val); } break; } + case BTF_KIND_ENUM64: { + const struct btf_enum64 *v = btf_enum64(t); + const char *fmt_str; + + fmt_str = btf_kflag(t) ? "\n\t'%s' val=%lld" : "\n\t'%s' val=%llu"; + + fprintf(out, " encoding=%s size=%u vlen=%u", + btf_kflag(t) ? "SIGNED" : "UNSIGNED", t->size, vlen); + for (i = 0; i < vlen; i++, v++) { + fprintf(out, fmt_str, + btf_str(btf, v->name_off), + ((__u64)v->val_hi32 << 32) | v->val_lo32); + } + break; + } case BTF_KIND_FWD: fprintf(out, " fwd_kind=%s", btf_kflag(t) ? "union" : "struct"); break; diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index 08c0601b3e84..0b899d2d8ea7 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -17,6 +17,14 @@ static void trigger_func2(void) asm volatile (""); } +/* attach point for byname sleepable uprobe */ +static void trigger_func3(void) +{ + asm volatile (""); +} + +static char test_data[] = "test_data"; + void test_attach_probe(void) { DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); @@ -49,9 +57,17 @@ void test_attach_probe(void) if (!ASSERT_GE(ref_ctr_offset, 0, "ref_ctr_offset")) return; - skel = test_attach_probe__open_and_load(); + skel = test_attach_probe__open(); if (!ASSERT_OK_PTR(skel, "skel_open")) return; + + /* sleepable kprobe test case needs flags set before loading */ + if (!ASSERT_OK(bpf_program__set_flags(skel->progs.handle_kprobe_sleepable, 
+ BPF_F_SLEEPABLE), "kprobe_sleepable_flags")) + goto cleanup; + + if (!ASSERT_OK(test_attach_probe__load(skel), "skel_load")) + goto cleanup; if (!ASSERT_OK_PTR(skel->bss, "check_bss")) goto cleanup; @@ -151,6 +167,30 @@ void test_attach_probe(void) if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname2, "attach_uretprobe_byname2")) goto cleanup; + /* sleepable kprobes should not attach successfully */ + skel->links.handle_kprobe_sleepable = bpf_program__attach(skel->progs.handle_kprobe_sleepable); + if (!ASSERT_ERR_PTR(skel->links.handle_kprobe_sleepable, "attach_kprobe_sleepable")) + goto cleanup; + + /* test sleepable uprobe and uretprobe variants */ + skel->links.handle_uprobe_byname3_sleepable = bpf_program__attach(skel->progs.handle_uprobe_byname3_sleepable); + if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname3_sleepable, "attach_uprobe_byname3_sleepable")) + goto cleanup; + + skel->links.handle_uprobe_byname3 = bpf_program__attach(skel->progs.handle_uprobe_byname3); + if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname3, "attach_uprobe_byname3")) + goto cleanup; + + skel->links.handle_uretprobe_byname3_sleepable = bpf_program__attach(skel->progs.handle_uretprobe_byname3_sleepable); + if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname3_sleepable, "attach_uretprobe_byname3_sleepable")) + goto cleanup; + + skel->links.handle_uretprobe_byname3 = bpf_program__attach(skel->progs.handle_uretprobe_byname3); + if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname3, "attach_uretprobe_byname3")) + goto cleanup; + + skel->bss->user_ptr = test_data; + /* trigger & validate kprobe && kretprobe */ usleep(1); @@ -164,6 +204,9 @@ void test_attach_probe(void) /* trigger & validate uprobe attached by name */ trigger_func2(); + /* trigger & validate sleepable uprobe attached by name */ + trigger_func3(); + ASSERT_EQ(skel->bss->kprobe_res, 1, "check_kprobe_res"); ASSERT_EQ(skel->bss->kprobe2_res, 11, "check_kprobe_auto_res"); ASSERT_EQ(skel->bss->kretprobe_res, 2, 
"check_kretprobe_res"); @@ -174,6 +217,10 @@ void test_attach_probe(void) ASSERT_EQ(skel->bss->uretprobe_byname_res, 6, "check_uretprobe_byname_res"); ASSERT_EQ(skel->bss->uprobe_byname2_res, 7, "check_uprobe_byname2_res"); ASSERT_EQ(skel->bss->uretprobe_byname2_res, 8, "check_uretprobe_byname2_res"); + ASSERT_EQ(skel->bss->uprobe_byname3_sleepable_res, 9, "check_uprobe_byname3_sleepable_res"); + ASSERT_EQ(skel->bss->uprobe_byname3_res, 10, "check_uprobe_byname3_res"); + ASSERT_EQ(skel->bss->uretprobe_byname3_sleepable_res, 11, "check_uretprobe_byname3_sleepable_res"); + ASSERT_EQ(skel->bss->uretprobe_byname3_res, 12, "check_uretprobe_byname3_res"); cleanup: test_attach_probe__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index ba5bde53d418..edb387163baa 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -2897,26 +2897,6 @@ static struct btf_raw_test raw_tests[] = { }, { - .descr = "invalid enum kind_flag", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 1, 1), 4), /* [2] */ - BTF_ENUM_ENC(NAME_TBD, 0), - BTF_END_RAW, - }, - BTF_STR_SEC("\0A"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "enum_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid btf_info kind_flag", -}, - -{ .descr = "valid fwd kind_flag", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ @@ -4072,6 +4052,42 @@ static struct btf_raw_test raw_tests[] = { .btf_load_err = true, .err_str = "Type tags don't precede modifiers", }, +{ + .descr = "enum64 test #1, unsigned, size 8", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 2), 8), /* [2] */ + 
BTF_ENUM64_ENC(NAME_TBD, 0, 0), + BTF_ENUM64_ENC(NAME_TBD, 1, 1), + BTF_END_RAW, + }, + BTF_STR_SEC("\0a\0b\0c"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 8, + .key_type_id = 1, + .value_type_id = 2, + .max_entries = 1, +}, +{ + .descr = "enum64 test #2, signed, size 4", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM64, 1, 2), 4), /* [2] */ + BTF_ENUM64_ENC(NAME_TBD, -1, 0), + BTF_ENUM64_ENC(NAME_TBD, 1, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0a\0b\0c"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 2, + .max_entries = 1, +}, }; /* struct btf_raw_test raw_tests[] */ @@ -7000,9 +7016,12 @@ static struct btf_dedup_test dedup_tests[] = { BTF_DECL_TAG_ENC(NAME_TBD, 13, 1), /* [16] decl_tag */ BTF_DECL_TAG_ENC(NAME_TBD, 7, -1), /* [17] decl_tag */ BTF_TYPE_TAG_ENC(NAME_TBD, 8), /* [18] type_tag */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 2), 8), /* [19] enum64 */ + BTF_ENUM64_ENC(NAME_TBD, 0, 0), + BTF_ENUM64_ENC(NAME_TBD, 1, 1), BTF_END_RAW, }, - BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q\0R"), + BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q\0R\0S\0T\0U"), }, .expect = { .raw_types = { @@ -7030,9 +7049,12 @@ static struct btf_dedup_test dedup_tests[] = { BTF_DECL_TAG_ENC(NAME_TBD, 13, 1), /* [16] decl_tag */ BTF_DECL_TAG_ENC(NAME_TBD, 7, -1), /* [17] decl_tag */ BTF_TYPE_TAG_ENC(NAME_TBD, 8), /* [18] type_tag */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 2), 8), /* [19] enum64 */ + BTF_ENUM64_ENC(NAME_TBD, 0, 0), + BTF_ENUM64_ENC(NAME_TBD, 1, 1), BTF_END_RAW, }, - BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q\0R"), + BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q\0R\0S\0T\0U"), }, }, { @@ -7493,6 
+7515,91 @@ static struct btf_dedup_test dedup_tests[] = { BTF_STR_SEC("\0tag1\0t\0m"), }, }, +{ + .descr = "dedup: enum64, standalone", + .input = { + .raw_types = { + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(2), 1, 123), + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(2), 1, 123), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val"), + }, + .expect = { + .raw_types = { + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(2), 1, 123), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val"), + }, +}, +{ + .descr = "dedup: enum64, fwd resolution", + .input = { + .raw_types = { + /* [1] fwd enum64 'e1' before full enum */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8), + /* [2] full enum64 'e1' after fwd */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(2), 1, 123), + /* [3] full enum64 'e2' before fwd */ + BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(4), 0, 456), + /* [4] fwd enum64 'e2' after full enum */ + BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8), + /* [5] incompatible full enum64 with different value */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(2), 0, 321), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val\0e2\0e2_val"), + }, + .expect = { + .raw_types = { + /* [1] full enum64 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(2), 1, 123), + /* [2] full enum64 'e2' */ + BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(4), 0, 456), + /* [3] incompatible full enum64 with different value */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(2), 0, 321), + BTF_END_RAW, + }, + 
BTF_STR_SEC("\0e1\0e1_val\0e2\0e2_val"), + }, +}, +{ + .descr = "dedup: enum and enum64, no dedup", + .input = { + .raw_types = { + /* [1] enum 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(2), 1), + /* [2] enum64 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 4), + BTF_ENUM64_ENC(NAME_NTH(2), 1, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val"), + }, + .expect = { + .raw_types = { + /* [1] enum 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(2), 1), + /* [2] enum64 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 4), + BTF_ENUM64_ENC(NAME_NTH(2), 1, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val"), + }, +}, }; @@ -7517,6 +7624,8 @@ static int btf_type_size(const struct btf_type *t) return base_size + sizeof(__u32); case BTF_KIND_ENUM: return base_size + vlen * sizeof(struct btf_enum); + case BTF_KIND_ENUM64: + return base_size + vlen * sizeof(struct btf_enum64); case BTF_KIND_ARRAY: return base_size + sizeof(struct btf_array); case BTF_KIND_STRUCT: diff --git a/tools/testing/selftests/bpf/prog_tests/btf_write.c b/tools/testing/selftests/bpf/prog_tests/btf_write.c index addf99c05896..6e36de1302fc 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_write.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_write.c @@ -9,6 +9,7 @@ static void gen_btf(struct btf *btf) const struct btf_var_secinfo *vi; const struct btf_type *t; const struct btf_member *m; + const struct btf_enum64 *v64; const struct btf_enum *v; const struct btf_param *p; int id, err, str_off; @@ -171,7 +172,7 @@ static void gen_btf(struct btf *btf) ASSERT_STREQ(btf__str_by_offset(btf, v->name_off), "v2", "v2_name"); ASSERT_EQ(v->val, 2, "v2_val"); ASSERT_STREQ(btf_type_raw_dump(btf, 9), - "[9] ENUM 'e1' size=4 vlen=2\n" + "[9] ENUM 'e1' encoding=UNSIGNED size=4 vlen=2\n" "\t'v1' val=1\n" "\t'v2' val=2", "raw_dump"); @@ -202,7 +203,7 @@ 
static void gen_btf(struct btf *btf) ASSERT_EQ(btf_vlen(t), 0, "enum_fwd_kind"); ASSERT_EQ(t->size, 4, "enum_fwd_sz"); ASSERT_STREQ(btf_type_raw_dump(btf, 12), - "[12] ENUM 'enum_fwd' size=4 vlen=0", "raw_dump"); + "[12] ENUM 'enum_fwd' encoding=UNSIGNED size=4 vlen=0", "raw_dump"); /* TYPEDEF */ id = btf__add_typedef(btf, "typedef1", 1); @@ -307,6 +308,48 @@ static void gen_btf(struct btf *btf) ASSERT_EQ(t->type, 1, "tag_type"); ASSERT_STREQ(btf_type_raw_dump(btf, 20), "[20] TYPE_TAG 'tag1' type_id=1", "raw_dump"); + + /* ENUM64 */ + id = btf__add_enum64(btf, "e1", 8, true); + ASSERT_EQ(id, 21, "enum64_id"); + err = btf__add_enum64_value(btf, "v1", -1); + ASSERT_OK(err, "v1_res"); + err = btf__add_enum64_value(btf, "v2", 0x123456789); /* 4886718345 */ + ASSERT_OK(err, "v2_res"); + t = btf__type_by_id(btf, 21); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "e1", "enum64_name"); + ASSERT_EQ(btf_kind(t), BTF_KIND_ENUM64, "enum64_kind"); + ASSERT_EQ(btf_vlen(t), 2, "enum64_vlen"); + ASSERT_EQ(t->size, 8, "enum64_sz"); + v64 = btf_enum64(t) + 0; + ASSERT_STREQ(btf__str_by_offset(btf, v64->name_off), "v1", "v1_name"); + ASSERT_EQ(v64->val_hi32, 0xffffffff, "v1_val"); + ASSERT_EQ(v64->val_lo32, 0xffffffff, "v1_val"); + v64 = btf_enum64(t) + 1; + ASSERT_STREQ(btf__str_by_offset(btf, v64->name_off), "v2", "v2_name"); + ASSERT_EQ(v64->val_hi32, 0x1, "v2_val"); + ASSERT_EQ(v64->val_lo32, 0x23456789, "v2_val"); + ASSERT_STREQ(btf_type_raw_dump(btf, 21), + "[21] ENUM64 'e1' encoding=SIGNED size=8 vlen=2\n" + "\t'v1' val=-1\n" + "\t'v2' val=4886718345", "raw_dump"); + + id = btf__add_enum64(btf, "e1", 8, false); + ASSERT_EQ(id, 22, "enum64_id"); + err = btf__add_enum64_value(btf, "v1", 0xffffffffFFFFFFFF); /* 18446744073709551615 */ + ASSERT_OK(err, "v1_res"); + t = btf__type_by_id(btf, 22); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "e1", "enum64_name"); + ASSERT_EQ(btf_kind(t), BTF_KIND_ENUM64, "enum64_kind"); + ASSERT_EQ(btf_vlen(t), 1, "enum64_vlen"); + 
ASSERT_EQ(t->size, 8, "enum64_sz"); + v64 = btf_enum64(t) + 0; + ASSERT_STREQ(btf__str_by_offset(btf, v64->name_off), "v1", "v1_name"); + ASSERT_EQ(v64->val_hi32, 0xffffffff, "v1_val"); + ASSERT_EQ(v64->val_lo32, 0xffffffff, "v1_val"); + ASSERT_STREQ(btf_type_raw_dump(btf, 22), + "[22] ENUM64 'e1' encoding=UNSIGNED size=8 vlen=1\n" + "\t'v1' val=18446744073709551615", "raw_dump"); } static void test_btf_add() @@ -332,12 +375,12 @@ static void test_btf_add() "\t'f2' type_id=1 bits_offset=32 bitfield_size=16", "[8] UNION 'u1' size=8 vlen=1\n" "\t'f1' type_id=1 bits_offset=0 bitfield_size=16", - "[9] ENUM 'e1' size=4 vlen=2\n" + "[9] ENUM 'e1' encoding=UNSIGNED size=4 vlen=2\n" "\t'v1' val=1\n" "\t'v2' val=2", "[10] FWD 'struct_fwd' fwd_kind=struct", "[11] FWD 'union_fwd' fwd_kind=union", - "[12] ENUM 'enum_fwd' size=4 vlen=0", + "[12] ENUM 'enum_fwd' encoding=UNSIGNED size=4 vlen=0", "[13] TYPEDEF 'typedef1' type_id=1", "[14] FUNC 'func1' type_id=15 linkage=global", "[15] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2\n" @@ -348,7 +391,12 @@ static void test_btf_add() "\ttype_id=1 offset=4 size=8", "[18] DECL_TAG 'tag1' type_id=16 component_idx=-1", "[19] DECL_TAG 'tag2' type_id=14 component_idx=1", - "[20] TYPE_TAG 'tag1' type_id=1"); + "[20] TYPE_TAG 'tag1' type_id=1", + "[21] ENUM64 'e1' encoding=SIGNED size=8 vlen=2\n" + "\t'v1' val=-1\n" + "\t'v2' val=4886718345", + "[22] ENUM64 'e1' encoding=UNSIGNED size=8 vlen=1\n" + "\t'v1' val=18446744073709551615"); btf__free(btf); } @@ -370,7 +418,7 @@ static void test_btf_add_btf() gen_btf(btf2); id = btf__add_btf(btf1, btf2); - if (!ASSERT_EQ(id, 21, "id")) + if (!ASSERT_EQ(id, 23, "id")) goto cleanup; VALIDATE_RAW_BTF( @@ -386,12 +434,12 @@ static void test_btf_add_btf() "\t'f2' type_id=1 bits_offset=32 bitfield_size=16", "[8] UNION 'u1' size=8 vlen=1\n" "\t'f1' type_id=1 bits_offset=0 bitfield_size=16", - "[9] ENUM 'e1' size=4 vlen=2\n" + "[9] ENUM 'e1' encoding=UNSIGNED size=4 vlen=2\n" "\t'v1' val=1\n" "\t'v2' val=2", 
"[10] FWD 'struct_fwd' fwd_kind=struct", "[11] FWD 'union_fwd' fwd_kind=union", - "[12] ENUM 'enum_fwd' size=4 vlen=0", + "[12] ENUM 'enum_fwd' encoding=UNSIGNED size=4 vlen=0", "[13] TYPEDEF 'typedef1' type_id=1", "[14] FUNC 'func1' type_id=15 linkage=global", "[15] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2\n" @@ -403,36 +451,46 @@ static void test_btf_add_btf() "[18] DECL_TAG 'tag1' type_id=16 component_idx=-1", "[19] DECL_TAG 'tag2' type_id=14 component_idx=1", "[20] TYPE_TAG 'tag1' type_id=1", + "[21] ENUM64 'e1' encoding=SIGNED size=8 vlen=2\n" + "\t'v1' val=-1\n" + "\t'v2' val=4886718345", + "[22] ENUM64 'e1' encoding=UNSIGNED size=8 vlen=1\n" + "\t'v1' val=18446744073709551615", /* types appended from the second BTF */ - "[21] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", - "[22] PTR '(anon)' type_id=21", - "[23] CONST '(anon)' type_id=25", - "[24] VOLATILE '(anon)' type_id=23", - "[25] RESTRICT '(anon)' type_id=24", - "[26] ARRAY '(anon)' type_id=22 index_type_id=21 nr_elems=10", - "[27] STRUCT 's1' size=8 vlen=2\n" - "\t'f1' type_id=21 bits_offset=0\n" - "\t'f2' type_id=21 bits_offset=32 bitfield_size=16", - "[28] UNION 'u1' size=8 vlen=1\n" - "\t'f1' type_id=21 bits_offset=0 bitfield_size=16", - "[29] ENUM 'e1' size=4 vlen=2\n" + "[23] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[24] PTR '(anon)' type_id=23", + "[25] CONST '(anon)' type_id=27", + "[26] VOLATILE '(anon)' type_id=25", + "[27] RESTRICT '(anon)' type_id=26", + "[28] ARRAY '(anon)' type_id=24 index_type_id=23 nr_elems=10", + "[29] STRUCT 's1' size=8 vlen=2\n" + "\t'f1' type_id=23 bits_offset=0\n" + "\t'f2' type_id=23 bits_offset=32 bitfield_size=16", + "[30] UNION 'u1' size=8 vlen=1\n" + "\t'f1' type_id=23 bits_offset=0 bitfield_size=16", + "[31] ENUM 'e1' encoding=UNSIGNED size=4 vlen=2\n" "\t'v1' val=1\n" "\t'v2' val=2", - "[30] FWD 'struct_fwd' fwd_kind=struct", - "[31] FWD 'union_fwd' fwd_kind=union", - "[32] ENUM 'enum_fwd' size=4 vlen=0", - "[33] TYPEDEF 
'typedef1' type_id=21", - "[34] FUNC 'func1' type_id=35 linkage=global", - "[35] FUNC_PROTO '(anon)' ret_type_id=21 vlen=2\n" - "\t'p1' type_id=21\n" - "\t'p2' type_id=22", - "[36] VAR 'var1' type_id=21, linkage=global-alloc", - "[37] DATASEC 'datasec1' size=12 vlen=1\n" - "\ttype_id=21 offset=4 size=8", - "[38] DECL_TAG 'tag1' type_id=36 component_idx=-1", - "[39] DECL_TAG 'tag2' type_id=34 component_idx=1", - "[40] TYPE_TAG 'tag1' type_id=21"); + "[32] FWD 'struct_fwd' fwd_kind=struct", + "[33] FWD 'union_fwd' fwd_kind=union", + "[34] ENUM 'enum_fwd' encoding=UNSIGNED size=4 vlen=0", + "[35] TYPEDEF 'typedef1' type_id=23", + "[36] FUNC 'func1' type_id=37 linkage=global", + "[37] FUNC_PROTO '(anon)' ret_type_id=23 vlen=2\n" + "\t'p1' type_id=23\n" + "\t'p2' type_id=24", + "[38] VAR 'var1' type_id=23, linkage=global-alloc", + "[39] DATASEC 'datasec1' size=12 vlen=1\n" + "\ttype_id=23 offset=4 size=8", + "[40] DECL_TAG 'tag1' type_id=38 component_idx=-1", + "[41] DECL_TAG 'tag2' type_id=36 component_idx=1", + "[42] TYPE_TAG 'tag1' type_id=23", + "[43] ENUM64 'e1' encoding=SIGNED size=8 vlen=2\n" + "\t'v1' val=-1\n" + "\t'v2' val=4886718345", + "[44] ENUM64 'e1' encoding=UNSIGNED size=8 vlen=1\n" + "\t'v1' val=18446744073709551615"); cleanup: btf__free(btf1); diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index 3712dfe1be59..2f92feb809be 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -84,6 +84,7 @@ static int duration = 0; #define NESTING_ERR_CASE(name) { \ NESTING_CASE_COMMON(name), \ .fails = true, \ + .run_btfgen_fails = true, \ } #define ARRAYS_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \ @@ -258,12 +259,14 @@ static int duration = 0; BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_probed.o", \ "probed:", name), \ .fails = true, \ + .run_btfgen_fails = true, \ .raw_tp_name = "sys_enter", \ .prog_name = 
"test_core_bitfields", \ }, { \ BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_direct.o", \ "direct:", name), \ .fails = true, \ + .run_btfgen_fails = true, \ .prog_name = "test_core_bitfields_direct", \ } @@ -304,6 +307,7 @@ static int duration = 0; #define SIZE_ERR_CASE(name) { \ SIZE_CASE_COMMON(name), \ .fails = true, \ + .run_btfgen_fails = true, \ } #define TYPE_BASED_CASE_COMMON(name) \ @@ -363,6 +367,25 @@ static int duration = 0; .fails = true, \ } +#define ENUM64VAL_CASE_COMMON(name) \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_enum64val.o", \ + .btf_src_file = "btf__core_reloc_" #name ".o", \ + .raw_tp_name = "sys_enter", \ + .prog_name = "test_core_enum64val" + +#define ENUM64VAL_CASE(name, ...) { \ + ENUM64VAL_CASE_COMMON(name), \ + .output = STRUCT_TO_CHAR_PTR(core_reloc_enum64val_output) \ + __VA_ARGS__, \ + .output_len = sizeof(struct core_reloc_enum64val_output), \ +} + +#define ENUM64VAL_ERR_CASE(name) { \ + ENUM64VAL_CASE_COMMON(name), \ + .fails = true, \ +} + struct core_reloc_test_case; typedef int (*setup_test_fn)(struct core_reloc_test_case *test); @@ -377,6 +400,7 @@ struct core_reloc_test_case { const char *output; int output_len; bool fails; + bool run_btfgen_fails; bool needs_testmod; bool relaxed_core_relocs; const char *prog_name; @@ -831,6 +855,45 @@ static const struct core_reloc_test_case test_cases[] = { .anon_val2 = 0x222, }), ENUMVAL_ERR_CASE(enumval___err_missing), + + /* 64bit enumerator value existence and value relocations */ + ENUM64VAL_CASE(enum64val, { + .unsigned_val1_exists = true, + .unsigned_val2_exists = true, + .unsigned_val3_exists = true, + .signed_val1_exists = true, + .signed_val2_exists = true, + .signed_val3_exists = true, + .unsigned_val1 = 0x1ffffffffULL, + .unsigned_val2 = 0x2, + .signed_val1 = 0x1ffffffffLL, + .signed_val2 = -2, + }), + ENUM64VAL_CASE(enum64val___diff, { + .unsigned_val1_exists = true, + .unsigned_val2_exists = true, + .unsigned_val3_exists = true, + .signed_val1_exists = 
true, + .signed_val2_exists = true, + .signed_val3_exists = true, + .unsigned_val1 = 0x101ffffffffULL, + .unsigned_val2 = 0x202ffffffffULL, + .signed_val1 = -101, + .signed_val2 = -202, + }), + ENUM64VAL_CASE(enum64val___val3_missing, { + .unsigned_val1_exists = true, + .unsigned_val2_exists = true, + .unsigned_val3_exists = false, + .signed_val1_exists = true, + .signed_val2_exists = true, + .signed_val3_exists = false, + .unsigned_val1 = 0x111ffffffffULL, + .unsigned_val2 = 0x222, + .signed_val1 = 0x111ffffffffLL, + .signed_val2 = -222, + }), + ENUM64VAL_ERR_CASE(enum64val___err_missing), }; struct data { @@ -894,7 +957,7 @@ static void run_core_reloc_tests(bool use_btfgen) /* generate a "minimal" BTF file and use it as source */ if (use_btfgen) { - if (!test_case->btf_src_file || test_case->fails) { + if (!test_case->btf_src_file || test_case->run_btfgen_fails) { test__skip(); continue; } diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c index a7e74297f15f..5a7e6011f6bf 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c @@ -7,11 +7,9 @@ void serial_test_fexit_stress(void) { - char test_skb[128] = {}; int fexit_fd[CNT] = {}; int link_fd[CNT] = {}; - char error[4096]; - int err, i, filter_fd; + int err, i; const struct bpf_insn trace_program[] = { BPF_MOV64_IMM(BPF_REG_0, 0), @@ -20,25 +18,9 @@ void serial_test_fexit_stress(void) LIBBPF_OPTS(bpf_prog_load_opts, trace_opts, .expected_attach_type = BPF_TRACE_FEXIT, - .log_buf = error, - .log_size = sizeof(error), ); - const struct bpf_insn skb_program[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - - LIBBPF_OPTS(bpf_prog_load_opts, skb_opts, - .log_buf = error, - .log_size = sizeof(error), - ); - - LIBBPF_OPTS(bpf_test_run_opts, topts, - .data_in = test_skb, - .data_size_in = sizeof(test_skb), - .repeat = 1, - ); + LIBBPF_OPTS(bpf_test_run_opts, topts); 
err = libbpf_find_vmlinux_btf_id("bpf_fentry_test1", trace_opts.expected_attach_type); @@ -58,15 +40,9 @@ void serial_test_fexit_stress(void) goto out; } - filter_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", - skb_program, sizeof(skb_program) / sizeof(struct bpf_insn), - &skb_opts); - if (!ASSERT_GE(filter_fd, 0, "test_program_loaded")) - goto out; + err = bpf_prog_test_run_opts(fexit_fd[0], &topts); + ASSERT_OK(err, "bpf_prog_test_run_opts"); - err = bpf_prog_test_run_opts(filter_fd, &topts); - close(filter_fd); - CHECK_FAIL(err); out: for (i = 0; i < CNT; i++) { if (link_fd[i]) diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c new file mode 100644 index 000000000000..93e9cddaadcf --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c @@ -0,0 +1,207 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include <ctype.h> +#include <test_progs.h> +#include <bpf/btf.h> + +/* + * Utility function uppercasing an entire string. + */ +static void uppercase(char *s) +{ + for (; *s != '\0'; s++) + *s = toupper(*s); +} + +/* + * Test case to check that all bpf_attach_type variants are covered by + * libbpf_bpf_attach_type_str. 
+ */ +static void test_libbpf_bpf_attach_type_str(void) +{ + struct btf *btf; + const struct btf_type *t; + const struct btf_enum *e; + int i, n, id; + + btf = btf__parse("/sys/kernel/btf/vmlinux", NULL); + if (!ASSERT_OK_PTR(btf, "btf_parse")) + return; + + /* find enum bpf_attach_type and enumerate each value */ + id = btf__find_by_name_kind(btf, "bpf_attach_type", BTF_KIND_ENUM); + if (!ASSERT_GT(id, 0, "bpf_attach_type_id")) + goto cleanup; + t = btf__type_by_id(btf, id); + e = btf_enum(t); + n = btf_vlen(t); + for (i = 0; i < n; e++, i++) { + enum bpf_attach_type attach_type = (enum bpf_attach_type)e->val; + const char *attach_type_name; + const char *attach_type_str; + char buf[256]; + + if (attach_type == __MAX_BPF_ATTACH_TYPE) + continue; + + attach_type_name = btf__str_by_offset(btf, e->name_off); + attach_type_str = libbpf_bpf_attach_type_str(attach_type); + ASSERT_OK_PTR(attach_type_str, attach_type_name); + + snprintf(buf, sizeof(buf), "BPF_%s", attach_type_str); + uppercase(buf); + + ASSERT_STREQ(buf, attach_type_name, "exp_str_value"); + } + +cleanup: + btf__free(btf); +} + +/* + * Test case to check that all bpf_link_type variants are covered by + * libbpf_bpf_link_type_str. 
+ */ +static void test_libbpf_bpf_link_type_str(void) +{ + struct btf *btf; + const struct btf_type *t; + const struct btf_enum *e; + int i, n, id; + + btf = btf__parse("/sys/kernel/btf/vmlinux", NULL); + if (!ASSERT_OK_PTR(btf, "btf_parse")) + return; + + /* find enum bpf_link_type and enumerate each value */ + id = btf__find_by_name_kind(btf, "bpf_link_type", BTF_KIND_ENUM); + if (!ASSERT_GT(id, 0, "bpf_link_type_id")) + goto cleanup; + t = btf__type_by_id(btf, id); + e = btf_enum(t); + n = btf_vlen(t); + for (i = 0; i < n; e++, i++) { + enum bpf_link_type link_type = (enum bpf_link_type)e->val; + const char *link_type_name; + const char *link_type_str; + char buf[256]; + + if (link_type == MAX_BPF_LINK_TYPE) + continue; + + link_type_name = btf__str_by_offset(btf, e->name_off); + link_type_str = libbpf_bpf_link_type_str(link_type); + ASSERT_OK_PTR(link_type_str, link_type_name); + + snprintf(buf, sizeof(buf), "BPF_LINK_TYPE_%s", link_type_str); + uppercase(buf); + + ASSERT_STREQ(buf, link_type_name, "exp_str_value"); + } + +cleanup: + btf__free(btf); +} + +/* + * Test case to check that all bpf_map_type variants are covered by + * libbpf_bpf_map_type_str. 
+ */ +static void test_libbpf_bpf_map_type_str(void) +{ + struct btf *btf; + const struct btf_type *t; + const struct btf_enum *e; + int i, n, id; + + btf = btf__parse("/sys/kernel/btf/vmlinux", NULL); + if (!ASSERT_OK_PTR(btf, "btf_parse")) + return; + + /* find enum bpf_map_type and enumerate each value */ + id = btf__find_by_name_kind(btf, "bpf_map_type", BTF_KIND_ENUM); + if (!ASSERT_GT(id, 0, "bpf_map_type_id")) + goto cleanup; + t = btf__type_by_id(btf, id); + e = btf_enum(t); + n = btf_vlen(t); + for (i = 0; i < n; e++, i++) { + enum bpf_map_type map_type = (enum bpf_map_type)e->val; + const char *map_type_name; + const char *map_type_str; + char buf[256]; + + map_type_name = btf__str_by_offset(btf, e->name_off); + map_type_str = libbpf_bpf_map_type_str(map_type); + ASSERT_OK_PTR(map_type_str, map_type_name); + + snprintf(buf, sizeof(buf), "BPF_MAP_TYPE_%s", map_type_str); + uppercase(buf); + + ASSERT_STREQ(buf, map_type_name, "exp_str_value"); + } + +cleanup: + btf__free(btf); +} + +/* + * Test case to check that all bpf_prog_type variants are covered by + * libbpf_bpf_prog_type_str. 
+ */ +static void test_libbpf_bpf_prog_type_str(void) +{ + struct btf *btf; + const struct btf_type *t; + const struct btf_enum *e; + int i, n, id; + + btf = btf__parse("/sys/kernel/btf/vmlinux", NULL); + if (!ASSERT_OK_PTR(btf, "btf_parse")) + return; + + /* find enum bpf_prog_type and enumerate each value */ + id = btf__find_by_name_kind(btf, "bpf_prog_type", BTF_KIND_ENUM); + if (!ASSERT_GT(id, 0, "bpf_prog_type_id")) + goto cleanup; + t = btf__type_by_id(btf, id); + e = btf_enum(t); + n = btf_vlen(t); + for (i = 0; i < n; e++, i++) { + enum bpf_prog_type prog_type = (enum bpf_prog_type)e->val; + const char *prog_type_name; + const char *prog_type_str; + char buf[256]; + + prog_type_name = btf__str_by_offset(btf, e->name_off); + prog_type_str = libbpf_bpf_prog_type_str(prog_type); + ASSERT_OK_PTR(prog_type_str, prog_type_name); + + snprintf(buf, sizeof(buf), "BPF_PROG_TYPE_%s", prog_type_str); + uppercase(buf); + + ASSERT_STREQ(buf, prog_type_name, "exp_str_value"); + } + +cleanup: + btf__free(btf); +} + +/* + * Run all libbpf str conversion tests. 
+ */ +void test_libbpf_str(void) +{ + if (test__start_subtest("bpf_attach_type_str")) + test_libbpf_bpf_attach_type_str(); + + if (test__start_subtest("bpf_link_type_str")) + test_libbpf_bpf_link_type_str(); + + if (test__start_subtest("bpf_map_type_str")) + test_libbpf_bpf_map_type_str(); + + if (test__start_subtest("bpf_prog_type_str")) + test_libbpf_bpf_prog_type_str(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index 958dae769c52..cb6a53b3e023 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -646,7 +646,7 @@ static void test_tcp_clear_dtime(struct test_tc_dtime *skel) __u32 *errs = skel->bss->errs[t]; skel->bss->test = t; - test_inet_dtime(AF_INET6, SOCK_STREAM, IP6_DST, 0); + test_inet_dtime(AF_INET6, SOCK_STREAM, IP6_DST, 50000 + t); ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0, dtime_cnt_str(t, INGRESS_FWDNS_P100)); @@ -683,7 +683,7 @@ static void test_tcp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd) errs = skel->bss->errs[t]; skel->bss->test = t; - test_inet_dtime(family, SOCK_STREAM, addr, 0); + test_inet_dtime(family, SOCK_STREAM, addr, 50000 + t); /* fwdns_prio100 prog does not read delivery_time_type, so * kernel puts the (rcv) timetamp in __sk_buff->tstamp @@ -715,13 +715,13 @@ static void test_udp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd) errs = skel->bss->errs[t]; skel->bss->test = t; - test_inet_dtime(family, SOCK_DGRAM, addr, 0); + test_inet_dtime(family, SOCK_DGRAM, addr, 50000 + t); ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0, dtime_cnt_str(t, INGRESS_FWDNS_P100)); /* non mono delivery time is not forwarded */ ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0, - dtime_cnt_str(t, INGRESS_FWDNS_P100)); + dtime_cnt_str(t, INGRESS_FWDNS_P101)); for (i = EGRESS_FWDNS_P100; i < SET_DTIME; i++) ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i)); diff --git 
a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c new file mode 100644 index 000000000000..fb77a123fe89 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c @@ -0,0 +1,183 @@ +// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#define _GNU_SOURCE +#include <test_progs.h> +#include <network_helpers.h> +#include <ctype.h> + +#define CMD_OUT_BUF_SIZE 1023 + +#define SYS(cmd) ({ \ + if (!ASSERT_OK(system(cmd), (cmd))) \ + goto out; \ +}) + +#define SYS_OUT(cmd, ...) ({ \ + char buf[1024]; \ + snprintf(buf, sizeof(buf), (cmd), ##__VA_ARGS__); \ + FILE *f = popen(buf, "r"); \ + if (!ASSERT_OK_PTR(f, buf)) \ + goto out; \ + f; \ +}) + +/* out must be at least `size * 4 + 1` bytes long */ +static void escape_str(char *out, const char *in, size_t size) +{ + static const char *hex = "0123456789ABCDEF"; + size_t i; + + for (i = 0; i < size; i++) { + if (isprint(in[i]) && in[i] != '\\' && in[i] != '\'') { + *out++ = in[i]; + } else { + *out++ = '\\'; + *out++ = 'x'; + *out++ = hex[(in[i] >> 4) & 0xf]; + *out++ = hex[in[i] & 0xf]; + } + } + *out++ = '\0'; +} + +static bool expect_str(char *buf, size_t size, const char *str, const char *name) +{ + static char escbuf_expected[CMD_OUT_BUF_SIZE * 4]; + static char escbuf_actual[CMD_OUT_BUF_SIZE * 4]; + static int duration = 0; + bool ok; + + ok = size == strlen(str) && !memcmp(buf, str, size); + + if (!ok) { + escape_str(escbuf_expected, str, strlen(str)); + escape_str(escbuf_actual, buf, size); + } + CHECK(!ok, name, "unexpected %s: actual '%s' != expected '%s'\n", + name, escbuf_actual, escbuf_expected); + + return ok; +} + +static void test_synproxy(bool xdp) +{ + int server_fd = -1, client_fd = -1, accept_fd = -1; + char *prog_id, *prog_id_end; + struct nstoken *ns = NULL; + FILE *ctrl_file = NULL; + char buf[CMD_OUT_BUF_SIZE]; + size_t size; + + SYS("ip netns 
add synproxy"); + + SYS("ip link add tmp0 type veth peer name tmp1"); + SYS("ip link set tmp1 netns synproxy"); + SYS("ip link set tmp0 up"); + SYS("ip addr replace 198.18.0.1/24 dev tmp0"); + + /* When checksum offload is enabled, the XDP program sees wrong + * checksums and drops packets. + */ + SYS("ethtool -K tmp0 tx off"); + if (xdp) + /* Workaround required for veth. */ + SYS("ip link set tmp0 xdp object xdp_dummy.o section xdp 2> /dev/null"); + + ns = open_netns("synproxy"); + if (!ASSERT_OK_PTR(ns, "setns")) + goto out; + + SYS("ip link set lo up"); + SYS("ip link set tmp1 up"); + SYS("ip addr replace 198.18.0.2/24 dev tmp1"); + SYS("sysctl -w net.ipv4.tcp_syncookies=2"); + SYS("sysctl -w net.ipv4.tcp_timestamps=1"); + SYS("sysctl -w net.netfilter.nf_conntrack_tcp_loose=0"); + SYS("iptables -t raw -I PREROUTING \ + -i tmp1 -p tcp -m tcp --syn --dport 8080 -j CT --notrack"); + SYS("iptables -t filter -A INPUT \ + -i tmp1 -p tcp -m tcp --dport 8080 -m state --state INVALID,UNTRACKED \ + -j SYNPROXY --sack-perm --timestamp --wscale 7 --mss 1460"); + SYS("iptables -t filter -A INPUT \ + -i tmp1 -m state --state INVALID -j DROP"); + + ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --ports 8080 \ + --single --mss4 1460 --mss6 1440 \ + --wscale 7 --ttl 64%s", xdp ? 
"" : " --tc"); + size = fread(buf, 1, sizeof(buf), ctrl_file); + pclose(ctrl_file); + if (!expect_str(buf, size, "Total SYNACKs generated: 0\n", + "initial SYNACKs")) + goto out; + + if (!xdp) { + ctrl_file = SYS_OUT("tc filter show dev tmp1 ingress"); + size = fread(buf, 1, sizeof(buf), ctrl_file); + pclose(ctrl_file); + prog_id = memmem(buf, size, " id ", 4); + if (!ASSERT_OK_PTR(prog_id, "find prog id")) + goto out; + prog_id += 4; + if (!ASSERT_LT(prog_id, buf + size, "find prog id begin")) + goto out; + prog_id_end = prog_id; + while (prog_id_end < buf + size && *prog_id_end >= '0' && + *prog_id_end <= '9') + prog_id_end++; + if (!ASSERT_LT(prog_id_end, buf + size, "find prog id end")) + goto out; + *prog_id_end = '\0'; + } + + server_fd = start_server(AF_INET, SOCK_STREAM, "198.18.0.2", 8080, 0); + if (!ASSERT_GE(server_fd, 0, "start_server")) + goto out; + + close_netns(ns); + ns = NULL; + + client_fd = connect_to_fd(server_fd, 10000); + if (!ASSERT_GE(client_fd, 0, "connect_to_fd")) + goto out; + + accept_fd = accept(server_fd, NULL, NULL); + if (!ASSERT_GE(accept_fd, 0, "accept")) + goto out; + + ns = open_netns("synproxy"); + if (!ASSERT_OK_PTR(ns, "setns")) + goto out; + + if (xdp) + ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --single"); + else + ctrl_file = SYS_OUT("./xdp_synproxy --prog %s --single", + prog_id); + size = fread(buf, 1, sizeof(buf), ctrl_file); + pclose(ctrl_file); + if (!expect_str(buf, size, "Total SYNACKs generated: 1\n", + "SYNACKs after connection")) + goto out; + +out: + if (accept_fd >= 0) + close(accept_fd); + if (client_fd >= 0) + close(client_fd); + if (server_fd >= 0) + close(server_fd); + if (ns) + close_netns(ns); + + system("ip link del tmp0"); + system("ip netns del synproxy"); +} + +void test_xdp_synproxy(void) +{ + if (test__start_subtest("xdp")) + test_synproxy(true); + if (test__start_subtest("tc")) + test_synproxy(false); +} diff --git a/tools/testing/selftests/bpf/progs/bpf_hashmap_full_update_bench.c 
b/tools/testing/selftests/bpf/progs/bpf_hashmap_full_update_bench.c new file mode 100644 index 000000000000..56957557e3e1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_hashmap_full_update_bench.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Bytedance */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +#define MAX_ENTRIES 1000 + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u32); + __type(value, u64); + __uint(max_entries, MAX_ENTRIES); +} hash_map_bench SEC(".maps"); + +u64 __attribute__((__aligned__(256))) percpu_time[256]; +u64 nr_loops; + +static int loop_update_callback(__u32 index, u32 *key) +{ + u64 init_val = 1; + + bpf_map_update_elem(&hash_map_bench, key, &init_val, BPF_ANY); + return 0; +} + +SEC("fentry/" SYS_PREFIX "sys_getpgid") +int benchmark(void *ctx) +{ + u32 cpu = bpf_get_smp_processor_id(); + u32 key = cpu + MAX_ENTRIES; + u64 start_time = bpf_ktime_get_ns(); + + bpf_loop(nr_loops, loop_update_callback, &key, 0); + percpu_time[cpu & 255] = bpf_ktime_get_ns() - start_time; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val.c new file mode 100644 index 000000000000..888e79db6a77 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_enum64val x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___diff.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___diff.c new file mode 100644 index 000000000000..194749130d87 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___diff.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_enum64val___diff x) {} diff --git 
a/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___err_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___err_missing.c new file mode 100644 index 000000000000..3d732d4193e4 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___err_missing.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_enum64val___err_missing x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___val3_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___val3_missing.c new file mode 100644 index 000000000000..17cf5d6a848d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___val3_missing.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_enum64val___val3_missing x) {} diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h index f9dc9766546e..26e103302c05 100644 --- a/tools/testing/selftests/bpf/progs/core_reloc_types.h +++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h @@ -1117,6 +1117,20 @@ struct core_reloc_enumval_output { int anon_val2; }; +struct core_reloc_enum64val_output { + bool unsigned_val1_exists; + bool unsigned_val2_exists; + bool unsigned_val3_exists; + bool signed_val1_exists; + bool signed_val2_exists; + bool signed_val3_exists; + + long unsigned_val1; + long unsigned_val2; + long signed_val1; + long signed_val2; +}; + enum named_enum { NAMED_ENUM_VAL1 = 1, NAMED_ENUM_VAL2 = 2, @@ -1134,6 +1148,23 @@ struct core_reloc_enumval { anon_enum f2; }; +enum named_unsigned_enum64 { + UNSIGNED_ENUM64_VAL1 = 0x1ffffffffULL, + UNSIGNED_ENUM64_VAL2 = 0x2, + UNSIGNED_ENUM64_VAL3 = 0x3ffffffffULL, +}; + +enum named_signed_enum64 { + SIGNED_ENUM64_VAL1 = 0x1ffffffffLL, + SIGNED_ENUM64_VAL2 = -2, + SIGNED_ENUM64_VAL3 = 0x3ffffffffLL, +}; + +struct core_reloc_enum64val { + enum named_unsigned_enum64 f1; + enum named_signed_enum64 f2; 
+}; + /* differing enumerator values */ enum named_enum___diff { NAMED_ENUM_VAL1___diff = 101, @@ -1152,6 +1183,23 @@ struct core_reloc_enumval___diff { anon_enum___diff f2; }; +enum named_unsigned_enum64___diff { + UNSIGNED_ENUM64_VAL1___diff = 0x101ffffffffULL, + UNSIGNED_ENUM64_VAL2___diff = 0x202ffffffffULL, + UNSIGNED_ENUM64_VAL3___diff = 0x303ffffffffULL, +}; + +enum named_signed_enum64___diff { + SIGNED_ENUM64_VAL1___diff = -101, + SIGNED_ENUM64_VAL2___diff = -202, + SIGNED_ENUM64_VAL3___diff = -303, +}; + +struct core_reloc_enum64val___diff { + enum named_unsigned_enum64___diff f1; + enum named_signed_enum64___diff f2; +}; + /* missing (optional) third enum value */ enum named_enum___val3_missing { NAMED_ENUM_VAL1___val3_missing = 111, @@ -1168,6 +1216,21 @@ struct core_reloc_enumval___val3_missing { anon_enum___val3_missing f2; }; +enum named_unsigned_enum64___val3_missing { + UNSIGNED_ENUM64_VAL1___val3_missing = 0x111ffffffffULL, + UNSIGNED_ENUM64_VAL2___val3_missing = 0x222, +}; + +enum named_signed_enum64___val3_missing { + SIGNED_ENUM64_VAL1___val3_missing = 0x111ffffffffLL, + SIGNED_ENUM64_VAL2___val3_missing = -222, +}; + +struct core_reloc_enum64val___val3_missing { + enum named_unsigned_enum64___val3_missing f1; + enum named_signed_enum64___val3_missing f2; +}; + /* missing (mandatory) second enum value, should fail */ enum named_enum___err_missing { NAMED_ENUM_VAL1___err_missing = 1, @@ -1183,3 +1246,18 @@ struct core_reloc_enumval___err_missing { enum named_enum___err_missing f1; anon_enum___err_missing f2; }; + +enum named_unsigned_enum64___err_missing { + UNSIGNED_ENUM64_VAL1___err_missing = 0x1ffffffffULL, + UNSIGNED_ENUM64_VAL3___err_missing = 0x3ffffffffULL, +}; + +enum named_signed_enum64___err_missing { + SIGNED_ENUM64_VAL1___err_missing = 0x1ffffffffLL, + SIGNED_ENUM64_VAL3___err_missing = -3, +}; + +struct core_reloc_enum64val___err_missing { + enum named_unsigned_enum64___err_missing f1; + enum named_signed_enum64___err_missing f2; +}; 
diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c index ce9acf4db8d2..f1c88ad368ef 100644 --- a/tools/testing/selftests/bpf/progs/test_attach_probe.c +++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c @@ -5,6 +5,7 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +#include <stdbool.h> #include "bpf_misc.h" int kprobe_res = 0; @@ -17,6 +18,11 @@ int uprobe_byname_res = 0; int uretprobe_byname_res = 0; int uprobe_byname2_res = 0; int uretprobe_byname2_res = 0; +int uprobe_byname3_sleepable_res = 0; +int uprobe_byname3_res = 0; +int uretprobe_byname3_sleepable_res = 0; +int uretprobe_byname3_res = 0; +void *user_ptr = 0; SEC("kprobe") int handle_kprobe(struct pt_regs *ctx) @@ -32,6 +38,17 @@ int BPF_KPROBE(handle_kprobe_auto) return 0; } +/** + * This program will be manually made sleepable on the userspace side + * and should thus be unattachable. + */ +SEC("kprobe/" SYS_PREFIX "sys_nanosleep") +int handle_kprobe_sleepable(struct pt_regs *ctx) +{ + kprobe_res = 2; + return 0; +} + SEC("kretprobe") int handle_kretprobe(struct pt_regs *ctx) { @@ -93,4 +110,47 @@ int handle_uretprobe_byname2(struct pt_regs *ctx) return 0; } +static __always_inline bool verify_sleepable_user_copy(void) +{ + char data[9]; + + bpf_copy_from_user(data, sizeof(data), user_ptr); + return bpf_strncmp(data, sizeof(data), "test_data") == 0; +} + +SEC("uprobe.s//proc/self/exe:trigger_func3") +int handle_uprobe_byname3_sleepable(struct pt_regs *ctx) +{ + if (verify_sleepable_user_copy()) + uprobe_byname3_sleepable_res = 9; + return 0; +} + +/** + * same target as the uprobe.s above to force sleepable and non-sleepable + * programs in the same bpf_prog_array + */ +SEC("uprobe//proc/self/exe:trigger_func3") +int handle_uprobe_byname3(struct pt_regs *ctx) +{ + uprobe_byname3_res = 10; + return 0; +} + +SEC("uretprobe.s//proc/self/exe:trigger_func3") +int 
handle_uretprobe_byname3_sleepable(struct pt_regs *ctx) +{ + if (verify_sleepable_user_copy()) + uretprobe_byname3_sleepable_res = 11; + return 0; +} + +SEC("uretprobe//proc/self/exe:trigger_func3") +int handle_uretprobe_byname3(struct pt_regs *ctx) +{ + uretprobe_byname3_res = 12; + return 0; +} + + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_enum64val.c b/tools/testing/selftests/bpf/progs/test_core_reloc_enum64val.c new file mode 100644 index 000000000000..63147fbfae6e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_enum64val.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include <linux/bpf.h> +#include <stdint.h> +#include <stdbool.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +char _license[] SEC("license") = "GPL"; + +struct { + char in[256]; + char out[256]; + bool skip; +} data = {}; + +enum named_unsigned_enum64 { + UNSIGNED_ENUM64_VAL1 = 0x1ffffffffULL, + UNSIGNED_ENUM64_VAL2 = 0x2ffffffffULL, + UNSIGNED_ENUM64_VAL3 = 0x3ffffffffULL, +}; + +enum named_signed_enum64 { + SIGNED_ENUM64_VAL1 = 0x1ffffffffLL, + SIGNED_ENUM64_VAL2 = -2, + SIGNED_ENUM64_VAL3 = 0x3ffffffffLL, +}; + +struct core_reloc_enum64val_output { + bool unsigned_val1_exists; + bool unsigned_val2_exists; + bool unsigned_val3_exists; + bool signed_val1_exists; + bool signed_val2_exists; + bool signed_val3_exists; + + long unsigned_val1; + long unsigned_val2; + long signed_val1; + long signed_val2; +}; + +SEC("raw_tracepoint/sys_enter") +int test_core_enum64val(void *ctx) +{ +#if __clang_major__ >= 15 + struct core_reloc_enum64val_output *out = (void *)&data.out; + enum named_unsigned_enum64 named_unsigned = 0; + enum named_signed_enum64 named_signed = 0; + + out->unsigned_val1_exists = bpf_core_enum_value_exists(named_unsigned, UNSIGNED_ENUM64_VAL1); + out->unsigned_val2_exists = bpf_core_enum_value_exists(enum 
named_unsigned_enum64, UNSIGNED_ENUM64_VAL2); + out->unsigned_val3_exists = bpf_core_enum_value_exists(enum named_unsigned_enum64, UNSIGNED_ENUM64_VAL3); + out->signed_val1_exists = bpf_core_enum_value_exists(named_signed, SIGNED_ENUM64_VAL1); + out->signed_val2_exists = bpf_core_enum_value_exists(enum named_signed_enum64, SIGNED_ENUM64_VAL2); + out->signed_val3_exists = bpf_core_enum_value_exists(enum named_signed_enum64, SIGNED_ENUM64_VAL3); + + out->unsigned_val1 = bpf_core_enum_value(named_unsigned, UNSIGNED_ENUM64_VAL1); + out->unsigned_val2 = bpf_core_enum_value(named_unsigned, UNSIGNED_ENUM64_VAL2); + out->signed_val1 = bpf_core_enum_value(named_signed, SIGNED_ENUM64_VAL1); + out->signed_val2 = bpf_core_enum_value(named_signed, SIGNED_ENUM64_VAL2); + /* NAMED_ENUM64_VAL3 value is optional */ + +#else + data.skip = true; +#endif + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_tc_dtime.c b/tools/testing/selftests/bpf/progs/test_tc_dtime.c index 06f300d06dbd..b596479a9ebe 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_dtime.c +++ b/tools/testing/selftests/bpf/progs/test_tc_dtime.c @@ -11,6 +11,8 @@ #include <linux/in.h> #include <linux/ip.h> #include <linux/ipv6.h> +#include <linux/tcp.h> +#include <linux/udp.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> #include <sys/socket.h> @@ -115,6 +117,19 @@ static bool bpf_fwd(void) return test < TCP_IP4_RT_FWD; } +static __u8 get_proto(void) +{ + switch (test) { + case UDP_IP4: + case UDP_IP6: + case UDP_IP4_RT_FWD: + case UDP_IP6_RT_FWD: + return IPPROTO_UDP; + default: + return IPPROTO_TCP; + } +} + /* -1: parse error: TC_ACT_SHOT * 0: not testing traffic: TC_ACT_OK * >0: first byte is the inet_proto, second byte has the netns @@ -122,11 +137,16 @@ static bool bpf_fwd(void) */ static int skb_get_type(struct __sk_buff *skb) { + __u16 dst_ns_port = __bpf_htons(50000 + test); void *data_end = ctx_ptr(skb->data_end); void *data = ctx_ptr(skb->data); __u8 inet_proto = 0, ns = 0; 
struct ipv6hdr *ip6h; + __u16 sport, dport; struct iphdr *iph; + struct tcphdr *th; + struct udphdr *uh; + void *trans; switch (skb->protocol) { case __bpf_htons(ETH_P_IP): @@ -138,6 +158,7 @@ static int skb_get_type(struct __sk_buff *skb) else if (iph->saddr == ip4_dst) ns = DST_NS; inet_proto = iph->protocol; + trans = iph + 1; break; case __bpf_htons(ETH_P_IPV6): ip6h = data + sizeof(struct ethhdr); @@ -148,15 +169,43 @@ static int skb_get_type(struct __sk_buff *skb) else if (v6_equal(ip6h->saddr, (struct in6_addr)ip6_dst)) ns = DST_NS; inet_proto = ip6h->nexthdr; + trans = ip6h + 1; break; default: return 0; } - if ((inet_proto != IPPROTO_TCP && inet_proto != IPPROTO_UDP) || !ns) + /* skb is not from src_ns or dst_ns. + * skb is not the testing IPPROTO. + */ + if (!ns || inet_proto != get_proto()) return 0; - return (ns << 8 | inet_proto); + switch (inet_proto) { + case IPPROTO_TCP: + th = trans; + if (th + 1 > data_end) + return -1; + sport = th->source; + dport = th->dest; + break; + case IPPROTO_UDP: + uh = trans; + if (uh + 1 > data_end) + return -1; + sport = uh->source; + dport = uh->dest; + break; + default: + return 0; + } + + /* The skb is the testing traffic */ + if ((ns == SRC_NS && dport == dst_ns_port) || + (ns == DST_NS && sport == dst_ns_port)) + return (ns << 8 | inet_proto); + + return 0; } /* format: direction@iface@netns diff --git a/tools/testing/selftests/bpf/progs/test_varlen.c b/tools/testing/selftests/bpf/progs/test_varlen.c index 913acdffd90f..3987ff174f1f 100644 --- a/tools/testing/selftests/bpf/progs/test_varlen.c +++ b/tools/testing/selftests/bpf/progs/test_varlen.c @@ -41,20 +41,20 @@ int handler64_unsigned(void *regs) { int pid = bpf_get_current_pid_tgid() >> 32; void *payload = payload1; - u64 len; + long len; /* ignore irrelevant invocations */ if (test_pid != pid || !capture) return 0; len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]); - if (len <= MAX_LEN) { + if (len >= 0) { payload += len; payload1_len1 = len; } 
len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]); - if (len <= MAX_LEN) { + if (len >= 0) { payload += len; payload1_len2 = len; } @@ -123,7 +123,7 @@ int handler32_signed(void *regs) { int pid = bpf_get_current_pid_tgid() >> 32; void *payload = payload4; - int len; + long len; /* ignore irrelevant invocations */ if (test_pid != pid || !capture) diff --git a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c new file mode 100644 index 000000000000..9fd62e94b5e6 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c @@ -0,0 +1,833 @@ +// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include "vmlinux.h" + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> +#include <asm/errno.h> + +#define TC_ACT_OK 0 +#define TC_ACT_SHOT 2 + +#define NSEC_PER_SEC 1000000000L + +#define ETH_ALEN 6 +#define ETH_P_IP 0x0800 +#define ETH_P_IPV6 0x86DD + +#define tcp_flag_word(tp) (((union tcp_word_hdr *)(tp))->words[3]) + +#define IP_DF 0x4000 +#define IP_MF 0x2000 +#define IP_OFFSET 0x1fff + +#define NEXTHDR_TCP 6 + +#define TCPOPT_NOP 1 +#define TCPOPT_EOL 0 +#define TCPOPT_MSS 2 +#define TCPOPT_WINDOW 3 +#define TCPOPT_SACK_PERM 4 +#define TCPOPT_TIMESTAMP 8 + +#define TCPOLEN_MSS 4 +#define TCPOLEN_WINDOW 3 +#define TCPOLEN_SACK_PERM 2 +#define TCPOLEN_TIMESTAMP 10 + +#define TCP_TS_HZ 1000 +#define TS_OPT_WSCALE_MASK 0xf +#define TS_OPT_SACK (1 << 4) +#define TS_OPT_ECN (1 << 5) +#define TSBITS 6 +#define TSMASK (((__u32)1 << TSBITS) - 1) +#define TCP_MAX_WSCALE 14U + +#define IPV4_MAXLEN 60 +#define TCP_MAXLEN 60 + +#define DEFAULT_MSS4 1460 +#define DEFAULT_MSS6 1440 +#define DEFAULT_WSCALE 7 +#define DEFAULT_TTL 64 +#define MAX_ALLOWED_PORTS 8 + +#define swap(a, b) \ + do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) + +#define __get_unaligned_t(type, ptr) ({ \ + const struct { 
type x; } __attribute__((__packed__)) *__pptr = (typeof(__pptr))(ptr); \ + __pptr->x; \ +}) + +#define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr)) + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, __u64); + __uint(max_entries, 2); +} values SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, __u16); + __uint(max_entries, MAX_ALLOWED_PORTS); +} allowed_ports SEC(".maps"); + +extern struct nf_conn *bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, + struct bpf_sock_tuple *bpf_tuple, + __u32 len_tuple, + struct bpf_ct_opts *opts, + __u32 len_opts) __ksym; + +extern struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, + struct bpf_sock_tuple *bpf_tuple, + u32 len_tuple, + struct bpf_ct_opts *opts, + u32 len_opts) __ksym; + +extern void bpf_ct_release(struct nf_conn *ct) __ksym; + +static __always_inline void swap_eth_addr(__u8 *a, __u8 *b) +{ + __u8 tmp[ETH_ALEN]; + + __builtin_memcpy(tmp, a, ETH_ALEN); + __builtin_memcpy(a, b, ETH_ALEN); + __builtin_memcpy(b, tmp, ETH_ALEN); +} + +static __always_inline __u16 csum_fold(__u32 csum) +{ + csum = (csum & 0xffff) + (csum >> 16); + csum = (csum & 0xffff) + (csum >> 16); + return (__u16)~csum; +} + +static __always_inline __u16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, + __u32 len, __u8 proto, + __u32 csum) +{ + __u64 s = csum; + + s += (__u32)saddr; + s += (__u32)daddr; +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + s += proto + len; +#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + s += (proto + len) << 8; +#else +#error Unknown endian +#endif + s = (s & 0xffffffff) + (s >> 32); + s = (s & 0xffffffff) + (s >> 32); + + return csum_fold((__u32)s); +} + +static __always_inline __u16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, __u8 proto, __u32 csum) +{ + __u64 sum = csum; + int i; + +#pragma unroll + for (i = 0; i < 4; i++) + sum += (__u32)saddr->in6_u.u6_addr32[i]; + +#pragma 
unroll + for (i = 0; i < 4; i++) + sum += (__u32)daddr->in6_u.u6_addr32[i]; + + /* Don't combine additions to avoid 32-bit overflow. */ + sum += bpf_htonl(len); + sum += bpf_htonl(proto); + + sum = (sum & 0xffffffff) + (sum >> 32); + sum = (sum & 0xffffffff) + (sum >> 32); + + return csum_fold((__u32)sum); +} + +static __always_inline __u64 tcp_clock_ns(void) +{ + return bpf_ktime_get_ns(); +} + +static __always_inline __u32 tcp_ns_to_ts(__u64 ns) +{ + return ns / (NSEC_PER_SEC / TCP_TS_HZ); +} + +static __always_inline __u32 tcp_time_stamp_raw(void) +{ + return tcp_ns_to_ts(tcp_clock_ns()); +} + +struct tcpopt_context { + __u8 *ptr; + __u8 *end; + void *data_end; + __be32 *tsecr; + __u8 wscale; + bool option_timestamp; + bool option_sack; +}; + +static int tscookie_tcpopt_parse(struct tcpopt_context *ctx) +{ + __u8 opcode, opsize; + + if (ctx->ptr >= ctx->end) + return 1; + if (ctx->ptr >= ctx->data_end) + return 1; + + opcode = ctx->ptr[0]; + + if (opcode == TCPOPT_EOL) + return 1; + if (opcode == TCPOPT_NOP) { + ++ctx->ptr; + return 0; + } + + if (ctx->ptr + 1 >= ctx->end) + return 1; + if (ctx->ptr + 1 >= ctx->data_end) + return 1; + opsize = ctx->ptr[1]; + if (opsize < 2) + return 1; + + if (ctx->ptr + opsize > ctx->end) + return 1; + + switch (opcode) { + case TCPOPT_WINDOW: + if (opsize == TCPOLEN_WINDOW && ctx->ptr + TCPOLEN_WINDOW <= ctx->data_end) + ctx->wscale = ctx->ptr[2] < TCP_MAX_WSCALE ? ctx->ptr[2] : TCP_MAX_WSCALE; + break; + case TCPOPT_TIMESTAMP: + if (opsize == TCPOLEN_TIMESTAMP && ctx->ptr + TCPOLEN_TIMESTAMP <= ctx->data_end) { + ctx->option_timestamp = true; + /* Client's tsval becomes our tsecr. 
*/ + *ctx->tsecr = get_unaligned((__be32 *)(ctx->ptr + 2)); + } + break; + case TCPOPT_SACK_PERM: + if (opsize == TCPOLEN_SACK_PERM) + ctx->option_sack = true; + break; + } + + ctx->ptr += opsize; + + return 0; +} + +static int tscookie_tcpopt_parse_batch(__u32 index, void *context) +{ + int i; + + for (i = 0; i < 7; i++) + if (tscookie_tcpopt_parse(context)) + return 1; + return 0; +} + +static __always_inline bool tscookie_init(struct tcphdr *tcp_header, + __u16 tcp_len, __be32 *tsval, + __be32 *tsecr, void *data_end) +{ + struct tcpopt_context loop_ctx = { + .ptr = (__u8 *)(tcp_header + 1), + .end = (__u8 *)tcp_header + tcp_len, + .data_end = data_end, + .tsecr = tsecr, + .wscale = TS_OPT_WSCALE_MASK, + .option_timestamp = false, + .option_sack = false, + }; + u32 cookie; + + bpf_loop(6, tscookie_tcpopt_parse_batch, &loop_ctx, 0); + + if (!loop_ctx.option_timestamp) + return false; + + cookie = tcp_time_stamp_raw() & ~TSMASK; + cookie |= loop_ctx.wscale & TS_OPT_WSCALE_MASK; + if (loop_ctx.option_sack) + cookie |= TS_OPT_SACK; + if (tcp_header->ece && tcp_header->cwr) + cookie |= TS_OPT_ECN; + *tsval = bpf_htonl(cookie); + + return true; +} + +static __always_inline void values_get_tcpipopts(__u16 *mss, __u8 *wscale, + __u8 *ttl, bool ipv6) +{ + __u32 key = 0; + __u64 *value; + + value = bpf_map_lookup_elem(&values, &key); + if (value && *value != 0) { + if (ipv6) + *mss = (*value >> 32) & 0xffff; + else + *mss = *value & 0xffff; + *wscale = (*value >> 16) & 0xf; + *ttl = (*value >> 24) & 0xff; + return; + } + + *mss = ipv6 ? 
DEFAULT_MSS6 : DEFAULT_MSS4; + *wscale = DEFAULT_WSCALE; + *ttl = DEFAULT_TTL; +} + +static __always_inline void values_inc_synacks(void) +{ + __u32 key = 1; + __u64 *value; + + value = bpf_map_lookup_elem(&values, &key); + if (value) + __sync_fetch_and_add(value, 1); +} + +static __always_inline bool check_port_allowed(__u16 port) +{ + __u32 i; + + for (i = 0; i < MAX_ALLOWED_PORTS; i++) { + __u32 key = i; + __u16 *value; + + value = bpf_map_lookup_elem(&allowed_ports, &key); + + if (!value) + break; + /* 0 is a terminator value. Check it first to avoid matching on + * a forbidden port == 0 and returning true. + */ + if (*value == 0) + break; + + if (*value == port) + return true; + } + + return false; +} + +struct header_pointers { + struct ethhdr *eth; + struct iphdr *ipv4; + struct ipv6hdr *ipv6; + struct tcphdr *tcp; + __u16 tcp_len; +}; + +static __always_inline int tcp_dissect(void *data, void *data_end, + struct header_pointers *hdr) +{ + hdr->eth = data; + if (hdr->eth + 1 > data_end) + return XDP_DROP; + + switch (bpf_ntohs(hdr->eth->h_proto)) { + case ETH_P_IP: + hdr->ipv6 = NULL; + + hdr->ipv4 = (void *)hdr->eth + sizeof(*hdr->eth); + if (hdr->ipv4 + 1 > data_end) + return XDP_DROP; + if (hdr->ipv4->ihl * 4 < sizeof(*hdr->ipv4)) + return XDP_DROP; + if (hdr->ipv4->version != 4) + return XDP_DROP; + + if (hdr->ipv4->protocol != IPPROTO_TCP) + return XDP_PASS; + + hdr->tcp = (void *)hdr->ipv4 + hdr->ipv4->ihl * 4; + break; + case ETH_P_IPV6: + hdr->ipv4 = NULL; + + hdr->ipv6 = (void *)hdr->eth + sizeof(*hdr->eth); + if (hdr->ipv6 + 1 > data_end) + return XDP_DROP; + if (hdr->ipv6->version != 6) + return XDP_DROP; + + /* XXX: Extension headers are not supported and could circumvent + * XDP SYN flood protection. + */ + if (hdr->ipv6->nexthdr != NEXTHDR_TCP) + return XDP_PASS; + + hdr->tcp = (void *)hdr->ipv6 + sizeof(*hdr->ipv6); + break; + default: + /* XXX: VLANs will circumvent XDP SYN flood protection. 
*/ + return XDP_PASS; + } + + if (hdr->tcp + 1 > data_end) + return XDP_DROP; + hdr->tcp_len = hdr->tcp->doff * 4; + if (hdr->tcp_len < sizeof(*hdr->tcp)) + return XDP_DROP; + + return XDP_TX; +} + +static __always_inline int tcp_lookup(void *ctx, struct header_pointers *hdr, bool xdp) +{ + struct bpf_ct_opts ct_lookup_opts = { + .netns_id = BPF_F_CURRENT_NETNS, + .l4proto = IPPROTO_TCP, + }; + struct bpf_sock_tuple tup = {}; + struct nf_conn *ct; + __u32 tup_size; + + if (hdr->ipv4) { + /* TCP doesn't normally use fragments, and XDP can't reassemble + * them. + */ + if ((hdr->ipv4->frag_off & bpf_htons(IP_DF | IP_MF | IP_OFFSET)) != bpf_htons(IP_DF)) + return XDP_DROP; + + tup.ipv4.saddr = hdr->ipv4->saddr; + tup.ipv4.daddr = hdr->ipv4->daddr; + tup.ipv4.sport = hdr->tcp->source; + tup.ipv4.dport = hdr->tcp->dest; + tup_size = sizeof(tup.ipv4); + } else if (hdr->ipv6) { + __builtin_memcpy(tup.ipv6.saddr, &hdr->ipv6->saddr, sizeof(tup.ipv6.saddr)); + __builtin_memcpy(tup.ipv6.daddr, &hdr->ipv6->daddr, sizeof(tup.ipv6.daddr)); + tup.ipv6.sport = hdr->tcp->source; + tup.ipv6.dport = hdr->tcp->dest; + tup_size = sizeof(tup.ipv6); + } else { + /* The verifier can't track that either ipv4 or ipv6 is not + * NULL. 
+ */ + return XDP_ABORTED; + } + if (xdp) + ct = bpf_xdp_ct_lookup(ctx, &tup, tup_size, &ct_lookup_opts, sizeof(ct_lookup_opts)); + else + ct = bpf_skb_ct_lookup(ctx, &tup, tup_size, &ct_lookup_opts, sizeof(ct_lookup_opts)); + if (ct) { + unsigned long status = ct->status; + + bpf_ct_release(ct); + if (status & IPS_CONFIRMED) + return XDP_PASS; + } else if (ct_lookup_opts.error != -ENOENT) { + return XDP_ABORTED; + } + + /* error == -ENOENT || !(status & IPS_CONFIRMED) */ + return XDP_TX; +} + +static __always_inline __u8 tcp_mkoptions(__be32 *buf, __be32 *tsopt, __u16 mss, + __u8 wscale) +{ + __be32 *start = buf; + + *buf++ = bpf_htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss); + + if (!tsopt) + return buf - start; + + if (tsopt[0] & bpf_htonl(1 << 4)) + *buf++ = bpf_htonl((TCPOPT_SACK_PERM << 24) | + (TCPOLEN_SACK_PERM << 16) | + (TCPOPT_TIMESTAMP << 8) | + TCPOLEN_TIMESTAMP); + else + *buf++ = bpf_htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_TIMESTAMP << 8) | + TCPOLEN_TIMESTAMP); + *buf++ = tsopt[0]; + *buf++ = tsopt[1]; + + if ((tsopt[0] & bpf_htonl(0xf)) != bpf_htonl(0xf)) + *buf++ = bpf_htonl((TCPOPT_NOP << 24) | + (TCPOPT_WINDOW << 16) | + (TCPOLEN_WINDOW << 8) | + wscale); + + return buf - start; +} + +static __always_inline void tcp_gen_synack(struct tcphdr *tcp_header, + __u32 cookie, __be32 *tsopt, + __u16 mss, __u8 wscale) +{ + void *tcp_options; + + tcp_flag_word(tcp_header) = TCP_FLAG_SYN | TCP_FLAG_ACK; + if (tsopt && (tsopt[0] & bpf_htonl(1 << 5))) + tcp_flag_word(tcp_header) |= TCP_FLAG_ECE; + tcp_header->doff = 5; /* doff is part of tcp_flag_word. */ + swap(tcp_header->source, tcp_header->dest); + tcp_header->ack_seq = bpf_htonl(bpf_ntohl(tcp_header->seq) + 1); + tcp_header->seq = bpf_htonl(cookie); + tcp_header->window = 0; + tcp_header->urg_ptr = 0; + tcp_header->check = 0; /* Calculate checksum later. 
*/ + + tcp_options = (void *)(tcp_header + 1); + tcp_header->doff += tcp_mkoptions(tcp_options, tsopt, mss, wscale); +} + +static __always_inline void tcpv4_gen_synack(struct header_pointers *hdr, + __u32 cookie, __be32 *tsopt) +{ + __u8 wscale; + __u16 mss; + __u8 ttl; + + values_get_tcpipopts(&mss, &wscale, &ttl, false); + + swap_eth_addr(hdr->eth->h_source, hdr->eth->h_dest); + + swap(hdr->ipv4->saddr, hdr->ipv4->daddr); + hdr->ipv4->check = 0; /* Calculate checksum later. */ + hdr->ipv4->tos = 0; + hdr->ipv4->id = 0; + hdr->ipv4->ttl = ttl; + + tcp_gen_synack(hdr->tcp, cookie, tsopt, mss, wscale); + + hdr->tcp_len = hdr->tcp->doff * 4; + hdr->ipv4->tot_len = bpf_htons(sizeof(*hdr->ipv4) + hdr->tcp_len); +} + +static __always_inline void tcpv6_gen_synack(struct header_pointers *hdr, + __u32 cookie, __be32 *tsopt) +{ + __u8 wscale; + __u16 mss; + __u8 ttl; + + values_get_tcpipopts(&mss, &wscale, &ttl, true); + + swap_eth_addr(hdr->eth->h_source, hdr->eth->h_dest); + + swap(hdr->ipv6->saddr, hdr->ipv6->daddr); + *(__be32 *)hdr->ipv6 = bpf_htonl(0x60000000); + hdr->ipv6->hop_limit = ttl; + + tcp_gen_synack(hdr->tcp, cookie, tsopt, mss, wscale); + + hdr->tcp_len = hdr->tcp->doff * 4; + hdr->ipv6->payload_len = bpf_htons(hdr->tcp_len); +} + +static __always_inline int syncookie_handle_syn(struct header_pointers *hdr, + void *ctx, + void *data, void *data_end, + bool xdp) +{ + __u32 old_pkt_size, new_pkt_size; + /* Unlike clang 10, clang 11 and 12 generate code that doesn't pass the + * BPF verifier if tsopt is not volatile. Volatile forces it to store + * the pointer value and use it directly, otherwise tcp_mkoptions is + * (mis)compiled like this: + * if (!tsopt) + * return buf - start; + * reg = stored_return_value_of_tscookie_init; + * if (reg) + * tsopt = tsopt_buf; + * else + * tsopt = NULL; + * ... + * *buf++ = tsopt[1]; + * It creates a dead branch where tsopt is assigned NULL, but the + * verifier can't prove it's dead and blocks the program. 
+ */ + __be32 * volatile tsopt = NULL; + __be32 tsopt_buf[2] = {}; + __u16 ip_len; + __u32 cookie; + __s64 value; + + /* Checksum is not yet verified, but both checksum failure and TCP + * header checks return XDP_DROP, so the order doesn't matter. + */ + if (hdr->tcp->fin || hdr->tcp->rst) + return XDP_DROP; + + /* Issue SYN cookies on allowed ports, drop SYN packets on blocked + * ports. + */ + if (!check_port_allowed(bpf_ntohs(hdr->tcp->dest))) + return XDP_DROP; + + if (hdr->ipv4) { + /* Check the IPv4 and TCP checksums before creating a SYNACK. */ + value = bpf_csum_diff(0, 0, (void *)hdr->ipv4, hdr->ipv4->ihl * 4, 0); + if (value < 0) + return XDP_ABORTED; + if (csum_fold(value) != 0) + return XDP_DROP; /* Bad IPv4 checksum. */ + + value = bpf_csum_diff(0, 0, (void *)hdr->tcp, hdr->tcp_len, 0); + if (value < 0) + return XDP_ABORTED; + if (csum_tcpudp_magic(hdr->ipv4->saddr, hdr->ipv4->daddr, + hdr->tcp_len, IPPROTO_TCP, value) != 0) + return XDP_DROP; /* Bad TCP checksum. */ + + ip_len = sizeof(*hdr->ipv4); + + value = bpf_tcp_raw_gen_syncookie_ipv4(hdr->ipv4, hdr->tcp, + hdr->tcp_len); + } else if (hdr->ipv6) { + /* Check the TCP checksum before creating a SYNACK. */ + value = bpf_csum_diff(0, 0, (void *)hdr->tcp, hdr->tcp_len, 0); + if (value < 0) + return XDP_ABORTED; + if (csum_ipv6_magic(&hdr->ipv6->saddr, &hdr->ipv6->daddr, + hdr->tcp_len, IPPROTO_TCP, value) != 0) + return XDP_DROP; /* Bad TCP checksum. */ + + ip_len = sizeof(*hdr->ipv6); + + value = bpf_tcp_raw_gen_syncookie_ipv6(hdr->ipv6, hdr->tcp, + hdr->tcp_len); + } else { + return XDP_ABORTED; + } + + if (value < 0) + return XDP_ABORTED; + cookie = (__u32)value; + + if (tscookie_init((void *)hdr->tcp, hdr->tcp_len, + &tsopt_buf[0], &tsopt_buf[1], data_end)) + tsopt = tsopt_buf; + + /* Check that there is enough space for a SYNACK. It also covers + * the check that the destination of the __builtin_memmove below + * doesn't overflow. 
+ */ + if (data + sizeof(*hdr->eth) + ip_len + TCP_MAXLEN > data_end) + return XDP_ABORTED; + + if (hdr->ipv4) { + if (hdr->ipv4->ihl * 4 > sizeof(*hdr->ipv4)) { + struct tcphdr *new_tcp_header; + + new_tcp_header = data + sizeof(*hdr->eth) + sizeof(*hdr->ipv4); + __builtin_memmove(new_tcp_header, hdr->tcp, sizeof(*hdr->tcp)); + hdr->tcp = new_tcp_header; + + hdr->ipv4->ihl = sizeof(*hdr->ipv4) / 4; + } + + tcpv4_gen_synack(hdr, cookie, tsopt); + } else if (hdr->ipv6) { + tcpv6_gen_synack(hdr, cookie, tsopt); + } else { + return XDP_ABORTED; + } + + /* Recalculate checksums. */ + hdr->tcp->check = 0; + value = bpf_csum_diff(0, 0, (void *)hdr->tcp, hdr->tcp_len, 0); + if (value < 0) + return XDP_ABORTED; + if (hdr->ipv4) { + hdr->tcp->check = csum_tcpudp_magic(hdr->ipv4->saddr, + hdr->ipv4->daddr, + hdr->tcp_len, + IPPROTO_TCP, + value); + + hdr->ipv4->check = 0; + value = bpf_csum_diff(0, 0, (void *)hdr->ipv4, sizeof(*hdr->ipv4), 0); + if (value < 0) + return XDP_ABORTED; + hdr->ipv4->check = csum_fold(value); + } else if (hdr->ipv6) { + hdr->tcp->check = csum_ipv6_magic(&hdr->ipv6->saddr, + &hdr->ipv6->daddr, + hdr->tcp_len, + IPPROTO_TCP, + value); + } else { + return XDP_ABORTED; + } + + /* Set the new packet size. 
*/ + old_pkt_size = data_end - data; + new_pkt_size = sizeof(*hdr->eth) + ip_len + hdr->tcp->doff * 4; + if (xdp) { + if (bpf_xdp_adjust_tail(ctx, new_pkt_size - old_pkt_size)) + return XDP_ABORTED; + } else { + if (bpf_skb_change_tail(ctx, new_pkt_size, 0)) + return XDP_ABORTED; + } + + values_inc_synacks(); + + return XDP_TX; +} + +static __always_inline int syncookie_handle_ack(struct header_pointers *hdr) +{ + int err; + + if (hdr->tcp->rst) + return XDP_DROP; + + if (hdr->ipv4) + err = bpf_tcp_raw_check_syncookie_ipv4(hdr->ipv4, hdr->tcp); + else if (hdr->ipv6) + err = bpf_tcp_raw_check_syncookie_ipv6(hdr->ipv6, hdr->tcp); + else + return XDP_ABORTED; + if (err) + return XDP_DROP; + + return XDP_PASS; +} + +static __always_inline int syncookie_part1(void *ctx, void *data, void *data_end, + struct header_pointers *hdr, bool xdp) +{ + struct bpf_ct_opts ct_lookup_opts = { + .netns_id = BPF_F_CURRENT_NETNS, + .l4proto = IPPROTO_TCP, + }; + int ret; + + ret = tcp_dissect(data, data_end, hdr); + if (ret != XDP_TX) + return ret; + + ret = tcp_lookup(ctx, hdr, xdp); + if (ret != XDP_TX) + return ret; + + /* Packet is TCP and doesn't belong to an established connection. */ + + if ((hdr->tcp->syn ^ hdr->tcp->ack) != 1) + return XDP_DROP; + + /* Grow the TCP header to TCP_MAXLEN to be able to pass any hdr->tcp_len + * to bpf_tcp_raw_gen_syncookie_ipv{4,6} and pass the verifier. 
+ */ + if (xdp) { + if (bpf_xdp_adjust_tail(ctx, TCP_MAXLEN - hdr->tcp_len)) + return XDP_ABORTED; + } else { + /* Without volatile the verifier throws this error: + * R9 32-bit pointer arithmetic prohibited + */ + volatile u64 old_len = data_end - data; + + if (bpf_skb_change_tail(ctx, old_len + TCP_MAXLEN - hdr->tcp_len, 0)) + return XDP_ABORTED; + } + + return XDP_TX; +} + +static __always_inline int syncookie_part2(void *ctx, void *data, void *data_end, + struct header_pointers *hdr, bool xdp) +{ + if (hdr->ipv4) { + hdr->eth = data; + hdr->ipv4 = (void *)hdr->eth + sizeof(*hdr->eth); + /* IPV4_MAXLEN is needed when calculating checksum. + * At least sizeof(struct iphdr) is needed here to access ihl. + */ + if ((void *)hdr->ipv4 + IPV4_MAXLEN > data_end) + return XDP_ABORTED; + hdr->tcp = (void *)hdr->ipv4 + hdr->ipv4->ihl * 4; + } else if (hdr->ipv6) { + hdr->eth = data; + hdr->ipv6 = (void *)hdr->eth + sizeof(*hdr->eth); + hdr->tcp = (void *)hdr->ipv6 + sizeof(*hdr->ipv6); + } else { + return XDP_ABORTED; + } + + if ((void *)hdr->tcp + TCP_MAXLEN > data_end) + return XDP_ABORTED; + + /* We run out of registers, tcp_len gets spilled to the stack, and the + * verifier forgets its min and max values checked above in tcp_dissect. + */ + hdr->tcp_len = hdr->tcp->doff * 4; + if (hdr->tcp_len < sizeof(*hdr->tcp)) + return XDP_ABORTED; + + return hdr->tcp->syn ? 
syncookie_handle_syn(hdr, ctx, data, data_end, xdp) : + syncookie_handle_ack(hdr); +} + +SEC("xdp") +int syncookie_xdp(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct header_pointers hdr; + int ret; + + ret = syncookie_part1(ctx, data, data_end, &hdr, true); + if (ret != XDP_TX) + return ret; + + data_end = (void *)(long)ctx->data_end; + data = (void *)(long)ctx->data; + + return syncookie_part2(ctx, data, data_end, &hdr, true); +} + +SEC("tc") +int syncookie_tc(struct __sk_buff *skb) +{ + void *data_end = (void *)(long)skb->data_end; + void *data = (void *)(long)skb->data; + struct header_pointers hdr; + int ret; + + ret = syncookie_part1(skb, data, data_end, &hdr, false); + if (ret != XDP_TX) + return ret == XDP_PASS ? TC_ACT_OK : TC_ACT_SHOT; + + data_end = (void *)(long)skb->data_end; + data = (void *)(long)skb->data; + + ret = syncookie_part2(skb, data, data_end, &hdr, false); + switch (ret) { + case XDP_PASS: + return TC_ACT_OK; + case XDP_TX: + return bpf_redirect(skb->ifindex, 0); + default: + return TC_ACT_SHOT; + } +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py index c0e7acd698ed..e443e6542cb9 100755 --- a/tools/testing/selftests/bpf/test_bpftool_synctypes.py +++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py @@ -58,7 +58,7 @@ class BlockParser(object): class ArrayParser(BlockParser): """ - A parser for extracting dicionaries of values from some BPF-related arrays. + A parser for extracting a set of values from some BPF-related arrays. 
@reader: a pointer to the open file to parse @array_name: name of the array to parse """ @@ -66,7 +66,7 @@ class ArrayParser(BlockParser): def __init__(self, reader, array_name): self.array_name = array_name - self.start_marker = re.compile(f'(static )?const char \* const {self.array_name}\[.*\] = {{\n') + self.start_marker = re.compile(f'(static )?const bool {self.array_name}\[.*\] = {{\n') super().__init__(reader) def search_block(self): @@ -80,15 +80,15 @@ class ArrayParser(BlockParser): Parse a block and return data as a dictionary. Items to extract must be on separate lines in the file. """ - pattern = re.compile('\[(BPF_\w*)\]\s*= "(.*)",?$') - entries = {} + pattern = re.compile('\[(BPF_\w*)\]\s*= (true|false),?$') + entries = set() while True: line = self.reader.readline() if line == '' or re.match(self.end_marker, line): break capture = pattern.search(line) if capture: - entries[capture.group(1)] = capture.group(2) + entries |= {capture.group(1)} return entries class InlineListParser(BlockParser): @@ -115,7 +115,7 @@ class InlineListParser(BlockParser): class FileExtractor(object): """ A generic reader for extracting data from a given file. This class contains - several helper methods that wrap arround parser objects to extract values + several helper methods that wrap around parser objects to extract values from different structures. This class does not offer a way to set a filename, which is expected to be defined in children classes. 
@@ -139,21 +139,19 @@ class FileExtractor(object): def get_types_from_array(self, array_name): """ - Search for and parse an array associating names to BPF_* enum members, - for example: + Search for and parse a list of allowed BPF_* enum members, for example: - const char * const prog_type_name[] = { - [BPF_PROG_TYPE_UNSPEC] = "unspec", - [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter", - [BPF_PROG_TYPE_KPROBE] = "kprobe", + const bool prog_type_name[] = { + [BPF_PROG_TYPE_UNSPEC] = true, + [BPF_PROG_TYPE_SOCKET_FILTER] = true, + [BPF_PROG_TYPE_KPROBE] = true, }; - Return a dictionary with the enum member names as keys and the - associated names as values, for example: + Return a set of the enum members, for example: - {'BPF_PROG_TYPE_UNSPEC': 'unspec', - 'BPF_PROG_TYPE_SOCKET_FILTER': 'socket_filter', - 'BPF_PROG_TYPE_KPROBE': 'kprobe'} + {'BPF_PROG_TYPE_UNSPEC', + 'BPF_PROG_TYPE_SOCKET_FILTER', + 'BPF_PROG_TYPE_KPROBE'} @array_name: name of the array to parse """ @@ -186,6 +184,27 @@ class FileExtractor(object): parser.search_block(start_marker) return parser.parse(pattern, end_marker) + def make_enum_map(self, names, enum_prefix): + """ + Search for and parse an enum containing BPF_* members, just as get_enum + does. However, instead of just returning a set of the variant names, + also generate a textual representation from them by (assuming and) + removing a provided prefix and lowercasing the remainder. Then return a + dict mapping from name to textual representation. 
+ + @enum_values: a set of enum values; e.g., as retrieved by get_enum + @enum_prefix: the prefix to remove from each of the variants to infer + textual representation + """ + mapping = {} + for name in names: + if not name.startswith(enum_prefix): + raise Exception(f"enum variant {name} does not start with {enum_prefix}") + text = name[len(enum_prefix):].lower() + mapping[name] = text + + return mapping + def __get_description_list(self, start_marker, pattern, end_marker): parser = InlineListParser(self.reader) parser.search_block(start_marker) @@ -333,11 +352,9 @@ class ProgFileExtractor(SourceFileExtractor): """ filename = os.path.join(BPFTOOL_DIR, 'prog.c') - def get_prog_types(self): - return self.get_types_from_array('prog_type_name') - def get_attach_types(self): - return self.get_types_from_array('attach_type_strings') + types = self.get_types_from_array('attach_types') + return self.make_enum_map(types, 'BPF_') def get_prog_attach_help(self): return self.get_help_list('ATTACH_TYPE') @@ -348,9 +365,6 @@ class MapFileExtractor(SourceFileExtractor): """ filename = os.path.join(BPFTOOL_DIR, 'map.c') - def get_map_types(self): - return self.get_types_from_array('map_type_name') - def get_map_help(self): return self.get_help_list('TYPE') @@ -363,30 +377,6 @@ class CgroupFileExtractor(SourceFileExtractor): def get_prog_attach_help(self): return self.get_help_list('ATTACH_TYPE') -class CommonFileExtractor(SourceFileExtractor): - """ - An extractor for bpftool's common.c. 
- """ - filename = os.path.join(BPFTOOL_DIR, 'common.c') - - def __init__(self): - super().__init__() - self.attach_types = {} - - def get_attach_types(self): - if not self.attach_types: - self.attach_types = self.get_types_from_array('attach_type_name') - return self.attach_types - - def get_cgroup_attach_types(self): - if not self.attach_types: - self.get_attach_types() - cgroup_types = {} - for (key, value) in self.attach_types.items(): - if key.find('BPF_CGROUP') != -1: - cgroup_types[key] = value - return cgroup_types - class GenericSourceExtractor(SourceFileExtractor): """ An extractor for generic source code files. @@ -403,14 +393,28 @@ class BpfHeaderExtractor(FileExtractor): """ filename = os.path.join(INCLUDE_DIR, 'uapi/linux/bpf.h') + def __init__(self): + super().__init__() + self.attach_types = {} + def get_prog_types(self): return self.get_enum('bpf_prog_type') - def get_map_types(self): - return self.get_enum('bpf_map_type') + def get_map_type_map(self): + names = self.get_enum('bpf_map_type') + return self.make_enum_map(names, 'BPF_MAP_TYPE_') - def get_attach_types(self): - return self.get_enum('bpf_attach_type') + def get_attach_type_map(self): + if not self.attach_types: + names = self.get_enum('bpf_attach_type') + self.attach_types = self.make_enum_map(names, 'BPF_') + return self.attach_types + + def get_cgroup_attach_type_map(self): + if not self.attach_types: + self.get_attach_type_map() + return {name: text for name, text in self.attach_types.items() + if name.startswith('BPF_CGROUP')} class ManPageExtractor(FileExtractor): """ @@ -495,21 +499,12 @@ def main(): """) args = argParser.parse_args() - # Map types (enum) - bpf_info = BpfHeaderExtractor() - ref = bpf_info.get_map_types() - - map_info = MapFileExtractor() - source_map_items = map_info.get_map_types() - map_types_enum = set(source_map_items.keys()) - - verify(ref, map_types_enum, - f'Comparing BPF header (enum bpf_map_type) and {MapFileExtractor.filename} (map_type_name):') # Map 
types (names) - source_map_types = set(source_map_items.values()) + map_info = MapFileExtractor() + source_map_types = set(bpf_info.get_map_type_map().values()) source_map_types.discard('unspec') help_map_types = map_info.get_map_help() @@ -525,37 +520,17 @@ def main(): bashcomp_map_types = bashcomp_info.get_map_types() verify(source_map_types, help_map_types, - f'Comparing {MapFileExtractor.filename} (map_type_name) and {MapFileExtractor.filename} (do_help() TYPE):') + f'Comparing {BpfHeaderExtractor.filename} (bpf_map_type) and {MapFileExtractor.filename} (do_help() TYPE):') verify(source_map_types, man_map_types, - f'Comparing {MapFileExtractor.filename} (map_type_name) and {ManMapExtractor.filename} (TYPE):') + f'Comparing {BpfHeaderExtractor.filename} (bpf_map_type) and {ManMapExtractor.filename} (TYPE):') verify(help_map_options, man_map_options, f'Comparing {MapFileExtractor.filename} (do_help() OPTIONS) and {ManMapExtractor.filename} (OPTIONS):') verify(source_map_types, bashcomp_map_types, - f'Comparing {MapFileExtractor.filename} (map_type_name) and {BashcompExtractor.filename} (BPFTOOL_MAP_CREATE_TYPES):') - - # Program types (enum) - - ref = bpf_info.get_prog_types() - - prog_info = ProgFileExtractor() - prog_types = set(prog_info.get_prog_types().keys()) - - verify(ref, prog_types, - f'Comparing BPF header (enum bpf_prog_type) and {ProgFileExtractor.filename} (prog_type_name):') - - # Attach types (enum) - - ref = bpf_info.get_attach_types() - bpf_info.close() - - common_info = CommonFileExtractor() - attach_types = common_info.get_attach_types() - - verify(ref, attach_types, - f'Comparing BPF header (enum bpf_attach_type) and {CommonFileExtractor.filename} (attach_type_name):') + f'Comparing {BpfHeaderExtractor.filename} (bpf_map_type) and {BashcompExtractor.filename} (BPFTOOL_MAP_CREATE_TYPES):') # Attach types (names) + prog_info = ProgFileExtractor() source_prog_attach_types = set(prog_info.get_attach_types().values()) help_prog_attach_types = 
prog_info.get_prog_attach_help() @@ -571,18 +546,17 @@ def main(): bashcomp_prog_attach_types = bashcomp_info.get_prog_attach_types() verify(source_prog_attach_types, help_prog_attach_types, - f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {ProgFileExtractor.filename} (do_help() ATTACH_TYPE):') + f'Comparing {ProgFileExtractor.filename} (bpf_attach_type) and {ProgFileExtractor.filename} (do_help() ATTACH_TYPE):') verify(source_prog_attach_types, man_prog_attach_types, - f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {ManProgExtractor.filename} (ATTACH_TYPE):') + f'Comparing {ProgFileExtractor.filename} (bpf_attach_type) and {ManProgExtractor.filename} (ATTACH_TYPE):') verify(help_prog_options, man_prog_options, f'Comparing {ProgFileExtractor.filename} (do_help() OPTIONS) and {ManProgExtractor.filename} (OPTIONS):') verify(source_prog_attach_types, bashcomp_prog_attach_types, - f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {BashcompExtractor.filename} (BPFTOOL_PROG_ATTACH_TYPES):') + f'Comparing {ProgFileExtractor.filename} (bpf_attach_type) and {BashcompExtractor.filename} (BPFTOOL_PROG_ATTACH_TYPES):') # Cgroup attach types - - source_cgroup_attach_types = set(common_info.get_cgroup_attach_types().values()) - common_info.close() + source_cgroup_attach_types = set(bpf_info.get_cgroup_attach_type_map().values()) + bpf_info.close() cgroup_info = CgroupFileExtractor() help_cgroup_attach_types = cgroup_info.get_prog_attach_help() @@ -598,13 +572,13 @@ def main(): bashcomp_info.close() verify(source_cgroup_attach_types, help_cgroup_attach_types, - f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {CgroupFileExtractor.filename} (do_help() ATTACH_TYPE):') + f'Comparing {BpfHeaderExtractor.filename} (bpf_attach_type) and {CgroupFileExtractor.filename} (do_help() ATTACH_TYPE):') verify(source_cgroup_attach_types, man_cgroup_attach_types, - f'Comparing {CommonFileExtractor.filename} 
(attach_type_strings) and {ManCgroupExtractor.filename} (ATTACH_TYPE):') + f'Comparing {BpfHeaderExtractor.filename} (bpf_attach_type) and {ManCgroupExtractor.filename} (ATTACH_TYPE):') verify(help_cgroup_options, man_cgroup_options, f'Comparing {CgroupFileExtractor.filename} (do_help() OPTIONS) and {ManCgroupExtractor.filename} (OPTIONS):') verify(source_cgroup_attach_types, bashcomp_cgroup_attach_types, - f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {BashcompExtractor.filename} (BPFTOOL_CGROUP_ATTACH_TYPES):') + f'Comparing {BpfHeaderExtractor.filename} (bpf_attach_type) and {BashcompExtractor.filename} (BPFTOOL_CGROUP_ATTACH_TYPES):') # Options for remaining commands diff --git a/tools/testing/selftests/bpf/test_btf.h b/tools/testing/selftests/bpf/test_btf.h index 128989bed8b7..38782bd47fdc 100644 --- a/tools/testing/selftests/bpf/test_btf.h +++ b/tools/testing/selftests/bpf/test_btf.h @@ -39,6 +39,7 @@ #define BTF_MEMBER_ENC(name, type, bits_offset) \ (name), (type), (bits_offset) #define BTF_ENUM_ENC(name, val) (name), (val) +#define BTF_ENUM64_ENC(name, val_lo32, val_hi32) (name), (val_lo32), (val_hi32) #define BTF_MEMBER_OFFSET(bitfield_size, bits_offset) \ ((bitfield_size) << 24 | (bits_offset)) diff --git a/tools/testing/selftests/bpf/test_xdping.sh b/tools/testing/selftests/bpf/test_xdping.sh index c2f0ddb45531..c3d82e0a7378 100755 --- a/tools/testing/selftests/bpf/test_xdping.sh +++ b/tools/testing/selftests/bpf/test_xdping.sh @@ -95,5 +95,9 @@ for server_args in "" "-I veth0 -s -S" ; do test "$client_args" "$server_args" done +# Test drv mode +test "-I veth1 -N" "-I veth0 -s -N" +test "-I veth1 -N -c 10" "-I veth0 -s -N" + echo "OK. 
All tests passed" exit 0 diff --git a/tools/testing/selftests/bpf/xdp_synproxy.c b/tools/testing/selftests/bpf/xdp_synproxy.c new file mode 100644 index 000000000000..d874ddfb39c4 --- /dev/null +++ b/tools/testing/selftests/bpf/xdp_synproxy.c @@ -0,0 +1,466 @@ +// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include <stdnoreturn.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <getopt.h> +#include <signal.h> +#include <sys/types.h> +#include <bpf/bpf.h> +#include <bpf/libbpf.h> +#include <net/if.h> +#include <linux/if_link.h> +#include <linux/limits.h> + +static unsigned int ifindex; +static __u32 attached_prog_id; +static bool attached_tc; + +static void noreturn cleanup(int sig) +{ + LIBBPF_OPTS(bpf_xdp_attach_opts, opts); + int prog_fd; + int err; + + if (attached_prog_id == 0) + exit(0); + + if (attached_tc) { + LIBBPF_OPTS(bpf_tc_hook, hook, + .ifindex = ifindex, + .attach_point = BPF_TC_INGRESS); + + err = bpf_tc_hook_destroy(&hook); + if (err < 0) { + fprintf(stderr, "Error: bpf_tc_hook_destroy: %s\n", strerror(-err)); + fprintf(stderr, "Failed to destroy the TC hook\n"); + exit(1); + } + exit(0); + } + + prog_fd = bpf_prog_get_fd_by_id(attached_prog_id); + if (prog_fd < 0) { + fprintf(stderr, "Error: bpf_prog_get_fd_by_id: %s\n", strerror(-prog_fd)); + err = bpf_xdp_attach(ifindex, -1, 0, NULL); + if (err < 0) { + fprintf(stderr, "Error: bpf_set_link_xdp_fd: %s\n", strerror(-err)); + fprintf(stderr, "Failed to detach XDP program\n"); + exit(1); + } + } else { + opts.old_prog_fd = prog_fd; + err = bpf_xdp_attach(ifindex, -1, XDP_FLAGS_REPLACE, &opts); + close(prog_fd); + if (err < 0) { + fprintf(stderr, "Error: bpf_set_link_xdp_fd_opts: %s\n", strerror(-err)); + /* Not an error if already replaced by someone else. 
*/
+			if (err != -EEXIST) {
+				fprintf(stderr, "Failed to detach XDP program\n");
+				exit(1);
+			}
+		}
+	}
+	exit(0);
+}
+
+static noreturn void usage(const char *progname)
+{
+	fprintf(stderr, "Usage: %s [--iface <iface>|--prog <prog_id>] [--mss4 <mss ipv4> --mss6 <mss ipv6> --wscale <wscale> --ttl <ttl>] [--ports <port1>,<port2>,...] [--single] [--tc]\n",
+		progname);
+	exit(1);
+}
+
+static unsigned long parse_arg_ul(const char *progname, const char *arg, unsigned long limit)
+{
+	unsigned long res;
+	char *endptr;
+
+	errno = 0;
+	res = strtoul(arg, &endptr, 10);
+	if (errno != 0 || *endptr != '\0' || arg[0] == '\0' || res > limit)
+		usage(progname);
+
+	return res;
+}
+
+/* Parse the command line into the caller-provided output variables.
+ *
+ * Every output is reset before option parsing starts, so a flag that is
+ * absent from the command line (e.g. --tc) reads back as false/0/NULL rather
+ * than whatever the caller's uninitialized local happened to contain.
+ * Any malformed, unknown or out-of-range option ends in usage(), which exits.
+ */
+static void parse_options(int argc, char *argv[], unsigned int *ifindex, __u32 *prog_id,
+			  __u64 *tcpipopts, char **ports, bool *single, bool *tc)
+{
+	static struct option long_options[] = {
+		{ "help", no_argument, NULL, 'h' },
+		{ "iface", required_argument, NULL, 'i' },
+		{ "prog", required_argument, NULL, 'x' },
+		{ "mss4", required_argument, NULL, 4 },
+		{ "mss6", required_argument, NULL, 6 },
+		{ "wscale", required_argument, NULL, 'w' },
+		{ "ttl", required_argument, NULL, 't' },
+		{ "ports", required_argument, NULL, 'p' },
+		{ "single", no_argument, NULL, 's' },
+		{ "tc", no_argument, NULL, 'c' },
+		{ NULL, 0, NULL, 0 },
+	};
+	unsigned long mss4, mss6, wscale, ttl;
+	unsigned int tcpipopts_mask = 0;
+
+	if (argc < 2)
+		usage(argv[0]);
+
+	*ifindex = 0;
+	*prog_id = 0;
+	*tcpipopts = 0;
+	*ports = NULL;
+	*single = false;
+	/* main() passes an uninitialized bool; without this reset the XDP
+	 * mode could randomly take the TC attach path.
+	 */
+	*tc = false;
+
+	while (true) {
+		int opt;
+
+		opt = getopt_long(argc, argv, "", long_options, NULL);
+		if (opt == -1)
+			break;
+
+		switch (opt) {
+		case 'h':
+			usage(argv[0]);
+			break;
+		case 'i':
+			*ifindex = if_nametoindex(optarg);
+			if (*ifindex == 0)
+				usage(argv[0]);
+			break;
+		case 'x':
+			*prog_id = parse_arg_ul(argv[0], optarg, UINT32_MAX);
+			if (*prog_id == 0)
+				usage(argv[0]);
+			break;
+		case 4:
+			mss4 = parse_arg_ul(argv[0], optarg, UINT16_MAX);
+			tcpipopts_mask |= 1 << 0;
+			break;
+		case 6:
+			mss6 = parse_arg_ul(argv[0], optarg, UINT16_MAX);
+			tcpipopts_mask |= 1 << 1;
+			break;
+		case 'w':
+			wscale = parse_arg_ul(argv[0], optarg, 14);
+			tcpipopts_mask |= 1 << 2;
+			break;
+		case 't':
+			ttl = parse_arg_ul(argv[0], optarg, UINT8_MAX);
+			tcpipopts_mask |= 1 << 3;
+			break;
+		case 'p':
+			*ports = optarg;
+			break;
+		case 's':
+			*single = true;
+			break;
+		case 'c':
+			*tc = true;
+			break;
+		default:
+			usage(argv[0]);
+		}
+	}
+	if (optind < argc)
+		usage(argv[0]);
+
+	/* The four TCP/IP options must be given all together or not at all. */
+	if (tcpipopts_mask == 0xf) {
+		if (mss4 == 0 || mss6 == 0 || wscale == 0 || ttl == 0)
+			usage(argv[0]);
+		/* Widen before shifting: unsigned long may be only 32 bits
+		 * wide, in which case "mss6 << 32" is undefined.
+		 */
+		*tcpipopts = ((__u64)mss6 << 32) | (ttl << 24) | (wscale << 16) | mss4;
+	} else if (tcpipopts_mask != 0) {
+		usage(argv[0]);
+	}
+
+	/* Exactly one of --iface and --prog has to be specified. */
+	if (*ifindex != 0 && *prog_id != 0)
+		usage(argv[0]);
+	if (*ifindex == 0 && *prog_id == 0)
+		usage(argv[0]);
+}
+
+/* Open and load "<argv0>_kern.o", then attach its syncookie_tc or
+ * syncookie_xdp program to ifindex (TC ingress hook or XDP respectively).
+ *
+ * Before attaching, the program id is stored in attached_prog_id and the
+ * SIGINT/SIGTERM handlers are pointed at cleanup(); both are undone if the
+ * attach fails. Returns 0 on success or a negative error. Once the object
+ * has been opened it is closed on every return path.
+ */
+static int syncookie_attach(const char *argv0, unsigned int ifindex, bool tc)
+{
+	struct bpf_prog_info info = {};
+	__u32 info_len = sizeof(info);
+	char xdp_filename[PATH_MAX];
+	struct bpf_program *prog;
+	struct bpf_object *obj;
+	int prog_fd;
+	int err;
+
+	snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv0);
+	obj = bpf_object__open_file(xdp_filename, NULL);
+	err = libbpf_get_error(obj);
+	if (err < 0) {
+		fprintf(stderr, "Error: bpf_object__open_file: %s\n", strerror(-err));
+		/* Nothing was opened, so there is nothing to close. */
+		return err;
+	}
+
+	err = bpf_object__load(obj);
+	if (err < 0) {
+		fprintf(stderr, "Error: bpf_object__load: %s\n", strerror(-err));
+		goto out;
+	}
+
+	prog = bpf_object__find_program_by_name(obj, tc ? "syncookie_tc" : "syncookie_xdp");
+	if (!prog) {
+		fprintf(stderr, "Error: bpf_object__find_program_by_name: program was not found\n");
+		err = -ENOENT;
+		goto out;
+	}
+
+	prog_fd = bpf_program__fd(prog);
+
+	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+	if (err < 0) {
+		fprintf(stderr, "Error: bpf_obj_get_info_by_fd: %s\n", strerror(-err));
+		goto out;
+	}
+	attached_tc = tc;
+	attached_prog_id = info.id;
+	signal(SIGINT, cleanup);
+	signal(SIGTERM, cleanup);
+	if (tc) {
+		LIBBPF_OPTS(bpf_tc_hook, hook,
+			    .ifindex = ifindex,
+			    .attach_point = BPF_TC_INGRESS);
+		LIBBPF_OPTS(bpf_tc_opts, opts,
+			    .handle = 1,
+			    .priority = 1,
+			    .prog_fd = prog_fd);
+
+		err = bpf_tc_hook_create(&hook);
+		if (err < 0) {
+			fprintf(stderr, "Error: bpf_tc_hook_create: %s\n",
+				strerror(-err));
+			goto fail;
+		}
+		err = bpf_tc_attach(&hook, &opts);
+		if (err < 0) {
+			fprintf(stderr, "Error: bpf_tc_attach: %s\n",
+				strerror(-err));
+			goto fail;
+		}
+
+	} else {
+		err = bpf_xdp_attach(ifindex, prog_fd,
+				     XDP_FLAGS_UPDATE_IF_NOEXIST, NULL);
+		if (err < 0) {
+			fprintf(stderr, "Error: bpf_set_link_xdp_fd: %s\n",
+				strerror(-err));
+			goto fail;
+		}
+	}
+	err = 0;
+out:
+	bpf_object__close(obj);
+	return err;
+fail:
+	/* Attach failed: drop the handlers and forget the program id. */
+	signal(SIGINT, SIG_DFL);
+	signal(SIGTERM, SIG_DFL);
+	attached_prog_id = 0;
+	goto out;
+}
+
+static int syncookie_open_bpf_maps(__u32 prog_id, int *values_map_fd, int *ports_map_fd)
+{
+	struct bpf_prog_info prog_info;
+	__u32 map_ids[8];
+	__u32 info_len;
+	int prog_fd;
+	int err;
+	int i;
+
+	*values_map_fd = -1;
+	*ports_map_fd = -1;
+
+	prog_fd = bpf_prog_get_fd_by_id(prog_id);
+	if (prog_fd < 0) {
+		fprintf(stderr, "Error: bpf_prog_get_fd_by_id: %s\n", strerror(-prog_fd));
+		return prog_fd;
+	}
+
+	prog_info = (struct bpf_prog_info) {
+		.nr_map_ids = 8,
+		.map_ids = (__u64)map_ids,
+	};
+	info_len = sizeof(prog_info);
+
+	err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len);
+	if (err != 0) {
+		fprintf(stderr, "Error: bpf_obj_get_info_by_fd: %s\n", strerror(-err));
+		goto out;
+	}
+
+	if (prog_info.nr_map_ids < 2) {
+		fprintf(stderr, "Error: Found %u BPF maps, expected at least 2\n",
+			prog_info.nr_map_ids);
+		err = -ENOENT;
+		goto out;
+	}
+
+	/* On return nr_map_ids holds the program's total map count, but only
+	 * as many ids as map_ids[] has room for were written. Bound the walk
+	 * by the array size too, so a program with more maps cannot make us
+	 * read past the end of map_ids[].
+	 */
+	for (i = 0; i < prog_info.nr_map_ids &&
+		    i < sizeof(map_ids) / sizeof(map_ids[0]); i++) {
+		struct bpf_map_info map_info = {};
+		int map_fd;
+
+		err = bpf_map_get_fd_by_id(map_ids[i]);
+		if (err < 0) {
+			fprintf(stderr, "Error: bpf_map_get_fd_by_id: %s\n", strerror(-err));
+			goto err_close_map_fds;
+		}
+		map_fd = err;
+
+		info_len = sizeof(map_info);
+		err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len);
+		if (err != 0) {
+			fprintf(stderr, "Error: bpf_obj_get_info_by_fd: %s\n", strerror(-err));
+			close(map_fd);
+			goto err_close_map_fds;
+		}
+		/* Keep the two maps we need, identified by name. */
+		if (strcmp(map_info.name, "values") == 0) {
+			*values_map_fd = map_fd;
+			continue;
+		}
+		if (strcmp(map_info.name, "allowed_ports") == 0) {
+			*ports_map_fd = map_fd;
+			continue;
+		}
+		close(map_fd);
+	}
+
+	if (*values_map_fd != -1 && *ports_map_fd != -1) {
+		err = 0;
+		goto out;
+	}
+
+	err = -ENOENT;
+
+err_close_map_fds:
+	if (*values_map_fd != -1)
+		close(*values_map_fd);
+	if (*ports_map_fd != -1)
+		close(*ports_map_fd);
+	*values_map_fd = -1;
+	*ports_map_fd = -1;
+
+out:
+	close(prog_fd);
+	return err;
+}
+
+/* Entry point: parse the options, attach the program unless an existing one
+ * is found or given, update its maps as requested, then print the SYNACK
+ * counter.
+ */
+int main(int argc, char *argv[])
+{
+	int values_map_fd, ports_map_fd;
+	__u64 tcpipopts;
+	bool firstiter;
+	__u64 prevcnt;
+	__u32 prog_id;
+	char *ports;
+	bool single;
+	int err = 0;
+	bool tc;
+
+	parse_options(argc, argv, &ifindex, &prog_id, &tcpipopts, &ports,
+		      &single, &tc);
+
+	if (prog_id == 0) {
+		if (!tc) {
+			err = bpf_xdp_query_id(ifindex, 0, &prog_id);
+			if (err < 0) {
+				fprintf(stderr, "Error: bpf_get_link_xdp_id: %s\n",
+					strerror(-err));
+				goto out;
+			}
+		}
+		if (prog_id == 0) {
+			err = syncookie_attach(argv[0], ifindex, tc);
+			if (err < 0)
+				goto out;
+			prog_id = attached_prog_id;
+		}
+	}
+
+	err = syncookie_open_bpf_maps(prog_id, &values_map_fd, &ports_map_fd);
+	if (err < 0)
+		goto out;
+
+	if (ports) {
+		__u16 port_last = 0;
+		__u32 port_idx = 0;
+
char *p = ports; + + fprintf(stderr, "Replacing allowed ports\n"); + + while (p && *p != '\0') { + char *token = strsep(&p, ","); + __u16 port; + + port = parse_arg_ul(argv[0], token, UINT16_MAX); + err = bpf_map_update_elem(ports_map_fd, &port_idx, &port, BPF_ANY); + if (err != 0) { + fprintf(stderr, "Error: bpf_map_update_elem: %s\n", strerror(-err)); + fprintf(stderr, "Failed to add port %u (index %u)\n", + port, port_idx); + goto out_close_maps; + } + fprintf(stderr, "Added port %u\n", port); + port_idx++; + } + err = bpf_map_update_elem(ports_map_fd, &port_idx, &port_last, BPF_ANY); + if (err != 0) { + fprintf(stderr, "Error: bpf_map_update_elem: %s\n", strerror(-err)); + fprintf(stderr, "Failed to add the terminator value 0 (index %u)\n", + port_idx); + goto out_close_maps; + } + } + + if (tcpipopts) { + __u32 key = 0; + + fprintf(stderr, "Replacing TCP/IP options\n"); + + err = bpf_map_update_elem(values_map_fd, &key, &tcpipopts, BPF_ANY); + if (err != 0) { + fprintf(stderr, "Error: bpf_map_update_elem: %s\n", strerror(-err)); + goto out_close_maps; + } + } + + if ((ports || tcpipopts) && attached_prog_id == 0 && !single) + goto out_close_maps; + + prevcnt = 0; + firstiter = true; + while (true) { + __u32 key = 1; + __u64 value; + + err = bpf_map_lookup_elem(values_map_fd, &key, &value); + if (err != 0) { + fprintf(stderr, "Error: bpf_map_lookup_elem: %s\n", strerror(-err)); + goto out_close_maps; + } + if (firstiter) { + prevcnt = value; + firstiter = false; + } + if (single) { + printf("Total SYNACKs generated: %llu\n", value); + break; + } + printf("SYNACKs generated: %llu (total %llu)\n", value - prevcnt, value); + prevcnt = value; + sleep(1); + } + +out_close_maps: + close(values_map_fd); + close(ports_map_fd); +out: + return err == 0 ? 
0 : 1; +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_counter_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_counter_scale.sh new file mode 100644 index 000000000000..a43a9926e690 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/rif_counter_scale.sh @@ -0,0 +1,107 @@ +# SPDX-License-Identifier: GPL-2.0 + +RIF_COUNTER_NUM_NETIFS=2 + +rif_counter_addr4() +{ + local i=$1; shift + local p=$1; shift + + printf 192.0.%d.%d $((i / 64)) $(((4 * i % 256) + p)) +} + +rif_counter_addr4pfx() +{ + rif_counter_addr4 $@ + printf /30 +} + +rif_counter_h1_create() +{ + simple_if_init $h1 +} + +rif_counter_h1_destroy() +{ + simple_if_fini $h1 +} + +rif_counter_h2_create() +{ + simple_if_init $h2 +} + +rif_counter_h2_destroy() +{ + simple_if_fini $h2 +} + +rif_counter_setup_prepare() +{ + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + + vrf_prepare + + rif_counter_h1_create + rif_counter_h2_create +} + +rif_counter_cleanup() +{ + local count=$1; shift + + pre_cleanup + + for ((i = 1; i <= count; i++)); do + vlan_destroy $h2 $i + done + + rif_counter_h2_destroy + rif_counter_h1_destroy + + vrf_cleanup + + if [[ -v RIF_COUNTER_BATCH_FILE ]]; then + rm -f $RIF_COUNTER_BATCH_FILE + fi +} + + +rif_counter_test() +{ + local count=$1; shift + local should_fail=$1; shift + + RIF_COUNTER_BATCH_FILE="$(mktemp)" + + for ((i = 1; i <= count; i++)); do + vlan_create $h2 $i v$h2 $(rif_counter_addr4pfx $i 2) + done + for ((i = 1; i <= count; i++)); do + cat >> $RIF_COUNTER_BATCH_FILE <<-EOF + stats set dev $h2.$i l3_stats on + EOF + done + + ip -b $RIF_COUNTER_BATCH_FILE + check_err_fail $should_fail $? 
"RIF counter enablement" +} + +rif_counter_traffic_test() +{ + local count=$1; shift + local i; + + for ((i = count; i > 0; i /= 2)); do + $MZ $h1 -Q $i -c 1 -d 20msec -p 100 -a own -b $(mac_get $h2) \ + -A $(rif_counter_addr4 $i 1) \ + -B $(rif_counter_addr4 $i 2) \ + -q -t udp sp=54321,dp=12345 + done + for ((i = count; i > 0; i /= 2)); do + busywait "$TC_HIT_TIMEOUT" until_counter_is "== 1" \ + hw_stats_get l3_stats $h2.$i rx packets > /dev/null + check_err $? "Traffic not seen at RIF $h2.$i" + done +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh index e9f65bd2e299..688338bbeb97 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh @@ -25,7 +25,16 @@ cleanup() trap cleanup EXIT -ALL_TESTS="router tc_flower mirror_gre tc_police port rif_mac_profile" +ALL_TESTS=" + router + tc_flower + mirror_gre + tc_police + port + rif_mac_profile + rif_counter +" + for current_test in ${TESTS:-$ALL_TESTS}; do RET_FIN=0 source ${current_test}_scale.sh @@ -36,16 +45,32 @@ for current_test in ${TESTS:-$ALL_TESTS}; do for should_fail in 0 1; do RET=0 target=$(${current_test}_get_target "$should_fail") + if ((target == 0)); then + log_test_skip "'$current_test' should_fail=$should_fail test" + continue + fi + ${current_test}_setup_prepare setup_wait $num_netifs + # Update target in case occupancy of a certain resource changed + # following the test setup. 
+ target=$(${current_test}_get_target "$should_fail") ${current_test}_test "$target" "$should_fail" - ${current_test}_cleanup - devlink_reload if [[ "$should_fail" -eq 0 ]]; then log_test "'$current_test' $target" + + if ((!RET)); then + tt=${current_test}_traffic_test + if [[ $(type -t $tt) == "function" ]]; then + $tt "$target" + log_test "'$current_test' $target traffic test" + fi + fi else log_test "'$current_test' overflow $target" fi + ${current_test}_cleanup $target + devlink_reload RET_FIN=$(( RET_FIN || RET )) done done diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_counter_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_counter_scale.sh new file mode 120000 index 000000000000..1f5752e8ffc0 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_counter_scale.sh @@ -0,0 +1 @@ +../spectrum/rif_counter_scale.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh index efd798a85931..4444bbace1a9 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh @@ -4,17 +4,22 @@ source ../tc_flower_scale.sh tc_flower_get_target() { local should_fail=$1; shift + local max_cnts # The driver associates a counter with each tc filter, which means the # number of supported filters is bounded by the number of available # counters. - # Currently, the driver supports 30K (30,720) flow counters and six of - # these are used for multicast routing. - local target=30714 + max_cnts=$(devlink_resource_size_get counters flow) + + # Remove already allocated counters. + ((max_cnts -= $(devlink_resource_occ_get counters flow))) + + # Each rule uses two counters, for packets and bytes. + ((max_cnts /= 2)) if ((! 
should_fail)); then - echo $target + echo $max_cnts else - echo $((target + 1)) + echo $((max_cnts + 1)) fi } diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh index dea33dc93790..95d9f710a630 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh @@ -22,7 +22,16 @@ cleanup() devlink_sp_read_kvd_defaults trap cleanup EXIT -ALL_TESTS="router tc_flower mirror_gre tc_police port rif_mac_profile" +ALL_TESTS=" + router + tc_flower + mirror_gre + tc_police + port + rif_mac_profile + rif_counter +" + for current_test in ${TESTS:-$ALL_TESTS}; do RET_FIN=0 source ${current_test}_scale.sh @@ -41,15 +50,31 @@ for current_test in ${TESTS:-$ALL_TESTS}; do for should_fail in 0 1; do RET=0 target=$(${current_test}_get_target "$should_fail") + if ((target == 0)); then + log_test_skip "'$current_test' [$profile] should_fail=$should_fail test" + continue + fi ${current_test}_setup_prepare setup_wait $num_netifs + # Update target in case occupancy of a certain resource + # changed following the test setup. 
+ target=$(${current_test}_get_target "$should_fail") ${current_test}_test "$target" "$should_fail" - ${current_test}_cleanup if [[ "$should_fail" -eq 0 ]]; then log_test "'$current_test' [$profile] $target" + + if ((!RET)); then + tt=${current_test}_traffic_test + if [[ $(type -t $tt) == "function" ]] + then + $tt "$target" + log_test "'$current_test' [$profile] $target traffic test" + fi + fi else log_test "'$current_test' [$profile] overflow $target" fi + ${current_test}_cleanup $target RET_FIN=$(( RET_FIN || RET )) done done diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_counter_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_counter_scale.sh new file mode 100644 index 000000000000..d44536276e8a --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_counter_scale.sh @@ -0,0 +1,34 @@ +# SPDX-License-Identifier: GPL-2.0 +source ../rif_counter_scale.sh + +rif_counter_get_target() +{ + local should_fail=$1; shift + local max_cnts + local max_rifs + local target + + max_rifs=$(devlink_resource_size_get rifs) + max_cnts=$(devlink_resource_size_get counters rif) + + # Remove already allocated RIFs. + ((max_rifs -= $(devlink_resource_occ_get rifs))) + + # 10 KVD slots per counter, ingress+egress counters per RIF + ((max_cnts /= 20)) + + # Pointless to run the overflow test if we don't have enough RIFs to + # host all the counters. + if ((max_cnts > max_rifs && should_fail)); then + echo 0 + return + fi + + target=$((max_rifs < max_cnts ? max_rifs : max_cnts)) + + if ((! 
should_fail)); then + echo $target + else + echo $((target + 1)) + fi +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh index aa74be9f47c8..d3d9e60d6ddf 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh @@ -77,6 +77,7 @@ tc_flower_rules_create() filter add dev $h2 ingress \ prot ipv6 \ pref 1000 \ + handle 42$i \ flower $tcflags dst_ip $(tc_flower_addr $i) \ action drop EOF @@ -121,3 +122,19 @@ tc_flower_test() tcflags="skip_sw" __tc_flower_test $count $should_fail } + +tc_flower_traffic_test() +{ + local count=$1; shift + local i; + + for ((i = count - 1; i > 0; i /= 2)); do + $MZ -6 $h1 -c 1 -d 20msec -p 100 -a own -b $(mac_get $h2) \ + -A $(tc_flower_addr 0) -B $(tc_flower_addr $i) \ + -q -t udp sp=54321,dp=12345 + done + for ((i = count - 1; i > 0; i /= 2)); do + tc_check_packets "dev $h2 ingress" 42$i 1 + check_err $? 
"Traffic not seen at rule #$i" + done +} diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh index 28d568c48a73..91e431cd919e 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh @@ -141,12 +141,13 @@ switch_create() ip link set dev $swp4 up ip link add name br1 type bridge vlan_filtering 1 - ip link set dev br1 up - __addr_add_del br1 add 192.0.2.129/32 - ip -4 route add 192.0.2.130/32 dev br1 team_create lag loadbalance $swp3 $swp4 ip link set dev lag master br1 + + ip link set dev br1 up + __addr_add_del br1 add 192.0.2.129/32 + ip -4 route add 192.0.2.130/32 dev br1 } switch_destroy() diff --git a/tools/testing/selftests/tc-testing/.gitignore b/tools/testing/selftests/tc-testing/.gitignore index d52f65de23b4..9fe1cef72728 100644 --- a/tools/testing/selftests/tc-testing/.gitignore +++ b/tools/testing/selftests/tc-testing/.gitignore @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only __pycache__/ *.pyc -plugins/ *.xml *.tap tdc_config_local.py |