diff options
258 files changed, 10015 insertions, 3621 deletions
diff --git a/Documentation/devicetree/bindings/net/ti,dp83867.txt b/Documentation/devicetree/bindings/net/ti,dp83867.txt index 02c4353b5cf2..9ef9338aaee1 100644 --- a/Documentation/devicetree/bindings/net/ti,dp83867.txt +++ b/Documentation/devicetree/bindings/net/ti,dp83867.txt @@ -25,6 +25,8 @@ Optional property: software needs to take when this pin is strapped in these modes. See data manual for details. + - ti,clk-output-sel - Muxing option for CLK_OUT pin - see dt-bindings/net/ti-dp83867.h + for applicable values. Note: ti,min-output-impedance and ti,max-output-impedance are mutually exclusive. When both properties are present ti,max-output-impedance diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c index 204afe66de92..3d7a5c149af3 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -203,6 +203,12 @@ static const struct usb_device_id ath3k_blist_tbl[] = { { } /* Terminating entry */ }; +static inline void ath3k_log_failed_loading(int err, int len, int size) +{ + BT_ERR("Error in firmware loading err = %d, len = %d, size = %d", + err, len, size); +} + #define USB_REQ_DFU_DNLOAD 1 #define BULK_SIZE 4096 #define FW_HDR_SIZE 20 @@ -227,15 +233,16 @@ static int ath3k_load_firmware(struct usb_device *udev, return -ENOMEM; } - memcpy(send_buf, firmware->data, 20); + memcpy(send_buf, firmware->data, FW_HDR_SIZE); err = usb_control_msg(udev, pipe, USB_REQ_DFU_DNLOAD, USB_TYPE_VENDOR, - 0, 0, send_buf, 20, USB_CTRL_SET_TIMEOUT); + 0, 0, send_buf, FW_HDR_SIZE, + USB_CTRL_SET_TIMEOUT); if (err < 0) { BT_ERR("Can't change to loading configuration err"); goto error; } - sent += 20; - count -= 20; + sent += FW_HDR_SIZE; + count -= FW_HDR_SIZE; pipe = usb_sndbulkpipe(udev, 0x02); @@ -250,8 +257,7 @@ static int ath3k_load_firmware(struct usb_device *udev, &len, 3000); if (err || (len != size)) { - BT_ERR("Error in firmware loading err = %d," - "len = %d, size = %d", err, len, size); + ath3k_log_failed_loading(err, len, size); goto error; } @@ -350,8 +356,7 @@ static int ath3k_load_fwfile(struct usb_device *udev, err = usb_bulk_msg(udev, pipe, send_buf, size, &len, 3000); if (err || (len != size)) { - BT_ERR("Error in firmware loading err = %d," - "len = %d, size = %d", err, len, size); + ath3k_log_failed_loading(err, len, size); kfree(send_buf); return err; } @@ -398,7 +403,7 @@ static int ath3k_set_normal_mode(struct usb_device *udev) static int ath3k_load_patch(struct usb_device *udev) { unsigned char fw_state; - char filename[ATH3K_NAME_LEN] = {0}; + char filename[ATH3K_NAME_LEN]; const struct firmware *firmware; struct ath3k_version fw_version; __u32 pt_rom_version, pt_build_version; @@ -451,7 +456,7 @@ static int ath3k_load_patch(struct usb_device *udev) static int ath3k_load_syscfg(struct usb_device *udev) { unsigned char fw_state; - char filename[ATH3K_NAME_LEN] = {0}; + char filename[ATH3K_NAME_LEN]; const struct firmware *firmware; struct ath3k_version fw_version; int clk_value, ret; @@ -522,7 +527,6 @@ static int ath3k_probe(struct usb_interface *intf, /* load patch and sysconfig files for AR3012 */ if (id->driver_info & BTUSB_ATH3012) { - /* New firmware with patch and sysconfig files already loaded */ if (le16_to_cpu(udev->descriptor.bcdDevice) > 0x0001) return -ENODEV; @@ -565,7 +569,7 @@ static int ath3k_probe(struct usb_interface *intf, static void ath3k_disconnect(struct usb_interface *intf) { - BT_DBG("ath3k_disconnect intf %p", intf); + BT_DBG("%s intf %p", __func__, intf); } static struct usb_driver ath3k_driver = { diff --git a/drivers/bluetooth/btmrvl_main.c b/drivers/bluetooth/btmrvl_main.c index b280d466f05b..f6c694a1b9b0 100644 --- a/drivers/bluetooth/btmrvl_main.c +++ b/drivers/bluetooth/btmrvl_main.c @@ -183,7 +183,7 @@ static int btmrvl_send_sync_cmd(struct btmrvl_private *priv, u16 opcode, return -EFAULT; } - skb = bt_skb_alloc(HCI_COMMAND_HDR_SIZE + len, GFP_ATOMIC); + skb = bt_skb_alloc(HCI_COMMAND_HDR_SIZE + len, GFP_KERNEL); if (!skb) { BT_ERR("No free skb"); return -ENOMEM; diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c index 6e2ad748abba..437f080deaab 100644 --- a/drivers/bluetooth/btrtl.c +++ b/drivers/bluetooth/btrtl.c @@ -35,6 +35,60 @@ #define RTL_ROM_LMP_8761A 0x8761 #define RTL_ROM_LMP_8822B 0x8822 +#define IC_MATCH_FL_LMPSUBV (1 << 0) +#define IC_MATCH_FL_HCIREV (1 << 1) +#define IC_INFO(lmps, hcir) \ + .match_flags = IC_MATCH_FL_LMPSUBV | IC_MATCH_FL_HCIREV, \ + .lmp_subver = (lmps), \ + .hci_rev = (hcir) + +struct id_table { + __u16 match_flags; + __u16 lmp_subver; + __u16 hci_rev; + bool config_needed; + char *fw_name; + char *cfg_name; +}; + +static const struct id_table ic_id_table[] = { + /* 8723B */ + { IC_INFO(RTL_ROM_LMP_8723B, 0xb), + .config_needed = false, + .fw_name = "rtl_bt/rtl8723b_fw.bin", + .cfg_name = "rtl_bt/rtl8723b_config.bin" }, + + /* 8723D */ + { IC_INFO(RTL_ROM_LMP_8723B, 0xd), + .config_needed = true, + .fw_name = "rtl_bt/rtl8723d_fw.bin", + .cfg_name = "rtl_bt/rtl8723d_config.bin" }, + + /* 8821A */ + { IC_INFO(RTL_ROM_LMP_8821A, 0xa), + .config_needed = false, + .fw_name = "rtl_bt/rtl8821a_fw.bin", + .cfg_name = "rtl_bt/rtl8821a_config.bin" }, + + /* 8821C */ + { IC_INFO(RTL_ROM_LMP_8821A, 0xc), + .config_needed = false, + .fw_name = "rtl_bt/rtl8821c_fw.bin", + .cfg_name = "rtl_bt/rtl8821c_config.bin" }, + + /* 8761A */ + { IC_MATCH_FL_LMPSUBV, RTL_ROM_LMP_8761A, 0x0, + .config_needed = false, + .fw_name = "rtl_bt/rtl8761a_fw.bin", + .cfg_name = "rtl_bt/rtl8761a_config.bin" }, + + /* 8822B */ + { IC_INFO(RTL_ROM_LMP_8822B, 0xb), + .config_needed = true, + .fw_name = "rtl_bt/rtl8822b_fw.bin", + .cfg_name = "rtl_bt/rtl8822b_config.bin" }, + }; + static int rtl_read_rom_version(struct hci_dev *hdev, u8 *version) { struct rtl_rom_version_evt *rom_version; @@ -64,9 +118,9 @@ static int rtl_read_rom_version(struct hci_dev *hdev, u8 *version) return 0; } -static int rtl8723b_parse_firmware(struct hci_dev *hdev, u16 lmp_subver, - const struct firmware *fw, - unsigned char **_buf) +static int rtlbt_parse_firmware(struct hci_dev *hdev, u16 lmp_subver, + const struct firmware *fw, + unsigned char **_buf) { const u8 extension_sig[] = { 0x51, 0x04, 0xfd, 0x77 }; struct rtl_epatch_header *epatch_info; @@ -88,6 +142,8 @@ static int rtl8723b_parse_firmware(struct hci_dev *hdev, u16 lmp_subver, { RTL_ROM_LMP_8821A, 2 }, { RTL_ROM_LMP_8761A, 3 }, { RTL_ROM_LMP_8822B, 8 }, + { RTL_ROM_LMP_8723B, 9 }, /* 8723D */ + { RTL_ROM_LMP_8821A, 10 }, /* 8821C */ }; ret = rtl_read_rom_version(hdev, &rom_version); @@ -320,8 +376,8 @@ out: return ret; } -static int btrtl_setup_rtl8723b(struct hci_dev *hdev, u16 lmp_subver, - const char *fw_name) +static int btrtl_setup_rtl8723b(struct hci_dev *hdev, u16 hci_rev, + u16 lmp_subver) { unsigned char *fw_data = NULL; const struct firmware *fw; @@ -330,39 +386,40 @@ static int btrtl_setup_rtl8723b(struct hci_dev *hdev, u16 lmp_subver, u8 *cfg_buff = NULL; u8 *tbuff; char *cfg_name = NULL; - bool config_needed = false; + char *fw_name = NULL; + int i; + + for (i = 0; i < ARRAY_SIZE(ic_id_table); i++) { + if ((ic_id_table[i].match_flags & IC_MATCH_FL_LMPSUBV) && + (ic_id_table[i].lmp_subver != lmp_subver)) + continue; + if ((ic_id_table[i].match_flags & IC_MATCH_FL_HCIREV) && + (ic_id_table[i].hci_rev != hci_rev)) + continue; - switch (lmp_subver) { - case RTL_ROM_LMP_8723B: - cfg_name = "rtl_bt/rtl8723b_config.bin"; - break; - case RTL_ROM_LMP_8821A: - cfg_name = "rtl_bt/rtl8821a_config.bin"; - break; - case RTL_ROM_LMP_8761A: - cfg_name = "rtl_bt/rtl8761a_config.bin"; - break; - case RTL_ROM_LMP_8822B: - cfg_name = "rtl_bt/rtl8822b_config.bin"; - config_needed = true; - break; - default: - BT_ERR("%s: rtl: no config according to lmp_subver %04x", - hdev->name, lmp_subver); break; } + if (i >= ARRAY_SIZE(ic_id_table)) { + BT_ERR("%s: unknown IC info, lmp subver %04x, hci rev %04x", + hdev->name, lmp_subver, hci_rev); + return -EINVAL; + } + + cfg_name = ic_id_table[i].cfg_name; + if (cfg_name) { cfg_sz = rtl_load_config(hdev, cfg_name, &cfg_buff); if (cfg_sz < 0) { cfg_sz = 0; - if (config_needed) + if (ic_id_table[i].config_needed) BT_ERR("Necessary config file %s not found\n", cfg_name); } } else cfg_sz = 0; + fw_name = ic_id_table[i].fw_name; bt_dev_info(hdev, "rtl: loading %s", fw_name); ret = request_firmware(&fw, fw_name, &hdev->dev); if (ret < 0) { @@ -370,7 +427,7 @@ static int btrtl_setup_rtl8723b(struct hci_dev *hdev, u16 lmp_subver, goto err_req_fw; } - ret = rtl8723b_parse_firmware(hdev, lmp_subver, fw, &fw_data); + ret = rtlbt_parse_firmware(hdev, lmp_subver, fw, &fw_data); if (ret < 0) goto out; @@ -429,7 +486,7 @@ int btrtl_setup_realtek(struct hci_dev *hdev) { struct sk_buff *skb; struct hci_rp_read_local_version *resp; - u16 lmp_subver; + u16 hci_rev, lmp_subver; skb = btrtl_read_local_version(hdev); if (IS_ERR(skb)) @@ -441,6 +498,7 @@ int btrtl_setup_realtek(struct hci_dev *hdev) resp->hci_ver, resp->hci_rev, resp->lmp_ver, resp->lmp_subver); + hci_rev = le16_to_cpu(resp->hci_rev); lmp_subver = le16_to_cpu(resp->lmp_subver); kfree_skb(skb); @@ -455,17 +513,10 @@ int btrtl_setup_realtek(struct hci_dev *hdev) case RTL_ROM_LMP_3499: return btrtl_setup_rtl8723a(hdev); case RTL_ROM_LMP_8723B: - return btrtl_setup_rtl8723b(hdev, lmp_subver, - "rtl_bt/rtl8723b_fw.bin"); case RTL_ROM_LMP_8821A: - return btrtl_setup_rtl8723b(hdev, lmp_subver, - "rtl_bt/rtl8821a_fw.bin"); case RTL_ROM_LMP_8761A: - return btrtl_setup_rtl8723b(hdev, lmp_subver, - "rtl_bt/rtl8761a_fw.bin"); case RTL_ROM_LMP_8822B: - return btrtl_setup_rtl8723b(hdev, lmp_subver, - "rtl_bt/rtl8822b_fw.bin"); + return btrtl_setup_rtl8723b(hdev, hci_rev, lmp_subver); default: bt_dev_info(hdev, "rtl: assuming no firmware upload needed"); return 0; diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 2a55380ad730..c8e9ae6b99e1 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -339,6 +339,7 @@ static const struct usb_device_id blacklist_table[] = { /* Intel Bluetooth devices */ { USB_DEVICE(0x8087, 0x0025), .driver_info = BTUSB_INTEL_NEW }, + { USB_DEVICE(0x8087, 0x0026), .driver_info = BTUSB_INTEL_NEW }, { USB_DEVICE(0x8087, 0x07da), .driver_info = BTUSB_CSR }, { USB_DEVICE(0x8087, 0x07dc), .driver_info = BTUSB_INTEL }, { USB_DEVICE(0x8087, 0x0a2a), .driver_info = BTUSB_INTEL }, @@ -373,6 +374,9 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x13d3, 0x3461), .driver_info = BTUSB_REALTEK }, { USB_DEVICE(0x13d3, 0x3462), .driver_info = BTUSB_REALTEK }, + /* Additional Realtek 8822BE Bluetooth devices */ + { USB_DEVICE(0x0b05, 0x185c), .driver_info = BTUSB_REALTEK }, + /* Silicon Wave based devices */ { USB_DEVICE(0x0c10, 0x0000), .driver_info = BTUSB_SWAVE }, @@ -2057,6 +2061,8 @@ static int btusb_setup_intel_new(struct hci_dev *hdev) case 0x0c: /* WsP */ case 0x11: /* JfP */ case 0x12: /* ThP */ + case 0x13: /* HrP */ + case 0x14: /* QnJ, IcP */ break; default: BT_ERR("%s: Unsupported Intel hardware variant (%u)", @@ -2149,6 +2155,8 @@ static int btusb_setup_intel_new(struct hci_dev *hdev) break; case 0x11: /* JfP */ case 0x12: /* ThP */ + case 0x13: /* HrP */ + case 0x14: /* QnJ, IcP */ snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u-%u.sfi", le16_to_cpu(ver.hw_variant), le16_to_cpu(ver.hw_revision), @@ -2180,6 +2188,8 @@ static int btusb_setup_intel_new(struct hci_dev *hdev) break; case 0x11: /* JfP */ case 0x12: /* ThP */ + case 0x13: /* HrP */ + case 0x14: /* QnJ, IcP */ snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u-%u.ddc", le16_to_cpu(ver.hw_variant), le16_to_cpu(ver.hw_revision), diff --git a/drivers/bluetooth/hci_ath.c b/drivers/bluetooth/hci_ath.c index 14ae7ee88acb..d568fbd94d6c 100644 --- a/drivers/bluetooth/hci_ath.c +++ b/drivers/bluetooth/hci_ath.c @@ -71,12 +71,12 @@ static int ath_wakeup_ar3k(struct tty_struct *tty) /* Clear RTS first */ tty->driver->ops->tiocmget(tty); tty->driver->ops->tiocmset(tty, 0x00, TIOCM_RTS); - mdelay(20); + msleep(20); /* Set RTS, wake up board */ tty->driver->ops->tiocmget(tty); tty->driver->ops->tiocmset(tty, TIOCM_RTS, 0x00); - mdelay(20); + msleep(20); status = tty->driver->ops->tiocmget(tty); return status; diff --git a/drivers/bluetooth/hci_ll.c b/drivers/bluetooth/hci_ll.c index 1b4417a623a4..2f30dcad96bd 100644 --- a/drivers/bluetooth/hci_ll.c +++ b/drivers/bluetooth/hci_ll.c @@ -650,7 +650,7 @@ static int download_firmware(struct ll_device *lldev) break; case ACTION_DELAY: /* sleep */ bt_dev_info(lldev->hu.hdev, "sleep command in scr"); - mdelay(((struct bts_action_delay *)action_ptr)->msec); + msleep(((struct bts_action_delay *)action_ptr)->msec); break; } len -= (sizeof(struct bts_action) + diff --git a/drivers/infiniband/hw/usnic/usnic_transport.c b/drivers/infiniband/hw/usnic/usnic_transport.c index de318389a301..67de94343cb4 100644 --- a/drivers/infiniband/hw/usnic/usnic_transport.c +++ b/drivers/infiniband/hw/usnic/usnic_transport.c @@ -174,14 +174,13 @@ void usnic_transport_put_socket(struct socket *sock) int usnic_transport_sock_get_addr(struct socket *sock, int *proto, uint32_t *addr, uint16_t *port) { - int len; int err; struct sockaddr_in sock_addr; err = sock->ops->getname(sock, (struct sockaddr *)&sock_addr, - &len, 0); - if (err) + 0); + if (err < 0) return err; if (sock_addr.sin_family != AF_INET) diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index c5603d1a07d6..1f8f489b4167 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -560,7 +560,7 @@ done: static int data_sock_getname(struct socket *sock, struct sockaddr *addr, - int *addr_len, int peer) + int peer) { struct sockaddr_mISDN *maddr = (struct sockaddr_mISDN *) addr; struct sock *sk = sock->sk; @@ -570,14 +570,13 @@ data_sock_getname(struct socket *sock, struct sockaddr *addr, lock_sock(sk); - *addr_len = sizeof(*maddr); maddr->family = AF_ISDN; maddr->dev = _pms(sk)->dev->id; maddr->channel = _pms(sk)->ch.nr; maddr->sapi = _pms(sk)->ch.addr & 0xff; maddr->tei = (_pms(sk)->ch.addr >> 8) & 0xff; release_sock(sk); - return 0; + return sizeof(*maddr); } static const struct proto_ops data_sock_ops = { diff --git a/drivers/net/dsa/mv88e6xxx/Kconfig b/drivers/net/dsa/mv88e6xxx/Kconfig index 1aaa7a95ebc4..ae9e7f7cb31c 100644 --- a/drivers/net/dsa/mv88e6xxx/Kconfig +++ b/drivers/net/dsa/mv88e6xxx/Kconfig @@ -18,3 +18,13 @@ config NET_DSA_MV88E6XXX_GLOBAL2 It is required on most chips. If the chip you compile the support for doesn't have such registers set, say N here. In doubt, say Y. + +config NET_DSA_MV88E6XXX_PTP + bool "PTP support for Marvell 88E6xxx" + default n + depends on NET_DSA_MV88E6XXX_GLOBAL2 + imply NETWORK_PHY_TIMESTAMPING + imply PTP_1588_CLOCK + help + Say Y to enable PTP hardware timestamping on Marvell 88E6xxx switch + chips that support it. diff --git a/drivers/net/dsa/mv88e6xxx/Makefile b/drivers/net/dsa/mv88e6xxx/Makefile index 58a4a0014e59..50de304abe2f 100644 --- a/drivers/net/dsa/mv88e6xxx/Makefile +++ b/drivers/net/dsa/mv88e6xxx/Makefile @@ -5,6 +5,10 @@ mv88e6xxx-objs += global1.o mv88e6xxx-objs += global1_atu.o mv88e6xxx-objs += global1_vtu.o mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2.o +mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2_avb.o +mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2_scratch.o +mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_PTP) += hwtstamp.o mv88e6xxx-objs += phy.o mv88e6xxx-objs += port.o +mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_PTP) += ptp.o mv88e6xxx-objs += serdes.o diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index eb328bade225..39c7ad7e490f 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -36,8 +36,10 @@ #include "chip.h" #include "global1.h" #include "global2.h" +#include "hwtstamp.h" #include "phy.h" #include "port.h" +#include "ptp.h" #include "serdes.h" static void assert_reg_lock(struct mv88e6xxx_chip *chip) @@ -712,9 +714,12 @@ static void mv88e6xxx_stats_get_stats(struct mv88e6xxx_chip *chip, int port, for (i = 0, j = 0; i < ARRAY_SIZE(mv88e6xxx_hw_stats); i++) { stat = &mv88e6xxx_hw_stats[i]; if (stat->type & types) { + mutex_lock(&chip->reg_lock); data[j] = _mv88e6xxx_get_ethtool_stat(chip, stat, port, bank1_select, histogram); + mutex_unlock(&chip->reg_lock); + j++; } } @@ -762,14 +767,13 @@ static void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, int port, mutex_lock(&chip->reg_lock); ret = mv88e6xxx_stats_snapshot(chip, port); - if (ret < 0) { - mutex_unlock(&chip->reg_lock); + mutex_unlock(&chip->reg_lock); + + if (ret < 0) return; - } mv88e6xxx_get_stats(chip, port, data); - mutex_unlock(&chip->reg_lock); } static int mv88e6xxx_stats_set_histogram(struct mv88e6xxx_chip *chip) @@ -1433,7 +1437,9 @@ static int mv88e6xxx_port_db_dump_fid(struct mv88e6xxx_chip *chip, eth_broadcast_addr(addr.mac); do { + mutex_lock(&chip->reg_lock); err = mv88e6xxx_g1_atu_getnext(chip, fid, &addr); + mutex_unlock(&chip->reg_lock); if (err) return err; @@ -1466,7 +1472,10 @@ static int mv88e6xxx_port_db_dump(struct mv88e6xxx_chip *chip, int port, int err; /* Dump port's default Filtering Information Database (VLAN ID 0) */ + mutex_lock(&chip->reg_lock); err = mv88e6xxx_port_get_fid(chip, port, &fid); + mutex_unlock(&chip->reg_lock); + if (err) return err; @@ -1476,7 +1485,9 @@ static int mv88e6xxx_port_db_dump(struct mv88e6xxx_chip *chip, int port, /* Dump VLANs' Filtering Information Databases */ do { + mutex_lock(&chip->reg_lock); err = mv88e6xxx_vtu_getnext(chip, &vlan); + mutex_unlock(&chip->reg_lock); if (err) return err; @@ -1496,13 +1507,8 @@ static int mv88e6xxx_port_fdb_dump(struct dsa_switch *ds, int port, dsa_fdb_dump_cb_t *cb, void *data) { struct mv88e6xxx_chip *chip = ds->priv; - int err; - - mutex_lock(&chip->reg_lock); - err = mv88e6xxx_port_db_dump(chip, port, cb, data); - mutex_unlock(&chip->reg_lock); - return err; + return mv88e6xxx_port_db_dump(chip, port, cb, data); } static int mv88e6xxx_bridge_map(struct mv88e6xxx_chip *chip, @@ -2092,6 +2098,17 @@ static int mv88e6xxx_setup(struct dsa_switch *ds) if (err) goto unlock; + /* Setup PTP Hardware Clock and timestamping */ + if (chip->info->ptp_support) { + err = mv88e6xxx_ptp_setup(chip); + if (err) + goto unlock; + + err = mv88e6xxx_hwtstamp_setup(chip); + if (err) + goto unlock; + } + unlock: mutex_unlock(&chip->reg_lock); @@ -2472,6 +2489,7 @@ static const struct mv88e6xxx_ops mv88e6141_ops = { .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, + .gpio_ops = &mv88e6352_gpio_ops, }; static const struct mv88e6xxx_ops mv88e6161_ops = { @@ -2602,6 +2620,7 @@ static const struct mv88e6xxx_ops mv88e6172_ops = { .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, .serdes_power = mv88e6352_serdes_power, + .gpio_ops = &mv88e6352_gpio_ops, }; static const struct mv88e6xxx_ops mv88e6175_ops = { @@ -2673,6 +2692,7 @@ static const struct mv88e6xxx_ops mv88e6176_ops = { .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, .serdes_power = mv88e6352_serdes_power, + .gpio_ops = &mv88e6352_gpio_ops, }; static const struct mv88e6xxx_ops mv88e6185_ops = { @@ -2736,6 +2756,7 @@ static const struct mv88e6xxx_ops mv88e6190_ops = { .vtu_getnext = mv88e6390_g1_vtu_getnext, .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge, .serdes_power = mv88e6390_serdes_power, + .gpio_ops = &mv88e6352_gpio_ops, }; static const struct mv88e6xxx_ops mv88e6190x_ops = { @@ -2771,6 +2792,7 @@ static const struct mv88e6xxx_ops mv88e6190x_ops = { .vtu_getnext = mv88e6390_g1_vtu_getnext, .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge, .serdes_power = mv88e6390_serdes_power, + .gpio_ops = &mv88e6352_gpio_ops, }; static const struct mv88e6xxx_ops mv88e6191_ops = { @@ -2843,6 +2865,8 @@ static const struct mv88e6xxx_ops mv88e6240_ops = { .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, .serdes_power = mv88e6352_serdes_power, + .gpio_ops = &mv88e6352_gpio_ops, + .avb_ops = &mv88e6352_avb_ops, }; static const struct mv88e6xxx_ops mv88e6290_ops = { @@ -2879,6 +2903,8 @@ static const struct mv88e6xxx_ops mv88e6290_ops = { .vtu_getnext = mv88e6390_g1_vtu_getnext, .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge, .serdes_power = mv88e6390_serdes_power, + .gpio_ops = &mv88e6352_gpio_ops, + .avb_ops = &mv88e6390_avb_ops, }; static const struct mv88e6xxx_ops mv88e6320_ops = { @@ -2913,6 +2939,8 @@ static const struct mv88e6xxx_ops mv88e6320_ops = { .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6185_g1_vtu_getnext, .vtu_loadpurge = mv88e6185_g1_vtu_loadpurge, + .gpio_ops = &mv88e6352_gpio_ops, + .avb_ops = &mv88e6352_avb_ops, }; static const struct mv88e6xxx_ops mv88e6321_ops = { @@ -2945,6 +2973,8 @@ static const struct mv88e6xxx_ops mv88e6321_ops = { .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6185_g1_vtu_getnext, .vtu_loadpurge = mv88e6185_g1_vtu_loadpurge, + .gpio_ops = &mv88e6352_gpio_ops, + .avb_ops = &mv88e6352_avb_ops, }; static const struct mv88e6xxx_ops mv88e6341_ops = { @@ -2981,6 +3011,8 @@ static const struct mv88e6xxx_ops mv88e6341_ops = { .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, + .gpio_ops = &mv88e6352_gpio_ops, + .avb_ops = &mv88e6390_avb_ops, }; static const struct mv88e6xxx_ops mv88e6350_ops = { @@ -3049,6 +3081,7 @@ static const struct mv88e6xxx_ops mv88e6351_ops = { .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, + .avb_ops = &mv88e6352_avb_ops, }; static const struct mv88e6xxx_ops mv88e6352_ops = { @@ -3086,6 +3119,8 @@ static const struct mv88e6xxx_ops mv88e6352_ops = { .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, .serdes_power = mv88e6352_serdes_power, + .gpio_ops = &mv88e6352_gpio_ops, + .avb_ops = &mv88e6352_avb_ops, }; static const struct mv88e6xxx_ops mv88e6390_ops = { @@ -3124,6 +3159,8 @@ static const struct mv88e6xxx_ops mv88e6390_ops = { .vtu_getnext = mv88e6390_g1_vtu_getnext, .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge, .serdes_power = mv88e6390_serdes_power, + .gpio_ops = &mv88e6352_gpio_ops, + .avb_ops = &mv88e6390_avb_ops, }; static const struct mv88e6xxx_ops mv88e6390x_ops = { @@ -3162,6 +3199,8 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = { .vtu_getnext = mv88e6390_g1_vtu_getnext, .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge, .serdes_power = mv88e6390_serdes_power, + .gpio_ops = &mv88e6352_gpio_ops, + .avb_ops = &mv88e6390_avb_ops, }; static const struct mv88e6xxx_info mv88e6xxx_table[] = { @@ -3267,6 +3306,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .name = "Marvell 88E6341", .num_databases = 4096, .num_ports = 6, + .num_gpio = 11, .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, @@ -3346,6 +3386,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .name = "Marvell 88E6172", .num_databases = 4096, .num_ports = 7, + .num_gpio = 15, .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, @@ -3386,6 +3427,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .name = "Marvell 88E6176", .num_databases = 4096, .num_ports = 7, + .num_gpio = 15, .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, @@ -3424,6 +3466,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .name = "Marvell 88E6190", .num_databases = 4096, .num_ports = 11, /* 10 + Z80 */ + .num_gpio = 16, .max_vid = 8191, .port_base_addr = 0x0, .global1_addr = 0x1b, @@ -3444,6 +3487,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .name = "Marvell 88E6190X", .num_databases = 4096, .num_ports = 11, /* 10 + Z80 */ + .num_gpio = 16, .max_vid = 8191, .port_base_addr = 0x0, .global1_addr = 0x1b, @@ -3475,6 +3519,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .pvt = true, .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_DSA, + .ptp_support = true, .ops = &mv88e6191_ops, }, @@ -3484,6 +3529,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .name = "Marvell 88E6240", .num_databases = 4096, .num_ports = 7, + .num_gpio = 15, .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, @@ -3495,6 +3541,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .pvt = true, .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, + .ptp_support = true, .ops = &mv88e6240_ops, }, @@ -3504,6 +3551,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .name = "Marvell 88E6290", .num_databases = 4096, .num_ports = 11, /* 10 + Z80 */ + .num_gpio = 16, .max_vid = 8191, .port_base_addr = 0x0, .global1_addr = 0x1b, @@ -3515,6 +3563,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .pvt = true, .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_DSA, + .ptp_support = true, .ops = &mv88e6290_ops, }, @@ -3524,6 +3573,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .name = "Marvell 88E6320", .num_databases = 4096, .num_ports = 7, + .num_gpio = 15, .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, @@ -3534,6 +3584,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .pvt = true, .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, + .ptp_support = true, .ops = &mv88e6320_ops, }, @@ -3543,6 +3594,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .name = "Marvell 88E6321", .num_databases = 4096, .num_ports = 7, + .num_gpio = 15, .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, @@ -3552,6 +3604,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .atu_move_port_mask = 0xf, .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, + .ptp_support = true, .ops = &mv88e6321_ops, }, @@ -3561,6 +3614,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .name = "Marvell 88E6341", .num_databases = 4096, .num_ports = 6, + .num_gpio = 11, .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, @@ -3571,6 +3625,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .pvt = true, .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, + .ptp_support = true, .ops = &mv88e6341_ops, }, @@ -3620,6 +3675,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .name = "Marvell 88E6352", .num_databases = 4096, .num_ports = 7, + .num_gpio = 15, .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, @@ -3631,6 +3687,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .pvt = true, .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, + .ptp_support = true, .ops = &mv88e6352_ops, }, [MV88E6390] = { @@ -3639,6 +3696,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .name = "Marvell 88E6390", .num_databases = 4096, .num_ports = 11, /* 10 + Z80 */ + .num_gpio = 16, .max_vid = 8191, .port_base_addr = 0x0, .global1_addr = 0x1b, @@ -3650,6 +3708,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .pvt = true, .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_DSA, + .ptp_support = true, .ops = &mv88e6390_ops, }, [MV88E6390X] = { @@ -3658,6 +3717,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .name = "Marvell 88E6390X", .num_databases = 4096, .num_ports = 11, /* 10 + Z80 */ + .num_gpio = 16, .max_vid = 8191, .port_base_addr = 0x0, .global1_addr = 0x1b, @@ -3669,6 +3729,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .pvt = true, .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_DSA, + .ptp_support = true, .ops = &mv88e6390x_ops, }, }; @@ -3880,6 +3941,11 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = { .port_mdb_del = mv88e6xxx_port_mdb_del, .crosschip_bridge_join = mv88e6xxx_crosschip_bridge_join, .crosschip_bridge_leave = mv88e6xxx_crosschip_bridge_leave, + .port_hwtstamp_set = mv88e6xxx_port_hwtstamp_set, + .port_hwtstamp_get = mv88e6xxx_port_hwtstamp_get, + .port_txtstamp = mv88e6xxx_port_txtstamp, + .port_rxtstamp = mv88e6xxx_port_rxtstamp, + .get_ts_info = mv88e6xxx_get_ts_info, }; static struct dsa_switch_driver mv88e6xxx_switch_drv = { @@ -4022,6 +4088,11 @@ static void mv88e6xxx_remove(struct mdio_device *mdiodev) struct dsa_switch *ds = dev_get_drvdata(&mdiodev->dev); struct mv88e6xxx_chip *chip = ds->priv; + if (chip->info->ptp_support) { + mv88e6xxx_hwtstamp_free(chip); + mv88e6xxx_ptp_free(chip); + } + mv88e6xxx_phy_destroy(chip); mv88e6xxx_unregister_switch(chip); mv88e6xxx_mdios_unregister(chip); diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h index 3dba6e90adcf..97d7915f32c7 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.h +++ b/drivers/net/dsa/mv88e6xxx/chip.h @@ -16,6 +16,8 @@ #include <linux/irq.h> #include <linux/gpio/consumer.h> #include <linux/phy.h> +#include <linux/ptp_clock_kernel.h> +#include <linux/timecounter.h> #include <net/dsa.h> #ifndef UINT64_MAX @@ -39,6 +41,8 @@ #define MV88E6XXX_MAX_PVT_SWITCHES 32 #define MV88E6XXX_MAX_PVT_PORTS 16 +#define MV88E6XXX_MAX_GPIO 16 + enum mv88e6xxx_egress_mode { MV88E6XXX_EGRESS_MODE_UNMODIFIED, MV88E6XXX_EGRESS_MODE_UNTAGGED, @@ -105,6 +109,7 @@ struct mv88e6xxx_info { const char *name; unsigned int num_databases; unsigned int num_ports; + unsigned int num_gpio; unsigned int max_vid; unsigned int port_base_addr; unsigned int global1_addr; @@ -126,6 +131,9 @@ struct mv88e6xxx_info { */ u8 atu_move_port_mask; const struct mv88e6xxx_ops *ops; + + /* Supports PTP */ + bool ptp_support; }; struct mv88e6xxx_atu_entry { @@ -146,6 +154,8 @@ struct mv88e6xxx_vtu_entry { struct mv88e6xxx_bus_ops; struct mv88e6xxx_irq_ops; +struct mv88e6xxx_gpio_ops; +struct mv88e6xxx_avb_ops; struct mv88e6xxx_irq { u16 masked; @@ -154,6 +164,32 @@ struct mv88e6xxx_irq { unsigned int nirqs; }; +/* state flags for mv88e6xxx_port_hwtstamp::state */ +enum { + MV88E6XXX_HWTSTAMP_ENABLED, + MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS, +}; + +struct mv88e6xxx_port_hwtstamp { + /* Port index */ + int port_id; + + /* Timestamping state */ + unsigned long state; + + /* Resources for receive timestamping */ + struct sk_buff_head rx_queue; + struct sk_buff_head rx_queue2; + + /* Resources for transmit timestamping */ + unsigned long tx_tstamp_start; + struct sk_buff *tx_skb; + u16 tx_seq_id; + + /* Current timestamp configuration */ + struct hwtstamp_config tstamp_config; +}; + struct mv88e6xxx_chip { const struct mv88e6xxx_info *info; @@ -209,6 +245,26 @@ struct mv88e6xxx_chip { int watchdog_irq; int atu_prob_irq; int vtu_prob_irq; + + /* GPIO resources */ + u8 gpio_data[2]; + + /* This cyclecounter abstracts the switch PTP time. + * reg_lock must be held for any operation that read()s. + */ + struct cyclecounter tstamp_cc; + struct timecounter tstamp_tc; + struct delayed_work overflow_work; + + struct ptp_clock *ptp_clock; + struct ptp_clock_info ptp_clock_info; + struct delayed_work tai_event_work; + struct ptp_pin_desc pin_config[MV88E6XXX_MAX_GPIO]; + u16 trig_config; + u16 evcap_config; + + /* Per-port timestamping resources. */ + struct mv88e6xxx_port_hwtstamp port_hwtstamp[DSA_MAX_PORTS]; }; struct mv88e6xxx_bus_ops { @@ -344,6 +400,12 @@ struct mv88e6xxx_ops { struct mv88e6xxx_vtu_entry *entry); int (*vtu_loadpurge)(struct mv88e6xxx_chip *chip, struct mv88e6xxx_vtu_entry *entry); + + /* GPIO operations */ + const struct mv88e6xxx_gpio_ops *gpio_ops; + + /* Interface to the AVB/PTP registers */ + const struct mv88e6xxx_avb_ops *avb_ops; }; struct mv88e6xxx_irq_ops { @@ -355,6 +417,42 @@ struct mv88e6xxx_irq_ops { void (*irq_free)(struct mv88e6xxx_chip *chip); }; +struct mv88e6xxx_gpio_ops { + /* Get/set data on GPIO pin */ + int (*get_data)(struct mv88e6xxx_chip *chip, unsigned int pin); + int (*set_data)(struct mv88e6xxx_chip *chip, unsigned int pin, + int value); + + /* get/set GPIO direction */ + int (*get_dir)(struct mv88e6xxx_chip *chip, unsigned int pin); + int (*set_dir)(struct mv88e6xxx_chip *chip, unsigned int pin, + bool input); + + /* get/set GPIO pin control */ + int (*get_pctl)(struct mv88e6xxx_chip *chip, unsigned int pin, + int *func); + int (*set_pctl)(struct mv88e6xxx_chip *chip, unsigned int pin, + int func); +}; + +struct mv88e6xxx_avb_ops { + /* Access port-scoped Precision Time Protocol registers */ + int (*port_ptp_read)(struct mv88e6xxx_chip *chip, int port, int addr, + u16 *data, int len); + int (*port_ptp_write)(struct mv88e6xxx_chip *chip, int port, int addr, + u16 data); + + /* Access global Precision Time Protocol registers */ + int (*ptp_read)(struct mv88e6xxx_chip *chip, int addr, u16 *data, + int len); + int (*ptp_write)(struct mv88e6xxx_chip *chip, int addr, u16 data); + + /* Access global Time Application Interface registers */ + int (*tai_read)(struct mv88e6xxx_chip *chip, int addr, u16 *data, + int len); + int (*tai_write)(struct mv88e6xxx_chip *chip, int addr, u16 data); +}; + #define STATS_TYPE_PORT BIT(0) #define STATS_TYPE_BANK0 BIT(1) #define STATS_TYPE_BANK1 BIT(2) @@ -386,6 +484,11 @@ static inline u16 mv88e6xxx_port_mask(struct mv88e6xxx_chip *chip) return GENMASK(mv88e6xxx_num_ports(chip) - 1, 0); } +static inline unsigned int mv88e6xxx_num_gpio(struct mv88e6xxx_chip *chip) +{ + return chip->info->num_gpio; +} + int mv88e6xxx_read(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val); int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val); int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg, diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c index af0727877825..5f370f1fc7c4 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.c +++ b/drivers/net/dsa/mv88e6xxx/global2.c @@ -20,22 +20,22 @@ #include "global1.h" /* for MV88E6XXX_G1_STS_IRQ_DEVICE */ #include "global2.h" -static int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val) +int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val) { return mv88e6xxx_read(chip, chip->info->global2_addr, reg, val); } -static int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val) +int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val) { return mv88e6xxx_write(chip, chip->info->global2_addr, reg, val); } -static int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update) +int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update) { return mv88e6xxx_update(chip, chip->info->global2_addr, reg, update); } -static int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask) +int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask) { return mv88e6xxx_wait(chip, chip->info->global2_addr, reg, mask); } @@ -798,6 +798,7 @@ int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, struct mii_bus *bus, val); } +/* Offset 0x1B: Watchdog Control */ static int mv88e6097_watchdog_action(struct mv88e6xxx_chip *chip, int irq) { u16 reg; diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h index 669f59017b12..25f92b3d7157 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.h +++ b/drivers/net/dsa/mv88e6xxx/global2.h @@ -149,7 +149,26 @@ #define MV88E6390_G2_EEPROM_ADDR_MASK 0xffff /* Offset 0x16: AVB Command Register */ -#define MV88E6352_G2_AVB_CMD 0x16 +#define MV88E6352_G2_AVB_CMD 0x16 +#define MV88E6352_G2_AVB_CMD_BUSY 0x8000 +#define MV88E6352_G2_AVB_CMD_OP_READ 0x4000 +#define MV88E6352_G2_AVB_CMD_OP_READ_INCR 0x6000 +#define MV88E6352_G2_AVB_CMD_OP_WRITE 0x3000 +#define MV88E6390_G2_AVB_CMD_OP_READ 0x0000 +#define MV88E6390_G2_AVB_CMD_OP_READ_INCR 0x4000 +#define MV88E6390_G2_AVB_CMD_OP_WRITE 0x6000 +#define MV88E6352_G2_AVB_CMD_PORT_MASK 0x0f00 +#define MV88E6352_G2_AVB_CMD_PORT_TAIGLOBAL 0xe +#define MV88E6352_G2_AVB_CMD_PORT_PTPGLOBAL 0xf +#define MV88E6390_G2_AVB_CMD_PORT_MASK 0x1f00 +#define MV88E6390_G2_AVB_CMD_PORT_TAIGLOBAL 0x1e +#define MV88E6390_G2_AVB_CMD_PORT_PTPGLOBAL 0x1f +#define MV88E6352_G2_AVB_CMD_BLOCK_PTP 0 +#define MV88E6352_G2_AVB_CMD_BLOCK_AVB 1 +#define MV88E6352_G2_AVB_CMD_BLOCK_QAV 2 +#define MV88E6352_G2_AVB_CMD_BLOCK_QVB 3 +#define MV88E6352_G2_AVB_CMD_BLOCK_MASK 0x00e0 +#define MV88E6352_G2_AVB_CMD_ADDR_MASK 0x001f /* Offset 0x17: AVB Data Register */ #define MV88E6352_G2_AVB_DATA 0x17 @@ -223,6 +242,35 @@ #define MV88E6352_G2_NOEGR_POLICY 0x2000 #define MV88E6390_G2_LAG_ID_4 0x2000 +/* Scratch/Misc registers accessed through MV88E6XXX_G2_SCRATCH_MISC */ +/* Offset 0x02: Misc Configuration */ +#define MV88E6352_G2_SCRATCH_MISC_CFG 0x02 +#define MV88E6352_G2_SCRATCH_MISC_CFG_NORMALSMI 0x80 +/* Offset 0x60-0x61: GPIO Configuration */ +#define MV88E6352_G2_SCRATCH_GPIO_CFG0 0x60 +#define MV88E6352_G2_SCRATCH_GPIO_CFG1 0x61 +/* Offset 0x62-0x63: GPIO Direction */ +#define MV88E6352_G2_SCRATCH_GPIO_DIR0 0x62 +#define MV88E6352_G2_SCRATCH_GPIO_DIR1 0x63 +#define MV88E6352_G2_SCRATCH_GPIO_DIR_OUT 0 +#define MV88E6352_G2_SCRATCH_GPIO_DIR_IN 1 +/* Offset 0x64-0x65: GPIO Data */ +#define MV88E6352_G2_SCRATCH_GPIO_DATA0 0x64 +#define MV88E6352_G2_SCRATCH_GPIO_DATA1 0x65 +/* Offset 0x68-0x6F: GPIO Pin Control */ +#define MV88E6352_G2_SCRATCH_GPIO_PCTL0 0x68 +#define MV88E6352_G2_SCRATCH_GPIO_PCTL1 0x69 +#define MV88E6352_G2_SCRATCH_GPIO_PCTL2 0x6A +#define MV88E6352_G2_SCRATCH_GPIO_PCTL3 0x6B +#define MV88E6352_G2_SCRATCH_GPIO_PCTL4 0x6C +#define MV88E6352_G2_SCRATCH_GPIO_PCTL5 0x6D +#define MV88E6352_G2_SCRATCH_GPIO_PCTL6 0x6E +#define MV88E6352_G2_SCRATCH_GPIO_PCTL7 0x6F + +#define MV88E6352_G2_SCRATCH_GPIO_PCTL_GPIO 0 +#define MV88E6352_G2_SCRATCH_GPIO_PCTL_TRIG 1 +#define MV88E6352_G2_SCRATCH_GPIO_PCTL_EVREQ 2 + #ifdef CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip) @@ -230,6 +278,11 @@ static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip) return 0; } +int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val); +int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val); +int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update); +int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask); + int mv88e6352_g2_irl_init_all(struct mv88e6xxx_chip *chip, int port); int mv88e6390_g2_irl_init_all(struct mv88e6xxx_chip *chip, int port); @@ -267,6 +320,11 @@ int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip); extern const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops; extern const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops; +extern const struct mv88e6xxx_avb_ops mv88e6352_avb_ops; +extern const struct mv88e6xxx_avb_ops mv88e6390_avb_ops; + +extern const struct mv88e6xxx_gpio_ops mv88e6352_gpio_ops; + #else /* !CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */ static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip) @@ -279,6 +337,26 @@ static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip) return 0; } +static int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val) +{ + return -EOPNOTSUPP; +} + +static int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val) +{ + return -EOPNOTSUPP; +} + +static int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update) +{ + return -EOPNOTSUPP; +} + +static int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask) +{ + return -EOPNOTSUPP; +} + static inline int mv88e6352_g2_irl_init_all(struct mv88e6xxx_chip *chip, int port) { @@ -382,6 +460,11 @@ static inline int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip) static const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops = {}; static const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops = {}; +static const struct mv88e6xxx_avb_ops mv88e6352_avb_ops = {}; +static const struct mv88e6xxx_avb_ops mv88e6390_avb_ops = {}; + +static const struct mv88e6xxx_gpio_ops mv88e6352_gpio_ops = {}; + #endif /* CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */ #endif /* _MV88E6XXX_GLOBAL2_H */ diff --git a/drivers/net/dsa/mv88e6xxx/global2_avb.c b/drivers/net/dsa/mv88e6xxx/global2_avb.c new file mode 100644 index 000000000000..2e398ccb88ca --- /dev/null +++ b/drivers/net/dsa/mv88e6xxx/global2_avb.c @@ -0,0 +1,193 @@ +/* + * Marvell 88E6xxx Switch Global 2 Registers support + * + * Copyright (c) 2008 Marvell Semiconductor + * + * Copyright (c) 2016-2017 Savoir-faire Linux Inc. + * Vivien Didelot <vivien.didelot@savoirfairelinux.com> + * + * Copyright (c) 2017 National Instruments + * Brandon Streiff <brandon.streiff@ni.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include "global2.h" + +/* Offset 0x16: AVB Command Register + * Offset 0x17: AVB Data Register + * + * There are two different versions of this register interface: + * "6352": 3-bit "op" field, 4-bit "port" field. + * "6390": 2-bit "op" field, 5-bit "port" field. + * + * The "op" codes are different between the two, as well as the special + * port fields for global PTP and TAI configuration. + */ + +/* mv88e6xxx_g2_avb_read -- Read one or multiple 16-bit words. + * The hardware supports snapshotting up to four contiguous registers. + */ +static int mv88e6xxx_g2_avb_read(struct mv88e6xxx_chip *chip, u16 readop, + u16 *data, int len) +{ + int err; + int i; + + /* Hardware can only snapshot four words. */ + if (len > 4) + return -E2BIG; + + err = mv88e6xxx_g2_update(chip, MV88E6352_G2_AVB_CMD, readop); + if (err) + return err; + + for (i = 0; i < len; ++i) { + err = mv88e6xxx_g2_read(chip, MV88E6352_G2_AVB_DATA, + &data[i]); + if (err) + return err; + } + + return 0; +} + +/* mv88e6xxx_g2_avb_write -- Write one 16-bit word. */ +static int mv88e6xxx_g2_avb_write(struct mv88e6xxx_chip *chip, u16 writeop, + u16 data) +{ + int err; + + err = mv88e6xxx_g2_write(chip, MV88E6352_G2_AVB_DATA, data); + if (err) + return err; + + return mv88e6xxx_g2_update(chip, MV88E6352_G2_AVB_CMD, writeop); +} + +static int mv88e6352_g2_avb_port_ptp_read(struct mv88e6xxx_chip *chip, + int port, int addr, u16 *data, + int len) +{ + u16 readop = (len == 1 ? MV88E6352_G2_AVB_CMD_OP_READ : + MV88E6352_G2_AVB_CMD_OP_READ_INCR) | + (port << 8) | (MV88E6352_G2_AVB_CMD_BLOCK_PTP << 5) | + addr; + + return mv88e6xxx_g2_avb_read(chip, readop, data, len); +} + +static int mv88e6352_g2_avb_port_ptp_write(struct mv88e6xxx_chip *chip, + int port, int addr, u16 data) +{ + u16 writeop = MV88E6352_G2_AVB_CMD_OP_WRITE | (port << 8) | + (MV88E6352_G2_AVB_CMD_BLOCK_PTP << 5) | addr; + + return mv88e6xxx_g2_avb_write(chip, writeop, data); +} + +static int mv88e6352_g2_avb_ptp_read(struct mv88e6xxx_chip *chip, int addr, + u16 *data, int len) +{ + return mv88e6352_g2_avb_port_ptp_read(chip, + MV88E6352_G2_AVB_CMD_PORT_PTPGLOBAL, + addr, data, len); +} + +static int mv88e6352_g2_avb_ptp_write(struct mv88e6xxx_chip *chip, int addr, + u16 data) +{ + return mv88e6352_g2_avb_port_ptp_write(chip, + MV88E6352_G2_AVB_CMD_PORT_PTPGLOBAL, + addr, data); +} + +static int mv88e6352_g2_avb_tai_read(struct mv88e6xxx_chip *chip, int addr, + u16 *data, int len) +{ + return mv88e6352_g2_avb_port_ptp_read(chip, + MV88E6352_G2_AVB_CMD_PORT_TAIGLOBAL, + addr, data, len); +} + +static int mv88e6352_g2_avb_tai_write(struct mv88e6xxx_chip *chip, int addr, + u16 data) +{ + return mv88e6352_g2_avb_port_ptp_write(chip, + MV88E6352_G2_AVB_CMD_PORT_TAIGLOBAL, + addr, data); +} + +const struct mv88e6xxx_avb_ops mv88e6352_avb_ops = { + .port_ptp_read = mv88e6352_g2_avb_port_ptp_read, + .port_ptp_write = mv88e6352_g2_avb_port_ptp_write, + .ptp_read = mv88e6352_g2_avb_ptp_read, + .ptp_write = mv88e6352_g2_avb_ptp_write, + .tai_read = mv88e6352_g2_avb_tai_read, + .tai_write = mv88e6352_g2_avb_tai_write, +}; + +static int mv88e6390_g2_avb_port_ptp_read(struct mv88e6xxx_chip *chip, + int port, int addr, u16 *data, + int len) +{ + u16 readop = (len == 1 ? MV88E6390_G2_AVB_CMD_OP_READ : + MV88E6390_G2_AVB_CMD_OP_READ_INCR) | + (port << 8) | (MV88E6352_G2_AVB_CMD_BLOCK_PTP << 5) | + addr; + + return mv88e6xxx_g2_avb_read(chip, readop, data, len); +} + +static int mv88e6390_g2_avb_port_ptp_write(struct mv88e6xxx_chip *chip, + int port, int addr, u16 data) +{ + u16 writeop = MV88E6390_G2_AVB_CMD_OP_WRITE | (port << 8) | + (MV88E6352_G2_AVB_CMD_BLOCK_PTP << 5) | addr; + + return mv88e6xxx_g2_avb_write(chip, writeop, data); +} + +static int mv88e6390_g2_avb_ptp_read(struct mv88e6xxx_chip *chip, int addr, + u16 *data, int len) +{ + return mv88e6390_g2_avb_port_ptp_read(chip, + MV88E6390_G2_AVB_CMD_PORT_PTPGLOBAL, + addr, data, len); +} + +static int mv88e6390_g2_avb_ptp_write(struct mv88e6xxx_chip *chip, int addr, + u16 data) +{ + return mv88e6390_g2_avb_port_ptp_write(chip, + MV88E6390_G2_AVB_CMD_PORT_PTPGLOBAL, + addr, data); +} + +static int mv88e6390_g2_avb_tai_read(struct mv88e6xxx_chip *chip, int addr, + u16 *data, int len) +{ + return mv88e6390_g2_avb_port_ptp_read(chip, + MV88E6390_G2_AVB_CMD_PORT_TAIGLOBAL, + addr, data, len); +} + +static int mv88e6390_g2_avb_tai_write(struct mv88e6xxx_chip *chip, int addr, + u16 data) +{ + return mv88e6390_g2_avb_port_ptp_write(chip, + MV88E6390_G2_AVB_CMD_PORT_TAIGLOBAL, + addr, data); +} + +const struct mv88e6xxx_avb_ops mv88e6390_avb_ops = { + .port_ptp_read = mv88e6390_g2_avb_port_ptp_read, + .port_ptp_write = mv88e6390_g2_avb_port_ptp_write, + .ptp_read = mv88e6390_g2_avb_ptp_read, + .ptp_write = mv88e6390_g2_avb_ptp_write, + .tai_read = mv88e6390_g2_avb_tai_read, + .tai_write = mv88e6390_g2_avb_tai_write, +}; diff --git a/drivers/net/dsa/mv88e6xxx/global2_scratch.c b/drivers/net/dsa/mv88e6xxx/global2_scratch.c new file mode 100644 index 000000000000..0ff12bff9f0e --- /dev/null +++ b/drivers/net/dsa/mv88e6xxx/global2_scratch.c @@ -0,0 +1,240 @@ +/* + * Marvell 88E6xxx Switch Global 2 Scratch & Misc Registers support + * + * Copyright (c) 2008 Marvell Semiconductor + * + * Copyright (c) 2017 National Instruments + * Brandon Streiff <brandon.streiff@ni.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include "chip.h" +#include "global2.h" + +/* Offset 0x1A: Scratch and Misc. Register */ +static int mv88e6xxx_g2_scratch_read(struct mv88e6xxx_chip *chip, int reg, + u8 *data) +{ + u16 value; + int err; + + err = mv88e6xxx_g2_write(chip, MV88E6XXX_G2_SCRATCH_MISC_MISC, + reg << 8); + if (err) + return err; + + err = mv88e6xxx_g2_read(chip, MV88E6XXX_G2_SCRATCH_MISC_MISC, &value); + if (err) + return err; + + *data = (value & MV88E6XXX_G2_SCRATCH_MISC_DATA_MASK); + + return 0; +} + +static int mv88e6xxx_g2_scratch_write(struct mv88e6xxx_chip *chip, int reg, + u8 data) +{ + u16 value = (reg << 8) | data; + + return mv88e6xxx_g2_update(chip, MV88E6XXX_G2_SCRATCH_MISC_MISC, value); +} + +/** + * mv88e6xxx_g2_scratch_gpio_get_bit - get a bit + * @chip: chip private data + * @nr: bit index + * @set: is bit set? + */ +static int mv88e6xxx_g2_scratch_get_bit(struct mv88e6xxx_chip *chip, + int base_reg, unsigned int offset, + int *set) +{ + int reg = base_reg + (offset / 8); + u8 mask = (1 << (offset & 0x7)); + u8 val; + int err; + + err = mv88e6xxx_g2_scratch_read(chip, reg, &val); + if (err) + return err; + + *set = !!(mask & val); + + return 0; +} + +/** + * mv88e6xxx_g2_scratch_gpio_set_bit - set (or clear) a bit + * @chip: chip private data + * @nr: bit index + * @set: set if true, clear if false + * + * Helper function for dealing with the direction and data registers. + */ +static int mv88e6xxx_g2_scratch_set_bit(struct mv88e6xxx_chip *chip, + int base_reg, unsigned int offset, + int set) +{ + int reg = base_reg + (offset / 8); + u8 mask = (1 << (offset & 0x7)); + u8 val; + int err; + + err = mv88e6xxx_g2_scratch_read(chip, reg, &val); + if (err) + return err; + + if (set) + val |= mask; + else + val &= ~mask; + + return mv88e6xxx_g2_scratch_write(chip, reg, val); +} + +/** + * mv88e6352_g2_scratch_gpio_get_data - get data on gpio pin + * @chip: chip private data + * @pin: gpio index + * + * Return: 0 for low, 1 for high, negative error + */ +static int mv88e6352_g2_scratch_gpio_get_data(struct mv88e6xxx_chip *chip, + unsigned int pin) +{ + int val = 0; + int err; + + err = mv88e6xxx_g2_scratch_get_bit(chip, + MV88E6352_G2_SCRATCH_GPIO_DATA0, + pin, &val); + if (err) + return err; + + return val; +} + +/** + * mv88e6352_g2_scratch_gpio_set_data - set data on gpio pin + * @chip: chip private data + * @pin: gpio index + * @value: value to set + */ +static int mv88e6352_g2_scratch_gpio_set_data(struct mv88e6xxx_chip *chip, + unsigned int pin, int value) +{ + u8 mask = (1 << (pin & 0x7)); + int offset = (pin / 8); + int reg; + + reg = MV88E6352_G2_SCRATCH_GPIO_DATA0 + offset; + + if (value) + chip->gpio_data[offset] |= mask; + else + chip->gpio_data[offset] &= ~mask; + + return mv88e6xxx_g2_scratch_write(chip, reg, chip->gpio_data[offset]); +} + +/** + * mv88e6352_g2_scratch_gpio_get_dir - get direction of gpio pin + * @chip: chip private data + * @pin: gpio index + * + * Return: 0 for output, 1 for input (same as GPIOF_DIR_XXX). + */ +static int mv88e6352_g2_scratch_gpio_get_dir(struct mv88e6xxx_chip *chip, + unsigned int pin) +{ + int val = 0; + int err; + + err = mv88e6xxx_g2_scratch_get_bit(chip, + MV88E6352_G2_SCRATCH_GPIO_DIR0, + pin, &val); + if (err) + return err; + + return val; +} + +/** + * mv88e6352_g2_scratch_gpio_set_dir - set direction of gpio pin + * @chip: chip private data + * @pin: gpio index + */ +static int mv88e6352_g2_scratch_gpio_set_dir(struct mv88e6xxx_chip *chip, + unsigned int pin, bool input) +{ + int value = (input ? MV88E6352_G2_SCRATCH_GPIO_DIR_IN : + MV88E6352_G2_SCRATCH_GPIO_DIR_OUT); + + return mv88e6xxx_g2_scratch_set_bit(chip, + MV88E6352_G2_SCRATCH_GPIO_DIR0, + pin, value); +} + +/** + * mv88e6352_g2_scratch_gpio_get_pctl - get pin control setting + * @chip: chip private data + * @pin: gpio index + * @func: function number + * + * Note that the function numbers themselves may vary by chipset. + */ +static int mv88e6352_g2_scratch_gpio_get_pctl(struct mv88e6xxx_chip *chip, + unsigned int pin, int *func) +{ + int reg = MV88E6352_G2_SCRATCH_GPIO_PCTL0 + (pin / 2); + int offset = (pin & 0x1) ? 4 : 0; + u8 mask = (0x7 << offset); + int err; + u8 val; + + err = mv88e6xxx_g2_scratch_read(chip, reg, &val); + if (err) + return err; + + *func = (val & mask) >> offset; + + return 0; +} + +/** + * mv88e6352_g2_scratch_gpio_set_pctl - set pin control setting + * @chip: chip private data + * @pin: gpio index + * @func: function number + */ +static int mv88e6352_g2_scratch_gpio_set_pctl(struct mv88e6xxx_chip *chip, + unsigned int pin, int func) +{ + int reg = MV88E6352_G2_SCRATCH_GPIO_PCTL0 + (pin / 2); + int offset = (pin & 0x1) ? 4 : 0; + u8 mask = (0x7 << offset); + int err; + u8 val; + + err = mv88e6xxx_g2_scratch_read(chip, reg, &val); + if (err) + return err; + + val = (val & ~mask) | ((func & mask) << offset); + + return mv88e6xxx_g2_scratch_write(chip, reg, val); +} + +const struct mv88e6xxx_gpio_ops mv88e6352_gpio_ops = { + .get_data = mv88e6352_g2_scratch_gpio_get_data, + .set_data = mv88e6352_g2_scratch_gpio_set_data, + .get_dir = mv88e6352_g2_scratch_gpio_get_dir, + .set_dir = mv88e6352_g2_scratch_gpio_set_dir, + .get_pctl = mv88e6352_g2_scratch_gpio_get_pctl, + .set_pctl = mv88e6352_g2_scratch_gpio_set_pctl, +}; diff --git a/drivers/net/dsa/mv88e6xxx/hwtstamp.c b/drivers/net/dsa/mv88e6xxx/hwtstamp.c new file mode 100644 index 000000000000..ac7694c71266 --- /dev/null +++ b/drivers/net/dsa/mv88e6xxx/hwtstamp.c @@ -0,0 +1,576 @@ +/* + * Marvell 88E6xxx Switch hardware timestamping support + * + * Copyright (c) 2008 Marvell Semiconductor + * + * Copyright (c) 2017 National Instruments + * Erik Hons <erik.hons@ni.com> + * Brandon Streiff <brandon.streiff@ni.com> + * Dane Wagner <dane.wagner@ni.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include "chip.h" +#include "global2.h" +#include "hwtstamp.h" +#include "ptp.h" +#include <linux/ptp_classify.h> + +#define SKB_PTP_TYPE(__skb) (*(unsigned int *)((__skb)->cb)) + +static int mv88e6xxx_port_ptp_read(struct mv88e6xxx_chip *chip, int port, + int addr, u16 *data, int len) +{ + if (!chip->info->ops->avb_ops->port_ptp_read) + return -EOPNOTSUPP; + + return chip->info->ops->avb_ops->port_ptp_read(chip, port, addr, + data, len); +} + +static int mv88e6xxx_port_ptp_write(struct mv88e6xxx_chip *chip, int port, + int addr, u16 data) +{ + if (!chip->info->ops->avb_ops->port_ptp_write) + return -EOPNOTSUPP; + + return chip->info->ops->avb_ops->port_ptp_write(chip, port, addr, + data); +} + +static int mv88e6xxx_ptp_write(struct mv88e6xxx_chip *chip, int addr, + u16 data) +{ + if (!chip->info->ops->avb_ops->ptp_write) + return -EOPNOTSUPP; + + return chip->info->ops->avb_ops->ptp_write(chip, addr, data); +} + +/* TX_TSTAMP_TIMEOUT: This limits the time spent polling for a TX + * timestamp. When working properly, hardware will produce a timestamp + * within 1ms. Software may enounter delays due to MDIO contention, so + * the timeout is set accordingly. + */ +#define TX_TSTAMP_TIMEOUT msecs_to_jiffies(20) + +int mv88e6xxx_get_ts_info(struct dsa_switch *ds, int port, + struct ethtool_ts_info *info) +{ + struct mv88e6xxx_chip *chip = ds->priv; + + if (!chip->info->ptp_support) + return -EOPNOTSUPP; + + info->so_timestamping = + SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + info->phc_index = ptp_clock_index(chip->ptp_clock); + info->tx_types = + (1 << HWTSTAMP_TX_OFF) | + (1 << HWTSTAMP_TX_ON); + info->rx_filters = + (1 << HWTSTAMP_FILTER_NONE) | + (1 << HWTSTAMP_FILTER_PTP_V2_L4_EVENT) | + (1 << HWTSTAMP_FILTER_PTP_V2_L4_SYNC) | + (1 << HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ) | + (1 << HWTSTAMP_FILTER_PTP_V2_L2_EVENT) | + (1 << HWTSTAMP_FILTER_PTP_V2_L2_SYNC) | + (1 << HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ) | + (1 << HWTSTAMP_FILTER_PTP_V2_EVENT) | + (1 << HWTSTAMP_FILTER_PTP_V2_SYNC) | + (1 << HWTSTAMP_FILTER_PTP_V2_DELAY_REQ); + + return 0; +} + +static int mv88e6xxx_set_hwtstamp_config(struct mv88e6xxx_chip *chip, int port, + struct hwtstamp_config *config) +{ + struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port]; + bool tstamp_enable = false; + u16 port_config0; + int err; + + /* Prevent the TX/RX paths from trying to interact with the + * timestamp hardware while we reconfigure it. + */ + clear_bit_unlock(MV88E6XXX_HWTSTAMP_ENABLED, &ps->state); + + /* reserved for future extensions */ + if (config->flags) + return -EINVAL; + + switch (config->tx_type) { + case HWTSTAMP_TX_OFF: + tstamp_enable = false; + break; + case HWTSTAMP_TX_ON: + tstamp_enable = true; + break; + default: + return -ERANGE; + } + + /* The switch supports timestamping both L2 and L4; one cannot be + * disabled independently of the other. + */ + switch (config->rx_filter) { + case HWTSTAMP_FILTER_NONE: + tstamp_enable = false; + break; + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + break; + case HWTSTAMP_FILTER_ALL: + default: + config->rx_filter = HWTSTAMP_FILTER_NONE; + return -ERANGE; + } + + if (tstamp_enable) { + /* Disable transportSpecific value matching, so that packets + * with either 1588 (0) and 802.1AS (1) will be timestamped. + */ + port_config0 = MV88E6XXX_PORT_PTP_CFG0_DISABLE_TSPEC_MATCH; + } else { + /* Disable PTP. This disables both RX and TX timestamping. */ + port_config0 = MV88E6XXX_PORT_PTP_CFG0_DISABLE_PTP; + } + + mutex_lock(&chip->reg_lock); + err = mv88e6xxx_port_ptp_write(chip, port, MV88E6XXX_PORT_PTP_CFG0, + port_config0); + mutex_unlock(&chip->reg_lock); + + if (err < 0) + return err; + + /* Once hardware has been configured, enable timestamp checks + * in the RX/TX paths. + */ + if (tstamp_enable) + set_bit(MV88E6XXX_HWTSTAMP_ENABLED, &ps->state); + + return 0; +} + +int mv88e6xxx_port_hwtstamp_set(struct dsa_switch *ds, int port, + struct ifreq *ifr) +{ + struct mv88e6xxx_chip *chip = ds->priv; + struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port]; + struct hwtstamp_config config; + int err; + + if (!chip->info->ptp_support) + return -EOPNOTSUPP; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + err = mv88e6xxx_set_hwtstamp_config(chip, port, &config); + if (err) + return err; + + /* Save the chosen configuration to be returned later. */ + memcpy(&ps->tstamp_config, &config, sizeof(config)); + + return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? + -EFAULT : 0; +} + +int mv88e6xxx_port_hwtstamp_get(struct dsa_switch *ds, int port, + struct ifreq *ifr) +{ + struct mv88e6xxx_chip *chip = ds->priv; + struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port]; + struct hwtstamp_config *config = &ps->tstamp_config; + + if (!chip->info->ptp_support) + return -EOPNOTSUPP; + + return copy_to_user(ifr->ifr_data, config, sizeof(*config)) ? + -EFAULT : 0; +} + +/* Get the start of the PTP header in this skb */ +static u8 *parse_ptp_header(struct sk_buff *skb, unsigned int type) +{ + u8 *data = skb_mac_header(skb); + unsigned int offset = 0; + + if (type & PTP_CLASS_VLAN) + offset += VLAN_HLEN; + + switch (type & PTP_CLASS_PMASK) { + case PTP_CLASS_IPV4: + offset += ETH_HLEN + IPV4_HLEN(data + offset) + UDP_HLEN; + break; + case PTP_CLASS_IPV6: + offset += ETH_HLEN + IP6_HLEN + UDP_HLEN; + break; + case PTP_CLASS_L2: + offset += ETH_HLEN; + break; + default: + return NULL; + } + + /* Ensure that the entire header is present in this packet. */ + if (skb->len + ETH_HLEN < offset + 34) + return NULL; + + return data + offset; +} + +/* Returns a pointer to the PTP header if the caller should time stamp, + * or NULL if the caller should not. + */ +static u8 *mv88e6xxx_should_tstamp(struct mv88e6xxx_chip *chip, int port, + struct sk_buff *skb, unsigned int type) +{ + struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port]; + u8 *hdr; + + if (!chip->info->ptp_support) + return NULL; + + hdr = parse_ptp_header(skb, type); + if (!hdr) + return NULL; + + if (!test_bit(MV88E6XXX_HWTSTAMP_ENABLED, &ps->state)) + return NULL; + + return hdr; +} + +static int mv88e6xxx_ts_valid(u16 status) +{ + if (!(status & MV88E6XXX_PTP_TS_VALID)) + return 0; + if (status & MV88E6XXX_PTP_TS_STATUS_MASK) + return 0; + return 1; +} + +static int seq_match(struct sk_buff *skb, u16 ts_seqid) +{ + unsigned int type = SKB_PTP_TYPE(skb); + u8 *hdr = parse_ptp_header(skb, type); + __be16 *seqid; + + seqid = (__be16 *)(hdr + OFF_PTP_SEQUENCE_ID); + + return ts_seqid == ntohs(*seqid); +} + +static void mv88e6xxx_get_rxts(struct mv88e6xxx_chip *chip, + struct mv88e6xxx_port_hwtstamp *ps, + struct sk_buff *skb, u16 reg, + struct sk_buff_head *rxq) +{ + u16 buf[4] = { 0 }, status, seq_id; + u64 ns, timelo, timehi; + struct skb_shared_hwtstamps *shwt; + int err; + + mutex_lock(&chip->reg_lock); + err = mv88e6xxx_port_ptp_read(chip, ps->port_id, + reg, buf, ARRAY_SIZE(buf)); + mutex_unlock(&chip->reg_lock); + if (err) + pr_err("failed to get the receive time stamp\n"); + + status = buf[0]; + timelo = buf[1]; + timehi = buf[2]; + seq_id = buf[3]; + + if (status & MV88E6XXX_PTP_TS_VALID) { + mutex_lock(&chip->reg_lock); + err = mv88e6xxx_port_ptp_write(chip, ps->port_id, reg, 0); + mutex_unlock(&chip->reg_lock); + if (err) + pr_err("failed to clear the receive status\n"); + } + /* Since the device can only handle one time stamp at a time, + * we purge any extra frames from the queue. + */ + for ( ; skb; skb = skb_dequeue(rxq)) { + if (mv88e6xxx_ts_valid(status) && seq_match(skb, seq_id)) { + ns = timehi << 16 | timelo; + + mutex_lock(&chip->reg_lock); + ns = timecounter_cyc2time(&chip->tstamp_tc, ns); + mutex_unlock(&chip->reg_lock); + shwt = skb_hwtstamps(skb); + memset(shwt, 0, sizeof(*shwt)); + shwt->hwtstamp = ns_to_ktime(ns); + status &= ~MV88E6XXX_PTP_TS_VALID; + } + netif_rx_ni(skb); + } +} + +static void mv88e6xxx_rxtstamp_work(struct mv88e6xxx_chip *chip, + struct mv88e6xxx_port_hwtstamp *ps) +{ + struct sk_buff *skb; + + skb = skb_dequeue(&ps->rx_queue); + + if (skb) + mv88e6xxx_get_rxts(chip, ps, skb, MV88E6XXX_PORT_PTP_ARR0_STS, + &ps->rx_queue); + + skb = skb_dequeue(&ps->rx_queue2); + if (skb) + mv88e6xxx_get_rxts(chip, ps, skb, MV88E6XXX_PORT_PTP_ARR1_STS, + &ps->rx_queue2); +} + +static int is_pdelay_resp(u8 *msgtype) +{ + return (*msgtype & 0xf) == 3; +} + +bool mv88e6xxx_port_rxtstamp(struct dsa_switch *ds, int port, + struct sk_buff *skb, unsigned int type) +{ + struct mv88e6xxx_port_hwtstamp *ps; + struct mv88e6xxx_chip *chip; + u8 *hdr; + + chip = ds->priv; + ps = &chip->port_hwtstamp[port]; + + if (ps->tstamp_config.rx_filter != HWTSTAMP_FILTER_PTP_V2_EVENT) + return false; + + hdr = mv88e6xxx_should_tstamp(chip, port, skb, type); + if (!hdr) + return false; + + SKB_PTP_TYPE(skb) = type; + + if (is_pdelay_resp(hdr)) + skb_queue_tail(&ps->rx_queue2, skb); + else + skb_queue_tail(&ps->rx_queue, skb); + + ptp_schedule_worker(chip->ptp_clock, 0); + + return true; +} + +static int mv88e6xxx_txtstamp_work(struct mv88e6xxx_chip *chip, + struct mv88e6xxx_port_hwtstamp *ps) +{ + struct skb_shared_hwtstamps shhwtstamps; + u16 departure_block[4], status; + struct sk_buff *tmp_skb; + u32 time_raw; + int err; + u64 ns; + + if (!ps->tx_skb) + return 0; + + mutex_lock(&chip->reg_lock); + err = mv88e6xxx_port_ptp_read(chip, ps->port_id, + MV88E6XXX_PORT_PTP_DEP_STS, + departure_block, + ARRAY_SIZE(departure_block)); + mutex_unlock(&chip->reg_lock); + + if (err) + goto free_and_clear_skb; + + if (!(departure_block[0] & MV88E6XXX_PTP_TS_VALID)) { + if (time_is_before_jiffies(ps->tx_tstamp_start + + TX_TSTAMP_TIMEOUT)) { + dev_warn(chip->dev, "p%d: clearing tx timestamp hang\n", + ps->port_id); + goto free_and_clear_skb; + } + /* The timestamp should be available quickly, while getting it + * is high priority and time bounded to only 10ms. A poll is + * warranted so restart the work. + */ + return 1; + } + + /* We have the timestamp; go ahead and clear valid now */ + mutex_lock(&chip->reg_lock); + mv88e6xxx_port_ptp_write(chip, ps->port_id, + MV88E6XXX_PORT_PTP_DEP_STS, 0); + mutex_unlock(&chip->reg_lock); + + status = departure_block[0] & MV88E6XXX_PTP_TS_STATUS_MASK; + if (status != MV88E6XXX_PTP_TS_STATUS_NORMAL) { + dev_warn(chip->dev, "p%d: tx timestamp overrun\n", ps->port_id); + goto free_and_clear_skb; + } + + if (departure_block[3] != ps->tx_seq_id) { + dev_warn(chip->dev, "p%d: unexpected seq. id\n", ps->port_id); + goto free_and_clear_skb; + } + + memset(&shhwtstamps, 0, sizeof(shhwtstamps)); + time_raw = ((u32)departure_block[2] << 16) | departure_block[1]; + mutex_lock(&chip->reg_lock); + ns = timecounter_cyc2time(&chip->tstamp_tc, time_raw); + mutex_unlock(&chip->reg_lock); + shhwtstamps.hwtstamp = ns_to_ktime(ns); + + dev_dbg(chip->dev, + "p%d: txtstamp %llx status 0x%04x skb ID 0x%04x hw ID 0x%04x\n", + ps->port_id, ktime_to_ns(shhwtstamps.hwtstamp), + departure_block[0], ps->tx_seq_id, departure_block[3]); + + /* skb_complete_tx_timestamp() will free up the client to make + * another timestamp-able transmit. We have to be ready for it + * -- by clearing the ps->tx_skb "flag" -- beforehand. + */ + + tmp_skb = ps->tx_skb; + ps->tx_skb = NULL; + clear_bit_unlock(MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS, &ps->state); + skb_complete_tx_timestamp(tmp_skb, &shhwtstamps); + + return 0; + +free_and_clear_skb: + dev_kfree_skb_any(ps->tx_skb); + ps->tx_skb = NULL; + clear_bit_unlock(MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS, &ps->state); + + return 0; +} + +long mv88e6xxx_hwtstamp_work(struct ptp_clock_info *ptp) +{ + struct mv88e6xxx_chip *chip = ptp_to_chip(ptp); + struct dsa_switch *ds = chip->ds; + struct mv88e6xxx_port_hwtstamp *ps; + int i, restart = 0; + + for (i = 0; i < ds->num_ports; i++) { + if (!dsa_is_user_port(ds, i)) + continue; + + ps = &chip->port_hwtstamp[i]; + if (test_bit(MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS, &ps->state)) + restart |= mv88e6xxx_txtstamp_work(chip, ps); + + mv88e6xxx_rxtstamp_work(chip, ps); + } + + return restart ? 1 : -1; +} + +bool mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port, + struct sk_buff *clone, unsigned int type) +{ + struct mv88e6xxx_chip *chip = ds->priv; + struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port]; + __be16 *seq_ptr; + u8 *hdr; + + if (!(skb_shinfo(clone)->tx_flags & SKBTX_HW_TSTAMP)) + return false; + + hdr = mv88e6xxx_should_tstamp(chip, port, clone, type); + if (!hdr) + return false; + + seq_ptr = (__be16 *)(hdr + OFF_PTP_SEQUENCE_ID); + + if (test_and_set_bit_lock(MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS, + &ps->state)) + return false; + + ps->tx_skb = clone; + ps->tx_tstamp_start = jiffies; + ps->tx_seq_id = be16_to_cpup(seq_ptr); + + ptp_schedule_worker(chip->ptp_clock, 0); + return true; +} + +static int mv88e6xxx_hwtstamp_port_setup(struct mv88e6xxx_chip *chip, int port) +{ + struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port]; + + ps->port_id = port; + + skb_queue_head_init(&ps->rx_queue); + skb_queue_head_init(&ps->rx_queue2); + + return mv88e6xxx_port_ptp_write(chip, port, MV88E6XXX_PORT_PTP_CFG0, + MV88E6XXX_PORT_PTP_CFG0_DISABLE_PTP); +} + +int mv88e6xxx_hwtstamp_setup(struct mv88e6xxx_chip *chip) +{ + int err; + int i; + + /* Disable timestamping on all ports. */ + for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) { + err = mv88e6xxx_hwtstamp_port_setup(chip, i); + if (err) + return err; + } + + /* MV88E6XXX_PTP_MSG_TYPE is a mask of PTP message types to + * timestamp. This affects all ports that have timestamping enabled, + * but the timestamp config is per-port; thus we configure all events + * here and only support the HWTSTAMP_FILTER_*_EVENT filter types. + */ + err = mv88e6xxx_ptp_write(chip, MV88E6XXX_PTP_MSGTYPE, + MV88E6XXX_PTP_MSGTYPE_ALL_EVENT); + if (err) + return err; + + /* Use ARRIVAL1 for peer delay response messages. */ + err = mv88e6xxx_ptp_write(chip, MV88E6XXX_PTP_TS_ARRIVAL_PTR, + MV88E6XXX_PTP_MSGTYPE_PDLAY_RES); + if (err) + return err; + + /* 88E6341 devices default to timestamping at the PHY, but this has + * a hardware issue that results in unreliable timestamps. Force + * these devices to timestamp at the MAC. + */ + if (chip->info->family == MV88E6XXX_FAMILY_6341) { + u16 val = MV88E6341_PTP_CFG_UPDATE | + MV88E6341_PTP_CFG_MODE_IDX | + MV88E6341_PTP_CFG_MODE_TS_AT_MAC; + err = mv88e6xxx_ptp_write(chip, MV88E6341_PTP_CFG, val); + if (err) + return err; + } + + return 0; +} + +void mv88e6xxx_hwtstamp_free(struct mv88e6xxx_chip *chip) +{ +} diff --git a/drivers/net/dsa/mv88e6xxx/hwtstamp.h b/drivers/net/dsa/mv88e6xxx/hwtstamp.h new file mode 100644 index 000000000000..bc71c9212a08 --- /dev/null +++ b/drivers/net/dsa/mv88e6xxx/hwtstamp.h @@ -0,0 +1,172 @@ +/* + * Marvell 88E6xxx Switch hardware timestamping support + * + * Copyright (c) 2008 Marvell Semiconductor + * + * Copyright (c) 2017 National Instruments + * Erik Hons <erik.hons@ni.com> + * Brandon Streiff <brandon.streiff@ni.com> + * Dane Wagner <dane.wagner@ni.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef _MV88E6XXX_HWTSTAMP_H +#define _MV88E6XXX_HWTSTAMP_H + +#include "chip.h" + +/* Global PTP registers */ +/* Offset 0x00: PTP EtherType */ +#define MV88E6XXX_PTP_ETHERTYPE 0x00 + +/* Offset 0x01: Message Type Timestamp Enables */ +#define MV88E6XXX_PTP_MSGTYPE 0x01 +#define MV88E6XXX_PTP_MSGTYPE_SYNC 0x0001 +#define MV88E6XXX_PTP_MSGTYPE_DELAY_REQ 0x0002 +#define MV88E6XXX_PTP_MSGTYPE_PDLAY_REQ 0x0004 +#define MV88E6XXX_PTP_MSGTYPE_PDLAY_RES 0x0008 +#define MV88E6XXX_PTP_MSGTYPE_ALL_EVENT 0x000f + +/* Offset 0x02: Timestamp Arrival Capture Pointers */ +#define MV88E6XXX_PTP_TS_ARRIVAL_PTR 0x02 + +/* Offset 0x07: PTP Global Configuration */ +#define MV88E6341_PTP_CFG 0x07 +#define MV88E6341_PTP_CFG_UPDATE 0x8000 +#define MV88E6341_PTP_CFG_IDX_MASK 0x7f00 +#define MV88E6341_PTP_CFG_DATA_MASK 0x00ff +#define MV88E6341_PTP_CFG_MODE_IDX 0x0 +#define MV88E6341_PTP_CFG_MODE_TS_AT_PHY 0x00 +#define MV88E6341_PTP_CFG_MODE_TS_AT_MAC 0x80 + +/* Offset 0x08: PTP Interrupt Status */ +#define MV88E6XXX_PTP_IRQ_STATUS 0x08 + +/* Per-Port PTP Registers */ +/* Offset 0x00: PTP Configuration 0 */ +#define MV88E6XXX_PORT_PTP_CFG0 0x00 +#define MV88E6XXX_PORT_PTP_CFG0_TSPEC_SHIFT 12 +#define MV88E6XXX_PORT_PTP_CFG0_TSPEC_MASK 0xf000 +#define MV88E6XXX_PORT_PTP_CFG0_TSPEC_1588 0x0000 +#define MV88E6XXX_PORT_PTP_CFG0_TSPEC_8021AS 0x1000 +#define MV88E6XXX_PORT_PTP_CFG0_DISABLE_TSPEC_MATCH 0x0800 +#define MV88E6XXX_PORT_PTP_CFG0_DISABLE_OVERWRITE 0x0002 +#define MV88E6XXX_PORT_PTP_CFG0_DISABLE_PTP 0x0001 + +/* Offset 0x01: PTP Configuration 1 */ +#define MV88E6XXX_PORT_PTP_CFG1 0x01 + +/* Offset 0x02: PTP Configuration 2 */ +#define MV88E6XXX_PORT_PTP_CFG2 0x02 +#define MV88E6XXX_PORT_PTP_CFG2_EMBED_ARRIVAL 0x1000 +#define MV88E6XXX_PORT_PTP_CFG2_DEP_IRQ_EN 0x0002 +#define MV88E6XXX_PORT_PTP_CFG2_ARR_IRQ_EN 0x0001 + +/* Offset 0x03: PTP LED Configuration */ +#define MV88E6XXX_PORT_PTP_LED_CFG 0x03 + +/* Offset 0x08: PTP Arrival 0 Status */ +#define MV88E6XXX_PORT_PTP_ARR0_STS 0x08 + +/* Offset 0x09/0x0A: PTP Arrival 0 Time */ +#define MV88E6XXX_PORT_PTP_ARR0_TIME_LO 0x09 +#define MV88E6XXX_PORT_PTP_ARR0_TIME_HI 0x0a + +/* Offset 0x0B: PTP Arrival 0 Sequence ID */ +#define MV88E6XXX_PORT_PTP_ARR0_SEQID 0x0b + +/* Offset 0x0C: PTP Arrival 1 Status */ +#define MV88E6XXX_PORT_PTP_ARR1_STS 0x0c + +/* Offset 0x0D/0x0E: PTP Arrival 1 Time */ +#define MV88E6XXX_PORT_PTP_ARR1_TIME_LO 0x0d +#define MV88E6XXX_PORT_PTP_ARR1_TIME_HI 0x0e + +/* Offset 0x0F: PTP Arrival 1 Sequence ID */ +#define MV88E6XXX_PORT_PTP_ARR1_SEQID 0x0f + +/* Offset 0x10: PTP Departure Status */ +#define MV88E6XXX_PORT_PTP_DEP_STS 0x10 + +/* Offset 0x11/0x12: PTP Deperture Time */ +#define MV88E6XXX_PORT_PTP_DEP_TIME_LO 0x11 +#define MV88E6XXX_PORT_PTP_DEP_TIME_HI 0x12 + +/* Offset 0x13: PTP Departure Sequence ID */ +#define MV88E6XXX_PORT_PTP_DEP_SEQID 0x13 + +/* Status fields for arrival and depature timestamp status registers */ +#define MV88E6XXX_PTP_TS_STATUS_MASK 0x0006 +#define MV88E6XXX_PTP_TS_STATUS_NORMAL 0x0000 +#define MV88E6XXX_PTP_TS_STATUS_OVERWITTEN 0x0002 +#define MV88E6XXX_PTP_TS_STATUS_DISCARDED 0x0004 +#define MV88E6XXX_PTP_TS_VALID 0x0001 + +#ifdef CONFIG_NET_DSA_MV88E6XXX_PTP + +int mv88e6xxx_port_hwtstamp_set(struct dsa_switch *ds, int port, + struct ifreq *ifr); +int mv88e6xxx_port_hwtstamp_get(struct dsa_switch *ds, int port, + struct ifreq *ifr); + +bool mv88e6xxx_port_rxtstamp(struct dsa_switch *ds, int port, + struct sk_buff *clone, unsigned int type); +bool mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port, + struct sk_buff *clone, unsigned int type); + +int mv88e6xxx_get_ts_info(struct dsa_switch *ds, int port, + struct ethtool_ts_info *info); + +int mv88e6xxx_hwtstamp_setup(struct mv88e6xxx_chip *chip); +void mv88e6xxx_hwtstamp_free(struct mv88e6xxx_chip *chip); + +#else /* !CONFIG_NET_DSA_MV88E6XXX_PTP */ + +static inline int mv88e6xxx_port_hwtstamp_set(struct dsa_switch *ds, + int port, struct ifreq *ifr) +{ + return -EOPNOTSUPP; +} + +static inline int mv88e6xxx_port_hwtstamp_get(struct dsa_switch *ds, + int port, struct ifreq *ifr) +{ + return -EOPNOTSUPP; +} + +static inline bool mv88e6xxx_port_rxtstamp(struct dsa_switch *ds, int port, + struct sk_buff *clone, + unsigned int type) +{ + return false; +} + +static inline bool mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port, + struct sk_buff *clone, + unsigned int type) +{ + return false; +} + +static inline int mv88e6xxx_get_ts_info(struct dsa_switch *ds, int port, + struct ethtool_ts_info *info) +{ + return -EOPNOTSUPP; +} + +static inline int mv88e6xxx_hwtstamp_setup(struct mv88e6xxx_chip *chip) +{ + return 0; +} + +static inline void mv88e6xxx_hwtstamp_free(struct mv88e6xxx_chip *chip) +{ +} + +#endif /* CONFIG_NET_DSA_MV88E6XXX_PTP */ + +#endif /* _MV88E6XXX_HWTSTAMP_H */ diff --git a/drivers/net/dsa/mv88e6xxx/ptp.c b/drivers/net/dsa/mv88e6xxx/ptp.c new file mode 100644 index 000000000000..bd85e2c390e1 --- /dev/null +++ b/drivers/net/dsa/mv88e6xxx/ptp.c @@ -0,0 +1,381 @@ +/* + * Marvell 88E6xxx Switch PTP support + * + * Copyright (c) 2008 Marvell Semiconductor + * + * Copyright (c) 2017 National Instruments + * Erik Hons <erik.hons@ni.com> + * Brandon Streiff <brandon.streiff@ni.com> + * Dane Wagner <dane.wagner@ni.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include "chip.h" +#include "global2.h" +#include "ptp.h" + +/* Raw timestamps are in units of 8-ns clock periods. */ +#define CC_SHIFT 28 +#define CC_MULT (8 << CC_SHIFT) +#define CC_MULT_NUM (1 << 9) +#define CC_MULT_DEM 15625ULL + +#define TAI_EVENT_WORK_INTERVAL msecs_to_jiffies(100) + +#define cc_to_chip(cc) container_of(cc, struct mv88e6xxx_chip, tstamp_cc) +#define dw_overflow_to_chip(dw) container_of(dw, struct mv88e6xxx_chip, \ + overflow_work) +#define dw_tai_event_to_chip(dw) container_of(dw, struct mv88e6xxx_chip, \ + tai_event_work) + +static int mv88e6xxx_tai_read(struct mv88e6xxx_chip *chip, int addr, + u16 *data, int len) +{ + if (!chip->info->ops->avb_ops->tai_read) + return -EOPNOTSUPP; + + return chip->info->ops->avb_ops->tai_read(chip, addr, data, len); +} + +static int mv88e6xxx_tai_write(struct mv88e6xxx_chip *chip, int addr, u16 data) +{ + if (!chip->info->ops->avb_ops->tai_write) + return -EOPNOTSUPP; + + return chip->info->ops->avb_ops->tai_write(chip, addr, data); +} + +/* TODO: places where this are called should be using pinctrl */ +static int mv88e6xxx_set_gpio_func(struct mv88e6xxx_chip *chip, int pin, + int func, int input) +{ + int err; + + if (!chip->info->ops->gpio_ops) + return -EOPNOTSUPP; + + err = chip->info->ops->gpio_ops->set_dir(chip, pin, input); + if (err) + return err; + + return chip->info->ops->gpio_ops->set_pctl(chip, pin, func); +} + +static u64 mv88e6xxx_ptp_clock_read(const struct cyclecounter *cc) +{ + struct mv88e6xxx_chip *chip = cc_to_chip(cc); + u16 phc_time[2]; + int err; + + err = mv88e6xxx_tai_read(chip, MV88E6XXX_TAI_TIME_LO, phc_time, + ARRAY_SIZE(phc_time)); + if (err) + return 0; + else + return ((u32)phc_time[1] << 16) | phc_time[0]; +} + +/* mv88e6xxx_config_eventcap - configure TAI event capture + * @event: PTP_CLOCK_PPS (internal) or PTP_CLOCK_EXTTS (external) + * @rising: zero for falling-edge trigger, else rising-edge trigger + * + * This will also reset the capture sequence counter. + */ +static int mv88e6xxx_config_eventcap(struct mv88e6xxx_chip *chip, int event, + int rising) +{ + u16 global_config; + u16 cap_config; + int err; + + chip->evcap_config = MV88E6XXX_TAI_CFG_CAP_OVERWRITE | + MV88E6XXX_TAI_CFG_CAP_CTR_START; + if (!rising) + chip->evcap_config |= MV88E6XXX_TAI_CFG_EVREQ_FALLING; + + global_config = (chip->evcap_config | chip->trig_config); + err = mv88e6xxx_tai_write(chip, MV88E6XXX_TAI_CFG, global_config); + if (err) + return err; + + if (event == PTP_CLOCK_PPS) { + cap_config = MV88E6XXX_TAI_EVENT_STATUS_CAP_TRIG; + } else if (event == PTP_CLOCK_EXTTS) { + /* if STATUS_CAP_TRIG is unset we capture PTP_EVREQ events */ + cap_config = 0; + } else { + return -EINVAL; + } + + /* Write the capture config; this also clears the capture counter */ + err = mv88e6xxx_tai_write(chip, MV88E6XXX_TAI_EVENT_STATUS, + cap_config); + + return err; +} + +static void mv88e6xxx_tai_event_work(struct work_struct *ugly) +{ + struct delayed_work *dw = to_delayed_work(ugly); + struct mv88e6xxx_chip *chip = dw_tai_event_to_chip(dw); + struct ptp_clock_event ev; + u16 status[4]; + u32 raw_ts; + int err; + + mutex_lock(&chip->reg_lock); + err = mv88e6xxx_tai_read(chip, MV88E6XXX_TAI_EVENT_STATUS, + status, ARRAY_SIZE(status)); + mutex_unlock(&chip->reg_lock); + + if (err) { + dev_err(chip->dev, "failed to read TAI status register\n"); + return; + } + if (status[0] & MV88E6XXX_TAI_EVENT_STATUS_ERROR) { + dev_warn(chip->dev, "missed event capture\n"); + return; + } + if (!(status[0] & MV88E6XXX_TAI_EVENT_STATUS_VALID)) + goto out; + + raw_ts = ((u32)status[2] << 16) | status[1]; + + /* Clear the valid bit so the next timestamp can come in */ + status[0] &= ~MV88E6XXX_TAI_EVENT_STATUS_VALID; + mutex_lock(&chip->reg_lock); + err = mv88e6xxx_tai_write(chip, MV88E6XXX_TAI_EVENT_STATUS, status[0]); + mutex_unlock(&chip->reg_lock); + + /* This is an external timestamp */ + ev.type = PTP_CLOCK_EXTTS; + + /* We only have one timestamping channel. */ + ev.index = 0; + mutex_lock(&chip->reg_lock); + ev.timestamp = timecounter_cyc2time(&chip->tstamp_tc, raw_ts); + mutex_unlock(&chip->reg_lock); + + ptp_clock_event(chip->ptp_clock, &ev); +out: + schedule_delayed_work(&chip->tai_event_work, TAI_EVENT_WORK_INTERVAL); +} + +static int mv88e6xxx_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) +{ + struct mv88e6xxx_chip *chip = ptp_to_chip(ptp); + int neg_adj = 0; + u32 diff, mult; + u64 adj; + + if (scaled_ppm < 0) { + neg_adj = 1; + scaled_ppm = -scaled_ppm; + } + mult = CC_MULT; + adj = CC_MULT_NUM; + adj *= scaled_ppm; + diff = div_u64(adj, CC_MULT_DEM); + + mutex_lock(&chip->reg_lock); + + timecounter_read(&chip->tstamp_tc); + chip->tstamp_cc.mult = neg_adj ? mult - diff : mult + diff; + + mutex_unlock(&chip->reg_lock); + + return 0; +} + +static int mv88e6xxx_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) +{ + struct mv88e6xxx_chip *chip = ptp_to_chip(ptp); + + mutex_lock(&chip->reg_lock); + timecounter_adjtime(&chip->tstamp_tc, delta); + mutex_unlock(&chip->reg_lock); + + return 0; +} + +static int mv88e6xxx_ptp_gettime(struct ptp_clock_info *ptp, + struct timespec64 *ts) +{ + struct mv88e6xxx_chip *chip = ptp_to_chip(ptp); + u64 ns; + + mutex_lock(&chip->reg_lock); + ns = timecounter_read(&chip->tstamp_tc); + mutex_unlock(&chip->reg_lock); + + *ts = ns_to_timespec64(ns); + + return 0; +} + +static int mv88e6xxx_ptp_settime(struct ptp_clock_info *ptp, + const struct timespec64 *ts) +{ + struct mv88e6xxx_chip *chip = ptp_to_chip(ptp); + u64 ns; + + ns = timespec64_to_ns(ts); + + mutex_lock(&chip->reg_lock); + timecounter_init(&chip->tstamp_tc, &chip->tstamp_cc, ns); + mutex_unlock(&chip->reg_lock); + + return 0; +} + +static int mv88e6xxx_ptp_enable_extts(struct mv88e6xxx_chip *chip, + struct ptp_clock_request *rq, int on) +{ + int rising = (rq->extts.flags & PTP_RISING_EDGE); + int func; + int pin; + int err; + + pin = ptp_find_pin(chip->ptp_clock, PTP_PF_EXTTS, rq->extts.index); + + if (pin < 0) + return -EBUSY; + + mutex_lock(&chip->reg_lock); + + if (on) { + func = MV88E6352_G2_SCRATCH_GPIO_PCTL_EVREQ; + + err = mv88e6xxx_set_gpio_func(chip, pin, func, true); + if (err) + goto out; + + schedule_delayed_work(&chip->tai_event_work, + TAI_EVENT_WORK_INTERVAL); + + err = mv88e6xxx_config_eventcap(chip, PTP_CLOCK_EXTTS, rising); + } else { + func = MV88E6352_G2_SCRATCH_GPIO_PCTL_GPIO; + + err = mv88e6xxx_set_gpio_func(chip, pin, func, true); + + cancel_delayed_work_sync(&chip->tai_event_work); + } + +out: + mutex_unlock(&chip->reg_lock); + + return err; +} + +static int mv88e6xxx_ptp_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, int on) +{ + struct mv88e6xxx_chip *chip = ptp_to_chip(ptp); + + switch (rq->type) { + case PTP_CLK_REQ_EXTTS: + return mv88e6xxx_ptp_enable_extts(chip, rq, on); + default: + return -EOPNOTSUPP; + } +} + +static int mv88e6xxx_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin, + enum ptp_pin_function func, unsigned int chan) +{ + switch (func) { + case PTP_PF_NONE: + case PTP_PF_EXTTS: + break; + case PTP_PF_PEROUT: + case PTP_PF_PHYSYNC: + return -EOPNOTSUPP; + } + return 0; +} + +/* With a 125MHz input clock, the 32-bit timestamp counter overflows in ~34.3 + * seconds; this task forces periodic reads so that we don't miss any. + */ +#define MV88E6XXX_TAI_OVERFLOW_PERIOD (HZ * 16) +static void mv88e6xxx_ptp_overflow_check(struct work_struct *work) +{ + struct delayed_work *dw = to_delayed_work(work); + struct mv88e6xxx_chip *chip = dw_overflow_to_chip(dw); + struct timespec64 ts; + + mv88e6xxx_ptp_gettime(&chip->ptp_clock_info, &ts); + + schedule_delayed_work(&chip->overflow_work, + MV88E6XXX_TAI_OVERFLOW_PERIOD); +} + +int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip) +{ + int i; + + /* Set up the cycle counter */ + memset(&chip->tstamp_cc, 0, sizeof(chip->tstamp_cc)); + chip->tstamp_cc.read = mv88e6xxx_ptp_clock_read; + chip->tstamp_cc.mask = CYCLECOUNTER_MASK(32); + chip->tstamp_cc.mult = CC_MULT; + chip->tstamp_cc.shift = CC_SHIFT; + + timecounter_init(&chip->tstamp_tc, &chip->tstamp_cc, + ktime_to_ns(ktime_get_real())); + + INIT_DELAYED_WORK(&chip->overflow_work, mv88e6xxx_ptp_overflow_check); + INIT_DELAYED_WORK(&chip->tai_event_work, mv88e6xxx_tai_event_work); + + chip->ptp_clock_info.owner = THIS_MODULE; + snprintf(chip->ptp_clock_info.name, sizeof(chip->ptp_clock_info.name), + dev_name(chip->dev)); + chip->ptp_clock_info.max_adj = 1000000; + + chip->ptp_clock_info.n_ext_ts = 1; + chip->ptp_clock_info.n_per_out = 0; + chip->ptp_clock_info.n_pins = mv88e6xxx_num_gpio(chip); + chip->ptp_clock_info.pps = 0; + + for (i = 0; i < chip->ptp_clock_info.n_pins; ++i) { + struct ptp_pin_desc *ppd = &chip->pin_config[i]; + + snprintf(ppd->name, sizeof(ppd->name), "mv88e6xxx_gpio%d", i); + ppd->index = i; + ppd->func = PTP_PF_NONE; + } + chip->ptp_clock_info.pin_config = chip->pin_config; + + chip->ptp_clock_info.adjfine = mv88e6xxx_ptp_adjfine; + chip->ptp_clock_info.adjtime = mv88e6xxx_ptp_adjtime; + chip->ptp_clock_info.gettime64 = mv88e6xxx_ptp_gettime; + chip->ptp_clock_info.settime64 = mv88e6xxx_ptp_settime; + chip->ptp_clock_info.enable = mv88e6xxx_ptp_enable; + chip->ptp_clock_info.verify = mv88e6xxx_ptp_verify; + chip->ptp_clock_info.do_aux_work = mv88e6xxx_hwtstamp_work; + + chip->ptp_clock = ptp_clock_register(&chip->ptp_clock_info, chip->dev); + if (IS_ERR(chip->ptp_clock)) + return PTR_ERR(chip->ptp_clock); + + schedule_delayed_work(&chip->overflow_work, + MV88E6XXX_TAI_OVERFLOW_PERIOD); + + return 0; +} + +void mv88e6xxx_ptp_free(struct mv88e6xxx_chip *chip) +{ + if (chip->ptp_clock) { + cancel_delayed_work_sync(&chip->overflow_work); + cancel_delayed_work_sync(&chip->tai_event_work); + + ptp_clock_unregister(chip->ptp_clock); + chip->ptp_clock = NULL; + } +} diff --git a/drivers/net/dsa/mv88e6xxx/ptp.h b/drivers/net/dsa/mv88e6xxx/ptp.h new file mode 100644 index 000000000000..992818ade746 --- /dev/null +++ b/drivers/net/dsa/mv88e6xxx/ptp.h @@ -0,0 +1,108 @@ +/* + * Marvell 88E6xxx Switch PTP support + * + * Copyright (c) 2008 Marvell Semiconductor + * + * Copyright (c) 2017 National Instruments + * Erik Hons <erik.hons@ni.com> + * Brandon Streiff <brandon.streiff@ni.com> + * Dane Wagner <dane.wagner@ni.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef _MV88E6XXX_PTP_H +#define _MV88E6XXX_PTP_H + +#include "chip.h" + +/* Offset 0x00: TAI Global Config */ +#define MV88E6XXX_TAI_CFG 0x00 +#define MV88E6XXX_TAI_CFG_CAP_OVERWRITE 0x8000 +#define MV88E6XXX_TAI_CFG_CAP_CTR_START 0x4000 +#define MV88E6XXX_TAI_CFG_EVREQ_FALLING 0x2000 +#define MV88E6XXX_TAI_CFG_TRIG_ACTIVE_LO 0x1000 +#define MV88E6XXX_TAI_CFG_IRL_ENABLE 0x0400 +#define MV88E6XXX_TAI_CFG_TRIG_IRQ_EN 0x0200 +#define MV88E6XXX_TAI_CFG_EVREQ_IRQ_EN 0x0100 +#define MV88E6XXX_TAI_CFG_TRIG_LOCK 0x0080 +#define MV88E6XXX_TAI_CFG_BLOCK_UPDATE 0x0008 +#define MV88E6XXX_TAI_CFG_MULTI_PTP 0x0004 +#define MV88E6XXX_TAI_CFG_TRIG_MODE_ONESHOT 0x0002 +#define MV88E6XXX_TAI_CFG_TRIG_ENABLE 0x0001 + +/* Offset 0x01: Timestamp Clock Period (ps) */ +#define MV88E6XXX_TAI_CLOCK_PERIOD 0x01 + +/* Offset 0x02/0x03: Trigger Generation Amount */ +#define MV88E6XXX_TAI_TRIG_GEN_AMOUNT_LO 0x02 +#define MV88E6XXX_TAI_TRIG_GEN_AMOUNT_HI 0x03 + +/* Offset 0x04: Clock Compensation */ +#define MV88E6XXX_TAI_TRIG_CLOCK_COMP 0x04 + +/* Offset 0x05: Trigger Configuration */ +#define MV88E6XXX_TAI_TRIG_CFG 0x05 + +/* Offset 0x06: Ingress Rate Limiter Clock Generation Amount */ +#define MV88E6XXX_TAI_IRL_AMOUNT 0x06 + +/* Offset 0x07: Ingress Rate Limiter Compensation */ +#define MV88E6XXX_TAI_IRL_COMP 0x07 + +/* Offset 0x08: Ingress Rate Limiter Compensation */ +#define MV88E6XXX_TAI_IRL_COMP_PS 0x08 + +/* Offset 0x09: Event Status */ +#define MV88E6XXX_TAI_EVENT_STATUS 0x09 +#define MV88E6XXX_TAI_EVENT_STATUS_CAP_TRIG 0x4000 +#define MV88E6XXX_TAI_EVENT_STATUS_ERROR 0x0200 +#define MV88E6XXX_TAI_EVENT_STATUS_VALID 0x0100 +#define MV88E6XXX_TAI_EVENT_STATUS_CTR_MASK 0x00ff + +/* Offset 0x0A/0x0B: Event Time */ +#define MV88E6XXX_TAI_EVENT_TIME_LO 0x0a +#define MV88E6XXX_TAI_EVENT_TYPE_HI 0x0b + +/* Offset 0x0E/0x0F: PTP Global Time */ +#define MV88E6XXX_TAI_TIME_LO 0x0e +#define MV88E6XXX_TAI_TIME_HI 0x0f + +/* Offset 0x10/0x11: Trig Generation Time */ +#define MV88E6XXX_TAI_TRIG_TIME_LO 0x10 +#define MV88E6XXX_TAI_TRIG_TIME_HI 0x11 + +/* Offset 0x12: Lock Status */ +#define MV88E6XXX_TAI_LOCK_STATUS 0x12 + +#ifdef CONFIG_NET_DSA_MV88E6XXX_PTP + +long mv88e6xxx_hwtstamp_work(struct ptp_clock_info *ptp); +int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip); +void mv88e6xxx_ptp_free(struct mv88e6xxx_chip *chip); + +#define ptp_to_chip(ptp) container_of(ptp, struct mv88e6xxx_chip, \ + ptp_clock_info) + +#else /* !CONFIG_NET_DSA_MV88E6XXX_PTP */ + +static long mv88e6xxx_hwtstamp_work(struct ptp_clock_info *ptp) +{ + return -1; +} + +static inline int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip) +{ + return 0; +} + +static void mv88e6xxx_ptp_free(struct mv88e6xxx_chip *chip) +{ +} + +#endif /* CONFIG_NET_DSA_MV88E6XXX_PTP */ + +#endif /* _MV88E6XXX_PTP_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c index 00a1d2d13169..9da6f57901a9 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c @@ -878,6 +878,86 @@ static int cudbg_get_payload_range(struct adapter *padap, u8 mem_type, &payload->start, &payload->end); } +static int cudbg_memory_read(struct cudbg_init *pdbg_init, int win, + int mtype, u32 addr, u32 len, void *hbuf) +{ + u32 win_pf, memoffset, mem_aperture, mem_base; + struct adapter *adap = pdbg_init->adap; + u32 pos, offset, resid; + u32 *res_buf; + u64 *buf; + int ret; + + /* Argument sanity checks ... + */ + if (addr & 0x3 || (uintptr_t)hbuf & 0x3) + return -EINVAL; + + buf = (u64 *)hbuf; + + /* Try to do 64-bit reads. Residual will be handled later. */ + resid = len & 0x7; + len -= resid; + + ret = t4_memory_rw_init(adap, win, mtype, &memoffset, &mem_base, + &mem_aperture); + if (ret) + return ret; + + addr = addr + memoffset; + win_pf = is_t4(adap->params.chip) ? 0 : PFNUM_V(adap->pf); + + pos = addr & ~(mem_aperture - 1); + offset = addr - pos; + + /* Set up initial PCI-E Memory Window to cover the start of our + * transfer. + */ + t4_memory_update_win(adap, win, pos | win_pf); + + /* Transfer data from the adapter */ + while (len > 0) { + *buf++ = le64_to_cpu((__force __le64) + t4_read_reg64(adap, mem_base + offset)); + offset += sizeof(u64); + len -= sizeof(u64); + + /* If we've reached the end of our current window aperture, + * move the PCI-E Memory Window on to the next. + */ + if (offset == mem_aperture) { + pos += mem_aperture; + offset = 0; + t4_memory_update_win(adap, win, pos | win_pf); + } + } + + res_buf = (u32 *)buf; + /* Read residual in 32-bit multiples */ + while (resid > sizeof(u32)) { + *res_buf++ = le32_to_cpu((__force __le32) + t4_read_reg(adap, mem_base + offset)); + offset += sizeof(u32); + resid -= sizeof(u32); + + /* If we've reached the end of our current window aperture, + * move the PCI-E Memory Window on to the next. + */ + if (offset == mem_aperture) { + pos += mem_aperture; + offset = 0; + t4_memory_update_win(adap, win, pos | win_pf); + } + } + + /* Transfer residual < 32-bits */ + if (resid) + t4_memory_rw_residual(adap, resid, mem_base + offset, + (u8 *)res_buf, T4_MEMORY_READ); + + return 0; +} + #define CUDBG_YIELD_ITERATION 256 static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init, @@ -937,10 +1017,8 @@ static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init, goto skip_read; spin_lock(&padap->win0_lock); - rc = t4_memory_rw(padap, MEMWIN_NIC, mem_type, - bytes_read, bytes, - (__be32 *)temp_buff.data, - 1); + rc = cudbg_memory_read(pdbg_init, MEMWIN_NIC, mem_type, + bytes_read, bytes, temp_buff.data); spin_unlock(&padap->win0_lock); if (rc) { cudbg_err->sys_err = rc; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 9040e13ce4b7..d3fa53db61ee 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -1488,6 +1488,11 @@ u32 t4_read_pcie_cfg4(struct adapter *adap, int reg); u32 t4_get_util_window(struct adapter *adap); void t4_setup_memwin(struct adapter *adap, u32 memwin_base, u32 window); +int t4_memory_rw_init(struct adapter *adap, int win, int mtype, u32 *mem_off, + u32 *mem_base, u32 *mem_aperture); +void t4_memory_update_win(struct adapter *adap, int win, u32 addr); +void t4_memory_rw_residual(struct adapter *adap, u32 off, u32 addr, u8 *buf, + int dir); #define T4_MEMORY_WRITE 0 #define T4_MEMORY_READ 1 int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, u32 len, diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 920bccd6bc40..bd41f93f73ed 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -484,6 +484,117 @@ static int t4_edc_err_read(struct adapter *adap, int idx) } /** + * t4_memory_rw_init - Get memory window relative offset, base, and size. + * @adap: the adapter + * @win: PCI-E Memory Window to use + * @mtype: memory type: MEM_EDC0, MEM_EDC1 or MEM_MC + * @mem_off: memory relative offset with respect to @mtype. + * @mem_base: configured memory base address. + * @mem_aperture: configured memory window aperture. + * + * Get the configured memory window's relative offset, base, and size. + */ +int t4_memory_rw_init(struct adapter *adap, int win, int mtype, u32 *mem_off, + u32 *mem_base, u32 *mem_aperture) +{ + u32 edc_size, mc_size, mem_reg; + + /* Offset into the region of memory which is being accessed + * MEM_EDC0 = 0 + * MEM_EDC1 = 1 + * MEM_MC = 2 -- MEM_MC for chips with only 1 memory controller + * MEM_MC1 = 3 -- for chips with 2 memory controllers (e.g. T5) + * MEM_HMA = 4 + */ + edc_size = EDRAM0_SIZE_G(t4_read_reg(adap, MA_EDRAM0_BAR_A)); + if (mtype == MEM_HMA) { + *mem_off = 2 * (edc_size * 1024 * 1024); + } else if (mtype != MEM_MC1) { + *mem_off = (mtype * (edc_size * 1024 * 1024)); + } else { + mc_size = EXT_MEM0_SIZE_G(t4_read_reg(adap, + MA_EXT_MEMORY0_BAR_A)); + *mem_off = (MEM_MC0 * edc_size + mc_size) * 1024 * 1024; + } + + /* Each PCI-E Memory Window is programmed with a window size -- or + * "aperture" -- which controls the granularity of its mapping onto + * adapter memory. We need to grab that aperture in order to know + * how to use the specified window. The window is also programmed + * with the base address of the Memory Window in BAR0's address + * space. For T4 this is an absolute PCI-E Bus Address. For T5 + * the address is relative to BAR0. + */ + mem_reg = t4_read_reg(adap, + PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A, + win)); + /* a dead adapter will return 0xffffffff for PIO reads */ + if (mem_reg == 0xffffffff) + return -ENXIO; + + *mem_aperture = 1 << (WINDOW_G(mem_reg) + WINDOW_SHIFT_X); + *mem_base = PCIEOFST_G(mem_reg) << PCIEOFST_SHIFT_X; + if (is_t4(adap->params.chip)) + *mem_base -= adap->t4_bar0; + + return 0; +} + +/** + * t4_memory_update_win - Move memory window to specified address. + * @adap: the adapter + * @win: PCI-E Memory Window to use + * @addr: location to move. + * + * Move memory window to specified address. + */ +void t4_memory_update_win(struct adapter *adap, int win, u32 addr) +{ + t4_write_reg(adap, + PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win), + addr); + /* Read it back to ensure that changes propagate before we + * attempt to use the new value. + */ + t4_read_reg(adap, + PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win)); +} + +/** + * t4_memory_rw_residual - Read/Write residual data. + * @adap: the adapter + * @off: relative offset within residual to start read/write. + * @addr: address within indicated memory type. + * @buf: host memory buffer + * @dir: direction of transfer T4_MEMORY_READ (1) or T4_MEMORY_WRITE (0) + * + * Read/Write residual data less than 32-bits. + */ +void t4_memory_rw_residual(struct adapter *adap, u32 off, u32 addr, u8 *buf, + int dir) +{ + union { + u32 word; + char byte[4]; + } last; + unsigned char *bp; + int i; + + if (dir == T4_MEMORY_READ) { + last.word = le32_to_cpu((__force __le32) + t4_read_reg(adap, addr)); + for (bp = (unsigned char *)buf, i = off; i < 4; i++) + bp[i] = last.byte[i]; + } else { + last.word = *buf; + for (i = off; i < 4; i++) + last.byte[i] = 0; + t4_write_reg(adap, addr, + (__force u32)cpu_to_le32(last.word)); + } +} + +/** * t4_memory_rw - read/write EDC 0, EDC 1 or MC via PCIE memory window * @adap: the adapter * @win: PCI-E Memory Window to use @@ -504,8 +615,9 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, u32 len, void *hbuf, int dir) { u32 pos, offset, resid, memoffset; - u32 edc_size, mc_size, win_pf, mem_reg, mem_aperture, mem_base; + u32 win_pf, mem_aperture, mem_base; u32 *buf; + int ret; /* Argument sanity checks ... */ @@ -521,59 +633,26 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, resid = len & 0x3; len -= resid; - /* Offset into the region of memory which is being accessed - * MEM_EDC0 = 0 - * MEM_EDC1 = 1 - * MEM_MC = 2 -- MEM_MC for chips with only 1 memory controller - * MEM_MC1 = 3 -- for chips with 2 memory controllers (e.g. T5) - * MEM_HMA = 4 - */ - edc_size = EDRAM0_SIZE_G(t4_read_reg(adap, MA_EDRAM0_BAR_A)); - if (mtype == MEM_HMA) { - memoffset = 2 * (edc_size * 1024 * 1024); - } else if (mtype != MEM_MC1) { - memoffset = (mtype * (edc_size * 1024 * 1024)); - } else { - mc_size = EXT_MEM0_SIZE_G(t4_read_reg(adap, - MA_EXT_MEMORY0_BAR_A)); - memoffset = (MEM_MC0 * edc_size + mc_size) * 1024 * 1024; - } + ret = t4_memory_rw_init(adap, win, mtype, &memoffset, &mem_base, + &mem_aperture); + if (ret) + return ret; /* Determine the PCIE_MEM_ACCESS_OFFSET */ addr = addr + memoffset; - /* Each PCI-E Memory Window is programmed with a window size -- or - * "aperture" -- which controls the granularity of its mapping onto - * adapter memory. We need to grab that aperture in order to know - * how to use the specified window. The window is also programmed - * with the base address of the Memory Window in BAR0's address - * space. For T4 this is an absolute PCI-E Bus Address. For T5 - * the address is relative to BAR0. - */ - mem_reg = t4_read_reg(adap, - PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A, - win)); - mem_aperture = 1 << (WINDOW_G(mem_reg) + WINDOW_SHIFT_X); - mem_base = PCIEOFST_G(mem_reg) << PCIEOFST_SHIFT_X; - if (is_t4(adap->params.chip)) - mem_base -= adap->t4_bar0; win_pf = is_t4(adap->params.chip) ? 0 : PFNUM_V(adap->pf); /* Calculate our initial PCI-E Memory Window Position and Offset into * that Window. */ - pos = addr & ~(mem_aperture-1); + pos = addr & ~(mem_aperture - 1); offset = addr - pos; /* Set up initial PCI-E Memory Window to cover the start of our - * transfer. (Read it back to ensure that changes propagate before we - * attempt to use the new value.) + * transfer. */ - t4_write_reg(adap, - PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win), - pos | win_pf); - t4_read_reg(adap, - PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win)); + t4_memory_update_win(adap, win, pos | win_pf); /* Transfer data to/from the adapter as long as there's an integral * number of 32-bit transfers to complete. @@ -628,12 +707,7 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, if (offset == mem_aperture) { pos += mem_aperture; offset = 0; - t4_write_reg(adap, - PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, - win), pos | win_pf); - t4_read_reg(adap, - PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, - win)); + t4_memory_update_win(adap, win, pos | win_pf); } } @@ -642,28 +716,9 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, * residual amount. The PCI-E Memory Window has already been moved * above (if necessary) to cover this final transfer. */ - if (resid) { - union { - u32 word; - char byte[4]; - } last; - unsigned char *bp; - int i; - - if (dir == T4_MEMORY_READ) { - last.word = le32_to_cpu( - (__force __le32)t4_read_reg(adap, - mem_base + offset)); - for (bp = (unsigned char *)buf, i = resid; i < 4; i++) - bp[i] = last.byte[i]; - } else { - last.word = *buf; - for (i = resid; i < 4; i++) - last.byte[i] = 0; - t4_write_reg(adap, mem_base + offset, - (__force u32)cpu_to_le32(last.word)); - } - } + if (resid) + t4_memory_rw_residual(adap, resid, mem_base + offset, + (u8 *)buf, dir); return 0; } diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 7caa8da48421..a998c36c5e61 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2766,7 +2766,7 @@ static int dpaa_eth_probe(struct platform_device *pdev) priv->channel = (u16)channel; - /* Start a thread that will walk the CPUs with affine portals + /* Walk the CPUs with affine portals * and add this pool channel to each's dequeue mask. */ dpaa_eth_add_channel(priv->channel); diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c index faea674094b9..85306d1b2acf 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c @@ -211,7 +211,7 @@ static int dpaa_set_pauseparam(struct net_device *net_dev, if (epause->rx_pause) newadv = ADVERTISED_Pause | ADVERTISED_Asym_Pause; if (epause->tx_pause) - newadv |= ADVERTISED_Asym_Pause; + newadv ^= ADVERTISED_Asym_Pause; oldadv = phydev->advertising & (ADVERTISED_Pause | ADVERTISED_Asym_Pause); diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 46e9f4e0a02c..36d9401a6258 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -507,6 +507,7 @@ struct i40e_pf { #define I40E_HW_STOP_FW_LLDP BIT(16) #define I40E_HW_PORT_ID_VALID BIT(17) #define I40E_HW_RESTART_AUTONEG BIT(18) +#define I40E_HW_STOPPABLE_FW_LLDP BIT(19) u64 flags; #define I40E_FLAG_RX_CSUM_ENABLED BIT_ULL(0) @@ -824,6 +825,7 @@ struct i40e_q_vector { struct i40e_ring_container rx; struct i40e_ring_container tx; + u8 itr_countdown; /* when 0 should adjust adaptive ITR */ u8 num_ringpairs; /* total number of ring pairs in vector */ cpumask_t affinity_mask; @@ -832,8 +834,6 @@ struct i40e_q_vector { struct rcu_head rcu; /* to avoid race with update stats on free */ char name[I40E_INT_NAME_STR_LEN]; bool arm_wb_state; -#define ITR_COUNTDOWN_START 100 - u8 itr_countdown; /* when 0 should adjust ITR */ } ____cacheline_internodealigned_in_smp; /* lan device */ @@ -1109,4 +1109,10 @@ static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi) int i40e_create_queue_channel(struct i40e_vsi *vsi, struct i40e_channel *ch); int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate); +int i40e_add_del_cloud_filter(struct i40e_vsi *vsi, + struct i40e_cloud_filter *filter, + bool add); +int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi, + struct i40e_cloud_filter *filter, + bool add); #endif /* _I40E_H_ */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 4c3b4243cf65..b829fd365693 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -155,8 +155,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) dev_info(&pf->pdev->dev, " vlan_features = 0x%08lx\n", (unsigned long int)nd->vlan_features); } - dev_info(&pf->pdev->dev, - " vlgrp: & = %p\n", vsi->active_vlans); + dev_info(&pf->pdev->dev, " active_vlans is %s\n", + vsi->active_vlans ? "<valid>" : "<null>"); dev_info(&pf->pdev->dev, " flags = 0x%08lx, netdev_registered = %i, current_netdev_flags = 0x%04x\n", vsi->flags, vsi->netdev_registered, vsi->current_netdev_flags); @@ -270,14 +270,6 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) continue; dev_info(&pf->pdev->dev, - " rx_rings[%i]: desc = %p\n", - i, rx_ring->desc); - dev_info(&pf->pdev->dev, - " rx_rings[%i]: dev = %p, netdev = %p, rx_bi = %p\n", - i, rx_ring->dev, - rx_ring->netdev, - rx_ring->rx_bi); - dev_info(&pf->pdev->dev, " rx_rings[%i]: state = %lu, queue_index = %d, reg_idx = %d\n", i, *rx_ring->state, rx_ring->queue_index, @@ -307,17 +299,12 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) rx_ring->rx_stats.realloc_count, rx_ring->rx_stats.page_reuse_count); dev_info(&pf->pdev->dev, - " rx_rings[%i]: size = %i, dma = 0x%08lx\n", - i, rx_ring->size, - (unsigned long int)rx_ring->dma); - dev_info(&pf->pdev->dev, - " rx_rings[%i]: vsi = %p, q_vector = %p\n", - i, rx_ring->vsi, - rx_ring->q_vector); + " rx_rings[%i]: size = %i\n", + i, rx_ring->size); dev_info(&pf->pdev->dev, - " rx_rings[%i]: rx_itr_setting = %d (%s)\n", - i, rx_ring->rx_itr_setting, - ITR_IS_DYNAMIC(rx_ring->rx_itr_setting) ? "dynamic" : "fixed"); + " rx_rings[%i]: itr_setting = %d (%s)\n", + i, rx_ring->itr_setting, + ITR_IS_DYNAMIC(rx_ring->itr_setting) ? "dynamic" : "fixed"); } for (i = 0; i < vsi->num_queue_pairs; i++) { struct i40e_ring *tx_ring = READ_ONCE(vsi->tx_rings[i]); @@ -326,14 +313,6 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) continue; dev_info(&pf->pdev->dev, - " tx_rings[%i]: desc = %p\n", - i, tx_ring->desc); - dev_info(&pf->pdev->dev, - " tx_rings[%i]: dev = %p, netdev = %p, tx_bi = %p\n", - i, tx_ring->dev, - tx_ring->netdev, - tx_ring->tx_bi); - dev_info(&pf->pdev->dev, " tx_rings[%i]: state = %lu, queue_index = %d, reg_idx = %d\n", i, *tx_ring->state, tx_ring->queue_index, @@ -355,20 +334,15 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) tx_ring->tx_stats.tx_busy, tx_ring->tx_stats.tx_done_old); dev_info(&pf->pdev->dev, - " tx_rings[%i]: size = %i, dma = 0x%08lx\n", - i, tx_ring->size, - (unsigned long int)tx_ring->dma); - dev_info(&pf->pdev->dev, - " tx_rings[%i]: vsi = %p, q_vector = %p\n", - i, tx_ring->vsi, - tx_ring->q_vector); + " tx_rings[%i]: size = %i\n", + i, tx_ring->size); dev_info(&pf->pdev->dev, " tx_rings[%i]: DCB tc = %d\n", i, tx_ring->dcb_tc); dev_info(&pf->pdev->dev, - " tx_rings[%i]: tx_itr_setting = %d (%s)\n", - i, tx_ring->tx_itr_setting, - ITR_IS_DYNAMIC(tx_ring->tx_itr_setting) ? "dynamic" : "fixed"); + " tx_rings[%i]: itr_setting = %d (%s)\n", + i, tx_ring->itr_setting, + ITR_IS_DYNAMIC(tx_ring->itr_setting) ? "dynamic" : "fixed"); } rcu_read_unlock(); dev_info(&pf->pdev->dev, @@ -466,8 +440,6 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) vsi->info.resp_reserved[6], vsi->info.resp_reserved[7], vsi->info.resp_reserved[8], vsi->info.resp_reserved[9], vsi->info.resp_reserved[10], vsi->info.resp_reserved[11]); - if (vsi->back) - dev_info(&pf->pdev->dev, " PF = %p\n", vsi->back); dev_info(&pf->pdev->dev, " idx = %d\n", vsi->idx); dev_info(&pf->pdev->dev, " tc_config: numtc = %d, enabled_tc = 0x%x\n", diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 2f5bee713fef..0dcbbda164c4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -2244,14 +2244,14 @@ static int __i40e_get_coalesce(struct net_device *netdev, rx_ring = vsi->rx_rings[queue]; tx_ring = vsi->tx_rings[queue]; - if (ITR_IS_DYNAMIC(rx_ring->rx_itr_setting)) + if (ITR_IS_DYNAMIC(rx_ring->itr_setting)) ec->use_adaptive_rx_coalesce = 1; - if (ITR_IS_DYNAMIC(tx_ring->tx_itr_setting)) + if (ITR_IS_DYNAMIC(tx_ring->itr_setting)) ec->use_adaptive_tx_coalesce = 1; - ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC; - ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC; + ec->rx_coalesce_usecs = rx_ring->itr_setting & ~I40E_ITR_DYNAMIC; + ec->tx_coalesce_usecs = tx_ring->itr_setting & ~I40E_ITR_DYNAMIC; /* we use the _usecs_high to store/set the interrupt rate limit * that the hardware supports, that almost but not quite @@ -2311,34 +2311,35 @@ static void i40e_set_itr_per_queue(struct i40e_vsi *vsi, struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; struct i40e_q_vector *q_vector; - u16 vector, intrl; + u16 intrl; intrl = i40e_intrl_usec_to_reg(vsi->int_rate_limit); - rx_ring->rx_itr_setting = ec->rx_coalesce_usecs; - tx_ring->tx_itr_setting = ec->tx_coalesce_usecs; + rx_ring->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs); + tx_ring->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs); if (ec->use_adaptive_rx_coalesce) - rx_ring->rx_itr_setting |= I40E_ITR_DYNAMIC; + rx_ring->itr_setting |= I40E_ITR_DYNAMIC; else - rx_ring->rx_itr_setting &= ~I40E_ITR_DYNAMIC; + rx_ring->itr_setting &= ~I40E_ITR_DYNAMIC; if (ec->use_adaptive_tx_coalesce) - tx_ring->tx_itr_setting |= I40E_ITR_DYNAMIC; + tx_ring->itr_setting |= I40E_ITR_DYNAMIC; else - tx_ring->tx_itr_setting &= ~I40E_ITR_DYNAMIC; + tx_ring->itr_setting &= ~I40E_ITR_DYNAMIC; q_vector = rx_ring->q_vector; - q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting); - vector = vsi->base_vector + q_vector->v_idx; - wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1), q_vector->rx.itr); + q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting); q_vector = tx_ring->q_vector; - q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting); - vector = vsi->base_vector + q_vector->v_idx; - wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1), q_vector->tx.itr); + q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting); - wr32(hw, I40E_PFINT_RATEN(vector - 1), intrl); + /* The interrupt handler itself will take care of programming + * the Tx and Rx ITR values based on the values we have entered + * into the q_vector, no need to write the values now. + */ + + wr32(hw, I40E_PFINT_RATEN(q_vector->reg_idx), intrl); i40e_flush(hw); } @@ -2364,11 +2365,11 @@ static int __i40e_set_coalesce(struct net_device *netdev, vsi->work_limit = ec->tx_max_coalesced_frames_irq; if (queue < 0) { - cur_rx_itr = vsi->rx_rings[0]->rx_itr_setting; - cur_tx_itr = vsi->tx_rings[0]->tx_itr_setting; + cur_rx_itr = vsi->rx_rings[0]->itr_setting; + cur_tx_itr = vsi->tx_rings[0]->itr_setting; } else if (queue < vsi->num_queue_pairs) { - cur_rx_itr = vsi->rx_rings[queue]->rx_itr_setting; - cur_tx_itr = vsi->tx_rings[queue]->tx_itr_setting; + cur_rx_itr = vsi->rx_rings[queue]->itr_setting; + cur_tx_itr = vsi->tx_rings[queue]->itr_setting; } else { netif_info(pf, drv, netdev, "Invalid queue value, queue range is 0 - %d\n", vsi->num_queue_pairs - 1); @@ -2396,7 +2397,7 @@ static int __i40e_set_coalesce(struct net_device *netdev, return -EINVAL; } - if (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1)) { + if (ec->rx_coalesce_usecs > I40E_MAX_ITR) { netif_info(pf, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n"); return -EINVAL; } @@ -2407,16 +2408,16 @@ static int __i40e_set_coalesce(struct net_device *netdev, return -EINVAL; } - if (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1)) { + if (ec->tx_coalesce_usecs > I40E_MAX_ITR) { netif_info(pf, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n"); return -EINVAL; } if (ec->use_adaptive_rx_coalesce && !cur_rx_itr) - ec->rx_coalesce_usecs = I40E_MIN_ITR << 1; + ec->rx_coalesce_usecs = I40E_MIN_ITR; if (ec->use_adaptive_tx_coalesce && !cur_tx_itr) - ec->tx_coalesce_usecs = I40E_MIN_ITR << 1; + ec->tx_coalesce_usecs = I40E_MIN_ITR; intrl_reg = i40e_intrl_usec_to_reg(ec->rx_coalesce_usecs_high); vsi->int_rate_limit = INTRL_REG_TO_USEC(intrl_reg); @@ -4406,6 +4407,8 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags) } flags_complete: + changed_flags = orig_flags ^ new_flags; + /* Before we finalize any flag changes, we need to perform some * checks to ensure that the changes are supported and safe. */ @@ -4415,21 +4418,17 @@ flags_complete: !(pf->hw_features & I40E_HW_ATR_EVICT_CAPABLE)) return -EOPNOTSUPP; - /* Disable FW LLDP not supported if NPAR active or if FW - * API version < 1.7 + /* If the driver detected FW LLDP was disabled on init, this flag could + * be set, however we do not support _changing_ the flag if NPAR is + * enabled or FW API version < 1.7. There are situations where older + * FW versions/NPAR enabled PFs could disable LLDP, however we _must_ + * not allow the user to enable/disable LLDP with this flag on + * unsupported FW versions. */ - if (new_flags & I40E_FLAG_DISABLE_FW_LLDP) { - if (pf->hw.func_caps.npar_enable) { - dev_warn(&pf->pdev->dev, - "Unable to stop FW LLDP if NPAR active\n"); - return -EOPNOTSUPP; - } - - if (pf->hw.aq.api_maj_ver < 1 || - (pf->hw.aq.api_maj_ver == 1 && - pf->hw.aq.api_min_ver < 7)) { + if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) { + if (!(pf->hw_features & I40E_HW_STOPPABLE_FW_LLDP)) { dev_warn(&pf->pdev->dev, - "FW ver does not support stopping FW LLDP\n"); + "Device does not support changing FW LLDP\n"); return -EOPNOTSUPP; } } @@ -4439,6 +4438,10 @@ flags_complete: * something else has modified the flags variable since we copied it * originally. We'll just punt with an error and log something in the * message buffer. + * + * This is the point of no return for this function. We need to have + * checked any discrepancies or misconfigurations and returned + * EOPNOTSUPP before updating pf->flags here. */ if (cmpxchg64(&pf->flags, orig_flags, new_flags) != orig_flags) { dev_warn(&pf->pdev->dev, @@ -4446,8 +4449,6 @@ flags_complete: return -EAGAIN; } - changed_flags = orig_flags ^ new_flags; - /* Process any additional changes needed as a result of flag changes. * The changed_flags value reflects the list of bits that were * changed in the code above. @@ -4479,6 +4480,12 @@ flags_complete: } } + if ((changed_flags & pf->flags & + I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED) && + (pf->flags & I40E_FLAG_MFP_ENABLED)) + dev_warn(&pf->pdev->dev, + "Turning on link-down-on-close flag may affect other partitions\n"); + if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) { if (pf->flags & I40E_FLAG_DISABLE_FW_LLDP) { struct i40e_dcbx_config *dcbcfg; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index e31adbc75f9c..f6d37456f3b7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -69,12 +69,6 @@ static int i40e_reset(struct i40e_pf *pf); static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired); static void i40e_fdir_sb_setup(struct i40e_pf *pf); static int i40e_veb_get_bw_info(struct i40e_veb *veb); -static int i40e_add_del_cloud_filter(struct i40e_vsi *vsi, - struct i40e_cloud_filter *filter, - bool add); -static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi, - struct i40e_cloud_filter *filter, - bool add); static int i40e_get_capabilities(struct i40e_pf *pf, enum i40e_admin_queue_opc list_type); @@ -215,8 +209,8 @@ static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile, if (!pile || needed == 0 || id >= I40E_PILE_VALID_BIT) { dev_info(&pf->pdev->dev, - "param err: pile=%p needed=%d id=0x%04x\n", - pile, needed, id); + "param err: pile=%s needed=%d id=0x%04x\n", + pile ? "<valid>" : "<null>", needed, id); return -EINVAL; } @@ -1380,14 +1374,7 @@ struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi, ether_addr_copy(f->macaddr, macaddr); f->vlan = vlan; - /* If we're in overflow promisc mode, set the state directly - * to failed, so we don't bother to try sending the filter - * to the hardware. - */ - if (test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state)) - f->state = I40E_FILTER_FAILED; - else - f->state = I40E_FILTER_NEW; + f->state = I40E_FILTER_NEW; INIT_HLIST_NODE(&f->hlist); key = i40e_addr_to_hkey(macaddr); @@ -2116,17 +2103,16 @@ void i40e_aqc_del_filters(struct i40e_vsi *vsi, const char *vsi_name, * @list: the list of filters to send to firmware * @add_head: Position in the add hlist * @num_add: the number of filters to add - * @promisc_change: set to true on exit if promiscuous mode was forced on * * Send a request to firmware via AdminQ to add a chunk of filters. Will set - * promisc_changed to true if the firmware has run out of space for more - * filters. + * __I40E_VSI_OVERFLOW_PROMISC bit in vsi->state if the firmware has run out of + * space for more filters. */ static void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name, struct i40e_aqc_add_macvlan_element_data *list, struct i40e_new_mac_filter *add_head, - int num_add, bool *promisc_changed) + int num_add) { struct i40e_hw *hw = &vsi->back->hw; int aq_err, fcnt; @@ -2136,7 +2122,6 @@ void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name, fcnt = i40e_update_filter_state(num_add, list, add_head); if (fcnt != num_add) { - *promisc_changed = true; set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); dev_warn(&vsi->back->pdev->dev, "Error %s adding RX filters on %s, promiscuous mode forced on\n", @@ -2177,11 +2162,13 @@ i40e_aqc_broadcast_filter(struct i40e_vsi *vsi, const char *vsi_name, NULL); } - if (aq_ret) + if (aq_ret) { + set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); dev_warn(&vsi->back->pdev->dev, - "Error %s setting broadcast promiscuous mode on %s\n", + "Error %s, forcing overflow promiscuous on %s\n", i40e_aq_str(hw, hw->aq.asq_last_status), vsi_name); + } return aq_ret; } @@ -2267,9 +2254,9 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) struct i40e_mac_filter *f; struct i40e_new_mac_filter *new, *add_head = NULL; struct i40e_hw *hw = &vsi->back->hw; + bool old_overflow, new_overflow; unsigned int failed_filters = 0; unsigned int vlan_filters = 0; - bool promisc_changed = false; char vsi_name[16] = "PF"; int filter_list_len = 0; i40e_status aq_ret = 0; @@ -2291,6 +2278,8 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) usleep_range(1000, 2000); pf = vsi->back; + old_overflow = test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); + if (vsi->netdev) { changed_flags = vsi->current_netdev_flags ^ vsi->netdev->flags; vsi->current_netdev_flags = vsi->netdev->flags; @@ -2423,12 +2412,6 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) num_add = 0; hlist_for_each_entry_safe(new, h, &tmp_add_list, hlist) { - if (test_bit(__I40E_VSI_OVERFLOW_PROMISC, - vsi->state)) { - new->state = I40E_FILTER_FAILED; - continue; - } - /* handle broadcast filters by updating the broadcast * promiscuous flag instead of adding a MAC filter. */ @@ -2464,15 +2447,14 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) /* flush a full buffer */ if (num_add == filter_list_len) { i40e_aqc_add_filters(vsi, vsi_name, add_list, - add_head, num_add, - &promisc_changed); + add_head, num_add); memset(add_list, 0, list_size); num_add = 0; } } if (num_add) { i40e_aqc_add_filters(vsi, vsi_name, add_list, add_head, - num_add, &promisc_changed); + num_add); } /* Now move all of the filters from the temp add list back to * the VSI's list. @@ -2501,24 +2483,16 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) } spin_unlock_bh(&vsi->mac_filter_hash_lock); - /* If promiscuous mode has changed, we need to calculate a new - * threshold for when we are safe to exit - */ - if (promisc_changed) - vsi->promisc_threshold = (vsi->active_filters * 3) / 4; - /* Check if we are able to exit overflow promiscuous mode. We can * safely exit if we didn't just enter, we no longer have any failed * filters, and we have reduced filters below the threshold value. */ - if (test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state) && - !promisc_changed && !failed_filters && - (vsi->active_filters < vsi->promisc_threshold)) { + if (old_overflow && !failed_filters && + vsi->active_filters < vsi->promisc_threshold) { dev_info(&pf->pdev->dev, "filter logjam cleared on %s, leaving overflow promiscuous mode\n", vsi_name); clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); - promisc_changed = true; vsi->promisc_threshold = 0; } @@ -2528,6 +2502,14 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) goto out; } + new_overflow = test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); + + /* If we are entering overflow promiscuous, we need to calculate a new + * threshold for when we are safe to exit + */ + if (!old_overflow && new_overflow) + vsi->promisc_threshold = (vsi->active_filters * 3) / 4; + /* check for changes in promiscuous modes */ if (changed_flags & IFF_ALLMULTI) { bool cur_multipromisc; @@ -2548,12 +2530,11 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) } } - if ((changed_flags & IFF_PROMISC) || promisc_changed) { + if ((changed_flags & IFF_PROMISC) || old_overflow != new_overflow) { bool cur_promisc; cur_promisc = (!!(vsi->current_netdev_flags & IFF_PROMISC) || - test_bit(__I40E_VSI_OVERFLOW_PROMISC, - vsi->state)); + new_overflow); aq_ret = i40e_set_promiscuous(pf, cur_promisc); if (aq_ret) { retval = i40e_aq_rc_to_posix(aq_ret, @@ -3449,15 +3430,20 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi) for (i = 0; i < vsi->num_q_vectors; i++, vector++) { struct i40e_q_vector *q_vector = vsi->q_vectors[i]; - q_vector->itr_countdown = ITR_COUNTDOWN_START; - q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[i]->rx_itr_setting); - q_vector->rx.latency_range = I40E_LOW_LATENCY; + q_vector->rx.next_update = jiffies + 1; + q_vector->rx.target_itr = + ITR_TO_REG(vsi->rx_rings[i]->itr_setting); wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1), - q_vector->rx.itr); - q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[i]->tx_itr_setting); - q_vector->tx.latency_range = I40E_LOW_LATENCY; + q_vector->rx.target_itr); + q_vector->rx.current_itr = q_vector->rx.target_itr; + + q_vector->tx.next_update = jiffies + 1; + q_vector->tx.target_itr = + ITR_TO_REG(vsi->tx_rings[i]->itr_setting); wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1), - q_vector->tx.itr); + q_vector->tx.target_itr); + q_vector->tx.current_itr = q_vector->tx.target_itr; + wr32(hw, I40E_PFINT_RATEN(vector - 1), i40e_intrl_usec_to_reg(vsi->int_rate_limit)); @@ -3558,13 +3544,14 @@ static void i40e_configure_msi_and_legacy(struct i40e_vsi *vsi) u32 val; /* set the ITR configuration */ - q_vector->itr_countdown = ITR_COUNTDOWN_START; - q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[0]->rx_itr_setting); - q_vector->rx.latency_range = I40E_LOW_LATENCY; - wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.itr); - q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[0]->tx_itr_setting); - q_vector->tx.latency_range = I40E_LOW_LATENCY; - wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.itr); + q_vector->rx.next_update = jiffies + 1; + q_vector->rx.target_itr = ITR_TO_REG(vsi->rx_rings[0]->itr_setting); + wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.target_itr); + q_vector->rx.current_itr = q_vector->rx.target_itr; + q_vector->tx.next_update = jiffies + 1; + q_vector->tx.target_itr = ITR_TO_REG(vsi->tx_rings[0]->itr_setting); + wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.target_itr); + q_vector->tx.current_itr = q_vector->tx.target_itr; i40e_enable_misc_int_causes(pf); @@ -5375,7 +5362,7 @@ out: * @vsi: VSI to be configured * **/ -int i40e_get_link_speed(struct i40e_vsi *vsi) +static int i40e_get_link_speed(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; @@ -6848,8 +6835,8 @@ i40e_set_cld_element(struct i40e_cloud_filter *filter, * Add or delete a cloud filter for a specific flow spec. * Returns 0 if the filter were successfully added. **/ -static int i40e_add_del_cloud_filter(struct i40e_vsi *vsi, - struct i40e_cloud_filter *filter, bool add) +int i40e_add_del_cloud_filter(struct i40e_vsi *vsi, + struct i40e_cloud_filter *filter, bool add) { struct i40e_aqc_cloud_filters_element_data cld_filter; struct i40e_pf *pf = vsi->back; @@ -6915,9 +6902,9 @@ static int i40e_add_del_cloud_filter(struct i40e_vsi *vsi, * Add or delete a cloud filter for a specific flow spec using big buffer. * Returns 0 if the filter were successfully added. **/ -static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi, - struct i40e_cloud_filter *filter, - bool add) +int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi, + struct i40e_cloud_filter *filter, + bool add) { struct i40e_aqc_cloud_filters_element_bb cld_filter; struct i40e_pf *pf = vsi->back; @@ -9215,6 +9202,17 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) } i40e_get_oem_version(&pf->hw); + if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) && + ((hw->aq.fw_maj_ver == 4 && hw->aq.fw_min_ver <= 33) || + hw->aq.fw_maj_ver < 4) && hw->mac.type == I40E_MAC_XL710) { + /* The following delay is necessary for 4.33 firmware and older + * to recover after EMP reset. 200 ms should suffice but we + * put here 300 ms to be sure that FW is ready to operate + * after reset. + */ + mdelay(300); + } + /* re-verify the eeprom if we just had an EMP reset */ if (test_and_clear_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state)) i40e_verify_eeprom(pf); @@ -9937,18 +9935,17 @@ static int i40e_vsi_clear(struct i40e_vsi *vsi) mutex_lock(&pf->switch_mutex); if (!pf->vsi[vsi->idx]) { - dev_err(&pf->pdev->dev, "pf->vsi[%d] is NULL, just free vsi[%d](%p,type %d)\n", - vsi->idx, vsi->idx, vsi, vsi->type); + dev_err(&pf->pdev->dev, "pf->vsi[%d] is NULL, just free vsi[%d](type %d)\n", + vsi->idx, vsi->idx, vsi->type); goto unlock_vsi; } if (pf->vsi[vsi->idx] != vsi) { dev_err(&pf->pdev->dev, - "pf->vsi[%d](%p, type %d) != vsi[%d](%p,type %d): no free!\n", + "pf->vsi[%d](type %d) != vsi[%d](type %d): no free!\n", pf->vsi[vsi->idx]->idx, - pf->vsi[vsi->idx], pf->vsi[vsi->idx]->type, - vsi->idx, vsi, vsi->type); + vsi->idx, vsi->type); goto unlock_vsi; } @@ -10018,7 +10015,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi) ring->dcb_tc = 0; if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE) ring->flags = I40E_TXR_FLAGS_WB_ON_ITR; - ring->tx_itr_setting = pf->tx_itr_default; + ring->itr_setting = pf->tx_itr_default; vsi->tx_rings[i] = ring++; if (!i40e_enabled_xdp_vsi(vsi)) @@ -10036,7 +10033,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi) if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE) ring->flags = I40E_TXR_FLAGS_WB_ON_ITR; set_ring_xdp(ring); - ring->tx_itr_setting = pf->tx_itr_default; + ring->itr_setting = pf->tx_itr_default; vsi->xdp_rings[i] = ring++; setup_rx: @@ -10049,7 +10046,7 @@ setup_rx: ring->count = vsi->num_desc; ring->size = 0; ring->dcb_tc = 0; - ring->rx_itr_setting = pf->rx_itr_default; + ring->itr_setting = pf->rx_itr_default; vsi->rx_rings[i] = ring; } @@ -10328,9 +10325,6 @@ static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx, int cpu) netif_napi_add(vsi->netdev, &q_vector->napi, i40e_napi_poll, NAPI_POLL_WEIGHT); - q_vector->rx.latency_range = I40E_LOW_LATENCY; - q_vector->tx.latency_range = I40E_LOW_LATENCY; - /* tie q_vector and vsi together */ vsi->q_vectors[v_idx] = q_vector; @@ -11089,6 +11083,16 @@ static int i40e_sw_init(struct i40e_pf *pf) /* IWARP needs one extra vector for CQP just like MISC.*/ pf->num_iwarp_msix = (int)num_online_cpus() + 1; } + /* Stopping the FW LLDP engine is only supported on the + * XL710 with a FW ver >= 1.7. Also, stopping FW LLDP + * engine is not supported if NPAR is functioning on this + * part + */ + if (pf->hw.mac.type == I40E_MAC_XL710 && + !pf->hw.func_caps.npar_enable && + (pf->hw.aq.api_maj_ver > 1 || + (pf->hw.aq.api_maj_ver == 1 && pf->hw.aq.api_min_ver > 6))) + pf->hw_features |= I40E_HW_STOPPABLE_FW_LLDP; #ifdef CONFIG_PCI_IOV if (pf->hw.func_caps.num_vfs && pf->hw.partition_id == 1) { diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index e554aa6cf070..1ec9b1d8023d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -995,99 +995,241 @@ void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) } } +static inline bool i40e_container_is_rx(struct i40e_q_vector *q_vector, + struct i40e_ring_container *rc) +{ + return &q_vector->rx == rc; +} + +static inline unsigned int i40e_itr_divisor(struct i40e_q_vector *q_vector) +{ + unsigned int divisor; + + switch (q_vector->vsi->back->hw.phy.link_info.link_speed) { + case I40E_LINK_SPEED_40GB: + divisor = I40E_ITR_ADAPTIVE_MIN_INC * 1024; + break; + case I40E_LINK_SPEED_25GB: + case I40E_LINK_SPEED_20GB: + divisor = I40E_ITR_ADAPTIVE_MIN_INC * 512; + break; + default: + case I40E_LINK_SPEED_10GB: + divisor = I40E_ITR_ADAPTIVE_MIN_INC * 256; + break; + case I40E_LINK_SPEED_1GB: + case I40E_LINK_SPEED_100MB: + divisor = I40E_ITR_ADAPTIVE_MIN_INC * 32; + break; + } + + return divisor; +} + /** - * i40e_set_new_dynamic_itr - Find new ITR level + * i40e_update_itr - update the dynamic ITR value based on statistics + * @q_vector: structure containing interrupt and ring information * @rc: structure containing ring performance data * - * Returns true if ITR changed, false if not - * - * Stores a new ITR value based on packets and byte counts during - * the last interrupt. The advantage of per interrupt computation - * is faster updates and more accurate ITR for the current traffic - * pattern. Constants in this function were computed based on - * theoretical maximum wire speed and thresholds were set based on - * testing data as well as attempting to minimize response time + * Stores a new ITR value based on packets and byte + * counts during the last interrupt. The advantage of per interrupt + * computation is faster updates and more accurate ITR for the current + * traffic pattern. Constants in this function were computed + * based on theoretical maximum wire speed and thresholds were set based + * on testing data as well as attempting to minimize response time * while increasing bulk throughput. **/ -static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) +static void i40e_update_itr(struct i40e_q_vector *q_vector, + struct i40e_ring_container *rc) { - enum i40e_latency_range new_latency_range = rc->latency_range; - u32 new_itr = rc->itr; - int bytes_per_usec; - unsigned int usecs, estimated_usecs; + unsigned int avg_wire_size, packets, bytes, itr; + unsigned long next_update = jiffies; - if (rc->total_packets == 0 || !rc->itr) - return false; + /* If we don't have any rings just leave ourselves set for maximum + * possible latency so we take ourselves out of the equation. + */ + if (!rc->ring || !ITR_IS_DYNAMIC(rc->ring->itr_setting)) + return; - usecs = (rc->itr << 1) * ITR_COUNTDOWN_START; - bytes_per_usec = rc->total_bytes / usecs; + /* For Rx we want to push the delay up and default to low latency. + * for Tx we want to pull the delay down and default to high latency. + */ + itr = i40e_container_is_rx(q_vector, rc) ? + I40E_ITR_ADAPTIVE_MIN_USECS | I40E_ITR_ADAPTIVE_LATENCY : + I40E_ITR_ADAPTIVE_MAX_USECS | I40E_ITR_ADAPTIVE_LATENCY; + + /* If we didn't update within up to 1 - 2 jiffies we can assume + * that either packets are coming in so slow there hasn't been + * any work, or that there is so much work that NAPI is dealing + * with interrupt moderation and we don't need to do anything. + */ + if (time_after(next_update, rc->next_update)) + goto clear_counts; + + /* If itr_countdown is set it means we programmed an ITR within + * the last 4 interrupt cycles. This has a side effect of us + * potentially firing an early interrupt. In order to work around + * this we need to throw out any data received for a few + * interrupts following the update. + */ + if (q_vector->itr_countdown) { + itr = rc->target_itr; + goto clear_counts; + } + + packets = rc->total_packets; + bytes = rc->total_bytes; - /* The calculations in this algorithm depend on interrupts actually - * firing at the ITR rate. This may not happen if the packet rate is - * really low, or if we've been napi polling. Check to make sure - * that's not the case before we continue. + if (i40e_container_is_rx(q_vector, rc)) { + /* If Rx there are 1 to 4 packets and bytes are less than + * 9000 assume insufficient data to use bulk rate limiting + * approach unless Tx is already in bulk rate limiting. We + * are likely latency driven. + */ + if (packets && packets < 4 && bytes < 9000 && + (q_vector->tx.target_itr & I40E_ITR_ADAPTIVE_LATENCY)) { + itr = I40E_ITR_ADAPTIVE_LATENCY; + goto adjust_by_size; + } + } else if (packets < 4) { + /* If we have Tx and Rx ITR maxed and Tx ITR is running in + * bulk mode and we are receiving 4 or fewer packets just + * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so + * that the Rx can relax. + */ + if (rc->target_itr == I40E_ITR_ADAPTIVE_MAX_USECS && + (q_vector->rx.target_itr & I40E_ITR_MASK) == + I40E_ITR_ADAPTIVE_MAX_USECS) + goto clear_counts; + } else if (packets > 32) { + /* If we have processed over 32 packets in a single interrupt + * for Tx assume we need to switch over to "bulk" mode. + */ + rc->target_itr &= ~I40E_ITR_ADAPTIVE_LATENCY; + } + + /* We have no packets to actually measure against. This means + * either one of the other queues on this vector is active or + * we are a Tx queue doing TSO with too high of an interrupt rate. + * + * Between 4 and 56 we can assume that our current interrupt delay + * is only slightly too low. As such we should increase it by a small + * fixed amount. */ - estimated_usecs = jiffies_to_usecs(jiffies - rc->last_itr_update); - if (estimated_usecs > usecs) { - new_latency_range = I40E_LOW_LATENCY; - goto reset_latency; + if (packets < 56) { + itr = rc->target_itr + I40E_ITR_ADAPTIVE_MIN_INC; + if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) { + itr &= I40E_ITR_ADAPTIVE_LATENCY; + itr += I40E_ITR_ADAPTIVE_MAX_USECS; + } + goto clear_counts; } - /* simple throttlerate management - * 0-10MB/s lowest (50000 ints/s) - * 10-20MB/s low (20000 ints/s) - * 20-1249MB/s bulk (18000 ints/s) + if (packets <= 256) { + itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr); + itr &= I40E_ITR_MASK; + + /* Between 56 and 112 is our "goldilocks" zone where we are + * working out "just right". Just report that our current + * ITR is good for us. + */ + if (packets <= 112) + goto clear_counts; + + /* If packet count is 128 or greater we are likely looking + * at a slight overrun of the delay we want. Try halving + * our delay to see if that will cut the number of packets + * in half per interrupt. + */ + itr /= 2; + itr &= I40E_ITR_MASK; + if (itr < I40E_ITR_ADAPTIVE_MIN_USECS) + itr = I40E_ITR_ADAPTIVE_MIN_USECS; + + goto clear_counts; + } + + /* The paths below assume we are dealing with a bulk ITR since + * number of packets is greater than 256. We are just going to have + * to compute a value and try to bring the count under control, + * though for smaller packet sizes there isn't much we can do as + * NAPI polling will likely be kicking in sooner rather than later. + */ + itr = I40E_ITR_ADAPTIVE_BULK; + +adjust_by_size: + /* If packet counts are 256 or greater we can assume we have a gross + * overestimation of what the rate should be. Instead of trying to fine + * tune it just use the formula below to try and dial in an exact value + * give the current packet size of the frame. + */ + avg_wire_size = bytes / packets; + + /* The following is a crude approximation of: + * wmem_default / (size + overhead) = desired_pkts_per_int + * rate / bits_per_byte / (size + ethernet overhead) = pkt_rate + * (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value * - * The math works out because the divisor is in 10^(-6) which - * turns the bytes/us input value into MB/s values, but - * make sure to use usecs, as the register values written - * are in 2 usec increments in the ITR registers, and make sure - * to use the smoothed values that the countdown timer gives us. + * Assuming wmem_default is 212992 and overhead is 640 bytes per + * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the + * formula down to + * + * (170 * (size + 24)) / (size + 640) = ITR + * + * We first do some math on the packet size and then finally bitshift + * by 8 after rounding up. We also have to account for PCIe link speed + * difference as ITR scales based on this. */ - switch (new_latency_range) { - case I40E_LOWEST_LATENCY: - if (bytes_per_usec > 10) - new_latency_range = I40E_LOW_LATENCY; - break; - case I40E_LOW_LATENCY: - if (bytes_per_usec > 20) - new_latency_range = I40E_BULK_LATENCY; - else if (bytes_per_usec <= 10) - new_latency_range = I40E_LOWEST_LATENCY; - break; - case I40E_BULK_LATENCY: - default: - if (bytes_per_usec <= 20) - new_latency_range = I40E_LOW_LATENCY; - break; + if (avg_wire_size <= 60) { + /* Start at 250k ints/sec */ + avg_wire_size = 4096; + } else if (avg_wire_size <= 380) { + /* 250K ints/sec to 60K ints/sec */ + avg_wire_size *= 40; + avg_wire_size += 1696; + } else if (avg_wire_size <= 1084) { + /* 60K ints/sec to 36K ints/sec */ + avg_wire_size *= 15; + avg_wire_size += 11452; + } else if (avg_wire_size <= 1980) { + /* 36K ints/sec to 30K ints/sec */ + avg_wire_size *= 5; + avg_wire_size += 22420; + } else { + /* plateau at a limit of 30K ints/sec */ + avg_wire_size = 32256; } -reset_latency: - rc->latency_range = new_latency_range; + /* If we are in low latency mode halve our delay which doubles the + * rate to somewhere between 100K to 16K ints/sec + */ + if (itr & I40E_ITR_ADAPTIVE_LATENCY) + avg_wire_size /= 2; - switch (new_latency_range) { - case I40E_LOWEST_LATENCY: - new_itr = I40E_ITR_50K; - break; - case I40E_LOW_LATENCY: - new_itr = I40E_ITR_20K; - break; - case I40E_BULK_LATENCY: - new_itr = I40E_ITR_18K; - break; - default: - break; + /* Resultant value is 256 times larger than it needs to be. This + * gives us room to adjust the value as needed to either increase + * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc. + * + * Use addition as we have already recorded the new latency flag + * for the ITR value. + */ + itr += DIV_ROUND_UP(avg_wire_size, i40e_itr_divisor(q_vector)) * + I40E_ITR_ADAPTIVE_MIN_INC; + + if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) { + itr &= I40E_ITR_ADAPTIVE_LATENCY; + itr += I40E_ITR_ADAPTIVE_MAX_USECS; } +clear_counts: + /* write back value */ + rc->target_itr = itr; + + /* next update should occur within next jiffy */ + rc->next_update = next_update + 1; + rc->total_bytes = 0; rc->total_packets = 0; - rc->last_itr_update = jiffies; - - if (new_itr != rc->itr) { - rc->itr = new_itr; - return true; - } - return false; } /** @@ -1991,7 +2133,7 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, * @rx_buffer: rx buffer to pull data from * * This function will clean up the contents of the rx_buffer. It will - * either recycle the bufer or unmap it and free the associated resources. + * either recycle the buffer or unmap it and free the associated resources. */ static void i40e_put_rx_buffer(struct i40e_ring *rx_ring, struct i40e_rx_buffer *rx_buffer) @@ -2274,29 +2416,45 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) return failure ? budget : (int)total_rx_packets; } -static u32 i40e_buildreg_itr(const int type, const u16 itr) +static inline u32 i40e_buildreg_itr(const int type, u16 itr) { u32 val; + /* We don't bother with setting the CLEARPBA bit as the data sheet + * points out doing so is "meaningless since it was already + * auto-cleared". The auto-clearing happens when the interrupt is + * asserted. + * + * Hardware errata 28 for also indicates that writing to a + * xxINT_DYN_CTLx CSR with INTENA_MSK (bit 31) set to 0 will clear + * an event in the PBA anyway so we need to rely on the automask + * to hold pending events for us until the interrupt is re-enabled + * + * The itr value is reported in microseconds, and the register + * value is recorded in 2 microsecond units. For this reason we + * only need to shift by the interval shift - 1 instead of the + * full value. + */ + itr &= I40E_ITR_MASK; + val = I40E_PFINT_DYN_CTLN_INTENA_MASK | - I40E_PFINT_DYN_CTLN_CLEARPBA_MASK | (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) | - (itr << I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT); + (itr << (I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT - 1)); return val; } /* a small macro to shorten up some long lines */ #define INTREG I40E_PFINT_DYN_CTLN -static inline int get_rx_itr(struct i40e_vsi *vsi, int idx) -{ - return vsi->rx_rings[idx]->rx_itr_setting; -} -static inline int get_tx_itr(struct i40e_vsi *vsi, int idx) -{ - return vsi->tx_rings[idx]->tx_itr_setting; -} +/* The act of updating the ITR will cause it to immediately trigger. In order + * to prevent this from throwing off adaptive update statistics we defer the + * update so that it can only happen so often. So after either Tx or Rx are + * updated we make the adaptive scheme wait until either the ITR completely + * expires via the next_update expiration or we have been through at least + * 3 interrupts. + */ +#define ITR_COUNTDOWN_START 3 /** * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt @@ -2308,10 +2466,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) { struct i40e_hw *hw = &vsi->back->hw; - bool rx = false, tx = false; - u32 rxval, txval; - int idx = q_vector->v_idx; - int rx_itr_setting, tx_itr_setting; + u32 intval; /* If we don't have MSIX, then we only need to re-enable icr0 */ if (!(vsi->back->flags & I40E_FLAG_MSIX_ENABLED)) { @@ -2319,65 +2474,49 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, return; } - /* avoid dynamic calculation if in countdown mode OR if - * all dynamic is disabled - */ - rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0); - - rx_itr_setting = get_rx_itr(vsi, idx); - tx_itr_setting = get_tx_itr(vsi, idx); - - if (q_vector->itr_countdown > 0 || - (!ITR_IS_DYNAMIC(rx_itr_setting) && - !ITR_IS_DYNAMIC(tx_itr_setting))) { - goto enable_int; - } - - if (ITR_IS_DYNAMIC(rx_itr_setting)) { - rx = i40e_set_new_dynamic_itr(&q_vector->rx); - rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr); - } - - if (ITR_IS_DYNAMIC(tx_itr_setting)) { - tx = i40e_set_new_dynamic_itr(&q_vector->tx); - txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr); - } + /* These will do nothing if dynamic updates are not enabled */ + i40e_update_itr(q_vector, &q_vector->tx); + i40e_update_itr(q_vector, &q_vector->rx); - if (rx || tx) { - /* get the higher of the two ITR adjustments and - * use the same value for both ITR registers - * when in adaptive mode (Rx and/or Tx) - */ - u16 itr = max(q_vector->tx.itr, q_vector->rx.itr); - - q_vector->tx.itr = q_vector->rx.itr = itr; - txval = i40e_buildreg_itr(I40E_TX_ITR, itr); - tx = true; - rxval = i40e_buildreg_itr(I40E_RX_ITR, itr); - rx = true; - } - - /* only need to enable the interrupt once, but need - * to possibly update both ITR values + /* This block of logic allows us to get away with only updating + * one ITR value with each interrupt. The idea is to perform a + * pseudo-lazy update with the following criteria. + * + * 1. Rx is given higher priority than Tx if both are in same state + * 2. If we must reduce an ITR that is given highest priority. + * 3. We then give priority to increasing ITR based on amount. */ - if (rx) { - /* set the INTENA_MSK_MASK so that this first write - * won't actually enable the interrupt, instead just - * updating the ITR (it's bit 31 PF and VF) + if (q_vector->rx.target_itr < q_vector->rx.current_itr) { + /* Rx ITR needs to be reduced, this is highest priority */ + intval = i40e_buildreg_itr(I40E_RX_ITR, + q_vector->rx.target_itr); + q_vector->rx.current_itr = q_vector->rx.target_itr; + q_vector->itr_countdown = ITR_COUNTDOWN_START; + } else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) || + ((q_vector->rx.target_itr - q_vector->rx.current_itr) < + (q_vector->tx.target_itr - q_vector->tx.current_itr))) { + /* Tx ITR needs to be reduced, this is second priority + * Tx ITR needs to be increased more than Rx, fourth priority */ - rxval |= BIT(31); - /* don't check _DOWN because interrupt isn't being enabled */ - wr32(hw, INTREG(q_vector->reg_idx), rxval); + intval = i40e_buildreg_itr(I40E_TX_ITR, + q_vector->tx.target_itr); + q_vector->tx.current_itr = q_vector->tx.target_itr; + q_vector->itr_countdown = ITR_COUNTDOWN_START; + } else if (q_vector->rx.current_itr != q_vector->rx.target_itr) { + /* Rx ITR needs to be increased, third priority */ + intval = i40e_buildreg_itr(I40E_RX_ITR, + q_vector->rx.target_itr); + q_vector->rx.current_itr = q_vector->rx.target_itr; + q_vector->itr_countdown = ITR_COUNTDOWN_START; + } else { + /* No ITR update, lowest priority */ + intval = i40e_buildreg_itr(I40E_ITR_NONE, 0); + if (q_vector->itr_countdown) + q_vector->itr_countdown--; } -enable_int: if (!test_bit(__I40E_VSI_DOWN, vsi->state)) - wr32(hw, INTREG(q_vector->reg_idx), txval); - - if (q_vector->itr_countdown) - q_vector->itr_countdown--; - else - q_vector->itr_countdown = ITR_COUNTDOWN_START; + wr32(hw, INTREG(q_vector->reg_idx), intval); } /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 701b708628b0..f75a8fe68fcf 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -30,32 +30,37 @@ #include <net/xdp.h> /* Interrupt Throttling and Rate Limiting Goodies */ - -#define I40E_MAX_ITR 0x0FF0 /* reg uses 2 usec resolution */ -#define I40E_MIN_ITR 0x0001 /* reg uses 2 usec resolution */ -#define I40E_ITR_100K 0x0005 -#define I40E_ITR_50K 0x000A -#define I40E_ITR_20K 0x0019 -#define I40E_ITR_18K 0x001B -#define I40E_ITR_8K 0x003E -#define I40E_ITR_4K 0x007A -#define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */ -#define I40E_ITR_RX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \ - I40E_ITR_DYNAMIC) -#define I40E_ITR_TX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \ - I40E_ITR_DYNAMIC) -#define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */ -#define I40E_MIN_INT_RATE 250 /* ~= 1000000 / (I40E_MAX_ITR * 2) */ -#define I40E_MAX_INT_RATE 500000 /* == 1000000 / (I40E_MIN_ITR * 2) */ #define I40E_DEFAULT_IRQ_WORK 256 -#define ITR_TO_REG(setting) ((setting & ~I40E_ITR_DYNAMIC) >> 1) -#define ITR_IS_DYNAMIC(setting) (!!(setting & I40E_ITR_DYNAMIC)) -#define ITR_REG_TO_USEC(itr_reg) (itr_reg << 1) + +/* The datasheet for the X710 and XL710 indicate that the maximum value for + * the ITR is 8160usec which is then called out as 0xFF0 with a 2usec + * resolution. 8160 is 0x1FE0 when written out in hex. So instead of storing + * the register value which is divided by 2 lets use the actual values and + * avoid an excessive amount of translation. + */ +#define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */ +#define I40E_ITR_MASK 0x1FFE /* mask for ITR register value */ +#define I40E_MIN_ITR 2 /* reg uses 2 usec resolution */ +#define I40E_ITR_100K 10 /* all values below must be even */ +#define I40E_ITR_50K 20 +#define I40E_ITR_20K 50 +#define I40E_ITR_18K 60 +#define I40E_ITR_8K 122 +#define I40E_MAX_ITR 8160 /* maximum value as per datasheet */ +#define ITR_TO_REG(setting) ((setting) & ~I40E_ITR_DYNAMIC) +#define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~I40E_ITR_MASK) +#define ITR_IS_DYNAMIC(setting) (!!((setting) & I40E_ITR_DYNAMIC)) + +#define I40E_ITR_RX_DEF (I40E_ITR_20K | I40E_ITR_DYNAMIC) +#define I40E_ITR_TX_DEF (I40E_ITR_20K | I40E_ITR_DYNAMIC) + /* 0x40 is the enable bit for interrupt rate limiting, and must be set if * the value of the rate limit is non-zero */ #define INTRL_ENA BIT(6) +#define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */ #define INTRL_REG_TO_USEC(intrl) ((intrl & ~INTRL_ENA) << 2) + /** * i40e_intrl_usec_to_reg - convert interrupt rate limit to register * @intrl: interrupt rate limit to convert @@ -382,8 +387,7 @@ struct i40e_ring { * these values always store the USER setting, and must be converted * before programming to a register. */ - u16 rx_itr_setting; - u16 tx_itr_setting; + u16 itr_setting; u16 count; /* Number of descriptors */ u16 reg_idx; /* HW register index of the ring */ @@ -459,21 +463,21 @@ static inline void set_ring_xdp(struct i40e_ring *ring) ring->flags |= I40E_TXR_FLAGS_XDP; } -enum i40e_latency_range { - I40E_LOWEST_LATENCY = 0, - I40E_LOW_LATENCY = 1, - I40E_BULK_LATENCY = 2, -}; +#define I40E_ITR_ADAPTIVE_MIN_INC 0x0002 +#define I40E_ITR_ADAPTIVE_MIN_USECS 0x0002 +#define I40E_ITR_ADAPTIVE_MAX_USECS 0x007e +#define I40E_ITR_ADAPTIVE_LATENCY 0x8000 +#define I40E_ITR_ADAPTIVE_BULK 0x0000 +#define ITR_IS_BULK(x) (!((x) & I40E_ITR_ADAPTIVE_LATENCY)) struct i40e_ring_container { - /* array of pointers to rings */ - struct i40e_ring *ring; + struct i40e_ring *ring; /* pointer to linked list of ring(s) */ + unsigned long next_update; /* jiffies value of next update */ unsigned int total_bytes; /* total bytes processed this int */ unsigned int total_packets; /* total packets processed this int */ - unsigned long last_itr_update; /* jiffies of last ITR update */ u16 count; - enum i40e_latency_range latency_range; - u16 itr; + u16 target_itr; /* target ITR setting for ring(s) */ + u16 current_itr; /* current ITR setting for ring(s) */ }; /* iterator for handling rings in ring container */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index cd294e6a8587..b0eed8c0b2f2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -39,7 +39,7 @@ #define I40E_MASK(mask, shift) ((u32)(mask) << (shift)) #define I40E_MAX_VSI_QP 16 -#define I40E_MAX_VF_VSI 3 +#define I40E_MAX_VF_VSI 4 #define I40E_MAX_CHAINED_RX_BUFFERS 5 #define I40E_MAX_PF_UDP_OFFLOAD_PORTS 16 diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index e9309fb9084b..5cca083da93c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -258,6 +258,38 @@ static u16 i40e_vc_get_pf_queue_id(struct i40e_vf *vf, u16 vsi_id, } /** + * i40e_get_real_pf_qid + * @vf: pointer to the VF info + * @vsi_id: vsi id + * @queue_id: queue number + * + * wrapper function to get pf_queue_id handling ADq code as well + **/ +static u16 i40e_get_real_pf_qid(struct i40e_vf *vf, u16 vsi_id, u16 queue_id) +{ + int i; + + if (vf->adq_enabled) { + /* Although VF considers all the queues(can be 1 to 16) as its + * own but they may actually belong to different VSIs(up to 4). + * We need to find which queues belongs to which VSI. + */ + for (i = 0; i < vf->num_tc; i++) { + if (queue_id < vf->ch[i].num_qps) { + vsi_id = vf->ch[i].vsi_id; + break; + } + /* find right queue id which is relative to a + * given VSI. + */ + queue_id -= vf->ch[i].num_qps; + } + } + + return i40e_vc_get_pf_queue_id(vf, vsi_id, queue_id); +} + +/** * i40e_config_irq_link_list * @vf: pointer to the VF info * @vsi_id: id of VSI as given by the FW @@ -310,7 +342,7 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id, vsi_queue_id = next_q / I40E_VIRTCHNL_SUPPORTED_QTYPES; qtype = next_q % I40E_VIRTCHNL_SUPPORTED_QTYPES; - pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, vsi_queue_id); + pf_queue_id = i40e_get_real_pf_qid(vf, vsi_id, vsi_queue_id); reg = ((qtype << I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT) | pf_queue_id); wr32(hw, reg_idx, reg); @@ -333,8 +365,9 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id, if (next_q < size) { vsi_queue_id = next_q / I40E_VIRTCHNL_SUPPORTED_QTYPES; qtype = next_q % I40E_VIRTCHNL_SUPPORTED_QTYPES; - pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, - vsi_queue_id); + pf_queue_id = i40e_get_real_pf_qid(vf, + vsi_id, + vsi_queue_id); } else { pf_queue_id = I40E_QUEUE_END_OF_LIST; qtype = 0; @@ -669,18 +702,20 @@ error_param: /** * i40e_alloc_vsi_res * @vf: pointer to the VF info - * @type: type of VSI to allocate + * @idx: VSI index, applies only for ADq mode, zero otherwise * * alloc VF vsi context & resources **/ -static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type) +static int i40e_alloc_vsi_res(struct i40e_vf *vf, u8 idx) { struct i40e_mac_filter *f = NULL; struct i40e_pf *pf = vf->pf; struct i40e_vsi *vsi; + u64 max_tx_rate = 0; int ret = 0; - vsi = i40e_vsi_setup(pf, type, pf->vsi[pf->lan_vsi]->seid, vf->vf_id); + vsi = i40e_vsi_setup(pf, I40E_VSI_SRIOV, pf->vsi[pf->lan_vsi]->seid, + vf->vf_id); if (!vsi) { dev_err(&pf->pdev->dev, @@ -689,7 +724,8 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type) ret = -ENOENT; goto error_alloc_vsi_res; } - if (type == I40E_VSI_SRIOV) { + + if (!idx) { u64 hena = i40e_pf_get_default_rss_hena(pf); u8 broadcast[ETH_ALEN]; @@ -721,17 +757,29 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type) spin_unlock_bh(&vsi->mac_filter_hash_lock); wr32(&pf->hw, I40E_VFQF_HENA1(0, vf->vf_id), (u32)hena); wr32(&pf->hw, I40E_VFQF_HENA1(1, vf->vf_id), (u32)(hena >> 32)); + /* program mac filter only for VF VSI */ + ret = i40e_sync_vsi_filters(vsi); + if (ret) + dev_err(&pf->pdev->dev, "Unable to program ucast filters\n"); } - /* program mac filter */ - ret = i40e_sync_vsi_filters(vsi); - if (ret) - dev_err(&pf->pdev->dev, "Unable to program ucast filters\n"); + /* storing VSI index and id for ADq and don't apply the mac filter */ + if (vf->adq_enabled) { + vf->ch[idx].vsi_idx = vsi->idx; + vf->ch[idx].vsi_id = vsi->id; + } /* Set VF bandwidth if specified */ if (vf->tx_rate) { + max_tx_rate = vf->tx_rate; + } else if (vf->ch[idx].max_tx_rate) { + max_tx_rate = vf->ch[idx].max_tx_rate; + } + + if (max_tx_rate) { + max_tx_rate = div_u64(max_tx_rate, I40E_BW_CREDIT_DIVISOR); ret = i40e_aq_config_vsi_bw_limit(&pf->hw, vsi->seid, - vf->tx_rate / 50, 0, NULL); + max_tx_rate, 0, NULL); if (ret) dev_err(&pf->pdev->dev, "Unable to set tx rate, VF %d, error code %d.\n", vf->vf_id, ret); @@ -742,6 +790,92 @@ error_alloc_vsi_res: } /** + * i40e_map_pf_queues_to_vsi + * @vf: pointer to the VF info + * + * PF maps LQPs to a VF by programming VSILAN_QTABLE & VPLAN_QTABLE. This + * function takes care of first part VSILAN_QTABLE, mapping pf queues to VSI. + **/ +static void i40e_map_pf_queues_to_vsi(struct i40e_vf *vf) +{ + struct i40e_pf *pf = vf->pf; + struct i40e_hw *hw = &pf->hw; + u32 reg, num_tc = 1; /* VF has at least one traffic class */ + u16 vsi_id, qps; + int i, j; + + if (vf->adq_enabled) + num_tc = vf->num_tc; + + for (i = 0; i < num_tc; i++) { + if (vf->adq_enabled) { + qps = vf->ch[i].num_qps; + vsi_id = vf->ch[i].vsi_id; + } else { + qps = pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs; + vsi_id = vf->lan_vsi_id; + } + + for (j = 0; j < 7; j++) { + if (j * 2 >= qps) { + /* end of list */ + reg = 0x07FF07FF; + } else { + u16 qid = i40e_vc_get_pf_queue_id(vf, + vsi_id, + j * 2); + reg = qid; + qid = i40e_vc_get_pf_queue_id(vf, vsi_id, + (j * 2) + 1); + reg |= qid << 16; + } + i40e_write_rx_ctl(hw, + I40E_VSILAN_QTABLE(j, vsi_id), + reg); + } + } +} + +/** + * i40e_map_pf_to_vf_queues + * @vf: pointer to the VF info + * + * PF maps LQPs to a VF by programming VSILAN_QTABLE & VPLAN_QTABLE. This + * function takes care of the second part VPLAN_QTABLE & completes VF mappings. + **/ +static void i40e_map_pf_to_vf_queues(struct i40e_vf *vf) +{ + struct i40e_pf *pf = vf->pf; + struct i40e_hw *hw = &pf->hw; + u32 reg, total_qps = 0; + u32 qps, num_tc = 1; /* VF has at least one traffic class */ + u16 vsi_id, qid; + int i, j; + + if (vf->adq_enabled) + num_tc = vf->num_tc; + + for (i = 0; i < num_tc; i++) { + if (vf->adq_enabled) { + qps = vf->ch[i].num_qps; + vsi_id = vf->ch[i].vsi_id; + } else { + qps = pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs; + vsi_id = vf->lan_vsi_id; + } + + for (j = 0; j < qps; j++) { + qid = i40e_vc_get_pf_queue_id(vf, vsi_id, j); + + reg = (qid & I40E_VPLAN_QTABLE_QINDEX_MASK); + wr32(hw, I40E_VPLAN_QTABLE(total_qps, vf->vf_id), + reg); + total_qps++; + } + } +} + +/** * i40e_enable_vf_mappings * @vf: pointer to the VF info * @@ -751,8 +885,7 @@ static void i40e_enable_vf_mappings(struct i40e_vf *vf) { struct i40e_pf *pf = vf->pf; struct i40e_hw *hw = &pf->hw; - u32 reg, total_queue_pairs = 0; - int j; + u32 reg; /* Tell the hardware we're using noncontiguous mapping. HW requires * that VF queues be mapped using this method, even when they are @@ -765,30 +898,8 @@ static void i40e_enable_vf_mappings(struct i40e_vf *vf) reg = I40E_VPLAN_MAPENA_TXRX_ENA_MASK; wr32(hw, I40E_VPLAN_MAPENA(vf->vf_id), reg); - /* map PF queues to VF queues */ - for (j = 0; j < pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs; j++) { - u16 qid = i40e_vc_get_pf_queue_id(vf, vf->lan_vsi_id, j); - - reg = (qid & I40E_VPLAN_QTABLE_QINDEX_MASK); - wr32(hw, I40E_VPLAN_QTABLE(total_queue_pairs, vf->vf_id), reg); - total_queue_pairs++; - } - - /* map PF queues to VSI */ - for (j = 0; j < 7; j++) { - if (j * 2 >= pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs) { - reg = 0x07FF07FF; /* unused */ - } else { - u16 qid = i40e_vc_get_pf_queue_id(vf, vf->lan_vsi_id, - j * 2); - reg = qid; - qid = i40e_vc_get_pf_queue_id(vf, vf->lan_vsi_id, - (j * 2) + 1); - reg |= qid << 16; - } - i40e_write_rx_ctl(hw, I40E_VSILAN_QTABLE(j, vf->lan_vsi_id), - reg); - } + i40e_map_pf_to_vf_queues(vf); + i40e_map_pf_queues_to_vsi(vf); i40e_flush(hw); } @@ -824,7 +935,7 @@ static void i40e_free_vf_res(struct i40e_vf *vf) struct i40e_pf *pf = vf->pf; struct i40e_hw *hw = &pf->hw; u32 reg_idx, reg; - int i, msix_vf; + int i, j, msix_vf; /* Start by disabling VF's configuration API to prevent the OS from * accessing the VF's VSI after it's freed / invalidated. @@ -846,6 +957,20 @@ static void i40e_free_vf_res(struct i40e_vf *vf) vf->lan_vsi_id = 0; vf->num_mac = 0; } + + /* do the accounting and remove additional ADq VSI's */ + if (vf->adq_enabled && vf->ch[0].vsi_idx) { + for (j = 0; j < vf->num_tc; j++) { + /* At this point VSI0 is already released so don't + * release it again and only clear their values in + * structure variables + */ + if (j) + i40e_vsi_release(pf->vsi[vf->ch[j].vsi_idx]); + vf->ch[j].vsi_idx = 0; + vf->ch[j].vsi_id = 0; + } + } msix_vf = pf->hw.func_caps.num_msix_vectors_vf; /* disable interrupts so the VF starts in a known state */ @@ -891,7 +1016,7 @@ static int i40e_alloc_vf_res(struct i40e_vf *vf) { struct i40e_pf *pf = vf->pf; int total_queue_pairs = 0; - int ret; + int ret, idx; if (vf->num_req_queues && vf->num_req_queues <= pf->queues_left + I40E_DEFAULT_QUEUES_PER_VF) @@ -900,11 +1025,30 @@ static int i40e_alloc_vf_res(struct i40e_vf *vf) pf->num_vf_qps = I40E_DEFAULT_QUEUES_PER_VF; /* allocate hw vsi context & associated resources */ - ret = i40e_alloc_vsi_res(vf, I40E_VSI_SRIOV); + ret = i40e_alloc_vsi_res(vf, 0); if (ret) goto error_alloc; total_queue_pairs += pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs; + /* allocate additional VSIs based on tc information for ADq */ + if (vf->adq_enabled) { + if (pf->queues_left >= + (I40E_MAX_VF_QUEUES - I40E_DEFAULT_QUEUES_PER_VF)) { + /* TC 0 always belongs to VF VSI */ + for (idx = 1; idx < vf->num_tc; idx++) { + ret = i40e_alloc_vsi_res(vf, idx); + if (ret) + goto error_alloc; + } + /* send correct number of queues */ + total_queue_pairs = I40E_MAX_VF_QUEUES; + } else { + dev_info(&pf->pdev->dev, "VF %d: Not enough queues to allocate, disabling ADq\n", + vf->vf_id); + vf->adq_enabled = false; + } + } + /* We account for each VF to get a default number of queue pairs. If * the VF has now requested more, we need to account for that to make * certain we never request more queues than we actually have left in @@ -1537,6 +1681,27 @@ static int i40e_vc_get_version_msg(struct i40e_vf *vf, u8 *msg) } /** + * i40e_del_qch - delete all the additional VSIs created as a part of ADq + * @vf: pointer to VF structure + **/ +static void i40e_del_qch(struct i40e_vf *vf) +{ + struct i40e_pf *pf = vf->pf; + int i; + + /* first element in the array belongs to primary VF VSI and we shouldn't + * delete it. We should however delete the rest of the VSIs created + */ + for (i = 1; i < vf->num_tc; i++) { + if (vf->ch[i].vsi_idx) { + i40e_vsi_release(pf->vsi[vf->ch[i].vsi_idx]); + vf->ch[i].vsi_idx = 0; + vf->ch[i].vsi_id = 0; + } + } +} + +/** * i40e_vc_get_vf_resources_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer @@ -1631,6 +1796,9 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_REQ_QUEUES) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_REQ_QUEUES; + if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ADQ) + vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_ADQ; + vfres->num_vsis = num_vsis; vfres->num_queue_pairs = vf->num_queue_pairs; vfres->max_vectors = pf->hw.func_caps.num_msix_vectors_vf; @@ -1855,27 +2023,37 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) (struct virtchnl_vsi_queue_config_info *)msg; struct virtchnl_queue_pair_info *qpi; struct i40e_pf *pf = vf->pf; - u16 vsi_id, vsi_queue_id; + u16 vsi_id, vsi_queue_id = 0; i40e_status aq_ret = 0; - int i; + int i, j = 0, idx = 0; + + vsi_id = qci->vsi_id; if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { aq_ret = I40E_ERR_PARAM; goto error_param; } - vsi_id = qci->vsi_id; if (!i40e_vc_isvalid_vsi_id(vf, vsi_id)) { aq_ret = I40E_ERR_PARAM; goto error_param; } + for (i = 0; i < qci->num_queue_pairs; i++) { qpi = &qci->qpair[i]; - vsi_queue_id = qpi->txq.queue_id; - if ((qpi->txq.vsi_id != vsi_id) || - (qpi->rxq.vsi_id != vsi_id) || - (qpi->rxq.queue_id != vsi_queue_id) || - !i40e_vc_isvalid_queue_id(vf, vsi_id, vsi_queue_id)) { + + if (!vf->adq_enabled) { + vsi_queue_id = qpi->txq.queue_id; + + if (qpi->txq.vsi_id != qci->vsi_id || + qpi->rxq.vsi_id != qci->vsi_id || + qpi->rxq.queue_id != vsi_queue_id) { + aq_ret = I40E_ERR_PARAM; + goto error_param; + } + } + + if (!i40e_vc_isvalid_queue_id(vf, vsi_id, vsi_queue_id)) { aq_ret = I40E_ERR_PARAM; goto error_param; } @@ -1887,9 +2065,33 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) aq_ret = I40E_ERR_PARAM; goto error_param; } + + /* For ADq there can be up to 4 VSIs with max 4 queues each. + * VF does not know about these additional VSIs and all + * it cares is about its own queues. PF configures these queues + * to its appropriate VSIs based on TC mapping + **/ + if (vf->adq_enabled) { + if (j == (vf->ch[idx].num_qps - 1)) { + idx++; + j = 0; /* resetting the queue count */ + vsi_queue_id = 0; + } else { + j++; + vsi_queue_id++; + } + vsi_id = vf->ch[idx].vsi_id; + } } /* set vsi num_queue_pairs in use to num configured by VF */ - pf->vsi[vf->lan_vsi_idx]->num_queue_pairs = qci->num_queue_pairs; + if (!vf->adq_enabled) { + pf->vsi[vf->lan_vsi_idx]->num_queue_pairs = + qci->num_queue_pairs; + } else { + for (i = 0; i < vf->num_tc; i++) + pf->vsi[vf->ch[i].vsi_idx]->num_queue_pairs = + vf->ch[i].num_qps; + } error_param: /* send the response to the VF */ @@ -1898,6 +2100,33 @@ error_param: } /** + * i40e_validate_queue_map + * @vsi_id: vsi id + * @queuemap: Tx or Rx queue map + * + * check if Tx or Rx queue map is valid + **/ +static int i40e_validate_queue_map(struct i40e_vf *vf, u16 vsi_id, + unsigned long queuemap) +{ + u16 vsi_queue_id, queue_id; + + for_each_set_bit(vsi_queue_id, &queuemap, I40E_MAX_VSI_QP) { + if (vf->adq_enabled) { + vsi_id = vf->ch[vsi_queue_id / I40E_MAX_VF_VSI].vsi_id; + queue_id = (vsi_queue_id % I40E_DEFAULT_QUEUES_PER_VF); + } else { + queue_id = vsi_queue_id; + } + + if (!i40e_vc_isvalid_queue_id(vf, vsi_id, queue_id)) + return -EINVAL; + } + + return 0; +} + +/** * i40e_vc_config_irq_map_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer @@ -1911,9 +2140,8 @@ static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) struct virtchnl_irq_map_info *irqmap_info = (struct virtchnl_irq_map_info *)msg; struct virtchnl_vector_map *map; - u16 vsi_id, vsi_queue_id, vector_id; + u16 vsi_id, vector_id; i40e_status aq_ret = 0; - unsigned long tempmap; int i; if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { @@ -1923,7 +2151,6 @@ static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) for (i = 0; i < irqmap_info->num_vectors; i++) { map = &irqmap_info->vecmap[i]; - vector_id = map->vector_id; vsi_id = map->vsi_id; /* validate msg params */ @@ -1933,23 +2160,14 @@ static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) goto error_param; } - /* lookout for the invalid queue index */ - tempmap = map->rxq_map; - for_each_set_bit(vsi_queue_id, &tempmap, I40E_MAX_VSI_QP) { - if (!i40e_vc_isvalid_queue_id(vf, vsi_id, - vsi_queue_id)) { - aq_ret = I40E_ERR_PARAM; - goto error_param; - } + if (i40e_validate_queue_map(vf, vsi_id, map->rxq_map)) { + aq_ret = I40E_ERR_PARAM; + goto error_param; } - tempmap = map->txq_map; - for_each_set_bit(vsi_queue_id, &tempmap, I40E_MAX_VSI_QP) { - if (!i40e_vc_isvalid_queue_id(vf, vsi_id, - vsi_queue_id)) { - aq_ret = I40E_ERR_PARAM; - goto error_param; - } + if (i40e_validate_queue_map(vf, vsi_id, map->txq_map)) { + aq_ret = I40E_ERR_PARAM; + goto error_param; } i40e_config_irq_link_list(vf, vsi_id, map); @@ -1975,6 +2193,7 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) struct i40e_pf *pf = vf->pf; u16 vsi_id = vqs->vsi_id; i40e_status aq_ret = 0; + int i; if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { aq_ret = I40E_ERR_PARAM; @@ -1993,6 +2212,16 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) if (i40e_vsi_start_rings(pf->vsi[vf->lan_vsi_idx])) aq_ret = I40E_ERR_TIMEOUT; + + /* need to start the rings for additional ADq VSI's as well */ + if (vf->adq_enabled) { + /* zero belongs to LAN VSI */ + for (i = 1; i < vf->num_tc; i++) { + if (i40e_vsi_start_rings(pf->vsi[vf->ch[i].vsi_idx])) + aq_ret = I40E_ERR_TIMEOUT; + } + } + error_param: /* send the response to the VF */ return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ENABLE_QUEUES, @@ -2688,6 +2917,618 @@ err: } /** + * i40e_validate_cloud_filter + * @mask: mask for TC filter + * @data: data for TC filter + * + * This function validates cloud filter programmed as TC filter for ADq + **/ +static int i40e_validate_cloud_filter(struct i40e_vf *vf, + struct virtchnl_filter *tc_filter) +{ + struct virtchnl_l4_spec mask = tc_filter->mask.tcp_spec; + struct virtchnl_l4_spec data = tc_filter->data.tcp_spec; + struct i40e_pf *pf = vf->pf; + struct i40e_vsi *vsi = NULL; + struct i40e_mac_filter *f; + struct hlist_node *h; + bool found = false; + int bkt; + + if (!tc_filter->action) { + dev_info(&pf->pdev->dev, + "VF %d: Currently ADq doesn't support Drop Action\n", + vf->vf_id); + goto err; + } + + /* action_meta is TC number here to which the filter is applied */ + if (!tc_filter->action_meta || + tc_filter->action_meta > I40E_MAX_VF_VSI) { + dev_info(&pf->pdev->dev, "VF %d: Invalid TC number %u\n", + vf->vf_id, tc_filter->action_meta); + goto err; + } + + /* Check filter if it's programmed for advanced mode or basic mode. + * There are two ADq modes (for VF only), + * 1. Basic mode: intended to allow as many filter options as possible + * to be added to a VF in Non-trusted mode. Main goal is + * to add filters to its own MAC and VLAN id. + * 2. Advanced mode: is for allowing filters to be applied other than + * its own MAC or VLAN. This mode requires the VF to be + * Trusted. + */ + if (mask.dst_mac[0] && !mask.dst_ip[0]) { + vsi = pf->vsi[vf->lan_vsi_idx]; + f = i40e_find_mac(vsi, data.dst_mac); + + if (!f) { + dev_info(&pf->pdev->dev, + "Destination MAC %pM doesn't belong to VF %d\n", + data.dst_mac, vf->vf_id); + goto err; + } + + if (mask.vlan_id) { + hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, + hlist) { + if (f->vlan == ntohs(data.vlan_id)) { + found = true; + break; + } + } + if (!found) { + dev_info(&pf->pdev->dev, + "VF %d doesn't have any VLAN id %u\n", + vf->vf_id, ntohs(data.vlan_id)); + goto err; + } + } + } else { + /* Check if VF is trusted */ + if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps)) { + dev_err(&pf->pdev->dev, + "VF %d not trusted, make VF trusted to add advanced mode ADq cloud filters\n", + vf->vf_id); + return I40E_ERR_CONFIG; + } + } + + if (mask.dst_mac[0] & data.dst_mac[0]) { + if (is_broadcast_ether_addr(data.dst_mac) || + is_zero_ether_addr(data.dst_mac)) { + dev_info(&pf->pdev->dev, "VF %d: Invalid Dest MAC addr %pM\n", + vf->vf_id, data.dst_mac); + goto err; + } + } + + if (mask.src_mac[0] & data.src_mac[0]) { + if (is_broadcast_ether_addr(data.src_mac) || + is_zero_ether_addr(data.src_mac)) { + dev_info(&pf->pdev->dev, "VF %d: Invalid Source MAC addr %pM\n", + vf->vf_id, data.src_mac); + goto err; + } + } + + if (mask.dst_port & data.dst_port) { + if (!data.dst_port || be16_to_cpu(data.dst_port) > 0xFFFF) { + dev_info(&pf->pdev->dev, "VF %d: Invalid Dest port\n", + vf->vf_id); + goto err; + } + } + + if (mask.src_port & data.src_port) { + if (!data.src_port || be16_to_cpu(data.src_port) > 0xFFFF) { + dev_info(&pf->pdev->dev, "VF %d: Invalid Source port\n", + vf->vf_id); + goto err; + } + } + + if (tc_filter->flow_type != VIRTCHNL_TCP_V6_FLOW && + tc_filter->flow_type != VIRTCHNL_TCP_V4_FLOW) { + dev_info(&pf->pdev->dev, "VF %d: Invalid Flow type\n", + vf->vf_id); + goto err; + } + + if (mask.vlan_id & data.vlan_id) { + if (ntohs(data.vlan_id) > I40E_MAX_VLANID) { + dev_info(&pf->pdev->dev, "VF %d: invalid VLAN ID\n", + vf->vf_id); + goto err; + } + } + + return I40E_SUCCESS; +err: + return I40E_ERR_CONFIG; +} + +/** + * i40e_find_vsi_from_seid - searches for the vsi with the given seid + * @vf: pointer to the VF info + * @seid - seid of the vsi it is searching for + **/ +static struct i40e_vsi *i40e_find_vsi_from_seid(struct i40e_vf *vf, u16 seid) +{ + struct i40e_pf *pf = vf->pf; + struct i40e_vsi *vsi = NULL; + int i; + + for (i = 0; i < vf->num_tc ; i++) { + vsi = i40e_find_vsi_from_id(pf, vf->ch[i].vsi_id); + if (vsi->seid == seid) + return vsi; + } + return NULL; +} + +/** + * i40e_del_all_cloud_filters + * @vf: pointer to the VF info + * + * This function deletes all cloud filters + **/ +static void i40e_del_all_cloud_filters(struct i40e_vf *vf) +{ + struct i40e_cloud_filter *cfilter = NULL; + struct i40e_pf *pf = vf->pf; + struct i40e_vsi *vsi = NULL; + struct hlist_node *node; + int ret; + + hlist_for_each_entry_safe(cfilter, node, + &vf->cloud_filter_list, cloud_node) { + vsi = i40e_find_vsi_from_seid(vf, cfilter->seid); + + if (!vsi) { + dev_err(&pf->pdev->dev, "VF %d: no VSI found for matching %u seid, can't delete cloud filter\n", + vf->vf_id, cfilter->seid); + continue; + } + + if (cfilter->dst_port) + ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter, + false); + else + ret = i40e_add_del_cloud_filter(vsi, cfilter, false); + if (ret) + dev_err(&pf->pdev->dev, + "VF %d: Failed to delete cloud filter, err %s aq_err %s\n", + vf->vf_id, i40e_stat_str(&pf->hw, ret), + i40e_aq_str(&pf->hw, + pf->hw.aq.asq_last_status)); + + hlist_del(&cfilter->cloud_node); + kfree(cfilter); + vf->num_cloud_filters--; + } +} + +/** + * i40e_vc_del_cloud_filter + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + * + * This function deletes a cloud filter programmed as TC filter for ADq + **/ +static int i40e_vc_del_cloud_filter(struct i40e_vf *vf, u8 *msg) +{ + struct virtchnl_filter *vcf = (struct virtchnl_filter *)msg; + struct virtchnl_l4_spec mask = vcf->mask.tcp_spec; + struct virtchnl_l4_spec tcf = vcf->data.tcp_spec; + struct i40e_cloud_filter cfilter, *cf = NULL; + struct i40e_pf *pf = vf->pf; + struct i40e_vsi *vsi = NULL; + struct hlist_node *node; + i40e_status aq_ret = 0; + int i, ret; + + if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + aq_ret = I40E_ERR_PARAM; + goto err; + } + + if (!vf->adq_enabled) { + dev_info(&pf->pdev->dev, + "VF %d: ADq not enabled, can't apply cloud filter\n", + vf->vf_id); + aq_ret = I40E_ERR_PARAM; + goto err; + } + + if (i40e_validate_cloud_filter(vf, vcf)) { + dev_info(&pf->pdev->dev, + "VF %d: Invalid input, can't apply cloud filter\n", + vf->vf_id); + aq_ret = I40E_ERR_PARAM; + goto err; + } + + memset(&cfilter, 0, sizeof(cfilter)); + /* parse destination mac address */ + for (i = 0; i < ETH_ALEN; i++) + cfilter.dst_mac[i] = mask.dst_mac[i] & tcf.dst_mac[i]; + + /* parse source mac address */ + for (i = 0; i < ETH_ALEN; i++) + cfilter.src_mac[i] = mask.src_mac[i] & tcf.src_mac[i]; + + cfilter.vlan_id = mask.vlan_id & tcf.vlan_id; + cfilter.dst_port = mask.dst_port & tcf.dst_port; + cfilter.src_port = mask.src_port & tcf.src_port; + + switch (vcf->flow_type) { + case VIRTCHNL_TCP_V4_FLOW: + cfilter.n_proto = ETH_P_IP; + if (mask.dst_ip[0] & tcf.dst_ip[0]) + memcpy(&cfilter.ip.v4.dst_ip, tcf.dst_ip, + ARRAY_SIZE(tcf.dst_ip)); + else if (mask.src_ip[0] & tcf.dst_ip[0]) + memcpy(&cfilter.ip.v4.src_ip, tcf.src_ip, + ARRAY_SIZE(tcf.dst_ip)); + break; + case VIRTCHNL_TCP_V6_FLOW: + cfilter.n_proto = ETH_P_IPV6; + if (mask.dst_ip[3] & tcf.dst_ip[3]) + memcpy(&cfilter.ip.v6.dst_ip6, tcf.dst_ip, + sizeof(cfilter.ip.v6.dst_ip6)); + if (mask.src_ip[3] & tcf.src_ip[3]) + memcpy(&cfilter.ip.v6.src_ip6, tcf.src_ip, + sizeof(cfilter.ip.v6.src_ip6)); + break; + default: + /* TC filter can be configured based on different combinations + * and in this case IP is not a part of filter config + */ + dev_info(&pf->pdev->dev, "VF %d: Flow type not configured\n", + vf->vf_id); + } + + /* get the vsi to which the tc belongs to */ + vsi = pf->vsi[vf->ch[vcf->action_meta].vsi_idx]; + cfilter.seid = vsi->seid; + cfilter.flags = vcf->field_flags; + + /* Deleting TC filter */ + if (tcf.dst_port) + ret = i40e_add_del_cloud_filter_big_buf(vsi, &cfilter, false); + else + ret = i40e_add_del_cloud_filter(vsi, &cfilter, false); + if (ret) { + dev_err(&pf->pdev->dev, + "VF %d: Failed to delete cloud filter, err %s aq_err %s\n", + vf->vf_id, i40e_stat_str(&pf->hw, ret), + i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + goto err; + } + + hlist_for_each_entry_safe(cf, node, + &vf->cloud_filter_list, cloud_node) { + if (cf->seid != cfilter.seid) + continue; + if (mask.dst_port) + if (cfilter.dst_port != cf->dst_port) + continue; + if (mask.dst_mac[0]) + if (!ether_addr_equal(cf->src_mac, cfilter.src_mac)) + continue; + /* for ipv4 data to be valid, only first byte of mask is set */ + if (cfilter.n_proto == ETH_P_IP && mask.dst_ip[0]) + if (memcmp(&cfilter.ip.v4.dst_ip, &cf->ip.v4.dst_ip, + ARRAY_SIZE(tcf.dst_ip))) + continue; + /* for ipv6, mask is set for all sixteen bytes (4 words) */ + if (cfilter.n_proto == ETH_P_IPV6 && mask.dst_ip[3]) + if (memcmp(&cfilter.ip.v6.dst_ip6, &cf->ip.v6.dst_ip6, + sizeof(cfilter.ip.v6.src_ip6))) + continue; + if (mask.vlan_id) + if (cfilter.vlan_id != cf->vlan_id) + continue; + + hlist_del(&cf->cloud_node); + kfree(cf); + vf->num_cloud_filters--; + } + +err: + return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_DEL_CLOUD_FILTER, + aq_ret); +} + +/** + * i40e_vc_add_cloud_filter + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + * + * This function adds a cloud filter programmed as TC filter for ADq + **/ +static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg) +{ + struct virtchnl_filter *vcf = (struct virtchnl_filter *)msg; + struct virtchnl_l4_spec mask = vcf->mask.tcp_spec; + struct virtchnl_l4_spec tcf = vcf->data.tcp_spec; + struct i40e_cloud_filter *cfilter = NULL; + struct i40e_pf *pf = vf->pf; + struct i40e_vsi *vsi = NULL; + i40e_status aq_ret = 0; + int i, ret; + + if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + aq_ret = I40E_ERR_PARAM; + goto err; + } + + if (!vf->adq_enabled) { + dev_info(&pf->pdev->dev, + "VF %d: ADq is not enabled, can't apply cloud filter\n", + vf->vf_id); + aq_ret = I40E_ERR_PARAM; + goto err; + } + + if (i40e_validate_cloud_filter(vf, vcf)) { + dev_info(&pf->pdev->dev, + "VF %d: Invalid input/s, can't apply cloud filter\n", + vf->vf_id); + aq_ret = I40E_ERR_PARAM; + goto err; + } + + cfilter = kzalloc(sizeof(*cfilter), GFP_KERNEL); + if (!cfilter) + return -ENOMEM; + + /* parse destination mac address */ + for (i = 0; i < ETH_ALEN; i++) + cfilter->dst_mac[i] = mask.dst_mac[i] & tcf.dst_mac[i]; + + /* parse source mac address */ + for (i = 0; i < ETH_ALEN; i++) + cfilter->src_mac[i] = mask.src_mac[i] & tcf.src_mac[i]; + + cfilter->vlan_id = mask.vlan_id & tcf.vlan_id; + cfilter->dst_port = mask.dst_port & tcf.dst_port; + cfilter->src_port = mask.src_port & tcf.src_port; + + switch (vcf->flow_type) { + case VIRTCHNL_TCP_V4_FLOW: + cfilter->n_proto = ETH_P_IP; + if (mask.dst_ip[0] & tcf.dst_ip[0]) + memcpy(&cfilter->ip.v4.dst_ip, tcf.dst_ip, + ARRAY_SIZE(tcf.dst_ip)); + else if (mask.src_ip[0] & tcf.dst_ip[0]) + memcpy(&cfilter->ip.v4.src_ip, tcf.src_ip, + ARRAY_SIZE(tcf.dst_ip)); + break; + case VIRTCHNL_TCP_V6_FLOW: + cfilter->n_proto = ETH_P_IPV6; + if (mask.dst_ip[3] & tcf.dst_ip[3]) + memcpy(&cfilter->ip.v6.dst_ip6, tcf.dst_ip, + sizeof(cfilter->ip.v6.dst_ip6)); + if (mask.src_ip[3] & tcf.src_ip[3]) + memcpy(&cfilter->ip.v6.src_ip6, tcf.src_ip, + sizeof(cfilter->ip.v6.src_ip6)); + break; + default: + /* TC filter can be configured based on different combinations + * and in this case IP is not a part of filter config + */ + dev_info(&pf->pdev->dev, "VF %d: Flow type not configured\n", + vf->vf_id); + } + + /* get the VSI to which the TC belongs to */ + vsi = pf->vsi[vf->ch[vcf->action_meta].vsi_idx]; + cfilter->seid = vsi->seid; + cfilter->flags = vcf->field_flags; + + /* Adding cloud filter programmed as TC filter */ + if (tcf.dst_port) + ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter, true); + else + ret = i40e_add_del_cloud_filter(vsi, cfilter, true); + if (ret) { + dev_err(&pf->pdev->dev, + "VF %d: Failed to add cloud filter, err %s aq_err %s\n", + vf->vf_id, i40e_stat_str(&pf->hw, ret), + i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + goto err; + } + + INIT_HLIST_NODE(&cfilter->cloud_node); + hlist_add_head(&cfilter->cloud_node, &vf->cloud_filter_list); + vf->num_cloud_filters++; +err: + return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ADD_CLOUD_FILTER, + aq_ret); +} + +/** + * i40e_vc_add_qch_msg: Add queue channel and enable ADq + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + **/ +static int i40e_vc_add_qch_msg(struct i40e_vf *vf, u8 *msg) +{ + struct virtchnl_tc_info *tci = + (struct virtchnl_tc_info *)msg; + struct i40e_pf *pf = vf->pf; + struct i40e_link_status *ls = &pf->hw.phy.link_info; + int i, adq_request_qps = 0, speed = 0; + i40e_status aq_ret = 0; + + if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + aq_ret = I40E_ERR_PARAM; + goto err; + } + + /* ADq cannot be applied if spoof check is ON */ + if (vf->spoofchk) { + dev_err(&pf->pdev->dev, + "Spoof check is ON, turn it OFF to enable ADq\n"); + aq_ret = I40E_ERR_PARAM; + goto err; + } + + if (!(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ADQ)) { + dev_err(&pf->pdev->dev, + "VF %d attempting to enable ADq, but hasn't properly negotiated that capability\n", + vf->vf_id); + aq_ret = I40E_ERR_PARAM; + goto err; + } + + /* max number of traffic classes for VF currently capped at 4 */ + if (!tci->num_tc || tci->num_tc > I40E_MAX_VF_VSI) { + dev_err(&pf->pdev->dev, + "VF %d trying to set %u TCs, valid range 1-4 TCs per VF\n", + vf->vf_id, tci->num_tc); + aq_ret = I40E_ERR_PARAM; + goto err; + } + + /* validate queues for each TC */ + for (i = 0; i < tci->num_tc; i++) + if (!tci->list[i].count || + tci->list[i].count > I40E_DEFAULT_QUEUES_PER_VF) { + dev_err(&pf->pdev->dev, + "VF %d: TC %d trying to set %u queues, valid range 1-4 queues per TC\n", + vf->vf_id, i, tci->list[i].count); + aq_ret = I40E_ERR_PARAM; + goto err; + } + + /* need Max VF queues but already have default number of queues */ + adq_request_qps = I40E_MAX_VF_QUEUES - I40E_DEFAULT_QUEUES_PER_VF; + + if (pf->queues_left < adq_request_qps) { + dev_err(&pf->pdev->dev, + "No queues left to allocate to VF %d\n", + vf->vf_id); + aq_ret = I40E_ERR_PARAM; + goto err; + } else { + /* we need to allocate max VF queues to enable ADq so as to + * make sure ADq enabled VF always gets back queues when it + * goes through a reset. + */ + vf->num_queue_pairs = I40E_MAX_VF_QUEUES; + } + + /* get link speed in MB to validate rate limit */ + switch (ls->link_speed) { + case VIRTCHNL_LINK_SPEED_100MB: + speed = SPEED_100; + break; + case VIRTCHNL_LINK_SPEED_1GB: + speed = SPEED_1000; + break; + case VIRTCHNL_LINK_SPEED_10GB: + speed = SPEED_10000; + break; + case VIRTCHNL_LINK_SPEED_20GB: + speed = SPEED_20000; + break; + case VIRTCHNL_LINK_SPEED_25GB: + speed = SPEED_25000; + break; + case VIRTCHNL_LINK_SPEED_40GB: + speed = SPEED_40000; + break; + default: + dev_err(&pf->pdev->dev, + "Cannot detect link speed\n"); + aq_ret = I40E_ERR_PARAM; + goto err; + } + + /* parse data from the queue channel info */ + vf->num_tc = tci->num_tc; + for (i = 0; i < vf->num_tc; i++) { + if (tci->list[i].max_tx_rate) { + if (tci->list[i].max_tx_rate > speed) { + dev_err(&pf->pdev->dev, + "Invalid max tx rate %llu specified for VF %d.", + tci->list[i].max_tx_rate, + vf->vf_id); + aq_ret = I40E_ERR_PARAM; + goto err; + } else { + vf->ch[i].max_tx_rate = + tci->list[i].max_tx_rate; + } + } + vf->ch[i].num_qps = tci->list[i].count; + } + + /* set this flag only after making sure all inputs are sane */ + vf->adq_enabled = true; + /* num_req_queues is set when user changes number of queues via ethtool + * and this causes issue for default VSI(which depends on this variable) + * when ADq is enabled, hence reset it. + */ + vf->num_req_queues = 0; + + /* reset the VF in order to allocate resources */ + i40e_vc_notify_vf_reset(vf); + i40e_reset_vf(vf, false); + + return I40E_SUCCESS; + + /* send the response to the VF */ +err: + return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ENABLE_CHANNELS, + aq_ret); +} + +/** + * i40e_vc_del_qch_msg + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + **/ +static int i40e_vc_del_qch_msg(struct i40e_vf *vf, u8 *msg) +{ + struct i40e_pf *pf = vf->pf; + i40e_status aq_ret = 0; + + if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + aq_ret = I40E_ERR_PARAM; + goto err; + } + + if (vf->adq_enabled) { + i40e_del_all_cloud_filters(vf); + i40e_del_qch(vf); + vf->adq_enabled = false; + vf->num_tc = 0; + dev_info(&pf->pdev->dev, + "Deleting Queue Channels and cloud filters for ADq on VF %d\n", + vf->vf_id); + } else { + dev_info(&pf->pdev->dev, "VF %d trying to delete queue channels but ADq isn't enabled\n", + vf->vf_id); + aq_ret = I40E_ERR_PARAM; + } + + /* reset the VF in order to allocate resources */ + i40e_vc_notify_vf_reset(vf); + i40e_reset_vf(vf, false); + + return I40E_SUCCESS; + +err: + return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_DISABLE_CHANNELS, + aq_ret); +} + +/** * i40e_vc_process_vf_msg * @pf: pointer to the PF structure * @vf_id: source VF id @@ -2816,7 +3657,18 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode, case VIRTCHNL_OP_REQUEST_QUEUES: ret = i40e_vc_request_queues_msg(vf, msg, msglen); break; - + case VIRTCHNL_OP_ENABLE_CHANNELS: + ret = i40e_vc_add_qch_msg(vf, msg); + break; + case VIRTCHNL_OP_DISABLE_CHANNELS: + ret = i40e_vc_del_qch_msg(vf, msg); + break; + case VIRTCHNL_OP_ADD_CLOUD_FILTER: + ret = i40e_vc_add_cloud_filter(vf, msg); + break; + case VIRTCHNL_OP_DEL_CLOUD_FILTER: + ret = i40e_vc_del_cloud_filter(vf, msg); + break; case VIRTCHNL_OP_UNKNOWN: default: dev_err(&pf->pdev->dev, "Unsupported opcode %d from VF %d\n", @@ -3382,6 +4234,16 @@ int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting) i40e_vc_disable_vf(vf); dev_info(&pf->pdev->dev, "VF %u is now %strusted\n", vf_id, setting ? "" : "un"); + + if (vf->adq_enabled) { + if (!vf->trusted) { + dev_info(&pf->pdev->dev, + "VF %u no longer Trusted, deleting all cloud filters\n", + vf_id); + i40e_del_all_cloud_filters(vf); + } + } + out: return ret; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index 5efc4f92bb37..6852599b2379 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -69,6 +69,19 @@ enum i40e_vf_capabilities { I40E_VIRTCHNL_VF_CAP_IWARP, }; +/* In ADq, max 4 VSI's can be allocated per VF including primary VF VSI. + * These variables are used to store indices, id's and number of queues + * for each VSI including that of primary VF VSI. Each Traffic class is + * termed as channel and each channel can in-turn have 4 queues which + * means max 16 queues overall per VF. + */ +struct i40evf_channel { + u16 vsi_idx; /* index in PF struct for all channel VSIs */ + u16 vsi_id; /* VSI ID used by firmware */ + u16 num_qps; /* number of queue pairs requested by user */ + u64 max_tx_rate; /* bandwidth rate allocation for VSIs */ +}; + /* VF information structure */ struct i40e_vf { struct i40e_pf *pf; @@ -111,6 +124,13 @@ struct i40e_vf { u16 num_mac; u16 num_vlan; + /* ADq related variables */ + bool adq_enabled; /* flag to enable adq */ + u8 num_tc; + struct i40evf_channel ch[I40E_MAX_VF_VSI]; + struct hlist_head cloud_filter_list; + u16 num_cloud_filters; + /* RDMA Client */ struct virtchnl_iwarp_qvlist_info *qvlist_info; }; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 357d6051281f..eb8f3e327f6b 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -392,99 +392,241 @@ void i40evf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) val); } +static inline bool i40e_container_is_rx(struct i40e_q_vector *q_vector, + struct i40e_ring_container *rc) +{ + return &q_vector->rx == rc; +} + +static inline unsigned int i40e_itr_divisor(struct i40e_q_vector *q_vector) +{ + unsigned int divisor; + + switch (q_vector->adapter->link_speed) { + case I40E_LINK_SPEED_40GB: + divisor = I40E_ITR_ADAPTIVE_MIN_INC * 1024; + break; + case I40E_LINK_SPEED_25GB: + case I40E_LINK_SPEED_20GB: + divisor = I40E_ITR_ADAPTIVE_MIN_INC * 512; + break; + default: + case I40E_LINK_SPEED_10GB: + divisor = I40E_ITR_ADAPTIVE_MIN_INC * 256; + break; + case I40E_LINK_SPEED_1GB: + case I40E_LINK_SPEED_100MB: + divisor = I40E_ITR_ADAPTIVE_MIN_INC * 32; + break; + } + + return divisor; +} + /** - * i40e_set_new_dynamic_itr - Find new ITR level + * i40e_update_itr - update the dynamic ITR value based on statistics + * @q_vector: structure containing interrupt and ring information * @rc: structure containing ring performance data * - * Returns true if ITR changed, false if not - * - * Stores a new ITR value based on packets and byte counts during - * the last interrupt. The advantage of per interrupt computation - * is faster updates and more accurate ITR for the current traffic - * pattern. Constants in this function were computed based on - * theoretical maximum wire speed and thresholds were set based on - * testing data as well as attempting to minimize response time + * Stores a new ITR value based on packets and byte + * counts during the last interrupt. The advantage of per interrupt + * computation is faster updates and more accurate ITR for the current + * traffic pattern. Constants in this function were computed + * based on theoretical maximum wire speed and thresholds were set based + * on testing data as well as attempting to minimize response time * while increasing bulk throughput. **/ -static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) +static void i40e_update_itr(struct i40e_q_vector *q_vector, + struct i40e_ring_container *rc) { - enum i40e_latency_range new_latency_range = rc->latency_range; - u32 new_itr = rc->itr; - int bytes_per_usec; - unsigned int usecs, estimated_usecs; + unsigned int avg_wire_size, packets, bytes, itr; + unsigned long next_update = jiffies; - if (rc->total_packets == 0 || !rc->itr) - return false; + /* If we don't have any rings just leave ourselves set for maximum + * possible latency so we take ourselves out of the equation. + */ + if (!rc->ring || !ITR_IS_DYNAMIC(rc->ring->itr_setting)) + return; + + /* For Rx we want to push the delay up and default to low latency. + * for Tx we want to pull the delay down and default to high latency. + */ + itr = i40e_container_is_rx(q_vector, rc) ? + I40E_ITR_ADAPTIVE_MIN_USECS | I40E_ITR_ADAPTIVE_LATENCY : + I40E_ITR_ADAPTIVE_MAX_USECS | I40E_ITR_ADAPTIVE_LATENCY; + + /* If we didn't update within up to 1 - 2 jiffies we can assume + * that either packets are coming in so slow there hasn't been + * any work, or that there is so much work that NAPI is dealing + * with interrupt moderation and we don't need to do anything. + */ + if (time_after(next_update, rc->next_update)) + goto clear_counts; + + /* If itr_countdown is set it means we programmed an ITR within + * the last 4 interrupt cycles. This has a side effect of us + * potentially firing an early interrupt. In order to work around + * this we need to throw out any data received for a few + * interrupts following the update. + */ + if (q_vector->itr_countdown) { + itr = rc->target_itr; + goto clear_counts; + } + + packets = rc->total_packets; + bytes = rc->total_bytes; - usecs = (rc->itr << 1) * ITR_COUNTDOWN_START; - bytes_per_usec = rc->total_bytes / usecs; + if (i40e_container_is_rx(q_vector, rc)) { + /* If Rx there are 1 to 4 packets and bytes are less than + * 9000 assume insufficient data to use bulk rate limiting + * approach unless Tx is already in bulk rate limiting. We + * are likely latency driven. + */ + if (packets && packets < 4 && bytes < 9000 && + (q_vector->tx.target_itr & I40E_ITR_ADAPTIVE_LATENCY)) { + itr = I40E_ITR_ADAPTIVE_LATENCY; + goto adjust_by_size; + } + } else if (packets < 4) { + /* If we have Tx and Rx ITR maxed and Tx ITR is running in + * bulk mode and we are receiving 4 or fewer packets just + * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so + * that the Rx can relax. + */ + if (rc->target_itr == I40E_ITR_ADAPTIVE_MAX_USECS && + (q_vector->rx.target_itr & I40E_ITR_MASK) == + I40E_ITR_ADAPTIVE_MAX_USECS) + goto clear_counts; + } else if (packets > 32) { + /* If we have processed over 32 packets in a single interrupt + * for Tx assume we need to switch over to "bulk" mode. + */ + rc->target_itr &= ~I40E_ITR_ADAPTIVE_LATENCY; + } - /* The calculations in this algorithm depend on interrupts actually - * firing at the ITR rate. This may not happen if the packet rate is - * really low, or if we've been napi polling. Check to make sure - * that's not the case before we continue. + /* We have no packets to actually measure against. This means + * either one of the other queues on this vector is active or + * we are a Tx queue doing TSO with too high of an interrupt rate. + * + * Between 4 and 56 we can assume that our current interrupt delay + * is only slightly too low. As such we should increase it by a small + * fixed amount. */ - estimated_usecs = jiffies_to_usecs(jiffies - rc->last_itr_update); - if (estimated_usecs > usecs) { - new_latency_range = I40E_LOW_LATENCY; - goto reset_latency; + if (packets < 56) { + itr = rc->target_itr + I40E_ITR_ADAPTIVE_MIN_INC; + if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) { + itr &= I40E_ITR_ADAPTIVE_LATENCY; + itr += I40E_ITR_ADAPTIVE_MAX_USECS; + } + goto clear_counts; + } + + if (packets <= 256) { + itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr); + itr &= I40E_ITR_MASK; + + /* Between 56 and 112 is our "goldilocks" zone where we are + * working out "just right". Just report that our current + * ITR is good for us. + */ + if (packets <= 112) + goto clear_counts; + + /* If packet count is 128 or greater we are likely looking + * at a slight overrun of the delay we want. Try halving + * our delay to see if that will cut the number of packets + * in half per interrupt. + */ + itr /= 2; + itr &= I40E_ITR_MASK; + if (itr < I40E_ITR_ADAPTIVE_MIN_USECS) + itr = I40E_ITR_ADAPTIVE_MIN_USECS; + + goto clear_counts; } - /* simple throttlerate management - * 0-10MB/s lowest (50000 ints/s) - * 10-20MB/s low (20000 ints/s) - * 20-1249MB/s bulk (18000 ints/s) + /* The paths below assume we are dealing with a bulk ITR since + * number of packets is greater than 256. We are just going to have + * to compute a value and try to bring the count under control, + * though for smaller packet sizes there isn't much we can do as + * NAPI polling will likely be kicking in sooner rather than later. + */ + itr = I40E_ITR_ADAPTIVE_BULK; + +adjust_by_size: + /* If packet counts are 256 or greater we can assume we have a gross + * overestimation of what the rate should be. Instead of trying to fine + * tune it just use the formula below to try and dial in an exact value + * give the current packet size of the frame. + */ + avg_wire_size = bytes / packets; + + /* The following is a crude approximation of: + * wmem_default / (size + overhead) = desired_pkts_per_int + * rate / bits_per_byte / (size + ethernet overhead) = pkt_rate + * (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value * - * The math works out because the divisor is in 10^(-6) which - * turns the bytes/us input value into MB/s values, but - * make sure to use usecs, as the register values written - * are in 2 usec increments in the ITR registers, and make sure - * to use the smoothed values that the countdown timer gives us. + * Assuming wmem_default is 212992 and overhead is 640 bytes per + * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the + * formula down to + * + * (170 * (size + 24)) / (size + 640) = ITR + * + * We first do some math on the packet size and then finally bitshift + * by 8 after rounding up. We also have to account for PCIe link speed + * difference as ITR scales based on this. */ - switch (new_latency_range) { - case I40E_LOWEST_LATENCY: - if (bytes_per_usec > 10) - new_latency_range = I40E_LOW_LATENCY; - break; - case I40E_LOW_LATENCY: - if (bytes_per_usec > 20) - new_latency_range = I40E_BULK_LATENCY; - else if (bytes_per_usec <= 10) - new_latency_range = I40E_LOWEST_LATENCY; - break; - case I40E_BULK_LATENCY: - default: - if (bytes_per_usec <= 20) - new_latency_range = I40E_LOW_LATENCY; - break; + if (avg_wire_size <= 60) { + /* Start at 250k ints/sec */ + avg_wire_size = 4096; + } else if (avg_wire_size <= 380) { + /* 250K ints/sec to 60K ints/sec */ + avg_wire_size *= 40; + avg_wire_size += 1696; + } else if (avg_wire_size <= 1084) { + /* 60K ints/sec to 36K ints/sec */ + avg_wire_size *= 15; + avg_wire_size += 11452; + } else if (avg_wire_size <= 1980) { + /* 36K ints/sec to 30K ints/sec */ + avg_wire_size *= 5; + avg_wire_size += 22420; + } else { + /* plateau at a limit of 30K ints/sec */ + avg_wire_size = 32256; } -reset_latency: - rc->latency_range = new_latency_range; + /* If we are in low latency mode halve our delay which doubles the + * rate to somewhere between 100K to 16K ints/sec + */ + if (itr & I40E_ITR_ADAPTIVE_LATENCY) + avg_wire_size /= 2; - switch (new_latency_range) { - case I40E_LOWEST_LATENCY: - new_itr = I40E_ITR_50K; - break; - case I40E_LOW_LATENCY: - new_itr = I40E_ITR_20K; - break; - case I40E_BULK_LATENCY: - new_itr = I40E_ITR_18K; - break; - default: - break; + /* Resultant value is 256 times larger than it needs to be. This + * gives us room to adjust the value as needed to either increase + * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc. + * + * Use addition as we have already recorded the new latency flag + * for the ITR value. + */ + itr += DIV_ROUND_UP(avg_wire_size, i40e_itr_divisor(q_vector)) * + I40E_ITR_ADAPTIVE_MIN_INC; + + if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) { + itr &= I40E_ITR_ADAPTIVE_LATENCY; + itr += I40E_ITR_ADAPTIVE_MAX_USECS; } +clear_counts: + /* write back value */ + rc->target_itr = itr; + + /* next update should occur within next jiffy */ + rc->next_update = next_update + 1; + rc->total_bytes = 0; rc->total_packets = 0; - rc->last_itr_update = jiffies; - - if (new_itr != rc->itr) { - rc->itr = new_itr; - return true; - } - return false; } /** @@ -1273,7 +1415,7 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, * @rx_buffer: rx buffer to pull data from * * This function will clean up the contents of the rx_buffer. It will - * either recycle the bufer or unmap it and free the associated resources. + * either recycle the buffer or unmap it and free the associated resources. */ static void i40e_put_rx_buffer(struct i40e_ring *rx_ring, struct i40e_rx_buffer *rx_buffer) @@ -1457,33 +1599,45 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) return failure ? budget : (int)total_rx_packets; } -static u32 i40e_buildreg_itr(const int type, const u16 itr) +static inline u32 i40e_buildreg_itr(const int type, u16 itr) { u32 val; + /* We don't bother with setting the CLEARPBA bit as the data sheet + * points out doing so is "meaningless since it was already + * auto-cleared". The auto-clearing happens when the interrupt is + * asserted. + * + * Hardware errata 28 for also indicates that writing to a + * xxINT_DYN_CTLx CSR with INTENA_MSK (bit 31) set to 0 will clear + * an event in the PBA anyway so we need to rely on the automask + * to hold pending events for us until the interrupt is re-enabled + * + * The itr value is reported in microseconds, and the register + * value is recorded in 2 microsecond units. For this reason we + * only need to shift by the interval shift - 1 instead of the + * full value. + */ + itr &= I40E_ITR_MASK; + val = I40E_VFINT_DYN_CTLN1_INTENA_MASK | - I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK | (type << I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) | - (itr << I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT); + (itr << (I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT - 1)); return val; } /* a small macro to shorten up some long lines */ #define INTREG I40E_VFINT_DYN_CTLN1 -static inline int get_rx_itr(struct i40e_vsi *vsi, int idx) -{ - struct i40evf_adapter *adapter = vsi->back; - return adapter->rx_rings[idx].rx_itr_setting; -} - -static inline int get_tx_itr(struct i40e_vsi *vsi, int idx) -{ - struct i40evf_adapter *adapter = vsi->back; - - return adapter->tx_rings[idx].tx_itr_setting; -} +/* The act of updating the ITR will cause it to immediately trigger. In order + * to prevent this from throwing off adaptive update statistics we defer the + * update so that it can only happen so often. So after either Tx or Rx are + * updated we make the adaptive scheme wait until either the ITR completely + * expires via the next_update expiration or we have been through at least + * 3 interrupts. + */ +#define ITR_COUNTDOWN_START 3 /** * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt @@ -1495,70 +1649,51 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) { struct i40e_hw *hw = &vsi->back->hw; - bool rx = false, tx = false; - u32 rxval, txval; - int idx = q_vector->v_idx; - int rx_itr_setting, tx_itr_setting; - - /* avoid dynamic calculation if in countdown mode OR if - * all dynamic is disabled - */ - rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0); - - rx_itr_setting = get_rx_itr(vsi, idx); - tx_itr_setting = get_tx_itr(vsi, idx); + u32 intval; - if (q_vector->itr_countdown > 0 || - (!ITR_IS_DYNAMIC(rx_itr_setting) && - !ITR_IS_DYNAMIC(tx_itr_setting))) { - goto enable_int; - } - - if (ITR_IS_DYNAMIC(rx_itr_setting)) { - rx = i40e_set_new_dynamic_itr(&q_vector->rx); - rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr); - } + /* These will do nothing if dynamic updates are not enabled */ + i40e_update_itr(q_vector, &q_vector->tx); + i40e_update_itr(q_vector, &q_vector->rx); - if (ITR_IS_DYNAMIC(tx_itr_setting)) { - tx = i40e_set_new_dynamic_itr(&q_vector->tx); - txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr); - } - - if (rx || tx) { - /* get the higher of the two ITR adjustments and - * use the same value for both ITR registers - * when in adaptive mode (Rx and/or Tx) - */ - u16 itr = max(q_vector->tx.itr, q_vector->rx.itr); - - q_vector->tx.itr = q_vector->rx.itr = itr; - txval = i40e_buildreg_itr(I40E_TX_ITR, itr); - tx = true; - rxval = i40e_buildreg_itr(I40E_RX_ITR, itr); - rx = true; - } - - /* only need to enable the interrupt once, but need - * to possibly update both ITR values + /* This block of logic allows us to get away with only updating + * one ITR value with each interrupt. The idea is to perform a + * pseudo-lazy update with the following criteria. + * + * 1. Rx is given higher priority than Tx if both are in same state + * 2. If we must reduce an ITR that is given highest priority. + * 3. We then give priority to increasing ITR based on amount. */ - if (rx) { - /* set the INTENA_MSK_MASK so that this first write - * won't actually enable the interrupt, instead just - * updating the ITR (it's bit 31 PF and VF) + if (q_vector->rx.target_itr < q_vector->rx.current_itr) { + /* Rx ITR needs to be reduced, this is highest priority */ + intval = i40e_buildreg_itr(I40E_RX_ITR, + q_vector->rx.target_itr); + q_vector->rx.current_itr = q_vector->rx.target_itr; + q_vector->itr_countdown = ITR_COUNTDOWN_START; + } else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) || + ((q_vector->rx.target_itr - q_vector->rx.current_itr) < + (q_vector->tx.target_itr - q_vector->tx.current_itr))) { + /* Tx ITR needs to be reduced, this is second priority + * Tx ITR needs to be increased more than Rx, fourth priority */ - rxval |= BIT(31); - /* don't check _DOWN because interrupt isn't being enabled */ - wr32(hw, INTREG(q_vector->reg_idx), rxval); + intval = i40e_buildreg_itr(I40E_TX_ITR, + q_vector->tx.target_itr); + q_vector->tx.current_itr = q_vector->tx.target_itr; + q_vector->itr_countdown = ITR_COUNTDOWN_START; + } else if (q_vector->rx.current_itr != q_vector->rx.target_itr) { + /* Rx ITR needs to be increased, third priority */ + intval = i40e_buildreg_itr(I40E_RX_ITR, + q_vector->rx.target_itr); + q_vector->rx.current_itr = q_vector->rx.target_itr; + q_vector->itr_countdown = ITR_COUNTDOWN_START; + } else { + /* No ITR update, lowest priority */ + intval = i40e_buildreg_itr(I40E_ITR_NONE, 0); + if (q_vector->itr_countdown) + q_vector->itr_countdown--; } -enable_int: if (!test_bit(__I40E_VSI_DOWN, vsi->state)) - wr32(hw, INTREG(q_vector->reg_idx), txval); - - if (q_vector->itr_countdown) - q_vector->itr_countdown--; - else - q_vector->itr_countdown = ITR_COUNTDOWN_START; + wr32(hw, INTREG(q_vector->reg_idx), intval); } /** diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h index 7798a6645c3f..9129447d079b 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h @@ -28,31 +28,35 @@ #define _I40E_TXRX_H_ /* Interrupt Throttling and Rate Limiting Goodies */ - -#define I40E_MAX_ITR 0x0FF0 /* reg uses 2 usec resolution */ -#define I40E_MIN_ITR 0x0001 /* reg uses 2 usec resolution */ -#define I40E_ITR_100K 0x0005 -#define I40E_ITR_50K 0x000A -#define I40E_ITR_20K 0x0019 -#define I40E_ITR_18K 0x001B -#define I40E_ITR_8K 0x003E -#define I40E_ITR_4K 0x007A -#define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */ -#define I40E_ITR_RX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \ - I40E_ITR_DYNAMIC) -#define I40E_ITR_TX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \ - I40E_ITR_DYNAMIC) -#define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */ -#define I40E_MIN_INT_RATE 250 /* ~= 1000000 / (I40E_MAX_ITR * 2) */ -#define I40E_MAX_INT_RATE 500000 /* == 1000000 / (I40E_MIN_ITR * 2) */ #define I40E_DEFAULT_IRQ_WORK 256 -#define ITR_TO_REG(setting) ((setting & ~I40E_ITR_DYNAMIC) >> 1) -#define ITR_IS_DYNAMIC(setting) (!!(setting & I40E_ITR_DYNAMIC)) -#define ITR_REG_TO_USEC(itr_reg) (itr_reg << 1) + +/* The datasheet for the X710 and XL710 indicate that the maximum value for + * the ITR is 8160usec which is then called out as 0xFF0 with a 2usec + * resolution. 8160 is 0x1FE0 when written out in hex. So instead of storing + * the register value which is divided by 2 lets use the actual values and + * avoid an excessive amount of translation. + */ +#define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */ +#define I40E_ITR_MASK 0x1FFE /* mask for ITR register value */ +#define I40E_MIN_ITR 2 /* reg uses 2 usec resolution */ +#define I40E_ITR_100K 10 /* all values below must be even */ +#define I40E_ITR_50K 20 +#define I40E_ITR_20K 50 +#define I40E_ITR_18K 60 +#define I40E_ITR_8K 122 +#define I40E_MAX_ITR 8160 /* maximum value as per datasheet */ +#define ITR_TO_REG(setting) ((setting) & ~I40E_ITR_DYNAMIC) +#define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~I40E_ITR_MASK) +#define ITR_IS_DYNAMIC(setting) (!!((setting) & I40E_ITR_DYNAMIC)) + +#define I40E_ITR_RX_DEF (I40E_ITR_20K | I40E_ITR_DYNAMIC) +#define I40E_ITR_TX_DEF (I40E_ITR_20K | I40E_ITR_DYNAMIC) + /* 0x40 is the enable bit for interrupt rate limiting, and must be set if * the value of the rate limit is non-zero */ #define INTRL_ENA BIT(6) +#define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */ #define INTRL_REG_TO_USEC(intrl) ((intrl & ~INTRL_ENA) << 2) #define INTRL_USEC_TO_REG(set) ((set) ? ((set) >> 2) | INTRL_ENA : 0) #define I40E_INTRL_8K 125 /* 8000 ints/sec */ @@ -362,8 +366,7 @@ struct i40e_ring { * these values always store the USER setting, and must be converted * before programming to a register. */ - u16 rx_itr_setting; - u16 tx_itr_setting; + u16 itr_setting; u16 count; /* Number of descriptors */ u16 reg_idx; /* HW register index of the ring */ @@ -425,21 +428,21 @@ static inline void clear_ring_build_skb_enabled(struct i40e_ring *ring) ring->flags &= ~I40E_RXR_FLAGS_BUILD_SKB_ENABLED; } -enum i40e_latency_range { - I40E_LOWEST_LATENCY = 0, - I40E_LOW_LATENCY = 1, - I40E_BULK_LATENCY = 2, -}; +#define I40E_ITR_ADAPTIVE_MIN_INC 0x0002 +#define I40E_ITR_ADAPTIVE_MIN_USECS 0x0002 +#define I40E_ITR_ADAPTIVE_MAX_USECS 0x007e +#define I40E_ITR_ADAPTIVE_LATENCY 0x8000 +#define I40E_ITR_ADAPTIVE_BULK 0x0000 +#define ITR_IS_BULK(x) (!((x) & I40E_ITR_ADAPTIVE_LATENCY)) struct i40e_ring_container { - /* array of pointers to rings */ - struct i40e_ring *ring; + struct i40e_ring *ring; /* pointer to linked list of ring(s) */ + unsigned long next_update; /* jiffies value of next update */ unsigned int total_bytes; /* total bytes processed this int */ unsigned int total_packets; /* total packets processed this int */ - unsigned long last_itr_update; /* jiffies of last ITR update */ u16 count; - enum i40e_latency_range latency_range; - u16 itr; + u16 target_itr; /* target ITR setting for ring(s) */ + u16 current_itr; /* current ITR setting for ring(s) */ }; /* iterator for handling rings in ring container */ diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h index 9690c1ea019e..e46555ad7122 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf.h +++ b/drivers/net/ethernet/intel/i40evf/i40evf.h @@ -52,7 +52,10 @@ #include <linux/socket.h> #include <linux/jiffies.h> #include <net/ip6_checksum.h> +#include <net/pkt_cls.h> #include <net/udp.h> +#include <net/tc_act/tc_gact.h> +#include <net/tc_act/tc_mirred.h> #include "i40e_type.h" #include <linux/avf/virtchnl.h> @@ -106,6 +109,7 @@ struct i40e_vsi { #define I40EVF_HKEY_ARRAY_SIZE ((I40E_VFQF_HKEY_MAX_INDEX + 1) * 4) #define I40EVF_HLUT_ARRAY_SIZE ((I40E_VFQF_HLUT_MAX_INDEX + 1) * 4) +#define I40EVF_MBPS_DIVISOR 125000 /* divisor to convert to Mbps */ /* MAX_MSIX_Q_VECTORS of these are allocated, * but we only use one per queue-specific vector. @@ -117,9 +121,8 @@ struct i40e_q_vector { struct i40e_ring_container rx; struct i40e_ring_container tx; u32 ring_mask; + u8 itr_countdown; /* when 0 should adjust adaptive ITR */ u8 num_ringpairs; /* total number of ring pairs in vector */ -#define ITR_COUNTDOWN_START 100 - u8 itr_countdown; /* when 0 or 1 update ITR */ u16 v_idx; /* index in the vsi->q_vector array. */ u16 reg_idx; /* register index of the interrupt */ char name[IFNAMSIZ + 15]; @@ -169,6 +172,28 @@ struct i40evf_vlan_filter { bool add; /* filter needs to be added */ }; +#define I40EVF_MAX_TRAFFIC_CLASS 4 +/* State of traffic class creation */ +enum i40evf_tc_state_t { + __I40EVF_TC_INVALID, /* no traffic class, default state */ + __I40EVF_TC_RUNNING, /* traffic classes have been created */ +}; + +/* channel info */ +struct i40evf_channel_config { + struct virtchnl_channel_info ch_info[I40EVF_MAX_TRAFFIC_CLASS]; + enum i40evf_tc_state_t state; + u8 total_qps; +}; + +/* State of cloud filter */ +enum i40evf_cloud_filter_state_t { + __I40EVF_CF_INVALID, /* cloud filter not added */ + __I40EVF_CF_ADD_PENDING, /* cloud filter pending add by the PF */ + __I40EVF_CF_DEL_PENDING, /* cloud filter pending del by the PF */ + __I40EVF_CF_ACTIVE, /* cloud filter is active */ +}; + /* Driver state. The order of these is important! */ enum i40evf_state_t { __I40EVF_STARTUP, /* driver loaded, probe complete */ @@ -190,6 +215,36 @@ enum i40evf_critical_section_t { __I40EVF_IN_REMOVE_TASK, /* device being removed */ }; +#define I40EVF_CLOUD_FIELD_OMAC 0x01 +#define I40EVF_CLOUD_FIELD_IMAC 0x02 +#define I40EVF_CLOUD_FIELD_IVLAN 0x04 +#define I40EVF_CLOUD_FIELD_TEN_ID 0x08 +#define I40EVF_CLOUD_FIELD_IIP 0x10 + +#define I40EVF_CF_FLAGS_OMAC I40EVF_CLOUD_FIELD_OMAC +#define I40EVF_CF_FLAGS_IMAC I40EVF_CLOUD_FIELD_IMAC +#define I40EVF_CF_FLAGS_IMAC_IVLAN (I40EVF_CLOUD_FIELD_IMAC |\ + I40EVF_CLOUD_FIELD_IVLAN) +#define I40EVF_CF_FLAGS_IMAC_TEN_ID (I40EVF_CLOUD_FIELD_IMAC |\ + I40EVF_CLOUD_FIELD_TEN_ID) +#define I40EVF_CF_FLAGS_OMAC_TEN_ID_IMAC (I40EVF_CLOUD_FIELD_OMAC |\ + I40EVF_CLOUD_FIELD_IMAC |\ + I40EVF_CLOUD_FIELD_TEN_ID) +#define I40EVF_CF_FLAGS_IMAC_IVLAN_TEN_ID (I40EVF_CLOUD_FIELD_IMAC |\ + I40EVF_CLOUD_FIELD_IVLAN |\ + I40EVF_CLOUD_FIELD_TEN_ID) +#define I40EVF_CF_FLAGS_IIP I40E_CLOUD_FIELD_IIP + +/* bookkeeping of cloud filters */ +struct i40evf_cloud_filter { + enum i40evf_cloud_filter_state_t state; + struct list_head list; + struct virtchnl_filter f; + unsigned long cookie; + bool del; /* filter needs to be deleted */ + bool add; /* filter needs to be added */ +}; + /* board specific private data structure */ struct i40evf_adapter { struct timer_list watchdog_timer; @@ -241,6 +296,7 @@ struct i40evf_adapter { #define I40EVF_FLAG_ALLMULTI_ON BIT(14) #define I40EVF_FLAG_LEGACY_RX BIT(15) #define I40EVF_FLAG_REINIT_ITR_NEEDED BIT(16) +#define I40EVF_FLAG_QUEUES_DISABLED BIT(17) /* duplicates for common code */ #define I40E_FLAG_DCB_ENABLED 0 #define I40E_FLAG_RX_CSUM_ENABLED I40EVF_FLAG_RX_CSUM_ENABLED @@ -269,6 +325,10 @@ struct i40evf_adapter { #define I40EVF_FLAG_AQ_RELEASE_ALLMULTI BIT(18) #define I40EVF_FLAG_AQ_ENABLE_VLAN_STRIPPING BIT(19) #define I40EVF_FLAG_AQ_DISABLE_VLAN_STRIPPING BIT(20) +#define I40EVF_FLAG_AQ_ENABLE_CHANNELS BIT(21) +#define I40EVF_FLAG_AQ_DISABLE_CHANNELS BIT(22) +#define I40EVF_FLAG_AQ_ADD_CLOUD_FILTER BIT(23) +#define I40EVF_FLAG_AQ_DEL_CLOUD_FILTER BIT(24) /* OS defined structs */ struct net_device *netdev; @@ -314,6 +374,13 @@ struct i40evf_adapter { u16 rss_lut_size; u8 *rss_key; u8 *rss_lut; + /* ADQ related members */ + struct i40evf_channel_config ch_config; + u8 num_tc; + struct list_head cloud_filter_list; + /* lock to protest access to the cloud filter list */ + spinlock_t cloud_filter_list_lock; + u16 num_cloud_filters; }; @@ -380,4 +447,8 @@ void i40evf_notify_client_message(struct i40e_vsi *vsi, u8 *msg, u16 len); void i40evf_notify_client_l2_params(struct i40e_vsi *vsi); void i40evf_notify_client_open(struct i40e_vsi *vsi); void i40evf_notify_client_close(struct i40e_vsi *vsi, bool reset); +void i40evf_enable_channels(struct i40evf_adapter *adapter); +void i40evf_disable_channels(struct i40evf_adapter *adapter); +void i40evf_add_cloud_filter(struct i40evf_adapter *adapter); +void i40evf_del_cloud_filter(struct i40evf_adapter *adapter); #endif /* _I40EVF_H_ */ diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c index e2d8aa19d205..e6793255de0b 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c @@ -457,14 +457,14 @@ static int __i40evf_get_coalesce(struct net_device *netdev, rx_ring = &adapter->rx_rings[queue]; tx_ring = &adapter->tx_rings[queue]; - if (ITR_IS_DYNAMIC(rx_ring->rx_itr_setting)) + if (ITR_IS_DYNAMIC(rx_ring->itr_setting)) ec->use_adaptive_rx_coalesce = 1; - if (ITR_IS_DYNAMIC(tx_ring->tx_itr_setting)) + if (ITR_IS_DYNAMIC(tx_ring->itr_setting)) ec->use_adaptive_tx_coalesce = 1; - ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC; - ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC; + ec->rx_coalesce_usecs = rx_ring->itr_setting & ~I40E_ITR_DYNAMIC; + ec->tx_coalesce_usecs = tx_ring->itr_setting & ~I40E_ITR_DYNAMIC; return 0; } @@ -502,7 +502,7 @@ static int i40evf_get_per_queue_coalesce(struct net_device *netdev, /** * i40evf_set_itr_per_queue - set ITR values for specific queue - * @vsi: the VSI to set values for + * @adapter: the VF adapter struct to set values for * @ec: coalesce settings from ethtool * @queue: the queue to modify * @@ -514,33 +514,29 @@ static void i40evf_set_itr_per_queue(struct i40evf_adapter *adapter, { struct i40e_ring *rx_ring = &adapter->rx_rings[queue]; struct i40e_ring *tx_ring = &adapter->tx_rings[queue]; - struct i40e_vsi *vsi = &adapter->vsi; - struct i40e_hw *hw = &adapter->hw; struct i40e_q_vector *q_vector; - u16 vector; - rx_ring->rx_itr_setting = ec->rx_coalesce_usecs; - tx_ring->tx_itr_setting = ec->tx_coalesce_usecs; + rx_ring->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs); + tx_ring->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs); - rx_ring->rx_itr_setting |= I40E_ITR_DYNAMIC; + rx_ring->itr_setting |= I40E_ITR_DYNAMIC; if (!ec->use_adaptive_rx_coalesce) - rx_ring->rx_itr_setting ^= I40E_ITR_DYNAMIC; + rx_ring->itr_setting ^= I40E_ITR_DYNAMIC; - tx_ring->tx_itr_setting |= I40E_ITR_DYNAMIC; + tx_ring->itr_setting |= I40E_ITR_DYNAMIC; if (!ec->use_adaptive_tx_coalesce) - tx_ring->tx_itr_setting ^= I40E_ITR_DYNAMIC; + tx_ring->itr_setting ^= I40E_ITR_DYNAMIC; q_vector = rx_ring->q_vector; - q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting); - vector = vsi->base_vector + q_vector->v_idx; - wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, vector - 1), q_vector->rx.itr); + q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting); q_vector = tx_ring->q_vector; - q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting); - vector = vsi->base_vector + q_vector->v_idx; - wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, vector - 1), q_vector->tx.itr); + q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting); - i40e_flush(hw); + /* The interrupt handler itself will take care of programming + * the Tx and Rx ITR values based on the values we have entered + * into the q_vector, no need to write the values now. + */ } /** @@ -565,8 +561,8 @@ static int __i40evf_set_coalesce(struct net_device *netdev, if (ec->rx_coalesce_usecs == 0) { if (ec->use_adaptive_rx_coalesce) netif_info(adapter, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n"); - } else if ((ec->rx_coalesce_usecs < (I40E_MIN_ITR << 1)) || - (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1))) { + } else if ((ec->rx_coalesce_usecs < I40E_MIN_ITR) || + (ec->rx_coalesce_usecs > I40E_MAX_ITR)) { netif_info(adapter, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n"); return -EINVAL; } @@ -575,8 +571,8 @@ static int __i40evf_set_coalesce(struct net_device *netdev, if (ec->tx_coalesce_usecs == 0) { if (ec->use_adaptive_tx_coalesce) netif_info(adapter, drv, netdev, "tx-usecs=0, need to disable adaptive-tx for a complete disable\n"); - } else if ((ec->tx_coalesce_usecs < (I40E_MIN_ITR << 1)) || - (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1))) { + } else if ((ec->tx_coalesce_usecs < I40E_MIN_ITR) || + (ec->tx_coalesce_usecs > I40E_MAX_ITR)) { netif_info(adapter, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n"); return -EINVAL; } @@ -699,6 +695,12 @@ static int i40evf_set_channels(struct net_device *netdev, return -EINVAL; } + if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) && + adapter->num_tc) { + dev_info(&adapter->pdev->dev, "Cannot set channels since ADq is enabled.\n"); + return -EINVAL; + } + /* All of these should have already been checked by ethtool before this * even gets to us, but just to be sure. */ diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 16989ad2ca90..4955ce3ab6a2 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -353,11 +353,12 @@ i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx) rx_ring->vsi = &adapter->vsi; q_vector->rx.ring = rx_ring; q_vector->rx.count++; - q_vector->rx.latency_range = I40E_LOW_LATENCY; - q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting); + q_vector->rx.next_update = jiffies + 1; + q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting); q_vector->ring_mask |= BIT(r_idx); - q_vector->itr_countdown = ITR_COUNTDOWN_START; - wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, v_idx - 1), q_vector->rx.itr); + wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, q_vector->reg_idx), + q_vector->rx.current_itr); + q_vector->rx.current_itr = q_vector->rx.target_itr; } /** @@ -378,11 +379,12 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx) tx_ring->vsi = &adapter->vsi; q_vector->tx.ring = tx_ring; q_vector->tx.count++; - q_vector->tx.latency_range = I40E_LOW_LATENCY; - q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting); - q_vector->itr_countdown = ITR_COUNTDOWN_START; + q_vector->tx.next_update = jiffies + 1; + q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting); q_vector->num_ringpairs++; - wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, v_idx - 1), q_vector->tx.itr); + wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, q_vector->reg_idx), + q_vector->tx.target_itr); + q_vector->tx.current_itr = q_vector->tx.target_itr; } /** @@ -783,7 +785,7 @@ static int i40evf_vlan_rx_kill_vid(struct net_device *netdev, **/ static struct i40evf_mac_filter *i40evf_find_filter(struct i40evf_adapter *adapter, - u8 *macaddr) + const u8 *macaddr) { struct i40evf_mac_filter *f; @@ -806,7 +808,7 @@ i40evf_mac_filter *i40evf_find_filter(struct i40evf_adapter *adapter, **/ static struct i40evf_mac_filter *i40evf_add_filter(struct i40evf_adapter *adapter, - u8 *macaddr) + const u8 *macaddr) { struct i40evf_mac_filter *f; @@ -878,50 +880,64 @@ static int i40evf_set_mac(struct net_device *netdev, void *p) } /** - * i40evf_set_rx_mode - NDO callback to set the netdev filters - * @netdev: network interface device structure - **/ -static void i40evf_set_rx_mode(struct net_device *netdev) + * i40evf_addr_sync - Callback for dev_(mc|uc)_sync to add address + * @netdev: the netdevice + * @addr: address to add + * + * Called by __dev_(mc|uc)_sync when an address needs to be added. We call + * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock. + */ +static int i40evf_addr_sync(struct net_device *netdev, const u8 *addr) { struct i40evf_adapter *adapter = netdev_priv(netdev); - struct i40evf_mac_filter *f, *ftmp; - struct netdev_hw_addr *uca; - struct netdev_hw_addr *mca; - struct netdev_hw_addr *ha; - - /* add addr if not already in the filter list */ - netdev_for_each_uc_addr(uca, netdev) { - i40evf_add_filter(adapter, uca->addr); - } - netdev_for_each_mc_addr(mca, netdev) { - i40evf_add_filter(adapter, mca->addr); - } - - spin_lock_bh(&adapter->mac_vlan_list_lock); - - list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) { - netdev_for_each_mc_addr(mca, netdev) - if (ether_addr_equal(mca->addr, f->macaddr)) - goto bottom_of_search_loop; - netdev_for_each_uc_addr(uca, netdev) - if (ether_addr_equal(uca->addr, f->macaddr)) - goto bottom_of_search_loop; + if (i40evf_add_filter(adapter, addr)) + return 0; + else + return -ENOMEM; +} - for_each_dev_addr(netdev, ha) - if (ether_addr_equal(ha->addr, f->macaddr)) - goto bottom_of_search_loop; +/** + * i40evf_addr_unsync - Callback for dev_(mc|uc)_sync to remove address + * @netdev: the netdevice + * @addr: address to add + * + * Called by __dev_(mc|uc)_sync when an address needs to be removed. We call + * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock. + */ +static int i40evf_addr_unsync(struct net_device *netdev, const u8 *addr) +{ + struct i40evf_adapter *adapter = netdev_priv(netdev); + struct i40evf_mac_filter *f; - if (ether_addr_equal(f->macaddr, adapter->hw.mac.addr)) - goto bottom_of_search_loop; + /* Under some circumstances, we might receive a request to delete + * our own device address from our uc list. Because we store the + * device address in the VSI's MAC/VLAN filter list, we need to ignore + * such requests and not delete our device address from this list. + */ + if (ether_addr_equal(addr, netdev->dev_addr)) + return 0; - /* f->macaddr wasn't found in uc, mc, or ha list so delete it */ + f = i40evf_find_filter(adapter, addr); + if (f) { f->remove = true; adapter->aq_required |= I40EVF_FLAG_AQ_DEL_MAC_FILTER; - -bottom_of_search_loop: - continue; } + return 0; +} + +/** + * i40evf_set_rx_mode - NDO callback to set the netdev filters + * @netdev: network interface device structure + **/ +static void i40evf_set_rx_mode(struct net_device *netdev) +{ + struct i40evf_adapter *adapter = netdev_priv(netdev); + + spin_lock_bh(&adapter->mac_vlan_list_lock); + __dev_uc_sync(netdev, i40evf_addr_sync, i40evf_addr_unsync); + __dev_mc_sync(netdev, i40evf_addr_sync, i40evf_addr_unsync); + spin_unlock_bh(&adapter->mac_vlan_list_lock); if (netdev->flags & IFF_PROMISC && !(adapter->flags & I40EVF_FLAG_PROMISC_ON)) @@ -936,8 +952,6 @@ bottom_of_search_loop: else if (!(netdev->flags & IFF_ALLMULTI) && adapter->flags & I40EVF_FLAG_ALLMULTI_ON) adapter->aq_required |= I40EVF_FLAG_AQ_RELEASE_ALLMULTI; - - spin_unlock_bh(&adapter->mac_vlan_list_lock); } /** @@ -1025,7 +1039,9 @@ static void i40evf_up_complete(struct i40evf_adapter *adapter) void i40evf_down(struct i40evf_adapter *adapter) { struct net_device *netdev = adapter->netdev; + struct i40evf_vlan_filter *vlf; struct i40evf_mac_filter *f; + struct i40evf_cloud_filter *cf; if (adapter->state <= __I40EVF_DOWN_PENDING) return; @@ -1038,17 +1054,29 @@ void i40evf_down(struct i40evf_adapter *adapter) spin_lock_bh(&adapter->mac_vlan_list_lock); + /* clear the sync flag on all filters */ + __dev_uc_unsync(adapter->netdev, NULL); + __dev_mc_unsync(adapter->netdev, NULL); + /* remove all MAC filters */ list_for_each_entry(f, &adapter->mac_filter_list, list) { f->remove = true; } + /* remove all VLAN filters */ - list_for_each_entry(f, &adapter->vlan_filter_list, list) { - f->remove = true; + list_for_each_entry(vlf, &adapter->vlan_filter_list, list) { + vlf->remove = true; } spin_unlock_bh(&adapter->mac_vlan_list_lock); + /* remove all cloud filters */ + spin_lock_bh(&adapter->cloud_filter_list_lock); + list_for_each_entry(cf, &adapter->cloud_filter_list, list) { + cf->del = true; + } + spin_unlock_bh(&adapter->cloud_filter_list_lock); + if (!(adapter->flags & I40EVF_FLAG_PF_COMMS_FAILED) && adapter->state != __I40EVF_RESETTING) { /* cancel any current operation */ @@ -1059,6 +1087,7 @@ void i40evf_down(struct i40evf_adapter *adapter) */ adapter->aq_required = I40EVF_FLAG_AQ_DEL_MAC_FILTER; adapter->aq_required |= I40EVF_FLAG_AQ_DEL_VLAN_FILTER; + adapter->aq_required |= I40EVF_FLAG_AQ_DEL_CLOUD_FILTER; adapter->aq_required |= I40EVF_FLAG_AQ_DISABLE_QUEUES; } @@ -1144,6 +1173,9 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter) */ if (adapter->num_req_queues) num_active_queues = adapter->num_req_queues; + else if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) && + adapter->num_tc) + num_active_queues = adapter->ch_config.total_qps; else num_active_queues = min_t(int, adapter->vsi_res->num_queue_pairs, @@ -1169,7 +1201,7 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter) tx_ring->netdev = adapter->netdev; tx_ring->dev = &adapter->pdev->dev; tx_ring->count = adapter->tx_desc_count; - tx_ring->tx_itr_setting = I40E_ITR_TX_DEF; + tx_ring->itr_setting = I40E_ITR_TX_DEF; if (adapter->flags & I40EVF_FLAG_WB_ON_ITR_CAPABLE) tx_ring->flags |= I40E_TXR_FLAGS_WB_ON_ITR; @@ -1178,7 +1210,7 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter) rx_ring->netdev = adapter->netdev; rx_ring->dev = &adapter->pdev->dev; rx_ring->count = adapter->rx_desc_count; - rx_ring->rx_itr_setting = I40E_ITR_RX_DEF; + rx_ring->itr_setting = I40E_ITR_RX_DEF; } adapter->num_active_queues = num_active_queues; @@ -1471,6 +1503,16 @@ int i40evf_init_interrupt_scheme(struct i40evf_adapter *adapter) goto err_alloc_q_vectors; } + /* If we've made it so far while ADq flag being ON, then we haven't + * bailed out anywhere in middle. And ADq isn't just enabled but actual + * resources have been allocated in the reset path. + * Now we can truly claim that ADq is enabled. + */ + if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) && + adapter->num_tc) + dev_info(&adapter->pdev->dev, "ADq Enabled, %u TCs created", + adapter->num_tc); + dev_info(&adapter->pdev->dev, "Multiqueue %s: Queue pair count = %u", (adapter->num_active_queues > 1) ? "Enabled" : "Disabled", adapter->num_active_queues); @@ -1712,6 +1754,27 @@ static void i40evf_watchdog_task(struct work_struct *work) i40evf_set_promiscuous(adapter, 0); goto watchdog_done; } + + if (adapter->aq_required & I40EVF_FLAG_AQ_ENABLE_CHANNELS) { + i40evf_enable_channels(adapter); + goto watchdog_done; + } + + if (adapter->aq_required & I40EVF_FLAG_AQ_DISABLE_CHANNELS) { + i40evf_disable_channels(adapter); + goto watchdog_done; + } + + if (adapter->aq_required & I40EVF_FLAG_AQ_ADD_CLOUD_FILTER) { + i40evf_add_cloud_filter(adapter); + goto watchdog_done; + } + + if (adapter->aq_required & I40EVF_FLAG_AQ_DEL_CLOUD_FILTER) { + i40evf_del_cloud_filter(adapter); + goto watchdog_done; + } + schedule_delayed_work(&adapter->client_task, msecs_to_jiffies(5)); if (adapter->state == __I40EVF_RUNNING) @@ -1735,6 +1798,7 @@ static void i40evf_disable_vf(struct i40evf_adapter *adapter) { struct i40evf_mac_filter *f, *ftmp; struct i40evf_vlan_filter *fv, *fvtmp; + struct i40evf_cloud_filter *cf, *cftmp; adapter->flags |= I40EVF_FLAG_PF_COMMS_FAILED; @@ -1756,7 +1820,7 @@ static void i40evf_disable_vf(struct i40evf_adapter *adapter) spin_lock_bh(&adapter->mac_vlan_list_lock); - /* Delete all of the filters, both MAC and VLAN. */ + /* Delete all of the filters */ list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) { list_del(&f->list); kfree(f); @@ -1769,6 +1833,14 @@ static void i40evf_disable_vf(struct i40evf_adapter *adapter) spin_unlock_bh(&adapter->mac_vlan_list_lock); + spin_lock_bh(&adapter->cloud_filter_list_lock); + list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) { + list_del(&cf->list); + kfree(cf); + adapter->num_cloud_filters--; + } + spin_unlock_bh(&adapter->cloud_filter_list_lock); + i40evf_free_misc_irq(adapter); i40evf_reset_interrupt_capability(adapter); i40evf_free_queues(adapter); @@ -1798,9 +1870,11 @@ static void i40evf_reset_task(struct work_struct *work) struct i40evf_adapter *adapter = container_of(work, struct i40evf_adapter, reset_task); + struct virtchnl_vf_resource *vfres = adapter->vf_res; struct net_device *netdev = adapter->netdev; struct i40e_hw *hw = &adapter->hw; struct i40evf_vlan_filter *vlf; + struct i40evf_cloud_filter *cf; struct i40evf_mac_filter *f; u32 reg_val; int i = 0, err; @@ -1893,6 +1967,7 @@ continue_reset: i40evf_free_all_rx_resources(adapter); i40evf_free_all_tx_resources(adapter); + adapter->flags |= I40EVF_FLAG_QUEUES_DISABLED; /* kill and reinit the admin queue */ i40evf_shutdown_adminq(hw); adapter->current_op = VIRTCHNL_OP_UNKNOWN; @@ -1924,8 +1999,19 @@ continue_reset: spin_unlock_bh(&adapter->mac_vlan_list_lock); + /* check if TCs are running and re-add all cloud filters */ + spin_lock_bh(&adapter->cloud_filter_list_lock); + if ((vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) && + adapter->num_tc) { + list_for_each_entry(cf, &adapter->cloud_filter_list, list) { + cf->add = true; + } + } + spin_unlock_bh(&adapter->cloud_filter_list_lock); + adapter->aq_required |= I40EVF_FLAG_AQ_ADD_MAC_FILTER; adapter->aq_required |= I40EVF_FLAG_AQ_ADD_VLAN_FILTER; + adapter->aq_required |= I40EVF_FLAG_AQ_ADD_CLOUD_FILTER; i40evf_misc_irq_enable(adapter); mod_timer(&adapter->watchdog_timer, jiffies + 2); @@ -2191,6 +2277,724 @@ void i40evf_free_all_rx_resources(struct i40evf_adapter *adapter) } /** + * i40evf_validate_tx_bandwidth - validate the max Tx bandwidth + * @adapter: board private structure + * @max_tx_rate: max Tx bw for a tc + **/ +static int i40evf_validate_tx_bandwidth(struct i40evf_adapter *adapter, + u64 max_tx_rate) +{ + int speed = 0, ret = 0; + + switch (adapter->link_speed) { + case I40E_LINK_SPEED_40GB: + speed = 40000; + break; + case I40E_LINK_SPEED_25GB: + speed = 25000; + break; + case I40E_LINK_SPEED_20GB: + speed = 20000; + break; + case I40E_LINK_SPEED_10GB: + speed = 10000; + break; + case I40E_LINK_SPEED_1GB: + speed = 1000; + break; + case I40E_LINK_SPEED_100MB: + speed = 100; + break; + default: + break; + } + + if (max_tx_rate > speed) { + dev_err(&adapter->pdev->dev, + "Invalid tx rate specified\n"); + ret = -EINVAL; + } + + return ret; +} + +/** + * i40evf_validate_channel_config - validate queue mapping info + * @adapter: board private structure + * @mqprio_qopt: queue parameters + * + * This function validates if the config provided by the user to + * configure queue channels is valid or not. Returns 0 on a valid + * config. + **/ +static int i40evf_validate_ch_config(struct i40evf_adapter *adapter, + struct tc_mqprio_qopt_offload *mqprio_qopt) +{ + u64 total_max_rate = 0; + int i, num_qps = 0; + u64 tx_rate = 0; + int ret = 0; + + if (mqprio_qopt->qopt.num_tc > I40EVF_MAX_TRAFFIC_CLASS || + mqprio_qopt->qopt.num_tc < 1) + return -EINVAL; + + for (i = 0; i <= mqprio_qopt->qopt.num_tc - 1; i++) { + if (!mqprio_qopt->qopt.count[i] || + mqprio_qopt->qopt.offset[i] != num_qps) + return -EINVAL; + if (mqprio_qopt->min_rate[i]) { + dev_err(&adapter->pdev->dev, + "Invalid min tx rate (greater than 0) specified\n"); + return -EINVAL; + } + /*convert to Mbps */ + tx_rate = div_u64(mqprio_qopt->max_rate[i], + I40EVF_MBPS_DIVISOR); + total_max_rate += tx_rate; + num_qps += mqprio_qopt->qopt.count[i]; + } + if (num_qps > MAX_QUEUES) + return -EINVAL; + + ret = i40evf_validate_tx_bandwidth(adapter, total_max_rate); + return ret; +} + +/** + * i40evf_del_all_cloud_filters - delete all cloud filters + * on the traffic classes + **/ +static void i40evf_del_all_cloud_filters(struct i40evf_adapter *adapter) +{ + struct i40evf_cloud_filter *cf, *cftmp; + + spin_lock_bh(&adapter->cloud_filter_list_lock); + list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, + list) { + list_del(&cf->list); + kfree(cf); + adapter->num_cloud_filters--; + } + spin_unlock_bh(&adapter->cloud_filter_list_lock); +} + +/** + * __i40evf_setup_tc - configure multiple traffic classes + * @netdev: network interface device structure + * @type_date: tc offload data + * + * This function processes the config information provided by the + * user to configure traffic classes/queue channels and packages the + * information to request the PF to setup traffic classes. + * + * Returns 0 on success. + **/ +static int __i40evf_setup_tc(struct net_device *netdev, void *type_data) +{ + struct tc_mqprio_qopt_offload *mqprio_qopt = type_data; + struct i40evf_adapter *adapter = netdev_priv(netdev); + struct virtchnl_vf_resource *vfres = adapter->vf_res; + u8 num_tc = 0, total_qps = 0; + int ret = 0, netdev_tc = 0; + u64 max_tx_rate; + u16 mode; + int i; + + num_tc = mqprio_qopt->qopt.num_tc; + mode = mqprio_qopt->mode; + + /* delete queue_channel */ + if (!mqprio_qopt->qopt.hw) { + if (adapter->ch_config.state == __I40EVF_TC_RUNNING) { + /* reset the tc configuration */ + netdev_reset_tc(netdev); + adapter->num_tc = 0; + netif_tx_stop_all_queues(netdev); + netif_tx_disable(netdev); + i40evf_del_all_cloud_filters(adapter); + adapter->aq_required = I40EVF_FLAG_AQ_DISABLE_CHANNELS; + goto exit; + } else { + return -EINVAL; + } + } + + /* add queue channel */ + if (mode == TC_MQPRIO_MODE_CHANNEL) { + if (!(vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ)) { + dev_err(&adapter->pdev->dev, "ADq not supported\n"); + return -EOPNOTSUPP; + } + if (adapter->ch_config.state != __I40EVF_TC_INVALID) { + dev_err(&adapter->pdev->dev, "TC configuration already exists\n"); + return -EINVAL; + } + + ret = i40evf_validate_ch_config(adapter, mqprio_qopt); + if (ret) + return ret; + /* Return if same TC config is requested */ + if (adapter->num_tc == num_tc) + return 0; + adapter->num_tc = num_tc; + + for (i = 0; i < I40EVF_MAX_TRAFFIC_CLASS; i++) { + if (i < num_tc) { + adapter->ch_config.ch_info[i].count = + mqprio_qopt->qopt.count[i]; + adapter->ch_config.ch_info[i].offset = + mqprio_qopt->qopt.offset[i]; + total_qps += mqprio_qopt->qopt.count[i]; + max_tx_rate = mqprio_qopt->max_rate[i]; + /* convert to Mbps */ + max_tx_rate = div_u64(max_tx_rate, + I40EVF_MBPS_DIVISOR); + adapter->ch_config.ch_info[i].max_tx_rate = + max_tx_rate; + } else { + adapter->ch_config.ch_info[i].count = 1; + adapter->ch_config.ch_info[i].offset = 0; + } + } + adapter->ch_config.total_qps = total_qps; + netif_tx_stop_all_queues(netdev); + netif_tx_disable(netdev); + adapter->aq_required |= I40EVF_FLAG_AQ_ENABLE_CHANNELS; + netdev_reset_tc(netdev); + /* Report the tc mapping up the stack */ + netdev_set_num_tc(adapter->netdev, num_tc); + for (i = 0; i < I40EVF_MAX_TRAFFIC_CLASS; i++) { + u16 qcount = mqprio_qopt->qopt.count[i]; + u16 qoffset = mqprio_qopt->qopt.offset[i]; + + if (i < num_tc) + netdev_set_tc_queue(netdev, netdev_tc++, qcount, + qoffset); + } + } +exit: + return ret; +} + +/** + * i40evf_parse_cls_flower - Parse tc flower filters provided by kernel + * @adapter: board private structure + * @cls_flower: pointer to struct tc_cls_flower_offload + * @filter: pointer to cloud filter structure + */ +static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter, + struct tc_cls_flower_offload *f, + struct i40evf_cloud_filter *filter) +{ + u16 n_proto_mask = 0; + u16 n_proto_key = 0; + u8 field_flags = 0; + u16 addr_type = 0; + u16 n_proto = 0; + int i = 0; + + if (f->dissector->used_keys & + ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | + BIT(FLOW_DISSECTOR_KEY_BASIC) | + BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_VLAN) | + BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_PORTS) | + BIT(FLOW_DISSECTOR_KEY_ENC_KEYID))) { + dev_err(&adapter->pdev->dev, "Unsupported key used: 0x%x\n", + f->dissector->used_keys); + return -EOPNOTSUPP; + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_dissector_key_keyid *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_KEYID, + f->mask); + + if (mask->keyid != 0) + field_flags |= I40EVF_CLOUD_FIELD_TEN_ID; + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_dissector_key_basic *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_BASIC, + f->key); + + struct flow_dissector_key_basic *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_BASIC, + f->mask); + n_proto_key = ntohs(key->n_proto); + n_proto_mask = ntohs(mask->n_proto); + + if (n_proto_key == ETH_P_ALL) { + n_proto_key = 0; + n_proto_mask = 0; + } + n_proto = n_proto_key & n_proto_mask; + if (n_proto != ETH_P_IP && n_proto != ETH_P_IPV6) + return -EINVAL; + if (n_proto == ETH_P_IPV6) { + /* specify flow type as TCP IPv6 */ + filter->f.flow_type = VIRTCHNL_TCP_V6_FLOW; + } + + if (key->ip_proto != IPPROTO_TCP) { + dev_info(&adapter->pdev->dev, "Only TCP transport is supported\n"); + return -EINVAL; + } + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_dissector_key_eth_addrs *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ETH_ADDRS, + f->key); + + struct flow_dissector_key_eth_addrs *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ETH_ADDRS, + f->mask); + /* use is_broadcast and is_zero to check for all 0xf or 0 */ + if (!is_zero_ether_addr(mask->dst)) { + if (is_broadcast_ether_addr(mask->dst)) { + field_flags |= I40EVF_CLOUD_FIELD_OMAC; + } else { + dev_err(&adapter->pdev->dev, "Bad ether dest mask %pM\n", + mask->dst); + return I40E_ERR_CONFIG; + } + } + + if (!is_zero_ether_addr(mask->src)) { + if (is_broadcast_ether_addr(mask->src)) { + field_flags |= I40EVF_CLOUD_FIELD_IMAC; + } else { + dev_err(&adapter->pdev->dev, "Bad ether src mask %pM\n", + mask->src); + return I40E_ERR_CONFIG; + } + } + + if (!is_zero_ether_addr(key->dst)) + if (is_valid_ether_addr(key->dst) || + is_multicast_ether_addr(key->dst)) { + /* set the mask if a valid dst_mac address */ + for (i = 0; i < ETH_ALEN; i++) + filter->f.mask.tcp_spec.dst_mac[i] |= + 0xff; + ether_addr_copy(filter->f.data.tcp_spec.dst_mac, + key->dst); + } + + if (!is_zero_ether_addr(key->src)) + if (is_valid_ether_addr(key->src) || + is_multicast_ether_addr(key->src)) { + /* set the mask if a valid dst_mac address */ + for (i = 0; i < ETH_ALEN; i++) + filter->f.mask.tcp_spec.src_mac[i] |= + 0xff; + ether_addr_copy(filter->f.data.tcp_spec.src_mac, + key->src); + } + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_dissector_key_vlan *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_VLAN, + f->key); + struct flow_dissector_key_vlan *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_VLAN, + f->mask); + + if (mask->vlan_id) { + if (mask->vlan_id == VLAN_VID_MASK) { + field_flags |= I40EVF_CLOUD_FIELD_IVLAN; + } else { + dev_err(&adapter->pdev->dev, "Bad vlan mask %u\n", + mask->vlan_id); + return I40E_ERR_CONFIG; + } + } + filter->f.mask.tcp_spec.vlan_id |= cpu_to_be16(0xffff); + filter->f.data.tcp_spec.vlan_id = cpu_to_be16(key->vlan_id); + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_dissector_key_control *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_CONTROL, + f->key); + + addr_type = key->addr_type; + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_dissector_key_ipv4_addrs *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IPV4_ADDRS, + f->key); + struct flow_dissector_key_ipv4_addrs *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IPV4_ADDRS, + f->mask); + + if (mask->dst) { + if (mask->dst == cpu_to_be32(0xffffffff)) { + field_flags |= I40EVF_CLOUD_FIELD_IIP; + } else { + dev_err(&adapter->pdev->dev, "Bad ip dst mask 0x%08x\n", + be32_to_cpu(mask->dst)); + return I40E_ERR_CONFIG; + } + } + + if (mask->src) { + if (mask->src == cpu_to_be32(0xffffffff)) { + field_flags |= I40EVF_CLOUD_FIELD_IIP; + } else { + dev_err(&adapter->pdev->dev, "Bad ip src mask 0x%08x\n", + be32_to_cpu(mask->dst)); + return I40E_ERR_CONFIG; + } + } + + if (field_flags & I40EVF_CLOUD_FIELD_TEN_ID) { + dev_info(&adapter->pdev->dev, "Tenant id not allowed for ip filter\n"); + return I40E_ERR_CONFIG; + } + if (key->dst) { + filter->f.mask.tcp_spec.dst_ip[0] |= + cpu_to_be32(0xffffffff); + filter->f.data.tcp_spec.dst_ip[0] = key->dst; + } + if (key->src) { + filter->f.mask.tcp_spec.src_ip[0] |= + cpu_to_be32(0xffffffff); + filter->f.data.tcp_spec.src_ip[0] = key->src; + } + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_dissector_key_ipv6_addrs *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IPV6_ADDRS, + f->key); + struct flow_dissector_key_ipv6_addrs *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IPV6_ADDRS, + f->mask); + + /* validate mask, make sure it is not IPV6_ADDR_ANY */ + if (ipv6_addr_any(&mask->dst)) { + dev_err(&adapter->pdev->dev, "Bad ipv6 dst mask 0x%02x\n", + IPV6_ADDR_ANY); + return I40E_ERR_CONFIG; + } + + /* src and dest IPv6 address should not be LOOPBACK + * (0:0:0:0:0:0:0:1) which can be represented as ::1 + */ + if (ipv6_addr_loopback(&key->dst) || + ipv6_addr_loopback(&key->src)) { + dev_err(&adapter->pdev->dev, + "ipv6 addr should not be loopback\n"); + return I40E_ERR_CONFIG; + } + if (!ipv6_addr_any(&mask->dst) || !ipv6_addr_any(&mask->src)) + field_flags |= I40EVF_CLOUD_FIELD_IIP; + + if (key->dst.s6_addr) { + for (i = 0; i < 4; i++) + filter->f.mask.tcp_spec.dst_ip[i] |= + cpu_to_be32(0xffffffff); + memcpy(&filter->f.data.tcp_spec.dst_ip, + &key->dst.s6_addr32, + sizeof(filter->f.data.tcp_spec.dst_ip)); + } + if (key->src.s6_addr) { + for (i = 0; i < 4; i++) + filter->f.mask.tcp_spec.src_ip[i] |= + cpu_to_be32(0xffffffff); + memcpy(&filter->f.data.tcp_spec.src_ip, + &key->src.s6_addr32, + sizeof(filter->f.data.tcp_spec.src_ip)); + } + } + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_dissector_key_ports *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_PORTS, + f->key); + struct flow_dissector_key_ports *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_PORTS, + f->mask); + + if (mask->src) { + if (mask->src == cpu_to_be16(0xffff)) { + field_flags |= I40EVF_CLOUD_FIELD_IIP; + } else { + dev_err(&adapter->pdev->dev, "Bad src port mask %u\n", + be16_to_cpu(mask->src)); + return I40E_ERR_CONFIG; + } + } + + if (mask->dst) { + if (mask->dst == cpu_to_be16(0xffff)) { + field_flags |= I40EVF_CLOUD_FIELD_IIP; + } else { + dev_err(&adapter->pdev->dev, "Bad dst port mask %u\n", + be16_to_cpu(mask->dst)); + return I40E_ERR_CONFIG; + } + } + if (key->dst) { + filter->f.mask.tcp_spec.dst_port |= cpu_to_be16(0xffff); + filter->f.data.tcp_spec.dst_port = key->dst; + } + + if (key->src) { + filter->f.mask.tcp_spec.src_port |= cpu_to_be16(0xffff); + filter->f.data.tcp_spec.src_port = key->dst; + } + } + filter->f.field_flags = field_flags; + + return 0; +} + +/** + * i40evf_handle_tclass - Forward to a traffic class on the device + * @adapter: board private structure + * @tc: traffic class index on the device + * @filter: pointer to cloud filter structure + */ +static int i40evf_handle_tclass(struct i40evf_adapter *adapter, u32 tc, + struct i40evf_cloud_filter *filter) +{ + if (tc == 0) + return 0; + if (tc < adapter->num_tc) { + if (!filter->f.data.tcp_spec.dst_port) { + dev_err(&adapter->pdev->dev, + "Specify destination port to redirect to traffic class other than TC0\n"); + return -EINVAL; + } + } + /* redirect to a traffic class on the same device */ + filter->f.action = VIRTCHNL_ACTION_TC_REDIRECT; + filter->f.action_meta = tc; + return 0; +} + +/** + * i40evf_configure_clsflower - Add tc flower filters + * @adapter: board private structure + * @cls_flower: Pointer to struct tc_cls_flower_offload + */ +static int i40evf_configure_clsflower(struct i40evf_adapter *adapter, + struct tc_cls_flower_offload *cls_flower) +{ + int tc = tc_classid_to_hwtc(adapter->netdev, cls_flower->classid); + struct i40evf_cloud_filter *filter = NULL; + int err = 0, count = 50; + + while (test_and_set_bit(__I40EVF_IN_CRITICAL_TASK, + &adapter->crit_section)) { + udelay(1); + if (--count == 0) + return -EINVAL; + } + + if (tc < 0) { + dev_err(&adapter->pdev->dev, "Invalid traffic class\n"); + return -EINVAL; + } + + filter = kzalloc(sizeof(*filter), GFP_KERNEL); + if (!filter) { + err = -ENOMEM; + goto clearout; + } + filter->cookie = cls_flower->cookie; + + /* set the mask to all zeroes to begin with */ + memset(&filter->f.mask.tcp_spec, 0, sizeof(struct virtchnl_l4_spec)); + /* start out with flow type and eth type IPv4 to begin with */ + filter->f.flow_type = VIRTCHNL_TCP_V4_FLOW; + err = i40evf_parse_cls_flower(adapter, cls_flower, filter); + if (err < 0) + goto err; + + err = i40evf_handle_tclass(adapter, tc, filter); + if (err < 0) + goto err; + + /* add filter to the list */ + spin_lock_bh(&adapter->cloud_filter_list_lock); + list_add_tail(&filter->list, &adapter->cloud_filter_list); + adapter->num_cloud_filters++; + filter->add = true; + adapter->aq_required |= I40EVF_FLAG_AQ_ADD_CLOUD_FILTER; + spin_unlock_bh(&adapter->cloud_filter_list_lock); +err: + if (err) + kfree(filter); +clearout: + clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section); + return err; +} + +/* i40evf_find_cf - Find the cloud filter in the list + * @adapter: Board private structure + * @cookie: filter specific cookie + * + * Returns ptr to the filter object or NULL. Must be called while holding the + * cloud_filter_list_lock. + */ +static struct i40evf_cloud_filter *i40evf_find_cf(struct i40evf_adapter *adapter, + unsigned long *cookie) +{ + struct i40evf_cloud_filter *filter = NULL; + + if (!cookie) + return NULL; + + list_for_each_entry(filter, &adapter->cloud_filter_list, list) { + if (!memcmp(cookie, &filter->cookie, sizeof(filter->cookie))) + return filter; + } + return NULL; +} + +/** + * i40evf_delete_clsflower - Remove tc flower filters + * @adapter: board private structure + * @cls_flower: Pointer to struct tc_cls_flower_offload + */ +static int i40evf_delete_clsflower(struct i40evf_adapter *adapter, + struct tc_cls_flower_offload *cls_flower) +{ + struct i40evf_cloud_filter *filter = NULL; + int err = 0; + + spin_lock_bh(&adapter->cloud_filter_list_lock); + filter = i40evf_find_cf(adapter, &cls_flower->cookie); + if (filter) { + filter->del = true; + adapter->aq_required |= I40EVF_FLAG_AQ_DEL_CLOUD_FILTER; + } else { + err = -EINVAL; + } + spin_unlock_bh(&adapter->cloud_filter_list_lock); + + return err; +} + +/** + * i40evf_setup_tc_cls_flower - flower classifier offloads + * @netdev: net device to configure + * @type_data: offload data + */ +static int i40evf_setup_tc_cls_flower(struct i40evf_adapter *adapter, + struct tc_cls_flower_offload *cls_flower) +{ + if (cls_flower->common.chain_index) + return -EOPNOTSUPP; + + switch (cls_flower->command) { + case TC_CLSFLOWER_REPLACE: + return i40evf_configure_clsflower(adapter, cls_flower); + case TC_CLSFLOWER_DESTROY: + return i40evf_delete_clsflower(adapter, cls_flower); + case TC_CLSFLOWER_STATS: + return -EOPNOTSUPP; + default: + return -EINVAL; + } +} + +/** + * i40evf_setup_tc_block_cb - block callback for tc + * @type: type of offload + * @type_data: offload data + * @cb_priv: + * + * This function is the block callback for traffic classes + **/ +static int i40evf_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + switch (type) { + case TC_SETUP_CLSFLOWER: + return i40evf_setup_tc_cls_flower(cb_priv, type_data); + default: + return -EOPNOTSUPP; + } +} + +/** + * i40evf_setup_tc_block - register callbacks for tc + * @netdev: network interface device structure + * @f: tc offload data + * + * This function registers block callbacks for tc + * offloads + **/ +static int i40evf_setup_tc_block(struct net_device *dev, + struct tc_block_offload *f) +{ + struct i40evf_adapter *adapter = netdev_priv(dev); + + if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + switch (f->command) { + case TC_BLOCK_BIND: + return tcf_block_cb_register(f->block, i40evf_setup_tc_block_cb, + adapter, adapter); + case TC_BLOCK_UNBIND: + tcf_block_cb_unregister(f->block, i40evf_setup_tc_block_cb, + adapter); + return 0; + default: + return -EOPNOTSUPP; + } +} + +/** + * i40evf_setup_tc - configure multiple traffic classes + * @netdev: network interface device structure + * @type: type of offload + * @type_date: tc offload data + * + * This function is the callback to ndo_setup_tc in the + * netdev_ops. + * + * Returns 0 on success + **/ +static int i40evf_setup_tc(struct net_device *netdev, enum tc_setup_type type, + void *type_data) +{ + switch (type) { + case TC_SETUP_QDISC_MQPRIO: + return __i40evf_setup_tc(netdev, type_data); + case TC_SETUP_BLOCK: + return i40evf_setup_tc_block(netdev, type_data); + default: + return -EOPNOTSUPP; + } +} + +/** * i40evf_open - Called when a network interface is made active * @netdev: network interface device structure * @@ -2457,6 +3261,7 @@ static const struct net_device_ops i40evf_netdev_ops = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = i40evf_netpoll, #endif + .ndo_setup_tc = i40evf_setup_tc, }; /** @@ -2571,6 +3376,9 @@ int i40evf_process_config(struct i40evf_adapter *adapter) if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN) hw_features |= (NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX); + /* Enable cloud filter if ADQ is supported */ + if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) + hw_features |= NETIF_F_HW_TC; netdev->hw_features |= hw_features; @@ -2938,9 +3746,11 @@ static int i40evf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) mutex_init(&hw->aq.arq_mutex); spin_lock_init(&adapter->mac_vlan_list_lock); + spin_lock_init(&adapter->cloud_filter_list_lock); INIT_LIST_HEAD(&adapter->mac_filter_list); INIT_LIST_HEAD(&adapter->vlan_filter_list); + INIT_LIST_HEAD(&adapter->cloud_filter_list); INIT_WORK(&adapter->reset_task, i40evf_reset_task); INIT_WORK(&adapter->adminq_task, i40evf_adminq_task); @@ -3065,7 +3875,9 @@ static void i40evf_remove(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); struct i40evf_adapter *adapter = netdev_priv(netdev); + struct i40evf_vlan_filter *vlf, *vlftmp; struct i40evf_mac_filter *f, *ftmp; + struct i40evf_cloud_filter *cf, *cftmp; struct i40e_hw *hw = &adapter->hw; int err; /* Indicate we are in remove and not to run reset_task */ @@ -3087,6 +3899,7 @@ static void i40evf_remove(struct pci_dev *pdev) /* Shut down all the garbage mashers on the detention level */ adapter->state = __I40EVF_REMOVE; adapter->aq_required = 0; + adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED; i40evf_request_reset(adapter); msleep(50); /* If the FW isn't responding, kick it once, but only once. */ @@ -3127,13 +3940,21 @@ static void i40evf_remove(struct pci_dev *pdev) list_del(&f->list); kfree(f); } - list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) { - list_del(&f->list); - kfree(f); + list_for_each_entry_safe(vlf, vlftmp, &adapter->vlan_filter_list, + list) { + list_del(&vlf->list); + kfree(vlf); } spin_unlock_bh(&adapter->mac_vlan_list_lock); + spin_lock_bh(&adapter->cloud_filter_list_lock); + list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) { + list_del(&cf->list); + kfree(cf); + } + spin_unlock_bh(&adapter->cloud_filter_list_lock); + free_netdev(netdev); pci_disable_pcie_error_reporting(pdev); diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c index 50ce0d6c09ef..6134b61e0938 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c @@ -161,7 +161,8 @@ int i40evf_send_vf_config_msg(struct i40evf_adapter *adapter) VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 | VIRTCHNL_VF_OFFLOAD_ENCAP | VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM | - VIRTCHNL_VF_OFFLOAD_REQ_QUEUES; + VIRTCHNL_VF_OFFLOAD_REQ_QUEUES | + VIRTCHNL_VF_OFFLOAD_ADQ; adapter->current_op = VIRTCHNL_OP_GET_VF_RESOURCES; adapter->aq_required &= ~I40EVF_FLAG_AQ_GET_CONFIG; @@ -344,6 +345,7 @@ void i40evf_disable_queues(struct i40evf_adapter *adapter) void i40evf_map_queues(struct i40evf_adapter *adapter) { struct virtchnl_irq_map_info *vimi; + struct virtchnl_vector_map *vecmap; int v_idx, q_vectors, len; struct i40e_q_vector *q_vector; @@ -367,17 +369,22 @@ void i40evf_map_queues(struct i40evf_adapter *adapter) vimi->num_vectors = adapter->num_msix_vectors; /* Queue vectors first */ for (v_idx = 0; v_idx < q_vectors; v_idx++) { - q_vector = adapter->q_vectors + v_idx; - vimi->vecmap[v_idx].vsi_id = adapter->vsi_res->vsi_id; - vimi->vecmap[v_idx].vector_id = v_idx + NONQ_VECS; - vimi->vecmap[v_idx].txq_map = q_vector->ring_mask; - vimi->vecmap[v_idx].rxq_map = q_vector->ring_mask; + q_vector = &adapter->q_vectors[v_idx]; + vecmap = &vimi->vecmap[v_idx]; + + vecmap->vsi_id = adapter->vsi_res->vsi_id; + vecmap->vector_id = v_idx + NONQ_VECS; + vecmap->txq_map = q_vector->ring_mask; + vecmap->rxq_map = q_vector->ring_mask; + vecmap->rxitr_idx = I40E_RX_ITR; + vecmap->txitr_idx = I40E_TX_ITR; } /* Misc vector last - this is only for AdminQ messages */ - vimi->vecmap[v_idx].vsi_id = adapter->vsi_res->vsi_id; - vimi->vecmap[v_idx].vector_id = 0; - vimi->vecmap[v_idx].txq_map = 0; - vimi->vecmap[v_idx].rxq_map = 0; + vecmap = &vimi->vecmap[v_idx]; + vecmap->vsi_id = adapter->vsi_res->vsi_id; + vecmap->vector_id = 0; + vecmap->txq_map = 0; + vecmap->rxq_map = 0; adapter->aq_required &= ~I40EVF_FLAG_AQ_MAP_VECTORS; i40evf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_IRQ_MAP, @@ -459,7 +466,7 @@ void i40evf_add_ether_addrs(struct i40evf_adapter *adapter) more = true; } - veal = kzalloc(len, GFP_KERNEL); + veal = kzalloc(len, GFP_ATOMIC); if (!veal) { spin_unlock_bh(&adapter->mac_vlan_list_lock); return; @@ -532,7 +539,7 @@ void i40evf_del_ether_addrs(struct i40evf_adapter *adapter) (count * sizeof(struct virtchnl_ether_addr)); more = true; } - veal = kzalloc(len, GFP_KERNEL); + veal = kzalloc(len, GFP_ATOMIC); if (!veal) { spin_unlock_bh(&adapter->mac_vlan_list_lock); return; @@ -606,7 +613,7 @@ void i40evf_add_vlans(struct i40evf_adapter *adapter) (count * sizeof(u16)); more = true; } - vvfl = kzalloc(len, GFP_KERNEL); + vvfl = kzalloc(len, GFP_ATOMIC); if (!vvfl) { spin_unlock_bh(&adapter->mac_vlan_list_lock); return; @@ -678,7 +685,7 @@ void i40evf_del_vlans(struct i40evf_adapter *adapter) (count * sizeof(u16)); more = true; } - vvfl = kzalloc(len, GFP_KERNEL); + vvfl = kzalloc(len, GFP_ATOMIC); if (!vvfl) { spin_unlock_bh(&adapter->mac_vlan_list_lock); return; @@ -967,6 +974,201 @@ static void i40evf_print_link_message(struct i40evf_adapter *adapter) } /** + * i40evf_enable_channel + * @adapter: adapter structure + * + * Request that the PF enable channels as specified by + * the user via tc tool. + **/ +void i40evf_enable_channels(struct i40evf_adapter *adapter) +{ + struct virtchnl_tc_info *vti = NULL; + u16 len; + int i; + + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot configure mqprio, command %d pending\n", + adapter->current_op); + return; + } + + len = (adapter->num_tc * sizeof(struct virtchnl_channel_info)) + + sizeof(struct virtchnl_tc_info); + + vti = kzalloc(len, GFP_KERNEL); + if (!vti) + return; + vti->num_tc = adapter->num_tc; + for (i = 0; i < vti->num_tc; i++) { + vti->list[i].count = adapter->ch_config.ch_info[i].count; + vti->list[i].offset = adapter->ch_config.ch_info[i].offset; + vti->list[i].pad = 0; + vti->list[i].max_tx_rate = + adapter->ch_config.ch_info[i].max_tx_rate; + } + + adapter->ch_config.state = __I40EVF_TC_RUNNING; + adapter->flags |= I40EVF_FLAG_REINIT_ITR_NEEDED; + adapter->current_op = VIRTCHNL_OP_ENABLE_CHANNELS; + adapter->aq_required &= ~I40EVF_FLAG_AQ_ENABLE_CHANNELS; + i40evf_send_pf_msg(adapter, VIRTCHNL_OP_ENABLE_CHANNELS, + (u8 *)vti, len); + kfree(vti); +} + +/** + * i40evf_disable_channel + * @adapter: adapter structure + * + * Request that the PF disable channels that are configured + **/ +void i40evf_disable_channels(struct i40evf_adapter *adapter) +{ + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot configure mqprio, command %d pending\n", + adapter->current_op); + return; + } + + adapter->ch_config.state = __I40EVF_TC_INVALID; + adapter->flags |= I40EVF_FLAG_REINIT_ITR_NEEDED; + adapter->current_op = VIRTCHNL_OP_DISABLE_CHANNELS; + adapter->aq_required &= ~I40EVF_FLAG_AQ_DISABLE_CHANNELS; + i40evf_send_pf_msg(adapter, VIRTCHNL_OP_DISABLE_CHANNELS, + NULL, 0); +} + +/** + * i40evf_print_cloud_filter + * @adapter: adapter structure + * @f: cloud filter to print + * + * Print the cloud filter + **/ +static void i40evf_print_cloud_filter(struct i40evf_adapter *adapter, + struct virtchnl_filter f) +{ + switch (f.flow_type) { + case VIRTCHNL_TCP_V4_FLOW: + dev_info(&adapter->pdev->dev, "dst_mac: %pM src_mac: %pM vlan_id: %hu dst_ip: %pI4 src_ip %pI4 dst_port %hu src_port %hu\n", + &f.data.tcp_spec.dst_mac, &f.data.tcp_spec.src_mac, + ntohs(f.data.tcp_spec.vlan_id), + &f.data.tcp_spec.dst_ip[0], &f.data.tcp_spec.src_ip[0], + ntohs(f.data.tcp_spec.dst_port), + ntohs(f.data.tcp_spec.src_port)); + break; + case VIRTCHNL_TCP_V6_FLOW: + dev_info(&adapter->pdev->dev, "dst_mac: %pM src_mac: %pM vlan_id: %hu dst_ip: %pI6 src_ip %pI6 dst_port %hu src_port %hu\n", + &f.data.tcp_spec.dst_mac, &f.data.tcp_spec.src_mac, + ntohs(f.data.tcp_spec.vlan_id), + &f.data.tcp_spec.dst_ip, &f.data.tcp_spec.src_ip, + ntohs(f.data.tcp_spec.dst_port), + ntohs(f.data.tcp_spec.src_port)); + break; + } +} + +/** + * i40evf_add_cloud_filter + * @adapter: adapter structure + * + * Request that the PF add cloud filters as specified + * by the user via tc tool. + **/ +void i40evf_add_cloud_filter(struct i40evf_adapter *adapter) +{ + struct i40evf_cloud_filter *cf; + struct virtchnl_filter *f; + int len = 0, count = 0; + + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot add cloud filter, command %d pending\n", + adapter->current_op); + return; + } + list_for_each_entry(cf, &adapter->cloud_filter_list, list) { + if (cf->add) { + count++; + break; + } + } + if (!count) { + adapter->aq_required &= ~I40EVF_FLAG_AQ_ADD_CLOUD_FILTER; + return; + } + adapter->current_op = VIRTCHNL_OP_ADD_CLOUD_FILTER; + + len = sizeof(struct virtchnl_filter); + f = kzalloc(len, GFP_KERNEL); + if (!f) + return; + + list_for_each_entry(cf, &adapter->cloud_filter_list, list) { + if (cf->add) { + memcpy(f, &cf->f, sizeof(struct virtchnl_filter)); + cf->add = false; + cf->state = __I40EVF_CF_ADD_PENDING; + i40evf_send_pf_msg(adapter, + VIRTCHNL_OP_ADD_CLOUD_FILTER, + (u8 *)f, len); + } + } + kfree(f); +} + +/** + * i40evf_del_cloud_filter + * @adapter: adapter structure + * + * Request that the PF delete cloud filters as specified + * by the user via tc tool. + **/ +void i40evf_del_cloud_filter(struct i40evf_adapter *adapter) +{ + struct i40evf_cloud_filter *cf, *cftmp; + struct virtchnl_filter *f; + int len = 0, count = 0; + + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot remove cloud filter, command %d pending\n", + adapter->current_op); + return; + } + list_for_each_entry(cf, &adapter->cloud_filter_list, list) { + if (cf->del) { + count++; + break; + } + } + if (!count) { + adapter->aq_required &= ~I40EVF_FLAG_AQ_DEL_CLOUD_FILTER; + return; + } + adapter->current_op = VIRTCHNL_OP_DEL_CLOUD_FILTER; + + len = sizeof(struct virtchnl_filter); + f = kzalloc(len, GFP_KERNEL); + if (!f) + return; + + list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) { + if (cf->del) { + memcpy(f, &cf->f, sizeof(struct virtchnl_filter)); + cf->del = false; + cf->state = __I40EVF_CF_DEL_PENDING; + i40evf_send_pf_msg(adapter, + VIRTCHNL_OP_DEL_CLOUD_FILTER, + (u8 *)f, len); + } + } + kfree(f); +} + +/** * i40evf_request_reset * @adapter: adapter structure * @@ -1011,14 +1213,25 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, if (adapter->link_up == link_up) break; - /* If we get link up message and start queues before - * our queues are configured it will trigger a TX hang. - * In that case, just ignore the link status message, - * we'll get another one after we enable queues and - * actually prepared to send traffic. - */ - if (link_up && adapter->state != __I40EVF_RUNNING) - break; + if (link_up) { + /* If we get link up message and start queues + * before our queues are configured it will + * trigger a TX hang. In that case, just ignore + * the link status message,we'll get another one + * after we enable queues and actually prepared + * to send traffic. + */ + if (adapter->state != __I40EVF_RUNNING) + break; + + /* For ADq enabled VF, we reconfigure VSIs and + * re-allocate queues. Hence wait till all + * queues are enabled. + */ + if (adapter->flags & + I40EVF_FLAG_QUEUES_DISABLED) + break; + } adapter->link_up = link_up; if (link_up) { @@ -1031,7 +1244,7 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, i40evf_print_link_message(adapter); break; case VIRTCHNL_EVENT_RESET_IMPENDING: - dev_info(&adapter->pdev->dev, "PF reset warning received\n"); + dev_info(&adapter->pdev->dev, "Reset warning received from the PF\n"); if (!(adapter->flags & I40EVF_FLAG_RESET_PENDING)) { adapter->flags |= I40EVF_FLAG_RESET_PENDING; dev_info(&adapter->pdev->dev, "Scheduling reset task\n"); @@ -1063,6 +1276,57 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, dev_err(&adapter->pdev->dev, "Failed to delete MAC filter, error %s\n", i40evf_stat_str(&adapter->hw, v_retval)); break; + case VIRTCHNL_OP_ENABLE_CHANNELS: + dev_err(&adapter->pdev->dev, "Failed to configure queue channels, error %s\n", + i40evf_stat_str(&adapter->hw, v_retval)); + adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED; + adapter->ch_config.state = __I40EVF_TC_INVALID; + netdev_reset_tc(netdev); + netif_tx_start_all_queues(netdev); + break; + case VIRTCHNL_OP_DISABLE_CHANNELS: + dev_err(&adapter->pdev->dev, "Failed to disable queue channels, error %s\n", + i40evf_stat_str(&adapter->hw, v_retval)); + adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED; + adapter->ch_config.state = __I40EVF_TC_RUNNING; + netif_tx_start_all_queues(netdev); + break; + case VIRTCHNL_OP_ADD_CLOUD_FILTER: { + struct i40evf_cloud_filter *cf, *cftmp; + + list_for_each_entry_safe(cf, cftmp, + &adapter->cloud_filter_list, + list) { + if (cf->state == __I40EVF_CF_ADD_PENDING) { + cf->state = __I40EVF_CF_INVALID; + dev_info(&adapter->pdev->dev, "Failed to add cloud filter, error %s\n", + i40evf_stat_str(&adapter->hw, + v_retval)); + i40evf_print_cloud_filter(adapter, + cf->f); + list_del(&cf->list); + kfree(cf); + adapter->num_cloud_filters--; + } + } + } + break; + case VIRTCHNL_OP_DEL_CLOUD_FILTER: { + struct i40evf_cloud_filter *cf; + + list_for_each_entry(cf, &adapter->cloud_filter_list, + list) { + if (cf->state == __I40EVF_CF_DEL_PENDING) { + cf->state = __I40EVF_CF_ACTIVE; + dev_info(&adapter->pdev->dev, "Failed to del cloud filter, error %s\n", + i40evf_stat_str(&adapter->hw, + v_retval)); + i40evf_print_cloud_filter(adapter, + cf->f); + } + } + } + break; default: dev_err(&adapter->pdev->dev, "PF returned error %d (%s) to our request %d\n", v_retval, @@ -1102,6 +1366,7 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, case VIRTCHNL_OP_ENABLE_QUEUES: /* enable transmits */ i40evf_irq_enable(adapter, true); + adapter->flags &= ~I40EVF_FLAG_QUEUES_DISABLED; break; case VIRTCHNL_OP_DISABLE_QUEUES: i40evf_free_all_tx_resources(adapter); @@ -1156,6 +1421,29 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, } } break; + case VIRTCHNL_OP_ADD_CLOUD_FILTER: { + struct i40evf_cloud_filter *cf; + + list_for_each_entry(cf, &adapter->cloud_filter_list, list) { + if (cf->state == __I40EVF_CF_ADD_PENDING) + cf->state = __I40EVF_CF_ACTIVE; + } + } + break; + case VIRTCHNL_OP_DEL_CLOUD_FILTER: { + struct i40evf_cloud_filter *cf, *cftmp; + + list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, + list) { + if (cf->state == __I40EVF_CF_DEL_PENDING) { + cf->state = __I40EVF_CF_INVALID; + list_del(&cf->list); + kfree(cf); + adapter->num_cloud_filters--; + } + } + } + break; default: if (adapter->current_op && (v_opcode != adapter->current_op)) dev_warn(&adapter->pdev->dev, "Expected response %d from PF, received %d\n", diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index 9463c3fa254f..0cadcabfe86f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -20,7 +20,7 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ spectrum_cnt.o spectrum_fid.o \ spectrum_ipip.o spectrum_acl_flex_actions.o \ spectrum_mr.o spectrum_mr_tcam.o \ - spectrum_qdisc.o + spectrum_qdisc.o spectrum_span.o mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 3dcc58d61506..5e8ea712caa2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -71,6 +71,7 @@ #include "spectrum_cnt.h" #include "spectrum_dpipe.h" #include "spectrum_acl_flex_actions.h" +#include "spectrum_span.h" #include "../mlxfw/mlxfw.h" #define MLXSW_FWREV_MAJOR 13 @@ -487,327 +488,6 @@ static int mlxsw_sp_base_mac_get(struct mlxsw_sp *mlxsw_sp) return 0; } -static int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp) -{ - int i; - - if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_SPAN)) - return -EIO; - - mlxsw_sp->span.entries_count = MLXSW_CORE_RES_GET(mlxsw_sp->core, - MAX_SPAN); - mlxsw_sp->span.entries = kcalloc(mlxsw_sp->span.entries_count, - sizeof(struct mlxsw_sp_span_entry), - GFP_KERNEL); - if (!mlxsw_sp->span.entries) - return -ENOMEM; - - for (i = 0; i < mlxsw_sp->span.entries_count; i++) - INIT_LIST_HEAD(&mlxsw_sp->span.entries[i].bound_ports_list); - - return 0; -} - -static void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp) -{ - int i; - - for (i = 0; i < mlxsw_sp->span.entries_count; i++) { - struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; - - WARN_ON_ONCE(!list_empty(&curr->bound_ports_list)); - } - kfree(mlxsw_sp->span.entries); -} - -static struct mlxsw_sp_span_entry * -mlxsw_sp_span_entry_create(struct mlxsw_sp_port *port) -{ - struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; - struct mlxsw_sp_span_entry *span_entry; - char mpat_pl[MLXSW_REG_MPAT_LEN]; - u8 local_port = port->local_port; - int index; - int i; - int err; - - /* find a free entry to use */ - index = -1; - for (i = 0; i < mlxsw_sp->span.entries_count; i++) { - if (!mlxsw_sp->span.entries[i].used) { - index = i; - span_entry = &mlxsw_sp->span.entries[i]; - break; - } - } - if (index < 0) - return NULL; - - /* create a new port analayzer entry for local_port */ - mlxsw_reg_mpat_pack(mpat_pl, index, local_port, true); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl); - if (err) - return NULL; - - span_entry->used = true; - span_entry->id = index; - span_entry->ref_count = 1; - span_entry->local_port = local_port; - return span_entry; -} - -static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_span_entry *span_entry) -{ - u8 local_port = span_entry->local_port; - char mpat_pl[MLXSW_REG_MPAT_LEN]; - int pa_id = span_entry->id; - - mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false); - mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl); - span_entry->used = false; -} - -struct mlxsw_sp_span_entry * -mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port) -{ - int i; - - for (i = 0; i < mlxsw_sp->span.entries_count; i++) { - struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; - - if (curr->used && curr->local_port == local_port) - return curr; - } - return NULL; -} - -static struct mlxsw_sp_span_entry -*mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port) -{ - struct mlxsw_sp_span_entry *span_entry; - - span_entry = mlxsw_sp_span_entry_find(port->mlxsw_sp, - port->local_port); - if (span_entry) { - /* Already exists, just take a reference */ - span_entry->ref_count++; - return span_entry; - } - - return mlxsw_sp_span_entry_create(port); -} - -static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_span_entry *span_entry) -{ - WARN_ON(!span_entry->ref_count); - if (--span_entry->ref_count == 0) - mlxsw_sp_span_entry_destroy(mlxsw_sp, span_entry); - return 0; -} - -static bool mlxsw_sp_span_is_egress_mirror(struct mlxsw_sp_port *port) -{ - struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; - struct mlxsw_sp_span_inspected_port *p; - int i; - - for (i = 0; i < mlxsw_sp->span.entries_count; i++) { - struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; - - list_for_each_entry(p, &curr->bound_ports_list, list) - if (p->local_port == port->local_port && - p->type == MLXSW_SP_SPAN_EGRESS) - return true; - } - - return false; -} - -static int mlxsw_sp_span_mtu_to_buffsize(const struct mlxsw_sp *mlxsw_sp, - int mtu) -{ - return mlxsw_sp_bytes_cells(mlxsw_sp, mtu * 5 / 2) + 1; -} - -static int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu) -{ - struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; - char sbib_pl[MLXSW_REG_SBIB_LEN]; - int err; - - /* If port is egress mirrored, the shared buffer size should be - * updated according to the mtu value - */ - if (mlxsw_sp_span_is_egress_mirror(port)) { - u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp, mtu); - - mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); - if (err) { - netdev_err(port->dev, "Could not update shared buffer for mirroring\n"); - return err; - } - } - - return 0; -} - -static struct mlxsw_sp_span_inspected_port * -mlxsw_sp_span_entry_bound_port_find(struct mlxsw_sp_port *port, - struct mlxsw_sp_span_entry *span_entry) -{ - struct mlxsw_sp_span_inspected_port *p; - - list_for_each_entry(p, &span_entry->bound_ports_list, list) - if (port->local_port == p->local_port) - return p; - return NULL; -} - -static int -mlxsw_sp_span_inspected_port_bind(struct mlxsw_sp_port *port, - struct mlxsw_sp_span_entry *span_entry, - enum mlxsw_sp_span_type type, - bool bind) -{ - struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; - char mpar_pl[MLXSW_REG_MPAR_LEN]; - int pa_id = span_entry->id; - - /* bind the port to the SPAN entry */ - mlxsw_reg_mpar_pack(mpar_pl, port->local_port, - (enum mlxsw_reg_mpar_i_e) type, bind, pa_id); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl); -} - -static int -mlxsw_sp_span_inspected_port_add(struct mlxsw_sp_port *port, - struct mlxsw_sp_span_entry *span_entry, - enum mlxsw_sp_span_type type, - bool bind) -{ - struct mlxsw_sp_span_inspected_port *inspected_port; - struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; - char sbib_pl[MLXSW_REG_SBIB_LEN]; - int err; - - /* if it is an egress SPAN, bind a shared buffer to it */ - if (type == MLXSW_SP_SPAN_EGRESS) { - u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp, - port->dev->mtu); - - mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); - if (err) { - netdev_err(port->dev, "Could not create shared buffer for mirroring\n"); - return err; - } - } - - if (bind) { - err = mlxsw_sp_span_inspected_port_bind(port, span_entry, type, - true); - if (err) - goto err_port_bind; - } - - inspected_port = kzalloc(sizeof(*inspected_port), GFP_KERNEL); - if (!inspected_port) { - err = -ENOMEM; - goto err_inspected_port_alloc; - } - inspected_port->local_port = port->local_port; - inspected_port->type = type; - list_add_tail(&inspected_port->list, &span_entry->bound_ports_list); - - return 0; - -err_inspected_port_alloc: - if (bind) - mlxsw_sp_span_inspected_port_bind(port, span_entry, type, - false); -err_port_bind: - if (type == MLXSW_SP_SPAN_EGRESS) { - mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0); - mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); - } - return err; -} - -static void -mlxsw_sp_span_inspected_port_del(struct mlxsw_sp_port *port, - struct mlxsw_sp_span_entry *span_entry, - enum mlxsw_sp_span_type type, - bool bind) -{ - struct mlxsw_sp_span_inspected_port *inspected_port; - struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; - char sbib_pl[MLXSW_REG_SBIB_LEN]; - - inspected_port = mlxsw_sp_span_entry_bound_port_find(port, span_entry); - if (!inspected_port) - return; - - if (bind) - mlxsw_sp_span_inspected_port_bind(port, span_entry, type, - false); - /* remove the SBIB buffer if it was egress SPAN */ - if (type == MLXSW_SP_SPAN_EGRESS) { - mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0); - mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); - } - - mlxsw_sp_span_entry_put(mlxsw_sp, span_entry); - - list_del(&inspected_port->list); - kfree(inspected_port); -} - -int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from, - struct mlxsw_sp_port *to, - enum mlxsw_sp_span_type type, bool bind) -{ - struct mlxsw_sp *mlxsw_sp = from->mlxsw_sp; - struct mlxsw_sp_span_entry *span_entry; - int err; - - span_entry = mlxsw_sp_span_entry_get(to); - if (!span_entry) - return -ENOENT; - - netdev_dbg(from->dev, "Adding inspected port to SPAN entry %d\n", - span_entry->id); - - err = mlxsw_sp_span_inspected_port_add(from, span_entry, type, bind); - if (err) - goto err_port_bind; - - return 0; - -err_port_bind: - mlxsw_sp_span_entry_put(mlxsw_sp, span_entry); - return err; -} - -void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, u8 destination_port, - enum mlxsw_sp_span_type type, bool bind) -{ - struct mlxsw_sp_span_entry *span_entry; - - span_entry = mlxsw_sp_span_entry_find(from->mlxsw_sp, - destination_port); - if (!span_entry) { - netdev_err(from->dev, "no span entry found\n"); - return; - } - - netdev_dbg(from->dev, "removing inspected port from SPAN entry %d\n", - span_entry->id); - mlxsw_sp_span_inspected_port_del(from, span_entry, type, bind); -} - static int mlxsw_sp_port_sample_set(struct mlxsw_sp_port *mlxsw_sp_port, bool enable, u32 rate) { @@ -4556,13 +4236,11 @@ mlxsw_sp_master_lag_check(struct mlxsw_sp *mlxsw_sp, u16 lag_id; if (mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id) != 0) { - NL_SET_ERR_MSG(extack, - "spectrum: Exceeded number of supported LAG devices"); + NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported LAG devices"); return false; } if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) { - NL_SET_ERR_MSG(extack, - "spectrum: LAG device using unsupported Tx type"); + NL_SET_ERR_MSG_MOD(extack, "LAG device using unsupported Tx type"); return false; } return true; @@ -4804,8 +4482,7 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, !netif_is_lag_master(upper_dev) && !netif_is_bridge_master(upper_dev) && !netif_is_ovs_master(upper_dev)) { - NL_SET_ERR_MSG(extack, - "spectrum: Unknown upper device type"); + NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type"); return -EINVAL; } if (!info->linking) @@ -4814,8 +4491,7 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, (!netif_is_bridge_master(upper_dev) || !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, upper_dev))) { - NL_SET_ERR_MSG(extack, - "spectrum: Enslaving a port to a device that already has an upper device is not supported"); + NL_SET_ERR_MSG_MOD(extack, "Enslaving a port to a device that already has an upper device is not supported"); return -EINVAL; } if (netif_is_lag_master(upper_dev) && @@ -4823,24 +4499,20 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, info->upper_info, extack)) return -EINVAL; if (netif_is_lag_master(upper_dev) && vlan_uses_dev(dev)) { - NL_SET_ERR_MSG(extack, - "spectrum: Master device is a LAG master and this device has a VLAN"); + NL_SET_ERR_MSG_MOD(extack, "Master device is a LAG master and this device has a VLAN"); return -EINVAL; } if (netif_is_lag_port(dev) && is_vlan_dev(upper_dev) && !netif_is_lag_master(vlan_dev_real_dev(upper_dev))) { - NL_SET_ERR_MSG(extack, - "spectrum: Can not put a VLAN on a LAG port"); + NL_SET_ERR_MSG_MOD(extack, "Can not put a VLAN on a LAG port"); return -EINVAL; } if (netif_is_ovs_master(upper_dev) && vlan_uses_dev(dev)) { - NL_SET_ERR_MSG(extack, - "spectrum: Master device is an OVS master and this device has a VLAN"); + NL_SET_ERR_MSG_MOD(extack, "Master device is an OVS master and this device has a VLAN"); return -EINVAL; } if (netif_is_ovs_port(dev) && is_vlan_dev(upper_dev)) { - NL_SET_ERR_MSG(extack, - "spectrum: Can not put a VLAN on an OVS port"); + NL_SET_ERR_MSG_MOD(extack, "Can not put a VLAN on an OVS port"); return -EINVAL; } break; @@ -4953,7 +4625,7 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev, case NETDEV_PRECHANGEUPPER: upper_dev = info->upper_dev; if (!netif_is_bridge_master(upper_dev)) { - NL_SET_ERR_MSG(extack, "spectrum: VLAN devices only support bridge and VRF uppers"); + NL_SET_ERR_MSG_MOD(extack, "VLAN devices only support bridge and VRF uppers"); return -EINVAL; } if (!info->linking) @@ -4962,7 +4634,7 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev, (!netif_is_bridge_master(upper_dev) || !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, upper_dev))) { - NL_SET_ERR_MSG(extack, "spectrum: Enslaving a port to a device that already has an upper device is not supported"); + NL_SET_ERR_MSG_MOD(extack, "Enslaving a port to a device that already has an upper device is not supported"); return -EINVAL; } break; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index bdd8f94a452c..6718a1f0482c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -80,6 +80,7 @@ enum mlxsw_sp_resource_id { struct mlxsw_sp_port; struct mlxsw_sp_rif; +struct mlxsw_sp_span_entry; struct mlxsw_sp_upper { struct net_device *dev; @@ -111,25 +112,6 @@ struct mlxsw_sp_mid { unsigned long *ports_in_mid; /* bits array */ }; -enum mlxsw_sp_span_type { - MLXSW_SP_SPAN_EGRESS, - MLXSW_SP_SPAN_INGRESS -}; - -struct mlxsw_sp_span_inspected_port { - struct list_head list; - enum mlxsw_sp_span_type type; - u8 local_port; -}; - -struct mlxsw_sp_span_entry { - u8 local_port; - bool used; - struct list_head bound_ports_list; - int ref_count; - int id; -}; - enum mlxsw_sp_port_mall_action_type { MLXSW_SP_PORT_MALL_MIRROR, MLXSW_SP_PORT_MALL_SAMPLE, @@ -396,16 +378,6 @@ struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find(struct net_device *dev); struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev); void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port); struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find_rcu(struct net_device *dev); -int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from, - struct mlxsw_sp_port *to, - enum mlxsw_sp_span_type type, - bool bind); -void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, - u8 destination_port, - enum mlxsw_sp_span_type type, - bool bind); -struct mlxsw_sp_span_entry * -mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port); /* spectrum_dcb.c */ #ifdef CONFIG_MLXSW_SPECTRUM_DCB diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c index 6ca6894125f0..f7e61cecc42b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c @@ -35,6 +35,7 @@ #include "spectrum_acl_flex_actions.h" #include "core_acl_flex_actions.h" +#include "spectrum_span.h" #define MLXSW_SP_KVDL_ACT_EXT_SIZE 1 diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c index 7502e53447bd..a1c4b1e63f8d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c @@ -37,122 +37,89 @@ #include "spectrum_ipip.h" struct ip_tunnel_parm -mlxsw_sp_ipip_netdev_parms(const struct net_device *ol_dev) +mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev) { struct ip_tunnel *tun = netdev_priv(ol_dev); return tun->parms; } -static bool mlxsw_sp_ipip_parms_has_ikey(struct ip_tunnel_parm parms) +static bool mlxsw_sp_ipip_parms4_has_ikey(struct ip_tunnel_parm parms) { return !!(parms.i_flags & TUNNEL_KEY); } -static bool mlxsw_sp_ipip_parms_has_okey(struct ip_tunnel_parm parms) +static bool mlxsw_sp_ipip_parms4_has_okey(struct ip_tunnel_parm parms) { return !!(parms.o_flags & TUNNEL_KEY); } -static u32 mlxsw_sp_ipip_parms_ikey(struct ip_tunnel_parm parms) +static u32 mlxsw_sp_ipip_parms4_ikey(struct ip_tunnel_parm parms) { - return mlxsw_sp_ipip_parms_has_ikey(parms) ? + return mlxsw_sp_ipip_parms4_has_ikey(parms) ? be32_to_cpu(parms.i_key) : 0; } -static u32 mlxsw_sp_ipip_parms_okey(struct ip_tunnel_parm parms) +static u32 mlxsw_sp_ipip_parms4_okey(struct ip_tunnel_parm parms) { - return mlxsw_sp_ipip_parms_has_okey(parms) ? + return mlxsw_sp_ipip_parms4_has_okey(parms) ? be32_to_cpu(parms.o_key) : 0; } -static __be32 mlxsw_sp_ipip_parms_saddr4(struct ip_tunnel_parm parms) +static union mlxsw_sp_l3addr +mlxsw_sp_ipip_parms4_saddr(struct ip_tunnel_parm parms) { - return parms.iph.saddr; + return (union mlxsw_sp_l3addr) { .addr4 = parms.iph.saddr }; } static union mlxsw_sp_l3addr -mlxsw_sp_ipip_parms_saddr(enum mlxsw_sp_l3proto proto, - struct ip_tunnel_parm parms) +mlxsw_sp_ipip_parms4_daddr(struct ip_tunnel_parm parms) { - switch (proto) { - case MLXSW_SP_L3_PROTO_IPV4: - return (union mlxsw_sp_l3addr) { - .addr4 = mlxsw_sp_ipip_parms_saddr4(parms), - }; - case MLXSW_SP_L3_PROTO_IPV6: - break; - } - - WARN_ON(1); - return (union mlxsw_sp_l3addr) { - .addr4 = 0, - }; + return (union mlxsw_sp_l3addr) { .addr4 = parms.iph.daddr }; } -static __be32 mlxsw_sp_ipip_parms_daddr4(struct ip_tunnel_parm parms) +union mlxsw_sp_l3addr +mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto, + const struct net_device *ol_dev) { - return parms.iph.daddr; -} + struct ip_tunnel_parm parms4; -static union mlxsw_sp_l3addr -mlxsw_sp_ipip_parms_daddr(enum mlxsw_sp_l3proto proto, - struct ip_tunnel_parm parms) -{ switch (proto) { case MLXSW_SP_L3_PROTO_IPV4: - return (union mlxsw_sp_l3addr) { - .addr4 = mlxsw_sp_ipip_parms_daddr4(parms), - }; + parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev); + return mlxsw_sp_ipip_parms4_saddr(parms4); case MLXSW_SP_L3_PROTO_IPV6: break; } WARN_ON(1); - return (union mlxsw_sp_l3addr) { - .addr4 = 0, - }; -} - -static bool mlxsw_sp_ipip_netdev_has_ikey(const struct net_device *ol_dev) -{ - return mlxsw_sp_ipip_parms_has_ikey(mlxsw_sp_ipip_netdev_parms(ol_dev)); -} - -static bool mlxsw_sp_ipip_netdev_has_okey(const struct net_device *ol_dev) -{ - return mlxsw_sp_ipip_parms_has_okey(mlxsw_sp_ipip_netdev_parms(ol_dev)); + return (union mlxsw_sp_l3addr) {0}; } -static u32 mlxsw_sp_ipip_netdev_ikey(const struct net_device *ol_dev) +static __be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev) { - return mlxsw_sp_ipip_parms_ikey(mlxsw_sp_ipip_netdev_parms(ol_dev)); -} -static u32 mlxsw_sp_ipip_netdev_okey(const struct net_device *ol_dev) -{ - return mlxsw_sp_ipip_parms_okey(mlxsw_sp_ipip_netdev_parms(ol_dev)); -} + struct ip_tunnel_parm parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev); -union mlxsw_sp_l3addr -mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto, - const struct net_device *ol_dev) -{ - return mlxsw_sp_ipip_parms_saddr(proto, - mlxsw_sp_ipip_netdev_parms(ol_dev)); -} - -static __be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev) -{ - return mlxsw_sp_ipip_parms_daddr4(mlxsw_sp_ipip_netdev_parms(ol_dev)); + return mlxsw_sp_ipip_parms4_daddr(parms4).addr4; } static union mlxsw_sp_l3addr mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto, const struct net_device *ol_dev) { - return mlxsw_sp_ipip_parms_daddr(proto, - mlxsw_sp_ipip_netdev_parms(ol_dev)); + struct ip_tunnel_parm parms4; + + switch (proto) { + case MLXSW_SP_L3_PROTO_IPV4: + parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev); + return mlxsw_sp_ipip_parms4_daddr(parms4); + case MLXSW_SP_L3_PROTO_IPV6: + break; + } + + WARN_ON(1); + return (union mlxsw_sp_l3addr) {0}; } static int @@ -176,12 +143,17 @@ mlxsw_sp_ipip_fib_entry_op_gre4_rtdp(struct mlxsw_sp *mlxsw_sp, u32 tunnel_index, struct mlxsw_sp_ipip_entry *ipip_entry) { - bool has_ikey = mlxsw_sp_ipip_netdev_has_ikey(ipip_entry->ol_dev); u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb); - u32 ikey = mlxsw_sp_ipip_netdev_ikey(ipip_entry->ol_dev); char rtdp_pl[MLXSW_REG_RTDP_LEN]; + struct ip_tunnel_parm parms; unsigned int type_check; + bool has_ikey; u32 daddr4; + u32 ikey; + + parms = mlxsw_sp_ipip_netdev_parms4(ipip_entry->ol_dev); + has_ikey = mlxsw_sp_ipip_parms4_has_ikey(parms); + ikey = mlxsw_sp_ipip_parms4_ikey(parms); mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_IPIP, tunnel_index); @@ -273,14 +245,15 @@ static struct mlxsw_sp_rif_ipip_lb_config mlxsw_sp_ipip_ol_loopback_config_gre4(struct mlxsw_sp *mlxsw_sp, const struct net_device *ol_dev) { + struct ip_tunnel_parm parms = mlxsw_sp_ipip_netdev_parms4(ol_dev); enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt; - lb_ipipt = mlxsw_sp_ipip_netdev_has_okey(ol_dev) ? + lb_ipipt = mlxsw_sp_ipip_parms4_has_okey(parms) ? MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_KEY_IN_IP : MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_IN_IP; return (struct mlxsw_sp_rif_ipip_lb_config){ .lb_ipipt = lb_ipipt, - .okey = mlxsw_sp_ipip_netdev_okey(ol_dev), + .okey = mlxsw_sp_ipip_parms4_okey(parms), .ul_protocol = MLXSW_SP_L3_PROTO_IPV4, .saddr = mlxsw_sp_ipip_netdev_saddr(MLXSW_SP_L3_PROTO_IPV4, ol_dev), @@ -300,16 +273,12 @@ mlxsw_sp_ipip_ol_netdev_change_gre4(struct mlxsw_sp *mlxsw_sp, bool update_nhs = false; int err = 0; - new_parms = mlxsw_sp_ipip_netdev_parms(ipip_entry->ol_dev); + new_parms = mlxsw_sp_ipip_netdev_parms4(ipip_entry->ol_dev); - new_saddr = mlxsw_sp_ipip_parms_saddr(MLXSW_SP_L3_PROTO_IPV4, - new_parms); - old_saddr = mlxsw_sp_ipip_parms_saddr(MLXSW_SP_L3_PROTO_IPV4, - ipip_entry->parms); - new_daddr = mlxsw_sp_ipip_parms_daddr(MLXSW_SP_L3_PROTO_IPV4, - new_parms); - old_daddr = mlxsw_sp_ipip_parms_daddr(MLXSW_SP_L3_PROTO_IPV4, - ipip_entry->parms); + new_saddr = mlxsw_sp_ipip_parms4_saddr(new_parms); + old_saddr = mlxsw_sp_ipip_parms4_saddr(ipip_entry->parms4); + new_daddr = mlxsw_sp_ipip_parms4_daddr(new_parms); + old_daddr = mlxsw_sp_ipip_parms4_daddr(ipip_entry->parms4); if (!mlxsw_sp_l3addr_eq(&new_saddr, &old_saddr)) { u16 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev); @@ -326,14 +295,14 @@ mlxsw_sp_ipip_ol_netdev_change_gre4(struct mlxsw_sp *mlxsw_sp, } update_tunnel = true; - } else if ((mlxsw_sp_ipip_parms_okey(ipip_entry->parms) != - mlxsw_sp_ipip_parms_okey(new_parms)) || - ipip_entry->parms.link != new_parms.link) { + } else if ((mlxsw_sp_ipip_parms4_okey(ipip_entry->parms4) != + mlxsw_sp_ipip_parms4_okey(new_parms)) || + ipip_entry->parms4.link != new_parms.link) { update_tunnel = true; } else if (!mlxsw_sp_l3addr_eq(&new_daddr, &old_daddr)) { update_nhs = true; - } else if (mlxsw_sp_ipip_parms_ikey(ipip_entry->parms) != - mlxsw_sp_ipip_parms_ikey(new_parms)) { + } else if (mlxsw_sp_ipip_parms4_ikey(ipip_entry->parms4) != + mlxsw_sp_ipip_parms4_ikey(new_parms)) { update_decap = true; } @@ -350,7 +319,7 @@ mlxsw_sp_ipip_ol_netdev_change_gre4(struct mlxsw_sp *mlxsw_sp, false, false, false, extack); - ipip_entry->parms = new_parms; + ipip_entry->parms4 = new_parms; return err; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h index 04b08d9d76e9..a4ff5737eccc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h @@ -37,9 +37,10 @@ #include "spectrum_router.h" #include <net/ip_fib.h> +#include <linux/if_tunnel.h> struct ip_tunnel_parm -mlxsw_sp_ipip_netdev_parms(const struct net_device *ol_dev); +mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev); union mlxsw_sp_l3addr mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto, @@ -56,7 +57,9 @@ struct mlxsw_sp_ipip_entry { struct mlxsw_sp_rif_ipip_lb *ol_lb; struct mlxsw_sp_fib_entry *decap_fib_entry; struct list_head ipip_list_node; - struct ip_tunnel_parm parms; + union { + struct ip_tunnel_parm parms4; + }; }; struct mlxsw_sp_ipip_ops { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index f7948e983637..05146970c19c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1,10 +1,10 @@ /* * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c - * Copyright (c) 2016-2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved. * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com> * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com> * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com> - * Copyright (c) 2017 Petr Machata <petrm@mellanox.com> + * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -796,7 +796,7 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp, vr = mlxsw_sp_vr_find_unused(mlxsw_sp); if (!vr) { - NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported virtual routers"); + NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers"); return ERR_PTR(-EBUSY); } fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4); @@ -1024,9 +1024,11 @@ mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp, enum mlxsw_sp_ipip_type ipipt, struct net_device *ol_dev) { + const struct mlxsw_sp_ipip_ops *ipip_ops; struct mlxsw_sp_ipip_entry *ipip_entry; struct mlxsw_sp_ipip_entry *ret = NULL; + ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt]; ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL); if (!ipip_entry) return ERR_PTR(-ENOMEM); @@ -1040,7 +1042,15 @@ mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp, ipip_entry->ipipt = ipipt; ipip_entry->ol_dev = ol_dev; - ipip_entry->parms = mlxsw_sp_ipip_netdev_parms(ol_dev); + + switch (ipip_ops->ul_proto) { + case MLXSW_SP_L3_PROTO_IPV4: + ipip_entry->parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev); + break; + case MLXSW_SP_L3_PROTO_IPV6: + WARN_ON(1); + break; + } return ipip_entry; @@ -5793,7 +5803,7 @@ static int mlxsw_sp_router_fib_rule_event(unsigned long event, } if (err < 0) - NL_SET_ERR_MSG(extack, "spectrum: FIB rules not supported. Aborting offload"); + NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported. Aborting offload"); return err; } @@ -6032,7 +6042,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index); if (err) { - NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported router interfaces"); + NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces"); goto err_rif_index_alloc; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c new file mode 100644 index 000000000000..c3bec37d71ed --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c @@ -0,0 +1,356 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/mlxsw_span.c + * Copyright (c) 2018 Mellanox Technologies. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/list.h> + +#include "spectrum.h" +#include "spectrum_span.h" + +int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp) +{ + int i; + + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_SPAN)) + return -EIO; + + mlxsw_sp->span.entries_count = MLXSW_CORE_RES_GET(mlxsw_sp->core, + MAX_SPAN); + mlxsw_sp->span.entries = kcalloc(mlxsw_sp->span.entries_count, + sizeof(struct mlxsw_sp_span_entry), + GFP_KERNEL); + if (!mlxsw_sp->span.entries) + return -ENOMEM; + + for (i = 0; i < mlxsw_sp->span.entries_count; i++) + INIT_LIST_HEAD(&mlxsw_sp->span.entries[i].bound_ports_list); + + return 0; +} + +void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp) +{ + int i; + + for (i = 0; i < mlxsw_sp->span.entries_count; i++) { + struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; + + WARN_ON_ONCE(!list_empty(&curr->bound_ports_list)); + } + kfree(mlxsw_sp->span.entries); +} + +static struct mlxsw_sp_span_entry * +mlxsw_sp_span_entry_create(struct mlxsw_sp_port *port) +{ + struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; + struct mlxsw_sp_span_entry *span_entry; + char mpat_pl[MLXSW_REG_MPAT_LEN]; + u8 local_port = port->local_port; + int index; + int i; + int err; + + /* find a free entry to use */ + index = -1; + for (i = 0; i < mlxsw_sp->span.entries_count; i++) { + if (!mlxsw_sp->span.entries[i].ref_count) { + index = i; + span_entry = &mlxsw_sp->span.entries[i]; + break; + } + } + if (index < 0) + return NULL; + + /* create a new port analayzer entry for local_port */ + mlxsw_reg_mpat_pack(mpat_pl, index, local_port, true); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl); + if (err) + return NULL; + + span_entry->id = index; + span_entry->ref_count = 1; + span_entry->local_port = local_port; + return span_entry; +} + +static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_span_entry *span_entry) +{ + u8 local_port = span_entry->local_port; + char mpat_pl[MLXSW_REG_MPAT_LEN]; + int pa_id = span_entry->id; + + mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl); +} + +struct mlxsw_sp_span_entry * +mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port) +{ + int i; + + for (i = 0; i < mlxsw_sp->span.entries_count; i++) { + struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; + + if (curr->ref_count && curr->local_port == local_port) + return curr; + } + return NULL; +} + +static struct mlxsw_sp_span_entry * +mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port) +{ + struct mlxsw_sp_span_entry *span_entry; + + span_entry = mlxsw_sp_span_entry_find(port->mlxsw_sp, + port->local_port); + if (span_entry) { + /* Already exists, just take a reference */ + span_entry->ref_count++; + return span_entry; + } + + return mlxsw_sp_span_entry_create(port); +} + +static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_span_entry *span_entry) +{ + WARN_ON(!span_entry->ref_count); + if (--span_entry->ref_count == 0) + mlxsw_sp_span_entry_destroy(mlxsw_sp, span_entry); + return 0; +} + +static bool mlxsw_sp_span_is_egress_mirror(struct mlxsw_sp_port *port) +{ + struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; + struct mlxsw_sp_span_inspected_port *p; + int i; + + for (i = 0; i < mlxsw_sp->span.entries_count; i++) { + struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; + + list_for_each_entry(p, &curr->bound_ports_list, list) + if (p->local_port == port->local_port && + p->type == MLXSW_SP_SPAN_EGRESS) + return true; + } + + return false; +} + +static int mlxsw_sp_span_mtu_to_buffsize(const struct mlxsw_sp *mlxsw_sp, + int mtu) +{ + return mlxsw_sp_bytes_cells(mlxsw_sp, mtu * 5 / 2) + 1; +} + +int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu) +{ + struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; + char sbib_pl[MLXSW_REG_SBIB_LEN]; + int err; + + /* If port is egress mirrored, the shared buffer size should be + * updated according to the mtu value + */ + if (mlxsw_sp_span_is_egress_mirror(port)) { + u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp, mtu); + + mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); + if (err) { + netdev_err(port->dev, "Could not update shared buffer for mirroring\n"); + return err; + } + } + + return 0; +} + +static struct mlxsw_sp_span_inspected_port * +mlxsw_sp_span_entry_bound_port_find(struct mlxsw_sp_port *port, + struct mlxsw_sp_span_entry *span_entry) +{ + struct mlxsw_sp_span_inspected_port *p; + + list_for_each_entry(p, &span_entry->bound_ports_list, list) + if (port->local_port == p->local_port) + return p; + return NULL; +} + +static int +mlxsw_sp_span_inspected_port_bind(struct mlxsw_sp_port *port, + struct mlxsw_sp_span_entry *span_entry, + enum mlxsw_sp_span_type type, + bool bind) +{ + struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; + char mpar_pl[MLXSW_REG_MPAR_LEN]; + int pa_id = span_entry->id; + + /* bind the port to the SPAN entry */ + mlxsw_reg_mpar_pack(mpar_pl, port->local_port, + (enum mlxsw_reg_mpar_i_e)type, bind, pa_id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl); +} + +static int +mlxsw_sp_span_inspected_port_add(struct mlxsw_sp_port *port, + struct mlxsw_sp_span_entry *span_entry, + enum mlxsw_sp_span_type type, + bool bind) +{ + struct mlxsw_sp_span_inspected_port *inspected_port; + struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; + char sbib_pl[MLXSW_REG_SBIB_LEN]; + int err; + + /* if it is an egress SPAN, bind a shared buffer to it */ + if (type == MLXSW_SP_SPAN_EGRESS) { + u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp, + port->dev->mtu); + + mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); + if (err) { + netdev_err(port->dev, "Could not create shared buffer for mirroring\n"); + return err; + } + } + + if (bind) { + err = mlxsw_sp_span_inspected_port_bind(port, span_entry, type, + true); + if (err) + goto err_port_bind; + } + + inspected_port = kzalloc(sizeof(*inspected_port), GFP_KERNEL); + if (!inspected_port) { + err = -ENOMEM; + goto err_inspected_port_alloc; + } + inspected_port->local_port = port->local_port; + inspected_port->type = type; + list_add_tail(&inspected_port->list, &span_entry->bound_ports_list); + + return 0; + +err_inspected_port_alloc: + if (bind) + mlxsw_sp_span_inspected_port_bind(port, span_entry, type, + false); +err_port_bind: + if (type == MLXSW_SP_SPAN_EGRESS) { + mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); + } + return err; +} + +static void +mlxsw_sp_span_inspected_port_del(struct mlxsw_sp_port *port, + struct mlxsw_sp_span_entry *span_entry, + enum mlxsw_sp_span_type type, + bool bind) +{ + struct mlxsw_sp_span_inspected_port *inspected_port; + struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; + char sbib_pl[MLXSW_REG_SBIB_LEN]; + + inspected_port = mlxsw_sp_span_entry_bound_port_find(port, span_entry); + if (!inspected_port) + return; + + if (bind) + mlxsw_sp_span_inspected_port_bind(port, span_entry, type, + false); + /* remove the SBIB buffer if it was egress SPAN */ + if (type == MLXSW_SP_SPAN_EGRESS) { + mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); + } + + mlxsw_sp_span_entry_put(mlxsw_sp, span_entry); + + list_del(&inspected_port->list); + kfree(inspected_port); +} + +int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from, + struct mlxsw_sp_port *to, + enum mlxsw_sp_span_type type, bool bind) +{ + struct mlxsw_sp *mlxsw_sp = from->mlxsw_sp; + struct mlxsw_sp_span_entry *span_entry; + int err; + + span_entry = mlxsw_sp_span_entry_get(to); + if (!span_entry) + return -ENOENT; + + netdev_dbg(from->dev, "Adding inspected port to SPAN entry %d\n", + span_entry->id); + + err = mlxsw_sp_span_inspected_port_add(from, span_entry, type, bind); + if (err) + goto err_port_bind; + + return 0; + +err_port_bind: + mlxsw_sp_span_entry_put(mlxsw_sp, span_entry); + return err; +} + +void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, u8 destination_port, + enum mlxsw_sp_span_type type, bool bind) +{ + struct mlxsw_sp_span_entry *span_entry; + + span_entry = mlxsw_sp_span_entry_find(from->mlxsw_sp, + destination_port); + if (!span_entry) { + netdev_err(from->dev, "no span entry found\n"); + return; + } + + netdev_dbg(from->dev, "removing inspected port from SPAN entry %d\n", + span_entry->id); + mlxsw_sp_span_inspected_port_del(from, span_entry, type, bind); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h new file mode 100644 index 000000000000..069050e385ff --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h @@ -0,0 +1,73 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/mlxsw_span.h + * Copyright (c) 2018 Mellanox Technologies. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MLXSW_SPECTRUM_SPAN_H +#define _MLXSW_SPECTRUM_SPAN_H + +#include <linux/types.h> + +struct mlxsw_sp; +struct mlxsw_sp_port; + +enum mlxsw_sp_span_type { + MLXSW_SP_SPAN_EGRESS, + MLXSW_SP_SPAN_INGRESS +}; + +struct mlxsw_sp_span_inspected_port { + struct list_head list; + enum mlxsw_sp_span_type type; + u8 local_port; +}; + +struct mlxsw_sp_span_entry { + u8 local_port; + struct list_head bound_ports_list; + int ref_count; + int id; +}; + +int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp); + +int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from, + struct mlxsw_sp_port *to, + enum mlxsw_sp_span_type type, bool bind); +void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, u8 destination_port, + enum mlxsw_sp_span_type type, bool bind); +struct mlxsw_sp_span_entry * +mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port); + +int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 593ad31be749..f9f53af04fe1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1819,7 +1819,7 @@ mlxsw_sp_bridge_8021q_port_join(struct mlxsw_sp_bridge_device *bridge_device, struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; if (is_vlan_dev(bridge_port->dev)) { - NL_SET_ERR_MSG(extack, "spectrum: Can not enslave a VLAN device to a VLAN-aware bridge"); + NL_SET_ERR_MSG_MOD(extack, "Can not enslave a VLAN device to a VLAN-aware bridge"); return -EINVAL; } @@ -1885,7 +1885,7 @@ mlxsw_sp_bridge_8021d_port_join(struct mlxsw_sp_bridge_device *bridge_device, u16 vid; if (!is_vlan_dev(bridge_port->dev)) { - NL_SET_ERR_MSG(extack, "spectrum: Only VLAN devices can be enslaved to a VLAN-unaware bridge"); + NL_SET_ERR_MSG_MOD(extack, "Only VLAN devices can be enslaved to a VLAN-unaware bridge"); return -EINVAL; } vid = vlan_dev_vlan_id(bridge_port->dev); @@ -1895,7 +1895,7 @@ mlxsw_sp_bridge_8021d_port_join(struct mlxsw_sp_bridge_device *bridge_device, return -EINVAL; if (mlxsw_sp_port_is_br_member(mlxsw_sp_port, bridge_device->dev)) { - NL_SET_ERR_MSG(extack, "spectrum: Can not bridge VLAN uppers of the same port"); + NL_SET_ERR_MSG_MOD(extack, "Can not bridge VLAN uppers of the same port"); return -EINVAL; } diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index adfe474c2cf0..28c1cd5b823b 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -61,6 +61,13 @@ #define NFP_FLOWER_MASK_MPLS_BOS BIT(8) #define NFP_FLOWER_MASK_MPLS_Q BIT(0) +/* Compressed HW representation of TCP Flags */ +#define NFP_FL_TCP_FLAG_URG BIT(4) +#define NFP_FL_TCP_FLAG_PSH BIT(3) +#define NFP_FL_TCP_FLAG_RST BIT(2) +#define NFP_FL_TCP_FLAG_SYN BIT(1) +#define NFP_FL_TCP_FLAG_FIN BIT(0) + #define NFP_FL_SC_ACT_DROP 0x80000000 #define NFP_FL_SC_ACT_USER 0x7D000000 #define NFP_FL_SC_ACT_POPV 0x6A000000 @@ -257,7 +264,7 @@ struct nfp_flower_tp_ports { * 3 2 1 * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | DSCP |ECN| protocol | reserved | + * | DSCP |ECN| protocol | ttl | flags | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | ipv4_addr_src | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ @@ -268,7 +275,7 @@ struct nfp_flower_ipv4 { u8 tos; u8 proto; u8 ttl; - u8 reserved; + u8 flags; __be32 ipv4_src; __be32 ipv4_dst; }; diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index 332ff0fdc038..c5cebf6fb1d3 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -41,6 +41,7 @@ #include <linux/time64.h> #include <linux/types.h> #include <net/pkt_cls.h> +#include <net/tcp.h> #include <linux/workqueue.h> struct net_device; diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c index 37c2ecae2a7a..b3bc8279d4fb 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/match.c +++ b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -181,6 +181,26 @@ nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *frame, frame->tos = flow_ip->tos; frame->ttl = flow_ip->ttl; } + + if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_TCP)) { + struct flow_dissector_key_tcp *tcp; + u32 tcp_flags; + + tcp = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_TCP, target); + tcp_flags = be16_to_cpu(tcp->flags); + + if (tcp_flags & TCPHDR_FIN) + frame->flags |= NFP_FL_TCP_FLAG_FIN; + if (tcp_flags & TCPHDR_SYN) + frame->flags |= NFP_FL_TCP_FLAG_SYN; + if (tcp_flags & TCPHDR_RST) + frame->flags |= NFP_FL_TCP_FLAG_RST; + if (tcp_flags & TCPHDR_PSH) + frame->flags |= NFP_FL_TCP_FLAG_PSH; + if (tcp_flags & TCPHDR_URG) + frame->flags |= NFP_FL_TCP_FLAG_URG; + } } static void diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index eb5c13dea8f5..f3586c519805 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -44,11 +44,16 @@ #include "../nfp_net.h" #include "../nfp_port.h" +#define NFP_FLOWER_SUPPORTED_TCPFLAGS \ + (TCPHDR_FIN | TCPHDR_SYN | TCPHDR_RST | \ + TCPHDR_PSH | TCPHDR_URG) + #define NFP_FLOWER_WHITELIST_DISSECTOR \ (BIT(FLOW_DISSECTOR_KEY_CONTROL) | \ BIT(FLOW_DISSECTOR_KEY_BASIC) | \ BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | \ BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | \ + BIT(FLOW_DISSECTOR_KEY_TCP) | \ BIT(FLOW_DISSECTOR_KEY_PORTS) | \ BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | \ BIT(FLOW_DISSECTOR_KEY_VLAN) | \ @@ -288,6 +293,35 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, } } + if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_TCP)) { + struct flow_dissector_key_tcp *tcp; + u32 tcp_flags; + + tcp = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_TCP, + flow->key); + tcp_flags = be16_to_cpu(tcp->flags); + + if (tcp_flags & ~NFP_FLOWER_SUPPORTED_TCPFLAGS) + return -EOPNOTSUPP; + + /* We only support PSH and URG flags when either + * FIN, SYN or RST is present as well. + */ + if ((tcp_flags & (TCPHDR_PSH | TCPHDR_URG)) && + !(tcp_flags & (TCPHDR_FIN | TCPHDR_SYN | TCPHDR_RST))) + return -EOPNOTSUPP; + + /* We need to store TCP flags in the IPv4 key space, thus + * we need to ensure we include a IPv4 key layer if we have + * not done so already. + */ + if (!(key_layer & NFP_FLOWER_LAYER_IPV4)) { + key_layer |= NFP_FLOWER_LAYER_IPV4; + key_size += sizeof(struct nfp_flower_ipv4); + } + } + ret_key_ls->key_layer = key_layer; ret_key_ls->key_layer_two = key_layer_two; ret_key_ls->key_size = key_size; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h index 4499a7333078..bb63c115537d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2015-2017 Netronome Systems, Inc. + * Copyright (C) 2015-2018 Netronome Systems, Inc. * * This software is dual licensed under the GNU General License Version 2, * June 1991 as shown in the file COPYING in the top-level directory of this @@ -51,12 +51,12 @@ * The configuration BAR is 8K in size, but due to * THB-350, 32k needs to be reserved. */ -#define NFP_NET_CFG_BAR_SZ (32 * 1024) +#define NFP_NET_CFG_BAR_SZ (32 * 1024) /** * Offset in Freelist buffer where packet starts on RX */ -#define NFP_NET_RX_OFFSET 32 +#define NFP_NET_RX_OFFSET 32 /** * LSO parameters @@ -75,65 +75,65 @@ #define NFP_NET_META_PORTID 5 #define NFP_NET_META_CSUM 6 /* checksum complete type */ -#define NFP_META_PORT_ID_CTRL ~0U +#define NFP_META_PORT_ID_CTRL ~0U /** * Hash type pre-pended when a RSS hash was computed */ -#define NFP_NET_RSS_NONE 0 -#define NFP_NET_RSS_IPV4 1 -#define NFP_NET_RSS_IPV6 2 -#define NFP_NET_RSS_IPV6_EX 3 -#define NFP_NET_RSS_IPV4_TCP 4 -#define NFP_NET_RSS_IPV6_TCP 5 -#define NFP_NET_RSS_IPV6_EX_TCP 6 -#define NFP_NET_RSS_IPV4_UDP 7 -#define NFP_NET_RSS_IPV6_UDP 8 -#define NFP_NET_RSS_IPV6_EX_UDP 9 +#define NFP_NET_RSS_NONE 0 +#define NFP_NET_RSS_IPV4 1 +#define NFP_NET_RSS_IPV6 2 +#define NFP_NET_RSS_IPV6_EX 3 +#define NFP_NET_RSS_IPV4_TCP 4 +#define NFP_NET_RSS_IPV6_TCP 5 +#define NFP_NET_RSS_IPV6_EX_TCP 6 +#define NFP_NET_RSS_IPV4_UDP 7 +#define NFP_NET_RSS_IPV6_UDP 8 +#define NFP_NET_RSS_IPV6_EX_UDP 9 /** * Ring counts - * %NFP_NET_TXR_MAX: Maximum number of TX rings - * %NFP_NET_RXR_MAX: Maximum number of RX rings + * %NFP_NET_TXR_MAX: Maximum number of TX rings + * %NFP_NET_RXR_MAX: Maximum number of RX rings */ -#define NFP_NET_TXR_MAX 64 -#define NFP_NET_RXR_MAX 64 +#define NFP_NET_TXR_MAX 64 +#define NFP_NET_RXR_MAX 64 /** * Read/Write config words (0x0000 - 0x002c) - * %NFP_NET_CFG_CTRL: Global control + * %NFP_NET_CFG_CTRL: Global control * %NFP_NET_CFG_UPDATE: Indicate which fields are updated * %NFP_NET_CFG_TXRS_ENABLE: Bitmask of enabled TX rings * %NFP_NET_CFG_RXRS_ENABLE: Bitmask of enabled RX rings - * %NFP_NET_CFG_MTU: Set MTU size + * %NFP_NET_CFG_MTU: Set MTU size * %NFP_NET_CFG_FLBUFSZ: Set freelist buffer size (must be larger than MTU) - * %NFP_NET_CFG_EXN: MSI-X table entry for exceptions - * %NFP_NET_CFG_LSC: MSI-X table entry for link state changes + * %NFP_NET_CFG_EXN: MSI-X table entry for exceptions + * %NFP_NET_CFG_LSC: MSI-X table entry for link state changes * %NFP_NET_CFG_MACADDR: MAC address * * TODO: * - define Error details in UPDATE */ -#define NFP_NET_CFG_CTRL 0x0000 -#define NFP_NET_CFG_CTRL_ENABLE (0x1 << 0) /* Global enable */ -#define NFP_NET_CFG_CTRL_PROMISC (0x1 << 1) /* Enable Promisc mode */ -#define NFP_NET_CFG_CTRL_L2BC (0x1 << 2) /* Allow L2 Broadcast */ -#define NFP_NET_CFG_CTRL_L2MC (0x1 << 3) /* Allow L2 Multicast */ -#define NFP_NET_CFG_CTRL_RXCSUM (0x1 << 4) /* Enable RX Checksum */ -#define NFP_NET_CFG_CTRL_TXCSUM (0x1 << 5) /* Enable TX Checksum */ -#define NFP_NET_CFG_CTRL_RXVLAN (0x1 << 6) /* Enable VLAN strip */ -#define NFP_NET_CFG_CTRL_TXVLAN (0x1 << 7) /* Enable VLAN insert */ -#define NFP_NET_CFG_CTRL_SCATTER (0x1 << 8) /* Scatter DMA */ -#define NFP_NET_CFG_CTRL_GATHER (0x1 << 9) /* Gather DMA */ -#define NFP_NET_CFG_CTRL_LSO (0x1 << 10) /* LSO/TSO (version 1) */ +#define NFP_NET_CFG_CTRL 0x0000 +#define NFP_NET_CFG_CTRL_ENABLE (0x1 << 0) /* Global enable */ +#define NFP_NET_CFG_CTRL_PROMISC (0x1 << 1) /* Enable Promisc mode */ +#define NFP_NET_CFG_CTRL_L2BC (0x1 << 2) /* Allow L2 Broadcast */ +#define NFP_NET_CFG_CTRL_L2MC (0x1 << 3) /* Allow L2 Multicast */ +#define NFP_NET_CFG_CTRL_RXCSUM (0x1 << 4) /* Enable RX Checksum */ +#define NFP_NET_CFG_CTRL_TXCSUM (0x1 << 5) /* Enable TX Checksum */ +#define NFP_NET_CFG_CTRL_RXVLAN (0x1 << 6) /* Enable VLAN strip */ +#define NFP_NET_CFG_CTRL_TXVLAN (0x1 << 7) /* Enable VLAN insert */ +#define NFP_NET_CFG_CTRL_SCATTER (0x1 << 8) /* Scatter DMA */ +#define NFP_NET_CFG_CTRL_GATHER (0x1 << 9) /* Gather DMA */ +#define NFP_NET_CFG_CTRL_LSO (0x1 << 10) /* LSO/TSO (version 1) */ #define NFP_NET_CFG_CTRL_CTAG_FILTER (0x1 << 11) /* VLAN CTAG filtering */ -#define NFP_NET_CFG_CTRL_RINGCFG (0x1 << 16) /* Ring runtime changes */ +#define NFP_NET_CFG_CTRL_RINGCFG (0x1 << 16) /* Ring runtime changes */ #define NFP_NET_CFG_CTRL_RSS (0x1 << 17) /* RSS (version 1) */ -#define NFP_NET_CFG_CTRL_IRQMOD (0x1 << 18) /* Interrupt moderation */ -#define NFP_NET_CFG_CTRL_RINGPRIO (0x1 << 19) /* Ring priorities */ -#define NFP_NET_CFG_CTRL_MSIXAUTO (0x1 << 20) /* MSI-X auto-masking */ -#define NFP_NET_CFG_CTRL_TXRWB (0x1 << 21) /* Write-back of TX ring*/ -#define NFP_NET_CFG_CTRL_L2SWITCH (0x1 << 22) /* L2 Switch */ +#define NFP_NET_CFG_CTRL_IRQMOD (0x1 << 18) /* Interrupt moderation */ +#define NFP_NET_CFG_CTRL_RINGPRIO (0x1 << 19) /* Ring priorities */ +#define NFP_NET_CFG_CTRL_MSIXAUTO (0x1 << 20) /* MSI-X auto-masking */ +#define NFP_NET_CFG_CTRL_TXRWB (0x1 << 21) /* Write-back of TX ring*/ +#define NFP_NET_CFG_CTRL_L2SWITCH (0x1 << 22) /* L2 Switch */ #define NFP_NET_CFG_CTRL_L2SWITCH_LOCAL (0x1 << 23) /* Switch to local */ #define NFP_NET_CFG_CTRL_VXLAN (0x1 << 24) /* VXLAN tunnel support */ #define NFP_NET_CFG_CTRL_NVGRE (0x1 << 25) /* NVGRE tunnel support */ @@ -152,35 +152,35 @@ #define NFP_NET_CFG_CTRL_CHAIN_META (NFP_NET_CFG_CTRL_RSS2 | \ NFP_NET_CFG_CTRL_CSUM_COMPLETE) -#define NFP_NET_CFG_UPDATE 0x0004 -#define NFP_NET_CFG_UPDATE_GEN (0x1 << 0) /* General update */ -#define NFP_NET_CFG_UPDATE_RING (0x1 << 1) /* Ring config change */ -#define NFP_NET_CFG_UPDATE_RSS (0x1 << 2) /* RSS config change */ -#define NFP_NET_CFG_UPDATE_TXRPRIO (0x1 << 3) /* TX Ring prio change */ -#define NFP_NET_CFG_UPDATE_RXRPRIO (0x1 << 4) /* RX Ring prio change */ -#define NFP_NET_CFG_UPDATE_MSIX (0x1 << 5) /* MSI-X change */ -#define NFP_NET_CFG_UPDATE_L2SWITCH (0x1 << 6) /* Switch changes */ -#define NFP_NET_CFG_UPDATE_RESET (0x1 << 7) /* Update due to FLR */ -#define NFP_NET_CFG_UPDATE_IRQMOD (0x1 << 8) /* IRQ mod change */ +#define NFP_NET_CFG_UPDATE 0x0004 +#define NFP_NET_CFG_UPDATE_GEN (0x1 << 0) /* General update */ +#define NFP_NET_CFG_UPDATE_RING (0x1 << 1) /* Ring config change */ +#define NFP_NET_CFG_UPDATE_RSS (0x1 << 2) /* RSS config change */ +#define NFP_NET_CFG_UPDATE_TXRPRIO (0x1 << 3) /* TX Ring prio change */ +#define NFP_NET_CFG_UPDATE_RXRPRIO (0x1 << 4) /* RX Ring prio change */ +#define NFP_NET_CFG_UPDATE_MSIX (0x1 << 5) /* MSI-X change */ +#define NFP_NET_CFG_UPDATE_L2SWITCH (0x1 << 6) /* Switch changes */ +#define NFP_NET_CFG_UPDATE_RESET (0x1 << 7) /* Update due to FLR */ +#define NFP_NET_CFG_UPDATE_IRQMOD (0x1 << 8) /* IRQ mod change */ #define NFP_NET_CFG_UPDATE_VXLAN (0x1 << 9) /* VXLAN port change */ #define NFP_NET_CFG_UPDATE_BPF (0x1 << 10) /* BPF program load */ #define NFP_NET_CFG_UPDATE_MACADDR (0x1 << 11) /* MAC address change */ #define NFP_NET_CFG_UPDATE_MBOX (0x1 << 12) /* Mailbox update */ #define NFP_NET_CFG_UPDATE_VF (0x1 << 13) /* VF settings change */ -#define NFP_NET_CFG_UPDATE_ERR (0x1 << 31) /* A error occurred */ -#define NFP_NET_CFG_TXRS_ENABLE 0x0008 -#define NFP_NET_CFG_RXRS_ENABLE 0x0010 -#define NFP_NET_CFG_MTU 0x0018 -#define NFP_NET_CFG_FLBUFSZ 0x001c -#define NFP_NET_CFG_EXN 0x001f -#define NFP_NET_CFG_LSC 0x0020 -#define NFP_NET_CFG_MACADDR 0x0024 +#define NFP_NET_CFG_UPDATE_ERR (0x1 << 31) /* A error occurred */ +#define NFP_NET_CFG_TXRS_ENABLE 0x0008 +#define NFP_NET_CFG_RXRS_ENABLE 0x0010 +#define NFP_NET_CFG_MTU 0x0018 +#define NFP_NET_CFG_FLBUFSZ 0x001c +#define NFP_NET_CFG_EXN 0x001f +#define NFP_NET_CFG_LSC 0x0020 +#define NFP_NET_CFG_MACADDR 0x0024 /** * Read-only words (0x0030 - 0x0050): * %NFP_NET_CFG_VERSION: Firmware version number - * %NFP_NET_CFG_STS: Status - * %NFP_NET_CFG_CAP: Capabilities (same bits as %NFP_NET_CFG_CTRL) + * %NFP_NET_CFG_STS: Status + * %NFP_NET_CFG_CAP: Capabilities (same bits as %NFP_NET_CFG_CTRL) * %NFP_NET_CFG_MAX_TXRINGS: Maximum number of TX rings * %NFP_NET_CFG_MAX_RXRINGS: Maximum number of RX rings * %NFP_NET_CFG_MAX_MTU: Maximum support MTU @@ -190,37 +190,37 @@ * TODO: * - define more STS bits */ -#define NFP_NET_CFG_VERSION 0x0030 +#define NFP_NET_CFG_VERSION 0x0030 #define NFP_NET_CFG_VERSION_RESERVED_MASK (0xff << 24) #define NFP_NET_CFG_VERSION_CLASS_MASK (0xff << 16) -#define NFP_NET_CFG_VERSION_CLASS(x) (((x) & 0xff) << 16) +#define NFP_NET_CFG_VERSION_CLASS(x) (((x) & 0xff) << 16) #define NFP_NET_CFG_VERSION_CLASS_GENERIC 0 #define NFP_NET_CFG_VERSION_MAJOR_MASK (0xff << 8) -#define NFP_NET_CFG_VERSION_MAJOR(x) (((x) & 0xff) << 8) +#define NFP_NET_CFG_VERSION_MAJOR(x) (((x) & 0xff) << 8) #define NFP_NET_CFG_VERSION_MINOR_MASK (0xff << 0) -#define NFP_NET_CFG_VERSION_MINOR(x) (((x) & 0xff) << 0) -#define NFP_NET_CFG_STS 0x0034 -#define NFP_NET_CFG_STS_LINK (0x1 << 0) /* Link up or down */ +#define NFP_NET_CFG_VERSION_MINOR(x) (((x) & 0xff) << 0) +#define NFP_NET_CFG_STS 0x0034 +#define NFP_NET_CFG_STS_LINK (0x1 << 0) /* Link up or down */ /* Link rate */ #define NFP_NET_CFG_STS_LINK_RATE_SHIFT 1 #define NFP_NET_CFG_STS_LINK_RATE_MASK 0xF -#define NFP_NET_CFG_STS_LINK_RATE \ +#define NFP_NET_CFG_STS_LINK_RATE \ (NFP_NET_CFG_STS_LINK_RATE_MASK << NFP_NET_CFG_STS_LINK_RATE_SHIFT) #define NFP_NET_CFG_STS_LINK_RATE_UNSUPPORTED 0 -#define NFP_NET_CFG_STS_LINK_RATE_UNKNOWN 1 -#define NFP_NET_CFG_STS_LINK_RATE_1G 2 -#define NFP_NET_CFG_STS_LINK_RATE_10G 3 -#define NFP_NET_CFG_STS_LINK_RATE_25G 4 -#define NFP_NET_CFG_STS_LINK_RATE_40G 5 -#define NFP_NET_CFG_STS_LINK_RATE_50G 6 -#define NFP_NET_CFG_STS_LINK_RATE_100G 7 -#define NFP_NET_CFG_CAP 0x0038 -#define NFP_NET_CFG_MAX_TXRINGS 0x003c -#define NFP_NET_CFG_MAX_RXRINGS 0x0040 -#define NFP_NET_CFG_MAX_MTU 0x0044 +#define NFP_NET_CFG_STS_LINK_RATE_UNKNOWN 1 +#define NFP_NET_CFG_STS_LINK_RATE_1G 2 +#define NFP_NET_CFG_STS_LINK_RATE_10G 3 +#define NFP_NET_CFG_STS_LINK_RATE_25G 4 +#define NFP_NET_CFG_STS_LINK_RATE_40G 5 +#define NFP_NET_CFG_STS_LINK_RATE_50G 6 +#define NFP_NET_CFG_STS_LINK_RATE_100G 7 +#define NFP_NET_CFG_CAP 0x0038 +#define NFP_NET_CFG_MAX_TXRINGS 0x003c +#define NFP_NET_CFG_MAX_RXRINGS 0x0040 +#define NFP_NET_CFG_MAX_MTU 0x0044 /* Next two words are being used by VFs for solving THB350 issue */ -#define NFP_NET_CFG_START_TXQ 0x0048 -#define NFP_NET_CFG_START_RXQ 0x004c +#define NFP_NET_CFG_START_TXQ 0x0048 +#define NFP_NET_CFG_START_RXQ 0x004c /** * Prepend configuration @@ -280,8 +280,8 @@ /** * 40B reserved for future use (0x0098 - 0x00c0) */ -#define NFP_NET_CFG_RESERVED 0x0098 -#define NFP_NET_CFG_RESERVED_SZ 0x0028 +#define NFP_NET_CFG_RESERVED 0x0098 +#define NFP_NET_CFG_RESERVED_SZ 0x0028 /** * RSS configuration (0x0100 - 0x01ac): @@ -290,26 +290,26 @@ * %NFP_NET_CFG_RSS_KEY: RSS "secret" key * %NFP_NET_CFG_RSS_ITBL: RSS indirection table */ -#define NFP_NET_CFG_RSS_BASE 0x0100 -#define NFP_NET_CFG_RSS_CTRL NFP_NET_CFG_RSS_BASE -#define NFP_NET_CFG_RSS_MASK (0x7f) -#define NFP_NET_CFG_RSS_MASK_of(_x) ((_x) & 0x7f) -#define NFP_NET_CFG_RSS_IPV4 (1 << 8) /* RSS for IPv4 */ -#define NFP_NET_CFG_RSS_IPV6 (1 << 9) /* RSS for IPv6 */ -#define NFP_NET_CFG_RSS_IPV4_TCP (1 << 10) /* RSS for IPv4/TCP */ -#define NFP_NET_CFG_RSS_IPV4_UDP (1 << 11) /* RSS for IPv4/UDP */ -#define NFP_NET_CFG_RSS_IPV6_TCP (1 << 12) /* RSS for IPv6/TCP */ -#define NFP_NET_CFG_RSS_IPV6_UDP (1 << 13) /* RSS for IPv6/UDP */ +#define NFP_NET_CFG_RSS_BASE 0x0100 +#define NFP_NET_CFG_RSS_CTRL NFP_NET_CFG_RSS_BASE +#define NFP_NET_CFG_RSS_MASK (0x7f) +#define NFP_NET_CFG_RSS_MASK_of(_x) ((_x) & 0x7f) +#define NFP_NET_CFG_RSS_IPV4 (1 << 8) /* RSS for IPv4 */ +#define NFP_NET_CFG_RSS_IPV6 (1 << 9) /* RSS for IPv6 */ +#define NFP_NET_CFG_RSS_IPV4_TCP (1 << 10) /* RSS for IPv4/TCP */ +#define NFP_NET_CFG_RSS_IPV4_UDP (1 << 11) /* RSS for IPv4/UDP */ +#define NFP_NET_CFG_RSS_IPV6_TCP (1 << 12) /* RSS for IPv6/TCP */ +#define NFP_NET_CFG_RSS_IPV6_UDP (1 << 13) /* RSS for IPv6/UDP */ #define NFP_NET_CFG_RSS_HFUNC 0xff000000 -#define NFP_NET_CFG_RSS_TOEPLITZ (1 << 24) /* Use Toeplitz hash */ +#define NFP_NET_CFG_RSS_TOEPLITZ (1 << 24) /* Use Toeplitz hash */ #define NFP_NET_CFG_RSS_XOR (1 << 25) /* Use XOR as hash */ #define NFP_NET_CFG_RSS_CRC32 (1 << 26) /* Use CRC32 as hash */ #define NFP_NET_CFG_RSS_HFUNCS 3 -#define NFP_NET_CFG_RSS_KEY (NFP_NET_CFG_RSS_BASE + 0x4) -#define NFP_NET_CFG_RSS_KEY_SZ 0x28 -#define NFP_NET_CFG_RSS_ITBL (NFP_NET_CFG_RSS_BASE + 0x4 + \ +#define NFP_NET_CFG_RSS_KEY (NFP_NET_CFG_RSS_BASE + 0x4) +#define NFP_NET_CFG_RSS_KEY_SZ 0x28 +#define NFP_NET_CFG_RSS_ITBL (NFP_NET_CFG_RSS_BASE + 0x4 + \ NFP_NET_CFG_RSS_KEY_SZ) -#define NFP_NET_CFG_RSS_ITBL_SZ 0x80 +#define NFP_NET_CFG_RSS_ITBL_SZ 0x80 /** * TX ring configuration (0x200 - 0x800) @@ -321,13 +321,13 @@ * %NFP_NET_CFG_TXR_PRIO: Per TX ring priority (1B entries) * %NFP_NET_CFG_TXR_IRQ_MOD: Per TX ring interrupt moderation packet */ -#define NFP_NET_CFG_TXR_BASE 0x0200 -#define NFP_NET_CFG_TXR_ADDR(_x) (NFP_NET_CFG_TXR_BASE + ((_x) * 0x8)) -#define NFP_NET_CFG_TXR_WB_ADDR(_x) (NFP_NET_CFG_TXR_BASE + 0x200 + \ +#define NFP_NET_CFG_TXR_BASE 0x0200 +#define NFP_NET_CFG_TXR_ADDR(_x) (NFP_NET_CFG_TXR_BASE + ((_x) * 0x8)) +#define NFP_NET_CFG_TXR_WB_ADDR(_x) (NFP_NET_CFG_TXR_BASE + 0x200 + \ ((_x) * 0x8)) -#define NFP_NET_CFG_TXR_SZ(_x) (NFP_NET_CFG_TXR_BASE + 0x400 + (_x)) -#define NFP_NET_CFG_TXR_VEC(_x) (NFP_NET_CFG_TXR_BASE + 0x440 + (_x)) -#define NFP_NET_CFG_TXR_PRIO(_x) (NFP_NET_CFG_TXR_BASE + 0x480 + (_x)) +#define NFP_NET_CFG_TXR_SZ(_x) (NFP_NET_CFG_TXR_BASE + 0x400 + (_x)) +#define NFP_NET_CFG_TXR_VEC(_x) (NFP_NET_CFG_TXR_BASE + 0x440 + (_x)) +#define NFP_NET_CFG_TXR_PRIO(_x) (NFP_NET_CFG_TXR_BASE + 0x480 + (_x)) #define NFP_NET_CFG_TXR_IRQ_MOD(_x) (NFP_NET_CFG_TXR_BASE + 0x500 + \ ((_x) * 0x4)) @@ -340,11 +340,11 @@ * %NFP_NET_CFG_RXR_PRIO: Per RX ring priority (1B entries) * %NFP_NET_CFG_RXR_IRQ_MOD: Per RX ring interrupt moderation (4B entries) */ -#define NFP_NET_CFG_RXR_BASE 0x0800 -#define NFP_NET_CFG_RXR_ADDR(_x) (NFP_NET_CFG_RXR_BASE + ((_x) * 0x8)) -#define NFP_NET_CFG_RXR_SZ(_x) (NFP_NET_CFG_RXR_BASE + 0x200 + (_x)) -#define NFP_NET_CFG_RXR_VEC(_x) (NFP_NET_CFG_RXR_BASE + 0x240 + (_x)) -#define NFP_NET_CFG_RXR_PRIO(_x) (NFP_NET_CFG_RXR_BASE + 0x280 + (_x)) +#define NFP_NET_CFG_RXR_BASE 0x0800 +#define NFP_NET_CFG_RXR_ADDR(_x) (NFP_NET_CFG_RXR_BASE + ((_x) * 0x8)) +#define NFP_NET_CFG_RXR_SZ(_x) (NFP_NET_CFG_RXR_BASE + 0x200 + (_x)) +#define NFP_NET_CFG_RXR_VEC(_x) (NFP_NET_CFG_RXR_BASE + 0x240 + (_x)) +#define NFP_NET_CFG_RXR_PRIO(_x) (NFP_NET_CFG_RXR_BASE + 0x280 + (_x)) #define NFP_NET_CFG_RXR_IRQ_MOD(_x) (NFP_NET_CFG_RXR_BASE + 0x300 + \ ((_x) * 0x4)) @@ -358,36 +358,36 @@ * the MSI-X entry and the host driver must clear the register to * re-enable the interrupt. */ -#define NFP_NET_CFG_ICR_BASE 0x0c00 -#define NFP_NET_CFG_ICR(_x) (NFP_NET_CFG_ICR_BASE + (_x)) -#define NFP_NET_CFG_ICR_UNMASKED 0x0 -#define NFP_NET_CFG_ICR_RXTX 0x1 -#define NFP_NET_CFG_ICR_LSC 0x2 +#define NFP_NET_CFG_ICR_BASE 0x0c00 +#define NFP_NET_CFG_ICR(_x) (NFP_NET_CFG_ICR_BASE + (_x)) +#define NFP_NET_CFG_ICR_UNMASKED 0x0 +#define NFP_NET_CFG_ICR_RXTX 0x1 +#define NFP_NET_CFG_ICR_LSC 0x2 /** * General device stats (0x0d00 - 0x0d90) * all counters are 64bit. */ -#define NFP_NET_CFG_STATS_BASE 0x0d00 -#define NFP_NET_CFG_STATS_RX_DISCARDS (NFP_NET_CFG_STATS_BASE + 0x00) -#define NFP_NET_CFG_STATS_RX_ERRORS (NFP_NET_CFG_STATS_BASE + 0x08) -#define NFP_NET_CFG_STATS_RX_OCTETS (NFP_NET_CFG_STATS_BASE + 0x10) -#define NFP_NET_CFG_STATS_RX_UC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x18) -#define NFP_NET_CFG_STATS_RX_MC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x20) -#define NFP_NET_CFG_STATS_RX_BC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x28) -#define NFP_NET_CFG_STATS_RX_FRAMES (NFP_NET_CFG_STATS_BASE + 0x30) -#define NFP_NET_CFG_STATS_RX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x38) -#define NFP_NET_CFG_STATS_RX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x40) - -#define NFP_NET_CFG_STATS_TX_DISCARDS (NFP_NET_CFG_STATS_BASE + 0x48) -#define NFP_NET_CFG_STATS_TX_ERRORS (NFP_NET_CFG_STATS_BASE + 0x50) -#define NFP_NET_CFG_STATS_TX_OCTETS (NFP_NET_CFG_STATS_BASE + 0x58) -#define NFP_NET_CFG_STATS_TX_UC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x60) -#define NFP_NET_CFG_STATS_TX_MC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x68) -#define NFP_NET_CFG_STATS_TX_BC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x70) -#define NFP_NET_CFG_STATS_TX_FRAMES (NFP_NET_CFG_STATS_BASE + 0x78) -#define NFP_NET_CFG_STATS_TX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x80) -#define NFP_NET_CFG_STATS_TX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x88) +#define NFP_NET_CFG_STATS_BASE 0x0d00 +#define NFP_NET_CFG_STATS_RX_DISCARDS (NFP_NET_CFG_STATS_BASE + 0x00) +#define NFP_NET_CFG_STATS_RX_ERRORS (NFP_NET_CFG_STATS_BASE + 0x08) +#define NFP_NET_CFG_STATS_RX_OCTETS (NFP_NET_CFG_STATS_BASE + 0x10) +#define NFP_NET_CFG_STATS_RX_UC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x18) +#define NFP_NET_CFG_STATS_RX_MC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x20) +#define NFP_NET_CFG_STATS_RX_BC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x28) +#define NFP_NET_CFG_STATS_RX_FRAMES (NFP_NET_CFG_STATS_BASE + 0x30) +#define NFP_NET_CFG_STATS_RX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x38) +#define NFP_NET_CFG_STATS_RX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x40) + +#define NFP_NET_CFG_STATS_TX_DISCARDS (NFP_NET_CFG_STATS_BASE + 0x48) +#define NFP_NET_CFG_STATS_TX_ERRORS (NFP_NET_CFG_STATS_BASE + 0x50) +#define NFP_NET_CFG_STATS_TX_OCTETS (NFP_NET_CFG_STATS_BASE + 0x58) +#define NFP_NET_CFG_STATS_TX_UC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x60) +#define NFP_NET_CFG_STATS_TX_MC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x68) +#define NFP_NET_CFG_STATS_TX_BC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x70) +#define NFP_NET_CFG_STATS_TX_FRAMES (NFP_NET_CFG_STATS_BASE + 0x78) +#define NFP_NET_CFG_STATS_TX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x80) +#define NFP_NET_CFG_STATS_TX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x88) #define NFP_NET_CFG_STATS_APP0_FRAMES (NFP_NET_CFG_STATS_BASE + 0x90) #define NFP_NET_CFG_STATS_APP0_BYTES (NFP_NET_CFG_STATS_BASE + 0x98) @@ -404,11 +404,11 @@ * %NFP_NET_CFG_TXR_STATS: TX ring statistics (Packet and Byte count) * %NFP_NET_CFG_RXR_STATS: RX ring statistics (Packet and Byte count) */ -#define NFP_NET_CFG_TXR_STATS_BASE 0x1000 -#define NFP_NET_CFG_TXR_STATS(_x) (NFP_NET_CFG_TXR_STATS_BASE + \ +#define NFP_NET_CFG_TXR_STATS_BASE 0x1000 +#define NFP_NET_CFG_TXR_STATS(_x) (NFP_NET_CFG_TXR_STATS_BASE + \ ((_x) * 0x10)) -#define NFP_NET_CFG_RXR_STATS_BASE 0x1400 -#define NFP_NET_CFG_RXR_STATS(_x) (NFP_NET_CFG_RXR_STATS_BASE + \ +#define NFP_NET_CFG_RXR_STATS_BASE 0x1400 +#define NFP_NET_CFG_RXR_STATS(_x) (NFP_NET_CFG_RXR_STATS_BASE + \ ((_x) * 0x10)) /** @@ -444,7 +444,7 @@ * %NFP_NET_CFG_TLV_TYPE: Offset of type within the TLV * %NFP_NET_CFG_TLV_TYPE_REQUIRED: Driver must be able to parse the TLV * %NFP_NET_CFG_TLV_LENGTH: Offset of length within the TLV - * %NFP_NET_CFG_TLV_LENGTH_INC: TLV length increments + * %NFP_NET_CFG_TLV_LENGTH_INC: TLV length increments * %NFP_NET_CFG_TLV_VALUE: Offset of value with the TLV * * List of simple TLV structures, first one starts at %NFP_NET_CFG_TLV_BASE. @@ -457,12 +457,12 @@ * Note that the 4 byte TLV header is not counted in %NFP_NET_CFG_TLV_LENGTH. */ #define NFP_NET_CFG_TLV_TYPE 0x00 -#define NFP_NET_CFG_TLV_TYPE_REQUIRED 0x8000 +#define NFP_NET_CFG_TLV_TYPE_REQUIRED 0x8000 #define NFP_NET_CFG_TLV_LENGTH 0x02 #define NFP_NET_CFG_TLV_LENGTH_INC 4 #define NFP_NET_CFG_TLV_VALUE 0x04 -#define NFP_NET_CFG_TLV_HEADER_REQUIRED 0x80000000 +#define NFP_NET_CFG_TLV_HEADER_REQUIRED 0x80000000 #define NFP_NET_CFG_TLV_HEADER_TYPE 0x7fff0000 #define NFP_NET_CFG_TLV_HEADER_LENGTH 0x0000ffff diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h index 96a27b00c90e..b81f4faf7b10 100644 --- a/drivers/net/ethernet/renesas/ravb.h +++ b/drivers/net/ethernet/renesas/ravb.h @@ -1018,6 +1018,7 @@ struct ravb_private { u32 dirty_rx[NUM_RX_QUEUE]; /* Producer ring indices */ u32 cur_tx[NUM_TX_QUEUE]; u32 dirty_tx[NUM_TX_QUEUE]; + u32 rx_buf_sz; /* Based on MTU+slack. */ struct napi_struct napi[NUM_RX_QUEUE]; struct work_struct work; /* MII transceiver section. */ diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index a95fbd5510d9..54a6265da7a0 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -238,7 +238,7 @@ static void ravb_ring_free(struct net_device *ndev, int q) le32_to_cpu(desc->dptr))) dma_unmap_single(ndev->dev.parent, le32_to_cpu(desc->dptr), - PKT_BUF_SZ, + priv->rx_buf_sz, DMA_FROM_DEVICE); } ring_size = sizeof(struct ravb_ex_rx_desc) * @@ -300,9 +300,9 @@ static void ravb_ring_format(struct net_device *ndev, int q) for (i = 0; i < priv->num_rx_ring[q]; i++) { /* RX descriptor */ rx_desc = &priv->rx_ring[q][i]; - rx_desc->ds_cc = cpu_to_le16(PKT_BUF_SZ); + rx_desc->ds_cc = cpu_to_le16(priv->rx_buf_sz); dma_addr = dma_map_single(ndev->dev.parent, priv->rx_skb[q][i]->data, - PKT_BUF_SZ, + priv->rx_buf_sz, DMA_FROM_DEVICE); /* We just set the data size to 0 for a failed mapping which * should prevent DMA from happening... @@ -346,6 +346,10 @@ static int ravb_ring_init(struct net_device *ndev, int q) int ring_size; int i; + /* +16 gets room from the status from the card. */ + priv->rx_buf_sz = (ndev->mtu <= 1492 ? PKT_BUF_SZ : ndev->mtu) + + ETH_HLEN + VLAN_HLEN; + /* Allocate RX and TX skb rings */ priv->rx_skb[q] = kcalloc(priv->num_rx_ring[q], sizeof(*priv->rx_skb[q]), GFP_KERNEL); @@ -355,7 +359,7 @@ static int ravb_ring_init(struct net_device *ndev, int q) goto error; for (i = 0; i < priv->num_rx_ring[q]; i++) { - skb = netdev_alloc_skb(ndev, PKT_BUF_SZ + RAVB_ALIGN - 1); + skb = netdev_alloc_skb(ndev, priv->rx_buf_sz + RAVB_ALIGN - 1); if (!skb) goto error; ravb_set_buffer_align(skb); @@ -586,7 +590,7 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q) skb = priv->rx_skb[q][entry]; priv->rx_skb[q][entry] = NULL; dma_unmap_single(ndev->dev.parent, le32_to_cpu(desc->dptr), - PKT_BUF_SZ, + priv->rx_buf_sz, DMA_FROM_DEVICE); get_ts &= (q == RAVB_NC) ? RAVB_RXTSTAMP_TYPE_V2_L2_EVENT : @@ -619,11 +623,12 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q) for (; priv->cur_rx[q] - priv->dirty_rx[q] > 0; priv->dirty_rx[q]++) { entry = priv->dirty_rx[q] % priv->num_rx_ring[q]; desc = &priv->rx_ring[q][entry]; - desc->ds_cc = cpu_to_le16(PKT_BUF_SZ); + desc->ds_cc = cpu_to_le16(priv->rx_buf_sz); if (!priv->rx_skb[q][entry]) { skb = netdev_alloc_skb(ndev, - PKT_BUF_SZ + RAVB_ALIGN - 1); + priv->rx_buf_sz + + RAVB_ALIGN - 1); if (!skb) break; /* Better luck next round. */ ravb_set_buffer_align(skb); @@ -1854,6 +1859,17 @@ static int ravb_do_ioctl(struct net_device *ndev, struct ifreq *req, int cmd) return phy_mii_ioctl(phydev, req, cmd); } +static int ravb_change_mtu(struct net_device *ndev, int new_mtu) +{ + if (netif_running(ndev)) + return -EBUSY; + + ndev->mtu = new_mtu; + netdev_update_features(ndev); + + return 0; +} + static void ravb_set_rx_csum(struct net_device *ndev, bool enable) { struct ravb_private *priv = netdev_priv(ndev); @@ -1895,6 +1911,7 @@ static const struct net_device_ops ravb_netdev_ops = { .ndo_set_rx_mode = ravb_set_rx_mode, .ndo_tx_timeout = ravb_tx_timeout, .ndo_do_ioctl = ravb_do_ioctl, + .ndo_change_mtu = ravb_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, .ndo_set_features = ravb_set_features, @@ -2117,6 +2134,9 @@ static int ravb_probe(struct platform_device *pdev) goto out_release; } + ndev->max_mtu = 2048 - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN); + ndev->min_mtu = ETH_MIN_MTU; + /* Set function */ ndev->netdev_ops = &ravb_netdev_ops; ndev->ethtool_ops = &ravb_ethtool_ops; diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 92dcf8717fc6..d7d5a6d15219 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -961,20 +961,16 @@ static void sh_eth_set_default_cpu_data(struct sh_eth_cpu_data *cd) static int sh_eth_check_reset(struct net_device *ndev) { - int ret = 0; - int cnt = 100; + int cnt; - while (cnt > 0) { + for (cnt = 100; cnt > 0; cnt--) { if (!(sh_eth_read(ndev, EDMR) & EDMR_SRST_GETHER)) - break; + return 0; mdelay(1); - cnt--; - } - if (cnt <= 0) { - netdev_err(ndev, "Device reset failed\n"); - ret = -ETIMEDOUT; } - return ret; + + netdev_err(ndev, "Device reset failed\n"); + return -ETIMEDOUT; } static int sh_eth_reset(struct net_device *ndev) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c index 5270d26f0bc6..2d5d4aea3bcb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c @@ -48,26 +48,18 @@ #define MUX_CLK_NUM_PARENTS 2 struct meson8b_dwmac { - struct platform_device *pdev; - + struct device *dev; void __iomem *regs; - phy_interface_t phy_mode; + struct clk *rgmii_tx_clk; + u32 tx_delay_ns; +}; +struct meson8b_dwmac_clk_configs { struct clk_mux m250_mux; - struct clk *m250_mux_clk; - struct clk *m250_mux_parent[MUX_CLK_NUM_PARENTS]; - struct clk_divider m250_div; - struct clk *m250_div_clk; - struct clk_fixed_factor fixed_div2; - struct clk *fixed_div2_clk; - struct clk_gate rgmii_tx_en; - struct clk *rgmii_tx_en_clk; - - u32 tx_delay_ns; }; static void meson8b_dwmac_mask_bits(struct meson8b_dwmac *dwmac, u32 reg, @@ -82,106 +74,99 @@ static void meson8b_dwmac_mask_bits(struct meson8b_dwmac *dwmac, u32 reg, writel(data, dwmac->regs + reg); } -static int meson8b_init_rgmii_tx_clk(struct meson8b_dwmac *dwmac) +static struct clk *meson8b_dwmac_register_clk(struct meson8b_dwmac *dwmac, + const char *name_suffix, + const char **parent_names, + int num_parents, + const struct clk_ops *ops, + struct clk_hw *hw) { struct clk_init_data init; - int i, ret; - struct device *dev = &dwmac->pdev->dev; char clk_name[32]; - const char *clk_div_parents[1]; - const char *mux_parent_names[MUX_CLK_NUM_PARENTS]; + + snprintf(clk_name, sizeof(clk_name), "%s#%s", dev_name(dwmac->dev), + name_suffix); + + init.name = clk_name; + init.ops = ops; + init.flags = CLK_SET_RATE_PARENT; + init.parent_names = parent_names; + init.num_parents = num_parents; + + hw->init = &init; + + return devm_clk_register(dwmac->dev, hw); +} + +static int meson8b_init_rgmii_tx_clk(struct meson8b_dwmac *dwmac) +{ + int i, ret; + struct clk *clk; + struct device *dev = dwmac->dev; + const char *parent_name, *mux_parent_names[MUX_CLK_NUM_PARENTS]; + struct meson8b_dwmac_clk_configs *clk_configs; + + clk_configs = devm_kzalloc(dev, sizeof(*clk_configs), GFP_KERNEL); + if (!clk_configs) + return -ENOMEM; /* get the mux parents from DT */ for (i = 0; i < MUX_CLK_NUM_PARENTS; i++) { char name[16]; snprintf(name, sizeof(name), "clkin%d", i); - dwmac->m250_mux_parent[i] = devm_clk_get(dev, name); - if (IS_ERR(dwmac->m250_mux_parent[i])) { - ret = PTR_ERR(dwmac->m250_mux_parent[i]); + clk = devm_clk_get(dev, name); + if (IS_ERR(clk)) { + ret = PTR_ERR(clk); if (ret != -EPROBE_DEFER) dev_err(dev, "Missing clock %s\n", name); return ret; } - mux_parent_names[i] = - __clk_get_name(dwmac->m250_mux_parent[i]); + mux_parent_names[i] = __clk_get_name(clk); } - /* create the m250_mux */ - snprintf(clk_name, sizeof(clk_name), "%s#m250_sel", dev_name(dev)); - init.name = clk_name; - init.ops = &clk_mux_ops; - init.flags = CLK_SET_RATE_PARENT; - init.parent_names = mux_parent_names; - init.num_parents = MUX_CLK_NUM_PARENTS; - - dwmac->m250_mux.reg = dwmac->regs + PRG_ETH0; - dwmac->m250_mux.shift = PRG_ETH0_CLK_M250_SEL_SHIFT; - dwmac->m250_mux.mask = PRG_ETH0_CLK_M250_SEL_MASK; - dwmac->m250_mux.flags = 0; - dwmac->m250_mux.table = NULL; - dwmac->m250_mux.hw.init = &init; - - dwmac->m250_mux_clk = devm_clk_register(dev, &dwmac->m250_mux.hw); - if (WARN_ON(IS_ERR(dwmac->m250_mux_clk))) - return PTR_ERR(dwmac->m250_mux_clk); - - /* create the m250_div */ - snprintf(clk_name, sizeof(clk_name), "%s#m250_div", dev_name(dev)); - init.name = devm_kstrdup(dev, clk_name, GFP_KERNEL); - init.ops = &clk_divider_ops; - init.flags = CLK_SET_RATE_PARENT; - clk_div_parents[0] = __clk_get_name(dwmac->m250_mux_clk); - init.parent_names = clk_div_parents; - init.num_parents = ARRAY_SIZE(clk_div_parents); - - dwmac->m250_div.reg = dwmac->regs + PRG_ETH0; - dwmac->m250_div.shift = PRG_ETH0_CLK_M250_DIV_SHIFT; - dwmac->m250_div.width = PRG_ETH0_CLK_M250_DIV_WIDTH; - dwmac->m250_div.hw.init = &init; - dwmac->m250_div.flags = CLK_DIVIDER_ONE_BASED | + clk_configs->m250_mux.reg = dwmac->regs + PRG_ETH0; + clk_configs->m250_mux.shift = PRG_ETH0_CLK_M250_SEL_SHIFT; + clk_configs->m250_mux.mask = PRG_ETH0_CLK_M250_SEL_MASK; + clk = meson8b_dwmac_register_clk(dwmac, "m250_sel", mux_parent_names, + MUX_CLK_NUM_PARENTS, &clk_mux_ops, + &clk_configs->m250_mux.hw); + if (WARN_ON(IS_ERR(clk))) + return PTR_ERR(clk); + + parent_name = __clk_get_name(clk); + clk_configs->m250_div.reg = dwmac->regs + PRG_ETH0; + clk_configs->m250_div.shift = PRG_ETH0_CLK_M250_DIV_SHIFT; + clk_configs->m250_div.width = PRG_ETH0_CLK_M250_DIV_WIDTH; + clk_configs->m250_div.flags = CLK_DIVIDER_ONE_BASED | CLK_DIVIDER_ALLOW_ZERO | CLK_DIVIDER_ROUND_CLOSEST; - - dwmac->m250_div_clk = devm_clk_register(dev, &dwmac->m250_div.hw); - if (WARN_ON(IS_ERR(dwmac->m250_div_clk))) - return PTR_ERR(dwmac->m250_div_clk); - - /* create the fixed_div2 */ - snprintf(clk_name, sizeof(clk_name), "%s#fixed_div2", dev_name(dev)); - init.name = devm_kstrdup(dev, clk_name, GFP_KERNEL); - init.ops = &clk_fixed_factor_ops; - init.flags = CLK_SET_RATE_PARENT; - clk_div_parents[0] = __clk_get_name(dwmac->m250_div_clk); - init.parent_names = clk_div_parents; - init.num_parents = ARRAY_SIZE(clk_div_parents); - - dwmac->fixed_div2.mult = 1; - dwmac->fixed_div2.div = 2; - dwmac->fixed_div2.hw.init = &init; - - dwmac->fixed_div2_clk = devm_clk_register(dev, &dwmac->fixed_div2.hw); - if (WARN_ON(IS_ERR(dwmac->fixed_div2_clk))) - return PTR_ERR(dwmac->fixed_div2_clk); - - /* create the rgmii_tx_en */ - init.name = devm_kasprintf(dev, GFP_KERNEL, "%s#rgmii_tx_en", - dev_name(dev)); - init.ops = &clk_gate_ops; - init.flags = CLK_SET_RATE_PARENT; - clk_div_parents[0] = __clk_get_name(dwmac->fixed_div2_clk); - init.parent_names = clk_div_parents; - init.num_parents = ARRAY_SIZE(clk_div_parents); - - dwmac->rgmii_tx_en.reg = dwmac->regs + PRG_ETH0; - dwmac->rgmii_tx_en.bit_idx = PRG_ETH0_RGMII_TX_CLK_EN; - dwmac->rgmii_tx_en.hw.init = &init; - - dwmac->rgmii_tx_en_clk = devm_clk_register(dev, - &dwmac->rgmii_tx_en.hw); - if (WARN_ON(IS_ERR(dwmac->rgmii_tx_en_clk))) - return PTR_ERR(dwmac->rgmii_tx_en_clk); + clk = meson8b_dwmac_register_clk(dwmac, "m250_div", &parent_name, 1, + &clk_divider_ops, + &clk_configs->m250_div.hw); + if (WARN_ON(IS_ERR(clk))) + return PTR_ERR(clk); + + parent_name = __clk_get_name(clk); + clk_configs->fixed_div2.mult = 1; + clk_configs->fixed_div2.div = 2; + clk = meson8b_dwmac_register_clk(dwmac, "fixed_div2", &parent_name, 1, + &clk_fixed_factor_ops, + &clk_configs->fixed_div2.hw); + if (WARN_ON(IS_ERR(clk))) + return PTR_ERR(clk); + + parent_name = __clk_get_name(clk); + clk_configs->rgmii_tx_en.reg = dwmac->regs + PRG_ETH0; + clk_configs->rgmii_tx_en.bit_idx = PRG_ETH0_RGMII_TX_CLK_EN; + clk = meson8b_dwmac_register_clk(dwmac, "rgmii_tx_en", &parent_name, 1, + &clk_gate_ops, + &clk_configs->rgmii_tx_en.hw); + if (WARN_ON(IS_ERR(clk))) + return PTR_ERR(clk); + + dwmac->rgmii_tx_clk = clk; return 0; } @@ -219,19 +204,23 @@ static int meson8b_init_prg_eth(struct meson8b_dwmac *dwmac) * a register) based on the line-speed (125MHz for Gbit speeds, * 25MHz for 100Mbit/s and 2.5MHz for 10Mbit/s). */ - ret = clk_set_rate(dwmac->rgmii_tx_en_clk, 125 * 1000 * 1000); + ret = clk_set_rate(dwmac->rgmii_tx_clk, 125 * 1000 * 1000); if (ret) { - dev_err(&dwmac->pdev->dev, + dev_err(dwmac->dev, "failed to set RGMII TX clock\n"); return ret; } - ret = clk_prepare_enable(dwmac->rgmii_tx_en_clk); + ret = clk_prepare_enable(dwmac->rgmii_tx_clk); if (ret) { - dev_err(&dwmac->pdev->dev, + dev_err(dwmac->dev, "failed to enable the RGMII TX clock\n"); return ret; } + + devm_add_action_or_reset(dwmac->dev, + (void(*)(void *))clk_disable_unprepare, + dwmac->rgmii_tx_clk); break; case PHY_INTERFACE_MODE_RMII: @@ -251,7 +240,7 @@ static int meson8b_init_prg_eth(struct meson8b_dwmac *dwmac) break; default: - dev_err(&dwmac->pdev->dev, "unsupported phy-mode %s\n", + dev_err(dwmac->dev, "unsupported phy-mode %s\n", phy_modes(dwmac->phy_mode)); return -EINVAL; } @@ -292,7 +281,7 @@ static int meson8b_dwmac_probe(struct platform_device *pdev) goto err_remove_config_dt; } - dwmac->pdev = pdev; + dwmac->dev = &pdev->dev; dwmac->phy_mode = of_get_phy_mode(pdev->dev.of_node); if (dwmac->phy_mode < 0) { dev_err(&pdev->dev, "missing phy-mode property\n"); @@ -317,29 +306,16 @@ static int meson8b_dwmac_probe(struct platform_device *pdev) ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); if (ret) - goto err_clk_disable; + goto err_remove_config_dt; return 0; -err_clk_disable: - if (phy_interface_mode_is_rgmii(dwmac->phy_mode)) - clk_disable_unprepare(dwmac->rgmii_tx_en_clk); err_remove_config_dt: stmmac_remove_config_dt(pdev, plat_dat); return ret; } -static int meson8b_dwmac_remove(struct platform_device *pdev) -{ - struct meson8b_dwmac *dwmac = get_stmmac_bsp_priv(&pdev->dev); - - if (phy_interface_mode_is_rgmii(dwmac->phy_mode)) - clk_disable_unprepare(dwmac->rgmii_tx_en_clk); - - return stmmac_pltfr_remove(pdev); -} - static const struct of_device_id meson8b_dwmac_match[] = { { .compatible = "amlogic,meson8b-dwmac" }, { .compatible = "amlogic,meson-gxbb-dwmac" }, @@ -349,7 +325,7 @@ MODULE_DEVICE_TABLE(of, meson8b_dwmac_match); static struct platform_driver meson8b_dwmac_driver = { .probe = meson8b_dwmac_probe, - .remove = meson8b_dwmac_remove, + .remove = stmmac_pltfr_remove, .driver = { .name = "meson8b-dwmac", .pm = &stmmac_pltfr_pm_ops, diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 30612497643c..b97a907ea5aa 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -230,4 +230,5 @@ out: /* Registered in net/core/dev.c */ struct pernet_operations __net_initdata loopback_net_ops = { .init = loopback_net_init, + .async = true, }; diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index ab58224f897f..b3935778b19f 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -75,6 +75,8 @@ #define DP83867_IO_MUX_CFG_IO_IMPEDANCE_MAX 0x0 #define DP83867_IO_MUX_CFG_IO_IMPEDANCE_MIN 0x1f +#define DP83867_IO_MUX_CFG_CLK_O_SEL_MASK (0x1f << 8) +#define DP83867_IO_MUX_CFG_CLK_O_SEL_SHIFT 8 /* CFG4 bits */ #define DP83867_CFG4_PORT_MIRROR_EN BIT(0) @@ -92,6 +94,7 @@ struct dp83867_private { int io_impedance; int port_mirroring; bool rxctrl_strap_quirk; + int clk_output_sel; }; static int dp83867_ack_interrupt(struct phy_device *phydev) @@ -160,6 +163,14 @@ static int dp83867_of_init(struct phy_device *phydev) dp83867->io_impedance = -EINVAL; /* Optional configuration */ + ret = of_property_read_u32(of_node, "ti,clk-output-sel", + &dp83867->clk_output_sel); + if (ret || dp83867->clk_output_sel > DP83867_CLK_O_SEL_REF_CLK) + /* Keep the default value if ti,clk-output-sel is not set + * or too high + */ + dp83867->clk_output_sel = DP83867_CLK_O_SEL_REF_CLK; + if (of_property_read_bool(of_node, "ti,max-output-impedance")) dp83867->io_impedance = DP83867_IO_MUX_CFG_IO_IMPEDANCE_MAX; else if (of_property_read_bool(of_node, "ti,min-output-impedance")) @@ -295,6 +306,14 @@ static int dp83867_config_init(struct phy_device *phydev) if (dp83867->port_mirroring != DP83867_PORT_MIRROING_KEEP) dp83867_config_port_mirroring(phydev); + /* Clock output selection if muxing property is set */ + if (dp83867->clk_output_sel != DP83867_CLK_O_SEL_REF_CLK) { + val = phy_read_mmd(phydev, DP83867_DEVADDR, DP83867_IO_MUX_CFG); + val &= ~DP83867_IO_MUX_CFG_CLK_O_SEL_MASK; + val |= (dp83867->clk_output_sel << DP83867_IO_MUX_CFG_CLK_O_SEL_SHIFT); + phy_write_mmd(phydev, DP83867_DEVADDR, DP83867_IO_MUX_CFG, val); + } + return 0; } diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 5aa59f41bf8c..bd89d1c559ce 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -714,7 +714,7 @@ err_put: } static int pppoe_getname(struct socket *sock, struct sockaddr *uaddr, - int *usockaddr_len, int peer) + int peer) { int len = sizeof(struct sockaddr_pppox); struct sockaddr_pppox sp; @@ -726,9 +726,7 @@ static int pppoe_getname(struct socket *sock, struct sockaddr *uaddr, memcpy(uaddr, &sp, len); - *usockaddr_len = len; - - return 0; + return len; } static int pppoe_ioctl(struct socket *sock, unsigned int cmd, diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index 6dde9a0cfe76..8249d46a7844 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -483,7 +483,7 @@ static int pptp_connect(struct socket *sock, struct sockaddr *uservaddr, } static int pptp_getname(struct socket *sock, struct sockaddr *uaddr, - int *usockaddr_len, int peer) + int peer) { int len = sizeof(struct sockaddr_pppox); struct sockaddr_pppox sp; @@ -496,9 +496,7 @@ static int pptp_getname(struct socket *sock, struct sockaddr *uaddr, memcpy(uaddr, &sp, len); - *usockaddr_len = len; - - return 0; + return len; } static int pptp_release(struct socket *sock) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index b52258c327d2..d531954512c7 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -78,6 +78,7 @@ #include <linux/mutex.h> #include <linux/uaccess.h> +#include <linux/proc_fs.h> /* Uncomment to enable debugging */ /* #define TUN_DEBUG 1 */ @@ -2286,11 +2287,67 @@ static int tun_validate(struct nlattr *tb[], struct nlattr *data[], return -EINVAL; } +static size_t tun_get_size(const struct net_device *dev) +{ + BUILD_BUG_ON(sizeof(u32) != sizeof(uid_t)); + BUILD_BUG_ON(sizeof(u32) != sizeof(gid_t)); + + return nla_total_size(sizeof(uid_t)) + /* OWNER */ + nla_total_size(sizeof(gid_t)) + /* GROUP */ + nla_total_size(sizeof(u8)) + /* TYPE */ + nla_total_size(sizeof(u8)) + /* PI */ + nla_total_size(sizeof(u8)) + /* VNET_HDR */ + nla_total_size(sizeof(u8)) + /* PERSIST */ + nla_total_size(sizeof(u8)) + /* MULTI_QUEUE */ + nla_total_size(sizeof(u32)) + /* NUM_QUEUES */ + nla_total_size(sizeof(u32)) + /* NUM_DISABLED_QUEUES */ + 0; +} + +static int tun_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct tun_struct *tun = netdev_priv(dev); + + if (nla_put_u8(skb, IFLA_TUN_TYPE, tun->flags & TUN_TYPE_MASK)) + goto nla_put_failure; + if (uid_valid(tun->owner) && + nla_put_u32(skb, IFLA_TUN_OWNER, + from_kuid_munged(current_user_ns(), tun->owner))) + goto nla_put_failure; + if (gid_valid(tun->group) && + nla_put_u32(skb, IFLA_TUN_GROUP, + from_kgid_munged(current_user_ns(), tun->group))) + goto nla_put_failure; + if (nla_put_u8(skb, IFLA_TUN_PI, !(tun->flags & IFF_NO_PI))) + goto nla_put_failure; + if (nla_put_u8(skb, IFLA_TUN_VNET_HDR, !!(tun->flags & IFF_VNET_HDR))) + goto nla_put_failure; + if (nla_put_u8(skb, IFLA_TUN_PERSIST, !!(tun->flags & IFF_PERSIST))) + goto nla_put_failure; + if (nla_put_u8(skb, IFLA_TUN_MULTI_QUEUE, + !!(tun->flags & IFF_MULTI_QUEUE))) + goto nla_put_failure; + if (tun->flags & IFF_MULTI_QUEUE) { + if (nla_put_u32(skb, IFLA_TUN_NUM_QUEUES, tun->numqueues)) + goto nla_put_failure; + if (nla_put_u32(skb, IFLA_TUN_NUM_DISABLED_QUEUES, + tun->numdisabled)) + goto nla_put_failure; + } + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + static struct rtnl_link_ops tun_link_ops __read_mostly = { .kind = DRV_NAME, .priv_size = sizeof(struct tun_struct), .setup = tun_setup, .validate = tun_validate, + .get_size = tun_get_size, + .fill_info = tun_fill_info, }; static void tun_sock_write_space(struct sock *sk) @@ -2789,6 +2846,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, struct tun_struct *tun; void __user* argp = (void __user*)arg; struct ifreq ifr; + struct net *net; kuid_t owner; kgid_t group; int sndbuf; @@ -2797,7 +2855,8 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, int le; int ret; - if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || _IOC_TYPE(cmd) == SOCK_IOC_TYPE) { + if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || + (_IOC_TYPE(cmd) == SOCK_IOC_TYPE && cmd != SIOCGSKNS)) { if (copy_from_user(&ifr, argp, ifreq_len)) return -EFAULT; } else { @@ -2817,6 +2876,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, rtnl_lock(); tun = tun_get(tfile); + net = sock_net(&tfile->sk); if (cmd == TUNSETIFF) { ret = -EEXIST; if (tun) @@ -2824,7 +2884,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, ifr.ifr_name[IFNAMSIZ-1] = '\0'; - ret = tun_set_iff(sock_net(&tfile->sk), file, &ifr); + ret = tun_set_iff(net, file, &ifr); if (ret) goto unlock; @@ -2846,6 +2906,14 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, tfile->ifindex = ifindex; goto unlock; } + if (cmd == SIOCGSKNS) { + ret = -EPERM; + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + goto unlock; + + ret = open_related_ns(&net->ns, get_net_ns); + goto unlock; + } ret = -EBADFD; if (!tun) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 139c61c8244a..239c78c53e58 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -736,7 +736,6 @@ static int vrf_rtable_create(struct net_device *dev) return -ENOMEM; rth->dst.output = vrf_output; - rth->rt_table_id = vrf->tb_id; rcu_assign_pointer(vrf->rth, rth); diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index 6198559abbd8..0ad00dbf912d 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -732,7 +732,7 @@ static int iscsi_sw_tcp_conn_get_param(struct iscsi_cls_conn *cls_conn, struct iscsi_tcp_conn *tcp_conn = conn->dd_data; struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data; struct sockaddr_in6 addr; - int rc, len; + int rc; switch(param) { case ISCSI_PARAM_CONN_PORT: @@ -745,12 +745,12 @@ static int iscsi_sw_tcp_conn_get_param(struct iscsi_cls_conn *cls_conn, } if (param == ISCSI_PARAM_LOCAL_PORT) rc = kernel_getsockname(tcp_sw_conn->sock, - (struct sockaddr *)&addr, &len); + (struct sockaddr *)&addr); else rc = kernel_getpeername(tcp_sw_conn->sock, - (struct sockaddr *)&addr, &len); + (struct sockaddr *)&addr); spin_unlock_bh(&conn->session->frwd_lock); - if (rc) + if (rc < 0) return rc; return iscsi_conn_get_addr_param((struct sockaddr_storage *) @@ -771,7 +771,7 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost, struct iscsi_tcp_conn *tcp_conn; struct iscsi_sw_tcp_conn *tcp_sw_conn; struct sockaddr_in6 addr; - int rc, len; + int rc; switch (param) { case ISCSI_HOST_PARAM_IPADDRESS: @@ -793,9 +793,9 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost, } rc = kernel_getsockname(tcp_sw_conn->sock, - (struct sockaddr *)&addr, &len); + (struct sockaddr *)&addr); spin_unlock_bh(&session->frwd_lock); - if (rc) + if (rc < 0) return rc; return iscsi_conn_get_addr_param((struct sockaddr_storage *) diff --git a/drivers/soc/qcom/qmi_interface.c b/drivers/soc/qcom/qmi_interface.c index 877611d5c42b..321982277697 100644 --- a/drivers/soc/qcom/qmi_interface.c +++ b/drivers/soc/qcom/qmi_interface.c @@ -586,7 +586,6 @@ static struct socket *qmi_sock_create(struct qmi_handle *qmi, struct sockaddr_qrtr *sq) { struct socket *sock; - int sl = sizeof(*sq); int ret; ret = sock_create_kern(&init_net, AF_QIPCRTR, SOCK_DGRAM, @@ -594,7 +593,7 @@ static struct socket *qmi_sock_create(struct qmi_handle *qmi, if (ret < 0) return ERR_PTR(ret); - ret = kernel_getsockname(sock, (struct sockaddr *)sq, &sl); + ret = kernel_getsockname(sock, (struct sockaddr *)sq); if (ret < 0) { sock_release(sock); return ERR_PTR(ret); diff --git a/drivers/staging/ipx/af_ipx.c b/drivers/staging/ipx/af_ipx.c index d21a9d128d3e..5703dd176787 100644 --- a/drivers/staging/ipx/af_ipx.c +++ b/drivers/staging/ipx/af_ipx.c @@ -1577,7 +1577,7 @@ out: static int ipx_getname(struct socket *sock, struct sockaddr *uaddr, - int *uaddr_len, int peer) + int peer) { struct ipx_address *addr; struct sockaddr_ipx sipx; @@ -1585,8 +1585,6 @@ static int ipx_getname(struct socket *sock, struct sockaddr *uaddr, struct ipx_sock *ipxs = ipx_sk(sk); int rc; - *uaddr_len = sizeof(struct sockaddr_ipx); - lock_sock(sk); if (peer) { rc = -ENOTCONN; @@ -1620,7 +1618,7 @@ static int ipx_getname(struct socket *sock, struct sockaddr *uaddr, sipx.sipx_zero = 0; memcpy(uaddr, &sipx, sizeof(sipx)); - rc = 0; + rc = sizeof(struct sockaddr_ipx); out: release_sock(sk); return rc; diff --git a/drivers/staging/irda/net/af_irda.c b/drivers/staging/irda/net/af_irda.c index 2f1e9ab3d6d0..c13553a9ee11 100644 --- a/drivers/staging/irda/net/af_irda.c +++ b/drivers/staging/irda/net/af_irda.c @@ -697,7 +697,7 @@ static int irda_discover_daddr_and_lsap_sel(struct irda_sock *self, char *name) * */ static int irda_getname(struct socket *sock, struct sockaddr *uaddr, - int *uaddr_len, int peer) + int peer) { struct sockaddr_irda saddr; struct sock *sk = sock->sk; @@ -720,11 +720,9 @@ static int irda_getname(struct socket *sock, struct sockaddr *uaddr, pr_debug("%s(), tsap_sel = %#x\n", __func__, saddr.sir_lsap_sel); pr_debug("%s(), addr = %08x\n", __func__, saddr.sir_addr); - /* uaddr_len come to us uninitialised */ - *uaddr_len = sizeof (struct sockaddr_irda); - memcpy(uaddr, &saddr, *uaddr_len); + memcpy(uaddr, &saddr, sizeof (struct sockaddr_irda)); - return 0; + return sizeof (struct sockaddr_irda); } /* diff --git a/drivers/staging/lustre/lnet/lnet/lib-socket.c b/drivers/staging/lustre/lnet/lnet/lib-socket.c index ce93806eefca..1bee667802b0 100644 --- a/drivers/staging/lustre/lnet/lnet/lib-socket.c +++ b/drivers/staging/lustre/lnet/lnet/lib-socket.c @@ -448,14 +448,13 @@ int lnet_sock_getaddr(struct socket *sock, bool remote, __u32 *ip, int *port) { struct sockaddr_in sin; - int len = sizeof(sin); int rc; if (remote) - rc = kernel_getpeername(sock, (struct sockaddr *)&sin, &len); + rc = kernel_getpeername(sock, (struct sockaddr *)&sin); else - rc = kernel_getsockname(sock, (struct sockaddr *)&sin, &len); - if (rc) { + rc = kernel_getsockname(sock, (struct sockaddr *)&sin); + if (rc < 0) { CERROR("Error %d getting sock %s IP/port\n", rc, remote ? "peer" : "local"); return rc; diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index 64c5a57b92e4..99501785cdc1 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -1020,7 +1020,7 @@ int iscsit_accept_np(struct iscsi_np *np, struct iscsi_conn *conn) struct socket *new_sock, *sock = np->np_socket; struct sockaddr_in sock_in; struct sockaddr_in6 sock_in6; - int rc, err; + int rc; rc = kernel_accept(sock, &new_sock, 0); if (rc < 0) @@ -1033,8 +1033,8 @@ int iscsit_accept_np(struct iscsi_np *np, struct iscsi_conn *conn) memset(&sock_in6, 0, sizeof(struct sockaddr_in6)); rc = conn->sock->ops->getname(conn->sock, - (struct sockaddr *)&sock_in6, &err, 1); - if (!rc) { + (struct sockaddr *)&sock_in6, 1); + if (rc >= 0) { if (!ipv6_addr_v4mapped(&sock_in6.sin6_addr)) { memcpy(&conn->login_sockaddr, &sock_in6, sizeof(sock_in6)); } else { @@ -1047,8 +1047,8 @@ int iscsit_accept_np(struct iscsi_np *np, struct iscsi_conn *conn) } rc = conn->sock->ops->getname(conn->sock, - (struct sockaddr *)&sock_in6, &err, 0); - if (!rc) { + (struct sockaddr *)&sock_in6, 0); + if (rc >= 0) { if (!ipv6_addr_v4mapped(&sock_in6.sin6_addr)) { memcpy(&conn->local_sockaddr, &sock_in6, sizeof(sock_in6)); } else { @@ -1063,13 +1063,13 @@ int iscsit_accept_np(struct iscsi_np *np, struct iscsi_conn *conn) memset(&sock_in, 0, sizeof(struct sockaddr_in)); rc = conn->sock->ops->getname(conn->sock, - (struct sockaddr *)&sock_in, &err, 1); - if (!rc) + (struct sockaddr *)&sock_in, 1); + if (rc >= 0) memcpy(&conn->login_sockaddr, &sock_in, sizeof(sock_in)); rc = conn->sock->ops->getname(conn->sock, - (struct sockaddr *)&sock_in, &err, 0); - if (!rc) + (struct sockaddr *)&sock_in, 0); + if (rc >= 0) memcpy(&conn->local_sockaddr, &sock_in, sizeof(sock_in)); } diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 610cba276d47..b5fb56b822fd 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -1038,7 +1038,7 @@ static struct socket *get_raw_socket(int fd) struct sockaddr_ll sa; char buf[MAX_ADDR_LEN]; } uaddr; - int uaddr_len = sizeof uaddr, r; + int r; struct socket *sock = sockfd_lookup(fd, &r); if (!sock) @@ -1050,9 +1050,8 @@ static struct socket *get_raw_socket(int fd) goto err; } - r = sock->ops->getname(sock, (struct sockaddr *)&uaddr.sa, - &uaddr_len, 0); - if (r) + r = sock->ops->getname(sock, (struct sockaddr *)&uaddr.sa, 0); + if (r < 0) goto err; if (uaddr.sa.sll_family != AF_PACKET) { diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index cff79ea0c01d..5243989a60cc 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -482,7 +482,6 @@ static void lowcomms_error_report(struct sock *sk) { struct connection *con; struct sockaddr_storage saddr; - int buflen; void (*orig_report)(struct sock *) = NULL; read_lock_bh(&sk->sk_callback_lock); @@ -492,7 +491,7 @@ static void lowcomms_error_report(struct sock *sk) orig_report = listen_sock.sk_error_report; if (con->sock == NULL || - kernel_getpeername(con->sock, (struct sockaddr *)&saddr, &buflen)) { + kernel_getpeername(con->sock, (struct sockaddr *)&saddr) < 0) { printk_ratelimited(KERN_ERR "dlm: node %d: socket error " "sending to node %d, port %d, " "sk_err=%d/%d\n", dlm_our_nodeid(), @@ -757,8 +756,8 @@ static int tcp_accept_from_sock(struct connection *con) /* Get the connected socket's peer */ memset(&peeraddr, 0, sizeof(peeraddr)); - if (newsock->ops->getname(newsock, (struct sockaddr *)&peeraddr, - &len, 2)) { + len = newsock->ops->getname(newsock, (struct sockaddr *)&peeraddr, 2); + if (len < 0) { result = -ECONNABORTED; goto accept_err; } diff --git a/fs/nsfs.c b/fs/nsfs.c index 36b0772701a0..60702d677bd4 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -184,6 +184,7 @@ int open_related_ns(struct ns_common *ns, return fd; } +EXPORT_SYMBOL_GPL(open_related_ns); static long ns_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index eac5140aac47..e5076185cc1e 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -1819,7 +1819,7 @@ int o2net_register_hb_callbacks(void) static int o2net_accept_one(struct socket *sock, int *more) { - int ret, slen; + int ret; struct sockaddr_in sin; struct socket *new_sock = NULL; struct o2nm_node *node = NULL; @@ -1864,9 +1864,7 @@ static int o2net_accept_one(struct socket *sock, int *more) goto out; } - slen = sizeof(sin); - ret = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin, - &slen, 1); + ret = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin, 1); if (ret < 0) goto out; diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 68c06ae7888c..da6f8733c9c5 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -237,6 +237,7 @@ static __net_exit void proc_net_ns_exit(struct net *net) static struct pernet_operations __net_initdata proc_net_ns_ops = { .init = proc_net_ns_init, .exit = proc_net_ns_exit, + .async = true, }; int __init proc_net_init(void) diff --git a/include/dt-bindings/net/ti-dp83867.h b/include/dt-bindings/net/ti-dp83867.h index 172744a72eb7..7b1656427cbe 100644 --- a/include/dt-bindings/net/ti-dp83867.h +++ b/include/dt-bindings/net/ti-dp83867.h @@ -42,4 +42,18 @@ #define DP83867_RGMIIDCTL_3_75_NS 0xe #define DP83867_RGMIIDCTL_4_00_NS 0xf +/* IO_MUX_CFG - Clock output selection */ +#define DP83867_CLK_O_SEL_CHN_A_RCLK 0x0 +#define DP83867_CLK_O_SEL_CHN_B_RCLK 0x1 +#define DP83867_CLK_O_SEL_CHN_C_RCLK 0x2 +#define DP83867_CLK_O_SEL_CHN_D_RCLK 0x3 +#define DP83867_CLK_O_SEL_CHN_A_RCLK_DIV5 0x4 +#define DP83867_CLK_O_SEL_CHN_B_RCLK_DIV5 0x5 +#define DP83867_CLK_O_SEL_CHN_C_RCLK_DIV5 0x6 +#define DP83867_CLK_O_SEL_CHN_D_RCLK_DIV5 0x7 +#define DP83867_CLK_O_SEL_CHN_A_TCLK 0x8 +#define DP83867_CLK_O_SEL_CHN_B_TCLK 0x9 +#define DP83867_CLK_O_SEL_CHN_C_TCLK 0xA +#define DP83867_CLK_O_SEL_CHN_D_TCLK 0xB +#define DP83867_CLK_O_SEL_REF_CLK 0xC #endif diff --git a/include/linux/atalk.h b/include/linux/atalk.h index 4d356e168692..40373920ea58 100644 --- a/include/linux/atalk.h +++ b/include/linux/atalk.h @@ -113,10 +113,12 @@ extern void aarp_proto_init(void); /* Inter module exports */ /* Give a device find its atif control structure */ +#if IS_ENABLED(CONFIG_IRDA) || IS_ENABLED(CONFIG_ATALK) static inline struct atalk_iface *atalk_find_dev(struct net_device *dev) { return dev->atalk_ptr; } +#endif extern struct atalk_addr *atalk_find_dev_addr(struct net_device *dev); extern struct net_device *atrtr_get_dev(struct atalk_addr *sa); diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 3ce61342fa31..b0a7f315bfbe 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -136,15 +136,21 @@ enum virtchnl_ops { VIRTCHNL_OP_ENABLE_VLAN_STRIPPING = 27, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING = 28, VIRTCHNL_OP_REQUEST_QUEUES = 29, + VIRTCHNL_OP_ENABLE_CHANNELS = 30, + VIRTCHNL_OP_DISABLE_CHANNELS = 31, + VIRTCHNL_OP_ADD_CLOUD_FILTER = 32, + VIRTCHNL_OP_DEL_CLOUD_FILTER = 33, }; -/* This macro is used to generate a compilation error if a structure +/* These macros are used to generate compilation errors if a structure/union * is not exactly the correct length. It gives a divide by zero error if the - * structure is not of the correct size, otherwise it creates an enum that is - * never used. + * structure/union is not of the correct size, otherwise it creates an enum + * that is never used. */ #define VIRTCHNL_CHECK_STRUCT_LEN(n, X) enum virtchnl_static_assert_enum_##X \ { virtchnl_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 1 : 0) } +#define VIRTCHNL_CHECK_UNION_LEN(n, X) enum virtchnl_static_asset_enum_##X \ + { virtchnl_static_assert_##X = (n)/((sizeof(union X) == (n)) ? 1 : 0) } /* Virtual channel message descriptor. This overlays the admin queue * descriptor. All other data is passed in external buffers. @@ -244,6 +250,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); #define VIRTCHNL_VF_OFFLOAD_ENCAP 0X00100000 #define VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM 0X00200000 #define VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM 0X00400000 +#define VIRTCHNL_VF_OFFLOAD_ADQ 0X00800000 #define VF_BASE_MODE_OFFLOADS (VIRTCHNL_VF_OFFLOAD_L2 | \ VIRTCHNL_VF_OFFLOAD_VLAN | \ @@ -496,6 +503,81 @@ struct virtchnl_rss_hena { VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_rss_hena); +/* VIRTCHNL_OP_ENABLE_CHANNELS + * VIRTCHNL_OP_DISABLE_CHANNELS + * VF sends these messages to enable or disable channels based on + * the user specified queue count and queue offset for each traffic class. + * This struct encompasses all the information that the PF needs from + * VF to create a channel. + */ +struct virtchnl_channel_info { + u16 count; /* number of queues in a channel */ + u16 offset; /* queues in a channel start from 'offset' */ + u32 pad; + u64 max_tx_rate; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_channel_info); + +struct virtchnl_tc_info { + u32 num_tc; + u32 pad; + struct virtchnl_channel_info list[1]; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(24, virtchnl_tc_info); + +/* VIRTCHNL_ADD_CLOUD_FILTER + * VIRTCHNL_DEL_CLOUD_FILTER + * VF sends these messages to add or delete a cloud filter based on the + * user specified match and action filters. These structures encompass + * all the information that the PF needs from the VF to add/delete a + * cloud filter. + */ + +struct virtchnl_l4_spec { + u8 src_mac[ETH_ALEN]; + u8 dst_mac[ETH_ALEN]; + __be16 vlan_id; + __be16 pad; /* reserved for future use */ + __be32 src_ip[4]; + __be32 dst_ip[4]; + __be16 src_port; + __be16 dst_port; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(52, virtchnl_l4_spec); + +union virtchnl_flow_spec { + struct virtchnl_l4_spec tcp_spec; + u8 buffer[128]; /* reserved for future use */ +}; + +VIRTCHNL_CHECK_UNION_LEN(128, virtchnl_flow_spec); + +enum virtchnl_action { + /* action types */ + VIRTCHNL_ACTION_DROP = 0, + VIRTCHNL_ACTION_TC_REDIRECT, +}; + +enum virtchnl_flow_type { + /* flow types */ + VIRTCHNL_TCP_V4_FLOW = 0, + VIRTCHNL_TCP_V6_FLOW, +}; + +struct virtchnl_filter { + union virtchnl_flow_spec data; + union virtchnl_flow_spec mask; + enum virtchnl_flow_type flow_type; + enum virtchnl_action action; + u32 action_meta; + __u8 field_flags; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(272, virtchnl_filter); + /* VIRTCHNL_OP_EVENT * PF sends this message to inform the VF driver of events that may affect it. * No direct response is expected from the VF, though it may generate other @@ -711,6 +793,25 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, case VIRTCHNL_OP_REQUEST_QUEUES: valid_len = sizeof(struct virtchnl_vf_res_request); break; + case VIRTCHNL_OP_ENABLE_CHANNELS: + valid_len = sizeof(struct virtchnl_tc_info); + if (msglen >= valid_len) { + struct virtchnl_tc_info *vti = + (struct virtchnl_tc_info *)msg; + valid_len += vti->num_tc * + sizeof(struct virtchnl_channel_info); + if (vti->num_tc == 0) + err_msg_format = true; + } + break; + case VIRTCHNL_OP_DISABLE_CHANNELS: + break; + case VIRTCHNL_OP_ADD_CLOUD_FILTER: + valid_len = sizeof(struct virtchnl_filter); + break; + case VIRTCHNL_OP_DEL_CLOUD_FILTER: + valid_len = sizeof(struct virtchnl_filter); + break; /* These are always errors coming from the VF. */ case VIRTCHNL_OP_EVENT: case VIRTCHNL_OP_UNKNOWN: diff --git a/include/linux/net.h b/include/linux/net.h index 91216b16feb7..000d1aada74f 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -146,7 +146,7 @@ struct proto_ops { struct socket *newsock, int flags, bool kern); int (*getname) (struct socket *sock, struct sockaddr *addr, - int *sockaddr_len, int peer); + int peer); __poll_t (*poll) (struct file *file, struct socket *sock, struct poll_table_struct *wait); int (*ioctl) (struct socket *sock, unsigned int cmd, @@ -294,10 +294,8 @@ int kernel_listen(struct socket *sock, int backlog); int kernel_accept(struct socket *sock, struct socket **newsock, int flags); int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, int flags); -int kernel_getsockname(struct socket *sock, struct sockaddr *addr, - int *addrlen); -int kernel_getpeername(struct socket *sock, struct sockaddr *addr, - int *addrlen); +int kernel_getsockname(struct socket *sock, struct sockaddr *addr); +int kernel_getpeername(struct socket *sock, struct sockaddr *addr); int kernel_getsockopt(struct socket *sock, int level, int optname, char *optval, int *optlen); int kernel_setsockopt(struct socket *sock, int level, int optname, char *optval, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5eef6c8e2741..dbe6344b727a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1798,11 +1798,17 @@ struct net_device { #if IS_ENABLED(CONFIG_TIPC) struct tipc_bearer __rcu *tipc_ptr; #endif +#if IS_ENABLED(CONFIG_IRDA) || IS_ENABLED(CONFIG_ATALK) void *atalk_ptr; +#endif struct in_device __rcu *ip_ptr; +#if IS_ENABLED(CONFIG_DECNET) struct dn_dev __rcu *dn_ptr; +#endif struct inet6_dev __rcu *ip6_ptr; +#if IS_ENABLED(CONFIG_AX25) void *ax25_ptr; +#endif struct wireless_dev *ieee80211_ptr; struct wpan_dev *ieee802154_ptr; #if IS_ENABLED(CONFIG_MPLS_ROUTING) diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h index a079656b614c..059242030631 100644 --- a/include/linux/ptp_classify.h +++ b/include/linux/ptp_classify.h @@ -75,5 +75,9 @@ void __init ptp_classifier_init(void); static inline void ptp_classifier_init(void) { } +static inline unsigned int ptp_classify_raw(struct sk_buff *skb) +{ + return PTP_CLASS_NONE; +} #endif #endif /* _PTP_CLASSIFY_H_ */ diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index e6335227b844..6894976b54e3 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -296,13 +296,14 @@ static inline void *__ptr_ring_consume(struct ptr_ring *r) { void *ptr; + /* The READ_ONCE in __ptr_ring_peek guarantees that anyone + * accessing data through the pointer is up to date. Pairs + * with smp_wmb in __ptr_ring_produce. + */ ptr = __ptr_ring_peek(r); if (ptr) __ptr_ring_discard_one(r); - /* Make sure anyone accessing data through the pointer is up to date. */ - /* Pairs with smp_wmb in __ptr_ring_produce. */ - smp_read_barrier_depends(); return ptr; } diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 1fdcde96eb65..e9ee9ad0a681 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -36,6 +36,7 @@ extern int rtnl_is_locked(void); extern wait_queue_head_t netdev_unregistering_wq; extern struct mutex net_mutex; +extern struct rw_semaphore net_sem; #ifdef CONFIG_PROVE_LOCKING extern bool lockdep_rtnl_is_held(void); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c1e66bdcf583..9bc1750ca3d3 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -466,6 +466,9 @@ struct ubuf_info { #define skb_uarg(SKB) ((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg)) +int mm_account_pinned_pages(struct mmpin *mmp, size_t size); +void mm_unaccount_pinned_pages(struct mmpin *mmp); + struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size); struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size, struct ubuf_info *uarg); diff --git a/include/linux/socket.h b/include/linux/socket.h index 9286a5a8c60c..1ce1f768a58c 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -353,4 +353,6 @@ extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen unsigned int flags, struct timespec *timeout); extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, unsigned int flags); + +extern struct ns_common *get_net_ns(struct ns_common *ns); #endif /* _LINUX_SOCKET_H */ diff --git a/include/net/act_api.h b/include/net/act_api.h index 6ed9692f20bd..9c2f22695025 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -87,12 +87,15 @@ struct tc_action_ops { struct tcf_result *); int (*dump)(struct sk_buff *, struct tc_action *, int, int); void (*cleanup)(struct tc_action *); - int (*lookup)(struct net *, struct tc_action **, u32); + int (*lookup)(struct net *net, struct tc_action **a, u32 index, + struct netlink_ext_ack *extack); int (*init)(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **act, int ovr, - int bind); + int bind, struct netlink_ext_ack *extack); int (*walk)(struct net *, struct sk_buff *, - struct netlink_callback *, int, const struct tc_action_ops *); + struct netlink_callback *, int, + const struct tc_action_ops *, + struct netlink_ext_ack *); void (*stats_update)(struct tc_action *, u64, u32, u64); struct net_device *(*get_dev)(const struct tc_action *a); }; @@ -137,7 +140,8 @@ static inline void tc_action_net_exit(struct list_head *net_list, int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, struct netlink_callback *cb, int type, - const struct tc_action_ops *ops); + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack); int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index); bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a, int bind); @@ -162,10 +166,11 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, int nr_actions, struct tcf_result *res); int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, char *name, int ovr, int bind, - struct list_head *actions); + struct list_head *actions, struct netlink_ext_ack *extack); struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, - char *name, int ovr, int bind); + char *name, int ovr, int bind, + struct netlink_ext_ack *extack); int tcf_action_dump(struct sk_buff *skb, struct list_head *, int, int); int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int); int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int); diff --git a/include/net/ax25.h b/include/net/ax25.h index 76fb39c272a7..c91bc87931c7 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -318,10 +318,12 @@ void ax25_digi_invert(const ax25_digi *, ax25_digi *); extern ax25_dev *ax25_dev_list; extern spinlock_t ax25_dev_lock; +#if IS_ENABLED(CONFIG_AX25) static inline ax25_dev *ax25_dev_ax25dev(struct net_device *dev) { return dev->ax25_ptr; } +#endif ax25_dev *ax25_addr_ax25dev(ax25_address *); void ax25_dev_device_up(struct net_device *); diff --git a/include/net/dsa.h b/include/net/dsa.h index 6cb602dd970c..0ad17b63684d 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -19,6 +19,7 @@ #include <linux/workqueue.h> #include <linux/of.h> #include <linux/ethtool.h> +#include <linux/net_tstamp.h> #include <net/devlink.h> #include <net/switchdev.h> @@ -101,6 +102,7 @@ struct dsa_platform_data { }; struct packet_type; +struct dsa_switch; struct dsa_device_ops { struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev); @@ -368,6 +370,12 @@ struct dsa_switch_ops { struct ethtool_wolinfo *w); /* + * ethtool timestamp info + */ + int (*get_ts_info)(struct dsa_switch *ds, int port, + struct ethtool_ts_info *ts); + + /* * Suspend and resume */ int (*suspend)(struct dsa_switch *ds); @@ -469,6 +477,18 @@ struct dsa_switch_ops { int port, struct net_device *br); void (*crosschip_bridge_leave)(struct dsa_switch *ds, int sw_index, int port, struct net_device *br); + + /* + * PTP functionality + */ + int (*port_hwtstamp_get)(struct dsa_switch *ds, int port, + struct ifreq *ifr); + int (*port_hwtstamp_set)(struct dsa_switch *ds, int port, + struct ifreq *ifr); + bool (*port_txtstamp)(struct dsa_switch *ds, int port, + struct sk_buff *clone, unsigned int type); + bool (*port_rxtstamp)(struct dsa_switch *ds, int port, + struct sk_buff *skb, unsigned int type); }; struct dsa_switch_driver { diff --git a/include/net/inet_common.h b/include/net/inet_common.h index 5a54c9570977..500f81375200 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -32,7 +32,7 @@ int inet_shutdown(struct socket *sock, int how); int inet_listen(struct socket *sock, int backlog); void inet_sock_destruct(struct sock *sk); int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); -int inet_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, +int inet_getname(struct socket *sock, struct sockaddr *uaddr, int peer); int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int inet_ctl_sock_create(struct sock **sk, unsigned short family, diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 8606c9113d3f..7a98cd583c73 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1056,7 +1056,7 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu); int inet6_release(struct socket *sock); int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); -int inet6_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, +int inet6_getname(struct socket *sock, struct sockaddr *uaddr, int peer); int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h index d747ef975cd8..33fd9ba7e0e5 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -127,6 +127,17 @@ int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb); int lwtunnel_input(struct sk_buff *skb); int lwtunnel_xmit(struct sk_buff *skb); +static inline void lwtunnel_set_redirect(struct dst_entry *dst) +{ + if (lwtunnel_output_redirect(dst->lwtstate)) { + dst->lwtstate->orig_output = dst->output; + dst->output = lwtunnel_output; + } + if (lwtunnel_input_redirect(dst->lwtstate)) { + dst->lwtstate->orig_input = dst->input; + dst->input = lwtunnel_input; + } +} #else static inline void lwtstate_free(struct lwtunnel_state *lws) @@ -158,6 +169,10 @@ static inline bool lwtunnel_xmit_redirect(struct lwtunnel_state *lwtstate) return false; } +static inline void lwtunnel_set_redirect(struct dst_entry *dst) +{ +} + static inline unsigned int lwtunnel_headroom(struct lwtunnel_state *lwtstate, unsigned int mtu) { diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index f306b2aa15a4..9158ec1ad06f 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -313,6 +313,12 @@ struct pernet_operations { void (*exit_batch)(struct list_head *net_exit_list); unsigned int *id; size_t size; + /* + * Indicates above methods are allowed to be executed in parallel + * with methods of any other pernet_operations, i.e. they are not + * need synchronization via net_mutex. + */ + bool async; }; /* diff --git a/include/net/route.h b/include/net/route.h index 1eb9ce470e25..158833ea7988 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -65,8 +65,6 @@ struct rtable { /* Miscellaneous cached information */ u32 rt_pmtu; - u32 rt_table_id; - struct list_head rt_uncached; struct uncached_list *rt_uncached_list; }; diff --git a/include/net/sock.h b/include/net/sock.h index 169c92afcafa..3aa7b7d6e6c7 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1584,7 +1584,7 @@ int sock_no_bind(struct socket *, struct sockaddr *, int); int sock_no_connect(struct socket *, struct sockaddr *, int, int); int sock_no_socketpair(struct socket *, struct socket *); int sock_no_accept(struct socket *, struct socket *, int, bool); -int sock_no_getname(struct socket *, struct sockaddr *, int *, int); +int sock_no_getname(struct socket *, struct sockaddr *, int); __poll_t sock_no_poll(struct file *, struct socket *, struct poll_table_struct *); int sock_no_ioctl(struct socket *, unsigned int, unsigned long); diff --git a/include/net/tcp.h b/include/net/tcp.h index e3fc667f9ac2..92b06c6e7732 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -374,7 +374,8 @@ enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, const struct tcphdr *th); struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, - struct request_sock *req, bool fastopen); + struct request_sock *req, bool fastopen, + bool *lost_race); int tcp_child_process(struct sock *parent, struct sock *child, struct sk_buff *skb); void tcp_enter_loss(struct sock *sk); diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h index dc64cfaf13da..28812eda4209 100644 --- a/include/uapi/linux/errqueue.h +++ b/include/uapi/linux/errqueue.h @@ -20,11 +20,13 @@ struct sock_extended_err { #define SO_EE_ORIGIN_ICMP6 3 #define SO_EE_ORIGIN_TXSTATUS 4 #define SO_EE_ORIGIN_ZEROCOPY 5 +#define SO_EE_ORIGIN_ZCOOKIE 6 #define SO_EE_ORIGIN_TIMESTAMPING SO_EE_ORIGIN_TXSTATUS #define SO_EE_OFFENDER(ee) ((struct sockaddr*)((ee)+1)) #define SO_EE_CODE_ZEROCOPY_COPIED 1 +#define SO_EE_ORIGIN_MAX_ZCOOKIES 8 /** * struct scm_timestamping - timestamps exposed through cmsg diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 6d9447700e18..11d0c0ea2bfa 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -941,4 +941,22 @@ enum { IFLA_EVENT_BONDING_OPTIONS, /* change in bonding options */ }; +/* tun section */ + +enum { + IFLA_TUN_UNSPEC, + IFLA_TUN_OWNER, + IFLA_TUN_GROUP, + IFLA_TUN_TYPE, + IFLA_TUN_PI, + IFLA_TUN_VNET_HDR, + IFLA_TUN_PERSIST, + IFLA_TUN_MULTI_QUEUE, + IFLA_TUN_NUM_QUEUES, + IFLA_TUN_NUM_DISABLED_QUEUES, + __IFLA_TUN_MAX, +}; + +#define IFLA_TUN_MAX (__IFLA_TUN_MAX - 1) + #endif /* _UAPI_LINUX_IF_LINK_H */ diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h index e71d4491f225..12e3bca32cad 100644 --- a/include/uapi/linux/rds.h +++ b/include/uapi/linux/rds.h @@ -103,6 +103,7 @@ #define RDS_CMSG_MASKED_ATOMIC_FADD 8 #define RDS_CMSG_MASKED_ATOMIC_CSWP 9 #define RDS_CMSG_RXPATH_LATENCY 11 +#define RDS_CMSG_ZCOPY_COOKIE 12 #define RDS_INFO_FIRST 10000 #define RDS_INFO_COUNTERS 10000 diff --git a/kernel/audit.c b/kernel/audit.c index 227db99b0f19..5e49b614d0e6 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1526,6 +1526,7 @@ static struct pernet_operations audit_net_ops __net_initdata = { .exit = audit_net_exit, .id = &audit_net_id, .size = sizeof(struct audit_net), + .async = true, }; /* Initialize audit support at boot time. */ diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 9fe6ec8fda28..9539d7ab3ea8 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -650,6 +650,7 @@ found: static struct pernet_operations uevent_net_ops = { .init = uevent_net_init, .exit = uevent_net_exit, + .async = true, }; static int __init kobject_uevent_init(void) diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 03a9fc0771c0..9b6bc5abe946 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1238,7 +1238,7 @@ out: * fields into the sockaddr. */ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr, - int *uaddr_len, int peer) + int peer) { struct sockaddr_at sat; struct sock *sk = sock->sk; @@ -1251,7 +1251,6 @@ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr, if (atalk_autobind(sk) < 0) goto out; - *uaddr_len = sizeof(struct sockaddr_at); memset(&sat, 0, sizeof(sat)); if (peer) { @@ -1268,9 +1267,9 @@ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr, sat.sat_port = at->src_port; } - err = 0; sat.sat_family = AF_APPLETALK; memcpy(uaddr, &sat, sizeof(sat)); + err = sizeof(struct sockaddr_at); out: release_sock(sk); diff --git a/net/atm/pvc.c b/net/atm/pvc.c index e1140b3bdcaa..2cb10af16afc 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c @@ -87,21 +87,20 @@ static int pvc_getsockopt(struct socket *sock, int level, int optname, } static int pvc_getname(struct socket *sock, struct sockaddr *sockaddr, - int *sockaddr_len, int peer) + int peer) { struct sockaddr_atmpvc *addr; struct atm_vcc *vcc = ATM_SD(sock); if (!vcc->dev || !test_bit(ATM_VF_ADDR, &vcc->flags)) return -ENOTCONN; - *sockaddr_len = sizeof(struct sockaddr_atmpvc); addr = (struct sockaddr_atmpvc *)sockaddr; memset(addr, 0, sizeof(*addr)); addr->sap_family = AF_ATMPVC; addr->sap_addr.itf = vcc->dev->number; addr->sap_addr.vpi = vcc->vpi; addr->sap_addr.vci = vcc->vci; - return 0; + return sizeof(struct sockaddr_atmpvc); } static const struct proto_ops pvc_proto_ops = { diff --git a/net/atm/svc.c b/net/atm/svc.c index c458adcbc177..2f91b766ac42 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -419,15 +419,14 @@ out: } static int svc_getname(struct socket *sock, struct sockaddr *sockaddr, - int *sockaddr_len, int peer) + int peer) { struct sockaddr_atmsvc *addr; - *sockaddr_len = sizeof(struct sockaddr_atmsvc); addr = (struct sockaddr_atmsvc *) sockaddr; memcpy(addr, peer ? &ATM_SD(sock)->remote : &ATM_SD(sock)->local, sizeof(struct sockaddr_atmsvc)); - return 0; + return sizeof(struct sockaddr_atmsvc); } int svc_change_qos(struct atm_vcc *vcc, struct atm_qos *qos) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 47fdd399626b..c8319ed48485 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1388,7 +1388,7 @@ out: } static int ax25_getname(struct socket *sock, struct sockaddr *uaddr, - int *uaddr_len, int peer) + int peer) { struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)uaddr; struct sock *sk = sock->sk; @@ -1427,7 +1427,7 @@ static int ax25_getname(struct socket *sock, struct sockaddr *uaddr, fsa->fsa_digipeater[0] = null_ax25_address; } } - *uaddr_len = sizeof (struct full_sockaddr_ax25); + err = sizeof (struct full_sockaddr_ax25); out: release_sock(sk); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 3394e6791673..66c0781773df 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -934,8 +934,8 @@ static bool is_advertising_allowed(struct hci_dev *hdev, bool connectable) /* Slave connection state and connectable mode bit 38 * and scannable bit 21. */ - if (connectable && (!(hdev->le_states[4] & 0x01) || - !(hdev->le_states[2] & 0x40))) + if (connectable && (!(hdev->le_states[4] & 0x40) || + !(hdev->le_states[2] & 0x20))) return false; } @@ -948,7 +948,7 @@ static bool is_advertising_allowed(struct hci_dev *hdev, bool connectable) /* Master connection state and connectable mode bit 35 and * scannable 19. */ - if (connectable && (!(hdev->le_states[4] & 0x10) || + if (connectable && (!(hdev->le_states[4] & 0x08) || !(hdev->le_states[2] & 0x08))) return false; } diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 923e9a271872..1506e1632394 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1340,7 +1340,7 @@ done: } static int hci_sock_getname(struct socket *sock, struct sockaddr *addr, - int *addr_len, int peer) + int peer) { struct sockaddr_hci *haddr = (struct sockaddr_hci *)addr; struct sock *sk = sock->sk; @@ -1360,10 +1360,10 @@ static int hci_sock_getname(struct socket *sock, struct sockaddr *addr, goto done; } - *addr_len = sizeof(*haddr); haddr->hci_family = AF_BLUETOOTH; haddr->hci_dev = hdev->id; haddr->hci_channel= hci_pi(sk)->channel; + err = sizeof(*haddr); done: release_sock(sk); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 67a8642f57ea..686bdc6b35b0 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -358,7 +358,7 @@ done: } static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, - int *len, int peer) + int peer) { struct sockaddr_l2 *la = (struct sockaddr_l2 *) addr; struct sock *sk = sock->sk; @@ -373,7 +373,6 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, memset(la, 0, sizeof(struct sockaddr_l2)); addr->sa_family = AF_BLUETOOTH; - *len = sizeof(struct sockaddr_l2); la->l2_psm = chan->psm; @@ -387,7 +386,7 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, la->l2_bdaddr_type = chan->src_type; } - return 0; + return sizeof(struct sockaddr_l2); } static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 1aaccf637479..93a3b219db09 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -533,7 +533,7 @@ done: return err; } -static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int *len, int peer) +static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int peer) { struct sockaddr_rc *sa = (struct sockaddr_rc *) addr; struct sock *sk = sock->sk; @@ -552,8 +552,7 @@ static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int * else bacpy(&sa->rc_bdaddr, &rfcomm_pi(sk)->src); - *len = sizeof(struct sockaddr_rc); - return 0; + return sizeof(struct sockaddr_rc); } static int rfcomm_sock_sendmsg(struct socket *sock, struct msghdr *msg, diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 08df57665e1f..413b8ee49fec 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -680,7 +680,7 @@ done: } static int sco_sock_getname(struct socket *sock, struct sockaddr *addr, - int *len, int peer) + int peer) { struct sockaddr_sco *sa = (struct sockaddr_sco *) addr; struct sock *sk = sock->sk; @@ -688,14 +688,13 @@ static int sco_sock_getname(struct socket *sock, struct sockaddr *addr, BT_DBG("sock %p, sk %p", sock, sk); addr->sa_family = AF_BLUETOOTH; - *len = sizeof(struct sockaddr_sco); if (peer) bacpy(&sa->sco_bdaddr, &sco_pi(sk)->dst); else bacpy(&sa->sco_bdaddr, &sco_pi(sk)->src); - return 0; + return sizeof(struct sockaddr_sco); } static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg, diff --git a/net/can/raw.c b/net/can/raw.c index f2ecc43376a1..1051eee82581 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -470,7 +470,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len) } static int raw_getname(struct socket *sock, struct sockaddr *uaddr, - int *len, int peer) + int peer) { struct sockaddr_can *addr = (struct sockaddr_can *)uaddr; struct sock *sk = sock->sk; @@ -483,9 +483,7 @@ static int raw_getname(struct socket *sock, struct sockaddr *uaddr, addr->can_family = AF_CAN; addr->can_ifindex = ro->ifindex; - *len = sizeof(*addr); - - return 0; + return sizeof(*addr); } static int raw_setsockopt(struct socket *sock, int level, int optname, diff --git a/net/core/dev.c b/net/core/dev.c index d4362befe7e2..5bdcc5a161fe 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8141,8 +8141,9 @@ void netdev_run_todo(void) BUG_ON(!list_empty(&dev->ptype_specific)); WARN_ON(rcu_access_pointer(dev->ip_ptr)); WARN_ON(rcu_access_pointer(dev->ip6_ptr)); +#if IS_ENABLED(CONFIG_DECNET) WARN_ON(dev->dn_ptr); - +#endif if (dev->priv_destructor) dev->priv_destructor(dev); if (dev->needs_free_netdev) @@ -8840,6 +8841,7 @@ static void __net_exit netdev_exit(struct net *net) static struct pernet_operations __net_initdata netdev_net_ops = { .init = netdev_init, .exit = netdev_exit, + .async = true, }; static void __net_exit default_device_exit(struct net *net) @@ -8940,6 +8942,7 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list) static struct pernet_operations __net_initdata default_device_ops = { .exit = default_device_exit, .exit_batch = default_device_exit_batch, + .async = true, }; /* diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c index 0c048bdeb016..5ace0705a3f9 100644 --- a/net/core/fib_notifier.c +++ b/net/core/fib_notifier.c @@ -171,6 +171,7 @@ static void __net_exit fib_notifier_net_exit(struct net *net) static struct pernet_operations fib_notifier_net_ops = { .init = fib_notifier_net_init, .exit = fib_notifier_net_exit, + .async = true, }; static int __init fib_notifier_init(void) diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 98e1066c3d55..cb071b8e8d17 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -1030,6 +1030,7 @@ static void __net_exit fib_rules_net_exit(struct net *net) static struct pernet_operations fib_rules_net_ops = { .init = fib_rules_net_init, .exit = fib_rules_net_exit, + .async = true, }; static int __init fib_rules_init(void) diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index e010bb800d7b..65b51e778782 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -349,6 +349,7 @@ static void __net_exit dev_proc_net_exit(struct net *net) static struct pernet_operations __net_initdata dev_proc_ops = { .init = dev_proc_net_init, .exit = dev_proc_net_exit, + .async = true, }; static int dev_mc_seq_show(struct seq_file *seq, void *v) @@ -405,6 +406,7 @@ static void __net_exit dev_mc_net_exit(struct net *net) static struct pernet_operations __net_initdata dev_mc_net_ops = { .init = dev_mc_net_init, .exit = dev_mc_net_exit, + .async = true, }; int __init dev_proc_init(void) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 3cad5f51afd3..bcab9a938d6f 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -29,6 +29,7 @@ static LIST_HEAD(pernet_list); static struct list_head *first_device = &pernet_list; +/* Used only if there are !async pernet_operations registered */ DEFINE_MUTEX(net_mutex); LIST_HEAD(net_namespace_list); @@ -41,6 +42,12 @@ struct net init_net = { EXPORT_SYMBOL(init_net); static bool init_net_initialized; +static unsigned nr_sync_pernet_ops; +/* + * net_sem: protects: pernet_list, net_generic_ids, nr_sync_pernet_ops, + * init_net_initialized and first_device pointer. + */ +DECLARE_RWSEM(net_sem); #define MIN_PERNET_OPS_ID \ ((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *)) @@ -65,11 +72,10 @@ static int net_assign_generic(struct net *net, unsigned int id, void *data) { struct net_generic *ng, *old_ng; - BUG_ON(!mutex_is_locked(&net_mutex)); BUG_ON(id < MIN_PERNET_OPS_ID); old_ng = rcu_dereference_protected(net->gen, - lockdep_is_held(&net_mutex)); + lockdep_is_held(&net_sem)); if (old_ng->s.len > id) { old_ng->ptr[id] = data; return 0; @@ -286,7 +292,7 @@ struct net *get_net_ns_by_id(struct net *net, int id) */ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) { - /* Must be called with net_mutex held */ + /* Must be called with net_sem held */ const struct pernet_operations *ops, *saved_ops; int error = 0; LIST_HEAD(net_exit_list); @@ -303,6 +309,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) if (error < 0) goto out_undo; } + rtnl_lock(); + list_add_tail_rcu(&net->list, &net_namespace_list); + rtnl_unlock(); out: return error; @@ -331,6 +340,7 @@ static int __net_init net_defaults_init_net(struct net *net) static struct pernet_operations net_defaults_ops = { .init = net_defaults_init_net, + .async = true, }; static __init int net_defaults_init(void) @@ -408,32 +418,32 @@ struct net *copy_net_ns(unsigned long flags, net = net_alloc(); if (!net) { - dec_net_namespaces(ucounts); - return ERR_PTR(-ENOMEM); + rv = -ENOMEM; + goto dec_ucounts; } - + refcount_set(&net->passive, 1); + net->ucounts = ucounts; get_user_ns(user_ns); - rv = mutex_lock_killable(&net_mutex); - if (rv < 0) { - net_free(net); - dec_net_namespaces(ucounts); - put_user_ns(user_ns); - return ERR_PTR(rv); + rv = down_read_killable(&net_sem); + if (rv < 0) + goto put_userns; + if (nr_sync_pernet_ops) { + rv = mutex_lock_killable(&net_mutex); + if (rv < 0) + goto up_read; } - - net->ucounts = ucounts; rv = setup_net(net, user_ns); - if (rv == 0) { - rtnl_lock(); - list_add_tail_rcu(&net->list, &net_namespace_list); - rtnl_unlock(); - } - mutex_unlock(&net_mutex); + if (nr_sync_pernet_ops) + mutex_unlock(&net_mutex); +up_read: + up_read(&net_sem); if (rv < 0) { - dec_net_namespaces(ucounts); +put_userns: put_user_ns(user_ns); net_drop_ns(net); +dec_ucounts: + dec_net_namespaces(ucounts); return ERR_PTR(rv); } return net; @@ -481,7 +491,9 @@ static void cleanup_net(struct work_struct *work) list_replace_init(&cleanup_list, &net_kill_list); spin_unlock_irq(&cleanup_list_lock); - mutex_lock(&net_mutex); + down_read(&net_sem); + if (nr_sync_pernet_ops) + mutex_lock(&net_mutex); /* Don't let anyone else find us. */ rtnl_lock(); @@ -516,11 +528,14 @@ static void cleanup_net(struct work_struct *work) list_for_each_entry_reverse(ops, &pernet_list, list) ops_exit_list(ops, &net_exit_list); + if (nr_sync_pernet_ops) + mutex_unlock(&net_mutex); + /* Free the net generic variables */ list_for_each_entry_reverse(ops, &pernet_list, list) ops_free_list(ops, &net_exit_list); - mutex_unlock(&net_mutex); + up_read(&net_sem); /* Ensure there are no outstanding rcu callbacks using this * network namespace. @@ -547,8 +562,10 @@ static void cleanup_net(struct work_struct *work) */ void net_ns_barrier(void) { + down_write(&net_sem); mutex_lock(&net_mutex); mutex_unlock(&net_mutex); + up_write(&net_sem); } EXPORT_SYMBOL(net_ns_barrier); @@ -633,6 +650,7 @@ static __net_exit void net_ns_net_exit(struct net *net) static struct pernet_operations __net_initdata net_ns_ops = { .init = net_ns_net_init, .exit = net_ns_net_exit, + .async = true, }; static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = { @@ -875,17 +893,12 @@ static int __init net_ns_init(void) rcu_assign_pointer(init_net.gen, ng); - mutex_lock(&net_mutex); + down_write(&net_sem); if (setup_net(&init_net, &init_user_ns)) panic("Could not setup the initial network namespace"); init_net_initialized = true; - - rtnl_lock(); - list_add_tail_rcu(&init_net.list, &net_namespace_list); - rtnl_unlock(); - - mutex_unlock(&net_mutex); + up_write(&net_sem); register_pernet_subsys(&net_ns_ops); @@ -989,6 +1002,9 @@ again: rcu_barrier(); if (ops->id) ida_remove(&net_generic_ids, *ops->id); + } else if (!ops->async) { + pr_info_once("Pernet operations %ps are sync.\n", ops); + nr_sync_pernet_ops++; } return error; @@ -996,7 +1012,8 @@ again: static void unregister_pernet_operations(struct pernet_operations *ops) { - + if (!ops->async) + BUG_ON(nr_sync_pernet_ops-- == 0); __unregister_pernet_operations(ops); rcu_barrier(); if (ops->id) @@ -1025,9 +1042,9 @@ static void unregister_pernet_operations(struct pernet_operations *ops) int register_pernet_subsys(struct pernet_operations *ops) { int error; - mutex_lock(&net_mutex); + down_write(&net_sem); error = register_pernet_operations(first_device, ops); - mutex_unlock(&net_mutex); + up_write(&net_sem); return error; } EXPORT_SYMBOL_GPL(register_pernet_subsys); @@ -1043,9 +1060,9 @@ EXPORT_SYMBOL_GPL(register_pernet_subsys); */ void unregister_pernet_subsys(struct pernet_operations *ops) { - mutex_lock(&net_mutex); + down_write(&net_sem); unregister_pernet_operations(ops); - mutex_unlock(&net_mutex); + up_write(&net_sem); } EXPORT_SYMBOL_GPL(unregister_pernet_subsys); @@ -1071,11 +1088,11 @@ EXPORT_SYMBOL_GPL(unregister_pernet_subsys); int register_pernet_device(struct pernet_operations *ops) { int error; - mutex_lock(&net_mutex); + down_write(&net_sem); error = register_pernet_operations(&pernet_list, ops); if (!error && (first_device == &pernet_list)) first_device = &ops->list; - mutex_unlock(&net_mutex); + up_write(&net_sem); return error; } EXPORT_SYMBOL_GPL(register_pernet_device); @@ -1091,11 +1108,11 @@ EXPORT_SYMBOL_GPL(register_pernet_device); */ void unregister_pernet_device(struct pernet_operations *ops) { - mutex_lock(&net_mutex); + down_write(&net_sem); if (&ops->list == first_device) first_device = first_device->next; unregister_pernet_operations(ops); - mutex_unlock(&net_mutex); + up_write(&net_sem); } EXPORT_SYMBOL_GPL(unregister_pernet_device); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index bc290413a49d..67f375cfb982 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -454,11 +454,11 @@ static void rtnl_lock_unregistering_all(void) void rtnl_link_unregister(struct rtnl_link_ops *ops) { /* Close the race with cleanup_net() */ - mutex_lock(&net_mutex); + down_write(&net_sem); rtnl_lock_unregistering_all(); __rtnl_link_unregister(ops); rtnl_unlock(); - mutex_unlock(&net_mutex); + up_write(&net_sem); } EXPORT_SYMBOL_GPL(rtnl_link_unregister); @@ -4724,6 +4724,7 @@ static void __net_exit rtnetlink_net_exit(struct net *net) static struct pernet_operations rtnetlink_net_ops = { .init = rtnetlink_net_init, .exit = rtnetlink_net_exit, + .async = true, }; void __init rtnetlink_init(void) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 09bd89c90a71..1a7485a2cdfa 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -890,7 +890,7 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src) } EXPORT_SYMBOL_GPL(skb_morph); -static int mm_account_pinned_pages(struct mmpin *mmp, size_t size) +int mm_account_pinned_pages(struct mmpin *mmp, size_t size) { unsigned long max_pg, num_pg, new_pg, old_pg; struct user_struct *user; @@ -919,14 +919,16 @@ static int mm_account_pinned_pages(struct mmpin *mmp, size_t size) return 0; } +EXPORT_SYMBOL_GPL(mm_account_pinned_pages); -static void mm_unaccount_pinned_pages(struct mmpin *mmp) +void mm_unaccount_pinned_pages(struct mmpin *mmp) { if (mmp->user) { atomic_long_sub(mmp->num_pg, &mmp->user->locked_vm); free_uid(mmp->user); } } +EXPORT_SYMBOL_GPL(mm_unaccount_pinned_pages); struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size) { diff --git a/net/core/sock.c b/net/core/sock.c index c501499a04fe..a1fa4a548f1b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1049,18 +1049,21 @@ set_rcvbuf: break; case SO_ZEROCOPY: - if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6) + if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) { + if (sk->sk_protocol != IPPROTO_TCP) + ret = -ENOTSUPP; + else if (sk->sk_state != TCP_CLOSE) + ret = -EBUSY; + } else if (sk->sk_family != PF_RDS) { ret = -ENOTSUPP; - else if (sk->sk_protocol != IPPROTO_TCP) - ret = -ENOTSUPP; - else if (sk->sk_state != TCP_CLOSE) - ret = -EBUSY; - else if (val < 0 || val > 1) - ret = -EINVAL; - else - sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool); - break; - + } + if (!ret) { + if (val < 0 || val > 1) + ret = -EINVAL; + else + sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool); + break; + } default: ret = -ENOPROTOOPT; break; @@ -1274,7 +1277,8 @@ int sock_getsockopt(struct socket *sock, int level, int optname, { char address[128]; - if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2)) + lv = sock->ops->getname(sock, (struct sockaddr *)address, 2); + if (lv < 0) return -ENOTCONN; if (lv < len) return -EINVAL; @@ -2497,7 +2501,7 @@ int sock_no_accept(struct socket *sock, struct socket *newsock, int flags, EXPORT_SYMBOL(sock_no_accept); int sock_no_getname(struct socket *sock, struct sockaddr *saddr, - int *len, int peer) + int peer) { return -EOPNOTSUPP; } @@ -3111,6 +3115,7 @@ static void __net_exit sock_inuse_exit_net(struct net *net) static struct pernet_operations net_inuse_ops = { .init = sock_inuse_init_net, .exit = sock_inuse_exit_net, + .async = true, }; static __init int net_inuse_init(void) @@ -3384,6 +3389,7 @@ static __net_exit void proto_exit_net(struct net *net) static __net_initdata struct pernet_operations proto_net_ops = { .init = proto_init_net, .exit = proto_exit_net, + .async = true, }; static int __init proto_init(void) diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 146b50e30659..aee5642affd9 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -328,6 +328,7 @@ static void __net_exit diag_net_exit(struct net *net) static struct pernet_operations diag_net_ops = { .init = diag_net_init, .exit = diag_net_exit, + .async = true, }; static int __init sock_diag_init(void) diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index f2d0462611c3..d714f65782b7 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -572,6 +572,7 @@ static __net_exit void sysctl_core_net_exit(struct net *net) static __net_initdata struct pernet_operations sysctl_core_ops = { .init = sysctl_core_net_init, .exit = sysctl_core_net_exit, + .async = true, }; static __init int sysctl_core_init(void) diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 791aff68af88..2ee8306c23e3 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1180,14 +1180,12 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags, } -static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int *uaddr_len,int peer) +static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int peer) { struct sockaddr_dn *sa = (struct sockaddr_dn *)uaddr; struct sock *sk = sock->sk; struct dn_scp *scp = DN_SK(sk); - *uaddr_len = sizeof(struct sockaddr_dn); - lock_sock(sk); if (peer) { @@ -1205,7 +1203,7 @@ static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int *uaddr_len release_sock(sk); - return 0; + return sizeof(struct sockaddr_dn); } diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 6a9d0f50fbee..e63c554e0623 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -23,6 +23,7 @@ #include <linux/netdevice.h> #include <linux/sysfs.h> #include <linux/phy_fixed.h> +#include <linux/ptp_classify.h> #include <linux/gpio/consumer.h> #include <linux/etherdevice.h> @@ -122,6 +123,38 @@ struct net_device *dsa_dev_to_net_device(struct device *dev) } EXPORT_SYMBOL_GPL(dsa_dev_to_net_device); +/* Determine if we should defer delivery of skb until we have a rx timestamp. + * + * Called from dsa_switch_rcv. For now, this will only work if tagging is + * enabled on the switch. Normally the MAC driver would retrieve the hardware + * timestamp when it reads the packet out of the hardware. However in a DSA + * switch, the DSA driver owning the interface to which the packet is + * delivered is never notified unless we do so here. + */ +static bool dsa_skb_defer_rx_timestamp(struct dsa_slave_priv *p, + struct sk_buff *skb) +{ + struct dsa_switch *ds = p->dp->ds; + unsigned int type; + + if (skb_headroom(skb) < ETH_HLEN) + return false; + + __skb_push(skb, ETH_HLEN); + + type = ptp_classify_raw(skb); + + __skb_pull(skb, ETH_HLEN); + + if (type == PTP_CLASS_NONE) + return false; + + if (likely(ds->ops->port_rxtstamp)) + return ds->ops->port_rxtstamp(ds, p->dp->index, skb, type); + + return false; +} + static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *unused) { @@ -157,6 +190,9 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, s->rx_bytes += skb->len; u64_stats_update_end(&s->syncp); + if (dsa_skb_defer_rx_timestamp(p, skb)) + return 0; + netif_receive_skb(skb); return 0; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index f52307296de4..3376dad6dcfd 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -21,6 +21,7 @@ #include <net/tc_act/tc_mirred.h> #include <linux/if_bridge.h> #include <linux/netpoll.h> +#include <linux/ptp_classify.h> #include "dsa_priv.h" @@ -255,6 +256,22 @@ dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->dp->ds; + int port = p->dp->index; + + /* Pass through to switch driver if it supports timestamping */ + switch (cmd) { + case SIOCGHWTSTAMP: + if (ds->ops->port_hwtstamp_get) + return ds->ops->port_hwtstamp_get(ds, port, ifr); + break; + case SIOCSHWTSTAMP: + if (ds->ops->port_hwtstamp_set) + return ds->ops->port_hwtstamp_set(ds, port, ifr); + break; + } + if (!dev->phydev) return -ENODEV; @@ -385,6 +402,30 @@ static inline netdev_tx_t dsa_slave_netpoll_send_skb(struct net_device *dev, return NETDEV_TX_OK; } +static void dsa_skb_tx_timestamp(struct dsa_slave_priv *p, + struct sk_buff *skb) +{ + struct dsa_switch *ds = p->dp->ds; + struct sk_buff *clone; + unsigned int type; + + type = ptp_classify_raw(skb); + if (type == PTP_CLASS_NONE) + return; + + if (!ds->ops->port_txtstamp) + return; + + clone = skb_clone_sk(skb); + if (!clone) + return; + + if (ds->ops->port_txtstamp(ds, p->dp->index, clone, type)) + return; + + kfree_skb(clone); +} + static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); @@ -397,6 +438,11 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) s->tx_bytes += skb->len; u64_stats_update_end(&s->syncp); + /* Identify PTP protocol packets, clone them, and pass them to the + * switch driver + */ + dsa_skb_tx_timestamp(p, skb); + /* Transmit function may have to reallocate the original SKB, * in which case it must have freed it. Only free it here on error. */ @@ -918,6 +964,18 @@ static int dsa_slave_set_rxnfc(struct net_device *dev, return ds->ops->set_rxnfc(ds, dp->index, nfc); } +static int dsa_slave_get_ts_info(struct net_device *dev, + struct ethtool_ts_info *ts) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->dp->ds; + + if (!ds->ops->get_ts_info) + return -EOPNOTSUPP; + + return ds->ops->get_ts_info(ds, p->dp->index, ts); +} + static const struct ethtool_ops dsa_slave_ethtool_ops = { .get_drvinfo = dsa_slave_get_drvinfo, .get_regs_len = dsa_slave_get_regs_len, @@ -938,6 +996,7 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = { .set_link_ksettings = phy_ethtool_set_link_ksettings, .get_rxnfc = dsa_slave_get_rxnfc, .set_rxnfc = dsa_slave_set_rxnfc, + .get_ts_info = dsa_slave_get_ts_info, }; /* legacy way, bypassing the bridge *****************************************/ diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index e4329e161943..e8c7fad8c329 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -723,7 +723,7 @@ EXPORT_SYMBOL(inet_accept); * This does both peername and sockname. */ int inet_getname(struct socket *sock, struct sockaddr *uaddr, - int *uaddr_len, int peer) + int peer) { struct sock *sk = sock->sk; struct inet_sock *inet = inet_sk(sk); @@ -745,8 +745,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr, sin->sin_addr.s_addr = addr; } memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - *uaddr_len = sizeof(*sin); - return 0; + return sizeof(*sin); } EXPORT_SYMBOL(inet_getname); @@ -1736,6 +1735,7 @@ static __net_exit void ipv4_mib_exit_net(struct net *net) static __net_initdata struct pernet_operations ipv4_mib_ops = { .init = ipv4_mib_init_net, .exit = ipv4_mib_exit_net, + .async = true, }; static int __init init_ipv4_mibs(void) @@ -1789,6 +1789,7 @@ static __net_exit void inet_exit_net(struct net *net) static __net_initdata struct pernet_operations af_inet_ops = { .init = inet_init_net, .exit = inet_exit_net, + .async = true, }; static int __init init_inet_pernet_ops(void) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index f28f06c91ead..7dc9de8444a9 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1447,6 +1447,7 @@ static void __net_exit arp_net_exit(struct net *net) static struct pernet_operations arp_net_ops = { .init = arp_net_init, .exit = arp_net_exit, + .async = true, }; static int __init arp_proc_init(void) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 40f001782c1b..5ae0d1f097ca 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -2469,6 +2469,7 @@ static __net_exit void devinet_exit_net(struct net *net) static __net_initdata struct pernet_operations devinet_ops = { .init = devinet_init_net, .exit = devinet_exit_net, + .async = true, }; static struct rtnl_af_ops inet_af_ops __read_mostly = { diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index f05afaf3235c..ac71c3d496c0 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1362,6 +1362,7 @@ static void __net_exit fib_net_exit(struct net *net) static struct pernet_operations fib_net_ops = { .init = fib_net_init, .exit = fib_net_exit, + .async = true, }; void __init ip_fib_init(void) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 7d36a950d961..cd46d7666598 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1765,13 +1765,11 @@ void fib_select_multipath(struct fib_result *res, int hash) void fib_select_path(struct net *net, struct fib_result *res, struct flowi4 *fl4, const struct sk_buff *skb) { - bool oif_check; - - oif_check = (fl4->flowi4_oif == 0 || - fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF); + if (fl4->flowi4_oif && !(fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) + goto check_saddr; #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (res->fi->fib_nhs > 1 && oif_check) { + if (res->fi->fib_nhs > 1) { int h = fib_multipath_hash(res->fi, fl4, skb); fib_select_multipath(res, h); @@ -1780,10 +1778,10 @@ void fib_select_path(struct net *net, struct fib_result *res, #endif if (!res->prefixlen && res->table->tb_num_default > 1 && - res->type == RTN_UNICAST && oif_check) + res->type == RTN_UNICAST) fib_select_default(fl4, res); +check_saddr: if (!fl4->saddr) fl4->saddr = FIB_RES_PREFSRC(net, *res); } -EXPORT_SYMBOL_GPL(fib_select_path); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 1617604c9284..cc56efa64d5c 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1257,6 +1257,7 @@ fail: static struct pernet_operations __net_initdata icmp_sk_ops = { .init = icmp_sk_init, .exit = icmp_sk_exit, + .async = true, }; int __init icmp_init(void) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index f2402581fef1..c2743763777e 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -3028,6 +3028,7 @@ static void __net_exit igmp_net_exit(struct net *net) static struct pernet_operations igmp_net_ops = { .init = igmp_net_init, .exit = igmp_net_exit, + .async = true, }; #endif diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index bbf1b94942c0..5e843ae5e468 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -885,6 +885,7 @@ static void __net_exit ipv4_frags_exit_net(struct net *net) static struct pernet_operations ip4_frags_ops = { .init = ipv4_frags_init_net, .exit = ipv4_frags_exit_net, + .async = true, }; void __init ipfrag_init(void) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 008be04ac1cc..9dca0fb8c482 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -258,7 +258,8 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg); if (!ipv6_addr_v4mapped(&src_info->ipi6_addr)) return -EINVAL; - ipc->oif = src_info->ipi6_ifindex; + if (src_info->ipi6_ifindex) + ipc->oif = src_info->ipi6_ifindex; ipc->addr = src_info->ipi6_addr.s6_addr32[3]; continue; } @@ -288,7 +289,8 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo))) return -EINVAL; info = (struct in_pktinfo *)CMSG_DATA(cmsg); - ipc->oif = info->ipi_ifindex; + if (info->ipi_ifindex) + ipc->oif = info->ipi_ifindex; ipc->addr = info->ipi_spec_dst.s_addr; break; } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index b05689bbba31..7c7ac9d32e77 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -3327,6 +3327,7 @@ static void __net_exit ipmr_net_exit(struct net *net) static struct pernet_operations ipmr_net_ops = { .init = ipmr_net_init, .exit = ipmr_net_exit, + .async = true, }; int __init ip_mr_init(void) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 9a71f3149507..39a7cf9160e6 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1911,6 +1911,7 @@ static void __net_exit ip_tables_net_exit(struct net *net) static struct pernet_operations ip_tables_net_ops = { .init = ip_tables_net_init, .exit = ip_tables_net_exit, + .async = true, }; static int __init ip_tables_init(void) diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 9ac92ea7b93c..c1c136a93911 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -87,6 +87,7 @@ static void __net_exit iptable_filter_net_exit(struct net *net) static struct pernet_operations iptable_filter_net_ops = { .init = iptable_filter_net_init, .exit = iptable_filter_net_exit, + .async = true, }; static int __init iptable_filter_init(void) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index b8f0db54b197..0164def9c808 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -1204,6 +1204,7 @@ static void __net_exit ping_v4_proc_exit_net(struct net *net) static struct pernet_operations ping_v4_net_ops = { .init = ping_v4_proc_init_net, .exit = ping_v4_proc_exit_net, + .async = true, }; int __init ping_proc_init(void) diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index dc5edc8f7564..fdabc70283b6 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -549,6 +549,7 @@ static __net_exit void ip_proc_exit_net(struct net *net) static __net_initdata struct pernet_operations ip_proc_ops = { .init = ip_proc_init_net, .exit = ip_proc_exit_net, + .async = true, }; int __init ip_misc_proc_init(void) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 9b367fc48d7d..54648d20bf0f 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -1156,6 +1156,7 @@ static __net_exit void raw_exit_net(struct net *net) static __net_initdata struct pernet_operations raw_net_ops = { .init = raw_init_net, .exit = raw_exit_net, + .async = true, }; int __init raw_proc_init(void) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 49cc1c1df1ba..5ca7415cd48c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -417,6 +417,7 @@ static void __net_exit ip_rt_do_proc_exit(struct net *net) static struct pernet_operations ip_rt_proc_ops __net_initdata = { .init = ip_rt_do_proc_init, .exit = ip_rt_do_proc_exit, + .async = true, }; static int __init ip_rt_proc_init(void) @@ -1508,7 +1509,6 @@ struct rtable *rt_dst_alloc(struct net_device *dev, rt->rt_pmtu = 0; rt->rt_gateway = 0; rt->rt_uses_gateway = 0; - rt->rt_table_id = 0; INIT_LIST_HEAD(&rt->rt_uncached); rt->dst.output = ip_output; @@ -1644,19 +1644,6 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr) spin_unlock_bh(&fnhe_lock); } -static void set_lwt_redirect(struct rtable *rth) -{ - if (lwtunnel_output_redirect(rth->dst.lwtstate)) { - rth->dst.lwtstate->orig_output = rth->dst.output; - rth->dst.output = lwtunnel_output; - } - - if (lwtunnel_input_redirect(rth->dst.lwtstate)) { - rth->dst.lwtstate->orig_input = rth->dst.input; - rth->dst.input = lwtunnel_input; - } -} - /* called in rcu_read_lock() section */ static int __mkroute_input(struct sk_buff *skb, const struct fib_result *res, @@ -1739,15 +1726,13 @@ rt_cache: } rth->rt_is_input = 1; - if (res->table) - rth->rt_table_id = res->table->tb_id; RT_CACHE_STAT_INC(in_slow_tot); rth->dst.input = ip_forward; rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag, do_cache); - set_lwt_redirect(rth); + lwtunnel_set_redirect(&rth->dst); skb_dst_set(skb, &rth->dst); out: err = 0; @@ -1846,7 +1831,6 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4, return mhash >> 1; } -EXPORT_SYMBOL_GPL(fib_multipath_hash); #endif /* CONFIG_IP_ROUTE_MULTIPATH */ static int ip_mkroute_input(struct sk_buff *skb, @@ -2014,8 +1998,6 @@ local_input: rth->dst.tclassid = itag; #endif rth->rt_is_input = 1; - if (res->table) - rth->rt_table_id = res->table->tb_id; RT_CACHE_STAT_INC(in_slow_tot); if (res->type == RTN_UNREACHABLE) { @@ -2244,8 +2226,6 @@ add: return ERR_PTR(-ENOBUFS); rth->rt_iif = orig_oif; - if (res->table) - rth->rt_table_id = res->table->tb_id; RT_CACHE_STAT_INC(out_slow_tot); @@ -2267,7 +2247,7 @@ add: } rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0, do_cache); - set_lwt_redirect(rth); + lwtunnel_set_redirect(&rth->dst); return rth; } @@ -2775,7 +2755,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, rt->rt_flags |= RTCF_NOTIFY; if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE) - table_id = rt->rt_table_id; + table_id = res.table ? res.table->tb_id : 0; if (rtm->rtm_flags & RTM_F_FIB_MATCH) { if (!res.fi) { @@ -2994,6 +2974,7 @@ static __net_exit void sysctl_route_net_exit(struct net *net) static __net_initdata struct pernet_operations sysctl_route_ops = { .init = sysctl_route_net_init, .exit = sysctl_route_net_exit, + .async = true, }; #endif @@ -3007,6 +2988,7 @@ static __net_init int rt_genid_init(struct net *net) static __net_initdata struct pernet_operations rt_genid_ops = { .init = rt_genid_init, + .async = true, }; static int __net_init ipv4_inetpeer_init(struct net *net) @@ -3032,6 +3014,7 @@ static void __net_exit ipv4_inetpeer_exit(struct net *net) static __net_initdata struct pernet_operations ipv4_inetpeer_ops = { .init = ipv4_inetpeer_init, .exit = ipv4_inetpeer_exit, + .async = true, }; #ifdef CONFIG_IP_ROUTE_CLASSID diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 93e172118a94..89683d868b37 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -1219,6 +1219,7 @@ static __net_exit void ipv4_sysctl_exit_net(struct net *net) static __net_initdata struct pernet_operations ipv4_sysctl_ops = { .init = ipv4_sysctl_init_net, .exit = ipv4_sysctl_exit_net, + .async = true, }; static __init int sysctl_ipv4_init(void) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 575d3c1fb6e8..a6b48f6253e3 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5870,10 +5870,12 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) tp->rx_opt.saw_tstamp = 0; req = tp->fastopen_rsk; if (req) { + bool req_stolen; + WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV && sk->sk_state != TCP_FIN_WAIT1); - if (!tcp_check_req(sk, skb, req, true)) + if (!tcp_check_req(sk, skb, req, true, &req_stolen)) goto discard; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f8ad397e285e..f3e52bc98980 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1672,6 +1672,7 @@ process: if (sk->sk_state == TCP_NEW_SYN_RECV) { struct request_sock *req = inet_reqsk(sk); + bool req_stolen = false; struct sock *nsk; sk = req->rsk_listener; @@ -1694,10 +1695,20 @@ process: th = (const struct tcphdr *)skb->data; iph = ip_hdr(skb); tcp_v4_fill_cb(skb, iph, th); - nsk = tcp_check_req(sk, skb, req, false); + nsk = tcp_check_req(sk, skb, req, false, &req_stolen); } if (!nsk) { reqsk_put(req); + if (req_stolen) { + /* Another cpu got exclusive access to req + * and created a full blown socket. + * Try to feed this packet to this socket + * instead of discarding it. + */ + tcp_v4_restore_cb(skb); + sock_put(sk); + goto lookup; + } goto discard_and_relse; } if (nsk == sk) { @@ -2387,6 +2398,7 @@ static void __net_exit tcp4_proc_exit_net(struct net *net) static struct pernet_operations tcp4_net_ops = { .init = tcp4_proc_init_net, .exit = tcp4_proc_exit_net, + .async = true, }; int __init tcp4_proc_init(void) @@ -2573,6 +2585,7 @@ static struct pernet_operations __net_initdata tcp_sk_ops = { .init = tcp_sk_init, .exit = tcp_sk_exit, .exit_batch = tcp_sk_exit_batch, + .async = true, }; void __init tcp_v4_init(void) diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 03b51cdcc731..aa6fea9f3328 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -1024,6 +1024,7 @@ static void __net_exit tcp_net_metrics_exit_batch(struct list_head *net_exit_lis static __net_initdata struct pernet_operations tcp_net_metrics_ops = { .init = tcp_net_metrics_init, .exit_batch = tcp_net_metrics_exit_batch, + .async = true, }; void __init tcp_metrics_init(void) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index a8384b0c11f8..e7e36433cdb5 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -578,7 +578,7 @@ EXPORT_SYMBOL(tcp_create_openreq_child); struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, struct request_sock *req, - bool fastopen) + bool fastopen, bool *req_stolen) { struct tcp_options_received tmp_opt; struct sock *child; @@ -785,6 +785,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, sock_rps_save_rxhash(child, skb); tcp_synack_rtt_meas(child, req); + *req_stolen = !own_req; return inet_csk_complete_hashdance(sk, child, req, own_req); listen_overflow: diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index e5ef7c38c934..3013404d0935 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2762,6 +2762,7 @@ static void __net_exit udp4_proc_exit_net(struct net *net) static struct pernet_operations udp4_net_ops = { .init = udp4_proc_init_net, .exit = udp4_proc_exit_net, + .async = true, }; int __init udp4_proc_init(void) diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index f96614e9b9a5..72f2c3806408 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -104,6 +104,7 @@ static void __net_exit udplite4_proc_exit_net(struct net *net) static struct pernet_operations udplite4_net_ops = { .init = udplite4_proc_init_net, .exit = udplite4_proc_exit_net, + .async = true, }; static __init int udplite4_proc_init(void) diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 05017e2c849c..796ac4115485 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -100,7 +100,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt.rt_gateway = rt->rt_gateway; xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway; xdst->u.rt.rt_pmtu = rt->rt_pmtu; - xdst->u.rt.rt_table_id = rt->rt_table_id; INIT_LIST_HEAD(&xdst->u.rt.rt_uncached); return 0; @@ -365,6 +364,7 @@ static void __net_exit xfrm4_net_exit(struct net *net) static struct pernet_operations __net_initdata xfrm4_net_ops = { .init = xfrm4_net_init, .exit = xfrm4_net_exit, + .async = true, }; static void __init xfrm4_policy_init(void) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e1846b97ee69..4facfe0b1888 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4257,6 +4257,7 @@ static void __net_exit if6_proc_net_exit(struct net *net) static struct pernet_operations if6_proc_net_ops = { .init = if6_proc_net_init, .exit = if6_proc_net_exit, + .async = true, }; int __init if6_proc_init(void) @@ -6550,6 +6551,7 @@ static void __net_exit addrconf_exit_net(struct net *net) static struct pernet_operations addrconf_ops = { .init = addrconf_init_net, .exit = addrconf_exit_net, + .async = true, }; static struct rtnl_af_ops inet6_ops __read_mostly = { diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 1d6ced37ad71..ba2e63633370 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -344,6 +344,7 @@ static void __net_exit ip6addrlbl_net_exit(struct net *net) static struct pernet_operations ipv6_addr_label_ops = { .init = ip6addrlbl_net_init, .exit = ip6addrlbl_net_exit, + .async = true, }; int __init ipv6_addr_label_init(void) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 416917719a6f..dbbe04018813 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -470,7 +470,7 @@ EXPORT_SYMBOL_GPL(inet6_destroy_sock); */ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, - int *uaddr_len, int peer) + int peer) { struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr; struct sock *sk = sock->sk; @@ -500,8 +500,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, } sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr, sk->sk_bound_dev_if); - *uaddr_len = sizeof(*sin); - return 0; + return sizeof(*sin); } EXPORT_SYMBOL(inet6_getname); @@ -858,6 +857,7 @@ static void __net_exit inet6_net_exit(struct net *net) static struct pernet_operations inet6_net_ops = { .init = inet6_net_init, .exit = inet6_net_exit, + .async = true, }; static const struct ipv6_stub ipv6_stub_impl = { diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index b240f24a6e52..95a2c9e8699a 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -368,6 +368,7 @@ static void __net_exit fib6_rules_net_exit(struct net *net) static struct pernet_operations fib6_rules_net_ops = { .init = fib6_rules_net_init, .exit = fib6_rules_net_exit, + .async = true, }; int __init fib6_rules_init(void) diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 6ae5dd3f4d0d..4fa4f1b150a4 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -997,6 +997,7 @@ static void __net_exit icmpv6_sk_exit(struct net *net) static struct pernet_operations icmpv6_sk_ops = { .init = icmpv6_sk_init, .exit = icmpv6_sk_exit, + .async = true, }; int __init icmpv6_init(void) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 92b8d8c75eed..cab95cf3b39f 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -2160,6 +2160,7 @@ static void fib6_net_exit(struct net *net) static struct pernet_operations fib6_net_ops = { .init = fib6_net_init, .exit = fib6_net_exit, + .async = true, }; int __init fib6_init(void) diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 3dab664ff503..6ddf52282894 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -873,6 +873,7 @@ static void __net_exit ip6_flowlabel_net_exit(struct net *net) static struct pernet_operations ip6_flowlabel_net_ops = { .init = ip6_flowlabel_proc_init, .exit = ip6_flowlabel_net_exit, + .async = true, }; int ip6_flowlabel_init(void) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 9f6cace9c817..295eb5ecaee5 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1397,6 +1397,7 @@ static void __net_exit ip6mr_net_exit(struct net *net) static struct pernet_operations ip6mr_net_ops = { .init = ip6mr_net_init, .exit = ip6mr_net_exit, + .async = true, }; int __init ip6_mr_init(void) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 9b9d2ff01b35..d9bb933dd5c4 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2997,6 +2997,7 @@ static void __net_exit igmp6_net_exit(struct net *net) static struct pernet_operations igmp6_net_ops = { .init = igmp6_net_init, .exit = igmp6_net_exit, + .async = true, }; int __init igmp6_init(void) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index f61a5b613b52..0a19ce3a6f7f 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1882,6 +1882,7 @@ static void __net_exit ndisc_net_exit(struct net *net) static struct pernet_operations ndisc_net_ops = { .init = ndisc_net_init, .exit = ndisc_net_exit, + .async = true, }; int __init ndisc_init(void) diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index d12c55dad7d1..318c6e914234 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -240,6 +240,7 @@ static void __net_init ping_v6_proc_exit_net(struct net *net) static struct pernet_operations ping_v6_net_ops = { .init = ping_v6_proc_init_net, .exit = ping_v6_proc_exit_net, + .async = true, }; #endif diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index b67814242f78..b8858c546f41 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -343,6 +343,7 @@ static void __net_exit ipv6_proc_exit_net(struct net *net) static struct pernet_operations ipv6_proc_ops = { .init = ipv6_proc_init_net, .exit = ipv6_proc_exit_net, + .async = true, }; int __init ipv6_misc_proc_init(void) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 4c25339b1984..10a4ac4933b7 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1332,6 +1332,7 @@ static void __net_exit raw6_exit_net(struct net *net) static struct pernet_operations raw6_net_ops = { .init = raw6_init_net, .exit = raw6_exit_net, + .async = true, }; int __init raw6_proc_init(void) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index afbc000ad4f2..b5da69c83123 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -733,6 +733,7 @@ static void __net_exit ipv6_frags_exit_net(struct net *net) static struct pernet_operations ip6_frags_ops = { .init = ipv6_frags_init_net, .exit = ipv6_frags_exit_net, + .async = true, }; int __init ipv6_frag_init(void) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9dcfadddd800..aa709b644945 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2671,14 +2671,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, if (err) goto out; rt->dst.lwtstate = lwtstate_get(lwtstate); - if (lwtunnel_output_redirect(rt->dst.lwtstate)) { - rt->dst.lwtstate->orig_output = rt->dst.output; - rt->dst.output = lwtunnel_output; - } - if (lwtunnel_input_redirect(rt->dst.lwtstate)) { - rt->dst.lwtstate->orig_input = rt->dst.input; - rt->dst.input = lwtunnel_input; - } + lwtunnel_set_redirect(&rt->dst); } ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); @@ -4979,6 +4972,7 @@ static void __net_exit ip6_route_net_exit_late(struct net *net) static struct pernet_operations ip6_route_net_ops = { .init = ip6_route_net_init, .exit = ip6_route_net_exit, + .async = true, }; static int __net_init ipv6_inetpeer_init(struct net *net) @@ -5004,11 +4998,13 @@ static void __net_exit ipv6_inetpeer_exit(struct net *net) static struct pernet_operations ipv6_inetpeer_ops = { .init = ipv6_inetpeer_init, .exit = ipv6_inetpeer_exit, + .async = true, }; static struct pernet_operations ip6_route_net_late_ops = { .init = ip6_route_net_init_late, .exit = ip6_route_net_exit_late, + .async = true, }; static struct notifier_block ip6_route_dev_notifier = { diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 7f5621d09571..c3f13c3bd8a9 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -395,6 +395,7 @@ static void __net_exit seg6_net_exit(struct net *net) static struct pernet_operations ip6_segments_ops = { .init = seg6_net_init, .exit = seg6_net_exit, + .async = true, }; static const struct genl_ops seg6_genl_ops[] = { diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index a789a8ac6a64..262f791f1b9b 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -251,6 +251,7 @@ static void __net_exit ipv6_sysctl_net_exit(struct net *net) static struct pernet_operations ipv6_sysctl_net_ops = { .init = ipv6_sysctl_net_init, .exit = ipv6_sysctl_net_exit, + .async = true, }; static struct ctl_table_header *ip6_header; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 412139f4eccd..5425d7b100ee 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1451,6 +1451,7 @@ process: if (sk->sk_state == TCP_NEW_SYN_RECV) { struct request_sock *req = inet_reqsk(sk); + bool req_stolen = false; struct sock *nsk; sk = req->rsk_listener; @@ -1470,10 +1471,20 @@ process: th = (const struct tcphdr *)skb->data; hdr = ipv6_hdr(skb); tcp_v6_fill_cb(skb, hdr, th); - nsk = tcp_check_req(sk, skb, req, false); + nsk = tcp_check_req(sk, skb, req, false, &req_stolen); } if (!nsk) { reqsk_put(req); + if (req_stolen) { + /* Another cpu got exclusive access to req + * and created a full blown socket. + * Try to feed this packet to this socket + * instead of discarding it. + */ + tcp_v6_restore_cb(skb); + sock_put(sk); + goto lookup; + } goto discard_and_relse; } if (nsk == sk) { @@ -1996,6 +2007,7 @@ static struct pernet_operations tcpv6_net_ops = { .init = tcpv6_net_init, .exit = tcpv6_net_exit, .exit_batch = tcpv6_net_exit_batch, + .async = true, }; int __init tcpv6_init(void) diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 14ae32bb1f3d..f3839780dc31 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -123,6 +123,7 @@ static void __net_exit udplite6_proc_exit_net(struct net *net) static struct pernet_operations udplite6_net_ops = { .init = udplite6_proc_init_net, .exit = udplite6_proc_exit_net, + .async = true, }; int __init udplite6_proc_init(void) diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 09fb44ee3b45..88cd0c90fa81 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -395,6 +395,7 @@ static void __net_exit xfrm6_net_exit(struct net *net) static struct pernet_operations xfrm6_net_ops = { .init = xfrm6_net_init, .exit = xfrm6_net_exit, + .async = true, }; int __init xfrm6_init(void) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 1e8cc7bcbca3..81ce15ffb878 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -989,14 +989,13 @@ done: } static int iucv_sock_getname(struct socket *sock, struct sockaddr *addr, - int *len, int peer) + int peer) { struct sockaddr_iucv *siucv = (struct sockaddr_iucv *) addr; struct sock *sk = sock->sk; struct iucv_sock *iucv = iucv_sk(sk); addr->sa_family = AF_IUCV; - *len = sizeof(struct sockaddr_iucv); if (peer) { memcpy(siucv->siucv_user_id, iucv->dst_user_id, 8); @@ -1009,7 +1008,7 @@ static int iucv_sock_getname(struct socket *sock, struct sockaddr *addr, memset(&siucv->siucv_addr, 0, sizeof(siucv->siucv_addr)); memset(&siucv->siucv_nodeid, 0, sizeof(siucv->siucv_nodeid)); - return 0; + return sizeof(struct sockaddr_iucv); } /** diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index f297d53a11aa..435594648dac 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1417,6 +1417,7 @@ static int kcm_attach(struct socket *sock, struct socket *csock, */ if (csk->sk_user_data) { write_unlock_bh(&csk->sk_callback_lock); + strp_stop(&psock->strp); strp_done(&psock->strp); kmem_cache_free(kcm_psockp, psock); return -EALREADY; diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index ff61124fdf59..4614585e1720 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -349,7 +349,7 @@ static int l2tp_ip_disconnect(struct sock *sk, int flags) } static int l2tp_ip_getname(struct socket *sock, struct sockaddr *uaddr, - int *uaddr_len, int peer) + int peer) { struct sock *sk = sock->sk; struct inet_sock *inet = inet_sk(sk); @@ -370,8 +370,7 @@ static int l2tp_ip_getname(struct socket *sock, struct sockaddr *uaddr, lsa->l2tp_conn_id = lsk->conn_id; lsa->l2tp_addr.s_addr = addr; } - *uaddr_len = sizeof(*lsa); - return 0; + return sizeof(*lsa); } static int l2tp_ip_backlog_recv(struct sock *sk, struct sk_buff *skb) diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 192344688c06..efea58b66295 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -421,7 +421,7 @@ static int l2tp_ip6_disconnect(struct sock *sk, int flags) } static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr, - int *uaddr_len, int peer) + int peer) { struct sockaddr_l2tpip6 *lsa = (struct sockaddr_l2tpip6 *)uaddr; struct sock *sk = sock->sk; @@ -449,8 +449,7 @@ static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr, } if (ipv6_addr_type(&lsa->l2tp_addr) & IPV6_ADDR_LINKLOCAL) lsa->l2tp_scope_id = sk->sk_bound_dev_if; - *uaddr_len = sizeof(*lsa); - return 0; + return sizeof(*lsa); } static int l2tp_ip6_backlog_recv(struct sock *sk, struct sk_buff *skb) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 59f246d7b290..99a03c72db4f 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -870,7 +870,7 @@ err: /* getname() support. */ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr, - int *usockaddr_len, int peer) + int peer) { int len = 0; int error = 0; @@ -969,8 +969,7 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr, memcpy(uaddr, &sp, len); } - *usockaddr_len = len; - error = 0; + error = len; sock_put(sk); end: diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index c38d16f22d2a..01dcc0823d1f 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -971,7 +971,7 @@ release: * Return the address information of a socket. */ static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr, - int *uaddrlen, int peer) + int peer) { struct sockaddr_llc sllc; struct sock *sk = sock->sk; @@ -982,7 +982,6 @@ static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr, lock_sock(sk); if (sock_flag(sk, SOCK_ZAPPED)) goto out; - *uaddrlen = sizeof(sllc); if (peer) { rc = -ENOTCONN; if (sk->sk_state != TCP_ESTABLISHED) @@ -1003,9 +1002,9 @@ static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr, IFHWADDRLEN); } } - rc = 0; sllc.sllc_family = AF_LLC; memcpy(uaddr, &sllc, sizeof(sllc)); + rc = sizeof(sllc); out: release_sock(sk); return rc; diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 0f6b8172fb9a..d72cc786c7b7 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -629,6 +629,7 @@ static void __net_exit netfilter_net_exit(struct net *net) static struct pernet_operations netfilter_net_ops = { .init = netfilter_net_init, .exit = netfilter_net_exit, + .async = true, }; int __init netfilter_init(void) diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index c2c1b16b7538..1ba3da51050d 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -577,6 +577,7 @@ static void __net_exit nf_log_net_exit(struct net *net) static struct pernet_operations nf_log_net_ops = { .init = nf_log_net_init, .exit = nf_log_net_exit, + .async = true, }; int __init netfilter_log_init(void) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 2f685ee1f9c8..a6a435d7c8f4 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1765,6 +1765,7 @@ static void __net_exit xt_net_exit(struct net *net) static struct pernet_operations xt_net_ops = { .init = xt_net_init, .exit = xt_net_exit, + .async = true, }; static int __init xt_init(void) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 2ad445c1d27c..63cb55d3c2fd 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -253,6 +253,7 @@ static struct pernet_operations netlink_tap_net_ops = { .exit = netlink_tap_exit_net, .id = &netlink_tap_net_id, .size = sizeof(struct netlink_tap_net), + .async = true, }; static bool netlink_filter_tap(const struct sk_buff *skb) @@ -1105,7 +1106,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, } static int netlink_getname(struct socket *sock, struct sockaddr *addr, - int *addr_len, int peer) + int peer) { struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); @@ -1113,7 +1114,6 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr, nladdr->nl_family = AF_NETLINK; nladdr->nl_pad = 0; - *addr_len = sizeof(*nladdr); if (peer) { nladdr->nl_pid = nlk->dst_portid; @@ -1124,7 +1124,7 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr, nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0; netlink_unlock_table(); } - return 0; + return sizeof(*nladdr); } static int netlink_ioctl(struct socket *sock, unsigned int cmd, @@ -2724,6 +2724,7 @@ static void __init netlink_add_usersock_entry(void) static struct pernet_operations __net_initdata netlink_net_ops = { .init = netlink_net_init, .exit = netlink_net_exit, + .async = true, }; static inline u32 netlink_hash(const void *data, u32 len, u32 seed) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 6f02499ef007..a6f63a5faee7 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -1035,6 +1035,7 @@ static void __net_exit genl_pernet_exit(struct net *net) static struct pernet_operations genl_pernet_ops = { .init = genl_pernet_init, .exit = genl_pernet_exit, + .async = true, }; static int __init genl_init(void) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 9ba30c63be3d..35bb6807927f 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -829,11 +829,12 @@ out_release: } static int nr_getname(struct socket *sock, struct sockaddr *uaddr, - int *uaddr_len, int peer) + int peer) { struct full_sockaddr_ax25 *sax = (struct full_sockaddr_ax25 *)uaddr; struct sock *sk = sock->sk; struct nr_sock *nr = nr_sk(sk); + int uaddr_len; memset(&sax->fsa_ax25, 0, sizeof(struct sockaddr_ax25)); @@ -848,16 +849,16 @@ static int nr_getname(struct socket *sock, struct sockaddr *uaddr, sax->fsa_ax25.sax25_call = nr->user_addr; memset(sax->fsa_digipeater, 0, sizeof(sax->fsa_digipeater)); sax->fsa_digipeater[0] = nr->dest_addr; - *uaddr_len = sizeof(struct full_sockaddr_ax25); + uaddr_len = sizeof(struct full_sockaddr_ax25); } else { sax->fsa_ax25.sax25_family = AF_NETROM; sax->fsa_ax25.sax25_ndigis = 0; sax->fsa_ax25.sax25_call = nr->source_addr; - *uaddr_len = sizeof(struct sockaddr_ax25); + uaddr_len = sizeof(struct sockaddr_ax25); } release_sock(sk); - return 0; + return uaddr_len; } int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 376040092142..ea0c0c6f1874 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -497,7 +497,7 @@ error: } static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr, - int *len, int peer) + int peer) { struct sock *sk = sock->sk; struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); @@ -510,7 +510,6 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr, llcp_sock->dsap, llcp_sock->ssap); memset(llcp_addr, 0, sizeof(*llcp_addr)); - *len = sizeof(struct sockaddr_nfc_llcp); lock_sock(sk); if (!llcp_sock->dev) { @@ -528,7 +527,7 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr, llcp_addr->service_name_len); release_sock(sk); - return 0; + return sizeof(struct sockaddr_nfc_llcp); } static inline __poll_t llcp_accept_poll(struct sock *parent) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index e0f3f4aeeb4f..2c5a6fe5d749 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3409,7 +3409,7 @@ out: } static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, - int *uaddr_len, int peer) + int peer) { struct net_device *dev; struct sock *sk = sock->sk; @@ -3424,13 +3424,12 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, if (dev) strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data)); rcu_read_unlock(); - *uaddr_len = sizeof(*uaddr); - return 0; + return sizeof(*uaddr); } static int packet_getname(struct socket *sock, struct sockaddr *uaddr, - int *uaddr_len, int peer) + int peer) { struct net_device *dev; struct sock *sk = sock->sk; @@ -3455,9 +3454,8 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr, sll->sll_halen = 0; } rcu_read_unlock(); - *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen; - return 0; + return offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen; } static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i, @@ -4559,6 +4557,7 @@ static void __net_exit packet_net_exit(struct net *net) static struct pernet_operations packet_net_ops = { .init = packet_net_init, .exit = packet_net_exit, + .async = true, }; diff --git a/net/phonet/socket.c b/net/phonet/socket.c index fffcd69f63ff..f9b40e6a18a5 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -326,7 +326,7 @@ static int pn_socket_accept(struct socket *sock, struct socket *newsock, } static int pn_socket_getname(struct socket *sock, struct sockaddr *addr, - int *sockaddr_len, int peer) + int peer) { struct sock *sk = sock->sk; struct pn_sock *pn = pn_sk(sk); @@ -337,8 +337,7 @@ static int pn_socket_getname(struct socket *sock, struct sockaddr *addr, pn_sockaddr_set_object((struct sockaddr_pn *)addr, pn->sobject); - *sockaddr_len = sizeof(struct sockaddr_pn); - return 0; + return sizeof(struct sockaddr_pn); } static __poll_t pn_socket_poll(struct file *file, struct socket *sock, diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 5fb3929e3d7d..b33e5aeb4c06 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -893,7 +893,7 @@ static int qrtr_connect(struct socket *sock, struct sockaddr *saddr, } static int qrtr_getname(struct socket *sock, struct sockaddr *saddr, - int *len, int peer) + int peer) { struct qrtr_sock *ipc = qrtr_sk(sock->sk); struct sockaddr_qrtr qaddr; @@ -912,12 +912,11 @@ static int qrtr_getname(struct socket *sock, struct sockaddr *saddr, } release_sock(sk); - *len = sizeof(qaddr); qaddr.sq_family = AF_QIPCRTR; memcpy(saddr, &qaddr, sizeof(qaddr)); - return 0; + return sizeof(qaddr); } static int qrtr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 744c637c86b0..a937f18896ae 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -110,7 +110,7 @@ void rds_wake_sk_sleep(struct rds_sock *rs) } static int rds_getname(struct socket *sock, struct sockaddr *uaddr, - int *uaddr_len, int peer) + int peer) { struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; struct rds_sock *rs = rds_sk_to_rs(sock->sk); @@ -131,8 +131,7 @@ static int rds_getname(struct socket *sock, struct sockaddr *uaddr, sin->sin_family = AF_INET; - *uaddr_len = sizeof(*sin); - return 0; + return sizeof(*sin); } /* @@ -183,6 +182,8 @@ static __poll_t rds_poll(struct file *file, struct socket *sock, mask |= (EPOLLIN | EPOLLRDNORM); if (rs->rs_snd_bytes < rds_sk_sndbuf(rs)) mask |= (EPOLLOUT | EPOLLWRNORM); + if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) + mask |= POLLERR; read_unlock_irqrestore(&rs->rs_recv_lock, flags); /* clear state any time we wake a seen-congested socket */ diff --git a/net/rds/message.c b/net/rds/message.c index 4318cc9b78f7..651834513481 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -33,6 +33,9 @@ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/export.h> +#include <linux/skbuff.h> +#include <linux/list.h> +#include <linux/errqueue.h> #include "rds.h" @@ -53,20 +56,92 @@ void rds_message_addref(struct rds_message *rm) } EXPORT_SYMBOL_GPL(rds_message_addref); +static inline bool skb_zcookie_add(struct sk_buff *skb, u32 cookie) +{ + struct sock_exterr_skb *serr = SKB_EXT_ERR(skb); + int ncookies; + u32 *ptr; + + if (serr->ee.ee_origin != SO_EE_ORIGIN_ZCOOKIE) + return false; + ncookies = serr->ee.ee_data; + if (ncookies == SO_EE_ORIGIN_MAX_ZCOOKIES) + return false; + ptr = skb_put(skb, sizeof(u32)); + *ptr = cookie; + serr->ee.ee_data = ++ncookies; + return true; +} + +static void rds_rm_zerocopy_callback(struct rds_sock *rs, + struct rds_znotifier *znotif) +{ + struct sock *sk = rds_rs_to_sk(rs); + struct sk_buff *skb, *tail; + struct sock_exterr_skb *serr; + unsigned long flags; + struct sk_buff_head *q; + u32 cookie = znotif->z_cookie; + + q = &sk->sk_error_queue; + spin_lock_irqsave(&q->lock, flags); + tail = skb_peek_tail(q); + + if (tail && skb_zcookie_add(tail, cookie)) { + spin_unlock_irqrestore(&q->lock, flags); + mm_unaccount_pinned_pages(&znotif->z_mmp); + consume_skb(rds_skb_from_znotifier(znotif)); + sk->sk_error_report(sk); + return; + } + + skb = rds_skb_from_znotifier(znotif); + serr = SKB_EXT_ERR(skb); + memset(&serr->ee, 0, sizeof(serr->ee)); + serr->ee.ee_errno = 0; + serr->ee.ee_origin = SO_EE_ORIGIN_ZCOOKIE; + serr->ee.ee_info = 0; + WARN_ON(!skb_zcookie_add(skb, cookie)); + + __skb_queue_tail(q, skb); + + spin_unlock_irqrestore(&q->lock, flags); + sk->sk_error_report(sk); + + mm_unaccount_pinned_pages(&znotif->z_mmp); +} + /* * This relies on dma_map_sg() not touching sg[].page during merging. */ static void rds_message_purge(struct rds_message *rm) { - unsigned long i; + unsigned long i, flags; + bool zcopy = false; if (unlikely(test_bit(RDS_MSG_PAGEVEC, &rm->m_flags))) return; + spin_lock_irqsave(&rm->m_rs_lock, flags); + if (rm->m_rs) { + struct rds_sock *rs = rm->m_rs; + + if (rm->data.op_mmp_znotifier) { + zcopy = true; + rds_rm_zerocopy_callback(rs, rm->data.op_mmp_znotifier); + rm->data.op_mmp_znotifier = NULL; + } + sock_put(rds_rs_to_sk(rs)); + rm->m_rs = NULL; + } + spin_unlock_irqrestore(&rm->m_rs_lock, flags); + for (i = 0; i < rm->data.op_nents; i++) { - rdsdebug("putting data page %p\n", (void *)sg_page(&rm->data.op_sg[i])); /* XXX will have to put_page for page refs */ - __free_page(sg_page(&rm->data.op_sg[i])); + if (!zcopy) + __free_page(sg_page(&rm->data.op_sg[i])); + else + put_page(sg_page(&rm->data.op_sg[i])); } rm->data.op_nents = 0; @@ -266,12 +341,14 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in return rm; } -int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from) +int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from, + bool zcopy) { unsigned long to_copy, nbytes; unsigned long sg_off; struct scatterlist *sg; int ret = 0; + int length = iov_iter_count(from); rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from)); @@ -281,6 +358,53 @@ int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from) sg = rm->data.op_sg; sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */ + if (zcopy) { + int total_copied = 0; + struct sk_buff *skb; + + skb = alloc_skb(SO_EE_ORIGIN_MAX_ZCOOKIES * sizeof(u32), + GFP_KERNEL); + if (!skb) + return -ENOMEM; + rm->data.op_mmp_znotifier = RDS_ZCOPY_SKB(skb); + if (mm_account_pinned_pages(&rm->data.op_mmp_znotifier->z_mmp, + length)) { + ret = -ENOMEM; + goto err; + } + while (iov_iter_count(from)) { + struct page *pages; + size_t start; + ssize_t copied; + + copied = iov_iter_get_pages(from, &pages, PAGE_SIZE, + 1, &start); + if (copied < 0) { + struct mmpin *mmp; + int i; + + for (i = 0; i < rm->data.op_nents; i++) + put_page(sg_page(&rm->data.op_sg[i])); + mmp = &rm->data.op_mmp_znotifier->z_mmp; + mm_unaccount_pinned_pages(mmp); + ret = -EFAULT; + goto err; + } + total_copied += copied; + iov_iter_advance(from, copied); + length -= copied; + sg_set_page(sg, pages, copied, start); + rm->data.op_nents++; + sg++; + } + WARN_ON_ONCE(length != 0); + return ret; +err: + consume_skb(skb); + rm->data.op_mmp_znotifier = NULL; + return ret; + } /* zcopy */ + while (iov_iter_count(from)) { if (!sg_page(sg)) { ret = rds_page_remainder_alloc(sg, iov_iter_count(from), diff --git a/net/rds/rds.h b/net/rds/rds.h index 7301b9b01890..31cd38852050 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -356,6 +356,19 @@ static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie) #define RDS_MSG_PAGEVEC 7 #define RDS_MSG_FLUSH 8 +struct rds_znotifier { + struct list_head z_list; + struct mmpin z_mmp; + u32 z_cookie; +}; + +#define RDS_ZCOPY_SKB(__skb) ((struct rds_znotifier *)&((__skb)->cb[0])) + +static inline struct sk_buff *rds_skb_from_znotifier(struct rds_znotifier *z) +{ + return container_of((void *)z, struct sk_buff, cb); +} + struct rds_message { refcount_t m_refcount; struct list_head m_sock_item; @@ -436,6 +449,7 @@ struct rds_message { unsigned int op_count; unsigned int op_dmasg; unsigned int op_dmaoff; + struct rds_znotifier *op_mmp_znotifier; struct scatterlist *op_sg; } data; }; @@ -771,7 +785,8 @@ rds_conn_connecting(struct rds_connection *conn) /* message.c */ struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp); struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents); -int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from); +int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from, + bool zcopy); struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len); void rds_message_populate_header(struct rds_header *hdr, __be16 sport, __be16 dport, u64 seq); diff --git a/net/rds/recv.c b/net/rds/recv.c index b25bcfe411ca..b080961464df 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -594,6 +594,8 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, if (msg_flags & MSG_OOB) goto out; + if (msg_flags & MSG_ERRQUEUE) + return sock_recv_errqueue(sk, msg, size, SOL_IP, IP_RECVERR); while (1) { /* If there are pending notifications, do those - and nothing else */ diff --git a/net/rds/send.c b/net/rds/send.c index b1b0022b8370..028ab598ac1b 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -649,7 +649,6 @@ static void rds_send_remove_from_sock(struct list_head *messages, int status) rm->rdma.op_notifier = NULL; } was_on_sock = 1; - rm->m_rs = NULL; } spin_unlock(&rs->rs_lock); @@ -756,9 +755,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) */ if (!test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) { spin_unlock_irqrestore(&cp->cp_lock, flags); - spin_lock_irqsave(&rm->m_rs_lock, flags); - rm->m_rs = NULL; - spin_unlock_irqrestore(&rm->m_rs_lock, flags); continue; } list_del_init(&rm->m_conn_item); @@ -774,7 +770,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) __rds_send_complete(rs, rm, RDS_RDMA_CANCELED); spin_unlock(&rs->rs_lock); - rm->m_rs = NULL; spin_unlock_irqrestore(&rm->m_rs_lock, flags); rds_message_put(rm); @@ -798,7 +793,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) __rds_send_complete(rs, rm, RDS_RDMA_CANCELED); spin_unlock(&rs->rs_lock); - rm->m_rs = NULL; spin_unlock_irqrestore(&rm->m_rs_lock, flags); rds_message_put(rm); @@ -849,6 +843,7 @@ static int rds_send_queue_rm(struct rds_sock *rs, struct rds_connection *conn, list_add_tail(&rm->m_sock_item, &rs->rs_send_queue); set_bit(RDS_MSG_ON_SOCK, &rm->m_flags); rds_message_addref(rm); + sock_hold(rds_rs_to_sk(rs)); rm->m_rs = rs; /* The code ordering is a little weird, but we're @@ -880,12 +875,13 @@ out: * rds_message is getting to be quite complicated, and we'd like to allocate * it all in one go. This figures out how big it needs to be up front. */ -static int rds_rm_size(struct msghdr *msg, int data_len) +static int rds_rm_size(struct msghdr *msg, int num_sgs) { struct cmsghdr *cmsg; int size = 0; int cmsg_groups = 0; int retval; + bool zcopy_cookie = false; for_each_cmsghdr(cmsg, msg) { if (!CMSG_OK(msg, cmsg)) @@ -904,6 +900,8 @@ static int rds_rm_size(struct msghdr *msg, int data_len) break; + case RDS_CMSG_ZCOPY_COOKIE: + zcopy_cookie = true; case RDS_CMSG_RDMA_DEST: case RDS_CMSG_RDMA_MAP: cmsg_groups |= 2; @@ -924,7 +922,10 @@ static int rds_rm_size(struct msghdr *msg, int data_len) } - size += ceil(data_len, PAGE_SIZE) * sizeof(struct scatterlist); + if ((msg->msg_flags & MSG_ZEROCOPY) && !zcopy_cookie) + return -EINVAL; + + size += num_sgs * sizeof(struct scatterlist); /* Ensure (DEST, MAP) are never used with (ARGS, ATOMIC) */ if (cmsg_groups == 3) @@ -933,6 +934,18 @@ static int rds_rm_size(struct msghdr *msg, int data_len) return size; } +static int rds_cmsg_zcopy(struct rds_sock *rs, struct rds_message *rm, + struct cmsghdr *cmsg) +{ + u32 *cookie; + + if (cmsg->cmsg_len < CMSG_LEN(sizeof(*cookie))) + return -EINVAL; + cookie = CMSG_DATA(cmsg); + rm->data.op_mmp_znotifier->z_cookie = *cookie; + return 0; +} + static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, struct msghdr *msg, int *allocated_mr) { @@ -975,6 +988,10 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, ret = rds_cmsg_atomic(rs, rm, cmsg); break; + case RDS_CMSG_ZCOPY_COOKIE: + ret = rds_cmsg_zcopy(rs, rm, cmsg); + break; + default: return -EINVAL; } @@ -1045,10 +1062,13 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) long timeo = sock_sndtimeo(sk, nonblock); struct rds_conn_path *cpath; size_t total_payload_len = payload_len, rdma_payload_len = 0; + bool zcopy = ((msg->msg_flags & MSG_ZEROCOPY) && + sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY)); + int num_sgs = ceil(payload_len, PAGE_SIZE); /* Mirror Linux UDP mirror of BSD error message compatibility */ /* XXX: Perhaps MSG_MORE someday */ - if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_CMSG_COMPAT)) { + if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_CMSG_COMPAT | MSG_ZEROCOPY)) { ret = -EOPNOTSUPP; goto out; } @@ -1092,8 +1112,15 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) goto out; } + if (zcopy) { + if (rs->rs_transport->t_type != RDS_TRANS_TCP) { + ret = -EOPNOTSUPP; + goto out; + } + num_sgs = iov_iter_npages(&msg->msg_iter, INT_MAX); + } /* size of rm including all sgs */ - ret = rds_rm_size(msg, payload_len); + ret = rds_rm_size(msg, num_sgs); if (ret < 0) goto out; @@ -1105,12 +1132,12 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) /* Attach data to the rm */ if (payload_len) { - rm->data.op_sg = rds_message_alloc_sgs(rm, ceil(payload_len, PAGE_SIZE)); + rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs); if (!rm->data.op_sg) { ret = -ENOMEM; goto out; } - ret = rds_message_copy_from_user(rm, &msg->msg_iter); + ret = rds_message_copy_from_user(rm, &msg->msg_iter, zcopy); if (ret) goto out; } diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 44c4652721af..08230a145042 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -227,7 +227,6 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len, struct rds_tcp_connection *tc; unsigned long flags; struct sockaddr_in sin; - int sinlen; struct socket *sock; spin_lock_irqsave(&rds_tcp_tc_list_lock, flags); @@ -239,12 +238,10 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len, sock = tc->t_sock; if (sock) { - sock->ops->getname(sock, (struct sockaddr *)&sin, - &sinlen, 0); + sock->ops->getname(sock, (struct sockaddr *)&sin, 0); tsinfo.local_addr = sin.sin_addr.s_addr; tsinfo.local_port = sin.sin_port; - sock->ops->getname(sock, (struct sockaddr *)&sin, - &sinlen, 1); + sock->ops->getname(sock, (struct sockaddr *)&sin, 1); tsinfo.peer_addr = sin.sin_addr.s_addr; tsinfo.peer_port = sin.sin_port; } diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 083bd251406f..5170373b797c 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -938,7 +938,7 @@ out_release: } static int rose_getname(struct socket *sock, struct sockaddr *uaddr, - int *uaddr_len, int peer) + int peer) { struct full_sockaddr_rose *srose = (struct full_sockaddr_rose *)uaddr; struct sock *sk = sock->sk; @@ -964,8 +964,7 @@ static int rose_getname(struct socket *sock, struct sockaddr *uaddr, srose->srose_digis[n] = rose->source_digis[n]; } - *uaddr_len = sizeof(struct full_sockaddr_rose); - return 0; + return sizeof(struct full_sockaddr_rose); } int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct rose_neigh *neigh, unsigned int lci) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index eba6682727dd..1f65d6ada9ff 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -202,7 +202,8 @@ nla_put_failure: int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, struct netlink_callback *cb, int type, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct tcf_idrinfo *idrinfo = tn->idrinfo; @@ -211,7 +212,8 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, } else if (type == RTM_GETACTION) { return tcf_dump_walker(idrinfo, skb, cb); } else { - WARN(1, "tcf_generic_walker: unknown action %d\n", type); + WARN(1, "tcf_generic_walker: unknown command %d\n", type); + NL_SET_ERR_MSG(extack, "tcf_generic_walker: unknown command"); return -EINVAL; } } @@ -605,7 +607,8 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb) struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, - char *name, int ovr, int bind) + char *name, int ovr, int bind, + struct netlink_ext_ack *extack) { struct tc_action *a; struct tc_action_ops *a_o; @@ -616,31 +619,40 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, int err; if (name == NULL) { - err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL); + err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack); if (err < 0) goto err_out; err = -EINVAL; kind = tb[TCA_ACT_KIND]; - if (kind == NULL) + if (!kind) { + NL_SET_ERR_MSG(extack, "TC action kind must be specified"); goto err_out; - if (nla_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ) + } + if (nla_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ) { + NL_SET_ERR_MSG(extack, "TC action name too long"); goto err_out; + } if (tb[TCA_ACT_COOKIE]) { int cklen = nla_len(tb[TCA_ACT_COOKIE]); - if (cklen > TC_COOKIE_MAX_SIZE) + if (cklen > TC_COOKIE_MAX_SIZE) { + NL_SET_ERR_MSG(extack, "TC cookie size above the maximum"); goto err_out; + } cookie = nla_memdup_cookie(tb); if (!cookie) { + NL_SET_ERR_MSG(extack, "No memory to generate TC cookie"); err = -ENOMEM; goto err_out; } } } else { - err = -EINVAL; - if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ) + if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ) { + NL_SET_ERR_MSG(extack, "TC action name too long"); + err = -EINVAL; goto err_out; + } } a_o = tc_lookup_action_n(act_name); @@ -663,15 +675,17 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, goto err_mod; } #endif + NL_SET_ERR_MSG(extack, "Failed to load TC action module"); err = -ENOENT; goto err_out; } /* backward compatibility for policer */ if (name == NULL) - err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind); + err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind, + extack); else - err = a_o->init(net, nla, est, &a, ovr, bind); + err = a_o->init(net, nla, est, &a, ovr, bind, extack); if (err < 0) goto err_mod; @@ -697,6 +711,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, list_add_tail(&a->list, &actions); tcf_action_destroy(&actions, bind); + NL_SET_ERR_MSG(extack, "Failed to init TC action chain"); return ERR_PTR(err); } } @@ -726,19 +741,20 @@ static void cleanup_a(struct list_head *actions, int ovr) int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, char *name, int ovr, int bind, - struct list_head *actions) + struct list_head *actions, struct netlink_ext_ack *extack) { struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; struct tc_action *act; int err; int i; - err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, NULL); + err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack); if (err < 0) return err; for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { - act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind); + act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind, + extack); if (IS_ERR(act)) { err = PTR_ERR(act); goto err; @@ -822,7 +838,7 @@ static int tca_get_fill(struct sk_buff *skb, struct list_head *actions, t->tca__pad2 = 0; nest = nla_nest_start(skb, TCA_ACT_TAB); - if (nest == NULL) + if (!nest) goto out_nlmsg_trim; if (tcf_action_dump(skb, actions, bind, ref) < 0) @@ -840,7 +856,8 @@ out_nlmsg_trim: static int tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n, - struct list_head *actions, int event) + struct list_head *actions, int event, + struct netlink_ext_ack *extack) { struct sk_buff *skb; @@ -849,6 +866,7 @@ tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n, return -ENOBUFS; if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, event, 0, 0) <= 0) { + NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while adding TC action"); kfree_skb(skb); return -EINVAL; } @@ -857,7 +875,8 @@ tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n, } static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla, - struct nlmsghdr *n, u32 portid) + struct nlmsghdr *n, u32 portid, + struct netlink_ext_ack *extack) { struct nlattr *tb[TCA_ACT_MAX + 1]; const struct tc_action_ops *ops; @@ -865,22 +884,26 @@ static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla, int index; int err; - err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL); + err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack); if (err < 0) goto err_out; err = -EINVAL; if (tb[TCA_ACT_INDEX] == NULL || - nla_len(tb[TCA_ACT_INDEX]) < sizeof(index)) + nla_len(tb[TCA_ACT_INDEX]) < sizeof(index)) { + NL_SET_ERR_MSG(extack, "Invalid TC action index value"); goto err_out; + } index = nla_get_u32(tb[TCA_ACT_INDEX]); err = -EINVAL; ops = tc_lookup_action(tb[TCA_ACT_KIND]); - if (!ops) /* could happen in batch of actions */ + if (!ops) { /* could happen in batch of actions */ + NL_SET_ERR_MSG(extack, "Specified TC action not found"); goto err_out; + } err = -ENOENT; - if (ops->lookup(net, &a, index) == 0) + if (ops->lookup(net, &a, index, extack) == 0) goto err_mod; module_put(ops->owner); @@ -893,7 +916,8 @@ err_out: } static int tca_action_flush(struct net *net, struct nlattr *nla, - struct nlmsghdr *n, u32 portid) + struct nlmsghdr *n, u32 portid, + struct netlink_ext_ack *extack) { struct sk_buff *skb; unsigned char *b; @@ -907,39 +931,45 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, int err = -ENOMEM; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (!skb) { - pr_debug("tca_action_flush: failed skb alloc\n"); + if (!skb) return err; - } b = skb_tail_pointer(skb); - err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL); + err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack); if (err < 0) goto err_out; err = -EINVAL; kind = tb[TCA_ACT_KIND]; ops = tc_lookup_action(kind); - if (!ops) /*some idjot trying to flush unknown action */ + if (!ops) { /*some idjot trying to flush unknown action */ + NL_SET_ERR_MSG(extack, "Cannot flush unknown TC action"); goto err_out; + } nlh = nlmsg_put(skb, portid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t), 0); - if (!nlh) + if (!nlh) { + NL_SET_ERR_MSG(extack, "Failed to create TC action flush notification"); goto out_module_put; + } t = nlmsg_data(nlh); t->tca_family = AF_UNSPEC; t->tca__pad1 = 0; t->tca__pad2 = 0; nest = nla_nest_start(skb, TCA_ACT_TAB); - if (nest == NULL) + if (!nest) { + NL_SET_ERR_MSG(extack, "Failed to add new netlink message"); goto out_module_put; + } - err = ops->walk(net, skb, &dcb, RTM_DELACTION, ops); - if (err <= 0) + err = ops->walk(net, skb, &dcb, RTM_DELACTION, ops, extack); + if (err <= 0) { + nla_nest_cancel(skb, nest); goto out_module_put; + } nla_nest_end(skb, nest); @@ -950,6 +980,8 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, n->nlmsg_flags & NLM_F_ECHO); if (err > 0) return 0; + if (err < 0) + NL_SET_ERR_MSG(extack, "Failed to send TC action flush notification"); return err; @@ -962,7 +994,7 @@ err_out: static int tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions, - u32 portid) + u32 portid, struct netlink_ext_ack *extack) { int ret; struct sk_buff *skb; @@ -973,6 +1005,7 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions, if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, RTM_DELACTION, 0, 1) <= 0) { + NL_SET_ERR_MSG(extack, "Failed to fill netlink TC action attributes"); kfree_skb(skb); return -EINVAL; } @@ -980,6 +1013,7 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions, /* now do the delete */ ret = tcf_action_destroy(actions, 0); if (ret < 0) { + NL_SET_ERR_MSG(extack, "Failed to delete TC action"); kfree_skb(skb); return ret; } @@ -993,26 +1027,27 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions, static int tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, - u32 portid, int event) + u32 portid, int event, struct netlink_ext_ack *extack) { int i, ret; struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; struct tc_action *act; LIST_HEAD(actions); - ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, NULL); + ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack); if (ret < 0) return ret; if (event == RTM_DELACTION && n->nlmsg_flags & NLM_F_ROOT) { - if (tb[1] != NULL) - return tca_action_flush(net, tb[1], n, portid); - else - return -EINVAL; + if (tb[1]) + return tca_action_flush(net, tb[1], n, portid, extack); + + NL_SET_ERR_MSG(extack, "Invalid netlink attributes while flushing TC action"); + return -EINVAL; } for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { - act = tcf_action_get_1(net, tb[i], n, portid); + act = tcf_action_get_1(net, tb[i], n, portid, extack); if (IS_ERR(act)) { ret = PTR_ERR(act); goto err; @@ -1022,9 +1057,9 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, } if (event == RTM_GETACTION) - ret = tcf_get_notify(net, portid, n, &actions, event); + ret = tcf_get_notify(net, portid, n, &actions, event, extack); else { /* delete */ - ret = tcf_del_notify(net, n, &actions, portid); + ret = tcf_del_notify(net, n, &actions, portid, extack); if (ret) goto err; return ret; @@ -1037,7 +1072,7 @@ err: static int tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions, - u32 portid) + u32 portid, struct netlink_ext_ack *extack) { struct sk_buff *skb; int err = 0; @@ -1048,6 +1083,7 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions, if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, n->nlmsg_flags, RTM_NEWACTION, 0, 0) <= 0) { + NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while deleting TC action"); kfree_skb(skb); return -EINVAL; } @@ -1060,16 +1096,18 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions, } static int tcf_action_add(struct net *net, struct nlattr *nla, - struct nlmsghdr *n, u32 portid, int ovr) + struct nlmsghdr *n, u32 portid, int ovr, + struct netlink_ext_ack *extack) { int ret = 0; LIST_HEAD(actions); - ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions); + ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions, + extack); if (ret) return ret; - return tcf_add_notify(net, n, &actions, portid); + return tcf_add_notify(net, n, &actions, portid, extack); } static u32 tcaa_root_flags_allowed = TCA_FLAG_LARGE_DUMP_ON; @@ -1097,7 +1135,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, return ret; if (tca[TCA_ACT_TAB] == NULL) { - pr_notice("tc_ctl_action: received NO action attribs\n"); + NL_SET_ERR_MSG(extack, "Netlink action attributes missing"); return -EINVAL; } @@ -1113,17 +1151,18 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, if (n->nlmsg_flags & NLM_F_REPLACE) ovr = 1; replay: - ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr); + ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr, + extack); if (ret == -EAGAIN) goto replay; break; case RTM_DELACTION: ret = tca_action_gd(net, tca[TCA_ACT_TAB], n, - portid, RTM_DELACTION); + portid, RTM_DELACTION, extack); break; case RTM_GETACTION: ret = tca_action_gd(net, tca[TCA_ACT_TAB], n, - portid, RTM_GETACTION); + portid, RTM_GETACTION, extack); break; default: BUG(); @@ -1218,7 +1257,7 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) if (nest == NULL) goto out_module_put; - ret = a_o->walk(net, skb, cb, RTM_GETACTION, a_o); + ret = a_o->walk(net, skb, cb, RTM_GETACTION, a_o, NULL); if (ret < 0) goto out_module_put; @@ -1454,6 +1493,7 @@ static struct pernet_operations tcf_action_net_ops = { .exit = tcf_action_net_exit, .id = &tcf_action_net_id, .size = sizeof(struct tcf_action_net), + .async = true, }; static int __init tc_action_init(void) diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index b3f2c15affa7..cb3c5d403c88 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -272,7 +272,7 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog, static int tcf_bpf_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **act, - int replace, int bind) + int replace, int bind, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, bpf_net_id); struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; @@ -367,14 +367,16 @@ static void tcf_bpf_cleanup(struct tc_action *act) static int tcf_bpf_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, bpf_net_id); - return tcf_generic_walker(tn, skb, cb, type, ops); + return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index) +static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, bpf_net_id); diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 2b15ba84e0c8..e4b880fa51fe 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -96,7 +96,8 @@ static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = { static int tcf_connmark_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind) + int ovr, int bind, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, connmark_net_id); struct nlattr *tb[TCA_CONNMARK_MAX + 1]; @@ -176,14 +177,16 @@ nla_put_failure: static int tcf_connmark_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, connmark_net_id); - return tcf_generic_walker(tn, skb, cb, type, ops); + return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index) +static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, connmark_net_id); diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index b7ba9b06b147..d5c2e528d150 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -46,7 +46,7 @@ static struct tc_action_ops act_csum_ops; static int tcf_csum_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, - int bind) + int bind, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, csum_net_id); struct tcf_csum_params *params_old, *params_new; @@ -631,14 +631,16 @@ static void tcf_csum_cleanup(struct tc_action *a) static int tcf_csum_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, csum_net_id); - return tcf_generic_walker(tn, skb, cb, type, ops); + return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index) +static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, csum_net_id); diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index b56986d41c87..f072bcf33760 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -56,7 +56,7 @@ static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = { static int tcf_gact_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind) + int ovr, int bind, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, gact_net_id); struct nlattr *tb[TCA_GACT_MAX + 1]; @@ -201,14 +201,16 @@ nla_put_failure: static int tcf_gact_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, gact_net_id); - return tcf_generic_walker(tn, skb, cb, type, ops); + return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index) +static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, gact_net_id); diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 5954e992685a..a5994cf0512b 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -447,7 +447,7 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, static int tcf_ife_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind) + int ovr, int bind, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, ife_net_id); struct nlattr *tb[TCA_IFE_MAX + 1]; @@ -824,14 +824,16 @@ static int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a, static int tcf_ife_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, ife_net_id); - return tcf_generic_walker(tn, skb, cb, type, ops); + return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index) +static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, ife_net_id); diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 06e380ae0928..9784629090ad 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -193,7 +193,7 @@ err1: static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, - int bind) + int bind, struct netlink_ext_ack *extack) { return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr, bind); @@ -201,7 +201,7 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, static int tcf_xt_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, - int bind) + int bind, struct netlink_ext_ack *extack) { return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr, bind); @@ -303,14 +303,16 @@ nla_put_failure: static int tcf_ipt_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, ipt_net_id); - return tcf_generic_walker(tn, skb, cb, type, ops); + return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index) +static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, ipt_net_id); @@ -351,14 +353,16 @@ static struct pernet_operations ipt_net_ops = { static int tcf_xt_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, xt_net_id); - return tcf_generic_walker(tn, skb, cb, type, ops); + return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index) +static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, xt_net_id); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index e6ff88f72900..fd34015331ab 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -69,7 +69,7 @@ static struct tc_action_ops act_mirred_ops; static int tcf_mirred_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, - int bind) + int bind, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, mirred_net_id); struct nlattr *tb[TCA_MIRRED_MAX + 1]; @@ -80,13 +80,17 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, bool exists = false; int ret; - if (nla == NULL) + if (!nla) { + NL_SET_ERR_MSG_MOD(extack, "Mirred requires attributes to be passed"); return -EINVAL; - ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy, NULL); + } + ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy, extack); if (ret < 0) return ret; - if (tb[TCA_MIRRED_PARMS] == NULL) + if (!tb[TCA_MIRRED_PARMS]) { + NL_SET_ERR_MSG_MOD(extack, "Missing required mirred parameters"); return -EINVAL; + } parm = nla_data(tb[TCA_MIRRED_PARMS]); exists = tcf_idr_check(tn, parm->index, a, bind); @@ -102,6 +106,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, default: if (exists) tcf_idr_release(*a, bind); + NL_SET_ERR_MSG_MOD(extack, "Unknown mirred option"); return -EINVAL; } if (parm->ifindex) { @@ -117,8 +122,10 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, } if (!exists) { - if (dev == NULL) + if (!dev) { + NL_SET_ERR_MSG_MOD(extack, "Specified device does not exist"); return -EINVAL; + } ret = tcf_idr_create(tn, parm->index, est, a, &act_mirred_ops, bind, true); if (ret) @@ -265,14 +272,16 @@ nla_put_failure: static int tcf_mirred_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, mirred_net_id); - return tcf_generic_walker(tn, skb, cb, type, ops); + return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_mirred_search(struct net *net, struct tc_action **a, u32 index) +static int tcf_mirred_search(struct net *net, struct tc_action **a, u32 index, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, mirred_net_id); diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 98c6a4b2f523..4b5848b6c252 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -37,7 +37,8 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = { }; static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, - struct tc_action **a, int ovr, int bind) + struct tc_action **a, int ovr, int bind, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, nat_net_id); struct nlattr *tb[TCA_NAT_MAX + 1]; @@ -277,14 +278,16 @@ nla_put_failure: static int tcf_nat_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, nat_net_id); - return tcf_generic_walker(tn, skb, cb, type, ops); + return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index) +static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, nat_net_id); diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 349beaffb29e..094303c27c5e 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -132,7 +132,7 @@ static int tcf_pedit_key_ex_dump(struct sk_buff *skb, static int tcf_pedit_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind) + int ovr, int bind, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, pedit_net_id); struct nlattr *tb[TCA_PEDIT_MAX + 1]; @@ -419,14 +419,16 @@ nla_put_failure: static int tcf_pedit_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, pedit_net_id); - return tcf_generic_walker(tn, skb, cb, type, ops); + return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index) +static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, pedit_net_id); diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 95d3c9097b25..ff55bd6c7db0 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -58,11 +58,12 @@ static struct tc_action_ops act_police_ops; static int tcf_act_police_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, police_net_id); - return tcf_generic_walker(tn, skb, cb, type, ops); + return tcf_generic_walker(tn, skb, cb, type, ops, extack); } static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = { @@ -74,7 +75,8 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = { static int tcf_act_police_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind) + int ovr, int bind, + struct netlink_ext_ack *extack) { int ret = 0, err; struct nlattr *tb[TCA_POLICE_MAX + 1]; @@ -304,7 +306,8 @@ nla_put_failure: return -1; } -static int tcf_police_search(struct net *net, struct tc_action **a, u32 index) +static int tcf_police_search(struct net *net, struct tc_action **a, u32 index, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, police_net_id); diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 1ba0df238756..9765145aaf40 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -37,7 +37,7 @@ static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = { static int tcf_sample_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, - int bind) + int bind, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, sample_net_id); struct nlattr *tb[TCA_SAMPLE_MAX + 1]; @@ -202,14 +202,16 @@ nla_put_failure: static int tcf_sample_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, sample_net_id); - return tcf_generic_walker(tn, skb, cb, type, ops); + return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index) +static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, sample_net_id); diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 425eac11f6da..8244e221fe4f 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -79,7 +79,7 @@ static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = { static int tcf_simp_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind) + int ovr, int bind, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, simp_net_id); struct nlattr *tb[TCA_DEF_MAX + 1]; @@ -170,14 +170,16 @@ nla_put_failure: static int tcf_simp_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, simp_net_id); - return tcf_generic_walker(tn, skb, cb, type, ops); + return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index) +static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, simp_net_id); diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 5a3f691bb545..ddf69fc01bdf 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -66,7 +66,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = { static int tcf_skbedit_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind) + int ovr, int bind, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, skbedit_net_id); struct nlattr *tb[TCA_SKBEDIT_MAX + 1]; @@ -208,14 +208,16 @@ nla_put_failure: static int tcf_skbedit_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, skbedit_net_id); - return tcf_generic_walker(tn, skb, cb, type, ops); + return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index) +static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, skbedit_net_id); diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index fa975262dbac..a406f191cb84 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -84,7 +84,7 @@ static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = { static int tcf_skbmod_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind) + int ovr, int bind, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, skbmod_net_id); struct nlattr *tb[TCA_SKBMOD_MAX + 1]; @@ -232,14 +232,16 @@ nla_put_failure: static int tcf_skbmod_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, skbmod_net_id); - return tcf_generic_walker(tn, skb, cb, type, ops); + return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index) +static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, skbmod_net_id); diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 0e23aac09ad6..41ff9d0e5c62 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -70,7 +70,7 @@ static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = { static int tunnel_key_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind) + int ovr, int bind, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1]; @@ -291,14 +291,16 @@ nla_put_failure: static int tunnel_key_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); - return tcf_generic_walker(tn, skb, cb, type, ops); + return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index) +static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index e1a1b3f3983a..71411a255f04 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -109,7 +109,7 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = { static int tcf_vlan_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind) + int ovr, int bind, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, vlan_net_id); struct nlattr *tb[TCA_VLAN_MAX + 1]; @@ -267,14 +267,16 @@ nla_put_failure: static int tcf_vlan_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, vlan_net_id); - return tcf_generic_walker(tn, skb, cb, type, ops); + return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index) +static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, vlan_net_id); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index a7dc7271042a..3e14d38e5d42 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1432,7 +1432,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, if (exts->police && tb[exts->police]) { act = tcf_action_init_1(net, tp, tb[exts->police], rate_tlv, "police", ovr, - TCA_ACT_BIND); + TCA_ACT_BIND, extack); if (IS_ERR(act)) return PTR_ERR(act); @@ -1445,7 +1445,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, err = tcf_action_init(net, tp, tb[exts->action], rate_tlv, NULL, ovr, TCA_ACT_BIND, - &actions); + &actions, extack); if (err) return err; list_for_each_entry(act, &actions, list) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index d512f49ee83c..27e672c12492 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -2128,6 +2128,7 @@ static void __net_exit psched_net_exit(struct net *net) static struct pernet_operations psched_net_ops = { .init = psched_net_init, .exit = psched_net_exit, + .async = true, }; static int __init pktsched_init(void) diff --git a/net/sctp/Makefile b/net/sctp/Makefile index 6776582ec449..e845e4588535 100644 --- a/net/sctp/Makefile +++ b/net/sctp/Makefile @@ -15,6 +15,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ offload.o stream_sched.o stream_sched_prio.o \ stream_sched_rr.o stream_interleave.o +sctp_diag-y := diag.o + sctp-$(CONFIG_SCTP_DBG_OBJCNT) += objcnt.o sctp-$(CONFIG_PROC_FS) += proc.o sctp-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/sctp/sctp_diag.c b/net/sctp/diag.c index a72a7d925d46..078f01a8d582 100644 --- a/net/sctp/sctp_diag.c +++ b/net/sctp/diag.c @@ -1,3 +1,34 @@ +/* SCTP kernel implementation + * (C) Copyright Red Hat Inc. 2017 + * + * This file is part of the SCTP kernel implementation + * + * These functions implement sctp diag support. + * + * This SCTP implementation is free software; + * you can redistribute it and/or modify it under the terms of + * the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This SCTP implementation is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied + * ************************ + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. + * + * Please send any bug reports or fixes you make to the + * email addresched(es): + * lksctp developers <linux-sctp@vger.kernel.org> + * + * Written or modified by: + * Xin Long <lucien.xin@gmail.com> + */ + #include <linux/module.h> #include <linux/inet_diag.h> #include <linux/sock_diag.h> diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index e35d4f73d2df..0d873c58e516 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -952,16 +952,16 @@ static int sctp_inet6_supported_addrs(const struct sctp_sock *opt, /* Handle SCTP_I_WANT_MAPPED_V4_ADDR for getpeername() and getsockname() */ static int sctp_getname(struct socket *sock, struct sockaddr *uaddr, - int *uaddr_len, int peer) + int peer) { int rc; - rc = inet6_getname(sock, uaddr, uaddr_len, peer); + rc = inet6_getname(sock, uaddr, peer); - if (rc != 0) + if (rc < 0) return rc; - *uaddr_len = sctp_v6_addr_to_user(sctp_sk(sock->sk), + rc = sctp_v6_addr_to_user(sctp_sk(sock->sk), (union sctp_addr *)uaddr); return rc; diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index da1a5cdefd13..38ae22b65e77 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -281,7 +281,6 @@ int smc_netinfo_by_tcpsk(struct socket *clcsock, struct in_device *in_dev; struct sockaddr_in addr; int rc = -ENOENT; - int len; if (!dst) { rc = -ENOTCONN; @@ -293,7 +292,7 @@ int smc_netinfo_by_tcpsk(struct socket *clcsock, } /* get address to which the internal TCP socket is bound */ - kernel_getsockname(clcsock, (struct sockaddr *)&addr, &len); + kernel_getsockname(clcsock, (struct sockaddr *)&addr); /* analyze IPv4 specific data of net_device belonging to TCP socket */ rcu_read_lock(); in_dev = __in_dev_get_rcu(dst->dev); @@ -771,7 +770,7 @@ static void smc_listen_work(struct work_struct *work) u8 buf[SMC_CLC_MAX_LEN]; struct smc_link *link; int reason_code = 0; - int rc = 0, len; + int rc = 0; __be32 subnet; u8 prefix_len; u8 ibport; @@ -824,7 +823,7 @@ static void smc_listen_work(struct work_struct *work) } /* get address of the peer connected to the internal TCP socket */ - kernel_getpeername(newclcsock, (struct sockaddr *)&peeraddr, &len); + kernel_getpeername(newclcsock, (struct sockaddr *)&peeraddr); /* allocate connection / link group */ mutex_lock(&smc_create_lgr_pending); @@ -1075,7 +1074,7 @@ out: } static int smc_getname(struct socket *sock, struct sockaddr *addr, - int *len, int peer) + int peer) { struct smc_sock *smc; @@ -1085,7 +1084,7 @@ static int smc_getname(struct socket *sock, struct sockaddr *addr, smc = smc_sk(sock->sk); - return smc->clcsock->ops->getname(smc->clcsock, addr, len, peer); + return smc->clcsock->ops->getname(smc->clcsock, addr, peer); } static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) diff --git a/net/socket.c b/net/socket.c index a93c99b518ca..ab58e57c09ca 100644 --- a/net/socket.c +++ b/net/socket.c @@ -104,7 +104,6 @@ #include <linux/ipv6_route.h> #include <linux/route.h> #include <linux/sockios.h> -#include <linux/atalk.h> #include <net/busy_poll.h> #include <linux/errqueue.h> @@ -991,10 +990,11 @@ static long sock_do_ioctl(struct net *net, struct socket *sock, * what to do with it - that's up to the protocol still. */ -static struct ns_common *get_net_ns(struct ns_common *ns) +struct ns_common *get_net_ns(struct ns_common *ns) { return &get_net(container_of(ns, struct net, ns))->ns; } +EXPORT_SYMBOL_GPL(get_net_ns); static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) { @@ -1573,8 +1573,9 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, goto out_fd; if (upeer_sockaddr) { - if (newsock->ops->getname(newsock, (struct sockaddr *)&address, - &len, 2) < 0) { + len = newsock->ops->getname(newsock, + (struct sockaddr *)&address, 2); + if (len < 0) { err = -ECONNABORTED; goto out_fd; } @@ -1654,7 +1655,7 @@ SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr, { struct socket *sock; struct sockaddr_storage address; - int len, err, fput_needed; + int err, fput_needed; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) @@ -1664,10 +1665,11 @@ SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr, if (err) goto out_put; - err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0); - if (err) + err = sock->ops->getname(sock, (struct sockaddr *)&address, 0); + if (err < 0) goto out_put; - err = move_addr_to_user(&address, len, usockaddr, usockaddr_len); + /* "err" is actually length in this case */ + err = move_addr_to_user(&address, err, usockaddr, usockaddr_len); out_put: fput_light(sock->file, fput_needed); @@ -1685,7 +1687,7 @@ SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr, { struct socket *sock; struct sockaddr_storage address; - int len, err, fput_needed; + int err, fput_needed; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (sock != NULL) { @@ -1695,11 +1697,10 @@ SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr, return err; } - err = - sock->ops->getname(sock, (struct sockaddr *)&address, &len, - 1); - if (!err) - err = move_addr_to_user(&address, len, usockaddr, + err = sock->ops->getname(sock, (struct sockaddr *)&address, 1); + if (err >= 0) + /* "err" is actually length in this case */ + err = move_addr_to_user(&address, err, usockaddr, usockaddr_len); fput_light(sock->file, fput_needed); } @@ -3166,17 +3167,15 @@ int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, } EXPORT_SYMBOL(kernel_connect); -int kernel_getsockname(struct socket *sock, struct sockaddr *addr, - int *addrlen) +int kernel_getsockname(struct socket *sock, struct sockaddr *addr) { - return sock->ops->getname(sock, addr, addrlen, 0); + return sock->ops->getname(sock, addr, 0); } EXPORT_SYMBOL(kernel_getsockname); -int kernel_getpeername(struct socket *sock, struct sockaddr *addr, - int *addrlen) +int kernel_getpeername(struct socket *sock, struct sockaddr *addr) { - return sock->ops->getname(sock, addr, addrlen, 1); + return sock->ops->getname(sock, addr, 1); } EXPORT_SYMBOL(kernel_getpeername); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 6e432ecd7f99..806395687bb6 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1231,7 +1231,7 @@ static const struct sockaddr_in6 rpc_in6addr_loopback = { * negative errno is returned. */ static int rpc_sockname(struct net *net, struct sockaddr *sap, size_t salen, - struct sockaddr *buf, int buflen) + struct sockaddr *buf) { struct socket *sock; int err; @@ -1269,7 +1269,7 @@ static int rpc_sockname(struct net *net, struct sockaddr *sap, size_t salen, goto out_release; } - err = kernel_getsockname(sock, buf, &buflen); + err = kernel_getsockname(sock, buf); if (err < 0) { dprintk("RPC: getsockname failed (%d)\n", err); goto out_release; @@ -1353,7 +1353,7 @@ int rpc_localaddr(struct rpc_clnt *clnt, struct sockaddr *buf, size_t buflen) rcu_read_unlock(); rpc_set_port(sap, 0); - err = rpc_sockname(net, sap, salen, buf, buflen); + err = rpc_sockname(net, sap, salen, buf); put_net(net); if (err != 0) /* Couldn't discover local address, return ANYADDR */ diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 943f2a745cd5..08cd951aaeea 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -832,12 +832,13 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt) } set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); - err = kernel_getpeername(newsock, sin, &slen); + err = kernel_getpeername(newsock, sin); if (err < 0) { net_warn_ratelimited("%s: peername failed (err %d)!\n", serv->sv_name, -err); goto failed; /* aborted connection or whatever */ } + slen = err; /* Ideally, we would want to reject connections from unauthorized * hosts here, but when we get encryption, the IP of the host won't @@ -866,7 +867,8 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt) if (IS_ERR(newsvsk)) goto failed; svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen); - err = kernel_getsockname(newsock, sin, &slen); + err = kernel_getsockname(newsock, sin); + slen = err; if (unlikely(err < 0)) { dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err); slen = offsetof(struct sockaddr, sa_data); @@ -1465,7 +1467,8 @@ int svc_addsock(struct svc_serv *serv, const int fd, char *name_return, err = PTR_ERR(svsk); goto out; } - if (kernel_getsockname(svsk->sk_sock, sin, &salen) == 0) + salen = kernel_getsockname(svsk->sk_sock, sin); + if (salen >= 0) svc_xprt_set_local(&svsk->sk_xprt, sin, salen); svc_add_new_perm_xprt(serv, &svsk->sk_xprt); return svc_one_sock_name(svsk, name_return, len); @@ -1539,10 +1542,10 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv, if (error < 0) goto bummer; - newlen = len; - error = kernel_getsockname(sock, newsin, &newlen); + error = kernel_getsockname(sock, newsin); if (error < 0) goto bummer; + newlen = error; if (protocol == IPPROTO_TCP) { if ((error = kernel_listen(sock, 64)) < 0) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index a6b8c1f8f92a..956e29c1438d 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1794,10 +1794,9 @@ static void xs_sock_set_reuseport(struct socket *sock) static unsigned short xs_sock_getport(struct socket *sock) { struct sockaddr_storage buf; - int buflen; unsigned short port = 0; - if (kernel_getsockname(sock, (struct sockaddr *)&buf, &buflen) < 0) + if (kernel_getsockname(sock, (struct sockaddr *)&buf) < 0) goto out; switch (buf.ss_family) { case AF_INET6: diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 9aed6fe1bf1a..f424539829b7 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -89,6 +89,7 @@ static void __net_exit sysctl_net_exit(struct net *net) static struct pernet_operations sysctl_pernet_ops = { .init = sysctl_net_init, .exit = sysctl_net_exit, + .async = true, }; static struct ctl_table_header *net_header; diff --git a/net/tipc/Makefile b/net/tipc/Makefile index 37bb0bfbd936..1edb7192aa2f 100644 --- a/net/tipc/Makefile +++ b/net/tipc/Makefile @@ -9,7 +9,7 @@ tipc-y += addr.o bcast.o bearer.o \ core.o link.o discover.o msg.o \ name_distr.o subscr.o monitor.o name_table.o net.o \ netlink.o netlink_compat.o node.o socket.o eth_media.o \ - server.o socket.o group.o + topsrv.o socket.o group.o tipc-$(CONFIG_TIPC_MEDIA_UDP) += udp_media.o tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 3e3dce3d4c63..f3d2e83313e1 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -956,11 +956,11 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info) int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) { - int err; - char *name; struct tipc_bearer *b; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; struct net *net = sock_net(skb->sk); + char *name; + int err; if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; @@ -987,8 +987,10 @@ int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) if (err) return err; - if (props[TIPC_NLA_PROP_TOL]) + if (props[TIPC_NLA_PROP_TOL]) { b->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]); + tipc_node_apply_tolerance(net, b); + } if (props[TIPC_NLA_PROP_PRIO]) b->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); if (props[TIPC_NLA_PROP_WIN]) diff --git a/net/tipc/core.h b/net/tipc/core.h index 20b21af2ff14..ff8b071654f5 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -64,7 +64,7 @@ struct tipc_bearer; struct tipc_bc_base; struct tipc_link; struct tipc_name_table; -struct tipc_server; +struct tipc_topsrv; struct tipc_monitor; #define TIPC_MOD_VER "2.0.0" @@ -112,7 +112,7 @@ struct tipc_net { struct list_head dist_queue; /* Topology subscription server */ - struct tipc_server *topsrv; + struct tipc_topsrv *topsrv; atomic_t subscription_count; }; @@ -131,7 +131,7 @@ static inline struct list_head *tipc_nodes(struct net *net) return &tipc_net(net)->node_list; } -static inline struct tipc_server *tipc_topsrv(struct net *net) +static inline struct tipc_topsrv *tipc_topsrv(struct net *net) { return tipc_net(net)->topsrv; } diff --git a/net/tipc/group.c b/net/tipc/group.c index 122162a31816..03086ccb7746 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -37,7 +37,7 @@ #include "addr.h" #include "group.h" #include "bcast.h" -#include "server.h" +#include "topsrv.h" #include "msg.h" #include "socket.h" #include "node.h" diff --git a/net/tipc/link.c b/net/tipc/link.c index 2d6b2aed30e0..3c230466804d 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2126,7 +2126,8 @@ void tipc_link_set_tolerance(struct tipc_link *l, u32 tol, struct sk_buff_head *xmitq) { l->tolerance = tol; - tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq); + if (link_is_up(l)) + tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq); } void tipc_link_set_prio(struct tipc_link *l, u32 prio, diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index ed0457cc99d6..e01c9c691ba2 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -326,10 +326,10 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net, /* Any subscriptions waiting for notification? */ list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) { - tipc_subscrp_report_overlap(s, publ->lower, publ->upper, - TIPC_PUBLISHED, publ->ref, - publ->node, publ->scope, - created_subseq); + tipc_sub_report_overlap(s, publ->lower, publ->upper, + TIPC_PUBLISHED, publ->ref, + publ->node, publ->scope, + created_subseq); } return publ; } @@ -397,10 +397,9 @@ found: /* Notify any waiting subscriptions */ list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) { - tipc_subscrp_report_overlap(s, publ->lower, publ->upper, - TIPC_WITHDRAWN, publ->ref, - publ->node, publ->scope, - removed_subseq); + tipc_sub_report_overlap(s, publ->lower, publ->upper, + TIPC_WITHDRAWN, publ->ref, publ->node, + publ->scope, removed_subseq); } return publ; @@ -412,33 +411,37 @@ found: * sequence overlapping with the requested sequence */ static void tipc_nameseq_subscribe(struct name_seq *nseq, - struct tipc_subscription *s, - bool status) + struct tipc_subscription *sub) { struct sub_seq *sseq = nseq->sseqs; struct tipc_name_seq ns; + struct tipc_subscr *s = &sub->evt.s; + bool no_status; - tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns); + ns.type = tipc_sub_read(s, seq.type); + ns.lower = tipc_sub_read(s, seq.lower); + ns.upper = tipc_sub_read(s, seq.upper); + no_status = tipc_sub_read(s, filter) & TIPC_SUB_NO_STATUS; - tipc_subscrp_get(s); - list_add(&s->nameseq_list, &nseq->subscriptions); + tipc_sub_get(sub); + list_add(&sub->nameseq_list, &nseq->subscriptions); - if (!status || !sseq) + if (no_status || !sseq) return; while (sseq != &nseq->sseqs[nseq->first_free]) { - if (tipc_subscrp_check_overlap(&ns, sseq->lower, sseq->upper)) { + if (tipc_sub_check_overlap(&ns, sseq->lower, sseq->upper)) { struct publication *crs; struct name_info *info = sseq->info; int must_report = 1; list_for_each_entry(crs, &info->zone_list, zone_list) { - tipc_subscrp_report_overlap(s, sseq->lower, - sseq->upper, - TIPC_PUBLISHED, - crs->ref, crs->node, - crs->scope, - must_report); + tipc_sub_report_overlap(sub, sseq->lower, + sseq->upper, + TIPC_PUBLISHED, + crs->ref, crs->node, + crs->scope, + must_report); must_report = 0; } } @@ -808,24 +811,27 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref, /** * tipc_nametbl_subscribe - add a subscription object to the name table */ -void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status) +void tipc_nametbl_subscribe(struct tipc_subscription *sub) { - struct tipc_net *tn = net_generic(s->net, tipc_net_id); - u32 type = tipc_subscrp_convert_seq_type(s->evt.s.seq.type, s->swap); + struct tipc_net *tn = tipc_net(sub->net); + struct tipc_subscr *s = &sub->evt.s; + u32 type = tipc_sub_read(s, seq.type); int index = hash(type); struct name_seq *seq; struct tipc_name_seq ns; spin_lock_bh(&tn->nametbl_lock); - seq = nametbl_find_seq(s->net, type); + seq = nametbl_find_seq(sub->net, type); if (!seq) seq = tipc_nameseq_create(type, &tn->nametbl->seq_hlist[index]); if (seq) { spin_lock_bh(&seq->lock); - tipc_nameseq_subscribe(seq, s, status); + tipc_nameseq_subscribe(seq, sub); spin_unlock_bh(&seq->lock); } else { - tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns); + ns.type = tipc_sub_read(s, seq.type); + ns.lower = tipc_sub_read(s, seq.lower); + ns.upper = tipc_sub_read(s, seq.upper); pr_warn("Failed to create subscription for {%u,%u,%u}\n", ns.type, ns.lower, ns.upper); } @@ -835,18 +841,19 @@ void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status) /** * tipc_nametbl_unsubscribe - remove a subscription object from name table */ -void tipc_nametbl_unsubscribe(struct tipc_subscription *s) +void tipc_nametbl_unsubscribe(struct tipc_subscription *sub) { - struct tipc_net *tn = net_generic(s->net, tipc_net_id); + struct tipc_subscr *s = &sub->evt.s; + struct tipc_net *tn = tipc_net(sub->net); struct name_seq *seq; - u32 type = tipc_subscrp_convert_seq_type(s->evt.s.seq.type, s->swap); + u32 type = tipc_sub_read(s, seq.type); spin_lock_bh(&tn->nametbl_lock); - seq = nametbl_find_seq(s->net, type); + seq = nametbl_find_seq(sub->net, type); if (seq != NULL) { spin_lock_bh(&seq->lock); - list_del_init(&s->nameseq_list); - tipc_subscrp_put(s); + list_del_init(&sub->nameseq_list); + tipc_sub_put(sub); if (!seq->first_free && list_empty(&seq->subscriptions)) { hlist_del_init_rcu(&seq->ns_list); kfree(seq->sseqs); diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index f56e7cb3d436..17652602d5e2 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -120,7 +120,7 @@ struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type, struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type, u32 lower, u32 node, u32 ref, u32 key); -void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status); +void tipc_nametbl_subscribe(struct tipc_subscription *s); void tipc_nametbl_unsubscribe(struct tipc_subscription *s); int tipc_nametbl_init(struct net *net); void tipc_nametbl_stop(struct net *net); diff --git a/net/tipc/node.c b/net/tipc/node.c index 9036d8756e73..389193d7cf67 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1618,6 +1618,30 @@ discard: kfree_skb(skb); } +void tipc_node_apply_tolerance(struct net *net, struct tipc_bearer *b) +{ + struct tipc_net *tn = tipc_net(net); + int bearer_id = b->identity; + struct sk_buff_head xmitq; + struct tipc_link_entry *e; + struct tipc_node *n; + + __skb_queue_head_init(&xmitq); + + rcu_read_lock(); + + list_for_each_entry_rcu(n, &tn->node_list, list) { + tipc_node_write_lock(n); + e = &n->links[bearer_id]; + if (e->link) + tipc_link_set_tolerance(e->link, b->tolerance, &xmitq); + tipc_node_write_unlock(n); + tipc_bearer_xmit(net, bearer_id, &xmitq, &e->maddr); + } + + rcu_read_unlock(); +} + int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info) { struct net *net = sock_net(skb->sk); diff --git a/net/tipc/node.h b/net/tipc/node.h index acd58d23a70e..4ce5e3a185c0 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -65,6 +65,7 @@ void tipc_node_check_dest(struct net *net, u32 onode, struct tipc_media_addr *maddr, bool *respond, bool *dupl_addr); void tipc_node_delete_links(struct net *net, int bearer_id); +void tipc_node_apply_tolerance(struct net *net, struct tipc_bearer *b); int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node, char *linkname, size_t len); int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, diff --git a/net/tipc/server.c b/net/tipc/server.c deleted file mode 100644 index df0c563c90cd..000000000000 --- a/net/tipc/server.c +++ /dev/null @@ -1,710 +0,0 @@ -/* - * net/tipc/server.c: TIPC server infrastructure - * - * Copyright (c) 2012-2013, Wind River Systems - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "server.h" -#include "core.h" -#include "socket.h" -#include "addr.h" -#include "msg.h" -#include <net/sock.h> -#include <linux/module.h> - -/* Number of messages to send before rescheduling */ -#define MAX_SEND_MSG_COUNT 25 -#define MAX_RECV_MSG_COUNT 25 -#define CF_CONNECTED 1 -#define CF_SERVER 2 - -#define sock2con(x) ((struct tipc_conn *)(x)->sk_user_data) - -/** - * struct tipc_conn - TIPC connection structure - * @kref: reference counter to connection object - * @conid: connection identifier - * @sock: socket handler associated with connection - * @flags: indicates connection state - * @server: pointer to connected server - * @rwork: receive work item - * @usr_data: user-specified field - * @rx_action: what to do when connection socket is active - * @outqueue: pointer to first outbound message in queue - * @outqueue_lock: control access to the outqueue - * @outqueue: list of connection objects for its server - * @swork: send work item - */ -struct tipc_conn { - struct kref kref; - int conid; - struct socket *sock; - unsigned long flags; - struct tipc_server *server; - struct work_struct rwork; - int (*rx_action) (struct tipc_conn *con); - void *usr_data; - struct list_head outqueue; - spinlock_t outqueue_lock; - struct work_struct swork; -}; - -/* An entry waiting to be sent */ -struct outqueue_entry { - struct list_head list; - struct kvec iov; - struct sockaddr_tipc dest; -}; - -static void tipc_recv_work(struct work_struct *work); -static void tipc_send_work(struct work_struct *work); -static void tipc_clean_outqueues(struct tipc_conn *con); - -static void tipc_conn_kref_release(struct kref *kref) -{ - struct tipc_conn *con = container_of(kref, struct tipc_conn, kref); - struct tipc_server *s = con->server; - struct sockaddr_tipc *saddr = s->saddr; - struct socket *sock = con->sock; - struct sock *sk; - - if (sock) { - sk = sock->sk; - if (test_bit(CF_SERVER, &con->flags)) { - __module_get(sock->ops->owner); - __module_get(sk->sk_prot_creator->owner); - } - saddr->scope = -TIPC_NODE_SCOPE; - kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr)); - sock_release(sock); - con->sock = NULL; - } - spin_lock_bh(&s->idr_lock); - idr_remove(&s->conn_idr, con->conid); - s->idr_in_use--; - spin_unlock_bh(&s->idr_lock); - tipc_clean_outqueues(con); - kfree(con); -} - -static void conn_put(struct tipc_conn *con) -{ - kref_put(&con->kref, tipc_conn_kref_release); -} - -static void conn_get(struct tipc_conn *con) -{ - kref_get(&con->kref); -} - -static struct tipc_conn *tipc_conn_lookup(struct tipc_server *s, int conid) -{ - struct tipc_conn *con; - - spin_lock_bh(&s->idr_lock); - con = idr_find(&s->conn_idr, conid); - if (con) { - if (!test_bit(CF_CONNECTED, &con->flags) || - !kref_get_unless_zero(&con->kref)) - con = NULL; - } - spin_unlock_bh(&s->idr_lock); - return con; -} - -static void sock_data_ready(struct sock *sk) -{ - struct tipc_conn *con; - - read_lock_bh(&sk->sk_callback_lock); - con = sock2con(sk); - if (con && test_bit(CF_CONNECTED, &con->flags)) { - conn_get(con); - if (!queue_work(con->server->rcv_wq, &con->rwork)) - conn_put(con); - } - read_unlock_bh(&sk->sk_callback_lock); -} - -static void sock_write_space(struct sock *sk) -{ - struct tipc_conn *con; - - read_lock_bh(&sk->sk_callback_lock); - con = sock2con(sk); - if (con && test_bit(CF_CONNECTED, &con->flags)) { - conn_get(con); - if (!queue_work(con->server->send_wq, &con->swork)) - conn_put(con); - } - read_unlock_bh(&sk->sk_callback_lock); -} - -static void tipc_register_callbacks(struct socket *sock, struct tipc_conn *con) -{ - struct sock *sk = sock->sk; - - write_lock_bh(&sk->sk_callback_lock); - - sk->sk_data_ready = sock_data_ready; - sk->sk_write_space = sock_write_space; - sk->sk_user_data = con; - - con->sock = sock; - - write_unlock_bh(&sk->sk_callback_lock); -} - -static void tipc_close_conn(struct tipc_conn *con) -{ - struct tipc_server *s = con->server; - struct sock *sk = con->sock->sk; - bool disconnect = false; - - write_lock_bh(&sk->sk_callback_lock); - disconnect = test_and_clear_bit(CF_CONNECTED, &con->flags); - if (disconnect) { - sk->sk_user_data = NULL; - if (con->conid) - s->tipc_conn_release(con->conid, con->usr_data); - } - write_unlock_bh(&sk->sk_callback_lock); - - /* Handle concurrent calls from sending and receiving threads */ - if (!disconnect) - return; - - /* Don't flush pending works, -just let them expire */ - kernel_sock_shutdown(con->sock, SHUT_RDWR); - conn_put(con); -} - -static struct tipc_conn *tipc_alloc_conn(struct tipc_server *s) -{ - struct tipc_conn *con; - int ret; - - con = kzalloc(sizeof(struct tipc_conn), GFP_ATOMIC); - if (!con) - return ERR_PTR(-ENOMEM); - - kref_init(&con->kref); - INIT_LIST_HEAD(&con->outqueue); - spin_lock_init(&con->outqueue_lock); - INIT_WORK(&con->swork, tipc_send_work); - INIT_WORK(&con->rwork, tipc_recv_work); - - spin_lock_bh(&s->idr_lock); - ret = idr_alloc(&s->conn_idr, con, 0, 0, GFP_ATOMIC); - if (ret < 0) { - kfree(con); - spin_unlock_bh(&s->idr_lock); - return ERR_PTR(-ENOMEM); - } - con->conid = ret; - s->idr_in_use++; - spin_unlock_bh(&s->idr_lock); - - set_bit(CF_CONNECTED, &con->flags); - con->server = s; - - return con; -} - -static int tipc_receive_from_sock(struct tipc_conn *con) -{ - struct tipc_server *s = con->server; - struct sock *sk = con->sock->sk; - struct sockaddr_tipc addr; - struct msghdr msg = {}; - struct kvec iov; - void *buf; - int ret; - - buf = kmem_cache_alloc(s->rcvbuf_cache, GFP_ATOMIC); - if (!buf) { - ret = -ENOMEM; - goto out_close; - } - - iov.iov_base = buf; - iov.iov_len = s->max_rcvbuf_size; - msg.msg_name = &addr; - iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len); - ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT); - if (ret <= 0) { - kmem_cache_free(s->rcvbuf_cache, buf); - goto out_close; - } - - read_lock_bh(&sk->sk_callback_lock); - if (test_bit(CF_CONNECTED, &con->flags)) - ret = s->tipc_conn_recvmsg(sock_net(con->sock->sk), con->conid, - &addr, con->usr_data, buf, ret); - read_unlock_bh(&sk->sk_callback_lock); - kmem_cache_free(s->rcvbuf_cache, buf); - if (ret < 0) - tipc_conn_terminate(s, con->conid); - return ret; - -out_close: - if (ret != -EWOULDBLOCK) - tipc_close_conn(con); - else if (ret == 0) - /* Don't return success if we really got EOF */ - ret = -EAGAIN; - - return ret; -} - -static int tipc_accept_from_sock(struct tipc_conn *con) -{ - struct tipc_server *s = con->server; - struct socket *sock = con->sock; - struct socket *newsock; - struct tipc_conn *newcon; - int ret; - - ret = kernel_accept(sock, &newsock, O_NONBLOCK); - if (ret < 0) - return ret; - - newcon = tipc_alloc_conn(con->server); - if (IS_ERR(newcon)) { - ret = PTR_ERR(newcon); - sock_release(newsock); - return ret; - } - - newcon->rx_action = tipc_receive_from_sock; - tipc_register_callbacks(newsock, newcon); - - /* Notify that new connection is incoming */ - newcon->usr_data = s->tipc_conn_new(newcon->conid); - if (!newcon->usr_data) { - sock_release(newsock); - conn_put(newcon); - return -ENOMEM; - } - - /* Wake up receive process in case of 'SYN+' message */ - newsock->sk->sk_data_ready(newsock->sk); - return ret; -} - -static struct socket *tipc_create_listen_sock(struct tipc_conn *con) -{ - struct tipc_server *s = con->server; - struct socket *sock = NULL; - int ret; - - ret = sock_create_kern(s->net, AF_TIPC, SOCK_SEQPACKET, 0, &sock); - if (ret < 0) - return NULL; - ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE, - (char *)&s->imp, sizeof(s->imp)); - if (ret < 0) - goto create_err; - ret = kernel_bind(sock, (struct sockaddr *)s->saddr, sizeof(*s->saddr)); - if (ret < 0) - goto create_err; - - switch (s->type) { - case SOCK_STREAM: - case SOCK_SEQPACKET: - con->rx_action = tipc_accept_from_sock; - - ret = kernel_listen(sock, 0); - if (ret < 0) - goto create_err; - break; - case SOCK_DGRAM: - case SOCK_RDM: - con->rx_action = tipc_receive_from_sock; - break; - default: - pr_err("Unknown socket type %d\n", s->type); - goto create_err; - } - - /* As server's listening socket owner and creator is the same module, - * we have to decrease TIPC module reference count to guarantee that - * it remains zero after the server socket is created, otherwise, - * executing "rmmod" command is unable to make TIPC module deleted - * after TIPC module is inserted successfully. - * - * However, the reference count is ever increased twice in - * sock_create_kern(): one is to increase the reference count of owner - * of TIPC socket's proto_ops struct; another is to increment the - * reference count of owner of TIPC proto struct. Therefore, we must - * decrement the module reference count twice to ensure that it keeps - * zero after server's listening socket is created. Of course, we - * must bump the module reference count twice as well before the socket - * is closed. - */ - module_put(sock->ops->owner); - module_put(sock->sk->sk_prot_creator->owner); - set_bit(CF_SERVER, &con->flags); - - return sock; - -create_err: - kernel_sock_shutdown(sock, SHUT_RDWR); - sock_release(sock); - return NULL; -} - -static int tipc_open_listening_sock(struct tipc_server *s) -{ - struct socket *sock; - struct tipc_conn *con; - - con = tipc_alloc_conn(s); - if (IS_ERR(con)) - return PTR_ERR(con); - - sock = tipc_create_listen_sock(con); - if (!sock) { - idr_remove(&s->conn_idr, con->conid); - s->idr_in_use--; - kfree(con); - return -EINVAL; - } - - tipc_register_callbacks(sock, con); - return 0; -} - -static struct outqueue_entry *tipc_alloc_entry(void *data, int len) -{ - struct outqueue_entry *entry; - void *buf; - - entry = kmalloc(sizeof(struct outqueue_entry), GFP_ATOMIC); - if (!entry) - return NULL; - - buf = kmemdup(data, len, GFP_ATOMIC); - if (!buf) { - kfree(entry); - return NULL; - } - - entry->iov.iov_base = buf; - entry->iov.iov_len = len; - - return entry; -} - -static void tipc_free_entry(struct outqueue_entry *e) -{ - kfree(e->iov.iov_base); - kfree(e); -} - -static void tipc_clean_outqueues(struct tipc_conn *con) -{ - struct outqueue_entry *e, *safe; - - spin_lock_bh(&con->outqueue_lock); - list_for_each_entry_safe(e, safe, &con->outqueue, list) { - list_del(&e->list); - tipc_free_entry(e); - } - spin_unlock_bh(&con->outqueue_lock); -} - -int tipc_conn_sendmsg(struct tipc_server *s, int conid, - struct sockaddr_tipc *addr, void *data, size_t len) -{ - struct outqueue_entry *e; - struct tipc_conn *con; - - con = tipc_conn_lookup(s, conid); - if (!con) - return -EINVAL; - - if (!test_bit(CF_CONNECTED, &con->flags)) { - conn_put(con); - return 0; - } - - e = tipc_alloc_entry(data, len); - if (!e) { - conn_put(con); - return -ENOMEM; - } - - if (addr) - memcpy(&e->dest, addr, sizeof(struct sockaddr_tipc)); - - spin_lock_bh(&con->outqueue_lock); - list_add_tail(&e->list, &con->outqueue); - spin_unlock_bh(&con->outqueue_lock); - - if (!queue_work(s->send_wq, &con->swork)) - conn_put(con); - return 0; -} - -void tipc_conn_terminate(struct tipc_server *s, int conid) -{ - struct tipc_conn *con; - - con = tipc_conn_lookup(s, conid); - if (con) { - tipc_close_conn(con); - conn_put(con); - } -} - -bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower, - u32 upper, u32 filter, int *conid) -{ - struct tipc_subscriber *scbr; - struct tipc_subscr sub; - struct tipc_server *s; - struct tipc_conn *con; - - sub.seq.type = type; - sub.seq.lower = lower; - sub.seq.upper = upper; - sub.timeout = TIPC_WAIT_FOREVER; - sub.filter = filter; - *(u32 *)&sub.usr_handle = port; - - con = tipc_alloc_conn(tipc_topsrv(net)); - if (IS_ERR(con)) - return false; - - *conid = con->conid; - s = con->server; - scbr = s->tipc_conn_new(*conid); - if (!scbr) { - conn_put(con); - return false; - } - - con->usr_data = scbr; - con->sock = NULL; - s->tipc_conn_recvmsg(net, *conid, NULL, scbr, &sub, sizeof(sub)); - return true; -} - -void tipc_topsrv_kern_unsubscr(struct net *net, int conid) -{ - struct tipc_conn *con; - struct tipc_server *srv; - - con = tipc_conn_lookup(tipc_topsrv(net), conid); - if (!con) - return; - - test_and_clear_bit(CF_CONNECTED, &con->flags); - srv = con->server; - if (con->conid) - srv->tipc_conn_release(con->conid, con->usr_data); - conn_put(con); - conn_put(con); -} - -static void tipc_send_kern_top_evt(struct net *net, struct tipc_event *evt) -{ - u32 port = *(u32 *)&evt->s.usr_handle; - u32 self = tipc_own_addr(net); - struct sk_buff_head evtq; - struct sk_buff *skb; - - skb = tipc_msg_create(TOP_SRV, 0, INT_H_SIZE, sizeof(*evt), - self, self, port, port, 0); - if (!skb) - return; - msg_set_dest_droppable(buf_msg(skb), true); - memcpy(msg_data(buf_msg(skb)), evt, sizeof(*evt)); - skb_queue_head_init(&evtq); - __skb_queue_tail(&evtq, skb); - tipc_sk_rcv(net, &evtq); -} - -static void tipc_send_to_sock(struct tipc_conn *con) -{ - struct tipc_server *s = con->server; - struct outqueue_entry *e; - struct tipc_event *evt; - struct msghdr msg; - int count = 0; - int ret; - - spin_lock_bh(&con->outqueue_lock); - while (test_bit(CF_CONNECTED, &con->flags)) { - e = list_entry(con->outqueue.next, struct outqueue_entry, list); - if ((struct list_head *) e == &con->outqueue) - break; - - spin_unlock_bh(&con->outqueue_lock); - - if (con->sock) { - memset(&msg, 0, sizeof(msg)); - msg.msg_flags = MSG_DONTWAIT; - if (s->type == SOCK_DGRAM || s->type == SOCK_RDM) { - msg.msg_name = &e->dest; - msg.msg_namelen = sizeof(struct sockaddr_tipc); - } - ret = kernel_sendmsg(con->sock, &msg, &e->iov, 1, - e->iov.iov_len); - if (ret == -EWOULDBLOCK || ret == 0) { - cond_resched(); - goto out; - } else if (ret < 0) { - goto send_err; - } - } else { - evt = e->iov.iov_base; - tipc_send_kern_top_evt(s->net, evt); - } - /* Don't starve users filling buffers */ - if (++count >= MAX_SEND_MSG_COUNT) { - cond_resched(); - count = 0; - } - - spin_lock_bh(&con->outqueue_lock); - list_del(&e->list); - tipc_free_entry(e); - } - spin_unlock_bh(&con->outqueue_lock); -out: - return; - -send_err: - tipc_close_conn(con); -} - -static void tipc_recv_work(struct work_struct *work) -{ - struct tipc_conn *con = container_of(work, struct tipc_conn, rwork); - int count = 0; - - while (test_bit(CF_CONNECTED, &con->flags)) { - if (con->rx_action(con)) - break; - - /* Don't flood Rx machine */ - if (++count >= MAX_RECV_MSG_COUNT) { - cond_resched(); - count = 0; - } - } - conn_put(con); -} - -static void tipc_send_work(struct work_struct *work) -{ - struct tipc_conn *con = container_of(work, struct tipc_conn, swork); - - if (test_bit(CF_CONNECTED, &con->flags)) - tipc_send_to_sock(con); - - conn_put(con); -} - -static void tipc_work_stop(struct tipc_server *s) -{ - destroy_workqueue(s->rcv_wq); - destroy_workqueue(s->send_wq); -} - -static int tipc_work_start(struct tipc_server *s) -{ - s->rcv_wq = alloc_ordered_workqueue("tipc_rcv", 0); - if (!s->rcv_wq) { - pr_err("can't start tipc receive workqueue\n"); - return -ENOMEM; - } - - s->send_wq = alloc_ordered_workqueue("tipc_send", 0); - if (!s->send_wq) { - pr_err("can't start tipc send workqueue\n"); - destroy_workqueue(s->rcv_wq); - return -ENOMEM; - } - - return 0; -} - -int tipc_server_start(struct tipc_server *s) -{ - int ret; - - spin_lock_init(&s->idr_lock); - idr_init(&s->conn_idr); - s->idr_in_use = 0; - - s->rcvbuf_cache = kmem_cache_create(s->name, s->max_rcvbuf_size, - 0, SLAB_HWCACHE_ALIGN, NULL); - if (!s->rcvbuf_cache) - return -ENOMEM; - - ret = tipc_work_start(s); - if (ret < 0) { - kmem_cache_destroy(s->rcvbuf_cache); - return ret; - } - ret = tipc_open_listening_sock(s); - if (ret < 0) { - tipc_work_stop(s); - kmem_cache_destroy(s->rcvbuf_cache); - return ret; - } - return ret; -} - -void tipc_server_stop(struct tipc_server *s) -{ - struct tipc_conn *con; - int id; - - spin_lock_bh(&s->idr_lock); - for (id = 0; s->idr_in_use; id++) { - con = idr_find(&s->conn_idr, id); - if (con) { - spin_unlock_bh(&s->idr_lock); - tipc_close_conn(con); - spin_lock_bh(&s->idr_lock); - } - } - spin_unlock_bh(&s->idr_lock); - - tipc_work_stop(s); - kmem_cache_destroy(s->rcvbuf_cache); - idr_destroy(&s->conn_idr); -} diff --git a/net/tipc/socket.c b/net/tipc/socket.c index b0323ec7971e..f93477187a90 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -665,7 +665,7 @@ exit: * a completely predictable manner). */ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, - int *uaddr_len, int peer) + int peer) { struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; struct sock *sk = sock->sk; @@ -684,13 +684,12 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, addr->addr.id.node = tn->own_addr; } - *uaddr_len = sizeof(*addr); addr->addrtype = TIPC_ADDR_ID; addr->family = AF_TIPC; addr->scope = 0; addr->addr.name.domain = 0; - return 0; + return sizeof(*addr); } /** diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 68e26470c516..6925a989569b 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -1,7 +1,7 @@ /* * net/tipc/subscr.c: TIPC network topology service * - * Copyright (c) 2000-2006, Ericsson AB + * Copyright (c) 2000-2017, Ericsson AB * Copyright (c) 2005-2007, 2010-2013, Wind River Systems * All rights reserved. * @@ -38,61 +38,30 @@ #include "name_table.h" #include "subscr.h" -/** - * struct tipc_subscriber - TIPC network topology subscriber - * @kref: reference counter to tipc_subscription object - * @conid: connection identifier to server connecting to subscriber - * @lock: control access to subscriber - * @subscrp_list: list of subscription objects for this subscriber - */ -struct tipc_subscriber { - struct kref kref; - int conid; - spinlock_t lock; - struct list_head subscrp_list; -}; - -static void tipc_subscrb_put(struct tipc_subscriber *subscriber); - -/** - * htohl - convert value to endianness used by destination - * @in: value to convert - * @swap: non-zero if endianness must be reversed - * - * Returns converted value - */ -static u32 htohl(u32 in, int swap) -{ - return swap ? swab32(in) : in; -} - -static void tipc_subscrp_send_event(struct tipc_subscription *sub, - u32 found_lower, u32 found_upper, - u32 event, u32 port_ref, u32 node) +static void tipc_sub_send_event(struct tipc_subscription *sub, + u32 found_lower, u32 found_upper, + u32 event, u32 port, u32 node) { - struct tipc_net *tn = net_generic(sub->net, tipc_net_id); - struct tipc_subscriber *subscriber = sub->subscriber; - struct kvec msg_sect; + struct tipc_event *evt = &sub->evt; - msg_sect.iov_base = (void *)&sub->evt; - msg_sect.iov_len = sizeof(struct tipc_event); - sub->evt.event = htohl(event, sub->swap); - sub->evt.found_lower = htohl(found_lower, sub->swap); - sub->evt.found_upper = htohl(found_upper, sub->swap); - sub->evt.port.ref = htohl(port_ref, sub->swap); - sub->evt.port.node = htohl(node, sub->swap); - tipc_conn_sendmsg(tn->topsrv, subscriber->conid, NULL, - msg_sect.iov_base, msg_sect.iov_len); + if (sub->inactive) + return; + tipc_evt_write(evt, event, event); + tipc_evt_write(evt, found_lower, found_lower); + tipc_evt_write(evt, found_upper, found_upper); + tipc_evt_write(evt, port.ref, port); + tipc_evt_write(evt, port.node, node); + tipc_topsrv_queue_evt(sub->net, sub->conid, event, evt); } /** - * tipc_subscrp_check_overlap - test for subscription overlap with the + * tipc_sub_check_overlap - test for subscription overlap with the * given values * * Returns 1 if there is overlap, otherwise 0. */ -int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower, - u32 found_upper) +int tipc_sub_check_overlap(struct tipc_name_seq *seq, u32 found_lower, + u32 found_upper) { if (found_lower < seq->lower) found_lower = seq->lower; @@ -103,298 +72,98 @@ int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower, return 1; } -u32 tipc_subscrp_convert_seq_type(u32 type, int swap) -{ - return htohl(type, swap); -} - -void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap, - struct tipc_name_seq *out) -{ - out->type = htohl(in->type, swap); - out->lower = htohl(in->lower, swap); - out->upper = htohl(in->upper, swap); -} - -void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, - u32 found_upper, u32 event, u32 port_ref, - u32 node, u32 scope, int must) +void tipc_sub_report_overlap(struct tipc_subscription *sub, + u32 found_lower, u32 found_upper, + u32 event, u32 port, u32 node, + u32 scope, int must) { - u32 filter = htohl(sub->evt.s.filter, sub->swap); + struct tipc_subscr *s = &sub->evt.s; + u32 filter = tipc_sub_read(s, filter); struct tipc_name_seq seq; - tipc_subscrp_convert_seq(&sub->evt.s.seq, sub->swap, &seq); - if (!tipc_subscrp_check_overlap(&seq, found_lower, found_upper)) + seq.type = tipc_sub_read(s, seq.type); + seq.lower = tipc_sub_read(s, seq.lower); + seq.upper = tipc_sub_read(s, seq.upper); + + if (!tipc_sub_check_overlap(&seq, found_lower, found_upper)) return; + if (!must && !(filter & TIPC_SUB_PORTS)) return; if (filter & TIPC_SUB_CLUSTER_SCOPE && scope == TIPC_NODE_SCOPE) return; if (filter & TIPC_SUB_NODE_SCOPE && scope != TIPC_NODE_SCOPE) return; - - tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref, - node); + spin_lock(&sub->lock); + tipc_sub_send_event(sub, found_lower, found_upper, + event, port, node); + spin_unlock(&sub->lock); } -static void tipc_subscrp_timeout(struct timer_list *t) +static void tipc_sub_timeout(struct timer_list *t) { struct tipc_subscription *sub = from_timer(sub, t, timer); - struct tipc_subscriber *subscriber = sub->subscriber; - - spin_lock_bh(&subscriber->lock); - tipc_nametbl_unsubscribe(sub); - list_del(&sub->subscrp_list); - spin_unlock_bh(&subscriber->lock); - - /* Notify subscriber of timeout */ - tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper, - TIPC_SUBSCR_TIMEOUT, 0, 0); - - tipc_subscrp_put(sub); -} - -static void tipc_subscrb_kref_release(struct kref *kref) -{ - kfree(container_of(kref,struct tipc_subscriber, kref)); -} - -static void tipc_subscrb_put(struct tipc_subscriber *subscriber) -{ - kref_put(&subscriber->kref, tipc_subscrb_kref_release); -} + struct tipc_subscr *s = &sub->evt.s; -static void tipc_subscrb_get(struct tipc_subscriber *subscriber) -{ - kref_get(&subscriber->kref); + spin_lock(&sub->lock); + tipc_sub_send_event(sub, s->seq.lower, s->seq.upper, + TIPC_SUBSCR_TIMEOUT, 0, 0); + sub->inactive = true; + spin_unlock(&sub->lock); } -static void tipc_subscrp_kref_release(struct kref *kref) +static void tipc_sub_kref_release(struct kref *kref) { - struct tipc_subscription *sub = container_of(kref, - struct tipc_subscription, - kref); - struct tipc_net *tn = net_generic(sub->net, tipc_net_id); - struct tipc_subscriber *subscriber = sub->subscriber; - - atomic_dec(&tn->subscription_count); - kfree(sub); - tipc_subscrb_put(subscriber); + kfree(container_of(kref, struct tipc_subscription, kref)); } -void tipc_subscrp_put(struct tipc_subscription *subscription) +void tipc_sub_put(struct tipc_subscription *subscription) { - kref_put(&subscription->kref, tipc_subscrp_kref_release); + kref_put(&subscription->kref, tipc_sub_kref_release); } -void tipc_subscrp_get(struct tipc_subscription *subscription) +void tipc_sub_get(struct tipc_subscription *subscription) { kref_get(&subscription->kref); } -/* tipc_subscrb_subscrp_delete - delete a specific subscription or all - * subscriptions for a given subscriber. - */ -static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber, - struct tipc_subscr *s) -{ - struct list_head *subscription_list = &subscriber->subscrp_list; - struct tipc_subscription *sub, *temp; - u32 timeout; - - spin_lock_bh(&subscriber->lock); - list_for_each_entry_safe(sub, temp, subscription_list, subscrp_list) { - if (s && memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) - continue; - - timeout = htohl(sub->evt.s.timeout, sub->swap); - if (timeout == TIPC_WAIT_FOREVER || del_timer(&sub->timer)) { - tipc_nametbl_unsubscribe(sub); - list_del(&sub->subscrp_list); - tipc_subscrp_put(sub); - } - - if (s) - break; - } - spin_unlock_bh(&subscriber->lock); -} - -static struct tipc_subscriber *tipc_subscrb_create(int conid) -{ - struct tipc_subscriber *subscriber; - - subscriber = kzalloc(sizeof(*subscriber), GFP_ATOMIC); - if (!subscriber) { - pr_warn("Subscriber rejected, no memory\n"); - return NULL; - } - INIT_LIST_HEAD(&subscriber->subscrp_list); - kref_init(&subscriber->kref); - subscriber->conid = conid; - spin_lock_init(&subscriber->lock); - - return subscriber; -} - -static void tipc_subscrb_delete(struct tipc_subscriber *subscriber) -{ - tipc_subscrb_subscrp_delete(subscriber, NULL); - tipc_subscrb_put(subscriber); -} - -static void tipc_subscrp_cancel(struct tipc_subscr *s, - struct tipc_subscriber *subscriber) -{ - tipc_subscrb_get(subscriber); - tipc_subscrb_subscrp_delete(subscriber, s); - tipc_subscrb_put(subscriber); -} - -static struct tipc_subscription *tipc_subscrp_create(struct net *net, - struct tipc_subscr *s, - int swap) +struct tipc_subscription *tipc_sub_subscribe(struct net *net, + struct tipc_subscr *s, + int conid) { - struct tipc_net *tn = net_generic(net, tipc_net_id); + u32 filter = tipc_sub_read(s, filter); struct tipc_subscription *sub; - u32 filter = htohl(s->filter, swap); + u32 timeout; - /* Refuse subscription if global limit exceeded */ - if (atomic_read(&tn->subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) { - pr_warn("Subscription rejected, limit reached (%u)\n", - TIPC_MAX_SUBSCRIPTIONS); + if ((filter & TIPC_SUB_PORTS && filter & TIPC_SUB_SERVICE) || + (tipc_sub_read(s, seq.lower) > tipc_sub_read(s, seq.upper))) { + pr_warn("Subscription rejected, illegal request\n"); return NULL; } - - /* Allocate subscription object */ sub = kmalloc(sizeof(*sub), GFP_ATOMIC); if (!sub) { pr_warn("Subscription rejected, no memory\n"); return NULL; } - - /* Initialize subscription object */ sub->net = net; - if (((filter & TIPC_SUB_PORTS) && (filter & TIPC_SUB_SERVICE)) || - (htohl(s->seq.lower, swap) > htohl(s->seq.upper, swap))) { - pr_warn("Subscription rejected, illegal request\n"); - kfree(sub); - return NULL; - } - - sub->swap = swap; + sub->conid = conid; + sub->inactive = false; memcpy(&sub->evt.s, s, sizeof(*s)); - atomic_inc(&tn->subscription_count); + spin_lock_init(&sub->lock); kref_init(&sub->kref); - return sub; -} - -static int tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s, - struct tipc_subscriber *subscriber, int swap, - bool status) -{ - struct tipc_subscription *sub = NULL; - u32 timeout; - - sub = tipc_subscrp_create(net, s, swap); - if (!sub) - return -1; - - spin_lock_bh(&subscriber->lock); - list_add(&sub->subscrp_list, &subscriber->subscrp_list); - sub->subscriber = subscriber; - tipc_nametbl_subscribe(sub, status); - tipc_subscrb_get(subscriber); - spin_unlock_bh(&subscriber->lock); - - timer_setup(&sub->timer, tipc_subscrp_timeout, 0); - timeout = htohl(sub->evt.s.timeout, swap); - + tipc_nametbl_subscribe(sub); + timer_setup(&sub->timer, tipc_sub_timeout, 0); + timeout = tipc_sub_read(&sub->evt.s, timeout); if (timeout != TIPC_WAIT_FOREVER) mod_timer(&sub->timer, jiffies + msecs_to_jiffies(timeout)); - return 0; -} - -/* Handle one termination request for the subscriber */ -static void tipc_subscrb_release_cb(int conid, void *usr_data) -{ - tipc_subscrb_delete((struct tipc_subscriber *)usr_data); -} - -/* Handle one request to create a new subscription for the subscriber */ -static int tipc_subscrb_rcv_cb(struct net *net, int conid, - struct sockaddr_tipc *addr, void *usr_data, - void *buf, size_t len) -{ - struct tipc_subscriber *subscriber = usr_data; - struct tipc_subscr *s = (struct tipc_subscr *)buf; - bool status; - int swap; - - /* Determine subscriber's endianness */ - swap = !(s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE | - TIPC_SUB_CANCEL)); - - /* Detect & process a subscription cancellation request */ - if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) { - s->filter &= ~htohl(TIPC_SUB_CANCEL, swap); - tipc_subscrp_cancel(s, subscriber); - return 0; - } - status = !(s->filter & htohl(TIPC_SUB_NO_STATUS, swap)); - return tipc_subscrp_subscribe(net, s, subscriber, swap, status); -} - -/* Handle one request to establish a new subscriber */ -static void *tipc_subscrb_connect_cb(int conid) -{ - return (void *)tipc_subscrb_create(conid); -} - -int tipc_topsrv_start(struct net *net) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - const char name[] = "topology_server"; - struct tipc_server *topsrv; - struct sockaddr_tipc *saddr; - - saddr = kzalloc(sizeof(*saddr), GFP_ATOMIC); - if (!saddr) - return -ENOMEM; - saddr->family = AF_TIPC; - saddr->addrtype = TIPC_ADDR_NAMESEQ; - saddr->addr.nameseq.type = TIPC_TOP_SRV; - saddr->addr.nameseq.lower = TIPC_TOP_SRV; - saddr->addr.nameseq.upper = TIPC_TOP_SRV; - saddr->scope = TIPC_NODE_SCOPE; - - topsrv = kzalloc(sizeof(*topsrv), GFP_ATOMIC); - if (!topsrv) { - kfree(saddr); - return -ENOMEM; - } - topsrv->net = net; - topsrv->saddr = saddr; - topsrv->imp = TIPC_CRITICAL_IMPORTANCE; - topsrv->type = SOCK_SEQPACKET; - topsrv->max_rcvbuf_size = sizeof(struct tipc_subscr); - topsrv->tipc_conn_recvmsg = tipc_subscrb_rcv_cb; - topsrv->tipc_conn_new = tipc_subscrb_connect_cb; - topsrv->tipc_conn_release = tipc_subscrb_release_cb; - - strncpy(topsrv->name, name, strlen(name) + 1); - tn->topsrv = topsrv; - atomic_set(&tn->subscription_count, 0); - - return tipc_server_start(topsrv); + return sub; } -void tipc_topsrv_stop(struct net *net) +void tipc_sub_unsubscribe(struct tipc_subscription *sub) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_server *topsrv = tn->topsrv; - - tipc_server_stop(topsrv); - kfree(topsrv->saddr); - kfree(topsrv); + tipc_nametbl_unsubscribe(sub); + if (sub->evt.s.timeout != TIPC_WAIT_FOREVER) + del_timer_sync(&sub->timer); + list_del(&sub->sub_list); + tipc_sub_put(sub); } diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index f3edca775d9f..8b2d22b18f22 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -1,7 +1,7 @@ /* * net/tipc/subscr.h: Include file for TIPC network topology service * - * Copyright (c) 2003-2006, Ericsson AB + * Copyright (c) 2003-2017, Ericsson AB * Copyright (c) 2005-2007, 2012-2013, Wind River Systems * All rights reserved. * @@ -37,48 +37,72 @@ #ifndef _TIPC_SUBSCR_H #define _TIPC_SUBSCR_H -#include "server.h" +#include "topsrv.h" -#define TIPC_MAX_SUBSCRIPTIONS 65535 -#define TIPC_MAX_PUBLICATIONS 65535 +#define TIPC_MAX_SUBSCR 65535 +#define TIPC_MAX_PUBLICATIONS 65535 struct tipc_subscription; -struct tipc_subscriber; +struct tipc_conn; /** * struct tipc_subscription - TIPC network topology subscription object * @subscriber: pointer to its subscriber * @seq: name sequence associated with subscription - * @net: point to network namespace * @timer: timer governing subscription duration (optional) * @nameseq_list: adjacent subscriptions in name sequence's subscription list - * @subscrp_list: adjacent subscriptions in subscriber's subscription list - * @swap: indicates if subscriber uses opposite endianness in its messages + * @sub_list: adjacent subscriptions in subscriber's subscription list * @evt: template for events generated by subscription */ struct tipc_subscription { struct kref kref; - struct tipc_subscriber *subscriber; struct net *net; struct timer_list timer; struct list_head nameseq_list; - struct list_head subscrp_list; - int swap; + struct list_head sub_list; struct tipc_event evt; + int conid; + bool inactive; + spinlock_t lock; /* serialize up/down and timer events */ }; -int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower, - u32 found_upper); -void tipc_subscrp_report_overlap(struct tipc_subscription *sub, - u32 found_lower, u32 found_upper, u32 event, - u32 port_ref, u32 node, u32 scope, int must); -void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap, - struct tipc_name_seq *out); -u32 tipc_subscrp_convert_seq_type(u32 type, int swap); +struct tipc_subscription *tipc_sub_subscribe(struct net *net, + struct tipc_subscr *s, + int conid); +void tipc_sub_unsubscribe(struct tipc_subscription *sub); + +int tipc_sub_check_overlap(struct tipc_name_seq *seq, u32 found_lower, + u32 found_upper); +void tipc_sub_report_overlap(struct tipc_subscription *sub, + u32 found_lower, u32 found_upper, + u32 event, u32 port, u32 node, + u32 scope, int must); int tipc_topsrv_start(struct net *net); void tipc_topsrv_stop(struct net *net); -void tipc_subscrp_put(struct tipc_subscription *subscription); -void tipc_subscrp_get(struct tipc_subscription *subscription); +void tipc_sub_put(struct tipc_subscription *subscription); +void tipc_sub_get(struct tipc_subscription *subscription); + +#define TIPC_FILTER_MASK (TIPC_SUB_PORTS | TIPC_SUB_SERVICE | TIPC_SUB_CANCEL) + +/* tipc_sub_read - return field_ of struct sub_ in host endian format + */ +#define tipc_sub_read(sub_, field_) \ + ({ \ + struct tipc_subscr *sub__ = sub_; \ + u32 val__ = (sub__)->field_; \ + int swap_ = !((sub__)->filter & TIPC_FILTER_MASK); \ + (swap_ ? swab32(val__) : val__); \ + }) + +/* tipc_evt_write - write val_ to field_ of struct evt_ in user endian format + */ +#define tipc_evt_write(evt_, field_, val_) \ + ({ \ + struct tipc_event *evt__ = evt_; \ + u32 val__ = val_; \ + int swap_ = !((evt__)->s.filter & (TIPC_FILTER_MASK)); \ + (evt__)->field_ = swap_ ? swab32(val__) : val__; \ + }) #endif diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c new file mode 100644 index 000000000000..c8e34ef22c30 --- /dev/null +++ b/net/tipc/topsrv.c @@ -0,0 +1,703 @@ +/* + * net/tipc/server.c: TIPC server infrastructure + * + * Copyright (c) 2012-2013, Wind River Systems + * Copyright (c) 2017-2018, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "subscr.h" +#include "topsrv.h" +#include "core.h" +#include "socket.h" +#include "addr.h" +#include "msg.h" +#include <net/sock.h> +#include <linux/module.h> + +/* Number of messages to send before rescheduling */ +#define MAX_SEND_MSG_COUNT 25 +#define MAX_RECV_MSG_COUNT 25 +#define CF_CONNECTED 1 +#define CF_SERVER 2 + +#define TIPC_SERVER_NAME_LEN 32 + +/** + * struct tipc_topsrv - TIPC server structure + * @conn_idr: identifier set of connection + * @idr_lock: protect the connection identifier set + * @idr_in_use: amount of allocated identifier entry + * @net: network namspace instance + * @rcvbuf_cache: memory cache of server receive buffer + * @rcv_wq: receive workqueue + * @send_wq: send workqueue + * @max_rcvbuf_size: maximum permitted receive message length + * @tipc_conn_new: callback will be called when new connection is incoming + * @tipc_conn_release: callback will be called before releasing the connection + * @tipc_conn_recvmsg: callback will be called when message arrives + * @name: server name + * @imp: message importance + * @type: socket type + */ +struct tipc_topsrv { + struct idr conn_idr; + spinlock_t idr_lock; /* for idr list */ + int idr_in_use; + struct net *net; + struct work_struct awork; + struct workqueue_struct *rcv_wq; + struct workqueue_struct *send_wq; + int max_rcvbuf_size; + struct socket *listener; + char name[TIPC_SERVER_NAME_LEN]; +}; + +/** + * struct tipc_conn - TIPC connection structure + * @kref: reference counter to connection object + * @conid: connection identifier + * @sock: socket handler associated with connection + * @flags: indicates connection state + * @server: pointer to connected server + * @sub_list: lsit to all pertaing subscriptions + * @sub_lock: lock protecting the subscription list + * @outqueue_lock: control access to the outqueue + * @rwork: receive work item + * @rx_action: what to do when connection socket is active + * @outqueue: pointer to first outbound message in queue + * @outqueue_lock: control access to the outqueue + * @swork: send work item + */ +struct tipc_conn { + struct kref kref; + int conid; + struct socket *sock; + unsigned long flags; + struct tipc_topsrv *server; + struct list_head sub_list; + spinlock_t sub_lock; /* for subscription list */ + struct work_struct rwork; + struct list_head outqueue; + spinlock_t outqueue_lock; /* for outqueue */ + struct work_struct swork; +}; + +/* An entry waiting to be sent */ +struct outqueue_entry { + bool inactive; + struct tipc_event evt; + struct list_head list; +}; + +static void tipc_conn_recv_work(struct work_struct *work); +static void tipc_conn_send_work(struct work_struct *work); +static void tipc_topsrv_kern_evt(struct net *net, struct tipc_event *evt); +static void tipc_conn_delete_sub(struct tipc_conn *con, struct tipc_subscr *s); + +static bool connected(struct tipc_conn *con) +{ + return con && test_bit(CF_CONNECTED, &con->flags); +} + +static void tipc_conn_kref_release(struct kref *kref) +{ + struct tipc_conn *con = container_of(kref, struct tipc_conn, kref); + struct tipc_topsrv *s = con->server; + struct outqueue_entry *e, *safe; + + spin_lock_bh(&s->idr_lock); + idr_remove(&s->conn_idr, con->conid); + s->idr_in_use--; + spin_unlock_bh(&s->idr_lock); + if (con->sock) + sock_release(con->sock); + + spin_lock_bh(&con->outqueue_lock); + list_for_each_entry_safe(e, safe, &con->outqueue, list) { + list_del(&e->list); + kfree(e); + } + spin_unlock_bh(&con->outqueue_lock); + kfree(con); +} + +static void conn_put(struct tipc_conn *con) +{ + kref_put(&con->kref, tipc_conn_kref_release); +} + +static void conn_get(struct tipc_conn *con) +{ + kref_get(&con->kref); +} + +static void tipc_conn_close(struct tipc_conn *con) +{ + struct sock *sk = con->sock->sk; + bool disconnect = false; + + write_lock_bh(&sk->sk_callback_lock); + disconnect = test_and_clear_bit(CF_CONNECTED, &con->flags); + + if (disconnect) { + sk->sk_user_data = NULL; + tipc_conn_delete_sub(con, NULL); + } + write_unlock_bh(&sk->sk_callback_lock); + + /* Handle concurrent calls from sending and receiving threads */ + if (!disconnect) + return; + + /* Don't flush pending works, -just let them expire */ + kernel_sock_shutdown(con->sock, SHUT_RDWR); + + conn_put(con); +} + +static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s) +{ + struct tipc_conn *con; + int ret; + + con = kzalloc(sizeof(*con), GFP_ATOMIC); + if (!con) + return ERR_PTR(-ENOMEM); + + kref_init(&con->kref); + INIT_LIST_HEAD(&con->outqueue); + INIT_LIST_HEAD(&con->sub_list); + spin_lock_init(&con->outqueue_lock); + spin_lock_init(&con->sub_lock); + INIT_WORK(&con->swork, tipc_conn_send_work); + INIT_WORK(&con->rwork, tipc_conn_recv_work); + + spin_lock_bh(&s->idr_lock); + ret = idr_alloc(&s->conn_idr, con, 0, 0, GFP_ATOMIC); + if (ret < 0) { + kfree(con); + spin_unlock_bh(&s->idr_lock); + return ERR_PTR(-ENOMEM); + } + con->conid = ret; + s->idr_in_use++; + spin_unlock_bh(&s->idr_lock); + + set_bit(CF_CONNECTED, &con->flags); + con->server = s; + + return con; +} + +static struct tipc_conn *tipc_conn_lookup(struct tipc_topsrv *s, int conid) +{ + struct tipc_conn *con; + + spin_lock_bh(&s->idr_lock); + con = idr_find(&s->conn_idr, conid); + if (!connected(con) || !kref_get_unless_zero(&con->kref)) + con = NULL; + spin_unlock_bh(&s->idr_lock); + return con; +} + +/* tipc_conn_delete_sub - delete a specific or all subscriptions + * for a given subscriber + */ +static void tipc_conn_delete_sub(struct tipc_conn *con, struct tipc_subscr *s) +{ + struct tipc_net *tn = tipc_net(con->server->net); + struct list_head *sub_list = &con->sub_list; + struct tipc_subscription *sub, *tmp; + + spin_lock_bh(&con->sub_lock); + list_for_each_entry_safe(sub, tmp, sub_list, sub_list) { + if (!s || !memcmp(s, &sub->evt.s, sizeof(*s))) { + tipc_sub_unsubscribe(sub); + atomic_dec(&tn->subscription_count); + } else if (s) { + break; + } + } + spin_unlock_bh(&con->sub_lock); +} + +static void tipc_conn_send_to_sock(struct tipc_conn *con) +{ + struct list_head *queue = &con->outqueue; + struct tipc_topsrv *srv = con->server; + struct outqueue_entry *e; + struct tipc_event *evt; + struct msghdr msg; + struct kvec iov; + int count = 0; + int ret; + + spin_lock_bh(&con->outqueue_lock); + + while (!list_empty(queue)) { + e = list_first_entry(queue, struct outqueue_entry, list); + evt = &e->evt; + spin_unlock_bh(&con->outqueue_lock); + + if (e->inactive) + tipc_conn_delete_sub(con, &evt->s); + + memset(&msg, 0, sizeof(msg)); + msg.msg_flags = MSG_DONTWAIT; + iov.iov_base = evt; + iov.iov_len = sizeof(*evt); + msg.msg_name = NULL; + + if (con->sock) { + ret = kernel_sendmsg(con->sock, &msg, &iov, + 1, sizeof(*evt)); + if (ret == -EWOULDBLOCK || ret == 0) { + cond_resched(); + return; + } else if (ret < 0) { + return tipc_conn_close(con); + } + } else { + tipc_topsrv_kern_evt(srv->net, evt); + } + + /* Don't starve users filling buffers */ + if (++count >= MAX_SEND_MSG_COUNT) { + cond_resched(); + count = 0; + } + spin_lock_bh(&con->outqueue_lock); + list_del(&e->list); + kfree(e); + } + spin_unlock_bh(&con->outqueue_lock); +} + +static void tipc_conn_send_work(struct work_struct *work) +{ + struct tipc_conn *con = container_of(work, struct tipc_conn, swork); + + if (connected(con)) + tipc_conn_send_to_sock(con); + + conn_put(con); +} + +/* tipc_conn_queue_evt() - interrupt level call from a subscription instance + * The queued work is launched into tipc_send_work()->tipc_send_to_sock() + */ +void tipc_topsrv_queue_evt(struct net *net, int conid, + u32 event, struct tipc_event *evt) +{ + struct tipc_topsrv *srv = tipc_topsrv(net); + struct outqueue_entry *e; + struct tipc_conn *con; + + con = tipc_conn_lookup(srv, conid); + if (!con) + return; + + if (!connected(con)) + goto err; + + e = kmalloc(sizeof(*e), GFP_ATOMIC); + if (!e) + goto err; + e->inactive = (event == TIPC_SUBSCR_TIMEOUT); + memcpy(&e->evt, evt, sizeof(*evt)); + spin_lock_bh(&con->outqueue_lock); + list_add_tail(&e->list, &con->outqueue); + spin_unlock_bh(&con->outqueue_lock); + + if (queue_work(srv->send_wq, &con->swork)) + return; +err: + conn_put(con); +} + +/* tipc_conn_write_space - interrupt callback after a sendmsg EAGAIN + * Indicates that there now is more space in the send buffer + * The queued work is launched into tipc_send_work()->tipc_conn_send_to_sock() + */ +static void tipc_conn_write_space(struct sock *sk) +{ + struct tipc_conn *con; + + read_lock_bh(&sk->sk_callback_lock); + con = sk->sk_user_data; + if (connected(con)) { + conn_get(con); + if (!queue_work(con->server->send_wq, &con->swork)) + conn_put(con); + } + read_unlock_bh(&sk->sk_callback_lock); +} + +static int tipc_conn_rcv_sub(struct tipc_topsrv *srv, + struct tipc_conn *con, + struct tipc_subscr *s) +{ + struct tipc_net *tn = tipc_net(srv->net); + struct tipc_subscription *sub; + + if (tipc_sub_read(s, filter) & TIPC_SUB_CANCEL) { + tipc_conn_delete_sub(con, s); + return 0; + } + if (atomic_read(&tn->subscription_count) >= TIPC_MAX_SUBSCR) { + pr_warn("Subscription rejected, max (%u)\n", TIPC_MAX_SUBSCR); + return -1; + } + sub = tipc_sub_subscribe(srv->net, s, con->conid); + if (!sub) + return -1; + atomic_inc(&tn->subscription_count); + spin_lock_bh(&con->sub_lock); + list_add(&sub->sub_list, &con->sub_list); + spin_unlock_bh(&con->sub_lock); + return 0; +} + +static int tipc_conn_rcv_from_sock(struct tipc_conn *con) +{ + struct tipc_topsrv *srv = con->server; + struct sock *sk = con->sock->sk; + struct msghdr msg = {}; + struct tipc_subscr s; + struct kvec iov; + int ret; + + iov.iov_base = &s; + iov.iov_len = sizeof(s); + msg.msg_name = NULL; + iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len); + ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT); + if (ret == -EWOULDBLOCK) + return -EWOULDBLOCK; + if (ret > 0) { + read_lock_bh(&sk->sk_callback_lock); + ret = tipc_conn_rcv_sub(srv, con, &s); + read_unlock_bh(&sk->sk_callback_lock); + } + if (ret < 0) + tipc_conn_close(con); + + return ret; +} + +static void tipc_conn_recv_work(struct work_struct *work) +{ + struct tipc_conn *con = container_of(work, struct tipc_conn, rwork); + int count = 0; + + while (connected(con)) { + if (tipc_conn_rcv_from_sock(con)) + break; + + /* Don't flood Rx machine */ + if (++count >= MAX_RECV_MSG_COUNT) { + cond_resched(); + count = 0; + } + } + conn_put(con); +} + +/* tipc_conn_data_ready - interrupt callback indicating the socket has data + * The queued work is launched into tipc_recv_work()->tipc_conn_rcv_from_sock() + */ +static void tipc_conn_data_ready(struct sock *sk) +{ + struct tipc_conn *con; + + read_lock_bh(&sk->sk_callback_lock); + con = sk->sk_user_data; + if (connected(con)) { + conn_get(con); + if (!queue_work(con->server->rcv_wq, &con->rwork)) + conn_put(con); + } + read_unlock_bh(&sk->sk_callback_lock); +} + +static void tipc_topsrv_accept(struct work_struct *work) +{ + struct tipc_topsrv *srv = container_of(work, struct tipc_topsrv, awork); + struct socket *lsock = srv->listener; + struct socket *newsock; + struct tipc_conn *con; + struct sock *newsk; + int ret; + + while (1) { + ret = kernel_accept(lsock, &newsock, O_NONBLOCK); + if (ret < 0) + return; + con = tipc_conn_alloc(srv); + if (IS_ERR(con)) { + ret = PTR_ERR(con); + sock_release(newsock); + return; + } + /* Register callbacks */ + newsk = newsock->sk; + write_lock_bh(&newsk->sk_callback_lock); + newsk->sk_data_ready = tipc_conn_data_ready; + newsk->sk_write_space = tipc_conn_write_space; + newsk->sk_user_data = con; + con->sock = newsock; + write_unlock_bh(&newsk->sk_callback_lock); + + /* Wake up receive process in case of 'SYN+' message */ + newsk->sk_data_ready(newsk); + } +} + +/* tipc_toprsv_listener_data_ready - interrupt callback with connection request + * The queued job is launched into tipc_topsrv_accept() + */ +static void tipc_topsrv_listener_data_ready(struct sock *sk) +{ + struct tipc_topsrv *srv; + + read_lock_bh(&sk->sk_callback_lock); + srv = sk->sk_user_data; + if (srv->listener) + queue_work(srv->rcv_wq, &srv->awork); + read_unlock_bh(&sk->sk_callback_lock); +} + +static int tipc_topsrv_create_listener(struct tipc_topsrv *srv) +{ + int imp = TIPC_CRITICAL_IMPORTANCE; + struct socket *lsock = NULL; + struct sockaddr_tipc saddr; + struct sock *sk; + int rc; + + rc = sock_create_kern(srv->net, AF_TIPC, SOCK_SEQPACKET, 0, &lsock); + if (rc < 0) + return rc; + + srv->listener = lsock; + sk = lsock->sk; + write_lock_bh(&sk->sk_callback_lock); + sk->sk_data_ready = tipc_topsrv_listener_data_ready; + sk->sk_user_data = srv; + write_unlock_bh(&sk->sk_callback_lock); + + rc = kernel_setsockopt(lsock, SOL_TIPC, TIPC_IMPORTANCE, + (char *)&imp, sizeof(imp)); + if (rc < 0) + goto err; + + saddr.family = AF_TIPC; + saddr.addrtype = TIPC_ADDR_NAMESEQ; + saddr.addr.nameseq.type = TIPC_TOP_SRV; + saddr.addr.nameseq.lower = TIPC_TOP_SRV; + saddr.addr.nameseq.upper = TIPC_TOP_SRV; + saddr.scope = TIPC_NODE_SCOPE; + + rc = kernel_bind(lsock, (struct sockaddr *)&saddr, sizeof(saddr)); + if (rc < 0) + goto err; + rc = kernel_listen(lsock, 0); + if (rc < 0) + goto err; + + /* As server's listening socket owner and creator is the same module, + * we have to decrease TIPC module reference count to guarantee that + * it remains zero after the server socket is created, otherwise, + * executing "rmmod" command is unable to make TIPC module deleted + * after TIPC module is inserted successfully. + * + * However, the reference count is ever increased twice in + * sock_create_kern(): one is to increase the reference count of owner + * of TIPC socket's proto_ops struct; another is to increment the + * reference count of owner of TIPC proto struct. Therefore, we must + * decrement the module reference count twice to ensure that it keeps + * zero after server's listening socket is created. Of course, we + * must bump the module reference count twice as well before the socket + * is closed. + */ + module_put(lsock->ops->owner); + module_put(sk->sk_prot_creator->owner); + + return 0; +err: + sock_release(lsock); + return -EINVAL; +} + +bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower, + u32 upper, u32 filter, int *conid) +{ + struct tipc_subscr sub; + struct tipc_conn *con; + int rc; + + sub.seq.type = type; + sub.seq.lower = lower; + sub.seq.upper = upper; + sub.timeout = TIPC_WAIT_FOREVER; + sub.filter = filter; + *(u32 *)&sub.usr_handle = port; + + con = tipc_conn_alloc(tipc_topsrv(net)); + if (IS_ERR(con)) + return false; + + *conid = con->conid; + con->sock = NULL; + rc = tipc_conn_rcv_sub(tipc_topsrv(net), con, &sub); + if (rc >= 0) + return true; + conn_put(con); + return false; +} + +void tipc_topsrv_kern_unsubscr(struct net *net, int conid) +{ + struct tipc_conn *con; + + con = tipc_conn_lookup(tipc_topsrv(net), conid); + if (!con) + return; + + test_and_clear_bit(CF_CONNECTED, &con->flags); + tipc_conn_delete_sub(con, NULL); + conn_put(con); + conn_put(con); +} + +static void tipc_topsrv_kern_evt(struct net *net, struct tipc_event *evt) +{ + u32 port = *(u32 *)&evt->s.usr_handle; + u32 self = tipc_own_addr(net); + struct sk_buff_head evtq; + struct sk_buff *skb; + + skb = tipc_msg_create(TOP_SRV, 0, INT_H_SIZE, sizeof(*evt), + self, self, port, port, 0); + if (!skb) + return; + msg_set_dest_droppable(buf_msg(skb), true); + memcpy(msg_data(buf_msg(skb)), evt, sizeof(*evt)); + skb_queue_head_init(&evtq); + __skb_queue_tail(&evtq, skb); + tipc_sk_rcv(net, &evtq); +} + +static int tipc_topsrv_work_start(struct tipc_topsrv *s) +{ + s->rcv_wq = alloc_ordered_workqueue("tipc_rcv", 0); + if (!s->rcv_wq) { + pr_err("can't start tipc receive workqueue\n"); + return -ENOMEM; + } + + s->send_wq = alloc_ordered_workqueue("tipc_send", 0); + if (!s->send_wq) { + pr_err("can't start tipc send workqueue\n"); + destroy_workqueue(s->rcv_wq); + return -ENOMEM; + } + + return 0; +} + +static void tipc_topsrv_work_stop(struct tipc_topsrv *s) +{ + destroy_workqueue(s->rcv_wq); + destroy_workqueue(s->send_wq); +} + +int tipc_topsrv_start(struct net *net) +{ + struct tipc_net *tn = tipc_net(net); + const char name[] = "topology_server"; + struct tipc_topsrv *srv; + int ret; + + srv = kzalloc(sizeof(*srv), GFP_ATOMIC); + if (!srv) + return -ENOMEM; + + srv->net = net; + srv->max_rcvbuf_size = sizeof(struct tipc_subscr); + INIT_WORK(&srv->awork, tipc_topsrv_accept); + + strncpy(srv->name, name, strlen(name) + 1); + tn->topsrv = srv; + atomic_set(&tn->subscription_count, 0); + + spin_lock_init(&srv->idr_lock); + idr_init(&srv->conn_idr); + srv->idr_in_use = 0; + + ret = tipc_topsrv_work_start(srv); + if (ret < 0) + return ret; + + ret = tipc_topsrv_create_listener(srv); + if (ret < 0) + tipc_topsrv_work_stop(srv); + + return ret; +} + +void tipc_topsrv_stop(struct net *net) +{ + struct tipc_topsrv *srv = tipc_topsrv(net); + struct socket *lsock = srv->listener; + struct tipc_conn *con; + int id; + + spin_lock_bh(&srv->idr_lock); + for (id = 0; srv->idr_in_use; id++) { + con = idr_find(&srv->conn_idr, id); + if (con) { + spin_unlock_bh(&srv->idr_lock); + tipc_conn_close(con); + spin_lock_bh(&srv->idr_lock); + } + } + __module_get(lsock->ops->owner); + __module_get(lsock->sk->sk_prot_creator->owner); + srv->listener = NULL; + spin_unlock_bh(&srv->idr_lock); + sock_release(lsock); + tipc_topsrv_work_stop(srv); + idr_destroy(&srv->conn_idr); + kfree(srv); +} diff --git a/net/tipc/server.h b/net/tipc/topsrv.h index 64df7513cd70..c7ea71293748 100644 --- a/net/tipc/server.h +++ b/net/tipc/topsrv.h @@ -2,6 +2,7 @@ * net/tipc/server.h: Include file for TIPC server code * * Copyright (c) 2012-2013, Wind River Systems + * Copyright (c) 2017, Ericsson AB * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -36,68 +37,18 @@ #ifndef _TIPC_SERVER_H #define _TIPC_SERVER_H -#include <linux/idr.h> -#include <linux/tipc.h> -#include <net/net_namespace.h> +#include "core.h" #define TIPC_SERVER_NAME_LEN 32 #define TIPC_SUB_CLUSTER_SCOPE 0x20 #define TIPC_SUB_NODE_SCOPE 0x40 #define TIPC_SUB_NO_STATUS 0x80 -/** - * struct tipc_server - TIPC server structure - * @conn_idr: identifier set of connection - * @idr_lock: protect the connection identifier set - * @idr_in_use: amount of allocated identifier entry - * @net: network namspace instance - * @rcvbuf_cache: memory cache of server receive buffer - * @rcv_wq: receive workqueue - * @send_wq: send workqueue - * @max_rcvbuf_size: maximum permitted receive message length - * @tipc_conn_new: callback will be called when new connection is incoming - * @tipc_conn_release: callback will be called before releasing the connection - * @tipc_conn_recvmsg: callback will be called when message arrives - * @saddr: TIPC server address - * @name: server name - * @imp: message importance - * @type: socket type - */ -struct tipc_server { - struct idr conn_idr; - spinlock_t idr_lock; - int idr_in_use; - struct net *net; - struct kmem_cache *rcvbuf_cache; - struct workqueue_struct *rcv_wq; - struct workqueue_struct *send_wq; - int max_rcvbuf_size; - void *(*tipc_conn_new)(int conid); - void (*tipc_conn_release)(int conid, void *usr_data); - int (*tipc_conn_recvmsg)(struct net *net, int conid, - struct sockaddr_tipc *addr, void *usr_data, - void *buf, size_t len); - struct sockaddr_tipc *saddr; - char name[TIPC_SERVER_NAME_LEN]; - int imp; - int type; -}; - -int tipc_conn_sendmsg(struct tipc_server *s, int conid, - struct sockaddr_tipc *addr, void *data, size_t len); +void tipc_topsrv_queue_evt(struct net *net, int conid, + u32 event, struct tipc_event *evt); bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower, u32 upper, u32 filter, int *conid); void tipc_topsrv_kern_unsubscr(struct net *net, int conid); -/** - * tipc_conn_terminate - terminate connection with server - * - * Note: Must call it in process context since it might sleep - */ -void tipc_conn_terminate(struct tipc_server *s, int conid); -int tipc_server_start(struct tipc_server *s); - -void tipc_server_stop(struct tipc_server *s); - #endif diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 2d465bdeccbc..bc2970a8e7f3 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -637,7 +637,7 @@ static int unix_stream_connect(struct socket *, struct sockaddr *, int addr_len, int flags); static int unix_socketpair(struct socket *, struct socket *); static int unix_accept(struct socket *, struct socket *, int, bool); -static int unix_getname(struct socket *, struct sockaddr *, int *, int); +static int unix_getname(struct socket *, struct sockaddr *, int); static __poll_t unix_poll(struct file *, struct socket *, poll_table *); static __poll_t unix_dgram_poll(struct file *, struct socket *, poll_table *); @@ -1453,7 +1453,7 @@ out: } -static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer) +static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer) { struct sock *sk = sock->sk; struct unix_sock *u; @@ -1476,12 +1476,12 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_ if (!u->addr) { sunaddr->sun_family = AF_UNIX; sunaddr->sun_path[0] = 0; - *uaddr_len = sizeof(short); + err = sizeof(short); } else { struct unix_address *addr = u->addr; - *uaddr_len = addr->len; - memcpy(sunaddr, addr->name, *uaddr_len); + err = addr->len; + memcpy(sunaddr, addr->name, addr->len); } unix_state_unlock(sk); sock_put(sk); @@ -2913,6 +2913,7 @@ static void __net_exit unix_net_exit(struct net *net) static struct pernet_operations unix_net_ops = { .init = unix_net_init, .exit = unix_net_exit, + .async = true, }; static int __init af_unix_init(void) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index e0fc84daed94..aac9b8f6552e 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -759,7 +759,7 @@ vsock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) } static int vsock_getname(struct socket *sock, - struct sockaddr *addr, int *addr_len, int peer) + struct sockaddr *addr, int peer) { int err; struct sock *sk; @@ -794,7 +794,7 @@ static int vsock_getname(struct socket *sock, */ BUILD_BUG_ON(sizeof(*vm_addr) > 128); memcpy(addr, vm_addr, sizeof(*vm_addr)); - *addr_len = sizeof(*vm_addr); + err = sizeof(*vm_addr); out: release_sock(sk); diff --git a/net/wireless/core.c b/net/wireless/core.c index a6f3cac8c640..670aa229168a 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1340,6 +1340,7 @@ static void __net_exit cfg80211_pernet_exit(struct net *net) static struct pernet_operations cfg80211_pernet_ops = { .exit = cfg80211_pernet_exit, + .async = true, }; static int __init cfg80211_init(void) diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index 9efbfc753347..bc7064486b15 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -390,6 +390,7 @@ static void __net_exit wext_pernet_exit(struct net *net) static struct pernet_operations wext_pernet_ops = { .init = wext_pernet_init, .exit = wext_pernet_exit, + .async = true, }; static int __init wireless_nlevent_init(void) diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 562cc11131f6..d49aa79b7997 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -896,7 +896,7 @@ out: } static int x25_getname(struct socket *sock, struct sockaddr *uaddr, - int *uaddr_len, int peer) + int peer) { struct sockaddr_x25 *sx25 = (struct sockaddr_x25 *)uaddr; struct sock *sk = sock->sk; @@ -913,7 +913,7 @@ static int x25_getname(struct socket *sock, struct sockaddr *uaddr, sx25->sx25_addr = x25->source_addr; sx25->sx25_family = AF_X25; - *uaddr_len = sizeof(*sx25); + rc = sizeof(*sx25); out: return rc; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 7a23078132cf..77d9d1ab05ce 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2982,6 +2982,7 @@ static void __net_exit xfrm_net_exit(struct net *net) static struct pernet_operations __net_initdata xfrm_net_ops = { .init = xfrm_net_init, .exit = xfrm_net_exit, + .async = true, }; void __init xfrm_init(void) diff --git a/security/tomoyo/network.c b/security/tomoyo/network.c index cd6932e5225c..9094f4b3b367 100644 --- a/security/tomoyo/network.c +++ b/security/tomoyo/network.c @@ -655,10 +655,11 @@ int tomoyo_socket_listen_permission(struct socket *sock) return 0; { const int error = sock->ops->getname(sock, (struct sockaddr *) - &addr, &addr_len, 0); + &addr, 0); - if (error) + if (error < 0) return error; + addr_len = error; } address.protocol = type; address.operation = TOMOYO_NETWORK_LISTEN; diff --git a/tools/testing/selftests/net/fib-onlink-tests.sh b/tools/testing/selftests/net/fib-onlink-tests.sh new file mode 100755 index 000000000000..06b1d7cc12cc --- /dev/null +++ b/tools/testing/selftests/net/fib-onlink-tests.sh @@ -0,0 +1,375 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# IPv4 and IPv6 onlink tests + +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +# Network interfaces +# - odd in current namespace; even in peer ns +declare -A NETIFS +# default VRF +NETIFS[p1]=veth1 +NETIFS[p2]=veth2 +NETIFS[p3]=veth3 +NETIFS[p4]=veth4 +# VRF +NETIFS[p5]=veth5 +NETIFS[p6]=veth6 +NETIFS[p7]=veth7 +NETIFS[p8]=veth8 + +# /24 network +declare -A V4ADDRS +V4ADDRS[p1]=169.254.1.1 +V4ADDRS[p2]=169.254.1.2 +V4ADDRS[p3]=169.254.3.1 +V4ADDRS[p4]=169.254.3.2 +V4ADDRS[p5]=169.254.5.1 +V4ADDRS[p6]=169.254.5.2 +V4ADDRS[p7]=169.254.7.1 +V4ADDRS[p8]=169.254.7.2 + +# /64 network +declare -A V6ADDRS +V6ADDRS[p1]=2001:db8:101::1 +V6ADDRS[p2]=2001:db8:101::2 +V6ADDRS[p3]=2001:db8:301::1 +V6ADDRS[p4]=2001:db8:301::2 +V6ADDRS[p5]=2001:db8:501::1 +V6ADDRS[p6]=2001:db8:501::2 +V6ADDRS[p7]=2001:db8:701::1 +V6ADDRS[p8]=2001:db8:701::2 + +# Test networks: +# [1] = default table +# [2] = VRF +# +# /32 host routes +declare -A TEST_NET4 +TEST_NET4[1]=169.254.101 +TEST_NET4[2]=169.254.102 +# /128 host routes +declare -A TEST_NET6 +TEST_NET6[1]=2001:db8:101 +TEST_NET6[2]=2001:db8:102 + +# connected gateway +CONGW[1]=169.254.1.254 +CONGW[2]=169.254.5.254 + +# recursive gateway +RECGW4[1]=169.254.11.254 +RECGW4[2]=169.254.12.254 +RECGW6[1]=2001:db8:11::64 +RECGW6[2]=2001:db8:12::64 + +# for v4 mapped to v6 +declare -A TEST_NET4IN6IN6 +TEST_NET4IN6[1]=10.1.1.254 +TEST_NET4IN6[2]=10.2.1.254 + +# mcast address +MCAST6=ff02::1 + + +PEER_NS=bart +PEER_CMD="ip netns exec ${PEER_NS}" +VRF=lisa +VRF_TABLE=1101 +PBR_TABLE=101 + +################################################################################ +# utilities + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-50s [ OK ]\n" "${msg}" + else + nfail=$((nfail+1)) + printf "\n TEST: %-50s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +log_section() +{ + echo + echo "######################################################################" + echo "TEST SECTION: $*" + echo "######################################################################" +} + +log_subsection() +{ + echo + echo "#########################################" + echo "TEST SUBSECTION: $*" +} + +run_cmd() +{ + echo + echo "COMMAND: $*" + eval $* +} + +get_linklocal() +{ + local dev=$1 + local pfx + local addr + + addr=$(${pfx} ip -6 -br addr show dev ${dev} | \ + awk '{ + for (i = 3; i <= NF; ++i) { + if ($i ~ /^fe80/) + print $i + } + }' + ) + addr=${addr/\/*} + + [ -z "$addr" ] && return 1 + + echo $addr + + return 0 +} + +################################################################################ +# + +setup() +{ + echo + echo "########################################" + echo "Configuring interfaces" + + set -e + + # create namespace + ip netns add ${PEER_NS} + ip -netns ${PEER_NS} li set lo up + + # add vrf table + ip li add ${VRF} type vrf table ${VRF_TABLE} + ip li set ${VRF} up + ip ro add table ${VRF_TABLE} unreachable default + ip -6 ro add table ${VRF_TABLE} unreachable default + + # create test interfaces + ip li add ${NETIFS[p1]} type veth peer name ${NETIFS[p2]} + ip li add ${NETIFS[p3]} type veth peer name ${NETIFS[p4]} + ip li add ${NETIFS[p5]} type veth peer name ${NETIFS[p6]} + ip li add ${NETIFS[p7]} type veth peer name ${NETIFS[p8]} + + # enslave vrf interfaces + for n in 5 7; do + ip li set ${NETIFS[p${n}]} vrf ${VRF} + done + + # add addresses + for n in 1 3 5 7; do + ip li set ${NETIFS[p${n}]} up + ip addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]} + ip addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} + done + + # move peer interfaces to namespace and add addresses + for n in 2 4 6 8; do + ip li set ${NETIFS[p${n}]} netns ${PEER_NS} up + ip -netns ${PEER_NS} addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]} + ip -netns ${PEER_NS} addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} + done + + set +e + + # let DAD complete - assume default of 1 probe + sleep 1 +} + +cleanup() +{ + # make sure we start from a clean slate + ip netns del ${PEER_NS} 2>/dev/null + for n in 1 3 5 7; do + ip link del ${NETIFS[p${n}]} 2>/dev/null + done + ip link del ${VRF} 2>/dev/null + ip ro flush table ${VRF_TABLE} + ip -6 ro flush table ${VRF_TABLE} +} + +################################################################################ +# IPv4 tests +# + +run_ip() +{ + local table="$1" + local prefix="$2" + local gw="$3" + local dev="$4" + local exp_rc="$5" + local desc="$6" + + # dev arg may be empty + [ -n "${dev}" ] && dev="dev ${dev}" + + run_cmd ip ro add table "${table}" "${prefix}"/32 via "${gw}" "${dev}" onlink + log_test $? ${exp_rc} "${desc}" +} + +valid_onlink_ipv4() +{ + # - unicast connected, unicast recursive + # + log_subsection "default VRF - main table" + + run_ip 254 ${TEST_NET4[1]}.1 ${CONGW[1]} ${NETIFS[p1]} 0 "unicast connected" + run_ip 254 ${TEST_NET4[1]}.2 ${RECGW4[1]} ${NETIFS[p1]} 0 "unicast recursive" + + log_subsection "VRF ${VRF}" + + run_ip ${VRF_TABLE} ${TEST_NET4[2]}.1 ${CONGW[2]} ${NETIFS[p5]} 0 "unicast connected" + run_ip ${VRF_TABLE} ${TEST_NET4[2]}.2 ${RECGW4[2]} ${NETIFS[p5]} 0 "unicast recursive" + + log_subsection "VRF device, PBR table" + + run_ip ${PBR_TABLE} ${TEST_NET4[2]}.3 ${CONGW[2]} ${NETIFS[p5]} 0 "unicast connected" + run_ip ${PBR_TABLE} ${TEST_NET4[2]}.4 ${RECGW4[2]} ${NETIFS[p5]} 0 "unicast recursive" +} + +invalid_onlink_ipv4() +{ + run_ip 254 ${TEST_NET4[1]}.11 ${V4ADDRS[p1]} ${NETIFS[p1]} 2 \ + "Invalid gw - local unicast address" + + run_ip ${VRF_TABLE} ${TEST_NET4[2]}.11 ${V4ADDRS[p5]} ${NETIFS[p5]} 2 \ + "Invalid gw - local unicast address, VRF" + + run_ip 254 ${TEST_NET4[1]}.101 ${V4ADDRS[p1]} "" 2 "No nexthop device given" + + run_ip 254 ${TEST_NET4[1]}.102 ${V4ADDRS[p3]} ${NETIFS[p1]} 2 \ + "Gateway resolves to wrong nexthop device" + + run_ip ${VRF_TABLE} ${TEST_NET4[2]}.103 ${V4ADDRS[p7]} ${NETIFS[p5]} 2 \ + "Gateway resolves to wrong nexthop device - VRF" +} + +################################################################################ +# IPv6 tests +# + +run_ip6() +{ + local table="$1" + local prefix="$2" + local gw="$3" + local dev="$4" + local exp_rc="$5" + local desc="$6" + + # dev arg may be empty + [ -n "${dev}" ] && dev="dev ${dev}" + + run_cmd ip -6 ro add table "${table}" "${prefix}"/128 via "${gw}" "${dev}" onlink + log_test $? ${exp_rc} "${desc}" +} + +valid_onlink_ipv6() +{ + # - unicast connected, unicast recursive, v4-mapped + # + log_subsection "default VRF - main table" + + run_ip6 254 ${TEST_NET6[1]}::1 ${V6ADDRS[p1]/::*}::64 ${NETIFS[p1]} 0 "unicast connected" + run_ip6 254 ${TEST_NET6[1]}::2 ${RECGW6[1]} ${NETIFS[p1]} 0 "unicast recursive" + run_ip6 254 ${TEST_NET6[1]}::3 ::ffff:${TEST_NET4IN6[1]} ${NETIFS[p1]} 0 "v4-mapped" + + log_subsection "VRF ${VRF}" + + run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::1 ${V6ADDRS[p5]/::*}::64 ${NETIFS[p5]} 0 "unicast connected" + run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::2 ${RECGW6[2]} ${NETIFS[p5]} 0 "unicast recursive" + run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::3 ::ffff:${TEST_NET4IN6[2]} ${NETIFS[p5]} 0 "v4-mapped" + + log_subsection "VRF device, PBR table" + + run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::4 ${V6ADDRS[p5]/::*}::64 ${NETIFS[p5]} 0 "unicast connected" + run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::5 ${RECGW6[2]} ${NETIFS[p5]} 0 "unicast recursive" + run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::6 ::ffff:${TEST_NET4IN6[2]} ${NETIFS[p5]} 0 "v4-mapped" +} + +invalid_onlink_ipv6() +{ + local lladdr + + lladdr=$(get_linklocal ${NETIFS[p1]}) || return 1 + + run_ip6 254 ${TEST_NET6[1]}::11 ${V6ADDRS[p1]} ${NETIFS[p1]} 2 \ + "Invalid gw - local unicast address" + run_ip6 254 ${TEST_NET6[1]}::12 ${lladdr} ${NETIFS[p1]} 2 \ + "Invalid gw - local linklocal address" + run_ip6 254 ${TEST_NET6[1]}::12 ${MCAST6} ${NETIFS[p1]} 2 \ + "Invalid gw - multicast address" + + lladdr=$(get_linklocal ${NETIFS[p5]}) || return 1 + run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::11 ${V6ADDRS[p5]} ${NETIFS[p5]} 2 \ + "Invalid gw - local unicast address, VRF" + run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::12 ${lladdr} ${NETIFS[p5]} 2 \ + "Invalid gw - local linklocal address, VRF" + run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::12 ${MCAST6} ${NETIFS[p5]} 2 \ + "Invalid gw - multicast address, VRF" + + run_ip6 254 ${TEST_NET6[1]}::101 ${V6ADDRS[p1]} "" 2 \ + "No nexthop device given" + + # default VRF validation is done against LOCAL table + # run_ip6 254 ${TEST_NET6[1]}::102 ${V6ADDRS[p3]/::[0-9]/::64} ${NETIFS[p1]} 2 \ + # "Gateway resolves to wrong nexthop device" + + run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::103 ${V6ADDRS[p7]/::[0-9]/::64} ${NETIFS[p5]} 2 \ + "Gateway resolves to wrong nexthop device - VRF" +} + +run_onlink_tests() +{ + log_section "IPv4 onlink" + log_subsection "Valid onlink commands" + valid_onlink_ipv4 + log_subsection "Invalid onlink commands" + invalid_onlink_ipv4 + + log_section "IPv6 onlink" + log_subsection "Valid onlink commands" + valid_onlink_ipv6 + invalid_onlink_ipv6 +} + +################################################################################ +# main + +nsuccess=0 +nfail=0 + +cleanup +setup +run_onlink_tests +cleanup + +if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} +fi diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index a9154eefb2e2..b617985ecdc1 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -6,154 +6,155 @@ ret=0 -check_err() -{ - if [ $ret -eq 0 ]; then - ret=$1 - fi -} +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} -check_fail() +log_test() { - if [ $1 -eq 0 ]; then + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + printf " %-60s [ OK ]\n" "${msg}" + else ret=1 + printf " %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi fi } -netns_create() +setup() { - local testns=$1 + set -e + ip netns add testns + ip -netns testns link set dev lo up + + ip -netns testns link add dummy0 type dummy + ip -netns testns link set dev dummy0 up + ip -netns testns address add 198.51.100.1/24 dev dummy0 + ip -netns testns -6 address add 2001:db8:1::1/64 dev dummy0 + set +e - ip netns add $testns - ip netns exec $testns ip link set dev lo up } -fib_unreg_unicast_test() +cleanup() { - ret=0 - - netns_create "testns" - - ip netns exec testns ip link add dummy0 type dummy - ip netns exec testns ip link set dev dummy0 up + ip -netns testns link del dev dummy0 &> /dev/null + ip netns del testns +} - ip netns exec testns ip address add 198.51.100.1/24 dev dummy0 - ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0 +fib_unreg_unicast_test() +{ + echo + echo "Single path route test" - ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null - check_err $? - ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null - check_err $? + setup - ip netns exec testns ip link del dev dummy0 - check_err $? + echo " Start point" + ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null + log_test $? 0 "IPv4 fibmatch" + ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null + log_test $? 0 "IPv6 fibmatch" - ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null - check_fail $? - ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null - check_fail $? + set -e + ip -netns testns link del dev dummy0 + set +e - ip netns del testns + echo " Nexthop device deleted" + ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null + log_test $? 2 "IPv4 fibmatch - no route" + ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null + log_test $? 2 "IPv6 fibmatch - no route" - if [ $ret -ne 0 ]; then - echo "FAIL: unicast route test" - return 1 - fi - echo "PASS: unicast route test" + cleanup } fib_unreg_multipath_test() { - ret=0 - - netns_create "testns" - ip netns exec testns ip link add dummy0 type dummy - ip netns exec testns ip link set dev dummy0 up + echo + echo "Multipath route test" - ip netns exec testns ip link add dummy1 type dummy - ip netns exec testns ip link set dev dummy1 up + setup - ip netns exec testns ip address add 198.51.100.1/24 dev dummy0 - ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0 + set -e + ip -netns testns link add dummy1 type dummy + ip -netns testns link set dev dummy1 up + ip -netns testns address add 192.0.2.1/24 dev dummy1 + ip -netns testns -6 address add 2001:db8:2::1/64 dev dummy1 - ip netns exec testns ip address add 192.0.2.1/24 dev dummy1 - ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy1 - - ip netns exec testns ip route add 203.0.113.0/24 \ + ip -netns testns route add 203.0.113.0/24 \ nexthop via 198.51.100.2 dev dummy0 \ nexthop via 192.0.2.2 dev dummy1 - ip netns exec testns ip -6 route add 2001:db8:3::/64 \ + ip -netns testns -6 route add 2001:db8:3::/64 \ nexthop via 2001:db8:1::2 dev dummy0 \ nexthop via 2001:db8:2::2 dev dummy1 + set +e + + echo " Start point" + ip -netns testns route get fibmatch 203.0.113.1 &> /dev/null + log_test $? 0 "IPv4 fibmatch" + ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null + log_test $? 0 "IPv6 fibmatch" - ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null - check_err $? - ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null - check_err $? + set -e + ip -netns testns link del dev dummy0 + set +e - ip netns exec testns ip link del dev dummy0 - check_err $? + echo " One nexthop device deleted" + ip -netns testns route get fibmatch 203.0.113.1 &> /dev/null + log_test $? 2 "IPv4 - multipath route removed on delete" - ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null - check_fail $? - ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null + ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null # In IPv6 we do not flush the entire multipath route. - check_err $? + log_test $? 0 "IPv6 - multipath down to single path" - ip netns exec testns ip link del dev dummy1 + set -e + ip -netns testns link del dev dummy1 + set +e - ip netns del testns + echo " Second nexthop device deleted" + ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null + log_test $? 2 "IPv6 - no route" - if [ $ret -ne 0 ]; then - echo "FAIL: multipath route test" - return 1 - fi - echo "PASS: multipath route test" + cleanup } fib_unreg_test() { - echo "Running netdev unregister tests" - fib_unreg_unicast_test fib_unreg_multipath_test } fib_down_unicast_test() { - ret=0 - - netns_create "testns" - - ip netns exec testns ip link add dummy0 type dummy - ip netns exec testns ip link set dev dummy0 up - - ip netns exec testns ip address add 198.51.100.1/24 dev dummy0 - ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0 + echo + echo "Single path, admin down" - ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null - check_err $? - ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null - check_err $? + setup - ip netns exec testns ip link set dev dummy0 down - check_err $? + echo " Start point" + ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null + log_test $? 0 "IPv4 fibmatch" + ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null + log_test $? 0 "IPv6 fibmatch" - ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null - check_fail $? - ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null - check_fail $? + set -e + ip -netns testns link set dev dummy0 down + set +e - ip netns exec testns ip link del dev dummy0 + echo " Route deleted on down" + ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null + log_test $? 2 "IPv4 fibmatch" + ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null + log_test $? 2 "IPv6 fibmatch" - ip netns del testns - - if [ $ret -ne 0 ]; then - echo "FAIL: unicast route test" - return 1 - fi - echo "PASS: unicast route test" + cleanup } fib_down_multipath_test_do() @@ -161,242 +162,229 @@ fib_down_multipath_test_do() local down_dev=$1 local up_dev=$2 - ip netns exec testns ip route get fibmatch 203.0.113.1 \ + ip -netns testns route get fibmatch 203.0.113.1 \ oif $down_dev &> /dev/null - check_fail $? - ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 \ + log_test $? 2 "IPv4 fibmatch on down device" + ip -netns testns -6 route get fibmatch 2001:db8:3::1 \ oif $down_dev &> /dev/null - check_fail $? + log_test $? 2 "IPv6 fibmatch on down device" - ip netns exec testns ip route get fibmatch 203.0.113.1 \ + ip -netns testns route get fibmatch 203.0.113.1 \ oif $up_dev &> /dev/null - check_err $? - ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 \ + log_test $? 0 "IPv4 fibmatch on up device" + ip -netns testns -6 route get fibmatch 2001:db8:3::1 \ oif $up_dev &> /dev/null - check_err $? + log_test $? 0 "IPv6 fibmatch on up device" - ip netns exec testns ip route get fibmatch 203.0.113.1 | \ + ip -netns testns route get fibmatch 203.0.113.1 | \ grep $down_dev | grep -q "dead linkdown" - check_err $? - ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 | \ + log_test $? 0 "IPv4 flags on down device" + ip -netns testns -6 route get fibmatch 2001:db8:3::1 | \ grep $down_dev | grep -q "dead linkdown" - check_err $? + log_test $? 0 "IPv6 flags on down device" - ip netns exec testns ip route get fibmatch 203.0.113.1 | \ + ip -netns testns route get fibmatch 203.0.113.1 | \ grep $up_dev | grep -q "dead linkdown" - check_fail $? - ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 | \ + log_test $? 1 "IPv4 flags on up device" + ip -netns testns -6 route get fibmatch 2001:db8:3::1 | \ grep $up_dev | grep -q "dead linkdown" - check_fail $? + log_test $? 1 "IPv6 flags on up device" } fib_down_multipath_test() { - ret=0 + echo + echo "Admin down multipath" - netns_create "testns" + setup - ip netns exec testns ip link add dummy0 type dummy - ip netns exec testns ip link set dev dummy0 up + set -e + ip -netns testns link add dummy1 type dummy + ip -netns testns link set dev dummy1 up - ip netns exec testns ip link add dummy1 type dummy - ip netns exec testns ip link set dev dummy1 up + ip -netns testns address add 192.0.2.1/24 dev dummy1 + ip -netns testns -6 address add 2001:db8:2::1/64 dev dummy1 - ip netns exec testns ip address add 198.51.100.1/24 dev dummy0 - ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0 - - ip netns exec testns ip address add 192.0.2.1/24 dev dummy1 - ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy1 - - ip netns exec testns ip route add 203.0.113.0/24 \ + ip -netns testns route add 203.0.113.0/24 \ nexthop via 198.51.100.2 dev dummy0 \ nexthop via 192.0.2.2 dev dummy1 - ip netns exec testns ip -6 route add 2001:db8:3::/64 \ + ip -netns testns -6 route add 2001:db8:3::/64 \ nexthop via 2001:db8:1::2 dev dummy0 \ nexthop via 2001:db8:2::2 dev dummy1 + set +e + + echo " Verify start point" + ip -netns testns route get fibmatch 203.0.113.1 &> /dev/null + log_test $? 0 "IPv4 fibmatch" - ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null - check_err $? - ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null - check_err $? + ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null + log_test $? 0 "IPv6 fibmatch" - ip netns exec testns ip link set dev dummy0 down - check_err $? + set -e + ip -netns testns link set dev dummy0 down + set +e + echo " One device down, one up" fib_down_multipath_test_do "dummy0" "dummy1" - ip netns exec testns ip link set dev dummy0 up - check_err $? - ip netns exec testns ip link set dev dummy1 down - check_err $? + set -e + ip -netns testns link set dev dummy0 up + ip -netns testns link set dev dummy1 down + set +e + echo " Other device down and up" fib_down_multipath_test_do "dummy1" "dummy0" - ip netns exec testns ip link set dev dummy0 down - check_err $? + set -e + ip -netns testns link set dev dummy0 down + set +e - ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null - check_fail $? - ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null - check_fail $? + echo " Both devices down" + ip -netns testns route get fibmatch 203.0.113.1 &> /dev/null + log_test $? 2 "IPv4 fibmatch" + ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null + log_test $? 2 "IPv6 fibmatch" - ip netns exec testns ip link del dev dummy1 - ip netns exec testns ip link del dev dummy0 - - ip netns del testns - - if [ $ret -ne 0 ]; then - echo "FAIL: multipath route test" - return 1 - fi - echo "PASS: multipath route test" + ip -netns testns link del dev dummy1 + cleanup } fib_down_test() { - echo "Running netdev down tests" - fib_down_unicast_test fib_down_multipath_test } +# Local routes should not be affected when carrier changes. fib_carrier_local_test() { - ret=0 + echo + echo "Local carrier tests - single path" - # Local routes should not be affected when carrier changes. - netns_create "testns" + setup - ip netns exec testns ip link add dummy0 type dummy - ip netns exec testns ip link set dev dummy0 up + set -e + ip -netns testns link set dev dummy0 carrier on + set +e - ip netns exec testns ip link set dev dummy0 carrier on + echo " Start point" + ip -netns testns route get fibmatch 198.51.100.1 &> /dev/null + log_test $? 0 "IPv4 fibmatch" + ip -netns testns -6 route get fibmatch 2001:db8:1::1 &> /dev/null + log_test $? 0 "IPv6 fibmatch" - ip netns exec testns ip address add 198.51.100.1/24 dev dummy0 - ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0 - - ip netns exec testns ip route get fibmatch 198.51.100.1 &> /dev/null - check_err $? - ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 &> /dev/null - check_err $? - - ip netns exec testns ip route get fibmatch 198.51.100.1 | \ + ip -netns testns route get fibmatch 198.51.100.1 | \ grep -q "linkdown" - check_fail $? - ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 | \ + log_test $? 1 "IPv4 - no linkdown flag" + ip -netns testns -6 route get fibmatch 2001:db8:1::1 | \ grep -q "linkdown" - check_fail $? + log_test $? 1 "IPv6 - no linkdown flag" - ip netns exec testns ip link set dev dummy0 carrier off + set -e + ip -netns testns link set dev dummy0 carrier off + sleep 1 + set +e - ip netns exec testns ip route get fibmatch 198.51.100.1 &> /dev/null - check_err $? - ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 &> /dev/null - check_err $? + echo " Carrier off on nexthop" + ip -netns testns route get fibmatch 198.51.100.1 &> /dev/null + log_test $? 0 "IPv4 fibmatch" + ip -netns testns -6 route get fibmatch 2001:db8:1::1 &> /dev/null + log_test $? 0 "IPv6 fibmatch" - ip netns exec testns ip route get fibmatch 198.51.100.1 | \ + ip -netns testns route get fibmatch 198.51.100.1 | \ grep -q "linkdown" - check_fail $? - ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 | \ + log_test $? 1 "IPv4 - linkdown flag set" + ip -netns testns -6 route get fibmatch 2001:db8:1::1 | \ grep -q "linkdown" - check_fail $? + log_test $? 1 "IPv6 - linkdown flag set" - ip netns exec testns ip address add 192.0.2.1/24 dev dummy0 - ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy0 + set -e + ip -netns testns address add 192.0.2.1/24 dev dummy0 + ip -netns testns -6 address add 2001:db8:2::1/64 dev dummy0 + set +e - ip netns exec testns ip route get fibmatch 192.0.2.1 &> /dev/null - check_err $? - ip netns exec testns ip -6 route get fibmatch 2001:db8:2::1 &> /dev/null - check_err $? + echo " Route to local address with carrier down" + ip -netns testns route get fibmatch 192.0.2.1 &> /dev/null + log_test $? 0 "IPv4 fibmatch" + ip -netns testns -6 route get fibmatch 2001:db8:2::1 &> /dev/null + log_test $? 0 "IPv6 fibmatch" - ip netns exec testns ip route get fibmatch 192.0.2.1 | \ + ip -netns testns route get fibmatch 192.0.2.1 | \ grep -q "linkdown" - check_fail $? - ip netns exec testns ip -6 route get fibmatch 2001:db8:2::1 | \ + log_test $? 1 "IPv4 linkdown flag set" + ip -netns testns -6 route get fibmatch 2001:db8:2::1 | \ grep -q "linkdown" - check_fail $? + log_test $? 1 "IPv6 linkdown flag set" - ip netns exec testns ip link del dev dummy0 - - ip netns del testns - - if [ $ret -ne 0 ]; then - echo "FAIL: local route carrier test" - return 1 - fi - echo "PASS: local route carrier test" + cleanup } fib_carrier_unicast_test() { ret=0 - netns_create "testns" - - ip netns exec testns ip link add dummy0 type dummy - ip netns exec testns ip link set dev dummy0 up + echo + echo "Single path route carrier test" - ip netns exec testns ip link set dev dummy0 carrier on + setup - ip netns exec testns ip address add 198.51.100.1/24 dev dummy0 - ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0 + set -e + ip -netns testns link set dev dummy0 carrier on + set +e - ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null - check_err $? - ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null - check_err $? + echo " Start point" + ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null + log_test $? 0 "IPv4 fibmatch" + ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null + log_test $? 0 "IPv6 fibmatch" - ip netns exec testns ip route get fibmatch 198.51.100.2 | \ + ip -netns testns route get fibmatch 198.51.100.2 | \ grep -q "linkdown" - check_fail $? - ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 | \ + log_test $? 1 "IPv4 no linkdown flag" + ip -netns testns -6 route get fibmatch 2001:db8:1::2 | \ grep -q "linkdown" - check_fail $? + log_test $? 1 "IPv6 no linkdown flag" - ip netns exec testns ip link set dev dummy0 carrier off + set -e + ip -netns testns link set dev dummy0 carrier off + set +e - ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null - check_err $? - ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null - check_err $? + echo " Carrier down" + ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null + log_test $? 0 "IPv4 fibmatch" + ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null + log_test $? 0 "IPv6 fibmatch" - ip netns exec testns ip route get fibmatch 198.51.100.2 | \ + ip -netns testns route get fibmatch 198.51.100.2 | \ grep -q "linkdown" - check_err $? - ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 | \ + log_test $? 0 "IPv4 linkdown flag set" + ip -netns testns -6 route get fibmatch 2001:db8:1::2 | \ grep -q "linkdown" - check_err $? + log_test $? 0 "IPv6 linkdown flag set" - ip netns exec testns ip address add 192.0.2.1/24 dev dummy0 - ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy0 + set -e + ip -netns testns address add 192.0.2.1/24 dev dummy0 + ip -netns testns -6 address add 2001:db8:2::1/64 dev dummy0 + set +e - ip netns exec testns ip route get fibmatch 192.0.2.2 &> /dev/null - check_err $? - ip netns exec testns ip -6 route get fibmatch 2001:db8:2::2 &> /dev/null - check_err $? + echo " Second address added with carrier down" + ip -netns testns route get fibmatch 192.0.2.2 &> /dev/null + log_test $? 0 "IPv4 fibmatch" + ip -netns testns -6 route get fibmatch 2001:db8:2::2 &> /dev/null + log_test $? 0 "IPv6 fibmatch" - ip netns exec testns ip route get fibmatch 192.0.2.2 | \ + ip -netns testns route get fibmatch 192.0.2.2 | \ grep -q "linkdown" - check_err $? - ip netns exec testns ip -6 route get fibmatch 2001:db8:2::2 | \ + log_test $? 0 "IPv4 linkdown flag set" + ip -netns testns -6 route get fibmatch 2001:db8:2::2 | \ grep -q "linkdown" - check_err $? + log_test $? 0 "IPv6 linkdown flag set" - ip netns exec testns ip link del dev dummy0 - - ip netns del testns - - if [ $ret -ne 0 ]; then - echo "FAIL: unicast route carrier test" - return 1 - fi - echo "PASS: unicast route carrier test" + cleanup } fib_carrier_test() { - echo "Running netdev carrier change tests" - fib_carrier_local_test fib_carrier_unicast_test } @@ -424,6 +412,9 @@ if [ $? -ne 0 ]; then exit 0 fi +# start clean +cleanup &> /dev/null + fib_test exit $ret diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c index e11fe84de0fd..5cc2a53bb71c 100644 --- a/tools/testing/selftests/net/msg_zerocopy.c +++ b/tools/testing/selftests/net/msg_zerocopy.c @@ -14,6 +14,9 @@ * - SOCK_DGRAM * - SOCK_RAW * + * PF_RDS + * - SOCK_SEQPACKET + * * Start this program on two connected hosts, one in send mode and * the other with option '-r' to put it in receiver mode. * @@ -53,6 +56,7 @@ #include <sys/types.h> #include <sys/wait.h> #include <unistd.h> +#include <linux/rds.h> #ifndef SO_EE_ORIGIN_ZEROCOPY #define SO_EE_ORIGIN_ZEROCOPY 5 @@ -164,17 +168,39 @@ static int do_accept(int fd) return fd; } -static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy) +static void add_zcopy_cookie(struct msghdr *msg, uint32_t cookie) +{ + struct cmsghdr *cm; + + if (!msg->msg_control) + error(1, errno, "NULL cookie"); + cm = (void *)msg->msg_control; + cm->cmsg_len = CMSG_LEN(sizeof(cookie)); + cm->cmsg_level = SOL_RDS; + cm->cmsg_type = RDS_CMSG_ZCOPY_COOKIE; + memcpy(CMSG_DATA(cm), &cookie, sizeof(cookie)); +} + +static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain) { int ret, len, i, flags; + static uint32_t cookie; + char ckbuf[CMSG_SPACE(sizeof(cookie))]; len = 0; for (i = 0; i < msg->msg_iovlen; i++) len += msg->msg_iov[i].iov_len; flags = MSG_DONTWAIT; - if (do_zerocopy) + if (do_zerocopy) { flags |= MSG_ZEROCOPY; + if (domain == PF_RDS) { + memset(&msg->msg_control, 0, sizeof(msg->msg_control)); + msg->msg_controllen = CMSG_SPACE(sizeof(cookie)); + msg->msg_control = (struct cmsghdr *)ckbuf; + add_zcopy_cookie(msg, ++cookie); + } + } ret = sendmsg(fd, msg, flags); if (ret == -1 && errno == EAGAIN) @@ -190,6 +216,10 @@ static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy) if (do_zerocopy && ret) expected_completions++; } + if (do_zerocopy && domain == PF_RDS) { + msg->msg_control = NULL; + msg->msg_controllen = 0; + } return true; } @@ -216,7 +246,9 @@ static void do_sendmsg_corked(int fd, struct msghdr *msg) msg->msg_iov[0].iov_len = payload_len + extra_len; extra_len = 0; - do_sendmsg(fd, msg, do_zerocopy); + do_sendmsg(fd, msg, do_zerocopy, + (cfg_dst_addr.ss_family == AF_INET ? + PF_INET : PF_INET6)); } do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0); @@ -300,13 +332,38 @@ static int do_setup_tx(int domain, int type, int protocol) if (cfg_zerocopy) do_setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, 1); - if (domain != PF_PACKET) + if (domain != PF_PACKET && domain != PF_RDS) if (connect(fd, (void *) &cfg_dst_addr, cfg_alen)) error(1, errno, "connect"); + if (domain == PF_RDS) { + if (bind(fd, (void *) &cfg_src_addr, cfg_alen)) + error(1, errno, "bind"); + } + return fd; } +static int do_process_zerocopy_cookies(struct sock_extended_err *serr, + uint32_t *ckbuf, size_t nbytes) +{ + int ncookies, i; + + if (serr->ee_errno != 0) + error(1, 0, "serr: wrong error code: %u", serr->ee_errno); + ncookies = serr->ee_data; + if (ncookies > SO_EE_ORIGIN_MAX_ZCOOKIES) + error(1, 0, "Returned %d cookies, max expected %d\n", + ncookies, SO_EE_ORIGIN_MAX_ZCOOKIES); + if (nbytes != ncookies * sizeof(uint32_t)) + error(1, 0, "Expected %d cookies, got %ld\n", + ncookies, nbytes/sizeof(uint32_t)); + for (i = 0; i < ncookies; i++) + if (cfg_verbose >= 2) + fprintf(stderr, "%d\n", ckbuf[i]); + return ncookies; +} + static bool do_recv_completion(int fd) { struct sock_extended_err *serr; @@ -315,10 +372,17 @@ static bool do_recv_completion(int fd) uint32_t hi, lo, range; int ret, zerocopy; char control[100]; + uint32_t ckbuf[SO_EE_ORIGIN_MAX_ZCOOKIES]; + struct iovec iov; msg.msg_control = control; msg.msg_controllen = sizeof(control); + iov.iov_base = ckbuf; + iov.iov_len = (SO_EE_ORIGIN_MAX_ZCOOKIES * sizeof(ckbuf[0])); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + ret = recvmsg(fd, &msg, MSG_ERRQUEUE); if (ret == -1 && errno == EAGAIN) return false; @@ -337,6 +401,11 @@ static bool do_recv_completion(int fd) cm->cmsg_level, cm->cmsg_type); serr = (void *) CMSG_DATA(cm); + + if (serr->ee_origin == SO_EE_ORIGIN_ZCOOKIE) { + completions += do_process_zerocopy_cookies(serr, ckbuf, ret); + return true; + } if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) error(1, 0, "serr: wrong origin: %u", serr->ee_origin); if (serr->ee_errno != 0) @@ -444,6 +513,13 @@ static void do_tx(int domain, int type, int protocol) msg.msg_iovlen++; } + if (domain == PF_RDS) { + msg.msg_name = &cfg_dst_addr; + msg.msg_namelen = (cfg_dst_addr.ss_family == AF_INET ? + sizeof(struct sockaddr_in) : + sizeof(struct sockaddr_in6)); + } + iov[2].iov_base = payload; iov[2].iov_len = cfg_payload_len; msg.msg_iovlen++; @@ -454,7 +530,7 @@ static void do_tx(int domain, int type, int protocol) if (cfg_cork) do_sendmsg_corked(fd, &msg); else - do_sendmsg(fd, &msg, cfg_zerocopy); + do_sendmsg(fd, &msg, cfg_zerocopy, domain); while (!do_poll(fd, POLLOUT)) { if (cfg_zerocopy) @@ -555,6 +631,40 @@ static void do_flush_datagram(int fd, int type) bytes += cfg_payload_len; } + +static void do_recvmsg(int fd) +{ + int ret, off = 0; + char *buf; + struct iovec iov; + struct msghdr msg; + struct sockaddr_storage din; + + buf = calloc(cfg_payload_len, sizeof(char)); + iov.iov_base = buf; + iov.iov_len = cfg_payload_len; + + memset(&msg, 0, sizeof(msg)); + msg.msg_name = &din; + msg.msg_namelen = sizeof(din); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ret = recvmsg(fd, &msg, MSG_TRUNC); + + if (ret == -1) + error(1, errno, "recv"); + if (ret != cfg_payload_len) + error(1, 0, "recv: ret=%u != %u", ret, cfg_payload_len); + + if (memcmp(buf + off, payload, ret)) + error(1, 0, "recv: data mismatch"); + + free(buf); + packets++; + bytes += cfg_payload_len; +} + static void do_rx(int domain, int type, int protocol) { uint64_t tstop; @@ -566,6 +676,8 @@ static void do_rx(int domain, int type, int protocol) do { if (type == SOCK_STREAM) do_flush_tcp(fd); + else if (domain == PF_RDS) + do_recvmsg(fd); else do_flush_datagram(fd, type); @@ -610,6 +722,7 @@ static void parse_opts(int argc, char **argv) 40 /* max tcp options */; int c; char *daddr = NULL, *saddr = NULL; + char *cfg_test; cfg_payload_len = max_payload_len; @@ -667,6 +780,14 @@ static void parse_opts(int argc, char **argv) break; } } + + cfg_test = argv[argc - 1]; + if (strcmp(cfg_test, "rds") == 0) { + if (!daddr) + error(1, 0, "-D <server addr> required for PF_RDS\n"); + if (!cfg_rx && !saddr) + error(1, 0, "-S <client addr> required for PF_RDS\n"); + } setup_sockaddr(cfg_family, daddr, &cfg_dst_addr); setup_sockaddr(cfg_family, saddr, &cfg_src_addr); @@ -699,6 +820,8 @@ int main(int argc, char **argv) do_test(cfg_family, SOCK_STREAM, 0); else if (!strcmp(cfg_test, "udp")) do_test(cfg_family, SOCK_DGRAM, 0); + else if (!strcmp(cfg_test, "rds")) + do_test(PF_RDS, SOCK_SEQPACKET, 0); else error(1, 0, "unknown cfg_test %s", cfg_test); diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c index 989f917068d1..d4346b16b2c1 100644 --- a/tools/testing/selftests/net/psock_fanout.c +++ b/tools/testing/selftests/net/psock_fanout.c @@ -128,6 +128,8 @@ static void sock_fanout_getopts(int fd, uint16_t *typeflags, uint16_t *group_id) static void sock_fanout_set_ebpf(int fd) { + static char log_buf[65536]; + const int len_off = __builtin_offsetof(struct __sk_buff, len); struct bpf_insn prog[] = { { BPF_ALU64 | BPF_MOV | BPF_X, 6, 1, 0, 0 }, @@ -140,7 +142,6 @@ static void sock_fanout_set_ebpf(int fd) { BPF_ALU | BPF_MOV | BPF_K, 0, 0, 0, 0 }, { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 } }; - char log_buf[512]; union bpf_attr attr; int pfd; diff --git a/tools/testing/selftests/tc-testing/README b/tools/testing/selftests/tc-testing/README index 970ff294fec8..3a0336782d2d 100644 --- a/tools/testing/selftests/tc-testing/README +++ b/tools/testing/selftests/tc-testing/README @@ -14,11 +14,11 @@ REQUIREMENTS * The kernel must have network namespace support -* The kernel must have veth support available, as a veth pair is created +* The kernel must have veth support available, as a veth pair is created prior to running the tests. -* All tc-related features must be built in or available as modules. - To check what is required in current setup run: +* All tc-related features being tested must be built in or available as + modules. To check what is required in current setup run: ./tdc.py -c Note: @@ -44,10 +44,13 @@ using the -p option when running tdc: RUNNING TDC ----------- -To use tdc, root privileges are required. tdc will not run otherwise. +To use tdc, root privileges are required. This is because the +commands being tested must be run as root. The code that enforces +execution by root uid has been moved into a plugin (see PLUGIN +ARCHITECTURE, below). -All tests are executed inside a network namespace to prevent conflicts -within the host. +If nsPlugin is linked, all tests are executed inside a network +namespace to prevent conflicts within the host. Running tdc without any arguments will run all tests. Refer to the section on command line arguments for more information, or run: @@ -59,6 +62,33 @@ output captured from the failing test will be printed immediately following the failed test in the TAP output. +OVERVIEW OF TDC EXECUTION +------------------------- + +One run of tests is considered a "test suite" (this will be refined in the +future). A test suite has one or more test cases in it. + +A test case has four stages: + + - setup + - execute + - verify + - teardown + +The setup and teardown stages can run zero or more commands. The setup +stage does some setup if the test needs it. The teardown stage undoes +the setup and returns the system to a "neutral" state so any other test +can be run next. These two stages require any commands run to return +success, but do not otherwise verify the results. + +The execute and verify stages each run one command. The execute stage +tests the return code against one or more acceptable values. The +verify stage checks the return code for success, and also compares +the stdout with a regular expression. + +Each of the commands in any stage will run in a shell instance. + + USER-DEFINED CONSTANTS ---------------------- @@ -70,23 +100,132 @@ executed as part of the test. More will be added as test cases require. Example: $TC qdisc add dev $DEV1 ingress +The NAMES values are used to substitute into the commands in the test cases. + COMMAND LINE ARGUMENTS ---------------------- Run tdc.py -h to see the full list of available arguments. --p PATH Specify the tc executable located at PATH to be used on this - test run --c Show the available test case categories in this test file --c CATEGORY Run only tests that belong to CATEGORY --f FILE Read test cases from the JSON file named FILE --l [CATEGORY] List all test cases in the JSON file. If CATEGORY is - specified, list test cases matching that category. --s ID Show the test case matching ID --e ID Execute the test case identified by ID --i Generate unique ID numbers for test cases with no existing - ID number +usage: tdc.py [-h] [-p PATH] [-D DIR [DIR ...]] [-f FILE [FILE ...]] + [-c [CATG [CATG ...]]] [-e ID [ID ...]] [-l] [-s] [-i] [-v] + [-d DEVICE] [-n NS] [-V] + +Linux TC unit tests + +optional arguments: + -h, --help show this help message and exit + -p PATH, --path PATH The full path to the tc executable to use + -v, --verbose Show the commands that are being run + -d DEVICE, --device DEVICE + Execute the test case in flower category + +selection: + select which test cases: files plus directories; filtered by categories + plus testids + + -D DIR [DIR ...], --directory DIR [DIR ...] + Collect tests from the specified directory(ies) + (default [tc-tests]) + -f FILE [FILE ...], --file FILE [FILE ...] + Run tests from the specified file(s) + -c [CATG [CATG ...]], --category [CATG [CATG ...]] + Run tests only from the specified category/ies, or if + no category/ies is/are specified, list known + categories. + -e ID [ID ...], --execute ID [ID ...] + Execute the specified test cases with specified IDs + +action: + select action to perform on selected test cases + + -l, --list List all test cases, or those only within the + specified category + -s, --show Display the selected test cases + -i, --id Generate ID numbers for new test cases + +netns: + options for nsPlugin(run commands in net namespace) + + -n NS, --namespace NS + Run commands in namespace NS + +valgrind: + options for valgrindPlugin (run command under test under Valgrind) + + -V, --valgrind Run commands under valgrind + + +PLUGIN ARCHITECTURE +------------------- + +There is now a plugin architecture, and some of the functionality that +was in the tdc.py script has been moved into the plugins. + +The plugins are in the directory plugin-lib. The are executed from +directory plugins. Put symbolic links from plugins to plugin-lib, +and name them according to the order you want them to run. + +Example: + +bjb@bee:~/work/tc-testing$ ls -l plugins +total 4 +lrwxrwxrwx 1 bjb bjb 27 Oct 4 16:12 10-rootPlugin.py -> ../plugin-lib/rootPlugin.py +lrwxrwxrwx 1 bjb bjb 25 Oct 12 17:55 20-nsPlugin.py -> ../plugin-lib/nsPlugin.py +-rwxr-xr-x 1 bjb bjb 0 Sep 29 15:56 __init__.py + +The plugins are a subclass of TdcPlugin, defined in TdcPlugin.py and +must be called "SubPlugin" so tdc can find them. They are +distinguished from each other in the python program by their module +name. + +This base class supplies "hooks" to run extra functions. These hooks are as follows: + +pre- and post-suite +pre- and post-case +pre- and post-execute stage +adjust-command (runs in all stages and receives the stage name) + +The pre-suite hook receives the number of tests and an array of test ids. +This allows you to dump out the list of skipped tests in the event of a +failure during setup or teardown stage. + +The pre-case hook receives the ordinal number and test id of the current test. + +The adjust-command hook receives the stage id (see list below) and the +full command to be executed. This allows for last-minute adjustment +of the command. + +The stages are identified by the following strings: + + - pre (pre-suite) + - setup + - command + - verify + - teardown + - post (post-suite) + + +To write a plugin, you need to inherit from TdcPlugin in +TdcPlugin.py. To use the plugin, you have to put the +implementation file in plugin-lib, and add a symbolic link to it from +plugins. It will be detected at run time and invoked at the +appropriate times. There are a few examples in the plugin-lib +directory: + + - rootPlugin.py: + implements the enforcement of running as root + - nsPlugin.py: + sets up a network namespace and runs all commands in that namespace + - valgrindPlugin.py + runs each command in the execute stage under valgrind, + and checks for leaks. + This plugin will output an extra test for each test in the test file, + one is the existing output as to whether the test passed or failed, + and the other is a test whether the command leaked memory or not. + (This one is a preliminary version, it may not work quite right yet, + but the overall template is there and it should only need tweaks.) ACKNOWLEDGEMENTS diff --git a/tools/testing/selftests/tc-testing/TODO.txt b/tools/testing/selftests/tc-testing/TODO.txt index 6a266d811a78..c40698557e2f 100644 --- a/tools/testing/selftests/tc-testing/TODO.txt +++ b/tools/testing/selftests/tc-testing/TODO.txt @@ -5,6 +5,27 @@ tc Testing Suite To-Do list: - Add support for multiple versions of tc to run successively -- Improve error messages when tdc aborts its run +- Improve error messages when tdc aborts its run. Partially done - still + need to better handle problems in pre- and post-suite. -- Allow tdc to write its results to file +- Use python logger module for debug/verbose output + +- Allow tdc to write its results to file. + Maybe use python logger module for this too. + +- A better implementation of the "hooks". Currently, every plugin + will attempt to run a function at every hook point. Could be + changed so that plugin __init__ methods will register functions to + be run in the various predefined times. Then if a plugin does not + require action at a specific point, no penalty will be paid for + trying to run a function that will do nothing. + +- Proper exception handling - make an exception class and use it + +- a TestCase class, for easier testcase handling, searching, comparison + +- a TestSuite class + and a way to configure a test suite, + to automate running multiple "test suites" with different requirements + +- super simple test case example using ls, touch, etc diff --git a/tools/testing/selftests/tc-testing/TdcPlugin.py b/tools/testing/selftests/tc-testing/TdcPlugin.py new file mode 100644 index 000000000000..3ee9a6dacb52 --- /dev/null +++ b/tools/testing/selftests/tc-testing/TdcPlugin.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python3 + +class TdcPlugin: + def __init__(self): + super().__init__() + print(' -- {}.__init__'.format(self.sub_class)) + + def pre_suite(self, testcount, testidlist): + '''run commands before test_runner goes into a test loop''' + self.testcount = testcount + self.testidlist = testidlist + if self.args.verbose > 1: + print(' -- {}.pre_suite'.format(self.sub_class)) + + def post_suite(self, index): + '''run commands after test_runner completes the test loop + index is the last ordinal number of test that was attempted''' + if self.args.verbose > 1: + print(' -- {}.post_suite'.format(self.sub_class)) + + def pre_case(self, test_ordinal, testid): + '''run commands before test_runner does one test''' + if self.args.verbose > 1: + print(' -- {}.pre_case'.format(self.sub_class)) + self.args.testid = testid + self.args.test_ordinal = test_ordinal + + def post_case(self): + '''run commands after test_runner does one test''' + if self.args.verbose > 1: + print(' -- {}.post_case'.format(self.sub_class)) + + def pre_execute(self): + '''run command before test-runner does the execute step''' + if self.args.verbose > 1: + print(' -- {}.pre_execute'.format(self.sub_class)) + + def post_execute(self): + '''run command after test-runner does the execute step''' + if self.args.verbose > 1: + print(' -- {}.post_execute'.format(self.sub_class)) + + def adjust_command(self, stage, command): + '''adjust the command''' + if self.args.verbose > 1: + print(' -- {}.adjust_command {}'.format(self.sub_class, stage)) + + # if stage == 'pre': + # pass + # elif stage == 'setup': + # pass + # elif stage == 'execute': + # pass + # elif stage == 'verify': + # pass + # elif stage == 'teardown': + # pass + # elif stage == 'post': + # pass + # else: + # pass + + return command + + def add_args(self, parser): + '''Get the plugin args from the command line''' + self.argparser = parser + return self.argparser + + def check_args(self, args, remaining): + '''Check that the args are set correctly''' + self.args = args + if self.args.verbose > 1: + print(' -- {}.check_args'.format(self.sub_class)) diff --git a/tools/testing/selftests/tc-testing/creating-plugins/AddingPlugins.txt b/tools/testing/selftests/tc-testing/creating-plugins/AddingPlugins.txt new file mode 100644 index 000000000000..c18f88d09360 --- /dev/null +++ b/tools/testing/selftests/tc-testing/creating-plugins/AddingPlugins.txt @@ -0,0 +1,104 @@ +tdc - Adding plugins for tdc + +Author: Brenda J. Butler - bjb@mojatatu.com + +ADDING PLUGINS +-------------- + +A new plugin should be written in python as a class that inherits from TdcPlugin. +There are some examples in plugin-lib. + +The plugin can be used to add functionality to the test framework, +such as: + +- adding commands to be run before and/or after the test suite +- adding commands to be run before and/or after the test cases +- adding commands to be run before and/or after the execute phase of the test cases +- ability to alter the command to be run in any phase: + pre (the pre-suite stage) + prepare + execute + verify + teardown + post (the post-suite stage) +- ability to add to the command line args, and use them at run time + + +The functions in the class should follow the following interfaces: + + def __init__(self) + def pre_suite(self, testcount, testidlist) # see "PRE_SUITE" below + def post_suite(self, ordinal) # see "SKIPPING" below + def pre_case(self, test_ordinal, testid) # see "PRE_CASE" below + def post_case(self) + def pre_execute(self) + def post_execute(self) + def adjust_command(self, stage, command) # see "ADJUST" below + def add_args(self, parser) # see "ADD_ARGS" below + def check_args(self, args, remaining) # see "CHECK_ARGS" below + + +PRE_SUITE + +This method takes a testcount (number of tests to be run) and +testidlist (array of test ids for tests that will be run). This is +useful for various things, including when an exception occurs and the +rest of the tests must be skipped. The info is stored in the object, +and the post_suite method can refer to it when dumping the "skipped" +TAP output. The tdc.py script will do that for the test suite as +defined in the test case, but if the plugin is being used to run extra +tests on each test (eg, check for memory leaks on associated +co-processes) then that other tap output can be generated in the +post-suite method using this info passed in to the pre_suite method. + + +SKIPPING + +The post_suite method will receive the ordinal number of the last +test to be attempted. It can use this info when outputting +the TAP output for the extra test cases. + + +PRE_CASE + +The pre_case method will receive the ordinal number of the test +and the test id. Useful for outputing the extra test results. + + +ADJUST + +The adjust_command method receives a string representing +the execution stage and a string which is the actual command to be +executed. The plugin can adjust the command, based on the stage of +execution. + +The stages are represented by the following strings: + + 'pre' + 'setup' + 'command' + 'verify' + 'teardown' + 'post' + +The adjust_command method must return the adjusted command so tdc +can use it. + + +ADD_ARGS + +The add_args method receives the argparser object and can add +arguments to it. Care should be taken that the new arguments do not +conflict with any from tdc.py or from other plugins that will be used +concurrently. + +The add_args method should return the argparser object. + + +CHECK_ARGS + +The check_args method is so that the plugin can do validation on +the args, if needed. If there is a problem, and Exception should +be raised, with a string that explains the problem. + +eg: raise Exception('plugin xxx, arg -y is wrong, fix it') diff --git a/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt b/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt index 00438331ba47..17b267dedbd9 100644 --- a/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt +++ b/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt @@ -12,14 +12,18 @@ template.json for the required JSON format for test cases. Include the 'id' field, but do not assign a value. Running tdc with the -i option will generate a unique ID for that test case. -tdc will recursively search the 'tc' subdirectory for .json files. Any -test case files you create in these directories will automatically be included. -If you wish to store your custom test cases elsewhere, be sure to run tdc -with the -f argument and the path to your file. +tdc will recursively search the 'tc-tests' subdirectory (or the +directories named with the -D option) for .json files. Any test case +files you create in these directories will automatically be included. +If you wish to store your custom test cases elsewhere, be sure to run +tdc with the -f argument and the path to your file, or the -D argument +and the path to your directory(ies). -Be aware of required escape characters in the JSON data - particularly when -defining the match pattern. Refer to the tctests.json file for examples when -in doubt. +Be aware of required escape characters in the JSON data - particularly +when defining the match pattern. Refer to the supplied json test files +for examples when in doubt. The match pattern is written in json, and +will be used by python. So the match pattern will be a python regular +expression, but should be written using json syntax. TEST CASE STRUCTURE @@ -69,7 +73,8 @@ SETUP/TEARDOWN ERRORS If an error is detected during the setup/teardown process, execution of the tests will immediately stop with an error message and the namespace in which the tests are run will be destroyed. This is to prevent inaccurate results -in the test cases. +in the test cases. tdc will output a series of TAP results for the skipped +tests. Repeated failures of the setup/teardown may indicate a problem with the test case, or possibly even a bug in one of the commands that are not being tested. @@ -79,3 +84,17 @@ so that it doesn't halt the script for an error that doesn't matter. Turn the individual command into a list, with the command being first, followed by all acceptable exit codes for the command. +Example: + +A pair of setup commands. The first can have exit code 0, 1 or 255, the +second must have exit code 0. + + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ], + "$TC actions add action reclassify index 65536" + ], diff --git a/tools/testing/selftests/tc-testing/plugin-lib/README-PLUGINS b/tools/testing/selftests/tc-testing/plugin-lib/README-PLUGINS new file mode 100644 index 000000000000..aa8a2669702b --- /dev/null +++ b/tools/testing/selftests/tc-testing/plugin-lib/README-PLUGINS @@ -0,0 +1,27 @@ +tdc.py will look for plugins in a directory plugins off the cwd. +Make a set of numbered symbolic links from there to the actual plugins. +Eg: + +tdc.py +plugin-lib/ +plugins/ + __init__.py + 10-rootPlugin.py -> ../plugin-lib/rootPlugin.py + 20-valgrindPlugin.py -> ../plugin-lib/valgrindPlugin.py + 30-nsPlugin.py -> ../plugin-lib/nsPlugin.py + + +tdc.py will find them and use them. + + +rootPlugin + Check if the uid is root. If not, bail out. + +valgrindPlugin + Run the command under test with valgrind, and produce an extra set of TAP results for the memory tests. + This plugin will write files to the cwd, called vgnd-xxx.log. These will contain + the valgrind output for test xxx. Any file matching the glob 'vgnd-*.log' will be + deleted at the end of the run. + +nsPlugin + Run all the commands in a network namespace. diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py new file mode 100644 index 000000000000..a194b1af2b30 --- /dev/null +++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py @@ -0,0 +1,141 @@ +import os +import signal +from string import Template +import subprocess +import time +from TdcPlugin import TdcPlugin + +from tdc_config import * + +class SubPlugin(TdcPlugin): + def __init__(self): + self.sub_class = 'ns/SubPlugin' + super().__init__() + + def pre_suite(self, testcount, testidlist): + '''run commands before test_runner goes into a test loop''' + super().pre_suite(testcount, testidlist) + + if self.args.namespace: + self._ns_create() + + def post_suite(self, index): + '''run commands after test_runner goes into a test loop''' + super().post_suite(index) + if self.args.verbose: + print('{}.post_suite'.format(self.sub_class)) + + if self.args.namespace: + self._ns_destroy() + + def add_args(self, parser): + super().add_args(parser) + self.argparser_group = self.argparser.add_argument_group( + 'netns', + 'options for nsPlugin(run commands in net namespace)') + self.argparser_group.add_argument( + '-n', '--namespace', action='store_true', + help='Run commands in namespace') + return self.argparser + + def adjust_command(self, stage, command): + super().adjust_command(stage, command) + cmdform = 'list' + cmdlist = list() + + if not self.args.namespace: + return command + + if self.args.verbose: + print('{}.adjust_command'.format(self.sub_class)) + + if not isinstance(command, list): + cmdform = 'str' + cmdlist = command.split() + else: + cmdlist = command + if stage == 'setup' or stage == 'execute' or stage == 'verify' or stage == 'teardown': + if self.args.verbose: + print('adjust_command: stage is {}; inserting netns stuff in command [{}] list [{}]'.format(stage, command, cmdlist)) + cmdlist.insert(0, self.args.NAMES['NS']) + cmdlist.insert(0, 'exec') + cmdlist.insert(0, 'netns') + cmdlist.insert(0, 'ip') + else: + pass + + if cmdform == 'str': + command = ' '.join(cmdlist) + else: + command = cmdlist + + if self.args.verbose: + print('adjust_command: return command [{}]'.format(command)) + return command + + def _ns_create(self): + ''' + Create the network namespace in which the tests will be run and set up + the required network devices for it. + ''' + if self.args.namespace: + cmd = 'ip netns add {}'.format(self.args.NAMES['NS']) + self._exec_cmd('pre', cmd) + cmd = 'ip link add $DEV0 type veth peer name $DEV1' + self._exec_cmd('pre', cmd) + cmd = 'ip link set $DEV1 netns {}'.format(self.args.NAMES['NS']) + self._exec_cmd('pre', cmd) + cmd = 'ip link set $DEV0 up' + self._exec_cmd('pre', cmd) + cmd = 'ip -n {} link set $DEV1 up'.format(self.args.NAMES['NS']) + self._exec_cmd('pre', cmd) + if self.args.device: + cmd = 'ip link set $DEV2 netns {}'.format(self.args.NAMES['NS']) + self._exec_cmd('pre', cmd) + cmd = 'ip -n {} link set $DEV2 up'.format(self.args.NAMES['NS']) + self._exec_cmd('pre', cmd) + + def _ns_destroy(self): + ''' + Destroy the network namespace for testing (and any associated network + devices as well) + ''' + if self.args.namespace: + cmd = 'ip netns delete {}'.format(self.args.NAMES['NS']) + self._exec_cmd('post', cmd) + + def _exec_cmd(self, stage, command): + ''' + Perform any required modifications on an executable command, then run + it in a subprocess and return the results. + ''' + if '$' in command: + command = self._replace_keywords(command) + + self.adjust_command(stage, command) + if self.args.verbose: + print('_exec_cmd: command "{}"'.format(command)) + proc = subprocess.Popen(command, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=ENVIR) + (rawout, serr) = proc.communicate() + + if proc.returncode != 0 and len(serr) > 0: + foutput = serr.decode("utf-8") + else: + foutput = rawout.decode("utf-8") + + proc.stdout.close() + proc.stderr.close() + return proc, foutput + + def _replace_keywords(self, cmd): + """ + For a given executable command, substitute any known + variables contained within NAMES with the correct values + """ + tcmd = Template(cmd) + subcmd = tcmd.safe_substitute(self.args.NAMES) + return subcmd diff --git a/tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py new file mode 100644 index 000000000000..e36775bd4d12 --- /dev/null +++ b/tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py @@ -0,0 +1,19 @@ +import os +import sys +from TdcPlugin import TdcPlugin + +from tdc_config import * + + +class SubPlugin(TdcPlugin): + def __init__(self): + self.sub_class = 'root/SubPlugin' + super().__init__() + + def pre_suite(self, testcount, testidlist): + # run commands before test_runner goes into a test loop + super().pre_suite(testcount, testidlist) + + if os.geteuid(): + print('This script must be run with root privileges', file=sys.stderr) + exit(1) diff --git a/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py new file mode 100644 index 000000000000..477a7bd7d7fb --- /dev/null +++ b/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py @@ -0,0 +1,142 @@ +''' +run the command under test, under valgrind and collect memory leak info +as a separate test. +''' + + +import os +import re +import signal +from string import Template +import subprocess +import time +from TdcPlugin import TdcPlugin + +from tdc_config import * + +def vp_extract_num_from_string(num_as_string_maybe_with_commas): + return int(num_as_string_maybe_with_commas.replace(',','')) + +class SubPlugin(TdcPlugin): + def __init__(self): + self.sub_class = 'valgrind/SubPlugin' + self.tap = '' + super().__init__() + + def pre_suite(self, testcount, testidlist): + '''run commands before test_runner goes into a test loop''' + super().pre_suite(testcount, testidlist) + if self.args.verbose > 1: + print('{}.pre_suite'.format(self.sub_class)) + if self.args.valgrind: + self._add_to_tap('1..{}\n'.format(self.testcount)) + + def post_suite(self, index): + '''run commands after test_runner goes into a test loop''' + super().post_suite(index) + self._add_to_tap('\n|---\n') + if self.args.verbose > 1: + print('{}.post_suite'.format(self.sub_class)) + print('{}'.format(self.tap)) + if self.args.verbose < 4: + subprocess.check_output('rm -f vgnd-*.log', shell=True) + + def add_args(self, parser): + super().add_args(parser) + self.argparser_group = self.argparser.add_argument_group( + 'valgrind', + 'options for valgrindPlugin (run command under test under Valgrind)') + + self.argparser_group.add_argument( + '-V', '--valgrind', action='store_true', + help='Run commands under valgrind') + + return self.argparser + + def adjust_command(self, stage, command): + super().adjust_command(stage, command) + cmdform = 'list' + cmdlist = list() + + if not self.args.valgrind: + return command + + if self.args.verbose > 1: + print('{}.adjust_command'.format(self.sub_class)) + + if not isinstance(command, list): + cmdform = 'str' + cmdlist = command.split() + else: + cmdlist = command + + if stage == 'execute': + if self.args.verbose > 1: + print('adjust_command: stage is {}; inserting valgrind stuff in command [{}] list [{}]'. + format(stage, command, cmdlist)) + cmdlist.insert(0, '--track-origins=yes') + cmdlist.insert(0, '--show-leak-kinds=definite,indirect') + cmdlist.insert(0, '--leak-check=full') + cmdlist.insert(0, '--log-file=vgnd-{}.log'.format(self.args.testid)) + cmdlist.insert(0, '-v') # ask for summary of non-leak errors + cmdlist.insert(0, ENVIR['VALGRIND_BIN']) + else: + pass + + if cmdform == 'str': + command = ' '.join(cmdlist) + else: + command = cmdlist + + if self.args.verbose > 1: + print('adjust_command: return command [{}]'.format(command)) + return command + + def post_execute(self): + if not self.args.valgrind: + return + + self.definitely_lost_re = re.compile( + r'definitely lost:\s+([,0-9]+)\s+bytes in\s+([,0-9]+)\sblocks', re.MULTILINE | re.DOTALL) + self.indirectly_lost_re = re.compile( + r'indirectly lost:\s+([,0-9]+)\s+bytes in\s+([,0-9]+)\s+blocks', re.MULTILINE | re.DOTALL) + self.possibly_lost_re = re.compile( + r'possibly lost:\s+([,0-9]+)bytes in\s+([,0-9]+)\s+blocks', re.MULTILINE | re.DOTALL) + self.non_leak_error_re = re.compile( + r'ERROR SUMMARY:\s+([,0-9]+) errors from\s+([,0-9]+)\s+contexts', re.MULTILINE | re.DOTALL) + + def_num = 0 + ind_num = 0 + pos_num = 0 + nle_num = 0 + + # what about concurrent test runs? Maybe force them to be in different directories? + with open('vgnd-{}.log'.format(self.args.testid)) as vfd: + content = vfd.read() + def_mo = self.definitely_lost_re.search(content) + ind_mo = self.indirectly_lost_re.search(content) + pos_mo = self.possibly_lost_re.search(content) + nle_mo = self.non_leak_error_re.search(content) + + if def_mo: + def_num = int(def_mo.group(2)) + if ind_mo: + ind_num = int(ind_mo.group(2)) + if pos_mo: + pos_num = int(pos_mo.group(2)) + if nle_mo: + nle_num = int(nle_mo.group(1)) + + mem_results = '' + if (def_num > 0) or (ind_num > 0) or (pos_num > 0) or (nle_num > 0): + mem_results += 'not ' + + mem_results += 'ok {} - {}-mem # {}\n'.format( + self.args.test_ordinal, self.args.testid, 'memory leak check') + self._add_to_tap(mem_results) + if mem_results.startswith('not '): + print('{}'.format(content)) + self._add_to_tap(content) + + def _add_to_tap(self, more_tap_output): + self.tap += more_tap_output diff --git a/tools/testing/selftests/tc-testing/plugins/__init__.py b/tools/testing/selftests/tc-testing/plugins/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/tools/testing/selftests/tc-testing/plugins/__init__.py diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index fc373fdf2bdc..b3754b9aa302 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -11,16 +11,88 @@ import re import os import sys import argparse +import importlib import json import subprocess +import time from collections import OrderedDict from string import Template from tdc_config import * from tdc_helper import * - -USE_NS = True +import TdcPlugin + +class PluginMgr: + def __init__(self, argparser): + super().__init__() + self.plugins = {} + self.plugin_instances = [] + self.args = [] + self.argparser = argparser + + # TODO, put plugins in order + plugindir = os.getenv('TDC_PLUGIN_DIR', './plugins') + for dirpath, dirnames, filenames in os.walk(plugindir): + for fn in filenames: + if (fn.endswith('.py') and + not fn == '__init__.py' and + not fn.startswith('#') and + not fn.startswith('.#')): + mn = fn[0:-3] + foo = importlib.import_module('plugins.' + mn) + self.plugins[mn] = foo + self.plugin_instances.append(foo.SubPlugin()) + + def call_pre_suite(self, testcount, testidlist): + for pgn_inst in self.plugin_instances: + pgn_inst.pre_suite(testcount, testidlist) + + def call_post_suite(self, index): + for pgn_inst in reversed(self.plugin_instances): + pgn_inst.post_suite(index) + + def call_pre_case(self, test_ordinal, testid): + for pgn_inst in self.plugin_instances: + try: + pgn_inst.pre_case(test_ordinal, testid) + except Exception as ee: + print('exception {} in call to pre_case for {} plugin'. + format(ee, pgn_inst.__class__)) + print('test_ordinal is {}'.format(test_ordinal)) + print('testid is {}'.format(testid)) + raise + + def call_post_case(self): + for pgn_inst in reversed(self.plugin_instances): + pgn_inst.post_case() + + def call_pre_execute(self): + for pgn_inst in self.plugin_instances: + pgn_inst.pre_execute() + + def call_post_execute(self): + for pgn_inst in reversed(self.plugin_instances): + pgn_inst.post_execute() + + def call_add_args(self, parser): + for pgn_inst in self.plugin_instances: + parser = pgn_inst.add_args(parser) + return parser + + def call_check_args(self, args, remaining): + for pgn_inst in self.plugin_instances: + pgn_inst.check_args(args, remaining) + + def call_adjust_command(self, stage, command): + for pgn_inst in self.plugin_instances: + command = pgn_inst.adjust_command(stage, command) + return command + + @staticmethod + def _make_argparser(args): + self.argparser = argparse.ArgumentParser( + description='Linux TC unit tests') def replace_keywords(cmd): @@ -33,21 +105,24 @@ def replace_keywords(cmd): return subcmd -def exec_cmd(command, nsonly=True): +def exec_cmd(args, pm, stage, command): """ Perform any required modifications on an executable command, then run it in a subprocess and return the results. """ - if (USE_NS and nsonly): - command = 'ip netns exec $NS ' + command - + if len(command.strip()) == 0: + return None, None if '$' in command: command = replace_keywords(command) + command = pm.call_adjust_command(stage, command) + if args.verbose > 0: + print('command "{}"'.format(command)) proc = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + stderr=subprocess.PIPE, + env=ENVIR) (rawout, serr) = proc.communicate() if proc.returncode != 0 and len(serr) > 0: @@ -60,36 +135,85 @@ def exec_cmd(command, nsonly=True): return proc, foutput -def prepare_env(cmdlist): +def prepare_env(args, pm, stage, prefix, cmdlist): """ - Execute the setup/teardown commands for a test case. Optionally - terminate test execution if the command fails. + Execute the setup/teardown commands for a test case. + Optionally terminate test execution if the command fails. """ + if args.verbose > 0: + print('{}'.format(prefix)) for cmdinfo in cmdlist: - if (type(cmdinfo) == list): + if isinstance(cmdinfo, list): exit_codes = cmdinfo[1:] cmd = cmdinfo[0] else: exit_codes = [0] cmd = cmdinfo - if (len(cmd) == 0): + if not cmd: continue - (proc, foutput) = exec_cmd(cmd) + (proc, foutput) = exec_cmd(args, pm, stage, cmd) + + if proc and (proc.returncode not in exit_codes): + print('', file=sys.stderr) + print("{} *** Could not execute: \"{}\"".format(prefix, cmd), + file=sys.stderr) + print("\n{} *** Error message: \"{}\"".format(prefix, foutput), + file=sys.stderr) + print("\n{} *** Aborting test run.".format(prefix), file=sys.stderr) + print("\n\n{} *** stdout ***".format(proc.stdout), file=sys.stderr) + print("\n\n{} *** stderr ***".format(proc.stderr), file=sys.stderr) + raise Exception('"{}" did not complete successfully'.format(prefix)) + +def run_one_test(pm, args, index, tidx): + result = True + tresult = "" + tap = "" + if args.verbose > 0: + print("\t====================\n=====> ", end="") + print("Test " + tidx["id"] + ": " + tidx["name"]) + + pm.call_pre_case(index, tidx['id']) + prepare_env(args, pm, 'setup', "-----> prepare stage", tidx["setup"]) + + if (args.verbose > 0): + print('-----> execute stage') + pm.call_pre_execute() + (p, procout) = exec_cmd(args, pm, 'execute', tidx["cmdUnderTest"]) + exit_code = p.returncode + pm.call_post_execute() + + if (exit_code != int(tidx["expExitCode"])): + result = False + print("exit:", exit_code, int(tidx["expExitCode"])) + print(procout) + else: + if args.verbose > 0: + print('-----> verify stage') + match_pattern = re.compile( + str(tidx["matchPattern"]), re.DOTALL | re.MULTILINE) + (p, procout) = exec_cmd(args, pm, 'verify', tidx["verifyCmd"]) + match_index = re.findall(match_pattern, procout) + if len(match_index) != int(tidx["matchCount"]): + result = False + + if not result: + tresult += 'not ' + tresult += 'ok {} - {} # {}\n'.format(str(index), tidx['id'], tidx['name']) + tap += tresult + + if result == False: + tap += procout - if proc.returncode not in exit_codes: - print - print("Could not execute:") - print(cmd) - print("\nError message:") - print(foutput) - print("\nAborting test run.") - ns_destroy() - exit(1) + prepare_env(args, pm, 'teardown', '-----> teardown stage', tidx['teardown']) + pm.call_post_case() + index += 1 + + return tap -def test_runner(filtered_tests, args): +def test_runner(pm, args, filtered_tests): """ Driver function for the unit tests. @@ -102,74 +226,39 @@ def test_runner(filtered_tests, args): tcount = len(testlist) index = 1 tap = str(index) + ".." + str(tcount) + "\n" + badtest = None + pm.call_pre_suite(tcount, [tidx['id'] for tidx in testlist]) + + if args.verbose > 1: + print('Run tests here') for tidx in testlist: - result = True - tresult = "" if "flower" in tidx["category"] and args.device == None: continue - print("Test " + tidx["id"] + ": " + tidx["name"]) - prepare_env(tidx["setup"]) - (p, procout) = exec_cmd(tidx["cmdUnderTest"]) - exit_code = p.returncode - - if (exit_code != int(tidx["expExitCode"])): - result = False - print("exit:", exit_code, int(tidx["expExitCode"])) - print(procout) - else: - match_pattern = re.compile(str(tidx["matchPattern"]), re.DOTALL) - (p, procout) = exec_cmd(tidx["verifyCmd"]) - match_index = re.findall(match_pattern, procout) - if len(match_index) != int(tidx["matchCount"]): - result = False - - if result == True: - tresult += "ok " - else: - tresult += "not ok " - tap += tresult + str(index) + " " + tidx["id"] + " " + tidx["name"] + "\n" - - if result == False: - tap += procout - - prepare_env(tidx["teardown"]) + try: + badtest = tidx # in case it goes bad + tap += run_one_test(pm, args, index, tidx) + except Exception as ee: + print('Exception {} (caught in test_runner, running test {} {} {})'. + format(ee, index, tidx['id'], tidx['name'])) + break index += 1 - return tap - + # if we failed in setup or teardown, + # fill in the remaining tests with not ok + count = index + tap += 'about to flush the tap output if tests need to be skipped\n' + if tcount + 1 != index: + for tidx in testlist[index - 1:]: + msg = 'skipped - previous setup or teardown failed' + tap += 'ok {} - {} # {} {} {}\n'.format( + count, tidx['id'], msg, index, badtest.get('id', '--Unknown--')) + count += 1 -def ns_create(): - """ - Create the network namespace in which the tests will be run and set up - the required network devices for it. - """ - if (USE_NS): - cmd = 'ip netns add $NS' - exec_cmd(cmd, False) - cmd = 'ip link add $DEV0 type veth peer name $DEV1' - exec_cmd(cmd, False) - cmd = 'ip link set $DEV1 netns $NS' - exec_cmd(cmd, False) - cmd = 'ip link set $DEV0 up' - exec_cmd(cmd, False) - cmd = 'ip -n $NS link set $DEV1 up' - exec_cmd(cmd, False) - cmd = 'ip link set $DEV2 netns $NS' - exec_cmd(cmd, False) - cmd = 'ip -n $NS link set $DEV2 up' - exec_cmd(cmd, False) - - -def ns_destroy(): - """ - Destroy the network namespace for testing (and any associated network - devices as well) - """ - if (USE_NS): - cmd = 'ip netns delete $NS' - exec_cmd(cmd, False) + tap += 'done flushing skipped test tap output\n' + pm.call_post_suite(index) + return tap def has_blank_ids(idlist): """ @@ -209,29 +298,50 @@ def set_args(parser): """ Set the command line arguments for tdc. """ - parser.add_argument('-p', '--path', type=str, - help='The full path to the tc executable to use') - parser.add_argument('-c', '--category', type=str, nargs='?', const='+c', - help='Run tests only from the specified category, or if no category is specified, list known categories.') - parser.add_argument('-f', '--file', type=str, - help='Run tests from the specified file') - parser.add_argument('-l', '--list', type=str, nargs='?', const="++", metavar='CATEGORY', - help='List all test cases, or those only within the specified category') - parser.add_argument('-s', '--show', type=str, nargs=1, metavar='ID', dest='showID', - help='Display the test case with specified id') - parser.add_argument('-e', '--execute', type=str, nargs=1, metavar='ID', - help='Execute the single test case with specified ID') - parser.add_argument('-i', '--id', action='store_true', dest='gen_id', - help='Generate ID numbers for new test cases') + parser.add_argument( + '-p', '--path', type=str, + help='The full path to the tc executable to use') + sg = parser.add_argument_group( + 'selection', 'select which test cases: ' + + 'files plus directories; filtered by categories plus testids') + ag = parser.add_argument_group( + 'action', 'select action to perform on selected test cases') + + sg.add_argument( + '-D', '--directory', nargs='+', metavar='DIR', + help='Collect tests from the specified directory(ies) ' + + '(default [tc-tests])') + sg.add_argument( + '-f', '--file', nargs='+', metavar='FILE', + help='Run tests from the specified file(s)') + sg.add_argument( + '-c', '--category', nargs='*', metavar='CATG', default=['+c'], + help='Run tests only from the specified category/ies, ' + + 'or if no category/ies is/are specified, list known categories.') + sg.add_argument( + '-e', '--execute', nargs='+', metavar='ID', + help='Execute the specified test cases with specified IDs') + ag.add_argument( + '-l', '--list', action='store_true', + help='List all test cases, or those only within the specified category') + ag.add_argument( + '-s', '--show', action='store_true', dest='showID', + help='Display the selected test cases') + ag.add_argument( + '-i', '--id', action='store_true', dest='gen_id', + help='Generate ID numbers for new test cases') + parser.add_argument( + '-v', '--verbose', action='count', default=0, + help='Show the commands that are being run') parser.add_argument('-d', '--device', help='Execute the test case in flower category') return parser -def check_default_settings(args): +def check_default_settings(args, remaining, pm): """ - Process any arguments overriding the default settings, and ensure the - settings are correct. + Process any arguments overriding the default settings, + and ensure the settings are correct. """ # Allow for overriding specific settings global NAMES @@ -244,6 +354,8 @@ def check_default_settings(args): print("The specified tc path " + NAMES['TC'] + " does not exist.") exit(1) + pm.call_check_args(args, remaining) + def get_id_list(alltests): """ @@ -300,40 +412,107 @@ def generate_case_ids(alltests): json.dump(testlist, outfile, indent=4) outfile.close() +def filter_tests_by_id(args, testlist): + ''' + Remove tests from testlist that are not in the named id list. + If id list is empty, return empty list. + ''' + newlist = list() + if testlist and args.execute: + target_ids = args.execute + + if isinstance(target_ids, list) and (len(target_ids) > 0): + newlist = list(filter(lambda x: x['id'] in target_ids, testlist)) + return newlist + +def filter_tests_by_category(args, testlist): + ''' + Remove tests from testlist that are not in a named category. + ''' + answer = list() + if args.category and testlist: + test_ids = list() + for catg in set(args.category): + if catg == '+c': + continue + print('considering category {}'.format(catg)) + for tc in testlist: + if catg in tc['category'] and tc['id'] not in test_ids: + answer.append(tc) + test_ids.append(tc['id']) + + return answer def get_test_cases(args): """ If a test case file is specified, retrieve tests from that file. Otherwise, glob for all json files in subdirectories and load from each one. + Also, if requested, filter by category, and add tests matching + certain ids. """ import fnmatch - if args.file != None: - if not os.path.isfile(args.file): - print("The specified test case file " + args.file + " does not exist.") - exit(1) - flist = [args.file] - else: - flist = [] - for root, dirnames, filenames in os.walk('tc-tests'): + + flist = [] + testdirs = ['tc-tests'] + + if args.file: + # at least one file was specified - remove the default directory + testdirs = [] + + for ff in args.file: + if not os.path.isfile(ff): + print("IGNORING file " + ff + "\n\tBECAUSE does not exist.") + else: + flist.append(os.path.abspath(ff)) + + if args.directory: + testdirs = args.directory + + for testdir in testdirs: + for root, dirnames, filenames in os.walk(testdir): for filename in fnmatch.filter(filenames, '*.json'): - flist.append(os.path.join(root, filename)) - alltests = list() + candidate = os.path.abspath(os.path.join(root, filename)) + if candidate not in testdirs: + flist.append(candidate) + + alltestcases = list() for casefile in flist: - alltests = alltests + (load_from_file(casefile)) - return alltests + alltestcases = alltestcases + (load_from_file(casefile)) + + allcatlist = get_test_categories(alltestcases) + allidlist = get_id_list(alltestcases) + + testcases_by_cats = get_categorized_testlist(alltestcases, allcatlist) + idtestcases = filter_tests_by_id(args, alltestcases) + cattestcases = filter_tests_by_category(args, alltestcases) + + cat_ids = [x['id'] for x in cattestcases] + if args.execute: + if args.category: + alltestcases = cattestcases + [x for x in idtestcases if x['id'] not in cat_ids] + else: + alltestcases = idtestcases + else: + if cat_ids: + alltestcases = cattestcases + else: + # just accept the existing value of alltestcases, + # which has been filtered by file/directory + pass + + return allcatlist, allidlist, testcases_by_cats, alltestcases -def set_operation_mode(args): +def set_operation_mode(pm, args): """ Load the test case data and process remaining arguments to determine what the script should do for this run, and call the appropriate function. """ - alltests = get_test_cases(args) + ucat, idlist, testcases, alltests = get_test_cases(args) if args.gen_id: - idlist = get_id_list(alltests) if (has_blank_ids(idlist)): alltests = generate_case_ids(alltests) else: @@ -347,70 +526,26 @@ def set_operation_mode(args): print("Please correct them before continuing.") exit(1) - ucat = get_test_categories(alltests) - if args.showID: - show_test_case_by_id(alltests, args.showID[0]) + for atest in alltests: + print_test_case(atest) exit(0) - if args.execute: - target_id = args.execute[0] - else: - target_id = "" - - if args.category: - if (args.category == '+c'): - print("Available categories:") - print_sll(ucat) - exit(0) - else: - target_category = args.category - else: - target_category = "" - - - testcases = get_categorized_testlist(alltests, ucat) + if isinstance(args.category, list) and (len(args.category) == 0): + print("Available categories:") + print_sll(ucat) + exit(0) if args.list: - if (args.list == "++"): + if args.list: list_test_cases(alltests) exit(0) - elif(len(args.list) > 0): - if (args.list not in ucat): - print("Unknown category " + args.list) - print("Available categories:") - print_sll(ucat) - exit(1) - list_test_cases(testcases[args.list]) - exit(0) - - if (os.geteuid() != 0): - print("This script must be run with root privileges.\n") - exit(1) - - ns_create() - - if (len(target_category) == 0): - if (len(target_id) > 0): - alltests = list(filter(lambda x: target_id in x['id'], alltests)) - if (len(alltests) == 0): - print("Cannot find a test case with ID matching " + target_id) - exit(1) - catresults = test_runner(alltests, args) - print("All test results: " + "\n\n" + catresults) - elif (len(target_category) > 0): - if (target_category == "flower") and args.device == None: - print("Please specify a NIC device (-d) to run category flower") - exit(1) - if (target_category not in ucat): - print("Specified category is not present in this file.") - exit(1) - else: - catresults = test_runner(testcases[target_category], args) - print("Category " + target_category + "\n\n" + catresults) - - ns_destroy() + if len(alltests): + catresults = test_runner(pm, args, alltests) + else: + catresults = 'No tests found\n' + print('All test results: \n\n{}'.format(catresults)) def main(): """ @@ -419,10 +554,15 @@ def main(): """ parser = args_parse() parser = set_args(parser) + pm = PluginMgr(parser) + parser = pm.call_add_args(parser) (args, remaining) = parser.parse_known_args() - check_default_settings(args) + args.NAMES = NAMES + check_default_settings(args, remaining, pm) + if args.verbose > 2: + print('args is {}'.format(args)) - set_operation_mode(args) + set_operation_mode(pm, args) exit(0) diff --git a/tools/testing/selftests/tc-testing/tdc_helper.py b/tools/testing/selftests/tc-testing/tdc_helper.py index db381120a566..9f35c96c88a0 100644 --- a/tools/testing/selftests/tc-testing/tdc_helper.py +++ b/tools/testing/selftests/tc-testing/tdc_helper.py @@ -57,20 +57,11 @@ def print_sll(items): def print_test_case(tcase): """ Pretty-printing of a given test case. """ + print('\n==============\nTest {}\t{}\n'.format(tcase['id'], tcase['name'])) for k in tcase.keys(): if (isinstance(tcase[k], list)): print(k + ":") print_list(tcase[k]) else: - print(k + ": " + tcase[k]) - - -def show_test_case_by_id(testlist, caseID): - """ Find the specified test case to pretty-print. """ - if not any(d.get('id', None) == caseID for d in testlist): - print("That ID does not exist.") - exit(1) - else: - print_test_case(next((d for d in testlist if d['id'] == caseID))) - - + if not ((k == 'id') or (k == 'name')): + print(k + ": " + str(tcase[k])) |