diff options
author | Jakub Kicinski <kuba@kernel.org> | 2021-06-29 15:45:27 -0700 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2021-06-29 15:45:27 -0700 |
commit | b6df00789e2831fff7a2c65aa7164b2a4dcbe599 (patch) | |
tree | a94cbeeca3f0ae2fffed008cb287c02dbee4dceb | |
parent | 3f8ad50a9e43b6a59070e6c9c5eec79626f81095 (diff) | |
parent | a118ff661889ecee3ca90f8125bad8fb5bbc07d5 (diff) | |
download | linux-stable-b6df00789e2831fff7a2c65aa7164b2a4dcbe599.tar.gz |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Trivial conflict in net/netfilter/nf_tables_api.c.
Duplicate fix in tools/testing/selftests/net/devlink_port_split.py
- take the net-next version.
skmsg, and L4 bpf - keep the bpf code but remove the flags
and err params.
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
88 files changed, 707 insertions, 390 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index cc375fda89d0..25956727ff24 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12675,6 +12675,7 @@ W: http://www.netfilter.org/ W: http://www.iptables.org/ W: http://www.nftables.org/ Q: http://patchwork.ozlabs.org/project/netfilter-devel/list/ +C: irc://irc.libera.chat/netfilter T: git git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git F: include/linux/netfilter* diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index b015c3e14336..bc5a6ab6fa4b 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -527,6 +527,15 @@ static int ns_init_card(int i, struct pci_dev *pcidev) /* Set the VPI/VCI MSb mask to zero so we can receive OAM cells */ writel(0x00000000, card->membase + VPM); + card->intcnt = 0; + if (request_irq + (pcidev->irq, &ns_irq_handler, IRQF_SHARED, "nicstar", card) != 0) { + pr_err("nicstar%d: can't allocate IRQ %d.\n", i, pcidev->irq); + error = 9; + ns_init_card_error(card, error); + return error; + } + /* Initialize TSQ */ card->tsq.org = dma_alloc_coherent(&card->pcidev->dev, NS_TSQSIZE + NS_TSQ_ALIGNMENT, @@ -753,15 +762,6 @@ static int ns_init_card(int i, struct pci_dev *pcidev) card->efbie = 1; - card->intcnt = 0; - if (request_irq - (pcidev->irq, &ns_irq_handler, IRQF_SHARED, "nicstar", card) != 0) { - printk("nicstar%d: can't allocate IRQ %d.\n", i, pcidev->irq); - error = 9; - ns_init_card_error(card, error); - return error; - } - /* Register device */ card->atmdev = atm_dev_register("nicstar", &card->pcidev->dev, &atm_ops, -1, NULL); @@ -839,10 +839,12 @@ static void ns_init_card_error(ns_dev *card, int error) dev_kfree_skb_any(hb); } if (error >= 12) { - kfree(card->rsq.org); + dma_free_coherent(&card->pcidev->dev, NS_RSQSIZE + NS_RSQ_ALIGNMENT, + card->rsq.org, card->rsq.dma); } if (error >= 11) { - kfree(card->tsq.org); + dma_free_coherent(&card->pcidev->dev, NS_TSQSIZE + NS_TSQ_ALIGNMENT, + 
card->tsq.org, card->tsq.dma); } if (error >= 10) { free_irq(card->pcidev->irq, card); diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 1d9137e77dfc..0ff7567bd04f 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1600,6 +1600,14 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, int link_reporting; int res = 0, i; + if (slave_dev->flags & IFF_MASTER && + !netif_is_bond_master(slave_dev)) { + NL_SET_ERR_MSG(extack, "Device with IFF_MASTER cannot be enslaved"); + netdev_err(bond_dev, + "Error: Device with IFF_MASTER cannot be enslaved\n"); + return -EPERM; + } + if (!bond->params.use_carrier && slave_dev->ethtool_ops->get_link == NULL && slave_ops->ndo_do_ioctl == NULL) { diff --git a/drivers/net/can/peak_canfd/peak_canfd.c b/drivers/net/can/peak_canfd/peak_canfd.c index 00847cbaf7b6..d08718e98e11 100644 --- a/drivers/net/can/peak_canfd/peak_canfd.c +++ b/drivers/net/can/peak_canfd/peak_canfd.c @@ -351,8 +351,8 @@ static int pucan_handle_status(struct peak_canfd_priv *priv, return err; } - /* start network queue (echo_skb array is empty) */ - netif_start_queue(ndev); + /* wake network queue up (echo_skb array is empty) */ + netif_wake_queue(ndev); return 0; } diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index 5af69787d9d5..0a37af4a3fa4 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -1053,7 +1053,6 @@ static void ems_usb_disconnect(struct usb_interface *intf) if (dev) { unregister_netdev(dev->netdev); - free_candev(dev->netdev); unlink_all_urbs(dev); @@ -1061,6 +1060,8 @@ static void ems_usb_disconnect(struct usb_interface *intf) kfree(dev->intr_in_buffer); kfree(dev->tx_msg_buffer); + + free_candev(dev->netdev); } } diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index eca285aaf72f..961fa6b75cad 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ 
b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1618,9 +1618,6 @@ static int mv88e6xxx_port_check_hw_vlan(struct dsa_switch *ds, int port, struct mv88e6xxx_vtu_entry vlan; int i, err; - if (!vid) - return -EOPNOTSUPP; - /* DSA and CPU ports have to be members of multiple vlans */ if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port)) return 0; @@ -2109,6 +2106,9 @@ static int mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int port, u8 member; int err; + if (!vlan->vid) + return 0; + err = mv88e6xxx_port_vlan_prepare(ds, port, vlan); if (err) return err; diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index a9777eb564c6..4f0545605f6b 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -1818,6 +1818,12 @@ static int sja1105_reload_cbs(struct sja1105_private *priv) { int rc = 0, i; + /* The credit based shapers are only allocated if + * CONFIG_NET_SCH_CBS is enabled. + */ + if (!priv->cbs) + return 0; + for (i = 0; i < priv->info->num_cbs_shapers; i++) { struct sja1105_cbs_entry *cbs = &priv->cbs[i]; diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c index d77fafbc1530..c560ad06f0be 100644 --- a/drivers/net/ethernet/aeroflex/greth.c +++ b/drivers/net/ethernet/aeroflex/greth.c @@ -1539,10 +1539,11 @@ static int greth_of_remove(struct platform_device *of_dev) mdiobus_unregister(greth->mdio); unregister_netdev(ndev); - free_netdev(ndev); of_iounmap(&of_dev->resource[0], greth->regs, resource_size(&of_dev->resource[0])); + free_netdev(ndev); + return 0; } diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_macsec.h b/drivers/net/ethernet/aquantia/atlantic/aq_macsec.h index f5fba8b8cdea..a47e2710487e 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_macsec.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_macsec.h @@ -91,7 +91,7 @@ struct aq_macsec_txsc { u32 hw_sc_idx; unsigned long tx_sa_idx_busy; const struct macsec_secy *sw_secy; - u8 
tx_sa_key[MACSEC_NUM_AN][MACSEC_KEYID_LEN]; + u8 tx_sa_key[MACSEC_NUM_AN][MACSEC_MAX_KEY_LEN]; struct aq_macsec_tx_sc_stats stats; struct aq_macsec_tx_sa_stats tx_sa_stats[MACSEC_NUM_AN]; }; @@ -101,7 +101,7 @@ struct aq_macsec_rxsc { unsigned long rx_sa_idx_busy; const struct macsec_secy *sw_secy; const struct macsec_rx_sc *sw_rxsc; - u8 rx_sa_key[MACSEC_NUM_AN][MACSEC_KEYID_LEN]; + u8 rx_sa_key[MACSEC_NUM_AN][MACSEC_MAX_KEY_LEN]; struct aq_macsec_rx_sa_stats rx_sa_stats[MACSEC_NUM_AN]; }; diff --git a/drivers/net/ethernet/broadcom/bcm4908_enet.c b/drivers/net/ethernet/broadcom/bcm4908_enet.c index 60d908507f51..02a569500234 100644 --- a/drivers/net/ethernet/broadcom/bcm4908_enet.c +++ b/drivers/net/ethernet/broadcom/bcm4908_enet.c @@ -174,9 +174,6 @@ static int bcm4908_dma_alloc_buf_descs(struct bcm4908_enet *enet, if (!ring->slots) goto err_free_buf_descs; - ring->read_idx = 0; - ring->write_idx = 0; - return 0; err_free_buf_descs: @@ -304,6 +301,9 @@ static void bcm4908_enet_dma_ring_init(struct bcm4908_enet *enet, enet_write(enet, ring->st_ram_block + ENET_DMA_CH_STATE_RAM_BASE_DESC_PTR, (uint32_t)ring->dma_addr); + + ring->read_idx = 0; + ring->write_idx = 0; } static void bcm4908_enet_dma_uninit(struct bcm4908_enet *enet) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 701c12c9e033..649c5c429bd7 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -550,7 +550,7 @@ int be_process_mcc(struct be_adapter *adapter) int num = 0, status = 0; struct be_mcc_obj *mcc_obj = &adapter->mcc_obj; - spin_lock_bh(&adapter->mcc_cq_lock); + spin_lock(&adapter->mcc_cq_lock); while ((compl = be_mcc_compl_get(adapter))) { if (compl->flags & CQE_FLAGS_ASYNC_MASK) { @@ -566,7 +566,7 @@ int be_process_mcc(struct be_adapter *adapter) if (num) be_cq_notify(adapter, mcc_obj->cq.id, mcc_obj->rearm_cq, num); - spin_unlock_bh(&adapter->mcc_cq_lock); + 
spin_unlock(&adapter->mcc_cq_lock); return status; } @@ -581,7 +581,9 @@ static int be_mcc_wait_compl(struct be_adapter *adapter) if (be_check_error(adapter, BE_ERROR_ANY)) return -EIO; + local_bh_disable(); status = be_process_mcc(adapter); + local_bh_enable(); if (atomic_read(&mcc_obj->q.used) == 0) break; diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 7968568bbe21..361c1c87c183 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -5501,7 +5501,9 @@ static void be_worker(struct work_struct *work) * mcc completions */ if (!netif_running(adapter->netdev)) { + local_bh_disable(); be_process_mcc(adapter); + local_bh_enable(); goto reschedule; } diff --git a/drivers/net/ethernet/ezchip/nps_enet.c b/drivers/net/ethernet/ezchip/nps_enet.c index e3954d8835e7..f9a288a6ec8c 100644 --- a/drivers/net/ethernet/ezchip/nps_enet.c +++ b/drivers/net/ethernet/ezchip/nps_enet.c @@ -607,7 +607,7 @@ static s32 nps_enet_probe(struct platform_device *pdev) /* Get IRQ number */ priv->irq = platform_get_irq(pdev, 0); - if (!priv->irq) { + if (priv->irq < 0) { dev_err(dev, "failed to retrieve <irq Rx-Tx> value from device tree\n"); err = -ENODEV; goto out_netdev; @@ -630,8 +630,7 @@ static s32 nps_enet_probe(struct platform_device *pdev) out_netif_api: netif_napi_del(&priv->napi); out_netdev: - if (err) - free_netdev(ndev); + free_netdev(ndev); return err; } @@ -642,8 +641,8 @@ static s32 nps_enet_remove(struct platform_device *pdev) struct nps_enet_priv *priv = netdev_priv(ndev); unregister_netdev(ndev); - free_netdev(ndev); netif_napi_del(&priv->napi); + free_netdev(ndev); return 0; } diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index ac4819c25aca..867e87af3432 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1506,8 +1506,8 @@ static int gve_probe(struct 
pci_dev *pdev, const struct pci_device_id *ent) gve_write_version(&reg_bar->driver_version); /* Get max queues to alloc etherdev */ - max_rx_queues = ioread32be(&reg_bar->max_tx_queues); - max_tx_queues = ioread32be(&reg_bar->max_rx_queues); + max_tx_queues = ioread32be(&reg_bar->max_tx_queues); + max_rx_queues = ioread32be(&reg_bar->max_rx_queues); /* Alloc and setup the netdev and priv */ dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues); if (!dev) { diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index adb0d5ca9ff1..374a75d4faea 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -106,6 +106,8 @@ static void release_crq_queue(struct ibmvnic_adapter *); static int __ibmvnic_set_mac(struct net_device *, u8 *); static int init_crq_queue(struct ibmvnic_adapter *adapter); static int send_query_phys_parms(struct ibmvnic_adapter *adapter); +static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter, + struct ibmvnic_sub_crq_queue *tx_scrq); struct ibmvnic_stat { char name[ETH_GSTRING_LEN]; @@ -232,12 +234,11 @@ static int alloc_long_term_buff(struct ibmvnic_adapter *adapter, mutex_lock(&adapter->fw_lock); adapter->fw_done_rc = 0; reinit_completion(&adapter->fw_done); - rc = send_request_map(adapter, ltb->addr, - ltb->size, ltb->map_id); + + rc = send_request_map(adapter, ltb->addr, ltb->size, ltb->map_id); if (rc) { - dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr); - mutex_unlock(&adapter->fw_lock); - return rc; + dev_err(dev, "send_request_map failed, rc = %d\n", rc); + goto out; } rc = ibmvnic_wait_for_completion(adapter, &adapter->fw_done, 10000); @@ -245,20 +246,23 @@ static int alloc_long_term_buff(struct ibmvnic_adapter *adapter, dev_err(dev, "Long term map request aborted or timed out,rc = %d\n", rc); - dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr); - mutex_unlock(&adapter->fw_lock); - return rc; + goto out; } if (adapter->fw_done_rc) { dev_err(dev, 
"Couldn't map long term buffer,rc = %d\n", adapter->fw_done_rc); + rc = -1; + goto out; + } + rc = 0; +out: + if (rc) { dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr); - mutex_unlock(&adapter->fw_lock); - return -1; + ltb->buff = NULL; } mutex_unlock(&adapter->fw_lock); - return 0; + return rc; } static void free_long_term_buff(struct ibmvnic_adapter *adapter, @@ -278,14 +282,44 @@ static void free_long_term_buff(struct ibmvnic_adapter *adapter, adapter->reset_reason != VNIC_RESET_TIMEOUT) send_request_unmap(adapter, ltb->map_id); dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr); + ltb->buff = NULL; + ltb->map_id = 0; } -static int reset_long_term_buff(struct ibmvnic_long_term_buff *ltb) +static int reset_long_term_buff(struct ibmvnic_adapter *adapter, + struct ibmvnic_long_term_buff *ltb) { - if (!ltb->buff) - return -EINVAL; + struct device *dev = &adapter->vdev->dev; + int rc; memset(ltb->buff, 0, ltb->size); + + mutex_lock(&adapter->fw_lock); + adapter->fw_done_rc = 0; + + reinit_completion(&adapter->fw_done); + rc = send_request_map(adapter, ltb->addr, ltb->size, ltb->map_id); + if (rc) { + mutex_unlock(&adapter->fw_lock); + return rc; + } + + rc = ibmvnic_wait_for_completion(adapter, &adapter->fw_done, 10000); + if (rc) { + dev_info(dev, + "Reset failed, long term map request timed out or aborted\n"); + mutex_unlock(&adapter->fw_lock); + return rc; + } + + if (adapter->fw_done_rc) { + dev_info(dev, + "Reset failed, attempting to free and reallocate buffer\n"); + free_long_term_buff(adapter, ltb); + mutex_unlock(&adapter->fw_lock); + return alloc_long_term_buff(adapter, ltb, ltb->size); + } + mutex_unlock(&adapter->fw_lock); return 0; } @@ -321,7 +355,14 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, rx_scrq = adapter->rx_scrq[pool->index]; ind_bufp = &rx_scrq->ind_buf; - for (i = 0; i < count; ++i) { + + /* netdev_skb_alloc() could have failed after we saved a few skbs + * in the indir_buf and we would not have sent them to 
VIOS yet. + * To account for them, start the loop at ind_bufp->index rather + * than 0. If we pushed all the skbs to VIOS, ind_bufp->index will + * be 0. + */ + for (i = ind_bufp->index; i < count; ++i) { skb = netdev_alloc_skb(adapter->netdev, pool->buff_size); if (!skb) { dev_err(dev, "Couldn't replenish rx buff\n"); @@ -507,7 +548,8 @@ static int reset_rx_pools(struct ibmvnic_adapter *adapter) rx_pool->size * rx_pool->buff_size); } else { - rc = reset_long_term_buff(&rx_pool->long_term_buff); + rc = reset_long_term_buff(adapter, + &rx_pool->long_term_buff); } if (rc) @@ -630,11 +672,12 @@ static int init_rx_pools(struct net_device *netdev) return 0; } -static int reset_one_tx_pool(struct ibmvnic_tx_pool *tx_pool) +static int reset_one_tx_pool(struct ibmvnic_adapter *adapter, + struct ibmvnic_tx_pool *tx_pool) { int rc, i; - rc = reset_long_term_buff(&tx_pool->long_term_buff); + rc = reset_long_term_buff(adapter, &tx_pool->long_term_buff); if (rc) return rc; @@ -661,10 +704,11 @@ static int reset_tx_pools(struct ibmvnic_adapter *adapter) tx_scrqs = adapter->num_active_tx_pools; for (i = 0; i < tx_scrqs; i++) { - rc = reset_one_tx_pool(&adapter->tso_pool[i]); + ibmvnic_tx_scrq_clean_buffer(adapter, adapter->tx_scrq[i]); + rc = reset_one_tx_pool(adapter, &adapter->tso_pool[i]); if (rc) return rc; - rc = reset_one_tx_pool(&adapter->tx_pool[i]); + rc = reset_one_tx_pool(adapter, &adapter->tx_pool[i]); if (rc) return rc; } @@ -757,8 +801,11 @@ static int init_tx_pools(struct net_device *netdev) adapter->tso_pool = kcalloc(tx_subcrqs, sizeof(struct ibmvnic_tx_pool), GFP_KERNEL); - if (!adapter->tso_pool) + if (!adapter->tso_pool) { + kfree(adapter->tx_pool); + adapter->tx_pool = NULL; return -1; + } adapter->num_active_tx_pools = tx_subcrqs; @@ -1204,6 +1251,11 @@ static int __ibmvnic_open(struct net_device *netdev) netif_tx_start_all_queues(netdev); + if (prev_state == VNIC_CLOSED) { + for (i = 0; i < adapter->req_rx_queues; i++) + napi_schedule(&adapter->napi[i]); + 
} + adapter->state = VNIC_OPEN; return rc; } @@ -1608,7 +1660,8 @@ static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter, ind_bufp->index = 0; if (atomic_sub_return(entries, &tx_scrq->used) <= (adapter->req_tx_entries_per_subcrq / 2) && - __netif_subqueue_stopped(adapter->netdev, queue_num)) { + __netif_subqueue_stopped(adapter->netdev, queue_num) && + !test_bit(0, &adapter->resetting)) { netif_wake_subqueue(adapter->netdev, queue_num); netdev_dbg(adapter->netdev, "Started queue %d\n", queue_num); @@ -1701,7 +1754,6 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_send_failed++; tx_dropped++; ret = NETDEV_TX_OK; - ibmvnic_tx_scrq_flush(adapter, tx_scrq); goto out; } @@ -3241,6 +3293,7 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter, bool do_h_free) netdev_dbg(adapter->netdev, "Releasing tx_scrq[%d]\n", i); + ibmvnic_tx_scrq_clean_buffer(adapter, adapter->tx_scrq[i]); if (adapter->tx_scrq[i]->irq) { free_irq(adapter->tx_scrq[i]->irq, adapter->tx_scrq[i]); @@ -3314,7 +3367,7 @@ static int enable_scrq_irq(struct ibmvnic_adapter *adapter, /* H_EOI would fail with rc = H_FUNCTION when running * in XIVE mode which is expected, but not an error. */ - if (rc && rc != H_FUNCTION) + if (rc && (rc != H_FUNCTION)) dev_err(dev, "H_EOI FAILED irq 0x%llx. 
rc=%ld\n", val, rc); } diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 5435606149b0..d150dade06cf 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -5223,18 +5223,20 @@ static void e1000_watchdog_task(struct work_struct *work) pm_runtime_resume(netdev->dev.parent); /* Checking if MAC is in DMoff state*/ - pcim_state = er32(STATUS); - while (pcim_state & E1000_STATUS_PCIM_STATE) { - if (tries++ == dmoff_exit_timeout) { - e_dbg("Error in exiting dmoff\n"); - break; - } - usleep_range(10000, 20000); + if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID) { pcim_state = er32(STATUS); - - /* Checking if MAC exited DMoff state */ - if (!(pcim_state & E1000_STATUS_PCIM_STATE)) - e1000_phy_hw_reset(&adapter->hw); + while (pcim_state & E1000_STATUS_PCIM_STATE) { + if (tries++ == dmoff_exit_timeout) { + e_dbg("Error in exiting dmoff\n"); + break; + } + usleep_range(10000, 20000); + pcim_state = er32(STATUS); + + /* Checking if MAC exited DMoff state */ + if (!(pcim_state & E1000_STATUS_PCIM_STATE)) + e1000_phy_hw_reset(&adapter->hw); + } } /* update snapshot of PHY registers on LSC */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index ccd5b9486ea9..3e822bad4851 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -1262,8 +1262,7 @@ static int i40e_set_link_ksettings(struct net_device *netdev, if (ethtool_link_ksettings_test_link_mode(&safe_ks, supported, Autoneg) && - hw->phy.link_info.phy_type != - I40E_PHY_TYPE_10GBASE_T) { + hw->phy.media_type != I40E_MEDIA_TYPE_BASET) { netdev_info(netdev, "Autoneg cannot be disabled on this phy\n"); err = -EINVAL; goto done; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 9db1968fc491..861e59a350bd 100644 --- 
a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -32,7 +32,7 @@ static void i40e_vsi_reinit_locked(struct i40e_vsi *vsi); static void i40e_handle_reset_warning(struct i40e_pf *pf, bool lock_acquired); static int i40e_add_vsi(struct i40e_vsi *vsi); static int i40e_add_veb(struct i40e_veb *veb, struct i40e_vsi *vsi); -static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit); +static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit, bool lock_acquired); static int i40e_setup_misc_vector(struct i40e_pf *pf); static void i40e_determine_queue_usage(struct i40e_pf *pf); static int i40e_setup_pf_filter_control(struct i40e_pf *pf); @@ -8703,6 +8703,8 @@ int i40e_vsi_open(struct i40e_vsi *vsi) dev_driver_string(&pf->pdev->dev), dev_name(&pf->pdev->dev)); err = i40e_vsi_request_irq(vsi, int_name); + if (err) + goto err_setup_rx; } else { err = -EINVAL; @@ -10569,7 +10571,7 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) #endif /* CONFIG_I40E_DCB */ if (!lock_acquired) rtnl_lock(); - ret = i40e_setup_pf_switch(pf, reinit); + ret = i40e_setup_pf_switch(pf, reinit, true); if (ret) goto end_unlock; @@ -14627,10 +14629,11 @@ int i40e_fetch_switch_configuration(struct i40e_pf *pf, bool printconfig) * i40e_setup_pf_switch - Setup the HW switch on startup or after reset * @pf: board private structure * @reinit: if the Main VSI needs to re-initialized. 
+ * @lock_acquired: indicates whether or not the lock has been acquired * * Returns 0 on success, negative value on failure **/ -static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit) +static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit, bool lock_acquired) { u16 flags = 0; int ret; @@ -14732,9 +14735,15 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit) i40e_ptp_init(pf); + if (!lock_acquired) + rtnl_lock(); + /* repopulate tunnel port filters */ udp_tunnel_nic_reset_ntf(pf->vsi[pf->lan_vsi]->netdev); + if (!lock_acquired) + rtnl_unlock(); + return ret; } @@ -15528,7 +15537,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; } #endif - err = i40e_setup_pf_switch(pf, false); + err = i40e_setup_pf_switch(pf, false, false); if (err) { dev_info(&pdev->dev, "setup_pf_switch failed: %d\n", err); goto err_vsis; diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index f1f6fc3744e9..7b971b205d36 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -11,13 +11,14 @@ * operate with the nanosecond field directly without fear of overflow. * * Much like the 82599, the update period is dependent upon the link speed: - * At 40Gb link or no link, the period is 1.6ns. - * At 10Gb link, the period is multiplied by 2. (3.2ns) + * At 40Gb, 25Gb, or no link, the period is 1.6ns. + * At 10Gb or 5Gb link, the period is multiplied by 2. (3.2ns) * At 1Gb link, the period is multiplied by 20. (32ns) * 1588 functionality is not supported at 100Mbps. 
*/ #define I40E_PTP_40GB_INCVAL 0x0199999999ULL #define I40E_PTP_10GB_INCVAL_MULT 2 +#define I40E_PTP_5GB_INCVAL_MULT 2 #define I40E_PTP_1GB_INCVAL_MULT 20 #define I40E_PRTTSYN_CTL1_TSYNTYPE_V1 BIT(I40E_PRTTSYN_CTL1_TSYNTYPE_SHIFT) @@ -465,6 +466,9 @@ void i40e_ptp_set_increment(struct i40e_pf *pf) case I40E_LINK_SPEED_10GB: mult = I40E_PTP_10GB_INCVAL_MULT; break; + case I40E_LINK_SPEED_5GB: + mult = I40E_PTP_5GB_INCVAL_MULT; + break; case I40E_LINK_SPEED_1GB: mult = I40E_PTP_1GB_INCVAL_MULT; break; diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 46aee2c49f1b..fff78900fc8a 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -1230,8 +1230,10 @@ static int mana_create_txq(struct mana_port_context *apc, cq->gdma_id = cq->gdma_cq->id; - if (WARN_ON(cq->gdma_id >= gc->max_num_cqs)) - return -EINVAL; + if (WARN_ON(cq->gdma_id >= gc->max_num_cqs)) { + err = -EINVAL; + goto out; + } gc->cq_table[cq->gdma_id] = cq->gdma_cq; diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index d597c89f00ed..e7e2223aebbf 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -1069,7 +1069,8 @@ static int efx_ef10_probe_vf(struct efx_nic *efx) /* If the parent PF has no VF data structure, it doesn't know about this * VF so fail probe. The VF needs to be re-created. This can happen - * if the PF driver is unloaded while the VF is assigned to a guest. + * if the PF driver was unloaded while any VF was assigned to a guest + * (using Xen, only). 
*/ pci_dev_pf = efx->pci_dev->physfn; if (pci_dev_pf) { diff --git a/drivers/net/ethernet/sfc/ef10_sriov.c b/drivers/net/ethernet/sfc/ef10_sriov.c index 21fa6c0e8873..752d6406f07e 100644 --- a/drivers/net/ethernet/sfc/ef10_sriov.c +++ b/drivers/net/ethernet/sfc/ef10_sriov.c @@ -122,8 +122,7 @@ static void efx_ef10_sriov_free_vf_vports(struct efx_nic *efx) struct ef10_vf *vf = nic_data->vf + i; /* If VF is assigned, do not free the vport */ - if (vf->pci_dev && - vf->pci_dev->dev_flags & PCI_DEV_FLAGS_ASSIGNED) + if (vf->pci_dev && pci_is_dev_assigned(vf->pci_dev)) continue; if (vf->vport_assigned) { @@ -207,9 +206,7 @@ static int efx_ef10_sriov_alloc_vf_vswitching(struct efx_nic *efx) return 0; fail: - efx_ef10_sriov_free_vf_vports(efx); - kfree(nic_data->vf); - nic_data->vf = NULL; + efx_ef10_sriov_free_vf_vswitching(efx); return rc; } @@ -402,12 +399,17 @@ fail1: return rc; } +/* Disable SRIOV and remove VFs + * If some VFs are attached to a guest (using Xen, only) nothing is + * done if force=false, and vports are freed if force=true (for the non + * attachedc ones, only) but SRIOV is not disabled and VFs are not + * removed in either case. 
+ */ static int efx_ef10_pci_sriov_disable(struct efx_nic *efx, bool force) { struct pci_dev *dev = efx->pci_dev; - unsigned int vfs_assigned = 0; - - vfs_assigned = pci_vfs_assigned(dev); + unsigned int vfs_assigned = pci_vfs_assigned(dev); + int rc = 0; if (vfs_assigned && !force) { netif_info(efx, drv, efx->net_dev, "VFs are assigned to guests; " @@ -417,10 +419,12 @@ static int efx_ef10_pci_sriov_disable(struct efx_nic *efx, bool force) if (!vfs_assigned) pci_disable_sriov(dev); + else + rc = -EBUSY; efx_ef10_sriov_free_vf_vswitching(efx); efx->vf_count = 0; - return 0; + return rc; } int efx_ef10_sriov_configure(struct efx_nic *efx, int num_vfs) @@ -439,24 +443,18 @@ int efx_ef10_sriov_init(struct efx_nic *efx) void efx_ef10_sriov_fini(struct efx_nic *efx) { struct efx_ef10_nic_data *nic_data = efx->nic_data; - unsigned int i; int rc; if (!nic_data->vf) { - /* Remove any un-assigned orphaned VFs */ + /* Remove any un-assigned orphaned VFs. This can happen if the PF driver + * was unloaded while any VF was assigned to a guest (using Xen, only). 
+ */ if (pci_num_vf(efx->pci_dev) && !pci_vfs_assigned(efx->pci_dev)) pci_disable_sriov(efx->pci_dev); return; } - /* Remove any VFs in the host */ - for (i = 0; i < efx->vf_count; ++i) { - struct efx_nic *vf_efx = nic_data->vf[i].efx; - - if (vf_efx) - vf_efx->pci_dev->driver->remove(vf_efx->pci_dev); - } - + /* Disable SRIOV and remove any VFs in the host */ rc = efx_ef10_pci_sriov_disable(efx, true); if (rc) netif_dbg(efx, drv, efx->net_dev, diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 6a67b026df0b..718539cdd2f2 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -1506,12 +1506,12 @@ static void am65_cpsw_nuss_free_tx_chns(void *data) for (i = 0; i < common->tx_ch_num; i++) { struct am65_cpsw_tx_chn *tx_chn = &common->tx_chns[i]; - if (!IS_ERR_OR_NULL(tx_chn->tx_chn)) - k3_udma_glue_release_tx_chn(tx_chn->tx_chn); - if (!IS_ERR_OR_NULL(tx_chn->desc_pool)) k3_cppi_desc_pool_destroy(tx_chn->desc_pool); + if (!IS_ERR_OR_NULL(tx_chn->tx_chn)) + k3_udma_glue_release_tx_chn(tx_chn->tx_chn); + memset(tx_chn, 0, sizeof(*tx_chn)); } } @@ -1531,12 +1531,12 @@ void am65_cpsw_nuss_remove_tx_chns(struct am65_cpsw_common *common) netif_napi_del(&tx_chn->napi_tx); - if (!IS_ERR_OR_NULL(tx_chn->tx_chn)) - k3_udma_glue_release_tx_chn(tx_chn->tx_chn); - if (!IS_ERR_OR_NULL(tx_chn->desc_pool)) k3_cppi_desc_pool_destroy(tx_chn->desc_pool); + if (!IS_ERR_OR_NULL(tx_chn->tx_chn)) + k3_udma_glue_release_tx_chn(tx_chn->tx_chn); + memset(tx_chn, 0, sizeof(*tx_chn)); } } @@ -1624,11 +1624,11 @@ static void am65_cpsw_nuss_free_rx_chns(void *data) rx_chn = &common->rx_chns; - if (!IS_ERR_OR_NULL(rx_chn->rx_chn)) - k3_udma_glue_release_rx_chn(rx_chn->rx_chn); - if (!IS_ERR_OR_NULL(rx_chn->desc_pool)) k3_cppi_desc_pool_destroy(rx_chn->desc_pool); + + if (!IS_ERR_OR_NULL(rx_chn->rx_chn)) + k3_udma_glue_release_rx_chn(rx_chn->rx_chn); } static int am65_cpsw_nuss_init_rx_chns(struct 
am65_cpsw_common *common) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index f682a5572d84..382bebc2420d 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2384,6 +2384,9 @@ static int netvsc_register_vf(struct net_device *vf_netdev) dev_hold(vf_netdev); rcu_assign_pointer(net_device_ctx->vf_netdev, vf_netdev); + if (ndev->needed_headroom < vf_netdev->needed_headroom) + ndev->needed_headroom = vf_netdev->needed_headroom; + vf_netdev->wanted_features = ndev->features; netdev_update_features(vf_netdev); @@ -2462,6 +2465,8 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev) RCU_INIT_POINTER(net_device_ctx->vf_netdev, NULL); dev_put(vf_netdev); + ndev->needed_headroom = RNDIS_AND_PPI_SIZE; + return NOTIFY_OK; } diff --git a/drivers/net/ieee802154/mac802154_hwsim.c b/drivers/net/ieee802154/mac802154_hwsim.c index da9135231c07..ebc976b7fcc2 100644 --- a/drivers/net/ieee802154/mac802154_hwsim.c +++ b/drivers/net/ieee802154/mac802154_hwsim.c @@ -480,7 +480,7 @@ static int hwsim_del_edge_nl(struct sk_buff *msg, struct genl_info *info) struct hwsim_edge *e; u32 v0, v1; - if (!info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID] && + if (!info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID] || !info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE]) return -EINVAL; @@ -715,6 +715,8 @@ static int hwsim_subscribe_all_others(struct hwsim_phy *phy) return 0; +sub_fail: + hwsim_edge_unsubscribe_me(phy); me_fail: rcu_read_lock(); list_for_each_entry_rcu(e, &phy->edges, list) { @@ -722,8 +724,6 @@ me_fail: hwsim_free_edge(e); } rcu_read_unlock(); -sub_fail: - hwsim_edge_unsubscribe_me(phy); return -ENOMEM; } @@ -824,12 +824,17 @@ err_pib: static void hwsim_del(struct hwsim_phy *phy) { struct hwsim_pib *pib; + struct hwsim_edge *e; hwsim_edge_unsubscribe_me(phy); list_del(&phy->list); rcu_read_lock(); + list_for_each_entry_rcu(e, &phy->edges, list) { + list_del_rcu(&e->list); + hwsim_free_edge(e); + } pib = 
rcu_dereference(phy->pib); rcu_read_unlock(); diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 92425e1fd70c..93dc48b9b4f2 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -1819,7 +1819,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) ctx.sa.rx_sa = rx_sa; ctx.secy = secy; memcpy(ctx.sa.key, nla_data(tb_sa[MACSEC_SA_ATTR_KEY]), - MACSEC_KEYID_LEN); + secy->key_len); err = macsec_offload(ops->mdo_add_rxsa, &ctx); if (err) @@ -2061,7 +2061,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info) ctx.sa.tx_sa = tx_sa; ctx.secy = secy; memcpy(ctx.sa.key, nla_data(tb_sa[MACSEC_SA_ATTR_KEY]), - MACSEC_KEYID_LEN); + secy->key_len); err = macsec_offload(ops->mdo_add_txsa, &ctx); if (err) diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index 6697c9368b40..5d62b85a4024 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -701,6 +701,34 @@ static void at803x_remove(struct phy_device *phydev) regulator_disable(priv->vddio); } +static int at803x_get_features(struct phy_device *phydev) +{ + int err; + + err = genphy_read_abilities(phydev); + if (err) + return err; + + if (!at803x_match_phy_id(phydev, ATH8031_PHY_ID)) + return 0; + + /* AR8031/AR8033 have different status registers + * for copper and fiber operation. However, the + * extended status register is the same for both + * operation modes. + * + * As a result of that, ESTATUS_1000_XFULL is set + * to 1 even when operating in copper TP mode. + * + * Remove this mode from the supported link modes, + * as this driver currently only supports copper + * operation. 
+ */ + linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT, + phydev->supported); + return 0; +} + static int at803x_smarteee_config(struct phy_device *phydev) { struct at803x_priv *priv = phydev->priv; @@ -1344,7 +1372,7 @@ static struct phy_driver at803x_driver[] = { .resume = at803x_resume, .read_page = at803x_read_page, .write_page = at803x_write_page, - /* PHY_GBIT_FEATURES */ + .get_features = at803x_get_features, .read_status = at803x_read_status, .config_intr = &at803x_config_intr, .handle_interrupt = at803x_handle_interrupt, diff --git a/drivers/net/phy/mscc/mscc_macsec.c b/drivers/net/phy/mscc/mscc_macsec.c index 10be266e48e8..b7b2521c73fb 100644 --- a/drivers/net/phy/mscc/mscc_macsec.c +++ b/drivers/net/phy/mscc/mscc_macsec.c @@ -501,7 +501,7 @@ static u32 vsc8584_macsec_flow_context_id(struct macsec_flow *flow) } /* Derive the AES key to get a key for the hash autentication */ -static int vsc8584_macsec_derive_key(const u8 key[MACSEC_KEYID_LEN], +static int vsc8584_macsec_derive_key(const u8 key[MACSEC_MAX_KEY_LEN], u16 key_len, u8 hkey[16]) { const u8 input[AES_BLOCK_SIZE] = {0}; diff --git a/drivers/net/phy/mscc/mscc_macsec.h b/drivers/net/phy/mscc/mscc_macsec.h index 9c6d25e36de2..453304bae778 100644 --- a/drivers/net/phy/mscc/mscc_macsec.h +++ b/drivers/net/phy/mscc/mscc_macsec.h @@ -81,7 +81,7 @@ struct macsec_flow { /* Highest takes precedence [0..15] */ u8 priority; - u8 key[MACSEC_KEYID_LEN]; + u8 key[MACSEC_MAX_KEY_LEN]; union { struct macsec_rx_sa *rx_sa; diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 452822f88214..2b1b944d4b28 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1366,22 +1366,22 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, int orig_iif = skb->skb_iif; bool need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr); bool is_ndisc = ipv6_ndisc_frame(skb); - bool is_ll_src; /* loopback, multicast & non-ND link-local traffic; do not push through * packet taps again. 
Reset pkt_type for upper layers to process skb. - * for packets with lladdr src, however, skip so that the dst can be - * determine at input using original ifindex in the case that daddr - * needs strict + * For strict packets with a source LLA, determine the dst using the + * original ifindex. */ - is_ll_src = ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL; - if (skb->pkt_type == PACKET_LOOPBACK || - (need_strict && !is_ndisc && !is_ll_src)) { + if (skb->pkt_type == PACKET_LOOPBACK || (need_strict && !is_ndisc)) { skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; IP6CB(skb)->flags |= IP6SKB_L3SLAVE; + if (skb->pkt_type == PACKET_LOOPBACK) skb->pkt_type = PACKET_HOST; + else if (ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL) + vrf_ip6_input_dst(skb, vrf_dev, orig_iif); + goto out; } diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 02a14f1b938a..5a8df5a195cb 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2164,6 +2164,7 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni) struct neighbour *n; struct nd_msg *msg; + rcu_read_lock(); in6_dev = __in6_dev_get(dev); if (!in6_dev) goto out; @@ -2215,6 +2216,7 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni) } out: + rcu_read_unlock(); consume_skb(skb); return NETDEV_TX_OK; } diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c index 529dfd8b7ae8..17399d4aa129 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.c +++ b/drivers/net/wireless/marvell/mwifiex/main.c @@ -1445,11 +1445,18 @@ static void mwifiex_uninit_sw(struct mwifiex_adapter *adapter) if (!priv) continue; rtnl_lock(); - wiphy_lock(adapter->wiphy); if (priv->netdev && - priv->wdev.iftype != NL80211_IFTYPE_UNSPECIFIED) + priv->wdev.iftype != NL80211_IFTYPE_UNSPECIFIED) { + /* + * Close the netdev now, because if we do it later, the + * netdev notifiers will need to acquire the wiphy lock + 
* again --> deadlock. + */ + dev_close(priv->wdev.netdev); + wiphy_lock(adapter->wiphy); mwifiex_del_virtual_intf(adapter->wiphy, &priv->wdev); - wiphy_unlock(adapter->wiphy); + wiphy_unlock(adapter->wiphy); + } rtnl_unlock(); } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index be1dcceda5e4..eaf5bb008aa9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4114,7 +4114,7 @@ static __always_inline int ____dev_forward_skb(struct net_device *dev, return NET_RX_DROP; } - skb_scrub_packet(skb, true); + skb_scrub_packet(skb, !net_eq(dev_net(dev), dev_net(skb->dev))); skb->priority = 0; return 0; } diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 31866031e370..96f319099744 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -126,7 +126,6 @@ int sk_msg_zerocopy_from_iter(struct sock *sk, struct iov_iter *from, struct sk_msg *msg, u32 bytes); int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from, struct sk_msg *msg, u32 bytes); -int sk_msg_wait_data(struct sock *sk, struct sk_psock *psock, long timeo); int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, int len, int flags); diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index dc5c1e69cd9f..69c9eabf8325 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -319,12 +319,14 @@ flow_action_mixed_hw_stats_check(const struct flow_action *action, if (flow_offload_has_one_action(action)) return true; - flow_action_for_each(i, action_entry, action) { - if (i && action_entry->hw_stats != last_hw_stats) { - NL_SET_ERR_MSG_MOD(extack, "Mixing HW stats types for actions is not supported"); - return false; + if (action) { + flow_action_for_each(i, action_entry, action) { + if (i && action_entry->hw_stats != last_hw_stats) { + NL_SET_ERR_MSG_MOD(extack, "Mixing HW stats types for actions is not supported"); + return false; + } + last_hw_stats = action_entry->hw_stats; } - 
last_hw_stats = action_entry->hw_stats; } return true; } diff --git a/include/net/ip.h b/include/net/ip.h index e20874059f82..d9683bef8684 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -31,6 +31,7 @@ #include <net/flow.h> #include <net/flow_dissector.h> #include <net/netns/hash.h> +#include <net/lwtunnel.h> #define IPV4_MAX_PMTU 65535U /* RFC 2675, Section 5.1 */ #define IPV4_MIN_MTU 68 /* RFC 791 */ @@ -445,22 +446,25 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, /* 'forwarding = true' case should always honour route mtu */ mtu = dst_metric_raw(dst, RTAX_MTU); - if (mtu) - return mtu; + if (!mtu) + mtu = min(READ_ONCE(dst->dev->mtu), IP_MAX_MTU); - return min(READ_ONCE(dst->dev->mtu), IP_MAX_MTU); + return mtu - lwtunnel_headroom(dst->lwtstate, mtu); } static inline unsigned int ip_skb_dst_mtu(struct sock *sk, const struct sk_buff *skb) { + unsigned int mtu; + if (!sk || !sk_fullsock(sk) || ip_sk_use_pmtu(sk)) { bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED; return ip_dst_mtu_maybe_forward(skb_dst(skb), forwarding); } - return min(READ_ONCE(skb_dst(skb)->dev->mtu), IP_MAX_MTU); + mtu = min(READ_ONCE(skb_dst(skb)->dev->mtu), IP_MAX_MTU); + return mtu - lwtunnel_headroom(skb_dst(skb)->lwtstate, mtu); } struct dst_metrics *ip_fib_metrics_init(struct net *net, struct nlattr *fc_mx, diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index f51a118bfce8..f14149df5a65 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -265,11 +265,18 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, static inline int ip6_skb_dst_mtu(struct sk_buff *skb) { + int mtu; + struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? inet6_sk(skb->sk) : NULL; - return (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) ? 
- skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); + if (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) { + mtu = READ_ONCE(skb_dst(skb)->dev->mtu); + mtu -= lwtunnel_headroom(skb_dst(skb)->lwtstate, mtu); + } else + mtu = dst_mtu(skb_dst(skb)); + + return mtu; } static inline bool ip6_sk_accept_pmtu(const struct sock *sk) @@ -317,7 +324,7 @@ static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) if (dst_metric_locked(dst, RTAX_MTU)) { mtu = dst_metric_raw(dst, RTAX_MTU); if (mtu) - return mtu; + goto out; } mtu = IPV6_MIN_MTU; @@ -327,7 +334,8 @@ static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) mtu = idev->cnf.mtu6; rcu_read_unlock(); - return mtu; +out: + return mtu - lwtunnel_headroom(dst->lwtstate, mtu); } u32 ip6_mtu_from_fib6(const struct fib6_result *res, diff --git a/include/net/macsec.h b/include/net/macsec.h index 52874cdfe226..d6fa6b97f6ef 100644 --- a/include/net/macsec.h +++ b/include/net/macsec.h @@ -241,7 +241,7 @@ struct macsec_context { struct macsec_rx_sc *rx_sc; struct { unsigned char assoc_num; - u8 key[MACSEC_KEYID_LEN]; + u8 key[MACSEC_MAX_KEY_LEN]; union { struct macsec_rx_sa *rx_sa; struct macsec_tx_sa *tx_sa; diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c99ffe9cc88f..9ed33e6840bd 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -173,6 +173,12 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) if (spin_trylock(&qdisc->seqlock)) return true; + /* Paired with smp_mb__after_atomic() to make sure + * STATE_MISSED checking is synchronized with clearing + * in pfifo_fast_dequeue(). 
+ */ + smp_mb__before_atomic(); + /* If the MISSED flag is set, it means other thread has * set the MISSED flag before second spin_trylock(), so * we can return false here to avoid multi cpus doing @@ -190,6 +196,12 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) */ set_bit(__QDISC_STATE_MISSED, &qdisc->state); + /* spin_trylock() only has load-acquire semantic, so use + * smp_mb__after_atomic() to ensure STATE_MISSED is set + * before doing the second spin_trylock(). + */ + smp_mb__after_atomic(); + /* Retry again in case other CPU may not see the new flag * after it releases the lock at the end of qdisc_run_end(). */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index c4a4c1754be8..32fc4a309df5 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -463,7 +463,7 @@ struct sctp_af { int saddr); void (*from_sk) (union sctp_addr *, struct sock *sk); - void (*from_addr_param) (union sctp_addr *, + bool (*from_addr_param) (union sctp_addr *, union sctp_addr_param *, __be16 port, int iif); int (*to_addr_param) (const union sctp_addr *, diff --git a/include/net/xfrm.h b/include/net/xfrm.h index b7e65aec4b2d..cbff7c2a9724 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1538,6 +1538,7 @@ void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si); void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si); u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq); int xfrm_init_replay(struct xfrm_state *x); +u32 __xfrm_state_mtu(struct xfrm_state *x, int mtu); u32 xfrm_state_mtu(struct xfrm_state *x, int mtu); int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload); int xfrm_init_state(struct xfrm_state *x); @@ -1562,7 +1563,6 @@ int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); int xfrm4_transport_finish(struct sk_buff *skb, int async); int xfrm4_rcv(struct sk_buff *skb); -int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, 
__be32 *seq); static inline int xfrm4_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) { diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index eaa8386dbc63..7a9a23e7a604 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -147,11 +147,16 @@ static inline bool xp_desc_crosses_non_contig_pg(struct xsk_buff_pool *pool, { bool cross_pg = (addr & (PAGE_SIZE - 1)) + len > PAGE_SIZE; - if (pool->dma_pages_cnt && cross_pg) { + if (likely(!cross_pg)) + return false; + + if (pool->dma_pages_cnt) { return !(pool->dma_pages[addr >> PAGE_SHIFT] & XSK_NEXT_PG_CONTIG_MASK); } - return false; + + /* skb path */ + return addr + len > pool->addrs_cnt; } static inline u64 xp_aligned_extract_addr(struct xsk_buff_pool *pool, u64 addr) diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 2f6bd75cd682..2546dafd6672 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -93,7 +93,7 @@ static struct hlist_head *dev_map_create_hash(unsigned int entries, int i; struct hlist_head *hash; - hash = bpf_map_area_alloc(entries * sizeof(*hash), numa_node); + hash = bpf_map_area_alloc((u64) entries * sizeof(*hash), numa_node); if (hash != NULL) for (i = 0; i < entries; i++) INIT_HLIST_HEAD(&hash[i]); @@ -144,7 +144,7 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr) spin_lock_init(&dtab->index_lock); } else { - dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries * + dtab->netdev_map = bpf_map_area_alloc((u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *), dtab->map.numa_node); if (!dtab->netdev_map) diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index b4ebd60a6c16..80da1db47c68 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -543,7 +543,7 @@ int bpf_obj_get_user(const char __user *pathname, int flags) return PTR_ERR(raw); if (type == BPF_TYPE_PROG) - ret = (f_flags != O_RDWR) ? 
-EINVAL : bpf_prog_new_fd(raw); + ret = bpf_prog_new_fd(raw); else if (type == BPF_TYPE_MAP) ret = bpf_map_new_fd(raw, f_flags); else if (type == BPF_TYPE_LINK) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e04e33893cff..be38bb930bf1 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -11474,7 +11474,7 @@ static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len } } -static void adjust_poke_descs(struct bpf_prog *prog, u32 len) +static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len) { struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab; int i, sz = prog->aux->size_poke_tab; @@ -11482,6 +11482,8 @@ static void adjust_poke_descs(struct bpf_prog *prog, u32 len) for (i = 0; i < sz; i++) { desc = &tab[i]; + if (desc->insn_idx <= off) + continue; desc->insn_idx += len - 1; } } @@ -11502,7 +11504,7 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of if (adjust_insn_aux_data(env, new_prog, off, len)) return NULL; adjust_subprog_starts(env, off, len); - adjust_poke_descs(new_prog, len); + adjust_poke_descs(new_prog, off, len); return new_prog; } diff --git a/net/can/bcm.c b/net/can/bcm.c index e15a7dbe5f6c..508f67de0b80 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -785,6 +785,7 @@ static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh, bcm_rx_handler, op); list_del(&op->list); + synchronize_rcu(); bcm_remove_op(op); return 1; /* done */ } @@ -1533,9 +1534,13 @@ static int bcm_release(struct socket *sock) REGMASK(op->can_id), bcm_rx_handler, op); - bcm_remove_op(op); } + synchronize_rcu(); + + list_for_each_entry_safe(op, next, &bo->rx_ops, list) + bcm_remove_op(op); + #if IS_ENABLED(CONFIG_PROC_FS) /* remove procfs entry */ if (net->can.bcmproc_dir && bo->bcm_proc_read) diff --git a/net/can/gw.c b/net/can/gw.c index ba4124805602..d8861e862f15 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -596,6 +596,7 @@ static int cgw_notifier(struct 
notifier_block *nb, if (gwj->src.dev == dev || gwj->dst.dev == dev) { hlist_del(&gwj->list); cgw_unregister_filter(net, gwj); + synchronize_rcu(); kmem_cache_free(cgw_cache, gwj); } } @@ -1154,6 +1155,7 @@ static void cgw_remove_all_jobs(struct net *net) hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) { hlist_del(&gwj->list); cgw_unregister_filter(net, gwj); + synchronize_rcu(); kmem_cache_free(cgw_cache, gwj); } } @@ -1222,6 +1224,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, hlist_del(&gwj->list); cgw_unregister_filter(net, gwj); + synchronize_rcu(); kmem_cache_free(cgw_cache, gwj); err = 0; break; diff --git a/net/can/isotp.c b/net/can/isotp.c index 9fd274cf166b..caaa532ece94 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -1030,9 +1030,6 @@ static int isotp_release(struct socket *sock) lock_sock(sk); - hrtimer_cancel(&so->txtimer); - hrtimer_cancel(&so->rxtimer); - /* remove current filters & unregister */ if (so->bound && (!(so->opt.flags & CAN_ISOTP_SF_BROADCAST))) { if (so->ifindex) { @@ -1044,10 +1041,14 @@ static int isotp_release(struct socket *sock) SINGLE_MASK(so->rxid), isotp_rcv, sk); dev_put(dev); + synchronize_rcu(); } } } + hrtimer_cancel(&so->txtimer); + hrtimer_cancel(&so->rxtimer); + so->ifindex = 0; so->bound = 0; diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c index da3a7a7bcff2..08c8606cfd9c 100644 --- a/net/can/j1939/main.c +++ b/net/can/j1939/main.c @@ -193,6 +193,10 @@ static void j1939_can_rx_unregister(struct j1939_priv *priv) can_rx_unregister(dev_net(ndev), ndev, J1939_CAN_ID, J1939_CAN_MASK, j1939_can_recv, priv); + /* The last reference of priv is dropped by the RCU deferred + * j1939_sk_sock_destruct() of the last socket, so we can + * safely drop this reference here. 
+ */ j1939_priv_put(priv); } diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index bf18a32dc6ae..54f6d521492f 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -398,6 +398,9 @@ static int j1939_sk_init(struct sock *sk) atomic_set(&jsk->skb_pending, 0); spin_lock_init(&jsk->sk_session_queue_lock); INIT_LIST_HEAD(&jsk->sk_session_queue); + + /* j1939_sk_sock_destruct() depends on SOCK_RCU_FREE flag */ + sock_set_flag(sk, SOCK_RCU_FREE); sk->sk_destruct = j1939_sk_sock_destruct; sk->sk_protocol = CAN_J1939; @@ -673,7 +676,7 @@ static int j1939_sk_setsockopt(struct socket *sock, int level, int optname, switch (optname) { case SO_J1939_FILTER: - if (!sockptr_is_null(optval)) { + if (!sockptr_is_null(optval) && optlen != 0) { struct j1939_filter *f; int c; diff --git a/net/core/dev.c b/net/core/dev.c index d609366da95c..316b4032317e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5304,9 +5304,9 @@ another_round: if (static_branch_unlikely(&generic_xdp_needed_key)) { int ret2; - preempt_disable(); + migrate_disable(); ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb); - preempt_enable(); + migrate_enable(); if (ret2 != XDP_PASS) { ret = NET_RX_DROP; diff --git a/net/core/skmsg.c b/net/core/skmsg.c index f0b9decdf279..9b6160a191f8 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -399,28 +399,6 @@ out: } EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter); -int sk_msg_wait_data(struct sock *sk, struct sk_psock *psock, long timeo) -{ - DEFINE_WAIT_FUNC(wait, woken_wake_function); - int ret = 0; - - if (sk->sk_shutdown & RCV_SHUTDOWN) - return 1; - - if (!timeo) - return ret; - - add_wait_queue(sk_sleep(sk), &wait); - sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); - ret = sk_wait_event(sk, &timeo, - !list_empty(&psock->ingress_msg) || - !skb_queue_empty(&sk->sk_receive_queue), &wait); - sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); - remove_wait_queue(sk_sleep(sk), &wait); - return ret; -} -EXPORT_SYMBOL_GPL(sk_msg_wait_data); - /* Receive 
sk_msg from psock->ingress_msg to @msg. */ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, int len, int flags) @@ -600,6 +578,12 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb, return sk_psock_skb_ingress(psock, skb); } +static void sock_drop(struct sock *sk, struct sk_buff *skb) +{ + sk_drops_add(sk, skb); + kfree_skb(skb); +} + static void sk_psock_backlog(struct work_struct *work) { struct sk_psock *psock = container_of(work, struct sk_psock, work); @@ -639,7 +623,7 @@ start: /* Hard errors break pipe and stop xmit. */ sk_psock_report_error(psock, ret ? -ret : EPIPE); sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED); - kfree_skb(skb); + sock_drop(psock->sk, skb); goto end; } off += ret; @@ -730,7 +714,7 @@ static void __sk_psock_zap_ingress(struct sk_psock *psock) while ((skb = skb_dequeue(&psock->ingress_skb)) != NULL) { skb_bpf_redirect_clear(skb); - kfree_skb(skb); + sock_drop(psock->sk, skb); } __sk_psock_purge_ingress_msg(psock); } @@ -846,7 +830,7 @@ out: } EXPORT_SYMBOL_GPL(sk_psock_msg_verdict); -static void sk_psock_skb_redirect(struct sk_buff *skb) +static int sk_psock_skb_redirect(struct sk_psock *from, struct sk_buff *skb) { struct sk_psock *psock_other; struct sock *sk_other; @@ -856,8 +840,8 @@ static void sk_psock_skb_redirect(struct sk_buff *skb) * return code, but then didn't set a redirect interface. */ if (unlikely(!sk_other)) { - kfree_skb(skb); - return; + sock_drop(from->sk, skb); + return -EIO; } psock_other = sk_psock(sk_other); /* This error indicates the socket is being torn down or had another @@ -865,26 +849,30 @@ static void sk_psock_skb_redirect(struct sk_buff *skb) * a socket that is in this state so we drop the skb. 
*/ if (!psock_other || sock_flag(sk_other, SOCK_DEAD)) { - kfree_skb(skb); - return; + skb_bpf_redirect_clear(skb); + sock_drop(from->sk, skb); + return -EIO; } spin_lock_bh(&psock_other->ingress_lock); if (!sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) { spin_unlock_bh(&psock_other->ingress_lock); - kfree_skb(skb); - return; + skb_bpf_redirect_clear(skb); + sock_drop(from->sk, skb); + return -EIO; } skb_queue_tail(&psock_other->ingress_skb, skb); schedule_work(&psock_other->work); spin_unlock_bh(&psock_other->ingress_lock); + return 0; } -static void sk_psock_tls_verdict_apply(struct sk_buff *skb, struct sock *sk, int verdict) +static void sk_psock_tls_verdict_apply(struct sk_buff *skb, + struct sk_psock *from, int verdict) { switch (verdict) { case __SK_REDIRECT: - sk_psock_skb_redirect(skb); + sk_psock_skb_redirect(from, skb); break; case __SK_PASS: case __SK_DROP: @@ -908,20 +896,21 @@ int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb) ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb)); skb->sk = NULL; } - sk_psock_tls_verdict_apply(skb, psock->sk, ret); + sk_psock_tls_verdict_apply(skb, psock, ret); rcu_read_unlock(); return ret; } EXPORT_SYMBOL_GPL(sk_psock_tls_strp_read); -static void sk_psock_verdict_apply(struct sk_psock *psock, - struct sk_buff *skb, int verdict) +static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb, + int verdict) { struct sock *sk_other; - int err = -EIO; + int err = 0; switch (verdict) { case __SK_PASS: + err = -EIO; sk_other = psock->sk; if (sock_flag(sk_other, SOCK_DEAD) || !sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) { @@ -944,18 +933,25 @@ static void sk_psock_verdict_apply(struct sk_psock *psock, if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) { skb_queue_tail(&psock->ingress_skb, skb); schedule_work(&psock->work); + err = 0; } spin_unlock_bh(&psock->ingress_lock); + if (err < 0) { + skb_bpf_redirect_clear(skb); + goto out_free; + } } break; case 
__SK_REDIRECT: - sk_psock_skb_redirect(skb); + err = sk_psock_skb_redirect(psock, skb); break; case __SK_DROP: default: out_free: - kfree_skb(skb); + sock_drop(psock->sk, skb); } + + return err; } static void sk_psock_write_space(struct sock *sk) @@ -987,7 +983,7 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb) sk = strp->sk; psock = sk_psock(sk); if (unlikely(!psock)) { - kfree_skb(skb); + sock_drop(sk, skb); goto out; } prog = READ_ONCE(psock->progs.stream_verdict); @@ -1108,7 +1104,7 @@ static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb, psock = sk_psock(sk); if (unlikely(!psock)) { len = 0; - kfree_skb(skb); + sock_drop(sk, skb); goto out; } prog = READ_ONCE(psock->progs.stream_verdict); @@ -1122,7 +1118,8 @@ static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb, ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb)); skb->sk = NULL; } - sk_psock_verdict_apply(psock, skb, ret); + if (sk_psock_verdict_apply(psock, skb, ret) < 0) + len = 0; out: rcu_read_unlock(); return len; diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 6f1b82b8ad49..60decd6420ca 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -48,7 +48,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr) bpf_map_init_from_attr(&stab->map, attr); raw_spin_lock_init(&stab->lock); - stab->sks = bpf_map_area_alloc(stab->map.max_entries * + stab->sks = bpf_map_area_alloc((u64) stab->map.max_entries * sizeof(struct sock *), stab->map.numa_node); if (!stab->sks) { diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index f5362b9d75eb..f414ad246fdf 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -673,7 +673,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb); u32 padto; - padto = min(x->tfcpad, xfrm_state_mtu(x, dst->child_mtu_cached)); + padto = min(x->tfcpad, __xfrm_state_mtu(x, dst->child_mtu_cached)); if (skb->len < 
padto) esp.tfclen = padto - skb->len; } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index af8814a11378..a933bd6345b1 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -371,6 +371,8 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, fl4.flowi4_proto = 0; fl4.fl4_sport = 0; fl4.fl4_dport = 0; + } else { + swap(fl4.fl4_sport, fl4.fl4_dport); } if (fib_lookup(net, &fl4, &res, 0)) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 66aacb939d3e..99c06944501a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1306,7 +1306,7 @@ INDIRECT_CALLABLE_SCOPE unsigned int ipv4_mtu(const struct dst_entry *dst) mtu = dst_metric_raw(dst, RTAX_MTU); if (mtu) - return mtu; + goto out; mtu = READ_ONCE(dst->dev->mtu); @@ -1315,6 +1315,7 @@ INDIRECT_CALLABLE_SCOPE unsigned int ipv4_mtu(const struct dst_entry *dst) mtu = 576; } +out: mtu = min_t(unsigned int, mtu, IP_MAX_MTU); return mtu - lwtunnel_headroom(dst->lwtstate, mtu); diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index a80de92ea3b6..f26916a62f25 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -163,6 +163,28 @@ static bool tcp_bpf_stream_read(const struct sock *sk) return !empty; } +static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock, + long timeo) +{ + DEFINE_WAIT_FUNC(wait, woken_wake_function); + int ret = 0; + + if (sk->sk_shutdown & RCV_SHUTDOWN) + return 1; + + if (!timeo) + return ret; + + add_wait_queue(sk_sleep(sk), &wait); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); + ret = sk_wait_event(sk, &timeo, + !list_empty(&psock->ingress_msg) || + !skb_queue_empty(&sk->sk_receive_queue), &wait); + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); + remove_wait_queue(sk_sleep(sk), &wait); + return ret; +} + static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len) { @@ -188,7 +210,7 @@ msg_bytes_ready: int data; timeo = sock_rcvtimeo(sk, nonblock); - data = 
sk_msg_wait_data(sk, psock, timeo); + data = tcp_msg_wait_data(sk, psock, timeo); if (data) { if (!sk_psock_queue_empty(psock)) goto msg_bytes_ready; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f86ccbf7c135..62682807b4b2 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1798,11 +1798,13 @@ int udp_read_sock(struct sock *sk, read_descriptor_t *desc, if (used <= 0) { if (!copied) copied = used; + kfree_skb(skb); break; } else if (used <= skb->len) { copied += used; } + kfree_skb(skb); if (!desc->count) break; } diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c index b07e4b6dda25..45b8782aec0c 100644 --- a/net/ipv4/udp_bpf.c +++ b/net/ipv4/udp_bpf.c @@ -21,6 +21,45 @@ static int sk_udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, return udp_prot.recvmsg(sk, msg, len, noblock, flags, addr_len); } +static bool udp_sk_has_data(struct sock *sk) +{ + return !skb_queue_empty(&udp_sk(sk)->reader_queue) || + !skb_queue_empty(&sk->sk_receive_queue); +} + +static bool psock_has_data(struct sk_psock *psock) +{ + return !skb_queue_empty(&psock->ingress_skb) || + !sk_psock_queue_empty(psock); +} + +#define udp_msg_has_data(__sk, __psock) \ + ({ udp_sk_has_data(__sk) || psock_has_data(__psock); }) + +static int udp_msg_wait_data(struct sock *sk, struct sk_psock *psock, + long timeo) +{ + DEFINE_WAIT_FUNC(wait, woken_wake_function); + int ret = 0; + + if (sk->sk_shutdown & RCV_SHUTDOWN) + return 1; + + if (!timeo) + return ret; + + add_wait_queue(sk_sleep(sk), &wait); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); + ret = udp_msg_has_data(sk, psock); + if (!ret) { + wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); + ret = udp_msg_has_data(sk, psock); + } + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); + remove_wait_queue(sk_sleep(sk), &wait); + return ret; +} + static int udp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len) { @@ -34,8 +73,7 @@ static int udp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if 
(unlikely(!psock)) return sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len); - lock_sock(sk); - if (sk_psock_queue_empty(psock)) { + if (!psock_has_data(psock)) { ret = sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len); goto out; } @@ -47,9 +85,9 @@ msg_bytes_ready: int data; timeo = sock_rcvtimeo(sk, nonblock); - data = sk_msg_wait_data(sk, psock, timeo); + data = udp_msg_wait_data(sk, psock, timeo); if (data) { - if (!sk_psock_queue_empty(psock)) + if (psock_has_data(psock)) goto msg_bytes_ready; ret = sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len); goto out; @@ -58,7 +96,6 @@ msg_bytes_ready: } ret = copied; out: - release_sock(sk); sk_psock_put(sk, psock); return ret; } diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 37c4b1726c5e..ed2f061b8768 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -708,7 +708,7 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb); u32 padto; - padto = min(x->tfcpad, xfrm_state_mtu(x, dst->child_mtu_cached)); + padto = min(x->tfcpad, __xfrm_state_mtu(x, dst->child_mtu_cached)); if (skb->len < padto) esp.tfclen = padto - skb->len; } diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 56e479d158b7..26882e165c9e 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -135,18 +135,23 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs, len -= 2; while (len > 0) { - int optlen = nh[off + 1] + 2; - int i; + int optlen, i; - switch (nh[off]) { - case IPV6_TLV_PAD1: - optlen = 1; + if (nh[off] == IPV6_TLV_PAD1) { padlen++; if (padlen > 7) goto bad; - break; + off++; + len--; + continue; + } + if (len < 2) + goto bad; + optlen = nh[off + 1] + 2; + if (optlen > len) + goto bad; - case IPV6_TLV_PADN: + if (nh[off] == IPV6_TLV_PADN) { /* RFC 2460 states that the purpose of PadN is * to align the containing header to multiples * of 8. 7 is therefore the highest valid value. 
@@ -163,12 +168,7 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs, if (nh[off + i] != 0) goto bad; } - break; - - default: /* Other TLV code so scan list */ - if (optlen > len) - goto bad; - + } else { tlv_count++; if (tlv_count > max_count) goto bad; @@ -188,7 +188,6 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs, return false; padlen = 0; - break; } off += optlen; len -= optlen; @@ -306,7 +305,7 @@ fail_and_free: #endif if (ip6_parse_tlv(tlvprocdestopt_lst, skb, - init_net.ipv6.sysctl.max_dst_opts_cnt)) { + net->ipv6.sysctl.max_dst_opts_cnt)) { skb->transport_header += extlen; opt = IP6CB(skb); #if IS_ENABLED(CONFIG_IPV6_MIP6) @@ -1037,7 +1036,7 @@ fail_and_free: opt->flags |= IP6SKB_HOPBYHOP; if (ip6_parse_tlv(tlvprochopopt_lst, skb, - init_net.ipv6.sysctl.max_hbh_opts_cnt)) { + net->ipv6.sysctl.max_hbh_opts_cnt)) { skb->transport_header += extlen; opt = IP6CB(skb); opt->nhoff = sizeof(struct ipv6hdr); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 0b8a38687ce4..322698d9fcf4 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1240,8 +1240,6 @@ route_lookup: if (max_headroom > dev->needed_headroom) dev->needed_headroom = max_headroom; - skb_set_inner_ipproto(skb, proto); - err = ip6_tnl_encap(skb, t, &proto, fl6); if (err) return err; @@ -1378,6 +1376,8 @@ ipxip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; + skb_set_inner_ipproto(skb, protocol); + err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, protocol); if (err != 0) { diff --git a/net/mptcp/options.c b/net/mptcp/options.c index a05270996613..b5850afea343 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -942,19 +942,20 @@ reset: return false; } -static u64 expand_ack(u64 old_ack, u64 cur_ack, bool use_64bit) +u64 __mptcp_expand_seq(u64 old_seq, u64 cur_seq) { - u32 old_ack32, cur_ack32; - - if (use_64bit) - return cur_ack; - - old_ack32 = (u32)old_ack; 
- cur_ack32 = (u32)cur_ack; - cur_ack = (old_ack & GENMASK_ULL(63, 32)) + cur_ack32; - if (unlikely(before(cur_ack32, old_ack32))) - return cur_ack + (1LL << 32); - return cur_ack; + u32 old_seq32, cur_seq32; + + old_seq32 = (u32)old_seq; + cur_seq32 = (u32)cur_seq; + cur_seq = (old_seq & GENMASK_ULL(63, 32)) + cur_seq32; + if (unlikely(cur_seq32 < old_seq32 && before(old_seq32, cur_seq32))) + return cur_seq + (1LL << 32); + + /* reverse wrap could happen, too */ + if (unlikely(cur_seq32 > old_seq32 && after(old_seq32, cur_seq32))) + return cur_seq - (1LL << 32); + return cur_seq; } static void ack_update_msk(struct mptcp_sock *msk, @@ -972,7 +973,7 @@ static void ack_update_msk(struct mptcp_sock *msk, * more dangerous than missing an ack */ old_snd_una = msk->snd_una; - new_snd_una = expand_ack(old_snd_una, mp_opt->data_ack, mp_opt->ack64); + new_snd_una = mptcp_expand_seq(old_snd_una, mp_opt->data_ack, mp_opt->ack64); /* ACK for data not even sent yet? Ignore. */ if (after64(new_snd_una, snd_nxt)) @@ -1009,7 +1010,7 @@ bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool us return false; WRITE_ONCE(msk->rcv_data_fin_seq, - expand_ack(READ_ONCE(msk->ack_seq), data_fin_seq, use_64bit)); + mptcp_expand_seq(READ_ONCE(msk->ack_seq), data_fin_seq, use_64bit)); WRITE_ONCE(msk->rcv_data_fin, 1); return true; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 7bb82424e551..7a5afa8c6866 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2896,6 +2896,11 @@ static void mptcp_release_cb(struct sock *sk) spin_lock_bh(&sk->sk_lock.slock); } + /* be sure to set the current sk state before tacking actions + * depending on sk_state + */ + if (test_and_clear_bit(MPTCP_CONNECTED, &mptcp_sk(sk)->flags)) + __mptcp_set_connected(sk); if (test_and_clear_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->flags)) __mptcp_clean_una_wakeup(sk); if (test_and_clear_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags)) diff --git a/net/mptcp/protocol.h 
b/net/mptcp/protocol.h index d8ad3270dfab..426ed80fe72f 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -112,6 +112,7 @@ #define MPTCP_ERROR_REPORT 8 #define MPTCP_RETRANSMIT 9 #define MPTCP_WORK_SYNC_SETSOCKOPT 10 +#define MPTCP_CONNECTED 11 static inline bool before64(__u64 seq1, __u64 seq2) { @@ -600,6 +601,7 @@ void mptcp_get_options(const struct sock *sk, struct mptcp_options_received *mp_opt); void mptcp_finish_connect(struct sock *sk); +void __mptcp_set_connected(struct sock *sk); static inline bool mptcp_is_fully_established(struct sock *sk) { return inet_sk_state_load(sk) == TCP_ESTABLISHED && @@ -614,6 +616,14 @@ int mptcp_setsockopt(struct sock *sk, int level, int optname, int mptcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *option); +u64 __mptcp_expand_seq(u64 old_seq, u64 cur_seq); +static inline u64 mptcp_expand_seq(u64 old_seq, u64 cur_seq, bool use_64bit) +{ + if (use_64bit) + return cur_seq; + + return __mptcp_expand_seq(old_seq, cur_seq); +} void __mptcp_check_push(struct sock *sk, struct sock *ssk); void __mptcp_data_acked(struct sock *sk); void __mptcp_error_report(struct sock *sk); @@ -775,9 +785,6 @@ unsigned int mptcp_pm_get_add_addr_accept_max(struct mptcp_sock *msk); unsigned int mptcp_pm_get_subflows_max(struct mptcp_sock *msk); unsigned int mptcp_pm_get_local_addr_max(struct mptcp_sock *msk); -int mptcp_setsockopt(struct sock *sk, int level, int optname, - sockptr_t optval, unsigned int optlen); - void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk); void mptcp_sockopt_sync_all(struct mptcp_sock *msk); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 706a26a1b0fe..66d0b1893d26 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -373,6 +373,24 @@ static bool subflow_use_different_dport(struct mptcp_sock *msk, const struct soc return inet_sk(sk)->inet_dport != inet_sk((struct sock *)msk)->inet_dport; } +void __mptcp_set_connected(struct sock *sk) +{ 
+ if (sk->sk_state == TCP_SYN_SENT) { + inet_sk_state_store(sk, TCP_ESTABLISHED); + sk->sk_state_change(sk); + } +} + +static void mptcp_set_connected(struct sock *sk) +{ + mptcp_data_lock(sk); + if (!sock_owned_by_user(sk)) + __mptcp_set_connected(sk); + else + set_bit(MPTCP_CONNECTED, &mptcp_sk(sk)->flags); + mptcp_data_unlock(sk); +} + static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); @@ -381,10 +399,6 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->icsk_af_ops->sk_rx_dst_set(sk, skb); - if (inet_sk_state_load(parent) == TCP_SYN_SENT) { - inet_sk_state_store(parent, TCP_ESTABLISHED); - parent->sk_state_change(parent); - } /* be sure no special action on any packet other than syn-ack */ if (subflow->conn_finished) @@ -417,6 +431,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->remote_key); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK); mptcp_finish_connect(sk); + mptcp_set_connected(parent); } else if (subflow->request_join) { u8 hmac[SHA256_DIGEST_SIZE]; @@ -457,6 +472,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) } else if (mptcp_check_fallback(sk)) { fallback: mptcp_rcv_space_init(mptcp_sk(parent), sk); + mptcp_set_connected(parent); } return; @@ -564,6 +580,7 @@ static void mptcp_sock_destruct(struct sock *sk) static void mptcp_force_close(struct sock *sk) { + /* the msk is not yet exposed to user-space */ inet_sk_state_store(sk, TCP_CLOSE); sk_common_release(sk); } @@ -781,15 +798,6 @@ enum mapping_status { MAPPING_DUMMY }; -static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq) -{ - if ((u32)seq == (u32)old_seq) - return old_seq; - - /* Assume map covers data not mapped yet. 
*/ - return seq | ((old_seq + old_data_len + 1) & GENMASK_ULL(63, 32)); -} - static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn) { pr_debug("Bad mapping: ssn=%d map_seq=%d map_data_len=%d", @@ -995,13 +1003,7 @@ static enum mapping_status get_mapping_status(struct sock *ssk, data_len--; } - if (!mpext->dsn64) { - map_seq = expand_seq(subflow->map_seq, subflow->map_data_len, - mpext->data_seq); - pr_debug("expanded seq=%llu", subflow->map_seq); - } else { - map_seq = mpext->data_seq; - } + map_seq = mptcp_expand_seq(READ_ONCE(msk->ack_seq), mpext->data_seq, mpext->dsn64); WRITE_ONCE(mptcp_sk(subflow->conn)->use_64bit_ack, !!mpext->dsn64); if (subflow->map_valid) { @@ -1592,10 +1594,7 @@ static void subflow_state_change(struct sock *sk) mptcp_rcv_space_init(mptcp_sk(parent), sk); pr_fallback(mptcp_sk(parent)); subflow->conn_finished = 1; - if (inet_sk_state_load(parent) == TCP_SYN_SENT) { - inet_sk_state_store(parent, TCP_ESTABLISHED); - parent->sk_state_change(parent); - } + mptcp_set_connected(parent); } /* as recvmsg() does not acquire the subflow socket for ssk selection diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d6214242fe7f..390d4466567f 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -571,7 +571,7 @@ static struct nft_table *nft_table_lookup(const struct net *net, table->family == family && nft_active_genmask(table, genmask)) { if (nft_table_has_owner(table) && - table->nlpid != nlpid) + nlpid && table->nlpid != nlpid) return ERR_PTR(-EPERM); return table; @@ -583,7 +583,7 @@ static struct nft_table *nft_table_lookup(const struct net *net, static struct nft_table *nft_table_lookup_byhandle(const struct net *net, const struct nlattr *nla, - u8 genmask) + u8 genmask, u32 nlpid) { struct nftables_pernet *nft_net; struct nft_table *table; @@ -591,8 +591,13 @@ static struct nft_table *nft_table_lookup_byhandle(const struct net *net, nft_net = nft_pernet(net); 
list_for_each_entry(table, &nft_net->tables, list) { if (be64_to_cpu(nla_get_be64(nla)) == table->handle && - nft_active_genmask(table, genmask)) + nft_active_genmask(table, genmask)) { + if (nft_table_has_owner(table) && + nlpid && table->nlpid != nlpid) + return ERR_PTR(-EPERM); + return table; + } } return ERR_PTR(-ENOENT); @@ -1276,7 +1281,8 @@ static int nf_tables_deltable(struct sk_buff *skb, const struct nfnl_info *info, if (nla[NFTA_TABLE_HANDLE]) { attr = nla[NFTA_TABLE_HANDLE]; - table = nft_table_lookup_byhandle(net, attr, genmask); + table = nft_table_lookup_byhandle(net, attr, genmask, + NETLINK_CB(skb).portid); } else { attr = nla[NFTA_TABLE_NAME]; table = nft_table_lookup(net, attr, family, genmask, @@ -3237,8 +3243,8 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, struct nft_rule *rule, *old_rule = NULL; struct nft_expr_info *expr_info = NULL; u8 family = info->nfmsg->nfgen_family; + struct nft_flow_rule *flow = NULL; struct net *net = info->net; - struct nft_flow_rule *flow; struct nft_userdata *udata; struct nft_table *table; struct nft_chain *chain; @@ -3333,13 +3339,13 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, nla_for_each_nested(tmp, nla[NFTA_RULE_EXPRESSIONS], rem) { err = -EINVAL; if (nla_type(tmp) != NFTA_LIST_ELEM) - goto err1; + goto err_release_expr; if (n == NFT_RULE_MAXEXPRS) - goto err1; + goto err_release_expr; err = nf_tables_expr_parse(&ctx, tmp, &expr_info[n]); if (err < 0) { NL_SET_BAD_ATTR(extack, tmp); - goto err1; + goto err_release_expr; } size += expr_info[n].ops->size; n++; @@ -3348,7 +3354,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, /* Check for overflow of dlen field */ err = -EFBIG; if (size >= 1 << 12) - goto err1; + goto err_release_expr; if (nla[NFTA_RULE_USERDATA]) { ulen = nla_len(nla[NFTA_RULE_USERDATA]); @@ -3359,7 +3365,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info 
*info, err = -ENOMEM; rule = kzalloc(sizeof(*rule) + size + usize, GFP_KERNEL); if (rule == NULL) - goto err1; + goto err_release_expr; nft_activate_next(net, rule); @@ -3378,7 +3384,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, err = nf_tables_newexpr(&ctx, &expr_info[i], expr); if (err < 0) { NL_SET_BAD_ATTR(extack, expr_info[i].attr); - goto err2; + goto err_release_rule; } if (expr_info[i].ops->validate) @@ -3388,16 +3394,24 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, expr = nft_expr_next(expr); } + if (chain->flags & NFT_CHAIN_HW_OFFLOAD) { + flow = nft_flow_rule_create(net, rule); + if (IS_ERR(flow)) { + err = PTR_ERR(flow); + goto err_release_rule; + } + } + if (info->nlh->nlmsg_flags & NLM_F_REPLACE) { trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule); if (trans == NULL) { err = -ENOMEM; - goto err2; + goto err_destroy_flow_rule; } err = nft_delrule(&ctx, old_rule); if (err < 0) { nft_trans_destroy(trans); - goto err2; + goto err_destroy_flow_rule; } list_add_tail_rcu(&rule->list, &old_rule->list); @@ -3405,7 +3419,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule); if (!trans) { err = -ENOMEM; - goto err2; + goto err_destroy_flow_rule; } if (info->nlh->nlmsg_flags & NLM_F_APPEND) { @@ -3423,21 +3437,19 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, kvfree(expr_info); chain->use++; + if (flow) + nft_trans_flow_rule(trans) = flow; + if (nft_net->validate_state == NFT_VALIDATE_DO) return nft_table_validate(net, table); - if (chain->flags & NFT_CHAIN_HW_OFFLOAD) { - flow = nft_flow_rule_create(net, rule); - if (IS_ERR(flow)) - return PTR_ERR(flow); - - nft_trans_flow_rule(trans) = flow; - } - return 0; -err2: + +err_destroy_flow_rule: + nft_flow_rule_destroy(flow); +err_release_rule: nf_tables_rule_release(&ctx, rule); -err1: +err_release_expr: for (i = 
0; i < n; i++) { if (expr_info[i].ops) { module_put(expr_info[i].ops->type->owner); @@ -8805,11 +8817,16 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_rule_expr_deactivate(&trans->ctx, nft_trans_rule(trans), NFT_TRANS_ABORT); + if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) + nft_flow_rule_destroy(nft_trans_flow_rule(trans)); break; case NFT_MSG_DELRULE: trans->ctx.chain->use++; nft_clear(trans->ctx.net, nft_trans_rule(trans)); nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans)); + if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) + nft_flow_rule_destroy(nft_trans_flow_rule(trans)); + nft_trans_destroy(trans); break; case NFT_MSG_NEWSET: diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index a48c5fd53a80..b58d73a96523 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -54,15 +54,10 @@ static void nft_flow_rule_transfer_vlan(struct nft_offload_ctx *ctx, struct nft_flow_rule *flow) { struct nft_flow_match *match = &flow->match; - struct nft_offload_ethertype ethertype; - - if (match->dissector.used_keys & BIT(FLOW_DISSECTOR_KEY_CONTROL) && - match->key.basic.n_proto != htons(ETH_P_8021Q) && - match->key.basic.n_proto != htons(ETH_P_8021AD)) - return; - - ethertype.value = match->key.basic.n_proto; - ethertype.mask = match->mask.basic.n_proto; + struct nft_offload_ethertype ethertype = { + .value = match->key.basic.n_proto, + .mask = match->mask.basic.n_proto, + }; if (match->dissector.used_keys & BIT(FLOW_DISSECTOR_KEY_VLAN) && (match->key.vlan.vlan_tpid == htons(ETH_P_8021Q) || @@ -76,7 +71,9 @@ static void nft_flow_rule_transfer_vlan(struct nft_offload_ctx *ctx, match->dissector.offset[FLOW_DISSECTOR_KEY_CVLAN] = offsetof(struct nft_flow_key, cvlan); match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_CVLAN); - } else { + } else if (match->dissector.used_keys & BIT(FLOW_DISSECTOR_KEY_BASIC) && + (match->key.basic.n_proto == 
htons(ETH_P_8021Q) || + match->key.basic.n_proto == htons(ETH_P_8021AD))) { match->key.basic.n_proto = match->key.vlan.vlan_tpid; match->mask.basic.n_proto = match->mask.vlan.vlan_tpid; match->key.vlan.vlan_tpid = ethertype.value; @@ -594,23 +591,6 @@ int nft_flow_rule_offload_commit(struct net *net) } } - list_for_each_entry(trans, &nft_net->commit_list, list) { - if (trans->ctx.family != NFPROTO_NETDEV) - continue; - - switch (trans->msg_type) { - case NFT_MSG_NEWRULE: - case NFT_MSG_DELRULE: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) - continue; - - nft_flow_rule_destroy(nft_trans_flow_rule(trans)); - break; - default: - break; - } - } - return err; } diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 4f583d2e220e..af4ee874a067 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -44,6 +44,9 @@ static void nft_exthdr_ipv6_eval(const struct nft_expr *expr, unsigned int offset = 0; int err; + if (pkt->skb->protocol != htons(ETH_P_IPV6)) + goto err; + err = ipv6_find_hdr(pkt->skb, &offset, priv->type, NULL, NULL); if (priv->flags & NFT_EXTHDR_F_PRESENT) { nft_reg_store8(dest, err >= 0); diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c index ac61f708b82d..d82677e83400 100644 --- a/net/netfilter/nft_osf.c +++ b/net/netfilter/nft_osf.c @@ -28,6 +28,11 @@ static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs, struct nf_osf_data data; struct tcphdr _tcph; + if (pkt->tprot != IPPROTO_TCP) { + regs->verdict.code = NFT_BREAK; + return; + } + tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), &_tcph); if (!tcp) { diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c index 18e79c0fd3cf..b5b09a902c7a 100644 --- a/net/netfilter/nft_tproxy.c +++ b/net/netfilter/nft_tproxy.c @@ -30,6 +30,12 @@ static void nft_tproxy_eval_v4(const struct nft_expr *expr, __be16 tport = 0; struct sock *sk; + if (pkt->tprot != IPPROTO_TCP && + pkt->tprot != IPPROTO_UDP) { 
+ regs->verdict.code = NFT_BREAK; + return; + } + hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr); if (!hp) { regs->verdict.code = NFT_BREAK; @@ -91,7 +97,8 @@ static void nft_tproxy_eval_v6(const struct nft_expr *expr, memset(&taddr, 0, sizeof(taddr)); - if (!pkt->tprot_set) { + if (pkt->tprot != IPPROTO_TCP && + pkt->tprot != IPPROTO_UDP) { regs->verdict.code = NFT_BREAK; return; } diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index c4007b9cd16d..5b274534264c 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -304,7 +304,7 @@ static int tcindex_alloc_perfect_hash(struct net *net, struct tcindex_data *cp) int i, err = 0; cp->perfect = kcalloc(cp->hash, sizeof(struct tcindex_filter_result), - GFP_KERNEL); + GFP_KERNEL | __GFP_NOWARN); if (!cp->perfect) return -ENOMEM; diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 1db9d4a2ef5e..b692a0de1ad5 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -485,11 +485,6 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (cl->qdisc != &noop_qdisc) qdisc_hash_add(cl->qdisc, true); - sch_tree_lock(sch); - qdisc_class_hash_insert(&q->clhash, &cl->common); - sch_tree_unlock(sch); - - qdisc_class_hash_grow(sch, &q->clhash); set_change_agg: sch_tree_lock(sch); @@ -507,8 +502,11 @@ set_change_agg: } if (existing) qfq_deact_rm_from_agg(q, cl); + else + qdisc_class_hash_insert(&q->clhash, &cl->common); qfq_add_to_agg(q, new_agg, cl); sch_tree_unlock(sch); + qdisc_class_hash_grow(sch, &q->clhash); *arg = (unsigned long)cl; return 0; diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 53e5ed79f63f..59e653b528b1 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -270,22 +270,19 @@ int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list, rawaddr = (union sctp_addr_param *)raw_addr_list; af = sctp_get_af_specific(param_type2af(param->type)); - if (unlikely(!af)) { + if (unlikely(!af) || + 
!af->from_addr_param(&addr, rawaddr, htons(port), 0)) { retval = -EINVAL; - sctp_bind_addr_clean(bp); - break; + goto out_err; } - af->from_addr_param(&addr, rawaddr, htons(port), 0); if (sctp_bind_addr_state(bp, &addr) != -1) goto next; retval = sctp_add_bind_addr(bp, &addr, sizeof(addr), SCTP_ADDR_SRC, gfp); - if (retval) { + if (retval) /* Can't finish building the list, clean up. */ - sctp_bind_addr_clean(bp); - break; - } + goto out_err; next: len = ntohs(param->length); @@ -294,6 +291,12 @@ next: } return retval; + +out_err: + if (retval) + sctp_bind_addr_clean(bp); + + return retval; } /******************************************************************** diff --git a/net/sctp/input.c b/net/sctp/input.c index 76dcc137f761..02e73264e81e 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -1155,7 +1155,8 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net, if (!af) continue; - af->from_addr_param(paddr, params.addr, sh->source, 0); + if (!af->from_addr_param(paddr, params.addr, sh->source, 0)) + continue; asoc = __sctp_lookup_association(net, laddr, paddr, transportp); if (asoc) @@ -1191,6 +1192,9 @@ static struct sctp_association *__sctp_rcv_asconf_lookup( union sctp_addr_param *param; union sctp_addr paddr; + if (ntohs(ch->length) < sizeof(*asconf) + sizeof(struct sctp_paramhdr)) + return NULL; + /* Skip over the ADDIP header and find the Address parameter */ param = (union sctp_addr_param *)(asconf + 1); @@ -1198,7 +1202,8 @@ static struct sctp_association *__sctp_rcv_asconf_lookup( if (unlikely(!af)) return NULL; - af->from_addr_param(&paddr, param, peer_port, 0); + if (af->from_addr_param(&paddr, param, peer_port, 0)) + return NULL; return __sctp_lookup_association(net, laddr, &paddr, transportp); } @@ -1269,7 +1274,7 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net, ch = (struct sctp_chunkhdr *)ch_end; chunk_num++; - } while (ch_end < skb_tail_pointer(skb)); + } while (ch_end + sizeof(*ch) < 
skb_tail_pointer(skb)); return asoc; } diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index d041bed86322..e48dd909dee5 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -577,15 +577,20 @@ static void sctp_v6_to_sk_daddr(union sctp_addr *addr, struct sock *sk) } /* Initialize a sctp_addr from an address parameter. */ -static void sctp_v6_from_addr_param(union sctp_addr *addr, +static bool sctp_v6_from_addr_param(union sctp_addr *addr, union sctp_addr_param *param, __be16 port, int iif) { + if (ntohs(param->v6.param_hdr.length) < sizeof(struct sctp_ipv6addr_param)) + return false; + addr->v6.sin6_family = AF_INET6; addr->v6.sin6_port = port; addr->v6.sin6_flowinfo = 0; /* BUG */ addr->v6.sin6_addr = param->v6.addr; addr->v6.sin6_scope_id = iif; + + return true; } /* Initialize an address parameter from a sctp_addr and return the length diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index bc5db0b404ce..3c1fbf38f4f7 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -254,14 +254,19 @@ static void sctp_v4_to_sk_daddr(union sctp_addr *addr, struct sock *sk) } /* Initialize a sctp_addr from an address parameter. 
*/ -static void sctp_v4_from_addr_param(union sctp_addr *addr, +static bool sctp_v4_from_addr_param(union sctp_addr *addr, union sctp_addr_param *param, __be16 port, int iif) { + if (ntohs(param->v4.param_hdr.length) < sizeof(struct sctp_ipv4addr_param)) + return false; + addr->v4.sin_family = AF_INET; addr->v4.sin_port = port; addr->v4.sin_addr.s_addr = param->v4.addr.s_addr; memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero)); + + return true; } /* Initialize an address parameter from a sctp_addr and return the length diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index b0eaa93a9cc6..6c08e5048d38 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2195,9 +2195,16 @@ static enum sctp_ierror sctp_verify_param(struct net *net, break; case SCTP_PARAM_SET_PRIMARY: - if (ep->asconf_enable) - break; - goto unhandled; + if (!ep->asconf_enable) + goto unhandled; + + if (ntohs(param.p->length) < sizeof(struct sctp_addip_param) + + sizeof(struct sctp_paramhdr)) { + sctp_process_inv_paramlength(asoc, param.p, + chunk, err_chunk); + retval = SCTP_IERROR_ABORT; + } + break; case SCTP_PARAM_HOST_NAME_ADDRESS: /* Tell the peer, we won't support this param. */ @@ -2375,11 +2382,13 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk, /* Process the initialization parameters. 
*/ sctp_walk_params(param, peer_init, init_hdr.params) { - if (!src_match && (param.p->type == SCTP_PARAM_IPV4_ADDRESS || - param.p->type == SCTP_PARAM_IPV6_ADDRESS)) { + if (!src_match && + (param.p->type == SCTP_PARAM_IPV4_ADDRESS || + param.p->type == SCTP_PARAM_IPV6_ADDRESS)) { af = sctp_get_af_specific(param_type2af(param.p->type)); - af->from_addr_param(&addr, param.addr, - chunk->sctp_hdr->source, 0); + if (!af->from_addr_param(&addr, param.addr, + chunk->sctp_hdr->source, 0)) + continue; if (sctp_cmp_addr_exact(sctp_source(chunk), &addr)) src_match = 1; } @@ -2560,7 +2569,8 @@ static int sctp_process_param(struct sctp_association *asoc, break; do_addr_param: af = sctp_get_af_specific(param_type2af(param.p->type)); - af->from_addr_param(&addr, param.addr, htons(asoc->peer.port), 0); + if (!af->from_addr_param(&addr, param.addr, htons(asoc->peer.port), 0)) + break; scope = sctp_scope(peer_addr); if (sctp_in_scope(net, &addr, scope)) if (!sctp_assoc_add_peer(asoc, &addr, gfp, SCTP_UNCONFIRMED)) @@ -2661,15 +2671,13 @@ do_addr_param: addr_param = param.v + sizeof(struct sctp_addip_param); af = sctp_get_af_specific(param_type2af(addr_param->p.type)); - if (af == NULL) + if (!af) break; - af->from_addr_param(&addr, addr_param, - htons(asoc->peer.port), 0); + if (!af->from_addr_param(&addr, addr_param, + htons(asoc->peer.port), 0)) + break; - /* if the address is invalid, we can't process it. - * XXX: see spec for what to do. - */ if (!af->addr_valid(&addr, NULL, NULL)) break; @@ -3083,7 +3091,8 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc, if (unlikely(!af)) return SCTP_ERROR_DNS_FAILED; - af->from_addr_param(&addr, addr_param, htons(asoc->peer.port), 0); + if (!af->from_addr_param(&addr, addr_param, htons(asoc->peer.port), 0)) + return SCTP_ERROR_DNS_FAILED; /* ADDIP 4.2.1 This parameter MUST NOT contain a broadcast * or multicast address. 
@@ -3360,7 +3369,8 @@ static void sctp_asconf_param_success(struct sctp_association *asoc, /* We have checked the packet before, so we do not check again. */ af = sctp_get_af_specific(param_type2af(addr_param->p.type)); - af->from_addr_param(&addr, addr_param, htons(bp->port), 0); + if (!af->from_addr_param(&addr, addr_param, htons(bp->port), 0)) + return; switch (asconf_param->param_hdr.type) { case SCTP_PARAM_ADD_IP: diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index f0fbb079cbaa..4feb95e34b64 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1153,7 +1153,7 @@ static int tls_sw_do_sendpage(struct sock *sk, struct page *page, int ret = 0; bool eor; - eor = !(flags & (MSG_MORE | MSG_SENDPAGE_NOTLAST)); + eor = !(flags & MSG_SENDPAGE_NOTLAST); sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); /* Call the sk_stream functions to manage the sndbuf mem. */ diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 9f12da1ff406..3e02cc3b24f8 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1395,7 +1395,7 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr, if (signal_pending(current)) { err = sock_intr_errno(timeout); - sk->sk_state = TCP_CLOSE; + sk->sk_state = sk->sk_state == TCP_ESTABLISHED ? 
TCP_CLOSING : TCP_CLOSE; sock->state = SS_UNCONNECTED; vsock_transport_cancel_pkt(vsk); goto out_wait; diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index 9d2a89d793c0..9ae13cccfb28 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -128,12 +128,15 @@ static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr) static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc) { - u64 chunk; - - if (desc->len > pool->chunk_size) - return false; + u64 chunk, chunk_end; chunk = xp_aligned_extract_addr(pool, desc->addr); + if (likely(desc->len)) { + chunk_end = xp_aligned_extract_addr(pool, desc->addr + desc->len - 1); + if (chunk != chunk_end) + return false; + } + if (chunk >= pool->addrs_cnt) return false; diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 6d6917b68856..e843b0d9e2a6 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -268,6 +268,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, xso->num_exthdrs = 0; xso->flags = 0; xso->dev = NULL; + xso->real_dev = NULL; dev_put(dev); if (err != -EOPNOTSUPP) diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index ab2fbe432261..229544bc70c2 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -827,15 +827,8 @@ out: static int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb) { #if IS_ENABLED(CONFIG_IPV6) - unsigned int ptr = 0; int err; - if (x->outer_mode.encap == XFRM_MODE_BEET && - ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL, NULL) >= 0) { - net_warn_ratelimited("BEET mode doesn't support inner IPv6 fragments\n"); - return -EAFNOSUPPORT; - } - err = xfrm6_tunnel_check_size(skb); if (err) return err; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 1e24b21457f7..837df4b5c1bc 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2092,12 +2092,15 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 
type, if (unlikely(!daddr || !saddr)) return NULL; - rcu_read_lock(); retry: - do { - sequence = read_seqcount_begin(&xfrm_policy_hash_generation); - chain = policy_hash_direct(net, daddr, saddr, family, dir); - } while (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)); + sequence = read_seqcount_begin(&xfrm_policy_hash_generation); + rcu_read_lock(); + + chain = policy_hash_direct(net, daddr, saddr, family, dir); + if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) { + rcu_read_unlock(); + goto retry; + } ret = NULL; hlist_for_each_entry_rcu(pol, chain, bydst) { @@ -2128,11 +2131,15 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, } skip_inexact: - if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) + if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) { + rcu_read_unlock(); goto retry; + } - if (ret && !xfrm_pol_hold_rcu(ret)) + if (ret && !xfrm_pol_hold_rcu(ret)) { + rcu_read_unlock(); goto retry; + } fail: rcu_read_unlock(); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index c2ce1e6f4760..a2f4001221d1 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2550,7 +2550,7 @@ void xfrm_state_delete_tunnel(struct xfrm_state *x) } EXPORT_SYMBOL(xfrm_state_delete_tunnel); -u32 xfrm_state_mtu(struct xfrm_state *x, int mtu) +u32 __xfrm_state_mtu(struct xfrm_state *x, int mtu) { const struct xfrm_type *type = READ_ONCE(x->type); struct crypto_aead *aead; @@ -2581,7 +2581,17 @@ u32 xfrm_state_mtu(struct xfrm_state *x, int mtu) return ((mtu - x->props.header_len - crypto_aead_authsize(aead) - net_adj) & ~(blksize - 1)) + net_adj - 2; } -EXPORT_SYMBOL_GPL(xfrm_state_mtu); +EXPORT_SYMBOL_GPL(__xfrm_state_mtu); + +u32 xfrm_state_mtu(struct xfrm_state *x, int mtu) +{ + mtu = __xfrm_state_mtu(x, mtu); + + if (x->props.family == AF_INET6 && mtu < IPV6_MIN_MTU) + return IPV6_MIN_MTU; + + return mtu; +} int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool 
offload) { diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index f0aecee4d539..b47d613409b7 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -580,6 +580,20 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, copy_from_user_state(x, p); + if (attrs[XFRMA_ENCAP]) { + x->encap = kmemdup(nla_data(attrs[XFRMA_ENCAP]), + sizeof(*x->encap), GFP_KERNEL); + if (x->encap == NULL) + goto error; + } + + if (attrs[XFRMA_COADDR]) { + x->coaddr = kmemdup(nla_data(attrs[XFRMA_COADDR]), + sizeof(*x->coaddr), GFP_KERNEL); + if (x->coaddr == NULL) + goto error; + } + if (attrs[XFRMA_SA_EXTRA_FLAGS]) x->props.extra_flags = nla_get_u32(attrs[XFRMA_SA_EXTRA_FLAGS]); @@ -600,23 +614,9 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, attrs[XFRMA_ALG_COMP]))) goto error; - if (attrs[XFRMA_ENCAP]) { - x->encap = kmemdup(nla_data(attrs[XFRMA_ENCAP]), - sizeof(*x->encap), GFP_KERNEL); - if (x->encap == NULL) - goto error; - } - if (attrs[XFRMA_TFCPAD]) x->tfcpad = nla_get_u32(attrs[XFRMA_TFCPAD]); - if (attrs[XFRMA_COADDR]) { - x->coaddr = kmemdup(nla_data(attrs[XFRMA_COADDR]), - sizeof(*x->coaddr), GFP_KERNEL); - if (x->coaddr == NULL) - goto error; - } - xfrm_mark_get(attrs, &x->mark); xfrm_smark_init(attrs, &x->props.smark); diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index 7550fd9c3188..3ad9301b0f00 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -655,6 +655,9 @@ static int symbols_patch(struct object *obj) if (sets_patch(obj)) return -1; + /* Set type to ensure endian translation occurs. 
*/ + obj->efile.idlist->d_type = ELF_T_WORD; + elf_flagdata(obj->efile.idlist, ELF_C_SET, ELF_F_DIRTY); err = elf_update(obj->efile.elf, ELF_C_WRITE); diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index 0f066b89b4af..515229f24a93 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -1610,6 +1610,7 @@ static void udp_redir_to_connected(int family, int sotype, int sock_mapfd, struct sockaddr_storage addr; int c0, c1, p0, p1; unsigned int pass; + int retries = 100; socklen_t len; int err, n; u64 value; @@ -1686,9 +1687,13 @@ static void udp_redir_to_connected(int family, int sotype, int sock_mapfd, if (pass != 1) FAIL("%s: want pass count 1, have %d", log_prefix, pass); +again: n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1); - if (n < 0) + if (n < 0) { + if (errno == EAGAIN && retries--) + goto again; FAIL_ERRNO("%s: read", log_prefix); + } if (n == 0) FAIL("%s: incomplete read", log_prefix); |